|
task,metric,value,err,version
|
|
anli_r1,acc,0.342,0.01500870618212173,0
|
|
anli_r2,acc,0.34,0.014987482264363937,0
|
|
anli_r3,acc,0.335,0.013630871843821479,0
|
|
arc_challenge,acc,0.34044368600682595,0.013847460518892981,0
|
|
arc_challenge,acc_norm,0.36945392491467577,0.0141045783664919,0
|
|
arc_easy,acc,0.680976430976431,0.009564133249441074,0
|
|
arc_easy,acc_norm,0.6658249158249159,0.009679106032919058,0
|
|
boolq,acc,0.6501529051987768,0.008341409251946758,1
|
|
cb,acc,0.48214285714285715,0.06737697508644648,1
|
|
cb,f1,0.31573655103066867,,1
|
|
copa,acc,0.83,0.03775251680686371,0
|
|
hellaswag,acc,0.4805815574586736,0.00498601693867853,0
|
|
hellaswag,acc_norm,0.6378211511651065,0.004796478664403837,0
|
|
piqa,acc,0.7383025027203483,0.010255630772708229,0
|
|
piqa,acc_norm,0.735038084874864,0.010296557993316037,0
|
|
rte,acc,0.5487364620938628,0.029953149241808946,0
|
|
sciq,acc,0.927,0.00823035471524406,0
|
|
sciq,acc_norm,0.921,0.008534156773333442,0
|
|
storycloze_2016,acc,0.7493319080705505,0.010022263975606228,0
|
|
winogrande,acc,0.6503551696921863,0.013402073680850508,0
|
|
|