|
task,metric,value,err,version
|
|
anli_r1,acc,0.34,0.014987482264363937,0
|
|
anli_r2,acc,0.327,0.014842213153411242,0
|
|
anli_r3,acc,0.33416666666666667,0.013622434813136783,0
|
|
arc_challenge,acc,0.27303754266211605,0.013019332762635746,0
|
|
arc_challenge,acc_norm,0.2841296928327645,0.013179442447653886,0
|
|
arc_easy,acc,0.5951178451178452,0.010072423960395703,0
|
|
arc_easy,acc_norm,0.561026936026936,0.01018307601297206,0
|
|
boolq,acc,0.5813455657492355,0.008628545022868549,1
|
|
cb,acc,0.4642857142857143,0.06724777654937658,1
|
|
cb,f1,0.32523809523809527,,1
|
|
copa,acc,0.74,0.04408440022768079,0
|
|
hellaswag,acc,0.4448317068313085,0.00495931519801116,0
|
|
hellaswag,acc_norm,0.578370842461661,0.004928105880776072,0
|
|
piqa,acc,0.7285092491838956,0.010376251176596137,0
|
|
piqa,acc_norm,0.735582154515778,0.01028978724476716,0
|
|
rte,acc,0.5451263537906137,0.029973636495415255,0
|
|
sciq,acc,0.862,0.0109121526325044,0
|
|
sciq,acc_norm,0.828,0.011939788882495321,0
|
|
storycloze_2016,acc,0.7033671833244255,0.01056281918156322,0
|
|
winogrande,acc,0.5588003157063931,0.013954975072834731,0
|
|
|