|
task,metric,value,err,version
|
|
anli_r1,acc,0.33,0.014876872027456732,0
|
|
anli_r2,acc,0.33,0.014876872027456736,0
|
|
anli_r3,acc,0.33166666666666667,0.01359683672948516,0
|
|
arc_challenge,acc,0.3378839590443686,0.013822047922283507,0
|
|
arc_challenge,acc_norm,0.3660409556313993,0.014077223108470144,0
|
|
arc_easy,acc,0.6742424242424242,0.009616642976885964,0
|
|
arc_easy,acc_norm,0.6523569023569024,0.009771868846830909,0
|
|
boolq,acc,0.6428134556574924,0.008380743796951404,1
|
|
cb,acc,0.5178571428571429,0.06737697508644648,1
|
|
cb,f1,0.35968427443837275,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.4790878311093408,0.004985415250690917,0
|
|
hellaswag,acc_norm,0.6304521011750648,0.004816958817726088,0
|
|
piqa,acc,0.7404787812840044,0.010227939888173918,0
|
|
piqa,acc_norm,0.7388465723612623,0.010248738649935587,0
|
|
rte,acc,0.5703971119133574,0.02979666882912467,0
|
|
sciq,acc,0.923,0.008434580140240644,0
|
|
sciq,acc_norm,0.901,0.009449248027662746,0
|
|
storycloze_2016,acc,0.7365045430251203,0.010187168219156485,0
|
|
winogrande,acc,0.6235201262825573,0.013616931960667187,0
|
|
|