|
task,metric,value,err,version
|
|
anli_r1,acc,0.338,0.014965960710224468,0
|
|
anli_r2,acc,0.316,0.01470919305605713,0
|
|
anli_r3,acc,0.3383333333333333,0.013664144006618275,0
|
|
arc_challenge,acc,0.29436860068259385,0.013318528460539426,0
|
|
arc_challenge,acc_norm,0.31313993174061433,0.013552671543623497,0
|
|
arc_easy,acc,0.6380471380471381,0.00986099146668848,0
|
|
arc_easy,acc_norm,0.609006734006734,0.01001299223254063,0
|
|
boolq,acc,0.57217125382263,0.008653474894637178,1
|
|
cb,acc,0.5178571428571429,0.06737697508644648,1
|
|
cb,f1,0.3633879781420766,,1
|
|
copa,acc,0.74,0.044084400227680794,0
|
|
hellaswag,acc,0.45030870344552876,0.004965078477435578,0
|
|
hellaswag,acc_norm,0.5908185620394344,0.0049067795231926645,0
|
|
piqa,acc,0.7181719260065288,0.010496675231258152,0
|
|
piqa,acc_norm,0.7247007616974973,0.010421429277369531,0
|
|
rte,acc,0.5379061371841155,0.030009848912529113,0
|
|
sciq,acc,0.879,0.010318210380946095,0
|
|
sciq,acc_norm,0.864,0.01084535023047299,0
|
|
storycloze_2016,acc,0.6814537680384821,0.01077416522976135,0
|
|
winogrande,acc,0.5737963693764798,0.013898585965412338,0
|
|
|