|
task,metric,value,err,version
|
|
anli_r1,acc,0.332,0.014899597242811483,0
|
|
anli_r2,acc,0.335,0.014933117490932572,0
|
|
anli_r3,acc,0.3383333333333333,0.013664144006618275,0
|
|
arc_challenge,acc,0.28071672354948807,0.013131238126975576,0
|
|
arc_challenge,acc_norm,0.28498293515358364,0.013191348179838793,0
|
|
arc_easy,acc,0.6106902356902357,0.01000521278287814,0
|
|
arc_easy,acc_norm,0.5315656565656566,0.010239317603199509,0
|
|
boolq,acc,0.5675840978593272,0.008664798701065799,1
|
|
cb,acc,0.44642857142857145,0.06703189227942398,1
|
|
cb,f1,0.24357864357864356,,1
|
|
copa,acc,0.81,0.03942772444036623,0
|
|
hellaswag,acc,0.46703843855805616,0.0049789271647928835,0
|
|
hellaswag,acc_norm,0.6088428599880502,0.004870121051762726,0
|
|
piqa,acc,0.7524483133841132,0.010069703966857102,0
|
|
piqa,acc_norm,0.7540805223068553,0.010047331865625184,0
|
|
rte,acc,0.5234657039711191,0.03006330041190266,0
|
|
sciq,acc,0.838,0.011657267771304405,0
|
|
sciq,acc_norm,0.734,0.01397996564514516,0
|
|
storycloze_2016,acc,0.7140566541956174,0.010449259851345843,0
|
|
winogrande,acc,0.590370955011839,0.013821049109655462,0
|
|
|