|
task,metric,value,err,version
|
|
anli_r1,acc,0.325,0.014818724459095524,0
|
|
anli_r2,acc,0.328,0.014853842487270336,0
|
|
anli_r3,acc,0.3416666666666667,0.013696658778002519,0
|
|
arc_challenge,acc,0.2764505119453925,0.013069662474252427,0
|
|
arc_challenge,acc_norm,0.3003412969283277,0.013395909309957,0
|
|
arc_easy,acc,0.6026936026936027,0.010041053078884277,0
|
|
arc_easy,acc_norm,0.5833333333333334,0.010116282977781253,0
|
|
boolq,acc,0.582262996941896,0.008625883905552707,1
|
|
cb,acc,0.44642857142857145,0.06703189227942397,1
|
|
cb,f1,0.28883861236802416,,1
|
|
copa,acc,0.82,0.038612291966536955,0
|
|
hellaswag,acc,0.4342760406293567,0.004946485466544626,0
|
|
hellaswag,acc_norm,0.5595498904600678,0.0049542655953734695,0
|
|
piqa,acc,0.7170837867247007,0.010508949177489686,0
|
|
piqa,acc_norm,0.7241566920565833,0.010427805502729119,0
|
|
rte,acc,0.5342960288808665,0.030025579819366426,0
|
|
sciq,acc,0.87,0.010640169792499344,0
|
|
sciq,acc_norm,0.849,0.011328165223341674,0
|
|
storycloze_2016,acc,0.7055050774986639,0.010540668963800296,0
|
|
winogrande,acc,0.5769534333070244,0.013885055359056476,0
|
|
|