|
task,metric,value,err,version
|
|
anli_r1,acc,0.341,0.014998131348402706,0
|
|
anli_r2,acc,0.336,0.014944140233795027,0
|
|
anli_r3,acc,0.33,0.013579531277800918,0
|
|
arc_challenge,acc,0.302901023890785,0.013428241573185349,0
|
|
arc_challenge,acc_norm,0.30631399317406144,0.013470584417276513,0
|
|
arc_easy,acc,0.6287878787878788,0.009913599001845737,0
|
|
arc_easy,acc_norm,0.5938552188552189,0.010077409815364058,0
|
|
boolq,acc,0.5694189602446483,0.008660360145988744,1
|
|
cb,acc,0.42857142857142855,0.06672848092813058,1
|
|
cb,f1,0.293715318105562,,1
|
|
copa,acc,0.81,0.03942772444036623,0
|
|
hellaswag,acc,0.4645488946425015,0.004977223485342026,0
|
|
hellaswag,acc_norm,0.6143198566022705,0.004857607641160633,0
|
|
piqa,acc,0.7426550598476604,0.01019992106479251,0
|
|
piqa,acc_norm,0.7486398258977149,0.010121156016819245,0
|
|
rte,acc,0.555956678700361,0.029907396333795994,0
|
|
sciq,acc,0.885,0.010093407594904617,0
|
|
sciq,acc_norm,0.856,0.01110798754893915,0
|
|
storycloze_2016,acc,0.7071084981293426,0.010523873293246304,0
|
|
winogrande,acc,0.5966850828729282,0.013787257285896236,0
|
|
|