|
task,metric,value,err,version
|
|
anli_r1,acc,0.336,0.014944140233795023,0
|
|
anli_r2,acc,0.331,0.014888272588203938,0
|
|
anli_r3,acc,0.3425,0.013704669762934727,0
|
|
arc_challenge,acc,0.257679180887372,0.012780770562768403,0
|
|
arc_challenge,acc_norm,0.28498293515358364,0.013191348179838793,0
|
|
arc_easy,acc,0.5883838383838383,0.01009821864671491,0
|
|
arc_easy,acc_norm,0.5193602693602694,0.010252089491165522,0
|
|
boolq,acc,0.5923547400611621,0.008594580270731615,1
|
|
cb,acc,0.44642857142857145,0.06703189227942398,1
|
|
cb,f1,0.2956393200295639,,1
|
|
copa,acc,0.78,0.04163331998932261,0
|
|
hellaswag,acc,0.44901414060944034,0.004963771168672082,0
|
|
hellaswag,acc_norm,0.5796654052977495,0.004926038197714521,0
|
|
piqa,acc,0.7274211099020674,0.01038925680329602,0
|
|
piqa,acc_norm,0.7372143634385201,0.010269354068140777,0
|
|
rte,acc,0.516245487364621,0.030080573208738064,0
|
|
sciq,acc,0.816,0.012259457340938584,0
|
|
sciq,acc_norm,0.734,0.013979965645145153,0
|
|
storycloze_2016,acc,0.7071084981293426,0.010523873293246305,0
|
|
winogrande,acc,0.569060773480663,0.013917796623335962,0
|
|
|