|
task,metric,value,err,version
|
|
anli_r1,acc,0.342,0.01500870618212173,0
|
|
anli_r2,acc,0.335,0.01493311749093257,0
|
|
anli_r3,acc,0.3333333333333333,0.013613950010225601,0
|
|
arc_challenge,acc,0.28242320819112626,0.013155456884097224,0
|
|
arc_challenge,acc_norm,0.30802047781569963,0.013491429517292038,0
|
|
arc_easy,acc,0.6338383838383839,0.009885391390947731,0
|
|
arc_easy,acc_norm,0.5551346801346801,0.010197216690356425,0
|
|
boolq,acc,0.5412844036697247,0.008715193815788284,1
|
|
cb,acc,0.32142857142857145,0.06297362289056341,1
|
|
cb,f1,0.21886695057426764,,1
|
|
copa,acc,0.8,0.04020151261036845,0
|
|
hellaswag,acc,0.4554869547898825,0.00496996845825617,0
|
|
hellaswag,acc_norm,0.5894244174467238,0.004909328992915069,0
|
|
piqa,acc,0.7285092491838956,0.010376251176596135,0
|
|
piqa,acc_norm,0.7279651795429815,0.010382763786247388,0
|
|
rte,acc,0.5631768953068592,0.029855247390314935,0
|
|
sciq,acc,0.828,0.011939788882495321,0
|
|
sciq,acc_norm,0.734,0.013979965645145165,0
|
|
storycloze_2016,acc,0.6889363976483164,0.010705164869803167,0
|
|
winogrande,acc,0.595895816890292,0.013791610664670845,0
|
|
|