|
task,metric,value,err,version
|
|
anli_r1,acc,0.315,0.0146966319607925,0
|
|
anli_r2,acc,0.335,0.014933117490932575,0
|
|
anli_r3,acc,0.34,0.013680495725767789,0
|
|
arc_challenge,acc,0.3293515358361775,0.013734057652635473,0
|
|
arc_challenge,acc_norm,0.3387372013651877,0.01383056892797433,0
|
|
arc_easy,acc,0.6523569023569024,0.00977186884683091,0
|
|
arc_easy,acc_norm,0.6266835016835017,0.009925009142802893,0
|
|
boolq,acc,0.6217125382262997,0.008482001133930994,1
|
|
cb,acc,0.4107142857142857,0.0663363415035954,1
|
|
cb,f1,0.2854808590102708,,1
|
|
copa,acc,0.84,0.03684529491774709,0
|
|
hellaswag,acc,0.4934276040629357,0.004989350311751647,0
|
|
hellaswag,acc_norm,0.6552479585739892,0.004743160034271143,0
|
|
piqa,acc,0.7627856365614799,0.00992469493358637,0
|
|
piqa,acc_norm,0.7709466811751904,0.009804509865175505,0
|
|
rte,acc,0.5018050541516246,0.030096267148976626,0
|
|
sciq,acc,0.916,0.008776162089491132,0
|
|
sciq,acc_norm,0.892,0.009820001651345682,0
|
|
storycloze_2016,acc,0.7413148049171566,0.010126662138021712,0
|
|
winogrande,acc,0.6156274664561957,0.013671567600836192,0
|
|
|