|
task,metric,value,err,version
|
|
anli_r1,acc,0.329,0.014865395385928357,0
|
|
anli_r2,acc,0.349,0.015080663991563098,0
|
|
anli_r3,acc,0.33166666666666667,0.013596836729485176,0
|
|
arc_challenge,acc,0.30802047781569963,0.01349142951729204,0
|
|
arc_challenge,acc_norm,0.3216723549488055,0.013650488084494162,0
|
|
arc_easy,acc,0.6401515151515151,0.009848484848484846,0
|
|
arc_easy,acc_norm,0.6136363636363636,0.009991296778159619,0
|
|
boolq,acc,0.5874617737003058,0.008610223886822883,1
|
|
cb,acc,0.4107142857142857,0.0663363415035954,1
|
|
cb,f1,0.2909356725146199,,1
|
|
copa,acc,0.82,0.038612291966536955,0
|
|
hellaswag,acc,0.4449312885879307,0.004959425421382028,0
|
|
hellaswag,acc_norm,0.592212706632145,0.004904189257891276,0
|
|
piqa,acc,0.7236126224156693,0.01043416238827561,0
|
|
piqa,acc_norm,0.7268770402611534,0.010395730264453262,0
|
|
rte,acc,0.5018050541516246,0.030096267148976633,0
|
|
sciq,acc,0.869,0.010674874844837957,0
|
|
sciq,acc_norm,0.863,0.010878848714333316,0
|
|
storycloze_2016,acc,0.6825227151256013,0.010764505409830935,0
|
|
winogrande,acc,0.5706393054459353,0.013911537499969165,0
|
|
|