|
task,metric,value,err,version
|
|
anli_r1,acc,0.337,0.014955087918653605,0
|
|
anli_r2,acc,0.339,0.014976758771620344,0
|
|
anli_r3,acc,0.3308333333333333,0.013588208070708986,0
|
|
arc_challenge,acc,0.2645051194539249,0.012889272949313366,0
|
|
arc_challenge,acc_norm,0.29180887372013653,0.013284525292403501,0
|
|
arc_easy,acc,0.5963804713804713,0.010067368960348226,0
|
|
arc_easy,acc_norm,0.5340909090909091,0.010235908103438687,0
|
|
boolq,acc,0.6125382262996942,0.00852066653613694,1
|
|
cb,acc,0.4107142857142857,0.0663363415035954,1
|
|
cb,f1,0.1940928270042194,,1
|
|
copa,acc,0.73,0.0446196043338474,0
|
|
hellaswag,acc,0.43706432981477794,0.00495009555596467,0
|
|
hellaswag,acc_norm,0.5617406891057558,0.004951594063272048,0
|
|
piqa,acc,0.719804134929271,0.010478122015577082,0
|
|
piqa,acc_norm,0.720892274211099,0.010465657948498233,0
|
|
rte,acc,0.5126353790613718,0.030086851767188564,0
|
|
sciq,acc,0.827,0.011967214137559933,0
|
|
sciq,acc_norm,0.751,0.013681600278702301,0
|
|
storycloze_2016,acc,0.7076429716729022,0.010518239729787741,0
|
|
winogrande,acc,0.5808997632202052,0.013867325192210117,0
|
|
|