|
task,metric,value,err,version
|
|
anli_r1,acc,0.336,0.014944140233795023,0
|
|
anli_r2,acc,0.338,0.014965960710224484,0
|
|
anli_r3,acc,0.33916666666666667,0.013672343491681817,0
|
|
arc_challenge,acc,0.27303754266211605,0.013019332762635744,0
|
|
arc_challenge,acc_norm,0.2960750853242321,0.013340916085246258,0
|
|
arc_easy,acc,0.6014309764309764,0.010046455400477945,0
|
|
arc_easy,acc_norm,0.5597643097643098,0.010186228624515656,0
|
|
boolq,acc,0.5868501529051988,0.00861211754780358,1
|
|
cb,acc,0.4107142857142857,0.0663363415035954,1
|
|
cb,f1,0.2283333333333333,,1
|
|
copa,acc,0.73,0.044619604333847394,0
|
|
hellaswag,acc,0.43457478589922327,0.004946879874422678,0
|
|
hellaswag,acc_norm,0.5644293965345548,0.0049481813670249584,0
|
|
piqa,acc,0.7127312295973884,0.010557291761528637,0
|
|
piqa,acc_norm,0.7159956474428727,0.010521147542454213,0
|
|
rte,acc,0.5342960288808665,0.030025579819366426,0
|
|
sciq,acc,0.863,0.010878848714333308,0
|
|
sciq,acc_norm,0.837,0.01168621271274684,0
|
|
storycloze_2016,acc,0.706574024585783,0.01052948933474447,0
|
|
winogrande,acc,0.5698500394632992,0.013914685094716701,0
|
|
|