|
task,metric,value,err,version
|
|
anli_r1,acc,0.335,0.014933117490932575,0
|
|
anli_r2,acc,0.335,0.014933117490932572,0
|
|
anli_r3,acc,0.3466666666666667,0.013744022550571947,0
|
|
arc_challenge,acc,0.257679180887372,0.012780770562768409,0
|
|
arc_challenge,acc_norm,0.28071672354948807,0.01313123812697558,0
|
|
arc_easy,acc,0.5711279461279462,0.010155440652900154,0
|
|
arc_easy,acc_norm,0.5037878787878788,0.010259489101351842,0
|
|
boolq,acc,0.5978593272171254,0.008575926383211254,1
|
|
cb,acc,0.42857142857142855,0.06672848092813058,1
|
|
cb,f1,0.27104247104247103,,1
|
|
copa,acc,0.71,0.045604802157206845,0
|
|
hellaswag,acc,0.4360685122485561,0.004948824501355481,0
|
|
hellaswag,acc_norm,0.5651264688309102,0.004947272454226218,0
|
|
piqa,acc,0.7415669205658324,0.010213971636773326,0
|
|
piqa,acc_norm,0.7410228509249184,0.010220966031405614,0
|
|
rte,acc,0.5306859205776173,0.030039730592197812,0
|
|
sciq,acc,0.814,0.012310790208412815,0
|
|
sciq,acc_norm,0.737,0.013929286594259734,0
|
|
storycloze_2016,acc,0.6969535008017104,0.010627613073376715,0
|
|
winogrande,acc,0.5595895816890292,0.013952330311915603,0
|
|
|