|
task,metric,value,err,version
|
|
anli_r1,acc,0.34,0.014987482264363935,0
|
|
anli_r2,acc,0.338,0.014965960710224482,0
|
|
anli_r3,acc,0.35333333333333333,0.013804572162314926,0
|
|
arc_challenge,acc,0.257679180887372,0.0127807705627684,0
|
|
arc_challenge,acc_norm,0.2832764505119454,0.013167478735134576,0
|
|
arc_easy,acc,0.5648148148148148,0.01017321643037092,0
|
|
arc_easy,acc_norm,0.5349326599326599,0.01023471305272368,0
|
|
boolq,acc,0.5831804281345566,0.008623192108843679,1
|
|
cb,acc,0.44642857142857145,0.06703189227942398,1
|
|
cb,f1,0.3011143410852713,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.42202748456482775,0.004928735103635839,0
|
|
hellaswag,acc_norm,0.5423222465644294,0.004971874159777697,0
|
|
piqa,acc,0.719260065288357,0.010484325438311829,0
|
|
piqa,acc_norm,0.7263329706202394,0.010402184206229218,0
|
|
rte,acc,0.5415162454873647,0.029992535385373314,0
|
|
sciq,acc,0.861,0.010945263761042955,0
|
|
sciq,acc_norm,0.835,0.01174363286691616,0
|
|
storycloze_2016,acc,0.6766435061464458,0.01081682863306821,0
|
|
winogrande,acc,0.5414364640883977,0.014004146853791892,0
|
|
|