task,metric,value,err,version anli_r1,acc,0.34,0.014987482264363935,0 anli_r2,acc,0.338,0.014965960710224482,0 anli_r3,acc,0.35333333333333333,0.013804572162314926,0 arc_challenge,acc,0.257679180887372,0.0127807705627684,0 arc_challenge,acc_norm,0.2832764505119454,0.013167478735134576,0 arc_easy,acc,0.5648148148148148,0.01017321643037092,0 arc_easy,acc_norm,0.5349326599326599,0.01023471305272368,0 boolq,acc,0.5831804281345566,0.008623192108843679,1 cb,acc,0.44642857142857145,0.06703189227942398,1 cb,f1,0.3011143410852713,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.42202748456482775,0.004928735103635839,0 hellaswag,acc_norm,0.5423222465644294,0.004971874159777697,0 piqa,acc,0.719260065288357,0.010484325438311829,0 piqa,acc_norm,0.7263329706202394,0.010402184206229218,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.861,0.010945263761042955,0 sciq,acc_norm,0.835,0.01174363286691616,0 storycloze_2016,acc,0.6766435061464458,0.01081682863306821,0 winogrande,acc,0.5414364640883977,0.014004146853791892,0