task,metric,value,err,version anli_r1,acc,0.329,0.014865395385928369,0 anli_r2,acc,0.356,0.015149042659306625,0 anli_r3,acc,0.31666666666666665,0.013434078660827386,0 arc_challenge,acc,0.2380546075085324,0.012445770028026201,0 arc_challenge,acc_norm,0.26621160409556316,0.012915774781523223,0 arc_easy,acc,0.4692760942760943,0.010240395584815239,0 arc_easy,acc_norm,0.4473905723905724,0.010202832385415642,0 boolq,acc,0.5085626911314984,0.008743772513106856,1 cb,acc,0.5535714285714286,0.06703189227942395,1 cb,f1,0.3779862414008755,,1 copa,acc,0.68,0.04688261722621504,0 hellaswag,acc,0.3783110934076877,0.004839746491523513,0 hellaswag,acc_norm,0.45130452101175067,0.004966060995315058,0 piqa,acc,0.691512513601741,0.01077616467803716,0 piqa,acc_norm,0.6860718171926007,0.01082792813418964,0 rte,acc,0.49458483754512633,0.030094698123239966,0 sciq,acc,0.715,0.014282120955200485,0 sciq,acc_norm,0.687,0.01467127282297789,0 storycloze_2016,acc,0.6386958845537146,0.011108686479432282,0 winogrande,acc,0.49171270718232046,0.014050555322824194,0