task,metric,value,err,version anli_r1,acc,0.322,0.014782913600996664,0 anli_r2,acc,0.353,0.015120172605483689,0 anli_r3,acc,0.3333333333333333,0.013613950010225593,0 arc_challenge,acc,0.2525597269624573,0.012696728980207706,0 arc_challenge,acc_norm,0.28242320819112626,0.013155456884097222,0 arc_easy,acc,0.5765993265993266,0.010138671005289045,0 arc_easy,acc_norm,0.5517676767676768,0.010204645126856942,0 boolq,acc,0.5834862385321101,0.008622288020674003,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.34540644540644544,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.4303923521210914,0.004941191607317909,0 hellaswag,acc_norm,0.5595498904600678,0.004954265595373475,0 piqa,acc,0.7377584330794341,0.010262502565172449,0 piqa,acc_norm,0.7475516866158868,0.010135665547362355,0 rte,acc,0.49458483754512633,0.030094698123239966,0 sciq,acc,0.881,0.01024421514533666,0 sciq,acc_norm,0.856,0.01110798754893915,0 storycloze_2016,acc,0.6905398182789952,0.01068995674518907,0 winogrande,acc,0.5390686661404893,0.014009521680980316,0