task,metric,value,err,version anli_r1,acc,0.348,0.01507060460376841,0 anli_r2,acc,0.346,0.015050266127564448,0 anli_r3,acc,0.355,0.013819249004047308,0 arc_challenge,acc,0.26535836177474403,0.012902554762313964,0 arc_challenge,acc_norm,0.2977815699658703,0.013363080107244487,0 arc_easy,acc,0.5774410774410774,0.01013597822298108,0 arc_easy,acc_norm,0.555976430976431,0.010195285580783954,0 boolq,acc,0.5776758409785933,0.008638883260317736,1 cb,acc,0.30357142857142855,0.06199938655510754,1 cb,f1,0.24443052074631022,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.43158733320055764,0.00494285345937155,0 hellaswag,acc_norm,0.5638319059948218,0.004948952519517514,0 piqa,acc,0.736126224156692,0.010282996367695562,0 piqa,acc_norm,0.7404787812840044,0.010227939888173925,0 rte,acc,0.48736462093862815,0.030086851767188564,0 sciq,acc,0.887,0.01001655286669686,0 sciq,acc_norm,0.878,0.010354864712936701,0 storycloze_2016,acc,0.692143238909674,0.010674598158758179,0 winogrande,acc,0.5469613259668509,0.013990366632148088,0