task,metric,value,err,version anli_r1,acc,0.345,0.015039986742055235,0 anli_r2,acc,0.362,0.015204840912919501,0 anli_r3,acc,0.31583333333333335,0.013424568830356443,0 arc_challenge,acc,0.24744027303754265,0.012610352663292673,0 arc_challenge,acc_norm,0.25170648464163825,0.012682496334042963,0 arc_easy,acc,0.4739057239057239,0.01024580199024005,0 arc_easy,acc_norm,0.4494949494949495,0.010207308833916035,0 boolq,acc,0.5064220183486239,0.00874433361394033,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.3176319176319176,,1 copa,acc,0.67,0.04725815626252609,0 hellaswag,acc,0.376319458275244,0.004834715814208109,0 hellaswag,acc_norm,0.45140410276837284,0.004966158142645416,0 piqa,acc,0.6670293797606094,0.010995648822619067,0 piqa,acc_norm,0.676278563656148,0.010916765010708767,0 rte,acc,0.5270758122743683,0.030052303463143706,0 sciq,acc,0.708,0.014385511563477343,0 sciq,acc_norm,0.689,0.014645596385722694,0 storycloze_2016,acc,0.6392303580972741,0.011105110530046359,0 winogrande,acc,0.489344909234412,0.014049294536290403,0