task,metric,value,err,version anli_r1,acc,0.348,0.015070604603768408,0 anli_r2,acc,0.333,0.014910846164229857,0 anli_r3,acc,0.33916666666666667,0.013672343491681815,0 arc_challenge,acc,0.24573378839590443,0.012581033453730107,0 arc_challenge,acc_norm,0.2781569965870307,0.013094469919538805,0 arc_easy,acc,0.5521885521885522,0.010203742451111532,0 arc_easy,acc_norm,0.49242424242424243,0.010258605792153321,0 boolq,acc,0.563914373088685,0.00867331277632493,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.1986111111111111,,1 copa,acc,0.76,0.04292346959909282,0 hellaswag,acc,0.42182832105158335,0.0049284209030265504,0 hellaswag,acc_norm,0.5407289384584744,0.004973199296339958,0 piqa,acc,0.7257889009793254,0.010408618664933382,0 piqa,acc_norm,0.7377584330794341,0.010262502565172442,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.797,0.012726073744598283,0 sciq,acc_norm,0.719,0.01422115470843493,0 storycloze_2016,acc,0.6841261357562801,0.010749892827011111,0 winogrande,acc,0.5359116022099447,0.01401619343395831,0