task,metric,value,err,version anli_r1,acc,0.337,0.014955087918653616,0 anli_r2,acc,0.328,0.014853842487270334,0 anli_r3,acc,0.33916666666666667,0.013672343491681822,0 arc_challenge,acc,0.2696245733788396,0.012968040686869143,0 arc_challenge,acc_norm,0.28924914675767915,0.013250012579393443,0 arc_easy,acc,0.57996632996633,0.010127718838529321,0 arc_easy,acc_norm,0.5681818181818182,0.010163945352271733,0 boolq,acc,0.5804281345565749,0.008631175489166726,1 cb,acc,0.32142857142857145,0.06297362289056341,1 cb,f1,0.24285714285714288,,1 copa,acc,0.74,0.04408440022768078,0 hellaswag,acc,0.4298944433379805,0.004940490508240647,0 hellaswag,acc_norm,0.5665206134236208,0.004945424771611602,0 piqa,acc,0.7334058759521219,0.010316749863541367,0 piqa,acc_norm,0.7486398258977149,0.010121156016819245,0 rte,acc,0.4657039711191336,0.030025579819366426,0 sciq,acc,0.891,0.00985982840703719,0 sciq,acc_norm,0.882,0.01020686926438179,0 storycloze_2016,acc,0.6910742918225548,0.010684853966268455,0 winogrande,acc,0.5461720599842147,0.01399244156370706,0