task,metric,value,err,version anli_r1,acc,0.335,0.014933117490932579,0 anli_r2,acc,0.356,0.015149042659306621,0 anli_r3,acc,0.31833333333333336,0.013452948996996292,0 arc_challenge,acc,0.2363481228668942,0.012414960524301834,0 arc_challenge,acc_norm,0.2525597269624573,0.012696728980207706,0 arc_easy,acc,0.47769360269360267,0.010249568404555636,0 arc_easy,acc_norm,0.45454545454545453,0.010217299762709433,0 boolq,acc,0.4941896024464832,0.008744464477761504,1 cb,acc,0.4642857142857143,0.0672477765493766,1 cb,f1,0.2986425339366516,,1 copa,acc,0.64,0.048241815132442176,0 hellaswag,acc,0.37641904003186616,0.0048349694128836315,0 hellaswag,acc_norm,0.4523999203345947,0.004967118575905289,0 piqa,acc,0.6828073993471164,0.01085815545438087,0 piqa,acc_norm,0.6789989118607181,0.010892641574707906,0 rte,acc,0.5234657039711191,0.030063300411902652,0 sciq,acc,0.714,0.01429714686251791,0 sciq,acc_norm,0.696,0.01455320568795043,0 storycloze_2016,acc,0.6306787814003206,0.011160545865067166,0 winogrande,acc,0.4996053670086819,0.014052481306049516,0