task,metric,value,err,version anli_r1,acc,0.333,0.01491084616422987,0 anli_r2,acc,0.337,0.014955087918653605,0 anli_r3,acc,0.3541666666666667,0.013811933499570954,0 arc_challenge,acc,0.2593856655290102,0.012808273573927102,0 arc_challenge,acc_norm,0.2764505119453925,0.013069662474252423,0 arc_easy,acc,0.563973063973064,0.01017545958275974,0 arc_easy,acc_norm,0.5130471380471381,0.01025628992505844,0 boolq,acc,0.5914373088685015,0.00859758050271866,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.34745762711864403,,1 copa,acc,0.74,0.04408440022768077,0 hellaswag,acc,0.4217287392949612,0.004928263494616733,0 hellaswag,acc_norm,0.5414260107548298,0.004972625848702658,0 piqa,acc,0.7290533188248096,0.010369718937426843,0 piqa,acc_norm,0.733949945593036,0.01031003926335282,0 rte,acc,0.5487364620938628,0.029953149241808946,0 sciq,acc,0.847,0.011389500459665539,0 sciq,acc_norm,0.8,0.012655439943366657,0 storycloze_2016,acc,0.677712453233565,0.010807461374996358,0 winogrande,acc,0.5501183898973955,0.013981711904049728,0