task,metric,value,err,version anli_r1,acc,0.357,0.015158521721486773,0 anli_r2,acc,0.35,0.015090650341444231,0 anli_r3,acc,0.34833333333333333,0.01375943749887407,0 arc_challenge,acc,0.2687713310580205,0.012955065963710684,0 arc_challenge,acc_norm,0.2773037542662116,0.013082095839059374,0 arc_easy,acc,0.5909090909090909,0.010088775152615788,0 arc_easy,acc_norm,0.5664983164983165,0.010168640625454103,0 boolq,acc,0.5951070336391437,0.008585393347962317,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.32226930320150665,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.43238398725353516,0.004943945069611458,0 hellaswag,acc_norm,0.5669189404501095,0.004944889545497962,0 piqa,acc,0.7263329706202394,0.010402184206229211,0 piqa,acc_norm,0.733949945593036,0.010310039263352824,0 rte,acc,0.5126353790613718,0.030086851767188564,0 sciq,acc,0.893,0.009779910359847169,0 sciq,acc_norm,0.862,0.010912152632504403,0 storycloze_2016,acc,0.6900053447354356,0.010695042806212555,0 winogrande,acc,0.5493291239147593,0.01398392886904024,0