task,metric,value,err,version anli_r1,acc,0.325,0.014818724459095524,0 anli_r2,acc,0.342,0.01500870618212173,0 anli_r3,acc,0.31666666666666665,0.013434078660827388,0 arc_challenge,acc,0.2909556313993174,0.013273077865907578,0 arc_challenge,acc_norm,0.3122866894197952,0.013542598541688067,0 arc_easy,acc,0.6342592592592593,0.009882988069418822,0 arc_easy,acc_norm,0.6186868686868687,0.009966542497171021,0 boolq,acc,0.608868501529052,0.008535239054221166,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.3289760348583877,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.45160326628161723,0.004966351835028203,0 hellaswag,acc_norm,0.5888269269069907,0.004910409150135493,0 piqa,acc,0.733949945593036,0.010310039263352831,0 piqa,acc_norm,0.7486398258977149,0.010121156016819243,0 rte,acc,0.48375451263537905,0.030080573208738064,0 sciq,acc,0.916,0.008776162089491137,0 sciq,acc_norm,0.915,0.008823426366942312,0 storycloze_2016,acc,0.6953500801710315,0.010643426988646796,0 winogrande,acc,0.5682715074980268,0.01392087211001071,0