task,metric,value,err,version anli_r1,acc,0.339,0.014976758771620339,0 anli_r2,acc,0.342,0.015008706182121728,0 anli_r3,acc,0.37083333333333335,0.01394962856014431,0 arc_challenge,acc,0.2687713310580205,0.01295506596371069,0 arc_challenge,acc_norm,0.2696245733788396,0.012968040686869147,0 arc_easy,acc,0.5791245791245792,0.010130502164066342,0 arc_easy,acc_norm,0.5357744107744108,0.010233488709726539,0 boolq,acc,0.6103975535168196,0.008529228894936293,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.30272108843537415,,1 copa,acc,0.73,0.0446196043338474,0 hellaswag,acc,0.4326827325234017,0.004944351065545858,0 hellaswag,acc_norm,0.5660227046405099,0.004946089230153027,0 piqa,acc,0.7366702937976061,0.010276185322196764,0 piqa,acc_norm,0.7383025027203483,0.010255630772708227,0 rte,acc,0.5415162454873647,0.02999253538537331,0 sciq,acc,0.867,0.010743669132397332,0 sciq,acc_norm,0.815,0.012285191326386686,0 storycloze_2016,acc,0.6862640299305185,0.010730179119317625,0 winogrande,acc,0.5469613259668509,0.01399036663214809,0