task,metric,value,err,version anli_r1,acc,0.335,0.014933117490932572,0 anli_r2,acc,0.352,0.015110404505648664,0 anli_r3,acc,0.3383333333333333,0.013664144006618275,0 arc_challenge,acc,0.2627986348122867,0.012862523175351333,0 arc_challenge,acc_norm,0.2986348122866894,0.013374078615068752,0 arc_easy,acc,0.5635521885521886,0.010176569980111043,0 arc_easy,acc_norm,0.5277777777777778,0.010243938285881117,0 boolq,acc,0.590519877675841,0.008600549751320925,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.26538370159182706,,1 copa,acc,0.73,0.0446196043338474,0 hellaswag,acc,0.4221270663214499,0.004928891895874297,0 hellaswag,acc_norm,0.5425214100776737,0.004971704917267754,0 piqa,acc,0.7306855277475517,0.01035000407058876,0 piqa,acc_norm,0.7372143634385201,0.010269354068140776,0 rte,acc,0.5306859205776173,0.03003973059219781,0 sciq,acc,0.858,0.011043457699378218,0 sciq,acc_norm,0.845,0.011450157470799461,0 storycloze_2016,acc,0.6841261357562801,0.010749892827011111,0 winogrande,acc,0.5327545382794001,0.014022300570434134,0