task,metric,value,err,version anli_r1,acc,0.355,0.01513949154378053,0 anli_r2,acc,0.359,0.015177264224798601,0 anli_r3,acc,0.3308333333333333,0.013588208070708992,0 arc_challenge,acc,0.2551194539249147,0.012739038695202098,0 arc_challenge,acc_norm,0.2858361774744027,0.013203196088537369,0 arc_easy,acc,0.5648148148148148,0.010173216430370927,0 arc_easy,acc_norm,0.5340909090909091,0.010235908103438688,0 boolq,acc,0.5856269113149847,0.00861586377642113,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.3403298350824588,,1 copa,acc,0.7,0.046056618647183814,0 hellaswag,acc,0.4303923521210914,0.004941191607317909,0 hellaswag,acc_norm,0.5592511451902011,0.004954622308739016,0 piqa,acc,0.7328618063112078,0.010323440492612426,0 piqa,acc_norm,0.73449401523395,0.010303308653024432,0 rte,acc,0.5451263537906137,0.029973636495415252,0 sciq,acc,0.862,0.010912152632504411,0 sciq,acc_norm,0.796,0.012749374359024391,0 storycloze_2016,acc,0.689470871191876,0.010700112173178448,0 winogrande,acc,0.5422257300710339,0.014002284504422442,0