task,metric,value,err,version anli_r1,acc,0.339,0.014976758771620342,0 anli_r2,acc,0.353,0.015120172605483692,0 anli_r3,acc,0.33916666666666667,0.013672343491681819,0 arc_challenge,acc,0.25853242320819114,0.012794553754288679,0 arc_challenge,acc_norm,0.2781569965870307,0.0130944699195388,0 arc_easy,acc,0.5669191919191919,0.010167478013701789,0 arc_easy,acc_norm,0.5357744107744108,0.01023348870972654,0 boolq,acc,0.5957186544342508,0.008583313811372065,1 cb,acc,0.44642857142857145,0.06703189227942398,1 cb,f1,0.2956393200295639,,1 copa,acc,0.7,0.046056618647183814,0 hellaswag,acc,0.42162915753833896,0.0049281058807760765,0 hellaswag,acc_norm,0.5436168094005178,0.004970759774676884,0 piqa,acc,0.7295973884657236,0.010363167031620792,0 piqa,acc_norm,0.7285092491838956,0.010376251176596135,0 rte,acc,0.5667870036101083,0.029826764082138274,0 sciq,acc,0.858,0.011043457699378222,0 sciq,acc_norm,0.838,0.011657267771304434,0 storycloze_2016,acc,0.677712453233565,0.010807461374996361,0 winogrande,acc,0.5414364640883977,0.014004146853791902,0