|
task,metric,value,err,version
|
|
anli_r1,acc,0.34,0.014987482264363937,0
|
|
anli_r2,acc,0.326,0.014830507204541033,0
|
|
anli_r3,acc,0.3458333333333333,0.013736245342311014,0
|
|
arc_challenge,acc,0.3165529010238908,0.01359243151906808,0
|
|
arc_challenge,acc_norm,0.3370307167235495,0.013813476652902274,0
|
|
arc_easy,acc,0.6426767676767676,0.009833205612463114,0
|
|
arc_easy,acc_norm,0.6426767676767676,0.009833205612463106,0
|
|
boolq,acc,0.5801223241590214,0.008632045504781744,1
|
|
cb,acc,0.5178571428571429,0.06737697508644648,1
|
|
cb,f1,0.33534439416792355,,1
|
|
copa,acc,0.73,0.044619604333847394,0
|
|
hellaswag,acc,0.45030870344552876,0.0049650784774355715,0
|
|
hellaswag,acc_norm,0.60017924716192,0.004888601874547486,0
|
|
piqa,acc,0.7584330794341676,0.009986718001804461,0
|
|
piqa,acc_norm,0.7600652883569097,0.009963625892809545,0
|
|
rte,acc,0.48375451263537905,0.030080573208738064,0
|
|
sciq,acc,0.932,0.007964887911291603,0
|
|
sciq,acc_norm,0.929,0.008125578442487914,0
|
|
storycloze_2016,acc,0.7012292891501871,0.010584692134739974,0
|
|
winogrande,acc,0.5674822415153907,0.013923911578623827,0
|
|
|