Muennighoff's picture
Add eval
d522938
raw
history blame
1.05 kB
task,metric,value,err,version
anli_r1,acc,0.322,0.014782913600996664,0
anli_r2,acc,0.353,0.015120172605483689,0
anli_r3,acc,0.3333333333333333,0.013613950010225593,0
arc_challenge,acc,0.2525597269624573,0.012696728980207706,0
arc_challenge,acc_norm,0.28242320819112626,0.013155456884097222,0
arc_easy,acc,0.5765993265993266,0.010138671005289045,0
arc_easy,acc_norm,0.5517676767676768,0.010204645126856942,0
boolq,acc,0.5834862385321101,0.008622288020674003,1
cb,acc,0.375,0.06527912098338669,1
cb,f1,0.34540644540644544,,1
copa,acc,0.77,0.04229525846816506,0
hellaswag,acc,0.4303923521210914,0.004941191607317909,0
hellaswag,acc_norm,0.5595498904600678,0.004954265595373475,0
piqa,acc,0.7377584330794341,0.010262502565172449,0
piqa,acc_norm,0.7475516866158868,0.010135665547362355,0
rte,acc,0.49458483754512633,0.030094698123239966,0
sciq,acc,0.881,0.01024421514533666,0
sciq,acc_norm,0.856,0.01110798754893915,0
storycloze_2016,acc,0.6905398182789952,0.01068995674518907,0
winogrande,acc,0.5390686661404893,0.014009521680980316,0