{
"results": {
"anli_r1": {
"acc": 0.357,
"acc_stderr": 0.015158521721486773
},
"anli_r2": {
"acc": 0.35,
"acc_stderr": 0.015090650341444231
},
"anli_r3": {
"acc": 0.34833333333333333,
"acc_stderr": 0.01375943749887407
},
"cb": {
"acc": 0.4642857142857143,
"acc_stderr": 0.06724777654937658,
"f1": 0.32226930320150665
},
"copa": {
"acc": 0.79,
"acc_stderr": 0.040936018074033256
},
"hellaswag": {
"acc": 0.43238398725353516,
"acc_stderr": 0.004943945069611458,
"acc_norm": 0.5669189404501095,
"acc_norm_stderr": 0.004944889545497962
},
"rte": {
"acc": 0.5126353790613718,
"acc_stderr": 0.030086851767188564
},
"winogrande": {
"acc": 0.5493291239147593,
"acc_stderr": 0.01398392886904024
},
"storycloze_2016": {
"acc": 0.6900053447354356,
"acc_stderr": 0.010695042806212555
},
"boolq": {
"acc": 0.5951070336391437,
"acc_stderr": 0.008585393347962317
},
"arc_easy": {
"acc": 0.5909090909090909,
"acc_stderr": 0.010088775152615788,
"acc_norm": 0.5664983164983165,
"acc_norm_stderr": 0.010168640625454103
},
"arc_challenge": {
"acc": 0.2687713310580205,
"acc_stderr": 0.012955065963710684,
"acc_norm": 0.2773037542662116,
"acc_norm_stderr": 0.013082095839059374
},
"sciq": {
"acc": 0.893,
"acc_stderr": 0.009779910359847169,
"acc_norm": 0.862,
"acc_norm_stderr": 0.010912152632504403
},
"piqa": {
"acc": 0.7263329706202394,
"acc_stderr": 0.010402184206229211,
"acc_norm": 0.733949945593036,
"acc_norm_stderr": 0.010310039263352824
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}