{
"results": {
"anli_r1": {
"acc": 0.35,
"acc_stderr": 0.015090650341444233
},
"anli_r2": {
"acc": 0.34,
"acc_stderr": 0.014987482264363937
},
"anli_r3": {
"acc": 0.33166666666666667,
"acc_stderr": 0.013596836729485163
},
"cb": {
"acc": 0.44642857142857145,
"acc_stderr": 0.06703189227942398,
"f1": 0.3011143410852713
},
"copa": {
"acc": 0.75,
"acc_stderr": 0.04351941398892446
},
"hellaswag": {
"acc": 0.4255128460466043,
"acc_stderr": 0.004934100774481221,
"acc_norm": 0.5439155546703844,
"acc_norm_stderr": 0.004970497804772303
},
"rte": {
"acc": 0.5487364620938628,
"acc_stderr": 0.029953149241808946
},
"winogrande": {
"acc": 0.5295974743488555,
"acc_stderr": 0.014027843827840083
},
"storycloze_2016": {
"acc": 0.6787814003206841,
"acc_stderr": 0.010798029402794913
},
"boolq": {
"acc": 0.5951070336391437,
"acc_stderr": 0.008585393347962315
},
"arc_easy": {
"acc": 0.5656565656565656,
"acc_stderr": 0.010170943451269425,
"acc_norm": 0.5404040404040404,
"acc_norm_stderr": 0.010226230740889027
},
"arc_challenge": {
"acc": 0.2636518771331058,
"acc_stderr": 0.012875929151297061,
"acc_norm": 0.2790102389078498,
"acc_norm_stderr": 0.013106784883601345
},
"sciq": {
"acc": 0.873,
"acc_stderr": 0.010534798620855748,
"acc_norm": 0.855,
"acc_norm_stderr": 0.01113997751789013
},
"piqa": {
"acc": 0.7295973884657236,
"acc_stderr": 0.010363167031620803,
"acc_norm": 0.7323177366702938,
"acc_norm_stderr": 0.010330111189370415
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}