{
  "results": {
    "anli_r1": {
      "acc": 0.339,
      "acc_stderr": 0.014976758771620342
    },
    "anli_r2": {
      "acc": 0.353,
      "acc_stderr": 0.015120172605483692
    },
    "anli_r3": {
      "acc": 0.33916666666666667,
      "acc_stderr": 0.013672343491681819
    },
    "cb": {
      "acc": 0.44642857142857145,
      "acc_stderr": 0.06703189227942398,
      "f1": 0.2956393200295639
    },
    "copa": {
      "acc": 0.7,
      "acc_stderr": 0.046056618647183814
    },
    "hellaswag": {
      "acc": 0.42162915753833896,
      "acc_stderr": 0.0049281058807760765,
      "acc_norm": 0.5436168094005178,
      "acc_norm_stderr": 0.004970759774676884
    },
    "rte": {
      "acc": 0.5667870036101083,
      "acc_stderr": 0.029826764082138274
    },
    "winogrande": {
      "acc": 0.5414364640883977,
      "acc_stderr": 0.014004146853791902
    },
    "storycloze_2016": {
      "acc": 0.677712453233565,
      "acc_stderr": 0.010807461374996361
    },
    "boolq": {
      "acc": 0.5957186544342508,
      "acc_stderr": 0.008583313811372065
    },
    "arc_easy": {
      "acc": 0.5669191919191919,
      "acc_stderr": 0.010167478013701789,
      "acc_norm": 0.5357744107744108,
      "acc_norm_stderr": 0.01023348870972654
    },
    "arc_challenge": {
      "acc": 0.25853242320819114,
      "acc_stderr": 0.012794553754288679,
      "acc_norm": 0.2781569965870307,
      "acc_norm_stderr": 0.0130944699195388
    },
    "sciq": {
      "acc": 0.858,
      "acc_stderr": 0.011043457699378222,
      "acc_norm": 0.838,
      "acc_norm_stderr": 0.011657267771304434
    },
    "piqa": {
      "acc": 0.7295973884657236,
      "acc_stderr": 0.010363167031620792,
      "acc_norm": 0.7285092491838956,
      "acc_norm_stderr": 0.010376251176596135
    }
  },
  "versions": {
    "anli_r1": 0,
    "anli_r2": 0,
    "anli_r3": 0,
    "cb": 1,
    "copa": 0,
    "hellaswag": 0,
    "rte": 0,
    "winogrande": 0,
    "storycloze_2016": 0,
    "boolq": 1,
    "arc_easy": 0,
    "arc_challenge": 0,
    "sciq": 0,
    "piqa": 0
  }
}