|
task,metric,value,err,version
|
|
anli_r1,acc,0.352,0.015110404505648664,0
|
|
anli_r2,acc,0.372,0.015292149942040577,0
|
|
anli_r3,acc,0.3258333333333333,0.013535422043417447,0
|
|
arc_challenge,acc,0.23378839590443687,0.01236822537850714,0
|
|
arc_challenge,acc_norm,0.24744027303754265,0.01261035266329267,0
|
|
arc_easy,acc,0.4553872053872054,0.01021886178761873,0
|
|
arc_easy,acc_norm,0.43602693602693604,0.010175459582759736,0
|
|
boolq,acc,0.4938837920489297,0.008744400681893475,1
|
|
cb,acc,0.5357142857142857,0.06724777654937658,1
|
|
cb,f1,0.37053140096618353,,1
|
|
copa,acc,0.65,0.047937248544110196,0
|
|
hellaswag,acc,0.3759211312487552,0.004833699243292346,0
|
|
hellaswag,acc_norm,0.44652459669388567,0.0049611615892284164,0
|
|
piqa,acc,0.6806311207834603,0.010877964076613735,0
|
|
piqa,acc_norm,0.6779107725788901,0.010902341695103438,0
|
|
rte,acc,0.5090252707581228,0.030091559826331334,0
|
|
sciq,acc,0.697,0.01453968371053524,0
|
|
sciq,acc_norm,0.692,0.01460648312734276,0
|
|
storycloze_2016,acc,0.6360235168359166,0.011126343044992838,0
|
|
winogrande,acc,0.5224940805051302,0.014038257824059874,0
|
|
|