|
task,metric,value,err,version
|
|
anli_r1,acc,0.335,0.014933117490932577,0
|
|
anli_r2,acc,0.348,0.01507060460376841,0
|
|
anli_r3,acc,0.32166666666666666,0.013490095282989521,0
|
|
arc_challenge,acc,0.23378839590443687,0.01236822537850714,0
|
|
arc_challenge,acc_norm,0.2431740614334471,0.012536554144587092,0
|
|
arc_easy,acc,0.4675925925925926,0.010238210368801893,0
|
|
arc_easy,acc_norm,0.4393939393939394,0.010184134315437665,0
|
|
boolq,acc,0.4948012232415902,0.008744582253526255,1
|
|
cb,acc,0.5178571428571429,0.06737697508644647,1
|
|
cb,f1,0.34263448969331317,,1
|
|
copa,acc,0.63,0.04852365870939099,0
|
|
hellaswag,acc,0.3777136028679546,0.0048382464107862705,0
|
|
hellaswag,acc_norm,0.4529974108743278,0.004967685204073105,0
|
|
piqa,acc,0.6844396082698585,0.010843119201758945,0
|
|
piqa,acc_norm,0.6926006528835691,0.010765602506939063,0
|
|
rte,acc,0.5018050541516246,0.030096267148976633,0
|
|
sciq,acc,0.718,0.01423652621529135,0
|
|
sciq,acc_norm,0.71,0.014356395999905694,0
|
|
storycloze_2016,acc,0.6306787814003206,0.011160545865067172,0
|
|
winogrande,acc,0.5074980268350434,0.014050905521228577,0
|
|
|