|
task,metric,value,err,version
|
|
anli_r1,acc,0.348,0.015070604603768408,0
|
|
anli_r2,acc,0.333,0.014910846164229857,0
|
|
anli_r3,acc,0.33916666666666667,0.013672343491681815,0
|
|
arc_challenge,acc,0.24573378839590443,0.012581033453730107,0
|
|
arc_challenge,acc_norm,0.2781569965870307,0.013094469919538805,0
|
|
arc_easy,acc,0.5521885521885522,0.010203742451111532,0
|
|
arc_easy,acc_norm,0.49242424242424243,0.010258605792153321,0
|
|
boolq,acc,0.563914373088685,0.00867331277632493,1
|
|
cb,acc,0.375,0.06527912098338669,1
|
|
cb,f1,0.1986111111111111,,1
|
|
copa,acc,0.76,0.04292346959909282,0
|
|
hellaswag,acc,0.42182832105158335,0.0049284209030265504,0
|
|
hellaswag,acc_norm,0.5407289384584744,0.004973199296339958,0
|
|
piqa,acc,0.7257889009793254,0.010408618664933382,0
|
|
piqa,acc_norm,0.7377584330794341,0.010262502565172442,0
|
|
rte,acc,0.5415162454873647,0.029992535385373314,0
|
|
sciq,acc,0.797,0.012726073744598283,0
|
|
sciq,acc_norm,0.719,0.01422115470843493,0
|
|
storycloze_2016,acc,0.6841261357562801,0.010749892827011111,0
|
|
winogrande,acc,0.5359116022099447,0.01401619343395831,0
|
|
|