|
task,metric,value,err,version
|
|
anli_r1,acc,0.333,0.014910846164229863,0
|
|
anli_r2,acc,0.338,0.014965960710224494,0
|
|
anli_r3,acc,0.3308333333333333,0.013588208070708995,0
|
|
arc_challenge,acc,0.25,0.012653835621466646,0
|
|
arc_challenge,acc_norm,0.28071672354948807,0.013131238126975578,0
|
|
arc_easy,acc,0.5458754208754208,0.010216507710244106,0
|
|
arc_easy,acc_norm,0.49074074074074076,0.010258024147860673,0
|
|
boolq,acc,0.5963302752293578,0.008581220435616816,1
|
|
cb,acc,0.48214285714285715,0.0673769750864465,1
|
|
cb,f1,0.3082942097026604,,1
|
|
copa,acc,0.72,0.04512608598542127,0
|
|
hellaswag,acc,0.4334793865763792,0.004945424771611596,0
|
|
hellaswag,acc_norm,0.560744871539534,0.0049528205388318985,0
|
|
piqa,acc,0.7399347116430903,0.0102348932490613,0
|
|
piqa,acc_norm,0.7426550598476604,0.01019992106479251,0
|
|
rte,acc,0.5487364620938628,0.029953149241808943,0
|
|
sciq,acc,0.814,0.012310790208412803,0
|
|
sciq,acc_norm,0.711,0.014341711358296177,0
|
|
storycloze_2016,acc,0.6910742918225548,0.010684853966268454,0
|
|
winogrande,acc,0.5461720599842147,0.013992441563707068,0
|
|
|