|
task,metric,value,err,version
|
|
anli_r1,acc,0.357,0.015158521721486773,0
|
|
anli_r2,acc,0.361,0.015195720118175118,0
|
|
anli_r3,acc,0.3525,0.013797164918918362,0
|
|
arc_challenge,acc,0.2815699658703072,0.013143376735009024,0
|
|
arc_challenge,acc_norm,0.2909556313993174,0.013273077865907597,0
|
|
arc_easy,acc,0.5862794612794613,0.010105878530238137,0
|
|
arc_easy,acc_norm,0.5622895622895623,0.010179856486006897,0
|
|
boolq,acc,0.6055045871559633,0.008548152025770934,1
|
|
cb,acc,0.4642857142857143,0.06724777654937658,1
|
|
cb,f1,0.33484504913076335,,1
|
|
copa,acc,0.76,0.04292346959909283,0
|
|
hellaswag,acc,0.4325831507667795,0.004944215937021392,0
|
|
hellaswag,acc_norm,0.5673172674765983,0.004944351065545863,0
|
|
piqa,acc,0.7372143634385201,0.010269354068140767,0
|
|
piqa,acc_norm,0.7372143634385201,0.010269354068140777,0
|
|
rte,acc,0.5306859205776173,0.03003973059219781,0
|
|
sciq,acc,0.889,0.009938701010583726,0
|
|
sciq,acc_norm,0.886,0.010055103435823328,0
|
|
storycloze_2016,acc,0.6873329770176376,0.010720223172953168,0
|
|
winogrande,acc,0.569060773480663,0.01391779662333597,0
|
|
|