|
task,metric,value,err,version
|
|
anli_r1,acc,0.332,0.014899597242811471,0
|
|
anli_r2,acc,0.334,0.014922019523732961,0
|
|
anli_r3,acc,0.3275,0.013553211167251951,0
|
|
arc_challenge,acc,0.2593856655290102,0.012808273573927102,0
|
|
arc_challenge,acc_norm,0.28924914675767915,0.013250012579393443,0
|
|
arc_easy,acc,0.5740740740740741,0.010146568651002255,0
|
|
arc_easy,acc_norm,0.553030303030303,0.01020191492779168,0
|
|
boolq,acc,0.5889908256880734,0.008605429733982185,1
|
|
cb,acc,0.30357142857142855,0.06199938655510754,1
|
|
cb,f1,0.24454009245974814,,1
|
|
copa,acc,0.76,0.04292346959909283,0
|
|
hellaswag,acc,0.4312885879306911,0.004942440746328495,0
|
|
hellaswag,acc_norm,0.55646285600478,0.004957863944093132,0
|
|
piqa,acc,0.7301414581066377,0.010356595421852197,0
|
|
piqa,acc_norm,0.735038084874864,0.010296557993316049,0
|
|
rte,acc,0.516245487364621,0.030080573208738064,0
|
|
sciq,acc,0.88,0.01028132801274739,0
|
|
sciq,acc_norm,0.843,0.011510146979230189,0
|
|
storycloze_2016,acc,0.6958845537145911,0.01063817265519479,0
|
|
winogrande,acc,0.5477505919494869,0.013988256216606008,0
|
|
|