|
dataset,prompt,metric,value
|
|
xnli_ar,GPT-3 style_armt,accuracy,0.3333333333333333
|
|
xnli_ar,MNLI crowdsource_armt,accuracy,0.3550200803212851
|
|
xnli_ar,can we infer_armt,accuracy,0.3333333333333333
|
|
xnli_ar,guaranteed/possible/impossible_armt,accuracy,0.3453815261044177
|
|
xnli_ar,justified in saying_armt,accuracy,0.3333333333333333
|
|
xnli_ar,median,accuracy,0.3333333333333333
|
|
xnli_es,GPT-3 style_esmt,accuracy,0.5771084337349398
|
|
xnli_es,MNLI crowdsource_esmt,accuracy,0.3353413654618474
|
|
xnli_es,can we infer_esmt,accuracy,0.3333333333333333
|
|
xnli_es,guaranteed/possible/impossible_esmt,accuracy,0.3337349397590361
|
|
xnli_es,justified in saying_esmt,accuracy,0.3333333333333333
|
|
xnli_es,median,accuracy,0.3337349397590361
|
|
xnli_fr,GPT-3 style_frmt,accuracy,0.4598393574297189
|
|
xnli_fr,MNLI crowdsource_frmt,accuracy,0.3333333333333333
|
|
xnli_fr,can we infer_frmt,accuracy,0.5791164658634538
|
|
xnli_fr,guaranteed/possible/impossible_frmt,accuracy,0.46947791164658637
|
|
xnli_fr,justified in saying_frmt,accuracy,0.5542168674698795
|
|
xnli_fr,median,accuracy,0.46947791164658637
|
|
xnli_hi,GPT-3 style_himt,accuracy,0.342570281124498
|
|
xnli_hi,MNLI crowdsource_himt,accuracy,0.3333333333333333
|
|
xnli_hi,can we infer_himt,accuracy,0.41606425702811245
|
|
xnli_hi,guaranteed/possible/impossible_himt,accuracy,0.385140562248996
|
|
xnli_hi,justified in saying_himt,accuracy,0.3927710843373494
|
|
xnli_hi,median,accuracy,0.385140562248996
|
|
xnli_sw,GPT-3 style_swmt,accuracy,0.3582329317269076
|
|
xnli_sw,MNLI crowdsource_swmt,accuracy,0.348995983935743
|
|
xnli_sw,can we infer_swmt,accuracy,0.36265060240963853
|
|
xnli_sw,guaranteed/possible/impossible_swmt,accuracy,0.3389558232931727
|
|
xnli_sw,justified in saying_swmt,accuracy,0.3582329317269076
|
|
xnli_sw,median,accuracy,0.3582329317269076
|
|
xnli_ur,GPT-3 style_urmt,accuracy,0.3309236947791165
|
|
xnli_ur,MNLI crowdsource_urmt,accuracy,0.3333333333333333
|
|
xnli_ur,can we infer_urmt,accuracy,0.3409638554216867
|
|
xnli_ur,guaranteed/possible/impossible_urmt,accuracy,0.3333333333333333
|
|
xnli_ur,justified in saying_urmt,accuracy,0.3550200803212851
|
|
xnli_ur,median,accuracy,0.3333333333333333
|
|
xnli_vi,GPT-3 style_vimt,accuracy,0.3333333333333333
|
|
xnli_vi,MNLI crowdsource_vimt,accuracy,0.3333333333333333
|
|
xnli_vi,can we infer_vimt,accuracy,0.3333333333333333
|
|
xnli_vi,guaranteed/possible/impossible_vimt,accuracy,0.39196787148594375
|
|
xnli_vi,justified in saying_vimt,accuracy,0.3333333333333333
|
|
xnli_vi,median,accuracy,0.3333333333333333
|
|
xnli_zh,GPT-3 style_zhmt,accuracy,0.5060240963855421
|
|
xnli_zh,MNLI crowdsource_zhmt,accuracy,0.3437751004016064
|
|
xnli_zh,can we infer_zhmt,accuracy,0.3421686746987952
|
|
xnli_zh,guaranteed/possible/impossible_zhmt,accuracy,0.42811244979919677
|
|
xnli_zh,justified in saying_zhmt,accuracy,0.3377510040160643
|
|
xnli_zh,median,accuracy,0.3437751004016064
|
|
multiple,average,multiple,0.3612951807228916
|
|
|