|
dataset,prompt,metric,value
|
|
xcopa_id,C1 or C2? premise_idmt,accuracy,0.52
|
|
xcopa_id,best_option_idmt,accuracy,0.73
|
|
xcopa_id,cause_effect_idmt,accuracy,0.82
|
|
xcopa_id,i_am_hesitating_idmt,accuracy,0.76
|
|
xcopa_id,plausible_alternatives_idmt,accuracy,0.78
|
|
xcopa_id,median,accuracy,0.76
|
|
xcopa_sw,C1 or C2? premise_swmt,accuracy,0.59
|
|
xcopa_sw,best_option_swmt,accuracy,0.62
|
|
xcopa_sw,cause_effect_swmt,accuracy,0.64
|
|
xcopa_sw,i_am_hesitating_swmt,accuracy,0.63
|
|
xcopa_sw,plausible_alternatives_swmt,accuracy,0.64
|
|
xcopa_sw,median,accuracy,0.63
|
|
xcopa_ta,C1 or C2? premise_tamt,accuracy,0.6
|
|
xcopa_ta,best_option_tamt,accuracy,0.47
|
|
xcopa_ta,cause_effect_tamt,accuracy,0.62
|
|
xcopa_ta,i_am_hesitating_tamt,accuracy,0.64
|
|
xcopa_ta,plausible_alternatives_tamt,accuracy,0.63
|
|
xcopa_ta,median,accuracy,0.62
|
|
xcopa_vi,C1 or C2? premise_vimt,accuracy,0.56
|
|
xcopa_vi,best_option_vimt,accuracy,0.77
|
|
xcopa_vi,cause_effect_vimt,accuracy,0.84
|
|
xcopa_vi,i_am_hesitating_vimt,accuracy,0.82
|
|
xcopa_vi,plausible_alternatives_vimt,accuracy,0.84
|
|
xcopa_vi,median,accuracy,0.82
|
|
xcopa_zh,C1 or C2? premise_zhmt,accuracy,0.62
|
|
xcopa_zh,best_option_zhmt,accuracy,0.72
|
|
xcopa_zh,cause_effect_zhmt,accuracy,0.89
|
|
xcopa_zh,i_am_hesitating_zhmt,accuracy,0.9
|
|
xcopa_zh,plausible_alternatives_zhmt,accuracy,0.86
|
|
xcopa_zh,median,accuracy,0.86
|
|
xstory_cloze_ar,Answer Given options_armt,accuracy,0.8669755129053607
|
|
xstory_cloze_ar,Choose Story Ending_armt,accuracy,0.927862342819325
|
|
xstory_cloze_ar,Generate Ending_armt,accuracy,0.6479152878888154
|
|
xstory_cloze_ar,Novel Correct Ending_armt,accuracy,0.9185969556585043
|
|
xstory_cloze_ar,Story Continuation and Options_armt,accuracy,0.9113170086035738
|
|
xstory_cloze_ar,median,accuracy,0.9113170086035738
|
|
xstory_cloze_es,Answer Given options_esmt,accuracy,0.9272005294506949
|
|
xstory_cloze_es,Choose Story Ending_esmt,accuracy,0.9437458636664461
|
|
xstory_cloze_es,Generate Ending_esmt,accuracy,0.7445400397088021
|
|
xstory_cloze_es,Novel Correct Ending_esmt,accuracy,0.9397749834546658
|
|
xstory_cloze_es,Story Continuation and Options_esmt,accuracy,0.9298477829252151
|
|
xstory_cloze_es,median,accuracy,0.9298477829252151
|
|
xstory_cloze_eu,Answer Given options_eumt,accuracy,0.7452018530774321
|
|
xstory_cloze_eu,Choose Story Ending_eumt,accuracy,0.8676373262739907
|
|
xstory_cloze_eu,Generate Ending_eumt,accuracy,0.6082064857710126
|
|
xstory_cloze_eu,Novel Correct Ending_eumt,accuracy,0.8219722038385175
|
|
xstory_cloze_eu,Story Continuation and Options_eumt,accuracy,0.814030443414957
|
|
xstory_cloze_eu,median,accuracy,0.814030443414957
|
|
xstory_cloze_hi,Answer Given options_himt,accuracy,0.8266048974189278
|
|
xstory_cloze_hi,Choose Story Ending_himt,accuracy,0.8841826604897419
|
|
xstory_cloze_hi,Generate Ending_himt,accuracy,0.657180675049636
|
|
xstory_cloze_hi,Novel Correct Ending_himt,accuracy,0.8669755129053607
|
|
xstory_cloze_hi,Story Continuation and Options_himt,accuracy,0.8689609530112509
|
|
xstory_cloze_hi,median,accuracy,0.8669755129053607
|
|
xstory_cloze_id,Answer Given options_idmt,accuracy,0.8616810059563204
|
|
xstory_cloze_id,Choose Story Ending_idmt,accuracy,0.914626075446724
|
|
xstory_cloze_id,Generate Ending_idmt,accuracy,0.6730641958967571
|
|
xstory_cloze_id,Novel Correct Ending_idmt,accuracy,0.8954334877564527
|
|
xstory_cloze_id,Story Continuation and Options_idmt,accuracy,0.8927862342819325
|
|
xstory_cloze_id,median,accuracy,0.8927862342819325
|
|
xstory_cloze_zh,Answer Given options_zhmt,accuracy,0.9060225016545335
|
|
xstory_cloze_zh,Choose Story Ending_zhmt,accuracy,0.9238914626075446
|
|
xstory_cloze_zh,Generate Ending_zhmt,accuracy,0.686962276637988
|
|
xstory_cloze_zh,Novel Correct Ending_zhmt,accuracy,0.9185969556585043
|
|
xstory_cloze_zh,Story Continuation and Options_zhmt,accuracy,0.9192587690271343
|
|
xstory_cloze_zh,median,accuracy,0.9185969556585043
|
|
xwinograd_fr,Replace_frmt,accuracy,0.6506024096385542
|
|
xwinograd_fr,True or False_frmt,accuracy,0.5662650602409639
|
|
xwinograd_fr,does underscore refer to_frmt,accuracy,0.5542168674698795
|
|
xwinograd_fr,stand for_frmt,accuracy,0.4819277108433735
|
|
xwinograd_fr,underscore refer to_frmt,accuracy,0.6144578313253012
|
|
xwinograd_fr,median,accuracy,0.5662650602409639
|
|
xwinograd_pt,Replace_ptmt,accuracy,0.6425855513307985
|
|
xwinograd_pt,True or False_ptmt,accuracy,0.49809885931558934
|
|
xwinograd_pt,does underscore refer to_ptmt,accuracy,0.6045627376425855
|
|
xwinograd_pt,stand for_ptmt,accuracy,0.5095057034220533
|
|
xwinograd_pt,underscore refer to_ptmt,accuracy,0.6273764258555133
|
|
xwinograd_pt,median,accuracy,0.6045627376425855
|
|
xwinograd_zh,Replace_zhmt,accuracy,0.6845238095238095
|
|
xwinograd_zh,True or False_zhmt,accuracy,0.503968253968254
|
|
xwinograd_zh,does underscore refer to_zhmt,accuracy,0.626984126984127
|
|
xwinograd_zh,stand for_zhmt,accuracy,0.503968253968254
|
|
xwinograd_zh,underscore refer to_zhmt,accuracy,0.7023809523809523
|
|
xwinograd_zh,median,accuracy,0.626984126984127
|
|
multiple,average,multiple,0.7729547044755157
|
|
|