File size: 4,905 Bytes
1bcb202 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
dataset,prompt,metric,value
xcopa_id,C1 or C2? premise_idmt,accuracy,0.51
xcopa_id,best_option_idmt,accuracy,0.53
xcopa_id,cause_effect_idmt,accuracy,0.69
xcopa_id,i_am_hesitating_idmt,accuracy,0.64
xcopa_id,plausible_alternatives_idmt,accuracy,0.7
xcopa_id,median,accuracy,0.64
xcopa_sw,C1 or C2? premise_swmt,accuracy,0.6
xcopa_sw,best_option_swmt,accuracy,0.62
xcopa_sw,cause_effect_swmt,accuracy,0.49
xcopa_sw,i_am_hesitating_swmt,accuracy,0.56
xcopa_sw,plausible_alternatives_swmt,accuracy,0.54
xcopa_sw,median,accuracy,0.56
xcopa_ta,C1 or C2? premise_tamt,accuracy,0.52
xcopa_ta,best_option_tamt,accuracy,0.55
xcopa_ta,cause_effect_tamt,accuracy,0.63
xcopa_ta,i_am_hesitating_tamt,accuracy,0.63
xcopa_ta,plausible_alternatives_tamt,accuracy,0.66
xcopa_ta,median,accuracy,0.63
xcopa_vi,C1 or C2? premise_vimt,accuracy,0.55
xcopa_vi,best_option_vimt,accuracy,0.61
xcopa_vi,cause_effect_vimt,accuracy,0.64
xcopa_vi,i_am_hesitating_vimt,accuracy,0.6
xcopa_vi,plausible_alternatives_vimt,accuracy,0.64
xcopa_vi,median,accuracy,0.61
xcopa_zh,C1 or C2? premise_zhmt,accuracy,0.52
xcopa_zh,best_option_zhmt,accuracy,0.61
xcopa_zh,cause_effect_zhmt,accuracy,0.75
xcopa_zh,i_am_hesitating_zhmt,accuracy,0.72
xcopa_zh,plausible_alternatives_zhmt,accuracy,0.76
xcopa_zh,median,accuracy,0.72
xstory_cloze_ar,Answer Given options_armt,accuracy,0.7061548643282595
xstory_cloze_ar,Choose Story Ending_armt,accuracy,0.786896095301125
xstory_cloze_ar,Generate Ending_armt,accuracy,0.600926538716082
xstory_cloze_ar,Novel Correct Ending_armt,accuracy,0.7511581733951026
xstory_cloze_ar,Story Continuation and Options_armt,accuracy,0.757114493712773
xstory_cloze_ar,median,accuracy,0.7511581733951026
xstory_cloze_es,Answer Given options_esmt,accuracy,0.7902051621442753
xstory_cloze_es,Choose Story Ending_esmt,accuracy,0.8160158835208471
xstory_cloze_es,Generate Ending_esmt,accuracy,0.657180675049636
xstory_cloze_es,Novel Correct Ending_esmt,accuracy,0.784910655195235
xstory_cloze_es,Story Continuation and Options_esmt,accuracy,0.7696889477167439
xstory_cloze_es,median,accuracy,0.784910655195235
xstory_cloze_eu,Answer Given options_eumt,accuracy,0.6227663798808736
xstory_cloze_eu,Choose Story Ending_eumt,accuracy,0.6763732627399074
xstory_cloze_eu,Generate Ending_eumt,accuracy,0.5737921906022502
xstory_cloze_eu,Novel Correct Ending_eumt,accuracy,0.686300463269358
xstory_cloze_eu,Story Continuation and Options_eumt,accuracy,0.6637988087359364
xstory_cloze_eu,median,accuracy,0.6637988087359364
xstory_cloze_hi,Answer Given options_himt,accuracy,0.6697551290536069
xstory_cloze_hi,Choose Story Ending_himt,accuracy,0.7160820648577101
xstory_cloze_hi,Generate Ending_himt,accuracy,0.5923229649238915
xstory_cloze_hi,Novel Correct Ending_himt,accuracy,0.6882859033752482
xstory_cloze_hi,Story Continuation and Options_himt,accuracy,0.7048312375909993
xstory_cloze_hi,median,accuracy,0.6882859033752482
xstory_cloze_id,Answer Given options_idmt,accuracy,0.7346128391793514
xstory_cloze_id,Choose Story Ending_idmt,accuracy,0.7511581733951026
xstory_cloze_id,Generate Ending_idmt,accuracy,0.6201191264063534
xstory_cloze_id,Novel Correct Ending_idmt,accuracy,0.728656518861681
xstory_cloze_id,Story Continuation and Options_idmt,accuracy,0.7412309728656519
xstory_cloze_id,median,accuracy,0.7346128391793514
xstory_cloze_zh,Answer Given options_zhmt,accuracy,0.7425545996029119
xstory_cloze_zh,Choose Story Ending_zhmt,accuracy,0.7941760423560555
xstory_cloze_zh,Generate Ending_zhmt,accuracy,0.6247518199867638
xstory_cloze_zh,Novel Correct Ending_zhmt,accuracy,0.7842488418266049
xstory_cloze_zh,Story Continuation and Options_zhmt,accuracy,0.8034414295168762
xstory_cloze_zh,median,accuracy,0.7842488418266049
xwinograd_fr,Replace_frmt,accuracy,0.5180722891566265
xwinograd_fr,True or False_frmt,accuracy,0.46987951807228917
xwinograd_fr,does underscore refer to_frmt,accuracy,0.5421686746987951
xwinograd_fr,stand for_frmt,accuracy,0.5060240963855421
xwinograd_fr,underscore refer to_frmt,accuracy,0.5421686746987951
xwinograd_fr,median,accuracy,0.5180722891566265
xwinograd_pt,Replace_ptmt,accuracy,0.5057034220532319
xwinograd_pt,True or False_ptmt,accuracy,0.5133079847908745
xwinograd_pt,does underscore refer to_ptmt,accuracy,0.5209125475285171
xwinograd_pt,stand for_ptmt,accuracy,0.5209125475285171
xwinograd_pt,underscore refer to_ptmt,accuracy,0.49049429657794674
xwinograd_pt,median,accuracy,0.5133079847908745
xwinograd_zh,Replace_zhmt,accuracy,0.5238095238095238
xwinograd_zh,True or False_zhmt,accuracy,0.5138888888888888
xwinograd_zh,does underscore refer to_zhmt,accuracy,0.49404761904761907
xwinograd_zh,stand for_zhmt,accuracy,0.49603174603174605
xwinograd_zh,underscore refer to_zhmt,accuracy,0.503968253968254
xwinograd_zh,median,accuracy,0.503968253968254
multiple,average,multiple,0.6501688392588024
|