asahi417 committed on
Commit
35a6c78
1 Parent(s): ebc2fa5

model update

Browse files
Files changed (1) hide show
  1. README.md +93 -80
README.md CHANGED
@@ -46,236 +46,242 @@ model-index:
46
  - name: MoverScore
47
  type: moverscore
48
  value: 0.6499011626820898
 
 
 
 
 
 
49
  - task:
50
  name: Text2text Generation
51
  type: text2text-generation
52
  dataset:
53
- name: lmqg/qg_squadshifts
54
- type: reddit
55
- args: reddit
56
  metrics:
57
  - name: BLEU4
58
  type: bleu4
59
- value: 0.059525104157825456
60
  - name: ROUGE-L
61
  type: rouge-l
62
- value: 0.22365090580055863
63
  - name: METEOR
64
  type: meteor
65
- value: 0.21499800504546457
66
  - name: BERTScore
67
  type: bertscore
68
- value: 0.9095144685254328
69
  - name: MoverScore
70
  type: moverscore
71
- value: 0.6059332247878408
72
  - task:
73
  name: Text2text Generation
74
  type: text2text-generation
75
  dataset:
76
  name: lmqg/qg_squadshifts
77
- type: new_wiki
78
- args: new_wiki
79
  metrics:
80
  - name: BLEU4
81
  type: bleu4
82
- value: 0.11118273173452982
83
  - name: ROUGE-L
84
  type: rouge-l
85
- value: 0.2967546690273089
86
  - name: METEOR
87
  type: meteor
88
- value: 0.27315087810722966
89
  - name: BERTScore
90
  type: bertscore
91
- value: 0.9322739617807421
92
  - name: MoverScore
93
  type: moverscore
94
- value: 0.6623000084761579
95
  - task:
96
  name: Text2text Generation
97
  type: text2text-generation
98
  dataset:
99
  name: lmqg/qg_subjqa
100
- type: tripadvisor
101
- args: tripadvisor
102
  metrics:
103
  - name: BLEU4
104
  type: bleu4
105
- value: 8.380171318718442e-07
106
  - name: ROUGE-L
107
  type: rouge-l
108
- value: 0.1402922852924756
109
  - name: METEOR
110
  type: meteor
111
- value: 0.1372146070365174
112
  - name: BERTScore
113
  type: bertscore
114
- value: 0.8891002409937424
115
  - name: MoverScore
116
  type: moverscore
117
- value: 0.5604572211470809
118
  - task:
119
  name: Text2text Generation
120
  type: text2text-generation
121
  dataset:
122
- name: lmqg/qg_squadshifts
123
- type: nyt
124
- args: nyt
125
  metrics:
126
  - name: BLEU4
127
  type: bleu4
128
- value: 0.08117757543966063
129
  - name: ROUGE-L
130
  type: rouge-l
131
- value: 0.25292097720734297
132
  - name: METEOR
133
  type: meteor
134
- value: 0.25254205113198686
135
  - name: BERTScore
136
  type: bertscore
137
- value: 0.9249009759439454
138
  - name: MoverScore
139
  type: moverscore
140
- value: 0.6406329128556304
141
  - task:
142
  name: Text2text Generation
143
  type: text2text-generation
144
  dataset:
145
  name: lmqg/qg_subjqa
146
- type: restaurants
147
- args: restaurants
148
  metrics:
149
  - name: BLEU4
150
  type: bleu4
151
- value: 1.1301750984972448e-06
152
  - name: ROUGE-L
153
  type: rouge-l
154
- value: 0.13083168975354642
155
  - name: METEOR
156
  type: meteor
157
- value: 0.12419733006916912
158
  - name: BERTScore
159
  type: bertscore
160
- value: 0.8797711839570719
161
  - name: MoverScore
162
  type: moverscore
163
- value: 0.5542757411268555
164
  - task:
165
  name: Text2text Generation
166
  type: text2text-generation
167
  dataset:
168
  name: lmqg/qg_subjqa
169
- type: electronics
170
- args: electronics
171
  metrics:
172
  - name: BLEU4
173
  type: bleu4
174
- value: 0.00866799444965211
175
  - name: ROUGE-L
176
  type: rouge-l
177
- value: 0.1601628874804186
178
  - name: METEOR
179
  type: meteor
180
- value: 0.15348605312210778
181
  - name: BERTScore
182
  type: bertscore
183
- value: 0.8783386920680519
184
  - name: MoverScore
185
  type: moverscore
186
- value: 0.5634845371093992
187
  - task:
188
  name: Text2text Generation
189
  type: text2text-generation
190
  dataset:
191
- name: lmqg/qg_subjqa
192
- type: books
193
- args: books
194
  metrics:
195
  - name: BLEU4
196
  type: bleu4
197
- value: 0.006278914808207679
198
  - name: ROUGE-L
199
  type: rouge-l
200
- value: 0.12368226019088967
201
  - name: METEOR
202
  type: meteor
203
- value: 0.11576293675813865
204
  - name: BERTScore
205
  type: bertscore
206
- value: 0.8807110440044503
207
  - name: MoverScore
208
  type: moverscore
209
- value: 0.5555905941686486
210
  - task:
211
  name: Text2text Generation
212
  type: text2text-generation
213
  dataset:
214
  name: lmqg/qg_subjqa
215
- type: movies
216
- args: movies
217
  metrics:
218
  - name: BLEU4
219
  type: bleu4
220
- value: 1.0121579426501661e-06
221
  - name: ROUGE-L
222
  type: rouge-l
223
- value: 0.12508697028506718
224
  - name: METEOR
225
  type: meteor
226
- value: 0.11862284941640638
227
  - name: BERTScore
228
  type: bertscore
229
- value: 0.8748829724726739
230
  - name: MoverScore
231
  type: moverscore
232
- value: 0.5528899173535703
233
  - task:
234
  name: Text2text Generation
235
  type: text2text-generation
236
  dataset:
237
- name: lmqg/qg_subjqa
238
- type: grocery
239
- args: grocery
240
  metrics:
241
  - name: BLEU4
242
  type: bleu4
243
- value: 0.00528043272450429
244
  - name: ROUGE-L
245
  type: rouge-l
246
- value: 0.12343711316491492
247
  - name: METEOR
248
  type: meteor
249
- value: 0.15133496445452477
250
  - name: BERTScore
251
  type: bertscore
252
- value: 0.8778951253890991
253
  - name: MoverScore
254
  type: moverscore
255
- value: 0.5701949938103265
256
  - task:
257
  name: Text2text Generation
258
  type: text2text-generation
259
  dataset:
260
  name: lmqg/qg_squadshifts
261
- type: amazon
262
- args: amazon
263
  metrics:
264
  - name: BLEU4
265
  type: bleu4
266
- value: 0.06530369842068952
267
  - name: ROUGE-L
268
  type: rouge-l
269
- value: 0.25030985091008146
270
  - name: METEOR
271
  type: meteor
272
- value: 0.2229994442645732
273
  - name: BERTScore
274
  type: bertscore
275
- value: 0.9092814804525936
276
  - name: MoverScore
277
  type: moverscore
278
- value: 0.6086538514008419
279
  ---
280
 
281
  # Model Card of `lmqg/bart-large-squad`
@@ -342,21 +348,28 @@ question = pipe('<hl> Beyonce <hl> further expanded her acting career, starring
342
  | [lmqg/qg_squad](https://huggingface.co/datasets/lmqg/qg_squad) | default | 0.262 | 0.538 | 0.271 | 0.91 | 0.65 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval/metric.first.sentence.paragraph_answer.question.lmqg_qg_squad.default.json) |
343
 
344
 
 
 
 
 
 
 
 
345
 
346
  ### Out-of-domain Metrics
347
 
348
  | Dataset | Type | BLEU4 | ROUGE-L | METEOR | BERTScore | MoverScore | Link |
349
  |:--------|:-----|------:|--------:|-------:|----------:|-----------:|-----:|
350
- | [lmqg/qg_squadshifts](https://huggingface.co/datasets/lmqg/qg_squadshifts) | reddit | 0.06 | 0.224 | 0.215 | 0.91 | 0.606 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_squadshifts.reddit.json) |
351
- | [lmqg/qg_squadshifts](https://huggingface.co/datasets/lmqg/qg_squadshifts) | new_wiki | 0.111 | 0.297 | 0.273 | 0.932 | 0.662 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_squadshifts.new_wiki.json) |
352
  | [lmqg/qg_subjqa](https://huggingface.co/datasets/lmqg/qg_subjqa) | tripadvisor | 0.0 | 0.14 | 0.137 | 0.889 | 0.56 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_subjqa.tripadvisor.json) |
353
- | [lmqg/qg_squadshifts](https://huggingface.co/datasets/lmqg/qg_squadshifts) | nyt | 0.081 | 0.253 | 0.253 | 0.925 | 0.641 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_squadshifts.nyt.json) |
354
- | [lmqg/qg_subjqa](https://huggingface.co/datasets/lmqg/qg_subjqa) | restaurants | 0.0 | 0.131 | 0.124 | 0.88 | 0.554 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_subjqa.restaurants.json) |
355
- | [lmqg/qg_subjqa](https://huggingface.co/datasets/lmqg/qg_subjqa) | electronics | 0.009 | 0.16 | 0.153 | 0.878 | 0.563 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_subjqa.electronics.json) |
356
  | [lmqg/qg_subjqa](https://huggingface.co/datasets/lmqg/qg_subjqa) | books | 0.006 | 0.124 | 0.116 | 0.881 | 0.556 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_subjqa.books.json) |
 
357
  | [lmqg/qg_subjqa](https://huggingface.co/datasets/lmqg/qg_subjqa) | movies | 0.0 | 0.125 | 0.119 | 0.875 | 0.553 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_subjqa.movies.json) |
358
  | [lmqg/qg_subjqa](https://huggingface.co/datasets/lmqg/qg_subjqa) | grocery | 0.005 | 0.123 | 0.151 | 0.878 | 0.57 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_subjqa.grocery.json) |
359
- | [lmqg/qg_squadshifts](https://huggingface.co/datasets/lmqg/qg_squadshifts) | amazon | 0.065 | 0.25 | 0.223 | 0.909 | 0.609 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_squadshifts.amazon.json) |
 
 
 
360
 
361
 
362
  ## Training hyperparameters
 
46
  - name: MoverScore
47
  type: moverscore
48
  value: 0.6499011626820898
49
+ - name: QAAlignedF1Score (BERTScore)
50
+ type: qa_aligned_f1_score_bertscore
51
+ value: 0.9553719665829591
52
+ - name: QAAlignedF1Score (MoverScore)
53
+ type: qa_aligned_f1_score_moverscore
54
+ value: 0.7082452551815105
55
  - task:
56
  name: Text2text Generation
57
  type: text2text-generation
58
  dataset:
59
+ name: lmqg/qg_subjqa
60
+ type: tripadvisor
61
+ args: tripadvisor
62
  metrics:
63
  - name: BLEU4
64
  type: bleu4
65
+ value: 8.380171318718442e-07
66
  - name: ROUGE-L
67
  type: rouge-l
68
+ value: 0.1402922852924756
69
  - name: METEOR
70
  type: meteor
71
+ value: 0.1372146070365174
72
  - name: BERTScore
73
  type: bertscore
74
+ value: 0.8891002409937424
75
  - name: MoverScore
76
  type: moverscore
77
+ value: 0.5604572211470809
78
  - task:
79
  name: Text2text Generation
80
  type: text2text-generation
81
  dataset:
82
  name: lmqg/qg_squadshifts
83
+ type: amazon
84
+ args: amazon
85
  metrics:
86
  - name: BLEU4
87
  type: bleu4
88
+ value: 0.06530369842068952
89
  - name: ROUGE-L
90
  type: rouge-l
91
+ value: 0.25030985091008146
92
  - name: METEOR
93
  type: meteor
94
+ value: 0.2229994442645732
95
  - name: BERTScore
96
  type: bertscore
97
+ value: 0.9092814804525936
98
  - name: MoverScore
99
  type: moverscore
100
+ value: 0.6086538514008419
101
  - task:
102
  name: Text2text Generation
103
  type: text2text-generation
104
  dataset:
105
  name: lmqg/qg_subjqa
106
+ type: books
107
+ args: books
108
  metrics:
109
  - name: BLEU4
110
  type: bleu4
111
+ value: 0.006278914808207679
112
  - name: ROUGE-L
113
  type: rouge-l
114
+ value: 0.12368226019088967
115
  - name: METEOR
116
  type: meteor
117
+ value: 0.11576293675813865
118
  - name: BERTScore
119
  type: bertscore
120
+ value: 0.8807110440044503
121
  - name: MoverScore
122
  type: moverscore
123
+ value: 0.5555905941686486
124
  - task:
125
  name: Text2text Generation
126
  type: text2text-generation
127
  dataset:
128
+ name: lmqg/qg_subjqa
129
+ type: restaurants
130
+ args: restaurants
131
  metrics:
132
  - name: BLEU4
133
  type: bleu4
134
+ value: 1.1301750984972448e-06
135
  - name: ROUGE-L
136
  type: rouge-l
137
+ value: 0.13083168975354642
138
  - name: METEOR
139
  type: meteor
140
+ value: 0.12419733006916912
141
  - name: BERTScore
142
  type: bertscore
143
+ value: 0.8797711839570719
144
  - name: MoverScore
145
  type: moverscore
146
+ value: 0.5542757411268555
147
  - task:
148
  name: Text2text Generation
149
  type: text2text-generation
150
  dataset:
151
  name: lmqg/qg_subjqa
152
+ type: movies
153
+ args: movies
154
  metrics:
155
  - name: BLEU4
156
  type: bleu4
157
+ value: 1.0121579426501661e-06
158
  - name: ROUGE-L
159
  type: rouge-l
160
+ value: 0.12508697028506718
161
  - name: METEOR
162
  type: meteor
163
+ value: 0.11862284941640638
164
  - name: BERTScore
165
  type: bertscore
166
+ value: 0.8748829724726739
167
  - name: MoverScore
168
  type: moverscore
169
+ value: 0.5528899173535703
170
  - task:
171
  name: Text2text Generation
172
  type: text2text-generation
173
  dataset:
174
  name: lmqg/qg_subjqa
175
+ type: grocery
176
+ args: grocery
177
  metrics:
178
  - name: BLEU4
179
  type: bleu4
180
+ value: 0.00528043272450429
181
  - name: ROUGE-L
182
  type: rouge-l
183
+ value: 0.12343711316491492
184
  - name: METEOR
185
  type: meteor
186
+ value: 0.15133496445452477
187
  - name: BERTScore
188
  type: bertscore
189
+ value: 0.8778951253890991
190
  - name: MoverScore
191
  type: moverscore
192
+ value: 0.5701949938103265
193
  - task:
194
  name: Text2text Generation
195
  type: text2text-generation
196
  dataset:
197
+ name: lmqg/qg_squadshifts
198
+ type: nyt
199
+ args: nyt
200
  metrics:
201
  - name: BLEU4
202
  type: bleu4
203
+ value: 0.08117757543966063
204
  - name: ROUGE-L
205
  type: rouge-l
206
+ value: 0.25292097720734297
207
  - name: METEOR
208
  type: meteor
209
+ value: 0.25254205113198686
210
  - name: BERTScore
211
  type: bertscore
212
+ value: 0.9249009759439454
213
  - name: MoverScore
214
  type: moverscore
215
+ value: 0.6406329128556304
216
  - task:
217
  name: Text2text Generation
218
  type: text2text-generation
219
  dataset:
220
  name: lmqg/qg_subjqa
221
+ type: electronics
222
+ args: electronics
223
  metrics:
224
  - name: BLEU4
225
  type: bleu4
226
+ value: 0.00866799444965211
227
  - name: ROUGE-L
228
  type: rouge-l
229
+ value: 0.1601628874804186
230
  - name: METEOR
231
  type: meteor
232
+ value: 0.15348605312210778
233
  - name: BERTScore
234
  type: bertscore
235
+ value: 0.8783386920680519
236
  - name: MoverScore
237
  type: moverscore
238
+ value: 0.5634845371093992
239
  - task:
240
  name: Text2text Generation
241
  type: text2text-generation
242
  dataset:
243
+ name: lmqg/qg_squadshifts
244
+ type: new_wiki
245
+ args: new_wiki
246
  metrics:
247
  - name: BLEU4
248
  type: bleu4
249
+ value: 0.11118273173452982
250
  - name: ROUGE-L
251
  type: rouge-l
252
+ value: 0.2967546690273089
253
  - name: METEOR
254
  type: meteor
255
+ value: 0.27315087810722966
256
  - name: BERTScore
257
  type: bertscore
258
+ value: 0.9322739617807421
259
  - name: MoverScore
260
  type: moverscore
261
+ value: 0.6623000084761579
262
  - task:
263
  name: Text2text Generation
264
  type: text2text-generation
265
  dataset:
266
  name: lmqg/qg_squadshifts
267
+ type: reddit
268
+ args: reddit
269
  metrics:
270
  - name: BLEU4
271
  type: bleu4
272
+ value: 0.059525104157825456
273
  - name: ROUGE-L
274
  type: rouge-l
275
+ value: 0.22365090580055863
276
  - name: METEOR
277
  type: meteor
278
+ value: 0.21499800504546457
279
  - name: BERTScore
280
  type: bertscore
281
+ value: 0.9095144685254328
282
  - name: MoverScore
283
  type: moverscore
284
+ value: 0.6059332247878408
285
  ---
286
 
287
  # Model Card of `lmqg/bart-large-squad`
 
348
  | [lmqg/qg_squad](https://huggingface.co/datasets/lmqg/qg_squad) | default | 0.262 | 0.538 | 0.271 | 0.91 | 0.65 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval/metric.first.sentence.paragraph_answer.question.lmqg_qg_squad.default.json) |
349
 
350
 
351
+ ### Metrics (QAG)
352
+
353
+ | Dataset | Type | QA Aligned F1 Score (BERTScore) | QA Aligned F1 Score (MoverScore) | Link |
354
+ |:--------|:-----|--------------------------------:|---------------------------------:|-----:|
355
+ | [lmqg/qg_squad](https://huggingface.co/datasets/lmqg/qg_squad) | default | 0.955 | 0.708 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval/metric.first.answer.paragraph.questions_answers.lmqg_qg_squad.default.json) |
356
+
357
+
358
 
359
  ### Out-of-domain Metrics
360
 
361
  | Dataset | Type | BLEU4 | ROUGE-L | METEOR | BERTScore | MoverScore | Link |
362
  |:--------|:-----|------:|--------:|-------:|----------:|-----------:|-----:|
 
 
363
  | [lmqg/qg_subjqa](https://huggingface.co/datasets/lmqg/qg_subjqa) | tripadvisor | 0.0 | 0.14 | 0.137 | 0.889 | 0.56 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_subjqa.tripadvisor.json) |
364
+ | [lmqg/qg_squadshifts](https://huggingface.co/datasets/lmqg/qg_squadshifts) | amazon | 0.065 | 0.25 | 0.223 | 0.909 | 0.609 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_squadshifts.amazon.json) |
 
 
365
  | [lmqg/qg_subjqa](https://huggingface.co/datasets/lmqg/qg_subjqa) | books | 0.006 | 0.124 | 0.116 | 0.881 | 0.556 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_subjqa.books.json) |
366
+ | [lmqg/qg_subjqa](https://huggingface.co/datasets/lmqg/qg_subjqa) | restaurants | 0.0 | 0.131 | 0.124 | 0.88 | 0.554 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_subjqa.restaurants.json) |
367
  | [lmqg/qg_subjqa](https://huggingface.co/datasets/lmqg/qg_subjqa) | movies | 0.0 | 0.125 | 0.119 | 0.875 | 0.553 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_subjqa.movies.json) |
368
  | [lmqg/qg_subjqa](https://huggingface.co/datasets/lmqg/qg_subjqa) | grocery | 0.005 | 0.123 | 0.151 | 0.878 | 0.57 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_subjqa.grocery.json) |
369
+ | [lmqg/qg_squadshifts](https://huggingface.co/datasets/lmqg/qg_squadshifts) | nyt | 0.081 | 0.253 | 0.253 | 0.925 | 0.641 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_squadshifts.nyt.json) |
370
+ | [lmqg/qg_subjqa](https://huggingface.co/datasets/lmqg/qg_subjqa) | electronics | 0.009 | 0.16 | 0.153 | 0.878 | 0.563 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_subjqa.electronics.json) |
371
+ | [lmqg/qg_squadshifts](https://huggingface.co/datasets/lmqg/qg_squadshifts) | new_wiki | 0.111 | 0.297 | 0.273 | 0.932 | 0.662 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_squadshifts.new_wiki.json) |
372
+ | [lmqg/qg_squadshifts](https://huggingface.co/datasets/lmqg/qg_squadshifts) | reddit | 0.06 | 0.224 | 0.215 | 0.91 | 0.606 | [link](https://huggingface.co/lmqg/bart-large-squad/raw/main/eval_ood/metric.first.sentence.paragraph_answer.question.lmqg_qg_squadshifts.reddit.json) |
373
 
374
 
375
  ## Training hyperparameters