tiedeman committed
Commit df9d988
1 Parent(s): ad9d107

Initial commit

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.spm filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,1228 @@
1
+ ---
2
+ library_name: transformers
3
+ language:
4
+ - chm
5
+ - de
6
+ - en
7
+ - es
8
+ - et
9
+ - fi
10
+ - fkv
11
+ - fr
12
+ - hu
13
+ - izh
14
+ - krl
15
+ - kv
16
+ - liv
17
+ - mdf
18
+ - mrj
19
+ - myv
20
+ - pt
21
+ - se
22
+ - sma
23
+ - smn
24
+ - udm
25
+ - vep
26
+ - vot
27
+
28
+ tags:
29
+ - translation
30
+ - opus-mt-tc-bible
31
+
32
+ license: apache-2.0
33
+ model-index:
34
+ - name: opus-mt-tc-bible-big-deu_eng_fra_por_spa-fiu
35
+ results:
36
+ - task:
37
+ name: Translation deu-est
38
+ type: translation
39
+ args: deu-est
40
+ dataset:
41
+ name: flores200-devtest
42
+ type: flores200-devtest
43
+ args: deu-est
44
+ metrics:
45
+ - name: BLEU
46
+ type: bleu
47
+ value: 21.2
48
+ - name: chr-F
49
+ type: chrf
50
+ value: 0.55333
51
+ - task:
52
+ name: Translation deu-fin
53
+ type: translation
54
+ args: deu-fin
55
+ dataset:
56
+ name: flores200-devtest
57
+ type: flores200-devtest
58
+ args: deu-fin
59
+ metrics:
60
+ - name: BLEU
61
+ type: bleu
62
+ value: 18.3
63
+ - name: chr-F
64
+ type: chrf
65
+ value: 0.54020
66
+ - task:
67
+ name: Translation deu-hun
68
+ type: translation
69
+ args: deu-hun
70
+ dataset:
71
+ name: flores200-devtest
72
+ type: flores200-devtest
73
+ args: deu-hun
74
+ metrics:
75
+ - name: BLEU
76
+ type: bleu
77
+ value: 22.0
78
+ - name: chr-F
79
+ type: chrf
80
+ value: 0.53579
81
+ - task:
82
+ name: Translation eng-est
83
+ type: translation
84
+ args: eng-est
85
+ dataset:
86
+ name: flores200-devtest
87
+ type: flores200-devtest
88
+ args: eng-est
89
+ metrics:
90
+ - name: BLEU
91
+ type: bleu
92
+ value: 26.1
93
+ - name: chr-F
94
+ type: chrf
95
+ value: 0.59496
96
+ - task:
97
+ name: Translation eng-fin
98
+ type: translation
99
+ args: eng-fin
100
+ dataset:
101
+ name: flores200-devtest
102
+ type: flores200-devtest
103
+ args: eng-fin
104
+ metrics:
105
+ - name: BLEU
106
+ type: bleu
107
+ value: 23.1
108
+ - name: chr-F
109
+ type: chrf
110
+ value: 0.57811
111
+ - task:
112
+ name: Translation eng-hun
113
+ type: translation
114
+ args: eng-hun
115
+ dataset:
116
+ name: flores200-devtest
117
+ type: flores200-devtest
118
+ args: eng-hun
119
+ metrics:
120
+ - name: BLEU
121
+ type: bleu
122
+ value: 26.7
123
+ - name: chr-F
124
+ type: chrf
125
+ value: 0.57670
126
+ - task:
127
+ name: Translation fra-est
128
+ type: translation
129
+ args: fra-est
130
+ dataset:
131
+ name: flores200-devtest
132
+ type: flores200-devtest
133
+ args: fra-est
134
+ metrics:
135
+ - name: BLEU
136
+ type: bleu
137
+ value: 21.2
138
+ - name: chr-F
139
+ type: chrf
140
+ value: 0.54442
141
+ - task:
142
+ name: Translation fra-fin
143
+ type: translation
144
+ args: fra-fin
145
+ dataset:
146
+ name: flores200-devtest
147
+ type: flores200-devtest
148
+ args: fra-fin
149
+ metrics:
150
+ - name: BLEU
151
+ type: bleu
152
+ value: 18.5
153
+ - name: chr-F
154
+ type: chrf
155
+ value: 0.53768
156
+ - task:
157
+ name: Translation fra-hun
158
+ type: translation
159
+ args: fra-hun
160
+ dataset:
161
+ name: flores200-devtest
162
+ type: flores200-devtest
163
+ args: fra-hun
164
+ metrics:
165
+ - name: BLEU
166
+ type: bleu
167
+ value: 21.2
168
+ - name: chr-F
169
+ type: chrf
170
+ value: 0.52691
171
+ - task:
172
+ name: Translation por-est
173
+ type: translation
174
+ args: por-est
175
+ dataset:
176
+ name: flores200-devtest
177
+ type: flores200-devtest
178
+ args: por-est
179
+ metrics:
180
+ - name: BLEU
181
+ type: bleu
182
+ value: 15.6
183
+ - name: chr-F
184
+ type: chrf
185
+ value: 0.48227
186
+ - task:
187
+ name: Translation por-fin
188
+ type: translation
189
+ args: por-fin
190
+ dataset:
191
+ name: flores200-devtest
192
+ type: flores200-devtest
193
+ args: por-fin
194
+ metrics:
195
+ - name: BLEU
196
+ type: bleu
197
+ value: 18.6
198
+ - name: chr-F
199
+ type: chrf
200
+ value: 0.53772
201
+ - task:
202
+ name: Translation por-hun
203
+ type: translation
204
+ args: por-hun
205
+ dataset:
206
+ name: flores200-devtest
207
+ type: flores200-devtest
208
+ args: por-hun
209
+ metrics:
210
+ - name: BLEU
211
+ type: bleu
212
+ value: 21.8
213
+ - name: chr-F
214
+ type: chrf
215
+ value: 0.53275
216
+ - task:
217
+ name: Translation spa-est
218
+ type: translation
219
+ args: spa-est
220
+ dataset:
221
+ name: flores200-devtest
222
+ type: flores200-devtest
223
+ args: spa-est
224
+ metrics:
225
+ - name: BLEU
226
+ type: bleu
227
+ value: 15.2
228
+ - name: chr-F
229
+ type: chrf
230
+ value: 0.50142
231
+ - task:
232
+ name: Translation spa-fin
233
+ type: translation
234
+ args: spa-fin
235
+ dataset:
236
+ name: flores200-devtest
237
+ type: flores200-devtest
238
+ args: spa-fin
239
+ metrics:
240
+ - name: BLEU
241
+ type: bleu
242
+ value: 13.7
243
+ - name: chr-F
244
+ type: chrf
245
+ value: 0.50401
246
+ - task:
247
+ name: Translation spa-hun
248
+ type: translation
249
+ args: spa-hun
250
+ dataset:
251
+ name: flores200-devtest
252
+ type: flores200-devtest
253
+ args: spa-hun
254
+ metrics:
255
+ - name: BLEU
256
+ type: bleu
257
+ value: 16.4
258
+ - name: chr-F
259
+ type: chrf
260
+ value: 0.49444
261
+ - task:
262
+ name: Translation eng-fin
263
+ type: translation
264
+ args: eng-fin
265
+ dataset:
266
+ name: flores101-devtest
267
+ type: flores_101
268
+ args: eng fin devtest
269
+ metrics:
270
+ - name: BLEU
271
+ type: bleu
272
+ value: 21.9
273
+ - name: chr-F
274
+ type: chrf
275
+ value: 0.57265
276
+ - task:
277
+ name: Translation fra-hun
278
+ type: translation
279
+ args: fra-hun
280
+ dataset:
281
+ name: flores101-devtest
282
+ type: flores_101
283
+ args: fra hun devtest
284
+ metrics:
285
+ - name: BLEU
286
+ type: bleu
287
+ value: 21.2
288
+ - name: chr-F
289
+ type: chrf
290
+ value: 0.52691
291
+ - task:
292
+ name: Translation por-fin
293
+ type: translation
294
+ args: por-fin
295
+ dataset:
296
+ name: flores101-devtest
297
+ type: flores_101
298
+ args: por fin devtest
299
+ metrics:
300
+ - name: BLEU
301
+ type: bleu
302
+ value: 18.6
303
+ - name: chr-F
304
+ type: chrf
305
+ value: 0.53772
306
+ - task:
307
+ name: Translation por-hun
308
+ type: translation
309
+ args: por-hun
310
+ dataset:
311
+ name: flores101-devtest
312
+ type: flores_101
313
+ args: por hun devtest
314
+ metrics:
315
+ - name: BLEU
316
+ type: bleu
317
+ value: 21.8
318
+ - name: chr-F
319
+ type: chrf
320
+ value: 0.53275
321
+ - task:
322
+ name: Translation spa-est
323
+ type: translation
324
+ args: spa-est
325
+ dataset:
326
+ name: flores101-devtest
327
+ type: flores_101
328
+ args: spa est devtest
329
+ metrics:
330
+ - name: BLEU
331
+ type: bleu
332
+ value: 15.2
333
+ - name: chr-F
334
+ type: chrf
335
+ value: 0.50142
336
+ - task:
337
+ name: Translation spa-fin
338
+ type: translation
339
+ args: spa-fin
340
+ dataset:
341
+ name: flores101-devtest
342
+ type: flores_101
343
+ args: spa fin devtest
344
+ metrics:
345
+ - name: BLEU
346
+ type: bleu
347
+ value: 13.7
348
+ - name: chr-F
349
+ type: chrf
350
+ value: 0.50401
351
+ - task:
352
+ name: Translation eng-fin
353
+ type: translation
354
+ args: eng-fin
355
+ dataset:
356
+ name: newstestALL2016
357
+ type: newstestALL2016
358
+ args: eng-fin
359
+ metrics:
360
+ - name: BLEU
361
+ type: bleu
362
+ value: 24.3
363
+ - name: chr-F
364
+ type: chrf
365
+ value: 0.57934
366
+ - task:
367
+ name: Translation eng-fin
368
+ type: translation
369
+ args: eng-fin
370
+ dataset:
371
+ name: newstestALL2017
372
+ type: newstestALL2017
373
+ args: eng-fin
374
+ metrics:
375
+ - name: BLEU
376
+ type: bleu
377
+ value: 26.5
378
+ - name: chr-F
379
+ type: chrf
380
+ value: 0.60204
381
+ - task:
382
+ name: Translation deu-est
383
+ type: translation
384
+ args: deu-est
385
+ dataset:
386
+ name: ntrex128
387
+ type: ntrex128
388
+ args: deu-est
389
+ metrics:
390
+ - name: BLEU
391
+ type: bleu
392
+ value: 18.6
393
+ - name: chr-F
394
+ type: chrf
395
+ value: 0.51761
396
+ - task:
397
+ name: Translation deu-fin
398
+ type: translation
399
+ args: deu-fin
400
+ dataset:
401
+ name: ntrex128
402
+ type: ntrex128
403
+ args: deu-fin
404
+ metrics:
405
+ - name: BLEU
406
+ type: bleu
407
+ value: 15.5
408
+ - name: chr-F
409
+ type: chrf
410
+ value: 0.50759
411
+ - task:
412
+ name: Translation deu-hun
413
+ type: translation
414
+ args: deu-hun
415
+ dataset:
416
+ name: ntrex128
417
+ type: ntrex128
418
+ args: deu-hun
419
+ metrics:
420
+ - name: BLEU
421
+ type: bleu
422
+ value: 15.6
423
+ - name: chr-F
424
+ type: chrf
425
+ value: 0.46171
426
+ - task:
427
+ name: Translation eng-est
428
+ type: translation
429
+ args: eng-est
430
+ dataset:
431
+ name: ntrex128
432
+ type: ntrex128
433
+ args: eng-est
434
+ metrics:
435
+ - name: BLEU
436
+ type: bleu
437
+ value: 24.4
438
+ - name: chr-F
439
+ type: chrf
440
+ value: 0.57099
441
+ - task:
442
+ name: Translation eng-fin
443
+ type: translation
444
+ args: eng-fin
445
+ dataset:
446
+ name: ntrex128
447
+ type: ntrex128
448
+ args: eng-fin
449
+ metrics:
450
+ - name: BLEU
451
+ type: bleu
452
+ value: 18.5
453
+ - name: chr-F
454
+ type: chrf
455
+ value: 0.53413
456
+ - task:
457
+ name: Translation eng-hun
458
+ type: translation
459
+ args: eng-hun
460
+ dataset:
461
+ name: ntrex128
462
+ type: ntrex128
463
+ args: eng-hun
464
+ metrics:
465
+ - name: BLEU
466
+ type: bleu
467
+ value: 16.6
468
+ - name: chr-F
469
+ type: chrf
470
+ value: 0.47342
471
+ - task:
472
+ name: Translation fra-est
473
+ type: translation
474
+ args: fra-est
475
+ dataset:
476
+ name: ntrex128
477
+ type: ntrex128
478
+ args: fra-est
479
+ metrics:
480
+ - name: BLEU
481
+ type: bleu
482
+ value: 17.7
483
+ - name: chr-F
484
+ type: chrf
485
+ value: 0.50712
486
+ - task:
487
+ name: Translation fra-fin
488
+ type: translation
489
+ args: fra-fin
490
+ dataset:
491
+ name: ntrex128
492
+ type: ntrex128
493
+ args: fra-fin
494
+ metrics:
495
+ - name: BLEU
496
+ type: bleu
497
+ value: 14.2
498
+ - name: chr-F
499
+ type: chrf
500
+ value: 0.49215
501
+ - task:
502
+ name: Translation fra-hun
503
+ type: translation
504
+ args: fra-hun
505
+ dataset:
506
+ name: ntrex128
507
+ type: ntrex128
508
+ args: fra-hun
509
+ metrics:
510
+ - name: BLEU
511
+ type: bleu
512
+ value: 14.9
513
+ - name: chr-F
514
+ type: chrf
515
+ value: 0.44873
516
+ - task:
517
+ name: Translation por-est
518
+ type: translation
519
+ args: por-est
520
+ dataset:
521
+ name: ntrex128
522
+ type: ntrex128
523
+ args: por-est
524
+ metrics:
525
+ - name: BLEU
526
+ type: bleu
527
+ value: 15.1
528
+ - name: chr-F
529
+ type: chrf
530
+ value: 0.48098
531
+ - task:
532
+ name: Translation por-fin
533
+ type: translation
534
+ args: por-fin
535
+ dataset:
536
+ name: ntrex128
537
+ type: ntrex128
538
+ args: por-fin
539
+ metrics:
540
+ - name: BLEU
541
+ type: bleu
542
+ value: 15.0
543
+ - name: chr-F
544
+ type: chrf
545
+ value: 0.50875
546
+ - task:
547
+ name: Translation por-hun
548
+ type: translation
549
+ args: por-hun
550
+ dataset:
551
+ name: ntrex128
552
+ type: ntrex128
553
+ args: por-hun
554
+ metrics:
555
+ - name: BLEU
556
+ type: bleu
557
+ value: 15.5
558
+ - name: chr-F
559
+ type: chrf
560
+ value: 0.45817
561
+ - task:
562
+ name: Translation spa-est
563
+ type: translation
564
+ args: spa-est
565
+ dataset:
566
+ name: ntrex128
567
+ type: ntrex128
568
+ args: spa-est
569
+ metrics:
570
+ - name: BLEU
571
+ type: bleu
572
+ value: 18.5
573
+ - name: chr-F
574
+ type: chrf
575
+ value: 0.52158
576
+ - task:
577
+ name: Translation spa-fin
578
+ type: translation
579
+ args: spa-fin
580
+ dataset:
581
+ name: ntrex128
582
+ type: ntrex128
583
+ args: spa-fin
584
+ metrics:
585
+ - name: BLEU
586
+ type: bleu
587
+ value: 15.2
588
+ - name: chr-F
589
+ type: chrf
590
+ value: 0.50947
591
+ - task:
592
+ name: Translation spa-hun
593
+ type: translation
594
+ args: spa-hun
595
+ dataset:
596
+ name: ntrex128
597
+ type: ntrex128
598
+ args: spa-hun
599
+ metrics:
600
+ - name: BLEU
601
+ type: bleu
602
+ value: 16.1
603
+ - name: chr-F
604
+ type: chrf
605
+ value: 0.46051
606
+ - task:
607
+ name: Translation deu-est
608
+ type: translation
609
+ args: deu-est
610
+ dataset:
611
+ name: tatoeba-test-v2021-08-07
612
+ type: tatoeba_mt
613
+ args: deu-est
614
+ metrics:
615
+ - name: BLEU
616
+ type: bleu
617
+ value: 57.8
618
+ - name: chr-F
619
+ type: chrf
620
+ value: 0.76586
621
+ - task:
622
+ name: Translation deu-fin
623
+ type: translation
624
+ args: deu-fin
625
+ dataset:
626
+ name: tatoeba-test-v2021-08-07
627
+ type: tatoeba_mt
628
+ args: deu-fin
629
+ metrics:
630
+ - name: BLEU
631
+ type: bleu
632
+ value: 40.7
633
+ - name: chr-F
634
+ type: chrf
635
+ value: 0.64286
636
+ - task:
637
+ name: Translation deu-hun
638
+ type: translation
639
+ args: deu-hun
640
+ dataset:
641
+ name: tatoeba-test-v2021-08-07
642
+ type: tatoeba_mt
643
+ args: deu-hun
644
+ metrics:
645
+ - name: BLEU
646
+ type: bleu
647
+ value: 31.2
648
+ - name: chr-F
649
+ type: chrf
650
+ value: 0.57007
651
+ - task:
652
+ name: Translation eng-est
653
+ type: translation
654
+ args: eng-est
655
+ dataset:
656
+ name: tatoeba-test-v2021-08-07
657
+ type: tatoeba_mt
658
+ args: eng-est
659
+ metrics:
660
+ - name: BLEU
661
+ type: bleu
662
+ value: 50.6
663
+ - name: chr-F
664
+ type: chrf
665
+ value: 0.69134
666
+ - task:
667
+ name: Translation eng-fin
668
+ type: translation
669
+ args: eng-fin
670
+ dataset:
671
+ name: tatoeba-test-v2021-08-07
672
+ type: tatoeba_mt
673
+ args: eng-fin
674
+ metrics:
675
+ - name: BLEU
676
+ type: bleu
677
+ value: 37.6
678
+ - name: chr-F
679
+ type: chrf
680
+ value: 0.62482
681
+ - task:
682
+ name: Translation eng-hun
683
+ type: translation
684
+ args: eng-hun
685
+ dataset:
686
+ name: tatoeba-test-v2021-08-07
687
+ type: tatoeba_mt
688
+ args: eng-hun
689
+ metrics:
690
+ - name: BLEU
691
+ type: bleu
692
+ value: 35.9
693
+ - name: chr-F
694
+ type: chrf
695
+ value: 0.59750
696
+ - task:
697
+ name: Translation fra-fin
698
+ type: translation
699
+ args: fra-fin
700
+ dataset:
701
+ name: tatoeba-test-v2021-08-07
702
+ type: tatoeba_mt
703
+ args: fra-fin
704
+ metrics:
705
+ - name: BLEU
706
+ type: bleu
707
+ value: 45.0
708
+ - name: chr-F
709
+ type: chrf
710
+ value: 0.65723
711
+ - task:
712
+ name: Translation fra-hun
713
+ type: translation
714
+ args: fra-hun
715
+ dataset:
716
+ name: tatoeba-test-v2021-08-07
717
+ type: tatoeba_mt
718
+ args: fra-hun
719
+ metrics:
720
+ - name: BLEU
721
+ type: bleu
722
+ value: 40.6
723
+ - name: chr-F
724
+ type: chrf
725
+ value: 0.63096
726
+ - task:
727
+ name: Translation multi-multi
728
+ type: translation
729
+ args: multi-multi
730
+ dataset:
731
+ name: tatoeba-test-v2020-07-28-v2023-09-26
732
+ type: tatoeba_mt
733
+ args: multi-multi
734
+ metrics:
735
+ - name: BLEU
736
+ type: bleu
737
+ value: 32.8
738
+ - name: chr-F
739
+ type: chrf
740
+ value: 0.58505
741
+ - task:
742
+ name: Translation por-fin
743
+ type: translation
744
+ args: por-fin
745
+ dataset:
746
+ name: tatoeba-test-v2021-08-07
747
+ type: tatoeba_mt
748
+ args: por-fin
749
+ metrics:
750
+ - name: BLEU
751
+ type: bleu
752
+ value: 58.1
753
+ - name: chr-F
754
+ type: chrf
755
+ value: 0.76811
756
+ - task:
757
+ name: Translation por-hun
758
+ type: translation
759
+ args: por-hun
760
+ dataset:
761
+ name: tatoeba-test-v2021-08-07
762
+ type: tatoeba_mt
763
+ args: por-hun
764
+ metrics:
765
+ - name: BLEU
766
+ type: bleu
767
+ value: 42.5
768
+ - name: chr-F
769
+ type: chrf
770
+ value: 0.64930
771
+ - task:
772
+ name: Translation spa-fin
773
+ type: translation
774
+ args: spa-fin
775
+ dataset:
776
+ name: tatoeba-test-v2021-08-07
777
+ type: tatoeba_mt
778
+ args: spa-fin
779
+ metrics:
780
+ - name: BLEU
781
+ type: bleu
782
+ value: 43.4
783
+ - name: chr-F
784
+ type: chrf
785
+ value: 0.66220
786
+ - task:
787
+ name: Translation spa-hun
788
+ type: translation
789
+ args: spa-hun
790
+ dataset:
791
+ name: tatoeba-test-v2021-08-07
792
+ type: tatoeba_mt
793
+ args: spa-hun
794
+ metrics:
795
+ - name: BLEU
796
+ type: bleu
797
+ value: 42.0
798
+ - name: chr-F
799
+ type: chrf
800
+ value: 0.63596
801
+ - task:
802
+ name: Translation deu-hun
803
+ type: translation
804
+ args: deu-hun
805
+ dataset:
806
+ name: newstest2008
807
+ type: wmt-2008-news
808
+ args: deu-hun
809
+ metrics:
810
+ - name: BLEU
811
+ type: bleu
812
+ value: 17.2
813
+ - name: chr-F
814
+ type: chrf
815
+ value: 0.48855
816
+ - task:
817
+ name: Translation eng-hun
818
+ type: translation
819
+ args: eng-hun
820
+ dataset:
821
+ name: newstest2008
822
+ type: wmt-2008-news
823
+ args: eng-hun
824
+ metrics:
825
+ - name: BLEU
826
+ type: bleu
827
+ value: 15.9
828
+ - name: chr-F
829
+ type: chrf
830
+ value: 0.47636
831
+ - task:
832
+ name: Translation fra-hun
833
+ type: translation
834
+ args: fra-hun
835
+ dataset:
836
+ name: newstest2008
837
+ type: wmt-2008-news
838
+ args: fra-hun
839
+ metrics:
840
+ - name: BLEU
841
+ type: bleu
842
+ value: 17.7
843
+ - name: chr-F
844
+ type: chrf
845
+ value: 0.48598
846
+ - task:
847
+ name: Translation spa-hun
848
+ type: translation
849
+ args: spa-hun
850
+ dataset:
851
+ name: newstest2008
852
+ type: wmt-2008-news
853
+ args: spa-hun
854
+ metrics:
855
+ - name: BLEU
856
+ type: bleu
857
+ value: 17.1
858
+ - name: chr-F
859
+ type: chrf
860
+ value: 0.47888
861
+ - task:
862
+ name: Translation deu-hun
863
+ type: translation
864
+ args: deu-hun
865
+ dataset:
866
+ name: newstest2009
867
+ type: wmt-2009-news
868
+ args: deu-hun
869
+ metrics:
870
+ - name: BLEU
871
+ type: bleu
872
+ value: 18.1
873
+ - name: chr-F
874
+ type: chrf
875
+ value: 0.48692
876
+ - task:
877
+ name: Translation eng-hun
878
+ type: translation
879
+ args: eng-hun
880
+ dataset:
881
+ name: newstest2009
882
+ type: wmt-2009-news
883
+ args: eng-hun
884
+ metrics:
885
+ - name: BLEU
886
+ type: bleu
887
+ value: 18.4
888
+ - name: chr-F
889
+ type: chrf
890
+ value: 0.49507
891
+ - task:
892
+ name: Translation fra-hun
893
+ type: translation
894
+ args: fra-hun
895
+ dataset:
896
+ name: newstest2009
897
+ type: wmt-2009-news
898
+ args: fra-hun
899
+ metrics:
900
+ - name: BLEU
901
+ type: bleu
902
+ value: 18.6
903
+ - name: chr-F
904
+ type: chrf
905
+ value: 0.48961
906
+ - task:
907
+ name: Translation spa-hun
908
+ type: translation
909
+ args: spa-hun
910
+ dataset:
911
+ name: newstest2009
912
+ type: wmt-2009-news
913
+ args: spa-hun
914
+ metrics:
915
+ - name: BLEU
916
+ type: bleu
917
+ value: 18.1
918
+ - name: chr-F
919
+ type: chrf
920
+ value: 0.48496
921
+ - task:
922
+ name: Translation eng-fin
923
+ type: translation
924
+ args: eng-fin
925
+ dataset:
926
+ name: newstest2015
927
+ type: wmt-2015-news
928
+ args: eng-fin
929
+ metrics:
930
+ - name: BLEU
931
+ type: bleu
932
+ value: 22.8
933
+ - name: chr-F
934
+ type: chrf
935
+ value: 0.56896
936
+ - task:
937
+ name: Translation eng-fin
938
+ type: translation
939
+ args: eng-fin
940
+ dataset:
941
+ name: newstest2016
942
+ type: wmt-2016-news
943
+ args: eng-fin
944
+ metrics:
945
+ - name: BLEU
946
+ type: bleu
947
+ value: 24.3
948
+ - name: chr-F
949
+ type: chrf
950
+ value: 0.57934
951
+ - task:
952
+ name: Translation eng-fin
953
+ type: translation
954
+ args: eng-fin
955
+ dataset:
956
+ name: newstest2017
957
+ type: wmt-2017-news
958
+ args: eng-fin
959
+ metrics:
960
+ - name: BLEU
961
+ type: bleu
962
+ value: 26.5
963
+ - name: chr-F
964
+ type: chrf
965
+ value: 0.60204
966
+ - task:
967
+ name: Translation eng-est
968
+ type: translation
969
+ args: eng-est
970
+ dataset:
971
+ name: newstest2018
972
+ type: wmt-2018-news
973
+ args: eng-est
974
+ metrics:
975
+ - name: BLEU
976
+ type: bleu
977
+ value: 23.8
978
+ - name: chr-F
979
+ type: chrf
980
+ value: 0.56276
981
+ - task:
982
+ name: Translation eng-fin
983
+ type: translation
984
+ args: eng-fin
985
+ dataset:
986
+ name: newstest2018
987
+ type: wmt-2018-news
988
+ args: eng-fin
989
+ metrics:
990
+ - name: BLEU
991
+ type: bleu
992
+ value: 17.4
993
+ - name: chr-F
994
+ type: chrf
995
+ value: 0.52953
996
+ - task:
997
+ name: Translation eng-fin
998
+ type: translation
999
+ args: eng-fin
1000
+ dataset:
1001
+ name: newstest2019
1002
+ type: wmt-2019-news
1003
+ args: eng-fin
1004
+ metrics:
1005
+ - name: BLEU
1006
+ type: bleu
1007
+ value: 24.2
1008
+ - name: chr-F
1009
+ type: chrf
1010
+ value: 0.55882
1011
+ ---
1012
+ # opus-mt-tc-bible-big-deu_eng_fra_por_spa-fiu
1013
+
1014
+ ## Table of Contents
1015
+ - [Model Details](#model-details)
1016
+ - [Uses](#uses)
1017
+ - [Risks, Limitations and Biases](#risks-limitations-and-biases)
1018
+ - [How to Get Started With the Model](#how-to-get-started-with-the-model)
1019
+ - [Training](#training)
1020
+ - [Evaluation](#evaluation)
1021
+ - [Citation Information](#citation-information)
1022
+ - [Acknowledgements](#acknowledgements)
1023
+
1024
+ ## Model Details
1025
+
1026
+ Neural machine translation model for translating from German, English, French, Portuguese, and Spanish (deu+eng+fra+por+spa) to Finno-Ugrian languages (fiu).
1027
+
1028
+ This model is part of the [OPUS-MT project](https://github.com/Helsinki-NLP/Opus-MT), an effort to make neural machine translation models widely available and accessible for many languages in the world. All models are originally trained using the amazing framework of [Marian NMT](https://marian-nmt.github.io/), an efficient NMT implementation written in pure C++. The models have been converted to PyTorch using the transformers library by Hugging Face. Training data is taken from [OPUS](https://opus.nlpl.eu/) and training pipelines use the procedures of [OPUS-MT-train](https://github.com/Helsinki-NLP/Opus-MT-train).
1029
+ **Model Description:**
1030
+ - **Developed by:** Language Technology Research Group at the University of Helsinki
1031
+ - **Model Type:** Translation (transformer-big)
1032
+ - **Release**: 2024-05-30
1033
+ - **License:** Apache-2.0
1034
+ - **Language(s):**
1035
+ - Source Language(s): deu eng fra por spa
1036
+ - Target Language(s): chm est fin fkv hun izh koi kom kpv krl liv mdf mrj myv sma sme smn udm vep vot vro
1037
+ - Valid Target Language Labels: >>chm<< >>est<< >>fin<< >>fit<< >>fkv<< >>fkv_Latn<< >>hun<< >>izh<< >>kca<< >>koi<< >>kom<< >>kpv<< >>krl<< >>liv<< >>liv_Latn<< >>mdf<< >>mns<< >>mrj<< >>myv<< >>olo<< >>sia<< >>sjd<< >>sje<< >>sjk<< >>sjt<< >>sju<< >>sma<< >>sme<< >>smj<< >>smn<< >>sms<< >>udm<< >>vep<< >>vot<< >>vot_Latn<< >>vro<<
1038
+ - **Original Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-fiu/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip)
1039
+ - **Resources for more information:**
1040
+ - [OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/deu%2Beng%2Bfra%2Bpor%2Bspa-fiu/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30)
1041
+ - [OPUS-MT-train GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
1042
+ - [More information about MarianNMT models in the transformers library](https://huggingface.co/docs/transformers/model_doc/marian)
1043
+ - [Tatoeba Translation Challenge](https://github.com/Helsinki-NLP/Tatoeba-Challenge/)
1044
+ - [HPLT bilingual data v1 (as part of the Tatoeba Translation Challenge dataset)](https://hplt-project.org/datasets/v1)
1045
+ - [A massively parallel Bible corpus](https://aclanthology.org/L14-1215/)
1046
+
1047
+ This is a multilingual translation model with multiple target languages. A sentence-initial language token in the form of `>>id<<` (id = a valid target language ID) is required, e.g. `>>chm<<`.
1048
+
1049
+ ## Uses
1050
+
1051
+ This model can be used for translation and text-to-text generation.
1052
+
1053
+ ## Risks, Limitations and Biases
1054
+
1055
+ **CONTENT WARNING: Readers should be aware that the model is trained on various public data sets that may contain content that is disturbing, offensive, and can propagate historical and current stereotypes.**
1056
+
1057
+ Significant research has explored bias and fairness issues with language models (see, e.g., [Sheng et al. (2021)](https://aclanthology.org/2021.acl-long.330.pdf) and [Bender et al. (2021)](https://dl.acm.org/doi/pdf/10.1145/3442188.3445922)).
1058
+
1059
+ ## How to Get Started With the Model
1060
+
1061
+ A short code example:
1062
+
1063
+ ```python
1064
+ from transformers import MarianMTModel, MarianTokenizer
1065
+
1066
+ src_text = [
1067
+ ">>chm<< Replace this with text in an accepted source language.",
1068
+ ">>vro<< This is the second sentence."
1069
+ ]
1070
+
1071
+ model_name = "pytorch-models/opus-mt-tc-bible-big-deu_eng_fra_por_spa-fiu"
1072
+ tokenizer = MarianTokenizer.from_pretrained(model_name)
1073
+ model = MarianMTModel.from_pretrained(model_name)
1074
+ translated = model.generate(**tokenizer(src_text, return_tensors="pt", padding=True))
1075
+
1076
+ for t in translated:
1077
+     print(tokenizer.decode(t, skip_special_tokens=True))
1078
+ ```
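The tokens accepted as target-language labels can be checked directly against the tokenizer; a minimal sketch (not part of the original card, reusing the `tokenizer` from the example above) that scans the vocabulary for `>>id<<` entries:

```python
# Sketch: list the >>id<< target-language tokens in the tokenizer's vocabulary.
# get_vocab() is the standard transformers tokenizer API.
lang_tokens = sorted(
    tok for tok in tokenizer.get_vocab()
    if tok.startswith(">>") and tok.endswith("<<")
)
print(lang_tokens)  # expected to include '>>chm<<', '>>fin<<', '>>vro<<', ...
```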
1079
+
1080
+ You can also use OPUS-MT models with the transformers pipelines, for example:
1081
+
1082
+ ```python
1083
+ from transformers import pipeline
1084
+ pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-fiu")
1085
+ print(pipe(">>chm<< Replace this with text in an accepted source language."))
1086
+ ```
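Because the target language is selected per sentence, a single batch can mix several targets; a small sketch under the same assumptions:

```python
# Sketch: translate the same sentence into two target languages in one call
# by switching the sentence-initial >>id<< token.
from transformers import pipeline

pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-bible-big-deu_eng_fra_por_spa-fiu")
batch = [
    ">>fin<< This is the second sentence.",
    ">>hun<< This is the second sentence.",
]
for result in pipe(batch):
    print(result["translation_text"])
```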
1087
+
1088
+ ## Training
1089
+
1090
+ - **Data**: opusTCv20230926max50+bt+jhubc ([source](https://github.com/Helsinki-NLP/Tatoeba-Challenge))
1091
+ - **Pre-processing**: SentencePiece (spm32k,spm32k); see the tokenization sketch after this list
1092
+ - **Model Type:** transformer-big
1093
+ - **Original MarianNMT Model**: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-fiu/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30.zip)
1094
+ - **Training Scripts**: [GitHub Repo](https://github.com/Helsinki-NLP/OPUS-MT-train)
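As a rough illustration of the spm32k pre-processing, the SentencePiece model shipped in this repository as `source.spm` can be loaded on its own; a minimal sketch, assuming a local copy of the file (note that `MarianTokenizer` additionally handles the `>>id<<` label and special tokens):

```python
# Sketch: inspect how the source-side SentencePiece model (spm32k) segments text.
# "source.spm" is assumed to be a locally downloaded copy of this repo's file.
import sentencepiece as spm

sp = spm.SentencePieceProcessor(model_file="source.spm")
print(sp.encode("This is the second sentence.", out_type=str))
```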
1095
+
1096
+ ## Evaluation
1097
+
1098
+ * [Model scores at the OPUS-MT dashboard](https://opus.nlpl.eu/dashboard/index.php?pkg=opusmt&test=all&scoreslang=all&chart=standard&model=Tatoeba-MT-models/deu%2Beng%2Bfra%2Bpor%2Bspa-fiu/opusTCv20230926max50%2Bbt%2Bjhubc_transformer-big_2024-05-30)
1099
+ * test set translations: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-fiu/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.test.txt)
1100
+ * test set scores: [opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt](https://object.pouta.csc.fi/Tatoeba-MT-models/deu+eng+fra+por+spa-fiu/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-29.eval.txt)
1101
+ * benchmark results: [benchmark_results.txt](benchmark_results.txt)
1102
+ * benchmark output: [benchmark_translations.zip](benchmark_translations.zip)
1103
+
1104
+ | langpair | testset | chr-F | BLEU | #sent | #words |
1105
+ |----------|---------|-------|-------|-------|--------|
1106
+ | deu-est | tatoeba-test-v2021-08-07 | 0.76586 | 57.8 | 244 | 1413 |
1107
+ | deu-fin | tatoeba-test-v2021-08-07 | 0.64286 | 40.7 | 2647 | 15024 |
1108
+ | deu-hun | tatoeba-test-v2021-08-07 | 0.57007 | 31.2 | 15342 | 105152 |
1109
+ | eng-est | tatoeba-test-v2021-08-07 | 0.69134 | 50.6 | 1359 | 7992 |
1110
+ | eng-fin | tatoeba-test-v2021-08-07 | 0.62482 | 37.6 | 10690 | 65122 |
1111
+ | eng-hun | tatoeba-test-v2021-08-07 | 0.59750 | 35.9 | 13037 | 79562 |
1112
+ | fra-fin | tatoeba-test-v2021-08-07 | 0.65723 | 45.0 | 1920 | 9730 |
1113
+ | fra-hun | tatoeba-test-v2021-08-07 | 0.63096 | 40.6 | 2494 | 13753 |
1114
+ | por-fin | tatoeba-test-v2021-08-07 | 0.76811 | 58.1 | 477 | 2379 |
1115
+ | por-hun | tatoeba-test-v2021-08-07 | 0.64930 | 42.5 | 2500 | 14063 |
1116
+ | spa-fin | tatoeba-test-v2021-08-07 | 0.66220 | 43.4 | 2513 | 14131 |
1117
+ | spa-hun | tatoeba-test-v2021-08-07 | 0.63596 | 42.0 | 2500 | 14599 |
1118
+ | eng-fin | flores101-devtest | 0.57265 | 21.9 | 1012 | 18781 |
1119
+ | fra-hun | flores101-devtest | 0.52691 | 21.2 | 1012 | 22183 |
1120
+ | por-fin | flores101-devtest | 0.53772 | 18.6 | 1012 | 18781 |
1121
+ | por-hun | flores101-devtest | 0.53275 | 21.8 | 1012 | 22183 |
1122
+ | spa-est | flores101-devtest | 0.50142 | 15.2 | 1012 | 19788 |
1123
+ | spa-fin | flores101-devtest | 0.50401 | 13.7 | 1012 | 18781 |
1124
+ | deu-est | flores200-devtest | 0.55333 | 21.2 | 1012 | 19788 |
1125
+ | deu-fin | flores200-devtest | 0.54020 | 18.3 | 1012 | 18781 |
1126
+ | deu-hun | flores200-devtest | 0.53579 | 22.0 | 1012 | 22183 |
1127
+ | eng-est | flores200-devtest | 0.59496 | 26.1 | 1012 | 19788 |
1128
+ | eng-fin | flores200-devtest | 0.57811 | 23.1 | 1012 | 18781 |
1129
+ | eng-hun | flores200-devtest | 0.57670 | 26.7 | 1012 | 22183 |
1130
+ | fra-est | flores200-devtest | 0.54442 | 21.2 | 1012 | 19788 |
1131
+ | fra-fin | flores200-devtest | 0.53768 | 18.5 | 1012 | 18781 |
1132
+ | fra-hun | flores200-devtest | 0.52691 | 21.2 | 1012 | 22183 |
1133
+ | por-est | flores200-devtest | 0.48227 | 15.6 | 1012 | 19788 |
1134
+ | por-fin | flores200-devtest | 0.53772 | 18.6 | 1012 | 18781 |
1135
+ | por-hun | flores200-devtest | 0.53275 | 21.8 | 1012 | 22183 |
1136
+ | spa-est | flores200-devtest | 0.50142 | 15.2 | 1012 | 19788 |
1137
+ | spa-fin | flores200-devtest | 0.50401 | 13.7 | 1012 | 18781 |
1138
+ | spa-hun | flores200-devtest | 0.49444 | 16.4 | 1012 | 22183 |
1139
+ | deu-hun | newssyscomb2009 | 0.49607 | 18.1 | 502 | 9733 |
1140
+ | eng-hun | newssyscomb2009 | 0.50580 | 18.3 | 502 | 9733 |
1141
+ | fra-hun | newssyscomb2009 | 0.49415 | 17.8 | 502 | 9733 |
1142
+ | spa-hun | newssyscomb2009 | 0.48559 | 16.9 | 502 | 9733 |
1143
+ | deu-hun | newstest2008 | 0.48855 | 17.2 | 2051 | 41875 |
1144
+ | eng-hun | newstest2008 | 0.47636 | 15.9 | 2051 | 41875 |
1145
+ | fra-hun | newstest2008 | 0.48598 | 17.7 | 2051 | 41875 |
1146
+ | spa-hun | newstest2008 | 0.47888 | 17.1 | 2051 | 41875 |
1147
+ | deu-hun | newstest2009 | 0.48692 | 18.1 | 2525 | 54965 |
1148
+ | eng-hun | newstest2009 | 0.49507 | 18.4 | 2525 | 54965 |
1149
+ | fra-hun | newstest2009 | 0.48961 | 18.6 | 2525 | 54965 |
1150
+ | spa-hun | newstest2009 | 0.48496 | 18.1 | 2525 | 54965 |
1151
+ | eng-fin | newstest2015 | 0.56896 | 22.8 | 1370 | 19735 |
1152
+ | eng-fin | newstest2016 | 0.57934 | 24.3 | 3000 | 47678 |
1153
+ | eng-fin | newstest2017 | 0.60204 | 26.5 | 3002 | 45269 |
1154
+ | eng-est | newstest2018 | 0.56276 | 23.8 | 2000 | 36269 |
1155
+ | eng-fin | newstest2018 | 0.52953 | 17.4 | 3000 | 44836 |
1156
+ | eng-fin | newstest2019 | 0.55882 | 24.2 | 1997 | 38369 |
1157
+ | eng-fin | newstestALL2016 | 0.57934 | 24.3 | 3000 | 47678 |
1158
+ | eng-fin | newstestALL2017 | 0.60204 | 26.5 | 3002 | 45269 |
1159
+ | eng-fin | newstestB2016 | 0.54388 | 19.9 | 3000 | 45766 |
1160
+ | eng-fin | newstestB2017 | 0.56369 | 22.6 | 3002 | 45506 |
1161
+ | deu-est | ntrex128 | 0.51761 | 18.6 | 1997 | 38420 |
1162
+ | deu-fin | ntrex128 | 0.50759 | 15.5 | 1997 | 35701 |
1163
+ | deu-hun | ntrex128 | 0.46171 | 15.6 | 1997 | 44462 |
1164
+ | eng-est | ntrex128 | 0.57099 | 24.4 | 1997 | 38420 |
1165
+ | eng-fin | ntrex128 | 0.53413 | 18.5 | 1997 | 35701 |
1166
+ | eng-hun | ntrex128 | 0.47342 | 16.6 | 1997 | 44462 |
1167
+ | fra-est | ntrex128 | 0.50712 | 17.7 | 1997 | 38420 |
1168
+ | fra-fin | ntrex128 | 0.49215 | 14.2 | 1997 | 35701 |
1169
+ | fra-hun | ntrex128 | 0.44873 | 14.9 | 1997 | 44462 |
1170
+ | por-est | ntrex128 | 0.48098 | 15.1 | 1997 | 38420 |
1171
+ | por-fin | ntrex128 | 0.50875 | 15.0 | 1997 | 35701 |
1172
+ | por-hun | ntrex128 | 0.45817 | 15.5 | 1997 | 44462 |
1173
+ | spa-est | ntrex128 | 0.52158 | 18.5 | 1997 | 38420 |
1174
+ | spa-fin | ntrex128 | 0.50947 | 15.2 | 1997 | 35701 |
1175
+ | spa-hun | ntrex128 | 0.46051 | 16.1 | 1997 | 44462 |
1176
+
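The chr-F and BLEU columns follow sacrebleu conventions; a minimal rescoring sketch (the file names are placeholders, not the benchmark paths above, and recent sacrebleu versions report chrF on a 0-100 scale where the table uses 0-1):

```python
# Sketch: recompute BLEU and chrF for a hypothesis/reference pair.
# "hypotheses.txt" and "references.txt" are hypothetical file names.
import sacrebleu

hyps = open("hypotheses.txt", encoding="utf-8").read().splitlines()
refs = open("references.txt", encoding="utf-8").read().splitlines()

print(sacrebleu.corpus_bleu(hyps, [refs]).score)  # BLEU, 0-100
print(sacrebleu.corpus_chrf(hyps, [refs]).score)  # chrF, 0-100 in recent versions
```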
1177
+ ## Citation Information
1178
+
1179
+ * Publications: [Democratizing neural machine translation with OPUS-MT](https://doi.org/10.1007/s10579-023-09704-w), [OPUS-MT – Building open translation services for the World](https://aclanthology.org/2020.eamt-1.61/), and [The Tatoeba Translation Challenge – Realistic Data Sets for Low Resource and Multilingual MT](https://aclanthology.org/2020.wmt-1.139/). (Please cite if you use this model.)
1180
+
1181
+ ```bibtex
1182
+ @article{tiedemann2023democratizing,
1183
+ title={Democratizing neural machine translation with {OPUS-MT}},
1184
+ author={Tiedemann, J{\"o}rg and Aulamo, Mikko and Bakshandaeva, Daria and Boggia, Michele and Gr{\"o}nroos, Stig-Arne and Nieminen, Tommi and Raganato, Alessandro and Scherrer, Yves and Vazquez, Raul and Virpioja, Sami},
1185
+ journal={Language Resources and Evaluation},
1186
+ number={58},
1187
+ pages={713--755},
1188
+ year={2023},
1189
+ publisher={Springer Nature},
1190
+ issn={1574-0218},
1191
+ doi={10.1007/s10579-023-09704-w}
1192
+ }
1193
+
1194
+ @inproceedings{tiedemann-thottingal-2020-opus,
1195
+ title = "{OPUS}-{MT} {--} Building open translation services for the World",
1196
+ author = {Tiedemann, J{\"o}rg and Thottingal, Santhosh},
1197
+ booktitle = "Proceedings of the 22nd Annual Conference of the European Association for Machine Translation",
1198
+ month = nov,
1199
+ year = "2020",
1200
+ address = "Lisboa, Portugal",
1201
+ publisher = "European Association for Machine Translation",
1202
+ url = "https://aclanthology.org/2020.eamt-1.61",
1203
+ pages = "479--480",
1204
+ }
1205
+
1206
+ @inproceedings{tiedemann-2020-tatoeba,
1207
+ title = "The Tatoeba Translation Challenge {--} Realistic Data Sets for Low Resource and Multilingual {MT}",
1208
+ author = {Tiedemann, J{\"o}rg},
1209
+ booktitle = "Proceedings of the Fifth Conference on Machine Translation",
1210
+ month = nov,
1211
+ year = "2020",
1212
+ address = "Online",
1213
+ publisher = "Association for Computational Linguistics",
1214
+ url = "https://aclanthology.org/2020.wmt-1.139",
1215
+ pages = "1174--1182",
1216
+ }
1217
+ ```
1218
+
1219
+ ## Acknowledgements
1220
+
1221
+ The work is supported by the [HPLT project](https://hplt-project.org/), funded by the European Union’s Horizon Europe research and innovation programme under grant agreement No 101070350. We are also grateful for the generous computational resources and IT infrastructure provided by [CSC -- IT Center for Science](https://www.csc.fi/), Finland, and the [EuroHPC supercomputer LUMI](https://www.lumi-supercomputer.eu/).
1222
+
1223
+ ## Model conversion info
1224
+
1225
+ * transformers version: 4.45.1
1226
+ * OPUS-MT git hash: 0882077
1227
+ * port time: Tue Oct 8 09:01:19 EEST 2024
1228
+ * port machine: LM0-400-22516.local
benchmark_results.txt ADDED
@@ -0,0 +1,87 @@
1
+ multi-multi tatoeba-test-v2020-07-28-v2023-09-26 0.58505 32.8 10000 63462
2
+ deu-est flores101-devtest 0.11132 0.0 1012 19788
3
+ deu-fin flores101-devtest 0.11744 0.0 1012 18781
4
+ deu-hun flores101-devtest 0.10072 0.0 1012 22183
5
+ eng-est flores101-devtest 0.13248 0.0 1012 19788
6
+ eng-fin flores101-devtest 0.57265 21.9 1012 18781
7
+ fra-hun flores101-devtest 0.52691 21.2 1012 22183
8
+ por-fin flores101-devtest 0.53772 18.6 1012 18781
9
+ por-hun flores101-devtest 0.53275 21.8 1012 22183
10
+ spa-est flores101-devtest 0.50142 15.2 1012 19788
11
+ spa-fin flores101-devtest 0.50401 13.7 1012 18781
12
+ deu-est flores200-devtest 0.55333 21.2 1012 19788
13
+ deu-fin flores200-devtest 0.54020 18.3 1012 18781
14
+ deu-hun flores200-devtest 0.53579 22.0 1012 22183
15
+ eng-est flores200-devtest 0.59496 26.1 1012 19788
16
+ eng-fin flores200-devtest 0.57811 23.1 1012 18781
17
+ eng-hun flores200-devtest 0.57670 26.7 1012 22183
18
+ fra-est flores200-devtest 0.54442 21.2 1012 19788
19
+ fra-fin flores200-devtest 0.53768 18.5 1012 18781
20
+ fra-hun flores200-devtest 0.52691 21.2 1012 22183
21
+ por-est flores200-devtest 0.48227 15.6 1012 19788
22
+ por-fin flores200-devtest 0.53772 18.6 1012 18781
23
+ por-hun flores200-devtest 0.53275 21.8 1012 22183
24
+ spa-est flores200-devtest 0.50142 15.2 1012 19788
25
+ spa-fin flores200-devtest 0.50401 13.7 1012 18781
26
+ spa-hun flores200-devtest 0.49444 16.4 1012 22183
27
+ deu-hun newssyscomb2009 0.49607 18.1 502 9733
28
+ eng-hun newssyscomb2009 0.50580 18.3 502 9733
29
+ fra-hun newssyscomb2009 0.49415 17.8 502 9733
30
+ spa-hun newssyscomb2009 0.48559 16.9 502 9733
31
+ deu-hun newstest2008 0.48855 17.2 2051 41875
32
+ eng-hun newstest2008 0.47636 15.9 2051 41875
33
+ fra-hun newstest2008 0.48598 17.7 2051 41875
34
+ spa-hun newstest2008 0.47888 17.1 2051 41875
35
+ deu-hun newstest2009 0.48692 18.1 2525 54965
36
+ eng-hun newstest2009 0.49507 18.4 2525 54965
37
+ fra-hun newstest2009 0.48961 18.6 2525 54965
38
+ spa-hun newstest2009 0.48496 18.1 2525 54965
39
+ eng-fin newstest2015 0.56896 22.8 1370 19735
40
+ eng-fin newstest2016 0.57934 24.3 3000 47678
41
+ eng-fin newstest2017 0.60204 26.5 3002 45269
42
+ eng-est newstest2018 0.56276 23.8 2000 36269
43
+ eng-fin newstest2018 0.52953 17.4 3000 44836
44
+ eng-fin newstest2019 0.55882 24.2 1997 38369
45
+ eng-fin newstestALL2016 0.57934 24.3 3000 47678
46
+ eng-fin newstestALL2017 0.60204 26.5 3002 45269
47
+ eng-fin newstestB2016 0.54388 19.9 3000 45766
48
+ eng-fin newstestB2017 0.56369 22.6 3002 45506
49
+ deu-est ntrex128 0.51761 18.6 1997 38420
50
+ deu-fin ntrex128 0.50759 15.5 1997 35701
51
+ deu-hun ntrex128 0.46171 15.6 1997 44462
52
+ eng-est ntrex128 0.57099 24.4 1997 38420
53
+ eng-fin ntrex128 0.53413 18.5 1997 35701
54
+ eng-hun ntrex128 0.47342 16.6 1997 44462
55
+ fra-est ntrex128 0.50712 17.7 1997 38420
56
+ fra-fin ntrex128 0.49215 14.2 1997 35701
57
+ fra-hun ntrex128 0.44873 14.9 1997 44462
58
+ por-est ntrex128 0.48098 15.1 1997 38420
59
+ por-fin ntrex128 0.50875 15.0 1997 35701
60
+ por-hun ntrex128 0.45817 15.5 1997 44462
61
+ spa-est ntrex128 0.52158 18.5 1997 38420
62
+ spa-fin ntrex128 0.50947 15.2 1997 35701
63
+ spa-hun ntrex128 0.46051 16.1 1997 44462
64
+ deu-est tatoeba-test-v2020-07-28 0.75521 56.8 217 1222
65
+ eng-fin tatoeba-test-v2020-07-28 0.62409 37.7 10000 60517
66
+ fra-hun tatoeba-test-v2020-07-28 0.63130 40.7 2500 13775
67
+ spa-fin tatoeba-test-v2020-07-28 0.66142 43.3 2500 14057
68
+ deu-est tatoeba-test-v2021-03-30 0.75277 56.0 222 1250
69
+ deu-fin tatoeba-test-v2021-03-30 0.64132 40.6 4984 28220
70
+ deu-hun tatoeba-test-v2021-03-30 0.57297 31.5 12232 84799
71
+ eng-fin tatoeba-test-v2021-03-30 0.62443 37.7 10186 61736
72
+ fra-fin tatoeba-test-v2021-03-30 0.65711 45.0 1930 9764
73
+ fra-hun tatoeba-test-v2021-03-30 0.63130 40.7 2500 13775
74
+ por-fin tatoeba-test-v2021-03-30 0.76823 58.2 482 2399
75
+ spa-fin tatoeba-test-v2021-03-30 0.66187 43.3 4999 28123
76
+ deu-est tatoeba-test-v2021-08-07 0.76586 57.8 244 1413
77
+ deu-fin tatoeba-test-v2021-08-07 0.64286 40.7 2647 15024
78
+ deu-hun tatoeba-test-v2021-08-07 0.57007 31.2 15342 105152
79
+ eng-est tatoeba-test-v2021-08-07 0.69134 50.6 1359 7992
80
+ eng-fin tatoeba-test-v2021-08-07 0.62482 37.6 10690 65122
81
+ eng-hun tatoeba-test-v2021-08-07 0.59750 35.9 13037 79562
82
+ fra-fin tatoeba-test-v2021-08-07 0.65723 45.0 1920 9730
83
+ fra-hun tatoeba-test-v2021-08-07 0.63096 40.6 2494 13753
84
+ por-fin tatoeba-test-v2021-08-07 0.76811 58.1 477 2379
85
+ por-hun tatoeba-test-v2021-08-07 0.64930 42.5 2500 14063
86
+ spa-fin tatoeba-test-v2021-08-07 0.66220 43.4 2513 14131
87
+ spa-hun tatoeba-test-v2021-08-07 0.63596 42.0 2500 14599
benchmark_translations.zip ADDED
File without changes
config.json ADDED
@@ -0,0 +1,41 @@
1
+ {
2
+ "_name_or_path": "pytorch-models/opus-mt-tc-bible-big-deu_eng_fra_por_spa-fiu",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "relu",
5
+ "architectures": [
6
+ "MarianMTModel"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "bos_token_id": 0,
10
+ "classifier_dropout": 0.0,
11
+ "d_model": 1024,
12
+ "decoder_attention_heads": 16,
13
+ "decoder_ffn_dim": 4096,
14
+ "decoder_layerdrop": 0.0,
15
+ "decoder_layers": 6,
16
+ "decoder_start_token_id": 59391,
17
+ "decoder_vocab_size": 59392,
18
+ "dropout": 0.1,
19
+ "encoder_attention_heads": 16,
20
+ "encoder_ffn_dim": 4096,
21
+ "encoder_layerdrop": 0.0,
22
+ "encoder_layers": 6,
23
+ "eos_token_id": 628,
24
+ "forced_eos_token_id": null,
25
+ "init_std": 0.02,
26
+ "is_encoder_decoder": true,
27
+ "max_length": null,
28
+ "max_position_embeddings": 1024,
29
+ "model_type": "marian",
30
+ "normalize_embedding": false,
31
+ "num_beams": null,
32
+ "num_hidden_layers": 6,
33
+ "pad_token_id": 59391,
34
+ "scale_embedding": true,
35
+ "share_encoder_decoder_embeddings": true,
36
+ "static_position_embeddings": true,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.45.1",
39
+ "use_cache": true,
40
+ "vocab_size": 59392
41
+ }
generation_config.json ADDED
@@ -0,0 +1,16 @@
1
+ {
2
+ "_from_model_config": true,
3
+ "bad_words_ids": [
4
+ [
5
+ 59391
6
+ ]
7
+ ],
8
+ "bos_token_id": 0,
9
+ "decoder_start_token_id": 59391,
10
+ "eos_token_id": 628,
11
+ "forced_eos_token_id": 628,
12
+ "max_length": 512,
13
+ "num_beams": 4,
14
+ "pad_token_id": 59391,
15
+ "transformers_version": "4.45.1"
16
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:650b2151352ad0728a322cebe6f03c5d0d93306422cb333d33b8fb5fa5c6a7de
3
+ size 948966320
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83798a0b5320da142950f7dd5e0a4b58092b6442cbf84c0494a8ba5f1109eb3d
3
+ size 949017541
source.spm ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec5d912c2162c1698df77c5723bc53ae508ba625ac69cbb860137777c289d90b
3
+ size 811276
special_tokens_map.json ADDED
@@ -0,0 +1 @@
1
+ {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
target.spm ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3abcb324cff53a74ee5b84dd83f32cdee7c4bf1a87fb0dbc238d13f3de97186a
3
+ size 821785
tokenizer_config.json ADDED
@@ -0,0 +1 @@
1
+ {"source_lang": "deu+eng+fra+por+spa", "target_lang": "fiu", "unk_token": "<unk>", "eos_token": "</s>", "pad_token": "<pad>", "model_max_length": 512, "sp_model_kwargs": {}, "separate_vocabs": false, "special_tokens_map_file": null, "name_or_path": "marian-models/opusTCv20230926max50+bt+jhubc_transformer-big_2024-05-30/deu+eng+fra+por+spa-fiu", "tokenizer_class": "MarianTokenizer"}
vocab.json ADDED
The diff for this file is too large to render.