Iker committed on
Commit 797bf18
1 Parent(s): 5c7e660
README.md CHANGED
@@ -13,20 +13,12 @@
 <br>
 </p>
 
- Easy-translate is a script for translating large text files in your machine using
- the [M2M100 models](https://arxiv.org/pdf/2010.11125.pdf) from Facebook/Meta AI.
- We also privide a [script](#evaluate-translations) for Easy-Evaluation of your translations 🥳
+ Easy-translate is a script for translating large text files on your machine using the [M2M100 models](https://arxiv.org/pdf/2010.11125.pdf) from Facebook/Meta AI. We also provide a [script](#evaluate-translations) for Easy-Evaluation of your translations 🥳
 
 
- M2M100 is a multilingual encoder-decoder (seq-to-seq) model trained for Many-to-Many multilingual translation.
- It was introduced in this [paper](https://arxiv.org/abs/2010.11125) and first released in [this](https://github.com/pytorch/fairseq/tree/master/examples/m2m_100) repository.
- The model that can directly translate between the 9,900 directions of 100 languages.
-
- Easy-Translate is built on top of 🤗HuggingFace's
- [Transformers](https://huggingface.co/docs/transformers/index) and
- 🤗HuggingFace's [Accelerate](https://huggingface.co/docs/accelerate/index) library.
- We support:
+ M2M100 is a multilingual encoder-decoder (seq-to-seq) model trained for Many-to-Many multilingual translation. It was introduced in this [paper](https://arxiv.org/abs/2010.11125) and first released in [this](https://github.com/pytorch/fairseq/tree/master/examples/m2m_100) repository. The model can directly translate between the 9,900 directions of 100 languages.
 
+ Easy-Translate is built on top of 🤗HuggingFace's [Transformers](https://huggingface.co/docs/transformers/index) and 🤗HuggingFace's [Accelerate](https://huggingface.co/docs/accelerate/index) library. We support:
 * CPU / multi-CPU / GPU / multi-GPU / TPU acceleration
 * BF16 / FP16 / FP32 precision.
 * Automatic batch size finder: Forget CUDA OOM errors. Set an initial batch size; if it doesn't fit, we will automatically adjust it.
@@ -80,9 +72,7 @@ accelerate launch translate.py \
 
 #### Multi-GPU:
 See Accelerate documentation for more information (multi-node, TPU, Sharded model...): https://huggingface.co/docs/accelerate/index
- You can use the Accelerate CLI to configure the Accelerate environment (Run
- `accelerate config` in your terminal) instead of using the
- `--multi_gpu and --num_processes` flags.
+ You can use the Accelerate CLI to configure the Accelerate environment (run `accelerate config` in your terminal) instead of using the `--multi_gpu` and `--num_processes` flags.
 
 ```bash
 accelerate launch --multi_gpu --num_processes 2 --num_machines 1 translate.py \
@@ -94,9 +84,7 @@ accelerate launch --multi_gpu --num_processes 2 --num_machines 1 translate.py \
 ```
 
 #### Automatic batch size finder:
- We will automatically find a batch size that fits in your GPU memory.
- The default initial batch size is 128 (You can set it with the `--starting_batch_size 128` flag).
- If we find an Out Of Memory error, we will automatically decrease the batch size until we find a working one.
+ We will automatically find a batch size that fits in your GPU memory. The default initial batch size is 128 (you can set it with the `--starting_batch_size 128` flag). If we find an Out Of Memory error, we will automatically decrease the batch size until we find a working one.
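The automatic batch size finder described in the hunk above follows the usual shrink-on-OOM pattern: start from `--starting_batch_size` and halve until a batch fits in GPU memory. Below is a minimal sketch of that pattern, not the repository's actual implementation; `run_batch` is a hypothetical callable that translates one batch.

```python
import torch


def find_working_batch_size(run_batch, starting_batch_size: int = 128) -> int:
    """Halve the batch size on CUDA OOM until one call to `run_batch` succeeds."""
    batch_size = starting_batch_size
    while batch_size >= 1:
        try:
            run_batch(batch_size)  # e.g. translate one batch of sentences
            return batch_size
        except RuntimeError as err:  # CUDA OOM surfaces as a RuntimeError
            if "out of memory" not in str(err).lower():
                raise
            torch.cuda.empty_cache()  # release the failed allocation before retrying
            batch_size //= 2
    raise RuntimeError("Even batch size 1 does not fit in GPU memory.")
```

🤗 Accelerate ships a similar helper, `find_executable_batch_size` in `accelerate.utils`, which packages the same retry-and-shrink idea as a decorator.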
sample_text/en2es.m2m100_1.2B.json CHANGED
@@ -1 +1,123 @@
1
- {"sacrebleu": {"score": 32.101150640281695, "counts": [19160, 11392, 7558, 5186], "totals": [31477, 30479, 29481, 28485], "precisions": [60.86984147155066, 37.37655434889596, 25.636850853091822, 18.20607337195015], "bp": 1.0, "sys_len": 31477, "ref_len": 30102}, "rouge": {"rouge1": [[0.5852396804366098, 0.6089057437338691, 0.5919486437026797], [0.5964621218261164, 0.6200342221830797, 0.6029705008756368], [0.6068321807422377, 0.6311106822798185, 0.61324805661008]], "rouge2": [[0.3710985389559613, 0.38708055355385995, 0.3761201217327784], [0.3844850790869714, 0.40017782122170353, 0.38920434271970195], [0.3968990790506025, 0.41382310483690327, 0.4022299418726329]], "rougeL": [[0.5351505034410595, 0.5564838960633809, 0.5410602618870524], [0.5457898501195475, 0.5677049056091881, 0.5519189480892548], [0.5575497491149766, 0.5787856637940312, 0.5630101422167583]], "rougeLsum": [[0.5352116089085267, 0.5570236521823667, 0.5415939934790461], [0.5463246235983789, 0.5676427704754348, 0.5522237812823654], [0.5581141358005033, 0.5796683147249665, 0.5630221371759908]]}, "bleu": {"bleu": 0.2842153038526809, "precisions": [0.5535070989616444, 0.33646946844340314, 0.22383069265549602, 0.15653135365661033], "brevity_penalty": 1.0, "length_ratio": 1.0469217970049918, "translation_length": 28314, "reference_length": 27045}, "meteor": {"meteor": 0.4880039569987408}, "ter": {"score": 59.500831946755405, "num_edits": 16092, "ref_length": 27045.0}, "bert_score": {"precision": 0.8192511852383614, "recall": 0.8262866012752056, "f1": 0.8223477345705033, "hashcode": "microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.11(hug_trans=4.18.0)_fast-tokenizer"}}
1
+ {
2
+ "sacrebleu": {
3
+ "score": 32.101150640281695,
4
+ "counts": [
5
+ 19160,
6
+ 11392,
7
+ 7558,
8
+ 5186
9
+ ],
10
+ "totals": [
11
+ 31477,
12
+ 30479,
13
+ 29481,
14
+ 28485
15
+ ],
16
+ "precisions": [
17
+ 60.86984147155066,
18
+ 37.37655434889596,
19
+ 25.636850853091822,
20
+ 18.20607337195015
21
+ ],
22
+ "bp": 1.0,
23
+ "sys_len": 31477,
24
+ "ref_len": 30102
25
+ },
26
+ "rouge": {
27
+ "rouge1": [
28
+ [
29
+ 0.5852396804366098,
30
+ 0.6089057437338691,
31
+ 0.5919486437026797
32
+ ],
33
+ [
34
+ 0.5964621218261164,
35
+ 0.6200342221830797,
36
+ 0.6029705008756368
37
+ ],
38
+ [
39
+ 0.6068321807422377,
40
+ 0.6311106822798185,
41
+ 0.61324805661008
42
+ ]
43
+ ],
44
+ "rouge2": [
45
+ [
46
+ 0.3710985389559613,
47
+ 0.38708055355385995,
48
+ 0.3761201217327784
49
+ ],
50
+ [
51
+ 0.3844850790869714,
52
+ 0.40017782122170353,
53
+ 0.38920434271970195
54
+ ],
55
+ [
56
+ 0.3968990790506025,
57
+ 0.41382310483690327,
58
+ 0.4022299418726329
59
+ ]
60
+ ],
61
+ "rougeL": [
62
+ [
63
+ 0.5351505034410595,
64
+ 0.5564838960633809,
65
+ 0.5410602618870524
66
+ ],
67
+ [
68
+ 0.5457898501195475,
69
+ 0.5677049056091881,
70
+ 0.5519189480892548
71
+ ],
72
+ [
73
+ 0.5575497491149766,
74
+ 0.5787856637940312,
75
+ 0.5630101422167583
76
+ ]
77
+ ],
78
+ "rougeLsum": [
79
+ [
80
+ 0.5352116089085267,
81
+ 0.5570236521823667,
82
+ 0.5415939934790461
83
+ ],
84
+ [
85
+ 0.5463246235983789,
86
+ 0.5676427704754348,
87
+ 0.5522237812823654
88
+ ],
89
+ [
90
+ 0.5581141358005033,
91
+ 0.5796683147249665,
92
+ 0.5630221371759908
93
+ ]
94
+ ]
95
+ },
96
+ "bleu": {
97
+ "bleu": 0.2842153038526809,
98
+ "precisions": [
99
+ 0.5535070989616444,
100
+ 0.33646946844340314,
101
+ 0.22383069265549602,
102
+ 0.15653135365661033
103
+ ],
104
+ "brevity_penalty": 1.0,
105
+ "length_ratio": 1.0469217970049918,
106
+ "translation_length": 28314,
107
+ "reference_length": 27045
108
+ },
109
+ "meteor": {
110
+ "meteor": 0.4880039569987408
111
+ },
112
+ "ter": {
113
+ "score": 59.500831946755405,
114
+ "num_edits": 16092,
115
+ "ref_length": 27045.0
116
+ },
117
+ "bert_score": {
118
+ "precision": 0.8192488248944283,
119
+ "recall": 0.8262857750356197,
120
+ "f1": 0.8223461411595344,
121
+ "hashcode": "microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.11(hug_trans=4.18.0)_fast-tokenizer"
122
+ }
123
+ }
sample_text/en2es.m2m100_418M.json CHANGED
@@ -1 +1,123 @@
1
- {"sacrebleu": {"score": 29.035496917461597, "counts": [18582, 10514, 6681, 4387], "totals": [31477, 30479, 29481, 28485], "precisions": [59.033580074339994, 34.49588241084025, 22.662053525999795, 15.401088292083553], "bp": 1.0, "sys_len": 31477, "ref_len": 30388}, "rouge": {"rouge1": [[0.5661701202298134, 0.5806961045770566, 0.5693885562082325], [0.5768745925790656, 0.5926959547911554, 0.5803693779677083], [0.5871085218904836, 0.6035331460243276, 0.5900979805085623]], "rouge2": [[0.34243414046469267, 0.35226400857606666, 0.34469210847048837], [0.3545484183384055, 0.36470783370743065, 0.3569058648048812], [0.36612813327517263, 0.37717476449671, 0.3689653665404565]], "rougeL": [[0.5129704896656746, 0.526995889564155, 0.5162056185006965], [0.523632841460358, 0.5375452284094455, 0.5267080806612512], [0.5350158816319085, 0.5480980981777757, 0.5372302857012781]], "rougeLsum": [[0.5126805856827783, 0.5265189554049317, 0.5155154093959223], [0.5239559133309495, 0.5380410013947112, 0.5271022617246641], [0.5351934954578494, 0.5491115103854219, 0.5381174565735956]]}, "bleu": {"bleu": 0.2546886610724999, "precisions": [0.5339761248852158, 0.30784155806120955, 0.19560013678331242, 0.1308640025272469], "brevity_penalty": 1.0, "length_ratio": 1.0353982300884956, "translation_length": 28314, "reference_length": 27346}, "meteor": {"meteor": 0.4630996837124251}, "ter": {"score": 61.848167922182405, "num_edits": 16913, "ref_length": 27346.0}, "bert_score": {"precision": 0.8128398380875588, "recall": 0.8185442119538784, "f1": 0.8153291321396827, "hashcode": "microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.11(hug_trans=4.18.0)_fast-tokenizer"}}
1
+ {
2
+ "sacrebleu": {
3
+ "score": 29.035496917461597,
4
+ "counts": [
5
+ 18582,
6
+ 10514,
7
+ 6681,
8
+ 4387
9
+ ],
10
+ "totals": [
11
+ 31477,
12
+ 30479,
13
+ 29481,
14
+ 28485
15
+ ],
16
+ "precisions": [
17
+ 59.033580074339994,
18
+ 34.49588241084025,
19
+ 22.662053525999795,
20
+ 15.401088292083553
21
+ ],
22
+ "bp": 1.0,
23
+ "sys_len": 31477,
24
+ "ref_len": 30388
25
+ },
26
+ "rouge": {
27
+ "rouge1": [
28
+ [
29
+ 0.5661701202298134,
30
+ 0.5806961045770566,
31
+ 0.5693885562082325
32
+ ],
33
+ [
34
+ 0.5768745925790656,
35
+ 0.5926959547911554,
36
+ 0.5803693779677083
37
+ ],
38
+ [
39
+ 0.5871085218904836,
40
+ 0.6035331460243276,
41
+ 0.5900979805085623
42
+ ]
43
+ ],
44
+ "rouge2": [
45
+ [
46
+ 0.34243414046469267,
47
+ 0.35226400857606666,
48
+ 0.34469210847048837
49
+ ],
50
+ [
51
+ 0.3545484183384055,
52
+ 0.36470783370743065,
53
+ 0.3569058648048812
54
+ ],
55
+ [
56
+ 0.36612813327517263,
57
+ 0.37717476449671,
58
+ 0.3689653665404565
59
+ ]
60
+ ],
61
+ "rougeL": [
62
+ [
63
+ 0.5129704896656746,
64
+ 0.526995889564155,
65
+ 0.5162056185006965
66
+ ],
67
+ [
68
+ 0.523632841460358,
69
+ 0.5375452284094455,
70
+ 0.5267080806612512
71
+ ],
72
+ [
73
+ 0.5350158816319085,
74
+ 0.5480980981777757,
75
+ 0.5372302857012781
76
+ ]
77
+ ],
78
+ "rougeLsum": [
79
+ [
80
+ 0.5126805856827783,
81
+ 0.5265189554049317,
82
+ 0.5155154093959223
83
+ ],
84
+ [
85
+ 0.5239559133309495,
86
+ 0.5380410013947112,
87
+ 0.5271022617246641
88
+ ],
89
+ [
90
+ 0.5351934954578494,
91
+ 0.5491115103854219,
92
+ 0.5381174565735956
93
+ ]
94
+ ]
95
+ },
96
+ "bleu": {
97
+ "bleu": 0.2546886610724999,
98
+ "precisions": [
99
+ 0.5339761248852158,
100
+ 0.30784155806120955,
101
+ 0.19560013678331242,
102
+ 0.1308640025272469
103
+ ],
104
+ "brevity_penalty": 1.0,
105
+ "length_ratio": 1.0353982300884956,
106
+ "translation_length": 28314,
107
+ "reference_length": 27346
108
+ },
109
+ "meteor": {
110
+ "meteor": 0.4630996837124251
111
+ },
112
+ "ter": {
113
+ "score": 61.848167922182405,
114
+ "num_edits": 16913,
115
+ "ref_length": 27346.0
116
+ },
117
+ "bert_score": {
118
+ "precision": 0.8128397642374039,
119
+ "recall": 0.8185485603511333,
120
+ "f1": 0.8153312988877296,
121
+ "hashcode": "microsoft/deberta-xlarge-mnli_L40_no-idf_version=0.3.11(hug_trans=4.18.0)_fast-tokenizer"
122
+ }
123
+ }
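The two JSON diffs above pretty-print the evaluation reports for the M2M100 1.2B and 418M checkpoints (with small changes to the BERTScore values). A short sketch of how such a report can be loaded and summarized; the paths and keys are taken from the files above, while the comparison script itself is illustrative and not part of the repository.

```python
import json

# Evaluation reports shown in the diffs above.
FILES = {
    "m2m100_1.2B": "sample_text/en2es.m2m100_1.2B.json",
    "m2m100_418M": "sample_text/en2es.m2m100_418M.json",
}

for model, path in FILES.items():
    with open(path, encoding="utf-8") as f:
        report = json.load(f)
    # Pick the headline metrics out of the nested structure.
    print(
        f"{model}: "
        f"sacreBLEU={report['sacrebleu']['score']:.2f}  "
        f"TER={report['ter']['score']:.2f}  "
        f"METEOR={report['meteor']['meteor']:.3f}  "
        f"BERTScore-F1={report['bert_score']['f1']:.3f}"
    )
```

With the numbers above, the 1.2B checkpoint comes out ahead of the 418M one: roughly 32.1 vs. 29.0 sacreBLEU, with a lower TER (59.5 vs. 61.8).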