Update example with domain tags
Browse files
Add information about domain tags
README.md
CHANGED
@@ -18,14 +18,16 @@ To force the target language id as the first generated token, pass the `forced_b
|
|
18 |
*Note: `M2M100Tokenizer` depends on `sentencepiece`, so make sure to install it before running the example.*
|
19 |
|
20 |
To install `sentencepiece` run `pip install sentencepiece`
|
21 |
-
|
|
|
|
|
22 |
```python
|
23 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
24 |
|
25 |
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/wmt21-dense-24-wide-en-x")
|
26 |
tokenizer = AutoTokenizer.from_pretrained("facebook/wmt21-dense-24-wide-en-x")
|
27 |
|
28 |
-
inputs = tokenizer("One model for many languages.", return_tensors="pt")
|
29 |
|
30 |
# translate English to German
|
31 |
generated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.get_lang_id("de"))
|
|
|
18 |
*Note: `M2M100Tokenizer` depends on `sentencepiece`, so make sure to install it before running the example.*
|
19 |
|
20 |
To install `sentencepiece` run `pip install sentencepiece`
|
21 |
+
Note: Since the model was trained with domain tags, you should prepend them to the input as well.
|
22 |
+
"wmtdata newsdomain": Use for sentences in the news domain
|
23 |
+
"wmtdata otherdomain": Use for sentences in all other domains
|
24 |
```python
|
25 |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
26 |
|
27 |
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/wmt21-dense-24-wide-en-x")
|
28 |
tokenizer = AutoTokenizer.from_pretrained("facebook/wmt21-dense-24-wide-en-x")
|
29 |
|
30 |
+
inputs = tokenizer("wmtdata newsdomain One model for many languages.", return_tensors="pt")
|
31 |
|
32 |
# translate English to German
|
33 |
generated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.get_lang_id("de"))
|