PereLluis13 committed on
Commit
2ee9ca3
1 Parent(s): 37f3e35

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -1
README.md CHANGED
@@ -146,7 +146,11 @@ def extract_triplets_typed(text):
146
  return triplets
147
 
148
  # Load model and tokenizer
149
- tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large", src_lang="en_XX", tgt_lang="tp_XX") # Here we set English as source language. To change the source language just change it here or swap the first token of the input for your desired language
 
 
 
 
150
  model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/mrebel-large")
151
  gen_kwargs = {
152
  "max_length": 256,
 
146
  return triplets
147
 
148
  # Load model and tokenizer
149
+ tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-large", src_lang="en_XX", tgt_lang="tp_XX")
150
+ # Here we set English ("en_XX") as the source language. To change the source language, either swap the first token of the input for your desired language code or set src_lang to another supported language. For Catalan ("ca_XX") or Greek ("el_EL"), which were not included in mBART pretraining, you need a workaround:
151
+ # tokenizer._src_lang = "ca_XX"
152
+ # tokenizer.cur_lang_code_id = tokenizer.convert_tokens_to_ids("ca_XX")
153
+ # tokenizer.set_src_lang_special_tokens("ca_XX")
154
  model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/mrebel-large")
155
  gen_kwargs = {
156
  "max_length": 256,