PereLluis13 committed on
Commit
ca7d649
1 Parent(s): eda7f18

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -10
README.md CHANGED
@@ -21,11 +21,11 @@ language:
21
  widget:
22
  - text: >-
23
  The Red Hot Chili Peppers were formed in Los Angeles by Kiedis, Flea, guitarist Hillel Slovak and drummer Jack Irons.
24
- example_title: English
25
  inference:
26
  parameters:
27
- src_lang: __en__
28
- tgt_lang: <triplet>
29
  tags:
30
  - seq2seq
31
  - relation-extraction
@@ -61,7 +61,7 @@ from transformers import pipeline
61
 
62
  triplet_extractor = pipeline('translation_xx_to_yy', model='Babelscape/mrebel-base', tokenizer='Babelscape/mrebel-base')
63
  # We need to use the tokenizer manually since we need special tokens.
64
- extracted_text = triplet_extractor.tokenizer.batch_decode([triplet_extractor("The Red Hot Chili Peppers were formed in Los Angeles by Kiedis, Flea, guitarist Hillel Slovak and drummer Jack Irons.", decoder_start_token_id=tokenizer.convert_tokens_to_ids("tp_XX"), src_lang="__en__", tgt_lang="<triplet>", return_tensors=True, return_text=False)[0]["translation_token_ids"]]) # change __en__ for the language of the source.
65
  print(extracted_text[0])
66
  # Function to parse the generated text and extract the triplets
67
  def extract_triplets_typed(text):
@@ -145,11 +145,8 @@ def extract_triplets_typed(text):
145
  return triplets
146
 
147
  # Load model and tokenizer
148
- tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-base", src_lang="en_XX", tgt_lang="tp_XX")
149
- # Here we set English ("en_XX") as source language. To change the source language swap the first token of the input for your desired language or change to supported language. For catalan ("ca_XX") or greek ("el_EL") (not included in mBART pretraining) you need a workaround:
150
- # tokenizer._src_lang = "ca_XX"
151
- # tokenizer.cur_lang_code_id = tokenizer.convert_tokens_to_ids("ca_XX")
152
- # tokenizer.set_src_lang_special_tokens("ca_XX")
153
  model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/mrebel-base")
154
  gen_kwargs = {
155
  "max_length": 256,
@@ -169,7 +166,6 @@ model_inputs = tokenizer(text, max_length=256, padding=True, truncation=True, re
169
  generated_tokens = model.generate(
170
  model_inputs["input_ids"].to(model.device),
171
  attention_mask=model_inputs["attention_mask"].to(model.device),
172
- decoder_start_token_id = tokenizer.convert_tokens_to_ids("tp_XX"),
173
  **gen_kwargs,
174
  )
175
 
 
21
  widget:
22
  - text: >-
23
  The Red Hot Chili Peppers were formed in Los Angeles by Kiedis, Flea, guitarist Hillel Slovak and drummer Jack Irons.
24
+ example_title: English
25
  inference:
26
  parameters:
27
+ src_lang: en
28
+ tgt_lang: en
29
  tags:
30
  - seq2seq
31
  - relation-extraction
 
61
 
62
  triplet_extractor = pipeline('translation_xx_to_yy', model='Babelscape/mrebel-base', tokenizer='Babelscape/mrebel-base')
63
  # We need to use the tokenizer manually since we need special tokens.
64
+ extracted_text = triplet_extractor.tokenizer.batch_decode([triplet_extractor("The Red Hot Chili Peppers were formed in Los Angeles by Kiedis, Flea, guitarist Hillel Slovak and drummer Jack Irons.", src_lang="en", return_tensors=True, return_text=False)[0]["translation_token_ids"]]) # change "en" to the language code of the source text.
65
  print(extracted_text[0])
66
  # Function to parse the generated text and extract the triplets
67
  def extract_triplets_typed(text):
 
145
  return triplets
146
 
147
  # Load model and tokenizer
148
+ tokenizer = AutoTokenizer.from_pretrained("Babelscape/mrebel-base", src_lang="en", tgt_lang="en")
149
+ # Here we set English ("en") as the source language. To use a different source language, either swap the first token of the input for the desired language token or set src_lang to another supported language.
 
 
 
150
  model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/mrebel-base")
151
  gen_kwargs = {
152
  "max_length": 256,
 
166
  generated_tokens = model.generate(
167
  model_inputs["input_ids"].to(model.device),
168
  attention_mask=model_inputs["attention_mask"].to(model.device),
 
169
  **gen_kwargs,
170
  )
171