emanuelaboros committed on
Commit
964213b
1 Parent(s): 8d26403

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +15 -12
README.md CHANGED
@@ -174,27 +174,30 @@ Example with simulated OCR noise:
174
  ```python
175
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
176
 
177
- tokenizer = AutoTokenizer.from_pretrained("impresso-project/nel-hipe-multilingual")
178
- model = AutoModelForSeq2SeqLM.from_pretrained("impresso-project/nel-hipe-multilingual").eval()
 
 
 
179
 
180
  sentences = ["[START] Un1ted Press [END] - On the h0me fr0nt, the British p0pulace remains steadfast in the f4ce of 0ngoing air raids.",
181
  "In [START] Lon6on [END], trotz d3r Zerstörung, ist der Geist der M3nschen ungeb4ochen, mit Freiwilligen und zivilen Verteidigungseinheiten, die unermüdlich arbeiten, um die Kriegsanstrengungen zu unterstützen.",
182
  "Les rapports des correspondants de la [START] AFP [END] mettent en lumiére la poussée nationale pour augmenter la production dans les usines, essentielle pour fournir au front les matériaux nécessaires à la victoire."]
183
 
 
 
 
 
184
  for sentence in sentences:
185
- outputs = model.generate(
186
- **tokenizer([sentence], return_tensors="pt"),
187
- num_beams=5,
188
- num_return_sequences=5
189
- )
190
-
191
- print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
192
  ```
193
 
194
  ```
195
- ['United Press International >> en ', 'Un1ted Press >> en ', 'Joseph Bradley Varnum >> en ', 'The Press >> en ', 'The Unused Press >> en ']
196
- ['London >> de ', 'Longbourne >> de ', 'Longbon >> de ', 'Longston >> de ', 'Lyon >> de ']
197
- ['Agence France-Presse >> fr ', 'Agence France-Presse >> fr ', 'Agence France-Presse de la Presse écrite >> fr ', 'Agence France-Presse de la porte de Vincennes >> fr ', 'Agence France-Presse de la porte océanique >> fr ']
198
  ```
199
 
200
  ---
 
174
  ```python
175
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
176
 
177
+ NEL_MODEL_NAME = "impresso-project/nel-mgenre-multilingual"
178
+
179
+ # Load the tokenizer for the pre-trained model; the model itself is loaded by the pipeline below
180
+ # The model used here is "https://huggingface.co/impresso-project/nel-mgenre-multilingual"
181
+ nel_tokenizer = AutoTokenizer.from_pretrained("impresso-project/nel-mgenre-multilingual")
182
 
183
  sentences = ["[START] Un1ted Press [END] - On the h0me fr0nt, the British p0pulace remains steadfast in the f4ce of 0ngoing air raids.",
184
  "In [START] Lon6on [END], trotz d3r Zerstörung, ist der Geist der M3nschen ungeb4ochen, mit Freiwilligen und zivilen Verteidigungseinheiten, die unermüdlich arbeiten, um die Kriegsanstrengungen zu unterstützen.",
185
  "Les rapports des correspondants de la [START] AFP [END] mettent en lumiére la poussée nationale pour augmenter la production dans les usines, essentielle pour fournir au front les matériaux nécessaires à la victoire."]
186
 
187
+ nel_pipeline = pipeline("generic-nel", model=NEL_MODEL_NAME,
188
+ tokenizer=nel_tokenizer,
189
+ trust_remote_code=True,
190
+ device='cpu')
191
  for sentence in sentences:
192
+ print(sentence)
193
+ linked_entity = nel_pipeline(sentence)
194
+ print(linked_entity)
 
 
 
 
195
  ```
196
 
197
  ```
198
+ [{'title': 'United Press International', 'qid': 'Q493845', 'url': 'https://en.wikipedia.org/wiki/United_Press_International'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}, {'title': 'Joseph Bradley Varnum', 'qid': 'Q1706673', 'url': 'https://en.wikipedia.org/wiki/Joseph_Bradley_Varnum'}, {'title': 'The Press', 'qid': 'Q2413590', 'url': 'https://en.wikipedia.org/wiki/The_Press'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}]
199
+ [{'title': 'London', 'qid': 'Q84', 'url': 'https://en.wikipedia.org/wiki/London'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}, {'title': 'Lyon', 'qid': 'Q456', 'url': 'https://en.wikipedia.org/wiki/Lyon'}]
200
+ [{'title': 'Agence France-Presse', 'qid': 'Q40464', 'url': 'https://en.wikipedia.org/wiki/Agence_France-Presse'}, {'title': 'Agence France-Presse', 'qid': 'Q40464', 'url': 'https://en.wikipedia.org/wiki/Agence_France-Presse'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}]
201
  ```
202
 
203
  ---