emanuelaboros
committed on
Commit
•
964213b
1
Parent(s):
8d26403
Update README.md
Browse files
README.md
CHANGED
@@ -174,27 +174,30 @@ Example with simulated OCR noise:
|
|
174 |
```python
|
175 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
176 |
|
177 |
-
|
178 |
-
|
|
|
|
|
|
|
179 |
|
180 |
sentences = ["[START] Un1ted Press [END] - On the h0me fr0nt, the British p0pulace remains steadfast in the f4ce of 0ngoing air raids.",
|
181 |
"In [START] Lon6on [END], trotz d3r Zerstörung, ist der Geist der M3nschen ungeb4ochen, mit Freiwilligen und zivilen Verteidigungseinheiten, die unermüdlich arbeiten, um die Kriegsanstrengungen zu unterstützen.",
|
182 |
"Les rapports des correspondants de la [START] AFP [END] mettent en lumiére la poussée nationale pour augmenter la production dans les usines, essentielle pour fournir au front les matériaux nécessaires à la victoire."]
|
183 |
|
|
|
|
|
|
|
|
|
184 |
for sentence in sentences:
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
num_return_sequences=5
|
189 |
-
)
|
190 |
-
|
191 |
-
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))
|
192 |
```
|
193 |
|
194 |
```
|
195 |
-
['United Press International
|
196 |
-
['London
|
197 |
-
['Agence France-Presse
|
198 |
```
|
199 |
|
200 |
---
|
|
|
174 |
```python
|
175 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
176 |
|
177 |
+
NEL_MODEL_NAME = "impresso-project/nel-mgenre-multilingual"
|
178 |
+
|
179 |
+
# Load the tokenizer and model from the specified pre-trained model name
|
180 |
+
# The model used here is "https://huggingface.co/impresso-project/nel-mgenre-multilingual"
|
181 |
+
nel_tokenizer = AutoTokenizer.from_pretrained(NEL_MODEL_NAME)
|
182 |
|
183 |
sentences = ["[START] Un1ted Press [END] - On the h0me fr0nt, the British p0pulace remains steadfast in the f4ce of 0ngoing air raids.",
|
184 |
"In [START] Lon6on [END], trotz d3r Zerstörung, ist der Geist der M3nschen ungeb4ochen, mit Freiwilligen und zivilen Verteidigungseinheiten, die unermüdlich arbeiten, um die Kriegsanstrengungen zu unterstützen.",
|
185 |
"Les rapports des correspondants de la [START] AFP [END] mettent en lumiére la poussée nationale pour augmenter la production dans les usines, essentielle pour fournir au front les matériaux nécessaires à la victoire."]
|
186 |
|
187 |
+
nel_pipeline = pipeline("generic-nel", model=NEL_MODEL_NAME,
|
188 |
+
tokenizer=nel_tokenizer,
|
189 |
+
trust_remote_code=True,
|
190 |
+
device='cpu')
|
191 |
for sentence in sentences:
|
192 |
+
print(sentence)
|
193 |
+
linked_entity = nel_pipeline(sentence)
|
194 |
+
print(linked_entity)
|
|
|
|
|
|
|
|
|
195 |
```
|
196 |
|
197 |
```
|
198 |
+
[{'title': 'United Press International', 'qid': 'Q493845', 'url': 'https://en.wikipedia.org/wiki/United_Press_International'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}, {'title': 'Joseph Bradley Varnum', 'qid': 'Q1706673', 'url': 'https://en.wikipedia.org/wiki/Joseph_Bradley_Varnum'}, {'title': 'The Press', 'qid': 'Q2413590', 'url': 'https://en.wikipedia.org/wiki/The_Press'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}]
|
199 |
+
[{'title': 'London', 'qid': 'Q84', 'url': 'https://en.wikipedia.org/wiki/London'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}, {'title': 'Lyon', 'qid': 'Q456', 'url': 'https://en.wikipedia.org/wiki/Lyon'}]
|
200 |
+
[{'title': 'Agence France-Presse', 'qid': 'Q40464', 'url': 'https://en.wikipedia.org/wiki/Agence_France-Presse'}, {'title': 'Agence France-Presse', 'qid': 'Q40464', 'url': 'https://en.wikipedia.org/wiki/Agence_France-Presse'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}, {'title': 'NIL', 'qid': 'NIL', 'url': 'None'}]
|
201 |
```
|
202 |
|
203 |
---
|