Emanuela Boros
committed on
Commit
·
c9fc95e
1
Parent(s):
c549c79
update handler
Browse files
- generic_ner.py +1 -3
- test.py +46 -0
generic_ner.py
CHANGED
@@ -253,9 +253,7 @@ class MultitaskTokenClassificationPipeline(Pipeline):
|
|
253 |
field_name = entity["entity"].split(".")[
|
254 |
-1
|
255 |
] # Last part of the label as the field
|
256 |
-
|
257 |
-
parent_entity[field_name] = []
|
258 |
-
parent_entity[field_name].append(entity)
|
259 |
nested = True
|
260 |
break
|
261 |
|
|
|
253 |
field_name = entity["entity"].split(".")[
|
254 |
-1
|
255 |
] # Last part of the label as the field
|
256 |
+
parent_entity[field_name] = entity["word"]
|
|
|
|
|
257 |
nested = True
|
258 |
break
|
259 |
|
test.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Import necessary modules from the transformers library
|
2 |
+
from transformers import pipeline
|
3 |
+
from transformers import AutoModelForTokenClassification, AutoTokenizer
|
4 |
+
|
5 |
+
# Define the model name to be used for token classification; we use the Impresso NER model
|
6 |
+
# that can be found at "https://huggingface.co/impresso-project/ner-stacked-bert-multilingual"
|
7 |
+
MODEL_NAME = "impresso-project/ner-stacked-bert-multilingual"
|
8 |
+
|
9 |
+
# Load the tokenizer corresponding to the specified model name
|
10 |
+
ner_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
11 |
+
|
12 |
+
ner_pipeline = pipeline(
|
13 |
+
"generic-ner",
|
14 |
+
model=MODEL_NAME,
|
15 |
+
tokenizer=ner_tokenizer,
|
16 |
+
trust_remote_code=True,
|
17 |
+
device="cpu",
|
18 |
+
)
|
19 |
+
sentences = [
|
20 |
+
"""In the year 1789, King Louis XVI, ruler of France, convened the Estates-General at the Palace of Versailles,
|
21 |
+
where Marie Antoinette, the Queen of France, alongside Maximilien Robespierre, a leading member of the National Assembly,
|
22 |
+
debated with Jean-Jacques Rousseau, the famous philosopher, and Charles de Talleyrand, the Bishop of Autun,
|
23 |
+
regarding the future of the French monarchy. At the same time, across the Atlantic in Philadelphia,
|
24 |
+
George Washington, the first President of the United States, and Thomas Jefferson, the nation's Secretary of State,
|
25 |
+
were drafting policies for the newly established American government following the signing of the Constitution."""
|
26 |
+
]
|
27 |
+
|
28 |
+
print(sentences[0])
|
29 |
+
|
30 |
+
|
31 |
+
# Helper function to print entities one per row
|
32 |
+
def print_nicely(entities):
|
33 |
+
for entity in entities:
|
34 |
+
print(
|
35 |
+
f"Entity: {entity['entity']} | Confidence: {entity['score']:.2f}% | Text: {entity['word'].strip()} | Start: {entity['start']} | End: {entity['end']}"
|
36 |
+
)
|
37 |
+
|
38 |
+
|
39 |
+
# Visualize stacked entities for each sentence
|
40 |
+
for sentence in sentences:
|
41 |
+
results = ner_pipeline(sentence)
|
42 |
+
|
43 |
+
# Extract coarse and fine entities
|
44 |
+
for key in results.keys():
|
45 |
+
# Print the entities returned under this key
|
46 |
+
print_nicely(results[key])
|