Emanuela Boros
committed on
Commit
•
8d26403
1
Parent(s):
90f2fe8
added pipeline
Browse files
- generic_nel.py +3 -30
generic_nel.py
CHANGED
@@ -1,16 +1,9 @@
|
|
1 |
from transformers import Pipeline
|
2 |
-
import numpy as np
|
3 |
-
import torch
|
4 |
import nltk
|
5 |
|
6 |
nltk.download("averaged_perceptron_tagger")
|
7 |
nltk.download("averaged_perceptron_tagger_eng")
|
8 |
-
from nltk.chunk import conlltags2tree
|
9 |
-
from nltk import pos_tag
|
10 |
-
from nltk.tree import Tree
|
11 |
import requests
|
12 |
-
import torch.nn.functional as F
|
13 |
-
import re, string
|
14 |
|
15 |
|
16 |
def get_wikipedia_page_props(input_str: str):
|
@@ -104,7 +97,7 @@ class NelPipeline(Pipeline):
|
|
104 |
wikipedia_predictons = self.tokenizer.batch_decode(
|
105 |
outputs, skip_special_tokens=True
|
106 |
)
|
107 |
-
print(f"Decoded: {wikipedia_predictons}")
|
108 |
|
109 |
return wikipedia_predictons
|
110 |
|
@@ -118,32 +111,12 @@ class NelPipeline(Pipeline):
|
|
118 |
:param kwargs:
|
119 |
:return:
|
120 |
"""
|
121 |
-
|
122 |
-
#
|
123 |
-
# predictions = {}
|
124 |
-
# confidence_scores = {}
|
125 |
-
# for task, logits in tokens_result.logits.items():
|
126 |
-
# predictions[task] = torch.argmax(logits, dim=-1).tolist()[0]
|
127 |
-
# confidence_scores[task] = F.softmax(logits, dim=-1).tolist()[0]
|
128 |
-
#
|
129 |
-
# entities = {}
|
130 |
-
# for task in predictions.keys():
|
131 |
-
# words_list, preds_list, confidence_list = realign(
|
132 |
-
# text_sentence,
|
133 |
-
# predictions[task],
|
134 |
-
# confidence_scores[task],
|
135 |
-
# self.tokenizer,
|
136 |
-
# self.id2label[task],
|
137 |
-
# )
|
138 |
-
#
|
139 |
-
# entities[task] = get_entities(words_list, preds_list, confidence_list, text)
|
140 |
-
#
|
141 |
-
# postprocessed_entities = self.postprocess_entities(entities, text_sentence)
|
142 |
results = []
|
143 |
for wikipedia_name in outputs:
|
144 |
# Get QID
|
145 |
qid = get_wikipedia_page_props(wikipedia_name)
|
146 |
-
print(f"{wikipedia_name} -- QID: {qid}")
|
147 |
|
148 |
# Get Wikipedia title and URL
|
149 |
title, url = get_wikipedia_title(qid)
|
|
|
1 |
from transformers import Pipeline
|
|
|
|
|
2 |
import nltk
|
3 |
|
4 |
nltk.download("averaged_perceptron_tagger")
|
5 |
nltk.download("averaged_perceptron_tagger_eng")
|
|
|
|
|
|
|
6 |
import requests
|
|
|
|
|
7 |
|
8 |
|
9 |
def get_wikipedia_page_props(input_str: str):
|
|
|
97 |
wikipedia_predictons = self.tokenizer.batch_decode(
|
98 |
outputs, skip_special_tokens=True
|
99 |
)
|
100 |
+
# print(f"Decoded: {wikipedia_predictons}")
|
101 |
|
102 |
return wikipedia_predictons
|
103 |
|
|
|
111 |
:param kwargs:
|
112 |
:return:
|
113 |
"""
|
114 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
results = []
|
116 |
for wikipedia_name in outputs:
|
117 |
# Get QID
|
118 |
qid = get_wikipedia_page_props(wikipedia_name)
|
119 |
+
# print(f"{wikipedia_name} -- QID: {qid}")
|
120 |
|
121 |
# Get Wikipedia title and URL
|
122 |
title, url = get_wikipedia_title(qid)
|