Emanuela Boros committed on
Commit
8d26403
1 Parent(s): 90f2fe8

added pipeline

Browse files
Files changed (1) hide show
  1. generic_nel.py +3 -30
generic_nel.py CHANGED
@@ -1,16 +1,9 @@
1
  from transformers import Pipeline
2
- import numpy as np
3
- import torch
4
  import nltk
5
 
6
  nltk.download("averaged_perceptron_tagger")
7
  nltk.download("averaged_perceptron_tagger_eng")
8
- from nltk.chunk import conlltags2tree
9
- from nltk import pos_tag
10
- from nltk.tree import Tree
11
  import requests
12
- import torch.nn.functional as F
13
- import re, string
14
 
15
 
16
  def get_wikipedia_page_props(input_str: str):
@@ -104,7 +97,7 @@ class NelPipeline(Pipeline):
104
  wikipedia_predictons = self.tokenizer.batch_decode(
105
  outputs, skip_special_tokens=True
106
  )
107
- print(f"Decoded: {wikipedia_predictons}")
108
 
109
  return wikipedia_predictons
110
 
@@ -118,32 +111,12 @@ class NelPipeline(Pipeline):
118
  :param kwargs:
119
  :return:
120
  """
121
- # outputs
122
- #
123
- # predictions = {}
124
- # confidence_scores = {}
125
- # for task, logits in tokens_result.logits.items():
126
- # predictions[task] = torch.argmax(logits, dim=-1).tolist()[0]
127
- # confidence_scores[task] = F.softmax(logits, dim=-1).tolist()[0]
128
- #
129
- # entities = {}
130
- # for task in predictions.keys():
131
- # words_list, preds_list, confidence_list = realign(
132
- # text_sentence,
133
- # predictions[task],
134
- # confidence_scores[task],
135
- # self.tokenizer,
136
- # self.id2label[task],
137
- # )
138
- #
139
- # entities[task] = get_entities(words_list, preds_list, confidence_list, text)
140
- #
141
- # postprocessed_entities = self.postprocess_entities(entities, text_sentence)
142
  results = []
143
  for wikipedia_name in outputs:
144
  # Get QID
145
  qid = get_wikipedia_page_props(wikipedia_name)
146
- print(f"{wikipedia_name} -- QID: {qid}")
147
 
148
  # Get Wikipedia title and URL
149
  title, url = get_wikipedia_title(qid)
 
1
  from transformers import Pipeline
 
 
2
  import nltk
3
 
4
  nltk.download("averaged_perceptron_tagger")
5
  nltk.download("averaged_perceptron_tagger_eng")
 
 
 
6
  import requests
 
 
7
 
8
 
9
  def get_wikipedia_page_props(input_str: str):
 
97
  wikipedia_predictons = self.tokenizer.batch_decode(
98
  outputs, skip_special_tokens=True
99
  )
100
+ # print(f"Decoded: {wikipedia_predictons}")
101
 
102
  return wikipedia_predictons
103
 
 
111
  :param kwargs:
112
  :return:
113
  """
114
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  results = []
116
  for wikipedia_name in outputs:
117
  # Get QID
118
  qid = get_wikipedia_page_props(wikipedia_name)
119
+ # print(f"{wikipedia_name} -- QID: {qid}")
120
 
121
  # Get Wikipedia title and URL
122
  title, url = get_wikipedia_title(qid)