Sravan1214 committed on
Commit 82ea83d
1 Parent(s): 91d58be

checking the code

Files changed (1): app.py (+6, -20)
app.py CHANGED
@@ -10,11 +10,8 @@ from transformers import (DebertaTokenizerFast,
 import tensorflow as tf
 import spacy
 import streamlit as st
-from scraper import scrape_text
 
 
-os.environ['TF_USE_LEGACY_KERAS'] = "1"
-
 class NERLabelEncoder:
     '''
     Label Encoder to encode and decode the entity labels
@@ -75,7 +72,6 @@ def load_ner_models():
 
 ner_model, ner_label_encoder, ner_tokenizer, nlp = load_ner_models()
 
-
 ############ NER MODEL & VARS INITIALIZATION END ####################
 
 ############ NER LOGIC START ####################
@@ -151,9 +147,10 @@ def ner_inference_long_text(txt):
     entities = []
     doc = nlp(txt)
     for sent in doc.sents:
-        entities.extends(ner_inference(sent.text))
+        entities.extend(ner_inference(sent.text))
     return entities
 
+
 def get_ner_text(article_txt, ner_result):
     res_txt = ''
     start = 0
@@ -177,7 +174,6 @@ def get_ner_text(article_txt, ner_result):
 
 ############ NER LOGIC END ####################
 
-
 ############ SUMMARIZATION MODEL & VARS INITIALIZATION START ####################
 SUMM_CHECKPOINT = "facebook/bart-base"
 SUMM_INPUT_N_TOKENS = 400
@@ -213,23 +209,13 @@ def summ_inference_tokenize(input_: list, n_tokens: int):
     tokenized_data = summ_tokenizer(text=input_, max_length=SUMM_TARGET_N_TOKENS, truncation=True, padding="max_length", return_tensors="tf")
     return summ_tokenizer, tokenized_data
 
-def clean_summary(summary: str):
-    summary = summary.strip()
-    if summary[-1] != '.':
-        sents = summary.split(". ")
-        summary = ". ".join(sents[:-1])
-        summary += "."
-    summary = re.sub(r'^-', "", summary)
-    summary = summary.strip()
-    if len(summary) <= 5:
-        summary = ""
-    return summary
-
 def summ_inference(txt: str):
     txt = summ_preprocess(txt)
-    inference_tokenizer, tokenized_data = summ_inference_tokenize(input_=[txt], n_tokens=SUMM_INPUT_N_TOKENS)
+    test_data = [txt]
+    inference_tokenizer, tokenized_data = summ_inference_tokenize(input_=test_data, n_tokens=SUMM_INPUT_N_TOKENS)
     pred = summ_model.generate(**tokenized_data, max_new_tokens=SUMM_TARGET_N_TOKENS)
-    result = "" if txt=="" else clean_summary(inference_tokenizer.decode(pred[0], skip_special_tokens=True))
+    result = inference_tokenizer.decode(pred[0])
+    result = re.sub("<.*?>", "", result).strip()
     return result
 ############ SUMMARIZATION MODEL & VARS INITIALIZATION END ####################
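For reference, the one-line NER fix above corrects a method-name typo: Python lists have extend, not extends, so the old call raised AttributeError: 'list' object has no attribute 'extends' on the first sentence of every document. A minimal sketch of the corrected accumulation pattern (the per-sentence entity lists below are made-up placeholders standing in for ner_inference output):

    entities = []
    # stand-ins for ner_inference(sent.text) results on two sentences
    for sent_entities in [["B-PER", "I-PER"], ["B-ORG"]]:
        entities.extend(sent_entities)  # extend flattens each list into the accumulator in place
    print(entities)  # ['B-PER', 'I-PER', 'B-ORG']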
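The summarization change removes the clean_summary post-processing helper and simplifies summ_inference: the model output is now decoded with special tokens left in and cleaned with a regex, instead of decoding with skip_special_tokens=True and trimming trailing sentence fragments. A minimal sketch of the new cleanup step, using a made-up stand-in for what inference_tokenizer.decode(pred[0]) returns for BART (which wraps text in <s>...</s> and pads with <pad>):

    import re

    # Hypothetical decoded output; real input comes from inference_tokenizer.decode(pred[0])
    raw = "<s>The court upheld the ruling on Tuesday.</s><pad><pad>"

    # The commit's cleanup: strip every <...> marker, then trim surrounding whitespace
    result = re.sub("<.*?>", "", raw).strip()
    print(result)  # The court upheld the ruling on Tuesday.

One trade-off of this approach: the regex also removes any literal angle-bracketed text that appears in the summary itself, which skip_special_tokens=True would have left alone.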