Sravan1214 committed on
Commit
91d58be
1 Parent(s): 41451bd

Made all the changes needed for app.py to work

Browse files
Files changed (1) hide show
  1. app.py +13 -166
app.py CHANGED
@@ -150,17 +150,10 @@ def ner_inference(txt):
150
  def ner_inference_long_text(txt):
151
  entities = []
152
  doc = nlp(txt)
153
- n_sents = len([_ for _ in doc.sents])
154
- n = 0
155
- progress_bar = st.progress(0, text=f'Processed 0 / {n_sents} sentences')
156
  for sent in doc.sents:
157
- entities.extend(ner_inference(sent.text))
158
- n += 1
159
- progress_bar.progress(n / n_sents, text=f'Processed {n} / {n_sents} sentences')
160
- # progress_bar.empty()
161
  return entities
162
 
163
-
164
  def get_ner_text(article_txt, ner_result):
165
  res_txt = ''
166
  start = 0
@@ -188,7 +181,7 @@ def get_ner_text(article_txt, ner_result):
188
  ############ SUMMARIZATION MODEL & VARS INITIALIZATION START ####################
189
  SUMM_CHECKPOINT = "facebook/bart-base"
190
  SUMM_INPUT_N_TOKENS = 400
191
- SUMM_TARGET_N_TOKENS = 300
192
 
193
  @st.cache_resource
194
  def load_summarizer_models():
@@ -242,166 +235,20 @@ def summ_inference(txt: str):
242
 
243
  ############## ENTRY POINT START #######################
244
  def main():
245
- st.markdown('''<h3>Text Summarizer</h3>
246
- # <p><a href="https://huggingface.co/spaces/Sravan1214/news_summarizer_ner/blob/main/README.md#new-summarization-and-ner" target="_blank">README</a>
247
- # <br>
248
- # The app works best in summarizing <a href="https://edition.cnn.com/" target="_blank">CNN</a> and
249
- # <a href="https://www.dailymail.co.uk/home/index.html" target="_blank">Daily Mail</a> news articles,
250
- # as the BART model is fine-tuned on them.
251
- # </p>
252
-
253
- ''', unsafe_allow_html=True)
254
- input_type = st.radio('Select an option:', ['Paste news URL', 'Paste news text'],
255
- horizontal=True)
256
-
257
- scrape_error = None
258
- summary_error = None
259
- ner_error = None
260
- summ_result = None
261
- ner_result = None
262
- ner_df = None
263
- article_txt = None
264
-
265
-
266
- if input_type == 'Paste news URL':
267
- article_url = st.text_input("Paste the URL of a news article", "")
268
-
269
- if (st.button("Submit")) or (article_url):
270
- with st.status("Processing...", expanded=True) as status:
271
- status.empty()
272
- # Scraping data Start
273
- try:
274
- st.info("Scraping data from the URL.", icon="ℹ️")
275
- article_txt = scrape_text(article_url)
276
- st.success("Successfully scraped the data.", icon="✅")
277
- except Exception as e:
278
- article_txt = None
279
- scrape_error = str(e)
280
-
281
- # Scraping data End
282
-
283
- if article_txt is not None:
284
- article_txt = re.sub(r'\n+',' ', article_txt)
285
-
286
- # Generating summary start
287
-
288
- try:
289
- st.info("Generating the summary.", icon="ℹ️")
290
- summ_result = summ_inference(article_txt)
291
- except Exception as e:
292
- summ_result = None
293
- summary_error = str(e)
294
- if summ_result is not None:
295
- st.success("Successfully generated the summary.", icon="✅")
296
- else:
297
- st.error("Encountered an error while generating the summary.", icon="🚨")
298
-
299
- # Generating summary end
300
-
301
-
302
- # NER start
303
- try:
304
- st.info("Recognizing the entites.", icon="ℹ️")
305
- ner_result = [[ent, label.upper(), np.round(prob, 3)]
306
- for ent, label, prob in ner_inference_long_text(article_txt)]
307
-
308
- ner_df = pd.DataFrame(ner_result, columns=['entity', 'label', 'confidence'])
309
-
310
- ner_result = get_ner_text(article_txt, ner_result).replace('$', '\$')
311
-
312
- except Exception as e:
313
- ner_result = None
314
- ner_error = str(e)
315
- if ner_result is not None:
316
- st.success("Successfully recognized the entites.", icon="✅")
317
- else:
318
- st.error("Encountered an error while recognizing the entites.", icon="🚨")
319
-
320
- # NER end
321
- else:
322
- st.error("Encountered an error while scraping the data.", icon="🚨")
323
-
324
- if (scrape_error is None) and (summary_error is None) and (ner_error is None):
325
- status.update(label="Done", state="complete", expanded=False)
326
- else:
327
- status.update(label="Error", state="error", expanded=False)
328
-
329
- if scrape_error is not None:
330
- st.error(f"Scrape Error: \n{scrape_error}", icon="🚨")
331
- else:
332
- if summary_error is not None:
333
- st.error(f"Summary Error: \n{summary_error}", icon="🚨")
334
- else:
335
- st.markdown(f"<h4>SUMMARY:</h4>{summ_result}", unsafe_allow_html=True)
336
-
337
- if ner_error is not None:
338
- st.error(f"NER Error \n{ner_error}", icon="🚨")
339
- else:
340
- st.markdown(f"<h4>ENTITIES:</h4>{ner_result}", unsafe_allow_html=True)
341
- # st.dataframe(ner_df, use_container_width=True)
342
-
343
- st.markdown(f"<h4>SCRAPED TEXT:</h4>{article_txt}", unsafe_allow_html=True)
344
-
345
- else:
346
- article_txt = st.text_area("Paste the text of a news article", "", height=150)
347
-
348
- if (st.button("Submit")) or (article_txt):
349
- with st.status("Processing...", expanded=True) as status:
350
- article_txt = re.sub(r'\n+',' ', article_txt)
351
-
352
- # Generating summary start
353
-
354
- try:
355
- st.info("Generating the summary.", icon="ℹ️")
356
- summ_result = summ_inference(article_txt)
357
- except Exception as e:
358
- summ_result = None
359
- summary_error = str(e)
360
- if summ_result is not None:
361
- st.success("Successfully generated the summary.", icon="✅")
362
- else:
363
- st.error("Encountered an error while generating the summary.", icon="🚨")
364
-
365
- # Generating summary end
366
-
367
-
368
- # NER start
369
- try:
370
- st.info("Recognizing the entites.", icon="ℹ️")
371
- ner_result = [[ent, label.upper(), np.round(prob, 3)]
372
  for ent, label, prob in ner_inference_long_text(article_txt)]
373
-
374
- ner_df = pd.DataFrame(ner_result, columns=['entity', 'label', 'confidence'])
375
-
376
- ner_result = get_ner_text(article_txt, ner_result).replace('$', '\$')
377
-
378
- except Exception as e:
379
- ner_result = None
380
- ner_error = str(e)
381
- if ner_result is not None:
382
- st.success("Successfully recognized the entites.", icon="✅")
383
- else:
384
- st.error("Encountered an error while recognizing the entites.", icon="🚨")
385
-
386
- # NER end
387
-
388
- if (summary_error is None) and (ner_error is None):
389
- status.update(label="Done", state="complete", expanded=False)
390
- else:
391
- status.update(label="Error", state="error", expanded=False)
392
-
393
- if summary_error is not None:
394
- st.error(f"Summary Error: \n{summary_error}", icon="🚨")
395
- else:
396
- st.markdown(f"<h4>SUMMARY:</h4>{summ_result}", unsafe_allow_html=True)
397
-
398
- if ner_error is not None:
399
- st.error(f"NER Error \n{ner_error}", icon="🚨")
400
- else:
401
- st.markdown(f"<h4>ENTITIES:</h4>{ner_result}", unsafe_allow_html=True)
402
- # st.dataframe(ner_df, use_container_width=True)
403
 
 
 
404
 
 
 
 
405
 
406
  ############## ENTRY POINT END #######################
407
 
 
150
  def ner_inference_long_text(txt):
151
  entities = []
152
  doc = nlp(txt)
 
 
 
153
  for sent in doc.sents:
154
+ entities.extend(ner_inference(sent.text))
 
 
 
155
  return entities
156
 
 
157
  def get_ner_text(article_txt, ner_result):
158
  res_txt = ''
159
  start = 0
 
181
  ############ SUMMARIZATION MODEL & VARS INITIALIZATION START ####################
182
  SUMM_CHECKPOINT = "facebook/bart-base"
183
  SUMM_INPUT_N_TOKENS = 400
184
+ SUMM_TARGET_N_TOKENS = 100
185
 
186
  @st.cache_resource
187
  def load_summarizer_models():
 
235
 
236
  ############## ENTRY POINT START #######################
237
  def main():
238
+ st.markdown('''<h3>Text Summarizer</h3>
239
+ #<p><a href="https://huggingface.co/spaces/Sravan1214/news-summarizer-ner/blob/main/README.md" target="_blank">README</a></p>''', unsafe_allow_html=True)
240
+ article_txt = st.text_area("Paste the text (the longer, the better):", "", height=200)
241
+ article_txt = re.sub(r'\n+',' ', article_txt)
242
+ if st.button("Submit"):
243
+ ner_result = [[ent, label.upper(), np.round(prob, 3)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  for ent, label, prob in ner_inference_long_text(article_txt)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
+ ner_df = pd.DataFrame(ner_result, columns=['entity', 'label', 'confidence'])
247
+ summ_result = summ_inference(article_txt)
248
 
249
+ ner_txt = get_ner_text(article_txt, ner_result).replace('$', '\$')
250
+
251
+ st.markdown(f"<h4>SUMMARY:</h4>{summ_result}", unsafe_allow_html=True)
252
 
253
  ############## ENTRY POINT END #######################
254