Sravan1214 committed · Commit 91d58be · 1 parent: 41451bd

Made all the changes for app.py for working
app.py
CHANGED
@@ -150,17 +150,10 @@ def ner_inference(txt):
 def ner_inference_long_text(txt):
     entities = []
     doc = nlp(txt)
-    n_sents = len([_ for _ in doc.sents])
-    n = 0
-    progress_bar = st.progress(0, text=f'Processed 0 / {n_sents} sentences')
     for sent in doc.sents:
-        entities.extend(ner_inference(sent.text))
-        n += 1
-        progress_bar.progress(n / n_sents, text=f'Processed {n} / {n_sents} sentences')
-    # progress_bar.empty()
+        entities.extends(ner_inference(sent.text))
     return entities
 
-
 def get_ner_text(article_txt, ner_result):
     res_txt = ''
     start = 0
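Note: the added line calls `entities.extends(...)`, but Python lists have no `extends` method, so this loop raises AttributeError on the first sentence. The intended call is presumably `list.extend`, which the removed version used:

    for sent in doc.sents:
        entities.extend(ner_inference(sent.text))  # list.extend, not extends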
@@ -188,7 +181,7 @@ def get_ner_text(article_txt, ner_result):
 ############ SUMMARIZATION MODEL & VARS INITIALIZATION START ####################
 SUMM_CHECKPOINT = "facebook/bart-base"
 SUMM_INPUT_N_TOKENS = 400
-SUMM_TARGET_N_TOKENS =
+SUMM_TARGET_N_TOKENS = 100
 
 @st.cache_resource
 def load_summarizer_models():
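The bodies of load_summarizer_models and summ_inference are not part of this diff, so the sketch below is only an assumed illustration of how token-budget constants like these are typically applied to a BART checkpoint; the function name `summarize` and its wiring are hypothetical, not the app's actual code:

    # Minimal sketch (assumptions, not the committed summ_inference)
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

    SUMM_CHECKPOINT = "facebook/bart-base"
    SUMM_INPUT_N_TOKENS = 400   # truncate the article to this many input tokens
    SUMM_TARGET_N_TOKENS = 100  # cap the generated summary length

    tokenizer = AutoTokenizer.from_pretrained(SUMM_CHECKPOINT)
    model = AutoModelForSeq2SeqLM.from_pretrained(SUMM_CHECKPOINT)

    def summarize(txt: str) -> str:
        inputs = tokenizer(txt, max_length=SUMM_INPUT_N_TOKENS,
                           truncation=True, return_tensors="pt")
        output_ids = model.generate(**inputs, max_length=SUMM_TARGET_N_TOKENS)
        return tokenizer.decode(output_ids[0], skip_special_tokens=True)

The `@st.cache_resource` decorator on load_summarizer_models caches the loaded model across Streamlit reruns, so the checkpoint is only downloaded and initialized once per session.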
@@ -242,166 +235,20 @@ def summ_inference(txt: str):
 
 ############## ENTRY POINT START #######################
 def main():
-    st.markdown('''<h3>Text Summarizer</h3>
-
-
-
-
-    # </p>
-
-    ''', unsafe_allow_html=True)
-    input_type = st.radio('Select an option:', ['Paste news URL', 'Paste news text'],
-                          horizontal=True)
-
-    scrape_error = None
-    summary_error = None
-    ner_error = None
-    summ_result = None
-    ner_result = None
-    ner_df = None
-    article_txt = None
-
-
-    if input_type == 'Paste news URL':
-        article_url = st.text_input("Paste the URL of a news article", "")
-
-        if (st.button("Submit")) or (article_url):
-            with st.status("Processing...", expanded=True) as status:
-                status.empty()
-                # Scraping data Start
-                try:
-                    st.info("Scraping data from the URL.", icon="ℹ️")
-                    article_txt = scrape_text(article_url)
-                    st.success("Successfully scraped the data.", icon="✅")
-                except Exception as e:
-                    article_txt = None
-                    scrape_error = str(e)
-
-                # Scraping data End
-
-                if article_txt is not None:
-                    article_txt = re.sub(r'\n+',' ', article_txt)
-
-                    # Generating summary start
-
-                    try:
-                        st.info("Generating the summary.", icon="ℹ️")
-                        summ_result = summ_inference(article_txt)
-                    except Exception as e:
-                        summ_result = None
-                        summary_error = str(e)
-                    if summ_result is not None:
-                        st.success("Successfully generated the summary.", icon="✅")
-                    else:
-                        st.error("Encountered an error while generating the summary.", icon="🚨")
-
-                    # Generating summary end
-
-
-                    # NER start
-                    try:
-                        st.info("Recognizing the entites.", icon="ℹ️")
-                        ner_result = [[ent, label.upper(), np.round(prob, 3)]
-                                      for ent, label, prob in ner_inference_long_text(article_txt)]
-
-                        ner_df = pd.DataFrame(ner_result, columns=['entity', 'label', 'confidence'])
-
-                        ner_result = get_ner_text(article_txt, ner_result).replace('$', '\$')
-
-                    except Exception as e:
-                        ner_result = None
-                        ner_error = str(e)
-                    if ner_result is not None:
-                        st.success("Successfully recognized the entites.", icon="✅")
-                    else:
-                        st.error("Encountered an error while recognizing the entites.", icon="🚨")
-
-                    # NER end
-                else:
-                    st.error("Encountered an error while scraping the data.", icon="🚨")
-
-                if (scrape_error is None) and (summary_error is None) and (ner_error is None):
-                    status.update(label="Done", state="complete", expanded=False)
-                else:
-                    status.update(label="Error", state="error", expanded=False)
-
-            if scrape_error is not None:
-                st.error(f"Scrape Error: \n{scrape_error}", icon="🚨")
-            else:
-                if summary_error is not None:
-                    st.error(f"Summary Error: \n{summary_error}", icon="🚨")
-                else:
-                    st.markdown(f"<h4>SUMMARY:</h4>{summ_result}", unsafe_allow_html=True)
-
-                if ner_error is not None:
-                    st.error(f"NER Error \n{ner_error}", icon="🚨")
-                else:
-                    st.markdown(f"<h4>ENTITIES:</h4>{ner_result}", unsafe_allow_html=True)
-                    # st.dataframe(ner_df, use_container_width=True)
-
-                st.markdown(f"<h4>SCRAPED TEXT:</h4>{article_txt}", unsafe_allow_html=True)
-
-    else:
-        article_txt = st.text_area("Paste the text of a news article", "", height=150)
-
-        if (st.button("Submit")) or (article_txt):
-            with st.status("Processing...", expanded=True) as status:
-                article_txt = re.sub(r'\n+',' ', article_txt)
-
-                # Generating summary start
-
-                try:
-                    st.info("Generating the summary.", icon="ℹ️")
-                    summ_result = summ_inference(article_txt)
-                except Exception as e:
-                    summ_result = None
-                    summary_error = str(e)
-                if summ_result is not None:
-                    st.success("Successfully generated the summary.", icon="✅")
-                else:
-                    st.error("Encountered an error while generating the summary.", icon="🚨")
-
-                # Generating summary end
-
-
-                # NER start
-                try:
-                    st.info("Recognizing the entites.", icon="ℹ️")
-                    ner_result = [[ent, label.upper(), np.round(prob, 3)]
-                                  for ent, label, prob in ner_inference_long_text(article_txt)]
-
-                    ner_df = pd.DataFrame(ner_result, columns=['entity', 'label', 'confidence'])
-
-                    ner_result = get_ner_text(article_txt, ner_result).replace('$', '\$')
-
-                except Exception as e:
-                    ner_result = None
-                    ner_error = str(e)
-                if ner_result is not None:
-                    st.success("Successfully recognized the entites.", icon="✅")
-                else:
-                    st.error("Encountered an error while recognizing the entites.", icon="🚨")
-
-                # NER end
-
-                if (summary_error is None) and (ner_error is None):
-                    status.update(label="Done", state="complete", expanded=False)
-                else:
-                    status.update(label="Error", state="error", expanded=False)
-
-            if summary_error is not None:
-                st.error(f"Summary Error: \n{summary_error}", icon="🚨")
-            else:
-                st.markdown(f"<h4>SUMMARY:</h4>{summ_result}", unsafe_allow_html=True)
-
-            if ner_error is not None:
-                st.error(f"NER Error \n{ner_error}", icon="🚨")
-            else:
-                st.markdown(f"<h4>ENTITIES:</h4>{ner_result}", unsafe_allow_html=True)
-                # st.dataframe(ner_df, use_container_width=True)
-
-
+    st.markdown('''<h3>Text Summarizer</h3>
+    #<p><a href="https://huggingface.co/spaces/Sravan1214/news-summarizer-ner/blob/main/README.md" target="_blank">README</a></p>''', unsafe_allow_html=True)
+    article_txt = st.text_area("Paste the text (the longer, the better):", "", height=200)
+    article_txt = re.sub(r'\n+',' ', article_txt)
+    if st.button("Submit"):
+        ner_result = [[ent, label.upper(), np.round(prob, 3)]
+                      for ent, label, prob in ner_inference_long_text(article_txt)]
+
+        ner_df = pd.DataFrame(ner_result, columns=['entity', 'label', 'confidence'])
+        summ_result = summ_inference(article_txt)
+
+        ner_txt = get_ner_text(article_txt, ner_result).replace('$', '\$')
+
+        st.markdown(f"<h4>SUMMARY:</h4>{summ_result}", unsafe_allow_html=True)
 
 ############## ENTRY POINT END #######################
 
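As committed, the new main() computes ner_df and ner_txt but renders only the summary; the ENTITIES view (and the commented-out dataframe) from the old version were dropped. If entity highlighting is still wanted, a rendering step along the lines of the old code would be (hypothetical, not part of this commit):

    st.markdown(f"<h4>ENTITIES:</h4>{ner_txt}", unsafe_allow_html=True)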