mishrasahil934 committed on
Commit
ad376bb
·
verified ·
1 Parent(s): f2e9173

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -1
app.py CHANGED
@@ -1,9 +1,12 @@
 
 
1
  import os
2
  import base64
3
  import tempfile
4
  import streamlit as st
5
  import fitz # PyMuPDF
6
-
 
7
  from transformers import pipeline
8
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
9
 
@@ -22,6 +25,20 @@ def extract_text_from_pdf(pdf_path):
22
  return text
23
  return None
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  # LLM pipeline for summarization
26
  def llm_pipeline(input_text):
27
  pipe_sum = pipeline(
@@ -67,6 +84,23 @@ def main():
67
  if input_text: # Proceed only if text extraction was successful
68
  summary = llm_pipeline(input_text)
69
  st.success(summary)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  # Text Input Section
72
  st.header("Summarize Your Text")
 
1
+ from dotenv import load_dotenv
2
+ load_dotenv()
3
  import os
4
  import base64
5
  import tempfile
6
  import streamlit as st
7
  import fitz # PyMuPDF
8
+ import requests
9
+ from bs4 import BeautifulSoup
10
  from transformers import pipeline
11
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
12
 
 
25
  return text
26
  return None
27
 
28
+ # Web Scraping Function
29
def scrape_article(url):
    """Download a web page and return the text of its ``<p>`` elements.

    Failures are reported as a returned message beginning with ``"Error:"``
    rather than by raising, because the caller in ``main`` decides between
    ``st.error`` and summarization by checking for that marker in the
    returned string.

    Args:
        url: Address of the article to fetch.

    Returns:
        The paragraph texts joined with newlines, or an ``"Error: ..."``
        message when the request fails, the server answers with an HTTP
        error status, or the page contains no paragraph text.
    """
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # turn 4xx/5xx answers into an exception
    except requests.RequestException as exc:
        # RequestException is the base class for malformed-URL, connection,
        # timeout and HTTP-status errors raised by requests.
        return f"Error: Unable to fetch the page ({exc})"

    soup = BeautifulSoup(response.content, 'html.parser')

    # Only <p> tags are collected; text held in other elements is ignored.
    article_text = "\n".join(para.get_text() for para in soup.find_all('p'))

    if not article_text.strip():
        return "Error: Unable to extract content from the page."
    return article_text
41
+
42
  # LLM pipeline for summarization
43
  def llm_pipeline(input_text):
44
  pipe_sum = pipeline(
 
84
  if input_text: # Proceed only if text extraction was successful
85
  summary = llm_pipeline(input_text)
86
  st.success(summary)
87
+ st.header("Summarize Online Articles")
88
+ url = st.text_input("Enter the URL of the article:")
89
+ if st.button("Summarize Article"):
90
+ if url.strip():
91
+ st.info("Fetching and Summarizing Article...")
92
+ article_text = scrape_article(url)
93
+ if "Error:" in article_text:
94
+ st.error(article_text)
95
+ else:
96
+ st.info("Original Article Content")
97
+ st.write(article_text[:1000] + "..." if len(article_text) > 1000 else article_text)
98
+
99
+ st.info("Summarized Content")
100
+ summary = llm_pipeline(article_text)
101
+ st.success(summary)
102
+ else:
103
+ st.warning("Please enter a valid URL.")
104
 
105
  # Text Input Section
106
  st.header("Summarize Your Text")