Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,12 @@
|
|
|
|
|
|
1 |
import os
|
2 |
import base64
|
3 |
import tempfile
|
4 |
import streamlit as st
|
5 |
import fitz # PyMuPDF
|
6 |
-
|
|
|
7 |
from transformers import pipeline
|
8 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
9 |
|
@@ -22,6 +25,20 @@ def extract_text_from_pdf(pdf_path):
|
|
22 |
return text
|
23 |
return None
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
# LLM pipeline for summarization
|
26 |
def llm_pipeline(input_text):
|
27 |
pipe_sum = pipeline(
|
@@ -67,6 +84,23 @@ def main():
|
|
67 |
if input_text: # Proceed only if text extraction was successful
|
68 |
summary = llm_pipeline(input_text)
|
69 |
st.success(summary)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
# Text Input Section
|
72 |
st.header("Summarize Your Text")
|
|
|
1 |
+
from dotenv import load_dotenv
|
2 |
+
load_dotenv()
|
3 |
import os
|
4 |
import base64
|
5 |
import tempfile
|
6 |
import streamlit as st
|
7 |
import fitz # PyMuPDF
|
8 |
+
import requests
|
9 |
+
from bs4 import BeautifulSoup
|
10 |
from transformers import pipeline
|
11 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
12 |
|
|
|
25 |
return text
|
26 |
return None
|
27 |
|
28 |
+
# Web Scraping Function
def scrape_article(url):
    """Fetch an article page and return its paragraph text.

    Args:
        url: URL of the article to download.

    Returns:
        str: The concatenated text of all ``<p>`` elements on success,
        or a string starting with ``"Error:"`` describing the failure
        (network error, non-2xx HTTP status, or a page with no
        extractable paragraph content). The Streamlit caller detects
        failure via ``"Error:" in article_text`` rather than catching
        exceptions, so this function must not raise on expected
        failure modes.
    """
    try:
        # 10-second timeout so a hung server cannot freeze the UI thread.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # turn 4xx/5xx into an exception we convert below
    except requests.RequestException as exc:
        # Convert every requests failure (DNS, timeout, bad status, ...)
        # into the "Error:"-prefixed string the caller checks for.
        return f"Error: {exc}"

    soup = BeautifulSoup(response.content, 'html.parser')

    # Extract the main content (common tags for articles)
    paragraphs = soup.find_all('p')
    article_text = "\n".join(para.get_text() for para in paragraphs)

    if not article_text.strip():
        # Same contract as above: report, don't raise, so the caller's
        # error branch actually runs instead of crashing the app.
        return "Error: Unable to extract content from the page."
    return article_text
|
41 |
+
|
42 |
# LLM pipeline for summarization
|
43 |
def llm_pipeline(input_text):
|
44 |
pipe_sum = pipeline(
|
|
|
84 |
if input_text: # Proceed only if text extraction was successful
|
85 |
summary = llm_pipeline(input_text)
|
86 |
st.success(summary)
|
87 |
+
st.header("Summarize Online Articles")
|
88 |
+
url = st.text_input("Enter the URL of the article:")
|
89 |
+
if st.button("Summarize Article"):
|
90 |
+
if url.strip():
|
91 |
+
st.info("Fetching and Summarizing Article...")
|
92 |
+
article_text = scrape_article(url)
|
93 |
+
if "Error:" in article_text:
|
94 |
+
st.error(article_text)
|
95 |
+
else:
|
96 |
+
st.info("Original Article Content")
|
97 |
+
st.write(article_text[:1000] + "..." if len(article_text) > 1000 else article_text)
|
98 |
+
|
99 |
+
st.info("Summarized Content")
|
100 |
+
summary = llm_pipeline(article_text)
|
101 |
+
st.success(summary)
|
102 |
+
else:
|
103 |
+
st.warning("Please enter a valid URL.")
|
104 |
|
105 |
# Text Input Section
|
106 |
st.header("Summarize Your Text")
|