Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,12 @@
|
|
|
|
|
|
1 |
import os
|
2 |
import base64
|
3 |
import tempfile
|
4 |
import streamlit as st
|
5 |
import fitz # PyMuPDF
|
6 |
-
|
|
|
7 |
from transformers import pipeline
|
8 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
9 |
|
@@ -22,6 +25,20 @@ def extract_text_from_pdf(pdf_path):
|
|
22 |
return text
|
23 |
return None
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
# LLM pipeline for summarization
|
26 |
def llm_pipeline(input_text):
|
27 |
pipe_sum = pipeline(
|
@@ -67,6 +84,23 @@ def main():
|
|
67 |
if input_text: # Proceed only if text extraction was successful
|
68 |
summary = llm_pipeline(input_text)
|
69 |
st.success(summary)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
|
71 |
# Text Input Section
|
72 |
st.header("Summarize Your Text")
|
|
|
1 |
+
from dotenv import load_dotenv
|
2 |
+
load_dotenv()
|
3 |
import os
|
4 |
import base64
|
5 |
import tempfile
|
6 |
import streamlit as st
|
7 |
import fitz # PyMuPDF
|
8 |
+
import requests
|
9 |
+
from bs4 import BeautifulSoup
|
10 |
from transformers import pipeline
|
11 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
12 |
|
|
|
25 |
return text
|
26 |
return None
|
27 |
|
28 |
+
# Web Scraping Function
def scrape_article(url):
    """Fetch an article page and return its paragraph text.

    Args:
        url: URL of the article to download.

    Returns:
        str: The concatenated text of all ``<p>`` elements on success,
        or a string starting with ``"Error:"`` describing the failure
        (network error, non-2xx HTTP status, or a page with no
        extractable paragraph content). The Streamlit caller detects
        failure via ``"Error:" in article_text`` rather than catching
        exceptions, so this function must not raise on expected
        failure modes.
    """
    try:
        # 10-second timeout so a hung server cannot freeze the UI thread.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # turn 4xx/5xx into an exception we convert below
    except requests.RequestException as exc:
        # Convert every requests failure (DNS, timeout, bad status, ...)
        # into the "Error:"-prefixed string the caller checks for.
        return f"Error: {exc}"

    soup = BeautifulSoup(response.content, 'html.parser')

    # Extract the main content (common tags for articles)
    paragraphs = soup.find_all('p')
    article_text = "\n".join(para.get_text() for para in paragraphs)

    if not article_text.strip():
        # Same contract as above: report, don't raise, so the caller's
        # error branch actually runs instead of crashing the app.
        return "Error: Unable to extract content from the page."
    return article_text
|
41 |
+
|
42 |
# LLM pipeline for summarization
|
43 |
def llm_pipeline(input_text):
|
44 |
pipe_sum = pipeline(
|
|
|
84 |
if input_text: # Proceed only if text extraction was successful
|
85 |
summary = llm_pipeline(input_text)
|
86 |
st.success(summary)
|
87 |
+
st.header("Summarize Online Articles")
|
88 |
+
url = st.text_input("Enter the URL of the article:")
|
89 |
+
if st.button("Summarize Article"):
|
90 |
+
if url.strip():
|
91 |
+
st.info("Fetching and Summarizing Article...")
|
92 |
+
article_text = scrape_article(url)
|
93 |
+
if "Error:" in article_text:
|
94 |
+
st.error(article_text)
|
95 |
+
else:
|
96 |
+
st.info("Original Article Content")
|
97 |
+
st.write(article_text[:1000] + "..." if len(article_text) > 1000 else article_text)
|
98 |
+
|
99 |
+
st.info("Summarized Content")
|
100 |
+
summary = llm_pipeline(article_text)
|
101 |
+
st.success(summary)
|
102 |
+
else:
|
103 |
+
st.warning("Please enter a valid URL.")
|
104 |
|
105 |
# Text Input Section
|
106 |
st.header("Summarize Your Text")
|