news-sumarry / app.py
loayshabet's picture
Update app.py
5c1b01d verified
raw
history blame
4.95 kB
import gradio as gr
from transformers import pipeline
import feedparser
from datetime import datetime, timedelta
import pytz
from bs4 import BeautifulSoup
# Global settings
# Maximum number of articles kept after sorting newest-first.
ARTICLE_LIMIT = 5
# Only feed entries published within this window before "now" are kept.
RSS_FETCH_INTERVAL = timedelta(hours=8)
# Model identifier passed to the transformers summarization pipeline.
SUMMARIZER_MODEL = "facebook/bart-large-cnn"
# News sources
# Maps category name -> {source name -> RSS feed URL}. The category order
# here is the order in which the categories are offered in the UI.
NEWS_SOURCES = {
    "Technology": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
        "Reuters": "https://www.reutersagency.com/feed/?best-topics=tech&post_type=best",
    },
    "Business": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
        "Reuters": "https://www.reutersagency.com/feed/?best-topics=business-finance&post_type=best",
    },
    "Science": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Science.xml",
    },
    "World News": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
        "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
        "CNN": "http://rss.cnn.com/rss/edition_world.rss",
        "Reuters": "https://www.reutersagency.com/feed/?taxonomy=best-regions&post_type=best",
    },
    "Sports": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Sports.xml",
        "Reuters": "https://www.reutersagency.com/feed/?best-topics=sports&post_type=best",
    },
    "Health": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Health.xml",
        "Politico": "http://rss.politico.com/healthcare.xml",
        "Reuters": "https://www.reutersagency.com/feed/?best-topics=health&post_type=best",
    },
}
# Summarizer initialization
# Created once at import time and reused for every summarization call.
# device=-1 asks the transformers pipeline to run on CPU.
summarizer = pipeline(
    "summarization",
    model=SUMMARIZER_MODEL,
    device=-1,
)
def fetch_rss_news(categories):
    """Collect recent articles from the RSS feeds of the given categories.

    Args:
        categories: Iterable of category names. Each name is looked up in
            ``NEWS_SOURCES``; unknown names contribute no feeds.

    Returns:
        A list of at most ``ARTICLE_LIMIT`` dicts, newest first, each with
        the keys ``title``, ``description`` (HTML stripped to plain text),
        ``link``, ``category``, ``source`` and ``published`` (a UTC-aware
        ``datetime``). Only entries published within ``RSS_FETCH_INTERVAL``
        of the current time are included.

    Failures are reported with ``print`` and never raised: a feed that
    cannot be fetched is skipped, and a malformed entry is skipped without
    discarding the remaining entries of the same feed.
    """
    articles = []
    cutoff_time = datetime.now(pytz.UTC) - RSS_FETCH_INTERVAL
    for category in categories:
        for source, url in NEWS_SOURCES.get(category, {}).items():
            try:
                feed = feedparser.parse(url)
            except Exception as e:
                # Best-effort: one unreachable feed must not abort the rest.
                print(f"Error fetching from {url}: {e}")
                continue

            for entry in feed.entries:
                try:
                    # Not every feed supplies a publication date; fall back
                    # to the update date and skip entries with neither, since
                    # they cannot be compared against the cutoff.
                    parsed_time = entry.get("published_parsed") or entry.get("updated_parsed")
                    if not parsed_time:
                        continue
                    published = datetime(*parsed_time[:6], tzinfo=pytz.UTC)
                    if published <= cutoff_time:
                        continue
                    # A missing description becomes an empty string rather
                    # than an error.
                    raw_description = entry.get("description") or ""
                    articles.append({
                        "title": entry.title,
                        "description": BeautifulSoup(raw_description, "html.parser").get_text(),
                        "link": entry.link,
                        "category": category,
                        "source": source,
                        "published": published,
                    })
                except Exception as e:
                    # Scope the failure to this entry so the remaining
                    # entries of the feed are still processed.
                    print(f"Error processing entry from {url}: {e}")

    articles = sorted(articles, key=lambda x: x["published"], reverse=True)[:ARTICLE_LIMIT]
    return articles
def summarize_with_ai(text):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Args:
        text: The text to summarize.

    Returns:
        The ``summary_text`` of the first pipeline result, or the string
        ``"Summary unavailable."`` if the pipeline call fails for any reason
        (the error is printed, not raised).
    """
    fallback = "Summary unavailable."
    try:
        outputs = summarizer(text, max_length=120, min_length=40, truncation=True)
        summary = outputs[0]["summary_text"]
    except Exception as err:
        print(f"AI summarization error: {err}")
        return fallback
    return summary
def summarize_with_free_module(text, max_sentences=3):
    """Return a simple extractive summary: the leading sentences of ``text``.

    Sentences are delimited by the literal ``". "``. An ellipsis is appended
    only when the text was actually shortened, so short inputs come back
    unchanged instead of gaining a spurious ``"..."``.

    Args:
        text: The text to summarize. ``None`` or blank text is treated as
            having nothing to summarize.
        max_sentences: Number of leading sentences to keep (expected to be
            a positive integer). Defaults to 3.

    Returns:
        The stripped text itself when it has at most ``max_sentences``
        sentences, the first ``max_sentences`` sentences followed by
        ``"..."`` otherwise, or ``"Summary unavailable."`` for empty input.
    """
    text = (text or "").strip()
    if not text:
        return "Summary unavailable."

    sentences = text.split(". ")
    if len(sentences) <= max_sentences:
        # Nothing was cut, so do not pretend the text continues.
        return text
    return ". ".join(sentences[:max_sentences]) + "..."
def _format_article_summary(article, summary):
    """Render one article and its summary as a multi-line text block."""
    # Written with explicit "\n" escapes: a plain quoted string literal
    # cannot span several physical source lines.
    return (
        "Title: {0}\n"
        "- Category: {1}\n"
        "- Source: {2}\n"
        "- Published: {3}\n"
        "Summary: {4}\n"
        "Read more: {5}"
    ).format(
        article["title"],
        article["category"],
        article["source"],
        article["published"].strftime('%Y-%m-%d %H:%M'),
        summary,
        article["link"],
    )


def summarize_articles(articles, method="AI Model"):
    """Summarize each article and format it as a displayable text block.

    Args:
        articles: Iterable of article dicts with the keys ``title``,
            ``description``, ``link``, ``category``, ``source`` and
            ``published`` (an object with ``strftime``).
        method: ``"AI Model"`` selects ``summarize_with_ai``; any other
            value selects ``summarize_with_free_module``.

    Returns:
        A list of formatted strings, one per article that was summarized
        successfully. An article that raises during summarization or
        formatting is reported with ``print`` and omitted from the result.
    """
    summarizer_function = summarize_with_ai if method == "AI Model" else summarize_with_free_module
    summaries = []
    for article in articles:
        try:
            summary = summarizer_function(article["description"])
            summaries.append(_format_article_summary(article, summary))
        except Exception as e:
            # Best-effort: one bad article must not lose the others.
            print(f"Error summarizing article: {e}")
    return summaries
# Gradio Interface
def generate_summary(categories, method):
    """Fetch and summarize recent news for the selected categories.

    This is the callback wired to the "Generate Summary" button.

    Args:
        categories: Selected category names; may be empty or ``None``.
        method: Summarization method name, forwarded unchanged to
            ``summarize_articles``.

    Returns:
        A single string: a prompt to pick a category when none is selected,
        a notice when no recent articles were found, or otherwise the
        formatted article summaries joined by newlines.
    """
    if not categories:
        return "Please select at least one category."

    articles = fetch_rss_news(categories)
    if not articles:
        return "No recent articles found."

    summaries = summarize_articles(articles, method)
    # The separator is written as an escape; a quoted literal cannot contain
    # a raw line break.
    return "\n".join(summaries)
# Build the UI: category checkboxes and a method selector feed
# generate_summary, whose text result is shown in a textbox.
# NOTE(review): the original indentation was lost; the Row is assumed to
# hold only the two input controls, with the button and output below it.
# Confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# AI News Summarizer")
    with gr.Row():
        categories = gr.CheckboxGroup(
            choices=list(NEWS_SOURCES.keys()),
            label="Select Categories",
        )
        method = gr.Dropdown(
            choices=["AI Model", "Free Module"],
            label="Summarization Method",
            value="AI Model",
        )
    summarize_button = gr.Button("Generate Summary")
    output = gr.Textbox(label="Summarized News", lines=15)
    summarize_button.click(
        generate_summary,
        inputs=[categories, method],
        outputs=output,
    )

# Start the web app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()