# Spaces: Running (page-status text captured along with the source; not part of the program)
import gradio as gr | |
from transformers import pipeline | |
import feedparser | |
from datetime import datetime, timedelta | |
import pytz | |
from bs4 import BeautifulSoup | |
# Global settings
# Model identifier handed to the summarization pipeline.
SUMMARIZER_MODEL = "facebook/bart-large-cnn"
# Feed entries older than this window are ignored when fetching.
RSS_FETCH_INTERVAL = timedelta(hours=8)
# Maximum number of articles one fetch returns, across all selected categories.
ARTICLE_LIMIT = 5
# News sources
# Maps category name -> {source name -> RSS feed URL}.
NEWS_SOURCES = {
    "Technology": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
        "Reuters": "https://www.reutersagency.com/feed/?best-topics=tech&post_type=best",
    },
    "Business": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
        "Reuters": "https://www.reutersagency.com/feed/?best-topics=business-finance&post_type=best",
    },
    "Science": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Science.xml",
    },
    "World News": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
        "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
        "CNN": "http://rss.cnn.com/rss/edition_world.rss",
        "Reuters": "https://www.reutersagency.com/feed/?taxonomy=best-regions&post_type=best",
    },
    "Sports": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Sports.xml",
        "Reuters": "https://www.reutersagency.com/feed/?best-topics=sports&post_type=best",
    },
    "Health": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Health.xml",
        "Politico": "http://rss.politico.com/healthcare.xml",
        "Reuters": "https://www.reutersagency.com/feed/?best-topics=health&post_type=best",
    },
}
# Summarizer initialization
# Created once at import time so every request reuses the same pipeline object.
# NOTE(review): device=-1 is expected to select CPU inference -- confirm
# against the installed transformers version.
summarizer = pipeline("summarization", model=SUMMARIZER_MODEL, device=-1)
def _entry_to_article(entry, category, source):
    """Convert one feed entry into an article dict, or return ``None`` if undated.

    Assumes *entry* is dict-like (supports ``.get``), as feedparser entries
    are expected to be. ``updated_parsed`` is used when ``published_parsed``
    is missing or empty.
    """
    parsed_time = entry.get("published_parsed") or entry.get("updated_parsed")
    if not parsed_time:
        # Without a timestamp the entry cannot be compared to the cutoff.
        return None
    raw_description = entry.get("description") or ""
    return {
        "title": entry.get("title", ""),
        # Feed descriptions may contain HTML; keep only the visible text.
        "description": BeautifulSoup(raw_description, "html.parser").get_text(),
        "link": entry.get("link", ""),
        "category": category,
        "source": source,
        "published": datetime(*parsed_time[:6], tzinfo=pytz.UTC),
    }


def fetch_rss_news(categories):
    """Collect recent articles from the RSS feeds of the selected categories.

    Args:
        categories: Iterable of category names. Names that are not keys of
            ``NEWS_SOURCES`` contribute no feeds.

    Returns:
        A list of at most ``ARTICLE_LIMIT`` article dicts, newest first. Each
        dict has the keys ``title``, ``description``, ``link``, ``category``,
        ``source`` and ``published`` (a timezone-aware ``datetime``). Only
        entries newer than ``RSS_FETCH_INTERVAL`` ago are included.
    """
    articles = []
    cutoff_time = datetime.now(pytz.UTC) - RSS_FETCH_INTERVAL
    for category in categories:
        for source, url in NEWS_SOURCES.get(category, {}).items():
            try:
                feed = feedparser.parse(url)
            except Exception as e:
                print(f"Error fetching from {url}: {e}")
                continue
            for entry in feed.entries:
                # Handle entries one at a time so a single malformed entry
                # does not discard the rest of the feed.
                try:
                    article = _entry_to_article(entry, category, source)
                except Exception as e:
                    print(f"Skipping malformed entry from {url}: {e}")
                    continue
                if article is not None and article["published"] > cutoff_time:
                    articles.append(article)
    articles.sort(key=lambda item: item["published"], reverse=True)
    return articles[:ARTICLE_LIMIT]
def summarize_with_ai(text):
    """Summarize *text* with the module-level ``summarizer`` pipeline.

    Args:
        text: Article text to condense.

    Returns:
        The pipeline's ``summary_text`` for the input, or the string
        ``"Summary unavailable."`` when *text* is empty or whitespace-only,
        or when the pipeline raises.
    """
    # Nothing to summarize: skip the model call instead of sending it an
    # empty prompt.
    if not text or not text.strip():
        return "Summary unavailable."
    try:
        result = summarizer(text, max_length=120, min_length=40, truncation=True)
        return result[0]['summary_text']
    except Exception as e:
        # Best-effort: a model failure must not break the whole request.
        print(f"AI summarization error: {e}")
        return "Summary unavailable."
def summarize_with_free_module(text, max_sentences=3):
    """Return a heuristic summary made of the leading sentences of *text*.

    Sentences are delimited by the literal separator ``". "``.

    Args:
        text: Text to shorten.
        max_sentences: Number of leading sentences to keep; expected to be
            positive. Defaults to 3.

    Returns:
        *text* unchanged when it has at most ``max_sentences`` sentences (an
        empty string for empty or ``None`` input); otherwise the first
        ``max_sentences`` sentences followed by ``"..."``.
    """
    if not text:
        return ""
    sentences = text.split('. ')
    if len(sentences) <= max_sentences:
        # Nothing was cut, so do not add an ellipsis that implies truncation.
        return text
    return '. '.join(sentences[:max_sentences]) + '...'
def summarize_articles(articles, method="AI Model"):
    """Build one formatted text block per article, including its summary.

    Args:
        articles: Iterable of article dicts with the keys ``title``,
            ``category``, ``source``, ``published`` (must support
            ``strftime``), ``description`` and ``link``.
        method: ``"AI Model"`` selects ``summarize_with_ai``; any other value
            selects ``summarize_with_free_module``.

    Returns:
        A list of formatted strings. Articles whose summarization or
        formatting raises are reported with ``print`` and left out.
    """
    summarizer_function = summarize_with_ai if method == "AI Model" else summarize_with_free_module
    # Written with explicit \n escapes: a plain quoted string cannot span
    # several physical lines.
    template = (
        "Title: {0}\n"
        "- Category: {1}\n"
        "- Source: {2}\n"
        "- Published: {3}\n"
        "Summary: {4}\n"
        "Read more: {5}"
    )
    summaries = []
    for article in articles:
        try:
            summary = summarizer_function(article["description"])
            summaries.append(
                template.format(
                    article["title"],
                    article["category"],
                    article["source"],
                    article["published"].strftime('%Y-%m-%d %H:%M'),
                    summary,
                    article["link"],
                )
            )
        except Exception as e:
            # Best-effort: one bad article must not drop the others.
            print(f"Error summarizing article: {e}")
    return summaries
# Gradio Interface
def generate_summary(categories, method):
    """Fetch recent news for *categories* and return the summaries as text.

    Args:
        categories: Selected category names; an empty or ``None`` selection
            short-circuits with a prompt to choose one.
        method: Summarization method name, passed through to
            ``summarize_articles``.

    Returns:
        A user-facing message when nothing is selected or no recent article
        is found; otherwise the formatted summaries joined by newlines.
    """
    if not categories:
        return "Please select at least one category."
    articles = fetch_rss_news(categories)
    if not articles:
        return "No recent articles found."
    summaries = summarize_articles(articles, method)
    return "\n".join(summaries)
# UI definition: category checkboxes and method dropdown, a trigger button,
# and a textbox that receives the text returned by generate_summary.
demo = gr.Blocks()
with demo:
    gr.Markdown("# AI News Summarizer")
    with gr.Row():
        categories = gr.CheckboxGroup(choices=list(NEWS_SOURCES.keys()), label="Select Categories")
        method = gr.Dropdown(choices=["AI Model", "Free Module"], label="Summarization Method", value="AI Model")
    summarize_button = gr.Button("Generate Summary")
    output = gr.Textbox(label="Summarized News", lines=15)
    # Wire the button to the handler; registered inside the Blocks context.
    summarize_button.click(generate_summary, inputs=[categories, method], outputs=output)

if __name__ == "__main__":
    demo.launch()