"""Gradio app that fetches recent RSS news items and summarizes each one.

Flow: the user picks one or more categories and a summarization model, the
app pulls the matching RSS feeds, keeps the most recent entries, and runs
each entry's description through a Hugging Face summarization pipeline.
"""

import functools
import hashlib
import logging
import threading
from datetime import datetime, timedelta
from typing import Optional

import feedparser
import gradio as gr
import pytz
from bs4 import BeautifulSoup
from transformers import pipeline

logger = logging.getLogger(__name__)

# Global settings
SUMMARIZER_MODELS = {
    "Default (facebook/bart-large-cnn)": "facebook/bart-large-cnn",
    "Free Model (distilbart-cnn-6-6)": "sshleifer/distilbart-cnn-6-6",
}
CACHE_SIZE = 500
# Used as a recency window: only entries published within this interval
# before "now" are returned by fetch_rss_news().
RSS_FETCH_INTERVAL = timedelta(hours=8)
ARTICLE_LIMIT = 5
NEWS_SOURCES = {
    "Technology": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
    },
    "Business": {
        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
    },
    "World News": {
        "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
    },
}

# Placeholder shown when no summary can be produced for an article.
SUMMARY_UNAVAILABLE = "Summary unavailable."


class NewsCache:
    """Lock-protected, size-bounded key/value cache with FIFO eviction.

    When the cache is full and a *new* key is inserted, the entry that was
    inserted first is dropped (dicts preserve insertion order).
    """

    def __init__(self, size):
        self.cache = {}
        self.size = size
        self.lock = threading.Lock()

    def get(self, key):
        """Return the value stored under *key*, or ``None`` if absent."""
        with self.lock:
            return self.cache.get(key)

    def set(self, key, value):
        """Store *value* under *key*, evicting the oldest entry if needed."""
        with self.lock:
            if self.size <= 0:
                # A zero-capacity cache stores nothing; without this guard
                # the eviction below would call next() on an empty dict.
                return
            # Overwriting an existing key does not grow the cache, so it
            # must not push out an unrelated entry.
            if key not in self.cache and len(self.cache) >= self.size:
                oldest_key = next(iter(self.cache))
                del self.cache[oldest_key]
            self.cache[key] = value


cache = NewsCache(CACHE_SIZE)


def _entry_to_article(entry, category, source, cutoff_time):
    """Convert one feed entry to an article dict, or ``None`` to skip it.

    An entry is skipped when it has no usable timestamp or when it was
    published at or before *cutoff_time*.
    """
    parsed_time = entry.get("published_parsed") or entry.get("updated_parsed")
    if not parsed_time:
        return None
    try:
        published = datetime(*parsed_time[:6], tzinfo=pytz.UTC)
    except (TypeError, ValueError):
        return None
    if published <= cutoff_time:
        return None

    description_html = entry.get("description") or ""
    return {
        "title": entry.get("title", "Untitled"),
        "description": BeautifulSoup(description_html, "html.parser").get_text(),
        "link": entry.get("link", ""),
        "category": category,
        "source": source,
        "published": published,
    }


def fetch_rss_news(categories):
    """Return the newest articles from the feeds of the given categories.

    Args:
        categories: Iterable of category names (keys of ``NEWS_SOURCES``).
            Unknown categories contribute no articles.

    Returns:
        Up to ``ARTICLE_LIMIT`` article dicts (keys: title, description,
        link, category, source, published), newest first, restricted to
        entries published within ``RSS_FETCH_INTERVAL`` of the current time.
    """
    articles = []
    cutoff_time = datetime.now(pytz.UTC) - RSS_FETCH_INTERVAL

    for category in categories:
        for source, url in NEWS_SOURCES.get(category, {}).items():
            try:
                feed = feedparser.parse(url)
            except Exception:
                # Best effort: a broken feed must not block the others.
                logger.warning("Failed to fetch feed %s", url, exc_info=True)
                continue

            for entry in feed.entries:
                # Handle entries one by one so a single malformed item does
                # not discard the remainder of the feed.
                try:
                    article = _entry_to_article(entry, category, source, cutoff_time)
                except Exception:
                    logger.warning(
                        "Skipping malformed entry in feed %s", url, exc_info=True
                    )
                    continue
                if article is not None:
                    articles.append(article)

    articles.sort(key=lambda item: item["published"], reverse=True)
    return articles[:ARTICLE_LIMIT]


@functools.lru_cache(maxsize=len(SUMMARIZER_MODELS))
def _load_summarizer(model_name):
    """Build the summarization pipeline for *model_name* once and reuse it.

    Constructing a pipeline loads the model, which is far too slow to repeat
    for every article. Every configured model may stay resident in memory.
    """
    return pipeline("summarization", model=model_name, device=-1)


def summarize_text(text, model_name):
    """Summarize *text* with the given model, caching results.

    Args:
        text: Plain-text content to summarize.
        model_name: Hugging Face model identifier for the pipeline.

    Returns:
        The summary string, or ``"Summary unavailable."`` when the text is
        empty or the model fails on it.
    """
    if not text or not text.strip():
        return SUMMARY_UNAVAILABLE

    # The key covers both model and text: the same article summarized by a
    # different model must not be served from the other model's entry.
    content_hash = hashlib.sha256(
        f"{model_name}\0{text}".encode("utf-8")
    ).hexdigest()
    cached_summary = cache.get(content_hash)
    if cached_summary is not None:
        return cached_summary

    # Loaded outside the try block on purpose: a model that cannot be loaded
    # is a setup error that should surface, not be reported per article.
    summarizer = _load_summarizer(model_name)
    try:
        result = summarizer(text, max_length=120, min_length=40, truncation=True)
        summary = result[0]["summary_text"]
    except Exception:
        logger.exception("Summarization failed with model %s", model_name)
        return SUMMARY_UNAVAILABLE

    cache.set(content_hash, summary)
    return summary


def summarize_articles(articles, model_name):
    """Summarize each article and format the results as one text report.

    Args:
        articles: Article dicts as produced by ``fetch_rss_news``.
        model_name: Hugging Face model identifier for the pipeline.

    Returns:
        One formatted section per article, joined with newlines.
    """
    summaries = []
    for article in articles:
        summary = summarize_text(article["description"], model_name)
        summaries.append(f"""
📰 {article['title']}
- 📁 Category: {article['category']}
- 💡 Source: {article['source']}
- 🔗 Read More: {article['link']}
📃 Summary: {summary}
""")
    return "\n".join(summaries)


def generate_summary(selected_categories, model_name):
    """Fetch and summarize news for the selected categories.

    Returns a user-facing message instead when no category is selected or
    no recent article is found.
    """
    if not selected_categories:
        return "Please select at least one category."

    articles = fetch_rss_news(selected_categories)
    if not articles:
        return "No recent news found in the selected categories."

    return summarize_articles(articles, model_name)


def get_summary(selected_categories, selected_model):
    """Gradio click handler: map the model label to its id and summarize."""
    model_name = SUMMARIZER_MODELS.get(selected_model)
    if model_name is None:
        return "Please choose a summarization model."
    return generate_summary(selected_categories, model_name)


# Gradio Interface
demo = gr.Blocks()

with demo:
    gr.Markdown("# 📰 AI News Summarizer")

    with gr.Row():
        categories = gr.CheckboxGroup(
            choices=list(NEWS_SOURCES.keys()),
            label="Select News Categories",
        )
        model_selector = gr.Radio(
            choices=list(SUMMARIZER_MODELS.keys()),
            label="Choose Summarization Model",
            value="Default (facebook/bart-large-cnn)",
        )

    summarize_button = gr.Button("Get News Summary")
    summary_output = gr.Textbox(label="News Summary", lines=20)

    summarize_button.click(
        get_summary,
        inputs=[categories, model_selector],
        outputs=summary_output,
    )


if __name__ == "__main__":
    demo.launch()