"""Gradio app that summarizes recent RSS news for a user's saved sources.

The "Set Preferences" tab stores a user's chosen news sources as a JSON file
under ``user_preferences/``. The "Get News Summary" tab loads that file,
fetches recent entries from the matching RSS feeds and summarizes each entry's
description with a Hugging Face summarization pipeline.
"""

import hashlib
import json
import logging
import os
import threading
from datetime import datetime, timedelta
from typing import Optional

import feedparser
import gradio as gr
import pytz
from bs4 import BeautifulSoup
from transformers import pipeline

logger = logging.getLogger(__name__)

# Global settings
SUMMARIZER_MODELS = {
    "Default (facebook/bart-large-cnn)": "facebook/bart-large-cnn",
    "Free Model (distilbart-cnn-6-6)": "sshleifer/distilbart-cnn-6-6",
}
CACHE_SIZE = 500  # Cache size for summaries
# Used as the recency window: entries published longer ago than this are
# dropped by fetch_rss_news.
RSS_FETCH_INTERVAL = timedelta(hours=8)
ARTICLE_LIMIT = 5  # Maximum number of articles returned by fetch_rss_news
PREFERENCES_DIR = "user_preferences"
SUMMARY_UNAVAILABLE = "Summary unavailable."

NEWS_SOURCES = {
    "Technology": {
        "The New York Times": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
        "TechCrunch": "https://techcrunch.com/feed/",
    },
    "Business": {
        "The New York Times": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
        "Reuters": "https://www.reutersagency.com/feed/?taxonomy=best-regions&post_type=best",
    },
    "World News": {
        "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
        "CNN": "http://rss.cnn.com/rss/edition_world.rss",
    },
}


class NewsCache:
    """Bounded, lock-protected key/value cache with oldest-inserted eviction."""

    def __init__(self, size):
        """Create an empty cache holding at most ``size`` entries."""
        self.cache = {}
        self.size = size
        self.lock = threading.Lock()

    def get(self, key):
        """Return the value stored under ``key``, or ``None`` if absent."""
        with self.lock:
            return self.cache.get(key)

    def set(self, key, value):
        """Store ``value`` under ``key``, evicting the oldest entry if full.

        Overwriting an existing key never evicts another entry, and a
        non-positive ``size`` disables storage instead of raising.
        """
        with self.lock:
            if self.size <= 0:
                return
            if key not in self.cache and len(self.cache) >= self.size:
                # dicts preserve insertion order, so the first key is the
                # oldest inserted one.
                oldest_key = next(iter(self.cache))
                del self.cache[oldest_key]
            self.cache[key] = value


cache = NewsCache(CACHE_SIZE)
# The model is loaded once at import time; device=-1 is the pipeline's CPU
# setting.
summarizer = pipeline(
    "summarization",
    model=SUMMARIZER_MODELS["Default (facebook/bart-large-cnn)"],
    device=-1,
)


# Utility functions
def _entry_to_article(entry, category, source, cutoff_time) -> Optional[dict]:
    """Convert one feed entry into an article dict, or ``None`` to skip it.

    An entry is skipped when it has no usable timestamp, is not newer than
    ``cutoff_time``, or lacks a title or link.
    """
    # Some feeds only provide an "updated" timestamp; fall back to it.
    parsed_time = entry.get("published_parsed") or entry.get("updated_parsed")
    if not parsed_time:
        return None
    try:
        published = datetime(*parsed_time[:6], tzinfo=pytz.UTC)
    except (TypeError, ValueError):
        # Malformed or out-of-range time tuple (e.g. a leap second).
        return None
    if published <= cutoff_time:
        return None

    title = entry.get("title")
    link = entry.get("link")
    if not title or not link:
        return None

    description_html = entry.get("description") or ""
    return {
        "title": title,
        "description": BeautifulSoup(description_html, "html.parser").get_text(),
        "link": link,
        "category": category,
        "source": source,
        "published": published,
    }


def fetch_rss_news(selected_sources):
    """Fetch the newest recent articles from the selected sources.

    Args:
        selected_sources: Iterable of source names (inner keys of
            ``NEWS_SOURCES``). A name that appears under several categories
            selects the feed in every one of them.

    Returns:
        Up to ``ARTICLE_LIMIT`` article dicts (``title``, ``description``,
        ``link``, ``category``, ``source``, ``published``), newest first.
        Only entries newer than ``RSS_FETCH_INTERVAL`` ago are included.
        Feeds that fail are logged and skipped.
    """
    wanted = {name for name in (selected_sources or []) if isinstance(name, str)}
    cutoff_time = datetime.now(pytz.UTC) - RSS_FETCH_INTERVAL
    articles = []

    for category, sources in NEWS_SOURCES.items():
        for source, url in sources.items():
            if source not in wanted:
                continue
            try:
                feed = feedparser.parse(url)
                for entry in feed.entries:
                    article = _entry_to_article(entry, category, source, cutoff_time)
                    if article is not None:
                        articles.append(article)
            except Exception:
                # Best effort: one broken feed must not block the others.
                logger.exception("Failed to process feed %s (%s)", source, url)
                continue

    articles.sort(key=lambda item: item["published"], reverse=True)
    return articles[:ARTICLE_LIMIT]


def summarize_text(text):
    """Return a summary of ``text``, using the in-memory cache when possible.

    Blank input and summarizer failures both yield ``"Summary unavailable."``;
    failures are logged and not cached.
    """
    if not text or not text.strip():
        return SUMMARY_UNAVAILABLE

    # MD5 is only a cache key here, not a security measure.
    content_hash = hashlib.md5(text.encode("utf-8", errors="replace")).hexdigest()
    cached_summary = cache.get(content_hash)
    if cached_summary is not None:
        return cached_summary

    try:
        result = summarizer(text, max_length=120, min_length=40, truncation=True)
        summary = result[0]["summary_text"]
    except Exception:
        logger.exception("Summarization failed")
        return SUMMARY_UNAVAILABLE

    cache.set(content_hash, summary)
    return summary


def summarize_articles(articles):
    """Render each article with its summary as one text block.

    Args:
        articles: Iterable of article dicts as produced by ``fetch_rss_news``.

    Returns:
        The per-article sections joined with newlines; an empty string when
        ``articles`` is empty.
    """
    summaries = []
    for article in articles:
        summary = summarize_text(article["description"])
        # One field per line so the result is readable in a multi-line textbox.
        summaries.append(
            f"\n📰 {article['title']}\n"
            f"- 📁 Category: {article['category']}\n"
            f"- 💡 Source: {article['source']}\n"
            f"- 🔗 Read More: {article['link']}\n"
            f"📃 Summary: {summary}\n"
        )
    return "\n".join(summaries)


def _preferences_path(name) -> Optional[str]:
    """Return the preferences file path for ``name``, or ``None`` if unsafe.

    The name comes from a user-typed text box, so blank names and names
    containing path separators or NUL are rejected to keep the file inside
    ``PREFERENCES_DIR``.
    """
    if not isinstance(name, str):
        return None
    cleaned = name.strip()
    if not cleaned:
        return None
    forbidden = {"/", "\\", "\0", os.sep}
    if os.altsep:
        forbidden.add(os.altsep)
    if any(char in cleaned for char in forbidden):
        return None
    return f"{PREFERENCES_DIR}/preferences_{cleaned}.json"


def save_preferences(name, selected_sources):
    """Persist the user's selected sources and return a status message.

    Args:
        name: User name; leading/trailing whitespace is ignored.
        selected_sources: Non-empty collection of source names.

    Returns:
        A human-readable status string describing success or the failure.
    """
    cleaned_name = name.strip() if isinstance(name, str) else ""
    if not cleaned_name or not selected_sources:
        return "Name and sources are required!"

    path = _preferences_path(cleaned_name)
    if path is None:
        return "Invalid name: it must not contain path separators."

    preferences = {"name": cleaned_name, "sources": list(selected_sources)}
    try:
        os.makedirs(PREFERENCES_DIR, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            json.dump(preferences, f)
    except (OSError, TypeError, ValueError):
        logger.exception("Could not save preferences to %s", path)
        return "Failed to save preferences."
    return "Preferences saved successfully!"


def generate_user_summary(name):
    """Build the news summary text for the user called ``name``.

    Returns:
        The rendered summaries, or a message explaining why none could be
        produced (missing or unreadable preferences, no sources, no recent
        news).
    """
    not_found = "Preferences not found. Please set your preferences first."
    path = _preferences_path(name)
    if path is None:
        return not_found

    try:
        with open(path, encoding="utf-8") as f:
            preferences = json.load(f)
    except FileNotFoundError:
        return not_found
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logger.exception("Could not load preferences from %s", path)
        return "Failed to load preferences. Please save your preferences again."

    selected_sources = preferences.get("sources", []) if isinstance(preferences, dict) else []
    if not selected_sources:
        return "No sources selected. Please update your preferences."

    articles = fetch_rss_news(selected_sources)
    if not articles:
        return "No recent news found for the selected sources."

    return summarize_articles(articles)


# Gradio Interface
demo = gr.Blocks()

with demo:
    gr.Markdown("# 📰 AI News Summarizer")

    with gr.Tab("Set Preferences"):
        name_input = gr.Textbox(label="Your Name")
        source_checkboxes = []
        # One checkbox group per category, in NEWS_SOURCES order.
        for category, sources in NEWS_SOURCES.items():
            gr.Markdown(f"## {category}")
            source_checkboxes.append(
                gr.CheckboxGroup(
                    choices=list(sources.keys()),
                    label=f"Select Sources for {category}",
                )
            )
        save_button = gr.Button("Save Preferences")
        save_status = gr.Textbox(label="Status")

        def save_user_preferences(name, *source_selections):
            """Merge the per-category selections and save them for ``name``."""
            combined_sources = [
                source
                for selection in source_selections
                for source in (selection or [])
            ]
            # A source listed under several categories is stored only once;
            # dict.fromkeys keeps first-seen order.
            unique_sources = list(dict.fromkeys(combined_sources))
            return save_preferences(name, unique_sources)

        save_button.click(
            save_user_preferences,
            inputs=[name_input] + source_checkboxes,
            outputs=save_status,
        )

    with gr.Tab("Get News Summary"):
        name_input_summary = gr.Textbox(label="Your Name")
        fetch_button = gr.Button("Get Summary")
        summary_output = gr.Textbox(label="News Summary", lines=20)
        fetch_button.click(
            generate_user_summary,
            inputs=[name_input_summary],
            outputs=summary_output,
        )

if __name__ == "__main__":
    demo.launch()