"""Gradio app that fetches recent RSS news and summarizes it with a HF model.

The module exposes ``demo`` (a ``gr.Blocks`` app) at import time and launches
it when executed as a script.
"""

import hashlib
import logging
import threading
import unicodedata
from datetime import datetime, timedelta
from functools import lru_cache

import feedparser
import gradio as gr
import pandas as pd
import pytz
from bs4 import BeautifulSoup
from transformers import pipeline

logger = logging.getLogger(__name__)

# Global settings
SUMMARIZER_MODELS = {
    "Default (facebook/bart-large-cnn)": "facebook/bart-large-cnn",
    "Free Model (distilbart-cnn-6-6)": "sshleifer/distilbart-cnn-6-6",
}
DEFAULT_MODEL_LABEL = "Default (facebook/bart-large-cnn)"
CACHE_SIZE = 500
RSS_FETCH_INTERVAL = timedelta(hours=8)
ARTICLE_LIMIT = 5
NEWS_SOURCES = {
    "Movilizaciones Sindicales": {
        "Pagina12": "https://www.pagina12.com.ar/rss/edicion-impresa",
    }
}
UNION_CATEGORY = "Movilizaciones Sindicales"
# Accent-free, lowercase stems; "movilizacion" also covers the plural
# "movilizaciones", which carries no accent and so never matched the
# accented literal used previously.
MOBILIZATION_KEYWORDS = ("movilizacion", "sindical")
SUMMARY_UNAVAILABLE = "Summary unavailable."


class NewsCache:
    """Thread-safe, size-bounded cache with first-in-first-out eviction."""

    def __init__(self, size):
        """Create an empty cache holding at most ``size`` entries."""
        self.cache = {}
        self.size = size
        self.lock = threading.Lock()

    def get(self, key):
        """Return the value stored under ``key``, or ``None`` if absent."""
        with self.lock:
            return self.cache.get(key)

    def set(self, key, value):
        """Store ``value`` under ``key``, evicting the oldest entry if full.

        Overwriting an existing key never evicts another entry, and a
        non-positive ``size`` disables storage instead of raising.
        """
        with self.lock:
            if self.size <= 0:
                return
            if key not in self.cache:
                # dicts preserve insertion order, so the first key is the
                # oldest one.
                while len(self.cache) >= self.size:
                    del self.cache[next(iter(self.cache))]
            self.cache[key] = value


cache = NewsCache(CACHE_SIZE)


def _entry_published(entry):
    """Return the entry's publication time as an aware UTC datetime.

    Returns ``None`` when the entry has no usable ``published_parsed`` value.
    """
    parsed = getattr(entry, "published_parsed", None)
    if not parsed:
        return None
    try:
        return datetime(*parsed[:6], tzinfo=pytz.UTC)
    except (TypeError, ValueError):
        return None


def _collect_articles(source, url, cutoff_time, category=None,
                      title_filter=None):
    """Fetch one feed and return its entries published after ``cutoff_time``.

    Args:
        source: Human-readable name of the feed, stored on each article.
        url: RSS feed URL.
        cutoff_time: Aware datetime; older entries are skipped.
        category: When given, stored on each article under ``"category"``.
        title_filter: Optional predicate on the entry title; entries for
            which it returns a falsy value are skipped.

    Returns:
        A list of article dicts. A malformed entry is logged and skipped
        without discarding the rest of the feed.
    """
    try:
        feed = feedparser.parse(url)
    except Exception:
        # Best-effort boundary: one failing source must not break the others.
        logger.exception("Failed to fetch feed %s (%s)", source, url)
        return []

    collected = []
    for entry in feed.entries:
        try:
            published = _entry_published(entry)
            if published is None or published <= cutoff_time:
                continue
            title = entry.title
            if title_filter is not None and not title_filter(title):
                continue
            article = {
                "title": title,
                "description": BeautifulSoup(
                    entry.description, "html.parser"
                ).get_text(),
                "link": entry.link,
            }
            if category is not None:
                article["category"] = category
            article["source"] = source
            article["published"] = published
            collected.append(article)
        except (AttributeError, KeyError, TypeError, ValueError) as exc:
            logger.warning(
                "Skipping malformed entry from %s: %s", source, exc
            )
    return collected


def fetch_rss_news(categories):
    """Return the newest articles for the given categories.

    Only entries published within ``RSS_FETCH_INTERVAL`` are considered; the
    result is sorted newest first and capped at ``ARTICLE_LIMIT`` items.
    Unknown categories are ignored.
    """
    cutoff_time = datetime.now(pytz.UTC) - RSS_FETCH_INTERVAL
    articles = []
    for category in categories:
        for source, url in NEWS_SOURCES.get(category, {}).items():
            articles.extend(
                _collect_articles(source, url, cutoff_time, category=category)
            )
    articles.sort(key=lambda item: item["published"], reverse=True)
    return articles[:ARTICLE_LIMIT]


@lru_cache(maxsize=max(1, len(SUMMARIZER_MODELS)))
def _get_summarizer(model_name):
    """Load the summarization pipeline for ``model_name`` once and reuse it.

    Building a pipeline loads the model weights, so doing it on every call
    dominates request time.
    """
    return pipeline("summarization", model=model_name, device=-1)


def summarize_text(text, model_name):
    """Summarize ``text`` with ``model_name``, caching results per model.

    Returns the summary string, or ``"Summary unavailable."`` when the text
    is blank or the model cannot be loaded or run. Failures are not cached.
    """
    if not text or not text.strip():
        return SUMMARY_UNAVAILABLE

    # The model is part of the key so that switching models never returns a
    # summary produced by a different one.
    cache_key = hashlib.md5(f"{model_name}\x00{text}".encode()).hexdigest()
    cached_summary = cache.get(cache_key)
    if cached_summary is not None:
        return cached_summary

    try:
        summarizer = _get_summarizer(model_name)
        result = summarizer(
            text, max_length=120, min_length=40, truncation=True
        )
        summary = result[0]["summary_text"]
    except Exception:
        logger.exception("Summarization failed with model %s", model_name)
        return SUMMARY_UNAVAILABLE

    cache.set(cache_key, summary)
    return summary


def summarize_articles(articles, model_name):
    """Return one formatted text block per article, joined by newlines.

    Each article dict must provide ``title``, ``category``, ``source``,
    ``link`` and ``description``.
    """
    summaries = []
    for article in articles:
        summary = summarize_text(article["description"], model_name)
        summaries.append(
            "\n"
            f"📰 {article['title']}\n"
            f"- 📁 Category: {article['category']}\n"
            f"- 💡 Source: {article['source']}\n"
            f"- 🔗 Read More: {article['link']}\n"
            f"📃 Summary: {summary}\n"
        )
    return "\n".join(summaries)


def generate_summary(selected_categories, model_name):
    """Fetch and summarize news for the selected categories.

    Returns a user-facing message when no category is selected or no recent
    article is found.
    """
    if not selected_categories:
        return "Please select at least one category."
    articles = fetch_rss_news(selected_categories)
    if not articles:
        return "No recent news found in the selected categories."
    return summarize_articles(articles, model_name)


def _strip_accents(text):
    """Return ``text`` with combining diacritical marks removed."""
    decomposed = unicodedata.normalize("NFKD", text)
    return "".join(ch for ch in decomposed if not unicodedata.combining(ch))


def _is_union_mobilization(title):
    """Tell whether ``title`` mentions a mobilization or union keyword.

    Matching ignores case and accents.
    """
    normalized = _strip_accents(title).casefold()
    return any(keyword in normalized for keyword in MOBILIZATION_KEYWORDS)


def fetch_union_mobilizations():
    """Return last-day articles whose title mentions union mobilizations.

    Reads the feeds configured under the "Movilizaciones Sindicales"
    category; returns an empty list when that category is not configured.
    """
    cutoff_time = datetime.now(pytz.UTC) - timedelta(days=1)
    articles = []
    for source, url in NEWS_SOURCES.get(UNION_CATEGORY, {}).items():
        # Keep only entries about union mobilizations.
        articles.extend(
            _collect_articles(
                source, url, cutoff_time, title_filter=_is_union_mobilization
            )
        )
    return articles


def create_mobilization_table():
    """Return recent union mobilizations rendered as a plain-text table."""
    articles = fetch_union_mobilizations()
    if not articles:
        return "No se encontraron movilizaciones sindicales recientes."
    # Build the table with pandas.
    df = pd.DataFrame(articles)
    return df.to_string(index=False)


# Gradio Interface
demo = gr.Blocks()

with demo:
    gr.Markdown("# 📰 AI News Summarizer")

    with gr.Row():
        categories = gr.CheckboxGroup(
            choices=list(NEWS_SOURCES.keys()),
            label="Select News Categories",
        )
        model_selector = gr.Radio(
            choices=list(SUMMARIZER_MODELS.keys()),
            label="Choose Summarization Model",
            value=DEFAULT_MODEL_LABEL,
        )

    summarize_button = gr.Button("Get News Summary")
    summary_output = gr.Textbox(label="News Summary", lines=20)

    def get_summary(selected_categories, selected_model):
        """Map the selected model label to its model id and summarize."""
        # Fall back to the default model if the radio has no valid selection.
        model_name = SUMMARIZER_MODELS.get(
            selected_model, SUMMARIZER_MODELS[DEFAULT_MODEL_LABEL]
        )
        return generate_summary(selected_categories, model_name)

    summarize_button.click(
        get_summary,
        inputs=[categories, model_selector],
        outputs=summary_output,
    )

if __name__ == "__main__":
    demo.launch()