loayshabet committed
Commit 4e3626e · verified · 1 Parent(s): 29adf37

Update app.py

Files changed (1)
  1. app.py +116 -177
app.py CHANGED
@@ -5,223 +5,161 @@ from datetime import datetime, timedelta
  import json
  import os
  import logging
- import hashlib
  import pytz
  from bs4 import BeautifulSoup
+ import hashlib
+ import threading

- # Logging setup
+ # Logging setup
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

- # News sources and RSS feeds
+ # Global settings
+ SUMMARIZER_MODEL = "facebook/bart-large-cnn"  # You can replace this with other summarization models
+ CACHE_SIZE = 500  # Maximum number of cached summaries
+ RSS_FETCH_INTERVAL = timedelta(hours=8)  # Fetch recent news within the last 8 hours
+ TIMEOUT_LIMIT = 30  # Maximum time in seconds to process summaries
+
+ # News sources
  NEWS_SOURCES = {
-     "Technology": {
-         "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
-         "Reuters": "https://www.reutersagency.com/feed/?best-topics=tech&post_type=best"
-     },
-     "Business": {
-         "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
-         "Reuters": "https://www.reutersagency.com/feed/?best-topics=business-finance&post_type=best"
-     },
-     "Science": {
-         "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Science.xml"
-     },
-     "World News": {
-         "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
-         "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
-         "CNN": "http://rss.cnn.com/rss/edition_world.rss",
-         "Reuters": "https://www.reutersagency.com/feed/?taxonomy=best-regions&post_type=best"
-     },
-     "Sports": {
-         "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Sports.xml",
-         "Reuters": "https://www.reutersagency.com/feed/?best-topics=sports&post_type=best"
-     },
-     "Health": {
-         "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Health.xml",
-         "Politico": "http://rss.politico.com/healthcare.xml",
-         "Reuters": "https://www.reutersagency.com/feed/?best-topics=health&post_type=best"
-     },
+     "Technology": {"NYTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml"},
+     "Business": {"Reuters": "https://www.reutersagency.com/feed/?best-topics=business-finance&post_type=best"},
+     "World": {"BBC": "http://feeds.bbci.co.uk/news/world/rss.xml"},
  }

- # News summarization model
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=-1)  # use the CPU
-
+ # Initialize cache
  class NewsCache:
-     """Class to manage cached summaries for articles."""
-     def __init__(self):
-         self.summaries = {}
-         self.max_cache_size = 1000
-
-     def store_summary(self, content_hash, summary):
-         if len(self.summaries) >= self.max_cache_size:
-             self.summaries.pop(next(iter(self.summaries)))
-         self.summaries[content_hash] = summary
-
-     def get_summary(self, content_hash):
-         return self.summaries.get(content_hash)
-
- news_cache = NewsCache()
-
- def get_content_hash(content):
-     """Generate a hash for the content."""
-     return hashlib.md5(content.encode()).hexdigest()
-
- def parse_date(date_str):
-     """Parse date string to datetime object."""
-     try:
-         return datetime.fromisoformat(date_str).astimezone(pytz.UTC)
-     except Exception as e:
-         logging.warning(f"Date parsing error: {e}")
-         return None
-
- def fetch_news_from_rss(categories):
-     """Fetch news from RSS feeds based on user interests."""
+     def __init__(self, size):
+         self.cache = {}
+         self.size = size
+         self.lock = threading.Lock()
+
+     def get(self, key):
+         with self.lock:
+             return self.cache.get(key)
+
+     def set(self, key, value):
+         with self.lock:
+             if len(self.cache) >= self.size:
+                 # Remove oldest cached item
+                 oldest_key = next(iter(self.cache))
+                 del self.cache[oldest_key]
+             self.cache[key] = value
+
+ cache = NewsCache(CACHE_SIZE)
+
+ # Initialize summarizer
+ summarizer = pipeline("summarization", model=SUMMARIZER_MODEL, device=-1)
+
+ # Utility functions
+ def fetch_rss_news(categories):
+     """Fetch news articles from RSS feeds based on selected categories."""
      articles = []
-     cutoff_time = datetime.now(pytz.UTC) - timedelta(hours=24)  # make sure 24 hours is used here
-
+     cutoff_time = datetime.now(pytz.UTC) - RSS_FETCH_INTERVAL
+
      for category in categories:
-         if category in NEWS_SOURCES:
-             for source, feed_url in NEWS_SOURCES[category].items():
-                 try:
-                     feed = feedparser.parse(feed_url)
-                     logging.info(f"Fetched {len(feed.entries)} entries from {source} for category {category}")  # log the number of entries
-                     for entry in feed.entries:
-                         published = parse_date(entry.get('published', entry.get('updated')))  # use "updated" when "published" is missing
-                         if published and published > cutoff_time:
-                             articles.append({
-                                 'title': entry.get('title', ''),
-                                 'description': BeautifulSoup(entry.get('description', ''), 'html.parser').get_text(),
-                                 'link': entry.get('link', ''),
-                                 'published': entry.get('published', entry.get('updated', '')),
-                                 'category': category,
-                                 'source': source
-                             })
-                 except Exception as e:
-                     logging.error(f"Error fetching from {feed_url}: {e}")
-                     continue
-
-     logging.info(f"Total articles fetched in last 24 hours: {len(articles)}")  # log the total number of articles
-
-     if not articles:  # if no articles were found
-         logging.warning("No articles found within the specified timeframe.")
-
+         for source, url in NEWS_SOURCES.get(category, {}).items():
+             try:
+                 feed = feedparser.parse(url)
+                 for entry in feed.entries:
+                     published = datetime(*entry.published_parsed[:6], tzinfo=pytz.UTC)
+                     if published > cutoff_time:
+                         articles.append({
+                             "title": entry.title,
+                             "description": BeautifulSoup(entry.description, "html.parser").get_text(),
+                             "link": entry.link,
+                             "category": category,
+                             "source": source,
+                             "published": published
+                         })
+             except Exception as e:
+                 logging.error(f"Failed to fetch from {url}: {e}")
      return articles

- def generate_summary(text):
-     """Generate AI-powered summary for the article."""
-     content_hash = get_content_hash(text)
-     cached_summary = news_cache.get_summary(content_hash)
+ def summarize_text(text):
+     """Summarize the text using the AI model."""
+     content_hash = hashlib.md5(text.encode()).hexdigest()
+     cached_summary = cache.get(content_hash)
      if cached_summary:
          return cached_summary
-
+
      try:
-         result = summarizer(
-             text,
-             max_length=200,
-             min_length=50,
-             do_sample=False,
-             truncation=True
-         )
-
-         if result and len(result) > 0:
-             summary = result[0]['summary_text'].strip()
-             if summary:
-                 formatted_summary = "\n• " + "\n• ".join([s.strip() for s in summary.split('.') if s.strip()])
-                 news_cache.store_summary(content_hash, formatted_summary)
-                 return formatted_summary
-
+         result = summarizer(text, max_length=120, min_length=40, truncation=True)
+         summary = result[0]['summary_text']
+         cache.set(content_hash, summary)
+         return summary
      except Exception as e:
-         logging.error(f"Summarization error: {e}")
-
-     return "Unable to generate summary."
+         logging.error(f"Summarization failed: {e}")
+         return "Summary unavailable."

- def get_personalized_summary(name):
-     """Generate personalized news summary."""
-     logging.info(f"Starting summary generation for user: {name}")
-
-     if not name:
-         return "Please enter your name!"
-
-     preferences_file = f"user_preferences/preferences_{name}.json"
-
+ def generate_user_summary(name):
+     """Generate a personalized news summary based on user preferences."""
+     # Load preferences
      try:
-         with open(preferences_file, "r") as f:
+         with open(f"user_preferences/preferences_{name}.json") as f:
              preferences = json.load(f)
      except FileNotFoundError:
-         return "Please set your preferences first!"
+         return "Preferences not found. Please set your preferences first."
      except Exception as e:
-         return f"Error loading preferences: {e}"
+         logging.error(f"Error loading preferences: {e}")
+         return "Failed to load preferences."

-     articles = fetch_news_from_rss(preferences["interests"])
-
+     categories = preferences.get("interests", [])
+     if not categories:
+         return "No categories selected. Please update your preferences."
+
+     # Fetch news
+     articles = fetch_rss_news(categories)
      if not articles:
-         return "No recent news articles found from the last 12 hours."
+         return "No recent news found in your selected categories."

+     # Summarize articles
      summaries = []
-
      for article in articles:
-         content = article['description']
-         if not content:
-             continue
-
-         summary = generate_summary(content)
-         if not summary:
-             continue
-
-         formatted_summary = f"""📰 {article['title']}
- 📁 Category: {article['category']}
- ⏰ Published: {article['published']}
-
- {summary}
-
- 🔗 [Read more]({article['link']})
+         summary = summarize_text(article["description"])
+         summaries.append(f"""**{article['title']}**
+ **Category:** {article['category']} | **Source:** {article['source']} | **Published:** {article['published'].strftime('%Y-%m-%d %H:%M')}
+ {summary}
+ [Read more]({article['link']})
+ ---""")

- ---"""
-         summaries.append(formatted_summary)
+     return "\n\n".join(summaries) if summaries else "No summaries available."

-     if not summaries:
-         return "Unable to generate summaries for recent news."
-
-     return "\n".join(summaries)
-
- # Gradio interface
- demo = gr.Blocks(title="AI News Summarizer")
+ # Gradio interface
+ demo = gr.Blocks()

  with demo:
-     gr.Markdown("# 📰 AI News Summarizer")
+     gr.Markdown("# 📰 Personalized AI News Summarizer")

      with gr.Tab("Set Preferences"):
          name_input = gr.Textbox(label="Your Name")
-         interests_checkboxes = gr.CheckboxGroup(choices=list(NEWS_SOURCES.keys()), label="News Interests (Select multiple)")
+         interests = gr.CheckboxGroup(
+             choices=list(NEWS_SOURCES.keys()),
+             label="Select Your Interests"
+         )
          save_button = gr.Button("Save Preferences")
-         preferences_output = gr.Textbox(label="Status")
-
-         def save_preferences(name, interests):
-             if not name or not interests:
-                 return "Please fill in all required fields!"
-
-             preferences = {
-                 "name": name,
-                 "interests": interests,
-                 "last_updated": datetime.now().isoformat()
-             }
-
+         save_status = gr.Textbox(label="Status")
+
+         def save_preferences(name, selected_interests):
+             if not name or not selected_interests:
+                 return "Name and interests are required!"
+             preferences = {"name": name, "interests": selected_interests}
          try:
-             os.makedirs('user_preferences', exist_ok=True)
+             os.makedirs("user_preferences", exist_ok=True)
              with open(f"user_preferences/preferences_{name}.json", "w") as f:
                  json.dump(preferences, f)
-             return f"Preferences saved for {name}!"
+             return "Preferences saved successfully!"
          except Exception as e:
-             logging.error(f"Error saving preferences: {e}")
-             return f"Error saving preferences: {e}"
-
-         save_button.click(save_preferences, inputs=[name_input, interests_checkboxes], outputs=[preferences_output])
-
-     with gr.Tab("Get News Summary"):
-         name_check = gr.Textbox(label="Enter your name to get summary")
-         get_summary_button = gr.Button("Get Summary")
-         summary_output = gr.Markdown(value="Waiting for summary...")
+             logging.error(f"Failed to save preferences: {e}")
+             return "Failed to save preferences."

-         get_summary_button.click(get_personalized_summary, inputs=[name_check], outputs=[summary_output])
+         save_button.click(save_preferences, inputs=[name_input, interests], outputs=save_status)
+
+     with gr.Tab("Get News Summary"):
+         name_input_summary = gr.Textbox(label="Your Name")
+         fetch_button = gr.Button("Get Summary")
+         summary_output = gr.Textbox(label="News Summary", lines=20)
+
+         fetch_button.click(generate_user_summary, inputs=[name_input_summary], outputs=summary_output)

  if __name__ == "__main__":
      demo.launch()
@@ -241,3 +179,4 @@ if __name__ == "__main__":



+
 
 
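A note on the rewritten `NewsCache`: eviction drops the oldest entry by relying on Python 3.7+ dicts preserving insertion order, and every access is guarded by a `threading.Lock`. A minimal standalone sketch of that FIFO behavior, using the class as committed (the three-entry capacity is illustrative only):

```python
import threading

class NewsCache:
    """Thread-safe FIFO cache, as committed in app.py."""
    def __init__(self, size):
        self.cache = {}  # dicts preserve insertion order (Python 3.7+)
        self.size = size
        self.lock = threading.Lock()

    def get(self, key):
        with self.lock:
            return self.cache.get(key)

    def set(self, key, value):
        with self.lock:
            if len(self.cache) >= self.size:
                # next(iter(...)) yields the oldest inserted key
                oldest_key = next(iter(self.cache))
                del self.cache[oldest_key]
            self.cache[key] = value

cache = NewsCache(3)           # illustrative capacity
for i in range(4):
    cache.set(f"k{i}", i)      # the fourth insert evicts "k0"
assert cache.get("k0") is None and cache.get("k3") == 3
```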
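One behavioral change worth flagging: the old `parse_date` returned `None` for missing or unparsable dates and the caller skipped those entries, while the new `fetch_rss_news` assumes every entry exposes `published_parsed` and will raise `AttributeError` on feeds that omit it. A hedged sketch of a fallback helper (`safe_published` is hypothetical, not part of this commit):

```python
from datetime import datetime
import pytz

def safe_published(entry):
    """Hypothetical fallback: prefer published_parsed, then updated_parsed.

    feedparser entries behave like dicts, and both fields are
    time.struct_time values when present. Returns None when neither
    exists, so the caller can skip the entry instead of crashing.
    """
    parsed = entry.get("published_parsed") or entry.get("updated_parsed")
    if not parsed:
        return None
    return datetime(*parsed[:6], tzinfo=pytz.UTC)
```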
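For reference, the summarizer stays pinned to CPU (`device=-1`) while the length bounds tighten from 200/50 to 120/40 tokens and the old bullet formatting is dropped. A self-contained sketch of the new call as committed (the sample text is a stand-in; model weights download on first use):

```python
from transformers import pipeline

# device=-1 selects CPU inference, matching the committed setting.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=-1)

article_text = "The quick brown fox jumped over the lazy dog. " * 30  # stand-in article body
result = summarizer(article_text, max_length=120, min_length=40, truncation=True)
print(result[0]["summary_text"])
```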