Spaces:

loayshabet
/

news-sumarry

Running

App Files Community

loayshabet committed on Dec 22, 2024

Commit

a21b264

verified ·

1 Parent(s): ac1a421

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -65

app.py CHANGED Viewed

@@ -6,6 +6,11 @@ import pytz
 from bs4 import BeautifulSoup
 import hashlib
 import threading
 # Global settings
 SUMMARIZER_MODELS = {
@@ -17,31 +22,17 @@ RSS_FETCH_INTERVAL = timedelta(hours=8)
 ARTICLE_LIMIT = 5
 # Restructured news sources with fixed categories
-CATEGORIES = ["Technology", "Business", "World News","Sports","Health"]
 NEWS_SOURCES = {
     "Technology": {
         "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
-        "reutersagency": "https://www.reutersagency.com/feed/?best-topics=tech&post_type=best",
     },
     "Business": {
         "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
-        "reutersagency": "https://www.reutersagency.com/feed/?best-topics=business-finance&post_type=best",
     },
     "World News": {
         "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
-        "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
-        "CNN": "http://rss.cnn.com/rss/edition_world.rss",
-        "reutersagency": "https://www.reutersagency.com/feed/?taxonomy=best-regions&post_type=best",
-    },
-    "Sports": {
-        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Sports.xml",
-        "reutersagency": "https://www.reutersagency.com/feed/?best-topics=sports&post_type=best"
-    },
-    "Health": {
-        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Health.xml",
-        "politico": "http://rss.politico.com/healthcare.xml",
-        "reutersagency": "https://www.reutersagency.com/feed/?best-topics=health&post_type=best"
-    },
 }
 class NewsCache:
@@ -69,50 +60,71 @@ def fetch_rss_news(tech_sources, business_sources, world_sources):
     # Create a mapping of selected sources
     selected_sources = {
-        "Technology": tech_sources,
-        "Business": business_sources,
-        "World News": world_sources,
-        "Sports": sports_sources,
-        "Health": Health_sources,
     }
     for category, sources in selected_sources.items():
         if not sources:  # Skip if no sources selected for this category
             continue
         for source in sources:
             if source in NEWS_SOURCES[category]:
                 url = NEWS_SOURCES[category][source]
                 try:
                     feed = feedparser.parse(url)
                     for entry in feed.entries:
-                        published = datetime(*entry.published_parsed[:6], tzinfo=pytz.UTC)
-                        if published > cutoff_time:
-                            articles.append({
-                                "title": entry.title,
-                                "description": BeautifulSoup(entry.description, "html.parser").get_text(),
-                                "link": entry.link,
-                                "category": category,
-                                "source": source,
-                                "published": published
-                            })
-                except Exception:
                     continue
     articles = sorted(articles, key=lambda x: x["published"], reverse=True)[:ARTICLE_LIMIT]
     return articles
 def summarize_text(text, model_name):
-    summarizer = pipeline("summarization", model=model_name, device=-1)
-    content_hash = hashlib.md5(text.encode()).hexdigest()
-    cached_summary = cache.get(content_hash)
-    if cached_summary:
-        return cached_summary
     try:
         result = summarizer(text, max_length=120, min_length=40, truncation=True)
         summary = result[0]['summary_text']
         cache.set(content_hash, summary)
         return summary
-    except Exception:
         return "Summary unavailable."
 def summarize_articles(articles, model_name):
@@ -130,12 +142,30 @@ def summarize_articles(articles, model_name):
     return "\n".join(summaries)
 def generate_summary(tech_sources, business_sources, world_sources, model_name):
-    if not any([tech_sources, business_sources, world_sources]):
         return "Please select at least one news source."
-    articles = fetch_rss_news(tech_sources, business_sources, world_sources)
-    if not articles:
-        return "No recent news found from the selected sources."
-    return summarize_articles(articles, model_name)
 # Gradio Interface
 demo = gr.Blocks()
@@ -161,18 +191,7 @@ with demo:
                 label="World News Sources",
                 value=[]
             )
-            sports_sources= gr.CheckboxGroup(
-                choices=list(NEWS_SOURCES["Sports"].keys()),
-                label="Sports Sources",
-                value=[]
-            )
-            Health_sources= gr.CheckboxGroup(
-                choices=list(NEWS_SOURCES["Health"].keys()),
-                label="Health Sources",
-                value=[]
-            )
         with gr.Column():
             model_selector = gr.Radio(
                 choices=list(SUMMARIZER_MODELS.keys()),
@@ -184,8 +203,12 @@ with demo:
     summary_output = gr.Textbox(label="News Summary", lines=20)
     def get_summary(tech_sources, business_sources, world_sources, selected_model):
-        model_name = SUMMARIZER_MODELS[selected_model]
-        return generate_summary(tech_sources, business_sources, world_sources, model_name)
     # Connect the components to the summary function
     summarize_button.click(
@@ -199,10 +222,3 @@ if __name__ == "__main__":

 from bs4 import BeautifulSoup
 import hashlib
 import threading
+import logging
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 # Global settings
 SUMMARIZER_MODELS = {
 ARTICLE_LIMIT = 5
 # Restructured news sources with fixed categories
+CATEGORIES = ["Technology", "Business", "World News"]
 NEWS_SOURCES = {
     "Technology": {
         "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
     },
     "Business": {
         "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
     },
     "World News": {
         "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
+    }
 }
 class NewsCache:
     # Create a mapping of selected sources
     selected_sources = {
+        "Technology": tech_sources if tech_sources else [],
+        "Business": business_sources if business_sources else [],
+        "World News": world_sources if world_sources else []
     }
+    logger.info(f"Selected sources: {selected_sources}")
     for category, sources in selected_sources.items():
         if not sources:  # Skip if no sources selected for this category
             continue
+        logger.info(f"Processing category: {category} with sources: {sources}")
         for source in sources:
             if source in NEWS_SOURCES[category]:
                 url = NEWS_SOURCES[category][source]
                 try:
+                    logger.info(f"Fetching from URL: {url}")
                     feed = feedparser.parse(url)
+                    if hasattr(feed, 'status') and feed.status != 200:
+                        logger.warning(f"Failed to fetch feed from {url}. Status: {feed.status}")
+                        continue
                     for entry in feed.entries:
+                        try:
+                            published = datetime(*entry.published_parsed[:6], tzinfo=pytz.UTC)
+                            if published > cutoff_time:
+                                articles.append({
+                                    "title": entry.title,
+                                    "description": BeautifulSoup(entry.description, "html.parser").get_text(),
+                                    "link": entry.link,
+                                    "category": category,
+                                    "source": source,
+                                    "published": published
+                                })
+                        except (AttributeError, TypeError) as e:
+                            logger.error(f"Error processing entry: {str(e)}")
+                            continue
+                except Exception as e:
+                    logger.error(f"Error fetching feed from {url}: {str(e)}")
                     continue
+    logger.info(f"Total articles fetched: {len(articles)}")
     articles = sorted(articles, key=lambda x: x["published"], reverse=True)[:ARTICLE_LIMIT]
     return articles
 def summarize_text(text, model_name):
     try:
+        summarizer = pipeline("summarization", model=model_name, device=-1)
+        content_hash = hashlib.md5(text.encode()).hexdigest()
+        cached_summary = cache.get(content_hash)
+        if cached_summary:
+            logger.info("Using cached summary")
+            return cached_summary
+        logger.info(f"Generating new summary using model: {model_name}")
         result = summarizer(text, max_length=120, min_length=40, truncation=True)
         summary = result[0]['summary_text']
         cache.set(content_hash, summary)
         return summary
+    except Exception as e:
+        logger.error(f"Error in summarization: {str(e)}")
         return "Summary unavailable."
 def summarize_articles(articles, model_name):
     return "\n".join(summaries)
 def generate_summary(tech_sources, business_sources, world_sources, model_name):
+    logger.info(f"""
+    Generating summary with:
+    - Tech sources: {tech_sources}
+    - Business sources: {business_sources}
+    - World sources: {world_sources}
+    - Model: {model_name}
+    """)
+    # Check if any sources are selected
+    if not any([
+        tech_sources is not None and len(tech_sources) > 0,
+        business_sources is not None and len(business_sources) > 0,
+        world_sources is not None and len(world_sources) > 0
+    ]):
         return "Please select at least one news source."
+    try:
+        articles = fetch_rss_news(tech_sources, business_sources, world_sources)
+        if not articles:
+            return "No recent news found from the selected sources."
+        return summarize_articles(articles, model_name)
+    except Exception as e:
+        logger.error(f"Error in generate_summary: {str(e)}")
+        return f"An error occurred while generating the summary. Please try again."
 # Gradio Interface
 demo = gr.Blocks()
                 label="World News Sources",
                 value=[]
             )
         with gr.Column():
             model_selector = gr.Radio(
                 choices=list(SUMMARIZER_MODELS.keys()),
     summary_output = gr.Textbox(label="News Summary", lines=20)
     def get_summary(tech_sources, business_sources, world_sources, selected_model):
+        try:
+            model_name = SUMMARIZER_MODELS[selected_model]
+            return generate_summary(tech_sources, business_sources, world_sources, model_name)
+        except Exception as e:
+            logger.error(f"Error in get_summary: {str(e)}")
+            return "An error occurred while processing your request. Please try again."
     # Connect the components to the summary function
     summarize_button.click(