Spaces:

loayshabet
/

news-sumarry

Running

App Files Files Community

loayshabet committed on Dec 9, 2024

Commit

9b9dfb3

verified ·

1 Parent(s): 5c1b01d

Update app.py

Browse files

Files changed (1) hide show

app.py +77 -84

app.py CHANGED Viewed

@@ -1,53 +1,49 @@
 import gradio as gr
 from transformers import pipeline
 import feedparser
 from datetime import datetime, timedelta
 import pytz
 from bs4 import BeautifulSoup
 # Global settings
-SUMMARIZER_MODEL = "facebook/bart-large-cnn"
 RSS_FETCH_INTERVAL = timedelta(hours=8)
 ARTICLE_LIMIT = 5
-# News sources
 NEWS_SOURCES = {
-    "Technology": {
-        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
-        "Reuters": "https://www.reutersagency.com/feed/?best-topics=tech&post_type=best"
-    },
-    "Business": {
-        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
-        "Reuters": "https://www.reutersagency.com/feed/?best-topics=business-finance&post_type=best"
-    },
-    "Science": {
-        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Science.xml"
-    },
-    "World News": {
-        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
-        "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
-        "CNN": "http://rss.cnn.com/rss/edition_world.rss",
-        "Reuters": "https://www.reutersagency.com/feed/?taxonomy=best-regions&post_type=best"
-    },
-    "Sports": {
-        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Sports.xml",
-        "Reuters": "https://www.reutersagency.com/feed/?best-topics=sports&post_type=best"
-    },
-    "Health": {
-        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Health.xml",
-        "Politico": "http://rss.politico.com/healthcare.xml",
-        "Reuters": "https://www.reutersagency.com/feed/?best-topics=health&post_type=best"
-    },
 }
-# Summarizer initialization
-summarizer = pipeline("summarization", model=SUMMARIZER_MODEL, device=-1)
 def fetch_rss_news(categories):
     articles = []
     cutoff_time = datetime.now(pytz.UTC) - RSS_FETCH_INTERVAL
     for category in categories:
         for source, url in NEWS_SOURCES.get(category, {}).items():
             try:
@@ -63,74 +59,70 @@ def fetch_rss_news(categories):
                             "source": source,
                             "published": published
                         })
-            except Exception as e:
-                print(f"Error fetching from {url}: {e}")
     articles = sorted(articles, key=lambda x: x["published"], reverse=True)[:ARTICLE_LIMIT]
     return articles
-def summarize_with_ai(text):
     try:
         result = summarizer(text, max_length=120, min_length=40, truncation=True)
-        return result[0]['summary_text']
-    except Exception as e:
-        print(f"AI summarization error: {e}")
         return "Summary unavailable."
-def summarize_with_free_module(text):
-    # Simple heuristic summarization: return the first few sentences
-    return '. '.join(text.split('. ')[:3]) + '...'
-def summarize_articles(articles, method="AI Model"):
     summaries = []
-    summarizer_function = summarize_with_ai if method == "AI Model" else summarize_with_free_module
     for article in articles:
-        try:
-            summary = summarizer_function(article["description"])
-            summaries.append(
-                "Title: {0}
-- Category: {1}
-- Source: {2}
-- Published: {3}
-Summary: {4}
-Read more: {5}".format(
-                    article["title"],
-                    article["category"],
-                    article["source"],
-                    article["published"].strftime('%Y-%m-%d %H:%M'),
-                    summary,
-                    article["link"]
-                )
-            )
-        except Exception as e:
-            print(f"Error summarizing article: {e}")
-    return summaries
-# Gradio Interface
-def generate_summary(categories, method):
-    if not categories:
         return "Please select at least one category."
-    articles = fetch_rss_news(categories)
     if not articles:
-        return "No recent articles found."
-    summaries = summarize_articles(articles, method)
-    return "
-".join(summaries)
 demo = gr.Blocks()
 with demo:
-    gr.Markdown("# AI News Summarizer")
     with gr.Row():
-        categories = gr.CheckboxGroup(choices=list(NEWS_SOURCES.keys()), label="Select Categories")
-        method = gr.Dropdown(choices=["AI Model", "Free Module"], label="Summarization Method", value="AI Model")
-    summarize_button = gr.Button("Generate Summary")
-    output = gr.Textbox(label="Summarized News", lines=15)
-    summarize_button.click(generate_summary, inputs=[categories, method], outputs=output)
 if __name__ == "__main__":
     demo.launch()
@@ -140,3 +132,4 @@ if __name__ == "__main__":

 import gradio as gr
 from transformers import pipeline
 import feedparser
 from datetime import datetime, timedelta
 import pytz
 from bs4 import BeautifulSoup
+import hashlib
+import threading
 # Global settings
+# Maps the label shown in the model selector to the model identifier passed to the summarization pipeline.
+SUMMARIZER_MODELS = {
+    "Default (facebook/bart-large-cnn)": "facebook/bart-large-cnn",
+    "Free Model (distilbart-cnn-6-6)": "sshleifer/distilbart-cnn-6-6"
+}
+# Capacity handed to the module-level NewsCache instance.
+CACHE_SIZE = 500
+# Subtracted from the current UTC time to compute the cutoff time in fetch_rss_news.
 RSS_FETCH_INTERVAL = timedelta(hours=8)
+# fetch_rss_news returns at most this many articles, newest first.
 ARTICLE_LIMIT = 5
+# Category name -> {source name: RSS feed URL}; the category names are also the checkbox choices in the UI.
 NEWS_SOURCES = {
+    "Technology": {"TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml"},
+    "Business": {"TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml"},
+    "World News": {"BBC": "http://feeds.bbci.co.uk/news/world/rss.xml"}
 }
+class NewsCache:
+    """Lock-protected, size-bounded key/value cache with first-in-first-out eviction.
+    When a new key is added to a full cache, the key that was inserted
+    earliest is dropped. Overwriting an existing key never evicts anything.
+    """
+    def __init__(self, size):
+        """Create an empty cache holding at most ``size`` entries.
+        A ``size`` of zero or less disables caching: ``set`` stores nothing.
+        """
+        self.cache = {}
+        self.size = size
+        self.lock = threading.Lock()
+    def get(self, key):
+        """Return the value stored under ``key``, or ``None`` if it is absent."""
+        with self.lock:
+            return self.cache.get(key)
+    def set(self, key, value):
+        """Store ``value`` under ``key``, evicting the oldest entry when a new key would overflow the cache."""
+        with self.lock:
+            # A non-positive capacity means there is no room for any entry.
+            if self.size <= 0:
+                return
+            # Replacing an existing key does not grow the cache, so only a new key can trigger eviction.
+            if key not in self.cache and len(self.cache) >= self.size:
+                # Dicts preserve insertion order, so the first key is the oldest one.
+                oldest_key = next(iter(self.cache))
+                del self.cache[oldest_key]
+            self.cache[key] = value
+# Module-level cache instance; summarize_text looks up and stores summaries through it.
+cache = NewsCache(CACHE_SIZE)
 def fetch_rss_news(categories):
     articles = []
     cutoff_time = datetime.now(pytz.UTC) - RSS_FETCH_INTERVAL
     for category in categories:
         for source, url in NEWS_SOURCES.get(category, {}).items():
             try:
                             "source": source,
                             "published": published
                         })
+            except Exception:
+                continue
     articles = sorted(articles, key=lambda x: x["published"], reverse=True)[:ARTICLE_LIMIT]
     return articles
+# Summarization pipelines already constructed, keyed by model name, so they are reused across calls.
+_SUMMARIZERS = {}
+def _get_summarizer(model_name):
+    """Return the summarization pipeline for ``model_name``, constructing it on first use."""
+    summarizer = _SUMMARIZERS.get(model_name)
+    if summarizer is None:
+        summarizer = pipeline("summarization", model=model_name, device=-1)
+        _SUMMARIZERS[model_name] = summarizer
+    return summarizer
+def summarize_text(text, model_name):
+    """Summarize ``text`` with the model ``model_name``.
+    Results are memoized in the module-level ``cache`` under a hash of the
+    model name and the text. Returns the summary string, or the placeholder
+    "Summary unavailable." when the model call raises.
+    """
+    # The key covers the model as well as the text, so a summary produced by one model is never returned for another.
+    content_hash = hashlib.md5(f"{model_name}\n{text}".encode()).hexdigest()
+    cached_summary = cache.get(content_hash)
+    if cached_summary is not None:
+        return cached_summary
+    # Fetch the pipeline only after a cache miss, so a cached summary never triggers a model load.
+    summarizer = _get_summarizer(model_name)
     try:
         result = summarizer(text, max_length=120, min_length=40, truncation=True)
+        summary = result[0]['summary_text']
+        cache.set(content_hash, summary)
+        return summary
+    except Exception:
+        # Best effort: any failure in the model call yields a placeholder instead of propagating.
         return "Summary unavailable."
+def summarize_articles(articles, model_name):
+    """Render every article as a text block with its summary and join the blocks with newlines.
+    Each article is a mapping that provides the "description", "title",
+    "category", "source" and "link" keys; the description is what gets
+    summarized by ``summarize_text`` using ``model_name``.
+    """
+    def render(entry):
+        digest = summarize_text(entry["description"], model_name)
+        # The triple-quoted literal keeps its source indentation, so each rendered line starts with eight spaces.
+        return f"""
+        📰 {entry['title']}
+        - 📁 Category: {entry['category']}
+        - 💡 Source: {entry['source']}
+        - 🔗 Read More: {entry['link']}
+        📃 Summary: {digest}
+        """
+    return "\n".join(render(entry) for entry in articles)
+def generate_summary(selected_categories, model_name):
+    """Fetch recent articles for the selected categories and return their summaries as one string.
+    Returns a fixed prompt when no category is selected, and a fixed notice
+    when ``fetch_rss_news`` yields no articles.
+    """
+    if selected_categories:
+        recent = fetch_rss_news(selected_categories)
+        if recent:
+            return summarize_articles(recent, model_name)
+        return "No recent news found in the selected categories."
+    return "Please select at least one category."
+# Gradio Interface
+# Page layout: a title, one row holding the category and model pickers, a button, and an output text box.
 demo = gr.Blocks()
 with demo:
+    gr.Markdown("# 📰 AI News Summarizer")
     with gr.Row():
+        # One checkbox per category defined in NEWS_SOURCES.
+        categories = gr.CheckboxGroup(
+            choices=list(NEWS_SOURCES.keys()),
+            label="Select News Categories"
+        )
+        # The choices are the labels (keys) of SUMMARIZER_MODELS; the initial value must be one of those keys.
+        model_selector = gr.Radio(
+            choices=list(SUMMARIZER_MODELS.keys()),
+            label="Choose Summarization Model",
+            value="Default (facebook/bart-large-cnn)"
+        )
+    summarize_button = gr.Button("Get News Summary")
+    summary_output = gr.Textbox(label="News Summary", lines=20)
+    def get_summary(selected_categories, selected_model):
+        """Translate the selected model label into its model identifier and build the summary text."""
+        model_name = SUMMARIZER_MODELS[selected_model]
+        return generate_summary(selected_categories, model_name)
+    # Wire the button: get_summary receives the two picker values and its return value fills the text box.
+    summarize_button.click(get_summary, inputs=[categories, model_selector], outputs=summary_output)
+# Launch the app only when this file is executed directly.
 if __name__ == "__main__":
     demo.launch()