loayshabet committed on
Commit
9b9dfb3
·
verified ·
1 Parent(s): 5c1b01d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -84
app.py CHANGED
@@ -1,53 +1,49 @@
1
-
2
  import gradio as gr
3
  from transformers import pipeline
4
  import feedparser
5
  from datetime import datetime, timedelta
6
  import pytz
7
  from bs4 import BeautifulSoup
 
 
8
 
9
  # Global settings
10
# Maximum number of articles kept after sorting by publication time.
ARTICLE_LIMIT = 5
# Look-back window subtracted from "now" to obtain the fetch cutoff time.
RSS_FETCH_INTERVAL = timedelta(hours=8)
# Model identifier handed to the summarization pipeline.
SUMMARIZER_MODEL = "facebook/bart-large-cnn"

# News sources: category -> {publisher name -> RSS feed URL}.
# Built entry by entry; insertion order is the order shown in the UI.
NEWS_SOURCES = {}
NEWS_SOURCES["Technology"] = {
    "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
    "Reuters": "https://www.reutersagency.com/feed/?best-topics=tech&post_type=best",
}
NEWS_SOURCES["Business"] = {
    "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
    "Reuters": "https://www.reutersagency.com/feed/?best-topics=business-finance&post_type=best",
}
NEWS_SOURCES["Science"] = {
    "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Science.xml",
}
NEWS_SOURCES["World News"] = {
    "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
    "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
    "CNN": "http://rss.cnn.com/rss/edition_world.rss",
    "Reuters": "https://www.reutersagency.com/feed/?taxonomy=best-regions&post_type=best",
}
NEWS_SOURCES["Sports"] = {
    "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Sports.xml",
    "Reuters": "https://www.reutersagency.com/feed/?best-topics=sports&post_type=best",
}
NEWS_SOURCES["Health"] = {
    "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Health.xml",
    "Politico": "http://rss.politico.com/healthcare.xml",
    "Reuters": "https://www.reutersagency.com/feed/?best-topics=health&post_type=best",
}
43
 
44
# Summarizer initialization: a single shared pipeline, created when the module
# is imported. device=-1 selects the CPU per the transformers pipeline docs.
summarizer = pipeline(
    task="summarization",
    model=SUMMARIZER_MODEL,
    device=-1,
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  def fetch_rss_news(categories):
48
  articles = []
49
  cutoff_time = datetime.now(pytz.UTC) - RSS_FETCH_INTERVAL
50
-
51
  for category in categories:
52
  for source, url in NEWS_SOURCES.get(category, {}).items():
53
  try:
@@ -63,74 +59,70 @@ def fetch_rss_news(categories):
63
  "source": source,
64
  "published": published
65
  })
66
- except Exception as e:
67
- print(f"Error fetching from {url}: {e}")
68
-
69
  articles = sorted(articles, key=lambda x: x["published"], reverse=True)[:ARTICLE_LIMIT]
70
  return articles
71
 
72
def summarize_with_ai(text):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    Args:
        text: Article text to condense.

    Returns:
        The ``summary_text`` of the first pipeline result, or the fallback
        string "Summary unavailable." when ``text`` is empty/blank or the
        pipeline raises.
    """
    # Nothing to summarize: do not spend a model call on empty input.
    if not text or not text.strip():
        return "Summary unavailable."
    try:
        result = summarizer(text, max_length=120, min_length=40, truncation=True)
        return result[0]['summary_text']
    except Exception as e:
        # Best effort: one failed summary must not abort the whole digest.
        print(f"AI summarization error: {e}")
        return "Summary unavailable."
79
 
80
def summarize_with_free_module(text, max_sentences=3):
    """Heuristic summary: keep only the leading sentences of ``text``.

    Sentences are split on the literal separator ``". "``.

    Args:
        text: Text to shorten.
        max_sentences: Number of leading sentences to keep (default 3).

    Returns:
        ``text`` unchanged when it has at most ``max_sentences`` sentences
        (an empty or missing text yields ""); otherwise the first
        ``max_sentences`` sentences joined by ". " and followed by "...".
    """
    if not text:
        return ""
    sentences = text.split('. ')
    # Only mark the result as truncated when something was actually dropped.
    if len(sentences) <= max_sentences:
        return text
    return '. '.join(sentences[:max_sentences]) + '...'
83
-
84
def summarize_articles(articles, method="AI Model"):
    """Summarize each article and render it as a text entry.

    Args:
        articles: Iterable of dicts with the keys "title", "category",
            "source", "published" (an object with ``strftime``),
            "description" and "link".
        method: "AI Model" selects ``summarize_with_ai``; any other value
            selects ``summarize_with_free_module``.

    Returns:
        A list with one formatted string per article that could be
        summarized. Articles whose processing raises are reported on stdout
        and skipped.
    """
    if not articles:
        return []
    summarizer_function = summarize_with_ai if method == "AI Model" else summarize_with_free_module

    summaries = []
    for article in articles:
        try:
            summary = summarizer_function(article["description"])
            # The template was a string literal broken across physical lines
            # (a syntax error); the line breaks are now explicit "\n" joins.
            # NOTE(review): any leading spaces on the continuation lines were
            # lost in the diff view -- confirm the intended layout.
            summaries.append("\n".join([
                f"Title: {article['title']}",
                f"- Category: {article['category']}",
                f"- Source: {article['source']}",
                f"- Published: {article['published'].strftime('%Y-%m-%d %H:%M')}",
                f"Summary: {summary}",
                f"Read more: {article['link']}",
            ]))
        except Exception as e:
            # Best effort: skip the broken article, keep the rest.
            print(f"Error summarizing article: {e}")

    return summaries
110
-
111
- # Gradio Interface
112
def generate_summary(categories, method):
    """Build the digest text for the selected categories.

    Args:
        categories: Category names chosen in the UI.
        method: Summarization method label passed on to
            ``summarize_articles``.

    Returns:
        A prompt to pick a category when none is selected, a notice when
        ``fetch_rss_news`` returns nothing, otherwise the article summaries
        separated by a blank line.
    """
    if not categories:
        return "Please select at least one category."
    articles = fetch_rss_news(categories)
    if not articles:
        return "No recent articles found."
    summaries = summarize_articles(articles, method)
    # The separator literal was split across physical lines (a syntax error);
    # written with escapes it is one blank line between entries.
    return "\n\n".join(summaries)
122
 
 
# Gradio UI wiring.
# NOTE(review): the original indentation is not visible in this view; the
# nesting below (selectors inside the Row, button and output under it) is a
# reconstruction -- confirm against the real file.
with gr.Blocks() as demo:
    gr.Markdown("# AI News Summarizer")
    with gr.Row():
        categories = gr.CheckboxGroup(
            label="Select Categories",
            choices=list(NEWS_SOURCES.keys()),
        )
        method = gr.Dropdown(
            label="Summarization Method",
            choices=["AI Model", "Free Module"],
            value="AI Model",
        )
    summarize_button = gr.Button("Generate Summary")
    output = gr.Textbox(label="Summarized News", lines=15)

    # Clicking the button runs generate_summary on both selector values and
    # writes the returned text into the output box.
    summarize_button.click(fn=generate_summary, inputs=[categories, method], outputs=output)

if __name__ == "__main__":
    demo.launch()
@@ -140,3 +132,4 @@ if __name__ == "__main__":
140
 
141
 
142
 
 
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import feedparser
4
  from datetime import datetime, timedelta
5
  import pytz
6
  from bs4 import BeautifulSoup
7
+ import hashlib
8
+ import threading
9
 
10
  # Global settings
11
# Maximum number of articles kept after sorting by publication time.
ARTICLE_LIMIT = 5
# Look-back window subtracted from "now" to obtain the fetch cutoff time.
RSS_FETCH_INTERVAL = timedelta(hours=8)
# Capacity passed to the module-level NewsCache.
CACHE_SIZE = 500

# UI label -> model identifier handed to the summarization pipeline.
SUMMARIZER_MODELS = {}
SUMMARIZER_MODELS["Default (facebook/bart-large-cnn)"] = "facebook/bart-large-cnn"
SUMMARIZER_MODELS["Free Model (distilbart-cnn-6-6)"] = "sshleifer/distilbart-cnn-6-6"

# Category -> {publisher name -> RSS feed URL}.
NEWS_SOURCES = {}
NEWS_SOURCES["Technology"] = {
    "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
}
NEWS_SOURCES["Business"] = {
    "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
}
NEWS_SOURCES["World News"] = {
    "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
}
24
 
25
class NewsCache:
    """Size-bounded key/value store with first-in-first-out eviction.

    Every read and write runs under one ``threading.Lock``. Eviction follows
    dict insertion order and reads do not refresh an entry, so the policy is
    FIFO rather than LRU.
    """

    def __init__(self, size):
        """Create an empty cache holding at most ``size`` entries.

        A ``size`` of zero or less disables storage: ``set`` becomes a no-op.
        """
        self.cache = {}
        self.size = size
        self.lock = threading.Lock()

    def get(self, key):
        """Return the value stored under ``key``, or ``None`` when absent."""
        with self.lock:
            return self.cache.get(key)

    def set(self, key, value):
        """Store ``value`` under ``key``, evicting the oldest entry if full.

        Overwriting an existing key never evicts: the entry count does not
        grow, so no room has to be made.
        """
        with self.lock:
            if self.size <= 0:
                # Nothing may be stored; also avoids next() on an empty dict.
                return
            if key not in self.cache and len(self.cache) >= self.size:
                oldest_key = next(iter(self.cache))
                del self.cache[oldest_key]
            self.cache[key] = value
41
+
42
# Module-level summary cache used by summarize_text.
cache = NewsCache(size=CACHE_SIZE)
43
 
44
  def fetch_rss_news(categories):
45
  articles = []
46
  cutoff_time = datetime.now(pytz.UTC) - RSS_FETCH_INTERVAL
 
47
  for category in categories:
48
  for source, url in NEWS_SOURCES.get(category, {}).items():
49
  try:
 
59
  "source": source,
60
  "published": published
61
  })
62
+ except Exception:
63
+ continue
 
64
  articles = sorted(articles, key=lambda x: x["published"], reverse=True)[:ARTICLE_LIMIT]
65
  return articles
66
 
67
# Loaded summarization pipelines, keyed by model name, so each model is
# instantiated once instead of on every call.
_SUMMARIZERS = {}


def _get_summarizer(model_name):
    """Return the summarization pipeline for ``model_name``, loading it on first use."""
    summarizer = _SUMMARIZERS.get(model_name)
    if summarizer is None:
        summarizer = pipeline("summarization", model=model_name, device=-1)
        _SUMMARIZERS[model_name] = summarizer
    return summarizer


def summarize_text(text, model_name):
    """Summarize ``text`` with the given model, reusing cached results.

    Args:
        text: Article text to condense.
        model_name: Model identifier passed to the summarization pipeline.

    Returns:
        The summary string, or "Summary unavailable." when ``text`` is
        empty/blank or inference raises.
    """
    # Nothing to summarize: do not load a model or touch the cache.
    if not text or not text.strip():
        return "Summary unavailable."

    # The key covers the model as well as the text, so a summary produced by
    # one model is never served for another. MD5 is only a fingerprint here.
    key_material = f"{model_name}\x00{text}".encode()
    content_hash = hashlib.md5(key_material).hexdigest()
    cached_summary = cache.get(content_hash)
    if cached_summary is not None:
        return cached_summary

    # Loading stays outside the try, as before: a model that cannot be
    # loaded still raises to the caller.
    summarizer = _get_summarizer(model_name)
    try:
        result = summarizer(text, max_length=120, min_length=40, truncation=True)
        summary = result[0]['summary_text']
    except Exception:
        # Best effort: report the failure but keep the digest going.
        import logging
        logging.getLogger(__name__).exception("Summarization failed for model %s", model_name)
        return "Summary unavailable."
    cache.set(content_hash, summary)
    return summary
80
 
81
def summarize_articles(articles, model_name):
    """Summarize every article and render the digest as one string.

    Args:
        articles: Iterable of dicts with the keys "title", "category",
            "source", "link" and "description".
        model_name: Model identifier forwarded to ``summarize_text``.

    Returns:
        One text entry per article, joined with newlines ("" for no
        articles). Each entry starts and ends with a newline, as in the
        original template.
    """
    summaries = []
    for article in articles:
        summary = summarize_text(article["description"], model_name)
        # The emoji were mis-decoded (mojibake) in the template; restored here.
        # NOTE(review): the Category icon lost one byte in the garbled text;
        # the folder emoji is the most plausible original -- confirm.
        # NOTE(review): leading spaces inside the template, if any, were lost
        # in the diff view -- confirm the intended layout.
        entry_lines = [
            f"📰 {article['title']}",
            f"- 📁 Category: {article['category']}",
            f"- 💡 Source: {article['source']}",
            f"- 🔗 Read More: {article['link']}",
            f"📃 Summary: {summary}",
        ]
        summaries.append("\n" + "\n".join(entry_lines) + "\n")
    return "\n".join(summaries)
94
+
95
def generate_summary(selected_categories, model_name):
    """Return the news digest for the chosen categories.

    Args:
        selected_categories: Category names picked in the UI.
        model_name: Model identifier forwarded to ``summarize_articles``.

    Returns:
        The digest text, a notice when ``fetch_rss_news`` returns nothing,
        or a prompt when no category is selected.
    """
    if selected_categories:
        recent_articles = fetch_rss_news(selected_categories)
        if recent_articles:
            return summarize_articles(recent_articles, model_name)
        return "No recent news found in the selected categories."
    return "Please select at least one category."
 
 
 
102
 
103
# Gradio Interface
# NOTE(review): the original indentation is not visible in this view; the
# nesting below (selectors inside the Row, button and output under it) is a
# reconstruction -- confirm against the real file.
demo = gr.Blocks()

with demo:
    # The heading emoji was mis-decoded (mojibake); restored here.
    gr.Markdown("# 📰 AI News Summarizer")
    with gr.Row():
        categories = gr.CheckboxGroup(
            choices=list(NEWS_SOURCES.keys()),
            label="Select News Categories"
        )
        model_selector = gr.Radio(
            choices=list(SUMMARIZER_MODELS.keys()),
            label="Choose Summarization Model",
            value="Default (facebook/bart-large-cnn)"
        )
    summarize_button = gr.Button("Get News Summary")
    summary_output = gr.Textbox(label="News Summary", lines=20)

    def get_summary(selected_categories, selected_model):
        """Map the selected UI label to its model id and build the digest."""
        model_name = SUMMARIZER_MODELS[selected_model]
        return generate_summary(selected_categories, model_name)

    # Clicking the button runs get_summary on both selector values and writes
    # the returned text into the output box.
    summarize_button.click(get_summary, inputs=[categories, model_selector], outputs=summary_output)

if __name__ == "__main__":
    demo.launch()
 
132
 
133
 
134
 
135
+