loayshabet committed
Commit a21b264 · verified · 1 Parent(s): ac1a421

Update app.py

Files changed (1): app.py (+81, -65)
app.py CHANGED
@@ -6,6 +6,11 @@ import pytz
 from bs4 import BeautifulSoup
 import hashlib
 import threading
+import logging
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
 # Global settings
 SUMMARIZER_MODELS = {
@@ -17,31 +22,17 @@ RSS_FETCH_INTERVAL = timedelta(hours=8)
 ARTICLE_LIMIT = 5
 
 # Restructured news sources with fixed categories
-CATEGORIES = ["Technology", "Business", "World News","Sports","Health"]
+CATEGORIES = ["Technology", "Business", "World News"]
 NEWS_SOURCES = {
     "Technology": {
         "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
-        "reutersagency": "https://www.reutersagency.com/feed/?best-topics=tech&post_type=best",
     },
     "Business": {
         "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
-        "reutersagency": "https://www.reutersagency.com/feed/?best-topics=business-finance&post_type=best",
     },
     "World News": {
         "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
-        "BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
-        "CNN": "http://rss.cnn.com/rss/edition_world.rss",
-        "reutersagency": "https://www.reutersagency.com/feed/?taxonomy=best-regions&post_type=best",
-    },
-    "Sports": {
-        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Sports.xml",
-        "reutersagency": "https://www.reutersagency.com/feed/?best-topics=sports&post_type=best"
-    },
-    "Health": {
-        "TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Health.xml",
-        "politico": "http://rss.politico.com/healthcare.xml",
-        "reutersagency": "https://www.reutersagency.com/feed/?best-topics=health&post_type=best"
-    },
+    }
 }
 
 class NewsCache:
@@ -69,50 +60,71 @@ def fetch_rss_news(tech_sources, business_sources, world_sources):
 
     # Create a mapping of selected sources
    selected_sources = {
-        "Technology": tech_sources,
-        "Business": business_sources,
-        "World News": world_sources,
-        "Sports": sports_sources,
-        "Health": Health_sources,
+        "Technology": tech_sources if tech_sources else [],
+        "Business": business_sources if business_sources else [],
+        "World News": world_sources if world_sources else []
     }
 
+    logger.info(f"Selected sources: {selected_sources}")
+
     for category, sources in selected_sources.items():
         if not sources:  # Skip if no sources selected for this category
             continue
+
+        logger.info(f"Processing category: {category} with sources: {sources}")
+
         for source in sources:
             if source in NEWS_SOURCES[category]:
                 url = NEWS_SOURCES[category][source]
                 try:
+                    logger.info(f"Fetching from URL: {url}")
                     feed = feedparser.parse(url)
+
+                    if hasattr(feed, 'status') and feed.status != 200:
+                        logger.warning(f"Failed to fetch feed from {url}. Status: {feed.status}")
+                        continue
+
                     for entry in feed.entries:
-                        published = datetime(*entry.published_parsed[:6], tzinfo=pytz.UTC)
-                        if published > cutoff_time:
-                            articles.append({
-                                "title": entry.title,
-                                "description": BeautifulSoup(entry.description, "html.parser").get_text(),
-                                "link": entry.link,
-                                "category": category,
-                                "source": source,
-                                "published": published
-                            })
-                except Exception:
+                        try:
+                            published = datetime(*entry.published_parsed[:6], tzinfo=pytz.UTC)
+                            if published > cutoff_time:
+                                articles.append({
+                                    "title": entry.title,
+                                    "description": BeautifulSoup(entry.description, "html.parser").get_text(),
+                                    "link": entry.link,
+                                    "category": category,
+                                    "source": source,
+                                    "published": published
+                                })
+                        except (AttributeError, TypeError) as e:
+                            logger.error(f"Error processing entry: {str(e)}")
+                            continue
+
+                except Exception as e:
+                    logger.error(f"Error fetching feed from {url}: {str(e)}")
                     continue
-
+
+    logger.info(f"Total articles fetched: {len(articles)}")
     articles = sorted(articles, key=lambda x: x["published"], reverse=True)[:ARTICLE_LIMIT]
     return articles
 
 def summarize_text(text, model_name):
-    summarizer = pipeline("summarization", model=model_name, device=-1)
-    content_hash = hashlib.md5(text.encode()).hexdigest()
-    cached_summary = cache.get(content_hash)
-    if cached_summary:
-        return cached_summary
     try:
+        summarizer = pipeline("summarization", model=model_name, device=-1)
+        content_hash = hashlib.md5(text.encode()).hexdigest()
+        cached_summary = cache.get(content_hash)
+
+        if cached_summary:
+            logger.info("Using cached summary")
+            return cached_summary
+
+        logger.info(f"Generating new summary using model: {model_name}")
         result = summarizer(text, max_length=120, min_length=40, truncation=True)
         summary = result[0]['summary_text']
         cache.set(content_hash, summary)
         return summary
-    except Exception:
+    except Exception as e:
+        logger.error(f"Error in summarization: {str(e)}")
         return "Summary unavailable."
 
 def summarize_articles(articles, model_name):
@@ -130,12 +142,30 @@ def summarize_articles(articles, model_name):
     return "\n".join(summaries)
 
 def generate_summary(tech_sources, business_sources, world_sources, model_name):
-    if not any([tech_sources, business_sources, world_sources]):
+    logger.info(f"""
+    Generating summary with:
+    - Tech sources: {tech_sources}
+    - Business sources: {business_sources}
+    - World sources: {world_sources}
+    - Model: {model_name}
+    """)
+
+    # Check if any sources are selected
+    if not any([
+        tech_sources is not None and len(tech_sources) > 0,
+        business_sources is not None and len(business_sources) > 0,
+        world_sources is not None and len(world_sources) > 0
+    ]):
         return "Please select at least one news source."
-    articles = fetch_rss_news(tech_sources, business_sources, world_sources)
-    if not articles:
-        return "No recent news found from the selected sources."
-    return summarize_articles(articles, model_name)
+
+    try:
+        articles = fetch_rss_news(tech_sources, business_sources, world_sources)
+        if not articles:
+            return "No recent news found from the selected sources."
+        return summarize_articles(articles, model_name)
+    except Exception as e:
+        logger.error(f"Error in generate_summary: {str(e)}")
+        return f"An error occurred while generating the summary. Please try again."
 
 # Gradio Interface
 demo = gr.Blocks()
@@ -161,18 +191,7 @@ with demo:
                 label="World News Sources",
                 value=[]
             )
-            sports_sources= gr.CheckboxGroup(
-                choices=list(NEWS_SOURCES["Sports"].keys()),
-                label="Sports Sources",
-                value=[]
-            )
-            Health_sources= gr.CheckboxGroup(
-                choices=list(NEWS_SOURCES["Health"].keys()),
-                label="Health Sources",
-                value=[]
-            )
-
-
+
         with gr.Column():
             model_selector = gr.Radio(
                 choices=list(SUMMARIZER_MODELS.keys()),
@@ -184,8 +203,12 @@
     summary_output = gr.Textbox(label="News Summary", lines=20)
 
     def get_summary(tech_sources, business_sources, world_sources, selected_model):
-        model_name = SUMMARIZER_MODELS[selected_model]
-        return generate_summary(tech_sources, business_sources, world_sources, model_name)
+        try:
+            model_name = SUMMARIZER_MODELS[selected_model]
+            return generate_summary(tech_sources, business_sources, world_sources, model_name)
+        except Exception as e:
+            logger.error(f"Error in get_summary: {str(e)}")
+            return "An error occurred while processing your request. Please try again."
 
     # Connect the components to the summary function
     summarize_button.click(
@@ -199,10 +222,3 @@ if __name__ == "__main__":
 
 
 
-
-
-
-
-
-
-
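
Note: the cache that summarize_text() relies on (cache.get / cache.set) is an instance of the NewsCache class, whose body sits outside this diff. For context only, here is a minimal, illustrative sketch of what such a thread-safe, MD5-keyed cache could look like; the dict-plus-lock layout and the max_items parameter are assumptions for the sketch, not the repository's actual implementation.

import hashlib
import threading

class NewsCache:
    """Illustrative sketch only: a thread-safe summary cache keyed by an MD5 content hash."""

    def __init__(self, max_items=128):  # max_items is an assumed knob, not taken from this diff
        self._store = {}
        self._lock = threading.Lock()
        self._max_items = max_items

    def get(self, key):
        # Return the cached summary for this key, or None on a miss.
        with self._lock:
            return self._store.get(key)

    def set(self, key, value):
        # Evict the oldest entry (dict insertion order) once full, then store the new summary.
        with self._lock:
            if len(self._store) >= self._max_items:
                self._store.pop(next(iter(self._store)))
            self._store[key] = value

# Usage mirroring the pattern in summarize_text() above:
cache = NewsCache()
key = hashlib.md5("article text".encode()).hexdigest()
if cache.get(key) is None:
    cache.set(key, "generated summary")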