loayshabet committed on
Commit
120af41
·
verified ·
1 Parent(s): c3f061c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -103
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- from transformers import pipeline, MarianMTModel, AutoTokenizer
3
  import feedparser
4
  from datetime import datetime, timedelta
5
  import json
@@ -44,40 +44,21 @@ NEWS_SOURCES = {
44
  }
45
  }
46
 
47
- # Language codes and their corresponding MarianMT model names
48
- LANGUAGE_CODES = {
49
- "English": {"code": "en", "model": None}, # No translation needed for English
50
- "Spanish": {"code": "es", "model": "Helsinki-NLP/opus-mt-en-es"},
51
- "French": {"code": "fr", "model": "Helsinki-NLP/opus-mt-en-fr"},
52
- "German": {"code": "de", "model": "Helsinki-NLP/opus-mt-en-de"},
53
- "Italian": {"code": "it", "model": "Helsinki-NLP/opus-mt-en-it"},
54
- "Portuguese": {"code": "pt", "model": "Helsinki-NLP/opus-mt-en-pt"},
55
- "Dutch": {"code": "nl", "model": "Helsinki-NLP/opus-mt-en-nl"},
56
- "Russian": {"code": "ru", "model": "Helsinki-NLP/opus-mt-en-ru"},
57
- "Chinese": {"code": "zh", "model": "Helsinki-NLP/opus-mt-en-zh"},
58
- "Japanese": {"code": "ja", "model": "Helsinki-NLP/opus-mt-en-jap"},
59
- "Arabic": {"code": "ar", "model": "Helsinki-NLP/opus-mt-en-ar"}
60
- }
61
-
62
  # Initialize global variables
63
  summarizer = None
64
- translators = {}
65
 
66
  class NewsCache:
67
  def __init__(self):
68
  self.summaries = {}
69
- self.translations = {}
70
  self.max_cache_size = 1000
71
 
72
- def store_summary(self, content_hash, summary, language=None):
73
- cache_key = f"{content_hash}_{language}" if language else content_hash
74
  if len(self.summaries) >= self.max_cache_size:
75
  self.summaries.pop(next(iter(self.summaries)))
76
- self.summaries[cache_key] = summary
77
 
78
- def get_summary(self, content_hash, language=None):
79
- cache_key = f"{content_hash}_{language}" if language else content_hash
80
- return self.summaries.get(cache_key)
81
 
82
  news_cache = NewsCache()
83
 
@@ -120,8 +101,8 @@ def fetch_news_from_rss(categories):
120
  return articles
121
 
122
  def initialize_models():
123
- """Initialize the summarization and translation models"""
124
- global summarizer, translators
125
 
126
  try:
127
  # Initialize summarizer
@@ -130,55 +111,13 @@ def initialize_models():
130
  model="facebook/bart-large-cnn",
131
  device=-1 # Use CPU
132
  )
133
-
134
- # Initialize translators for each language
135
- for lang, info in LANGUAGE_CODES.items():
136
- if info["model"]: # Skip English as it doesn't need translation
137
- try:
138
- model = MarianMTModel.from_pretrained(info["model"])
139
- tokenizer = AutoTokenizer.from_pretrained(info["model"])
140
- translators[lang] = (model, tokenizer)
141
- logging.info(f"Initialized translator for {lang}")
142
- logging.info["model"]
143
- except Exception as e:
144
- logging.error(f"Error initializing translator for {lang}: {e}")
145
-
146
  return True
147
  except Exception as e:
148
  logging.error(f"Error initializing models: {e}")
149
  return False
150
 
151
- def translate_text(text, target_language):
152
- """Translate text to target language"""
153
- if target_language == "English" or not text:
154
- return text
155
-
156
- try:
157
- if target_language not in translators:
158
- logging.error(f"Translator not found for {target_language}")
159
- return text
160
-
161
- model, tokenizer = translators[target_language]
162
-
163
- # Split text into chunks to handle long text
164
- max_length = 512
165
- chunks = [text[i:i+max_length] for i in range(0, len(text), max_length)]
166
- translated_chunks = []
167
-
168
- for chunk in chunks:
169
- inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=512)
170
- translated = model.generate(**inputs)
171
- translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
172
- translated_chunks.append(translated_text)
173
-
174
- return " ".join(translated_chunks)
175
-
176
- except Exception as e:
177
- logging.error(f"Translation error: {e}")
178
- return text
179
-
180
- def generate_summary(text, title="", category="", language="English"):
181
- """Generate summary with translation support"""
182
  if not summarizer:
183
  if not initialize_models():
184
  return None
@@ -186,11 +125,11 @@ def generate_summary(text, title="", category="", language="English"):
186
  try:
187
  # Check cache first
188
  content_hash = get_content_hash(text)
189
- cached_summary = news_cache.get_summary(content_hash, language)
190
  if cached_summary:
191
  return cached_summary
192
 
193
- # Generate English summary first
194
  prompt_template = f"""
195
  Analyze and summarize this {category} news article titled "{title}".
196
  Focus on providing:
@@ -221,11 +160,7 @@ Please provide a clear, concise summary that a general audience can understand:"
221
  sentences = summary.split(". ")
222
  formatted_summary = "\n• " + "\n• ".join(filter(None, sentences))
223
 
224
- # Translate if needed
225
- if language != "English":
226
- formatted_summary = translate_text(formatted_summary, language)
227
-
228
- news_cache.store_summary(content_hash, formatted_summary, language)
229
  return formatted_summary
230
 
231
  return None
@@ -235,7 +170,7 @@ Please provide a clear, concise summary that a general audience can understand:"
235
  return None
236
 
237
  def get_personalized_summary(name, progress=gr.Progress()):
238
- """Generate personalized news summary in user's preferred language"""
239
  start_time = time.time()
240
  logging.info(f"Starting summary generation for user: {name}")
241
 
@@ -250,14 +185,12 @@ def get_personalized_summary(name, progress=gr.Progress()):
250
  except Exception as e:
251
  return f"Error loading preferences: {e}"
252
 
253
- user_language = preferences.get("language", "English")
254
-
255
  # Fetch articles with progress
256
  progress(0.2, desc="Fetching recent news...")
257
  articles = fetch_news_from_rss(preferences["interests"])
258
 
259
  if not articles:
260
- return translate_text("No recent news articles found from the last 8 hours. Please try again later.", user_language)
261
 
262
  # Process articles with timeout
263
  progress(0.4, desc="Analyzing and summarizing...")
@@ -284,24 +217,18 @@ def get_personalized_summary(name, progress=gr.Progress()):
284
  if not content:
285
  continue
286
 
287
- summary = generate_summary(content, title, category, user_language)
288
  if not summary:
289
  continue
290
-
291
- # Translate title and category if needed
292
- if user_language != "English":
293
- title = translate_text(title, user_language)
294
- category = translate_text(category, user_language)
295
- published_str = translate_text(published_str, user_language)
296
 
297
  formatted_summary = f"""
298
  📰 {title}
299
- 📁 {translate_text("Category", user_language)}: {category}
300
- {translate_text("Published", user_language)}: {published_str}
301
 
302
  {summary}
303
 
304
- 🔗 {translate_text("Read more", user_language)}: {link}
305
 
306
  ---"""
307
  summaries.append(formatted_summary)
@@ -311,22 +238,17 @@ def get_personalized_summary(name, progress=gr.Progress()):
311
  continue
312
 
313
  if not summaries:
314
- return translate_text("Unable to generate summaries for recent news. Please try again.", user_language)
315
 
316
  progress(1.0, desc="Done!")
317
  return "\n".join(summaries)
318
 
319
  # Gradio interface
320
- with gr.Blocks(title="Enhanced News Summarizer") as demo:
321
- gr.Markdown("# 📰 Enhanced AI News Summarizer")
322
 
323
  with gr.Tab("Set Preferences"):
324
  name_input = gr.Textbox(label="Your Name")
325
- language_dropdown = gr.Dropdown(
326
- choices=list(LANGUAGE_CODES.keys()),
327
- label="Preferred Language",
328
- value="English"
329
- )
330
  interests_checkboxes = gr.CheckboxGroup(
331
  choices=list(NEWS_SOURCES.keys()),
332
  label="News Interests (Select multiple)"
@@ -334,13 +256,12 @@ with gr.Blocks(title="Enhanced News Summarizer") as demo:
334
  save_button = gr.Button("Save Preferences")
335
  preferences_output = gr.Textbox(label="Status")
336
 
337
- def save_preferences(name, language, interests):
338
- if not name or not language or not interests:
339
  return "Please fill in all required fields!"
340
 
341
  preferences = {
342
  "name": name,
343
- "language": language,
344
  "interests": interests,
345
  "last_updated": datetime.now().isoformat()
346
  }
@@ -356,7 +277,7 @@ with gr.Blocks(title="Enhanced News Summarizer") as demo:
356
 
357
  save_button.click(
358
  save_preferences,
359
- inputs=[name_input, language_dropdown, interests_checkboxes],
360
  outputs=[preferences_output]
361
  )
362
 
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
  import feedparser
4
  from datetime import datetime, timedelta
5
  import json
 
44
  }
45
  }
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  # Initialize global variables
48
  summarizer = None
 
49
 
50
  class NewsCache:
51
  def __init__(self):
52
  self.summaries = {}
 
53
  self.max_cache_size = 1000
54
 
55
+ def store_summary(self, content_hash, summary):
 
56
  if len(self.summaries) >= self.max_cache_size:
57
  self.summaries.pop(next(iter(self.summaries)))
58
+ self.summaries[content_hash] = summary
59
 
60
+ def get_summary(self, content_hash):
61
+ return self.summaries.get(content_hash)
 
62
 
63
  news_cache = NewsCache()
64
 
 
101
  return articles
102
 
103
  def initialize_models():
104
+ """Initialize the summarization model"""
105
+ global summarizer
106
 
107
  try:
108
  # Initialize summarizer
 
111
  model="facebook/bart-large-cnn",
112
  device=-1 # Use CPU
113
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  return True
115
  except Exception as e:
116
  logging.error(f"Error initializing models: {e}")
117
  return False
118
 
119
+ def generate_summary(text, title="", category=""):
120
+ """Generate summary for the article"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  if not summarizer:
122
  if not initialize_models():
123
  return None
 
125
  try:
126
  # Check cache first
127
  content_hash = get_content_hash(text)
128
+ cached_summary = news_cache.get_summary(content_hash)
129
  if cached_summary:
130
  return cached_summary
131
 
132
+ # Generate summary
133
  prompt_template = f"""
134
  Analyze and summarize this {category} news article titled "{title}".
135
  Focus on providing:
 
160
  sentences = summary.split(". ")
161
  formatted_summary = "\n• " + "\n• ".join(filter(None, sentences))
162
 
163
+ news_cache.store_summary(content_hash, formatted_summary)
 
 
 
 
164
  return formatted_summary
165
 
166
  return None
 
170
  return None
171
 
172
  def get_personalized_summary(name, progress=gr.Progress()):
173
+ """Generate personalized news summary"""
174
  start_time = time.time()
175
  logging.info(f"Starting summary generation for user: {name}")
176
 
 
185
  except Exception as e:
186
  return f"Error loading preferences: {e}"
187
 
 
 
188
  # Fetch articles with progress
189
  progress(0.2, desc="Fetching recent news...")
190
  articles = fetch_news_from_rss(preferences["interests"])
191
 
192
  if not articles:
193
+ return "No recent news articles found from the last 8 hours. Please try again later."
194
 
195
  # Process articles with timeout
196
  progress(0.4, desc="Analyzing and summarizing...")
 
217
  if not content:
218
  continue
219
 
220
+ summary = generate_summary(content, title, category)
221
  if not summary:
222
  continue
 
 
 
 
 
 
223
 
224
  formatted_summary = f"""
225
  📰 {title}
226
+ 📁 Category: {category}
227
+ ⏰ Published: {published_str}
228
 
229
  {summary}
230
 
231
+ 🔗 Read more: {link}
232
 
233
  ---"""
234
  summaries.append(formatted_summary)
 
238
  continue
239
 
240
  if not summaries:
241
+ return "Unable to generate summaries for recent news. Please try again."
242
 
243
  progress(1.0, desc="Done!")
244
  return "\n".join(summaries)
245
 
246
  # Gradio interface
247
+ with gr.Blocks(title="News Summarizer") as demo:
248
+ gr.Markdown("# 📰 AI News Summarizer")
249
 
250
  with gr.Tab("Set Preferences"):
251
  name_input = gr.Textbox(label="Your Name")
 
 
 
 
 
252
  interests_checkboxes = gr.CheckboxGroup(
253
  choices=list(NEWS_SOURCES.keys()),
254
  label="News Interests (Select multiple)"
 
256
  save_button = gr.Button("Save Preferences")
257
  preferences_output = gr.Textbox(label="Status")
258
 
259
+ def save_preferences(name, interests):
260
+ if not name or not interests:
261
  return "Please fill in all required fields!"
262
 
263
  preferences = {
264
  "name": name,
 
265
  "interests": interests,
266
  "last_updated": datetime.now().isoformat()
267
  }
 
277
 
278
  save_button.click(
279
  save_preferences,
280
+ inputs=[name_input, interests_checkboxes],
281
  outputs=[preferences_output]
282
  )
283