Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import pipeline
|
3 |
import feedparser
|
4 |
from datetime import datetime, timedelta
|
5 |
import json
|
@@ -44,40 +44,21 @@ NEWS_SOURCES = {
|
|
44 |
}
|
45 |
}
|
46 |
|
47 |
-
# Language codes and their corresponding MarianMT model names
|
48 |
-
LANGUAGE_CODES = {
|
49 |
-
"English": {"code": "en", "model": None}, # No translation needed for English
|
50 |
-
"Spanish": {"code": "es", "model": "Helsinki-NLP/opus-mt-en-es"},
|
51 |
-
"French": {"code": "fr", "model": "Helsinki-NLP/opus-mt-en-fr"},
|
52 |
-
"German": {"code": "de", "model": "Helsinki-NLP/opus-mt-en-de"},
|
53 |
-
"Italian": {"code": "it", "model": "Helsinki-NLP/opus-mt-en-it"},
|
54 |
-
"Portuguese": {"code": "pt", "model": "Helsinki-NLP/opus-mt-en-pt"},
|
55 |
-
"Dutch": {"code": "nl", "model": "Helsinki-NLP/opus-mt-en-nl"},
|
56 |
-
"Russian": {"code": "ru", "model": "Helsinki-NLP/opus-mt-en-ru"},
|
57 |
-
"Chinese": {"code": "zh", "model": "Helsinki-NLP/opus-mt-en-zh"},
|
58 |
-
"Japanese": {"code": "ja", "model": "Helsinki-NLP/opus-mt-en-jap"},
|
59 |
-
"Arabic": {"code": "ar", "model": "Helsinki-NLP/opus-mt-en-ar"}
|
60 |
-
}
|
61 |
-
|
62 |
# Initialize global variables
|
63 |
summarizer = None
|
64 |
-
translators = {}
|
65 |
|
66 |
class NewsCache:
|
67 |
def __init__(self):
|
68 |
self.summaries = {}
|
69 |
-
self.translations = {}
|
70 |
self.max_cache_size = 1000
|
71 |
|
72 |
-
def store_summary(self, content_hash, summary
|
73 |
-
cache_key = f"{content_hash}_{language}" if language else content_hash
|
74 |
if len(self.summaries) >= self.max_cache_size:
|
75 |
self.summaries.pop(next(iter(self.summaries)))
|
76 |
-
self.summaries[
|
77 |
|
78 |
-
def get_summary(self, content_hash
|
79 |
-
|
80 |
-
return self.summaries.get(cache_key)
|
81 |
|
82 |
news_cache = NewsCache()
|
83 |
|
@@ -120,8 +101,8 @@ def fetch_news_from_rss(categories):
|
|
120 |
return articles
|
121 |
|
122 |
def initialize_models():
|
123 |
-
"""Initialize the summarization
|
124 |
-
global summarizer
|
125 |
|
126 |
try:
|
127 |
# Initialize summarizer
|
@@ -130,55 +111,13 @@ def initialize_models():
|
|
130 |
model="facebook/bart-large-cnn",
|
131 |
device=-1 # Use CPU
|
132 |
)
|
133 |
-
|
134 |
-
# Initialize translators for each language
|
135 |
-
for lang, info in LANGUAGE_CODES.items():
|
136 |
-
if info["model"]: # Skip English as it doesn't need translation
|
137 |
-
try:
|
138 |
-
model = MarianMTModel.from_pretrained(info["model"])
|
139 |
-
tokenizer = AutoTokenizer.from_pretrained(info["model"])
|
140 |
-
translators[lang] = (model, tokenizer)
|
141 |
-
logging.info(f"Initialized translator for {lang}")
|
142 |
-
logging.info["model"]
|
143 |
-
except Exception as e:
|
144 |
-
logging.error(f"Error initializing translator for {lang}: {e}")
|
145 |
-
|
146 |
return True
|
147 |
except Exception as e:
|
148 |
logging.error(f"Error initializing models: {e}")
|
149 |
return False
|
150 |
|
151 |
-
def
|
152 |
-
"""
|
153 |
-
if target_language == "English" or not text:
|
154 |
-
return text
|
155 |
-
|
156 |
-
try:
|
157 |
-
if target_language not in translators:
|
158 |
-
logging.error(f"Translator not found for {target_language}")
|
159 |
-
return text
|
160 |
-
|
161 |
-
model, tokenizer = translators[target_language]
|
162 |
-
|
163 |
-
# Split text into chunks to handle long text
|
164 |
-
max_length = 512
|
165 |
-
chunks = [text[i:i+max_length] for i in range(0, len(text), max_length)]
|
166 |
-
translated_chunks = []
|
167 |
-
|
168 |
-
for chunk in chunks:
|
169 |
-
inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=512)
|
170 |
-
translated = model.generate(**inputs)
|
171 |
-
translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
|
172 |
-
translated_chunks.append(translated_text)
|
173 |
-
|
174 |
-
return " ".join(translated_chunks)
|
175 |
-
|
176 |
-
except Exception as e:
|
177 |
-
logging.error(f"Translation error: {e}")
|
178 |
-
return text
|
179 |
-
|
180 |
-
def generate_summary(text, title="", category="", language="English"):
|
181 |
-
"""Generate summary with translation support"""
|
182 |
if not summarizer:
|
183 |
if not initialize_models():
|
184 |
return None
|
@@ -186,11 +125,11 @@ def generate_summary(text, title="", category="", language="English"):
|
|
186 |
try:
|
187 |
# Check cache first
|
188 |
content_hash = get_content_hash(text)
|
189 |
-
cached_summary = news_cache.get_summary(content_hash
|
190 |
if cached_summary:
|
191 |
return cached_summary
|
192 |
|
193 |
-
# Generate
|
194 |
prompt_template = f"""
|
195 |
Analyze and summarize this {category} news article titled "{title}".
|
196 |
Focus on providing:
|
@@ -221,11 +160,7 @@ Please provide a clear, concise summary that a general audience can understand:"
|
|
221 |
sentences = summary.split(". ")
|
222 |
formatted_summary = "\n• " + "\n• ".join(filter(None, sentences))
|
223 |
|
224 |
-
|
225 |
-
if language != "English":
|
226 |
-
formatted_summary = translate_text(formatted_summary, language)
|
227 |
-
|
228 |
-
news_cache.store_summary(content_hash, formatted_summary, language)
|
229 |
return formatted_summary
|
230 |
|
231 |
return None
|
@@ -235,7 +170,7 @@ Please provide a clear, concise summary that a general audience can understand:"
|
|
235 |
return None
|
236 |
|
237 |
def get_personalized_summary(name, progress=gr.Progress()):
|
238 |
-
"""Generate personalized news summary
|
239 |
start_time = time.time()
|
240 |
logging.info(f"Starting summary generation for user: {name}")
|
241 |
|
@@ -250,14 +185,12 @@ def get_personalized_summary(name, progress=gr.Progress()):
|
|
250 |
except Exception as e:
|
251 |
return f"Error loading preferences: {e}"
|
252 |
|
253 |
-
user_language = preferences.get("language", "English")
|
254 |
-
|
255 |
# Fetch articles with progress
|
256 |
progress(0.2, desc="Fetching recent news...")
|
257 |
articles = fetch_news_from_rss(preferences["interests"])
|
258 |
|
259 |
if not articles:
|
260 |
-
return
|
261 |
|
262 |
# Process articles with timeout
|
263 |
progress(0.4, desc="Analyzing and summarizing...")
|
@@ -284,24 +217,18 @@ def get_personalized_summary(name, progress=gr.Progress()):
|
|
284 |
if not content:
|
285 |
continue
|
286 |
|
287 |
-
summary = generate_summary(content, title, category
|
288 |
if not summary:
|
289 |
continue
|
290 |
-
|
291 |
-
# Translate title and category if needed
|
292 |
-
if user_language != "English":
|
293 |
-
title = translate_text(title, user_language)
|
294 |
-
category = translate_text(category, user_language)
|
295 |
-
published_str = translate_text(published_str, user_language)
|
296 |
|
297 |
formatted_summary = f"""
|
298 |
📰 {title}
|
299 |
-
📁
|
300 |
-
⏰
|
301 |
|
302 |
{summary}
|
303 |
|
304 |
-
🔗
|
305 |
|
306 |
---"""
|
307 |
summaries.append(formatted_summary)
|
@@ -311,22 +238,17 @@ def get_personalized_summary(name, progress=gr.Progress()):
|
|
311 |
continue
|
312 |
|
313 |
if not summaries:
|
314 |
-
return
|
315 |
|
316 |
progress(1.0, desc="Done!")
|
317 |
return "\n".join(summaries)
|
318 |
|
319 |
# Gradio interface
|
320 |
-
with gr.Blocks(title="
|
321 |
-
gr.Markdown("# 📰
|
322 |
|
323 |
with gr.Tab("Set Preferences"):
|
324 |
name_input = gr.Textbox(label="Your Name")
|
325 |
-
language_dropdown = gr.Dropdown(
|
326 |
-
choices=list(LANGUAGE_CODES.keys()),
|
327 |
-
label="Preferred Language",
|
328 |
-
value="English"
|
329 |
-
)
|
330 |
interests_checkboxes = gr.CheckboxGroup(
|
331 |
choices=list(NEWS_SOURCES.keys()),
|
332 |
label="News Interests (Select multiple)"
|
@@ -334,13 +256,12 @@ with gr.Blocks(title="Enhanced News Summarizer") as demo:
|
|
334 |
save_button = gr.Button("Save Preferences")
|
335 |
preferences_output = gr.Textbox(label="Status")
|
336 |
|
337 |
-
def save_preferences(name,
|
338 |
-
if not name or not
|
339 |
return "Please fill in all required fields!"
|
340 |
|
341 |
preferences = {
|
342 |
"name": name,
|
343 |
-
"language": language,
|
344 |
"interests": interests,
|
345 |
"last_updated": datetime.now().isoformat()
|
346 |
}
|
@@ -356,7 +277,7 @@ with gr.Blocks(title="Enhanced News Summarizer") as demo:
|
|
356 |
|
357 |
save_button.click(
|
358 |
save_preferences,
|
359 |
-
inputs=[name_input,
|
360 |
outputs=[preferences_output]
|
361 |
)
|
362 |
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import pipeline
|
3 |
import feedparser
|
4 |
from datetime import datetime, timedelta
|
5 |
import json
|
|
|
44 |
}
|
45 |
}
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
# Initialize global variables
|
48 |
summarizer = None
|
|
|
49 |
|
50 |
class NewsCache:
|
51 |
def __init__(self):
|
52 |
self.summaries = {}
|
|
|
53 |
self.max_cache_size = 1000
|
54 |
|
55 |
+
def store_summary(self, content_hash, summary):
|
|
|
56 |
if len(self.summaries) >= self.max_cache_size:
|
57 |
self.summaries.pop(next(iter(self.summaries)))
|
58 |
+
self.summaries[content_hash] = summary
|
59 |
|
60 |
+
def get_summary(self, content_hash):
|
61 |
+
return self.summaries.get(content_hash)
|
|
|
62 |
|
63 |
news_cache = NewsCache()
|
64 |
|
|
|
101 |
return articles
|
102 |
|
103 |
def initialize_models():
|
104 |
+
"""Initialize the summarization model"""
|
105 |
+
global summarizer
|
106 |
|
107 |
try:
|
108 |
# Initialize summarizer
|
|
|
111 |
model="facebook/bart-large-cnn",
|
112 |
device=-1 # Use CPU
|
113 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
return True
|
115 |
except Exception as e:
|
116 |
logging.error(f"Error initializing models: {e}")
|
117 |
return False
|
118 |
|
119 |
+
def generate_summary(text, title="", category=""):
|
120 |
+
"""Generate summary for the article"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
if not summarizer:
|
122 |
if not initialize_models():
|
123 |
return None
|
|
|
125 |
try:
|
126 |
# Check cache first
|
127 |
content_hash = get_content_hash(text)
|
128 |
+
cached_summary = news_cache.get_summary(content_hash)
|
129 |
if cached_summary:
|
130 |
return cached_summary
|
131 |
|
132 |
+
# Generate summary
|
133 |
prompt_template = f"""
|
134 |
Analyze and summarize this {category} news article titled "{title}".
|
135 |
Focus on providing:
|
|
|
160 |
sentences = summary.split(". ")
|
161 |
formatted_summary = "\n• " + "\n• ".join(filter(None, sentences))
|
162 |
|
163 |
+
news_cache.store_summary(content_hash, formatted_summary)
|
|
|
|
|
|
|
|
|
164 |
return formatted_summary
|
165 |
|
166 |
return None
|
|
|
170 |
return None
|
171 |
|
172 |
def get_personalized_summary(name, progress=gr.Progress()):
|
173 |
+
"""Generate personalized news summary"""
|
174 |
start_time = time.time()
|
175 |
logging.info(f"Starting summary generation for user: {name}")
|
176 |
|
|
|
185 |
except Exception as e:
|
186 |
return f"Error loading preferences: {e}"
|
187 |
|
|
|
|
|
188 |
# Fetch articles with progress
|
189 |
progress(0.2, desc="Fetching recent news...")
|
190 |
articles = fetch_news_from_rss(preferences["interests"])
|
191 |
|
192 |
if not articles:
|
193 |
+
return "No recent news articles found from the last 8 hours. Please try again later."
|
194 |
|
195 |
# Process articles with timeout
|
196 |
progress(0.4, desc="Analyzing and summarizing...")
|
|
|
217 |
if not content:
|
218 |
continue
|
219 |
|
220 |
+
summary = generate_summary(content, title, category)
|
221 |
if not summary:
|
222 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
|
224 |
formatted_summary = f"""
|
225 |
📰 {title}
|
226 |
+
📁 Category: {category}
|
227 |
+
⏰ Published: {published_str}
|
228 |
|
229 |
{summary}
|
230 |
|
231 |
+
🔗 Read more: {link}
|
232 |
|
233 |
---"""
|
234 |
summaries.append(formatted_summary)
|
|
|
238 |
continue
|
239 |
|
240 |
if not summaries:
|
241 |
+
return "Unable to generate summaries for recent news. Please try again."
|
242 |
|
243 |
progress(1.0, desc="Done!")
|
244 |
return "\n".join(summaries)
|
245 |
|
246 |
# Gradio interface
|
247 |
+
with gr.Blocks(title="News Summarizer") as demo:
|
248 |
+
gr.Markdown("# 📰 AI News Summarizer")
|
249 |
|
250 |
with gr.Tab("Set Preferences"):
|
251 |
name_input = gr.Textbox(label="Your Name")
|
|
|
|
|
|
|
|
|
|
|
252 |
interests_checkboxes = gr.CheckboxGroup(
|
253 |
choices=list(NEWS_SOURCES.keys()),
|
254 |
label="News Interests (Select multiple)"
|
|
|
256 |
save_button = gr.Button("Save Preferences")
|
257 |
preferences_output = gr.Textbox(label="Status")
|
258 |
|
259 |
+
def save_preferences(name, interests):
|
260 |
+
if not name or not interests:
|
261 |
return "Please fill in all required fields!"
|
262 |
|
263 |
preferences = {
|
264 |
"name": name,
|
|
|
265 |
"interests": interests,
|
266 |
"last_updated": datetime.now().isoformat()
|
267 |
}
|
|
|
277 |
|
278 |
save_button.click(
|
279 |
save_preferences,
|
280 |
+
inputs=[name_input, interests_checkboxes],
|
281 |
outputs=[preferences_output]
|
282 |
)
|
283 |
|