Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -7,25 +7,6 @@ from bs4 import BeautifulSoup
|
|
7 |
import hashlib
|
8 |
import threading
|
9 |
|
10 |
-
# Updated Global settings with more detailed news sources
|
11 |
-
NEWS_SOURCES = {
|
12 |
-
"Technology": {
|
13 |
-
"TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
|
14 |
-
"TechCrunch": "http://feeds.feedburner.com/TechCrunch/",
|
15 |
-
"Wired": "https://www.wired.com/feed/rss"
|
16 |
-
},
|
17 |
-
"Business": {
|
18 |
-
"TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
|
19 |
-
"Bloomberg": "https://www.bloomberg.com/feeds/podcasts/surveillance.rss",
|
20 |
-
"Forbes": "https://www.forbes.com/feeds/forbesasia"
|
21 |
-
},
|
22 |
-
"World News": {
|
23 |
-
"BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
|
24 |
-
"TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
|
25 |
-
"Reuters": "http://feeds.reuters.com/Reuters/worldNews"
|
26 |
-
}
|
27 |
-
}
|
28 |
-
|
29 |
# Global settings
|
30 |
SUMMARIZER_MODELS = {
|
31 |
"Default (facebook/bart-large-cnn)": "facebook/bart-large-cnn",
|
@@ -35,6 +16,12 @@ CACHE_SIZE = 500
|
|
35 |
RSS_FETCH_INTERVAL = timedelta(hours=8)
|
36 |
ARTICLE_LIMIT = 5
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
class NewsCache:
|
39 |
def __init__(self, size):
|
40 |
self.cache = {}
|
@@ -54,15 +41,11 @@ class NewsCache:
|
|
54 |
|
55 |
cache = NewsCache(CACHE_SIZE)
|
56 |
|
57 |
-
def fetch_rss_news(selected_sources):
|
58 |
articles = []
|
59 |
cutoff_time = datetime.now(pytz.UTC) - RSS_FETCH_INTERVAL
|
60 |
-
|
61 |
-
|
62 |
-
for source, url in sources.items():
|
63 |
-
if f"{category} - {source}" not in selected_sources:
|
64 |
-
continue
|
65 |
-
|
66 |
try:
|
67 |
feed = feedparser.parse(url)
|
68 |
for entry in feed.entries:
|
@@ -78,7 +61,6 @@ def fetch_rss_news(selected_sources):
|
|
78 |
})
|
79 |
except Exception:
|
80 |
continue
|
81 |
-
|
82 |
articles = sorted(articles, key=lambda x: x["published"], reverse=True)[:ARTICLE_LIMIT]
|
83 |
return articles
|
84 |
|
@@ -110,14 +92,12 @@ def summarize_articles(articles, model_name):
|
|
110 |
""")
|
111 |
return "\n".join(summaries)
|
112 |
|
113 |
-
def generate_summary(selected_sources, model_name):
|
114 |
-
if not selected_sources:
|
115 |
-
return "Please select at least one
|
116 |
-
|
117 |
-
articles = fetch_rss_news(selected_sources)
|
118 |
if not articles:
|
119 |
-
return "No recent news found in the selected
|
120 |
-
|
121 |
return summarize_articles(articles, model_name)
|
122 |
|
123 |
# Gradio Interface
|
@@ -125,44 +105,34 @@ demo = gr.Blocks()
|
|
125 |
|
126 |
with demo:
|
127 |
gr.Markdown("# 📰 AI News Summarizer")
|
128 |
-
with gr.
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
sources_checkboxes[category] = gr.CheckboxGroup(
|
134 |
-
choices=list(sources.keys()),
|
135 |
-
label=f"{category} Sources",
|
136 |
-
interactive=True
|
137 |
-
)
|
138 |
-
|
139 |
-
# Model selector
|
140 |
model_selector = gr.Radio(
|
141 |
choices=list(SUMMARIZER_MODELS.keys()),
|
142 |
label="Choose Summarization Model",
|
143 |
value="Default (facebook/bart-large-cnn)"
|
144 |
)
|
145 |
-
|
146 |
summarize_button = gr.Button("Get News Summary")
|
147 |
summary_output = gr.Textbox(label="News Summary", lines=20)
|
148 |
|
149 |
-
def get_summary(
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
model_name = SUMMARIZER_MODELS[model_name]
|
156 |
-
return generate_summary(selected_sources, model_name)
|
157 |
-
|
158 |
-
summarize_button.click(
|
159 |
-
get_summary,
|
160 |
-
inputs=[sources_checkboxes, model_selector],
|
161 |
-
outputs=summary_output
|
162 |
-
)
|
163 |
|
164 |
if __name__ == "__main__":
|
165 |
demo.launch()
|
166 |
|
167 |
|
168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
import hashlib
|
8 |
import threading
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
# Global settings
|
11 |
SUMMARIZER_MODELS = {
|
12 |
"Default (facebook/bart-large-cnn)": "facebook/bart-large-cnn",
|
|
|
16 |
RSS_FETCH_INTERVAL = timedelta(hours=8)
|
17 |
ARTICLE_LIMIT = 5
|
18 |
|
19 |
+
NEWS_SOURCES = {
|
20 |
+
"Technology": {"TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml"},
|
21 |
+
"Business": {"TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml"},
|
22 |
+
"World News": {"BBC": "http://feeds.bbci.co.uk/news/world/rss.xml"}
|
23 |
+
}
|
24 |
+
|
25 |
class NewsCache:
|
26 |
def __init__(self, size):
|
27 |
self.cache = {}
|
|
|
41 |
|
42 |
cache = NewsCache(CACHE_SIZE)
|
43 |
|
44 |
+
def fetch_rss_news(categories):
|
45 |
articles = []
|
46 |
cutoff_time = datetime.now(pytz.UTC) - RSS_FETCH_INTERVAL
|
47 |
+
for category in categories:
|
48 |
+
for source, url in NEWS_SOURCES.get(category, {}).items():
|
|
|
|
|
|
|
|
|
49 |
try:
|
50 |
feed = feedparser.parse(url)
|
51 |
for entry in feed.entries:
|
|
|
61 |
})
|
62 |
except Exception:
|
63 |
continue
|
|
|
64 |
articles = sorted(articles, key=lambda x: x["published"], reverse=True)[:ARTICLE_LIMIT]
|
65 |
return articles
|
66 |
|
|
|
92 |
""")
|
93 |
return "\n".join(summaries)
|
94 |
|
95 |
+
def generate_summary(selected_categories, model_name):
|
96 |
+
if not selected_categories:
|
97 |
+
return "Please select at least one category."
|
98 |
+
articles = fetch_rss_news(selected_categories)
|
|
|
99 |
if not articles:
|
100 |
+
return "No recent news found in the selected categories."
|
|
|
101 |
return summarize_articles(articles, model_name)
|
102 |
|
103 |
# Gradio Interface
|
|
|
105 |
|
106 |
with demo:
|
107 |
gr.Markdown("# 📰 AI News Summarizer")
|
108 |
+
with gr.Row():
|
109 |
+
categories = gr.CheckboxGroup(
|
110 |
+
choices=list(NEWS_SOURCES.keys()),
|
111 |
+
label="Select News Categories"
|
112 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
model_selector = gr.Radio(
|
114 |
choices=list(SUMMARIZER_MODELS.keys()),
|
115 |
label="Choose Summarization Model",
|
116 |
value="Default (facebook/bart-large-cnn)"
|
117 |
)
|
|
|
118 |
summarize_button = gr.Button("Get News Summary")
|
119 |
summary_output = gr.Textbox(label="News Summary", lines=20)
|
120 |
|
121 |
+
def get_summary(selected_categories, selected_model):
|
122 |
+
model_name = SUMMARIZER_MODELS[selected_model]
|
123 |
+
return generate_summary(selected_categories, model_name)
|
124 |
+
|
125 |
+
summarize_button.click(get_summary, inputs=[categories, model_selector], outputs=summary_output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
if __name__ == "__main__":
|
128 |
demo.launch()
|
129 |
|
130 |
|
131 |
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
|
136 |
+
|
137 |
+
|
138 |
+
|