Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -17,36 +17,13 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
|
|
17 |
SUMMARIZER_MODEL = "facebook/bart-large-cnn" # You can replace this with other summarization models
|
18 |
CACHE_SIZE = 500 # Maximum number of cached summaries
|
19 |
RSS_FETCH_INTERVAL = timedelta(hours=8) # Fetch recent news within the last 8 hours
|
20 |
-
|
21 |
|
22 |
# News sources
|
23 |
NEWS_SOURCES = {
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
},
|
28 |
-
"Business": {
|
29 |
-
"TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Business.xml",
|
30 |
-
"reutersagency": "https://www.reutersagency.com/feed/?best-topics=business-finance&post_type=best"
|
31 |
-
},
|
32 |
-
"Science": {
|
33 |
-
"TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Science.xml"
|
34 |
-
},
|
35 |
-
"World News": {
|
36 |
-
"TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
|
37 |
-
"BBC": "http://feeds.bbci.co.uk/news/world/rss.xml",
|
38 |
-
"CNN": "http://rss.cnn.com/rss/edition_world.rss",
|
39 |
-
"reutersagency": "https://www.reutersagency.com/feed/?taxonomy=best-regions&post_type=best"
|
40 |
-
},
|
41 |
-
"Sports": {
|
42 |
-
"TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Sports.xml",
|
43 |
-
"reutersagency": "https://www.reutersagency.com/feed/?best-topics=sports&post_type=best"
|
44 |
-
},
|
45 |
-
"Health": {
|
46 |
-
"TheNewYorkTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Health.xml",
|
47 |
-
"politico": "http://rss.politico.com/healthcare.xml",
|
48 |
-
"reutersagency": "https://www.reutersagency.com/feed/?best-topics=health&post_type=best"
|
49 |
-
},
|
50 |
}
|
51 |
|
52 |
# Initialize cache
|
@@ -97,6 +74,9 @@ def fetch_rss_news(categories):
|
|
97 |
})
|
98 |
except Exception as e:
|
99 |
logging.error(f"Failed to fetch from {url}: {e}")
|
|
|
|
|
|
|
100 |
return articles
|
101 |
|
102 |
def summarize_text(text):
|
@@ -116,7 +96,7 @@ def summarize_text(text):
|
|
116 |
return "Summary unavailable."
|
117 |
|
118 |
def summarize_articles(articles):
|
119 |
-
"""Summarize
|
120 |
summaries = []
|
121 |
for article in articles:
|
122 |
try:
|
@@ -213,3 +193,4 @@ if __name__ == "__main__":
|
|
213 |
|
214 |
|
215 |
|
|
|
|
17 |
SUMMARIZER_MODEL = "facebook/bart-large-cnn" # You can replace this with other summarization models
|
18 |
CACHE_SIZE = 500 # Maximum number of cached summaries
|
19 |
RSS_FETCH_INTERVAL = timedelta(hours=8) # Fetch recent news within the last 8 hours
|
20 |
+
ARTICLE_LIMIT = 5 # Maximum number of most recent articles to keep after sorting by published date
|
21 |
|
22 |
# News sources
|
23 |
NEWS_SOURCES = {
|
24 |
+
"Technology": {"NYTimes": "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml"},
|
25 |
+
"Business": {"Reuters": "https://www.reutersagency.com/feed/?best-topics=business-finance&post_type=best"},
|
26 |
+
"World": {"BBC": "http://feeds.bbci.co.uk/news/world/rss.xml"},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
}
|
28 |
|
29 |
# Initialize cache
|
|
|
74 |
})
|
75 |
except Exception as e:
|
76 |
logging.error(f"Failed to fetch from {url}: {e}")
|
77 |
+
|
78 |
+
# Sort articles by published date (most recent first) and limit to ARTICLE_LIMIT
|
79 |
+
articles = sorted(articles, key=lambda x: x["published"], reverse=True)[:ARTICLE_LIMIT]
|
80 |
return articles
|
81 |
|
82 |
def summarize_text(text):
|
|
|
96 |
return "Summary unavailable."
|
97 |
|
98 |
def summarize_articles(articles):
|
99 |
+
"""Summarize each of the given articles."""
|
100 |
summaries = []
|
101 |
for article in articles:
|
102 |
try:
|
|
|
193 |
|
194 |
|
195 |
|
196 |
+
|