webcrawler / trend_crawl.py
Add1E's picture
Update trend_crawl.py
56cfe41 verified
import logging

from trendspy import Trends
# Shared trendspy client used by the functions in this module to fetch
# trending keywords (`trending_now`) and their related news articles
# (`trending_now_news_by_ids`).
tr = Trends()
# Maps the numeric topic ids found in a trend's `topics` list to the
# category names used as keys in the crawl result. Entries are listed
# alphabetically by name, which is why "Climate" (20) sits between 3 and 4.
# Id 12 is not defined; id 11 ("Other") doubles as the fallback category
# for trends without any recognised topic id.
TREND_TOPICS = {
    1: "Autos and Vehicles",
    2: "Beauty and Fashion",
    3: "Business and Finance",
    20: "Climate",
    4: "Entertainment",
    5: "Food and Drink",
    6: "Games",
    7: "Health",
    8: "Hobbies and Leisure",
    9: "Jobs and Education",
    10: "Law and Government",
    11: "Other",
    13: "Pets and Animals",
    14: "Politics",
    15: "Science",
    16: "Shopping",
    17: "Sports",
    18: "Technology",
    19: "Travel and Transportation",
}
# Module-level result store filled by process_trends_for_country() and
# returned by get_trends(). Layout:
#   {country_code: {"All categories": {keyword: {...}},
#                   <category name>:  {keyword: {...}}}}
# where each keyword entry holds "searchQueries" and "articles".
# Nothing in this module clears it, so entries accumulate across calls.
trends_json = {}
def process_trends_for_country(country_code, trends_list):
    """Group one country's trending keywords by category in ``trends_json``.

    Every trend is filed twice: under the catch-all ``"All categories"``
    bucket and under the name of the first of its topic ids that appears in
    ``TREND_TOPICS``. Trends without a recognised topic id fall back to
    ``TREND_TOPICS[11]`` ("Other"). For each trend, up to three related news
    articles are requested through the shared ``tr`` client.

    Args:
        country_code: Key under which the results are stored in
            ``trends_json``.
        trends_list: Iterable of trend objects exposing ``topics``,
            ``keyword``, ``volume`` and ``news_tokens``.

    Returns:
        None. ``trends_json[country_code]`` is updated in place. Entries left
        by earlier calls for the same country are kept; a trend with the same
        keyword overwrites the previous entry.
    """
    country_trends = trends_json.setdefault(country_code, {"All categories": {}})

    for trend in trends_list:
        # The first recognised topic id wins; otherwise use "Other".
        category = next(
            (
                TREND_TOPICS[topic_id]
                for topic_id in trend.topics
                if topic_id in TREND_TOPICS
            ),
            TREND_TOPICS[11],
        )
        category_trends = country_trends.setdefault(category, {})
        topic_name = trend.keyword

        try:
            news = tr.trending_now_news_by_ids(trend.news_tokens, max_news=3)
            articles = [
                {"title": article.title, "href": article.url} for article in news
            ]
        except Exception:
            # Best effort: a failed news lookup must not drop the trend
            # itself, but the failure is logged instead of being silently
            # discarded so broken lookups can be diagnosed.
            logging.getLogger(__name__).warning(
                "Could not fetch news for trend %r (%s)",
                topic_name,
                country_code,
                exc_info=True,
            )
            articles = []

        # Each bucket gets its own entry dict so that editing one bucket's
        # entry later does not change the other.
        country_trends["All categories"][topic_name] = {
            "searchQueries": trend.volume,
            "articles": articles,
        }
        category_trends[topic_name] = {
            "searchQueries": trend.volume,
            "articles": articles,
        }
def get_trends(countries: list):
    """Crawl trending keywords for every country in ``countries``.

    For each country code the current trends are fetched with
    ``tr.trending_now`` and handed to ``process_trends_for_country``. The
    country's ``"All categories"`` bucket is then rebuilt so its keywords are
    ordered by ``"searchQueries"``, highest first.

    Args:
        countries: Country codes, each passed as ``geo`` to
            ``tr.trending_now``.

    Returns:
        The module-level ``trends_json`` dict, which also contains any
        countries stored by earlier calls.
    """
    for geo in countries:
        process_trends_for_country(geo, tr.trending_now(geo=geo))

        country_bucket = trends_json[geo]
        ranked = sorted(
            country_bucket["All categories"].items(),
            key=lambda item: item[1]["searchQueries"],
            reverse=True,
        )
        # Dicts keep insertion order, so rebuilding from the ranked pairs
        # stores the keywords in descending search-volume order.
        country_bucket["All categories"] = dict(ranked)

    return trends_json