Add1E committed
Commit 7d3cbcd · verified · 1 Parent(s): 723e1ec

Upload 3 files

Files changed (4)
  1. .gitattributes +1 -0
  2. app.py +335 -290
  3. chromedriver.exe +3 -0
  4. trend_crawl.py +110 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ chromedriver.exe filter=lfs diff=lfs merge=lfs -text
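The appended rule above is the pattern that `git lfs track` writes into .gitattributes, so the driver binary added later in this commit is stored as a Git LFS pointer rather than a regular blob. A hypothetical shell sequence that produces this kind of change (assuming git-lfs is installed) would be:

git lfs install
git lfs track "chromedriver.exe"
git add .gitattributes chromedriver.exe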
app.py CHANGED
@@ -1,291 +1,336 @@
- from pytrends.request import TrendReq
- import streamlit as st
- import pandas as pd
- import xml.etree.ElementTree as ET
- import requests
- from datetime import datetime
- import pytz
- import hmac
- import os
- import time
- from PIL import Image
-
- def parse_url(url):
-     response = requests.get(url)
-
-     root = ET.fromstring(response.content)
-     return root
-
- def convert_into_pd(req_json):
-     wanted_keys = ["entityNames", "title"]
-
-     final_json = [{ key: ts[key] for key in ts.keys() if key in wanted_keys} for ts in req_json ]
-
-     result_df = pd.DataFrame(final_json)
-     return result_df
-
- def find_details(req_json, gewünschter_titel):
-     gewünschte_details = []
-     for trend_info in req_json:
-         if trend_info['title'] == gewünschter_titel:
-
-             for article in trend_info['articles']:
-                 article_details = {
-                     'url': article['url'],
-                     'snippet': article['snippet'],
-                     'articleTitle': article['articleTitle'],
-                     'time': article['time'],
-                     'source' : article['source']
-                 }
-
-                 gewünschte_details.append(article_details)
-     return gewünschte_details
-
- def find_details2(req_json):
-     gewünschte_details = []
-
-     for article in req_json:
-         article_details = {
-             'url': article['url'],
-             'snippet': article['snippet'],
-             'articleTitle': article['title'],
-             'source' : article['source']
-
-         }
-
-         gewünschte_details.append(article_details)
-     return gewünschte_details
-
- if 'reset' not in st.session_state:
-     st.session_state.reset = False
-
- def display_trends_from_yesterday():
-     checkbox_statuses = {}
-     urls = []
-
-     timezone = 'Europe/Vienna'
-     today = datetime.now(pytz.timezone(timezone)).date()
-     feed = parse_url(feed_url1)
-     entries = []
-     ns = {'ht': 'https://trends.google.de/trends/trendingsearches/daily'} # Define namespace
-     for item in feed.findall('.//item'):
-         pubDate = datetime.strptime(item.find('pubDate').text, '%a, %d %b %Y %H:%M:%S %z').date()
-         # Filter: skip if pubDate is today
-         if pubDate == today:
-             continue
-         entry = {
-             'title': item.find('title').text,
-             'pubDate': item.find('pubDate').text,
-             'approx_traffic': item.find('ht:approx_traffic', ns).text if item.find('ht:approx_traffic', ns) is not None else None,
-             'news_items': []
-         }
-         for news_item in item.findall('ht:news_item', ns):
-             news_details = {
-                 'title': news_item.find('ht:news_item_title', ns).text,
-                 'snippet': news_item.find('ht:news_item_snippet', ns).text,
-                 'url': news_item.find('ht:news_item_url', ns).text,
-                 'source': news_item.find('ht:news_item_source', ns).text
-             }
-             entry['news_items'].append(news_details)
-         entries.append(entry)
-
-     count = 1
-     for entry in entries:
-         with st.expander(f"{count}• {entry['title']} | Generated Traffic: {entry['approx_traffic']}"):
-             st.write(f"Veröffentlichungsdatum : {entry['pubDate']}")
-             for count2, link in enumerate(entry['news_items'], start=1):
-                 checkbox_label = f"yesterday_{count}_{count2}"
-                 if st.session_state.reset:
-                     st.session_state[checkbox_label] = False
-                 checkbox_statuses[checkbox_label] = st.session_state.get(checkbox_label, False)
-                 checkbox_statuses[checkbox_label] = st.checkbox(
-                     f"{count2}• {link['title']} | {link['source']} | [Go To →]({link['url']})",
-                     value=checkbox_statuses[checkbox_label],
-                     key=checkbox_label
-                 )
-                 if checkbox_statuses[checkbox_label]:
-                     urls.append(link['url'])
-
-             # Button at the end of the expander
-             base_url = os.getenv("url")
-             query_params = "&".join([f"article-links[]={url}" for url in urls])
-             full_url = f"{base_url}{query_params}"
-             st.link_button("Open All Links" , url= full_url)
-         count += 1
-
-
- def display_articles_for_category(category):
-     checkbox_statuses = {}
-     urls = []
-     # Dictionary to track the state of each checkbox
-     for index, row in st.session_state["real_trending_searches"][selected_country][category].iterrows():
-         articles = find_details(st.session_state["base_data"][selected_country][category], row['title'])
-         for count2, url in enumerate(articles, start=1):
-             checkbox_label = f"{category}_{index}_{count2}"
-             if st.session_state.reset:
-                 st.session_state[checkbox_label] = False
-             checkbox_statuses[checkbox_label] = st.session_state.get(checkbox_label, False)
-
-     for index, row in st.session_state["real_trending_searches"][selected_country][category].iterrows():
-         count = index + 1
-         with st.expander(f"{count}• {row['title']} "):
-             articles = find_details(st.session_state["base_data"][selected_country][category], row['title'])
-             for count2, url in enumerate(articles, start=1):
-                 checkbox_label = f"{category}_{index}_{count2}"
-                 disabled = not checkbox_statuses[checkbox_label] and sum(checkbox_statuses.values()) >= MAX_CHECKED
-                 checkbox_statuses[checkbox_label] = st.checkbox(
-                     f"{count2}• {url['articleTitle']} | {url['source']} | [Go To →]({url['url']})",
-                     value=checkbox_statuses[checkbox_label],
-                     key=checkbox_label,
-                     disabled=disabled
-                 )
-                 if checkbox_statuses[checkbox_label]:
-                     urls.append(url['url'])
-             base_url = os.getenv("url")
-             query_params = "&".join([f"article-links[]={url}" for url in urls])
-             full_url = f"{base_url}{query_params}"
-             st.link_button("Open All Links" , url= full_url)
-
-
- # Function to render articles for today
- def display_articles_for_today(count, index):
-     checkbox_statuses = {}
-     urls = []
-     # Dictionary to track the state of each checkbox
-     for count2, url in enumerate(index['articles'], start=1):
-         checkbox_label = f"today_{count}_{count2}"
-         if st.session_state.reset:
-             st.session_state[checkbox_label] = False
-         checkbox_statuses[checkbox_label] = st.session_state.get(checkbox_label, False)
-
-
-     with st.expander(f"{count+1}• {index['title']['query']} | Generated Traffic: {index['formattedTraffic']}"):
-         articles = find_details2(index['articles'])
-         for count2, url in enumerate(articles, start=1):
-             checkbox_label = f"today_{count}_{count2}"
-             disabled = not checkbox_statuses[checkbox_label] and sum(checkbox_statuses.values()) >= MAX_CHECKED
-             checkbox_statuses[checkbox_label] = st.checkbox(
-                 f"{count2}• {url['articleTitle']} | {url['source']} | [Go To →]({url['url']})",
-                 value=checkbox_statuses[checkbox_label],
-                 key=checkbox_label,
-                 disabled=disabled
-             )
-             if checkbox_statuses[checkbox_label]:
-                 urls.append(url['url'])
-
-         # Button at the end of the expander
-         base_url = os.getenv("url")
-         query_params = "&".join([f"article-links[]={url}" for url in urls])
-         full_url = f"{base_url}{query_params}"
-         st.link_button("Open All Links" , url= full_url)
-
-
- categories = {
-     "Alle": "all",
-     "Gesundheit": "m",
-     "Business": "b",
-     "Headlines": "h",
-     "Sport": "s",
-     "Entertainment": "e",
-     "Technik": "t",
- }
-
- country_list = {
-     "Germany" : "DE",
-     "Austria" : "AT"
- }
-
- pytrend = TrendReq(hl='de-AT', tz=360, timeout=(10,50))
-
- if 'base_load_finished' not in st.session_state:
-     st.session_state["real_trending_searches"] = {}
-     st.session_state["base_data"] = {}
-     st.session_state["pn"] = "AT"
-
- if 'base_load_finished' not in st.session_state or st.session_state.reset:
-     with st.spinner("Loading Trends"):
-         st.session_state["today"] = {}
-         for country_name, pn_option in country_list.items():
-             st.session_state["base_data"][pn_option] = {}
-             st.session_state["real_trending_searches"][pn_option] = {}
-             st.session_state["today"][pn_option] = pytrend.today_searches(pn=pn_option)
-
-             for category_name, category_code in categories.items():
-                 st.session_state["base"] = pytrend.realtime_trending_searches(pn=pn_option, cat=category_code, count=75)
-                 st.session_state["base_data"][pn_option][category_name] = st.session_state["base"]
-                 st.session_state["real_trending_searches"][pn_option][category_name] = convert_into_pd(st.session_state["base"])
-
-
- MAX_CHECKED = 3
-
- def check_password():
-     """Returns `True` if the user had the correct password."""
-
-     def password_entered():
-         """Checks whether a password entered by the user is correct."""
-         if hmac.compare_digest(st.session_state["password"], os.environ.get("PASSWORD")):
-             st.session_state["password_correct"] = True
-             del st.session_state["password"] # Don't store the password.
-         else:
-             st.session_state["password_correct"] = False
-
-     # Return True if the password is validated.
-     if st.session_state.get("password_correct", False):
-         return True
-
-     # Show input for password.
-     st.text_input(
-         "Password", type="password", on_change=password_entered, key="password"
-     )
-     if "password_correct" in st.session_state:
-         st.error("😕 Password incorrect")
-     return False
-
-
- if not check_password():
-     st.stop() # Do not continue if check_password is not True.
-
-
-
-
-
-
- if 'selected_option' not in st.session_state:
-     st.session_state['selected_option'] = "default_value" # You can set a default value as needed
-
- img = Image.open(r"heute_tensora.png")
- st.sidebar.image(img)
-
- # Now, you can safely use st.session_state['selected_option']
- # Selectbox to choose a country
- selected_country = st.sidebar.selectbox("Choose a Country", ["AT", "DE"])
- feed_url1 = f'https://trends.google.de/trends/trendingsearches/daily/rss?geo={selected_country}'
-
- # Button to trigger actions
- if st.sidebar.button("Change Country"):
-     if selected_country == "AT":
-         st.session_state["pn"] = selected_country
-     elif selected_country == "DE":
-         st.session_state["pn"] = selected_country
-
- selected_option = st.sidebar.radio("Choose an option", ["Realzeit Anfragen", "Tagesaktuelle Anfragen", "Trends von Gestern"])
- st.warning("Die aufgelisteten Keywörter für erhöhte Reichweite in den Überschriften verwenden")
- if selected_option == "Tagesaktuelle Anfragen":
-
-     for count, index in enumerate(st.session_state["today"][selected_country], start=0):
-         try:
-             display_articles_for_today(count, index)
-         except Exception as e:
-             st.code(e)
-             continue
- elif selected_option == "Realzeit Anfragen":
-     choices_list = list(st.session_state["real_trending_searches"][selected_country].keys())
-     if len(categories) == len(choices_list):
-         st.session_state["base_load_finished"] = True
-     auswahl = st.selectbox("Select Ressort", choices_list)
-
-     display_articles_for_category(auswahl)
- elif selected_option == "Trends von Gestern":
-     display_trends_from_yesterday()
-
- if st.session_state.reset:
+ from pytrends.request import TrendReq
+ import streamlit as st
+ import pandas as pd
+ import xml.etree.ElementTree as ET
+ import requests
+ from datetime import datetime
+ import pytz
+ import hmac
+ import os
+ import time
+ from PIL import Image
+ from trend_crawl import crawl_url
+
+ def parse_url(url):
+     response = requests.get(url)
+
+     root = ET.fromstring(response.content)
+     return root
+
+ def convert_into_dict(req_json):
+
+     result = {}
+
+     # Iterate over each entry in the JSON data
+     for entry in req_json:
+         # Extract 'entityName' and 'searchQueries' from 'static_data'
+         static_data = entry.get("static_data", [])
+         if static_data and len(static_data[0]) >= 4:
+             entity_name = static_data[0][0] # First element
+             search_queries = static_data[0][3] # Fourth element
+         else:
+             entity_name = None
+             search_queries = None
+
+         # Initialize the entity in the result dictionary if not already present
+         if entity_name and entity_name not in result:
+             result[entity_name] = {
+                 "searchQueries": search_queries,
+                 "articles": []
+             }
+
+         # Extract articles from 'dynamic_data'
+         articles = entry.get("dynamic_data", {}).get("article", [])
+         for article in articles:
+             href = article.get("href")
+             article_title = article.get("title")
+
+             # Append the article information to the corresponding entity's article list
+             if entity_name:
+                 result[entity_name]["articles"].append({
+                     "href": href,
+                     "title": article_title
+                 })
+
+     return result
+
+
+ def find_details(req_json, gewünschter_titel):
+     gewünschte_details = []
+     for trend_info in req_json:
+         if trend_info['title'] == gewünschter_titel:
+
+             for article in trend_info['articles']:
+                 article_details = {
+                     'url': article['url'],
+                     'snippet': article['snippet'],
+                     'articleTitle': article['articleTitle'],
+                     'time': article['time'],
+                     'source' : article['source']
+                 }
+
+                 gewünschte_details.append(article_details)
+     return gewünschte_details
+
+ def find_details2(req_json):
+     gewünschte_details = []
+
+     for article in req_json:
+         article_details = {
+             'url': article['url'],
+             'snippet': article['snippet'],
+             'articleTitle': article['title'],
+             'source' : article['source']
+
+         }
+
+         gewünschte_details.append(article_details)
+     return gewünschte_details
+
+ if 'reset' not in st.session_state:
+     st.session_state.reset = False
+
+ def display_trends_from_yesterday():
+     checkbox_statuses = {}
+     urls = []
+
+     timezone = 'Europe/Vienna'
+     today = datetime.now(pytz.timezone(timezone)).date()
+     feed = parse_url(feed_url1)
+     entries = []
+     ns = {'ht': 'https://trends.google.de/trends/trendingsearches/daily'} # Define namespace
+     for item in feed.findall('.//item'):
+         pubDate = datetime.strptime(item.find('pubDate').text, '%a, %d %b %Y %H:%M:%S %z').date()
+         # Filter: skip if pubDate is today
+         if pubDate == today:
+             continue
+         entry = {
+             'title': item.find('title').text,
+             'pubDate': item.find('pubDate').text,
+             'approx_traffic': item.find('ht:approx_traffic', ns).text if item.find('ht:approx_traffic', ns) is not None else None,
+             'news_items': []
+         }
+         for news_item in item.findall('ht:news_item', ns):
+             news_details = {
+                 'title': news_item.find('ht:news_item_title', ns).text,
+                 'snippet': news_item.find('ht:news_item_snippet', ns).text,
+                 'url': news_item.find('ht:news_item_url', ns).text,
+                 'source': news_item.find('ht:news_item_source', ns).text
+             }
+             entry['news_items'].append(news_details)
+         entries.append(entry)
+
+     count = 1
+     for entry in entries:
+         with st.expander(f"{count}• {entry['title']} | Generated Traffic: {entry['approx_traffic']}"):
+             st.write(f"Veröffentlichungsdatum : {entry['pubDate']}")
+             for count2, link in enumerate(entry['news_items'], start=1):
+                 checkbox_label = f"yesterday_{count}_{count2}"
+                 if st.session_state.reset:
+                     st.session_state[checkbox_label] = False
+                 checkbox_statuses[checkbox_label] = st.session_state.get(checkbox_label, False)
+                 checkbox_statuses[checkbox_label] = st.checkbox(
+                     f"{count2}• {link['title']} | {link['source']} | [Go To →]({link['url']})",
+                     value=checkbox_statuses[checkbox_label],
+                     key=checkbox_label
+                 )
+                 if checkbox_statuses[checkbox_label]:
+                     urls.append(link['url'])
+
+             # Button at the end of the expander
+             base_url = os.getenv("url")
+             query_params = "&".join([f"article-links[]={url}" for url in urls])
+             full_url = f"{base_url}{query_params}"
+             st.link_button("Open All Links" , url= full_url)
+         count += 1
+
+
+
+ # Function to display articles for a specific category
+ def display_articles_for_category(pn_option):
+     checkbox_statuses = {}
+     urls = []
+
+
+     trending_data = st.session_state["real_trending_searches"][pn_option]
+
+     if st.session_state.get("reset", False):
+         for idx, (topic, data) in enumerate(trending_data.items()):
+             for article_index, _ in enumerate(data["articles"]):
+                 checkbox_label = f"{pn_option}_{idx}_{article_index + 1}"
+                 st.session_state[checkbox_label] = False
+
+     for idx, (topic, data) in enumerate(trending_data.items()):
+
+
+         with st.expander(f"{idx + 1}• {topic} | Generated Traffic: {data['searchQueries']}"):
+
+             for article_index, article in enumerate(data["articles"], start=1):
+                 checkbox_label = f"{pn_option}_{idx}_{article_index}"
+
+
+                 current_value = st.session_state.get(checkbox_label, False)
+                 checkbox_statuses[checkbox_label] = current_value
+
+
+                 disabled = (not current_value) and (sum(checkbox_statuses.values()) >= MAX_CHECKED)
+
+                 checkbox_statuses[checkbox_label] = st.checkbox(
+                     f"{article_index}{article['title']} | [Go To →]({article['href']})",
+                     value=current_value,
+                     key=checkbox_label,
+                     disabled=disabled
+                 )
+
+                 if checkbox_statuses[checkbox_label]:
+                     urls.append(article["href"])
+
+     base_url = os.getenv("url", "https://example.com/?")
+     query_params = "&".join([f"article-links[]={u}" for u in urls])
+     full_url = f"{base_url}{query_params}"
+     st.link_button("Open All Links", url=full_url)
+
+ # Function to render articles for today
+ def display_articles_for_today(count, index):
+     checkbox_statuses = {}
+     urls = []
+     # Dictionary to track the state of each checkbox
+     for count2, url in enumerate(index['articles'], start=1):
+         checkbox_label = f"today_{count}_{count2}"
+         if st.session_state.reset:
+             st.session_state[checkbox_label] = False
+         checkbox_statuses[checkbox_label] = st.session_state.get(checkbox_label, False)
+
+
+     with st.expander(f"{count+1}• {index['title']['query']} | Generated Traffic: {index['formattedTraffic']}"):
+         articles = find_details2(index['articles'])
+         for count2, url in enumerate(articles, start=1):
+             checkbox_label = f"today_{count}_{count2}"
+             disabled = not checkbox_statuses[checkbox_label] and sum(checkbox_statuses.values()) >= MAX_CHECKED
+             checkbox_statuses[checkbox_label] = st.checkbox(
+                 f"{count2}• {url['articleTitle']} | {url['source']} | [Go To →]({url['url']})",
+                 value=checkbox_statuses[checkbox_label],
+                 key=checkbox_label,
+                 disabled=disabled
+             )
+             if checkbox_statuses[checkbox_label]:
+                 urls.append(url['url'])
+
+         # Button at the end of the expander
+         base_url = os.getenv("url")
+         query_params = "&".join([f"article-links[]={url}" for url in urls])
+         full_url = f"{base_url}{query_params}"
+         st.link_button("Open All Links" , url= full_url)
+
+
+ categories = {
+     "Alle": "all"
+     # "Gesundheit": "m",
+     # "Business": "b",
+     # "Headlines": "h",
+     # "Sport": "s",
+     # "Entertainment": "e",
+     # "Technik": "t",
+ }
+
+ country_list = {
+     "Germany" : "DE",
+     "Austria" : "AT"
+ }
+
+ pytrend = TrendReq(hl='de-AT', tz=360, timeout=(10,50))
+
+ if 'base_load_finished' not in st.session_state:
+     st.session_state["real_trending_searches"] = {}
+     st.session_state["base_data"] = {}
+     st.session_state["pn"] = "AT"
+ print(st.session_state.reset)
+ if 'base_load_finished' not in st.session_state or st.session_state.reset:
+     with st.spinner("Loading Trends"):
+         st.session_state["today"] = {}
+         st.session_state["base"] = {}
+         for country_name, pn_option in country_list.items():
+             st.session_state["base_data"][pn_option] = {}
+             st.session_state["real_trending_searches"][pn_option] = {}
+             st.session_state["today"][pn_option] = pytrend.today_searches(pn=pn_option)
+
+             for category_name, category_code in categories.items():
+                 st.session_state["base"][pn_option] = crawl_url(url=f"https://trends.google.com/trends/trendingsearches/daily?geo={pn_option}&category=2")
+                 st.session_state["real_trending_searches"][pn_option] = convert_into_dict(st.session_state["base"][pn_option])
+         st.code(st.session_state["real_trending_searches"])
+         st.session_state["base_load_finished"] = True
+
+
+ MAX_CHECKED = 3
+
+ def check_password():
+     """Returns `True` if the user had the correct password."""
+
+     def password_entered():
+         """Checks whether a password entered by the user is correct."""
+         if hmac.compare_digest(st.session_state["password"], os.environ.get("PASSWORD")):
+             st.session_state["password_correct"] = True
+             del st.session_state["password"] # Don't store the password.
+         else:
+             st.session_state["password_correct"] = False
+
+     # Return True if the password is validated.
+     if st.session_state.get("password_correct", False):
+         return True
+
+     # Show input for password.
+     st.text_input(
+         "Password", type="password", on_change=password_entered, key="password"
+     )
+     if "password_correct" in st.session_state:
+         st.error("😕 Password incorrect")
+     return False
+
+
+ if not check_password():
+     st.stop() # Do not continue if check_password is not True.
+
+
+
+
+
+
+ if 'selected_option' not in st.session_state:
+     st.session_state['selected_option'] = "default_value" # You can set a default value as needed
+
+ img = Image.open(r"heute_tensora.png")
+ st.sidebar.image(img)
+
+ # Now, you can safely use st.session_state['selected_option']
+ # Selectbox to choose a country
+ selected_country = st.sidebar.selectbox("Choose a Country", ["AT"])#, "DE"
+ feed_url1 = f'https://trends.google.de/trends/trendingsearches/daily/rss?geo={selected_country}'
+
+ # Button to trigger actions
+ if st.sidebar.button("Change Country"):
+     if selected_country == "AT":
+         st.session_state["pn"] = selected_country
+     # elif selected_country == "DE":
+     #     st.session_state["pn"] = selected_country
+
+ selected_option = st.sidebar.radio("Choose an option", ["Realzeit Anfragen", "Tagesaktuelle Anfragen", "Trends von Gestern"])
+ st.warning("Die aufgelisteten Keywörter für erhöhte Reichweite in den Überschriften verwenden")
+ if selected_option == "Tagesaktuelle Anfragen":
+
+     for count, index in enumerate(st.session_state["today"][selected_country], start=0):
+         try:
+             display_articles_for_today(count, index)
+         except Exception as e:
+             st.code(e)
+             continue
+ elif selected_option == "Realzeit Anfragen":
+     #choices_list = list(st.session_state["real_trending_searches"][selected_country].keys())
+     #if len(categories) == len(choices_list):
+     #    st.session_state["base_load_finished"] = True
+     #auswahl = st.selectbox("Select Ressort", choices_list)
+
+     display_articles_for_category(st.session_state["pn"])
+ elif selected_option == "Trends von Gestern":
+     display_trends_from_yesterday()
+
+ if st.session_state.reset:
      st.session_state["reset"] = False
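The substantive change in app.py is that realtime trends no longer come from pytrends' realtime_trending_searches; instead crawl_url (from the new trend_crawl.py) scrapes the Google Trends page and convert_into_dict reshapes the crawler rows into the mapping consumed by display_articles_for_category. A minimal sketch of that reshaping, using a hypothetical single-row input:

sample = [{
    "static_data": [["Example trend", "1,000+", "2 hours ago", "1,000+ searches"]],  # hypothetical cell texts
    "dynamic_data": {"article": [
        {"href": "https://example.com/article", "title": "Example article"}  # hypothetical link
    ]}
}]

convert_into_dict(sample)
# -> {"Example trend": {"searchQueries": "1,000+ searches",
#                       "articles": [{"href": "https://example.com/article",
#                                     "title": "Example article"}]}}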
chromedriver.exe ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ccc3f36df42f3b2ecaa3b79b2840e1913c6c7ea15a8e2c98b5cc0eacbbda963a
+ size 17842176
trend_crawl.py ADDED
@@ -0,0 +1,110 @@
+ from selenium import webdriver
+ from selenium.webdriver.common.by import By
+ from selenium.webdriver.chrome.service import Service
+ from selenium.webdriver.chrome.options import Options
+ from selenium.webdriver.support.ui import WebDriverWait
+ from selenium.webdriver.support import expected_conditions as EC
+ from bs4 import BeautifulSoup
+ import os
+ import time
+
+ # Configure Chrome options
+ chrome_options = Options()
+ chrome_options.add_argument("--headless") # Run in headless mode
+ chrome_options.add_argument("--disable-gpu")
+ complete_starttime = time.time()
+
+ # URL of the Google Trends page
+
+ def setup_driver():
+     """Set up the Selenium WebDriver."""
+     script_dir = os.path.dirname(os.path.abspath(__file__))
+     driver_path = os.path.join(script_dir, 'chromedriver.exe')
+     chrome_options = Options()
+     chrome_options.add_argument("--headless") # Run in headless mode
+     chrome_options.add_argument("--disable-gpu")
+     driver = webdriver.Chrome(service=Service(driver_path), options=chrome_options)
+     return driver
+
+ def process_selenium_row(index, selenium_rows, driver):
+     """Extract dynamic data using Selenium by clicking on the row."""
+     max_retries = 3
+     for attempt in range(max_retries):
+         try:
+             row = selenium_rows[index]
+             row.click()
+
+             # Wait for elements with class="xZCHj" to load
+             WebDriverWait(driver, 10).until(
+                 EC.presence_of_all_elements_located((By.CLASS_NAME, "xZCHj"))
+             )
+
+             links = driver.find_elements(By.CLASS_NAME, "xZCHj")
+             dynamic_data = {
+                 "article": [
+                     {
+                         "href": link.get_attribute("href"),
+                         "title": link.text
+                     }
+                     for link in links
+                 ]
+             }
+
+             if dynamic_data["article"]:
+                 return dynamic_data
+         except Exception as e:
+             print(f"Error processing row {index} (Attempt {attempt + 1}): {e}")
+             selenium_rows = driver.find_elements(By.CSS_SELECTOR, '[jsname="oKdM2c"]')
+
+     print(f"Failed to process row {index} after {max_retries} attempts.")
+     return {"article": []}
+
+ def scrape_google_trends(driver, url):
+     """Scrape Google Trends data and save to JSON."""
+     all_data = []
+
+     try:
+         driver.get(url)
+
+         WebDriverWait(driver, 20).until(
+             EC.presence_of_element_located((By.CSS_SELECTOR, '[jsname="oKdM2c"]'))
+         )
+
+         soup = BeautifulSoup(driver.page_source, "html.parser")
+         tables = soup.select('[jsname="cC57zf"]')
+
+         for table in tables:
+             rows_bs = table.find_all("tr")
+             selenium_rows = driver.find_elements(By.CSS_SELECTOR, '[jsname="oKdM2c"]')
+
+             for index, row_bs in enumerate(rows_bs):
+                 static_data = [
+                     [div.get_text(strip=True) for div in cell.find_all("div")]
+                     for cell in row_bs.find_all("td")[1:4]
+                 ]
+                 print(static_data)
+                 dynamic_data = process_selenium_row(index, selenium_rows, driver)
+                 combined_row = {
+                     "static_data": static_data,
+                     "dynamic_data": dynamic_data
+                 }
+                 all_data.append(combined_row)
+
+         return all_data
+
+     except Exception as e:
+         print(f"An error occurred: {e}")
+
+     finally:
+         driver.quit()
+
+
+
+ def crawl_url(url):
+     """Main function to be called from another script."""
+     driver = setup_driver()
+     return scrape_google_trends(driver, url)
+
+ if __name__ == "__main__":
+     #crawl_url(url="https://trends.google.com/trends/trendingsearches/daily?geo=AT&category=2")
+     driver = setup_driver()
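As committed, the __main__ block only creates a driver and never visits a page. A minimal standalone run might look like the following sketch, reusing the URL from the commented-out call and guarding against the None that scrape_google_trends returns when scraping fails:

if __name__ == "__main__":
    data = crawl_url(url="https://trends.google.com/trends/trendingsearches/daily?geo=AT&category=2")
    if data:  # scrape_google_trends returns None on failure
        for row in data:
            print(row["static_data"], len(row["dynamic_data"]["article"]))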