poemsforaphrodite committed
Commit 0d6414e
1 Parent(s): ea21800

Update app.py

Files changed (1): app.py (+51, -212)
app.py CHANGED
@@ -1,6 +1,7 @@
 # Standard library imports
 import datetime
 import base64
+import os

 # Related third-party imports
 import streamlit as st
@@ -10,20 +11,19 @@ from googleapiclient.discovery import build
 from dotenv import load_dotenv
 import pandas as pd
 import searchconsole
-import os
 import cohere
 from sklearn.metrics.pairwise import cosine_similarity
 import requests
 from bs4 import BeautifulSoup

 load_dotenv()
+
 # Initialize Cohere client
 COHERE_API_KEY = os.environ["COHERE_API_KEY"]
 co = cohere.Client(COHERE_API_KEY)

 # Configuration: Set to True if running locally, False if running on Streamlit Cloud
 IS_LOCAL = False
-#==TREs

 # Constants
 SEARCH_TYPES = ["web", "image", "video", "news", "discover", "googleNews"]
@@ -46,14 +46,9 @@ DF_PREVIEW_ROWS = 100
 # -------------

 def setup_streamlit():
-    """
-    Configures Streamlit's page settings and displays the app title and markdown information.
-    Sets the page layout, title, and markdown content with links and app description.
-    """
     st.set_page_config(page_title="✨ Simple Google Search Console Data | LeeFoot.co.uk", layout="wide")
     st.title("✨ Simple Google Search Console Data | June 2024")
     st.markdown(f"### Lightweight GSC Data Extractor. (Max {MAX_ROWS:,} Rows)")
-
     st.markdown(
         """
         <p>
@@ -65,10 +60,6 @@ def setup_streamlit():
     st.divider()

 def init_session_state():
-    """
-    Initialises or updates the Streamlit session state variables for property selection,
-    search type, date range, dimensions, and device type.
-    """
     if 'selected_property' not in st.session_state:
         st.session_state.selected_property = None
     if 'selected_search_type' not in st.session_state:
@@ -88,11 +79,11 @@ def init_session_state():
     if 'custom_end_date' not in st.session_state:
         st.session_state.custom_end_date = datetime.date.today()

+# -------------
+# Data Processing Functions
+# -------------

 def fetch_content(url):
-    """
-    Fetches the content of a webpage.
-    """
     try:
         response = requests.get(url)
         response.raise_for_status()
@@ -101,66 +92,39 @@ def fetch_content(url):
         return content
     except requests.RequestException as e:
         return str(e)

 def generate_embeddings(text_list):
-    """
-    Generates embeddings for a list of texts using Cohere's API.
-    """
     if not text_list:
         return []
-
     model = 'embed-english-v3.0'
     input_type = 'search_document'
     response = co.embed(model=model, texts=text_list, input_type=input_type)
     embeddings = response.embeddings
     return embeddings

-
 def calculate_relevancy_scores(df):
-    """
-    Calculates relevancy scores for each row in the dataframe.
-    """
     try:
-        st.write("Calculating relevancy scores...")
-        st.write(f"Input DataFrame shape: {df.shape}")
-        st.write(f"Input DataFrame columns: {df.columns}")
-
         page_contents = [fetch_content(url) for url in df['page']]
-        st.write(f"Fetched {len(page_contents)} page contents")
-
         page_embeddings = generate_embeddings(page_contents)
-        st.write(f"Generated {len(page_embeddings)} page embeddings")
-
         query_embeddings = generate_embeddings(df['query'].tolist())
-        st.write(f"Generated {len(query_embeddings)} query embeddings")
-
         relevancy_scores = cosine_similarity(query_embeddings, page_embeddings).diagonal()
-        st.write(f"Calculated {len(relevancy_scores)} relevancy scores")
-        st.write(f"Sample relevancy scores: {relevancy_scores[:5]}")
-
         df = df.assign(relevancy_score=relevancy_scores)
-        st.write(f"Assigned relevancy scores to DataFrame")
-        st.write(f"DataFrame shape after assigning scores: {df.shape}")
-        st.write(f"DataFrame columns after assigning scores: {df.columns}")
-        st.write(f"Sample relevancy scores from DataFrame: {df['relevancy_score'].head()}")
-
     except Exception as e:
         st.warning(f"Error calculating relevancy scores: {e}")
-        df = df.assign(relevancy_score=0)  # Default value if calculation fails
-
+        df = df.assign(relevancy_score=0)
     return df
-def fetch_data_loading(webproperty, search_type, start_date, end_date, dimensions, device_type=None):
-    """
-    Fetches Google Search Console data with a loading indicator and calculates relevancy scores.
-    """
-    with st.spinner('Fetching data and calculating relevancy scores...'):
-        df = fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, device_type)
-        if not df.empty:
-            df = calculate_relevancy_scores(df)
-        st.write(f"Data fetched. Shape: {df.shape}")
-        return df
-# -------------

+def process_gsc_data(df):
+    df_sorted = df.sort_values(['page', 'clicks'], ascending=[True, False])
+    df_unique = df_sorted.drop_duplicates(subset='page', keep='first').copy()
+    if 'relevancy_score' not in df_unique.columns:
+        df_unique['relevancy_score'] = 0
+    else:
+        df_unique['relevancy_score'] = df_sorted.groupby('page')['relevancy_score'].first().values
+    result = df_unique[['page', 'query', 'clicks', 'impressions', 'ctr', 'position', 'relevancy_score']]
+    return result
+
+# -------------
 # Google Authentication Functions
 # -------------
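For reference, the relevancy computation this commit keeps pairs each query with its own page: cosine_similarity returns the full query-by-page similarity matrix, and .diagonal() keeps only the matched (query i, page i) scores. A minimal standalone sketch of that pattern, with toy vectors standing in for the embeddings co.embed() would return:

# Sketch only: the 2-D vectors below are stand-ins for real Cohere embeddings.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

query_embeddings = np.array([[1.0, 0.0], [0.0, 1.0]])  # one row per query
page_embeddings = np.array([[0.9, 0.1], [0.2, 0.8]])   # one row per page, same row order

scores = cosine_similarity(query_embeddings, page_embeddings).diagonal()
print(scores)  # scores[i] compares query i against page i only

Note that this builds the full n-by-n matrix only to keep n values; for large reports, a row-wise dot product of L2-normalised vectors would give the same scores without the quadratic cost.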
 
@@ -177,30 +141,20 @@ def load_config():
     return client_config

 def init_oauth_flow(client_config):
-    """
-    Initializes the OAuth flow for Google API authentication using the client configuration.
-    Sets the necessary scopes and returns the configured Flow object.
-    """
     scopes = ["https://www.googleapis.com/auth/webmasters.readonly"]
     flow = Flow.from_client_config(
         client_config,
         scopes=scopes,
-        redirect_uri=client_config["web"]["redirect_uris"][0]  # Changed from "installed" to "web"
+        redirect_uri=client_config["web"]["redirect_uris"][0]
     )
     return flow
+
 def google_auth(client_config):
-    """
-    Starts the Google authentication process using OAuth.
-    Generates and returns the OAuth flow and the authentication URL.
-    """
     flow = init_oauth_flow(client_config)
     auth_url, _ = flow.authorization_url(prompt="consent")
     return flow, auth_url
+
 def auth_search_console(client_config, credentials):
-    """
-    Authenticates the user with the Google Search Console API using provided credentials.
-    Returns an authenticated searchconsole client.
-    """
     token = {
         "token": credentials.token,
         "refresh_token": credentials.refresh_token,
@@ -217,24 +171,14 @@ def auth_search_console(client_config, credentials):
 # -------------

 def list_gsc_properties(credentials):
-    """
-    Lists all Google Search Console properties accessible with the given credentials.
-    Returns a list of property URLs or a message if no properties are found.
-    """
     service = build('webmasters', 'v3', credentials=credentials)
     site_list = service.sites().list().execute()
     return [site['siteUrl'] for site in site_list.get('siteEntry', [])] or ["No properties found"]

 def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, device_type=None):
-    """
-    Fetches Google Search Console data for a specified property, date range, dimensions, and device type.
-    Handles errors and returns the data as a DataFrame.
-    """
     query = webproperty.query.range(start_date, end_date).search_type(search_type).dimension(*dimensions)
-
     if 'device' in dimensions and device_type and device_type != 'All Devices':
         query = query.filter('device', 'equals', device_type.lower())
-
     try:
         df = query.limit(MAX_ROWS).get().to_dataframe()
         return process_gsc_data(df)
@@ -242,88 +186,22 @@ def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, d
         show_error(e)
         return pd.DataFrame()

-def process_gsc_data(df):
-    """
-    Processes the GSC data to return only unique pages with their first query and relevancy score.
-    """
-    st.write("Processing GSC data...")
-    st.write(f"Input DataFrame shape: {df.shape}")
-    st.write(f"Input DataFrame columns: {df.columns}")
-
-    # Sort the dataframe by page and clicks (descending) to get the most relevant query first
-    df_sorted = df.sort_values(['page', 'clicks'], ascending=[True, False])
-
-    # Get the first occurrence of each page (which will be the one with the highest clicks)
-    df_unique = df_sorted.drop_duplicates(subset='page', keep='first').copy()
-
-    st.write(f"Unique pages DataFrame shape: {df_unique.shape}")
-    st.write(f"Unique pages DataFrame columns: {df_unique.columns}")
-
-    # Ensure 'relevancy_score' column exists and is preserved
-    if 'relevancy_score' not in df_unique.columns:
-        st.write("Relevancy score column not found, adding default values")
-        df_unique['relevancy_score'] = 0  # Default value if column doesn't exist
-    else:
-        st.write("Preserving relevancy scores")
-        # Make sure to keep the original relevancy scores
-        df_unique['relevancy_score'] = df_sorted.groupby('page')['relevancy_score'].first().values
-
-    # Select only the relevant columns, including the relevancy_score
-    result = df_unique[['page', 'query', 'clicks', 'impressions', 'ctr', 'position', 'relevancy_score']]
-
-    st.write(f"Processed data. Shape: {result.shape}")
-    st.write(f"Columns: {result.columns}")
-    st.write(f"Sample relevancy scores: {result['relevancy_score'].head()}")
-
-    return result
-
-
 def fetch_data_loading(webproperty, search_type, start_date, end_date, dimensions, device_type=None):
-    """
-    Fetches Google Search Console data with a loading indicator and calculates relevancy scores.
-    """
     with st.spinner('Fetching data and calculating relevancy scores...'):
         df = fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, device_type)
-        st.write(f"Data fetched. Shape: {df.shape}")
-        st.write(f"Columns: {df.columns}")
-
         if not df.empty:
             df = calculate_relevancy_scores(df)
-            st.write("Relevancy scores calculated.")
-            st.write(f"DataFrame shape after calculating scores: {df.shape}")
-            st.write(f"DataFrame columns after calculating scores: {df.columns}")
-            st.write(f"Sample relevancy scores after calculation: {df['relevancy_score'].head()}")
-
         processed_df = process_gsc_data(df)
-        st.write("Data processed.")
-        st.write(f"Final DataFrame shape: {processed_df.shape}")
-        st.write(f"Final DataFrame columns: {processed_df.columns}")
-        st.write(f"Final sample relevancy scores: {processed_df['relevancy_score'].head()}")
-
         return processed_df
-    """
-    Fetches Google Search Console data with a loading indicator. Utilises 'fetch_gsc_data' for data retrieval.
-    Returns the fetched data as a DataFrame.
-    """
-    with st.spinner('Fetching data...'):
-        return fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, device_type)

 # -------------
 # Utility Functions
 # -------------

 def update_dimensions(selected_search_type):
-    """
-    Updates and returns the list of dimensions based on the selected search type.
-    Adds 'device' to dimensions if the search type requires it.
-    """
     return BASE_DIMENSIONS + ['device'] if selected_search_type in SEARCH_TYPES else BASE_DIMENSIONS

 def calc_date_range(selection, custom_start=None, custom_end=None):
-    """
-    Calculates the date range based on the selected range option.
-    Returns the start and end dates for the specified range.
-    """
     range_map = {
         'Last 7 Days': 7,
         'Last 30 Days': 30,
@@ -341,17 +219,9 @@ def calc_date_range(selection, custom_start=None, custom_end=None):
     return today - datetime.timedelta(days=range_map.get(selection, 0)), today

 def show_error(e):
-    """
-    Displays an error message in the Streamlit app.
-    Formats and shows the provided error 'e'.
-    """
     st.error(f"An error occurred: {e}")

 def property_change():
-    """
-    Updates the 'selected_property' in the Streamlit session state.
-    Triggered on change of the property selection.
-    """
     st.session_state.selected_property = st.session_state['selected_property_selector']

 # -------------
@@ -359,19 +229,12 @@ def property_change():
 # -------------

 def show_dataframe(report):
-    """
-    Shows a preview of the first 100 rows of the processed report DataFrame in an expandable section.
-    """
     with st.expander("Preview the First 100 Rows (Unique Pages with Top Query)"):
         st.dataframe(report.head(DF_PREVIEW_ROWS))

 def download_csv_link(report):
-    """
-    Generates and displays a download link for the report DataFrame in CSV format.
-    """
     def to_csv(df):
         return df.to_csv(index=False, encoding='utf-8-sig')
-
     csv = to_csv(report)
     b64_csv = base64.b64encode(csv.encode()).decode()
     href = f'<a href="data:file/csv;base64,{b64_csv}" download="search_console_data.csv">Download CSV File</a>'
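The download link above embeds the CSV directly in a base64 data: URI, so nothing is written to disk on the server. A minimal sketch of the same encoding step, runnable outside Streamlit with a toy DataFrame:

# Sketch only: a toy DataFrame stands in for the real GSC report.
import base64
import pandas as pd

df = pd.DataFrame({"page": ["/a", "/b"], "clicks": [3, 1]})
csv = df.to_csv(index=False, encoding='utf-8-sig')
b64_csv = base64.b64encode(csv.encode()).decode()
href = f'<a href="data:file/csv;base64,{b64_csv}" download="search_console_data.csv">Download CSV File</a>'
print(href[:60] + "...")  # the browser decodes the URI when the link is clicked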
@@ -382,20 +245,12 @@ def download_csv_link(report):
 # -------------

 def show_google_sign_in(auth_url):
-    """
-    Displays the Google sign-in button and authentication URL in the Streamlit sidebar.
-    """
     with st.sidebar:
         if st.button("Sign in with Google"):
-            # Open the authentication URL
             st.write('Please click the link below to sign in:')
             st.markdown(f'[Google Sign-In]({auth_url})', unsafe_allow_html=True)

 def show_property_selector(properties, account):
-    """
-    Displays a dropdown selector for Google Search Console properties.
-    Returns the selected property's webproperty object.
-    """
     selected_property = st.selectbox(
         "Select a Search Console Property:",
         properties,
@@ -407,10 +262,6 @@ def show_property_selector(properties, account):
     return account[selected_property]

 def show_search_type_selector():
-    """
-    Displays a dropdown selector for choosing the search type.
-    Returns the selected search type.
-    """
     return st.selectbox(
         "Select Search Type:",
         SEARCH_TYPES,
@@ -419,10 +270,6 @@ def show_search_type_selector():
     )

 def show_date_range_selector():
-    """
-    Displays a dropdown selector for choosing the date range.
-    Returns the selected date range option.
-    """
     return st.selectbox(
         "Select Date Range:",
         DATE_RANGE_OPTIONS,
@@ -431,18 +278,10 @@ def show_date_range_selector():
     )

 def show_custom_date_inputs():
-    """
-    Displays date input fields for custom date range selection.
-    Updates session state with the selected dates.
-    """
     st.session_state.custom_start_date = st.date_input("Start Date", st.session_state.custom_start_date)
     st.session_state.custom_end_date = st.date_input("End Date", st.session_state.custom_end_date)

 def show_dimensions_selector(search_type):
-    """
-    Displays a multi-select box for choosing dimensions based on the selected search type.
-    Returns the selected dimensions.
-    """
     available_dimensions = update_dimensions(search_type)
     return st.multiselect(
         "Select Dimensions:",
@@ -451,32 +290,24 @@ def show_dimensions_selector(search_type):
         key='dimensions_selector'
     )

-def show_fetch_data_button(webproperty, search_type, start_date, end_date, selected_dimensions):
-    """
-    Displays a button to fetch data based on selected parameters.
-    Shows the report DataFrame and download link upon successful data fetching.
-    """
-    if st.button("Fetch Data"):
-        report = fetch_data_loading(webproperty, search_type, start_date, end_date, selected_dimensions)
-
-        if report is not None and not report.empty:
-            show_dataframe(report)
-            download_csv_link(report)
-        else:
-            st.warning("No data found for the selected criteria.")
-
-
-
 def show_paginated_dataframe(report, rows_per_page=20):
-    """
-    Displays the DataFrame with custom pagination.
-    """
     total_rows = len(report)
     total_pages = (total_rows - 1) // rows_per_page + 1

-    page = st.number_input("Page", min_value=1, max_value=total_pages, step=1)
+    if 'current_page' not in st.session_state:
+        st.session_state.current_page = 1

-    start_idx = (page - 1) * rows_per_page
+    col1, col2, col3 = st.columns([1, 3, 1])
+    with col1:
+        if st.button("Previous", disabled=st.session_state.current_page == 1):
+            st.session_state.current_page -= 1
+    with col2:
+        st.write(f"Page {st.session_state.current_page} of {total_pages}")
+    with col3:
+        if st.button("Next", disabled=st.session_state.current_page == total_pages):
+            st.session_state.current_page += 1
+
+    start_idx = (st.session_state.current_page - 1) * rows_per_page
     end_idx = start_idx + rows_per_page
     st.dataframe(report.iloc[start_idx:end_idx])
@@ -487,16 +318,18 @@ def show_paginated_dataframe(report, rows_per_page=20):
 def main():
     setup_streamlit()
     client_config = load_config()
-    st.session_state.auth_flow, st.session_state.auth_url = google_auth(client_config)
+
+    if 'auth_flow' not in st.session_state or 'auth_url' not in st.session_state:
+        st.session_state.auth_flow, st.session_state.auth_url = google_auth(client_config)

     query_params = st.experimental_get_query_params()
     auth_code = query_params.get("code", [None])[0]

-    if auth_code and not st.session_state.get('credentials'):
+    if auth_code and 'credentials' not in st.session_state:
         st.session_state.auth_flow.fetch_token(code=auth_code)
         st.session_state.credentials = st.session_state.auth_flow.credentials

-    if not st.session_state.get('credentials'):
+    if 'credentials' not in st.session_state:
         show_google_sign_in(st.session_state.auth_url)
     else:
         init_session_state()
@@ -516,13 +349,19 @@ def main():

         selected_dimensions = show_dimensions_selector(search_type)

-        if st.button("Fetch Data and Display"):
-            report = fetch_data_loading(webproperty, search_type, start_date, end_date, selected_dimensions)
+        if 'report_data' not in st.session_state:
+            st.session_state.report_data = None
+
+        if st.button("Fetch Data"):
+            with st.spinner('Fetching data...'):
+                st.session_state.report_data = fetch_data_loading(webproperty, search_type, start_date, end_date, selected_dimensions)

-            if report is not None and not report.empty:
-                show_paginated_dataframe(report)
-            else:
-                st.warning("No data found for the selected criteria.")
+        if st.session_state.report_data is not None and not st.session_state.report_data.empty:
+            show_paginated_dataframe(st.session_state.report_data)
+            download_csv_link(st.session_state.report_data)
+        elif st.session_state.report_data is not None:
+            st.warning("No data found for the selected criteria.")
+

 if __name__ == "__main__":
     main()
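The reworked show_paginated_dataframe uses a common Streamlit pattern: keep the page index in st.session_state so it survives the rerun that every button click triggers, and disable Previous/Next at the bounds. A self-contained sketch of the same pattern (the toy DataFrame is illustrative only):

# Standalone sketch of the session-state pagination pattern; run with `streamlit run`.
import pandas as pd
import streamlit as st

report = pd.DataFrame({"page": [f"/post-{i}" for i in range(95)]})  # toy data
rows_per_page = 20
total_pages = (len(report) - 1) // rows_per_page + 1

if "current_page" not in st.session_state:
    st.session_state.current_page = 1  # persists across Streamlit reruns

col1, col2, col3 = st.columns([1, 3, 1])
with col1:
    if st.button("Previous", disabled=st.session_state.current_page == 1):
        st.session_state.current_page -= 1
with col2:
    st.write(f"Page {st.session_state.current_page} of {total_pages}")
with col3:
    if st.button("Next", disabled=st.session_state.current_page == total_pages):
        st.session_state.current_page += 1

start = (st.session_state.current_page - 1) * rows_per_page
st.dataframe(report.iloc[start:start + rows_per_page])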
 
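Both changes to main() (creating the OAuth flow only when it is absent from st.session_state, and keeping the fetched report there) are instances of the same guard pattern: without it, the pagination buttons would rerun the script, recreate the flow, and discard the report. The pattern in isolation, with a hypothetical expensive_setup() standing in for google_auth() or a GSC fetch:

# Sketch of the st.session_state guard pattern; expensive_setup() is a made-up stand-in.
import streamlit as st

def expensive_setup():
    return {"created": True}  # imagine an OAuth flow or an API call here

if "resource" not in st.session_state:
    st.session_state.resource = expensive_setup()  # runs only on the first script run

st.write("Reusing across reruns:", st.session_state.resource)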