poemsforaphrodite committed on
Commit 026bae0
1 Parent(s): a0e3f4d

Update app.py

Files changed (1)
  1. app.py +23 -19
app.py CHANGED
@@ -17,6 +17,7 @@ import requests
 from bs4 import BeautifulSoup
 
 load_dotenv()
+#test
 
 # Initialize Cohere client
 COHERE_API_KEY = os.environ["COHERE_API_KEY"]
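Note: load_dotenv() here comes from python-dotenv and populates os.environ from a local .env file before the lookup above; a minimal sketch of the expected setup, with a placeholder key value (not a real credential):

# .env (kept out of version control)
# COHERE_API_KEY=your-cohere-key-here

from dotenv import load_dotenv
import os

load_dotenv()                                   # reads .env into the process environment, if the file exists
COHERE_API_KEY = os.environ["COHERE_API_KEY"]   # raises KeyError when the variable is not set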
@@ -46,8 +47,9 @@ DF_PREVIEW_ROWS = 100
 # -------------
 
 def setup_streamlit():
-    st.set_page_config(page_title="GSC Relevency Score", layout="wide")
-    st.title("GSC Relevency Score")
+    st.set_page_config(page_title="Simple Google Search Console Data", layout="wide")
+    st.title(" Simple Google Search Console Data | June 2024")
+    st.markdown(f"### Lightweight GSC Data Extractor. (Max {MAX_ROWS:,} Rows)")
     st.divider()
 
 def init_session_state():
@@ -106,12 +108,20 @@ def calculate_relevancy_scores(df, model_type):
     return df
 
 def process_gsc_data(df):
-    df_sorted = df.sort_values(['page', 'clicks'], ascending=[True, False])
-    df_unique = df_sorted.drop_duplicates(subset='page', keep='first').copy()
+    # Filter for queries below position 10
+    df_filtered = df[df['position'] > 10].copy()
+
+    # Sort by impressions in descending order
+    df_sorted = df_filtered.sort_values(['impressions'], ascending=[False])
+
+    # Keep only the highest impression query for each page
+    df_unique = df_sorted.drop_duplicates(subset='page', keep='first')
+
     if 'relevancy_score' not in df_unique.columns:
         df_unique['relevancy_score'] = 0
     else:
         df_unique['relevancy_score'] = df_sorted.groupby('page')['relevancy_score'].first().values
+
     result = df_unique[['page', 'query', 'clicks', 'impressions', 'ctr', 'position', 'relevancy_score']]
     return result
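Note: a self-contained sketch of the new per-page selection above, run on a made-up toy DataFrame (column names match app.py; the sample values are illustrative only):

import pandas as pd

# Toy GSC export: two pages, several queries each
df = pd.DataFrame({
    'page': ['/a', '/a', '/b', '/b'],
    'query': ['q1', 'q2', 'q3', 'q4'],
    'clicks': [5, 1, 0, 2],
    'impressions': [50, 400, 300, 30],
    'ctr': [0.10, 0.0025, 0.0, 0.066],
    'position': [8.0, 14.2, 22.5, 11.0],
})

df_filtered = df[df['position'] > 10].copy()                        # keep only queries ranking below position 10
df_sorted = df_filtered.sort_values('impressions', ascending=False)
df_unique = df_sorted.drop_duplicates(subset='page', keep='first')  # highest-impression query per page
print(df_unique[['page', 'query', 'impressions', 'position']])
# '/a' keeps q2 (400 impressions), '/b' keeps q3 (300 impressions); q1 is dropped by the position filter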
 
@@ -178,13 +188,8 @@ def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, d
     return pd.DataFrame()
 
 def fetch_data_loading(webproperty, search_type, start_date, end_date, dimensions, device_type=None, model_type='english'):
-    with st.spinner('Fetching data...'):
+    with st.spinner('Fetching data and calculating relevancy scores...'):
         df = fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, device_type)
-        if not df.empty:
-            st.session_state.report_data = df
-            st.experimental_rerun()  # Rerun to display the fetched data immediately
-
-    with st.spinner('Calculating relevancy scores...'):
         if not df.empty:
             df = calculate_relevancy_scores(df, model_type)
             processed_df = process_gsc_data(df)
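Note: st.spinner is a context manager, so a single block keeps one status message on screen for both the fetch and the scoring step; a generic sketch of the pattern (slow_step is a placeholder, not a function from app.py):

import time
import streamlit as st

def slow_step():
    time.sleep(2)          # stand-in for the GSC fetch and Cohere scoring
    return "done"

with st.spinner('Fetching data and calculating relevancy scores...'):
    result = slow_step()   # the spinner stays visible until this block exits
st.write(result)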
@@ -303,9 +308,9 @@ def show_paginated_dataframe(report, rows_per_page=20):
 
     report['clickable_url'] = report['page'].apply(make_clickable)
 
-    # Reorder columns to put clickable_url first
-    columns = ['clickable_url'] + [col for col in report.columns if col != 'clickable_url' and col != 'page']
-    report = report[columns]
+    # Reorder columns to put clickable_url first and sort by impressions
+    columns = ['clickable_url', 'query', 'impressions', 'clicks', 'ctr', 'position', 'relevancy_score']
+    report = report[columns].sort_values('impressions', ascending=False)
 
     total_rows = len(report)
     total_pages = (total_rows - 1) // rows_per_page + 1
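Note: the total_pages line above is ceiling division; a small worked example with hypothetical numbers (page_number and the variable names below are illustrative, not from app.py):

rows_per_page = 20
total_rows = 45
total_pages = (total_rows - 1) // rows_per_page + 1   # (45 - 1) // 20 + 1 == 3

page_number = 2                                        # 1-based page index
start = (page_number - 1) * rows_per_page              # 20
end = start + rows_per_page                            # 40
# page_df = report.iloc[start:end]                     # rows 20-39 of the sorted report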
@@ -339,7 +344,7 @@ def main():
     if 'auth_flow' not in st.session_state or 'auth_url' not in st.session_state:
         st.session_state.auth_flow, st.session_state.auth_url = google_auth(client_config)
 
-    query_params = st.query_params
+    query_params = st.experimental_get_query_params()
     auth_code = query_params.get("code", [None])[0]
 
     if auth_code and 'credentials' not in st.session_state:
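Note: the two Streamlit query-parameter APIs return different shapes, which is what the [0] indexing in the hunk above relies on; a minimal comparison, assuming a URL ending in ?code=abc and a Streamlit version that still ships the experimental API:

import streamlit as st

# Older API: returns a dict of lists, e.g. {"code": ["abc"]}
params = st.experimental_get_query_params()
auth_code = params.get("code", [None])[0]   # -> "abc", or None when the parameter is absent

# Newer API (Streamlit >= 1.30): st.query_params behaves like a dict of plain strings,
# so st.query_params.get("code") would be "abc" and indexing [0] would yield just "a".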
@@ -357,7 +362,7 @@ def main():
     webproperty = show_property_selector(properties, account)
     search_type = show_search_type_selector()
     date_range_selection = show_date_range_selector()
-    model_type = show_model_type_selector()
+    model_type = show_model_type_selector()  # Add this line
     if date_range_selection == 'Custom Range':
         show_custom_date_inputs()
         start_date, end_date = st.session_state.custom_start_date, st.session_state.custom_end_date
@@ -371,15 +376,14 @@ def main():
 
     if st.button("Fetch Data"):
         with st.spinner('Fetching data...'):
-            st.session_state.report_data = fetch_data_loading(webproperty, search_type, start_date, end_date, selected_dimensions, model_type=model_type)
+            st.session_state.report_data = fetch_data_loading(webproperty, search_type, start_date, end_date, selected_dimensions, model_type=model_type)  # Update this line
 
     if st.session_state.report_data is not None and not st.session_state.report_data.empty:
-        print("hr;;")
-        print(st.session_state.report_data)
-
         show_paginated_dataframe(st.session_state.report_data)
         download_csv_link(st.session_state.report_data)
     elif st.session_state.report_data is not None:
         st.warning("No data found for the selected criteria.")
+
+
 if __name__ == "__main__":
     main()
 