Spaces:
Running
Running
poemsforaphrodite
committed on
Commit
•
026bae0
1
Parent(s):
a0e3f4d
Update app.py
Browse files
app.py
CHANGED
@@ -17,6 +17,7 @@ import requests
|
|
17 |
from bs4 import BeautifulSoup
|
18 |
|
19 |
load_dotenv()
|
|
|
20 |
|
21 |
# Initialize Cohere client
|
22 |
COHERE_API_KEY = os.environ["COHERE_API_KEY"]
|
@@ -46,8 +47,9 @@ DF_PREVIEW_ROWS = 100
|
|
46 |
# -------------
|
47 |
|
48 |
def setup_streamlit():
|
49 |
-
st.set_page_config(page_title="
|
50 |
-
st.title("
|
|
|
51 |
st.divider()
|
52 |
|
53 |
def init_session_state():
|
@@ -106,12 +108,20 @@ def calculate_relevancy_scores(df, model_type):
|
|
106 |
return df
|
107 |
|
108 |
def process_gsc_data(df):
|
109 |
-
|
110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
if 'relevancy_score' not in df_unique.columns:
|
112 |
df_unique['relevancy_score'] = 0
|
113 |
else:
|
114 |
df_unique['relevancy_score'] = df_sorted.groupby('page')['relevancy_score'].first().values
|
|
|
115 |
result = df_unique[['page', 'query', 'clicks', 'impressions', 'ctr', 'position', 'relevancy_score']]
|
116 |
return result
|
117 |
|
@@ -178,13 +188,8 @@ def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, d
|
|
178 |
return pd.DataFrame()
|
179 |
|
180 |
def fetch_data_loading(webproperty, search_type, start_date, end_date, dimensions, device_type=None, model_type='english'):
|
181 |
-
with st.spinner('Fetching data...'):
|
182 |
df = fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, device_type)
|
183 |
-
if not df.empty:
|
184 |
-
st.session_state.report_data = df
|
185 |
-
st.experimental_rerun() # Rerun to display the fetched data immediately
|
186 |
-
|
187 |
-
with st.spinner('Calculating relevancy scores...'):
|
188 |
if not df.empty:
|
189 |
df = calculate_relevancy_scores(df, model_type)
|
190 |
processed_df = process_gsc_data(df)
|
@@ -303,9 +308,9 @@ def show_paginated_dataframe(report, rows_per_page=20):
|
|
303 |
|
304 |
report['clickable_url'] = report['page'].apply(make_clickable)
|
305 |
|
306 |
-
# Reorder columns to put clickable_url first
|
307 |
-
columns = ['clickable_url'
|
308 |
-
report = report[columns]
|
309 |
|
310 |
total_rows = len(report)
|
311 |
total_pages = (total_rows - 1) // rows_per_page + 1
|
@@ -339,7 +344,7 @@ def main():
|
|
339 |
if 'auth_flow' not in st.session_state or 'auth_url' not in st.session_state:
|
340 |
st.session_state.auth_flow, st.session_state.auth_url = google_auth(client_config)
|
341 |
|
342 |
-
query_params = st.
|
343 |
auth_code = query_params.get("code", [None])[0]
|
344 |
|
345 |
if auth_code and 'credentials' not in st.session_state:
|
@@ -357,7 +362,7 @@ def main():
|
|
357 |
webproperty = show_property_selector(properties, account)
|
358 |
search_type = show_search_type_selector()
|
359 |
date_range_selection = show_date_range_selector()
|
360 |
-
model_type = show_model_type_selector()
|
361 |
if date_range_selection == 'Custom Range':
|
362 |
show_custom_date_inputs()
|
363 |
start_date, end_date = st.session_state.custom_start_date, st.session_state.custom_end_date
|
@@ -371,15 +376,14 @@ def main():
|
|
371 |
|
372 |
if st.button("Fetch Data"):
|
373 |
with st.spinner('Fetching data...'):
|
374 |
-
st.session_state.report_data = fetch_data_loading(webproperty, search_type, start_date, end_date, selected_dimensions, model_type=model_type)
|
375 |
|
376 |
if st.session_state.report_data is not None and not st.session_state.report_data.empty:
|
377 |
-
print("hr;;")
|
378 |
-
print(st.session_state.report_data)
|
379 |
-
|
380 |
show_paginated_dataframe(st.session_state.report_data)
|
381 |
download_csv_link(st.session_state.report_data)
|
382 |
elif st.session_state.report_data is not None:
|
383 |
st.warning("No data found for the selected criteria.")
|
|
|
|
|
384 |
if __name__ == "__main__":
|
385 |
main()
|
|
|
17 |
from bs4 import BeautifulSoup
|
18 |
|
19 |
load_dotenv()
|
20 |
+
#test
|
21 |
|
22 |
# Initialize Cohere client
|
23 |
COHERE_API_KEY = os.environ["COHERE_API_KEY"]
|
|
|
47 |
# -------------
|
48 |
|
49 |
def setup_streamlit():
    """Configure the Streamlit page and render the application header.

    Applies the page title and wide layout, then draws the main title,
    a sub-heading that reports the ``MAX_ROWS`` limit, and a divider.
    """
    page_settings = {
        "page_title": "Simple Google Search Console Data",
        "layout": "wide",
    }
    st.set_page_config(**page_settings)

    heading = "✨ Simple Google Search Console Data | June 2024"
    st.title(heading)

    # The row limit is rendered with thousands separators.
    subheading = f"### Lightweight GSC Data Extractor. (Max {MAX_ROWS:,} Rows)"
    st.markdown(subheading)

    st.divider()
|
54 |
|
55 |
def init_session_state():
|
|
|
108 |
return df
|
109 |
|
110 |
def process_gsc_data(df):
    """Reduce Search Console rows to the top query per page beyond position 10.

    Rows whose ``position`` is greater than 10 are kept, sorted by
    ``impressions`` in descending order, and then de-duplicated so that each
    ``page`` keeps only its highest-impression row.

    Args:
        df: DataFrame with at least the columns ``page``, ``query``,
            ``clicks``, ``impressions``, ``ctr`` and ``position``. A
            ``relevancy_score`` column is optional. The input is not modified.

    Returns:
        A new DataFrame with the columns ``page``, ``query``, ``clicks``,
        ``impressions``, ``ctr``, ``position`` and ``relevancy_score``, one
        row per page, ordered by impressions descending. When the input has
        no ``relevancy_score`` column, the column is filled with 0.

    Raises:
        KeyError: If one of the required columns is missing from ``df``.
    """
    # Keep only queries ranked worse than the tenth position (position > 10).
    beyond_top_ten = df[df['position'] > 10]

    # Highest-impression rows first, so keep='first' picks the top query.
    by_impressions = beyond_top_ten.sort_values('impressions', ascending=False)

    # Copy so the column assignment below never writes into a view.
    top_per_page = by_impressions.drop_duplicates(subset='page', keep='first').copy()

    # The retained row already carries the relevancy score that belongs to its
    # own page/query pair. Re-deriving it through groupby() would return the
    # scores ordered by page key rather than by impressions and attach them to
    # the wrong rows, so the column is only created when it is absent.
    if 'relevancy_score' not in top_per_page.columns:
        top_per_page['relevancy_score'] = 0

    return top_per_page[
        ['page', 'query', 'clicks', 'impressions', 'ctr', 'position', 'relevancy_score']
    ]
|
127 |
|
|
|
188 |
return pd.DataFrame()
|
189 |
|
190 |
def fetch_data_loading(webproperty, search_type, start_date, end_date, dimensions, device_type=None, model_type='english'):
|
191 |
+
with st.spinner('Fetching data and calculating relevancy scores...'):
|
192 |
df = fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, device_type)
|
|
|
|
|
|
|
|
|
|
|
193 |
if not df.empty:
|
194 |
df = calculate_relevancy_scores(df, model_type)
|
195 |
processed_df = process_gsc_data(df)
|
|
|
308 |
|
309 |
report['clickable_url'] = report['page'].apply(make_clickable)
|
310 |
|
311 |
+
# Reorder columns to put clickable_url first and sort by impressions
|
312 |
+
columns = ['clickable_url', 'query', 'impressions', 'clicks', 'ctr', 'position', 'relevancy_score']
|
313 |
+
report = report[columns].sort_values('impressions', ascending=False)
|
314 |
|
315 |
total_rows = len(report)
|
316 |
total_pages = (total_rows - 1) // rows_per_page + 1
|
|
|
344 |
if 'auth_flow' not in st.session_state or 'auth_url' not in st.session_state:
|
345 |
st.session_state.auth_flow, st.session_state.auth_url = google_auth(client_config)
|
346 |
|
347 |
+
query_params = st.experimental_get_query_params()
|
348 |
auth_code = query_params.get("code", [None])[0]
|
349 |
|
350 |
if auth_code and 'credentials' not in st.session_state:
|
|
|
362 |
webproperty = show_property_selector(properties, account)
|
363 |
search_type = show_search_type_selector()
|
364 |
date_range_selection = show_date_range_selector()
|
365 |
+
model_type = show_model_type_selector() # Add this line
|
366 |
if date_range_selection == 'Custom Range':
|
367 |
show_custom_date_inputs()
|
368 |
start_date, end_date = st.session_state.custom_start_date, st.session_state.custom_end_date
|
|
|
376 |
|
377 |
if st.button("Fetch Data"):
|
378 |
with st.spinner('Fetching data...'):
|
379 |
+
st.session_state.report_data = fetch_data_loading(webproperty, search_type, start_date, end_date, selected_dimensions, model_type=model_type) # Update this line
|
380 |
|
381 |
if st.session_state.report_data is not None and not st.session_state.report_data.empty:
|
|
|
|
|
|
|
382 |
show_paginated_dataframe(st.session_state.report_data)
|
383 |
download_csv_link(st.session_state.report_data)
|
384 |
elif st.session_state.report_data is not None:
|
385 |
st.warning("No data found for the selected criteria.")
|
386 |
+
|
387 |
+
|
388 |
if __name__ == "__main__":
|
389 |
main()
|