Spaces:
Running
Running
poemsforaphrodite
committed on
Commit
•
dcc6e39
1
Parent(s):
ac4b067
Update app.py
Browse files
app.py
CHANGED
@@ -17,7 +17,8 @@ import requests
|
|
17 |
from bs4 import BeautifulSoup
|
18 |
|
19 |
load_dotenv()
|
20 |
-
|
|
|
21 |
|
22 |
# Initialize Cohere client
|
23 |
COHERE_API_KEY = os.environ["COHERE_API_KEY"]
|
@@ -287,33 +288,7 @@ def show_dimensions_selector(search_type):
|
|
287 |
key='dimensions_selector'
|
288 |
)
|
289 |
|
290 |
-
import datetime
|
291 |
-
import base64
|
292 |
-
import os
|
293 |
-
|
294 |
-
import streamlit as st
|
295 |
-
from streamlit_elements import elements
|
296 |
-
from google_auth_oauthlib.flow import Flow
|
297 |
-
from googleapiclient.discovery import build
|
298 |
-
from dotenv import load_dotenv
|
299 |
-
import pandas as pd
|
300 |
-
import searchconsole
|
301 |
-
import cohere
|
302 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
303 |
-
import requests
|
304 |
-
from bs4 import BeautifulSoup
|
305 |
-
|
306 |
-
# ... (keep the existing imports and configuration)
|
307 |
|
308 |
-
# -------------
|
309 |
-
# Data Processing Functions
|
310 |
-
# -------------
|
311 |
-
|
312 |
-
# ... (keep existing functions)
|
313 |
-
|
314 |
-
# -------------
|
315 |
-
# Streamlit UI Components
|
316 |
-
# -------------
|
317 |
|
318 |
def show_paginated_dataframe(report, rows_per_page=20, model_type='english'):
|
319 |
# Check if required columns are present
|
@@ -403,7 +378,7 @@ def show_paginated_dataframe(report, rows_per_page=20, model_type='english'):
|
|
403 |
'.calculate-btn { cursor: pointer; padding: 5px 10px; background-color: #4CAF50; color: white; border: none; border-radius: 4px; }',
|
404 |
'.calculate-btn:hover { background-color: #45a049; }',
|
405 |
'</style>',
|
406 |
-
*[f'<script>document.getElementsByTagName("table")[0].rows[{i+1}].cells[6].onclick = function(e) {{ if(e.target.classList.contains("calculate-btn")) calculate_relevancy
|
407 |
for i in range(min(rows_per_page, len(report) - start_idx))]
|
408 |
]) + '</table>')
|
409 |
dataframe_placeholder.markdown(df_html, unsafe_allow_html=True)
|
@@ -411,37 +386,37 @@ def show_paginated_dataframe(report, rows_per_page=20, model_type='english'):
|
|
411 |
# Initial dataframe display
|
412 |
update_dataframe()
|
413 |
|
414 |
-
# JavaScript to handle button clicks
|
415 |
-
st.markdown("""
|
416 |
-
<script>
|
417 |
-
function calculate_relevancy(row_index) {
|
418 |
-
Streamlit.setComponentValue('calculate_relevancy', row_index);
|
419 |
-
}
|
420 |
-
</script>
|
421 |
-
""", unsafe_allow_html=True)
|
422 |
-
|
423 |
# Handle relevancy calculation
|
424 |
if st.session_state.get('calculate_relevancy') is not None:
|
425 |
row_index = st.session_state.calculate_relevancy
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
432 |
|
433 |
return report
|
434 |
|
435 |
-
|
436 |
# -------------
|
437 |
# Main Streamlit App Function
|
438 |
# -------------
|
439 |
|
|
|
440 |
def main():
|
|
|
441 |
setup_streamlit()
|
442 |
client_config = load_config()
|
443 |
|
444 |
if 'auth_flow' not in st.session_state or 'auth_url' not in st.session_state:
|
|
|
445 |
st.session_state.auth_flow, st.session_state.auth_url = google_auth(client_config)
|
446 |
|
447 |
# Directly access query parameters using st.query_params
|
@@ -450,23 +425,27 @@ def main():
|
|
450 |
# Retrieve the 'code' parameter
|
451 |
auth_code = query_params.get("code", None)
|
452 |
|
453 |
-
|
454 |
if auth_code and 'credentials' not in st.session_state:
|
|
|
455 |
st.session_state.auth_flow.fetch_token(code=auth_code)
|
456 |
st.session_state.credentials = st.session_state.auth_flow.credentials
|
|
|
457 |
|
458 |
if 'credentials' not in st.session_state:
|
|
|
459 |
show_google_sign_in(st.session_state.auth_url)
|
460 |
else:
|
|
|
461 |
init_session_state()
|
462 |
account = auth_search_console(client_config, st.session_state.credentials)
|
463 |
properties = list_gsc_properties(st.session_state.credentials)
|
464 |
|
465 |
if properties:
|
|
|
466 |
webproperty = show_property_selector(properties, account)
|
467 |
search_type = show_search_type_selector()
|
468 |
date_range_selection = show_date_range_selector()
|
469 |
-
model_type = show_model_type_selector()
|
470 |
if date_range_selection == 'Custom Range':
|
471 |
show_custom_date_inputs()
|
472 |
start_date, end_date = st.session_state.custom_start_date, st.session_state.custom_end_date
|
@@ -480,15 +459,22 @@ def main():
|
|
480 |
|
481 |
if st.button("Fetch Data"):
|
482 |
with st.spinner('Fetching data...'):
|
|
|
483 |
st.session_state.report_data = fetch_gsc_data(webproperty, search_type, start_date, end_date, selected_dimensions)
|
|
|
484 |
|
485 |
if st.session_state.report_data is not None and not st.session_state.report_data.empty:
|
|
|
486 |
st.write("Data fetched successfully. Click the 'Calculate' button in the Relevancy Score column to calculate the score for each row.")
|
487 |
st.session_state.report_data = show_paginated_dataframe(st.session_state.report_data, model_type=model_type)
|
488 |
download_csv_link(st.session_state.report_data)
|
489 |
elif st.session_state.report_data is not None:
|
|
|
490 |
st.warning("No data found for the selected criteria.")
|
|
|
|
|
|
|
491 |
|
492 |
-
|
493 |
if __name__ == "__main__":
|
|
|
494 |
main()
|
|
|
17 |
from bs4 import BeautifulSoup
|
18 |
|
19 |
load_dotenv()
|
20 |
+
logging.basicConfig(level=logging.INFO)
|
21 |
+
logger = logging.getLogger(__name__)
|
22 |
|
23 |
# Initialize Cohere client
|
24 |
COHERE_API_KEY = os.environ["COHERE_API_KEY"]
|
|
|
288 |
key='dimensions_selector'
|
289 |
)
|
290 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
291 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
292 |
|
293 |
def show_paginated_dataframe(report, rows_per_page=20, model_type='english'):
|
294 |
# Check if required columns are present
|
|
|
378 |
'.calculate-btn { cursor: pointer; padding: 5px 10px; background-color: #4CAF50; color: white; border: none; border-radius: 4px; }',
|
379 |
'.calculate-btn:hover { background-color: #45a049; }',
|
380 |
'</style>',
|
381 |
+
*[f'<script>document.getElementsByTagName("table")[0].rows[{i+1}].cells[6].onclick = function(e) {{ if(e.target.classList.contains("calculate-btn")) {{ Streamlit.setComponentValue("calculate_relevancy", {start_idx + i}); }} }}</script>'
|
382 |
for i in range(min(rows_per_page, len(report) - start_idx))]
|
383 |
]) + '</table>')
|
384 |
dataframe_placeholder.markdown(df_html, unsafe_allow_html=True)
|
|
|
386 |
# Initial dataframe display
|
387 |
update_dataframe()
|
388 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
389 |
# Handle relevancy calculation
|
390 |
if st.session_state.get('calculate_relevancy') is not None:
|
391 |
row_index = st.session_state.calculate_relevancy
|
392 |
+
logger.info(f"Calculating relevancy for row index: {row_index}")
|
393 |
+
try:
|
394 |
+
page_content = fetch_content(report.iloc[row_index]['page'])
|
395 |
+
query = report.iloc[row_index]['query']
|
396 |
+
relevancy_score = calculate_single_relevancy_score(page_content, query, model_type)
|
397 |
+
logger.info(f"Relevancy score calculated: {relevancy_score}")
|
398 |
+
report.at[row_index, 'relevancy_score'] = f"{relevancy_score:.2f}"
|
399 |
+
st.session_state.calculate_relevancy = None # Reset the state
|
400 |
+
update_dataframe() # Update the dataframe display
|
401 |
+
st.success(f"Relevancy score calculated for row {row_index + 1}")
|
402 |
+
except Exception as e:
|
403 |
+
logger.error(f"Error calculating relevancy score: {str(e)}")
|
404 |
+
st.error(f"Error calculating relevancy score: {str(e)}")
|
405 |
|
406 |
return report
|
407 |
|
|
|
408 |
# -------------
|
409 |
# Main Streamlit App Function
|
410 |
# -------------
|
411 |
|
412 |
+
|
413 |
def main():
|
414 |
+
logger.info("Starting the Streamlit app")
|
415 |
setup_streamlit()
|
416 |
client_config = load_config()
|
417 |
|
418 |
if 'auth_flow' not in st.session_state or 'auth_url' not in st.session_state:
|
419 |
+
logger.info("Initializing Google auth flow")
|
420 |
st.session_state.auth_flow, st.session_state.auth_url = google_auth(client_config)
|
421 |
|
422 |
# Directly access query parameters using st.query_params
|
|
|
425 |
# Retrieve the 'code' parameter
|
426 |
auth_code = query_params.get("code", None)
|
427 |
|
|
|
428 |
if auth_code and 'credentials' not in st.session_state:
|
429 |
+
logger.info("Fetching token with auth code")
|
430 |
st.session_state.auth_flow.fetch_token(code=auth_code)
|
431 |
st.session_state.credentials = st.session_state.auth_flow.credentials
|
432 |
+
logger.info("Credentials stored in session state")
|
433 |
|
434 |
if 'credentials' not in st.session_state:
|
435 |
+
logger.info("No credentials found, showing Google sign-in")
|
436 |
show_google_sign_in(st.session_state.auth_url)
|
437 |
else:
|
438 |
+
logger.info("Credentials found, initializing session state")
|
439 |
init_session_state()
|
440 |
account = auth_search_console(client_config, st.session_state.credentials)
|
441 |
properties = list_gsc_properties(st.session_state.credentials)
|
442 |
|
443 |
if properties:
|
444 |
+
logger.info(f"Found {len(properties)} properties")
|
445 |
webproperty = show_property_selector(properties, account)
|
446 |
search_type = show_search_type_selector()
|
447 |
date_range_selection = show_date_range_selector()
|
448 |
+
model_type = show_model_type_selector()
|
449 |
if date_range_selection == 'Custom Range':
|
450 |
show_custom_date_inputs()
|
451 |
start_date, end_date = st.session_state.custom_start_date, st.session_state.custom_end_date
|
|
|
459 |
|
460 |
if st.button("Fetch Data"):
|
461 |
with st.spinner('Fetching data...'):
|
462 |
+
logger.info(f"Fetching GSC data for {webproperty} from {start_date} to {end_date}")
|
463 |
st.session_state.report_data = fetch_gsc_data(webproperty, search_type, start_date, end_date, selected_dimensions)
|
464 |
+
logger.info(f"Data fetched: {len(st.session_state.report_data)} rows")
|
465 |
|
466 |
if st.session_state.report_data is not None and not st.session_state.report_data.empty:
|
467 |
+
logger.info("Displaying fetched data")
|
468 |
st.write("Data fetched successfully. Click the 'Calculate' button in the Relevancy Score column to calculate the score for each row.")
|
469 |
st.session_state.report_data = show_paginated_dataframe(st.session_state.report_data, model_type=model_type)
|
470 |
download_csv_link(st.session_state.report_data)
|
471 |
elif st.session_state.report_data is not None:
|
472 |
+
logger.warning("No data found for the selected criteria")
|
473 |
st.warning("No data found for the selected criteria.")
|
474 |
+
else:
|
475 |
+
logger.warning("No properties found for the account")
|
476 |
+
st.warning("No properties found for your Google Search Console account.")
|
477 |
|
|
|
478 |
if __name__ == "__main__":
|
479 |
+
logger.info("Application started")
|
480 |
main()
|