Spaces:

poemsforaphrodite
/

gscpro

Running

App Files Community

poemsforaphrodite committed on Oct 11

Commit

d513d9d

•

1 Parent(s): 4f7f1e3

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -39

app.py CHANGED Viewed

@@ -221,20 +221,18 @@ def fetch_content(url, query):
     except requests.RequestException:
         return ""
-def calculate_relevance_score(page_content, query, co):
-   # logger.info(f"Calculating relevance score for query: {query}")
     try:
         if not page_content:
-           # logger.warning("Empty page content. Returning score 0.")
             return 0
-        page_embedding = co.embed(texts=[page_content], model='embed-english-v3.0', input_type='search_document').embeddings[0]
-        query_embedding = co.embed(texts=[query], model='embed-english-v3.0', input_type='search_query').embeddings[0]
         score = cosine_similarity([query_embedding], [page_embedding])[0][0]
-     #   logger.debug(f"Relevance score calculated: {score}")
         return score
     except Exception as e:
-       # logger.exception(f"Error calculating relevance score: {str(e)}")
         st.error(f"Error calculating relevance score: {str(e)}")
         return 0
@@ -244,13 +242,14 @@ def normalize_url(url):
 def analyze_competitors(row, co, custom_url=None, country_code=None):
     query = row['query']
     our_url = normalize_url(row['page'])
     competitor_data = get_serp_results(query, country_code)
     results = []
     for data in competitor_data:
         competitor_url = normalize_url(data['url'])
-        score = calculate_relevance_score(data['content'], query, co)
         results.append({
             'Position': data['position'],
             'URL': competitor_url,
@@ -258,29 +257,24 @@ def analyze_competitors(row, co, custom_url=None, country_code=None):
             'is_our_url': competitor_url == our_url
         })
-    our_content = fetch_content(our_url, query)
-    our_score = calculate_relevance_score(our_content, query, co)
     if not any(r['is_our_url'] for r in results):
         results.append({
             'Position': len(results) + 1,
-            'URL': our_url,
             'Score': our_score,
             'is_our_url': True
         })
-    # Sort results by position
     results = sorted(results, key=lambda x: x['Position'])
     # Create DataFrame
     results_df = pd.DataFrame(results)
     results_df['Position'] = results_df['Position'].astype(int)
-    # Mark our URL
-    results_df['URL'] = results_df.apply(
-        lambda x: f"{x['URL']} (Our URL)" if x['is_our_url'] else x['URL'], axis=1
-    )
     # Keep only the columns we want to display
     results_df = results_df[['Position', 'URL', 'Score']]
@@ -407,19 +401,11 @@ def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, d
 def calculate_relevancy_scores(df, model_type):
-    #logging.info("Calculating relevancy scores")
-    with st.spinner('Calculating relevancy scores...'):
-        try:
-            page_contents = [fetch_content(url) for url in df['page']]
-            page_embeddings = generate_embeddings(page_contents, model_type)
-            query_embeddings = generate_embeddings(df['query'].tolist(), model_type)
-            relevancy_scores = cosine_similarity(query_embeddings, page_embeddings).diagonal()
-            df = df.assign(relevancy_score=relevancy_scores)
-            #logging.info("Relevancy scores calculated successfully")
-        except Exception as e:
-            #logging.error(f"Error calculating relevancy scores: {e}")
-            st.warning(f"Error calculating relevancy scores: {e}")
-            df = df.assign(relevancy_score=0)
     return df
 # -------------
@@ -522,7 +508,8 @@ def show_model_type_selector():
 def calculate_single_relevancy(row):
     page_content = fetch_content(row['page'], row['query'])
     query = row['query']
-    score = calculate_relevance_score(page_content, query, co)
     return score
 def compare_with_top_result(row, co, country_code):
@@ -543,9 +530,11 @@ def compare_with_top_result(row, co, country_code):
     our_content = fetch_content(our_url, query)
     top_content = top_result['content']
     # Calculate relevancy scores
-    our_score = calculate_relevance_score(our_content, query, co)
-    top_score = calculate_relevance_score(top_content, query, co)
     # Prepare prompt for GPT-4
     prompt = f"""
@@ -576,12 +565,12 @@ def compare_with_top_result(row, co, country_code):
         # Display results
         st.subheader("Content Comparison Analysis")
-        st.write(f"Query: {query}")
-        st.write(f"Top-ranking URL: {top_url}")
-        st.write(f"Our URL: {our_url}")
-        st.write(f"Top-ranking score: {top_score:.4f}")
-        st.write(f"Our score: {our_score:.4f}")
-        st.write("Analysis:")
         st.write(analysis)
     except Exception as e:
         st.error(f"Error in GPT-4 analysis: {str(e)}")

     except requests.RequestException:
         return ""
+def calculate_relevance_score(page_content, query, co, model_type='english'):
     try:
         if not page_content:
             return 0
+        model = 'embed-english-v3.0' if model_type == 'english' else 'embed-multilingual-v3.0'
+        page_embedding = co.embed(texts=[page_content], model=model, input_type='search_document').embeddings[0]
+        query_embedding = co.embed(texts=[query], model=model, input_type='search_query').embeddings[0]
         score = cosine_similarity([query_embedding], [page_embedding])[0][0]
         return score
     except Exception as e:
         st.error(f"Error calculating relevance score: {str(e)}")
         return 0
 def analyze_competitors(row, co, custom_url=None, country_code=None):
     query = row['query']
     our_url = normalize_url(row['page'])
+    model_type = row.get('model_type', 'english')  # Assuming you store model_type per row
     competitor_data = get_serp_results(query, country_code)
     results = []
     for data in competitor_data:
         competitor_url = normalize_url(data['url'])
+        score = calculate_relevance_score(data['content'], query, co, model_type=model_type)
         results.append({
             'Position': data['position'],
             'URL': competitor_url,
             'is_our_url': competitor_url == our_url
         })
+    # Retrieve "Our Score" from the main data table
+    our_score = st.session_state['relevancy_scores'].get(our_url, 0)
     if not any(r['is_our_url'] for r in results):
         results.append({
             'Position': len(results) + 1,
+            'URL': f"{our_url} (Our URL)",
             'Score': our_score,
             'is_our_url': True
         })
+    # Sort results by position in ascending order
     results = sorted(results, key=lambda x: x['Position'])
     # Create DataFrame
     results_df = pd.DataFrame(results)
     results_df['Position'] = results_df['Position'].astype(int)
     # Keep only the columns we want to display
     results_df = results_df[['Position', 'URL', 'Score']]
 def calculate_relevancy_scores(df, model_type):
+    st.session_state['relevancy_scores'] = {}
+    # Existing code to calculate scores
+    # After calculation
+    for index, row in df.iterrows():
+        st.session_state['relevancy_scores'][row['page']] = row['relevancy_score']
     return df
 # -------------
 def calculate_single_relevancy(row):
     page_content = fetch_content(row['page'], row['query'])
     query = row['query']
+    model_type = st.session_state.get('model_type_selector', 'english')  # Retrieve from session state
+    score = calculate_relevance_score(page_content, query, co, model_type=model_type)
     return score
 def compare_with_top_result(row, co, country_code):
     our_content = fetch_content(our_url, query)
     top_content = top_result['content']
+    # Retrieve "Our Score" from the main data table
+    our_score = st.session_state['relevancy_scores'].get(normalize_url(our_url), 0)
     # Calculate relevancy scores
+    top_score = calculate_relevance_score(top_content, query, co, model_type=row.get('model_type', 'english'))
     # Prepare prompt for GPT-4
     prompt = f"""
         # Display results
         st.subheader("Content Comparison Analysis")
+        st.write(f"**Query:** {query}")
+        st.write(f"**Top-ranking URL:** {top_url}")
+        st.write(f"**Our URL:** {our_url}")
+        st.write(f"**Top-ranking score:** {top_score:.4f}")
+        st.write(f"**Our score:** {our_score:.4f}")
+        st.write("**Analysis:**")
         st.write(analysis)
     except Exception as e:
         st.error(f"Error in GPT-4 analysis: {str(e)}")