Spaces:
Running
Running
poemsforaphrodite
committed on
Commit
•
d513d9d
1
Parent(s):
4f7f1e3
Update app.py
Browse files
app.py
CHANGED
@@ -221,20 +221,18 @@ def fetch_content(url, query):
|
|
221 |
except requests.RequestException:
|
222 |
return ""
|
223 |
|
224 |
-
def calculate_relevance_score(page_content, query, co):
|
225 |
-
# logger.info(f"Calculating relevance score for query: {query}")
|
226 |
try:
|
227 |
if not page_content:
|
228 |
-
# logger.warning("Empty page content. Returning score 0.")
|
229 |
return 0
|
230 |
|
231 |
-
|
232 |
-
|
|
|
|
|
233 |
score = cosine_similarity([query_embedding], [page_embedding])[0][0]
|
234 |
-
# logger.debug(f"Relevance score calculated: {score}")
|
235 |
return score
|
236 |
except Exception as e:
|
237 |
-
# logger.exception(f"Error calculating relevance score: {str(e)}")
|
238 |
st.error(f"Error calculating relevance score: {str(e)}")
|
239 |
return 0
|
240 |
|
@@ -244,13 +242,14 @@ def normalize_url(url):
|
|
244 |
def analyze_competitors(row, co, custom_url=None, country_code=None):
|
245 |
query = row['query']
|
246 |
our_url = normalize_url(row['page'])
|
|
|
247 |
|
248 |
competitor_data = get_serp_results(query, country_code)
|
249 |
|
250 |
results = []
|
251 |
for data in competitor_data:
|
252 |
competitor_url = normalize_url(data['url'])
|
253 |
-
score = calculate_relevance_score(data['content'], query, co)
|
254 |
results.append({
|
255 |
'Position': data['position'],
|
256 |
'URL': competitor_url,
|
@@ -258,29 +257,24 @@ def analyze_competitors(row, co, custom_url=None, country_code=None):
|
|
258 |
'is_our_url': competitor_url == our_url
|
259 |
})
|
260 |
|
261 |
-
|
262 |
-
our_score =
|
263 |
|
264 |
if not any(r['is_our_url'] for r in results):
|
265 |
results.append({
|
266 |
'Position': len(results) + 1,
|
267 |
-
'URL': our_url,
|
268 |
'Score': our_score,
|
269 |
'is_our_url': True
|
270 |
})
|
271 |
|
272 |
-
# Sort results by position
|
273 |
results = sorted(results, key=lambda x: x['Position'])
|
274 |
|
275 |
# Create DataFrame
|
276 |
results_df = pd.DataFrame(results)
|
277 |
results_df['Position'] = results_df['Position'].astype(int)
|
278 |
|
279 |
-
# Mark our URL
|
280 |
-
results_df['URL'] = results_df.apply(
|
281 |
-
lambda x: f"{x['URL']} (Our URL)" if x['is_our_url'] else x['URL'], axis=1
|
282 |
-
)
|
283 |
-
|
284 |
# Keep only the columns we want to display
|
285 |
results_df = results_df[['Position', 'URL', 'Score']]
|
286 |
|
@@ -407,19 +401,11 @@ def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, d
|
|
407 |
|
408 |
|
409 |
def calculate_relevancy_scores(df, model_type):
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
query_embeddings = generate_embeddings(df['query'].tolist(), model_type)
|
416 |
-
relevancy_scores = cosine_similarity(query_embeddings, page_embeddings).diagonal()
|
417 |
-
df = df.assign(relevancy_score=relevancy_scores)
|
418 |
-
#logging.info("Relevancy scores calculated successfully")
|
419 |
-
except Exception as e:
|
420 |
-
#logging.error(f"Error calculating relevancy scores: {e}")
|
421 |
-
st.warning(f"Error calculating relevancy scores: {e}")
|
422 |
-
df = df.assign(relevancy_score=0)
|
423 |
return df
|
424 |
|
425 |
# -------------
|
@@ -522,7 +508,8 @@ def show_model_type_selector():
|
|
522 |
def calculate_single_relevancy(row):
|
523 |
page_content = fetch_content(row['page'], row['query'])
|
524 |
query = row['query']
|
525 |
-
|
|
|
526 |
return score
|
527 |
|
528 |
def compare_with_top_result(row, co, country_code):
|
@@ -543,9 +530,11 @@ def compare_with_top_result(row, co, country_code):
|
|
543 |
our_content = fetch_content(our_url, query)
|
544 |
top_content = top_result['content']
|
545 |
|
|
|
|
|
|
|
546 |
# Calculate relevancy scores
|
547 |
-
|
548 |
-
top_score = calculate_relevance_score(top_content, query, co)
|
549 |
|
550 |
# Prepare prompt for GPT-4
|
551 |
prompt = f"""
|
@@ -576,12 +565,12 @@ def compare_with_top_result(row, co, country_code):
|
|
576 |
|
577 |
# Display results
|
578 |
st.subheader("Content Comparison Analysis")
|
579 |
-
st.write(f"Query
|
580 |
-
st.write(f"Top-ranking URL
|
581 |
-
st.write(f"Our URL
|
582 |
-
st.write(f"Top-ranking score
|
583 |
-
st.write(f"Our score
|
584 |
-
st.write("Analysis
|
585 |
st.write(analysis)
|
586 |
except Exception as e:
|
587 |
st.error(f"Error in GPT-4 analysis: {str(e)}")
|
|
|
221 |
except requests.RequestException:
|
222 |
return ""
|
223 |
|
224 |
+
def calculate_relevance_score(page_content, query, co, model_type='english'):
    """Score how relevant a page is to a query via embedding cosine similarity.

    Args:
        page_content: Text of the page to score. ``None``, empty, or
            whitespace-only content scores 0 without calling the embed API.
        query: Search query the page is compared against.
        co: Client exposing ``embed(texts=..., model=..., input_type=...)``
            whose result has an ``embeddings`` sequence (presumably a Cohere
            client -- confirm against the caller).
        model_type: ``'english'`` selects ``embed-english-v3.0``; any other
            value selects ``embed-multilingual-v3.0``.

    Returns:
        float: Cosine similarity between the query embedding and the page
        embedding, or ``0.0`` when the content is blank or any step fails
        (failures are reported to the UI through ``st.error``).
    """
    try:
        # Blank content carries no signal; skip the two API round trips
        # instead of embedding an empty document.
        if not page_content or not page_content.strip():
            return 0.0

        model = 'embed-english-v3.0' if model_type == 'english' else 'embed-multilingual-v3.0'

        # The document and the query are embedded separately because each
        # call is made with a different ``input_type``.
        page_embedding = co.embed(
            texts=[page_content], model=model, input_type='search_document'
        ).embeddings[0]
        query_embedding = co.embed(
            texts=[query], model=model, input_type='search_query'
        ).embeddings[0]

        score = cosine_similarity([query_embedding], [page_embedding])[0][0]
        # Return a plain float on every path so callers never see a mix of
        # int and numpy scalar types.
        return float(score)
    except Exception as e:
        # Deliberately broad: this is a UI boundary, so a failed score is
        # shown to the user and treated as 0 rather than aborting the run.
        st.error(f"Error calculating relevance score: {str(e)}")
        return 0.0
|
238 |
|
|
|
242 |
def analyze_competitors(row, co, custom_url=None, country_code=None):
|
243 |
query = row['query']
|
244 |
our_url = normalize_url(row['page'])
|
245 |
+
model_type = row.get('model_type', 'english') # Assuming you store model_type per row
|
246 |
|
247 |
competitor_data = get_serp_results(query, country_code)
|
248 |
|
249 |
results = []
|
250 |
for data in competitor_data:
|
251 |
competitor_url = normalize_url(data['url'])
|
252 |
+
score = calculate_relevance_score(data['content'], query, co, model_type=model_type)
|
253 |
results.append({
|
254 |
'Position': data['position'],
|
255 |
'URL': competitor_url,
|
|
|
257 |
'is_our_url': competitor_url == our_url
|
258 |
})
|
259 |
|
260 |
+
# Retrieve "Our Score" from the main data table
|
261 |
+
our_score = st.session_state['relevancy_scores'].get(our_url, 0)
|
262 |
|
263 |
if not any(r['is_our_url'] for r in results):
|
264 |
results.append({
|
265 |
'Position': len(results) + 1,
|
266 |
+
'URL': f"{our_url} (Our URL)",
|
267 |
'Score': our_score,
|
268 |
'is_our_url': True
|
269 |
})
|
270 |
|
271 |
+
# Sort results by position in ascending order
|
272 |
results = sorted(results, key=lambda x: x['Position'])
|
273 |
|
274 |
# Create DataFrame
|
275 |
results_df = pd.DataFrame(results)
|
276 |
results_df['Position'] = results_df['Position'].astype(int)
|
277 |
|
|
|
|
|
|
|
|
|
|
|
278 |
# Keep only the columns we want to display
|
279 |
results_df = results_df[['Position', 'URL', 'Score']]
|
280 |
|
|
|
401 |
|
402 |
|
403 |
def calculate_relevancy_scores(df, model_type):
    """Cache each page's relevancy score in Streamlit session state.

    Rebuilds ``st.session_state['relevancy_scores']`` as a mapping from page
    URL to the value in the ``relevancy_score`` column of ``df``. Each score
    is stored under the raw ``page`` value and under ``normalize_url(page)``,
    because the readers visible in this file look scores up by normalized URL.

    Args:
        df: DataFrame with a ``page`` column and, normally, a
            ``relevancy_score`` column.
        model_type: Accepted for interface compatibility; not used here.

    Returns:
        The DataFrame, with ``relevancy_score`` zero-filled if the column was
        missing on entry.
    """
    if 'relevancy_score' not in df.columns:
        # NOTE(review): this function no longer computes the scores itself,
        # so the column must be produced by the caller. Warn and zero-fill
        # rather than crash with a KeyError in the loop below.
        st.warning("Error calculating relevancy scores: 'relevancy_score' column is missing")
        df = df.assign(relevancy_score=0)

    # Install the fresh dict first so a failure part-way through never leaves
    # the previous run's scores in place.
    scores = st.session_state['relevancy_scores'] = {}

    pages = df['page'].tolist()
    values = df['relevancy_score'].tolist()

    # Normalized keys go in first; raw keys are written second so that
    # lookups by the raw URL resolve exactly as before (last row wins).
    # NOTE(review): a page that appears on several rows keeps only one score.
    scores.update((normalize_url(page), value) for page, value in zip(pages, values))
    scores.update(zip(pages, values))

    return df
|
410 |
|
411 |
# -------------
|
|
|
508 |
def calculate_single_relevancy(row):
    """Fetch one row's page and return its relevance score for the row's query.

    Args:
        row: Mapping-like record providing ``'page'`` (URL to fetch) and
            ``'query'`` (search query to score against).

    Returns:
        Whatever ``calculate_relevance_score`` returns for the fetched content.
    """
    page_url, query = row['page'], row['query']
    content = fetch_content(page_url, query)

    # The model choice is read from the session-state key
    # 'model_type_selector', defaulting to 'english' when it is unset.
    selected_model = st.session_state.get('model_type_selector', 'english')

    # NOTE(review): `co` is not a parameter of this function; it has to be
    # resolvable from an enclosing or module scope -- confirm it is defined.
    return calculate_relevance_score(content, query, co, model_type=selected_model)
|
514 |
|
515 |
def compare_with_top_result(row, co, country_code):
|
|
|
530 |
our_content = fetch_content(our_url, query)
|
531 |
top_content = top_result['content']
|
532 |
|
533 |
+
# Retrieve "Our Score" from the main data table
|
534 |
+
our_score = st.session_state['relevancy_scores'].get(normalize_url(our_url), 0)
|
535 |
+
|
536 |
# Calculate relevancy scores
|
537 |
+
top_score = calculate_relevance_score(top_content, query, co, model_type=row.get('model_type', 'english'))
|
|
|
538 |
|
539 |
# Prepare prompt for GPT-4
|
540 |
prompt = f"""
|
|
|
565 |
|
566 |
# Display results
|
567 |
st.subheader("Content Comparison Analysis")
|
568 |
+
st.write(f"**Query:** {query}")
|
569 |
+
st.write(f"**Top-ranking URL:** {top_url}")
|
570 |
+
st.write(f"**Our URL:** {our_url}")
|
571 |
+
st.write(f"**Top-ranking score:** {top_score:.4f}")
|
572 |
+
st.write(f"**Our score:** {our_score:.4f}")
|
573 |
+
st.write("**Analysis:**")
|
574 |
st.write(analysis)
|
575 |
except Exception as e:
|
576 |
st.error(f"Error in GPT-4 analysis: {str(e)}")
|