poemsforaphrodite committed on
Commit
d513d9d
1 Parent(s): 4f7f1e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -39
app.py CHANGED
@@ -221,20 +221,18 @@ def fetch_content(url, query):
221
  except requests.RequestException:
222
  return ""
223
 
224
- def calculate_relevance_score(page_content, query, co):
225
- # logger.info(f"Calculating relevance score for query: {query}")
226
  try:
227
  if not page_content:
228
- # logger.warning("Empty page content. Returning score 0.")
229
  return 0
230
 
231
- page_embedding = co.embed(texts=[page_content], model='embed-english-v3.0', input_type='search_document').embeddings[0]
232
- query_embedding = co.embed(texts=[query], model='embed-english-v3.0', input_type='search_query').embeddings[0]
 
 
233
  score = cosine_similarity([query_embedding], [page_embedding])[0][0]
234
- # logger.debug(f"Relevance score calculated: {score}")
235
  return score
236
  except Exception as e:
237
- # logger.exception(f"Error calculating relevance score: {str(e)}")
238
  st.error(f"Error calculating relevance score: {str(e)}")
239
  return 0
240
 
@@ -244,13 +242,14 @@ def normalize_url(url):
244
  def analyze_competitors(row, co, custom_url=None, country_code=None):
245
  query = row['query']
246
  our_url = normalize_url(row['page'])
 
247
 
248
  competitor_data = get_serp_results(query, country_code)
249
 
250
  results = []
251
  for data in competitor_data:
252
  competitor_url = normalize_url(data['url'])
253
- score = calculate_relevance_score(data['content'], query, co)
254
  results.append({
255
  'Position': data['position'],
256
  'URL': competitor_url,
@@ -258,29 +257,24 @@ def analyze_competitors(row, co, custom_url=None, country_code=None):
258
  'is_our_url': competitor_url == our_url
259
  })
260
 
261
- our_content = fetch_content(our_url, query)
262
- our_score = calculate_relevance_score(our_content, query, co)
263
 
264
  if not any(r['is_our_url'] for r in results):
265
  results.append({
266
  'Position': len(results) + 1,
267
- 'URL': our_url,
268
  'Score': our_score,
269
  'is_our_url': True
270
  })
271
 
272
- # Sort results by position
273
  results = sorted(results, key=lambda x: x['Position'])
274
 
275
  # Create DataFrame
276
  results_df = pd.DataFrame(results)
277
  results_df['Position'] = results_df['Position'].astype(int)
278
 
279
- # Mark our URL
280
- results_df['URL'] = results_df.apply(
281
- lambda x: f"{x['URL']} (Our URL)" if x['is_our_url'] else x['URL'], axis=1
282
- )
283
-
284
  # Keep only the columns we want to display
285
  results_df = results_df[['Position', 'URL', 'Score']]
286
 
@@ -407,19 +401,11 @@ def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, d
407
 
408
 
409
  def calculate_relevancy_scores(df, model_type):
410
- #logging.info("Calculating relevancy scores")
411
- with st.spinner('Calculating relevancy scores...'):
412
- try:
413
- page_contents = [fetch_content(url) for url in df['page']]
414
- page_embeddings = generate_embeddings(page_contents, model_type)
415
- query_embeddings = generate_embeddings(df['query'].tolist(), model_type)
416
- relevancy_scores = cosine_similarity(query_embeddings, page_embeddings).diagonal()
417
- df = df.assign(relevancy_score=relevancy_scores)
418
- #logging.info("Relevancy scores calculated successfully")
419
- except Exception as e:
420
- #logging.error(f"Error calculating relevancy scores: {e}")
421
- st.warning(f"Error calculating relevancy scores: {e}")
422
- df = df.assign(relevancy_score=0)
423
  return df
424
 
425
  # -------------
@@ -522,7 +508,8 @@ def show_model_type_selector():
522
  def calculate_single_relevancy(row):
523
  page_content = fetch_content(row['page'], row['query'])
524
  query = row['query']
525
- score = calculate_relevance_score(page_content, query, co)
 
526
  return score
527
 
528
  def compare_with_top_result(row, co, country_code):
@@ -543,9 +530,11 @@ def compare_with_top_result(row, co, country_code):
543
  our_content = fetch_content(our_url, query)
544
  top_content = top_result['content']
545
 
 
 
 
546
  # Calculate relevancy scores
547
- our_score = calculate_relevance_score(our_content, query, co)
548
- top_score = calculate_relevance_score(top_content, query, co)
549
 
550
  # Prepare prompt for GPT-4
551
  prompt = f"""
@@ -576,12 +565,12 @@ def compare_with_top_result(row, co, country_code):
576
 
577
  # Display results
578
  st.subheader("Content Comparison Analysis")
579
- st.write(f"Query: {query}")
580
- st.write(f"Top-ranking URL: {top_url}")
581
- st.write(f"Our URL: {our_url}")
582
- st.write(f"Top-ranking score: {top_score:.4f}")
583
- st.write(f"Our score: {our_score:.4f}")
584
- st.write("Analysis:")
585
  st.write(analysis)
586
  except Exception as e:
587
  st.error(f"Error in GPT-4 analysis: {str(e)}")
 
221
  except requests.RequestException:
222
  return ""
223
 
def calculate_relevance_score(page_content, query, co, model_type='english'):
    """Score how relevant a page's text is to a search query.

    The page text and the query are each embedded through ``co.embed`` and
    the cosine similarity of the two embeddings is returned.

    Args:
        page_content: Text of the page. ``None``, an empty string, or a
            whitespace-only string yields ``0.0`` without any embedding call.
        query: Search query text.
        co: Client exposing ``embed(texts=..., model=..., input_type=...)``
            whose result has an ``embeddings`` sequence. Presumably a Cohere
            client, judging by the model names -- confirm against the caller.
        model_type: ``'english'`` selects ``'embed-english-v3.0'``; any other
            value selects ``'embed-multilingual-v3.0'``.

    Returns:
        The similarity as a ``float``. ``0.0`` is returned for empty content
        and whenever embedding or scoring fails; in the failure case the
        error is also shown with ``st.error``.
    """
    try:
        # Blank content cannot be scored; skip the two embedding calls.
        if not page_content or not page_content.strip():
            return 0.0

        model = 'embed-english-v3.0' if model_type == 'english' else 'embed-multilingual-v3.0'

        # Document and query use different input types, so they are embedded
        # in separate calls rather than one batch.
        page_embedding = co.embed(
            texts=[page_content], model=model, input_type='search_document'
        ).embeddings[0]
        query_embedding = co.embed(
            texts=[query], model=model, input_type='search_query'
        ).embeddings[0]

        score = cosine_similarity([query_embedding], [page_embedding])[0][0]
        # Convert to a plain float so success and failure paths return the same type.
        return float(score)
    except Exception as e:
        # UI boundary: report the problem to the user and fall back to a
        # neutral score instead of aborting the whole page render.
        st.error(f"Error calculating relevance score: {str(e)}")
        return 0.0
238
 
 
242
  def analyze_competitors(row, co, custom_url=None, country_code=None):
243
  query = row['query']
244
  our_url = normalize_url(row['page'])
245
+ model_type = row.get('model_type', 'english') # Assuming you store model_type per row
246
 
247
  competitor_data = get_serp_results(query, country_code)
248
 
249
  results = []
250
  for data in competitor_data:
251
  competitor_url = normalize_url(data['url'])
252
+ score = calculate_relevance_score(data['content'], query, co, model_type=model_type)
253
  results.append({
254
  'Position': data['position'],
255
  'URL': competitor_url,
 
257
  'is_our_url': competitor_url == our_url
258
  })
259
 
260
+ # Retrieve "Our Score" from the main data table
261
+ our_score = st.session_state['relevancy_scores'].get(our_url, 0)
262
 
263
  if not any(r['is_our_url'] for r in results):
264
  results.append({
265
  'Position': len(results) + 1,
266
+ 'URL': f"{our_url} (Our URL)",
267
  'Score': our_score,
268
  'is_our_url': True
269
  })
270
 
271
+ # Sort results by position in ascending order
272
  results = sorted(results, key=lambda x: x['Position'])
273
 
274
  # Create DataFrame
275
  results_df = pd.DataFrame(results)
276
  results_df['Position'] = results_df['Position'].astype(int)
277
 
 
 
 
 
 
278
  # Keep only the columns we want to display
279
  results_df = results_df[['Position', 'URL', 'Score']]
280
 
 
401
 
402
 
def calculate_relevancy_scores(df, model_type):
    """Publish the per-page relevancy scores held in ``df`` to session state.

    Rebuilds ``st.session_state['relevancy_scores']`` as a mapping from page
    URL to the value of that row's ``relevancy_score`` column. Each score is
    stored under both the raw ``page`` value and ``normalize_url(page)``,
    because the lookups in this file use the normalized form.

    NOTE(review): this function does not compute any score itself -- only
    placeholder comments stood where the calculation should be -- so
    ``model_type`` is unused here. Confirm that the caller fills
    ``relevancy_score`` before calling, or restore the calculation.

    NOTE(review): scores are keyed by page only. If ``df`` holds several
    rows (queries) for the same page, the last row wins.

    Args:
        df: DataFrame with a ``page`` column and, normally, a
            ``relevancy_score`` column.
        model_type: Currently unused; kept for interface compatibility.

    Returns:
        ``df`` itself when it already has a ``relevancy_score`` column,
        otherwise a copy with that column added and set to 0.
    """
    if 'relevancy_score' not in df.columns:
        # No scores have been calculated for these rows; default to 0
        # rather than failing with a KeyError further down.
        df = df.assign(relevancy_score=0)

    scores = {}
    for page, score in zip(df['page'], df['relevancy_score']):
        scores[page] = score
        # Readers look scores up by normalized URL, so index that form too.
        scores[normalize_url(page)] = score

    st.session_state['relevancy_scores'] = scores
    return df
410
 
411
  # -------------
 
def calculate_single_relevancy(row):
    """Return the relevance score of one row's page for that row's query.

    Fetches the page content with ``fetch_content``, reads the embedding
    model choice from ``st.session_state['model_type_selector']``
    (defaulting to ``'english'``), and delegates the scoring to
    ``calculate_relevance_score``.

    NOTE(review): ``co`` is not a parameter; it is taken from an enclosing
    scope (presumably a module-level embedding client -- confirm).

    Args:
        row: Mapping-like record providing ``'page'`` and ``'query'``.

    Returns:
        Whatever ``calculate_relevance_score`` returns for this row.
    """
    page_url, search_query = row['page'], row['query']
    content = fetch_content(page_url, search_query)
    selected_model = st.session_state.get('model_type_selector', 'english')
    return calculate_relevance_score(content, search_query, co, model_type=selected_model)
514
 
515
  def compare_with_top_result(row, co, country_code):
 
530
  our_content = fetch_content(our_url, query)
531
  top_content = top_result['content']
532
 
533
+ # Retrieve "Our Score" from the main data table
534
+ our_score = st.session_state['relevancy_scores'].get(normalize_url(our_url), 0)
535
+
536
  # Calculate relevancy scores
537
+ top_score = calculate_relevance_score(top_content, query, co, model_type=row.get('model_type', 'english'))
 
538
 
539
  # Prepare prompt for GPT-4
540
  prompt = f"""
 
565
 
566
  # Display results
567
  st.subheader("Content Comparison Analysis")
568
+ st.write(f"**Query:** {query}")
569
+ st.write(f"**Top-ranking URL:** {top_url}")
570
+ st.write(f"**Our URL:** {our_url}")
571
+ st.write(f"**Top-ranking score:** {top_score:.4f}")
572
+ st.write(f"**Our score:** {our_score:.4f}")
573
+ st.write("**Analysis:**")
574
  st.write(analysis)
575
  except Exception as e:
576
  st.error(f"Error in GPT-4 analysis: {str(e)}")