poemsforaphrodite committed
Commit 302324f
1 Parent(s): d5343ee

Update app.py

Files changed (1)
  1. app.py +42 -20
app.py CHANGED
@@ -143,7 +143,7 @@ def get_serp_results(query):
 def fetch_content(url):
     logger.info(f"Fetching content from URL: {url}")
     try:
-        response = requests.get(url)
+        response = requests.get(url, timeout=10)
         response.raise_for_status()
         soup = BeautifulSoup(response.text, 'html.parser')
         content = soup.get_text(separator=' ', strip=True)
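The only change in this hunk is the timeout=10 on requests.get, so a slow or unresponsive competitor site can no longer hang the whole analysis. A minimal sketch of how the call behaves with the timeout; fetch_content_sketch and its empty-string fallback are illustrative assumptions, since the function's except branch is outside this hunk.

```python
import requests
from bs4 import BeautifulSoup

def fetch_content_sketch(url):
    """Hypothetical stand-in for fetch_content: returns '' on any failure."""
    try:
        # timeout=10 raises requests.exceptions.Timeout if the server does not
        # respond within 10 seconds, instead of blocking indefinitely.
        response = requests.get(url, timeout=10)
        response.raise_for_status()  # raises HTTPError for 4xx/5xx responses
        soup = BeautifulSoup(response.text, 'html.parser')
        return soup.get_text(separator=' ', strip=True)
    except requests.exceptions.RequestException:
        # Timeout, connection, and HTTP errors all land here.
        return ''
```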
@@ -175,17 +175,28 @@ def analyze_competitors(row, co):
     competitor_urls = get_serp_results(query)

     results = []
-    for url in [our_url] + competitor_urls:  # Include our URL at the beginning
-        content = fetch_content(url)
-        score = calculate_relevance_score(content, query, co)
-        results.append({'url': url, 'relevancy_score': score})
+    for url in [our_url] + competitor_urls:
+        try:
+            logger.debug(f"Fetching content for URL: {url}")
+            content = fetch_content(url)
+            if not content:
+                logger.warning(f"No content fetched for URL: {url}")
+                continue
+
+            logger.debug(f"Calculating relevance score for URL: {url}")
+            score = calculate_relevance_score(content, query, co)
+
+            logger.info(f"URL: {url}, Score: {score}")
+            results.append({'url': url, 'relevancy_score': score})
+        except Exception as e:
+            logger.error(f"Error processing URL {url}: {str(e)}")
+            st.error(f"Error processing URL {url}: {str(e)}")

     results_df = pd.DataFrame(results).sort_values('relevancy_score', ascending=False)

     logger.info(f"Competitor analysis completed. {len(results)} results obtained.")
     return results_df

-
 def show_competitor_analysis(row, co):
     if st.button("Check Competitors", key=f"comp_{row['page']}"):
         logger.info(f"Competitor analysis requested for page: {row['page']}")
@@ -194,20 +205,27 @@ def show_competitor_analysis(row, co):
         st.write("Relevancy Score Comparison:")
         st.dataframe(results_df)

-        our_rank = results_df.index[results_df['url'] == row['page']].tolist()[0] + 1
-        total_results = len(results_df)
-        our_score = results_df.loc[results_df['url'] == row['page'], 'relevancy_score'].values[0]
-
-        logger.info(f"Our page ranks {our_rank} out of {total_results} in terms of relevancy score.")
-        st.write(f"Our page ('{row['page']}') ranks {our_rank} out of {total_results} in terms of relevancy score.")
-        st.write(f"Our relevancy score: {our_score:.4f}")
-
-        if our_rank == 1:
-            st.success("Your page has the highest relevancy score!")
-        elif our_rank <= 3:
-            st.info("Your page is among the top 3 most relevant results.")
-        elif our_rank > total_results / 2:
-            st.warning("Your page's relevancy score is in the lower half of the results. Consider optimizing your content.")
+        our_data = results_df[results_df['url'] == row['page']]
+        if our_data.empty:
+            st.error(f"Our page '{row['page']}' is not in the results. This indicates an error in fetching or processing the page.")
+            logger.error(f"Our page '{row['page']}' is missing from the results.")
+        else:
+            our_rank = our_data.index[0] + 1
+            total_results = len(results_df)
+            our_score = our_data['relevancy_score'].values[0]
+
+            logger.info(f"Our page ranks {our_rank} out of {total_results} in terms of relevancy score.")
+            st.write(f"Our page ('{row['page']}') ranks {our_rank} out of {total_results} in terms of relevancy score.")
+            st.write(f"Our relevancy score: {our_score:.4f}")
+
+            if our_score == 0:
+                st.warning("Our page's relevancy score is 0. This might indicate an issue with content fetching or score calculation.")
+            elif our_rank == 1:
+                st.success("Your page has the highest relevancy score!")
+            elif our_rank <= 3:
+                st.info("Your page is among the top 3 most relevant results.")
+            elif our_rank > total_results / 2:
+                st.warning("Your page's relevancy score is in the lower half of the results. Consider optimizing your content.")


 def analyze_competitors(row, co):
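The new branch reports an explicit error when our own page is missing from results_df, which can now happen because failing URLs are skipped, and it flags a zero score separately. One detail to keep in mind: results_df is sorted but its index is not reset in these hunks, so our_data.index[0] + 1 reflects the row's original insertion position rather than its position after sorting. A sketch of a rank lookup that does not rely on the index; rank_of is a hypothetical helper, not part of the commit.

```python
import pandas as pd

def rank_of(results_df, page_url):
    """Hypothetical helper: 1-based rank of page_url by relevancy_score."""
    # Re-sorting and resetting the index makes row position equal to rank,
    # independent of the order in which results were appended.
    ranked = (results_df
              .sort_values('relevancy_score', ascending=False)
              .reset_index(drop=True))
    matches = ranked.index[ranked['url'] == page_url]
    return int(matches[0]) + 1 if len(matches) else None
```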
@@ -320,6 +338,10 @@ def fetch_gsc_data(webproperty, search_type, start_date, end_date, dimensions, d
 def calculate_relevance_score(page_content, query, co):
     logger.info(f"Calculating relevance score for query: {query}")
     try:
+        if not page_content:
+            logger.warning("Empty page content. Returning score 0.")
+            return 0
+
         page_embedding = co.embed(texts=[page_content], model='embed-english-v3.0', input_type='search_document').embeddings[0]
         query_embedding = co.embed(texts=[query], model='embed-english-v3.0', input_type='search_query').embeddings[0]
         score = cosine_similarity([query_embedding], [page_embedding])[0][0]
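The added guard returns 0 before the co.embed calls, so an empty page (for example, a fetch that produced no text) yields a zero score instead of an error or a meaningless comparison, which also lines up with the our_score == 0 warning above. The score itself is the cosine similarity between the query and document embeddings; below is a self-contained sketch of that computation, with short made-up vectors standing in for the Cohere embeddings (the real vectors are much longer).

```python
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Made-up stand-ins for co.embed(...).embeddings[0].
query_embedding = np.array([0.1, 0.7, 0.2])
page_embedding = np.array([0.2, 0.6, 0.1])

# cosine_similarity expects 2-D inputs, hence the extra nesting; it returns
# a 1x1 matrix, and [0][0] extracts the scalar score.
score = cosine_similarity([query_embedding], [page_embedding])[0][0]
print(f"relevancy score: {score:.4f}")
```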
 