Add1E committed on
Commit
f952acb
·
verified ·
1 Parent(s): 8767759

Update trend_crawl.py

Browse files
Files changed (1) hide show
  1. trend_crawl.py +8 -8
trend_crawl.py CHANGED
@@ -31,17 +31,18 @@ def setup_driver():
31
 
32
  def process_selenium_row(index, selenium_rows, driver):
33
  """Extract dynamic data using Selenium by clicking on the row."""
34
- max_retries = 3
35
  for attempt in range(max_retries):
36
  try:
 
37
  row = selenium_rows[index]
38
- row.click()
39
-
40
- # Wait for elements with class="xZCHj" to load
41
- WebDriverWait(driver, 10).until(
 
42
  EC.presence_of_all_elements_located((By.CLASS_NAME, "xZCHj"))
43
  )
44
-
45
  links = driver.find_elements(By.CLASS_NAME, "xZCHj")
46
  dynamic_data = {
47
  "article": [
@@ -52,12 +53,11 @@ def process_selenium_row(index, selenium_rows, driver):
52
  for link in links
53
  ]
54
  }
55
-
56
  if dynamic_data["article"]:
57
  return dynamic_data
58
  except Exception as e:
59
  print(f"Error processing row {index} (Attempt {attempt + 1}): {e}")
60
- selenium_rows = driver.find_elements(By.CSS_SELECTOR, '[jsname="oKdM2c"]')
61
 
62
  print(f"Failed to process row {index} after {max_retries} attempts.")
63
  return {"article": []}
 
31
 
32
  def process_selenium_row(index, selenium_rows, driver):
33
  """Extract dynamic data using Selenium by clicking on the row."""
34
+ max_retries = 5 # Increase retries
35
  for attempt in range(max_retries):
36
  try:
37
+ selenium_rows = driver.find_elements(By.CSS_SELECTOR, '[jsname="oKdM2c"]') # Refresh rows
38
  row = selenium_rows[index]
39
+ driver.execute_script("arguments[0].click();", row) # Use JavaScript click
40
+ time.sleep(1) # Adjust delay
41
+
42
+ # Wait for dynamic content
43
+ WebDriverWait(driver, 15).until(
44
  EC.presence_of_all_elements_located((By.CLASS_NAME, "xZCHj"))
45
  )
 
46
  links = driver.find_elements(By.CLASS_NAME, "xZCHj")
47
  dynamic_data = {
48
  "article": [
 
53
  for link in links
54
  ]
55
  }
 
56
  if dynamic_data["article"]:
57
  return dynamic_data
58
  except Exception as e:
59
  print(f"Error processing row {index} (Attempt {attempt + 1}): {e}")
60
+ time.sleep(2) # Add delay before retry
61
 
62
  print(f"Failed to process row {index} after {max_retries} attempts.")
63
  return {"article": []}