k4d3 committed
Commit 6c29921 · 1 Parent(s): fa820f9

Fix: Correct retry logic to prevent multiple restarts


- Updated the retry_crawl function to retry indefinitely until it succeeds, instead of returning prematurely.
- Simplified the main block to submit retry_crawl directly, dropping the wrapper lambda, when --retry is specified.
- Ensured the script no longer restarts the crawl for the same URL unnecessarily (illustrated in the sketch below).
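
For context, a minimal sketch of why the old code crawled a URL twice, reconstructed from the removed lines; save_result is stubbed here purely for illustration and the real error handling (prints, sleep) is omitted:

calls = []

def save_result(url):
    # Stub standing in for the real crawl-and-save routine.
    calls.append(url)

def retry_crawl_old(inner_url):
    # Old behaviour: report success as False, give up after one failure.
    while True:
        try:
            save_result(inner_url)
            return False
        except (AttributeError, ValueError):
            return True

# The removed wrapper lambda: "not False" is truthy, so the URL is crawled again.
(lambda u: save_result(u) if not retry_crawl_old(u) else None)("https://example.com")

print(calls)  # ['https://example.com', 'https://example.com'] -- same URL crawled twice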

Files changed (1): crawl/crawl (+2 -8)
crawl/crawl CHANGED
@@ -176,13 +176,12 @@ def retry_crawl(inner_url):
     while True:
         try:
             save_result(inner_url)
-            return False
+            return
         except (AttributeError, ValueError) as inner_e:
             print(f"[ERROR] 🚫 Failed to crawl {inner_url}, "
                   f"error: {str(inner_e)}")
             print("Retrying in 3 seconds...")
             time.sleep(3)
-            return True
 
 
 if __name__ == "__main__":
@@ -206,12 +205,7 @@ if __name__ == "__main__":
     for url in args.urls:
         if args.retry:
             futures.append(
-                executor.submit(
-                    lambda u: (
-                        save_result(u) if not retry_crawl(u) else None
-                    ),
-                    url
-                )
+                executor.submit(retry_crawl, url)
             )
         else:
             futures.append(
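
Put together, the patched pieces read roughly as follows. The ThreadPoolExecutor, the argument values, and the body of save_result are assumptions standing in for code outside the diff hunks:

import time
from concurrent.futures import ThreadPoolExecutor  # assumed; the diff only shows executor.submit

def save_result(inner_url):
    # Placeholder for the real crawl-and-save routine.
    ...

def retry_crawl(inner_url):
    # Keep trying until save_result succeeds; return nothing so callers
    # cannot misread the result and crawl the same URL again.
    while True:
        try:
            save_result(inner_url)
            return
        except (AttributeError, ValueError) as inner_e:
            print(f"[ERROR] 🚫 Failed to crawl {inner_url}, "
                  f"error: {str(inner_e)}")
            print("Retrying in 3 seconds...")
            time.sleep(3)

if __name__ == "__main__":
    urls = ["https://example.com"]  # stands in for args.urls
    retry = True                    # stands in for args.retry
    futures = []
    with ThreadPoolExecutor() as executor:
        for url in urls:
            if retry:
                # retry_crawl is submitted directly; no wrapper lambda.
                futures.append(executor.submit(retry_crawl, url))
            else:
                # The non-retry branch lies outside the shown hunks; a one-shot
                # submission is assumed here for illustration.
                futures.append(executor.submit(save_result, url))

Because retry_crawl now loops until save_result succeeds and returns nothing, each submitted future crawls its URL exactly once.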