Hansimov committed on
Commit
4591d96
1 Parent(s): 1db460d

:zap: [Enhance] BatchWebpageFetcher: return url_and_output_path_list

Browse files
Files changed (1) hide show
  1. networks/webpage_fetcher.py +7 -2
networks/webpage_fetcher.py CHANGED
@@ -58,12 +58,16 @@ class BatchWebpageFetcher:
58
  def __init__(self):
59
  self.done_count = 0
60
  self.total_count = 0
 
61
 
62
  def fecth_single_webpage(self, url, overwrite=False, output_parent=None):
63
  webpage_fetcher = WebpageFetcher()
64
- webpage_fetcher.fetch(url=url, overwrite=overwrite, output_parent=output_parent)
 
 
 
65
  self.done_count += 1
66
- logger.success(f"> {self.done_count}/{self.total_count}: {url}")
67
 
68
  def fetch(self, urls, overwrite=False, output_parent=None):
69
  self.urls = urls
@@ -81,6 +85,7 @@ class BatchWebpageFetcher:
81
 
82
  for idx, future in enumerate(concurrent.futures.as_completed(futures)):
83
  result = future.result()
 
84
 
85
 
86
  if __name__ == "__main__":
 
58
  def __init__(self):
59
  self.done_count = 0
60
  self.total_count = 0
61
+ self.url_and_output_path_list = []
62
 
63
  def fecth_single_webpage(self, url, overwrite=False, output_parent=None):
64
  webpage_fetcher = WebpageFetcher()
65
+ output_path = webpage_fetcher.fetch(
66
+ url=url, overwrite=overwrite, output_parent=output_parent
67
+ )
68
+ self.url_and_output_path_list.append({"url": url, "output_path": output_path})
69
  self.done_count += 1
70
+ logger.success(f"> [{self.done_count}/{self.total_count}] Fetched: {url}")
71
 
72
  def fetch(self, urls, overwrite=False, output_parent=None):
73
  self.urls = urls
 
85
 
86
  for idx, future in enumerate(concurrent.futures.as_completed(futures)):
87
  result = future.result()
88
+ return self.url_and_output_path_list
89
 
90
 
91
  if __name__ == "__main__":