pvanand committed on
Commit
9e76abd
·
verified ·
1 Parent(s): 47473dd

Update helper_functions_api.py

Browse files
Files changed (1) hide show
  1. helper_functions_api.py +22 -7
helper_functions_api.py CHANGED
@@ -221,13 +221,28 @@ def process_content(data_format, url, query):
221
  return rephrased_content, url
222
  return "", url
223
 
224
- def fetch_and_extract_content(data_format, urls, query):
225
- with ThreadPoolExecutor(max_workers=len(urls)) as executor:
226
- future_to_url = {
227
- executor.submit(process_content, data_format, url, query): url
228
- for url in urls
229
- }
230
- all_text_with_urls = [future.result() for future in as_completed(future_to_url)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
232
  return all_text_with_urls
233
 
 
221
  return rephrased_content, url
222
  return "", url
223
 
224
+ def fetch_and_extract_content(
225
+ data_format: str, query: str, urls: List[str], num_refrences: int = 8
226
+ ) -> List[Tuple[str | None, str]]:
227
+ """
228
+ Asynchronously makeing request to urls and doing further process
229
+ """
230
+ all_text_with_urls = []
231
+ start_url = 0
232
+ while (len(all_text_with_urls) != num_refrences) and (start_url < len(urls)):
233
+ end_url = start_url + (num_refrences - len(all_text_with_urls))
234
+ urls_subset = urls[start_url:end_url]
235
+ with ThreadPoolExecutor(max_workers=len(urls_subset)) as executor:
236
+ future_to_url = {
237
+ executor.submit(process_content, data_format, url, query): url
238
+ for url in urls_subset
239
+ }
240
+ all_text_with_urls += [
241
+ future.result()
242
+ for future in as_completed(future_to_url)
243
+ if future.result()[0] != ""
244
+ ]
245
+ start_url = end_url
246
 
247
  return all_text_with_urls
248