rdose committed on
Commit
6176322
·
1 Parent(s): 7d9504d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -0
app.py CHANGED
@@ -308,6 +308,7 @@ def inference(input_batch,isurl,use_archive,filt_companies_topic,limit_companies
308
  elif(EXTRACTOR_NET == 'trafilatura'):
309
  try:
310
  extracted = trafilatura.extract(trafilatura.fetch_url(url), include_comments=False, config=trafilatura_config, include_tables=False)
 
311
  except:
312
  archive = is_in_archive(url)
313
  if archive['archived']:
 
308
  elif(EXTRACTOR_NET == 'trafilatura'):
309
  try:
310
  extracted = trafilatura.extract(trafilatura.fetch_url(url), include_comments=False, config=trafilatura_config, include_tables=False)
311
+ assert len(extracted)>100, "[W] Failed extracting "+url+" retrying with archived version"
312
  except:
313
  archive = is_in_archive(url)
314
  if archive['archived']: