Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -308,6 +308,7 @@ def inference(input_batch,isurl,use_archive,filt_companies_topic,limit_companies
|
|
308 |
elif(EXTRACTOR_NET == 'trafilatura'):
|
309 |
try:
|
310 |
extracted = trafilatura.extract(trafilatura.fetch_url(url), include_comments=False, config=trafilatura_config, include_tables=False)
|
|
|
311 |
except:
|
312 |
archive = is_in_archive(url)
|
313 |
if archive['archived']:
|
|
|
308 |
elif(EXTRACTOR_NET == 'trafilatura'):
|
309 |
try:
|
310 |
extracted = trafilatura.extract(trafilatura.fetch_url(url), include_comments=False, config=trafilatura_config, include_tables=False)
|
311 |
+
assert len(extracted)>100, "[W] Failed extracting "+url+" retrying with archived version"
|
312 |
except:
|
313 |
archive = is_in_archive(url)
|
314 |
if archive['archived']:
|