Spaces:
Runtime error
Runtime error
philipp-zettl
committed on
Update src/text.py
Browse files - src/text.py +2 -1
src/text.py
CHANGED
@@ -100,9 +100,10 @@ def doctree_from_url(url, elem_class='div', class_name='article-body'):
|
|
100 |
article = extract_article(url)
|
101 |
# convert to MD to handle splitting better
|
102 |
article_content = select_content(article.html, elem_class, class_name)
|
103 |
-
requires_title = not any(filter(lambda x: x.startswith('# '), article_content.split('\n')))
|
104 |
|
105 |
if requires_title:
|
|
|
106 |
article_content = f"# {article.title}\n\n{article_content}"
|
107 |
article_content = article_content.replace('\n\n', '\n').replace('#', '%%@@%%')
|
108 |
# fix relative website links
|
|
|
100 |
article = extract_article(url)
|
101 |
# convert to MD to handle splitting better
|
102 |
article_content = select_content(article.html, elem_class, class_name)
|
103 |
+
requires_title = not any(filter(lambda x: x.strip().startswith('# '), article_content.split('\n')))
|
104 |
|
105 |
if requires_title:
|
106 |
+
print('Didn\'t find title, will add it manually...')
|
107 |
article_content = f"# {article.title}\n\n{article_content}"
|
108 |
article_content = article_content.replace('\n\n', '\n').replace('#', '%%@@%%')
|
109 |
# fix relative website links
|