Spaces:
Running
Running
import re | |
import requests | |
from bs4 import BeautifulSoup | |
def read_web(url: str, *, timeout: float = 10.0) -> str:
    """Fetch a web page and return its visible text content.

    The page is downloaded with ``requests``, parsed with BeautifulSoup's
    built-in ``html.parser``, and flattened to plain text. Runs of three or
    more consecutive newlines are collapsed to a single blank line.

    Args:
        url: Address of the page to fetch. A falsy value (e.g. ``''``)
            short-circuits and returns ``''`` without any network access.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a stalled host.

    Returns:
        The text extracted from the response body, or ``''`` when ``url``
        is empty.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` on connection failure or timeout.

    Note:
        The HTTP status code is not checked, so the body of an error page
        (e.g. a 404) is returned as text like any other response.
    """
    if not url:
        return ''

    resp = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(resp.text, 'html.parser')
    text = soup.get_text()
    # Collapse runs of 3+ newlines so the output has at most one blank line
    # between paragraphs.
    return re.sub(r'\n{3,}', '\n\n', text)
if __name__ == '__main__':
    # Manual smoke test: fetch a known page and dump its extracted text.
    page_text = read_web('https://en.wikipedia.org/wiki/Wiki')
    print(page_text)