"""Fetch a web page and return its text content."""

import re

import requests
from bs4 import BeautifulSoup


def read_web(url: str, timeout: float = 30.0) -> str:
    """Download *url* and return the text extracted from its HTML.

    The response body is parsed with BeautifulSoup's ``html.parser`` and
    flattened with ``get_text()``. Runs of three or more consecutive
    newlines are then collapsed to exactly two.

    The HTTP status code is not checked: the body of an error response is
    parsed and returned like any other page.

    Args:
        url: Address of the page to fetch. A falsy value (empty string or
            ``None``) short-circuits and returns ``''`` without any
            network access.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled server cannot block the caller forever.

    Returns:
        The extracted text, or ``''`` when *url* is falsy.

    Raises:
        requests.exceptions.RequestException: Propagated unchanged from
            ``requests.get`` (connection failure, timeout, invalid URL...).
    """
    if not url:
        return ''

    # Without an explicit timeout, requests waits indefinitely.
    resp = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(resp.text, 'html.parser')
    text = soup.get_text()

    # Collapse runs of 3+ newlines down to a single blank line.
    return re.sub(r'\n{3,}', '\n\n', text)


if __name__ == '__main__':
    r = read_web('https://en.wikipedia.org/wiki/Wiki')
    print(r)