Update app.py
Browse files
app.py
CHANGED
@@ -177,7 +177,10 @@ async def chat(
|
|
177 |
|
178 |
def extract_text_from_webpage(html_content):
|
179 |
"""Extracts visible text from HTML content using BeautifulSoup."""
|
180 |
-
|
|
|
|
|
|
|
181 |
|
182 |
async def fetch_and_extract(url, max_chars, proxy: Optional[str] = None):
|
183 |
"""Fetches a URL and extracts text asynchronously."""
|
@@ -242,7 +245,10 @@ async def web_search_and_extract(
|
|
242 |
|
243 |
def extract_text_from_webpage2(html_content):
|
244 |
"""Extracts visible text from HTML content using BeautifulSoup."""
|
245 |
-
|
|
|
|
|
|
|
246 |
|
247 |
def fetch_and_extract2(url, max_chars):
|
248 |
"""Fetches a URL and extracts text using threading."""
|
|
|
177 |
|
178 |
def extract_text_from_webpage(html_content):
    """Extracts visible text from HTML content using BeautifulSoup.

    Args:
        html_content: HTML markup to parse.

    Returns:
        The document text that remains after every <script>, <style>,
        <header> and <footer> element has been removed, with surrounding
        whitespace stripped from each text fragment.
    """
    # Name the parser explicitly. Without it BeautifulSoup picks whichever
    # parser happens to be installed (lxml, html5lib or html.parser), so the
    # extracted text can differ between machines, and it emits a
    # GuessedAtParserWarning. "html.parser" ships with Python itself.
    soup = BeautifulSoup(html_content, "html.parser")
    # Drop elements whose content is not part of the page's main text.
    for tag in soup(["script", "style", "header", "footer"]):
        tag.extract()
    return soup.get_text(strip=True)
|
184 |
|
185 |
async def fetch_and_extract(url, max_chars, proxy: Optional[str] = None):
|
186 |
"""Fetches a URL and extracts text asynchronously."""
|
|
|
245 |
|
246 |
def extract_text_from_webpage2(html_content):
    """Extracts visible text from HTML content using BeautifulSoup.

    Args:
        html_content: HTML markup to parse.

    Returns:
        The document text that remains after every <script>, <style>,
        <header> and <footer> element has been removed, with surrounding
        whitespace stripped from each text fragment.
    """
    # Name the parser explicitly. Without it BeautifulSoup picks whichever
    # parser happens to be installed (lxml, html5lib or html.parser), so the
    # extracted text can differ between machines, and it emits a
    # GuessedAtParserWarning. "html.parser" ships with Python itself.
    soup = BeautifulSoup(html_content, "html.parser")
    # Drop elements whose content is not part of the page's main text.
    for tag in soup(["script", "style", "header", "footer"]):
        tag.extract()
    return soup.get_text(strip=True)
|
252 |
|
253 |
def fetch_and_extract2(url, max_chars):
|
254 |
"""Fetches a URL and extracts text using threading."""
|