Spaces:
Sleeping
Sleeping
Update tools/visit_webpage.py
Browse files- tools/visit_webpage.py +7 -21
tools/visit_webpage.py
CHANGED
@@ -1,8 +1,5 @@
|
|
1 |
-
from typing import Any, Optional
|
2 |
from smolagents.tools import Tool
|
3 |
-
import requests
|
4 |
-
import markdownify
|
5 |
-
import smolagents
|
6 |
import re
|
7 |
|
8 |
class VisitWebpageTool(Tool):
|
@@ -23,25 +20,14 @@ class VisitWebpageTool(Tool):
|
|
23 |
"You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
|
24 |
) from e
|
25 |
try:
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
'accept-Encoding': 'gzip, deflate, br',
|
32 |
-
'priority': 'u=0, i',
|
33 |
-
'referer': 'https://duckduckgo.com/',
|
34 |
-
'sec-fetch-dest': 'document',
|
35 |
-
'sec-fetch-mode': 'navigate',
|
36 |
-
'sec-fetch-site': 'cross-site',
|
37 |
-
}
|
38 |
-
|
39 |
-
# Send a GET request to the URL with a 20-second timeout
|
40 |
-
response = requests.get(url, timeout=20, allow_redirects=True, headers=headers)
|
41 |
-
response.raise_for_status() # Raise an exception for bad status codes
|
42 |
|
43 |
# Convert the HTML content to Markdown
|
44 |
-
markdown_content = markdownify(response.text).strip()
|
45 |
|
46 |
# Remove multiple line breaks
|
47 |
markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
|
|
|
|
|
1 |
from smolagents.tools import Tool
|
2 |
+
from selenium import webdriver
|
|
|
|
|
3 |
import re
|
4 |
|
5 |
class VisitWebpageTool(Tool):
|
|
|
20 |
"You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
|
21 |
) from e
|
22 |
try:
|
23 |
+
options = webdriver.ChromeOptions()
|
24 |
+
options.add_argument("--headless") # Run in headless mode
|
25 |
+
|
26 |
+
driver = webdriver.Chrome(options=options)
|
27 |
+
driver.get(url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
# Convert the HTML content to Markdown
|
30 |
+
markdown_content = markdownify(driver.page_source).strip()
|
31 |
|
32 |
# Remove multiple line breaks
|
33 |
markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)
|