Spaces:
Paused
Paused
import re | |
import json | |
import base64 | |
from selenium import webdriver | |
from selenium.webdriver.chrome.options import Options | |
from selenium.webdriver.chrome.service import Service | |
from selenium.common.exceptions import TimeoutException | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support.expected_conditions import staleness_of | |
from webdriver_manager.chrome import ChromeDriverManager | |
from selenium.webdriver.common.by import By | |
def html2pdf(
    source: str,
    timeout: int = 2,
    install_driver: bool = True,
    print_options: dict = {},
):
    """Render ``source`` to a PDF via the module's Chrome-based helper.

    This is a thin public wrapper: the four arguments are forwarded
    positionally and unchanged to ``__get_pdf_from_html``, and that helper's
    return value is handed straight back to the caller.

    Args:
        source: Location passed to the browser's ``get`` call.
        timeout: Seconds used for the helper's ``WebDriverWait`` on the
            loaded page.
        install_driver: When true, the helper obtains chromedriver through
            ``ChromeDriverManager().install()``; otherwise it builds the
            Chrome driver without an explicit service.
        print_options: Overrides merged on top of the helper's default
            ``Page.printToPDF`` options.

    Returns:
        Whatever ``__get_pdf_from_html`` returns for these arguments.
    """
    # NOTE: the shared ``{}`` default is only read downstream (merged into a
    # fresh dict), never mutated.
    return __get_pdf_from_html(source, timeout, install_driver, print_options)
def __send_devtools(driver, cmd, params=None):
    """Send a DevTools command through the driver's executor and return its value.

    Builds the session's ``/chromium/send_command_and_get_result`` URL from
    the driver's command executor and session id, then POSTs
    ``{"cmd": cmd, "params": params}`` as a JSON body.

    Args:
        driver: Object exposing ``session_id`` and a ``command_executor``
            that provides ``_url`` and ``_request`` (a Selenium Chrome
            driver elsewhere in this file).
        cmd: DevTools method name, e.g. ``"Page.printToPDF"``.
        params: Mapping of command parameters. ``None`` (the default) sends
            an empty parameter object.

    Returns:
        The ``"value"`` entry of the response returned by the executor.

    Raises:
        Exception: If the executor returns a falsy response (``None`` or an
            empty mapping).
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if params is None:
        params = {}

    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
    url = driver.command_executor._url + resource
    body = json.dumps({"cmd": cmd, "params": params})

    response = driver.command_executor._request("POST", url, body)
    if not response:
        # Do not dereference the response here: it may be None, and an empty
        # mapping carries no useful "value" to report anyway.
        raise Exception("DevTools command %r returned no response" % (cmd,))
    return response.get("value")
def __get_pdf_from_html(
    path: str,
    timeout: int,
    install_driver: bool,
    print_options: dict,
):
    """Load ``path`` in headless Chrome and return the page printed as PDF bytes.

    Args:
        path: Location passed to ``driver.get``.
        timeout: Seconds to wait on the loaded page before printing.
        install_driver: When true, chromedriver is obtained through
            ``ChromeDriverManager().install()`` and passed as a ``Service``;
            otherwise ``webdriver.Chrome`` is created with options only.
        print_options: Overrides merged on top of the default
            ``Page.printToPDF`` options. ``None`` or an empty mapping keeps
            the defaults.

    Returns:
        The PDF document as ``bytes``, decoded from the base64 ``"data"``
        field of the ``Page.printToPDF`` result.

    Raises:
        Exception: Propagated from ``__send_devtools`` when the DevTools
            call yields no response. Errors raised by Selenium while
            loading the page also propagate; the browser is shut down in
            every case.
    """
    webdriver_options = Options()
    for flag in (
        "--headless",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
    ):
        webdriver_options.add_argument(flag)
    # NOTE(review): value 2 presumably tells Chrome to block images - confirm.
    webdriver_options.experimental_options["prefs"] = {
        "profile.default_content_settings": {"images": 2},
    }

    if install_driver:
        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=webdriver_options)
    else:
        driver = webdriver.Chrome(options=webdriver_options)

    # From here on the browser exists, so it must be shut down on every exit
    # path, including errors while loading, waiting, or printing.
    try:
        driver.get(path)

        try:
            WebDriverWait(driver, timeout).until(
                staleness_of(driver.find_element(by=By.TAG_NAME, value="html"))
            )
        except TimeoutException:
            # Expected path: the <html> element stayed attached for the whole
            # wait, so the wait simply served as a settling delay.
            pass

        calculated_print_options = {
            "landscape": False,
            "displayHeaderFooter": False,
            "printBackground": True,
            "preferCSSPageSize": True,
        }
        if print_options:
            calculated_print_options.update(print_options)

        result = __send_devtools(
            driver, "Page.printToPDF", calculated_print_options)
        return base64.b64decode(result["data"])
    finally:
        driver.quit()
def is_valid_url(url: str) -> bool:
    """Report whether ``url`` begins with an http(s), ftp or file URL.

    The check is anchored at the start of the string only (``re.match``), so
    text following a well-formed URL prefix does not make the result false.
    The scheme is matched case-sensitively.

    Args:
        url: Candidate string to test.

    Returns:
        ``True`` if the pattern matches at the beginning of ``url``,
        ``False`` otherwise.
    """
    url_pattern = r"(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]"
    found = re.match(url_pattern, url)
    return found is not None