import json
import os

import requests
from langchain.tools import tool
from unstructured.partition.html import partition_html

class BrowserTools():
  """Namespace for browser-related tools registered through ``@tool``."""

  # NOTE(review): the docstring of `scrape_website` is presumably consumed by
  # the `@tool` decorator as the tool description shown to the agent, so it is
  # kept verbatim; implementation notes live in `#` comments instead.
  @tool("Scrape website content")
  def scrape_website(website):
    """Useful to scrape a website content"""
    # Read the API key from the environment. A missing key raises KeyError so
    # that a misconfigured deployment fails loudly instead of sending an
    # unauthenticated request.
    api_key = os.environ["BROWSERLESS_API_KEY"]

    payload = json.dumps({"url": website})
    headers = {
      'cache-control': 'no-cache',
      'content-type': 'application/json'
    }
    # Pass the token via `params` so requests URL-encodes it, and set a
    # timeout so a stalled connection cannot block the caller forever.
    response = requests.request(
      "POST",
      "https://chrome.browserless.io/content",
      params={"token": api_key},
      headers=headers,
      data=payload,
      timeout=60,
    )

    # An error response body is not the page content; report the failure
    # rather than parsing it as if it were the scraped page.
    if not response.ok:
      return f"Failed to scrape {website}: HTTP {response.status_code}"

    # Extract the textual elements from the rendered HTML and join them into
    # paragraphs separated by blank lines.
    elements = partition_html(text=response.text)
    content = "\n\n".join(str(el) for el in elements)

    # Return only the first 5k characters
    return content[:5000]