"""Browse a webpage and summarize it using the LLM model"""
from typing import List, Tuple, Union
from urllib.parse import urljoin, urlparse

import requests
from requests import Response
from bs4 import BeautifulSoup

from autogpt.config import Config
from autogpt.memory import get_memory
from autogpt.processing.html import extract_hyperlinks, format_hyperlinks

CFG = Config()
memory = get_memory(CFG)

# A shared session so the configured User-Agent is sent with every request
session = requests.Session()
session.headers.update({"User-Agent": CFG.user_agent})

def is_valid_url(url: str) -> bool:
    """Check if the URL is valid

    Args:
        url (str): The URL to check

    Returns:
        bool: True if the URL is valid, False otherwise
    """
    try:
        result = urlparse(url)
        # A valid URL needs at least a scheme (http/https) and a host
        return all([result.scheme, result.netloc])
    except ValueError:
        return False
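
# Usage sketch (illustrative only; example.com is a placeholder, not part of
# the original module):
#   is_valid_url("https://example.com/page")  # -> True
#   is_valid_url("example.com")               # -> False (no scheme)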

def sanitize_url(url: str) -> str:
    """Sanitize the URL

    Args:
        url (str): The URL to sanitize

    Returns:
        str: The sanitized URL, with the query string and fragment removed
    """
    return urljoin(url, urlparse(url).path)
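
# Usage sketch (illustrative; the URL is a placeholder): rejoining the URL
# with its own path drops the query string and fragment.
#   sanitize_url("https://example.com/page?session=1#top")
#   # -> "https://example.com/page"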

def check_local_file_access(url: str) -> bool:
    """Check if the URL is a local file

    Args:
        url (str): The URL to check

    Returns:
        bool: True if the URL is a local file, False otherwise
    """
    local_prefixes = [
        "file:///",
        "file://localhost",
        "http://localhost",
        "https://localhost",
    ]
    return any(url.startswith(prefix) for prefix in local_prefixes)
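
# Usage sketch (illustrative only):
#   check_local_file_access("file:///etc/passwd")   # -> True
#   check_local_file_access("https://example.com")  # -> False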

def get_response(
    url: str, timeout: int = 10
) -> Union[Tuple[None, str], Tuple[Response, None]]:
    """Get the response from a URL

    Args:
        url (str): The URL to get the response from
        timeout (int): The timeout for the HTTP request, in seconds

    Returns:
        Union[Tuple[None, str], Tuple[Response, None]]: Either
        (response, None) on success or (None, error message) on failure.
        Invalid URLs and failed requests are caught internally and reported
        through the error message rather than raised to the caller.
    """
    try:
        # Restrict access to local files
        if check_local_file_access(url):
            raise ValueError("Access to local files is restricted")

        # Most basic check if the URL is valid:
        if not url.startswith("http://") and not url.startswith("https://"):
            raise ValueError("Invalid URL format")

        sanitized_url = sanitize_url(url)
        response = session.get(sanitized_url, timeout=timeout)

        # Check if the response contains an HTTP error
        if response.status_code >= 400:
            return None, f"Error: HTTP {response.status_code} error"

        return response, None
    except ValueError as ve:
        # Handle invalid URL format
        return None, f"Error: {ve}"
    except requests.exceptions.RequestException as re:
        # Handle exceptions related to the HTTP request
        # (e.g., connection errors, timeouts, etc.)
        return None, f"Error: {re}"
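
# Usage sketch (illustrative; the URL is a placeholder):
#   response, error = get_response("https://example.com")
#   if error:
#       print(error)  # e.g. "Error: HTTP 404 error"
#   else:
#       print(response.text[:100])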

def scrape_text(url: str) -> str:
    """Scrape text from a webpage

    Args:
        url (str): The URL to scrape text from

    Returns:
        str: The scraped text, or an error message if the request failed
    """
    response, error_message = get_response(url)
    if error_message:
        return error_message
    if not response:
        return "Error: Could not get response"

    soup = BeautifulSoup(response.text, "html.parser")

    # Remove non-content elements before extracting text
    for element in soup(["script", "style"]):
        element.extract()

    text = soup.get_text()
    lines = (line.strip() for line in text.splitlines())
    # Split on double spaces so phrases stay intact, then drop empty chunks
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    text = "\n".join(chunk for chunk in chunks if chunk)
    return text
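
# Usage sketch (illustrative; error handling mirrors get_response, which
# reports failures as "Error: ..." strings):
#   text = scrape_text("https://example.com/article")
#   if text.startswith("Error:"):
#       pass  # handle the failure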

def scrape_links(url: str) -> Union[str, List[str]]:
    """Scrape links from a webpage

    Args:
        url (str): The URL to scrape links from

    Returns:
        Union[str, List[str]]: The formatted hyperlinks, or an error message
        string if the request failed
    """
    response, error_message = get_response(url)
    if error_message:
        return error_message
    if not response:
        return "Error: Could not get response"

    soup = BeautifulSoup(response.text, "html.parser")

    # Remove non-content elements before collecting hyperlinks
    for element in soup(["script", "style"]):
        element.extract()

    hyperlinks = extract_hyperlinks(soup, url)
    return format_hyperlinks(hyperlinks)
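
# Usage sketch (illustrative; the exact link format comes from
# autogpt.processing.html.format_hyperlinks):
#   links = scrape_links("https://example.com")
#   if isinstance(links, str):
#       pass  # got an error message instead of a list of links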

def create_message(chunk: str, question: str) -> dict:
    """Create a user message asking the LLM to answer a question about a
    chunk of text, or to summarize the chunk if the question cannot be
    answered from it.

    Args:
        chunk (str): The chunk of text to ask about
        question (str): The question to answer

    Returns:
        dict: A chat message in the {"role": ..., "content": ...} format
    """
    return {
        "role": "user",
        "content": f'"""{chunk}""" Using the above text, answer the following'
        f' question: "{question}" -- if the question cannot be answered using the'
        " text, summarize the text.",
    }
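
# Usage sketch (illustrative; the dict matches the OpenAI-style chat message
# format used elsewhere in the project):
#   message = create_message("Some page text...", "What is the page about?")
#   # -> {"role": "user", "content": '"""Some page text...""" Using the ...'}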