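# NOTE(review): the lines below look like web-page header text captured along
# with the file (path, uploader, commit, size); they are not program code.
# lavague / app.py
# dhuynh95's picture
# Upload 2 files
# 3be9f56 verified
# raw
# history blame
# 6.3 kB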
import gradio as gr
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from lavague.ActionEngine import ActionEngine
from lavague.defaults import DefaultLocalLLM, DefaultLLM
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
# Value passed to get_formatted_sources() in process_instruction; presumably a
# character cap on the rendered source nodes — confirm against llama_index.
MAX_CHARS = 1500
# Use this action_engine instead to have a local inference
# action_engine = ActionEngine(llm=DefaultLocalLLM())
import os
from llama_index.llms.azure_openai import AzureOpenAI
# Azure OpenAI settings. The key and endpoint are read from the environment;
# os.getenv returns None when a variable is not set.
api_key=os.getenv("AZURE_OPENAI_KEY")
api_version="2023-05-15"
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
# NOTE(review): model is "gpt-4" while the deployment is named "gpt-4-turbo" —
# confirm both match the Azure resource being used.
model = "gpt-4"
deployment_name = "gpt-4-turbo"
llm = AzureOpenAI(
model=model,
deployment_name=deployment_name,
api_key=api_key,
azure_endpoint=azure_endpoint,
api_version=api_version,
temperature=0.0
)
# Engine used by process_instruction to build a query engine from page HTML.
action_engine = ActionEngine(llm=llm)
## Setup chrome options
chrome_options = Options()
chrome_options.add_argument("--headless") # Ensure GUI is off
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--window-size=1600,900")
# Set path to chrome/chromedriver as per your configuration
import os.path
# NOTE(review): homedir is not referenced anywhere in the visible part of this
# file; the paths below are relative to the working directory instead.
homedir = os.path.expanduser("~")
chrome_options.binary_location = "./chrome-linux64/chrome"
webdriver_service = Service("./chromedriver-linux64/chromedriver")
# HTML banner passed to gr.HTML(title) in the UI definition below.
title = """
<div align="center">
<h1>🌊 Welcome to LaVague</h1>
<p>Redefining internet surfing by transforming natural language instructions into seamless browser interactions.</p>
</div>
"""
# Choose Chrome Browser
# A single module-level browser session is created at import time; every
# handler defined below reads and drives this same `driver` object.
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
# action_engine = ActionEngine(llm, embedder)
def process_url(url):
    """Open ``url`` in the shared browser and capture a screenshot of it.

    The page is loaded with the module-level ``driver`` and the rendered
    window is saved to ``screenshot.png`` (a relative path).

    Args:
        url: Address to load in the browser.

    Returns:
        The screenshot file path, ``"screenshot.png"``.
    """
    screenshot_path = "screenshot.png"
    driver.get(url)
    driver.save_screenshot(screenshot_path)
    return screenshot_path
def process_instruction(query, url_input):
    """Stream the model's answer to ``query`` for the page at ``url_input``.

    The browser is pointed at ``url_input`` unless it is already there, the
    page source is handed to ``action_engine`` to build a query engine, and
    the streamed response is accumulated chunk by chunk.

    Args:
        query: Natural-language instruction to send to the query engine.
        url_input: URL the browser should be on before the page is read.

    Yields:
        ``(response, source_nodes)`` after every received chunk, where
        ``response`` is all text received so far and ``source_nodes`` is the
        result of ``get_formatted_sources(MAX_CHARS)``.
    """
    # Navigate only when the browser is not already showing the requested URL.
    if url_input != driver.current_url:
        driver.get(url_input)

    page_html = driver.page_source
    engine = action_engine.get_query_engine(page_html)
    stream = engine.query(query)
    formatted_sources = stream.get_formatted_sources(MAX_CHARS)

    partial = ""
    for chunk in stream.response_gen:
        # Emit the growing text so the caller can display progress.
        partial = partial + chunk
        yield partial, formatted_sources
import re
def extract_first_python_code(markdown_text):
    """Return the body of the first ```python fenced block in a text.

    Args:
        markdown_text: Markdown text to search. ``None`` or any other
            non-string value is treated as containing no code block.

    Returns:
        The text between the first ```python fence and the next closing
        fence, with surrounding whitespace stripped, or ``None`` when no
        such block exists.
    """
    # re.search raises TypeError on non-string input (e.g. None from an empty
    # UI component); report "no code block" instead of crashing the caller.
    if not isinstance(markdown_text, str):
        return None

    # re.DOTALL lets '.' span newlines; the non-greedy group stops at the
    # first closing fence, so only the first block is captured.
    match = re.search(r"```python(.*?)```", markdown_text, re.DOTALL)
    if match is None:
        return None
    return match.group(1).strip()
def exec_code(code, source_nodes, full_code):
    """Execute the generated code against the shared browser and report on it.

    Args:
        code: Text expected to contain a ```python fenced block; only the
            first such block is executed.
        source_nodes: Not used by this function; accepted because the event
            wiring passes it as an input.
        full_code: Code accumulated from earlier successful executions.

    Returns:
        A tuple ``(output, code, html, status, full_code)``: a log message,
        the extracted code (``None`` when no block was found), the page
        source captured before execution, an HTML status snippet, and the
        accumulated code (extended only when execution succeeded).
    """
    print(code)
    # An empty/None input cannot contain a code block; skip the extraction.
    code = extract_first_python_code(code) if code else None
    # Snapshot of the page as it was before the generated code runs.
    html = driver.page_source

    if code is None:
        # Report the real cause instead of letting exec() fail on None.
        output = "Error in code execution: no Python code block found in the generated text"
        status = """<p style="color: red; font-size: 20px; font-weight: bold;">Failure! Open the Debug tab for more information</p>"""
        return output, code, html, status, full_code

    try:
        # SECURITY: this runs model-generated code with full access to this
        # process. Only expose the app where that is acceptable.
        exec(code)
    except Exception as e:
        output = f"Error in code execution: {str(e)}"
        status = """<p style="color: red; font-size: 20px; font-weight: bold;">Failure! Open the Debug tab for more information</p>"""
    else:
        output = "Successful code execution"
        status = """<p style="color: green; font-size: 20px; font-weight: bold;">Success!</p>"""
        # The extracted code is stripped, so join snippets with a newline;
        # plain concatenation would glue two statements into one line.
        full_code = f"{full_code}\n{code}" if full_code else code
    return output, code, html, status, full_code
def update_image_display(img):
    """Capture a fresh screenshot and report the browser's current URL.

    Args:
        img: Not used by this function; accepted because the event wiring
            passes the image component's value as an input.

    Returns:
        A tuple ``("screenshot.png", url)`` with the screenshot file path and
        the URL the shared ``driver`` is currently on.
    """
    driver.save_screenshot("screenshot.png")
    url = driver.current_url
    return "screenshot.png", url


def show_processing_message():
    """Return the placeholder text shown while an instruction is processed."""
    return "Processing..."
# Initial value of the URL textbox in the UI below.
base_url = "https://huggingface.co/"
# Example instructions offered through gr.Examples for the instruction textbox.
instructions = ["Click on the Datasets item on the menu, between Models and Spaces",
"Click on the search bar 'Filter by name', type 'The Stack', and press 'Enter'",
"Scroll by 500 pixels",]
# Gradio UI definition, event wiring and app launch.
# NOTE(review): leading indentation is missing in this copy of the file, so the
# nesting of the `with` blocks below cannot be read off the text. Restore it
# from the original layout before running; the comments here describe each
# component only through how it is wired further down.
with gr.Blocks() as demo:
# Tab labelled "LaVague".
with gr.Tab("LaVague"):
with gr.Row():
gr.HTML(title)
with gr.Row():
# Submitting this textbox calls process_url; update_image_display also
# writes the browser's current URL back into it.
url_input = gr.Textbox(value=base_url, label="Enter URL and press 'Enter' to load the page.")
with gr.Row():
with gr.Column(scale=7):
# Receives the screenshot path returned by process_url / update_image_display.
image_display = gr.Image(label="Browser", interactive=False)
with gr.Column(scale=3):
with gr.Accordion(label="Full code", open=False):
# Both an input and an output of exec_code (the accumulated code).
full_code = gr.Code(value="", language="python", interactive=False)
# Receives the streamed text from process_instruction, then the extracted
# code returned by exec_code.
code_display = gr.Code(label="Generated code", language="python",
lines=5, interactive=True)
# Receives the "Processing..." message and later exec_code's status HTML.
status_html = gr.HTML()
with gr.Row():
with gr.Column(scale=8):
# Submitting this textbox starts the handler chain defined below.
text_area = gr.Textbox(label="Enter instructions and press 'Enter' to generate code.")
gr.Examples(examples=instructions, inputs=text_area)
# Tab labelled "Debug".
with gr.Tab("Debug"):
with gr.Row():
with gr.Column():
# Receives exec_code's log message.
log_display = gr.Textbox(interactive=False, lines=20)
with gr.Column():
# Receives the formatted source nodes yielded by process_instruction.
source_display = gr.Code(language="html", label="Retrieved nodes", interactive=False, lines=20)
with gr.Row():
with gr.Accordion(label="Full HTML", open=False):
# Receives the page source returned by exec_code.
full_html = gr.Code(language="html", label="Full HTML", interactive=False, lines=20)
# Linking components
# URL submit: load the page and show its screenshot.
url_input.submit(process_url, inputs=url_input, outputs=image_display)
# Instruction submit: show the processing message, generate code, execute it,
# then refresh the screenshot and URL — chained in that order with .then().
text_area.submit(show_processing_message, outputs=[status_html]).then(
process_instruction, inputs=[text_area, url_input], outputs=[code_display, source_display]
).then(
exec_code, inputs=[code_display, source_display, full_code],
outputs=[log_display, code_display, full_html, status_html, full_code]
).then(
update_image_display, inputs=image_display, outputs=[image_display, url_input]
)
# Start the Gradio app with debug=True.
demo.launch(debug=True)