# Snapshot of a Hugging Face Space (status: Running) · file size: 6,303 bytes · revision 3be9f56
import gradio as gr
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from lavague.ActionEngine import ActionEngine
from lavague.defaults import DefaultLocalLLM, DefaultLLM
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
# Value handed to `get_formatted_sources()` in `process_instruction` when the
# retrieved nodes are rendered (presumably a per-source character limit --
# confirm against the llama_index API).
MAX_CHARS = 1500

# To run inference locally instead of through Azure OpenAI, build the engine with:
# action_engine = ActionEngine(llm=DefaultLocalLLM())

import os

from llama_index.llms.azure_openai import AzureOpenAI

# Credentials and endpoint come from the environment; each is None when the
# corresponding variable is not set.
api_key = os.environ.get("AZURE_OPENAI_KEY")
api_version = "2023-05-15"
azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")

# Model name and the Azure deployment that serves it.
model = "gpt-4"
deployment_name = "gpt-4-turbo"

llm = AzureOpenAI(
    model=model,
    deployment_name=deployment_name,
    api_key=api_key,
    azure_endpoint=azure_endpoint,
    api_version=api_version,
    temperature=0.0,
)

# Engine used by `process_instruction` to turn an instruction plus the page
# HTML into a query engine.
action_engine = ActionEngine(llm=llm)
# Chrome start-up flags for the Selenium session.
chrome_options = Options()
for _chrome_flag in (
    "--headless",  # ensure the GUI is off
    "--no-sandbox",
    "--window-size=1600,900",
):
    chrome_options.add_argument(_chrome_flag)

# Locations of the Chrome binary and of chromedriver, given relative to the
# working directory; adjust both to match your installation.
import os.path

# NOTE(review): `homedir` is not referenced anywhere else in the visible code.
homedir = os.path.expanduser("~")
chrome_options.binary_location = "./chrome-linux64/chrome"
webdriver_service = Service("./chromedriver-linux64/chromedriver")
# HTML banner shown at the top of the "LaVague" tab (rendered with `gr.HTML(title)`).
title = """
<div align="center">
<h1>🌊 Welcome to LaVague</h1>
<p>Redefining internet surfing by transforming natural language instructions into seamless browser interactions.</p>
</div>
"""
# Start the Chrome session. This one module-level `driver` is the browser used
# by every callback in this file (process_url, process_instruction, exec_code,
# update_image_display).
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
# Alternative ActionEngine construction left commented out by the author:
# action_engine = ActionEngine(llm, embedder)
def process_url(url):
    """Open *url* in the shared browser and return the path of a screenshot of it.

    The capture is always saved as ``screenshot.png`` (relative to the working
    directory); that same path is the return value.
    """
    screenshot_path = "screenshot.png"
    driver.get(url)
    driver.save_screenshot(screenshot_path)
    return screenshot_path
def process_instruction(query, url_input):
    """Stream the code generated for *query* against the page at *url_input*.

    The browser is navigated to *url_input* first unless it is already there.
    The current page source is then given to the action engine, and the
    instruction is run through the resulting query engine.

    Yields:
        ``(partial_response, formatted_sources)`` once per streamed chunk, where
        ``partial_response`` is everything received so far and
        ``formatted_sources`` is computed once before streaming starts.
    """
    if driver.current_url != url_input:
        driver.get(url_input)

    engine = action_engine.get_query_engine(driver.page_source)
    stream = engine.query(query)
    formatted_sources = stream.get_formatted_sources(MAX_CHARS)

    received = []
    for chunk in stream.response_gen:
        # Re-emit the whole answer so far so the UI always shows the full text.
        received.append(chunk)
        yield "".join(received), formatted_sources
import re
# First fenced ```python block. DOTALL lets the body span several lines;
# IGNORECASE also accepts fence tags such as ```Python.
_PYTHON_BLOCK_RE = re.compile(r"```python(.*?)```", re.DOTALL | re.IGNORECASE)


def extract_first_python_code(markdown_text):
    """Return the code inside the first ```python fenced block of *markdown_text*.

    Args:
        markdown_text: Markdown text to scan. ``None`` and the empty string are
            accepted and treated as containing no code.

    Returns:
        The contents of the first ```python block with surrounding whitespace
        stripped, or ``None`` when no such block exists.
    """
    if not markdown_text:
        # Nothing to scan; also avoids a TypeError from the regex on None.
        return None
    match = _PYTHON_BLOCK_RE.search(markdown_text)
    if match is None:
        return None
    return match.group(1).strip()
def exec_code(code, source_nodes, full_code):
    """Execute the generated snippet and report the outcome to the UI.

    Args:
        code: Text of the code display; the first fenced python block in it is
            what gets executed.
        source_nodes: Not used here; accepted because the Gradio event passes it.
        full_code: Code accumulated from earlier successful executions.

    Returns:
        ``(output, code, html, status, full_code)`` where ``output`` is a log
        message, ``code`` is the extracted snippet (``None`` if none was found),
        ``html`` is the page source captured before execution, ``status`` is an
        HTML success/failure banner, and ``full_code`` has the snippet appended
        on success.
    """
    print(code)
    # Guard against an empty/None code display before handing it to the extractor.
    code = extract_first_python_code(code) if code else None
    html = driver.page_source  # captured before the snippet changes the page

    failure_status = """<p style="color: red; font-size: 20px; font-weight: bold;">Failure! Open the Debug tab for more information</p>"""

    if code is None:
        # Report the real cause instead of the TypeError that exec(None) would raise.
        output = "Error in code execution: no Python code block found in the generated response"
        return output, code, html, failure_status, full_code

    try:
        # SECURITY: this runs LLM-generated code with the full privileges of this
        # process. Only deploy in a sandboxed or otherwise disposable environment.
        exec(code)
    except Exception as e:
        output = f"Error in code execution: {str(e)}"
        status = failure_status
    else:
        output = "Successful code execution"
        status = """<p style="color: green; font-size: 20px; font-weight: bold;">Success!</p>"""
        if code:
            # Keep successive snippets on separate lines so the accumulated
            # script stays valid Python; tolerate an empty/None history.
            full_code = f"{full_code}\n{code}" if full_code else code
    return output, code, html, status, full_code
def update_image_display(img):
    """Take a fresh screenshot of the browser and report the URL it is on.

    Args:
        img: Current value of the image component; not used, accepted because
            the Gradio event passes it as input.

    Returns:
        ``("screenshot.png", current_url)``: the path of the saved screenshot
        and ``driver.current_url``.
    """
    driver.save_screenshot("screenshot.png")
    url = driver.current_url
    return "screenshot.png", url


def show_processing_message():
    """Return the status text displayed while an instruction is being handled."""
    return "Processing..."
# Page preloaded into the URL textbox.
base_url = "https://huggingface.co/"

# Example instructions offered under the instruction textbox via gr.Examples.
instructions = [
    "Click on the Datasets item on the menu, between Models and Spaces",
    "Click on the search bar 'Filter by name', type 'The Stack', and press 'Enter'",
    "Scroll by 500 pixels",
]
# UI definition: a "LaVague" tab with the browser view and code panel, and a
# "Debug" tab with the execution log, retrieved nodes and full page HTML.
with gr.Blocks() as demo:
    with gr.Tab("LaVague"):
        with gr.Row():
            gr.HTML(title)
        with gr.Row():
            url_input = gr.Textbox(value=base_url, label="Enter URL and press 'Enter' to load the page.")

        with gr.Row():
            # Left: screenshot of the browser. Right: generated code and status.
            with gr.Column(scale=7):
                image_display = gr.Image(label="Browser", interactive=False)

            with gr.Column(scale=3):
                with gr.Accordion(label="Full code", open=False):
                    full_code = gr.Code(value="", language="python", interactive=False)
                code_display = gr.Code(label="Generated code", language="python",
                                       lines=5, interactive=True)

                status_html = gr.HTML()
        with gr.Row():
            with gr.Column(scale=8):
                text_area = gr.Textbox(label="Enter instructions and press 'Enter' to generate code.")
                gr.Examples(examples=instructions, inputs=text_area)
    with gr.Tab("Debug"):
        with gr.Row():
            with gr.Column():
                log_display = gr.Textbox(interactive=False, lines=20)
            with gr.Column():
                source_display = gr.Code(language="html", label="Retrieved nodes", interactive=False, lines=20)
        with gr.Row():
            with gr.Accordion(label="Full HTML", open=False):
                full_html = gr.Code(language="html", label="Full HTML", interactive=False, lines=20)

    # Linking components
    # Submitting the URL box loads the page and shows its screenshot.
    url_input.submit(process_url, inputs=url_input, outputs=image_display)
    # Submitting an instruction runs four steps in order: show "Processing...",
    # stream the generated code, execute it, then refresh screenshot and URL.
    text_area.submit(show_processing_message, outputs=[status_html]).then(
        process_instruction, inputs=[text_area, url_input], outputs=[code_display, source_display]
    ).then(
        exec_code, inputs=[code_display, source_display, full_code],
        outputs=[log_display, code_display, full_html, status_html, full_code]
    ).then(
        update_image_display, inputs=image_display, outputs=[image_display, url_input]
    )

demo.launch(debug=True)