dhuynh95 committed on
Commit
3be9f56
·
verified ·
1 Parent(s): 24ed384

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +177 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from selenium import webdriver
3
+ from selenium.webdriver.chrome.service import Service
4
+ from selenium.webdriver.common.by import By
5
+ from selenium.webdriver.chrome.options import Options
6
+ from selenium.webdriver.common.keys import Keys
7
+ from lavague.ActionEngine import ActionEngine
8
+ from lavague.defaults import DefaultLocalLLM, DefaultLLM
9
+ from llama_index.llms.huggingface import HuggingFaceInferenceAPI
10
+
11
# Passed to `get_formatted_sources` when collecting the retrieved nodes.
MAX_CHARS = 1500

# Use this action_engine instead to have a local inference
# action_engine = ActionEngine(llm=DefaultLocalLLM())


import os
from llama_index.llms.azure_openai import AzureOpenAI

# Key and endpoint are read from the environment; both are None when the
# corresponding variable is not set.
api_key = os.environ.get("AZURE_OPENAI_KEY")
azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
api_version = "2023-05-15"
model = "gpt-4"
deployment_name = "gpt-4-turbo"

# Gather the client settings in one place before building the LLM client.
azure_settings = {
    "model": model,
    "deployment_name": deployment_name,
    "api_key": api_key,
    "azure_endpoint": azure_endpoint,
    "api_version": api_version,
    "temperature": 0.0,
}
llm = AzureOpenAI(**azure_settings)

# Engine used by `process_instruction` to build a query engine per page.
action_engine = ActionEngine(llm=llm)
36
+
37
## Setup chrome options
chrome_options = Options()
for chrome_flag in (
    "--headless",  # Ensure GUI is off
    "--no-sandbox",
    "--window-size=1600,900",
):
    chrome_options.add_argument(chrome_flag)

# Set path to chrome/chromedriver as per your configuration

import os.path

# NOTE(review): `homedir` is not referenced anywhere in the visible code.
homedir = os.path.expanduser("~")
chrome_options.binary_location = "./chrome-linux64/chrome"
webdriver_service = Service("./chromedriver-linux64/chromedriver")


# HTML banner rendered at the top of the main tab.
title = """
<div align="center">
<h1>🌊 Welcome to LaVague</h1>
<p>Redefining internet surfing by transforming natural language instructions into seamless browser interactions.</p>
</div>
"""

# Choose Chrome Browser
# A single module-level driver is created here and used by every handler below.
driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
60
+
61
+ # action_engine = ActionEngine(llm, embedder)
62
+
63
def process_url(url):
    """Open ``url`` in the module-level browser and screenshot the result.

    Args:
        url: Address to navigate the browser to.

    Returns:
        The path of the screenshot file that was written, ``"screenshot.png"``.
    """
    screenshot_path = "screenshot.png"
    driver.get(url)
    driver.save_screenshot(screenshot_path)
    return screenshot_path
69
+
70
def process_instruction(query, url_input):
    """Stream the engine's answer for ``query`` against the current page.

    Args:
        query: Natural-language instruction to send to the query engine.
        url_input: URL the browser should be on; the page is loaded first
            when it differs from the driver's current URL.

    Yields:
        ``(response, source_nodes)`` pairs, where ``response`` is the text
        received so far and ``source_nodes`` is the formatted sources string.
        Nothing is yielded if the response generator produces no text.
    """
    # Only navigate when the browser is not already on the requested page.
    already_there = url_input == driver.current_url
    if not already_there:
        driver.get(url_input)

    page_html = driver.page_source
    streaming_response = action_engine.get_query_engine(page_html).query(query)
    source_nodes = streaming_response.get_formatted_sources(MAX_CHARS)

    # Re-emit the growing text after every chunk so the caller can show progress.
    generated = ""
    for piece in streaming_response.response_gen:
        generated = generated + piece
        yield generated, source_nodes
85
+
86
+ import re
87
+
88
def extract_first_python_code(markdown_text):
    """Return the contents of the first ```python fenced block in the text.

    Args:
        markdown_text: Markdown text that may contain fenced code blocks.
            Non-string values (for example ``None`` from an empty UI
            component) are accepted and treated as containing no code.

    Returns:
        The code inside the first ```python ... ``` fence with surrounding
        whitespace stripped, or ``None`` when there is no such block.
    """
    # re.search raises TypeError on non-string input; report "no code" instead
    # so callers only have to handle the None case.
    if not isinstance(markdown_text, str):
        return None

    # re.DOTALL lets '.' span newlines; the lazy quantifier stops at the
    # first closing fence so only the first block is captured.
    match = re.search(r"```python(.*?)```", markdown_text, re.DOTALL)
    if match is None:
        return None
    return match.group(1).strip()
100
+
101
+
102
def exec_code(code, source_nodes, full_code):
    """Execute the first Python snippet found in ``code`` and report the result.

    Args:
        code: Markdown text expected to contain a ```python fenced block.
        source_nodes: Not used by this function; accepted because the UI
            passes it as an input.
        full_code: Code accumulated from previous successful executions
            (``None`` is treated as empty).

    Returns:
        A tuple ``(output, code, html, status, full_code)``:
        ``output`` is a log message, ``code`` is the extracted snippet (or the
        original text when no snippet could be extracted), ``html`` is the
        page source captured before execution, ``status`` is an HTML status
        banner, and ``full_code`` is the accumulated code, extended only on
        success.
    """
    print(code)
    snippet = extract_first_python_code(code)
    # Page source is captured before the snippet runs.
    html = driver.page_source
    accumulated = full_code or ""
    failure_status = """<p style="color: red; font-size: 20px; font-weight: bold;">Failure! Open the Debug tab for more information</p>"""

    if snippet is None:
        # Report the real problem instead of the opaque TypeError that
        # exec(None) would raise, and keep the generated text visible.
        output = "Error in code execution: no Python code block found in the generated response"
        return output, code, html, failure_status, accumulated

    try:
        # NOTE(security): this runs model-generated code in-process with full
        # access to this module's globals; only use in a sandboxed deployment.
        exec(snippet)
    except Exception as e:
        output = f"Error in code execution: {str(e)}"
        status = failure_status
    else:
        output = "Successful code execution"
        status = """<p style="color: green; font-size: 20px; font-weight: bold;">Success!</p>"""
        # Separate snippets with a newline: the extracted code is stripped, so
        # plain concatenation would fuse the last and first lines together.
        accumulated = f"{accumulated}\n{snippet}" if accumulated else snippet
    return output, snippet, html, status, accumulated
115
+
116
def show_processing_message():
    """Return the status text displayed when an instruction is submitted."""
    return "Processing..."


def update_image_display(img):
    """Take a fresh browser screenshot and report the current URL.

    This function was previously defined twice with identical bodies; only
    one definition is kept.

    Args:
        img: Not used by this function; accepted because the UI passes the
            image component's value as an input.

    Returns:
        A tuple ``(screenshot_path, url)`` where ``screenshot_path`` is
        ``"screenshot.png"`` and ``url`` is the driver's current URL.
    """
    driver.save_screenshot("screenshot.png")
    url = driver.current_url
    return "screenshot.png", url
128
+
129
# Page loaded by default in the URL box.
base_url = "https://huggingface.co/"

# Example instructions offered below the instruction box.
instructions = [
    "Click on the Datasets item on the menu, between Models and Spaces",
    "Click on the search bar 'Filter by name', type 'The Stack', and press 'Enter'",
    "Scroll by 500 pixels",
]

# NOTE(review): the nesting of the layout below was reconstructed from a
# flattened diff view that carried no indentation — confirm against the
# original app.py.
with gr.Blocks() as demo:
    with gr.Tab("LaVague"):
        with gr.Row():
            gr.HTML(title)
        with gr.Row():
            url_input = gr.Textbox(
                value=base_url,
                label="Enter URL and press 'Enter' to load the page.",
            )

        with gr.Row():
            with gr.Column(scale=7):
                image_display = gr.Image(label="Browser", interactive=False)

            with gr.Column(scale=3):
                with gr.Accordion(label="Full code", open=False):
                    full_code = gr.Code(value="", language="python", interactive=False)
                code_display = gr.Code(
                    label="Generated code",
                    language="python",
                    lines=5,
                    interactive=True,
                )

                status_html = gr.HTML()
        with gr.Row():
            with gr.Column(scale=8):
                text_area = gr.Textbox(
                    label="Enter instructions and press 'Enter' to generate code."
                )
                gr.Examples(examples=instructions, inputs=text_area)
    with gr.Tab("Debug"):
        with gr.Row():
            with gr.Column():
                log_display = gr.Textbox(interactive=False, lines=20)
            with gr.Column():
                source_display = gr.Code(
                    language="html",
                    label="Retrieved nodes",
                    interactive=False,
                    lines=20,
                )
        with gr.Row():
            with gr.Accordion(label="Full HTML", open=False):
                full_html = gr.Code(
                    language="html",
                    label="Full HTML",
                    interactive=False,
                    lines=20,
                )

    # Linking components
    url_input.submit(process_url, inputs=url_input, outputs=image_display)

    # Submitting an instruction runs four steps in order: show a status
    # message, generate code, execute it, then refresh the screenshot and URL.
    status_event = text_area.submit(show_processing_message, outputs=[status_html])
    generation_event = status_event.then(
        process_instruction,
        inputs=[text_area, url_input],
        outputs=[code_display, source_display],
    )
    execution_event = generation_event.then(
        exec_code,
        inputs=[code_display, source_display, full_code],
        outputs=[log_display, code_display, full_html, status_html, full_code],
    )
    execution_event.then(
        update_image_display,
        inputs=image_display,
        outputs=[image_display, url_input],
    )
demo.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio==4.21.0
2
+ llama_index==0.10.20
3
+ python-dotenv==1.0.1
4
+ selenium==4.18.1
5
+ torch==2.2.1
6
+ transformers==4.38.1