File size: 25,149 Bytes
2ab9625 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 |
<html>
<head>
<script type="module" crossorigin src="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.js"></script>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.css" />
</head>
<body>
<gradio-lite>
<gradio-requirements>
multion
openai
python-dotenv
pyodide-http
</gradio-requirements>
<gradio-file name="app.py" entrypoint>
import pyodide_http
pyodide_http.patch_all()
import gradio as gr
import os
import time
from agent import DevOn
# Placeholder image URL; bot() passes it to DevOn for all three image arguments.
image_temp = "https://miro.medium.com/v2/resize:fit:1200/0*n-2bW82Z6m6U2bij.jpeg"
# Eager construction of the agent is disabled; bot() builds a DevOn per request.
# devon = DevOn(
# editor_image=image_temp, browser_image=image_temp, scratchpad_image=image_temp
# )
# Stays None at module level, so the UI code's `if devon:` branches are not taken.
devon = None
# Credentials typed into the UI. Each is overwritten by its matching
# *_update callback and read by bot() when it constructs DevOn.
multion_api_key = ""
openai_api_key = ""
replit_email = ""
replit_password = ""
def add_message(history, message):
    """Append a submitted multimodal message to the chat history.

    Every entry of ``message["files"]`` becomes its own user turn (wrapped in
    a 1-tuple), followed by ``message["text"]`` when it is not ``None``.

    Returns the mutated ``history`` together with a fresh, non-interactive
    ``gr.MultimodalTextbox`` whose value is ``None``.
    """
    history.extend(((path,), None) for path in message["files"])

    text = message["text"]
    if text is not None:
        history.append((text, None))

    locked_box = gr.MultimodalTextbox(value=None, interactive=False)
    return history, locked_box
def multion_api_key_update(x):
    """Store ``x`` in the module-level ``multion_api_key`` variable."""
    global multion_api_key
    multion_api_key = x
def openai_api_key_update(x):
    """Store ``x`` in the module-level ``openai_api_key`` variable."""
    global openai_api_key
    openai_api_key = x
def replit_email_update(x):
    """Store ``x`` in the module-level ``replit_email`` variable."""
    global replit_email
    replit_email = x
def replit_password_update(x):
    """Store ``x`` in the module-level ``replit_password`` variable."""
    global replit_password
    replit_password = x
def bot(history):
    """Run a DevOn agent on the latest user message and stream its progress.

    A new ``DevOn`` is built for every call from the module-level credential
    variables. The first element of the last ``history`` entry is used as the
    task prompt.

    Yields:
        ``(history, editor_image, browser_image, scratchpad_image)`` after
        every agent step. String explanations from the agent are appended to
        ``history`` as bot turns.
    """
    # Named `agent` so the module-level `devon` placeholder is not shadowed.
    agent = DevOn(
        editor_image=image_temp,
        browser_image=image_temp,
        scratchpad_image=image_temp,
        multion_api_key=multion_api_key,
        openai_api_key=openai_api_key,
        replit_email=replit_email,
        replit_password=replit_password,
    )
    for text, editor_image, browser_image, scratchpad_image in agent.run(
        history[-1][0]
    ):
        if isinstance(text, str):
            history.append((None, text))
        # Fall back to the agent's current image for any view the step did
        # not supply, checking each view independently.
        if editor_image is None:
            editor_image = agent.editor_image
        if browser_image is None:
            browser_image = agent.browser_image
        if scratchpad_image is None:
            scratchpad_image = agent.scratchpad_image
        yield history, editor_image, browser_image, scratchpad_image
# UI layout: a credentials row, a chat + editor row, and a browser + scratchpad row.
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    with gr.Row():
        with gr.Column():
            # Secrets are masked so they are not shown in clear text on screen.
            multion_api_key_in = gr.Textbox(label="MultiOn API Key", type="password")
            openai_api_key_in = gr.Textbox(label="OpenAI API Key", type="password")
        with gr.Column():
            replit_email_in = gr.Textbox(label="Replit Email")
            replit_password_in = gr.Textbox(label="Replit Password", type="password")
    with gr.Row():
        with gr.Column():
            chatbot = gr.Chatbot(
                [], elem_id="chatbot", bubble_full_width=False, height=300
            )
            chat_input = gr.MultimodalTextbox(
                value={
                    "text": "benchmark the perplexity api's response time with the api key abcdef"
                },
                interactive=True,
                file_types=["image"],
                placeholder="Enter message or upload file...",
                show_label=False,
            )
        with gr.Column():
            # The panel is labelled whether or not an agent exists yet.
            editor_view = gr.Image(
                devon.editor_image if devon else None,
                label="Editor",
            )
    with gr.Row():
        with gr.Column():
            browser_view = gr.Image(
                devon.browser_image if devon else None,
                label="Browser",
            )
        with gr.Column():
            scratchpad_view = gr.Image(
                devon.scratchpad_image if devon else None,
                label="Scratchpad",
            )

    # Submit chain: record the message (and lock the input), run the agent
    # while streaming updates, then unlock the input again.
    chat_msg = chat_input.submit(
        add_message, [chatbot, chat_input], [chatbot, chat_input]
    )
    bot_msg = chat_msg.then(
        bot,
        [chatbot],
        [chatbot, editor_view, browser_view, scratchpad_view],
        api_name="bot_response",
    )
    bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])

    # Mirror every credential edit into the module-level variables read by bot().
    multion_api_key_in.change(multion_api_key_update, multion_api_key_in)
    openai_api_key_in.change(openai_api_key_update, openai_api_key_in)
    replit_email_in.change(replit_email_update, replit_email_in)
    replit_password_in.change(replit_password_update, replit_password_in)

if __name__ == "__main__":
    demo.queue()
    demo.launch()
</gradio-file>
<gradio-file name="agent.py" >
from openai import OpenAI
from prompts import orchestrator_prompt
from prompts import programmer_notes
from prompts import notetaker_notes
from dotenv import load_dotenv
import time
import multion
from multion.client import MultiOn
import os
# Load environment variables from ".env.local"; DevOn.__init__ reads
# WHERE_EXECUTE via os.getenv.
load_dotenv(".env.local")
# Environment-based credential loading is disabled; DevOn receives its
# credentials as constructor arguments instead.
# replit_email = os.getenv("REPLIT_EMAIL")
# replit_password = os.getenv("REPLIT_PASSWORD")
# multion_api_key = os.getenv("MULTION_API_KEY")
# multion.login(use_api=True, multion_api_key=multion_api_key)
# runpod_url = os.getenv("RUNPOD_URL")
# Placeholder image URL (not referenced by the DevOn class in this module).
image_temp = "https://miro.medium.com/v2/resize:fit:1200/0*n-2bW82Z6m6U2bij.jpeg"
class DevOn:
    """Agent that steers three MultiOn browser sessions with an OpenAI chat model.

    The three sessions are referred to as interns:

    * ``programmer`` - opened on the Replit login page; its screenshot is
      kept in ``editor_image``.
    * ``researcher`` - opened on Google; screenshot in ``browser_image``.
    * ``notetaker`` - opened on anotepad.com; screenshot in
      ``scratchpad_image``.

    ``run()`` repeatedly asks the chat model for an
    ``Action: ... Explanation: ...`` reply, executes the action and yields the
    explanation together with the three current screenshots until a
    ``submit`` action sets ``done``.
    """

    # Chat model used for every orchestrator completion.
    MODEL = "gpt-4-vision-preview"

    def __init__(
        self,
        editor_image,
        browser_image,
        scratchpad_image,
        multion_api_key,
        openai_api_key,
        replit_email,
        replit_password,
    ):
        """Create the MultiOn sessions and the OpenAI client.

        Args:
            editor_image: Initial editor image; replaced by the programmer
                session's screenshot once that session exists.
            browser_image: Initial browser image; replaced likewise.
            scratchpad_image: Initial scratchpad image; replaced likewise.
            multion_api_key: API key passed to ``MultiOn``.
            openai_api_key: API key passed to ``OpenAI``.
            replit_email: Email used in the Replit login command.
            replit_password: Password used in the Replit login command.
        """
        # The API keys are deliberately not printed: stdout is not a safe
        # place for secrets.
        self.editor_image = editor_image
        self.browser_image = browser_image
        self.scratchpad_image = scratchpad_image
        self.local = os.getenv("WHERE_EXECUTE") == "local"
        self.multion = MultiOn(api_key=multion_api_key)
        self.replit_email = replit_email
        self.replit_password = replit_password

        self.programmer = self.multion.sessions.create(
            url="https://replit.com/login", local=self.local, include_screenshot=True
        )
        self.programmer_logged_in = False
        self.editor_image = self._screenshot(self.programmer)
        print(self.editor_image)
        time.sleep(1)

        self.researcher = self.multion.sessions.create(
            url="https://www.google.com", local=self.local, include_screenshot=True
        )
        self.browser_image = self._screenshot(self.researcher)
        time.sleep(1)

        self.notetaker = self.multion.sessions.create(
            url="https://anotepad.com/", local=self.local, include_screenshot=True
        )
        self.scratchpad_image = self._screenshot(self.notetaker)
        time.sleep(1)

        self.done = True
        self.task = ""
        self.plan = ""
        self.messages = []
        self.client = OpenAI(api_key=openai_api_key)

    def _screenshot(self, session):
        """Return the current screenshot of ``session`` from MultiOn."""
        return self.multion.sessions.screenshot(
            session_id=session.session_id
        ).screenshot

    @staticmethod
    def _parse_response(response):
        """Split a model reply into ``(action, explanation)``.

        The expected shape is ``"Action: <action>\\nExplanation: <text>"``.
        The action is stripped of surrounding whitespace. If the
        ``"Action: "`` marker is missing, everything before the explanation is
        used as the action. If the ``"Explanation: "`` marker is missing, the
        whole stripped reply is used as the explanation so the user still
        sees what the model said.
        """
        head, marker, explanation = response.partition("Explanation: ")
        if not marker:
            explanation = response.strip()
        _, action_marker, action = head.partition("Action: ")
        if not action_marker:
            action = head
        return action.strip(), explanation

    def programmer_login(self):
        """Step the programmer session until it reports ``DONE`` for the login task.

        When ``self.local`` is true only a new Python REPL is requested;
        otherwise the command also contains the Replit credentials.
        """
        if self.local:
            cmd = "Create a new Python REPL."
        else:
            cmd = (
                f"Log in with the email {self.replit_email} and the password "
                f"{self.replit_password}. Then create a new Python REPL."
            )
        while True:
            self.programmer = self.multion.sessions.step(
                self.programmer.session_id,
                cmd=cmd + "\n\n" + programmer_notes,
                url="https://replit.com/login",
                include_screenshot=True,
            )
            print(self.programmer)
            # One screenshot request per step: it is logged and also becomes
            # the editor image, including on the final DONE step.
            shot = self._screenshot(self.programmer)
            print(shot)
            self.editor_image = shot
            if self.programmer.status == "DONE":
                break
            time.sleep(1)

    def prepare_messages(self):
        """Build the full message list for the next chat completion.

        The list holds the orchestrator prompt, the task, an initially empty
        plan line, the accumulated conversation in ``self.messages`` and one
        text-plus-image message per intern showing its current screenshot.
        """
        messages = [
            {"role": "user", "content": orchestrator_prompt},
            {
                "role": "user",
                "content": f"The Task given to you is: {self.task}",
            },
            # The opening plan line is always empty; later plan states are
            # appended to self.messages by orchestrator().
            {"role": "user", "content": "The current Plan state is: "},
        ]
        messages.extend(self.messages)
        intern_views = (
            ("Programmer", self.editor_image),
            ("Researcher", self.browser_image),
            ("Notetaker", self.scratchpad_image),
        )
        for intern, image in intern_views:
            messages.append(
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"This is the current state of the {intern} Intern.",
                        },
                        {"type": "image_url", "image_url": {"url": image}},
                    ],
                }
            )
        return messages

    def _run_programmer(self, task):
        """Step the programmer session on ``task`` until it reports ``DONE``.

        When the programmer answers ``NOT SURE``, its message is forwarded to
        the chat model and the model's reply becomes the next command.
        """
        while True:
            self.programmer = self.multion.sessions.step(
                self.programmer.session_id,
                cmd=task + "\n\n" + programmer_notes,
                url="https://replit.com/login",
                include_screenshot=True,
            )
            print(self.programmer)
            if self.programmer.status == "NOT SURE":
                self.messages.append(
                    {
                        "role": "user",
                        "content": (
                            f"The Programmer says: {self.programmer.message}"
                            "\n\nYour next reply will go to the programmer."
                        ),
                    }
                )
                chat_completion = self.client.chat.completions.create(
                    messages=self.prepare_messages(),
                    model=self.MODEL,
                )
                task = chat_completion.choices[0].message.content
                self.messages.append({"role": "assistant", "content": task})
            else:
                self.messages.append(
                    {
                        "role": "user",
                        "content": f"The Programmer says: {self.programmer.message}",
                    }
                )
            if self.programmer.status == "DONE":
                break
            self.editor_image = self._screenshot(self.programmer)
            print(self.editor_image)
            time.sleep(1)

    def _run_simple_intern(self, attr, image_attr, speaker, url, cmd):
        """Step the session stored in ``self.<attr>`` with ``cmd`` until ``DONE``.

        Each step's message is recorded as ``"The <speaker> says: ..."`` and,
        while the session is not done, ``self.<image_attr>`` is refreshed with
        its screenshot.
        """
        while True:
            session = self.multion.sessions.step(
                getattr(self, attr).session_id,
                cmd=cmd,
                url=url,
                include_screenshot=True,
            )
            setattr(self, attr, session)
            print(session)
            self.messages.append(
                {
                    "role": "user",
                    "content": f"The {speaker} says: {session.message}",
                }
            )
            if session.status == "DONE":
                break
            image = self._screenshot(session)
            setattr(self, image_attr, image)
            print(image)
            time.sleep(1)

    def execute_action(self, action):
        """Execute one orchestrator action string.

        The first whitespace-delimited word selects the action and the
        remainder is its argument. Supported actions are ``submit``,
        ``update_plan``, ``programmer``, ``researcher`` and ``notetaker``.
        ``clarify`` and unknown actions do nothing here.
        """
        # Strip and split on any whitespace: the action extracted from the
        # model reply ends with a newline ("submit\n"), which a plain
        # split(" ") would keep, so "submit" would never match.
        parts = action.strip().split(None, 1)
        if not parts:
            return
        action_func = parts[0]
        action_arg = parts[1] if len(parts) > 1 else ""

        if action_func == "submit":
            self.done = True
        elif action_func == "update_plan":
            self.plan = action_arg
        elif action_func in ("programmer", "researcher", "notetaker"):
            if not action_arg:
                # Tell the orchestrator instead of raising IndexError.
                self.messages.append(
                    {
                        "role": "user",
                        "content": (
                            f"The action '{action_func}' requires an argument "
                            "but none was given."
                        ),
                    }
                )
                return
            if action_func == "programmer":
                self._run_programmer(action_arg)
            elif action_func == "researcher":
                self._run_simple_intern(
                    "researcher",
                    "browser_image",
                    "Researcher",
                    "https://www.google.com",
                    action_arg,
                )
            else:
                self._run_simple_intern(
                    "notetaker",
                    "scratchpad_image",
                    "Notetaker",
                    "https://anotepad.com/",
                    action_arg + "\n\n" + notetaker_notes,
                )
        # NOTE(review): "clarify" has no effect here; the question only
        # reaches the user through the explanation returned by orchestrator().

    def orchestrator(self):
        """Ask the chat model for the next action, execute it, return its explanation.

        The programmer login is performed once, before the first completion.
        """
        if not self.programmer_logged_in:
            self.programmer_login()
            self.programmer_logged_in = True
        chat_completion = self.client.chat.completions.create(
            messages=self.prepare_messages(),
            model=self.MODEL,
        )
        # Treat a missing reply as an empty one.
        response = chat_completion.choices[0].message.content or ""
        action, explanation = self._parse_response(response)
        self.messages.append({"role": "assistant", "content": response})
        # The plan is recorded before the action runs, so an update_plan
        # action shows up in the plan line of the following turn.
        self.messages.append(
            {
                "role": "user",
                "content": f"The current Plan state is: {self.plan}",
            }
        )
        print(self.messages)
        self.execute_action(action)
        return explanation

    def run(self, prompt):
        """Work on ``prompt`` until a ``submit`` action marks the task done.

        Yields:
            ``(explanation, editor_image, browser_image, scratchpad_image)``
            after every orchestrator step.
        """
        self.done = False
        self.task = prompt
        while not self.done:
            curr_response = self.orchestrator()
            yield (
                curr_response,
                self.editor_image,
                self.browser_image,
                self.scratchpad_image,
            )
</gradio-file>
<gradio-file name="prompts.py">
# Prompt sent as the first message of every chat completion. It describes the
# three interns, the plan, the six available actions and the required
# "Action: ... Explanation: ..." reply format.
orchestrator_prompt = """**General**
- You are DevOn, an expert Software Developer.
- You will be asked to develop a new software project from scratch. You will primarily work in Python. You will deal with large software projects spanning multiple files and user requirements.
- Your lifecycle will essentially circle around the Task, the State, your Plan, your Actions, and your Interns. Each of these are described in detail below.
- To start with, your Plan will be empty. You will receive a State (in the form of 3 images, one from each of your Interns) and a Task. You will construct a Plan outlining the steps you will need to take to complete the Task, then ask your Interns to do things in order to incrementally fulfil the steps and complete the Task.
- With each step, you will also provide an Explanation, explaining to the user what you are currently doing, so they may be able to keep track and monitor your progress. For example:
- Explanation: I am currently updating the plan based on the current state and the Task.
- Explanation: I am currently creating a file called utils.py which will contain utility functions.
**State**
**Interns**
- You have 3 interns who will help you with different tasks - a Programmer, a Researcher, and a Notetaker. Here’s some info about them:
- Programmer: the Programmer is great at writing code given very specific instructions but isn’t a good long term planner. The Programmer works on Replit. You can ask the Programmer to write some code in certain files, make new files, etc. You can even give loose instructions like “Make a new file and write basic skeleton for an Agent class in it.” Keep in mind that the Programmer works exclusively in an online Replit IDE environment. Make sure your Plan and your Actions take this into consideration.
- Researcher: the Researcher is very handy with a browser and great at finding out technical details, documentation, examples, miscellaneous information, etc. You can ask it to do things like “Find out how to make an LLM call using the Perplexity API”.
- Notetaker: the Notetaker has a notepad and can note down anything you want. You will be able to see the notepad at all times. Anytime you want anything written down just to keep track of it, ask the Notetaker to do so.
**Plan**
- You have a persistent object to keep track of things: a Plan.
- If the plan is empty, you will create a plan using the current state of things and the given task. You will do so using the update_plan action described below.
**Actions**
- There are 6 actions that you can take at the current time step. You must always take a valid action. You will complete the task by taking actions. You are free to take as many actions as needed (even hundreds), don’t try to rush by compressing multiple actions into one. These are the available actions:
- update_plan <plan>: Update Plan’s value to <plan>. This will replace the old value, not append to it. If there’s something from the old plan you wish to include in the updated one, make sure to include it in the <plan> you provide as an argument. Some examples how you can use this:
- update_plan In order to carry out the task of creating a Flask web server, I will need to take the following steps:
1) …
2) …
3) …
- programmer <task>: Ask the Programmer to carry out a <task>. Some examples of how you can use this:
- programmer Create a new Python file for utils called utils.py and write a generate_random_number() function in it that takes no parameters and returns a random number.
- programmer Go to the model.py file and import generate_random_number() from utils.
- researcher <task>: Ask the Researcher to carry out a <task>. The Researcher will reply to you with the information you asked for. Some examples of how you can use this:
- researcher Find out how the OpenAI API is used.
- researcher What is a SERP API I could use?
- notetaker <note>: Ask the Notetaker to write down a <note>. Some examples of how you can use this:
- notetaker Note down the following information: MULTION_API_KEY=…
- notetaker Note down the following information: An example Chat Completions API call looks like the following:
from openai import OpenAI
client = OpenAI()
response = client.chat.completions.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Who won the world series in 2020?"},
{"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
{"role": "user", "content": "Where was it played?"}
]
)
- clarify <question>: Clarify something about the Task. Sometimes, there may be missing information, such as logins, api keys, or some requirements of the Task may be unclear. Use this Action to clarify things from the user by asking <question>. Use this sparingly. Try and make decisions yourself. Some examples of how you can use this:
- clarify The Task mentions that I need to benchmark the Perplexity API. Could you provide your API Key?
- submit: The Task is completed and you are ready to submit the output (whatever the programmer has so far). This will end the execution. Only do this when you are completely sure.
**Important Notes**
- Respond only by taking an Action (and providing the accompanying Explanation). Any response from you must be one of the above Actions. No other text in the response, just the Action and the Explanation. You will structure your output as such:
”Action: <action>\nExplanation: <explanation>”
- You do not need to ask the Programmer to log in.
- You can see all the Interns screens. If it seems like an Intern has made a mistake or encountered an error, you can tell them about it using the relevant action and ask them to correct it. This is especially important with the Programmer.
- When you ask the programmer to write some code, ask it like this: "programmer memorize the following code and write it in the editor: <code>"
- When you ask the programmer to write some code, make sure the code does not include any double quotation marks, only single quotation marks. E.g. "hello world" should instead be 'hello world'.
- Do not ask the programmer to create new files.
- When writing code, it is preferable to keep it small and simple. Don't write too much fluff.
- Remember to only use single quotation marks.
"""
# programmer_notes = """Important Notes:
# Do not refresh the page ever to check for anything. Only wait. Do not refresh.
# You are working in a terminal environment.
# You will do everything using the terminal and only the terminal.
# If you need to create a new file, do so using the touch command on the terminal.
# If you need to see files in the current directory, do so using ls.
# If you need to view a files content, do so using the cat command on the terminal.
# To enter code into a file, use a single printf command. After the printf command has been completely typed, press enter. Typing the command and pressing enter must be 2 separate steps.
# Do not open a text editor like vim or nano.
# If you need to install a new package, use pip install on the terminal.
# Do not use the same command repeatedly.
# When you write code into a file, write it once, cat it once, then stop. Do not attempt to write again unless it is wrong.
# Remember that you need to press Enter after typing a command into the terminal. Only press enter after the command has been completely typed. Typing the command and pressing enter must be 2 separate steps."""
# Standing instructions appended to every command sent to the programmer session.
programmer_notes = "\n".join(
    (
        "Important Notes:",
        "You are a Programmer who works in a Replit Environment exclusively. If you need to install a package, use the Shell and not the Console.",
        "Do not refresh the page ever to check for anything. Only wait. Do not refresh. Do not create new files. Write your code in currently open editor window itself. Do not type double quotation marks. If you are asked to type code containing them, use single quotes instead.",
    )
)
# Standing instructions appended to every command sent to the notetaker session.
notetaker_notes = """Important Notes:
Don't write anything in the Note Title field.
Whatever notes you are told to make, write them in one go, don't press enter or type multiple times, because every time you write, it will replace the previous content.
You do not need to Save the note. When asked to note something down, just write it on the notepad. That is enough."""
</gradio-file>
</gradio-lite>
</body>
</html> |