devon

Sleeping

File size: 25,149 Bytes

2ab9625



<html>
	<head>
		<script type="module" crossorigin src="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.js"></script>
		<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.css" />
	</head>
	<body>
		<gradio-lite>

            <gradio-requirements>
            multion
            openai
            python-dotenv
            pyodide-http
            </gradio-requirements>
            
<gradio-file name="app.py" entrypoint>
import pyodide_http
pyodide_http.patch_all()
import gradio as gr
import os
import time
from agent import DevOn

# Stock picture passed to the agent as the initial image for all three views.
image_temp = "https://miro.medium.com/v2/resize:fit:1200/0*n-2bW82Z6m6U2bij.jpeg"

# No agent is created at import time, so this stays None while the UI is built.
devon = None

# Credentials entered in the UI; the *_update callbacks overwrite these
# module-level values whenever the matching textbox changes.
multion_api_key = openai_api_key = ""
replit_email = replit_password = ""


def add_message(history, message):
    """Append the submitted files and text to the chat history.

    Each uploaded file becomes its own user turn (a one-element tuple holding
    the file), followed by the text when it is not ``None``.

    Returns the updated history and a cleared, non-interactive
    ``MultimodalTextbox`` to put in place of the input.
    """
    history.extend(((uploaded,), None) for uploaded in message["files"])
    text = message["text"]
    if text is not None:
        history.append((text, None))
    locked_input = gr.MultimodalTextbox(value=None, interactive=False)
    return history, locked_input


def multion_api_key_update(x):
    """Store ``x`` as the module-level ``multion_api_key``."""
    globals()["multion_api_key"] = x


def openai_api_key_update(x):
    """Store ``x`` as the module-level ``openai_api_key``."""
    globals()["openai_api_key"] = x


def replit_email_update(x):
    """Store ``x`` as the module-level ``replit_email``."""
    globals()["replit_email"] = x


def replit_password_update(x):
    """Store ``x`` as the module-level ``replit_password``."""
    globals()["replit_password"] = x


def bot(history):
    """Run the agent on the latest user message and stream its progress.

    A new ``DevOn`` agent is built on every call from the credentials
    currently stored in the module-level variables, then run on the first
    element of the last history entry.

    Yields:
        ``(history, editor_image, browser_image, scratchpad_image)`` after
        each agent step, with the step's explanation appended to ``history``
        as a bot turn when it is a string.
    """
    agent = DevOn(
        editor_image=image_temp,
        browser_image=image_temp,
        scratchpad_image=image_temp,
        multion_api_key=multion_api_key,
        openai_api_key=openai_api_key,
        replit_email=replit_email,
        replit_password=replit_password,
    )

    for text, editor_image, browser_image, scratchpad_image in agent.run(
        history[-1][0]
    ):
        if isinstance(text, str):
            history.append((None, text))
        # Fall back to the agent's last known image one view at a time, so a
        # missing editor image does not overwrite valid browser/scratchpad
        # images and a missing browser/scratchpad image is still filled in.
        if editor_image is None:
            editor_image = agent.editor_image
        if browser_image is None:
            browser_image = agent.browser_image
        if scratchpad_image is None:
            scratchpad_image = agent.scratchpad_image
        yield history, editor_image, browser_image, scratchpad_image


# Page layout: credential inputs on top, chat plus editor view in the middle
# row, browser and scratchpad views in the bottom row.
with gr.Blocks(css="footer {visibility: hidden}") as demo:
    with gr.Row():
        with gr.Column():
            # Secrets are masked so they cannot be read off the screen.
            multion_api_key_in = gr.Textbox(label="MultiOn API Key", type="password")
            openai_api_key_in = gr.Textbox(label="OpenAI API Key", type="password")
        with gr.Column():
            replit_email_in = gr.Textbox(label="Replit Email")
            replit_password_in = gr.Textbox(label="Replit Password", type="password")
    with gr.Row():
        with gr.Column():
            chatbot = gr.Chatbot(
                [], elem_id="chatbot", bubble_full_width=False, height=300
            )

            chat_input = gr.MultimodalTextbox(
                value={
                    "text": "benchmark the perplexity api's resposne time with the api key abcdef"
                },
                interactive=True,
                file_types=["image"],
                placeholder="Enter message or upload file...",
                show_label=False,
            )
        with gr.Column():
            # `devon` is None at import time, so the views start empty; they
            # are labelled either way so the three panes can be told apart.
            editor_view = gr.Image(
                devon.editor_image if devon else None,
                label="Editor",
            )
    with gr.Row():
        with gr.Column():
            browser_view = gr.Image(
                devon.browser_image if devon else None,
                label="Browser",
            )
        with gr.Column():
            scratchpad_view = gr.Image(
                devon.scratchpad_image if devon else None,
                label="Scratchpad",
            )

    # Submit: record the message and lock the input, stream the agent's
    # progress, then unlock the input again.
    chat_msg = chat_input.submit(
        add_message, [chatbot, chat_input], [chatbot, chat_input]
    )
    bot_msg = chat_msg.then(
        bot,
        [chatbot],
        [chatbot, editor_view, browser_view, scratchpad_view],
        api_name="bot_response",
    )
    bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])

    # Keep the module-level credentials in sync with the textboxes.
    multion_api_key_in.change(multion_api_key_update, multion_api_key_in)
    openai_api_key_in.change(openai_api_key_update, openai_api_key_in)
    replit_email_in.change(replit_email_update, replit_email_in)
    replit_password_in.change(replit_password_update, replit_password_in)

if __name__ == "__main__":
    # Enable the queue, then start the app.
    demo.queue().launch()
</gradio-file>

<gradio-file name="agent.py" >
from openai import OpenAI
from prompts import orchestrator_prompt
from prompts import programmer_notes
from prompts import notetaker_notes
from dotenv import load_dotenv
import time
import multion
from multion.client import MultiOn
import os

# Read environment overrides from a local dotenv file.
load_dotenv(dotenv_path=".env.local")

# Credentials used to be read from the environment here (REPLIT_EMAIL,
# REPLIT_PASSWORD, MULTION_API_KEY, RUNPOD_URL); they are now passed to
# DevOn.__init__ by the caller.

# Stock picture URL.
image_temp = "https://miro.medium.com/v2/resize:fit:1200/0*n-2bW82Z6m6U2bij.jpeg"


class DevOn:
    def __init__(
        self,
        editor_image,
        browser_image,
        scratchpad_image,
        multion_api_key,
        openai_api_key,
        replit_email,
        replit_password,
    ):
        """Start the three MultiOn intern sessions and the OpenAI client.

        Args:
            editor_image, browser_image, scratchpad_image: Initial images for
                the three views; each is replaced below by a screenshot of
                the freshly created session.
            multion_api_key: API key passed to ``MultiOn``.
            openai_api_key: API key passed to ``OpenAI``.
            replit_email, replit_password: Replit credentials that
                ``programmer_login`` inserts into the login command.
        """
        # The API keys are deliberately not printed: they are secrets and
        # must not end up in console output.
        self.editor_image = editor_image
        self.browser_image = browser_image
        self.scratchpad_image = scratchpad_image
        # Sessions run locally only when WHERE_EXECUTE is exactly "local".
        self.local = os.getenv("WHERE_EXECUTE") == "local"

        self.multion = MultiOn(api_key=multion_api_key)

        self.replit_email = replit_email
        self.replit_password = replit_password

        self.programmer, self.editor_image = self._open_session(
            "https://replit.com/login"
        )
        self.programmer_logged_in = False
        print(self.editor_image)

        self.researcher, self.browser_image = self._open_session(
            "https://www.google.com"
        )
        self.notetaker, self.scratchpad_image = self._open_session(
            "https://anotepad.com/"
        )

        # `run` flips this to False when a task starts.
        self.done = True
        self.task = ""
        self.plan = ""
        self.messages = []
        self.client = OpenAI(api_key=openai_api_key)

    def _open_session(self, url):
        """Create a MultiOn session at ``url``; return ``(session, screenshot)``."""
        session = self.multion.sessions.create(
            url=url, local=self.local, include_screenshot=True
        )
        screenshot = self.multion.sessions.screenshot(
            session_id=session.session_id
        ).screenshot
        # One-second pause after each session start, kept from the original
        # code (presumably to pace API calls - TODO confirm).
        time.sleep(1)
        return session, screenshot

    def programmer_login(self):
        """Drive the Programmer session until a Python REPL is ready.

        In local mode the session is only asked to create a REPL; otherwise
        it is first told to log in with the stored Replit credentials. The
        same command is stepped repeatedly until the session reports
        ``DONE``, after which the editor screenshot is refreshed.
        """
        login_cmd = "Create a new Python REPL."
        if not self.local:
            login_cmd = "Log in with the email {email} and the password {password}. Then create a new Python REPL.".format(
                email=self.replit_email, password=self.replit_password
            )
        full_cmd = login_cmd + "\n\n" + programmer_notes

        status = None
        while status != "DONE":
            self.programmer = self.multion.sessions.step(
                self.programmer.session_id,
                cmd=full_cmd,
                url="https://replit.com/login",
                include_screenshot=True,
            )
            print(self.programmer)
            current_shot = self.multion.sessions.screenshot(
                session_id=self.programmer.session_id
            )
            print(current_shot.screenshot)
            status = self.programmer.status

        final_shot = self.multion.sessions.screenshot(
            session_id=self.programmer.session_id
        )
        self.editor_image = final_shot.screenshot
        time.sleep(1)

    def prepare_messages(self):
        messages = [
            {"role": "user", "content": orchestrator_prompt},
            {
                "role": "user",
                "content": "The Task given to you is: {task}".format(task=self.task),
            },
            {
                "role": "user",
                "content": "The current Plan state is: {plan}".format(plan=""),
            },
        ]
        for message in self.messages:
            messages.append(message)

        messages.append(
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "This is the current state of the Programmer Intern.",
                    },
                    {"type": "image_url", "image_url": {"url": self.editor_image}},
                ],
            }
        )
        messages.append(
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "This is the current state of the Researcher Intern.",
                    },
                    {"type": "image_url", "image_url": {"url": self.browser_image}},
                ],
            }
        )
        messages.append(
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "This is the current state of the Notetaker Intern.",
                    },
                    {"type": "image_url", "image_url": {"url": self.scratchpad_image}},
                ],
            }
        )
        return messages

    def execute_action(self, action):

        action_func = action.split(" ", 1)[0]

        if action_func == "submit":
            self.done = True
            return
        elif action_func == "update_plan":
            action_arg = action.split(" ", 1)[1]
            self.plan = action_arg
            return
        elif action_func == "programmer":
            action_arg = action.split(" ", 1)[1]
            while True:
                self.programmer = self.multion.sessions.step(
                    self.programmer.session_id,
                    cmd=action_arg + "\n\n" + programmer_notes,
                    url="https://replit.com/login",
                    include_screenshot=True,
                )
                print(self.programmer)
                if self.programmer.status == "NOT SURE":
                    self.messages.append(
                        {
                            "role": "user",
                            "content": "The Programmer says: {message}\n\nYour next reply will go to the programmer.".format(
                                message=self.programmer.message
                            ),
                        }
                    )
                    chat_completion = self.client.chat.completions.create(
                        messages=self.prepare_messages(),
                        model="gpt-4-vision-preview",
                        # max_tokens=200,
                    )
                    action_arg = chat_completion.choices[0].message.content
                    self.messages.append({"role": "assistant", "content": action_arg})
                else:
                    self.messages.append(
                        {
                            "role": "user",
                            "content": "The Programmer says: {message}".format(
                                message=self.programmer.message
                            ),
                        }
                    )
                # time.sleep(1)
                # self.editor_image = self.programmer["screenshot"]
                # yield ("", self.editor_image, self.browser_image, self.scratchpad_image)
                if self.programmer.status == "DONE":
                    break
            self.editor_image = self.multion.sessions.screenshot(
                session_id=self.programmer.session_id
            ).screenshot
            print(self.editor_image)
            time.sleep(1)
            return
        elif action_func == "researcher":
            action_arg = action.split(" ", 1)[1]
            while True:
                self.researcher = self.multion.sessions.step(
                    self.researcher.session_id,
                    cmd=action_arg,
                    url="https://www.google.com",
                    include_screenshot=True,
                )
                print(self.researcher)
                self.messages.append(
                    {
                        "role": "user",
                        "content": "The Researcher says: {message}".format(
                            message=self.researcher.message
                        ),
                    }
                )
                # time.sleep(1)
                # self.browser_image = self.researcher["screenshot"]
                # yield ("", self.editor_image, self.browser_image, self.scratchpad_image)
                if self.researcher.status == "DONE":
                    break
            self.browser_image = self.multion.sessions.screenshot(
                session_id=self.researcher.session_id
            ).screenshot
            print(self.browser_image)
            time.sleep(1)
            return
        elif action_func == "notetaker":
            action_arg = action.split(" ", 1)[1]
            while True:
                self.notetaker = self.multion.sessions.step(
                    self.notetaker.session_id,
                    cmd=action_arg + "\n\n" + notetaker_notes,
                    url="https://anotepad.com/",
                    include_screenshot=True,
                )
                print(self.notetaker)
                self.messages.append(
                    {
                        "role": "user",
                        "content": "The Notetaker says: {message}".format(
                            message=self.notetaker.message
                        ),
                    }
                )
                # time.sleep(1)
                # self.scratchpad_image = self.notetaker["screenshot"]
                # yield ("", self.editor_image, self.browser_image, self.scratchpad_image)
                if self.notetaker.status == "DONE":
                    break
            self.scratchpad_image = self.multion.sessions.screenshot(
                session_id=self.notetaker.session_id
            ).screenshot
            print(self.scratchpad_image)
            time.sleep(1)
            return
        elif action_func == "clarify":
            action_arg = action.split(" ", 1)[1]
            return

    def orchestrator(self):
        if not self.programmer_logged_in:
            self.programmer_login()
            self.programmer_logged_in = True
        messages = self.prepare_messages()
        chat_completion = self.client.chat.completions.create(
            messages=messages,
            model="gpt-4-vision-preview",
            # max_tokens=200,
        )
        response = chat_completion.choices[0].message.content
        action, explanation = response.split("Explanation: ", 1)
        action = action.split("Action: ", 1)[1]

        self.messages.append({"role": "assistant", "content": response})
        self.messages.append(
            {
                "role": "user",
                "content": "The current Plan state is: {plan}".format(plan=self.plan),
            }
        )
        print(self.messages)

        self.execute_action(action)

        # temp
        # self.done = True
        return explanation

    def run(self, prompt):
        self.done = False
        self.task = prompt
        while not self.done:
            curr_response = self.orchestrator()
            yield (
                curr_response,
                self.editor_image,
                self.browser_image,
                self.scratchpad_image,
            )
    
</gradio-file>

<gradio-file name="prompts.py">
# System prompt for the orchestrator model. The third intern is called
# "Notetaker" throughout, matching the `notetaker` action listed below.
orchestrator_prompt = """**General**

- You are DevOn, an expert Software Developer.
- You will be asked to develop a new software project from scratch. You will primarily work in Python. You will deal with large software projects spanning multiple files and user requirements.
- Your lifecycle will essentially circle around the Task, the State, your Plan, your Actions, and your Interns. Each of these are described in detail below.
- To start with, your Plan will be empty. You will receive a State (in the form of 3 images, one from each of your Interns) and a Task. You will construct a Plan outlining the steps you will need to take to complete the Task, then ask your Interns to do things in order to incrementally fulfil the steps and complete the Task.
- With each step, you will also provide an Explanation, explaining to the user what you are currently doing, so they may be able to keep track and monitor your progress. For example:
    - Explanation: I am currently updating the plan based on the current state and the Task.
    - Explanation: I am currently creating a file called utils.py which will contain utility functions.

**State**

**Interns**

- You have 3 interns who will help you with different tasks - a Programmer, a Researcher, and a Notetaker. Here’s some info about them:
    - Programmer: the Programmer is great at writing code given very specific instructions but isn’t a good long term planner. The Programmer works on Replit. You can ask the Programmer to write some code in certain files, make new files, etc. You can even give loose instructions like “Make a new file and write basic skeleton for an Agent class in it.” Keep in mind that the Programmer works exclusively in an online Replit IDE environment. Make sure your Plan and your Actions take this into consideration.
    - Researcher: the Researcher is very handy with a browser and great at finding out technical details, documentation, examples, miscellaneous information, etc. You can ask it to do things like “Find out how to make an LLM call using the Perplexity API”.
    - Notetaker: the Notetaker has a notepad and can note down anything you want. You will be able to see the notepad at all times. Anytime you want anything written down just to keep track of it, ask the Notetaker to do so.

**Plan**

- You have a persistent object to keep track of things: a Plan.
- If the plan is empty, you will create a plan using the current state of things and the given task. You will do so using the update_plan action described below.

**Actions**

- There are 6 actions that you can take at the current time step. You must always take a valid action. You will complete the task by taking actions. You are free to take as many actions as needed (even hundreds), don’t try to rush by compressing multiple actions into one. These are the available actions:
    - update_plan <plan>: Update Plan’s value to <plan>. This will replace the old value, not append to it. If there’s something from the old plan you wish to include in the updated one, make sure to include it in the <plan> you provide as an argument. Some examples how you can use this:
        - update_plan In order to carry out the task of creating a Flask web server, I will need to take the following steps:
        1) …
        2) …
        3) …
    - programmer <task>: Ask the Programmer to carry out a <task>. Some examples of how you can use this:
        - programmer Create a new Python file for utils called utils.py and write a generate_random_number() function in it that takes no parameters and returns a random number.
        - programmer Go to the model.py file and import generate_random_number() from utils.
    - researcher <task>: Ask the Researcher to carry out a <task>. The Researcher will reply to you with the information you asked for. Some examples of how you can use this:
        - researcher Find out how the OpenAI API is used.
        - researcher What is a SERP API I could use?
    - notetaker <note>: Ask the Notetaker to carry out a <task>. Some examples of how you can use this:
        - notetaker Note down the following information: MULTION_API_KEY=…
        - notetaker Note down the following information: An example Chat Completions API call looks like the following:
        from openai import OpenAI
        client = OpenAI()
        
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Who won the world series in 2020?"},
            {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
            {"role": "user", "content": "Where was it played?"}
            ]
        )
    - clarify <question>: Clarify something about the Task. Sometimes, there may be missing information, such as logins, api keys, or some requirements of the Task may be unclear. Use this Action to clarify things from the user by asking <question>. Use this sparingly. Try and make decisions yourself. Some examples of how you can use this:
        - clarify The Task mentions that I need to benchmark the Perplexity API. Could you provide your API Key?
    - submit: The Task is completed and you are ready to submit the output (whatever the programmer has so far). This is end the execution. Only do this when you are completely sure.

**Important Notes**

- Respond only by taking an Action (and providing the accompanying Explanation). Any response from you must be one of the above Actions. No other text in the response, just the Action and the Explanation. You will structure your output as such:
”Action: <action>\nExplanation: <explanation>”
- You do not need to ask the Programmer to log in.
- You can see all the Interns screens. If it seems like an Intern has made a mistake or encountered an error, you can tell them about it using the relevant action and ask them to correct it. This is especially important with the Programmer.
- When you ask the programmer to write some code, ask it like this: "programmer memorize the following code and write it in the editor: <code>"
- When you ask the programmer to write some code, make sure the code does not include any double quotation marks, only single quotation marks. E.g. "hello world" should instead be 'hello world'.
- Do not ask the programmer to create new files.
- When writing code, it is preferable to keep it small and simple. Don't write too much fluff.
- Remember to only use single quotation marks.
"""
# programmer_notes = """Important Notes:
# Do not refresh the page ever to check for anything. Only wait. Do not refresh.
# You are working in a terminal environment.
# You will do everything using the terminal and only the terminal.
# If you need to create a new file, do so using the touch command on the terminal.
# If you need to see files in the current directory, do so using ls.
# If you need to view a files content, do so using the cat command on the terminal.
# To enter code into a file, use a single printf command. After the printf command has been completely typed, press enter. Typing the command and pressing enter must be 2 separate steps.
# Do not open a text editor like vim or nano.
# If you need to install a new package, use pip install on the terminal.
# Do not use the same command repeatedly.
# When you write code into a file, write it once, cat it once, then stop. Do not attempt to write again unless it is wrong.
# Remember that you need to press Enter after typing a command into the terminal. Only press enter after the command has been completely typed. Typing the command and pressing enter must be 2 separate steps."""

# Standing instructions for the Programmer intern, one instruction group per
# line of the resulting string.
programmer_notes = "\n".join(
    (
        "Important Notes:",
        "You are a Programmer who works in a Replit Environment exclusively. If you need to install a package, use the Shell and not the Console.",
        "Do not refresh the page ever to check for anything. Only wait. Do not refresh. Do not create new files. Write your code in currently open editor window itself. Do not type double quotation marks. If you are asked to type code containing them, use single quotes instead.",
    )
)

# Standing instructions for the Notetaker intern, one instruction per line of
# the resulting string.
notetaker_notes = "\n".join(
    (
        "Important Notes:",
        "Don't write anything in the Note Title field.",
        "Whatever notes you are told to make, write them in one go, don't press enter or type multiple times, because everytime you write, it will replace the prevoius content.",
        "You do not need to Save the note. When asked to note something down, just write it on the notepad. That is enough.",
    )
)

</gradio-file>
            
        </gradio-lite>
	</body>
</html>