Spaces:

lordspline
/

devon

Sleeping

App Files Files Community

devon / index.html

lordspline

Upload folder using huggingface_hub

2ab9625 verified 8 months ago

raw

history blame

25.1 kB



	<html>
	<head>
	<script type="module" crossorigin src="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.js"></script>
	<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.css" />
	</head>
	<body>
	<gradio-lite>

	<gradio-requirements>
	multion
	openai
	python-dotenv
	pyodide-http
	</gradio-requirements>

	<gradio-file name="app.py" entrypoint>
	import pyodide_http
	pyodide_http.patch_all()
	import gradio as gr
	import os
	import time
	from agent import DevOn

	# Placeholder picture passed to DevOn for the editor, browser and scratchpad
	# panes when bot() constructs the agent.
	image_temp = "https://miro.medium.com/v2/resize:fit:1200/0*n-2bW82Z6m6U2bij.jpeg"

	# No agent exists at import time; bot() creates one for each submitted message.
	devon = None

	# Credentials start out empty and are overwritten by the *_update callbacks
	# whenever the matching textbox changes.
	multion_api_key, openai_api_key = "", ""
	replit_email, replit_password = "", ""


	def add_message(history, message):
	    """Append a submitted multimodal message to the chat history.

	    Every entry of ``message["files"]`` becomes its own user turn, stored as a
	    one-element tuple; the text, when it is not None, is added as a further
	    user turn afterwards.

	    Returns:
	        The updated history and a cleared, non-interactive MultimodalTextbox
	        to replace the input box.
	    """
	    history.extend(((path,), None) for path in message["files"])
	    text = message["text"]
	    if text is not None:
	        history.append((text, None))
	    locked_input = gr.MultimodalTextbox(value=None, interactive=False)
	    return history, locked_input


	def multion_api_key_update(x):
	    """Store ``x`` as the module-level MultiOn API key."""
	    globals()["multion_api_key"] = x


	def openai_api_key_update(x):
	    """Store ``x`` as the module-level OpenAI API key."""
	    globals()["openai_api_key"] = x


	def replit_email_update(x):
	    """Store ``x`` as the module-level Replit e-mail address."""
	    globals()["replit_email"] = x


	def replit_password_update(x):
	    """Store ``x`` as the module-level Replit password."""
	    globals()["replit_password"] = x


	def bot(history):
	    """Run a DevOn agent on the newest user message and stream its progress.

	    A new DevOn is built on every call from the credentials currently held in
	    the module-level settings. The text of the last history entry is given to
	    the agent as its task.

	    Args:
	        history: Chat history as a list of ``(user, bot)`` pairs; the agent's
	            explanations are appended to it in place as ``(None, text)``.

	    Yields:
	        ``(history, editor_image, browser_image, scratchpad_image)`` after
	        every agent step.
	    """
	    # Named ``agent`` so the module-level ``devon`` placeholder is not shadowed.
	    agent = DevOn(
	        editor_image=image_temp,
	        browser_image=image_temp,
	        scratchpad_image=image_temp,
	        multion_api_key=multion_api_key,
	        openai_api_key=openai_api_key,
	        replit_email=replit_email,
	        replit_password=replit_password,
	    )

	    for text, editor_image, browser_image, scratchpad_image in agent.run(
	        history[-1][0]
	    ):
	        if isinstance(text, str):
	            history.append((None, text))
	        # Each pane falls back to the agent's stored image on its own, so a
	        # missing editor image no longer decides what the other panes show.
	        if editor_image is None:
	            editor_image = agent.editor_image
	        if browser_image is None:
	            browser_image = agent.browser_image
	        if scratchpad_image is None:
	            scratchpad_image = agent.scratchpad_image
	        yield history, editor_image, browser_image, scratchpad_image


	# NOTE(review): this block was recovered from a copy whose indentation had been
	# stripped; the Row/Column nesting below is the most plausible reading — confirm
	# against the intended layout.
	with gr.Blocks(css="footer {visibility: hidden}") as demo:
	    # Credentials. Secrets are masked so they are not readable on screen.
	    with gr.Row():
	        with gr.Column():
	            multion_api_key_in = gr.Textbox(label="MultiOn API Key", type="password")
	            openai_api_key_in = gr.Textbox(label="OpenAI API Key", type="password")
	        with gr.Column():
	            replit_email_in = gr.Textbox(label="Replit Email")
	            replit_password_in = gr.Textbox(
	                label="Replit Password", type="password"
	            )

	    # Chat on the left, the Programmer's editor on the right.
	    with gr.Row():
	        with gr.Column():
	            chatbot = gr.Chatbot(
	                [], elem_id="chatbot", bubble_full_width=False, height=300
	            )

	            chat_input = gr.MultimodalTextbox(
	                value={
	                    "text": "benchmark the perplexity api's resposne time with the api key abcdef"
	                },
	                interactive=True,
	                file_types=["image"],
	                placeholder="Enter message or upload file...",
	                show_label=False,
	            )
	        with gr.Column():
	            # The pane is labelled whether or not an agent exists yet; with no
	            # agent it simply starts without an image.
	            editor_view = gr.Image(
	                devon.editor_image if devon else None,
	                label="Editor",
	            )

	    # Researcher's browser and Notetaker's scratchpad.
	    with gr.Row():
	        with gr.Column():
	            browser_view = gr.Image(
	                devon.browser_image if devon else None,
	                label="Browser",
	            )
	        with gr.Column():
	            scratchpad_view = gr.Image(
	                devon.scratchpad_image if devon else None,
	                label="Scratchpad",
	            )

	    # Submit: record the message and lock the input, run the agent, then
	    # unlock the input again.
	    chat_msg = chat_input.submit(
	        add_message, [chatbot, chat_input], [chatbot, chat_input]
	    )
	    bot_msg = chat_msg.then(
	        bot,
	        [chatbot],
	        [chatbot, editor_view, browser_view, scratchpad_view],
	        api_name="bot_response",
	    )
	    bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, [chat_input])

	    # Mirror every credential textbox into its module-level setting.
	    multion_api_key_in.change(multion_api_key_update, multion_api_key_in)
	    openai_api_key_in.change(openai_api_key_update, openai_api_key_in)
	    replit_email_in.change(replit_email_update, replit_email_in)
	    replit_password_in.change(replit_password_update, replit_password_in)

	if __name__ == "__main__":
	    demo.queue()
	    demo.launch()
	</gradio-file>

	<gradio-file name="agent.py" >
	from openai import OpenAI
	from prompts import orchestrator_prompt
	from prompts import programmer_notes
	from prompts import notetaker_notes
	from dotenv import load_dotenv
	import time
	import multion
	from multion.client import MultiOn
	import os

	# Load variables from .env.local into the environment; DevOn.__init__ reads
	# WHERE_EXECUTE through os.getenv.
	load_dotenv(".env.local")

	# Disabled environment-based configuration. The Replit and MultiOn credentials
	# are passed to DevOn.__init__ as arguments instead.
	# replit_email = os.getenv("REPLIT_EMAIL")
	# replit_password = os.getenv("REPLIT_PASSWORD")

	# multion_api_key = os.getenv("MULTION_API_KEY")
	# multion.login(use_api=True, multion_api_key=multion_api_key)

	# runpod_url = os.getenv("RUNPOD_URL")

	# Placeholder image URL; not referenced by the DevOn class below.
	image_temp = "https://miro.medium.com/v2/resize:fit:1200/0*n-2bW82Z6m6U2bij.jpeg"


	class DevOn:
	    """Coordinate three MultiOn browser sessions under an OpenAI orchestrator.

	    Three sessions ("interns") are opened on construction: the Programmer on
	    Replit, the Researcher on Google and the Notetaker on anotepad.com. On
	    every step the orchestrator model receives the task, the message history
	    and the latest screenshot of each session, and answers with one action
	    that ``execute_action`` carries out.
	    """

	    # Chat model asked for every orchestrator decision.
	    MODEL = "gpt-4-vision-preview"
	    # Start pages of the three sessions; also passed as ``url`` on every step.
	    PROGRAMMER_URL = "https://replit.com/login"
	    RESEARCHER_URL = "https://www.google.com"
	    NOTETAKER_URL = "https://anotepad.com/"
	    # Actions that cannot be executed without text after the action name.
	    ACTIONS_NEEDING_ARGUMENT = frozenset(
	        {"update_plan", "programmer", "researcher", "notetaker"}
	    )

	    def __init__(
	        self,
	        editor_image,
	        browser_image,
	        scratchpad_image,
	        multion_api_key,
	        openai_api_key,
	        replit_email,
	        replit_password,
	    ):
	        """Open the three sessions and capture an initial screenshot of each.

	        Args:
	            editor_image: Initial image for the Programmer pane; replaced by
	                the first Programmer screenshot.
	            browser_image: Initial image for the Researcher pane; replaced by
	                the first Researcher screenshot.
	            scratchpad_image: Initial image for the Notetaker pane; replaced
	                by the first Notetaker screenshot.
	            multion_api_key: API key for the MultiOn client.
	            openai_api_key: API key for the OpenAI client.
	            replit_email: E-mail used in the Programmer's log-in command.
	            replit_password: Password used in the Programmer's log-in command.
	        """
	        # The API keys are deliberately not printed: they are secrets.
	        self.editor_image = editor_image
	        self.browser_image = browser_image
	        self.scratchpad_image = scratchpad_image
	        # Passed as ``local=`` when sessions are created; also selects the
	        # log-in command used by programmer_login.
	        self.local = os.getenv("WHERE_EXECUTE") == "local"

	        self.multion = MultiOn(api_key=multion_api_key)

	        self.replit_email = replit_email
	        self.replit_password = replit_password

	        self.programmer = self._open_session(self.PROGRAMMER_URL)
	        self.programmer_logged_in = False
	        self.editor_image = self._screenshot(self.programmer)
	        print(self.editor_image)
	        time.sleep(1)

	        self.researcher = self._open_session(self.RESEARCHER_URL)
	        self.browser_image = self._screenshot(self.researcher)
	        time.sleep(1)

	        self.notetaker = self._open_session(self.NOTETAKER_URL)
	        self.scratchpad_image = self._screenshot(self.notetaker)
	        time.sleep(1)

	        self.done = True
	        self.task = ""
	        self.plan = ""
	        self.messages = []
	        self.client = OpenAI(api_key=openai_api_key)

	    def _open_session(self, url):
	        """Create a MultiOn session that starts at ``url``."""
	        return self.multion.sessions.create(
	            url=url, local=self.local, include_screenshot=True
	        )

	    def _screenshot(self, session):
	        """Return the current screenshot of ``session`` as reported by MultiOn."""
	        return self.multion.sessions.screenshot(
	            session_id=session.session_id
	        ).screenshot

	    def _ask_orchestrator(self):
	        """Send the current conversation to the model and return its reply text."""
	        completion = self.client.chat.completions.create(
	            messages=self.prepare_messages(),
	            model=self.MODEL,
	        )
	        # A reply without text content is treated as an empty string.
	        return completion.choices[0].message.content or ""

	    def programmer_login(self):
	        """Have the Programmer open a new Python REPL, logging in first if needed.

	        When running locally only the REPL is requested; otherwise the command
	        also contains the Replit e-mail and password. The command is repeated
	        until the session reports the status DONE, after which the editor
	        screenshot is refreshed.
	        """
	        if self.local:
	            cmd = "Create a new Python REPL."
	        else:
	            cmd = "Log in with the email {email} and the password {password}. Then create a new Python REPL.".format(
	                email=self.replit_email, password=self.replit_password
	            )
	        while True:
	            self.programmer = self.multion.sessions.step(
	                self.programmer.session_id,
	                cmd=cmd + "\n\n" + programmer_notes,
	                url=self.PROGRAMMER_URL,
	                include_screenshot=True,
	            )
	            print(self.programmer)
	            print(self._screenshot(self.programmer))
	            if self.programmer.status == "DONE":
	                break

	        self.editor_image = self._screenshot(self.programmer)
	        time.sleep(1)

	    @staticmethod
	    def _intern_state(label, image_url):
	        """Build the user message that shows one intern's current screenshot."""
	        return {
	            "role": "user",
	            "content": [
	                {
	                    "type": "text",
	                    "text": f"This is the current state of the {label} Intern.",
	                },
	                {"type": "image_url", "image_url": {"url": image_url}},
	            ],
	        }

	    def prepare_messages(self):
	        """Assemble the full message list for the next orchestrator call.

	        Order: orchestrator prompt, task, an empty opening plan slot, the
	        accumulated history, then one screenshot message per intern.
	        """
	        messages = [
	            {"role": "user", "content": orchestrator_prompt},
	            {
	                "role": "user",
	                "content": f"The Task given to you is: {self.task}",
	            },
	            # The opening plan slot is always empty; later plan states are
	            # appended to self.messages by orchestrator().
	            {"role": "user", "content": "The current Plan state is: "},
	        ]
	        messages.extend(self.messages)
	        messages.append(self._intern_state("Programmer", self.editor_image))
	        messages.append(self._intern_state("Researcher", self.browser_image))
	        messages.append(self._intern_state("Notetaker", self.scratchpad_image))
	        return messages

	    def _step_until_done(self, session, label, command, url):
	        """Repeat ``command`` on ``session`` until its status is DONE.

	        Every reply is recorded in the history as "The <label> says: ...".
	        Returns the last session response.
	        """
	        while True:
	            session = self.multion.sessions.step(
	                session.session_id,
	                cmd=command,
	                url=url,
	                include_screenshot=True,
	            )
	            print(session)
	            self.messages.append(
	                {
	                    "role": "user",
	                    "content": f"The {label} says: {session.message}",
	                }
	            )
	            if session.status == "DONE":
	                return session

	    def _run_programmer(self, task):
	        """Drive the Programmer until DONE, relaying its questions to the model.

	        When the Programmer answers NOT SURE, its message is sent to the
	        orchestrator model and the model's reply becomes the next command.
	        """
	        while True:
	            self.programmer = self.multion.sessions.step(
	                self.programmer.session_id,
	                cmd=task + "\n\n" + programmer_notes,
	                url=self.PROGRAMMER_URL,
	                include_screenshot=True,
	            )
	            print(self.programmer)
	            said = f"The Programmer says: {self.programmer.message}"
	            if self.programmer.status == "NOT SURE":
	                self.messages.append(
	                    {
	                        "role": "user",
	                        "content": said
	                        + "\n\nYour next reply will go to the programmer.",
	                    }
	                )
	                task = self._ask_orchestrator()
	                self.messages.append({"role": "assistant", "content": task})
	            else:
	                self.messages.append({"role": "user", "content": said})
	            if self.programmer.status == "DONE":
	                break
	        self.editor_image = self._screenshot(self.programmer)
	        print(self.editor_image)
	        time.sleep(1)

	    def execute_action(self, action):
	        """Carry out one orchestrator action.

	        Args:
	            action: Text of the form "<name> <argument>". Surrounding
	                whitespace is ignored, so a name followed only by a line
	                break (as the reply format produces for ``submit``) is
	                still recognised.

	        ``submit`` ends the run, ``update_plan`` replaces the plan, and
	        ``programmer`` / ``researcher`` / ``notetaker`` forward the argument
	        to the matching session. An action that needs an argument but has
	        none is reported back to the model instead of raising. ``clarify``
	        and unknown names execute nothing.
	        """
	        pieces = action.strip().split(None, 1)
	        if not pieces:
	            return
	        name = pieces[0]
	        argument = pieces[1] if len(pieces) > 1 else ""

	        if name in self.ACTIONS_NEEDING_ARGUMENT and not argument:
	            self.messages.append(
	                {
	                    "role": "user",
	                    "content": f"The action '{name}' needs an argument but none was given.",
	                }
	            )
	            return

	        if name == "submit":
	            self.done = True
	        elif name == "update_plan":
	            self.plan = argument
	        elif name == "programmer":
	            self._run_programmer(argument)
	        elif name == "researcher":
	            self.researcher = self._step_until_done(
	                self.researcher, "Researcher", argument, self.RESEARCHER_URL
	            )
	            self.browser_image = self._screenshot(self.researcher)
	            print(self.browser_image)
	            time.sleep(1)
	        elif name == "notetaker":
	            self.notetaker = self._step_until_done(
	                self.notetaker,
	                "Notetaker",
	                argument + "\n\n" + notetaker_notes,
	                self.NOTETAKER_URL,
	            )
	            self.scratchpad_image = self._screenshot(self.notetaker)
	            print(self.scratchpad_image)
	            time.sleep(1)
	        # NOTE(review): "clarify" executes nothing, so its question is never
	        # shown to the user — confirm whether that is intended.

	    @staticmethod
	    def _parse_response(response):
	        """Split a model reply into ``(action, explanation)``.

	        The action is the text between "Action:" and "Explanation:"; a missing
	        marker yields the remaining text or an empty string instead of raising.
	        """
	        head, _, explanation = response.partition("Explanation:")
	        _, marker, action = head.partition("Action:")
	        if not marker:
	            action = head
	        return action.strip(), explanation.strip()

	    def orchestrator(self):
	        """Run one orchestration step and return the model's explanation.

	        Logs the Programmer in on the first call, asks the model for the next
	        action, records the reply and the current plan in the history, then
	        executes the action.
	        """
	        if not self.programmer_logged_in:
	            self.programmer_login()
	            self.programmer_logged_in = True
	        response = self._ask_orchestrator()
	        action, explanation = self._parse_response(response)

	        self.messages.append({"role": "assistant", "content": response})
	        self.messages.append(
	            {
	                "role": "user",
	                "content": f"The current Plan state is: {self.plan}",
	            }
	        )
	        print(self.messages)

	        self.execute_action(action)

	        return explanation

	    def run(self, prompt):
	        """Work on ``prompt`` until a ``submit`` action marks the run as done.

	        Yields:
	            ``(explanation, editor_image, browser_image, scratchpad_image)``
	            after every orchestration step.
	        """
	        self.done = False
	        self.task = prompt
	        while not self.done:
	            curr_response = self.orchestrator()
	            yield (
	                curr_response,
	                self.editor_image,
	                self.browser_image,
	                self.scratchpad_image,
	            )

	</gradio-file>

	<gradio-file name="prompts.py">
	orchestrator_prompt = """General

	- You are DevOn, an expert Software Developer.
	- You will be asked to develop a new software project from scratch. You will primarily work in Python. You will deal with large software projects spanning multiple files and user requirements.
	- Your lifecycle will essentially circle around the Task, the State, your Plan, your Actions, and your Interns. Each of these are described in detail below.
	- To start with, your Plan will be empty. You will receive a State (in the form of 3 images, one from each of your Interns) and a Task. You will construct a Plan outlining the steps you will need to take to complete the Task, then ask your Interns to do things in order to incrementally fulfil the steps and complete the Task.
	- With each step, you will also provide an Explanation, explaining to the user what you are currently doing, so they may be able to keep track and monitor your progress. For example:
	- Explanation: I am currently updating the plan based on the current state and the Task.
	- Explanation: I am currently creating a file called utils.py which will contain utility functions.

	State

	Interns

	- You have 3 interns who will help you with different tasks - a Programmer, a Researcher, and a Notetaker. Here’s some info about them:
	- Programmer: the Programmer is great at writing code given very specific instructions but isn’t a good long term planner. The Programmer works on Replit. You can ask the Programmer to write some code in certain files, make new files, etc. You can even give loose instructions like “Make a new file and write basic skeleton for an Agent class in it.” Keep in mind that the Programmer works exclusively in an online Replit IDE environment. Make sure your Plan and your Actions take this into consideration.
	- Researcher: the Researcher is very handy with a browser and great at finding out technical details, documentation, examples, miscellaneous information, etc. You can ask it to do things like “Find out how to make an LLM call using the Perplexity API”.
	- Notetaker: the Notetaker has a notepad and can note down anything you want. You will be able to see the notepad at all times. Anytime you want anything written down just to keep track of it, ask the Notetaker to do so.

	Plan

	- You have a persistent object to keep track of things: a Plan.
	- If the plan is empty, you will create a plan using the current state of things and the given task. You will do so using the update_plan action described below.

	Actions

	- There are 6 actions that you can take at the current time step. You must always take a valid action. You will complete the task by taking actions. You are free to take as many actions as needed (even hundreds), don’t try to rush by compressing multiple actions into one. These are the available actions:
	- update_plan <plan>: Update Plan’s value to <plan>. This will replace the old value, not append to it. If there’s something from the old plan you wish to include in the updated one, make sure to include it in the <plan> you provide as an argument. Some examples how you can use this:
	- update_plan In order to carry out the task of creating a Flask web server, I will need to take the following steps:
	1) …
	2) …
	3) …
	- programmer <task>: Ask the Programmer to carry out a <task>. Some examples of how you can use this:
	- programmer Create a new Python file for utils called utils.py and write a generate_random_number() function in it that takes no parameters and returns a random number.
	- programmer Go to the model.py file and import generate_random_number() from utils.
	- researcher <task>: Ask the Researcher to carry out a <task>. The Researcher will reply to you with the information you asked for. Some examples of how you can use this:
	- researcher Find out how the OpenAI API is used.
	- researcher What is a SERP API I could use?
	- notetaker <note>: Ask the Notetaker to write down a <note>. Some examples of how you can use this:
	- notetaker Note down the following information: MULTION_API_KEY=…
	- notetaker Note down the following information: An example Chat Completions API call looks like the following:
	from openai import OpenAI
	client = OpenAI()

	response = client.chat.completions.create(
	model="gpt-3.5-turbo",
	messages=[
	{"role": "system", "content": "You are a helpful assistant."},
	{"role": "user", "content": "Who won the world series in 2020?"},
	{"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
	{"role": "user", "content": "Where was it played?"}
	]
	)
	- clarify <question>: Clarify something about the Task. Sometimes, there may be missing information, such as logins, api keys, or some requirements of the Task may be unclear. Use this Action to clarify things from the user by asking <question>. Use this sparingly. Try and make decisions yourself. Some examples of how you can use this:
	- clarify The Task mentions that I need to benchmark the Perplexity API. Could you provide your API Key?
	- submit: The Task is completed and you are ready to submit the output (whatever the programmer has so far). This will end the execution. Only do this when you are completely sure.

	Important Notes

	- Respond only by taking an Action (and providing the accompanying Explanation). Any response from you must be one of the above Actions. No other text in the response, just the Action and the Explanation. You will structure your output as such:
	”Action: <action>\nExplanation: <explanation>”
	- You do not need to ask the Programmer to log in.
	- You can see all the Interns screens. If it seems like an Intern has made a mistake or encountered an error, you can tell them about it using the relevant action and ask them to correct it. This is especially important with the Programmer.
	- When you ask the programmer to write some code, ask it like this: "programmer memorize the following code and write it in the editor: <code>"
	- When you ask the programmer to write some code, make sure the code does not include any double quotation marks, only single quotation marks. E.g. "hello world" should instead be 'hello world'.
	- Do not ask the programmer to create new files.
	- When writing code, it is preferable to keep it small and simple. Don't write too much fluff.
	- Remember to only use single quotation marks.
	"""
	# programmer_notes = """Important Notes:
	# Do not refresh the page ever to check for anything. Only wait. Do not refresh.
	# You are working in a terminal environment.
	# You will do everything using the terminal and only the terminal.
	# If you need to create a new file, do so using the touch command on the terminal.
	# If you need to see files in the current directory, do so using ls.
	# If you need to view a files content, do so using the cat command on the terminal.
	# To enter code into a file, use a single printf command. After the printf command has been completely typed, press enter. Typing the command and pressing enter must be 2 separate steps.
	# Do not open a text editor like vim or nano.
	# If you need to install a new package, use pip install on the terminal.
	# Do not use the same command repeatedly.
	# When you write code into a file, write it once, cat it once, then stop. Do no attempt to write again unless it is wrong.
	# Remember that you need to press Enter after typing a command into the terminal. Only press enter after the command has been completely typed. Typing the command pressing enter must be 2 separate steps."""

	programmer_notes = """Important Notes:
	You are a Programmer who works in a Replit Environment exclusively. If you need to install a package, use the Shell and not the Console.
	Do not refresh the page ever to check for anything. Only wait. Do not refresh. Do not create new files. Write your code in the currently open editor window itself. Do not type double quotation marks. If you are asked to type code containing them, use single quotes instead."""

	notetaker_notes = """Important Notes:
	Don't write anything in the Note Title field.
	Whatever notes you are told to make, write them in one go, don't press enter or type multiple times, because every time you write, it will replace the previous content.
	You do not need to Save the note. When asked to note something down, just write it on the notepad. That is enough."""

	</gradio-file>

	</gradio-lite>
	</body>
	</html>