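"""Llama Code Editor.

A voice-driven Gradio demo: audio is streamed over WebRTC, transcribed with the
Hugging Face Inference API, and Meta-Llama-3.1-70B-Instruct (served through
SambaNova's OpenAI-compatible endpoint) writes or edits a single-file HTML
application that is rendered live in the Sandbox tab.
"""
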
import gradio as gr
from gradio_webrtc import WebRTC, ReplyOnPause, AdditionalOutputs
import numpy as np
import os
from twilio.rest import Client
import base64
import openai
import re
from huggingface_hub import InferenceClient
from pydub import AudioSegment
import io
from dotenv import load_dotenv
load_dotenv()
hf_client = InferenceClient()
spinner_html = open("spinner.html").read()

# Use Twilio TURN servers for the WebRTC connection when credentials are
# provided; otherwise pass no custom ICE configuration to the component.
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")

if account_sid and auth_token:
    twilio_client = Client(account_sid, auth_token)
    token = twilio_client.tokens.create()
    rtc_configuration = {
        "iceServers": token.ice_servers,
        "iceTransportPolicy": "relay",
    }
else:
    rtc_configuration = None

# SambaNova exposes an OpenAI-compatible API, so the standard OpenAI client works.
client = openai.OpenAI(
    api_key=os.environ.get("SAMBANOVA_API_KEY"),
    base_url="https://api.sambanova.ai/v1",
)

system_prompt = (
    "You are an AI coding assistant. Your task is to write single-file HTML "
    "applications based on a user's request. Only return the necessary code. "
    "Include all necessary imports and styles. You may also be asked to edit "
    "your original response."
)
user_prompt = (
    "Please write a single-file HTML application to fulfill the following request.\n"
    "The message:{user_message}\n"
    "Current code you have written:{code}"
)


def extract_html_content(text):
    """Extract the first complete HTML document (including tags) from the text."""
    match = re.search(r'<!DOCTYPE html>.*?</html>', text, re.DOTALL)
    return match.group(0) if match else None


def audio_to_bytes(audio: tuple[int, np.ndarray]):
    """Convert a (sample_rate, samples) tuple from the WebRTC stream into MP3 bytes."""
    audio_segment = AudioSegment(
        audio[1].squeeze().tobytes(),
        frame_rate=audio[0],
        sample_width=audio[1].dtype.itemsize,
        channels=1,
    )
    # Export the audio segment to MP3 bytes - use a high bitrate to maximise quality
    mp3_io = io.BytesIO()
    audio_segment.export(mp3_io, format="mp3", bitrate="320k")
    # Get the MP3 bytes
    mp3_bytes = mp3_io.getvalue()
    mp3_io.close()
    return mp3_bytes


def display_in_sandbox(code):
    """Embed the generated HTML in a base64 data URI iframe for the Sandbox tab."""
    encoded_html = base64.b64encode(code.encode('utf-8')).decode('utf-8')
    data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
    return f'<iframe src="{data_uri}" width="100%" height="600px"></iframe>'


def generate(
    user_message: tuple[int, np.ndarray],
    history: list[dict],
    code: str,
):
    # Show the loading spinner while the request is being processed.
    yield AdditionalOutputs(history, spinner_html)

    # Transcribe the user's speech, then ask the LLM for new or edited HTML.
    text = hf_client.automatic_speech_recognition(audio_to_bytes(user_message)).text
    user_msg_formatted = user_prompt.format(user_message=text, code=code)
    history.append({"role": "user", "content": user_msg_formatted})

    response = client.chat.completions.create(
        model="Meta-Llama-3.1-70B-Instruct",
        messages=history,
        temperature=0.1,
        top_p=0.1,
    )
    output = response.choices[0].message.content
    html_code = extract_html_content(output)
    history.append({"role": "assistant", "content": output})
    yield AdditionalOutputs(history, html_code)


with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo:
    history = gr.State([{"role": "system", "content": system_prompt}])

    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML(
                """
                <h1 style='text-align: center'>
                    Llama Code Editor
                </h1>
                <h2 style='text-align: center'>
                    Powered by SambaNova and Gradio-WebRTC ⚡️
                </h2>
                <p style='text-align: center'>
                    Create and edit single-file HTML applications with just your voice!
                </p>
                <p style='text-align: center'>
                    Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
                </p>
                """
            )
            webrtc = WebRTC(
                rtc_configuration=rtc_configuration,
                mode="send",
                modality="audio",
            )
        with gr.Column(scale=10):
            with gr.Tabs():
                with gr.Tab("Sandbox"):
                    sandbox = gr.HTML(value=open("sandbox.html").read())
                with gr.Tab("Code"):
                    code = gr.Code(
                        language="html",
                        max_lines=50,
                        interactive=False,
                        elem_classes="code-component",
                    )
                with gr.Tab("Chat"):
                    cb = gr.Chatbot(type="messages")

    # Run `generate` whenever the speaker pauses, then route the additional
    # outputs (chat history and generated code) back into the UI components.
    webrtc.stream(
        ReplyOnPause(generate),
        inputs=[webrtc, history, code],
        outputs=[webrtc],
        time_limit=90,
        concurrency_limit=10,
    )
    webrtc.on_additional_outputs(
        lambda history, code: (history, code, history),
        outputs=[history, code, cb],
    )
    code.change(display_in_sandbox, code, sandbox, queue=False)


if __name__ == "__main__":
    demo.launch()