Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
import gradio as gr | |
from gradio_webrtc import WebRTC, ReplyOnPause, AdditionalOutputs | |
import numpy as np | |
import os | |
from twilio.rest import Client | |
import base64 | |
import openai | |
import re | |
from huggingface_hub import InferenceClient | |
from pydub import AudioSegment | |
import io | |
from dotenv import load_dotenv | |
# Load variables from a local .env file (if present) into the environment
# before the os.environ lookups below.
load_dotenv()

# Hugging Face inference client, used for speech recognition.
hf_client = InferenceClient()

# Markup shown while a request is being processed. A context manager is used
# so the file handle is closed as soon as the content has been read.
with open("spinner.html", encoding="utf-8") as spinner_file:
    spinner_html = spinner_file.read()

account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")

if account_sid and auth_token:
    # Kept under its own name: the module-level `client` is reserved for the
    # OpenAI-compatible client created further down.
    twilio_client = Client(account_sid, auth_token)
    token = twilio_client.tokens.create()
    rtc_configuration = {
        "iceServers": token.ice_servers,
        "iceTransportPolicy": "relay",
    }
else:
    # Without Twilio credentials no explicit RTC configuration is supplied.
    rtc_configuration = None

# OpenAI-compatible client pointed at the SambaNova API.
client = openai.OpenAI(
    api_key=os.environ.get("SAMBANOVA_API_KEY"),
    base_url="https://api.sambanova.ai/v1",
)

# Prompts for the chat model. `user_prompt` is filled in with the transcribed
# request ({user_message}) and the current HTML source ({code}).
system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response."
user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"
def extract_html_content(text: str):
    """Return the first complete HTML document embedded in *text*.

    The document is the span from a ``<!DOCTYPE html>`` declaration through
    the first ``</html>`` tag that follows it, inclusive. Both markers are
    matched case-insensitively, so ``<!doctype html>`` and ``</HTML>`` are
    recognised as well.

    Args:
        text: Arbitrary text that may wrap the document in prose or a
            Markdown code fence.

    Returns:
        The matched document as a string, or ``None`` when no such span
        exists in *text*.
    """
    match = re.search(
        r"<!DOCTYPE html>.*?</html>",
        text,
        re.DOTALL | re.IGNORECASE,  # DOTALL: the document spans many lines
    )
    return match.group(0) if match else None
def audio_to_bytes(audio: tuple[int, np.ndarray]):
    """Encode a ``(sample_rate, samples)`` pair as MP3 and return the bytes.

    The sample array is flattened with ``squeeze()`` and passed to
    ``AudioSegment`` as raw single-channel data whose sample width is the
    item size of the array's dtype.
    """
    sample_rate = audio[0]
    samples = audio[1]

    segment = AudioSegment(
        samples.squeeze().tobytes(),
        frame_rate=sample_rate,
        sample_width=samples.dtype.itemsize,
        channels=1,
    )

    # Export at a high bitrate to maximise quality, then hand back the
    # encoded bytes; the in-memory buffer is closed on leaving the block.
    with io.BytesIO() as buffer:
        segment.export(buffer, format="mp3", bitrate="320k")
        return buffer.getvalue()
def display_in_sandbox(code):
    """Wrap HTML source in an ``<iframe>`` that renders it from a data URI.

    Args:
        code: HTML source to render. ``None`` is treated as an empty
            document so the callback does not raise when there is no code
            to show (``extract_html_content`` returns ``None`` when it finds
            no document).

    Returns:
        Markup for an iframe (width 100%, height 600px) whose ``src`` is a
        base64-encoded ``data:text/html`` URI containing *code*.
    """
    html_source = code or ""
    # Base64 keeps quotes and angle brackets in the document from interfering
    # with the surrounding src="..." attribute.
    encoded_html = base64.b64encode(html_source.encode("utf-8")).decode("utf-8")
    data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
    return f'<iframe src="{data_uri}" width="100%" height="600px"></iframe>'
def generate(user_message: tuple[int, np.ndarray],
             history: list[dict],
             code: str):
    """Turn one spoken request into new HTML and an updated chat history.

    Steps: show the spinner, transcribe the audio, ask the chat model for
    code using the current source as context, then publish the result.

    Args:
        user_message: ``(sample_rate, samples)`` audio of the spoken request.
        history: Chat messages so far as ``{"role", "content"}`` dicts. It is
            appended to in place with the new user and assistant messages.
        code: The HTML source currently held by the UI; may be ``None`` or
            empty when nothing has been generated yet.

    Yields:
        ``AdditionalOutputs(history, html)`` twice: first with the spinner
        markup, then with the HTML document extracted from the model reply.
        If the reply contains no complete document, the current code is
        yielded again instead of ``None``.
    """
    # Immediately replace the displayed code with the spinner markup.
    yield AdditionalOutputs(history, spinner_html)

    text = hf_client.automatic_speech_recognition(audio_to_bytes(user_message)).text

    # Avoid formatting a missing value into the prompt as the literal "None".
    current_code = code or ""
    user_msg_formatted = user_prompt.format(user_message=text, code=current_code)
    history.append({"role": "user", "content": user_msg_formatted})

    response = client.chat.completions.create(
        model="Meta-Llama-3.1-70B-Instruct",
        messages=history,
        temperature=0.1,
        top_p=0.1,
    )
    # Message content can be None; normalise so the regex search and the
    # stored history always receive a string.
    output = response.choices[0].message.content or ""
    history.append({"role": "assistant", "content": output})

    # Keep the existing code when the reply has no extractable document, so
    # a prose-only answer does not wipe the user's work or publish None.
    html_code = extract_html_content(output) or current_code
    yield AdditionalOutputs(history, html_code)
# Initial content of the Sandbox tab. Read through a context manager so the
# file handle is closed instead of being left to the garbage collector.
with open("sandbox.html", encoding="utf-8") as sandbox_file:
    sandbox_html = sandbox_file.read()

with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo:
    # Per-session chat history, seeded with the system prompt.
    history = gr.State([{"role": "system", "content": system_prompt}])
    with gr.Row():
        # Narrow left column: title, instructions and the audio control.
        with gr.Column(scale=1):
            gr.HTML(
                """
                <h1 style='text-align: center'>
                Llama Code Editor
                </h1>
                <h2 style='text-align: center'>
                Powered by SambaNova and Gradio-WebRTC ⚡️
                </h2>
                <p style='text-align: center'>
                Create and edit single-file HTML applications with just your voice!
                </p>
                <p style='text-align: center'>
                Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
                </p>
                """
            )
            webrtc = WebRTC(rtc_configuration=rtc_configuration,
                            mode="send", modality="audio")
        # Wide right column: rendered app, its source, and the chat log.
        with gr.Column(scale=10):
            with gr.Tabs():
                with gr.Tab("Sandbox"):
                    sandbox = gr.HTML(value=sandbox_html)
                with gr.Tab("Code"):
                    code = gr.Code(language="html", max_lines=50, interactive=False, elem_classes="code-component")
                with gr.Tab("Chat"):
                    cb = gr.Chatbot(type="messages")

    # Run generate() on the streamed audio; time_limit matches the 90 seconds
    # advertised in the header text above.
    webrtc.stream(ReplyOnPause(generate),
                  inputs=[webrtc, history, code],
                  outputs=[webrtc], time_limit=90,
                  concurrency_limit=10)
    # Fan each AdditionalOutputs(history, code) out to the history state, the
    # code viewer and the chatbot.
    webrtc.on_additional_outputs(lambda history, code: (history, code, history),
                                 outputs=[history, code, cb])
    # Re-render the sandbox whenever the code component's value changes.
    code.change(display_in_sandbox, code, sandbox, queue=False)

if __name__ == "__main__":
    demo.launch()