File size: 4,870 Bytes
5197529
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6242ad8
5197529
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c681468
 
5197529
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import gradio as gr
from gradio_webrtc import WebRTC, ReplyOnPause, AdditionalOutputs
import numpy as np
import os
from twilio.rest import Client
import base64
import openai
import re
from huggingface_hub import InferenceClient
from pydub import AudioSegment
import io

from dotenv import load_dotenv
load_dotenv()

# Hugging Face inference client; used in generate() for speech recognition.
hf_client = InferenceClient()

# HTML snippet pushed to the UI while a request is being processed.
# Read through a context manager so the file handle is closed deterministically,
# with an explicit encoding so the result does not depend on the host locale.
with open("spinner.html", encoding="utf-8") as _spinner_file:
    spinner_html = _spinner_file.read()


# Optional Twilio credentials. When both are present, ICE servers are fetched
# from Twilio and the connection is forced through relay candidates; otherwise
# no explicit RTC configuration is supplied.
account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
auth_token = os.environ.get("TWILIO_AUTH_TOKEN")

if account_sid and auth_token:
    # Use a dedicated name: the global `client` is bound to the
    # OpenAI-compatible chat client further down in this module.
    twilio_client = Client(account_sid, auth_token)

    token = twilio_client.tokens.create()

    rtc_configuration = {
        "iceServers": token.ice_servers,
        "iceTransportPolicy": "relay",
    }
else:
    rtc_configuration = None

# OpenAI-compatible chat client pointed at the SambaNova API endpoint.
# The API key is taken from the SAMBANOVA_API_KEY environment variable.
client = openai.OpenAI(
    base_url="https://api.sambanova.ai/v1",
    api_key=os.getenv("SAMBANOVA_API_KEY"),
)


# System message that seeds every conversation history.
system_prompt = (
    "You are an AI coding assistant. "
    "Your task is to write single-file HTML applications based on a user's request. "
    "Only return the necessary code. "
    "Include all necessary imports and styles. "
    "You may also be asked to edit your original response."
)

# Per-turn user message template; filled with the transcribed request and the
# code currently held by the editor.
user_prompt = (
    "Please write a single-file HTML application to fulfill the following request.\n"
    "The message:{user_message}\n"
    "Current code you have written:{code}"
)

def extract_html_content(text):
    """
    Extract the first complete HTML document embedded in ``text``.

    The document is taken to start at a ``<!DOCTYPE html>`` declaration and
    end at the first following ``</html>`` tag; both markers are part of the
    returned string. Matching is case-insensitive because HTML treats
    ``<!doctype html>`` and ``</HTML>`` as equally valid, and spans newlines.

    Args:
        text: Arbitrary text (e.g. a model reply) that may contain an HTML
            document, possibly surrounded by prose or code fences. ``None``
            or an empty string is accepted.

    Returns:
        The matched HTML document as a string, or ``None`` when ``text`` is
        empty/``None`` or contains no such document.
    """
    if not text:
        return None
    match = re.search(
        r'<!DOCTYPE html>.*?</html>',
        text,
        re.DOTALL | re.IGNORECASE,
    )
    return match.group(0) if match else None


def audio_to_bytes(audio: tuple[int, np.ndarray]):
    """Encode an ``(frame_rate, samples)`` pair as MP3 and return the bytes.

    The sample array is squeezed and handed to pydub as a single-channel
    segment whose sample width is the array dtype's item size.
    """
    samples = audio[1]
    frame_rate = audio[0]

    segment = AudioSegment(
        samples.squeeze().tobytes(),
        frame_rate=frame_rate,
        sample_width=samples.dtype.itemsize,
        channels=1,
    )

    # Encode into an in-memory buffer; a high bitrate is used to maximise quality.
    with io.BytesIO() as buffer:
        segment.export(buffer, format="mp3", bitrate="320k")
        return buffer.getvalue()


def display_in_sandbox(code):
    """Wrap HTML source in an ``<iframe>`` that loads it from a data URI.

    The source is UTF-8 encoded and base64-embedded in a
    ``data:text/html`` URI, so the markup never has to be escaped for the
    surrounding attribute.

    Args:
        code: HTML source to render. ``None`` or an empty string is treated
            as an empty document instead of raising, since the code editor
            can be empty (e.g. when no HTML could be extracted from a reply).

    Returns:
        An ``<iframe>`` element (as a string) 100% wide and 600px high.
    """
    html_source = code or ""
    encoded_html = base64.b64encode(html_source.encode('utf-8')).decode('utf-8')
    data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
    return f"<iframe src=\"{data_uri}\" width=\"100%\" height=\"600px\"></iframe>"


def generate(user_message: tuple[int, np.ndarray],
             history: list[dict],
             code: str):
    """Turn one spoken request into updated HTML code.

    Generator registered as the ``ReplyOnPause`` handler of the WebRTC
    stream. It yields two ``AdditionalOutputs``:

    1. ``(history, spinner_html)`` immediately, so the UI can show a spinner
       while the request is processed.
    2. ``(history, html_code)`` once the model has replied.

    Args:
        user_message: ``(frame_rate, samples)`` audio of the user's request.
        history: Chat messages so far (``role``/``content`` dicts). It is
            extended in place with the new user and assistant messages.
        code: The code currently held by the editor; it is embedded in the
            prompt so the model can edit its previous answer.

    Yields:
        ``AdditionalOutputs`` carrying the history and the value for the code
        editor, as described above.
    """
    yield AdditionalOutputs(history, spinner_html)

    # Speech -> text via the Hugging Face inference client.
    text = hf_client.automatic_speech_recognition(audio_to_bytes(user_message)).text

    user_msg_formatted = user_prompt.format(user_message=text, code=code)
    history.append({"role": "user", "content": user_msg_formatted})

    response = client.chat.completions.create(
        model='Meta-Llama-3.1-70B-Instruct',
        messages=history,
        temperature=0.1,
        top_p=0.1,
    )

    # The message content may be None; normalise so the regex search
    # in extract_html_content() does not raise.
    output = response.choices[0].message.content or ""
    history.append({"role": "assistant", "content": output})

    html_code = extract_html_content(output)
    if html_code is None:
        # The reply contained no HTML document: keep the code we were given
        # rather than emitting None, which would wipe the editor (and replace
        # the spinner with nothing renderable).
        html_code = code
    yield AdditionalOutputs(history, html_code)


# Initial sandbox markup; read via a context manager so the file handle is
# closed instead of being left to the garbage collector.
with open("sandbox.html", encoding="utf-8") as _sandbox_file:
    _initial_sandbox_html = _sandbox_file.read()

# UI layout: a narrow column with the title and the audio (WebRTC) control,
# and a wide column with Sandbox / Code / Chat tabs.
with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo:
    # Per-session chat history, seeded with the system prompt.
    history = gr.State([{"role": "system", "content": system_prompt}])
    with gr.Row():
        with gr.Column(scale=1):
            gr.HTML(
                """
                <h1 style='text-align: center'>
                Llama Code Editor
                </h1>
                <h2 style='text-align: center'>
                Powered by SambaNova and Gradio-WebRTC ⚡️
                </h2>
                <p style='text-align: center'>
                Create and edit single-file HTML applications with just your voice!
                </p>
                <p style='text-align: center'>
                Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
                </p>
                """
            )
            webrtc = WebRTC(rtc_configuration=rtc_configuration,
                            mode="send", modality="audio")
        with gr.Column(scale=10):
            with gr.Tabs():
                with gr.Tab("Sandbox"):
                    sandbox = gr.HTML(value=_initial_sandbox_html)
                with gr.Tab("Code"):
                    code = gr.Code(language="html", max_lines=50, interactive=False, elem_classes="code-component")
                with gr.Tab("Chat"):
                    cb = gr.Chatbot(type="messages")

    # generate() is wrapped in ReplyOnPause and receives the audio stream plus
    # the current history and code; time_limit matches the 90 seconds
    # advertised in the header text above.
    webrtc.stream(ReplyOnPause(generate),
                  inputs=[webrtc, history, code],
                  outputs=[webrtc], time_limit=90,
                  concurrency_limit=10)
    # Fan the AdditionalOutputs yielded by generate() out to the history
    # state, the code editor and the chatbot (which mirrors the history).
    webrtc.on_additional_outputs(lambda history, code: (history, code, history),
                                 outputs=[history, code, cb])
    # Re-render the sandbox whenever the code editor's value changes.
    code.change(display_in_sandbox, code, sandbox, queue=False)

# Launch the app only when this file is run directly, not when it is imported.
if __name__ == "__main__":
    demo.launch()