Commit
·
1ee1757
1
Parent(s):
bcdc799
code
Browse files- README.md +2 -2
- app.py +48 -37
- requirements.txt +1 -1
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
-
title: Llama 3.2 3b
|
3 |
-
emoji:
|
4 |
colorFrom: green
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
|
|
1 |
---
|
2 |
+
title: Llama 3.2 3b WebRTC
|
3 |
+
emoji: ⚡️
|
4 |
colorFrom: green
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
app.py
CHANGED
@@ -25,12 +25,6 @@ else:
|
|
25 |
rtc_configuration = None
|
26 |
|
27 |
|
28 |
-
def create_client(api_key):
|
29 |
-
return openai.OpenAI(
|
30 |
-
base_url="https://llama3-1-8b.lepton.run/api/v1/",
|
31 |
-
api_key=api_key
|
32 |
-
)
|
33 |
-
|
34 |
|
35 |
def update_or_append_conversation(conversation, id, role, content):
|
36 |
# Find if there's an existing message with the given id
|
@@ -43,11 +37,12 @@ def update_or_append_conversation(conversation, id, role, content):
|
|
43 |
|
44 |
|
45 |
def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[dict],
|
46 |
-
client: openai.OpenAI
|
47 |
if client is None:
|
48 |
raise gr.Error("Please enter a valid API key first.")
|
49 |
|
50 |
-
|
|
|
51 |
audio_data = base64.b64encode(audio_bytes).decode()
|
52 |
|
53 |
try:
|
@@ -55,7 +50,7 @@ def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[di
|
|
55 |
extra_body={
|
56 |
"require_audio": True,
|
57 |
"tts_preset_id": "jessica",
|
58 |
-
"tts_audio_format":
|
59 |
"tts_audio_bitrate": bitrate
|
60 |
},
|
61 |
model="llama3.1-8b",
|
@@ -68,40 +63,48 @@ def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[di
|
|
68 |
id = str(time.time())
|
69 |
full_response = ""
|
70 |
asr_result = ""
|
|
|
71 |
|
72 |
-
for chunk in stream:
|
73 |
if not chunk.choices:
|
74 |
continue
|
75 |
delta = chunk.choices[0].delta
|
76 |
-
content = delta.
|
77 |
audio = getattr(chunk.choices[0], "audio", [])
|
78 |
asr_results = getattr(chunk.choices[0], "asr_results", [])
|
79 |
|
80 |
if asr_results:
|
|
|
81 |
asr_result += "".join(asr_results)
|
82 |
yield id, None, asr_result, None
|
83 |
|
84 |
if content:
|
|
|
85 |
full_response += content
|
86 |
yield id, full_response, None, None
|
87 |
|
88 |
if audio:
|
|
|
89 |
# Accumulate audio bytes and yield them
|
90 |
audio_bytes_accumulated = b''.join([base64.b64decode(a) for a in audio])
|
91 |
-
|
|
|
92 |
audio_array = np.array(audio.get_array_of_samples(), dtype=np.int16).reshape(1, -1)
|
93 |
-
print("audio.
|
94 |
-
print("sampling_rate", audio.frame_rate)
|
95 |
|
96 |
yield id, None, None, (audio.frame_rate, audio_array)
|
|
|
|
|
|
|
|
|
97 |
|
98 |
yield id, full_response, asr_result, None
|
99 |
-
|
100 |
except Exception as e:
|
101 |
raise gr.Error(f"Error during audio streaming: {e}")
|
102 |
|
103 |
def response(audio: tuple[int, np.ndarray], lepton_conversation: list[dict],
|
104 |
-
gradio_conversation: list[dict], client: openai.OpenAI
|
105 |
|
106 |
audio_buffer = io.BytesIO()
|
107 |
segment = AudioSegment(
|
@@ -110,55 +113,63 @@ def response(audio: tuple[int, np.ndarray], lepton_conversation: list[dict],
|
|
110 |
sample_width=audio[1].dtype.itemsize,
|
111 |
channels=1,
|
112 |
)
|
113 |
-
segment.export(audio_buffer, format="
|
114 |
|
115 |
-
generator = generate_response_and_audio(audio_buffer.getvalue(), lepton_conversation, client
|
116 |
|
117 |
for id, text, asr, audio in generator:
|
118 |
if asr:
|
119 |
update_or_append_conversation(lepton_conversation, id, "user", asr)
|
120 |
update_or_append_conversation(gradio_conversation, id, "user", asr)
|
|
|
121 |
if text:
|
122 |
update_or_append_conversation(lepton_conversation, id, "assistant", text)
|
123 |
update_or_append_conversation(gradio_conversation, id, "assistant", text)
|
|
|
124 |
if audio:
|
125 |
-
yield audio
|
126 |
else:
|
127 |
yield AdditionalOutputs(lepton_conversation, gradio_conversation)
|
128 |
|
129 |
|
130 |
-
def set_api_key(
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
|
|
|
|
|
|
|
|
136 |
|
137 |
|
138 |
with gr.Blocks() as demo:
|
139 |
with gr.Group():
|
140 |
with gr.Row():
|
141 |
chatbot = gr.Chatbot(label="Conversation", type="messages")
|
142 |
-
with gr.Row(
|
143 |
-
|
144 |
-
format_dropdown = gr.Dropdown(choices=["mp3", "opus"], value="mp3", label="Output Audio Format")
|
145 |
-
api_key_input = gr.Textbox(type="password", label="Enter your Lepton API Key")
|
146 |
-
set_key_button = gr.Button("Set API Key", variant="primary")
|
147 |
-
with gr.Column(scale=3):
|
148 |
-
audio = WebRTC(modality="audio", mode="send-receive",
|
149 |
label="Audio Stream",
|
150 |
rtc_configuration=rtc_configuration)
|
|
|
|
|
|
|
|
|
151 |
|
152 |
client_state = gr.State(None)
|
153 |
-
lepton_conversation = gr.State([
|
154 |
-
|
155 |
-
|
156 |
|
|
|
|
|
157 |
audio.stream(
|
158 |
ReplyOnPause(response),
|
159 |
-
inputs=[audio, lepton_conversation, chatbot, client_state
|
160 |
outputs=[audio]
|
161 |
)
|
162 |
-
audio.on_additional_outputs(lambda l, g: (l, g), outputs=[lepton_conversation, chatbot]
|
|
|
163 |
|
164 |
demo.launch()
|
|
|
25 |
rtc_configuration = None
|
26 |
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
def update_or_append_conversation(conversation, id, role, content):
|
30 |
# Find if there's an existing message with the given id
|
|
|
37 |
|
38 |
|
39 |
def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[dict],
|
40 |
+
client: openai.OpenAI):
|
41 |
if client is None:
|
42 |
raise gr.Error("Please enter a valid API key first.")
|
43 |
|
44 |
+
# mp3 bitrate
|
45 |
+
bitrate = 128
|
46 |
audio_data = base64.b64encode(audio_bytes).decode()
|
47 |
|
48 |
try:
|
|
|
50 |
extra_body={
|
51 |
"require_audio": True,
|
52 |
"tts_preset_id": "jessica",
|
53 |
+
"tts_audio_format": "mp3",
|
54 |
"tts_audio_bitrate": bitrate
|
55 |
},
|
56 |
model="llama3.1-8b",
|
|
|
63 |
id = str(time.time())
|
64 |
full_response = ""
|
65 |
asr_result = ""
|
66 |
+
all_audio = b""
|
67 |
|
68 |
+
for i, chunk in enumerate(stream):
|
69 |
if not chunk.choices:
|
70 |
continue
|
71 |
delta = chunk.choices[0].delta
|
72 |
+
content = delta.content
|
73 |
audio = getattr(chunk.choices[0], "audio", [])
|
74 |
asr_results = getattr(chunk.choices[0], "asr_results", [])
|
75 |
|
76 |
if asr_results:
|
77 |
+
print(i, "asr_results")
|
78 |
asr_result += "".join(asr_results)
|
79 |
yield id, None, asr_result, None
|
80 |
|
81 |
if content:
|
82 |
+
print(i, "content")
|
83 |
full_response += content
|
84 |
yield id, full_response, None, None
|
85 |
|
86 |
if audio:
|
87 |
+
print(i, "audio")
|
88 |
# Accumulate audio bytes and yield them
|
89 |
audio_bytes_accumulated = b''.join([base64.b64decode(a) for a in audio])
|
90 |
+
all_audio += audio_bytes_accumulated
|
91 |
+
audio = AudioSegment.from_file(io.BytesIO(audio_bytes_accumulated), format="mp3")
|
92 |
audio_array = np.array(audio.get_array_of_samples(), dtype=np.int16).reshape(1, -1)
|
93 |
+
print("audio.frame_rate", audio.frame_rate)
|
|
|
94 |
|
95 |
yield id, None, None, (audio.frame_rate, audio_array)
|
96 |
+
|
97 |
+
if all_audio:
|
98 |
+
all_audio = AudioSegment.from_file(io.BytesIO(all_audio), format="mp3")
|
99 |
+
all_audio.export("all_audio.mp3", format="mp3")
|
100 |
|
101 |
yield id, full_response, asr_result, None
|
102 |
+
print("finishing loop")
|
103 |
except Exception as e:
|
104 |
raise gr.Error(f"Error during audio streaming: {e}")
|
105 |
|
106 |
def response(audio: tuple[int, np.ndarray], lepton_conversation: list[dict],
|
107 |
+
gradio_conversation: list[dict], client: openai.OpenAI):
|
108 |
|
109 |
audio_buffer = io.BytesIO()
|
110 |
segment = AudioSegment(
|
|
|
113 |
sample_width=audio[1].dtype.itemsize,
|
114 |
channels=1,
|
115 |
)
|
116 |
+
segment.export(audio_buffer, format="mp3")
|
117 |
|
118 |
+
generator = generate_response_and_audio(audio_buffer.getvalue(), lepton_conversation, client)
|
119 |
|
120 |
for id, text, asr, audio in generator:
|
121 |
if asr:
|
122 |
update_or_append_conversation(lepton_conversation, id, "user", asr)
|
123 |
update_or_append_conversation(gradio_conversation, id, "user", asr)
|
124 |
+
yield AdditionalOutputs(lepton_conversation, gradio_conversation)
|
125 |
if text:
|
126 |
update_or_append_conversation(lepton_conversation, id, "assistant", text)
|
127 |
update_or_append_conversation(gradio_conversation, id, "assistant", text)
|
128 |
+
yield AdditionalOutputs(lepton_conversation, gradio_conversation)
|
129 |
if audio:
|
130 |
+
yield audio
|
131 |
else:
|
132 |
yield AdditionalOutputs(lepton_conversation, gradio_conversation)
|
133 |
|
134 |
|
135 |
+
def set_api_key(lepton_api_key):
|
136 |
+
try:
|
137 |
+
client = openai.OpenAI(
|
138 |
+
base_url="https://llama3-1-8b.lepton.run/api/v1/",
|
139 |
+
api_key=lepton_api_key
|
140 |
+
)
|
141 |
+
except:
|
142 |
+
raise gr.Error("Invalid API keys. Please try again.")
|
143 |
+
gr.Info("Successfully set API keys.", duration=3)
|
144 |
+
return client, gr.update(visible=True), gr.update(visible=False)
|
145 |
|
146 |
|
147 |
with gr.Blocks() as demo:
|
148 |
with gr.Group():
|
149 |
with gr.Row():
|
150 |
chatbot = gr.Chatbot(label="Conversation", type="messages")
|
151 |
+
with gr.Row(visible=False) as mic_row:
|
152 |
+
audio = WebRTC(modality="audio", mode="send-receive",
|
|
|
|
|
|
|
|
|
|
|
153 |
label="Audio Stream",
|
154 |
rtc_configuration=rtc_configuration)
|
155 |
+
with gr.Row(equal_height=True) as api_row:
|
156 |
+
api_key_input = gr.Textbox(type="password", value=os.getenv("LEPTONAI_API_KEY"),
|
157 |
+
label="Enter Your Lepton AI Key")
|
158 |
+
|
159 |
|
160 |
client_state = gr.State(None)
|
161 |
+
lepton_conversation = gr.State([{"role": "system",
|
162 |
+
"content": "You are a knowledgeable assistant who will engage in spoken conversations with users. "
|
163 |
+
"Keep your answers short and natural as they will be read aloud."}])
|
164 |
|
165 |
+
api_key_input.submit(set_api_key, inputs=[api_key_input],
|
166 |
+
outputs=[client_state, mic_row, api_row])
|
167 |
audio.stream(
|
168 |
ReplyOnPause(response),
|
169 |
+
inputs=[audio, lepton_conversation, chatbot, client_state],
|
170 |
outputs=[audio]
|
171 |
)
|
172 |
+
audio.on_additional_outputs(lambda l, g: (l, g), outputs=[lepton_conversation, chatbot],
|
173 |
+
queue=False, show_progress="hidden")
|
174 |
|
175 |
demo.launch()
|
requirements.txt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
-
gradio_webrtc[vad]==0.0.
|
2 |
openai
|
3 |
twilio
|
|
|
1 |
+
gradio_webrtc[vad]==0.0.12
|
2 |
openai
|
3 |
twilio
|