Commit
•
e9633ca
1
Parent(s):
694882d
add code
Browse files
app.py
CHANGED
@@ -49,7 +49,6 @@ def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[st
|
|
49 |
id = str(time.time())
|
50 |
full_response = ""
|
51 |
asr_result = ""
|
52 |
-
audio_bytes_accumulated = b''
|
53 |
|
54 |
for chunk in stream:
|
55 |
if not chunk.choices:
|
@@ -69,10 +68,15 @@ def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[st
|
|
69 |
|
70 |
if audio:
|
71 |
# Accumulate audio bytes and yield them
|
72 |
-
audio_bytes_accumulated
|
73 |
-
|
|
|
|
|
|
|
74 |
|
75 |
-
|
|
|
|
|
76 |
|
77 |
except Exception as e:
|
78 |
raise gr.Error(f"Error during audio streaming: {e}")
|
@@ -98,8 +102,10 @@ def response(audio: tuple[int, np.ndarray], lepton_conversation: list[dict],
|
|
98 |
if text:
|
99 |
update_or_append_conversation(lepton_conversation, id, "assistant", text)
|
100 |
update_or_append_conversation(gradio_conversation, id, "assistant", text)
|
101 |
-
|
102 |
-
|
|
|
|
|
103 |
|
104 |
|
105 |
with gr.Blocks() as demo:
|
|
|
49 |
id = str(time.time())
|
50 |
full_response = ""
|
51 |
asr_result = ""
|
|
|
52 |
|
53 |
for chunk in stream:
|
54 |
if not chunk.choices:
|
|
|
68 |
|
69 |
if audio:
|
70 |
# Accumulate audio bytes and yield them
|
71 |
+
audio_bytes_accumulated = b''.join([base64.b64decode(a) for a in audio])
|
72 |
+
audio = AudioSegment.from_file(io.BytesIO(audio_bytes_accumulated))
|
73 |
+
audio_array = np.array(audio.get_array_of_samples(), dtype=np.int16).reshape(1, -1)
|
74 |
+
print("audio.shape", audio_array.shape)
|
75 |
+
print("sampling_rate", audio.frame_rate)
|
76 |
|
77 |
+
yield id, None, None, (audio.frame_rate, audio_array)
|
78 |
+
|
79 |
+
yield id, full_response, asr_result, None
|
80 |
|
81 |
except Exception as e:
|
82 |
raise gr.Error(f"Error during audio streaming: {e}")
|
|
|
102 |
if text:
|
103 |
update_or_append_conversation(lepton_conversation, id, "assistant", text)
|
104 |
update_or_append_conversation(gradio_conversation, id, "assistant", text)
|
105 |
+
if audio:
|
106 |
+
yield audio, AdditionalOutputs(lepton_conversation, gradio_conversation)
|
107 |
+
else:
|
108 |
+
yield AdditionalOutputs(lepton_conversation, gradio_conversation)
|
109 |
|
110 |
|
111 |
with gr.Blocks() as demo:
|