llama-3.2-3b-voice-webrtc

Running

freddyaboulton HF staff committed on Nov 1, 2024

Commit

e9633ca

1 Parent(s): 694882d

add code

Files changed (1) hide show

app.py CHANGED Viewed

@@ -49,7 +49,6 @@ def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[st
         id = str(time.time())
         full_response = ""
         asr_result = ""
-        audio_bytes_accumulated = b''
         for chunk in stream:
             if not chunk.choices:
@@ -69,10 +68,15 @@ def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[st
             if audio:
                 # Accumulate audio bytes and yield them
-                audio_bytes_accumulated += b''.join([base64.b64decode(a) for a in audio])
-                yield id, None, None, audio_bytes_accumulated
-        yield id, full_response, asr_result, audio_bytes_accumulated
     except Exception as e:
         raise gr.Error(f"Error during audio streaming: {e}")
@@ -98,8 +102,10 @@ def response(audio: tuple[int, np.ndarray], lepton_conversation: list[dict],
         if text:
             update_or_append_conversation(lepton_conversation, id, "assistant", text)
             update_or_append_conversation(gradio_conversation, id, "assistant", text)
-        yield (np.frombuffer(audio, dtype=np.int16).reshape(1, -1), ), AdditionalOutputs(lepton_conversation, gradio_conversation)
 with gr.Blocks() as demo:

         id = str(time.time())
         full_response = ""
         asr_result = ""
         for chunk in stream:
             if not chunk.choices:
             if audio:
                 # Accumulate audio bytes and yield them
+                audio_bytes_accumulated = b''.join([base64.b64decode(a) for a in audio])
+                audio = AudioSegment.from_file(io.BytesIO(audio_bytes_accumulated))
+                audio_array = np.array(audio.get_array_of_samples(), dtype=np.int16).reshape(1, -1)
+                print("audio.shape", audio_array.shape)
+                print("sampling_rate", audio.frame_rate)
+                yield id, None, None, (audio.frame_rate, audio_array)
+        yield id, full_response, asr_result, None
     except Exception as e:
         raise gr.Error(f"Error during audio streaming: {e}")
         if text:
             update_or_append_conversation(lepton_conversation, id, "assistant", text)
             update_or_append_conversation(gradio_conversation, id, "assistant", text)
+        if audio:
+            yield audio, AdditionalOutputs(lepton_conversation, gradio_conversation)
+        else:
+            yield AdditionalOutputs(lepton_conversation, gradio_conversation)
 with gr.Blocks() as demo: