Spaces:

akhaliq
/

llama-3.2-3b-voice

Running

App Files Files Community

akhaliq HF staff committed on Sep 27, 2024

Commit

7049daf

verified ·

1 Parent(s): 0443391

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -13

app.py CHANGED Viewed

@@ -55,10 +55,10 @@ def process_audio(audio: tuple, state: AppState):
     state.pause_detected = pause_detected
     if state.pause_detected:
-        # Stop recording and trigger response
-        return gr.update(recording=False), state, True
     else:
-        return None, state, False
 def update_or_append_conversation(conversation, id, role, content):
     # Find if there's an existing message with the given id
@@ -124,9 +124,11 @@ def generate_response_and_audio(audio_bytes: bytes, state: AppState):
         raise gr.Error(f"Error during audio streaming: {e}")
 def response(state: AppState):
     if state.stream is None or len(state.stream) == 0:
-        yield None, None, state
-        return
     audio_buffer = io.BytesIO()
     segment = AudioSegment(
@@ -152,6 +154,13 @@ def response(state: AppState):
     state.stream = None
     state.pause_detected = False
 def start_recording_user(state: AppState):
     if not state.stopped:
         return gr.update(recording=True)
@@ -180,7 +189,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
-            input_audio = gr.Audio(label="Input Audio", sources="microphone", type="numpy")
         with gr.Column():
             chatbot = gr.Chatbot(label="Conversation", type="messages")
             output_audio = gr.Audio(label="Output Audio", autoplay=True)
@@ -190,22 +199,18 @@ with gr.Blocks() as demo:
     set_key_button.click(set_api_key, inputs=[api_key_input, state], outputs=[api_key_status, state])
     format_dropdown.change(update_format, inputs=[format_dropdown, state], outputs=[state])
-    # Add a dummy output to trigger the response function
-    should_process_response = gr.Variable(False)
     stream = input_audio.stream(
         process_audio,
         [input_audio, state],
-        [input_audio, state, should_process_response],
         stream_every=0.25,  # Reduced to make it more responsive
         time_limit=60,  # Increased to allow for longer messages
     )
-    # When should_process_response is True, call response
     stream.then(
-        response,
         inputs=[state],
-        outputs=[chatbot, output_audio, state]
     )
     # Automatically restart recording after the assistant's response

     state.pause_detected = pause_detected
     if state.pause_detected:
+        # Stop recording
+        return gr.update(recording=False), state
     else:
+        return None, state
 def update_or_append_conversation(conversation, id, role, content):
     # Find if there's an existing message with the given id
         raise gr.Error(f"Error during audio streaming: {e}")
 def response(state: AppState):
+    if not state.pause_detected:
+        return gr.update(), gr.update(), state
     if state.stream is None or len(state.stream) == 0:
+        return gr.update(), gr.update(), state
     audio_buffer = io.BytesIO()
     segment = AudioSegment(
     state.stream = None
     state.pause_detected = False
+def maybe_call_response(state):
+    if state.pause_detected:
+        return response(state)
+    else:
+        # Do nothing
+        return gr.update(), gr.update(), state
 def start_recording_user(state: AppState):
     if not state.stopped:
         return gr.update(recording=True)
     with gr.Row():
         with gr.Column():
+            input_audio = gr.Audio(label="Input Audio", source="microphone", type="numpy")
         with gr.Column():
             chatbot = gr.Chatbot(label="Conversation", type="messages")
             output_audio = gr.Audio(label="Output Audio", autoplay=True)
     set_key_button.click(set_api_key, inputs=[api_key_input, state], outputs=[api_key_status, state])
     format_dropdown.change(update_format, inputs=[format_dropdown, state], outputs=[state])
     stream = input_audio.stream(
         process_audio,
         [input_audio, state],
+        [input_audio, state],
         stream_every=0.25,  # Reduced to make it more responsive
         time_limit=60,  # Increased to allow for longer messages
     )
     stream.then(
+        maybe_call_response,
         inputs=[state],
+        outputs=[chatbot, output_audio, state],
     )
     # Automatically restart recording after the assistant's response