Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -55,10 +55,10 @@ def process_audio(audio: tuple, state: AppState):
|
|
55 |
state.pause_detected = pause_detected
|
56 |
|
57 |
if state.pause_detected:
|
58 |
-
# Stop recording
|
59 |
-
return gr.update(recording=False), state
|
60 |
else:
|
61 |
-
return None, state
|
62 |
|
63 |
def update_or_append_conversation(conversation, id, role, content):
|
64 |
# Find if there's an existing message with the given id
|
@@ -124,9 +124,11 @@ def generate_response_and_audio(audio_bytes: bytes, state: AppState):
|
|
124 |
raise gr.Error(f"Error during audio streaming: {e}")
|
125 |
|
126 |
def response(state: AppState):
|
|
|
|
|
|
|
127 |
if state.stream is None or len(state.stream) == 0:
|
128 |
-
|
129 |
-
return
|
130 |
|
131 |
audio_buffer = io.BytesIO()
|
132 |
segment = AudioSegment(
|
@@ -152,6 +154,13 @@ def response(state: AppState):
|
|
152 |
state.stream = None
|
153 |
state.pause_detected = False
|
154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
def start_recording_user(state: AppState):
|
156 |
if not state.stopped:
|
157 |
return gr.update(recording=True)
|
@@ -180,7 +189,7 @@ with gr.Blocks() as demo:
|
|
180 |
|
181 |
with gr.Row():
|
182 |
with gr.Column():
|
183 |
-
input_audio = gr.Audio(label="Input Audio",
|
184 |
with gr.Column():
|
185 |
chatbot = gr.Chatbot(label="Conversation", type="messages")
|
186 |
output_audio = gr.Audio(label="Output Audio", autoplay=True)
|
@@ -190,22 +199,18 @@ with gr.Blocks() as demo:
|
|
190 |
set_key_button.click(set_api_key, inputs=[api_key_input, state], outputs=[api_key_status, state])
|
191 |
format_dropdown.change(update_format, inputs=[format_dropdown, state], outputs=[state])
|
192 |
|
193 |
-
# Add a dummy output to trigger the response function
|
194 |
-
should_process_response = gr.Variable(False)
|
195 |
-
|
196 |
stream = input_audio.stream(
|
197 |
process_audio,
|
198 |
[input_audio, state],
|
199 |
-
[input_audio, state
|
200 |
stream_every=0.25, # Reduced to make it more responsive
|
201 |
time_limit=60, # Increased to allow for longer messages
|
202 |
)
|
203 |
|
204 |
-
# When should_process_response is True, call response
|
205 |
stream.then(
|
206 |
-
|
207 |
inputs=[state],
|
208 |
-
outputs=[chatbot, output_audio, state]
|
209 |
)
|
210 |
|
211 |
# Automatically restart recording after the assistant's response
|
|
|
55 |
state.pause_detected = pause_detected
|
56 |
|
57 |
if state.pause_detected:
|
58 |
+
# Stop recording
|
59 |
+
return gr.update(recording=False), state
|
60 |
else:
|
61 |
+
return None, state
|
62 |
|
63 |
def update_or_append_conversation(conversation, id, role, content):
|
64 |
# Find if there's an existing message with the given id
|
|
|
124 |
raise gr.Error(f"Error during audio streaming: {e}")
|
125 |
|
126 |
def response(state: AppState):
|
127 |
+
if not state.pause_detected:
|
128 |
+
return gr.update(), gr.update(), state
|
129 |
+
|
130 |
if state.stream is None or len(state.stream) == 0:
|
131 |
+
return gr.update(), gr.update(), state
|
|
|
132 |
|
133 |
audio_buffer = io.BytesIO()
|
134 |
segment = AudioSegment(
|
|
|
154 |
state.stream = None
|
155 |
state.pause_detected = False
|
156 |
|
157 |
+
def maybe_call_response(state):
|
158 |
+
if state.pause_detected:
|
159 |
+
return response(state)
|
160 |
+
else:
|
161 |
+
# Do nothing
|
162 |
+
return gr.update(), gr.update(), state
|
163 |
+
|
164 |
def start_recording_user(state: AppState):
|
165 |
if not state.stopped:
|
166 |
return gr.update(recording=True)
|
|
|
189 |
|
190 |
with gr.Row():
|
191 |
with gr.Column():
|
192 |
+
input_audio = gr.Audio(label="Input Audio", source="microphone", type="numpy")
|
193 |
with gr.Column():
|
194 |
chatbot = gr.Chatbot(label="Conversation", type="messages")
|
195 |
output_audio = gr.Audio(label="Output Audio", autoplay=True)
|
|
|
199 |
set_key_button.click(set_api_key, inputs=[api_key_input, state], outputs=[api_key_status, state])
|
200 |
format_dropdown.change(update_format, inputs=[format_dropdown, state], outputs=[state])
|
201 |
|
|
|
|
|
|
|
202 |
stream = input_audio.stream(
|
203 |
process_audio,
|
204 |
[input_audio, state],
|
205 |
+
[input_audio, state],
|
206 |
stream_every=0.25, # Reduced to make it more responsive
|
207 |
time_limit=60, # Increased to allow for longer messages
|
208 |
)
|
209 |
|
|
|
210 |
stream.then(
|
211 |
+
maybe_call_response,
|
212 |
inputs=[state],
|
213 |
+
outputs=[chatbot, output_audio, state],
|
214 |
)
|
215 |
|
216 |
# Automatically restart recording after the assistant's response
|