akhaliq HF staff committed on
Commit
7049daf
·
verified ·
1 Parent(s): 0443391

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -13
app.py CHANGED
@@ -55,10 +55,10 @@ def process_audio(audio: tuple, state: AppState):
55
  state.pause_detected = pause_detected
56
 
57
  if state.pause_detected:
58
- # Stop recording and trigger response
59
- return gr.update(recording=False), state, True
60
  else:
61
- return None, state, False
62
 
63
  def update_or_append_conversation(conversation, id, role, content):
64
  # Find if there's an existing message with the given id
@@ -124,9 +124,11 @@ def generate_response_and_audio(audio_bytes: bytes, state: AppState):
124
  raise gr.Error(f"Error during audio streaming: {e}")
125
 
126
  def response(state: AppState):
 
 
 
127
  if state.stream is None or len(state.stream) == 0:
128
- yield None, None, state
129
- return
130
 
131
  audio_buffer = io.BytesIO()
132
  segment = AudioSegment(
@@ -152,6 +154,13 @@ def response(state: AppState):
152
  state.stream = None
153
  state.pause_detected = False
154
 
 
 
 
 
 
 
 
155
  def start_recording_user(state: AppState):
156
  if not state.stopped:
157
  return gr.update(recording=True)
@@ -180,7 +189,7 @@ with gr.Blocks() as demo:
180
 
181
  with gr.Row():
182
  with gr.Column():
183
- input_audio = gr.Audio(label="Input Audio", sources="microphone", type="numpy")
184
  with gr.Column():
185
  chatbot = gr.Chatbot(label="Conversation", type="messages")
186
  output_audio = gr.Audio(label="Output Audio", autoplay=True)
@@ -190,22 +199,18 @@ with gr.Blocks() as demo:
190
  set_key_button.click(set_api_key, inputs=[api_key_input, state], outputs=[api_key_status, state])
191
  format_dropdown.change(update_format, inputs=[format_dropdown, state], outputs=[state])
192
 
193
- # Add a dummy output to trigger the response function
194
- should_process_response = gr.Variable(False)
195
-
196
  stream = input_audio.stream(
197
  process_audio,
198
  [input_audio, state],
199
- [input_audio, state, should_process_response],
200
  stream_every=0.25, # Reduced to make it more responsive
201
  time_limit=60, # Increased to allow for longer messages
202
  )
203
 
204
- # When should_process_response is True, call response
205
  stream.then(
206
- response,
207
  inputs=[state],
208
- outputs=[chatbot, output_audio, state]
209
  )
210
 
211
  # Automatically restart recording after the assistant's response
 
55
  state.pause_detected = pause_detected
56
 
57
  if state.pause_detected:
58
+ # Stop recording
59
+ return gr.update(recording=False), state
60
  else:
61
+ return None, state
62
 
63
  def update_or_append_conversation(conversation, id, role, content):
64
  # Find if there's an existing message with the given id
 
124
  raise gr.Error(f"Error during audio streaming: {e}")
125
 
126
  def response(state: AppState):
127
+ if not state.pause_detected:
128
+ return gr.update(), gr.update(), state
129
+
130
  if state.stream is None or len(state.stream) == 0:
131
+ return gr.update(), gr.update(), state
 
132
 
133
  audio_buffer = io.BytesIO()
134
  segment = AudioSegment(
 
154
  state.stream = None
155
  state.pause_detected = False
156
 
157
+ def maybe_call_response(state):
158
+ if state.pause_detected:
159
+ return response(state)
160
+ else:
161
+ # Do nothing
162
+ return gr.update(), gr.update(), state
163
+
164
  def start_recording_user(state: AppState):
165
  if not state.stopped:
166
  return gr.update(recording=True)
 
189
 
190
  with gr.Row():
191
  with gr.Column():
192
+ input_audio = gr.Audio(label="Input Audio", source="microphone", type="numpy")
193
  with gr.Column():
194
  chatbot = gr.Chatbot(label="Conversation", type="messages")
195
  output_audio = gr.Audio(label="Output Audio", autoplay=True)
 
199
  set_key_button.click(set_api_key, inputs=[api_key_input, state], outputs=[api_key_status, state])
200
  format_dropdown.change(update_format, inputs=[format_dropdown, state], outputs=[state])
201
 
 
 
 
202
  stream = input_audio.stream(
203
  process_audio,
204
  [input_audio, state],
205
+ [input_audio, state],
206
  stream_every=0.25, # Reduced to make it more responsive
207
  time_limit=60, # Increased to allow for longer messages
208
  )
209
 
 
210
  stream.then(
211
+ maybe_call_response,
212
  inputs=[state],
213
+ outputs=[chatbot, output_audio, state],
214
  )
215
 
216
  # Automatically restart recording after the assistant's response