akhaliq HF staff committed on
Commit
649a30c
1 Parent(s): dec22aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -31
app.py CHANGED
@@ -9,7 +9,6 @@ from dataclasses import dataclass, field
9
  from threading import Lock
10
  import base64
11
 
12
-
13
  @dataclass
14
  class AppState:
15
  stream: np.ndarray | None = None
@@ -83,30 +82,22 @@ def generate_response_and_audio(audio_bytes: bytes, state: AppState):
83
  stream=True,
84
  )
85
 
86
- full_response = ""
87
- audios = []
88
-
89
  for chunk in stream:
90
  if not chunk.choices:
91
  continue
92
  content = chunk.choices[0].delta.content
93
  audio = getattr(chunk.choices[0], 'audio', [])
94
- if content:
95
- full_response += content
96
- yield full_response, None, state
97
- if audio:
98
- audios.extend(audio)
99
-
100
- final_audio = b''.join([base64.b64decode(a) for a in audios])
101
-
102
- yield full_response, final_audio, state
103
 
104
  except Exception as e:
105
  raise gr.Error(f"Error during audio streaming: {e}")
106
 
107
  def response(state: AppState):
108
  if state.stream is None or len(state.stream) == 0:
109
- return None, None, state
 
110
 
111
  audio_buffer = io.BytesIO()
112
  segment = AudioSegment(
@@ -119,26 +110,24 @@ def response(state: AppState):
119
 
120
  generator = generate_response_and_audio(audio_buffer.getvalue(), state)
121
 
122
- # Process the generator to get the final results
123
- final_text = ""
124
- final_audio = None
 
 
 
 
125
  for text, audio, updated_state in generator:
126
- final_text = text if text else final_text
127
- final_audio = audio if audio else final_audio
128
  state = updated_state
129
-
130
- # Update the chatbot with the final conversation
131
- state.conversation.append({"role": "user", "content": "Audio input"})
132
- state.conversation.append({"role": "assistant", "content": final_text})
133
 
134
  # Reset the audio stream for the next interaction
135
  state.stream = None
136
  state.pause_detected = False
137
 
138
- chatbot_output = state.conversation[-2:] # Get the last two messages
139
-
140
- return chatbot_output, final_audio, state
141
-
142
  def start_recording_user(state: AppState):
143
  if not state.stopped:
144
  return gr.Audio(recording=True)
@@ -167,7 +156,7 @@ with gr.Blocks() as demo:
167
 
168
  with gr.Row():
169
  with gr.Column():
170
- input_audio = gr.Audio(label="Input Audio", sources="microphone", type="numpy")
171
  with gr.Column():
172
  chatbot = gr.Chatbot(label="Conversation", type="messages")
173
  output_audio = gr.Audio(label="Output Audio", autoplay=True)
@@ -188,10 +177,9 @@ with gr.Blocks() as demo:
188
  respond = input_audio.stop_recording(
189
  response,
190
  [state],
191
- [chatbot, output_audio, state]
 
192
  )
193
- # Update the chatbot with the final conversation
194
- respond.then(lambda s: s.conversation, [state], [chatbot])
195
 
196
  # Automatically restart recording after the assistant's response
197
  restart = output_audio.stop(
 
9
  from threading import Lock
10
  import base64
11
 
 
12
  @dataclass
13
  class AppState:
14
  stream: np.ndarray | None = None
 
82
  stream=True,
83
  )
84
 
 
 
 
85
  for chunk in stream:
86
  if not chunk.choices:
87
  continue
88
  content = chunk.choices[0].delta.content
89
  audio = getattr(chunk.choices[0], 'audio', [])
90
+ if content or audio:
91
+ audio_bytes = b''.join([base64.b64decode(a) for a in audio]) if audio else None
92
+ yield content, audio_bytes, state
 
 
 
 
 
 
93
 
94
  except Exception as e:
95
  raise gr.Error(f"Error during audio streaming: {e}")
96
 
97
  def response(state: AppState):
98
  if state.stream is None or len(state.stream) == 0:
99
+ yield None, None, state
100
+ return
101
 
102
  audio_buffer = io.BytesIO()
103
  segment = AudioSegment(
 
110
 
111
  generator = generate_response_and_audio(audio_buffer.getvalue(), state)
112
 
113
+ # Add the user's audio input to the conversation
114
+ state.conversation.append({"role": "user", "content": "Audio input"})
115
+
116
+ # Prepare assistant's message
117
+ assistant_message = {"role": "assistant", "content": ""}
118
+ state.conversation.append(assistant_message)
119
+
120
  for text, audio, updated_state in generator:
121
+ if text:
122
+ assistant_message["content"] += text
123
  state = updated_state
124
+ chatbot_output = state.conversation[-2:] # Get the last two messages
125
+ yield chatbot_output, audio, state
 
 
126
 
127
  # Reset the audio stream for the next interaction
128
  state.stream = None
129
  state.pause_detected = False
130
 
 
 
 
 
131
  def start_recording_user(state: AppState):
132
  if not state.stopped:
133
  return gr.Audio(recording=True)
 
156
 
157
  with gr.Row():
158
  with gr.Column():
159
+ input_audio = gr.Audio(label="Input Audio", source="microphone", type="numpy")
160
  with gr.Column():
161
  chatbot = gr.Chatbot(label="Conversation", type="messages")
162
  output_audio = gr.Audio(label="Output Audio", autoplay=True)
 
177
  respond = input_audio.stop_recording(
178
  response,
179
  [state],
180
+ [chatbot, output_audio, state],
181
+ every=1 # Ensures outputs are updated as they are yielded
182
  )
 
 
183
 
184
  # Automatically restart recording after the assistant's response
185
  restart = output_audio.stop(