freddyaboulton committed
Commit e9633ca
1 Parent(s): 694882d
Files changed (1)
  1. app.py +12 -6
app.py CHANGED

@@ -49,7 +49,6 @@ def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[st
         id = str(time.time())
         full_response = ""
         asr_result = ""
-        audio_bytes_accumulated = b''

         for chunk in stream:
             if not chunk.choices:
@@ -69,10 +68,15 @@ def generate_response_and_audio(audio_bytes: bytes, lepton_conversation: list[st

             if audio:
                 # Accumulate audio bytes and yield them
-                audio_bytes_accumulated += b''.join([base64.b64decode(a) for a in audio])
-                yield id, None, None, audio_bytes_accumulated
+                audio_bytes_accumulated = b''.join([base64.b64decode(a) for a in audio])
+                audio = AudioSegment.from_file(io.BytesIO(audio_bytes_accumulated))
+                audio_array = np.array(audio.get_array_of_samples(), dtype=np.int16).reshape(1, -1)
+                print("audio.shape", audio_array.shape)
+                print("sampling_rate", audio.frame_rate)

-        yield id, full_response, asr_result, audio_bytes_accumulated
+                yield id, None, None, (audio.frame_rate, audio_array)
+
+        yield id, full_response, asr_result, None

     except Exception as e:
         raise gr.Error(f"Error during audio streaming: {e}")
@@ -98,8 +102,10 @@ def response(audio: tuple[int, np.ndarray], lepton_conversation: list[dict],
         if text:
             update_or_append_conversation(lepton_conversation, id, "assistant", text)
             update_or_append_conversation(gradio_conversation, id, "assistant", text)
-
-        yield (np.frombuffer(audio, dtype=np.int16).reshape(1, -1), ), AdditionalOutputs(lepton_conversation, gradio_conversation)
+        if audio:
+            yield audio, AdditionalOutputs(lepton_conversation, gradio_conversation)
+        else:
+            yield AdditionalOutputs(lepton_conversation, gradio_conversation)


 with gr.Blocks() as demo:
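
Note on the change: generate_response_and_audio no longer yields raw accumulated bytes; it decodes each batch of base64 audio chunks into a (sample_rate, int16 array) tuple, which response() then forwards as-is. A minimal sketch of that decoding step, assuming the chunks are base64-encoded audio in a container pydub/ffmpeg can read (the helper name decode_audio_chunks is hypothetical, not part of app.py):

import base64
import io

import numpy as np
from pydub import AudioSegment


def decode_audio_chunks(chunks: list[str]) -> tuple[int, np.ndarray]:
    # Join the base64 chunks into one byte string, as the commit does with b''.join(...)
    raw = b"".join(base64.b64decode(chunk) for chunk in chunks)
    # Let pydub (backed by ffmpeg) detect the container and decode it
    segment = AudioSegment.from_file(io.BytesIO(raw))
    # int16 samples shaped (1, num_samples), matching reshape(1, -1) in the diff
    samples = np.array(segment.get_array_of_samples(), dtype=np.int16).reshape(1, -1)
    return segment.frame_rate, samples

Yielding (frame_rate, samples) rather than raw encoded bytes means the consumer no longer has to guess the sample rate or call np.frombuffer on undecoded audio, which is what the line removed from response() was doing.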