jbilcke-hf HF staff committed on
Commit
c2cf399
1 Parent(s): fe3096b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -21
app.py CHANGED
@@ -575,20 +575,17 @@ def generate_speech_for_sentence(history, chatbot_role, sentence, return_as_byte
575
  reduced_noise = nr.reduce_noise(y=float_data, sr=24000,prop_decrease =0.8,n_fft=1024)
576
  wav_bytestream = (reduced_noise * 32767).astype(np.int16)
577
  wav_bytestream = wav_bytestream.tobytes()
578
-
 
 
 
579
  if audio_stream is not None:
580
- if not return_as_byte:
581
- audio_unique_filename = "/tmp/"+ str(uuid.uuid4())+".wav"
582
- with wave.open(audio_unique_filename, "w") as f:
583
- f.setnchannels(1)
584
- # 2 bytes per sample.
585
- f.setsampwidth(2)
586
- f.setframerate(24000)
587
- f.writeframes(wav_bytestream)
588
-
589
- return (history , gr.Audio(value=audio_unique_filename, autoplay=True))
590
- else:
591
- return (history , gr.Audio(value=wav_bytestream, autoplay=True))
592
  except RuntimeError as e:
593
  if "device-side assert" in str(e):
594
  # cannot do anything on cuda device side error, need to restart
@@ -634,14 +631,12 @@ def generate_story_and_speech(input_text, chatbot_role):
634
  history_tuples = [tuple(entry) for entry in last_history]
635
 
636
  synthesized_speech = generate_speech_for_sentence(history_tuples, chatbot_role, story_text, return_as_byte=True)
637
- if synthesized_speech:
638
- # Get the Gradio Audio object
639
- audio_obj = synthesized_speech[1]
640
- # Access the BytesIO object containing the WAV file and extract bytes
641
- speech_audio_bytes = audio_obj.data # Use the 'data' attribute to get the bytearray
642
- # Convert the speech audio bytes to base64 for JSON serialization
643
- speech_audio_base64 = base64.b64encode(speech_audio_bytes).decode('utf8')
644
- return {"text": story_text.strip(), "audio": speech_audio_base64}
645
  else:
646
  return {"text": "Failed to generate story (no synthesized speech)", "audio": None}
647
 
 
575
  reduced_noise = nr.reduce_noise(y=float_data, sr=24000,prop_decrease =0.8,n_fft=1024)
576
  wav_bytestream = (reduced_noise * 32767).astype(np.int16)
577
  wav_bytestream = wav_bytestream.tobytes()
578
+
579
+ # Directly encode the raw 16-bit PCM bytestream (no WAV header is written) to base64
580
+ base64_audio = base64.b64encode(wav_bytestream).decode('utf8')
581
+
582
  if audio_stream is not None:
583
+ return (history, base64_audio)
584
+ else:
585
+ # Handle the case where the audio stream is None (e.g., silent response)
586
+ return (history, None)
587
+
588
+
 
 
 
 
 
 
589
  except RuntimeError as e:
590
  if "device-side assert" in str(e):
591
  # cannot do anything on cuda device side error, need to restart
 
631
  history_tuples = [tuple(entry) for entry in last_history]
632
 
633
  synthesized_speech = generate_speech_for_sentence(history_tuples, chatbot_role, story_text, return_as_byte=True)
634
+
635
+ if synthesized_speech:
636
+ # Retrieve the base64 audio string from the tuple
637
+ base64_audio = synthesized_speech[1]
638
+ return {"text": story_text.strip(), "audio": base64_audio}
639
+
 
 
640
  else:
641
  return {"text": "Failed to generate story (no synthesized speech)", "audio": None}
642