aigmixer committed on
Commit
4236dbe
1 Parent(s): e4eb5c5

testing streaming

Browse files
Files changed (1) hide show
  1. app.py +7 -20
app.py CHANGED
@@ -13,40 +13,27 @@ def synthesize_speech(text):
13
  # Check for NSFW content
14
  nsfw_result = nsfw_detector(text)
15
  if nsfw_result[0]['label'] == 'NSFW':
16
- return "NSFW content detected. Cannot process.", None
 
17
 
18
  model_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx")
19
  config_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx.json")
20
  voice = PiperVoice.load(model_path, config_path)
21
 
22
- # Create an in-memory buffer for the WAV file
23
- buffer = BytesIO()
24
- with wave.open(buffer, 'wb') as wav_file:
25
- wav_file.setframerate(voice.config.sample_rate)
26
- wav_file.setsampwidth(2) # 16-bit
27
- wav_file.setnchannels(1) # mono
28
-
29
- # Synthesize speech
30
- voice.synthesize(text, wav_file)
31
-
32
- # Convert buffer to NumPy array for Gradio output
33
- buffer.seek(0)
34
- audio_data = np.frombuffer(buffer.read(), dtype=np.int16)
35
-
36
- return audio_data.tobytes(), None
37
 
38
  # Using Gradio Blocks
39
  with gr.Blocks(theme=gr.themes.Base()) as blocks:
40
  gr.Markdown("# Text to Speech Synthesizer")
41
  gr.Markdown("Enter text to synthesize it into speech using PiperVoice.")
42
  input_text = gr.Textbox(label="Input Text")
43
- output_audio = gr.Audio(label="Synthesized Speech", type="numpy", live=True) # Set live=True
44
- output_text = gr.Textbox(label="Output Text", visible=False) # This is the new text output component
45
  submit_button = gr.Button("Synthesize")
46
 
47
- submit_button.click(synthesize_speech, inputs=input_text, outputs=[output_audio, output_text])
48
 
49
  # Run the app
50
  blocks.launch()
51
 
52
-
 
13
  # Check for NSFW content
14
  nsfw_result = nsfw_detector(text)
15
  if nsfw_result[0]['label'] == 'NSFW':
16
+ yield "NSFW content detected. Cannot process."
17
+ return
18
 
19
  model_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx")
20
  config_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx.json")
21
  voice = PiperVoice.load(model_path, config_path)
22
 
23
+ # Synthesize speech and stream audio
24
+ for audio_chunk in voice.synthesize(text, chunk_size=2048):
25
+ yield audio_chunk.tobytes()
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  # Using Gradio Blocks
28
  with gr.Blocks(theme=gr.themes.Base()) as blocks:
29
  gr.Markdown("# Text to Speech Synthesizer")
30
  gr.Markdown("Enter text to synthesize it into speech using PiperVoice.")
31
  input_text = gr.Textbox(label="Input Text")
32
+ output_audio = gr.Audio(label="Synthesized Speech", type="numpy", streaming=True)
 
33
  submit_button = gr.Button("Synthesize")
34
 
35
+ submit_button.click(synthesize_speech, inputs=input_text, outputs=output_audio)
36
 
37
  # Run the app
38
  blocks.launch()
39