Persian_Piper_TTS

Running

App Files Community

aigmixer committed on Dec 19, 2023

Commit

4236dbe

•

1 Parent(s): e4eb5c5

testing streaming

Browse files

Files changed (1) hide show

app.py +7 -20

app.py CHANGED Viewed

@@ -13,40 +13,27 @@ def synthesize_speech(text):
     # Check for NSFW content
     nsfw_result = nsfw_detector(text)
     if nsfw_result[0]['label'] == 'NSFW':
-        return "NSFW content detected. Cannot process.", None
     model_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx")
     config_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx.json")
     voice = PiperVoice.load(model_path, config_path)
-    # Create an in-memory buffer for the WAV file
-    buffer = BytesIO()
-    with wave.open(buffer, 'wb') as wav_file:
-        wav_file.setframerate(voice.config.sample_rate)
-        wav_file.setsampwidth(2)  # 16-bit
-        wav_file.setnchannels(1)  # mono
-        # Synthesize speech
-        voice.synthesize(text, wav_file)
-    # Convert buffer to NumPy array for Gradio output
-    buffer.seek(0)
-    audio_data = np.frombuffer(buffer.read(), dtype=np.int16)
-    return audio_data.tobytes(), None
 # Using Gradio Blocks
 with gr.Blocks(theme=gr.themes.Base()) as blocks:
     gr.Markdown("# Text to Speech Synthesizer")
     gr.Markdown("Enter text to synthesize it into speech using PiperVoice.")
     input_text = gr.Textbox(label="Input Text")
-    output_audio = gr.Audio(label="Synthesized Speech", type="numpy", live=True)  # Set live=True
-    output_text = gr.Textbox(label="Output Text", visible=False)  # This is the new text output component
     submit_button = gr.Button("Synthesize")
-    submit_button.click(synthesize_speech, inputs=input_text, outputs=[output_audio, output_text])
 # Run the app
 blocks.launch()

     # Check for NSFW content
     nsfw_result = nsfw_detector(text)
     if nsfw_result[0]['label'] == 'NSFW':
+        yield "NSFW content detected. Cannot process."
+        return
     model_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx")
     config_path = hf_hub_download(repo_id="aigmixer/speaker_00", filename="speaker_00_model.onnx.json")
     voice = PiperVoice.load(model_path, config_path)
+    # Synthesize speech and stream audio
+    for audio_chunk in voice.synthesize(text, chunk_size=2048):
+        yield audio_chunk.tobytes()
 # Using Gradio Blocks
 with gr.Blocks(theme=gr.themes.Base()) as blocks:
     gr.Markdown("# Text to Speech Synthesizer")
     gr.Markdown("Enter text to synthesize it into speech using PiperVoice.")
     input_text = gr.Textbox(label="Input Text")
+    output_audio = gr.Audio(label="Synthesized Speech", type="numpy", streaming=True)
     submit_button = gr.Button("Synthesize")
+    submit_button.click(synthesize_speech, inputs=input_text, outputs=output_audio)
 # Run the app
 blocks.launch()