Spaces:
Build error
Build error
File size: 4,148 Bytes
c962c9a 08d1744 3e0d7e1 08d1744 c962c9a 392fff0 3e0d7e1 c2c3684 6045b6b a6f1a9c e228397 a6f1a9c e228397 4c5bfad b04ebb9 3e0d7e1 b04ebb9 3e0d7e1 02d8bcc c962c9a 08d1744 5369878 f47a772 c962c9a f47a772 c962c9a f47a772 08d1744 f47a772 f4306a8 f41fe5e f47a772 a12c684 f47a772 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
import gradio as gr
from TTS.api import TTS
# Load every model once at import time; all of them are created with gpu=False.

# Multilingual model: text_to_speech uses it for every language that has no
# dedicated model below, optionally with a reference voice.
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/your_tts",
    progress_bar=False,
    gpu=False,
)

# Dedicated models for zh-CN, de and es.
zh_tts = TTS(
    model_name="tts_models/zh-CN/baker/tacotron2-DDC-GST",
    progress_bar=False,
    gpu=False,
)
# NOTE(review): unlike the other models, progress_bar is not passed here —
# confirm whether that is intentional.
de_tts = TTS(
    model_name="tts_models/de/thorsten/vits",
    gpu=False,
)
es_tts = TTS(
    model_name="tts_models/es/mai/tacotron2-DDC",
    progress_bar=False,
    gpu=False,
)
def text_to_speech(text: str, speaker_wav, speaker_wav_file, language: str):
    """Synthesize ``text`` into a WAV file and return that file's path.

    Args:
        text: Text to speak.
        speaker_wav: Reference voice for cloning (wired to the microphone
            input in the UI). Takes precedence whenever it is truthy.
        speaker_wav_file: Reference voice used only when ``speaker_wav`` is
            empty (wired to the file-upload input in the UI).
        language: Language code. ``"zh-CN"``, ``"de"`` and ``"es"`` are
            routed to their dedicated models; any other value is handed to
            the multilingual model.

    Returns:
        The path of the generated audio, always ``"output.wav"``.
    """
    # Fall back to the uploaded file only when no microphone take is present.
    reference_voice = speaker_wav
    if not speaker_wav and speaker_wav_file:
        reference_voice = speaker_wav_file

    # NOTE(review): every call writes to this same path, so overlapping
    # requests would overwrite each other's result — confirm this is acceptable.
    out_path = "output.wav"

    # The dedicated models are called without a reference voice: voice
    # cloning is deliberately not attempted for these languages.
    dedicated_models = (("zh-CN", zh_tts), ("de", de_tts), ("es", es_tts))
    for code, model in dedicated_models:
        if language == code:
            model.tts_to_file(text, file_path=out_path)
            return out_path

    if reference_voice is None:
        # No voice to clone: use the model's first built-in speaker.
        tts.tts_to_file(text, speaker=tts.speakers[0], language=language, file_path=out_path)
    else:
        tts.tts_to_file(text, speaker_wav=reference_voice, language=language, file_path=out_path)
    return out_path
# inputs = [gr.Textbox(label="Input the text", value="", max_lines=3),
# gr.Audio(label="Voice to clone", source="microphone", type="filepath"),
# gr.Audio(label="Voice to clone", type="filepath"),
# gr.Radio(label="Language", choices=["en", "zh-CN", "fr-fr", "pt-br", "de", "es"], value="en"),
# gr.Text(intro_text, font_size=14)]
# outputs = gr.Audio(label="Output")
# demo = gr.Interface(fn=text_to_speech, inputs=inputs, outputs=outputs)
# demo.launch()
# Presumably the demo's display title; nothing in the visible code reads it —
# confirm before removing.
title = "Voice-Cloning-Demo"
def toggle(choice):
    """Show the audio input matching ``choice`` and hide the other one.

    Args:
        choice: Selected upload method; ``"mic"`` selects the microphone
            input, any other value selects the file input.

    Returns:
        A pair of ``gr.update`` objects, microphone input first and file
        input second. Both also reset the component's value to ``None``.
    """
    use_mic = choice == "mic"
    return (
        gr.update(visible=use_mic, value=None),
        gr.update(visible=not use_mic, value=None),
    )
def handle_language_change(choice):
    """Hide or show the voice-cloning controls for the selected language.

    Args:
        choice: Selected language code.

    Returns:
        A tuple of three ``gr.update`` objects (the UI applies them to the
        upload-method radio, the microphone input and the file input), all
        hidden for ``"zh-CN"``, ``"de"`` and ``"es"`` and all visible otherwise.
    """
    # NOTE(review): when the controls are shown again, all three become
    # visible regardless of the current mic/file selection — confirm intended.
    cloning_controls_visible = choice not in ("zh-CN", "de", "es")
    return tuple(gr.update(visible=cloning_controls_visible) for _ in range(3))
# Notice rendered as Markdown directly under the language selector.
warming_text = """Please note that Chinese, German, and Spanish are currently not supported for voice cloning."""


def _clear_form():
    """Return updates that empty the text box, both voice inputs and the output."""
    return (
        gr.update(value=""),
        gr.update(value=None),
        gr.update(value=None),
        gr.update(value=None),
    )


with gr.Blocks() as demo:
    with gr.Row():
        # Left column: everything the user provides.
        with gr.Column():
            text_input = gr.Textbox(label="Input the text", value="", max_lines=3)
            lan_input = gr.Radio(
                label="Language",
                choices=["en", "fr-fr", "pt-br", "zh-CN", "de", "es"],
                value="en",
            )
            gr.Markdown(warming_text)
            radio = gr.Radio(
                ["mic", "file"],
                value="mic",
                label="How would you like to upload your audio?",
            )
            # Only one of the two voice inputs is visible at a time; the
            # microphone is the initial choice, matching the radio's default.
            audio_input_mic = gr.Audio(
                label="Voice to clone", source="microphone", type="filepath", visible=True
            )
            audio_input_file = gr.Audio(label="Voice to clone", type="filepath", visible=False)
            with gr.Row():
                with gr.Column():
                    btn_clear = gr.Button("Clear")
                with gr.Column():
                    btn = gr.Button("Submit", variant="primary")
        # Right column: the synthesized result.
        with gr.Column():
            audio_output = gr.Audio(label="Output")

    # gr.Examples(examples, fn=inference, inputs=[audio_file, text_input],
    #             outputs=audio_output, cache_examples=True)

    btn.click(
        text_to_speech,
        inputs=[text_input, audio_input_mic, audio_input_file, lan_input],
        outputs=audio_output,
    )
    # The Clear button previously had no handler attached and did nothing.
    btn_clear.click(
        _clear_form,
        inputs=[],
        outputs=[text_input, audio_input_mic, audio_input_file, audio_output],
    )
    radio.change(toggle, radio, [audio_input_mic, audio_input_file])
    lan_input.change(
        handle_language_change,
        lan_input,
        [radio, audio_input_mic, audio_input_file],
    )

# NOTE(review): newer Gradio releases replace launch(enable_queue=True) with
# demo.queue() — confirm against the Gradio version this app is pinned to.
demo.launch(enable_queue=True)