"""Gradio demo that turns text into speech with Tacotron and WaveGRU.

Importing/running this module loads the Tacotron and WaveGRU checkpoints
from the current directory, prepares the C++ WaveGRU backend, and then
launches a Gradio interface whose single handler is :func:`speak`.
"""

# Build note for wavegru-cpp (kept disabled; enable to rebuild the module
# at start-up):
#
#   import os
#   os.system("./bazelisk-linux-amd64 clean --expunge")
#   os.system("./bazelisk-linux-amd64 build wavegru_mod -c opt --copt=-march=native")

import gradio as gr

from inference import load_tacotron_model, load_wavegru_net, mel_to_wav, text_to_mel
from wavegru_cpp import extract_weight_mask, load_wavegru_cpp

# --- Model loading (runs once, at start-up) ---------------------------------

alphabet, tacotron_net, tacotron_config = load_tacotron_model(
    "./alphabet.txt",
    "./tacotron.toml",
    "./pretrained_model_ljs_500k.ckpt",
)

wavegru_config, wavegru_net = load_wavegru_net("./wavegru.yaml", "./wavegru.ckpt")

# The C++ backend is built from the weight mask extracted from the network
# plus the last entry of the configured upsample factors.
wave_cpp_weight_mask = extract_weight_mask(wavegru_net)
wavecpp = load_wavegru_cpp(
    wave_cpp_weight_mask,
    wavegru_config["upsample_factors"][-1],
)

# First element of the tuple handed to the "audio" output; Gradio reads it as
# the sample rate in Hz.
# NOTE(review): presumably matches the rate the models were trained at --
# confirm against the WaveGRU config.
_OUTPUT_SAMPLE_RATE = 24_000

# Sample prompts offered in the UI.
_EXAMPLE_TEXTS = [
    "this is a test!",
    "October arrived, spreading a damp chill over the grounds and into the castle. Madam Pomfrey, the nurse, was kept busy by a sudden spate of colds among the staff and students.",
    "Artificial intelligence is intelligence demonstrated by machines, as opposed to natural intelligence displayed by animals including humans.",
]


def speak(text):
    """Synthesize audio for ``text``.

    The text is first converted to a mel representation by ``text_to_mel``
    and that result is then passed to ``mel_to_wav``.

    Args:
        text: The input string received from the Gradio text box.

    Returns:
        A ``(24_000, y)`` tuple, where ``y`` is whatever ``mel_to_wav``
        returns; this is the pair consumed by the "audio" output component.
    """
    mel_spec = text_to_mel(tacotron_net, text, alphabet, tacotron_config)
    waveform = mel_to_wav(wavegru_net, wavecpp, mel_spec, wavegru_config)
    return _OUTPUT_SAMPLE_RATE, waveform


title = "WaveGRU-TTS"
description = "WaveGRU text-to-speech demo."

# --- User interface ----------------------------------------------------------

demo = gr.Interface(
    fn=speak,
    inputs="text",
    examples=_EXAMPLE_TEXTS,
    outputs="audio",
    title=title,
    description=description,
    theme="default",
    allow_screenshot=False,
    allow_flagging="never",
)
demo.launch(debug=False)