File size: 1,163 Bytes
d1a84ee
 
 
495433d
5f58013
73eaac3
d1a84ee
75363d3
 
 
 
73eaac3
df1ad02
 
 
 
 
495433d
df1ad02
d1a84ee
 
 
df1ad02
 
 
495433d
d1a84ee
495433d
df1ad02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import os

## build wavegru-cpp
# Shell commands run in order through os.system:
#   1. `clean --expunge` resets Bazel's state before building.
#   2. build the `wavegru_mod` target in optimized mode with -march=native.
# NOTE(review): the exit status of each command is discarded, so a failed
# build is not detected here — presumably it surfaces later when
# `wavegru_cpp` is imported; confirm.
_BUILD_COMMANDS = (
    "./bazelisk-linux-amd64 clean --expunge",
    "./bazelisk-linux-amd64 build wavegru_mod -c opt --copt=-march=native",
)
for _build_command in _BUILD_COMMANDS:
    os.system(_build_command)


import gradio as gr

from inference import load_tacotron_model, load_wavegru_net, mel_to_wav, text_to_mel
from wavegru_cpp import extract_weight_mask, load_wavegru_cpp

# Files handed to load_tacotron_model, in its positional argument order.
_TACOTRON_FILES = (
    "./alphabet.txt",
    "./tacotron.toml",
    "./pretrained_model_ljs_500k.ckpt",
)
alphabet, tacotron_net, tacotron_config = load_tacotron_model(*_TACOTRON_FILES)


# Files handed to load_wavegru_net, in its positional argument order.
_WAVEGRU_FILES = ("./wavegru.yaml", "./wavegru.ckpt")
wavegru_config, wavegru_net = load_wavegru_net(*_WAVEGRU_FILES)

# The result of extract_weight_mask(wavegru_net) is what load_wavegru_cpp
# consumes; the intermediate stays available as a module-level name.
wave_cpp_weight_mask = extract_weight_mask(wavegru_net)
wavecpp = load_wavegru_cpp(wave_cpp_weight_mask)


def speak(text):
    """Turn ``text`` into audio and return ``(24_000, samples)``.

    The text goes through ``text_to_mel`` and the result is then passed to
    ``mel_to_wav``. The ``.shape`` of each intermediate result is printed to
    stdout.

    Args:
        text: Input forwarded unchanged to ``text_to_mel``.

    Returns:
        A 2-tuple of the constant ``24_000`` and the value returned by
        ``mel_to_wav``. NOTE(review): 24_000 is presumably the sample rate
        in Hz expected by the "audio" output — confirm against the WaveGRU
        config.
    """
    mel_spec = text_to_mel(tacotron_net, text, alphabet, tacotron_config)
    print(mel_spec.shape)

    waveform = mel_to_wav(wavegru_net, wavecpp, mel_spec, wavegru_config)
    print(waveform.shape)

    sample_rate = 24_000
    return sample_rate, waveform


title = "WaveGRU-TTS"
description = "WaveGRU text-to-speech demo."

# Build the Gradio interface around `speak` (text in, audio out), then start
# it in a separate step with debug disabled.
demo = gr.Interface(
    fn=speak,
    inputs="text",
    outputs="audio",
    title=title,
    description=description,
    theme="default",
    allow_screenshot=False,
    allow_flagging="never",
)
demo.launch(debug=False)