Commit · d197937
Parent(s): c6ab084
allowing model to synthesize samples using the CPU
Files changed:
- .DS_Store +0 -0
- app.py +16 -3
- output/.DS_Store +0 -0
- output/ckpt/.DS_Store +0 -0
- utils/.DS_Store +0 -0
- utils/model.py +5 -5
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
app.py CHANGED
@@ -1,7 +1,20 @@
 import gradio as gr
+import subprocess
 
-
-    return "Hello " + name + "!!"
+predefined_texts = ["Example text 1", "Example text 2", "Example text 3"]
 
-
+
+def synthesize_speech(text, speaker_id):
+    command = f"python3 synthesize.py --text '{text}' --bert_embed 1 --speaker_id {speaker_id} --restore_step 900000 --mode single -p config/EmoV_DB/preprocess.yaml -m config/EmoV_DB/model.yaml -t config/EmoV_DB/train.yaml"
+    output = subprocess.check_output(command, shell=True)
+    # Replace this with the path of the generated audio file
+    audio_file = 'output_file_path'
+    return audio_file
+
+
+iface = gr.Interface(fn=synthesize_speech,
+                     inputs=[gr.inputs.Dropdown(choices=predefined_texts, label="Select a text"),
+                             gr.inputs.Slider(minimum=0, maximum=10, step=1, default=0, label="Speaker ID")],
+                     outputs=gr.outputs.Audio(type="file"),
+                     title="Text-to-Speech Demo")
 iface.launch()
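A side note on the new wrapper: interpolating `text` into a shell string and running it with shell=True means a quote or other shell metacharacter in the selected text can break the command. A minimal sketch of the same call built as an argument list, assuming the exact synthesize.py flags used in the commit, with the hard-coded return path kept as the placeholder it is in app.py:

import subprocess

def synthesize_speech(text, speaker_id):
    # Same synthesize.py flags as the f-string version above, but passed as an
    # argv list so quotes or spaces inside `text` cannot escape the command.
    command = [
        "python3", "synthesize.py",
        "--text", text,
        "--bert_embed", "1",
        "--speaker_id", str(speaker_id),
        "--restore_step", "900000",
        "--mode", "single",
        "-p", "config/EmoV_DB/preprocess.yaml",
        "-m", "config/EmoV_DB/model.yaml",
        "-t", "config/EmoV_DB/train.yaml",
    ]
    subprocess.run(command, check=True)  # raise if synthesize.py exits non-zero
    # Placeholder, as in the commit: the real path of the generated audio
    # depends on how synthesize.py names its output.
    audio_file = "output_file_path"
    return audio_file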
output/.DS_Store CHANGED
Binary files a/output/.DS_Store and b/output/.DS_Store differ
output/ckpt/.DS_Store CHANGED
Binary files a/output/ckpt/.DS_Store and b/output/ckpt/.DS_Store differ
utils/.DS_Store CHANGED
Binary files a/utils/.DS_Store and b/utils/.DS_Store differ
utils/model.py CHANGED
@@ -17,8 +17,8 @@ def get_model(args, configs, device, train=False):
             train_config["path"]["ckpt_path"],
             "{}.pth.tar".format(args.restore_step),
         )
-        ckpt = torch.load(ckpt_path)
-        model.load_state_dict(ckpt["model"]
+        ckpt = torch.load(ckpt_path, map_location=device)
+        model.load_state_dict(ckpt["model"])
 
     if train:
         scheduled_optim = ScheduledOptim(
@@ -50,7 +50,7 @@ def get_vocoder(config, device):
             )
         elif speaker == "universal":
             vocoder = torch.hub.load(
-                "descriptinc/melgan-neurips", "load_melgan", "multi_speaker"
+                "descriptinc/melgan-neurips", "load_melgan", "multi_speaker",map_location=device
             )
         vocoder.mel2wav.eval()
         vocoder.mel2wav.to(device)
@@ -60,9 +60,9 @@ def get_vocoder(config, device):
         config = hifigan.AttrDict(config)
         vocoder = hifigan.Generator(config)
         if speaker == "LJSpeech":
-            ckpt = torch.load("hifigan/generator_LJSpeech.pth.tar")
+            ckpt = torch.load("hifigan/generator_LJSpeech.pth.tar",map_location=device)
         elif speaker == "universal":
-            ckpt = torch.load("hifigan/generator_universal.pth.tar")
+            ckpt = torch.load("hifigan/generator_universal.pth.tar",map_location=device)
         vocoder.load_state_dict(ckpt["generator"])
         vocoder.eval()
         vocoder.remove_weight_norm()
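The map_location argument is what makes the CPU path work: without it, torch.load tries to put the saved tensors back on the CUDA device they were serialized from, which fails on a machine with no GPU. A minimal, self-contained sketch of the pattern (the toy nn.Linear model and the checkpoint name are stand-ins, not part of this repo):

import torch
import torch.nn as nn

# Pick CUDA when available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Stand-in for FastSpeech2 / the HiFi-GAN generator.
model = nn.Linear(4, 2)
torch.save({"model": model.state_dict()}, "checkpoint.pth.tar")

# map_location remaps every stored tensor onto `device` while the checkpoint
# is deserialized, so a file written on a GPU machine still loads here.
ckpt = torch.load("checkpoint.pth.tar", map_location=device)
model.load_state_dict(ckpt["model"])
model.to(device).eval()

The MelGAN change is different in kind: torch.hub.load forwards extra keyword arguments to the hub entrypoint, so whether map_location=device is accepted there depends on how load_melgan is defined in that repo's hubconf rather than on torch.load itself.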