Spaces:
Paused
Paused
File size: 1,388 Bytes
45ee559 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import random
import numpy as np
import torch
from TTS.vocoder.configs import WavernnConfig
from TTS.vocoder.models.wavernn import Wavernn, WavernnArgs
def test_wavernn():
    """Check Wavernn output shapes for each output mode and for inference.

    A single config is reused: only ``config.model_args.mode`` is changed
    between runs, and a fresh ``Wavernn`` is built for every mode. The forward
    pass is expected to return ``(batch, samples, channels)`` where the channel
    count depends on the mode:

    * ``"mold"``  -> 30 channels
    * ``"gauss"`` -> 2 channels
    * ``4`` (integer mode) -> ``2**4`` channels

    Finally ``inference`` is run on the last model built (the integer-mode
    one) with a randomly sized input, and the output length is checked.
    """
    config = WavernnConfig()
    config.model_args = WavernnArgs(
        rnn_dims=512,
        fc_dims=512,
        mode="mold",
        mulaw=False,
        pad=2,
        use_aux_net=True,
        use_upsample_net=True,
        upsample_factors=[4, 8, 8],
        feat_dims=80,
        compute_dims=128,
        res_out_dims=128,
        num_res_blocks=10,
    )
    config.audio.hop_length = 256
    config.audio.sample_rate = 2048

    # Inputs are created once, before any model is built, and shared by all
    # forward passes below.
    dummy_x = torch.rand((2, 1280))
    dummy_m = torch.rand((2, 80, 9))
    # The inference input length is random, so it is reported in the failure
    # message of the final assertion to make a failing run reproducible.
    y_size = random.randrange(20, 60)
    dummy_y = torch.rand((80, y_size))

    # (mode, expected size of the last output dimension)
    mode_channels = (
        ("mold", 30),
        ("gauss", 2),
        (4, 2**4),
    )
    for mode, channels in mode_channels:
        config.model_args.mode = mode
        model = Wavernn(config)
        output = model(dummy_x, dummy_m)
        expected_shape = (2, 1280, channels)
        # Every mode shares this assert line, so the mode goes in the message.
        assert np.all(output.shape == expected_shape), (mode, output.shape)

    # `model` is the last one built in the loop, i.e. the integer-mode model.
    # NOTE(review): positional args are presumably (mels, batched, target,
    # overlap) - confirm against the Wavernn.inference signature.
    output = model.inference(dummy_y, True, 5500, 550)
    # 256 equals both config.audio.hop_length and the product of
    # upsample_factors (4 * 8 * 8) configured above.
    expected_shape = (256 * (y_size - 1),)
    assert np.all(output.shape == expected_shape), (y_size, output.shape)
|