Spaces:
Runtime error
Runtime error
File size: 1,904 Bytes
ee21b96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import torch
from examples.textless_nlp.gslm.unit2speech.tacotron2.model import Tacotron2
from examples.textless_nlp.gslm.unit2speech.tacotron2.waveglow_denoiser import (
Denoiser,
)
def load_quantized_audio_from_file(file_path):
base_fname_batch, quantized_units_batch = [], []
with open(file_path) as f:
for line in f:
base_fname, quantized_units_str = line.rstrip().split("|")
quantized_units = [int(q) for q in quantized_units_str.split(" ")]
base_fname_batch.append(base_fname)
quantized_units_batch.append(quantized_units)
return base_fname_batch, quantized_units_batch
def synthesize_audio(model, waveglow, denoiser, inp, lab=None, strength=0.0):
assert inp.size(0) == 1
inp = inp.cuda()
if lab is not None:
lab = torch.LongTensor(1).cuda().fill_(lab)
with torch.no_grad():
_, mel, _, ali, has_eos = model.inference(inp, lab, ret_has_eos=True)
aud = waveglow.infer(mel, sigma=0.666)
aud_dn = denoiser(aud, strength=strength).squeeze(1)
return mel, aud, aud_dn, has_eos
def load_tacotron(tacotron_model_path, max_decoder_steps):
ckpt_dict = torch.load(tacotron_model_path)
hparams = ckpt_dict["hparams"]
hparams.max_decoder_steps = max_decoder_steps
sr = hparams.sampling_rate
model = Tacotron2(hparams)
model.load_state_dict(ckpt_dict["model_dict"])
model = model.cuda().eval().half()
return model, sr, hparams
def load_waveglow(waveglow_path):
waveglow = torch.load(waveglow_path)["model"]
waveglow = waveglow.cuda().eval().half()
for k in waveglow.convinv:
k.float()
denoiser = Denoiser(waveglow)
return waveglow, denoiser
|