Spaces:
Starting
on
A10G
Starting
on
A10G
import os | |
import json | |
import torch | |
import numpy as np | |
import audioldm.hifigan as hifigan | |
# Hyper-parameters for the HiFi-GAN generator built by get_vocoder():
# 16 kHz audio ("sampling_rate") with 64 mel bins ("num_mels").
# NOTE(review): get_vocoder() only hands this dict to hifigan.Generator; which
# keys the generator actually reads is not visible here -- the optimiser /
# distributed entries are presumably training leftovers, confirm before pruning.
HIFIGAN_16K_64 = {
    "resblock": "1",
    # Optimiser / run settings.
    "num_gpus": 6,
    "batch_size": 16,
    "learning_rate": 0.0002,
    "adam_b1": 0.8,
    "adam_b2": 0.99,
    "lr_decay": 0.999,
    "seed": 1234,
    # Generator layout. The product of the upsample rates
    # (5 * 4 * 2 * 2 * 2 = 160) equals "hop_size" below.
    "upsample_rates": [5, 4, 2, 2, 2],
    "upsample_kernel_sizes": [16, 16, 8, 4, 4],
    "upsample_initial_channel": 1024,
    "resblock_kernel_sizes": [3, 7, 11],
    "resblock_dilation_sizes": [
        [1, 3, 5],
        [1, 3, 5],
        [1, 3, 5],
    ],
    "segment_size": 8192,
    # Spectrogram / audio front-end settings.
    "num_mels": 64,
    "num_freq": 1025,
    "n_fft": 1024,
    "hop_size": 160,
    "win_size": 1024,
    "sampling_rate": 16000,
    "fmin": 0,
    "fmax": 8000,
    "fmax_for_loss": None,
    "num_workers": 4,
    # Distributed-run settings.
    "dist_config": {
        "dist_backend": "nccl",
        "dist_url": "tcp://localhost:54321",
        "world_size": 1,
    },
}
def get_available_checkpoint_keys(model, ckpt):
    """Return the checkpoint entries that are compatible with ``model``.

    The object stored at ``ckpt`` is read with ``torch.load`` and its
    ``"state_dict"`` entry is compared against ``model.state_dict()``. An
    entry is kept only when its key exists in the model and both tensors
    report the same ``size()``; every other key is reported on stdout and
    dropped. A summary line with the matched/total counts is printed last.

    Args:
        model: Object exposing ``state_dict()`` (e.g. a ``torch.nn.Module``).
        ckpt: Anything ``torch.load`` accepts; the loaded object must be
            indexable with the key ``"state_dict"``.

    Returns:
        dict: Checkpoint key -> tensor for every compatible entry, in
        checkpoint order.
    """
    print("==> Attemp to reload from %s" % ckpt)
    # Load onto the CPU so a checkpoint written from a GPU process can still
    # be read on a host without CUDA. Module.load_state_dict copies values
    # into the existing parameters, so the matched tensors do not have to
    # live on the model's device.
    state_dict = torch.load(ckpt, map_location="cpu")["state_dict"]
    current_state_dict = model.state_dict()

    new_state_dict = {}
    for key, tensor in state_dict.items():
        if (
            key in current_state_dict
            and current_state_dict[key].size() == tensor.size()
        ):
            new_state_dict[key] = tensor
        else:
            # Unknown key or shape mismatch: skip instead of failing later.
            print("==> WARNING: Skipping %s" % key)

    print(
        "%s out of %s keys are matched"
        % (len(new_state_dict), len(state_dict))
    )
    return new_state_dict
def get_param_num(model):
    """Count the scalar elements across everything in ``model.parameters()``.

    Args:
        model: Object exposing ``parameters()`` whose items provide
            ``numel()`` (e.g. a ``torch.nn.Module``).

    Returns:
        The sum of ``numel()`` over all parameters; 0 when there are none.
    """
    total = 0
    for tensor in model.parameters():
        total += tensor.numel()
    return total
def get_vocoder(config, device):
    """Build the HiFi-GAN generator in inference form and move it to ``device``.

    Args:
        config: Ignored. The generator is always built from the module-level
            ``HIFIGAN_16K_64`` settings; the parameter is overwritten on the
            first line of the body.
        device: Target passed to the generator's ``to()``.

    Returns:
        The ``hifigan.Generator`` instance after ``eval()``,
        ``remove_weight_norm()`` and ``to(device)`` have been called on it.
        No checkpoint is loaded here.
    """
    # The incoming ``config`` is deliberately replaced by the fixed preset.
    config = hifigan.AttrDict(HIFIGAN_16K_64)
    generator = hifigan.Generator(config)

    generator.eval()
    generator.remove_weight_norm()
    generator.to(device)

    return generator
def vocoder_infer(mels, vocoder, lengths=None):
    """Run ``vocoder`` on ``mels`` and return the result as int16 samples.

    The vocoder is switched to eval mode and called under ``torch.no_grad()``.
    Dimension 1 of its output is squeezed, the values are scaled by 32768,
    clipped to the int16 range and cast to ``int16``.

    Args:
        mels: Input forwarded unchanged to ``vocoder``.
        vocoder: Callable exposing ``eval()``; its output must be a tensor
            with a dimension 1 to squeeze.
            # assumes output is (batch, 1, samples) floats in [-1, 1] -- TODO confirm
        lengths: Optional stop index applied to the second axis of the result
            (``wavs[:, :lengths]``); ``None`` keeps every sample.

    Returns:
        numpy.ndarray of dtype ``int16``.
    """
    vocoder.eval()
    with torch.no_grad():
        wavs = vocoder(mels).squeeze(1)

    # A sample of +1.0 scales to 32768, one past the int16 maximum. Casting
    # that directly wraps around (or is platform dependent), so clip first.
    scaled = wavs.cpu().numpy() * 32768
    wavs = np.clip(scaled, -32768, 32767).astype("int16")

    if lengths is not None:
        wavs = wavs[:, :lengths]
    return wavs