Spaces:
Paused
Paused
File size: 3,008 Bytes
45ee559 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import os
import torch
from tests import get_tests_input_path, get_tests_output_path, get_tests_path
from TTS.config import BaseAudioConfig
from TTS.utils.audio import AudioProcessor
from TTS.vocoder.layers.losses import MelganFeatureLoss, MultiScaleSTFTLoss, STFTLoss, TorchSTFT
# Shared fixtures for the vocoder loss tests in this module. Everything here
# runs once, at import time.

# Path returned by the `tests` package helper (presumably the root of the
# tests package). Not referenced again in the visible part of this file.
TESTS_PATH = get_tests_path()
# Directory "audio_tests" under the tests output path.
OUT_PATH = os.path.join(get_tests_output_path(), "audio_tests")
# Create the output directory on import; exist_ok=True means an already
# existing directory is not an error.
os.makedirs(OUT_PATH, exist_ok=True)
# Waveform file "example_1.wav" under the tests input path; loaded by the
# STFT-based tests in this module.
WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav")
# Audio processor shared by the tests, configured from a BaseAudioConfig
# constructed with no arguments (i.e. whatever defaults that class defines).
ap = AudioProcessor(**BaseAudioConfig().to_dict())
def test_torch_stft():
    """Check TorchSTFT output against the AudioProcessor STFT magnitude.

    The shared ``WAV_FILE`` is pushed through both implementations, configured
    with the same FFT size, hop length and window length taken from ``ap``,
    and the largest signed difference (reference minus torch) must stay
    below 1e-5.
    """
    stft_layer = TorchSTFT(ap.fft_size, ap.hop_length, ap.win_length)

    samples = ap.load_wav(WAV_FILE)

    # Reference magnitude from the AudioProcessor (the original test labels
    # this the librosa STFT).
    reference_mag = abs(ap._stft(samples))  # pylint: disable=protected-access

    # Torch path: add a leading batch axis and cast to float before the call.
    batched = torch.from_numpy(samples[None, :]).float()
    torch_mag = stft_layer(batched)[0].data.numpy()

    # NOTE(review): this is a signed difference, so only the case where the
    # reference exceeds the torch output is bounded. Confirm whether an
    # absolute difference was intended before tightening it.
    signed_gap = reference_mag - torch_mag
    assert signed_gap.max() < 1e-5
def test_stft_loss():
    """Sanity-check STFTLoss on identical and on unrelated inputs.

    The criterion returns two terms (called ``loss_m`` and ``loss_sc`` in the
    original test; presumably magnitude and spectral-convergence terms, to be
    confirmed against STFTLoss). Their sum must be exactly zero when the
    signal is compared with itself, and strictly positive, with the second
    term below 1.0, when it is compared with uniform random noise.
    """
    criterion = STFTLoss(ap.fft_size, ap.hop_length, ap.win_length)

    # Load the shared waveform and give it a leading batch axis.
    signal = torch.from_numpy(ap.load_wav(WAV_FILE)[None, :]).float()

    # Identical inputs: both terms must cancel exactly.
    m_term, sc_term = criterion(signal, signal)
    assert m_term + sc_term == 0

    # Random target of the same shape: non-zero total, bounded second term.
    noise = torch.rand_like(signal)
    m_term, sc_term = criterion(signal, noise)
    assert sc_term < 1.0
    assert m_term + sc_term > 0
def test_multiscale_stft_loss():
    """Sanity-check MultiScaleSTFTLoss on identical and on unrelated inputs.

    Three resolutions are used: half, equal to, and double the FFT size, hop
    length and window length configured on ``ap``. As in the single-scale
    test, the two returned terms must sum to exactly zero for identical
    inputs; against uniform random noise the sum must be positive and the
    second term must stay below 1.0.
    """

    def _half_same_double(size):
        # Integer halving mirrors the `//` used for every parameter.
        return [size // 2, size, size * 2]

    criterion = MultiScaleSTFTLoss(
        _half_same_double(ap.fft_size),
        _half_same_double(ap.hop_length),
        _half_same_double(ap.win_length),
    )

    # Load the shared waveform and give it a leading batch axis.
    signal = torch.from_numpy(ap.load_wav(WAV_FILE)[None, :]).float()

    # Identical inputs: both terms must cancel exactly.
    m_term, sc_term = criterion(signal, signal)
    assert m_term + sc_term == 0

    # Random target of the same shape: non-zero total, bounded second term.
    noise = torch.rand_like(signal)
    m_term, sc_term = criterion(signal, noise)
    assert sc_term < 1.0
    assert m_term + sc_term > 0
def test_melgan_feature_loss():
    """Sanity-check MelganFeatureLoss on differing and on identical features.

    Features are nested lists: 5 outer entries ("scales"), each holding 4
    tensors ("layers") of shape [3, 5, 7] drawn with ``torch.rand``.
    With independently drawn real and fake features the loss must be at most
    1.0; when both sides hold the very same tensors it must be exactly 0.
    """
    num_scales = 5
    num_layers = 4
    feat_shape = [3, 5, 7]

    # Case 1: every real/fake pair is an independent random draw.
    feats_real, feats_fake = [], []
    for _scale in range(num_scales):
        # Tuple elements evaluate left to right, so each layer draws its
        # real tensor first and its fake tensor second.
        layer_pairs = [
            (torch.rand(feat_shape), torch.rand(feat_shape))
            for _layer in range(num_layers)
        ]
        feats_real.append([real for real, _ in layer_pairs])
        feats_fake.append([fake for _, fake in layer_pairs])

    criterion = MelganFeatureLoss()
    assert criterion(feats_fake, feats_real).item() <= 1.0

    # Case 2: real and fake share the same tensor objects layer by layer.
    feats_real, feats_fake = [], []
    for _scale in range(num_scales):
        shared_layers = [torch.rand(feat_shape) for _layer in range(num_layers)]
        feats_real.append(shared_layers)
        # Separate list object, same tensors inside.
        feats_fake.append(list(shared_layers))

    criterion = MelganFeatureLoss()
    assert criterion(feats_fake, feats_real).item() == 0
|