File size: 2,047 Bytes
d1b91e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import librosa
from utils.audio import librosa_wav2spec
from utils.commons.hparams import hparams
import numpy as np

# Module-level registry: maps a vocoder name to the class stored under it by
# `register_vocoder`; looked up by `get_vocoder_cls`.
REGISTERED_VOCODERS = {}


def register_vocoder(name):
    """
    Build a class decorator that records the decorated class in the registry.

    :param name: key under which the class is stored in REGISTERED_VOCODERS
    :return: decorator that registers the class and returns it unchanged
    """

    def _register(vocoder_cls):
        # A later registration under the same name overwrites the earlier one.
        REGISTERED_VOCODERS[name] = vocoder_cls
        return vocoder_cls

    return _register


def get_vocoder_cls(vocoder_name):
    """
    Look up a vocoder class in the registry by name.

    :param vocoder_name: key previously used with register_vocoder
    :return: the registered class, or None when the name is not registered
    """
    vocoder_cls = REGISTERED_VOCODERS.get(vocoder_name)
    return vocoder_cls


class BaseVocoder:
    """
    Base class for vocoders, bundled with static feature-extraction helpers.

    `spec2wav` raises NotImplementedError here and is meant to be overridden;
    the static helpers read their audio settings from the global `hparams`.
    """

    def spec2wav(self, mel):
        """
        Convert a mel-spectrogram to a waveform (not implemented in the base class).

        :param mel: [T, 80]
        :return: wav: [T']
        """
        raise NotImplementedError

    @staticmethod
    def wav2spec(wav_fn):
        """
        Extract waveform and mel-spectrogram from a file via `librosa_wav2spec`.

        All analysis settings are taken from `hparams`.

        :param wav_fn: str
        :return: wav, mel: [T, 80]
        """
        # Keys are listed in the keyword order expected by librosa_wav2spec.
        extract_kwargs = dict(
            fft_size=hparams['fft_size'],
            hop_size=hparams['hop_size'],
            win_length=hparams['win_size'],
            num_mels=hparams['audio_num_mel_bins'],
            fmin=hparams['fmin'],
            fmax=hparams['fmax'],
            sample_rate=hparams['audio_sample_rate'],
            loud_norm=hparams['loud_norm'],
        )
        result = librosa_wav2spec(wav_fn, **extract_kwargs)
        return result['wav'], result['mel']

    @staticmethod
    def wav2mfcc(wav_fn):
        """
        Compute MFCCs with first- and second-order deltas for an audio file.

        The audio is loaded at `hparams['audio_sample_rate']`; 13 MFCCs are
        computed using the FFT, hop and window sizes from `hparams`, then
        concatenated with their deltas and transposed.

        :param wav_fn: path handed to `librosa.core.load`
        :return: mfcc: transposed stack of MFCC, delta and delta-delta
                 (presumably [T, 39] -- confirm against librosa output shape)
        """
        n_fft = hparams['fft_size']
        hop_length = hparams['hop_size']
        win_length = hparams['win_size']
        sr = hparams['audio_sample_rate']

        # load() returns (samples, sample_rate); only the samples are needed.
        samples = librosa.core.load(wav_fn, sr=sr)[0]
        base = librosa.feature.mfcc(
            y=samples, sr=sr, n_mfcc=13,
            n_fft=n_fft, hop_length=hop_length,
            win_length=win_length, pad_mode="constant", power=1.0,
        )
        # Stack the static coefficients with their order-1 and order-2 deltas
        # along the first axis, then transpose.
        stacked = np.concatenate(
            [base] + [librosa.feature.delta(base, order=k) for k in (1, 2)]
        )
        return stacked.T