Raizudeen committed on
Commit
6a05650
·
verified ·
1 Parent(s): 6be1ab7

Update audio.py

Browse files
Files changed (1) hide show
  1. audio.py +136 -136
audio.py CHANGED
@@ -1,136 +1,136 @@
1
- import librosa
2
- import librosa.filters
3
- import numpy as np
4
- # import tensorflow as tf
5
- from scipy import signal
6
- from scipy.io import wavfile
7
- from hparams import hparams as hp
8
-
9
- def load_wav(path, sr):
10
- return librosa.core.load(path, sr=sr)[0]
11
-
12
- def save_wav(wav, path, sr):
13
- wav *= 32767 / max(0.01, np.max(np.abs(wav)))
14
- #proposed by @dsmiller
15
- wavfile.write(path, sr, wav.astype(np.int16))
16
-
17
- def save_wavenet_wav(wav, path, sr):
18
- librosa.output.write_wav(path, wav, sr=sr)
19
-
20
- def preemphasis(wav, k, preemphasize=True):
21
- if preemphasize:
22
- return signal.lfilter([1, -k], [1], wav)
23
- return wav
24
-
25
- def inv_preemphasis(wav, k, inv_preemphasize=True):
26
- if inv_preemphasize:
27
- return signal.lfilter([1], [1, -k], wav)
28
- return wav
29
-
30
- def get_hop_size():
31
- hop_size = hp.hop_size
32
- if hop_size is None:
33
- assert hp.frame_shift_ms is not None
34
- hop_size = int(hp.frame_shift_ms / 1000 * hp.sample_rate)
35
- return hop_size
36
-
37
- def linearspectrogram(wav):
38
- D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize))
39
- S = _amp_to_db(np.abs(D)) - hp.ref_level_db
40
-
41
- if hp.signal_normalization:
42
- return _normalize(S)
43
- return S
44
-
45
- def melspectrogram(wav):
46
- D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize))
47
- S = _amp_to_db(_linear_to_mel(np.abs(D))) - hp.ref_level_db
48
-
49
- if hp.signal_normalization:
50
- return _normalize(S)
51
- return S
52
-
53
- def _lws_processor():
54
- import lws
55
- return lws.lws(hp.n_fft, get_hop_size(), fftsize=hp.win_size, mode="speech")
56
-
57
- def _stft(y):
58
- if hp.use_lws:
59
- return _lws_processor(hp).stft(y).T
60
- else:
61
- return librosa.stft(y=y, n_fft=hp.n_fft, hop_length=get_hop_size(), win_length=hp.win_size)
62
-
63
- ##########################################################
64
- #Those are only correct when using lws!!! (This was messing with Wavenet quality for a long time!)
65
- def num_frames(length, fsize, fshift):
66
- """Compute number of time frames of spectrogram
67
- """
68
- pad = (fsize - fshift)
69
- if length % fshift == 0:
70
- M = (length + pad * 2 - fsize) // fshift + 1
71
- else:
72
- M = (length + pad * 2 - fsize) // fshift + 2
73
- return M
74
-
75
-
76
- def pad_lr(x, fsize, fshift):
77
- """Compute left and right padding
78
- """
79
- M = num_frames(len(x), fsize, fshift)
80
- pad = (fsize - fshift)
81
- T = len(x) + 2 * pad
82
- r = (M - 1) * fshift + fsize - T
83
- return pad, pad + r
84
- ##########################################################
85
- #Librosa correct padding
86
- def librosa_pad_lr(x, fsize, fshift):
87
- return 0, (x.shape[0] // fshift + 1) * fshift - x.shape[0]
88
-
89
- # Conversions
90
- _mel_basis = None
91
-
92
- def _linear_to_mel(spectogram):
93
- global _mel_basis
94
- if _mel_basis is None:
95
- _mel_basis = _build_mel_basis()
96
- return np.dot(_mel_basis, spectogram)
97
-
98
- def _build_mel_basis():
99
- assert hp.fmax <= hp.sample_rate // 2
100
- return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels,
101
- fmin=hp.fmin, fmax=hp.fmax)
102
-
103
- def _amp_to_db(x):
104
- min_level = np.exp(hp.min_level_db / 20 * np.log(10))
105
- return 20 * np.log10(np.maximum(min_level, x))
106
-
107
- def _db_to_amp(x):
108
- return np.power(10.0, (x) * 0.05)
109
-
110
- def _normalize(S):
111
- if hp.allow_clipping_in_normalization:
112
- if hp.symmetric_mels:
113
- return np.clip((2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value,
114
- -hp.max_abs_value, hp.max_abs_value)
115
- else:
116
- return np.clip(hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db)), 0, hp.max_abs_value)
117
-
118
- assert S.max() <= 0 and S.min() - hp.min_level_db >= 0
119
- if hp.symmetric_mels:
120
- return (2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value
121
- else:
122
- return hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db))
123
-
124
- def _denormalize(D):
125
- if hp.allow_clipping_in_normalization:
126
- if hp.symmetric_mels:
127
- return (((np.clip(D, -hp.max_abs_value,
128
- hp.max_abs_value) + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value))
129
- + hp.min_level_db)
130
- else:
131
- return ((np.clip(D, 0, hp.max_abs_value) * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db)
132
-
133
- if hp.symmetric_mels:
134
- return (((D + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) + hp.min_level_db)
135
- else:
136
- return ((D * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db)
 
1
+ import librosa
2
+ import librosa.filters
3
+ import numpy as np
4
+ # import tensorflow as tf
5
+ from scipy import signal
6
+ from scipy.io import wavfile
7
+ from hparams import hparams as hp
8
+
9
def load_wav(path, sr):
    """Load the audio file at ``path`` through librosa.

    ``sr`` is forwarded to ``librosa.core.load`` as its ``sr`` argument.
    Only the first element of the loader's result (the samples) is
    returned; the second element is discarded.
    """
    samples, _ = librosa.core.load(path, sr=sr)
    return samples
11
+
12
def save_wav(wav, path, sr):
    """Peak-normalize ``wav`` to the int16 range and write it as a WAV file.

    The signal is scaled so that its largest absolute sample maps to 32767
    (the peak is floored at 0.01 so near-silent input is not blown up),
    converted to ``int16`` and written with ``scipy.io.wavfile.write``.

    The caller's array is left untouched: scaling happens on a new array
    rather than in place, which also makes integer-typed and empty inputs
    work instead of raising.

    Args:
        wav: Array-like of audio samples.
        path: Destination file name (or file object) for ``wavfile.write``.
        sr: Sample rate written to the WAV header.
    """
    samples = np.asarray(wav)
    # np.max raises on empty input, so treat "no samples" as silence.
    peak = float(np.max(np.abs(samples))) if samples.size else 0.0
    scaled = samples * (32767 / max(0.01, peak))
    # proposed by @dsmiller
    wavfile.write(path, sr, scaled.astype(np.int16))
16
+
17
def save_wavenet_wav(wav, path, sr):
    """Write ``wav`` to ``path`` as a floating-point WAV file, unscaled.

    ``librosa.output.write_wav`` no longer exists in current librosa
    releases, so the file is written directly with
    ``scipy.io.wavfile.write``. Samples are stored as 32-bit floats
    without any normalization.

    Args:
        wav: Array-like of floating-point audio samples. A 2-D array whose
            first axis has length 2 is treated as (channels, samples) and
            transposed to the (samples, channels) layout that
            ``wavfile.write`` expects.
        path: Destination file name (or file object).
        sr: Sample rate written to the WAV header.
    """
    samples = np.asarray(wav, dtype=np.float32)
    if samples.ndim == 2 and samples.shape[0] == 2:
        samples = samples.T
    wavfile.write(path, sr, samples)
19
+
20
def preemphasis(wav, k, preemphasize=True):
    """Apply the FIR filter ``y[n] = x[n] - k * x[n-1]`` to ``wav``.

    When ``preemphasize`` is falsy the input object is returned as-is.
    """
    if not preemphasize:
        return wav
    numerator = [1, -k]
    denominator = [1]
    return signal.lfilter(numerator, denominator, wav)
24
+
25
def inv_preemphasis(wav, k, inv_preemphasize=True):
    """Apply the IIR filter ``y[n] = x[n] + k * y[n-1]`` to ``wav``.

    This is the inverse of the filter used by ``preemphasis`` with the same
    ``k``. When ``inv_preemphasize`` is falsy the input object is returned
    as-is.
    """
    if not inv_preemphasize:
        return wav
    numerator = [1]
    denominator = [1, -k]
    return signal.lfilter(numerator, denominator, wav)
29
+
30
def get_hop_size():
    """Return the STFT hop size in samples.

    ``hp.hop_size`` is used when it is set. Otherwise the hop is derived
    from ``hp.frame_shift_ms`` (milliseconds) and ``hp.sample_rate`` and
    truncated to an int; ``hp.frame_shift_ms`` must then not be None.
    """
    configured = hp.hop_size
    if configured is not None:
        return configured
    assert hp.frame_shift_ms is not None
    return int(hp.frame_shift_ms / 1000 * hp.sample_rate)
36
+
37
def linearspectrogram(wav):
    """Compute a dB-scaled linear-frequency spectrogram of ``wav``.

    Pipeline: pre-emphasis (per ``hp.preemphasis`` / ``hp.preemphasize``),
    STFT, magnitude, conversion to dB, subtraction of ``hp.ref_level_db``.
    The result is passed through ``_normalize`` when
    ``hp.signal_normalization`` is truthy.
    """
    emphasized = preemphasis(wav, hp.preemphasis, hp.preemphasize)
    magnitudes = np.abs(_stft(emphasized))
    level_db = _amp_to_db(magnitudes) - hp.ref_level_db
    return _normalize(level_db) if hp.signal_normalization else level_db
44
+
45
def melspectrogram(wav):
    """Compute a dB-scaled mel spectrogram of ``wav``.

    Pipeline: pre-emphasis (per ``hp.preemphasis`` / ``hp.preemphasize``),
    STFT, magnitude, projection through ``_linear_to_mel``, conversion to
    dB, subtraction of ``hp.ref_level_db``. The result is passed through
    ``_normalize`` when ``hp.signal_normalization`` is truthy.
    """
    emphasized = preemphasis(wav, hp.preemphasis, hp.preemphasize)
    magnitudes = np.abs(_stft(emphasized))
    mel_db = _amp_to_db(_linear_to_mel(magnitudes)) - hp.ref_level_db
    return _normalize(mel_db) if hp.signal_normalization else mel_db
52
+
53
def _lws_processor():
    """Build an ``lws`` processor configured from ``hp``.

    ``lws`` is imported lazily so the package is only required when this
    function is actually called.
    """
    import lws

    processor = lws.lws(
        hp.n_fft,
        get_hop_size(),
        fftsize=hp.win_size,
        mode="speech",
    )
    return processor
56
+
57
def _stft(y):
    """Compute the short-time Fourier transform of ``y``.

    When ``hp.use_lws`` is truthy the transform comes from the ``lws``
    processor (its result is transposed before being returned); otherwise
    ``librosa.stft`` is used with ``hp.n_fft``, the hop from
    ``get_hop_size()`` and ``hp.win_size`` as the window length.
    """
    if hp.use_lws:
        # _lws_processor() takes no parameters (it reads the module-level
        # ``hp`` itself); passing ``hp`` to it raised TypeError.
        return _lws_processor().stft(y).T
    return librosa.stft(y=y, n_fft=hp.n_fft, hop_length=get_hop_size(), win_length=hp.win_size)
62
+
63
+ ##########################################################
64
+ #Those are only correct when using lws!!! (This was messing with Wavenet quality for a long time!)
65
def num_frames(length, fsize, fshift):
    """Compute number of time frames of spectrogram.

    ``length`` is the signal length, ``fsize`` the frame size and
    ``fshift`` the frame shift. The signal is assumed to be padded by
    ``fsize - fshift`` on both sides; one extra frame is counted when
    ``length`` is not a multiple of ``fshift``.
    """
    pad = fsize - fshift
    whole_shifts = (length + pad * 2 - fsize) // fshift
    extra = 1 if length % fshift == 0 else 2
    return whole_shifts + extra
74
+
75
+
76
def pad_lr(x, fsize, fshift):
    """Compute left and right padding.

    Returns a ``(left, right)`` tuple. ``left`` is ``fsize - fshift``;
    ``right`` is the same amount plus whatever remainder is needed for the
    padded signal to hold exactly ``num_frames(len(x), fsize, fshift)``
    frames.
    """
    n_samples = len(x)
    frame_count = num_frames(n_samples, fsize, fshift)
    left = fsize - fshift
    padded_length = n_samples + 2 * left
    remainder = (frame_count - 1) * fshift + fsize - padded_length
    return left, left + remainder
84
+ ##########################################################
85
+ #Librosa correct padding
86
def librosa_pad_lr(x, fsize, fshift):
    """Compute left and right padding for the librosa STFT path.

    The left padding is always 0. The right padding extends ``x`` (measured
    along its first axis) to the next multiple of ``fshift`` strictly above
    its current length. ``fsize`` is accepted for signature parity with
    ``pad_lr`` but is not used.
    """
    n_samples = x.shape[0]
    target_length = (n_samples // fshift + 1) * fshift
    return 0, target_length - n_samples
88
+
89
+ # Conversions
90
# Mel filter bank, built on first use by _linear_to_mel and then reused.
_mel_basis = None


def _linear_to_mel(spectogram):
    """Project a linear-frequency spectrogram through the mel filter bank.

    The filter bank is created by ``_build_mel_basis`` on the first call
    and cached in the module-level ``_mel_basis``; later calls reuse it.
    """
    global _mel_basis
    basis = _mel_basis
    if basis is None:
        basis = _mel_basis = _build_mel_basis()
    return np.dot(basis, spectogram)
97
+
98
def _build_mel_basis():
    """Create the mel filter bank described by ``hp``.

    ``hp.fmax`` must not exceed half of ``hp.sample_rate``. All arguments
    are handed to ``librosa.filters.mel`` by keyword.
    """
    assert hp.fmax <= hp.sample_rate // 2
    mel_kwargs = dict(
        sr=hp.sample_rate,
        n_fft=hp.n_fft,
        n_mels=hp.num_mels,
        fmin=hp.fmin,
        fmax=hp.fmax,
    )
    return librosa.filters.mel(**mel_kwargs)
102
+
103
def _amp_to_db(x):
    """Convert amplitudes to decibels, flooring at ``hp.min_level_db``.

    Values of ``x`` below the amplitude that corresponds to
    ``hp.min_level_db`` are raised to that amplitude before ``20 * log10``
    is taken, so the result never drops below ``hp.min_level_db``.
    """
    amplitude_floor = np.exp(hp.min_level_db / 20 * np.log(10))
    floored = np.maximum(amplitude_floor, x)
    return 20 * np.log10(floored)
106
+
107
def _db_to_amp(x):
    """Convert decibels to amplitude: ``10 ** (x * 0.05)``."""
    exponent = x * 0.05
    return np.power(10.0, exponent)
109
+
110
def _normalize(S):
    """Rescale a dB spectrogram into the range configured in ``hp``.

    ``S`` is first mapped linearly so that ``hp.min_level_db`` becomes 0
    and 0 dB becomes 1, then stretched to ``[-max_abs_value,
    max_abs_value]`` when ``hp.symmetric_mels`` is set, or to
    ``[0, max_abs_value]`` otherwise.

    With ``hp.allow_clipping_in_normalization`` the result is clipped to
    that range; without it, ``S`` is asserted to already lie within
    ``[hp.min_level_db, 0]``.
    """
    clipping = hp.allow_clipping_in_normalization
    if not clipping:
        assert S.max() <= 0 and S.min() - hp.min_level_db >= 0

    unit = (S - hp.min_level_db) / (-hp.min_level_db)
    if hp.symmetric_mels:
        scaled = (2 * hp.max_abs_value) * unit - hp.max_abs_value
        lower = -hp.max_abs_value
    else:
        scaled = hp.max_abs_value * unit
        lower = 0

    if clipping:
        return np.clip(scaled, lower, hp.max_abs_value)
    return scaled
123
+
124
def _denormalize(D):
    """Map a normalized spectrogram back to the dB scale (inverse of ``_normalize``).

    With ``hp.allow_clipping_in_normalization`` the input is first clipped
    to the normalized range (``[-max_abs_value, max_abs_value]`` for
    symmetric mels, ``[0, max_abs_value]`` otherwise). The value is then
    rescaled linearly so that the bottom of the range maps to
    ``hp.min_level_db`` and the top maps to 0.
    """
    peak = hp.max_abs_value
    floor_db = hp.min_level_db
    symmetric = hp.symmetric_mels

    if hp.allow_clipping_in_normalization:
        D = np.clip(D, -peak if symmetric else 0, peak)

    if symmetric:
        return ((D + peak) * -floor_db / (2 * peak)) + floor_db
    return (D * -floor_db / peak) + floor_db