Upload feature extractor
Browse files
- feature_extraction_maest.py +15 -15
- preprocessor_config.json +0 -0
feature_extraction_maest.py
CHANGED
@@ -99,12 +99,21 @@ class MAESTFeatureExtractor(SequenceFeatureExtractor):
|
|
99 |
self.std = std
|
100 |
self.return_attention_mask = return_attention_mask
|
101 |
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
window_length=self.n_fft,
|
104 |
name="hann",
|
105 |
-
)
|
106 |
|
107 |
-
|
108 |
num_frequency_bins=self.n_fft // 2 + 1,
|
109 |
num_mel_filters=self.num_mel_bins,
|
110 |
min_frequency=0,
|
@@ -112,24 +121,15 @@ class MAESTFeatureExtractor(SequenceFeatureExtractor):
|
|
112 |
sampling_rate=self.sampling_rate,
|
113 |
norm="slaney",
|
114 |
mel_scale="slaney",
|
115 |
-
)
|
116 |
-
|
117 |
-
def _extract_fbank_features(
|
118 |
-
self,
|
119 |
-
waveform: np.ndarray,
|
120 |
-
max_length: int,
|
121 |
-
) -> np.ndarray:
|
122 |
-
"""
|
123 |
-
Get mel-spectrogram features using audio_utils.
|
124 |
-
"""
|
125 |
|
126 |
melspec = spectrogram(
|
127 |
waveform,
|
128 |
-
window=
|
129 |
frame_length=self.n_fft,
|
130 |
hop_length=self.hop_length,
|
131 |
power=2,
|
132 |
-
mel_filters=
|
133 |
min_value=1e-30,
|
134 |
mel_floor=1e-30,
|
135 |
pad_mode="constant",
|
|
|
99 |
self.std = std
|
100 |
self.return_attention_mask = return_attention_mask
|
101 |
|
102 |
+
def _extract_fbank_features(
|
103 |
+
self,
|
104 |
+
waveform: np.ndarray,
|
105 |
+
max_length: int,
|
106 |
+
) -> np.ndarray:
|
107 |
+
"""
|
108 |
+
Get mel-spectrogram features using audio_utils.
|
109 |
+
"""
|
110 |
+
|
111 |
+
window = window_function(
|
112 |
window_length=self.n_fft,
|
113 |
name="hann",
|
114 |
+
)
|
115 |
|
116 |
+
mel_fb = mel_filter_bank(
|
117 |
num_frequency_bins=self.n_fft // 2 + 1,
|
118 |
num_mel_filters=self.num_mel_bins,
|
119 |
min_frequency=0,
|
|
|
121 |
sampling_rate=self.sampling_rate,
|
122 |
norm="slaney",
|
123 |
mel_scale="slaney",
|
124 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
melspec = spectrogram(
|
127 |
waveform,
|
128 |
+
window=window,
|
129 |
frame_length=self.n_fft,
|
130 |
hop_length=self.hop_length,
|
131 |
power=2,
|
132 |
+
mel_filters=mel_fb,
|
133 |
min_value=1e-30,
|
134 |
mel_floor=1e-30,
|
135 |
pad_mode="constant",
|
preprocessor_config.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|