Spaces:
Running
on
T4
Running
on
T4
import librosa | |
import numpy as np | |
# Audio front-end settings read by wav_to_mel_spectrogram.
# The two window values are scaled by sampling_rate / 1000 to obtain sample counts,
# i.e. they are expressed in milliseconds.
mel_window_length = 25  # analysis window (ms); becomes n_fft once converted to samples
mel_window_step = 10  # stride between successive windows (ms); becomes hop_length
mel_n_channels = 40  # number of mel bands requested (n_mels)
sampling_rate = 16000  # passed to librosa as sr (samples per second)
def wav_to_mel_spectrogram(wav):
    """
    Derives a mel spectrogram ready to be used by the encoder from a preprocessed audio waveform.
    Note: this is not a log-mel spectrogram.

    :param wav: the preprocessed waveform, handed to librosa as ``y``. Presumably a 1-D float
    array sampled at ``sampling_rate`` -- TODO confirm against callers.
    :return: the mel spectrogram cast to float32 and transposed relative to librosa's output.
    For a mono waveform this should give one row per frame and ``mel_n_channels`` columns --
    verify if multi-channel input is ever passed, since ``.T`` reverses every axis.
    """
    # The window settings are given in milliseconds; librosa expects sample counts.
    n_fft = int(sampling_rate * mel_window_length / 1000)
    hop_length = int(sampling_rate * mel_window_step / 1000)

    mel = librosa.feature.melspectrogram(
        y=wav,
        sr=sampling_rate,
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=mel_n_channels,
    )

    # No log/dB scaling is applied here: the values are returned as librosa produced them.
    return mel.astype(np.float32).T