yuta0306
first commit
565faca
raw
history blame
No virus
599 Bytes
import librosa
import numpy as np
# Audio front-end parameters consumed by wav_to_mel_spectrogram below.
# The two window values are durations in milliseconds: they are turned into
# sample counts via `sampling_rate * value / 1000` before being handed to librosa.
mel_window_length = 25  # analysis window length in ms (becomes n_fft)
mel_window_step = 10  # step between successive windows in ms (becomes hop_length)
mel_n_channels = 40  # number of mel bands requested (n_mels)
sampling_rate = 16000  # samples per second, passed to librosa as sr
def wav_to_mel_spectrogram(wav):
    """
    Compute the mel spectrogram of a preprocessed audio waveform for the encoder.

    The analysis window and hop are specified in milliseconds at module level and
    converted here to sample counts using the module's sampling rate.

    Note: this is not a log-mel spectrogram; the values are returned as produced
    by librosa, without any logarithmic compression.

    :param wav: the preprocessed waveform, passed to librosa as ``y``
        (presumably sampled at ``sampling_rate`` -- confirm against the caller).
    :return: the spectrogram cast to float32 and transposed, so the axes are
        swapped relative to what ``librosa.feature.melspectrogram`` returns.
    """
    # Millisecond durations -> whole numbers of samples (truncated by int()).
    window_samples = int(sampling_rate * mel_window_length / 1000)
    hop_samples = int(sampling_rate * mel_window_step / 1000)

    mel = librosa.feature.melspectrogram(
        y=wav,
        sr=sampling_rate,
        n_fft=window_samples,
        hop_length=hop_samples,
        n_mels=mel_n_channels,
    )

    # Cast first, then swap the two axes -- same order of operations as before.
    as_float32 = mel.astype(np.float32)
    return np.transpose(as_float32)