# Spaces: Runtime error
import librosa | |
import numpy as np | |
from typing import Dict, Tuple | |
class AudioProcessor:
    """Load an audio file and derive summary features from it.

    Three feature groups are produced by ``process_audio``: time-averaged
    MFCCs, statistics of the pYIN fundamental-frequency track, and
    statistics of the frame-wise RMS energy.
    """

    def __init__(self, sample_rate: int = 16000, n_mfcc: int = 13,
                 n_mels: int = 128) -> None:
        """Store the analysis settings.

        Args:
            sample_rate: Rate passed to ``librosa.load`` and to the
                feature extractors.
            n_mfcc: Number of MFCC coefficients requested.
            n_mels: Kept as an attribute; not read by any method shown
                in this class.
        """
        self.sample_rate = sample_rate
        self.n_mfcc = n_mfcc
        self.n_mels = n_mels

    def process_audio(self, audio_path: str) -> Tuple[np.ndarray, Dict]:
        """Load ``audio_path`` and extract all features.

        Args:
            audio_path: Path handed to ``librosa.load``.

        Returns:
            A ``(waveform, features)`` pair where ``features`` has the
            keys ``'mfcc'``, ``'pitch'`` and ``'energy'``.
        """
        # The rate returned by librosa equals self.sample_rate because it is
        # requested explicitly, so the second return value is not needed.
        waveform, _ = librosa.load(audio_path, sr=self.sample_rate)
        features = {
            'mfcc': self._extract_mfcc(waveform),
            'pitch': self._extract_pitch(waveform),
            'energy': self._extract_energy(waveform),
        }
        return waveform, features

    def _extract_mfcc(self, waveform: np.ndarray) -> np.ndarray:
        """Return the MFCC matrix of ``waveform`` averaged over axis 1."""
        mfccs = librosa.feature.mfcc(
            y=waveform,
            sr=self.sample_rate,
            n_mfcc=self.n_mfcc,
        )
        return mfccs.mean(axis=1)

    def _extract_pitch(self, waveform: np.ndarray) -> Dict:
        """Return mean/std/max/min of the pYIN f0 track between C2 and C7."""
        # Only the f0 track is summarised; the voiced flag and voiced
        # probabilities returned by pyin are not used.
        f0, _voiced_flag, _voiced_probs = librosa.pyin(
            waveform,
            fmin=librosa.note_to_hz('C2'),
            fmax=librosa.note_to_hz('C7'),
            sr=self.sample_rate,
        )
        return self._pitch_stats(f0)

    @staticmethod
    def _pitch_stats(f0: np.ndarray) -> Dict[str, float]:
        """Summarise an f0 track, ignoring NaN entries.

        Args:
            f0: Frame-wise pitch values; NaN marks frames without a
                pitch estimate.

        Returns:
            Dict with ``'mean'``, ``'std'``, ``'max'`` and ``'min'`` as
            plain floats. When ``f0`` is empty or entirely NaN every
            value is NaN.
        """
        f0 = np.asarray(f0, dtype=float)
        # With no usable frame the nan-aware reductions would emit
        # RuntimeWarnings (or raise on an empty array); report NaN
        # explicitly instead. np.all() of an empty array is True, so the
        # empty case is covered as well.
        if np.all(np.isnan(f0)):
            return dict.fromkeys(('mean', 'std', 'max', 'min'), float('nan'))
        return {
            'mean': float(np.nanmean(f0)),
            'std': float(np.nanstd(f0)),
            'max': float(np.nanmax(f0)),
            'min': float(np.nanmin(f0)),
        }

    def _extract_energy(self, waveform: np.ndarray) -> Dict:
        """Return mean/std/max/min of the frame-wise RMS of ``waveform``."""
        # librosa.feature.rms returns a 2-D array; row 0 is taken as the
        # RMS series.
        rms = librosa.feature.rms(y=waveform)[0]
        return {
            'mean': float(np.mean(rms)),
            'std': float(np.std(rms)),
            'max': float(np.max(rms)),
            'min': float(np.min(rms)),
        }