Spaces:
Sleeping
Sleeping
import librosa | |
import numpy as np | |
def extract_features(file_path): | |
try: | |
print(f"π’ Processing file: {file_path}") | |
# Load audio with fixed sample rate | |
y, sr = librosa.load(file_path, sr=16000) | |
# Trim or pad audio to exactly 5 seconds | |
target_length = sr * 5 | |
if len(y) > target_length: | |
start_sample = np.random.randint(0, len(y) - target_length) | |
y = y[start_sample:start_sample + target_length] | |
elif len(y) < target_length: | |
y = np.pad(y, (0, target_length - len(y)), mode='constant') | |
print("β Audio loaded and standardized (5s, 16kHz)") | |
# Extract features | |
mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13) | |
chroma = librosa.feature.chroma_stft(y=y, sr=sr) | |
spec_contrast = librosa.feature.spectral_contrast(y=y, sr=sr) | |
zcr = librosa.feature.zero_crossing_rate(y) | |
rms = librosa.feature.rms(y=y) | |
centroid = librosa.feature.spectral_centroid(y=y, sr=sr) | |
bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr) | |
rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr) | |
hnr = librosa.effects.harmonic(y) | |
pitches, _ = librosa.piptrack(y=y, sr=sr) | |
# Aggregate features (mean + std for each feature) | |
features = { | |
"mfcc": np.concatenate([np.mean(mfcc, axis=1), np.std(mfcc, axis=1)]), | |
"chroma": np.concatenate([np.mean(chroma, axis=1), np.std(chroma, axis=1)]), | |
"spectral_contrast": np.concatenate([np.mean(spec_contrast, axis=1), np.std(spec_contrast, axis=1)]), | |
"zcr": [np.mean(zcr), np.std(zcr)], | |
"rms": [np.mean(rms), np.std(rms)], | |
"centroid": [np.mean(centroid), np.std(centroid)], | |
"bandwidth": [np.mean(bandwidth), np.std(bandwidth)], | |
"rolloff": [np.mean(rolloff), np.std(rolloff)], | |
"hnr": [np.mean(hnr), np.std(hnr)], | |
"pitch": [np.mean(pitches), np.std(pitches)] | |
} | |
# Flatten all feature arrays into a single list | |
feature_vector = [] | |
for value in features.values(): | |
feature_vector.extend(value) | |
print(f"β Features successfully extracted and formatted (Total: {len(feature_vector)} features)") | |
return feature_vector | |
except Exception as e: | |
print(f"β Error extracting features from {file_path}: {e}") | |
return None |