import logging
import os
import traceback

import librosa
import numpy as np
import soundfile as sf

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def extract_features(file_path):
    """Extract a fixed-length feature vector from an audio file.

    The file is validated (it must exist and be readable by ``soundfile``),
    then loaded with ``librosa`` and summarised as one 1-D array made of,
    in this order:

    1. the per-coefficient mean of 13 MFCCs,
    2. the per-coefficient variance of the same 13 MFCCs,
    3. the per-bin mean of the STFT chroma features.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        A 1-D ``numpy.ndarray`` holding the concatenated features, or
        ``None`` when the file is missing, unreadable, empty, shorter than
        one second, or when any extraction step fails.

    Notes:
        Every ``Exception`` is logged (with its traceback) and converted to
        a ``None`` return, so callers must check for ``None`` rather than
        catch exceptions.
    """
    try:
        logger.info("Starting feature extraction for: %s", file_path)

        # Verify the file exists before handing it to the audio libraries.
        if not os.path.exists(file_path):
            logger.error("File does not exist: %s", file_path)
            return None

        # Verify the file format: opening with soundfile is a cheap probe
        # that fails early on files it cannot decode.
        try:
            with sf.SoundFile(file_path) as sf_file:
                logger.info(
                    "Audio file info: %sHz, %s channels",
                    sf_file.samplerate,
                    sf_file.channels,
                )
        except Exception as e:
            logger.exception("Error reading audio file with soundfile: %s", e)
            return None

        # Load the audio: at most the first 30 seconds (duration=30), with
        # sr=None so librosa does not resample to its default rate.
        try:
            logger.info("Loading audio file...")
            y, sr = librosa.load(file_path, duration=30, sr=None)
            if len(y) == 0:
                logger.error("Audio file is empty")
                return None
            logger.info(
                "Successfully loaded audio: %d samples, %sHz sample rate",
                len(y),
                sr,
            )
        except Exception as e:
            logger.exception("Error loading audio: %s", e)
            return None

        # Ensure a minimum duration so the spectral features are meaningful.
        duration = len(y) / sr
        logger.info("Audio duration: %.2f seconds", duration)
        if duration < 1.0:
            logger.error("Audio file is too short (less than 1 second)")
            return None

        # 1. MFCC (13 features x 2 = 26): mean and variance over time frames.
        try:
            logger.info("Extracting MFCC features...")
            mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
            mfccs_mean = np.mean(mfccs, axis=1)
            mfccs_var = np.var(mfccs, axis=1)
            logger.info("MFCC features shape: %s", mfccs.shape)
        except Exception as e:
            logger.exception("Error extracting MFCC: %s", e)
            return None

        # 2. Chroma features: mean over time frames for each chroma bin.
        try:
            logger.info("Extracting chroma features...")
            chroma = librosa.feature.chroma_stft(y=y, sr=sr)
            chroma_mean = np.mean(chroma, axis=1)
            logger.info("Chroma features shape: %s", chroma.shape)
        except Exception as e:
            logger.exception("Error extracting chroma features: %s", e)
            return None

        # Combine all features; the order is part of the output contract.
        logger.info("Combining features...")
        features = np.concatenate([mfccs_mean, mfccs_var, chroma_mean])
        logger.info("Final feature vector shape: %s", features.shape)
        return features

    except Exception as e:
        # Boundary handler: callers rely on a None return instead of a raise.
        logger.exception("Unexpected error in feature extraction: %s", e)
        return None