import os
import zipfile

import numpy as np
import librosa
import gradio as gr
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Update this to the path where you manually downloaded the ZIP file
zip_path = "C:\\Users\\GUHAN YADAV\\Downloads\\archive (6).zip"
extract_path = "dataset"

# Check that the downloaded file is a ZIP archive and unzip it
if zipfile.is_zipfile(zip_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
else:
    print("The downloaded file is not a valid zip file!")


# Compute a fixed-length feature vector (time-averaged MFCCs) from a raw waveform
def extract_features_from_audio(audio, sample_rate):
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    return np.mean(mfccs.T, axis=0)  # Average the MFCCs over time


# Load an audio file at 16 kHz and extract its features
def extract_features(file_path):
    audio, sample_rate = librosa.load(file_path, sr=16000)
    return extract_features_from_audio(audio, sample_rate)


# Walk the dataset: one subdirectory per emotion, each containing .wav files
def load_data(dataset_path):
    features = []
    labels = []
    for emotion in os.listdir(dataset_path):
        emotion_dir = os.path.join(dataset_path, emotion)
        if os.path.isdir(emotion_dir):
            for filename in os.listdir(emotion_dir):
                if filename.endswith('.wav'):
                    file_path = os.path.join(emotion_dir, filename)
                    features.append(extract_features(file_path))
                    labels.append(emotion)
    return np.array(features), np.array(labels)


# Load the local dataset
X, y = load_data(extract_path)

# Ensure data was actually loaded
if len(X) == 0 or len(y) == 0:
    raise ValueError("No audio data loaded. Please check your dataset structure.")

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the classifier
model_rf = RandomForestClassifier(n_estimators=100, random_state=42)
model_rf.fit(X_train, y_train)

# Validate the model
predictions = model_rf.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Model Accuracy: {accuracy * 100:.2f}%")


# Predict emotion from microphone input.
# With type="numpy", Gradio passes a (sample_rate, data) tuple rather than a
# file path, so the waveform is converted to float, mixed to mono, and
# resampled to the 16 kHz rate used during training.
def predict_emotion(audio):
    sample_rate, data = audio
    data = data.astype(np.float32)
    if data.ndim > 1:  # Mix stereo down to mono
        data = data.mean(axis=1)
    peak = np.max(np.abs(data))
    if peak > 0:  # Normalize integer microphone samples to [-1, 1]
        data = data / peak
    if sample_rate != 16000:
        data = librosa.resample(y=data, orig_sr=sample_rate, target_sr=16000)
        sample_rate = 16000
    features = extract_features_from_audio(data, sample_rate)
    return model_rf.predict(features.reshape(1, -1))[0]  # Return the predicted emotion


# Gradio interface (the `source` argument is Gradio 3.x syntax;
# Gradio 4.x renamed it to `sources=["microphone"]`)
iface = gr.Interface(
    fn=predict_emotion,
    inputs=gr.Audio(source="microphone", type="numpy"),  # Use microphone input
    outputs="text",
    title="Voice Emotion Recognition",
    description="Speak into the microphone to detect emotion (happy, sad, fear, angry, neutral, disgust).",
)

if __name__ == "__main__":
    iface.launch()
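
# Optional: overall accuracy can hide per-class weaknesses when the emotion
# classes are imbalanced. A minimal sketch (commented out since iface.launch()
# blocks above) using scikit-learn's classification_report, which is already
# available via the sklearn.metrics import, on the same held-out split:
#
# from sklearn.metrics import classification_report
# print(classification_report(y_test, predictions))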