# emotion_recon / app.py
import os
import zipfile
import numpy as np
import librosa
import gradio as gr
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Update this to the path where you manually downloaded the ZIP file
zip_path = "C:\\Users\\GUHAN YADAV\\Downloads\\archive (6).zip"
extract_path = "dataset"
# Check if the downloaded file is a ZIP file and unzip it
if zipfile.is_zipfile(zip_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
else:
    print("The downloaded file is not a valid zip file!")
# Extract MFCC features from an audio file
def extract_features(file_path):
    audio, sample_rate = librosa.load(file_path, sr=16000)  # load mono audio, resampled to 16 kHz
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_scaled = np.mean(mfccs.T, axis=0)  # average each MFCC coefficient over time frames
    return mfccs_scaled
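# Each clip is reduced to a fixed-length 40-dimensional vector (one time-averaged
# value per MFCC coefficient), e.g. extract_features("dataset/happy/clip01.wav")
# returns an ndarray of shape (40,). That path is hypothetical; any mono .wav works.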
# Load and prepare dataset
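# Assumed layout after extraction: one sub-folder per emotion label, each holding
# .wav clips, e.g.:
#   dataset/
#     angry/    *.wav
#     happy/    *.wav
#     sad/      *.wav
# If your archive nests differently, point load_data at the folder whose children
# are the emotion names.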
def load_data(dataset_path):
    features = []
    labels = []
    emotions = os.listdir(dataset_path)
    for emotion in emotions:
        emotion_dir = os.path.join(dataset_path, emotion)
        if os.path.isdir(emotion_dir):
            for filename in os.listdir(emotion_dir):
                if filename.endswith('.wav'):  # only process .wav files
                    file_path = os.path.join(emotion_dir, filename)
                    extracted_features = extract_features(file_path)
                    features.append(extracted_features)
                    labels.append(emotion)
    return np.array(features), np.array(labels)
# Load your local dataset
X, y = load_data(extract_path)
# Ensure data is loaded
if len(X) == 0 or len(y) == 0:
    raise ValueError("No audio data loaded. Please check your dataset structure.")
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
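# Note: passing stratify=y to train_test_split keeps the emotion class proportions
# the same in both splits, which is worth considering if some emotions have far
# fewer clips than others.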
# Initialize and train the classifier
model_rf = RandomForestClassifier(n_estimators=100, random_state=42)
model_rf.fit(X_train, y_train)
# Validate the model
predictions = model_rf.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
# Function to predict emotion from microphone input
def predict_emotion(audio):
    sample_rate, data = audio  # Gradio's "numpy" audio type is a (sample_rate, samples) tuple
    data = data.astype(np.float32)
    if data.ndim > 1:
        data = data.mean(axis=1)  # mix stereo down to mono
    peak = np.max(np.abs(data))
    if peak > 0:
        data = data / peak  # scale raw int16 microphone samples into [-1, 1]
    if sample_rate != 16000:
        data = librosa.resample(data, orig_sr=sample_rate, target_sr=16000)  # match the 16 kHz training rate
    mfccs = librosa.feature.mfcc(y=data, sr=16000, n_mfcc=40)  # same features as training
    features_reshaped = np.mean(mfccs.T, axis=0).reshape(1, -1)  # shape (1, 40) for the classifier
    return model_rf.predict(features_reshaped)[0]  # return the predicted emotion label
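# Quick local sanity check (hypothetical input mirroring what Gradio passes):
# a one-second 440 Hz tone at 16 kHz as int16 samples. Uncomment to try:
# t = np.linspace(0, 1, 16000, endpoint=False)
# tone = (np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16)
# print(predict_emotion((16000, tone)))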
# Gradio Interface
iface = gr.Interface(
    fn=predict_emotion,
    inputs=gr.Audio(sources=["microphone"], type="numpy"),  # Gradio 4.x; use source="microphone" on Gradio 3.x
    outputs="text",
    title="Voice Emotion Recognition",
    description="Speak into the microphone to detect emotion (happy, sad, fear, angry, neutral, disgust).",
)
if __name__ == "__main__":
    iface.launch()