import gradio as gr
import torch
import torchaudio
import numpy as np
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification

# Initialize model and feature extractor
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "Hatman/audio-emotion-detection"
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)
model.to(device)
model.eval()  # inference mode: disables dropout

# Emotion labels; assumed to match the ordering in model.config.id2label
EMOTION_LABELS = {
    0: "angry",
    1: "disgust",
    2: "fear",
    3: "happy",
    4: "neutral",
    5: "sad",
    6: "surprise",
}

TARGET_SAMPLE_RATE = 16000  # Wav2Vec2 models expect 16 kHz input


def process_audio(audio):
    """Process an audio chunk and return the predicted emotion."""
    if audio is None:
        return ""

    # gr.Audio with type="numpy" yields a (sample_rate, data) tuple
    sample_rate = TARGET_SAMPLE_RATE
    if isinstance(audio, tuple):
        sample_rate, audio = audio

    audio = np.asarray(audio)

    # Microphone input typically arrives as int16; scale to [-1.0, 1.0] floats
    if audio.dtype == np.int16:
        audio = audio.astype(np.float32) / 32768.0
    else:
        audio = audio.astype(np.float32)

    # Down-mix stereo to mono
    if audio.ndim > 1:
        audio = audio.mean(axis=1)

    # Resample if the microphone rate differs from the model's 16 kHz
    if sample_rate != TARGET_SAMPLE_RATE:
        audio = torchaudio.functional.resample(
            torch.from_numpy(audio), sample_rate, TARGET_SAMPLE_RATE
        ).numpy()

    try:
        # Prepare input features for the model
        inputs = feature_extractor(
            audio,
            sampling_rate=TARGET_SAMPLE_RATE,
            return_tensors="pt",
            padding=True,
        )
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Run inference and pick the highest-scoring emotion
        with torch.no_grad():
            outputs = model(**inputs)
        predicted_id = torch.argmax(outputs.logits, dim=-1).item()
        return EMOTION_LABELS[predicted_id]
    except Exception as e:
        print(f"Error processing audio: {e}")
        return "Error processing audio"


# Create the Gradio interface; live=True re-runs the function on each
# streamed chunk from the microphone
demo = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(
            sources=["microphone"],
            type="numpy",
            streaming=True,
            label="Speak into your microphone",
            show_label=True,
        )
    ],
    outputs=gr.Textbox(label="Detected Emotion"),
    title="Live Emotion Detection",
    description="Speak into your microphone to detect emotions in real time.",
    live=True,
    allow_flagging="never",
)

# Launch with a small queue for better real-time performance
demo.queue(max_size=1).launch(share=True)
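
# Note: running this Space assumes the following packages are installed
# (a minimal requirements.txt sketch; pinned versions are an assumption):
#   gradio
#   torch
#   torchaudio
#   transformers
#   numpy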