import gradio as gr
import numpy as np
import pandas as pd
import torch
from transformers import pipeline
import librosa
import soundfile as sf
class EmotionRecognizer:
    def __init__(self):
        # Speech-emotion classifier built on wav2vec2; uses the GPU when one is available.
        self.classifier = pipeline(
            "audio-classification",
            model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
            device=0 if torch.cuda.is_available() else -1
        )
        self.target_sr = 16000  # wav2vec2 models expect 16 kHz input
        self.max_duration = 10  # seconds of audio kept per clip
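
    # Per the model card (worth re-checking, since cards change), this checkpoint
    # is wav2vec2-lg-xlsr fine-tuned on RAVDESS-style data and emits eight labels:
    # angry, calm, disgust, fearful, happy, neutral, sad, surprised.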
    def process_audio(self, audio_path):
        # Guard against "Analyze" being clicked before any audio is provided.
        if audio_path is None:
            return "Please record or upload an audio clip first.", None
        try:
            audio, orig_sr = sf.read(audio_path)
            # Down-mix stereo to mono.
            if audio.ndim > 1:
                audio = np.mean(audio, axis=1)
            # Resample to the model's expected rate.
            if orig_sr != self.target_sr:
                audio = librosa.resample(
                    y=audio.astype(np.float32),
                    orig_sr=orig_sr,
                    target_sr=self.target_sr
                )
            else:
                audio = audio.astype(np.float32)
            audio = librosa.util.normalize(audio)
            # Truncate long clips and zero-pad short ones to a fixed window.
            max_samples = self.max_duration * self.target_sr
            if len(audio) > max_samples:
                audio = audio[:max_samples]
            else:
                audio = np.pad(audio, (0, max_samples - len(audio)))
            results = self.classifier(
                {"array": audio, "sampling_rate": self.target_sr}
            )
            labels = [res["label"] for res in results]
            scores = [res["score"] * 100 for res in results]
            text_output = "\n".join(
                f"{label}: {score:.2f}%" for label, score in zip(labels, scores)
            )
            # gr.BarPlot expects a pandas DataFrame, not a plain dict.
            plot_data = pd.DataFrame({"labels": labels, "values": scores})
            return text_output, plot_data
        except Exception as e:
            return f"Error processing audio: {e}", None
def create_interface():
    recognizer = EmotionRecognizer()
    with gr.Blocks(title="Audio Emotion Recognition") as interface:
        gr.Markdown("# 🎙️ Audio Emotion Recognition")
        gr.Markdown("Record or upload English speech (3-10 seconds)")
        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(
                    sources=["microphone", "upload"],
                    type="filepath",
                    label="Input Audio"
                )
                submit_btn = gr.Button("Analyze", variant="primary")
            with gr.Column():
                text_output = gr.Textbox(label="Results", interactive=False)
                plot_output = gr.BarPlot(
                    label="Confidence Scores",
                    x="labels",
                    y="values",
                    color="labels",
                    height=300
                )
        submit_btn.click(
            fn=recognizer.process_audio,
            inputs=audio_input,
            outputs=[text_output, plot_output]
        )
    return interface
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
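
# A minimal requirements.txt for this Space might look like the following
# (package names only; exact version pins are an assumption and should match
# whatever the Space is actually tested against):
#
#   gradio
#   transformers
#   torch
#   numpy
#   pandas
#   librosa
#   soundfile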