# Page header captured from the Hugging Face file viewer (not part of the program):
#   uploader: Boltz79 · commit: "Update app.py" (ba147ac, verified) · file size: 3.07 kB
import gradio as gr
import numpy as np
import torch
from transformers import pipeline
import librosa
import soundfile as sf
class EmotionRecognizer:
    """Speech-emotion classifier built on a Hugging Face audio-classification pipeline.

    Instance attributes (all set in ``__init__``):
        classifier: the ``transformers`` pipeline used for inference.
        target_sr: sampling rate, in Hz, that audio is resampled to.
        max_duration: clip length in seconds; audio is truncated or
            zero-padded to exactly this length before inference.
    """

    def __init__(self):
        """Load the pretrained pipeline and set the preprocessing parameters.

        The pipeline is given device index 0 when CUDA is available and -1
        otherwise.
        """
        self.classifier = pipeline(
            "audio-classification",
            model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
            device=0 if torch.cuda.is_available() else -1,
        )
        self.target_sr = 16000
        self.max_duration = 10

    def _prepare_audio(self, audio_path):
        """Read *audio_path* and return the float32 samples fed to the classifier.

        Multi-dimensional input is averaged over axis 1, the signal is
        resampled to ``target_sr`` when needed, normalized with
        ``librosa.util.normalize`` and forced to exactly
        ``max_duration * target_sr`` samples.
        """
        audio, orig_sr = sf.read(audio_path)

        # Collapse multi-channel input to one channel by averaging axis 1.
        if audio.ndim > 1:
            audio = np.mean(audio, axis=1)
        audio = audio.astype(np.float32)

        if orig_sr != self.target_sr:
            audio = librosa.resample(
                y=audio,
                orig_sr=orig_sr,
                target_sr=self.target_sr,
            )

        audio = librosa.util.normalize(audio)

        # Fixed-length input: cut long clips, right-pad short ones with zeros.
        max_samples = self.max_duration * self.target_sr
        if len(audio) > max_samples:
            return audio[:max_samples]
        return np.pad(audio, (0, max_samples - len(audio)))

    def process_audio(self, audio_path):
        """Classify the emotion in the audio file at *audio_path*.

        Args:
            audio_path: path of the audio file to analyze. ``None`` or an
                empty string (nothing recorded or uploaded) is reported to
                the user instead of being passed to the audio reader.

        Returns:
            A ``(text, plot_data)`` tuple. On success ``text`` holds one
            ``"label: score%"`` line per classifier result and ``plot_data``
            is ``{"labels": [...], "values": [...]}`` with scores scaled to
            percentages. On failure ``text`` is a human-readable message and
            ``plot_data`` is ``None``.
        """
        # Guard the "Analyze clicked with no input" case explicitly so the
        # user gets an actionable message rather than a raw library error.
        if not audio_path:
            return "No audio provided. Please record or upload a clip first.", None

        # Broad catch is deliberate: this is the UI boundary, and any failure
        # (unreadable file, model error) is shown as text instead of crashing.
        try:
            audio = self._prepare_audio(audio_path)
            results = self.classifier(
                {"array": audio, "sampling_rate": self.target_sr}
            )
            labels = [res["label"] for res in results]
            scores = [res["score"] * 100 for res in results]
            text_output = "\n".join(
                f"{label}: {score:.2f}%" for label, score in zip(labels, scores)
            )
            # NOTE(review): this dict is wired to a gr.BarPlot output; Gradio
            # documents BarPlot values as pandas DataFrames — confirm that a
            # plain dict actually renders in the installed Gradio version.
            plot_data = {"labels": labels, "values": scores}
            return text_output, plot_data
        except Exception as e:
            return f"Error processing audio: {str(e)}", None
def create_interface():
    """Build the Gradio Blocks UI for the emotion recognizer.

    A new ``EmotionRecognizer`` is created on every call. The page has two
    Markdown headers followed by a row of two columns: the first holds the
    audio input and the "Analyze" button, the second the results textbox and
    the bar plot. Clicking the button passes the audio file path to
    ``EmotionRecognizer.process_audio`` and routes its two return values to
    the textbox and the plot.

    Returns:
        The constructed ``gr.Blocks`` object; it is not launched here.
    """
    engine = EmotionRecognizer()

    with gr.Blocks(title="Audio Emotion Recognition") as app:
        gr.Markdown("# 🎙️ Audio Emotion Recognition")
        gr.Markdown("Record or upload English speech (3-10 seconds)")

        with gr.Row():
            # Input side: the clip to analyze plus the trigger button.
            with gr.Column():
                clip = gr.Audio(
                    label="Input Audio",
                    type="filepath",
                    sources=["microphone", "upload"],
                )
                analyze = gr.Button("Analyze", variant="primary")

            # Output side: textual summary and per-label bar chart.
            with gr.Column():
                summary = gr.Textbox(label="Results", interactive=False)
                chart = gr.BarPlot(
                    x="labels",
                    y="values",
                    color="labels",
                    height=300,
                    label="Confidence Scores",
                )

        analyze.click(
            fn=engine.process_audio,
            inputs=clip,
            outputs=[summary, chart],
        )

    return app
if __name__ == "__main__":
    # Script entry point: build the UI once, then start the Gradio server
    # with its default launch settings.
    demo = create_interface()
    demo.launch()