Spaces:

Boltz79
/

Sentiment-Analysis

Running

App Files Files Community

Boltz79 committed on Jan 31

Commit

786ea23

verified ·

1 Parent(s): 7ac7ed0

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -160

app.py CHANGED Viewed

@@ -1,168 +1,53 @@
 import gradio as gr
-import numpy as np
-import torch
-from transformers import AutoModelForAudioClassification, AutoFeatureExtractor
-import librosa
-import os
-import warnings
-warnings.filterwarnings("ignore")
-class EmotionRecognizer:
-    """Classify the emotion of a speech clip with a pretrained wav2vec2 model.
-
-    ``__init__`` loads the model and its feature extractor, moves the model
-    to CUDA when available, and records the 16 kHz sample rate passed to the
-    feature extractor plus ``labels`` (class names ordered by class index).
-    """
-
-    def __init__(self):
-        # Initialize the model and feature extractor
-        self.model_name = "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
-        self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
-        self.feature_extractor = AutoFeatureExtractor.from_pretrained(self.model_name)
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.model.to(self.device)
-        self.sample_rate = 16000
-        # Take the label names from the model config instead of a hand-written
-        # list: scores are paired with labels by position, so a list whose
-        # length or order differs from the classifier head would silently
-        # drop or mislabel classes.
-        id2label = self.model.config.id2label
-        self.labels = [id2label[idx] for idx in sorted(id2label)]
-
-    @staticmethod
-    def _to_mono_float32(audio_data):
-        """Return *audio_data* as a 1-D float32 array.
-
-        Integer PCM is scaled by the dtype's maximum value, and input with a
-        second (channel) axis is averaged down to a single channel.
-        """
-        samples = np.asarray(audio_data)
-        if np.issubdtype(samples.dtype, np.integer):
-            scale = float(np.iinfo(samples.dtype).max)
-            samples = samples.astype(np.float32) / scale
-        else:
-            samples = samples.astype(np.float32)
-        if samples.ndim > 1:
-            # assumes (samples, channels) layout as delivered by gr.Audio
-            samples = samples.mean(axis=1)
-        return samples
-
-    def process_audio(self, audio):
-        """Process audio and return emotions with confidence scores.
-
-        Args:
-            audio: ``(sample_rate, samples)`` tuple, the format produced by
-                ``gr.Audio(type="numpy")``.
-
-        Returns:
-            ``(report, plot_data)``. ``report`` is a human-readable summary
-            and ``plot_data`` a dict with ``"labels"`` and ``"values"`` lists
-            sorted by descending confidence. On failure ``report`` carries
-            the error message and ``plot_data`` is ``None``.
-        """
-        try:
-            # Check if audio is a tuple (new Gradio audio format)
-            if not isinstance(audio, tuple):
-                return "Error: Invalid audio format", None
-            sample_rate, audio_data = audio
-            # Convert before resampling: the resampler needs floating-point,
-            # single-channel input.
-            audio_data = self._to_mono_float32(audio_data)
-            # Resample if necessary
-            if sample_rate != self.sample_rate:
-                audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=self.sample_rate)
-            # Extract features
-            inputs = self.feature_extractor(
-                audio_data,
-                sampling_rate=self.sample_rate,
-                return_tensors="pt",
-                padding=True
-            ).to(self.device)
-            # Get model predictions
-            with torch.no_grad():
-                outputs = self.model(**inputs)
-                predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
-            # Process results
-            scores = predictions[0].cpu().numpy()
-            results = [
-                {"label": label, "score": float(score)}
-                for label, score in zip(self.labels, scores)
-            ]
-            # Sort by confidence
-            results.sort(key=lambda x: x["score"], reverse=True)
-            # Format results for display
-            output_text = "Emotion Analysis Results:\n\n"
-            output_text += "\n".join([
-                f"{result['label'].title()}: {result['score']*100:.2f}%"
-                for result in results
-            ])
-            # Prepare plot data
-            plot_data = {
-                "labels": [r["label"].title() for r in results],
-                "values": [r["score"] * 100 for r in results]
-            }
-            return output_text, plot_data
-        except Exception as e:
-            # UI boundary: report the failure as text instead of raising.
-            return f"Error processing audio: {str(e)}", None
-def create_interface():
-    """Build the Gradio Blocks UI for audio emotion recognition.
-
-    Creates one ``EmotionRecognizer`` and wires an audio input plus an
-    "Analyze Emotion" button to a text report and a bar plot.
-
-    Returns:
-        The assembled ``gr.Blocks`` app (not yet launched).
-    """
-    # Local import: the plot component is fed a DataFrame, and pandas is not
-    # among the module-level imports.
-    import pandas as pd
-
-    # Initialize the emotion recognizer
-    recognizer = EmotionRecognizer()
-
-    # Define processing function for Gradio
-    def process_audio_file(audio):
-        """Return ``(report, plot value)`` for the two output components."""
-        if audio is None:
-            return "Please provide an audio input.", None
-        output_text, plot_data = recognizer.process_audio(audio)
-        if plot_data is None:
-            return output_text, None
-        # Return the new plot value directly. The static plot settings
-        # (columns, title, axis titles) live on the component below, so no
-        # `gr.BarPlot.update(...)` call is needed; that classmethod is not
-        # available in the Gradio versions that accept `sources=` on gr.Audio.
-        return output_text, pd.DataFrame(plot_data)
-
-    # Create the Gradio interface
-    with gr.Blocks(title="Audio Emotion Recognition") as interface:
-        gr.Markdown("# 🎭 Audio Emotion Recognition")
-        gr.Markdown("""
-        Upload an audio file or record directly to analyze the emotional content.
-        The model will detect emotions like angry, happy, sad, neutral, and fearful.
-        """)
-        with gr.Row():
-            with gr.Column():
-                # Input audio component (updated format)
-                audio_input = gr.Audio(
-                    label="Upload or Record Audio",
-                    type="numpy",
-                    sources=["microphone", "upload"]
-                )
-                # Process button
-                process_btn = gr.Button("Analyze Emotion", variant="primary")
-            with gr.Column():
-                # Output components
-                output_text = gr.Textbox(
-                    label="Analysis Results",
-                    lines=6
-                )
-                # x / y name the DataFrame columns produced by the handler.
-                output_plot = gr.BarPlot(
-                    x="labels",
-                    y="values",
-                    title="Emotion Confidence Scores",
-                    x_title="Emotions",
-                    y_title="Confidence (%)"
-                )
-        # Set up event handler
-        process_btn.click(
-            fn=process_audio_file,
-            inputs=[audio_input],
-            outputs=[output_text, output_plot]
-        )
-        gr.Markdown("""
-        ### Usage Instructions:
-        1. Click the microphone button to record audio or upload an audio file
-        2. Click "Analyze Emotion" to process the audio
-        3. View the results and confidence scores
-        ### Notes:
-        - For best results, ensure clear audio with minimal background noise
-        - Speak naturally and clearly when recording
-        - The model works best with speech in English
-        """)
-    return interface
-def main():
-    """Build the UI and serve it on all interfaces, port 7860, with sharing on."""
-    app = create_interface()
-    app.launch(
-        server_port=7860,
-        server_name="0.0.0.0",
-        share=True,
     )
-
-
-# Only start the server when the file is executed as a script.
-if __name__ == "__main__":
-    main()

 import gradio as gr
+from transformers import pipeline
+# Load Whisper for speech-to-text. chunk_length_s makes the pipeline split
+# the input into 30-second chunks, so recordings longer than Whisper's
+# 30-second input window are transcribed in full.
+whisper = pipeline(
+    "automatic-speech-recognition",
+    model="openai/whisper-medium",
+    chunk_length_s=30,
+)
+# Load a sentiment analysis model
+sentiment_analyzer = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")
+# Readable names for the raw class ids reported by
+# cardiffnlp/twitter-roberta-base-sentiment (mapping taken from its model
+# card). Labels not listed here are passed through unchanged.
+_SENTIMENT_LABELS = {
+    "LABEL_0": "negative",
+    "LABEL_1": "neutral",
+    "LABEL_2": "positive",
+}
+
+
+# Function to process audio and analyze tone
+def analyze_call(audio_file):
+    """Transcribe an audio file and classify the sentiment of the transcript.
+
+    Args:
+        audio_file: Audio input accepted by the ``whisper`` pipeline (the
+            Gradio UI passes a file path).
+
+    Returns:
+        On success, a dict with ``"transcription"``, ``"sentiment"`` and
+        ``"confidence"`` (score rounded to 4 decimals). On failure, or when
+        no speech was transcribed, a dict with a single ``"error"`` message.
+    """
+    try:
+        # Step 1: Transcribe audio to text using Whisper
+        transcription = whisper(audio_file)["text"].strip()
+        if not transcription:
+            # Nothing to classify; a score for empty text would be meaningless.
+            return {"error": "No speech detected in the audio."}
+        # Step 2: Analyze sentiment of the transcription. truncation=True
+        # keeps long transcripts within the classifier's maximum input length.
+        sentiment_result = sentiment_analyzer(transcription, truncation=True)[0]
+        raw_label = sentiment_result["label"]
+        # Prepare the output
+        return {
+            "transcription": transcription,
+            "sentiment": _SENTIMENT_LABELS.get(raw_label, raw_label),
+            "confidence": round(sentiment_result["score"], 4),
+        }
+    except Exception as e:
+        # UI boundary: surface any pipeline failure as a message, not a crash.
+        return {"error": str(e)}
+# Gradio callback
+def gradio_interface(audio):
+    """Format the analysis of *audio* as the text shown in the result box.
+
+    Returns a prompt when no audio was supplied, an ``Error: ...`` line when
+    ``analyze_call`` reports a failure, and otherwise the transcription,
+    sentiment and confidence separated by blank lines.
+    """
+    if audio is None:
+        return "Please record or upload an audio file."
+    analysis = analyze_call(audio)
+    if "error" in analysis:
+        return f"Error: {analysis['error']}"
+    report_lines = [
+        f"**Transcription:** {analysis['transcription']}",
+        f"**Sentiment:** {analysis['sentiment']}",
+        f"**Confidence:** {analysis['confidence']}",
+    ]
+    return "\n\n".join(
+        report_lines
     )
+# Build the Gradio app: one audio input (delivered to the callback as a file
+# path) and one text box for the formatted result.
+_audio_input = gr.Audio(type="filepath", label="Record or Upload Audio")
+_result_box = gr.Textbox(label="Analysis Result", lines=5)
+interface = gr.Interface(
+    fn=gradio_interface,
+    inputs=_audio_input,
+    outputs=_result_box,
+    title="Real-Time Call Analysis",
+    description="Record or upload audio to analyze tone and sentiment in real time.",
+    live=False,  # Run only on submit, not on every input change
+)
+# Start serving the app
+interface.launch()