Boltz79 committed · Commit cb9a254 · verified · 1 Parent(s): 43e873e

Update app.py

Files changed (1):
  1. app.py +123 -87
app.py CHANGED
@@ -1,132 +1,168 @@
  import gradio as gr
  import numpy as np
- from textblob import TextBlob
- import speech_recognition as sr

- class SentimentAnalyzer:
      def __init__(self):
-         self.recognizer = sr.Recognizer()
-
-     def audio_to_text(self, audio):
-         """Convert audio to text using speech recognition"""
-         try:
-             # Get audio data from Gradio input
-             sample_rate, audio_data = audio
-
-             # Convert audio data to audio file format that speech_recognition can use
-             import io
-             import scipy.io.wavfile as wav
-             byte_io = io.BytesIO()
-             wav.write(byte_io, sample_rate, audio_data.astype(np.int16))
-             byte_io.seek(0)
-
-             # Use speech recognition
-             with sr.AudioFile(byte_io) as source:
-                 audio_data = self.recognizer.record(source)
-                 text = self.recognizer.recognize_google(audio_data)
-                 return text
-         except Exception as e:
-             return f"Error in speech recognition: {str(e)}"
-
-     def analyze_sentiment(self, text):
-         """Analyze sentiment using TextBlob"""
          try:
-             blob = TextBlob(text)
-             # Get polarity (-1 to 1) and subjectivity (0 to 1)
-             polarity = blob.sentiment.polarity
-             subjectivity = blob.sentiment.subjectivity
-
-             # Determine sentiment category
-             if polarity > 0:
-                 sentiment = "Positive"
-             elif polarity < 0:
-                 sentiment = "Negative"
              else:
-                 sentiment = "Neutral"
-
-             # Format results
-             results_text = f"""
-             Detected Text: "{text}"
-
-             Analysis Results:
-             - Overall Sentiment: {sentiment}
-             - Polarity Score: {polarity:.2f} (-1 to +1)
-             - Subjectivity Score: {subjectivity:.2f} (0 to 1)
-             """
-
              # Prepare plot data
              plot_data = {
-                 "labels": ["Polarity", "Subjectivity"],
-                 "values": [polarity * 100, subjectivity * 100]  # Convert to percentage for visualization
              }
-
-             return results_text, plot_data
-
          except Exception as e:
-             return f"Error in sentiment analysis: {str(e)}", None

  def create_interface():
-     analyzer = SentimentAnalyzer()

-     def process_audio(audio):
          if audio is None:
              return "Please provide an audio input.", None

-         # Convert audio to text
-         text = analyzer.audio_to_text(audio)
-         if text.startswith("Error"):
-             return text, None
-
-         # Analyze sentiment
-         return analyzer.analyze_sentiment(text)

-     # Create Gradio interface
-     with gr.Blocks() as interface:
-         gr.Markdown("# 🎤 Speech Sentiment Analysis")
          gr.Markdown("""
-         Speak or upload an audio file to analyze its emotional content.
-         The system will convert speech to text and analyze the sentiment.
          """)

          with gr.Row():
              with gr.Column():
                  audio_input = gr.Audio(
                      label="Upload or Record Audio",
                      type="numpy",
                      sources=["microphone", "upload"]
                  )
-                 analyze_btn = gr.Button("Analyze Sentiment")

              with gr.Column():
                  output_text = gr.Textbox(
                      label="Analysis Results",
-                     lines=8
                  )
                  output_plot = gr.BarPlot(
-                     title="Sentiment Scores",
-                     x_title="Metrics",
-                     y_title="Score (%)"
                  )

-         analyze_btn.click(
-             fn=process_audio,
              inputs=[audio_input],
              outputs=[output_text, output_plot]
          )

          gr.Markdown("""
-         ### How to Use:
-         1. Click the microphone button to record or upload an audio file
-         2. Click "Analyze Sentiment" to process
-         3. View the results showing:
-            - Detected text from speech
-            - Overall sentiment (Positive/Negative/Neutral)
-            - Polarity score (-100% to +100%)
-            - Subjectivity score (0% to 100%)
          """)

      return interface

  if __name__ == "__main__":
-     demo = create_interface()
-     demo.launch(share=True)
 
  import gradio as gr
  import numpy as np
+ import torch
+ from transformers import AutoModelForAudioClassification, AutoFeatureExtractor
+ import librosa
+ import os
+ import warnings
+ warnings.filterwarnings("ignore")

+ class EmotionRecognizer:
      def __init__(self):
+         # Initialize the model and feature extractor
+         self.model_name = "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
+         self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
+         self.feature_extractor = AutoFeatureExtractor.from_pretrained(self.model_name)
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.model.to(self.device)
+         self.sample_rate = 16000
+
+         # Define emotion labels
+         self.labels = ['angry', 'happy', 'sad', 'neutral', 'fearful']
+
+     def process_audio(self, audio):
+         """Process audio and return emotions with confidence scores"""
          try:
+             # Check if audio is a tuple (new Gradio audio format)
+             if isinstance(audio, tuple):
+                 sample_rate, audio_data = audio
              else:
+                 return "Error: Invalid audio format", None
+
+             # Resample if necessary
+             if sample_rate != self.sample_rate:
+                 audio_data = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=self.sample_rate)
+
+             # Convert to float32 if not already
+             audio_data = audio_data.astype(np.float32)
+
+             # Extract features
+             inputs = self.feature_extractor(
+                 audio_data,
+                 sampling_rate=self.sample_rate,
+                 return_tensors="pt",
+                 padding=True
+             ).to(self.device)
+
+             # Get model predictions
+             with torch.no_grad():
+                 outputs = self.model(**inputs)
+                 predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+
+             # Process results
+             scores = predictions[0].cpu().numpy()
+             results = [
+                 {"label": label, "score": float(score)}
+                 for label, score in zip(self.labels, scores)
+             ]
+
+             # Sort by confidence
+             results.sort(key=lambda x: x["score"], reverse=True)
+
+             # Format results for display
+             output_text = "Emotion Analysis Results:\n\n"
+             output_text += "\n".join([
+                 f"{result['label'].title()}: {result['score']*100:.2f}%"
+                 for result in results
+             ])
+
              # Prepare plot data
              plot_data = {
+                 "labels": [r["label"].title() for r in results],
+                 "values": [r["score"] * 100 for r in results]
              }
+
+             return output_text, plot_data
+
          except Exception as e:
+             return f"Error processing audio: {str(e)}", None

  def create_interface():
+     # Initialize the emotion recognizer
+     recognizer = EmotionRecognizer()

+     # Define processing function for Gradio
+     def process_audio_file(audio):
          if audio is None:
              return "Please provide an audio input.", None
+
+         output_text, plot_data = recognizer.process_audio(audio)

+         if plot_data is not None:
+             return (
+                 output_text,
+                 gr.BarPlot.update(
+                     value=plot_data,
+                     x="labels",
+                     y="values",
+                     title="Emotion Confidence Scores",
+                     x_title="Emotions",
+                     y_title="Confidence (%)"
+                 )
+             )
+         return output_text, None

+     # Create the Gradio interface
+     with gr.Blocks(title="Audio Emotion Recognition") as interface:
+         gr.Markdown("# 🎭 Audio Emotion Recognition")
          gr.Markdown("""
+         Upload an audio file or record directly to analyze the emotional content.
+         The model will detect emotions like angry, happy, sad, neutral, and fearful.
          """)

          with gr.Row():
              with gr.Column():
+                 # Input audio component (updated format)
                  audio_input = gr.Audio(
                      label="Upload or Record Audio",
                      type="numpy",
                      sources=["microphone", "upload"]
                  )
+
+                 # Process button
+                 process_btn = gr.Button("Analyze Emotion", variant="primary")

              with gr.Column():
+                 # Output components
                  output_text = gr.Textbox(
                      label="Analysis Results",
+                     lines=6
                  )
                  output_plot = gr.BarPlot(
+                     title="Emotion Confidence Scores",
+                     x_title="Emotions",
+                     y_title="Confidence (%)"
                  )

+         # Set up event handler
+         process_btn.click(
+             fn=process_audio_file,
              inputs=[audio_input],
              outputs=[output_text, output_plot]
          )

          gr.Markdown("""
+         ### Usage Instructions:
+         1. Click the microphone button to record audio or upload an audio file
+         2. Click "Analyze Emotion" to process the audio
+         3. View the results and confidence scores
+
+         ### Notes:
+         - For best results, ensure clear audio with minimal background noise
+         - Speak naturally and clearly when recording
+         - The model works best with speech in English
          """)

      return interface

+ def main():
+     # Create and launch the interface
+     interface = create_interface()
+     interface.launch(
+         share=True,
+         server_name="0.0.0.0",
+         server_port=7860
+     )
+
  if __name__ == "__main__":
+     main()
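
A note on the hardcoded label list in __init__: the checkpoint defines its own class names and ordering in its config, and if the five names above differ from the model's actual classes in count or order, zip(self.labels, scores) will silently mislabel or drop scores. A minimal sketch, using only the standard transformers config fields, that derives the labels from the checkpoint instead:

    from transformers import AutoModelForAudioClassification

    model = AutoModelForAudioClassification.from_pretrained(
        "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
    )
    # id2label maps each logit index to the class name the checkpoint was
    # trained with, so names and scores stay aligned whatever the class count.
    labels = [model.config.id2label[i] for i in range(model.config.num_labels)]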
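
Separately, process_audio resamples before converting to float and never rescales the samples, but Gradio's type="numpy" audio usually arrives as int16 PCM and may be stereo, while wav2vec2 feature extractors expect a mono float waveform roughly in [-1, 1]. A defensive sketch of that preprocessing, with prepare_audio as a hypothetical helper rather than anything in this commit:

    import numpy as np
    import librosa

    def prepare_audio(sample_rate: int, audio_data: np.ndarray,
                      target_sr: int = 16000) -> np.ndarray:
        """Hypothetical helper: mono float32 waveform in [-1, 1] at target_sr."""
        if audio_data.ndim == 2:                # (samples, channels) from Gradio
            audio_data = audio_data.mean(axis=1)
        audio_data = audio_data.astype(np.float32)
        if np.abs(audio_data).max() > 1.0:      # looks like int16-range PCM
            audio_data /= 32768.0
        if sample_rate != target_sr:            # resample only after the float cast
            audio_data = librosa.resample(audio_data, orig_sr=sample_rate,
                                          target_sr=target_sr)
        return audio_data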
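
The diff also mixes Gradio API generations: gr.Audio(sources=...) is the 4.x spelling, but gr.BarPlot.update(...) exists only in 3.x (4.0 removed the per-component update class methods), and gr.BarPlot plots a pandas DataFrame rather than a plain dict. Which version the Space pins is not visible here, so as an assumption, a sketch of the 4.x-style return for process_audio_file, with bar_plot_for as a hypothetical helper:

    import pandas as pd
    import gradio as gr

    def bar_plot_for(plot_data: dict) -> gr.BarPlot:
        # Gradio 4.x pattern: return a fresh component instance instead of
        # calling the removed gr.BarPlot.update(); the value is a DataFrame.
        df = pd.DataFrame(plot_data)            # columns: "labels", "values"
        return gr.BarPlot(value=df, x="labels", y="values",
                          title="Emotion Confidence Scores")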