rayespinozah committed on
Commit dc86169 · 1 Parent(s): 4c150fc

Update app.py

Files changed (1):
  app.py +142 -34
app.py CHANGED
@@ -1,6 +1,17 @@
+from __future__ import annotations
+
 import gradio as gr
 import whisper
 from transformers import pipeline
+from gradio.themes.base import Base
+from gradio.themes.utils import colors, fonts, sizes
+from typing import Iterable
+import os
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+import matplotlib
+#matplotlib.use('TkAgg')
+import matplotlib.pyplot as plt
+
 
 model = whisper.load_model("base")
 sentiment_analysis = pipeline("sentiment-analysis", framework="pt", model="SamLowe/roberta-base-go_emotions")
@@ -71,48 +82,145 @@ def inference(audio, sentiment_option):
 
     return lang.upper(), result.text, sentiment_output
 
-title = """<h1 align="center">🎤 Multilingual ASR 💬</h1>"""
-image_path = "thmbnail.jpg"
+title = """<h1 align="center">Audio Sentiment Analysis</h1>"""
+subtitle = """<h6 align="center">Automatic Speech Recognition</h6>"""
+image_path = "/content/drive/MyDrive/Colab Notebooks/Arquitecture_W.jpg"
 description = """
-💻 This demo showcases a general-purpose speech recognition model called Whisper. It is trained on a large dataset of diverse audio and supports multilingual speech recognition, speech translation, and language identification tasks.<br><br>
-<br>
-⚙️ Components of the tool:<br>
-<br>
-&nbsp;&nbsp;&nbsp;&nbsp; - Real-time multilingual speech recognition<br>
-&nbsp;&nbsp;&nbsp;&nbsp; - Language identification<br>
-&nbsp;&nbsp;&nbsp;&nbsp; - Sentiment analysis of the transcriptions<br>
-<br>
-🎯 The sentiment analysis results are provided as a dictionary with different emotions and their corresponding scores.<br>
-<br>
-😃 The sentiment analysis results are displayed with emojis representing the corresponding sentiment.<br>
-<br>
-✅ The higher the score for a specific emotion, the stronger the presence of that emotion in the transcribed text.<br>
-<br>
-❓ Use the microphone for real-time speech recognition.<br>
+<p align="justify">With cross-modal interaction and AI (tools and pre-trained models in NLP), we can analyze large audio data
+in real-time, such as recorded conversations, customer service calls, or voice recordings, in order to identify and categorize
+emotions (from positive and neutral to sad and angry).</p><br>
+
+Components of the tool:<br>
+&nbsp;&nbsp;&nbsp;&nbsp; - Input: Real-time multilingual<br>
+&nbsp;&nbsp;&nbsp;&nbsp; - Video Call speech recognition<br>
+&nbsp;&nbsp;&nbsp;&nbsp; - Pre-trained model: Whisper<br>
+&nbsp;&nbsp;&nbsp;&nbsp; - Model size: Large with 769M Parameters<br>
+&nbsp;&nbsp;&nbsp;&nbsp; - Encoder/Decoder Architecture<br>
+&nbsp;&nbsp;&nbsp;&nbsp; - Transcribe, Translate, and Identify Audio<br>
+&nbsp;&nbsp;&nbsp;&nbsp; - Output: Sentiment analysis<br>
 <br>
-⚡️ The model will transcribe the audio and perform sentiment analysis on the transcribed text.<br>
 """
 
 custom_css = """
-#banner-image {
-    display: block;
+#banner-image {
     margin-left: auto;
     margin-right: auto;
 }
-#chat-message {
-    font-size: 14px;
-    min-height: 300px;
+#chat-message {
+    font-size: 300px;
+    min-height: 600px;
+}
+
+img {
+    border-radius: 8px;
+    max-width: 100%;
+    height: auto;
 }
+
 """
 
+#-----Themes config:
+
+class Seafoam(Base):
+    def __init__(
+        self,
+        *,
+        primary_hue: colors.Color | str = colors.emerald,
+        secondary_hue: colors.Color | str = colors.blue,
+        neutral_hue: colors.Color | str = colors.blue,
+        spacing_size: sizes.Size | str = sizes.spacing_md,
+        radius_size: sizes.Size | str = sizes.radius_md,
+        text_size: sizes.Size | str = sizes.text_lg,
+        font: fonts.Font
+        | str
+        | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("Quicksand"),
+            "ui-sans-serif",
+            "sans-serif",
+        ),
+        font_mono: fonts.Font
+        | str
+        | Iterable[fonts.Font | str] = (
+            fonts.GoogleFont("IBM Plex Mono"),
+            "ui-monospace",
+            "monospace",
+        ),
+    ):
+        super().__init__(
+            primary_hue=primary_hue,
+            secondary_hue=secondary_hue,
+            neutral_hue=neutral_hue,
+            spacing_size=spacing_size,
+            radius_size=radius_size,
+            text_size=text_size,
+            font=font,
+            font_mono=font_mono,
+        )
+        super().set(
+            body_background_fill="repeating-linear-gradient(45deg, *primary_200, *primary_200 10px, *primary_50 10px, *primary_50 20px)",
+            body_background_fill_dark="repeating-linear-gradient(45deg, *primary_800, *primary_800 10px, *primary_900 10px, *primary_900 20px)",
+            button_primary_background_fill="linear-gradient(90deg, *primary_300, *secondary_400)",
+            button_primary_background_fill_hover="linear-gradient(90deg, *primary_200, *secondary_300)",
+            button_primary_text_color="white",
+            button_primary_background_fill_dark="linear-gradient(90deg, *primary_600, *secondary_800)",
+            slider_color="*secondary_300",
+            slider_color_dark="*secondary_600",
+            block_title_text_weight="600",
+            block_border_width="3px",
+            block_shadow="*shadow_drop_lg",
+            button_shadow="*shadow_drop_lg",
+            button_large_padding="32px",
+        )
+
+
+seafoam = Seafoam()
+#
+
+lock_symbol = '\U0001F512'  # 🔒
+unlock_symbol = '\U0001F513'  # 🔓
+switch_values_symbol = '\U000021C5'  # ⇅
+
+class FormRow(gr.Row, gr.components.FormComponent):
+    """Same as gr.Row but fits inside gradio forms"""
+
+    def get_block_name(self):
+        return "row"
+
+class ToolButton(gr.Button, gr.components.FormComponent):
+    """Small button with single emoji as text, fits inside gradio forms"""
+
+    def __init__(self, **kwargs):
+        super().__init__(variant="tool", **kwargs)
+
+    def get_block_name(self):
+        return "button"
+
+def toggle_aspect_ratio(btn):
+    if btn == unlock_symbol:
+        return gr.update(value=lock_symbol, variant="primary")
+    else:
+        return gr.update(value=unlock_symbol, variant="secondary")
+
+
+#
+
+
+
+block = gr.Blocks(css=custom_css, theme='gradio/default', title="Analytics Projects by Ray Espinoza")
+#block = gr.Blocks(css=custom_css, title="Analytics Projects by Ray Espinoza")
+#block = gr.Blocks(css=".gradio-container {background-color: black}", title="Analytics Projects by Ray Espinoza")
+#block = gr.Blocks(css=".gradio-container {background: url('file=pic4.jpg')}", title="Analytics Projects by Ray Espinoza")
 
 with block:
     gr.HTML(title)
+    gr.HTML(subtitle)
 
     with gr.Row():
-        with gr.Column():
-            gr.Image(image_path, elem_id="banner-image", show_label=False)
+        with gr.Column(scale=2):
+            gr.Image(image_path, elem_id="banner-image", show_label=False, show_download_button=False)
+            #banner-image
+            #gr.Markdown(value=image_path, elem_id="img")
+            #gr.Image(image_path, elem_id="chat-message", show_label=False)
         with gr.Column():
             gr.HTML(description)
 
@@ -120,7 +228,7 @@ with block:
     with gr.Box():
         audio = gr.Audio(
             label="Input Audio",
-            show_label=False,
+            show_label=False,  #Here#False
             source="microphone",
             type="filepath"
         )
@@ -131,21 +239,21 @@ with block:
             default="Sentiment Only"
         )
 
-    btn = gr.Button("Transcribe")
+    btn = gr.Button("Execute: Transcribe", variant="primary")
 
-    lang_str = gr.Textbox(label="Language")
+    lang_str = gr.Textbox(label="Language:")
 
-    text = gr.Textbox(label="Transcription")
+    text = gr.Textbox(label="Transcription:")
 
-    sentiment_output = gr.Textbox(label="Sentiment Analysis Results", output=True)
+    sentiment_output = gr.Textbox(label="Sentiment Analysis Results:", output=True)
 
     btn.click(inference, inputs=[audio, sentiment_option], outputs=[lang_str, text, sentiment_output])
 
     gr.HTML('''
         <div class="footer">
-            <p>Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">Ray Espinoza</a>
+            <p>By <a href="https://github.com/rayespinozah" style="text-decoration: underline;" target="_blank"> Ray Espinoza Github</a>
         </p>
         </div>
-    ''')
+    ''')
 
-block.launch()
+block.launch(share=True)
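Context for reviewers: the second hunk header references `def inference(audio, sentiment_option)`, but its body lies outside the changed lines, and only the unchanged `return lang.upper(), result.text, sentiment_output` is visible. A minimal sketch of how such a function is commonly written against the Whisper decoding API, assuming the module-level `model` and `sentiment_analysis` from the imports hunk; `analyze_sentiment` and `display_sentiment_results` are hypothetical helper names, not confirmed by this commit:

def inference(audio, sentiment_option):
    # Load the recording and fit it to Whisper's 30-second context window.
    audio_data = whisper.load_audio(audio)
    audio_data = whisper.pad_or_trim(audio_data)

    # Log-Mel spectrogram on the model's device, then language identification.
    mel = whisper.log_mel_spectrogram(audio_data).to(model.device)
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)

    # Decode the spectrogram; DecodingResult exposes the transcription as
    # .text, which is consistent with `result.text` in the unchanged return.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    # Hypothetical helpers: score the transcription with the go_emotions
    # pipeline and format the scores per the selected sentiment_option.
    sentiment_results = analyze_sentiment(result.text)
    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)

    return lang.upper(), result.text, sentiment_output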