rayespinozah committed on
Commit
971b6fe
·
1 Parent(s): 0ae6ec6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +151 -0
app.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import whisper
3
+ from transformers import pipeline
4
+
5
# Both models are loaded once at import time and reused by every request.
# "base" is the checkpoint name handed to whisper.load_model.
model = whisper.load_model("base")

# transformers pipeline for the "sentiment-analysis" task, forced onto the
# PyTorch backend, using the SamLowe/roberta-base-go_emotions checkpoint.
sentiment_analysis = pipeline(
    "sentiment-analysis",
    framework="pt",
    model="SamLowe/roberta-base-go_emotions",
)
7
+
8
def analyze_sentiment(text):
    """Classify *text* with the module-level ``sentiment_analysis`` pipeline.

    Args:
        text: The string to classify.

    Returns:
        A dict mapping every label the pipeline returned to its score.
    """
    return dict(
        (prediction['label'], prediction['score'])
        for prediction in sentiment_analysis(text)
    )
12
+
13
# Emoji displayed next to each emotion label. The values are written as
# escape sequences so they cannot be corrupted again by tools that mis-decode
# UTF-8 source text.
# NOTE(review): the entries for "neutral", "approval", "desire", "love",
# "gratitude" and "pride" were reconstructed from partially lost bytes;
# confirm they match the intended glyphs.
_SENTIMENT_EMOJI = {
    "disappointment": "\U0001F61E",  # disappointed face
    "sadness": "\U0001F622",         # crying face
    "annoyance": "\U0001F620",       # angry face
    "neutral": "\U0001F610",         # neutral face
    "disapproval": "\U0001F44E",     # thumbs down
    "realization": "\U0001F62E",     # face with open mouth
    "nervousness": "\U0001F62C",     # grimacing face
    "approval": "\U0001F44D",        # thumbs up
    "joy": "\U0001F604",             # grinning face with smiling eyes
    "anger": "\U0001F621",           # pouting face
    "embarrassment": "\U0001F633",   # flushed face
    "caring": "\U0001F917",          # hugging face
    "remorse": "\U0001F614",         # pensive face
    "disgust": "\U0001F922",         # nauseated face
    "grief": "\U0001F625",           # sad but relieved face
    "confusion": "\U0001F615",       # confused face
    "relief": "\U0001F60C",          # relieved face
    "desire": "\U0001F60D",          # smiling face with heart-eyes
    "admiration": "\U0001F60C",      # relieved face
    "optimism": "\U0001F60A",        # smiling face with smiling eyes
    "fear": "\U0001F628",            # fearful face
    "love": "\u2764\ufe0f",          # red heart (with emoji variation selector)
    "excitement": "\U0001F389",      # party popper
    "curiosity": "\U0001F914",       # thinking face
    "amusement": "\U0001F604",       # grinning face with smiling eyes
    "surprise": "\U0001F632",        # astonished face
    "gratitude": "\U0001F64F",       # folded hands
    "pride": "\U0001F981",           # lion
}


def get_sentiment_emoji(sentiment):
    """Return the emoji associated with an emotion label.

    Args:
        sentiment: Emotion label to look up (for example ``"joy"``).

    Returns:
        The emoji string for ``sentiment``, or an empty string when the label
        has no entry in the mapping.
    """
    return _SENTIMENT_EMOJI.get(sentiment, "")
46
+
47
def display_sentiment_results(sentiment_results, option):
    """Format sentiment results as text, one newline-terminated line per label.

    Args:
        sentiment_results: Mapping of emotion label to score.
        option: ``"Sentiment Only"`` emits ``"<label> <emoji>"`` lines;
            ``"Sentiment + Score"`` emits ``"<label> <emoji>: <score>"``
            lines. Any other value contributes no lines.

    Returns:
        The concatenated lines, or an empty string when nothing was emitted.
    """
    lines = []
    for label, value in sentiment_results.items():
        icon = get_sentiment_emoji(label)
        if option == "Sentiment Only":
            lines.append(f"{label} {icon}\n")
        elif option == "Sentiment + Score":
            lines.append(f"{label} {icon}: {value}\n")
    return "".join(lines)
56
+
57
def inference(audio, sentiment_option):
    """Transcribe a recording and run sentiment analysis on the transcript.

    Args:
        audio: Path to the recorded audio file (the Audio component is
            configured with ``type="filepath"``). Falsy when nothing was
            recorded.
        sentiment_option: Display mode forwarded to
            ``display_sentiment_results``.

    Returns:
        A ``(language, transcription, sentiment_text)`` tuple, where
        ``language`` is the upper-cased key with the highest detection
        probability.

    Raises:
        gr.Error: If no audio was supplied, so the UI shows a readable
            message instead of an opaque failure from the audio loader.
    """
    if not audio:
        raise gr.Error("No audio received. Please record something before pressing Transcribe.")

    # Whisper decodes a fixed-length window; pad_or_trim brings the waveform
    # to that length (30 seconds according to the Whisper README).
    waveform = whisper.pad_or_trim(whisper.load_audio(audio))
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # probs maps language key -> probability; keep the most likely one.
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)

    # Hand the detected language to the decoder so it does not have to run
    # language detection a second time. fp16 is disabled as in the original.
    options = whisper.DecodingOptions(language=lang, fp16=False)
    result = whisper.decode(model, mel, options)

    sentiment_results = analyze_sentiment(result.text)
    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)

    return lang.upper(), result.text, sentiment_output
73
+
74
# Static page content rendered through gr.HTML / gr.Image in the layout.
# Emoji are written as escape sequences so the text stays intact even if the
# file passes through a tool that mis-decodes UTF-8.
title = """<h1 align="center">\U0001F3A4 Multilingual ASR \U0001F4AC</h1>"""

# NOTE(review): "thmbnail.jpg" looks like a misspelling of "thumbnail.jpg";
# left unchanged because it must match the asset file name shipped with the app.
image_path = "thmbnail.jpg"

description = """
\U0001F4BB This demo showcases a general-purpose speech recognition model called Whisper. It is trained on a large dataset of diverse audio and supports multilingual speech recognition, speech translation, and language identification tasks.<br><br>
<br>
\u2699\ufe0f Components of the tool:<br>
<br>
&nbsp;&nbsp;&nbsp;&nbsp; - Real-time multilingual speech recognition<br>
&nbsp;&nbsp;&nbsp;&nbsp; - Language identification<br>
&nbsp;&nbsp;&nbsp;&nbsp; - Sentiment analysis of the transcriptions<br>
<br>
\U0001F3AF The sentiment analysis results are provided as a dictionary with different emotions and their corresponding scores.<br>
<br>
\U0001F603 The sentiment analysis results are displayed with emojis representing the corresponding sentiment.<br>
<br>
\u2705 The higher the score for a specific emotion, the stronger the presence of that emotion in the transcribed text.<br>
<br>
\u2753 Use the microphone for real-time speech recognition.<br>
<br>
\u26a1\ufe0f The model will transcribe the audio and perform sentiment analysis on the transcribed text.<br>
"""
95
+
96
# Stylesheet passed to gr.Blocks(css=...). The selectors target elem_id
# values; "banner-image" is the id given to the banner gr.Image.
custom_css = (
    "\n"
    "#banner-image {\n"
    "display: block;\n"
    "margin-left: auto;\n"
    "margin-right: auto;\n"
    "}\n"
    "#chat-message {\n"
    "font-size: 14px;\n"
    "min-height: 300px;\n"
    "}\n"
)
107
+
108
# Build the Gradio UI: banner + description, recording controls, three output
# boxes, and a footer. The Transcribe button is wired to inference().
# NOTE(review): the nesting of the `with` blocks below was reconstructed from
# a flattened listing; confirm it matches the intended layout.
block = gr.Blocks(css=custom_css)

with block:
    gr.HTML(title)

    with gr.Row():
        with gr.Column():
            gr.Image(image_path, elem_id="banner-image", show_label=False)
        with gr.Column():
            gr.HTML(description)

    with gr.Group():
        with gr.Box():
            # type="filepath" makes inference() receive a path on disk.
            audio = gr.Audio(
                label="Input Audio",
                show_label=False,
                source="microphone",
                type="filepath"
            )

            # `value` (not `default`) sets the initially selected choice;
            # without it nothing is preselected and inference() receives
            # None, which produces an empty sentiment box.
            sentiment_option = gr.Radio(
                choices=["Sentiment Only", "Sentiment + Score"],
                label="Select an option",
                value="Sentiment Only"
            )

            btn = gr.Button("Transcribe")

        lang_str = gr.Textbox(label="Language")

        text = gr.Textbox(label="Transcription")

        # The former `output=True` keyword is not a Textbox parameter; the
        # component becomes an output by being listed in `outputs=` below.
        sentiment_output = gr.Textbox(label="Sentiment Analysis Results")

        btn.click(inference, inputs=[audio, sentiment_option], outputs=[lang_str, text, sentiment_output])

        gr.HTML('''
<div class="footer">
<p>Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a>
</p>
</div>
''')

block.launch()