Update app.py
Browse files
app.py
CHANGED
@@ -6,12 +6,21 @@ import whisper
|
|
6 |
model = whisper.load_model("base")
|
7 |
|
8 |
|
9 |
-
|
10 |
|
11 |
def inference(audio):
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
|
17 |
title="Whisper"
|
@@ -86,6 +95,60 @@ block = gr.Blocks(css=css)
|
|
86 |
|
87 |
|
88 |
with block:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
with gr.Group():
|
90 |
with gr.Box():
|
91 |
with gr.Row().style(mobile_collapse=False, equal_height=True):
|
|
|
6 |
model = whisper.load_model("base")
|
7 |
|
8 |
|
|
|
9 |
|
10 |
def inference(audio):
    """Transcribe an audio input with the module-level Whisper ``model``.

    Steps: load the audio, pad or trim it, compute a log-Mel spectrogram on
    the model's device, detect the spoken language (printed for diagnostics
    only), then decode with default options.

    Args:
        audio: Whatever ``whisper.load_audio`` accepts -- presumably the
            file path handed over by the Gradio audio component (TODO
            confirm against the caller).

    Returns:
        The decoded text (``result.text``).
    """
    # Load and length-normalize the waveform in one step.
    waveform = whisper.pad_or_trim(whisper.load_audio(audio))

    # The spectrogram must live on the same device as the model.
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Language detection is informational: the result is only printed and
    # is not passed on to the decoder.
    _, probs = model.detect_language(mel)
    detected = max(probs, key=probs.get)
    print(f"Detected language: {detected}")

    # Decode with default options.
    result = whisper.decode(model, mel, whisper.DecodingOptions())
    text = result.text
    print(text)
    return text
|
24 |
|
25 |
|
26 |
title="Whisper"
|
|
|
95 |
|
96 |
|
97 |
with block:
|
98 |
+
gr.HTML(
|
99 |
+
"""
|
100 |
+
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
|
101 |
+
<div
|
102 |
+
style="
|
103 |
+
display: inline-flex;
|
104 |
+
align-items: center;
|
105 |
+
gap: 0.8rem;
|
106 |
+
font-size: 1.75rem;
|
107 |
+
"
|
108 |
+
>
|
109 |
+
<svg
|
110 |
+
width="0.65em"
|
111 |
+
height="0.65em"
|
112 |
+
viewBox="0 0 115 115"
|
113 |
+
fill="none"
|
114 |
+
xmlns="http://www.w3.org/2000/svg"
|
115 |
+
>
|
116 |
+
<rect width="23" height="23" fill="white"></rect>
|
117 |
+
<rect y="69" width="23" height="23" fill="white"></rect>
|
118 |
+
<rect x="23" width="23" height="23" fill="#AEAEAE"></rect>
|
119 |
+
<rect x="23" y="69" width="23" height="23" fill="#AEAEAE"></rect>
|
120 |
+
<rect x="46" width="23" height="23" fill="white"></rect>
|
121 |
+
<rect x="46" y="69" width="23" height="23" fill="white"></rect>
|
122 |
+
<rect x="69" width="23" height="23" fill="black"></rect>
|
123 |
+
<rect x="69" y="69" width="23" height="23" fill="black"></rect>
|
124 |
+
<rect x="92" width="23" height="23" fill="#D9D9D9"></rect>
|
125 |
+
<rect x="92" y="69" width="23" height="23" fill="#AEAEAE"></rect>
|
126 |
+
<rect x="115" y="46" width="23" height="23" fill="white"></rect>
|
127 |
+
<rect x="115" y="115" width="23" height="23" fill="white"></rect>
|
128 |
+
<rect x="115" y="69" width="23" height="23" fill="#D9D9D9"></rect>
|
129 |
+
<rect x="92" y="46" width="23" height="23" fill="#AEAEAE"></rect>
|
130 |
+
<rect x="92" y="115" width="23" height="23" fill="#AEAEAE"></rect>
|
131 |
+
<rect x="92" y="69" width="23" height="23" fill="white"></rect>
|
132 |
+
<rect x="69" y="46" width="23" height="23" fill="white"></rect>
|
133 |
+
<rect x="69" y="115" width="23" height="23" fill="white"></rect>
|
134 |
+
<rect x="69" y="69" width="23" height="23" fill="#D9D9D9"></rect>
|
135 |
+
<rect x="46" y="46" width="23" height="23" fill="black"></rect>
|
136 |
+
<rect x="46" y="115" width="23" height="23" fill="black"></rect>
|
137 |
+
<rect x="46" y="69" width="23" height="23" fill="black"></rect>
|
138 |
+
<rect x="23" y="46" width="23" height="23" fill="#D9D9D9"></rect>
|
139 |
+
<rect x="23" y="115" width="23" height="23" fill="#AEAEAE"></rect>
|
140 |
+
<rect x="23" y="69" width="23" height="23" fill="black"></rect>
|
141 |
+
</svg>
|
142 |
+
<h1 style="font-weight: 900; margin-bottom: 7px;">
|
143 |
+
Whisper
|
144 |
+
</h1>
|
145 |
+
</div>
|
146 |
+
<p style="margin-bottom: 10px; font-size: 94%">
|
147 |
+
Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification.
|
148 |
+
</p>
|
149 |
+
</div>
|
150 |
+
"""
|
151 |
+
)
|
152 |
with gr.Group():
|
153 |
with gr.Box():
|
154 |
with gr.Row().style(mobile_collapse=False, equal_height=True):
|