Spaces:
Runtime error
Runtime error
PacmanIncarnate
committed on
Commit
·
8663eb2
1
Parent(s):
51dec24
Create app.py
Browse files
app.py
CHANGED
@@ -1,73 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
from transformers import Wav2Vec2Processor
|
3 |
-
from transformers import AutoModelForCTC
|
4 |
-
from conversationalnlp.models.wav2vec2 import Wav2Vec2Predict
|
5 |
-
from conversationalnlp.models.wav2vec2 import ModelLoader
|
6 |
-
from conversationalnlp.utils import *
|
7 |
-
import soundfile as sf
|
8 |
-
import os
|
9 |
-
|
10 |
-
"""
|
11 |
-
run gradio with
|
12 |
-
>>python app.py
|
13 |
-
"""
|
14 |
-
|
15 |
-
# Path prefix for the audio files written by greet(); the input name and
# the file extension are appended to it.
audioheaderpath = os.path.join(os.getcwd(), "temp")

# Identifier handed to both from_pretrained() calls below.
pretrained_model = "codenamewei/speech-to-text"

processor = Wav2Vec2Processor.from_pretrained(pretrained_model)
model = AutoModelForCTC.from_pretrained(pretrained_model)

# Bundle model and processor, then build the predictor used by greet().
modelloader = ModelLoader(model, processor)
predictor = Wav2Vec2Predict(modelloader)

# Example audio files offered in the Gradio interface.
audiofileexamples = ["example1.flac", "example2.flac"]

# Extension of the audio files written by greet().
fileextension = ".wav"
|
34 |
-
|
35 |
-
|
36 |
-
def greet(*args):
    """Transcribe the microphone recording and/or the uploaded audio clip.

    Args:
        *args: Two positional values: ``args[0]`` is the microphone input
            and ``args[1]`` is the uploaded file. Each one is either
            ``None`` or an indexable pair whose item 0 is passed to
            ``sf.write`` as the sample rate and whose item 1 is passed as
            the audio data.

    Returns:
        ``[mictext, filetext]``. For every supplied input the entry is the
        predicted text and the corrected text joined by a newline; for an
        input that was not supplied the entry is ``""``.
    """
    dictinput = dict(mic=args[0], file=args[1])
    audiofiles = []

    for key, audioarray in dictinput.items():
        if audioarray is None:
            continue

        # WORKAROUND: Save to file and reread to get the array shape needed
        # for prediction.
        audioabspath = audioheaderpath + "_" + key + fileextension
        print(f"Audio at path {audioabspath}")
        sf.write(audioabspath, audioarray[1], audioarray[0])
        audiofiles.append(audioabspath)

    # Nothing was recorded or uploaded: both outputs are empty, so skip the
    # predictor instead of handing it an empty list of files.
    if not audiofiles:
        return ["", ""]

    predictiontexts = predictor.predictfiles(audiofiles)
    predicted = predictiontexts["predicted_text"]
    corrected = predictiontexts["corrected_text"]

    # Files were appended in mic-then-file order, so a mic result (when
    # present) is always first and a file result (when present) always last.
    if dictinput["mic"] is not None:
        mictext = predicted[0] + "\n" + corrected[0]
    else:
        mictext = ""

    if dictinput["file"] is not None:
        filetext = predicted[-1] + "\n" + corrected[-1]
    else:
        filetext = ""

    return [mictext, filetext]
|
65 |
-
|
66 |
-
|
67 |
-
# Two audio inputs (microphone, uploaded file) map to two text outputs,
# matching the [mictext, filetext] list returned by greet().
demo = gr.Interface(
    fn=greet,
    inputs=["mic", "audio"],
    outputs=["text", "text"],
    title="Speech-to-Text",
    examples=[audiofileexamples],
)

# Launched with default settings; share=True was left disabled by the author.
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|