Files changed (1) hide show
  1. app.py +0 -73
app.py CHANGED
@@ -1,73 +0,0 @@
1
- import gradio as gr
2
- from transformers import Wav2Vec2Processor
3
- from transformers import AutoModelForCTC
4
- from conversationalnlp.models.wav2vec2 import Wav2Vec2Predict
5
- from conversationalnlp.models.wav2vec2 import ModelLoader
6
- from conversationalnlp.utils import *
7
- import soundfile as sf
8
- import os
9
-
10
- """
11
- run gradio with
12
- >>python app.py
13
- """
14
-
15
# --- Plain configuration values -------------------------------------------

# Path prefix (current working directory + "temp") for the scratch audio
# files that greet() writes before running prediction.
audioheaderpath = os.path.join(os.getcwd(), "temp")

# Extension appended to every scratch audio file name.
fileextension = ".wav"

# Audio files offered as a ready-made example row in the interface.
audiofileexamples = ["example1.flac", "example2.flac"]

# Identifier of the pretrained checkpoint used for both processor and model.
pretrained_model = "codenamewei/speech-to-text"

# --- Model loading ----------------------------------------------------------

processor = Wav2Vec2Processor.from_pretrained(pretrained_model)
model = AutoModelForCTC.from_pretrained(pretrained_model)

# Bundle the model and processor, then build the predictor used by greet().
modelloader = ModelLoader(model, processor)
predictor = Wav2Vec2Predict(modelloader)
34
-
35
-
36
def greet(*args):
    """Transcribe the microphone and uploaded-file audio inputs.

    Args:
        *args: At least two positional values: the microphone input first
            and the file input second. Each is either ``None`` (input not
            provided) or a pair ``(sample_rate, samples)``; the pair is
            written to disk with ``sf.write(path, samples, sample_rate)``.

    Returns:
        A two-item list ``[mictext, filetext]``. Each entry is the predicted
        text and the corrected text joined by a newline, or ``""`` when the
        corresponding input was not provided.
    """
    dictinput = dict(mic=args[0], file=args[1])

    audiofiles = []
    # Position of each provided source inside `audiofiles`, so results are
    # looked up explicitly instead of relying on [0] / [-1] ordering tricks.
    positions = {}

    for key, audioarray in dictinput.items():
        if audioarray is None:
            continue

        # WORKAROUND: Save to file and reread to get the array shape needed
        # for prediction.
        # NOTE(review): the scratch file name is fixed per source, so
        # concurrent requests would overwrite each other's audio - confirm
        # whether the app can serve requests in parallel.
        audioabspath = audioheaderpath + "_" + key + fileextension
        print(f"Audio at path {audioabspath}")
        sf.write(audioabspath, audioarray[1], audioarray[0])

        positions[key] = len(audiofiles)
        audiofiles.append(audioabspath)

    # Nothing was recorded or uploaded: skip the model call entirely.
    if not audiofiles:
        return ["", ""]

    # Assumes predictfiles returns results in the same order as `audiofiles`
    # (the original indexing relied on this as well) - TODO confirm.
    predictiontexts = predictor.predictfiles(audiofiles)

    def _text_for(key):
        """Return 'predicted\\ncorrected' for *key*, or '' if it was absent."""
        index = positions.get(key)
        if index is None:
            return ""
        predicted = predictiontexts["predicted_text"][index]
        corrected = predictiontexts["corrected_text"][index]
        return predicted + "\n" + corrected

    return [_text_for("mic"), _text_for("file")]
65
-
66
-
67
# Web interface: two audio inputs (microphone recording and uploaded file)
# mapped onto two text outputs, both served by greet().
demo = gr.Interface(
    fn=greet,
    inputs=["mic", "audio"],
    outputs=["text", "text"],
    title="Speech-to-Text",
    # A single example row: one file per input component.
    examples=[audiofileexamples],
)

# Start the app with default settings (pass share=True for a public link).
demo.launch()