Spaces:
Running
Running
API-Handler
committed on
Upload 18 files
Browse files
- .gitattributes +4 -0
- Dockerfile +20 -0
- Vosk/vosk-model-small-en-us-0.15/README +9 -0
- Vosk/vosk-model-small-en-us-0.15/am/final.mdl +3 -0
- Vosk/vosk-model-small-en-us-0.15/conf/mfcc.conf +7 -0
- Vosk/vosk-model-small-en-us-0.15/conf/model.conf +10 -0
- Vosk/vosk-model-small-en-us-0.15/graph/Gr.fst +3 -0
- Vosk/vosk-model-small-en-us-0.15/graph/HCLr.fst +3 -0
- Vosk/vosk-model-small-en-us-0.15/graph/disambig_tid.int +17 -0
- Vosk/vosk-model-small-en-us-0.15/graph/phones/word_boundary.int +166 -0
- Vosk/vosk-model-small-en-us-0.15/ivector/final.dubm +0 -0
- Vosk/vosk-model-small-en-us-0.15/ivector/final.ie +3 -0
- Vosk/vosk-model-small-en-us-0.15/ivector/final.mat +0 -0
- Vosk/vosk-model-small-en-us-0.15/ivector/global_cmvn.stats +3 -0
- Vosk/vosk-model-small-en-us-0.15/ivector/online_cmvn.conf +1 -0
- Vosk/vosk-model-small-en-us-0.15/ivector/splice.conf +2 -0
- main.py +23 -0
- requirements.txt +5 -0
- vosk_handler.py +28 -0
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
Vosk/vosk-model-small-en-us-0.15/am/final.mdl filter=lfs diff=lfs merge=lfs -text
|
37 |
+
Vosk/vosk-model-small-en-us-0.15/graph/Gr.fst filter=lfs diff=lfs merge=lfs -text
|
38 |
+
Vosk/vosk-model-small-en-us-0.15/graph/HCLr.fst filter=lfs diff=lfs merge=lfs -text
|
39 |
+
Vosk/vosk-model-small-en-us-0.15/ivector/final.ie filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Use an official Python runtime as the base image
|
2 |
+
FROM python:3.9-slim
|
3 |
+
|
4 |
+
# Set the working directory in the container
|
5 |
+
WORKDIR /app
|
6 |
+
|
7 |
+
# Copy the requirements file into the container
|
8 |
+
COPY requirements.txt .
|
9 |
+
|
10 |
+
# Install the required packages
|
11 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
12 |
+
|
13 |
+
# Copy the rest of the application code into the container
|
14 |
+
COPY . .
|
15 |
+
|
16 |
+
# Expose the port that FastAPI will run on
|
17 |
+
EXPOSE 7860
|
18 |
+
|
19 |
+
# Command to run the FastAPI application
|
20 |
+
# The FastAPI instance `app` is defined in main.py, so the import string must be "main:app".
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
Vosk/vosk-model-small-en-us-0.15/README
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
US English model for mobile Vosk applications
|
2 |
+
|
3 |
+
Copyright 2020 Alpha Cephei Inc
|
4 |
+
|
5 |
+
Accuracy: 10.38 (tedlium test) 9.85 (librispeech test-clean)
|
6 |
+
Speed: 0.11xRT (desktop)
|
7 |
+
Latency: 0.15s (right context)
|
8 |
+
|
9 |
+
|
Vosk/vosk-model-small-en-us-0.15/am/final.mdl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75370a0137f9daf8f469dedd7daa4513ae7a621f03240c6e512e2b50b656a7b6
|
3 |
+
size 15962575
|
Vosk/vosk-model-small-en-us-0.15/conf/mfcc.conf
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
--sample-frequency=16000
|
2 |
+
--use-energy=false
|
3 |
+
--num-mel-bins=40
|
4 |
+
--num-ceps=40
|
5 |
+
--low-freq=20
|
6 |
+
--high-freq=7600
|
7 |
+
--allow-downsample=true
|
Vosk/vosk-model-small-en-us-0.15/conf/model.conf
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
--min-active=200
|
2 |
+
--max-active=3000
|
3 |
+
--beam=10.0
|
4 |
+
--lattice-beam=2.0
|
5 |
+
--acoustic-scale=1.0
|
6 |
+
--frame-subsampling-factor=3
|
7 |
+
--endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10
|
8 |
+
--endpoint.rule2.min-trailing-silence=0.5
|
9 |
+
--endpoint.rule3.min-trailing-silence=0.75
|
10 |
+
--endpoint.rule4.min-trailing-silence=1.0
|
Vosk/vosk-model-small-en-us-0.15/graph/Gr.fst
ADDED
Git LFS Details
|
Vosk/vosk-model-small-en-us-0.15/graph/HCLr.fst
ADDED
Git LFS Details
|
Vosk/vosk-model-small-en-us-0.15/graph/disambig_tid.int
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
10015
|
2 |
+
10016
|
3 |
+
10017
|
4 |
+
10018
|
5 |
+
10019
|
6 |
+
10020
|
7 |
+
10021
|
8 |
+
10022
|
9 |
+
10023
|
10 |
+
10024
|
11 |
+
10025
|
12 |
+
10026
|
13 |
+
10027
|
14 |
+
10028
|
15 |
+
10029
|
16 |
+
10030
|
17 |
+
10031
|
Vosk/vosk-model-small-en-us-0.15/graph/phones/word_boundary.int
ADDED
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
1 nonword
|
2 |
+
2 begin
|
3 |
+
3 end
|
4 |
+
4 internal
|
5 |
+
5 singleton
|
6 |
+
6 nonword
|
7 |
+
7 begin
|
8 |
+
8 end
|
9 |
+
9 internal
|
10 |
+
10 singleton
|
11 |
+
11 begin
|
12 |
+
12 end
|
13 |
+
13 internal
|
14 |
+
14 singleton
|
15 |
+
15 begin
|
16 |
+
16 end
|
17 |
+
17 internal
|
18 |
+
18 singleton
|
19 |
+
19 begin
|
20 |
+
20 end
|
21 |
+
21 internal
|
22 |
+
22 singleton
|
23 |
+
23 begin
|
24 |
+
24 end
|
25 |
+
25 internal
|
26 |
+
26 singleton
|
27 |
+
27 begin
|
28 |
+
28 end
|
29 |
+
29 internal
|
30 |
+
30 singleton
|
31 |
+
31 begin
|
32 |
+
32 end
|
33 |
+
33 internal
|
34 |
+
34 singleton
|
35 |
+
35 begin
|
36 |
+
36 end
|
37 |
+
37 internal
|
38 |
+
38 singleton
|
39 |
+
39 begin
|
40 |
+
40 end
|
41 |
+
41 internal
|
42 |
+
42 singleton
|
43 |
+
43 begin
|
44 |
+
44 end
|
45 |
+
45 internal
|
46 |
+
46 singleton
|
47 |
+
47 begin
|
48 |
+
48 end
|
49 |
+
49 internal
|
50 |
+
50 singleton
|
51 |
+
51 begin
|
52 |
+
52 end
|
53 |
+
53 internal
|
54 |
+
54 singleton
|
55 |
+
55 begin
|
56 |
+
56 end
|
57 |
+
57 internal
|
58 |
+
58 singleton
|
59 |
+
59 begin
|
60 |
+
60 end
|
61 |
+
61 internal
|
62 |
+
62 singleton
|
63 |
+
63 begin
|
64 |
+
64 end
|
65 |
+
65 internal
|
66 |
+
66 singleton
|
67 |
+
67 begin
|
68 |
+
68 end
|
69 |
+
69 internal
|
70 |
+
70 singleton
|
71 |
+
71 begin
|
72 |
+
72 end
|
73 |
+
73 internal
|
74 |
+
74 singleton
|
75 |
+
75 begin
|
76 |
+
76 end
|
77 |
+
77 internal
|
78 |
+
78 singleton
|
79 |
+
79 begin
|
80 |
+
80 end
|
81 |
+
81 internal
|
82 |
+
82 singleton
|
83 |
+
83 begin
|
84 |
+
84 end
|
85 |
+
85 internal
|
86 |
+
86 singleton
|
87 |
+
87 begin
|
88 |
+
88 end
|
89 |
+
89 internal
|
90 |
+
90 singleton
|
91 |
+
91 begin
|
92 |
+
92 end
|
93 |
+
93 internal
|
94 |
+
94 singleton
|
95 |
+
95 begin
|
96 |
+
96 end
|
97 |
+
97 internal
|
98 |
+
98 singleton
|
99 |
+
99 begin
|
100 |
+
100 end
|
101 |
+
101 internal
|
102 |
+
102 singleton
|
103 |
+
103 begin
|
104 |
+
104 end
|
105 |
+
105 internal
|
106 |
+
106 singleton
|
107 |
+
107 begin
|
108 |
+
108 end
|
109 |
+
109 internal
|
110 |
+
110 singleton
|
111 |
+
111 begin
|
112 |
+
112 end
|
113 |
+
113 internal
|
114 |
+
114 singleton
|
115 |
+
115 begin
|
116 |
+
116 end
|
117 |
+
117 internal
|
118 |
+
118 singleton
|
119 |
+
119 begin
|
120 |
+
120 end
|
121 |
+
121 internal
|
122 |
+
122 singleton
|
123 |
+
123 begin
|
124 |
+
124 end
|
125 |
+
125 internal
|
126 |
+
126 singleton
|
127 |
+
127 begin
|
128 |
+
128 end
|
129 |
+
129 internal
|
130 |
+
130 singleton
|
131 |
+
131 begin
|
132 |
+
132 end
|
133 |
+
133 internal
|
134 |
+
134 singleton
|
135 |
+
135 begin
|
136 |
+
136 end
|
137 |
+
137 internal
|
138 |
+
138 singleton
|
139 |
+
139 begin
|
140 |
+
140 end
|
141 |
+
141 internal
|
142 |
+
142 singleton
|
143 |
+
143 begin
|
144 |
+
144 end
|
145 |
+
145 internal
|
146 |
+
146 singleton
|
147 |
+
147 begin
|
148 |
+
148 end
|
149 |
+
149 internal
|
150 |
+
150 singleton
|
151 |
+
151 begin
|
152 |
+
152 end
|
153 |
+
153 internal
|
154 |
+
154 singleton
|
155 |
+
155 begin
|
156 |
+
156 end
|
157 |
+
157 internal
|
158 |
+
158 singleton
|
159 |
+
159 begin
|
160 |
+
160 end
|
161 |
+
161 internal
|
162 |
+
162 singleton
|
163 |
+
163 begin
|
164 |
+
164 end
|
165 |
+
165 internal
|
166 |
+
166 singleton
|
Vosk/vosk-model-small-en-us-0.15/ivector/final.dubm
ADDED
Binary file (168 kB). View file
|
|
Vosk/vosk-model-small-en-us-0.15/ivector/final.ie
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f37faf90c375b9e4740b569398b5829ed9cc07d19be6d441f72c3b71d7efcc6
|
3 |
+
size 8288887
|
Vosk/vosk-model-small-en-us-0.15/ivector/final.mat
ADDED
Binary file (45 kB). View file
|
|
Vosk/vosk-model-small-en-us-0.15/ivector/global_cmvn.stats
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
1.682383e+11 -1.1595e+10 -1.521733e+10 4.32034e+09 -2.257938e+10 -1.969666e+10 -2.559265e+10 -1.535687e+10 -1.276854e+10 -4.494483e+09 -1.209085e+10 -5.64008e+09 -1.134847e+10 -3.419512e+09 -1.079542e+10 -4.145463e+09 -6.637486e+09 -1.11318e+09 -3.479773e+09 -1.245932e+08 -1.386961e+09 6.560655e+07 -2.436518e+08 -4.032432e+07 4.620046e+08 -7.714964e+07 9.551484e+08 -4.119761e+08 8.208582e+08 -7.117156e+08 7.457703e+08 -4.3106e+08 1.202726e+09 2.904036e+08 1.231931e+09 3.629848e+08 6.366939e+08 -4.586172e+08 -5.267629e+08 -3.507819e+08 1.679838e+09
|
3 |
+
1.741141e+13 8.92488e+11 8.743834e+11 8.848896e+11 1.190313e+12 1.160279e+12 1.300066e+12 1.005678e+12 9.39335e+11 8.089614e+11 7.927041e+11 6.882427e+11 6.444235e+11 5.151451e+11 4.825723e+11 3.210106e+11 2.720254e+11 1.772539e+11 1.248102e+11 6.691599e+10 3.599804e+10 1.207574e+10 1.679301e+09 4.594778e+08 5.821614e+09 1.451758e+10 2.55803e+10 3.43277e+10 4.245286e+10 4.784859e+10 4.988591e+10 4.925451e+10 5.074584e+10 4.9557e+10 4.407876e+10 3.421443e+10 3.138606e+10 2.539716e+10 1.948134e+10 1.381167e+10 0 ]
|
Vosk/vosk-model-small-en-us-0.15/ivector/online_cmvn.conf
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
|
Vosk/vosk-model-small-en-us-0.15/ivector/splice.conf
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
--left-context=3
|
2 |
+
--right-context=3
|
main.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, UploadFile
|
2 |
+
from vosk_handler import VoskTranscriber
|
3 |
+
import io
|
4 |
+
|
5 |
+
app = FastAPI()
|
6 |
+
transcriber = VoskTranscriber()
|
7 |
+
|
8 |
+
@app.post("/transcribe/")
async def transcribe_audio(audio_file: UploadFile):
    """Transcribe an uploaded audio file and return the transcriber's result.

    Args:
        audio_file: The uploaded file; its bytes are handed to
            ``transcriber.transcribe_audio`` as an in-memory stream.

    Returns:
        dict: Whatever ``transcriber.transcribe_audio`` returns, or
        ``{"success": False, "error": <message>}`` if reading the upload
        or running the transcription raises.
    """
    # Imported here so the handler is self-contained and does not depend on
    # changes to the module's import block.
    from fastapi.concurrency import run_in_threadpool

    try:
        # Read the whole upload into memory and expose it as a file-like object.
        content = await audio_file.read()
        audio_data = io.BytesIO(content)
        # Decoding is synchronous and CPU-bound; running it in a worker thread
        # keeps the event loop free to serve other requests meanwhile.
        return await run_in_threadpool(transcriber.transcribe_audio, audio_data)
    except Exception as e:
        # Best-effort API: report the failure in the response body.
        return {"success": False, "error": str(e)}
|
20 |
+
|
21 |
+
if __name__ == "__main__":
|
22 |
+
import uvicorn
|
23 |
+
uvicorn.run(app, host="0.0.0.0", port=8000)
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi==0.104.1
|
2 |
+
uvicorn==0.24.0
|
3 |
+
vosk==0.3.45
|
4 |
+
requests==2.31.0
|
5 |
+
python-multipart==0.0.6
|
vosk_handler.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from vosk import Model, KaldiRecognizer
|
2 |
+
import wave
|
3 |
+
import json
|
4 |
+
|
5 |
+
class VoskTranscriber:
    """Transcribe WAV audio to text with an offline Vosk model.

    The model is loaded once in the constructor; a new ``KaldiRecognizer``
    is created for every call to :meth:`transcribe_audio`.
    """

    def __init__(self, model_path="Vosk/vosk-model-small-en-us-0.15"):
        """Load the Vosk model stored at *model_path*.

        Args:
            model_path: Directory of the Vosk model. The default is written
                with forward slashes so it resolves on Linux as well as on
                Windows; a backslash-separated path is not split into
                directories on POSIX systems.
        """
        self.model = Model(model_path)

    def transcribe_audio(self, audio_data):
        """Transcribe a WAV source and return the recognized text.

        Args:
            audio_data: Anything ``wave.open`` accepts in ``"rb"`` mode,
                i.e. a file path or a binary file-like object.

        Returns:
            dict: ``{"success": True, "text": <transcript>}`` on success, or
            ``{"success": False, "error": <message>}`` if anything raises.
        """
        try:
            with wave.open(audio_data, "rb") as wf:
                recognizer = KaldiRecognizer(self.model, wf.getframerate())
                recognizer.SetWords(True)

                segments = []
                # Feed the audio in 4000-frame chunks; collect a segment
                # each time the recognizer reports a completed result.
                while data := wf.readframes(4000):
                    if recognizer.AcceptWaveform(data):
                        segments.append(json.loads(recognizer.Result())["text"])

                # Flush whatever is still buffered in the recognizer.
                segments.append(json.loads(recognizer.FinalResult())["text"])

                # Skip empty segments so the transcript has no doubled spaces.
                text = " ".join(segment for segment in segments if segment)
                return {"success": True, "text": text.strip()}
        except Exception as e:
            # Best-effort API: report the failure instead of raising.
            return {"success": False, "error": str(e)}
|
24 |
+
|
25 |
+
|
26 |
+
if __name__ == "__main__":
|
27 |
+
transcriber = VoskTranscriber()
|
28 |
+
print(transcriber.transcribe_audio("output.wav"))
|