API-Handler committed on
Commit
a4ff920
·
verified ·
1 Parent(s): 7414e80

Upload 18 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Vosk/vosk-model-small-en-us-0.15/am/final.mdl filter=lfs diff=lfs merge=lfs -text
37
+ Vosk/vosk-model-small-en-us-0.15/graph/Gr.fst filter=lfs diff=lfs merge=lfs -text
38
+ Vosk/vosk-model-small-en-us-0.15/graph/HCLr.fst filter=lfs diff=lfs merge=lfs -text
39
+ Vosk/vosk-model-small-en-us-0.15/ivector/final.ie filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as the base image
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file on its own first so the dependency layer is
# rebuilt only when requirements.txt changes
COPY requirements.txt .

# Install the required packages
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container
COPY . .

# Expose the port that FastAPI will run on
EXPOSE 7860

# Command to run the FastAPI application.
# The ASGI object `app` is defined in main.py, so the import string must be
# "main:app"; "fastapi_app:app" pointed at a module not added by this commit.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
Vosk/vosk-model-small-en-us-0.15/README ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ US English model for mobile Vosk applications
2
+
3
+ Copyright 2020 Alpha Cephei Inc
4
+
5
+ Accuracy: 10.38 (tedlium test) 9.85 (librispeech test-clean)
6
+ Speed: 0.11xRT (desktop)
7
+ Latency: 0.15s (right context)
8
+
9
+
Vosk/vosk-model-small-en-us-0.15/am/final.mdl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75370a0137f9daf8f469dedd7daa4513ae7a621f03240c6e512e2b50b656a7b6
3
+ size 15962575
Vosk/vosk-model-small-en-us-0.15/conf/mfcc.conf ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ --sample-frequency=16000
2
+ --use-energy=false
3
+ --num-mel-bins=40
4
+ --num-ceps=40
5
+ --low-freq=20
6
+ --high-freq=7600
7
+ --allow-downsample=true
Vosk/vosk-model-small-en-us-0.15/conf/model.conf ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ --min-active=200
2
+ --max-active=3000
3
+ --beam=10.0
4
+ --lattice-beam=2.0
5
+ --acoustic-scale=1.0
6
+ --frame-subsampling-factor=3
7
+ --endpoint.silence-phones=1:2:3:4:5:6:7:8:9:10
8
+ --endpoint.rule2.min-trailing-silence=0.5
9
+ --endpoint.rule3.min-trailing-silence=0.75
10
+ --endpoint.rule4.min-trailing-silence=1.0
Vosk/vosk-model-small-en-us-0.15/graph/Gr.fst ADDED

Git LFS Details

  • SHA256: 023c8b7e30704a9e37765c635c252e608a02f361235bf94abdcf2a5225d85b20
  • Pointer size: 133 Bytes
  • Size of remote file: 24 MB
Vosk/vosk-model-small-en-us-0.15/graph/HCLr.fst ADDED

Git LFS Details

  • SHA256: 5caafba3081e1646545ac6bff0dd7a318e53dcbdc86f237909ce1d2ac1293d34
  • Pointer size: 133 Bytes
  • Size of remote file: 22.4 MB
Vosk/vosk-model-small-en-us-0.15/graph/disambig_tid.int ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 10015
2
+ 10016
3
+ 10017
4
+ 10018
5
+ 10019
6
+ 10020
7
+ 10021
8
+ 10022
9
+ 10023
10
+ 10024
11
+ 10025
12
+ 10026
13
+ 10027
14
+ 10028
15
+ 10029
16
+ 10030
17
+ 10031
Vosk/vosk-model-small-en-us-0.15/graph/phones/word_boundary.int ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1 nonword
2
+ 2 begin
3
+ 3 end
4
+ 4 internal
5
+ 5 singleton
6
+ 6 nonword
7
+ 7 begin
8
+ 8 end
9
+ 9 internal
10
+ 10 singleton
11
+ 11 begin
12
+ 12 end
13
+ 13 internal
14
+ 14 singleton
15
+ 15 begin
16
+ 16 end
17
+ 17 internal
18
+ 18 singleton
19
+ 19 begin
20
+ 20 end
21
+ 21 internal
22
+ 22 singleton
23
+ 23 begin
24
+ 24 end
25
+ 25 internal
26
+ 26 singleton
27
+ 27 begin
28
+ 28 end
29
+ 29 internal
30
+ 30 singleton
31
+ 31 begin
32
+ 32 end
33
+ 33 internal
34
+ 34 singleton
35
+ 35 begin
36
+ 36 end
37
+ 37 internal
38
+ 38 singleton
39
+ 39 begin
40
+ 40 end
41
+ 41 internal
42
+ 42 singleton
43
+ 43 begin
44
+ 44 end
45
+ 45 internal
46
+ 46 singleton
47
+ 47 begin
48
+ 48 end
49
+ 49 internal
50
+ 50 singleton
51
+ 51 begin
52
+ 52 end
53
+ 53 internal
54
+ 54 singleton
55
+ 55 begin
56
+ 56 end
57
+ 57 internal
58
+ 58 singleton
59
+ 59 begin
60
+ 60 end
61
+ 61 internal
62
+ 62 singleton
63
+ 63 begin
64
+ 64 end
65
+ 65 internal
66
+ 66 singleton
67
+ 67 begin
68
+ 68 end
69
+ 69 internal
70
+ 70 singleton
71
+ 71 begin
72
+ 72 end
73
+ 73 internal
74
+ 74 singleton
75
+ 75 begin
76
+ 76 end
77
+ 77 internal
78
+ 78 singleton
79
+ 79 begin
80
+ 80 end
81
+ 81 internal
82
+ 82 singleton
83
+ 83 begin
84
+ 84 end
85
+ 85 internal
86
+ 86 singleton
87
+ 87 begin
88
+ 88 end
89
+ 89 internal
90
+ 90 singleton
91
+ 91 begin
92
+ 92 end
93
+ 93 internal
94
+ 94 singleton
95
+ 95 begin
96
+ 96 end
97
+ 97 internal
98
+ 98 singleton
99
+ 99 begin
100
+ 100 end
101
+ 101 internal
102
+ 102 singleton
103
+ 103 begin
104
+ 104 end
105
+ 105 internal
106
+ 106 singleton
107
+ 107 begin
108
+ 108 end
109
+ 109 internal
110
+ 110 singleton
111
+ 111 begin
112
+ 112 end
113
+ 113 internal
114
+ 114 singleton
115
+ 115 begin
116
+ 116 end
117
+ 117 internal
118
+ 118 singleton
119
+ 119 begin
120
+ 120 end
121
+ 121 internal
122
+ 122 singleton
123
+ 123 begin
124
+ 124 end
125
+ 125 internal
126
+ 126 singleton
127
+ 127 begin
128
+ 128 end
129
+ 129 internal
130
+ 130 singleton
131
+ 131 begin
132
+ 132 end
133
+ 133 internal
134
+ 134 singleton
135
+ 135 begin
136
+ 136 end
137
+ 137 internal
138
+ 138 singleton
139
+ 139 begin
140
+ 140 end
141
+ 141 internal
142
+ 142 singleton
143
+ 143 begin
144
+ 144 end
145
+ 145 internal
146
+ 146 singleton
147
+ 147 begin
148
+ 148 end
149
+ 149 internal
150
+ 150 singleton
151
+ 151 begin
152
+ 152 end
153
+ 153 internal
154
+ 154 singleton
155
+ 155 begin
156
+ 156 end
157
+ 157 internal
158
+ 158 singleton
159
+ 159 begin
160
+ 160 end
161
+ 161 internal
162
+ 162 singleton
163
+ 163 begin
164
+ 164 end
165
+ 165 internal
166
+ 166 singleton
Vosk/vosk-model-small-en-us-0.15/ivector/final.dubm ADDED
Binary file (168 kB). View file
 
Vosk/vosk-model-small-en-us-0.15/ivector/final.ie ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f37faf90c375b9e4740b569398b5829ed9cc07d19be6d441f72c3b71d7efcc6
3
+ size 8288887
Vosk/vosk-model-small-en-us-0.15/ivector/final.mat ADDED
Binary file (45 kB). View file
 
Vosk/vosk-model-small-en-us-0.15/ivector/global_cmvn.stats ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [
2
+ 1.682383e+11 -1.1595e+10 -1.521733e+10 4.32034e+09 -2.257938e+10 -1.969666e+10 -2.559265e+10 -1.535687e+10 -1.276854e+10 -4.494483e+09 -1.209085e+10 -5.64008e+09 -1.134847e+10 -3.419512e+09 -1.079542e+10 -4.145463e+09 -6.637486e+09 -1.11318e+09 -3.479773e+09 -1.245932e+08 -1.386961e+09 6.560655e+07 -2.436518e+08 -4.032432e+07 4.620046e+08 -7.714964e+07 9.551484e+08 -4.119761e+08 8.208582e+08 -7.117156e+08 7.457703e+08 -4.3106e+08 1.202726e+09 2.904036e+08 1.231931e+09 3.629848e+08 6.366939e+08 -4.586172e+08 -5.267629e+08 -3.507819e+08 1.679838e+09
3
+ 1.741141e+13 8.92488e+11 8.743834e+11 8.848896e+11 1.190313e+12 1.160279e+12 1.300066e+12 1.005678e+12 9.39335e+11 8.089614e+11 7.927041e+11 6.882427e+11 6.444235e+11 5.151451e+11 4.825723e+11 3.210106e+11 2.720254e+11 1.772539e+11 1.248102e+11 6.691599e+10 3.599804e+10 1.207574e+10 1.679301e+09 4.594778e+08 5.821614e+09 1.451758e+10 2.55803e+10 3.43277e+10 4.245286e+10 4.784859e+10 4.988591e+10 4.925451e+10 5.074584e+10 4.9557e+10 4.407876e+10 3.421443e+10 3.138606e+10 2.539716e+10 1.948134e+10 1.381167e+10 0 ]
Vosk/vosk-model-small-en-us-0.15/ivector/online_cmvn.conf ADDED
@@ -0,0 +1 @@
 
 
1
+ # configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
Vosk/vosk-model-small-en-us-0.15/ivector/splice.conf ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ --left-context=3
2
+ --right-context=3
main.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, UploadFile
2
+ from vosk_handler import VoskTranscriber
3
+ import io
4
+
5
app = FastAPI()
# Built once at import time and shared by every request.
transcriber = VoskTranscriber()


@app.post("/transcribe/")
async def transcribe_audio(audio_file: UploadFile):
    """Transcribe an uploaded audio file.

    Args:
        audio_file: The uploaded file; its full content is read into memory
            and handed to the transcriber as an in-memory file-like object.

    Returns:
        The dict produced by ``transcriber.transcribe_audio``, or
        ``{"success": False, "error": <message>}`` if reading the upload or
        dispatching the transcription raises.
    """
    # Function-local import so this block does not depend on the file's
    # import header being edited.
    import asyncio

    try:
        # Read file content into memory
        content = await audio_file.read()
        # Create in-memory file-like object
        audio_data = io.BytesIO(content)
        # The transcriber is synchronous; calling it directly inside this
        # coroutine would stall the event loop (and every other request)
        # until decoding finishes, so run it in the default executor.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, transcriber.transcribe_audio, audio_data
        )
    except Exception as e:
        # Endpoint boundary: report the failure in the response body,
        # matching the error shape the transcriber itself returns.
        return {"success": False, "error": str(e)}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn==0.24.0
3
+ vosk==0.3.45
4
+ requests==2.31.0
5
+ python-multipart==0.0.6
vosk_handler.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from vosk import Model, KaldiRecognizer
2
+ import wave
3
+ import json
4
+
5
class VoskTranscriber:
    """Thin wrapper that transcribes WAV audio with a Vosk model."""

    # Forward slashes work on both POSIX and Windows; the previous backslash
    # default only resolved on Windows.
    def __init__(self, model_path="Vosk/vosk-model-small-en-us-0.15"):
        """Load the Vosk model.

        Args:
            model_path: Path to the Vosk model directory, passed straight to
                ``vosk.Model``.
        """
        self.model = Model(model_path)

    def transcribe_audio(self, audio_data):
        """Transcribe a WAV file.

        Args:
            audio_data: Anything ``wave.open`` accepts in ``"rb"`` mode: a
                filesystem path or a binary file-like object.

        Returns:
            ``{"success": True, "text": <transcript>}`` on success, otherwise
            ``{"success": False, "error": <message>}``. This method does not
            raise; every failure is reported through the returned dict.
        """
        try:
            with wave.open(audio_data, "rb") as wf:
                # The recognizer is fed raw frames, so reject anything that
                # is not mono 16-bit PCM instead of decoding it as garbage.
                if (
                    wf.getnchannels() != 1
                    or wf.getsampwidth() != 2
                    or wf.getcomptype() != "NONE"
                ):
                    return {
                        "success": False,
                        "error": "Audio must be a mono 16-bit PCM WAV file.",
                    }

                recognizer = KaldiRecognizer(self.model, wf.getframerate())
                recognizer.SetWords(True)

                segments = []
                while data := wf.readframes(4000):
                    # AcceptWaveform returning truthy marks a finished
                    # utterance whose text is available via Result().
                    if recognizer.AcceptWaveform(data):
                        segments.append(json.loads(recognizer.Result())["text"])

                # Flush whatever audio is still buffered in the recognizer.
                segments.append(json.loads(recognizer.FinalResult())["text"])

            # Skip empty segments so the transcript has no double spaces.
            text = " ".join(segment for segment in segments if segment)
            return {"success": True, "text": text.strip()}
        except Exception as e:
            # Boundary for callers that expect a result dict, not an exception.
            return {"success": False, "error": str(e)}


if __name__ == "__main__":
    transcriber = VoskTranscriber()
    print(transcriber.transcribe_audio("output.wav"))