update
Browse files- toolbox/k2_sherpa/examples.py +59 -3
- toolbox/k2_sherpa/nn_models.py +106 -30
toolbox/k2_sherpa/examples.py
CHANGED
@@ -43,6 +43,30 @@ examples = [
|
|
43 |
"Yes",
|
44 |
"./data/test_wavs/cantonese/1.wav",
|
45 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
[
|
47 |
"German",
|
48 |
"csukuangfj/wav2vec2.0-torchaudio",
|
@@ -84,11 +108,43 @@ examples = [
|
|
84 |
"./data/test_wavs/french/common_voice_fr_27024649.wav",
|
85 |
],
|
86 |
[
|
87 |
-
"
|
88 |
-
"
|
89 |
"greedy_search",
|
90 |
4,
|
91 |
"No",
|
92 |
-
"./data/test_wavs/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
],
|
94 |
]
|
|
|
43 |
"Yes",
|
44 |
"./data/test_wavs/cantonese/1.wav",
|
45 |
],
|
46 |
+
[
|
47 |
+
"Tibetan",
|
48 |
+
"syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
|
49 |
+
"greedy_search",
|
50 |
+
4,
|
51 |
+
"No",
|
52 |
+
"./data/test_wavs/tibetan/a_0_cacm-A70_31117.wav",
|
53 |
+
],
|
54 |
+
[
|
55 |
+
"Tibetan",
|
56 |
+
"syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
|
57 |
+
"greedy_search",
|
58 |
+
4,
|
59 |
+
"No",
|
60 |
+
"./data/test_wavs/tibetan/a_0_cacm-A70_31116.wav",
|
61 |
+
],
|
62 |
+
[
|
63 |
+
"Tibetan",
|
64 |
+
"syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
|
65 |
+
"greedy_search",
|
66 |
+
4,
|
67 |
+
"No",
|
68 |
+
"./data/test_wavs/tibetan/a_0_cacm-A70_31118.wav",
|
69 |
+
],
|
70 |
[
|
71 |
"German",
|
72 |
"csukuangfj/wav2vec2.0-torchaudio",
|
|
|
108 |
"./data/test_wavs/french/common_voice_fr_27024649.wav",
|
109 |
],
|
110 |
[
|
111 |
+
"Russian",
|
112 |
+
"alphacep/vosk-model-ru",
|
113 |
"greedy_search",
|
114 |
4,
|
115 |
"No",
|
116 |
+
"./data/test_wavs/russian/russian-i-love-you.wav",
|
117 |
+
],
|
118 |
+
[
|
119 |
+
"Russian",
|
120 |
+
"alphacep/vosk-model-ru",
|
121 |
+
"greedy_search",
|
122 |
+
4,
|
123 |
+
"No",
|
124 |
+
"./data/test_wavs/russian/test.wav",
|
125 |
+
],
|
126 |
+
[
|
127 |
+
"Arabic",
|
128 |
+
"AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
|
129 |
+
"greedy_search",
|
130 |
+
4,
|
131 |
+
"No",
|
132 |
+
"./data/test_wavs/arabic/a.wav",
|
133 |
+
],
|
134 |
+
[
|
135 |
+
"Arabic",
|
136 |
+
"AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
|
137 |
+
"greedy_search",
|
138 |
+
4,
|
139 |
+
"No",
|
140 |
+
"./data/test_wavs/arabic/b.wav",
|
141 |
+
],
|
142 |
+
[
|
143 |
+
"Arabic",
|
144 |
+
"AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
|
145 |
+
"greedy_search",
|
146 |
+
4,
|
147 |
+
"No",
|
148 |
+
"./data/test_wavs/arabic/c.wav",
|
149 |
],
|
150 |
]
|
toolbox/k2_sherpa/nn_models.py
CHANGED
@@ -206,6 +206,7 @@ model_map = {
|
|
206 |
"nn_model_file_sub_folder": "exp",
|
207 |
"tokens_file": "./giga-tokens.txt",
|
208 |
"tokens_file_sub_folder": ".",
|
|
|
209 |
"loader": "load_sherpa_offline_recognizer",
|
210 |
},
|
211 |
{
|
@@ -214,6 +215,7 @@ model_map = {
|
|
214 |
"nn_model_file_sub_folder": "exp",
|
215 |
"tokens_file": "tokens.txt",
|
216 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
|
|
217 |
"loader": "load_sherpa_offline_recognizer",
|
218 |
},
|
219 |
{
|
@@ -222,6 +224,7 @@ model_map = {
|
|
222 |
"nn_model_file_sub_folder": "exp",
|
223 |
"tokens_file": "tokens.txt",
|
224 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
|
|
225 |
"loader": "load_sherpa_offline_recognizer",
|
226 |
},
|
227 |
{
|
@@ -230,6 +233,7 @@ model_map = {
|
|
230 |
"nn_model_file_sub_folder": "exp",
|
231 |
"tokens_file": "tokens.txt",
|
232 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
|
|
233 |
"loader": "load_sherpa_offline_recognizer",
|
234 |
},
|
235 |
{
|
@@ -238,6 +242,7 @@ model_map = {
|
|
238 |
"nn_model_file_sub_folder": "exp",
|
239 |
"tokens_file": "tokens.txt",
|
240 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
|
|
241 |
"loader": "load_sherpa_offline_recognizer",
|
242 |
},
|
243 |
{
|
@@ -246,6 +251,7 @@ model_map = {
|
|
246 |
"nn_model_file_sub_folder": "exp",
|
247 |
"tokens_file": "tokens.txt",
|
248 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
|
|
249 |
"loader": "load_sherpa_offline_recognizer",
|
250 |
},
|
251 |
{
|
@@ -254,6 +260,7 @@ model_map = {
|
|
254 |
"nn_model_file_sub_folder": "exp",
|
255 |
"tokens_file": "tokens.txt",
|
256 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
|
|
257 |
"loader": "load_sherpa_offline_recognizer",
|
258 |
},
|
259 |
{
|
@@ -270,6 +277,7 @@ model_map = {
|
|
270 |
"nn_model_file_sub_folder": "exp",
|
271 |
"tokens_file": "tokens.txt",
|
272 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
|
|
273 |
"loader": "load_sherpa_offline_recognizer",
|
274 |
},
|
275 |
{
|
@@ -278,6 +286,7 @@ model_map = {
|
|
278 |
"nn_model_file_sub_folder": "exp",
|
279 |
"tokens_file": "tokens.txt",
|
280 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
|
|
281 |
"loader": "load_sherpa_offline_recognizer",
|
282 |
},
|
283 |
{
|
@@ -286,6 +295,7 @@ model_map = {
|
|
286 |
"nn_model_file_sub_folder": "exp",
|
287 |
"tokens_file": "tokens.txt",
|
288 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
|
|
289 |
"loader": "load_sherpa_offline_recognizer",
|
290 |
},
|
291 |
{
|
@@ -294,6 +304,7 @@ model_map = {
|
|
294 |
"nn_model_file_sub_folder": "exp",
|
295 |
"tokens_file": "tokens.txt",
|
296 |
"tokens_file_sub_folder": "data/lang_bpe",
|
|
|
297 |
"loader": "load_sherpa_offline_recognizer",
|
298 |
},
|
299 |
{
|
@@ -302,6 +313,7 @@ model_map = {
|
|
302 |
"nn_model_file_sub_folder": "exp",
|
303 |
"tokens_file": "tokens.txt",
|
304 |
"tokens_file_sub_folder": "data/lang_bpe",
|
|
|
305 |
"loader": "load_sherpa_offline_recognizer",
|
306 |
},
|
307 |
{
|
@@ -310,6 +322,7 @@ model_map = {
|
|
310 |
"nn_model_file_sub_folder": "exp",
|
311 |
"tokens_file": "tokens.txt",
|
312 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
|
|
313 |
"loader": "load_sherpa_offline_recognizer",
|
314 |
},
|
315 |
{
|
@@ -318,6 +331,7 @@ model_map = {
|
|
318 |
"nn_model_file_sub_folder": ".",
|
319 |
"tokens_file": "units.txt",
|
320 |
"tokens_file_sub_folder": ".",
|
|
|
321 |
"loader": "load_sherpa_offline_recognizer",
|
322 |
},
|
323 |
],
|
@@ -348,6 +362,7 @@ model_map = {
|
|
348 |
"nn_model_file_sub_folder": "exp",
|
349 |
"tokens_file": "tokens.txt",
|
350 |
"tokens_file_sub_folder": "data/lang_char_bpe",
|
|
|
351 |
"loader": "load_sherpa_offline_recognizer",
|
352 |
},
|
353 |
{
|
@@ -356,6 +371,7 @@ model_map = {
|
|
356 |
"nn_model_file_sub_folder": "exp",
|
357 |
"tokens_file": "tokens.txt",
|
358 |
"tokens_file_sub_folder": "data/lang_char",
|
|
|
359 |
"loader": "load_sherpa_offline_recognizer",
|
360 |
},
|
361 |
],
|
@@ -393,34 +409,34 @@ model_map = {
|
|
393 |
"loader": "load_sherpa_onnx_offline_recognizer_from_transducer",
|
394 |
},
|
395 |
],
|
396 |
-
"Japanese": [
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
],
|
424 |
"German": [
|
425 |
{
|
426 |
"repo_id": "csukuangfj/wav2vec2.0-torchaudio",
|
@@ -428,8 +444,7 @@ model_map = {
|
|
428 |
"nn_model_file_sub_folder": ".",
|
429 |
"tokens_file": "tokens-de.txt",
|
430 |
"tokens_file_sub_folder": ".",
|
431 |
-
"
|
432 |
-
"loader": "load_sherpa_offline_recognizer",
|
433 |
},
|
434 |
],
|
435 |
"French": [
|
@@ -446,6 +461,42 @@ model_map = {
|
|
446 |
"loader": "load_sherpa_onnx_online_recognizer_from_transducer",
|
447 |
},
|
448 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
449 |
"Tibetan": [
|
450 |
{
|
451 |
"repo_id": "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
|
@@ -453,6 +504,7 @@ model_map = {
|
|
453 |
"nn_model_file_sub_folder": "exp",
|
454 |
"tokens_file": "tokens.txt",
|
455 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
|
|
456 |
"loader": "load_sherpa_offline_recognizer",
|
457 |
},
|
458 |
{
|
@@ -461,6 +513,7 @@ model_map = {
|
|
461 |
"nn_model_file_sub_folder": "exp",
|
462 |
"tokens_file": "tokens.txt",
|
463 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
|
|
464 |
"loader": "load_sherpa_offline_recognizer",
|
465 |
},
|
466 |
],
|
@@ -551,6 +604,24 @@ def load_sherpa_offline_recognizer(nn_model_file: str,
|
|
551 |
return recognizer
|
552 |
|
553 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
554 |
def load_sherpa_onnx_offline_recognizer_from_paraformer(nn_model_file: str,
|
555 |
tokens_file: str,
|
556 |
sample_rate: int = 16000,
|
@@ -730,6 +801,11 @@ def load_recognizer(local_model_dir: Path,
|
|
730 |
num_active_paths=num_active_paths,
|
731 |
**kwargs_
|
732 |
)
|
|
|
|
|
|
|
|
|
|
|
733 |
elif loader == "load_sherpa_onnx_offline_recognizer_from_paraformer":
|
734 |
recognizer = load_sherpa_onnx_offline_recognizer_from_paraformer(
|
735 |
decoding_method=decoding_method,
|
|
|
206 |
"nn_model_file_sub_folder": "exp",
|
207 |
"tokens_file": "./giga-tokens.txt",
|
208 |
"tokens_file_sub_folder": ".",
|
209 |
+
"normalize_samples": True,
|
210 |
"loader": "load_sherpa_offline_recognizer",
|
211 |
},
|
212 |
{
|
|
|
215 |
"nn_model_file_sub_folder": "exp",
|
216 |
"tokens_file": "tokens.txt",
|
217 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
218 |
+
"normalize_samples": True,
|
219 |
"loader": "load_sherpa_offline_recognizer",
|
220 |
},
|
221 |
{
|
|
|
224 |
"nn_model_file_sub_folder": "exp",
|
225 |
"tokens_file": "tokens.txt",
|
226 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
227 |
+
"normalize_samples": True,
|
228 |
"loader": "load_sherpa_offline_recognizer",
|
229 |
},
|
230 |
{
|
|
|
233 |
"nn_model_file_sub_folder": "exp",
|
234 |
"tokens_file": "tokens.txt",
|
235 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
236 |
+
"normalize_samples": True,
|
237 |
"loader": "load_sherpa_offline_recognizer",
|
238 |
},
|
239 |
{
|
|
|
242 |
"nn_model_file_sub_folder": "exp",
|
243 |
"tokens_file": "tokens.txt",
|
244 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
245 |
+
"normalize_samples": True,
|
246 |
"loader": "load_sherpa_offline_recognizer",
|
247 |
},
|
248 |
{
|
|
|
251 |
"nn_model_file_sub_folder": "exp",
|
252 |
"tokens_file": "tokens.txt",
|
253 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
254 |
+
"normalize_samples": True,
|
255 |
"loader": "load_sherpa_offline_recognizer",
|
256 |
},
|
257 |
{
|
|
|
260 |
"nn_model_file_sub_folder": "exp",
|
261 |
"tokens_file": "tokens.txt",
|
262 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
263 |
+
"normalize_samples": True,
|
264 |
"loader": "load_sherpa_offline_recognizer",
|
265 |
},
|
266 |
{
|
|
|
277 |
"nn_model_file_sub_folder": "exp",
|
278 |
"tokens_file": "tokens.txt",
|
279 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
280 |
+
"normalize_samples": True,
|
281 |
"loader": "load_sherpa_offline_recognizer",
|
282 |
},
|
283 |
{
|
|
|
286 |
"nn_model_file_sub_folder": "exp",
|
287 |
"tokens_file": "tokens.txt",
|
288 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
289 |
+
"normalize_samples": True,
|
290 |
"loader": "load_sherpa_offline_recognizer",
|
291 |
},
|
292 |
{
|
|
|
295 |
"nn_model_file_sub_folder": "exp",
|
296 |
"tokens_file": "tokens.txt",
|
297 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
298 |
+
"normalize_samples": True,
|
299 |
"loader": "load_sherpa_offline_recognizer",
|
300 |
},
|
301 |
{
|
|
|
304 |
"nn_model_file_sub_folder": "exp",
|
305 |
"tokens_file": "tokens.txt",
|
306 |
"tokens_file_sub_folder": "data/lang_bpe",
|
307 |
+
"normalize_samples": True,
|
308 |
"loader": "load_sherpa_offline_recognizer",
|
309 |
},
|
310 |
{
|
|
|
313 |
"nn_model_file_sub_folder": "exp",
|
314 |
"tokens_file": "tokens.txt",
|
315 |
"tokens_file_sub_folder": "data/lang_bpe",
|
316 |
+
"normalize_samples": True,
|
317 |
"loader": "load_sherpa_offline_recognizer",
|
318 |
},
|
319 |
{
|
|
|
322 |
"nn_model_file_sub_folder": "exp",
|
323 |
"tokens_file": "tokens.txt",
|
324 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
325 |
+
"normalize_samples": True,
|
326 |
"loader": "load_sherpa_offline_recognizer",
|
327 |
},
|
328 |
{
|
|
|
331 |
"nn_model_file_sub_folder": ".",
|
332 |
"tokens_file": "units.txt",
|
333 |
"tokens_file_sub_folder": ".",
|
334 |
+
"normalize_samples": False,
|
335 |
"loader": "load_sherpa_offline_recognizer",
|
336 |
},
|
337 |
],
|
|
|
362 |
"nn_model_file_sub_folder": "exp",
|
363 |
"tokens_file": "tokens.txt",
|
364 |
"tokens_file_sub_folder": "data/lang_char_bpe",
|
365 |
+
"normalize_samples": True,
|
366 |
"loader": "load_sherpa_offline_recognizer",
|
367 |
},
|
368 |
{
|
|
|
371 |
"nn_model_file_sub_folder": "exp",
|
372 |
"tokens_file": "tokens.txt",
|
373 |
"tokens_file_sub_folder": "data/lang_char",
|
374 |
+
"normalize_samples": True,
|
375 |
"loader": "load_sherpa_offline_recognizer",
|
376 |
},
|
377 |
],
|
|
|
409 |
"loader": "load_sherpa_onnx_offline_recognizer_from_transducer",
|
410 |
},
|
411 |
],
|
412 |
+
# "Japanese": [
|
413 |
+
# {
|
414 |
+
# "repo_id": "TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-fluent",
|
415 |
+
# "encoder_model_file": "encoder_jit_trace.pt",
|
416 |
+
# "encoder_model_file_sub_folder": "exp_fluent",
|
417 |
+
# "decoder_model_file": "decoder_jit_trace.pt",
|
418 |
+
# "decoder_model_file_sub_folder": "exp_fluent",
|
419 |
+
# "joiner_model_file": "joiner_jit_trace.pt",
|
420 |
+
# "joiner_model_file_sub_folder": "exp_fluent",
|
421 |
+
# "tokens_file": "tokens.txt",
|
422 |
+
# "tokens_file_sub_folder": "data/lang_char",
|
423 |
+
# "normalize_samples": True,
|
424 |
+
# "loader": "load_sherpa_online_recognizer",
|
425 |
+
# },
|
426 |
+
# {
|
427 |
+
# "repo_id": "TeoWenShen/icefall-asr-csj-pruned-transducer-stateless7-streaming-230208-disfluent",
|
428 |
+
# "encoder_model_file": "encoder_jit_trace.pt",
|
429 |
+
# "encoder_model_file_sub_folder": "exp_disfluent",
|
430 |
+
# "decoder_model_file": "decoder_jit_trace.pt",
|
431 |
+
# "decoder_model_file_sub_folder": "exp_disfluent",
|
432 |
+
# "joiner_model_file": "joiner_jit_trace.pt",
|
433 |
+
# "joiner_model_file_sub_folder": "exp_disfluent",
|
434 |
+
# "tokens_file": "tokens.txt",
|
435 |
+
# "tokens_file_sub_folder": "data/lang_char",
|
436 |
+
# "normalize_samples": True,
|
437 |
+
# "loader": "load_sherpa_online_recognizer",
|
438 |
+
# },
|
439 |
+
# ],
|
440 |
"German": [
|
441 |
{
|
442 |
"repo_id": "csukuangfj/wav2vec2.0-torchaudio",
|
|
|
444 |
"nn_model_file_sub_folder": ".",
|
445 |
"tokens_file": "tokens-de.txt",
|
446 |
"tokens_file_sub_folder": ".",
|
447 |
+
"loader": "load_sherpa_offline_recognizer_without_feat_config",
|
|
|
448 |
},
|
449 |
],
|
450 |
"French": [
|
|
|
461 |
"loader": "load_sherpa_onnx_online_recognizer_from_transducer",
|
462 |
},
|
463 |
],
|
464 |
+
"Russian": [
|
465 |
+
{
|
466 |
+
"repo_id": "alphacep/vosk-model-ru",
|
467 |
+
"encoder_model_file": "encoder.onnx",
|
468 |
+
"encoder_model_file_sub_folder": "am-onnx",
|
469 |
+
"decoder_model_file": "decoder.onnx",
|
470 |
+
"decoder_model_file_sub_folder": "am-onnx",
|
471 |
+
"joiner_model_file": "joiner.onnx",
|
472 |
+
"joiner_model_file_sub_folder": "am-onnx",
|
473 |
+
"tokens_file": "tokens.txt",
|
474 |
+
"tokens_file_sub_folder": "lang",
|
475 |
+
"loader": "load_sherpa_onnx_offline_recognizer_from_transducer",
|
476 |
+
},
|
477 |
+
{
|
478 |
+
"repo_id": "alphacep/vosk-model-small-ru",
|
479 |
+
"encoder_model_file": "encoder.onnx",
|
480 |
+
"encoder_model_file_sub_folder": "am",
|
481 |
+
"decoder_model_file": "decoder.onnx",
|
482 |
+
"decoder_model_file_sub_folder": "am",
|
483 |
+
"joiner_model_file": "joiner.onnx",
|
484 |
+
"joiner_model_file_sub_folder": "am",
|
485 |
+
"tokens_file": "tokens.txt",
|
486 |
+
"tokens_file_sub_folder": "lang",
|
487 |
+
"loader": "load_sherpa_onnx_offline_recognizer_from_transducer",
|
488 |
+
},
|
489 |
+
],
|
490 |
+
"Arabic": [
|
491 |
+
{
|
492 |
+
"repo_id": "AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
|
493 |
+
"nn_model_file": "cpu_jit.pt",
|
494 |
+
"nn_model_file_sub_folder": "exp",
|
495 |
+
"tokens_file": "tokens.txt",
|
496 |
+
"tokens_file_sub_folder": "data/lang_bpe_5000",
|
497 |
+
"loader": "load_sherpa_offline_recognizer_without_feat_config",
|
498 |
+
},
|
499 |
+
],
|
500 |
"Tibetan": [
|
501 |
{
|
502 |
"repo_id": "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
|
|
|
504 |
"nn_model_file_sub_folder": "exp",
|
505 |
"tokens_file": "tokens.txt",
|
506 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
507 |
+
"normalize_samples": True,
|
508 |
"loader": "load_sherpa_offline_recognizer",
|
509 |
},
|
510 |
{
|
|
|
513 |
"nn_model_file_sub_folder": "exp",
|
514 |
"tokens_file": "tokens.txt",
|
515 |
"tokens_file_sub_folder": "data/lang_bpe_500",
|
516 |
+
"normalize_samples": True,
|
517 |
"loader": "load_sherpa_offline_recognizer",
|
518 |
},
|
519 |
],
|
|
|
604 |
return recognizer
|
605 |
|
606 |
|
607 |
+
def load_sherpa_offline_recognizer_without_feat_config(nn_model_file: str,
|
608 |
+
tokens_file: str,
|
609 |
+
num_active_paths: int = 2,
|
610 |
+
decoding_method: str = "greedy_search",
|
611 |
+
):
|
612 |
+
config = sherpa.OfflineRecognizerConfig(
|
613 |
+
nn_model=nn_model_file,
|
614 |
+
tokens=tokens_file,
|
615 |
+
use_gpu=False,
|
616 |
+
decoding_method=decoding_method,
|
617 |
+
num_active_paths=num_active_paths,
|
618 |
+
)
|
619 |
+
|
620 |
+
recognizer = sherpa.OfflineRecognizer(config)
|
621 |
+
|
622 |
+
return recognizer
|
623 |
+
|
624 |
+
|
625 |
def load_sherpa_onnx_offline_recognizer_from_paraformer(nn_model_file: str,
|
626 |
tokens_file: str,
|
627 |
sample_rate: int = 16000,
|
|
|
801 |
num_active_paths=num_active_paths,
|
802 |
**kwargs_
|
803 |
)
|
804 |
+
elif loader == "load_sherpa_offline_recognizer_without_feat_config":
|
805 |
+
recognizer = load_sherpa_offline_recognizer_without_feat_config(
|
806 |
+
decoding_method=decoding_method,
|
807 |
+
**kwargs_
|
808 |
+
)
|
809 |
elif loader == "load_sherpa_onnx_offline_recognizer_from_paraformer":
|
810 |
recognizer = load_sherpa_onnx_offline_recognizer_from_paraformer(
|
811 |
decoding_method=decoding_method,
|