Commit
·
4abdf19
1
Parent(s):
129bf83
add tedlium3 models
Browse files
examples.py
CHANGED
@@ -79,6 +79,27 @@ examples = [
|
|
79 |
4,
|
80 |
"./test_wavs/alimeeting/R8009_M8020_N_SPK8026-8026-209.wav",
|
81 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
# librispeech
|
83 |
# https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13/tree/main/test_wavs
|
84 |
[
|
|
|
79 |
4,
|
80 |
"./test_wavs/alimeeting/R8009_M8020_N_SPK8026-8026-209.wav",
|
81 |
],
|
82 |
+
[
|
83 |
+
"English",
|
84 |
+
"videodanchik/icefall-asr-tedlium3-conformer-ctc2",
|
85 |
+
"greedy_search",
|
86 |
+
4,
|
87 |
+
"./test_wavs/tedlium3/DanBarber_2010-219.wav",
|
88 |
+
],
|
89 |
+
[
|
90 |
+
"English",
|
91 |
+
"videodanchik/icefall-asr-tedlium3-conformer-ctc2",
|
92 |
+
"greedy_search",
|
93 |
+
4,
|
94 |
+
"./test_wavs/tedlium3/DanielKahneman_2010-157.wav",
|
95 |
+
],
|
96 |
+
[
|
97 |
+
"English",
|
98 |
+
"videodanchik/icefall-asr-tedlium3-conformer-ctc2",
|
99 |
+
"greedy_search",
|
100 |
+
4,
|
101 |
+
"./test_wavs/tedlium3/RobertGupta_2010U-15.wav",
|
102 |
+
],
|
103 |
# librispeech
|
104 |
# https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13/tree/main/test_wavs
|
105 |
[
|
model.py
CHANGED
@@ -176,7 +176,7 @@ def _get_gigaspeech_pre_trained_model(
|
|
176 |
|
177 |
|
178 |
@lru_cache(maxsize=10)
|
179 |
-
def _get_librispeech_pre_trained_model(
|
180 |
repo_id: str,
|
181 |
decoding_method: str,
|
182 |
num_active_paths: int,
|
@@ -186,6 +186,7 @@ def _get_librispeech_pre_trained_model(
|
|
186 |
"csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13", # noqa
|
187 |
"csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11", # noqa
|
188 |
"csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14", # noqa
|
|
|
189 |
], repo_id
|
190 |
|
191 |
filename = "cpu_jit.pt"
|
@@ -205,7 +206,12 @@ def _get_librispeech_pre_trained_model(
|
|
205 |
repo_id=repo_id,
|
206 |
filename=filename,
|
207 |
)
|
208 |
-
|
|
|
|
|
|
|
|
|
|
|
209 |
|
210 |
feat_config = sherpa.FeatureConfig()
|
211 |
feat_config.fbank_opts.frame_opts.samp_freq = sample_rate
|
@@ -548,10 +554,11 @@ chinese_models = {
|
|
548 |
|
549 |
english_models = {
|
550 |
"wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2": _get_gigaspeech_pre_trained_model, # noqa
|
551 |
-
"WeijiZhuang/icefall-asr-librispeech-pruned-transducer-stateless8-2022-12-02": _get_librispeech_pre_trained_model, # noqa
|
552 |
-
"csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14": _get_librispeech_pre_trained_model, # noqa
|
553 |
-
"csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11": _get_librispeech_pre_trained_model, # noqa
|
554 |
-
"csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13": _get_librispeech_pre_trained_model, # noqa
|
|
|
555 |
"csukuangfj/wenet-english-model": _get_wenet_model,
|
556 |
}
|
557 |
|
|
|
176 |
|
177 |
|
178 |
@lru_cache(maxsize=10)
|
179 |
+
def _get_english_model(
|
180 |
repo_id: str,
|
181 |
decoding_method: str,
|
182 |
num_active_paths: int,
|
|
|
186 |
"csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13", # noqa
|
187 |
"csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11", # noqa
|
188 |
"csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14", # noqa
|
189 |
+
"videodanchik/icefall-asr-tedlium3-conformer-ctc2",
|
190 |
], repo_id
|
191 |
|
192 |
filename = "cpu_jit.pt"
|
|
|
206 |
repo_id=repo_id,
|
207 |
filename=filename,
|
208 |
)
|
209 |
+
subfolder = "data/lang_bpe_500"
|
210 |
+
|
211 |
+
if repo_id == "videodanchik/icefall-asr-tedlium3-conformer-ctc2":
|
212 |
+
subfolder = "data/lang_bpe"
|
213 |
+
|
214 |
+
tokens = _get_token_filename(repo_id=repo_id, subfolder=subfolder)
|
215 |
|
216 |
feat_config = sherpa.FeatureConfig()
|
217 |
feat_config.fbank_opts.frame_opts.samp_freq = sample_rate
|
|
|
554 |
|
555 |
english_models = {
|
556 |
"wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2": _get_gigaspeech_pre_trained_model, # noqa
|
557 |
+
"WeijiZhuang/icefall-asr-librispeech-pruned-transducer-stateless8-2022-12-02": _get_english_model, # noqa
|
558 |
+
"csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14": _get_english_model, # noqa
|
559 |
+
"csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11": _get_english_model, # noqa
|
560 |
+
"csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13": _get_english_model, # noqa
|
561 |
+
"videodanchik/icefall-asr-tedlium3-conformer-ctc2": _get_english_model,
|
562 |
"csukuangfj/wenet-english-model": _get_wenet_model,
|
563 |
}
|
564 |
|
test_wavs/tedlium3/DanBarber_2010-219.wav
ADDED
Binary file (117 kB). View file
|
|
test_wavs/tedlium3/DanielKahneman_2010-157.wav
ADDED
Binary file (404 kB). View file
|
|
test_wavs/tedlium3/RobertGupta_2010U-15.wav
ADDED
Binary file (555 kB). View file
|
|
test_wavs/tedlium3/trans.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
DanBarber_2010-219 well the last year this property had six hundred thousand birds on it
|
2 |
+
RobertGupta_2010U-15 and he was talking about invisible demons and smoke and how someone was poisoning him in his sleep and i was afraid not for myself but i was afraid that i was going to lose him that he was going to sink into one of his states
|
3 |
+
DanielKahneman_2010-157 goes very different ways depending on how you think and whether you think of the remembering self or you think of the experiencing self this is going to influence policy i think in years to come in the united states efforts are being made
|