HoneyTian committed on
Commit
3e3be60
1 Parent(s): 03aae6c
Files changed (2) hide show
  1. main.py +1 -0
  2. toolbox/k2_sherpa/nn_models.py +39 -1
main.py CHANGED
@@ -112,6 +112,7 @@ def process(
112
  loader=m_dict["loader"],
113
  decoding_method=decoding_method,
114
  num_active_paths=num_active_paths,
 
115
  )
116
 
117
  # transcribe
 
112
  loader=m_dict["loader"],
113
  decoding_method=decoding_method,
114
  num_active_paths=num_active_paths,
115
+ normalize_samples=m_dict["normalize_samples"],
116
  )
117
 
118
  # transcribe
toolbox/k2_sherpa/nn_models.py CHANGED
@@ -50,7 +50,19 @@ model_map = {
50
  "tokens_file_sub_folder": "data/lang_char",
51
  "loader": "load_sherpa_offline_recognizer",
52
  "normalize_samples": True,
53
- }
 
 
 
 
 
 
 
 
 
 
 
 
54
  ],
55
  "English": [
56
  {
@@ -156,6 +168,30 @@ def load_sherpa_offline_recognizer_from_paraformer(nn_model_file: str,
156
  return recognizer
157
 
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  def load_recognizer(repo_id: str,
160
  nn_model_file: str,
161
  nn_model_file_sub_folder: str,
@@ -165,6 +201,7 @@ def load_recognizer(repo_id: str,
165
  loader: str,
166
  decoding_method: str = "greedy_search",
167
  num_active_paths: int = 4,
 
168
  ):
169
  if not os.path.exists(local_model_dir):
170
  download_model(
@@ -185,6 +222,7 @@ def load_recognizer(repo_id: str,
185
  tokens_file=tokens_file,
186
  decoding_method=decoding_method,
187
  num_active_paths=num_active_paths,
 
188
  )
189
  elif loader == "load_sherpa_offline_recognizer_from_paraformer":
190
  recognizer = load_sherpa_offline_recognizer_from_paraformer(
 
50
  "tokens_file_sub_folder": "data/lang_char",
51
  "loader": "load_sherpa_offline_recognizer",
52
  "normalize_samples": True,
53
+ },
54
+ {
55
+ "repo_id": "zrjin/sherpa-onnx-zipformer-multi-zh-hans-2023-9-2",
56
+ "encoder_model_file": "encoder-epoch-20-avg-1.onnx",
57
+ "encoder_model_file_sub_folder": ".",
58
+ "decoder_model_file": "decoder-epoch-20-avg-1.onnx",
59
+ "decoder_model_file_sub_folder": ".",
60
+ "joiner_model_file": "joiner-epoch-20-avg-1.onnx",
61
+ "joiner_model_file_sub_folder": ".",
62
+ "tokens_file": "tokens.txt",
63
+ "tokens_file_sub_folder": ".",
64
+ "loader": "load_sherpa_offline_recognizer_from_transducer",
65
+ },
66
  ],
67
  "English": [
68
  {
 
168
  return recognizer
169
 
170
 
171
def load_sherpa_offline_recognizer_from_transducer(encoder_model_file: str,
                                                   decoder_model_file: str,
                                                   joiner_model_file: str,
                                                   tokens_file: str,
                                                   sample_rate: int = 16000,
                                                   decoding_method: str = "greedy_search",
                                                   feature_dim: int = 80,
                                                   num_threads: int = 2,
                                                   num_active_paths: int = 2,
                                                   ):
    """
    Create an offline recognizer from the files of a transducer model.

    This is a thin wrapper that forwards its arguments to
    ``sherpa_onnx.OfflineRecognizer.from_transducer``.

    :param encoder_model_file: passed through as ``encoder``.
    :param decoder_model_file: passed through as ``decoder``.
    :param joiner_model_file: passed through as ``joiner``.
    :param tokens_file: passed through as ``tokens``.
    :param sample_rate: passed through as ``sample_rate``.
    :param decoding_method: passed through as ``decoding_method``.
    :param feature_dim: passed through as ``feature_dim``.
    :param num_threads: passed through as ``num_threads``.
    :param num_active_paths: passed through as ``max_active_paths``
        (note the different keyword name on the sherpa_onnx side).
    :return: the object returned by ``from_transducer``.
    """
    # Collect the keyword arguments first so the renaming of
    # ``num_active_paths`` -> ``max_active_paths`` is easy to spot.
    transducer_kwargs = {
        "encoder": encoder_model_file,
        "decoder": decoder_model_file,
        "joiner": joiner_model_file,
        "tokens": tokens_file,
        "num_threads": num_threads,
        "sample_rate": sample_rate,
        "feature_dim": feature_dim,
        "decoding_method": decoding_method,
        "max_active_paths": num_active_paths,
    }
    return sherpa_onnx.OfflineRecognizer.from_transducer(**transducer_kwargs)
193
+
194
+
195
  def load_recognizer(repo_id: str,
196
  nn_model_file: str,
197
  nn_model_file_sub_folder: str,
 
201
  loader: str,
202
  decoding_method: str = "greedy_search",
203
  num_active_paths: int = 4,
204
+ normalize_samples: bool = False,
205
  ):
206
  if not os.path.exists(local_model_dir):
207
  download_model(
 
222
  tokens_file=tokens_file,
223
  decoding_method=decoding_method,
224
  num_active_paths=num_active_paths,
225
+ normalize_samples=normalize_samples,
226
  )
227
  elif loader == "load_sherpa_offline_recognizer_from_paraformer":
228
  recognizer = load_sherpa_offline_recognizer_from_paraformer(