csukuangfj committed on
Commit
18b91f7
·
1 Parent(s): c88be4e

add a new zh+en model

Browse files
Files changed (1) hide show
  1. model.py +47 -1
model.py CHANGED
@@ -760,12 +760,57 @@ def _get_wenetspeech_pre_trained_model(
760
  return recognizer
761
 
762
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
763
  @lru_cache(maxsize=10)
764
  def _get_chinese_english_mixed_model(
765
  repo_id: str,
766
  decoding_method: str,
767
  num_active_paths: int,
768
- ):
769
  assert repo_id in [
770
  "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
771
  "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
@@ -1705,6 +1750,7 @@ english_models = {
1705
 
1706
  chinese_english_mixed_models = {
1707
  "csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": _get_streaming_zipformer_pre_trained_model,
 
1708
  "csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28": _get_paraformer_pre_trained_model,
1709
  "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh": _get_chinese_english_mixed_model,
1710
  "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5": _get_chinese_english_mixed_model, # noqa
 
760
  return recognizer
761
 
762
 
763
@lru_cache(maxsize=10)
def _get_chinese_english_mixed_model_onnx(
    repo_id: str,
    decoding_method: str,
    num_active_paths: int,
) -> sherpa_onnx.OfflineRecognizer:
    """Create the offline ONNX transducer recognizer for the zh+en model.

    The encoder, decoder and joiner model files and the token table are
    resolved through ``_get_nn_model_filename`` / ``_get_token_filename``
    and handed to ``sherpa_onnx.OfflineRecognizer.from_transducer``.
    Results are memoized by ``lru_cache`` per
    ``(repo_id, decoding_method, num_active_paths)`` combination.

    Args:
      repo_id:
        Model repository identifier. Only
        ``zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22`` is accepted.
      decoding_method:
        Forwarded unchanged as ``decoding_method`` to the recognizer.
      num_active_paths:
        Forwarded as ``max_active_paths`` to the recognizer.

    Returns:
      The constructed ``sherpa_onnx.OfflineRecognizer``.

    Raises:
      AssertionError: If ``repo_id`` is not one of the supported repos.
    """
    assert repo_id in [
        "zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22",
    ], repo_id

    encoder_model = _get_nn_model_filename(
        repo_id=repo_id,
        filename="encoder-epoch-34-avg-19.int8.onnx",
        subfolder="exp",
    )

    # Must be the decoder network; the previous code mistakenly requested
    # the encoder file here, which cannot serve as a transducer decoder.
    decoder_model = _get_nn_model_filename(
        repo_id=repo_id,
        filename="decoder-epoch-34-avg-19.onnx",
        subfolder="exp",
    )

    joiner_model = _get_nn_model_filename(
        repo_id=repo_id,
        filename="joiner-epoch-34-avg-19.int8.onnx",
        subfolder="exp",
    )

    # The token table lives under the "lang_bbpe_2000" directory
    # (previously misspelled as "lanb_bbpe_2000").
    tokens = _get_token_filename(
        repo_id=repo_id,
        subfolder="data/lang_bbpe_2000",
    )

    recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
        tokens=tokens,
        encoder=encoder_model,
        decoder=decoder_model,
        joiner=joiner_model,
        num_threads=2,
        sample_rate=16000,
        feature_dim=80,
        decoding_method=decoding_method,
        max_active_paths=num_active_paths,
    )

    return recognizer
806
+
807
+
808
  @lru_cache(maxsize=10)
809
  def _get_chinese_english_mixed_model(
810
  repo_id: str,
811
  decoding_method: str,
812
  num_active_paths: int,
813
+ ) -> sherpa.OfflineRecognizer:
814
  assert repo_id in [
815
  "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
816
  "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
 
1750
 
1751
  chinese_english_mixed_models = {
1752
  "csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": _get_streaming_zipformer_pre_trained_model,
1753
+ "zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22": _get_chinese_english_mixed_model_onnx,
1754
  "csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28": _get_paraformer_pre_trained_model,
1755
  "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh": _get_chinese_english_mixed_model,
1756
  "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5": _get_chinese_english_mixed_model, # noqa