Commit
·
18b91f7
1
Parent(s):
c88be4e
add a new zh+en model
Browse files
model.py
CHANGED
@@ -760,12 +760,57 @@ def _get_wenetspeech_pre_trained_model(
|
|
760 |
return recognizer
|
761 |
|
762 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
763 |
@lru_cache(maxsize=10)
|
764 |
def _get_chinese_english_mixed_model(
|
765 |
repo_id: str,
|
766 |
decoding_method: str,
|
767 |
num_active_paths: int,
|
768 |
-
):
|
769 |
assert repo_id in [
|
770 |
"luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
|
771 |
"ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
|
@@ -1705,6 +1750,7 @@ english_models = {
|
|
1705 |
|
1706 |
chinese_english_mixed_models = {
|
1707 |
"csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": _get_streaming_zipformer_pre_trained_model,
|
|
|
1708 |
"csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28": _get_paraformer_pre_trained_model,
|
1709 |
"ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh": _get_chinese_english_mixed_model,
|
1710 |
"luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5": _get_chinese_english_mixed_model, # noqa
|
|
|
760 |
return recognizer
|
761 |
|
762 |
|
763 |
+
@lru_cache(maxsize=10)
def _get_chinese_english_mixed_model_onnx(
    repo_id: str,
    decoding_method: str,
    num_active_paths: int,
) -> sherpa_onnx.OfflineRecognizer:
    """Build an offline ONNX transducer recognizer for the zh+en zipformer model.

    Results are memoized by ``lru_cache`` (up to 10 distinct argument
    combinations), so repeated calls with the same arguments return the
    same recognizer object.

    Args:
      repo_id:
        Model repository identifier. Must be one of the repositories
        listed in the assertion below.
      decoding_method:
        Forwarded unchanged as ``decoding_method`` to
        ``sherpa_onnx.OfflineRecognizer.from_transducer``.
      num_active_paths:
        Forwarded as ``max_active_paths`` to
        ``sherpa_onnx.OfflineRecognizer.from_transducer``.

    Returns:
      The recognizer created by
      ``sherpa_onnx.OfflineRecognizer.from_transducer``.

    Raises:
      AssertionError: If ``repo_id`` is not a supported repository.
    """
    assert repo_id in [
        "zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22",
    ], repo_id

    # The encoder and joiner use the int8 files; the decoder uses the
    # non-quantized file.
    encoder_model = _get_nn_model_filename(
        repo_id=repo_id,
        filename="encoder-epoch-34-avg-19.int8.onnx",
        subfolder="exp",
    )

    # NOTE: this must be the *decoder* graph. It previously requested the
    # encoder file by mistake, which handed an encoder to the decoder slot.
    decoder_model = _get_nn_model_filename(
        repo_id=repo_id,
        filename="decoder-epoch-34-avg-19.onnx",
        subfolder="exp",
    )

    joiner_model = _get_nn_model_filename(
        repo_id=repo_id,
        filename="joiner-epoch-34-avg-19.int8.onnx",
        subfolder="exp",
    )

    # "lang_bbpe_2000" (was misspelled "lanb_bbpe_2000").
    # NOTE(review): confirm the directory name against the model repository.
    tokens = _get_token_filename(repo_id=repo_id, subfolder="data/lang_bbpe_2000")

    recognizer = sherpa_onnx.OfflineRecognizer.from_transducer(
        tokens=tokens,
        encoder=encoder_model,
        decoder=decoder_model,
        joiner=joiner_model,
        num_threads=2,
        sample_rate=16000,
        feature_dim=80,
        decoding_method=decoding_method,
        max_active_paths=num_active_paths,
    )

    return recognizer
|
806 |
+
|
807 |
+
|
808 |
@lru_cache(maxsize=10)
|
809 |
def _get_chinese_english_mixed_model(
|
810 |
repo_id: str,
|
811 |
decoding_method: str,
|
812 |
num_active_paths: int,
|
813 |
+
) -> sherpa.OfflineRecognizer:
|
814 |
assert repo_id in [
|
815 |
"luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
|
816 |
"ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
|
|
|
1750 |
|
1751 |
chinese_english_mixed_models = {
|
1752 |
"csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20": _get_streaming_zipformer_pre_trained_model,
|
1753 |
+
"zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22": _get_chinese_english_mixed_model_onnx,
|
1754 |
"csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28": _get_paraformer_pre_trained_model,
|
1755 |
"ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh": _get_chinese_english_mixed_model,
|
1756 |
"luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5": _get_chinese_english_mixed_model, # noqa
|