add ctc prefix beam search results

#1
decoding_results/ctc-prefix-beam-search/errs-test-clean-epoch-50_avg-26_beam-4_use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
decoding_results/ctc-prefix-beam-search/errs-test-other-epoch-50_avg-26_beam-4_use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
decoding_results/ctc-prefix-beam-search/log-decode-epoch-50_avg-26_beam-4_use-averaged-model-2024-10-01-17-27-53 ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-10-01 17:27:53,949 INFO [ctc_decode.py:844] Decoding started
2
+ 2024-10-01 17:27:53,949 INFO [ctc_decode.py:852] Device: cuda:0
3
+ 2024-10-01 17:27:53,949 INFO [ctc_decode.py:853] {
4
+ "attention_decoder_attention_dim": 512,
5
+ "attention_decoder_dim": 512,
6
+ "attention_decoder_feedforward_dim": 2048,
7
+ "attention_decoder_num_heads": 8,
8
+ "attention_decoder_num_layers": 6,
9
+ "avg": 26,
10
+ "batch_idx_train": 0,
11
+ "beam": 4,
12
+ "best_train_epoch": -1,
13
+ "best_train_loss": Infinity,
14
+ "best_valid_epoch": -1,
15
+ "best_valid_loss": Infinity,
16
+ "bpe_model": "data/lang_bpe_500/bpe.model",
17
+ "bucketing_sampler": true,
18
+ "causal": false,
19
+ "chunk_size": "16,32,64,-1",
20
+ "cnn_module_kernel": "31,31,15,15,15,31",
21
+ "concatenate_cuts": false,
22
+ "context_size": 2,
23
+ "decoder_dim": 512,
24
+ "decoding_method": "prefix-beam-search",
25
+ "device": "cuda:0",
26
+ "downsampling_factor": "1,2,4,8,4,2",
27
+ "drop_last": true,
28
+ "duration_factor": 1.0,
29
+ "enable_musan": true,
30
+ "enable_spec_aug": true,
31
+ "encoder_dim": "192,256,512,768,512,256",
32
+ "encoder_unmasked_dim": "192,192,256,320,256,192",
33
+ "env_info": {
34
+ "IP address": "10.30.18.160",
35
+ "hostname": "de-74279-k2-train-6-0905175136-74fb5b4b6f-p65lp",
36
+ "icefall-git-branch": "cr-ctc",
37
+ "icefall-git-date": "Sun Sep 29 12:00:45 2024",
38
+ "icefall-git-sha1": "33fa9e8b-dirty",
39
+ "icefall-path": "/star-kw/kangwei/code/icefall_cotrain",
40
+ "k2-build-type": "Release",
41
+ "k2-git-date": "Fri Feb 23 01:48:38 2024",
42
+ "k2-git-sha1": "ff1d435a8d3c4eaa15828a84a7240678a70539a7",
43
+ "k2-path": "/star-kw/kangwei/envs/c5/lib/python3.8/site-packages/k2/__init__.py",
44
+ "k2-version": "1.24.4",
45
+ "k2-with-cuda": true,
46
+ "lhotse-path": "/star-kw/kangwei/envs/c5/lib/python3.8/site-packages/lhotse/__init__.py",
47
+ "lhotse-version": "1.25.0.dev+git.da4d70d.clean",
48
+ "python-version": "3.8",
49
+ "torch-cuda-available": true,
50
+ "torch-cuda-version": "11.8",
51
+ "torch-version": "2.1.0+cu118"
52
+ },
53
+ "epoch": 50,
54
+ "exp_dir": "zipformer/exp_cr_large",
55
+ "feature_dim": 80,
56
+ "feedforward_dim": "512,768,1536,2048,1536,768",
57
+ "frame_shift_ms": 10,
58
+ "full_libri": true,
59
+ "gap": 1.0,
60
+ "hlg_scale": 0.6,
61
+ "ignore_id": -1,
62
+ "input_strategy": "PrecomputedFeatures",
63
+ "iter": 0,
64
+ "joiner_dim": 512,
65
+ "label_smoothing": 0.1,
66
+ "lang_dir": "data/lang_bpe_500",
67
+ "left_context_frames": "64,128,256,-1",
68
+ "lm_avg": 1,
69
+ "lm_dir": "data/lm",
70
+ "lm_epoch": 7,
71
+ "lm_exp_dir": null,
72
+ "lm_scale": 0.3,
73
+ "lm_type": "rnn",
74
+ "lm_vocab_size": 500,
75
+ "log_interval": 50,
76
+ "manifest_dir": "data/fbank",
77
+ "max_active_states": 10000,
78
+ "max_duration": 2000,
79
+ "min_active_states": 30,
80
+ "mini_libri": false,
81
+ "nbest_scale": 1.0,
82
+ "num_buckets": 30,
83
+ "num_encoder_layers": "2,2,4,5,4,2",
84
+ "num_heads": "4,4,4,8,4,4",
85
+ "num_paths": 100,
86
+ "num_workers": 2,
87
+ "on_the_fly_feats": false,
88
+ "output_beam": 8,
89
+ "pos_dim": 48,
90
+ "pos_head_dim": "4",
91
+ "query_head_dim": "32",
92
+ "res_dir": "zipformer/exp_cr_large/prefix-beam-search",
93
+ "reset_interval": 200,
94
+ "return_cuts": true,
95
+ "rnn_lm_embedding_dim": 2048,
96
+ "rnn_lm_hidden_dim": 2048,
97
+ "rnn_lm_num_layers": 3,
98
+ "rnn_lm_tie_weights": true,
99
+ "search_beam": 20,
100
+ "shuffle": true,
101
+ "skip_scoring": false,
102
+ "spec_aug_time_warp_factor": 80,
103
+ "subsampling_factor": 4,
104
+ "suffix": "epoch-50_avg-26_beam-4_use-averaged-model",
105
+ "transformer_lm_dim_feedforward": 2048,
106
+ "transformer_lm_embedding_dim": 768,
107
+ "transformer_lm_encoder_dim": 768,
108
+ "transformer_lm_exp_dir": null,
109
+ "transformer_lm_nhead": 8,
110
+ "transformer_lm_num_layers": 16,
111
+ "transformer_lm_tie_weights": true,
112
+ "use_attention_decoder": false,
113
+ "use_averaged_model": true,
114
+ "use_cr_ctc": false,
115
+ "use_ctc": true,
116
+ "use_double_scores": true,
117
+ "use_transducer": false,
118
+ "valid_interval": 3000,
119
+ "value_head_dim": "12",
120
+ "warm_step": 2000
121
+ }
122
+ 2024-10-01 17:27:54,226 INFO [lexicon.py:168] Loading pre-compiled data/lang_bpe_500/Linv.pt
123
+ 2024-10-01 17:27:54,279 INFO [ctc_decode.py:962] About to create model
124
+ 2024-10-01 17:27:55,310 INFO [ctc_decode.py:1029] Calculating the averaged model over epoch range from 24 (excluded) to 50
125
+ 2024-10-01 17:28:04,596 INFO [ctc_decode.py:1046] Number of model parameters: 147010094
126
+ 2024-10-01 17:28:04,596 INFO [asr_datamodule.py:467] About to get test-clean cuts
127
+ 2024-10-01 17:28:04,610 INFO [asr_datamodule.py:474] About to get test-other cuts
128
+ 2024-10-01 17:28:11,489 INFO [ctc_decode.py:720] batch 0/?, cuts processed until now is 123
129
+ 2024-10-01 17:29:46,243 INFO [ctc_decode.py:739] The transcripts are stored in zipformer/exp_cr_large/prefix-beam-search/recogs-test-clean-epoch-50_avg-26_beam-4_use-averaged-model.txt
130
+ 2024-10-01 17:29:46,335 INFO [utils.py:668] [test-clean_prefix-beam-search] %WER 2.02% [1060 / 52576, 114 ins, 82 del, 864 sub ]
131
+ 2024-10-01 17:29:46,542 INFO [ctc_decode.py:767] Wrote detailed error stats to zipformer/exp_cr_large/prefix-beam-search/errs-test-clean-epoch-50_avg-26_beam-4_use-averaged-model.txt
132
+ 2024-10-01 17:29:46,548 INFO [ctc_decode.py:783]
133
+ For test-clean, WER of different settings are:
134
+ prefix-beam-search 2.02 best for test-clean
135
+
136
+ 2024-10-01 17:29:52,962 INFO [ctc_decode.py:720] batch 0/?, cuts processed until now is 142
137
+ 2024-10-01 17:31:35,541 INFO [ctc_decode.py:739] The transcripts are stored in zipformer/exp_cr_large/prefix-beam-search/recogs-test-other-epoch-50_avg-26_beam-4_use-averaged-model.txt
138
+ 2024-10-01 17:31:35,635 INFO [utils.py:668] [test-other_prefix-beam-search] %WER 4.35% [2276 / 52343, 224 ins, 180 del, 1872 sub ]
139
+ 2024-10-01 17:31:35,842 INFO [ctc_decode.py:767] Wrote detailed error stats to zipformer/exp_cr_large/prefix-beam-search/errs-test-other-epoch-50_avg-26_beam-4_use-averaged-model.txt
140
+ 2024-10-01 17:31:35,848 INFO [ctc_decode.py:783]
141
+ For test-other, WER of different settings are:
142
+ prefix-beam-search 4.35 best for test-other
143
+
144
+ 2024-10-01 17:31:35,848 INFO [ctc_decode.py:1087] Done!
decoding_results/ctc-prefix-beam-search/recogs-test-clean-epoch-50_avg-26_beam-4_use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
decoding_results/ctc-prefix-beam-search/recogs-test-other-epoch-50_avg-26_beam-4_use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
decoding_results/ctc-prefix-beam-search/wer-summary-test-clean-epoch-50_avg-26_beam-4_use-averaged-model.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ settings WER
2
+ prefix-beam-search 2.02
decoding_results/ctc-prefix-beam-search/wer-summary-test-other-epoch-50_avg-26_beam-4_use-averaged-model.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ settings WER
2
+ prefix-beam-search 4.35