jaekookang committed on
Commit
18edec2
1 Parent(s): c54e27c

Update model

Browse files
Files changed (32) hide show
  1. README.md +301 -3
  2. data/en_token_list/bpe_unigram30/bpe.model +3 -0
  3. exp/asr_stats_raw_en_bpe30_sp/train/feats_stats.npz +0 -0
  4. exp/asr_train_asr_transformer_raw_en_bpe30_sp/RESULTS.md +32 -0
  5. exp/asr_train_asr_transformer_raw_en_bpe30_sp/config.yaml +201 -0
  6. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/acc.png +0 -0
  7. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/backward_time.png +0 -0
  8. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/cer.png +0 -0
  9. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/cer_ctc.png +0 -0
  10. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/forward_time.png +0 -0
  11. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/gpu_max_cached_mem_GB.png +0 -0
  12. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/iter_time.png +0 -0
  13. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss.png +0 -0
  14. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss_att.png +0 -0
  15. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss_ctc.png +0 -0
  16. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/optim0_lr0.png +0 -0
  17. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/optim_step_time.png +0 -0
  18. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/train_time.png +0 -0
  19. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/wer.png +0 -0
  20. exp/asr_train_asr_transformer_raw_en_bpe30_sp/valid.acc.ave_10best.pth +3 -0
  21. exp/lm_train_lm_en_bpe30/24epoch.pth +3 -0
  22. exp/lm_train_lm_en_bpe30/config.yaml +148 -0
  23. exp/lm_train_lm_en_bpe30/images/backward_time.png +0 -0
  24. exp/lm_train_lm_en_bpe30/images/forward_time.png +0 -0
  25. exp/lm_train_lm_en_bpe30/images/gpu_max_cached_mem_GB.png +0 -0
  26. exp/lm_train_lm_en_bpe30/images/iter_time.png +0 -0
  27. exp/lm_train_lm_en_bpe30/images/loss.png +0 -0
  28. exp/lm_train_lm_en_bpe30/images/optim0_lr0.png +0 -0
  29. exp/lm_train_lm_en_bpe30/images/optim_step_time.png +0 -0
  30. exp/lm_train_lm_en_bpe30/images/train_time.png +0 -0
  31. exp/lm_train_lm_en_bpe30/perplexity_test/ppl +1 -0
  32. meta.yaml +10 -0
README.md CHANGED
@@ -1,3 +1,301 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - automatic-speech-recognition
6
+ language: en
7
+ datasets:
8
+ - an4
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 ASR model
13
+
14
+ ### `jkang/espnet2_an4_asr`
15
+
16
+ This model was trained by jaekookang using the an4 recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ ```bash
21
+ cd espnet
22
+ git checkout 48422215e272812feb9bbac9d7cf4aae6a316bca
23
+ pip install -e .
24
+ cd egs2/an4/asr1
25
+ ./run.sh --skip_data_prep false --skip_train true --download_model jkang/espnet2_an4_asr
26
+ ```
27
+
28
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
29
+ # RESULTS
30
+ ## Environments
31
+ - date: `Tue Feb 1 13:22:35 KST 2022`
32
+ - python version: `3.9.7 (default, Sep 16 2021, 13:09:58) [GCC 7.5.0]`
33
+ - espnet version: `espnet 0.10.6a1`
34
+ - pytorch version: `pytorch 1.10.1`
35
+ - Git hash: `48422215e272812feb9bbac9d7cf4aae6a316bca`
36
+ - Commit date: `Fri Jan 28 17:25:31 2022 +0000`
37
+
38
+ ## asr_train_asr_transformer_raw_en_bpe30_sp
39
+ ### WER
40
+
41
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
42
+ |---|---|---|---|---|---|---|---|---|
43
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|773|91.5|6.5|2.1|0.6|9.2|38.5|
44
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/train_dev|100|591|88.8|7.4|3.7|0.7|11.8|41.0|
45
+
46
+ ### CER
47
+
48
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
49
+ |---|---|---|---|---|---|---|---|---|
50
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|2565|96.6|1.2|2.2|1.0|4.4|38.5|
51
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/train_dev|100|1915|94.0|1.7|4.3|0.4|6.4|41.0|
52
+
53
+ ### TER
54
+
55
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
56
+ |---|---|---|---|---|---|---|---|---|
57
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|2695|96.8|1.1|2.1|0.9|4.2|38.5|
58
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/train_dev|100|2015|94.3|1.6|4.1|0.4|6.1|41.0|
59
+
60
+ ## ASR config
61
+
62
+ <details><summary>expand</summary>
63
+
64
+ ```
65
+ config: conf/train_asr_transformer.yaml
66
+ print_config: false
67
+ log_level: INFO
68
+ dry_run: false
69
+ iterator_type: sequence
70
+ output_dir: exp/asr_train_asr_transformer_raw_en_bpe30_sp
71
+ ngpu: 1
72
+ seed: 0
73
+ num_workers: 1
74
+ num_att_plot: 3
75
+ dist_backend: nccl
76
+ dist_init_method: env://
77
+ dist_world_size: null
78
+ dist_rank: null
79
+ local_rank: 0
80
+ dist_master_addr: null
81
+ dist_master_port: null
82
+ dist_launcher: null
83
+ multiprocessing_distributed: false
84
+ unused_parameters: false
85
+ sharded_ddp: false
86
+ cudnn_enabled: true
87
+ cudnn_benchmark: false
88
+ cudnn_deterministic: true
89
+ collect_stats: false
90
+ write_collected_feats: false
91
+ max_epoch: 200
92
+ patience: null
93
+ val_scheduler_criterion:
94
+ - valid
95
+ - loss
96
+ early_stopping_criterion:
97
+ - valid
98
+ - loss
99
+ - min
100
+ best_model_criterion:
101
+ - - valid
102
+ - acc
103
+ - max
104
+ keep_nbest_models: 10
105
+ nbest_averaging_interval: 0
106
+ grad_clip: 5.0
107
+ grad_clip_type: 2.0
108
+ grad_noise: false
109
+ accum_grad: 1
110
+ no_forward_run: false
111
+ resume: true
112
+ train_dtype: float32
113
+ use_amp: false
114
+ log_interval: null
115
+ use_matplotlib: true
116
+ use_tensorboard: true
117
+ use_wandb: false
118
+ wandb_project: null
119
+ wandb_id: null
120
+ wandb_entity: null
121
+ wandb_name: null
122
+ wandb_model_log_interval: -1
123
+ detect_anomaly: false
124
+ pretrain_path: null
125
+ init_param: []
126
+ ignore_init_mismatch: false
127
+ freeze_param: []
128
+ num_iters_per_epoch: null
129
+ batch_size: 64
130
+ valid_batch_size: null
131
+ batch_bins: 1000000
132
+ valid_batch_bins: null
133
+ train_shape_file:
134
+ - exp/asr_stats_raw_en_bpe30_sp/train/speech_shape
135
+ - exp/asr_stats_raw_en_bpe30_sp/train/text_shape.bpe
136
+ valid_shape_file:
137
+ - exp/asr_stats_raw_en_bpe30_sp/valid/speech_shape
138
+ - exp/asr_stats_raw_en_bpe30_sp/valid/text_shape.bpe
139
+ batch_type: folded
140
+ valid_batch_type: null
141
+ fold_length:
142
+ - 80000
143
+ - 150
144
+ sort_in_batch: descending
145
+ sort_batch: descending
146
+ multiple_iterator: false
147
+ chunk_length: 500
148
+ chunk_shift_ratio: 0.5
149
+ num_cache_chunks: 1024
150
+ train_data_path_and_name_and_type:
151
+ - - dump/raw/train_nodev_sp/wav.scp
152
+ - speech
153
+ - sound
154
+ - - dump/raw/train_nodev_sp/text
155
+ - text
156
+ - text
157
+ valid_data_path_and_name_and_type:
158
+ - - dump/raw/train_dev/wav.scp
159
+ - speech
160
+ - sound
161
+ - - dump/raw/train_dev/text
162
+ - text
163
+ - text
164
+ allow_variable_data_keys: false
165
+ max_cache_size: 0.0
166
+ max_cache_fd: 32
167
+ valid_max_cache_size: null
168
+ optim: adam
169
+ optim_conf:
170
+ lr: 0.001
171
+ scheduler: warmuplr
172
+ scheduler_conf:
173
+ warmup_steps: 2500
174
+ token_list:
175
+ - <blank>
176
+ - <unk>
177
+ - ▁
178
+ - T
179
+ - E
180
+ - O
181
+ - R
182
+ - Y
183
+ - A
184
+ - H
185
+ - U
186
+ - S
187
+ - I
188
+ - F
189
+ - B
190
+ - L
191
+ - P
192
+ - D
193
+ - G
194
+ - M
195
+ - C
196
+ - V
197
+ - X
198
+ - J
199
+ - K
200
+ - Z
201
+ - W
202
+ - N
203
+ - Q
204
+ - <sos/eos>
205
+ init: xavier_uniform
206
+ input_size: null
207
+ ctc_conf:
208
+ dropout_rate: 0.0
209
+ ctc_type: builtin
210
+ reduce: true
211
+ ignore_nan_grad: true
212
+ joint_net_conf: null
213
+ model_conf:
214
+ ctc_weight: 0.3
215
+ lsm_weight: 0.1
216
+ length_normalized_loss: false
217
+ use_preprocessor: true
218
+ token_type: bpe
219
+ bpemodel: data/en_token_list/bpe_unigram30/bpe.model
220
+ non_linguistic_symbols: null
221
+ cleaner: null
222
+ g2p: null
223
+ speech_volume_normalize: null
224
+ rir_scp: null
225
+ rir_apply_prob: 1.0
226
+ noise_scp: null
227
+ noise_apply_prob: 1.0
228
+ noise_db_range: '13_15'
229
+ frontend: default
230
+ frontend_conf:
231
+ fs: 16k
232
+ specaug: null
233
+ specaug_conf: {}
234
+ normalize: global_mvn
235
+ normalize_conf:
236
+ stats_file: exp/asr_stats_raw_en_bpe30_sp/train/feats_stats.npz
237
+ preencoder: null
238
+ preencoder_conf: {}
239
+ encoder: transformer
240
+ encoder_conf:
241
+ output_size: 256
242
+ attention_heads: 4
243
+ linear_units: 2048
244
+ num_blocks: 12
245
+ dropout_rate: 0.1
246
+ positional_dropout_rate: 0.1
247
+ attention_dropout_rate: 0.0
248
+ input_layer: conv2d
249
+ normalize_before: true
250
+ postencoder: null
251
+ postencoder_conf: {}
252
+ decoder: transformer
253
+ decoder_conf:
254
+ attention_heads: 4
255
+ linear_units: 2048
256
+ num_blocks: 6
257
+ dropout_rate: 0.1
258
+ positional_dropout_rate: 0.1
259
+ self_attention_dropout_rate: 0.0
260
+ src_attention_dropout_rate: 0.0
261
+ required:
262
+ - output_dir
263
+ - token_list
264
+ version: 0.10.6a1
265
+ distributed: false
266
+ ```
267
+
268
+ </details>
269
+
270
+
271
+
272
+ ### Citing ESPnet
273
+
274
+ ```bibtex
275
+ @inproceedings{watanabe2018espnet,
276
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
277
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
278
+ year={2018},
279
+ booktitle={Proceedings of Interspeech},
280
+ pages={2207--2211},
281
+ doi={10.21437/Interspeech.2018-1456},
282
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
283
+ }
284
+
285
+
286
+
287
+
288
+ ```
289
+
290
+ or arXiv:
291
+
292
+ ```bibtex
293
+ @misc{watanabe2018espnet,
294
+ title={ESPnet: End-to-End Speech Processing Toolkit},
295
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
296
+ year={2018},
297
+ eprint={1804.00015},
298
+ archivePrefix={arXiv},
299
+ primaryClass={cs.CL}
300
+ }
301
+ ```
data/en_token_list/bpe_unigram30/bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:711fda202c8a36a6b2b7e2dfd9eea0b414397420cbd29ec7dafd107f62f5a0e9
3
+ size 237994
exp/asr_stats_raw_en_bpe30_sp/train/feats_stats.npz ADDED
Binary file (1.4 kB). View file
 
exp/asr_train_asr_transformer_raw_en_bpe30_sp/RESULTS.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Tue Feb 1 13:22:35 KST 2022`
5
+ - python version: `3.9.7 (default, Sep 16 2021, 13:09:58) [GCC 7.5.0]`
6
+ - espnet version: `espnet 0.10.6a1`
7
+ - pytorch version: `pytorch 1.10.1`
8
+ - Git hash: `48422215e272812feb9bbac9d7cf4aae6a316bca`
9
+ - Commit date: `Fri Jan 28 17:25:31 2022 +0000`
10
+
11
+ ## asr_train_asr_transformer_raw_en_bpe30_sp
12
+ ### WER
13
+
14
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
15
+ |---|---|---|---|---|---|---|---|---|
16
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|773|91.5|6.5|2.1|0.6|9.2|38.5|
17
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/train_dev|100|591|88.8|7.4|3.7|0.7|11.8|41.0|
18
+
19
+ ### CER
20
+
21
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
22
+ |---|---|---|---|---|---|---|---|---|
23
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|2565|96.6|1.2|2.2|1.0|4.4|38.5|
24
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/train_dev|100|1915|94.0|1.7|4.3|0.4|6.4|41.0|
25
+
26
+ ### TER
27
+
28
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
29
+ |---|---|---|---|---|---|---|---|---|
30
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|2695|96.8|1.1|2.1|0.9|4.2|38.5|
31
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/train_dev|100|2015|94.3|1.6|4.1|0.4|6.1|41.0|
32
+
exp/asr_train_asr_transformer_raw_en_bpe30_sp/config.yaml ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_asr_transformer.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/asr_train_asr_transformer_raw_en_bpe30_sp
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 200
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - acc
39
+ - max
40
+ keep_nbest_models: 10
41
+ nbest_averaging_interval: 0
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 1
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: false
50
+ log_interval: null
51
+ use_matplotlib: true
52
+ use_tensorboard: true
53
+ use_wandb: false
54
+ wandb_project: null
55
+ wandb_id: null
56
+ wandb_entity: null
57
+ wandb_name: null
58
+ wandb_model_log_interval: -1
59
+ detect_anomaly: false
60
+ pretrain_path: null
61
+ init_param: []
62
+ ignore_init_mismatch: false
63
+ freeze_param: []
64
+ num_iters_per_epoch: null
65
+ batch_size: 64
66
+ valid_batch_size: null
67
+ batch_bins: 1000000
68
+ valid_batch_bins: null
69
+ train_shape_file:
70
+ - exp/asr_stats_raw_en_bpe30_sp/train/speech_shape
71
+ - exp/asr_stats_raw_en_bpe30_sp/train/text_shape.bpe
72
+ valid_shape_file:
73
+ - exp/asr_stats_raw_en_bpe30_sp/valid/speech_shape
74
+ - exp/asr_stats_raw_en_bpe30_sp/valid/text_shape.bpe
75
+ batch_type: folded
76
+ valid_batch_type: null
77
+ fold_length:
78
+ - 80000
79
+ - 150
80
+ sort_in_batch: descending
81
+ sort_batch: descending
82
+ multiple_iterator: false
83
+ chunk_length: 500
84
+ chunk_shift_ratio: 0.5
85
+ num_cache_chunks: 1024
86
+ train_data_path_and_name_and_type:
87
+ - - dump/raw/train_nodev_sp/wav.scp
88
+ - speech
89
+ - sound
90
+ - - dump/raw/train_nodev_sp/text
91
+ - text
92
+ - text
93
+ valid_data_path_and_name_and_type:
94
+ - - dump/raw/train_dev/wav.scp
95
+ - speech
96
+ - sound
97
+ - - dump/raw/train_dev/text
98
+ - text
99
+ - text
100
+ allow_variable_data_keys: false
101
+ max_cache_size: 0.0
102
+ max_cache_fd: 32
103
+ valid_max_cache_size: null
104
+ optim: adam
105
+ optim_conf:
106
+ lr: 0.001
107
+ scheduler: warmuplr
108
+ scheduler_conf:
109
+ warmup_steps: 2500
110
+ token_list:
111
+ - <blank>
112
+ - <unk>
113
+ - ▁
114
+ - T
115
+ - E
116
+ - O
117
+ - R
118
+ - Y
119
+ - A
120
+ - H
121
+ - U
122
+ - S
123
+ - I
124
+ - F
125
+ - B
126
+ - L
127
+ - P
128
+ - D
129
+ - G
130
+ - M
131
+ - C
132
+ - V
133
+ - X
134
+ - J
135
+ - K
136
+ - Z
137
+ - W
138
+ - N
139
+ - Q
140
+ - <sos/eos>
141
+ init: xavier_uniform
142
+ input_size: null
143
+ ctc_conf:
144
+ dropout_rate: 0.0
145
+ ctc_type: builtin
146
+ reduce: true
147
+ ignore_nan_grad: true
148
+ joint_net_conf: null
149
+ model_conf:
150
+ ctc_weight: 0.3
151
+ lsm_weight: 0.1
152
+ length_normalized_loss: false
153
+ use_preprocessor: true
154
+ token_type: bpe
155
+ bpemodel: data/en_token_list/bpe_unigram30/bpe.model
156
+ non_linguistic_symbols: null
157
+ cleaner: null
158
+ g2p: null
159
+ speech_volume_normalize: null
160
+ rir_scp: null
161
+ rir_apply_prob: 1.0
162
+ noise_scp: null
163
+ noise_apply_prob: 1.0
164
+ noise_db_range: '13_15'
165
+ frontend: default
166
+ frontend_conf:
167
+ fs: 16k
168
+ specaug: null
169
+ specaug_conf: {}
170
+ normalize: global_mvn
171
+ normalize_conf:
172
+ stats_file: exp/asr_stats_raw_en_bpe30_sp/train/feats_stats.npz
173
+ preencoder: null
174
+ preencoder_conf: {}
175
+ encoder: transformer
176
+ encoder_conf:
177
+ output_size: 256
178
+ attention_heads: 4
179
+ linear_units: 2048
180
+ num_blocks: 12
181
+ dropout_rate: 0.1
182
+ positional_dropout_rate: 0.1
183
+ attention_dropout_rate: 0.0
184
+ input_layer: conv2d
185
+ normalize_before: true
186
+ postencoder: null
187
+ postencoder_conf: {}
188
+ decoder: transformer
189
+ decoder_conf:
190
+ attention_heads: 4
191
+ linear_units: 2048
192
+ num_blocks: 6
193
+ dropout_rate: 0.1
194
+ positional_dropout_rate: 0.1
195
+ self_attention_dropout_rate: 0.0
196
+ src_attention_dropout_rate: 0.0
197
+ required:
198
+ - output_dir
199
+ - token_list
200
+ version: 0.10.6a1
201
+ distributed: false
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/acc.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/backward_time.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/cer.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/cer_ctc.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/forward_time.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/gpu_max_cached_mem_GB.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/iter_time.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss_att.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss_ctc.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/optim0_lr0.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/optim_step_time.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/train_time.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/wer.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/valid.acc.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f953bf0dca59f8d67e13df8a15db52643c1697af25d03c5fe5b8b9bc22ff7aa6
3
+ size 108678629
exp/lm_train_lm_en_bpe30/24epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9b9cc6ade63ec8cf80d8dc60e5bf9bcb4f08b08b378ebf9db40656341b87602
3
+ size 27239903
exp/lm_train_lm_en_bpe30/config.yaml ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_lm.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/lm_train_lm_en_bpe30
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 40
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ keep_nbest_models: 1
41
+ nbest_averaging_interval: 0
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 1
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: false
50
+ log_interval: null
51
+ use_matplotlib: true
52
+ use_tensorboard: true
53
+ use_wandb: false
54
+ wandb_project: null
55
+ wandb_id: null
56
+ wandb_entity: null
57
+ wandb_name: null
58
+ wandb_model_log_interval: -1
59
+ detect_anomaly: false
60
+ pretrain_path: null
61
+ init_param: []
62
+ ignore_init_mismatch: false
63
+ freeze_param: []
64
+ num_iters_per_epoch: null
65
+ batch_size: 256
66
+ valid_batch_size: null
67
+ batch_bins: 1000000
68
+ valid_batch_bins: null
69
+ train_shape_file:
70
+ - exp/lm_stats_en_bpe30/train/text_shape.bpe
71
+ valid_shape_file:
72
+ - exp/lm_stats_en_bpe30/valid/text_shape.bpe
73
+ batch_type: folded
74
+ valid_batch_type: null
75
+ fold_length:
76
+ - 150
77
+ sort_in_batch: descending
78
+ sort_batch: descending
79
+ multiple_iterator: false
80
+ chunk_length: 500
81
+ chunk_shift_ratio: 0.5
82
+ num_cache_chunks: 1024
83
+ train_data_path_and_name_and_type:
84
+ - - dump/raw/lm_train.txt
85
+ - text
86
+ - text
87
+ valid_data_path_and_name_and_type:
88
+ - - dump/raw/train_dev/text
89
+ - text
90
+ - text
91
+ allow_variable_data_keys: false
92
+ max_cache_size: 0.0
93
+ max_cache_fd: 32
94
+ valid_max_cache_size: null
95
+ optim: adam
96
+ optim_conf:
97
+ lr: 0.1
98
+ scheduler: null
99
+ scheduler_conf: {}
100
+ token_list:
101
+ - <blank>
102
+ - <unk>
103
+ - ▁
104
+ - T
105
+ - E
106
+ - O
107
+ - R
108
+ - Y
109
+ - A
110
+ - H
111
+ - U
112
+ - S
113
+ - I
114
+ - F
115
+ - B
116
+ - L
117
+ - P
118
+ - D
119
+ - G
120
+ - M
121
+ - C
122
+ - V
123
+ - X
124
+ - J
125
+ - K
126
+ - Z
127
+ - W
128
+ - N
129
+ - Q
130
+ - <sos/eos>
131
+ init: null
132
+ model_conf:
133
+ ignore_id: 0
134
+ use_preprocessor: true
135
+ token_type: bpe
136
+ bpemodel: data/en_token_list/bpe_unigram30/bpe.model
137
+ non_linguistic_symbols: null
138
+ cleaner: null
139
+ g2p: null
140
+ lm: seq_rnn
141
+ lm_conf:
142
+ unit: 650
143
+ nlayers: 2
144
+ required:
145
+ - output_dir
146
+ - token_list
147
+ version: 0.10.6a1
148
+ distributed: false
exp/lm_train_lm_en_bpe30/images/backward_time.png ADDED
exp/lm_train_lm_en_bpe30/images/forward_time.png ADDED
exp/lm_train_lm_en_bpe30/images/gpu_max_cached_mem_GB.png ADDED
exp/lm_train_lm_en_bpe30/images/iter_time.png ADDED
exp/lm_train_lm_en_bpe30/images/loss.png ADDED
exp/lm_train_lm_en_bpe30/images/optim0_lr0.png ADDED
exp/lm_train_lm_en_bpe30/images/optim_step_time.png ADDED
exp/lm_train_lm_en_bpe30/images/train_time.png ADDED
exp/lm_train_lm_en_bpe30/perplexity_test/ppl ADDED
@@ -0,0 +1 @@
 
 
1
+ 7.167095967389925
meta.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ espnet: 0.10.6a1
2
+ files:
3
+ asr_model_file: exp/asr_train_asr_transformer_raw_en_bpe30_sp/valid.acc.ave_10best.pth
4
+ lm_file: exp/lm_train_lm_en_bpe30/24epoch.pth
5
+ python: "3.9.7 (default, Sep 16 2021, 13:09:58) \n[GCC 7.5.0]"
6
+ timestamp: 1643689356.175785
7
+ torch: 1.10.1
8
+ yaml_files:
9
+ asr_train_config: exp/asr_train_asr_transformer_raw_en_bpe30_sp/config.yaml
10
+ lm_train_config: exp/lm_train_lm_en_bpe30/config.yaml