jkang committed on
Commit
42497ce
1 Parent(s): df089c0

Update model

Files changed (32)
  1. README.md +306 -1
  2. data/en_token_list/bpe_unigram30/bpe.model +3 -0
  3. exp/asr_stats_raw_en_bpe30_sp/train/feats_stats.npz +3 -0
  4. exp/asr_train_asr_transformer_raw_en_bpe30_sp/RESULTS.md +32 -0
  5. exp/asr_train_asr_transformer_raw_en_bpe30_sp/config.yaml +205 -0
  6. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/acc.png +0 -0
  7. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/backward_time.png +0 -0
  8. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/cer.png +0 -0
  9. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/cer_ctc.png +0 -0
  10. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/forward_time.png +0 -0
  11. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/gpu_max_cached_mem_GB.png +0 -0
  12. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/iter_time.png +0 -0
  13. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss.png +0 -0
  14. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss_att.png +0 -0
  15. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss_ctc.png +0 -0
  16. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/optim0_lr0.png +0 -0
  17. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/optim_step_time.png +0 -0
  18. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/train_time.png +0 -0
  19. exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/wer.png +0 -0
  20. exp/asr_train_asr_transformer_raw_en_bpe30_sp/valid.acc.ave_10best.pth +3 -0
  21. exp/lm_train_lm_en_bpe30/23epoch.pth +3 -0
  22. exp/lm_train_lm_en_bpe30/config.yaml +149 -0
  23. exp/lm_train_lm_en_bpe30/images/backward_time.png +0 -0
  24. exp/lm_train_lm_en_bpe30/images/forward_time.png +0 -0
  25. exp/lm_train_lm_en_bpe30/images/gpu_max_cached_mem_GB.png +0 -0
  26. exp/lm_train_lm_en_bpe30/images/iter_time.png +0 -0
  27. exp/lm_train_lm_en_bpe30/images/loss.png +0 -0
  28. exp/lm_train_lm_en_bpe30/images/optim0_lr0.png +0 -0
  29. exp/lm_train_lm_en_bpe30/images/optim_step_time.png +0 -0
  30. exp/lm_train_lm_en_bpe30/images/train_time.png +0 -0
  31. exp/lm_train_lm_en_bpe30/perplexity_test/ppl +1 -0
  32. meta.yaml +10 -0
README.md CHANGED
@@ -1,3 +1,308 @@
  ---
- license: mit
+ tags:
+ - espnet
+ - audio
+ - automatic-speech-recognition
+ language: en
+ datasets:
+ - an4
+ license: cc-by-4.0
  ---
+
+ ## ESPnet2 ASR model
+
+ ### `jkang/espnet2_an4_transformer`
+
+ This model was trained by jaekookang using the an4 recipe in [espnet](https://github.com/espnet/espnet/).
+
+ ### Demo: How to use in ESPnet2
+
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
+ if you haven't done that already.
+
+ ```bash
+ cd espnet
+ git checkout c8f11ef7f5c571fbcc34d53da449353bd75037ce
+ pip install -e .
+ cd egs2/an4/asr1
+ ./run.sh --skip_data_prep false --skip_train true --download_model jkang/espnet2_an4_transformer
+ ```
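+
+ Alternatively, you can decode directly from Python. This is a minimal sketch (it
+ assumes `espnet` and `espnet_model_zoo` are installed and that `speech.wav` is a
+ 16 kHz mono recording; the decoding options shown are illustrative, not the
+ recipe's exact values):
+
+ ```python
+ import soundfile
+ from espnet2.bin.asr_inference import Speech2Text
+
+ # Download the model from the Hub and build the ASR + LM decoding pipeline.
+ speech2text = Speech2Text.from_pretrained(
+     "jkang/espnet2_an4_transformer",
+     ctc_weight=0.3,   # illustrative decoding weights
+     beam_size=10,
+ )
+
+ speech, rate = soundfile.read("speech.wav")
+ nbests = speech2text(speech)
+ text, tokens, token_ids, hyp = nbests[0]
+ print(text)
+ ```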
+
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
+ # RESULTS
+ ## Environments
+ - date: `Fri Aug 19 17:38:46 KST 2022`
+ - python version: `3.9.12 (main, Apr 5 2022, 06:56:58) [GCC 7.5.0]`
+ - espnet version: `espnet 202207`
+ - pytorch version: `pytorch 1.10.1`
+ - Git hash: `c8f11ef7f5c571fbcc34d53da449353bd75037ce`
+ - Commit date: `Fri Aug 19 17:20:13 2022 +0900`
+
+ ## asr_train_asr_transformer_raw_en_bpe30_sp
+ ### WER
+
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+ |---|---|---|---|---|---|---|---|---|
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|773|92.0|5.8|2.2|0.4|8.4|33.1|
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/train_dev|100|591|89.5|7.3|3.2|0.5|11.0|41.0|
+
+ ### CER
+
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+ |---|---|---|---|---|---|---|---|---|
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|2565|96.3|1.1|2.6|0.6|4.3|33.1|
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/train_dev|100|1915|94.1|1.9|4.0|0.4|6.3|41.0|
+
+ ### TER
+
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+ |---|---|---|---|---|---|---|---|---|
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|2695|96.4|1.1|2.5|0.6|4.1|33.1|
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/train_dev|100|2015|94.4|1.8|3.8|0.3|6.0|41.0|
+
+ ## ASR config
+
+ <details><summary>expand</summary>
+
+ ```
+ config: conf/train_asr_transformer.yaml
+ print_config: false
+ log_level: INFO
+ dry_run: false
+ iterator_type: sequence
+ output_dir: exp/asr_train_asr_transformer_raw_en_bpe30_sp
+ ngpu: 1
+ seed: 0
+ num_workers: 1
+ num_att_plot: 3
+ dist_backend: nccl
+ dist_init_method: env://
+ dist_world_size: 4
+ dist_rank: 0
+ local_rank: 0
+ dist_master_addr: localhost
+ dist_master_port: 43015
+ dist_launcher: null
+ multiprocessing_distributed: true
+ unused_parameters: false
+ sharded_ddp: false
+ cudnn_enabled: true
+ cudnn_benchmark: false
+ cudnn_deterministic: true
+ collect_stats: false
+ write_collected_feats: false
+ max_epoch: 200
+ patience: null
+ val_scheduler_criterion:
+ - valid
+ - loss
+ early_stopping_criterion:
+ - valid
+ - loss
+ - min
+ best_model_criterion:
+ -   - valid
+     - acc
+     - max
+ keep_nbest_models: 10
+ nbest_averaging_interval: 0
+ grad_clip: 5.0
+ grad_clip_type: 2.0
+ grad_noise: false
+ accum_grad: 1
+ no_forward_run: false
+ resume: true
+ train_dtype: float32
+ use_amp: false
+ log_interval: null
+ use_matplotlib: true
+ use_tensorboard: true
+ create_graph_in_tensorboard: false
+ use_wandb: false
+ wandb_project: null
+ wandb_id: null
+ wandb_entity: null
+ wandb_name: null
+ wandb_model_log_interval: -1
+ detect_anomaly: false
+ pretrain_path: null
+ init_param: []
+ ignore_init_mismatch: false
+ freeze_param: []
+ num_iters_per_epoch: null
+ batch_size: 64
+ valid_batch_size: null
+ batch_bins: 1000000
+ valid_batch_bins: null
+ train_shape_file:
+ - exp/asr_stats_raw_en_bpe30_sp/train/speech_shape
+ - exp/asr_stats_raw_en_bpe30_sp/train/text_shape.bpe
+ valid_shape_file:
+ - exp/asr_stats_raw_en_bpe30_sp/valid/speech_shape
+ - exp/asr_stats_raw_en_bpe30_sp/valid/text_shape.bpe
+ batch_type: folded
+ valid_batch_type: null
+ fold_length:
+ - 80000
+ - 150
+ sort_in_batch: descending
+ sort_batch: descending
+ multiple_iterator: false
+ chunk_length: 500
+ chunk_shift_ratio: 0.5
+ num_cache_chunks: 1024
+ train_data_path_and_name_and_type:
+ -   - dump/raw/train_nodev_sp/wav.scp
+     - speech
+     - sound
+ -   - dump/raw/train_nodev_sp/text
+     - text
+     - text
+ valid_data_path_and_name_and_type:
+ -   - dump/raw/train_dev/wav.scp
+     - speech
+     - sound
+ -   - dump/raw/train_dev/text
+     - text
+     - text
+ allow_variable_data_keys: false
+ max_cache_size: 0.0
+ max_cache_fd: 32
+ valid_max_cache_size: null
+ optim: adam
+ optim_conf:
+     lr: 0.001
+ scheduler: warmuplr
+ scheduler_conf:
+     warmup_steps: 2500
+ token_list:
+ - <blank>
+ - <unk>
+ - ▁
+ - T
+ - E
+ - O
+ - R
+ - Y
+ - A
+ - H
+ - U
+ - S
+ - I
+ - F
+ - B
+ - L
+ - P
+ - D
+ - G
+ - M
+ - C
+ - V
+ - X
+ - J
+ - K
+ - Z
+ - W
+ - N
+ - Q
+ - <sos/eos>
+ init: xavier_uniform
+ input_size: null
+ ctc_conf:
+     dropout_rate: 0.0
+     ctc_type: builtin
+     reduce: true
+     ignore_nan_grad: null
+     zero_infinity: true
+ joint_net_conf: null
+ use_preprocessor: true
+ token_type: bpe
+ bpemodel: data/en_token_list/bpe_unigram30/bpe.model
+ non_linguistic_symbols: null
+ cleaner: null
+ g2p: null
+ speech_volume_normalize: null
+ rir_scp: null
+ rir_apply_prob: 1.0
+ noise_scp: null
+ noise_apply_prob: 1.0
+ noise_db_range: '13_15'
+ short_noise_thres: 0.5
+ frontend: default
+ frontend_conf:
+     fs: 16k
+ specaug: null
+ specaug_conf: {}
+ normalize: global_mvn
+ normalize_conf:
+     stats_file: exp/asr_stats_raw_en_bpe30_sp/train/feats_stats.npz
+ model: espnet
+ model_conf:
+     ctc_weight: 0.3
+     lsm_weight: 0.1
+     length_normalized_loss: false
+ preencoder: null
+ preencoder_conf: {}
+ encoder: transformer
+ encoder_conf:
+     output_size: 256
+     attention_heads: 4
+     linear_units: 2048
+     num_blocks: 12
+     dropout_rate: 0.1
+     positional_dropout_rate: 0.1
+     attention_dropout_rate: 0.0
+     input_layer: conv2d
+     normalize_before: true
+ postencoder: null
+ postencoder_conf: {}
+ decoder: transformer
+ decoder_conf:
+     attention_heads: 4
+     linear_units: 2048
+     num_blocks: 6
+     dropout_rate: 0.1
+     positional_dropout_rate: 0.1
+     self_attention_dropout_rate: 0.0
+     src_attention_dropout_rate: 0.0
+ required:
+ - output_dir
+ - token_list
+ version: '202207'
+ distributed: true
+ ```
+
+ </details>
+
+ ### Citing ESPnet
+
+ ```bibtex
+ @inproceedings{watanabe2018espnet,
+   author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+   title={{ESPnet}: End-to-End Speech Processing Toolkit},
+   year={2018},
+   booktitle={Proceedings of Interspeech},
+   pages={2207--2211},
+   doi={10.21437/Interspeech.2018-1456},
+   url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
+ }
+ ```
+
+ or arXiv:
+
+ ```bibtex
+ @misc{watanabe2018espnet,
+   title={ESPnet: End-to-End Speech Processing Toolkit},
+   author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
+   year={2018},
+   eprint={1804.00015},
+   archivePrefix={arXiv},
+   primaryClass={cs.CL}
+ }
+ ```
data/en_token_list/bpe_unigram30/bpe.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:711fda202c8a36a6b2b7e2dfd9eea0b414397420cbd29ec7dafd107f62f5a0e9
+ size 237994
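
`bpe.model` is stored as a Git LFS pointer; after `git lfs pull` it is an ordinary SentencePiece model whose 30-piece vocabulary matches the character-level token list in the config above. A minimal sketch of inspecting it, assuming the `sentencepiece` package is installed:

```python
# Hedged sketch: tokenize an AN4-style (uppercase) transcript with the repo's
# SentencePiece model; the exact segmentation depends on the trained model.
import sentencepiece as spm

sp = spm.SentencePieceProcessor(model_file="data/en_token_list/bpe_unigram30/bpe.model")
print(sp.get_piece_size())                     # vocabulary size requested by bpe_unigram30
print(sp.encode("HELLO WORLD", out_type=str))  # list of subword pieces such as '▁', 'H', 'E', ...
```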
exp/asr_stats_raw_en_bpe30_sp/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3a09378a62d433d3db78f20ed9f1cc3189aa381fb84756c161d2fb9731976412
+ size 1402
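
`feats_stats.npz` holds the global mean/variance statistics referenced by `normalize_conf.stats_file` in the ASR config (`normalize: global_mvn`). A hedged peek at its contents with NumPy; the key names printed are simply whatever the archive actually contains:

```python
# Hedged sketch: list the arrays stored in the global-MVN statistics file.
import numpy as np

stats = np.load("exp/asr_stats_raw_en_bpe30_sp/train/feats_stats.npz")
for name in stats.files:
    print(name, stats[name].shape, stats[name].dtype)
```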
exp/asr_train_asr_transformer_raw_en_bpe30_sp/RESULTS.md ADDED
@@ -0,0 +1,32 @@
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
+ # RESULTS
+ ## Environments
+ - date: `Fri Aug 19 17:38:46 KST 2022`
+ - python version: `3.9.12 (main, Apr 5 2022, 06:56:58) [GCC 7.5.0]`
+ - espnet version: `espnet 202207`
+ - pytorch version: `pytorch 1.10.1`
+ - Git hash: `c8f11ef7f5c571fbcc34d53da449353bd75037ce`
+ - Commit date: `Fri Aug 19 17:20:13 2022 +0900`
+
+ ## asr_train_asr_transformer_raw_en_bpe30_sp
+ ### WER
+
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+ |---|---|---|---|---|---|---|---|---|
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|773|92.0|5.8|2.2|0.4|8.4|33.1|
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/train_dev|100|591|89.5|7.3|3.2|0.5|11.0|41.0|
+
+ ### CER
+
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+ |---|---|---|---|---|---|---|---|---|
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|2565|96.3|1.1|2.6|0.6|4.3|33.1|
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/train_dev|100|1915|94.1|1.9|4.0|0.4|6.3|41.0|
+
+ ### TER
+
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+ |---|---|---|---|---|---|---|---|---|
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/test|130|2695|96.4|1.1|2.5|0.6|4.1|33.1|
+ |decode_asr_lm_lm_train_lm_en_bpe30_valid.loss.ave_asr_model_valid.acc.ave/train_dev|100|2015|94.4|1.8|3.8|0.3|6.0|41.0|
+
exp/asr_train_asr_transformer_raw_en_bpe30_sp/config.yaml ADDED
@@ -0,0 +1,205 @@
1
+ config: conf/train_asr_transformer.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/asr_train_asr_transformer_raw_en_bpe30_sp
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 4
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 43015
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 200
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - acc
39
+ - max
40
+ keep_nbest_models: 10
41
+ nbest_averaging_interval: 0
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 1
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: false
50
+ log_interval: null
51
+ use_matplotlib: true
52
+ use_tensorboard: true
53
+ create_graph_in_tensorboard: false
54
+ use_wandb: false
55
+ wandb_project: null
56
+ wandb_id: null
57
+ wandb_entity: null
58
+ wandb_name: null
59
+ wandb_model_log_interval: -1
60
+ detect_anomaly: false
61
+ pretrain_path: null
62
+ init_param: []
63
+ ignore_init_mismatch: false
64
+ freeze_param: []
65
+ num_iters_per_epoch: null
66
+ batch_size: 64
67
+ valid_batch_size: null
68
+ batch_bins: 1000000
69
+ valid_batch_bins: null
70
+ train_shape_file:
71
+ - exp/asr_stats_raw_en_bpe30_sp/train/speech_shape
72
+ - exp/asr_stats_raw_en_bpe30_sp/train/text_shape.bpe
73
+ valid_shape_file:
74
+ - exp/asr_stats_raw_en_bpe30_sp/valid/speech_shape
75
+ - exp/asr_stats_raw_en_bpe30_sp/valid/text_shape.bpe
76
+ batch_type: folded
77
+ valid_batch_type: null
78
+ fold_length:
79
+ - 80000
80
+ - 150
81
+ sort_in_batch: descending
82
+ sort_batch: descending
83
+ multiple_iterator: false
84
+ chunk_length: 500
85
+ chunk_shift_ratio: 0.5
86
+ num_cache_chunks: 1024
87
+ train_data_path_and_name_and_type:
88
+ - - dump/raw/train_nodev_sp/wav.scp
89
+ - speech
90
+ - sound
91
+ - - dump/raw/train_nodev_sp/text
92
+ - text
93
+ - text
94
+ valid_data_path_and_name_and_type:
95
+ - - dump/raw/train_dev/wav.scp
96
+ - speech
97
+ - sound
98
+ - - dump/raw/train_dev/text
99
+ - text
100
+ - text
101
+ allow_variable_data_keys: false
102
+ max_cache_size: 0.0
103
+ max_cache_fd: 32
104
+ valid_max_cache_size: null
105
+ optim: adam
106
+ optim_conf:
107
+ lr: 0.001
108
+ scheduler: warmuplr
109
+ scheduler_conf:
110
+ warmup_steps: 2500
111
+ token_list:
112
+ - <blank>
113
+ - <unk>
114
+ - ▁
115
+ - T
116
+ - E
117
+ - O
118
+ - R
119
+ - Y
120
+ - A
121
+ - H
122
+ - U
123
+ - S
124
+ - I
125
+ - F
126
+ - B
127
+ - L
128
+ - P
129
+ - D
130
+ - G
131
+ - M
132
+ - C
133
+ - V
134
+ - X
135
+ - J
136
+ - K
137
+ - Z
138
+ - W
139
+ - N
140
+ - Q
141
+ - <sos/eos>
142
+ init: xavier_uniform
143
+ input_size: null
144
+ ctc_conf:
145
+ dropout_rate: 0.0
146
+ ctc_type: builtin
147
+ reduce: true
148
+ ignore_nan_grad: null
149
+ zero_infinity: true
150
+ joint_net_conf: null
151
+ use_preprocessor: true
152
+ token_type: bpe
153
+ bpemodel: data/en_token_list/bpe_unigram30/bpe.model
154
+ non_linguistic_symbols: null
155
+ cleaner: null
156
+ g2p: null
157
+ speech_volume_normalize: null
158
+ rir_scp: null
159
+ rir_apply_prob: 1.0
160
+ noise_scp: null
161
+ noise_apply_prob: 1.0
162
+ noise_db_range: '13_15'
163
+ short_noise_thres: 0.5
164
+ frontend: default
165
+ frontend_conf:
166
+ fs: 16k
167
+ specaug: null
168
+ specaug_conf: {}
169
+ normalize: global_mvn
170
+ normalize_conf:
171
+ stats_file: exp/asr_stats_raw_en_bpe30_sp/train/feats_stats.npz
172
+ model: espnet
173
+ model_conf:
174
+ ctc_weight: 0.3
175
+ lsm_weight: 0.1
176
+ length_normalized_loss: false
177
+ preencoder: null
178
+ preencoder_conf: {}
179
+ encoder: transformer
180
+ encoder_conf:
181
+ output_size: 256
182
+ attention_heads: 4
183
+ linear_units: 2048
184
+ num_blocks: 12
185
+ dropout_rate: 0.1
186
+ positional_dropout_rate: 0.1
187
+ attention_dropout_rate: 0.0
188
+ input_layer: conv2d
189
+ normalize_before: true
190
+ postencoder: null
191
+ postencoder_conf: {}
192
+ decoder: transformer
193
+ decoder_conf:
194
+ attention_heads: 4
195
+ linear_units: 2048
196
+ num_blocks: 6
197
+ dropout_rate: 0.1
198
+ positional_dropout_rate: 0.1
199
+ self_attention_dropout_rate: 0.0
200
+ src_attention_dropout_rate: 0.0
201
+ required:
202
+ - output_dir
203
+ - token_list
204
+ version: '202207'
205
+ distributed: true
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/acc.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/backward_time.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/cer.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/cer_ctc.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/forward_time.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/gpu_max_cached_mem_GB.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/iter_time.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss_att.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/loss_ctc.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/optim0_lr0.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/optim_step_time.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/train_time.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/images/wer.png ADDED
exp/asr_train_asr_transformer_raw_en_bpe30_sp/valid.acc.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2fa42f575245ed4af7176e4393ec048c6cc65dc06d42c9f8441a9ad33e02959b
+ size 108678629
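
`valid.acc.ave_10best.pth` is the average of the 10 best validation-accuracy checkpoints, again stored via Git LFS. If you want to load it without going through `run.sh`, a hedged sketch using espnet2's task API (assuming the files have been pulled locally and paths are repo-relative):

```python
# Hedged sketch: build the trained model from the config and the averaged checkpoint.
from espnet2.tasks.asr import ASRTask

model, train_args = ASRTask.build_model_from_file(
    "exp/asr_train_asr_transformer_raw_en_bpe30_sp/config.yaml",
    "exp/asr_train_asr_transformer_raw_en_bpe30_sp/valid.acc.ave_10best.pth",
    device="cpu",
)
model.eval()
print(sum(p.numel() for p in model.parameters()), "parameters")
```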
exp/lm_train_lm_en_bpe30/23epoch.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9354b3d36b3da2d2a0f54a8efe50fa6df1f696c260a80854614809d835f85fab
+ size 27239903
exp/lm_train_lm_en_bpe30/config.yaml ADDED
@@ -0,0 +1,149 @@
+ config: conf/train_lm.yaml
+ print_config: false
+ log_level: INFO
+ dry_run: false
+ iterator_type: sequence
+ output_dir: exp/lm_train_lm_en_bpe30
+ ngpu: 1
+ seed: 0
+ num_workers: 1
+ num_att_plot: 3
+ dist_backend: nccl
+ dist_init_method: env://
+ dist_world_size: 4
+ dist_rank: 0
+ local_rank: 0
+ dist_master_addr: localhost
+ dist_master_port: 42755
+ dist_launcher: null
+ multiprocessing_distributed: true
+ unused_parameters: false
+ sharded_ddp: false
+ cudnn_enabled: true
+ cudnn_benchmark: false
+ cudnn_deterministic: true
+ collect_stats: false
+ write_collected_feats: false
+ max_epoch: 40
+ patience: null
+ val_scheduler_criterion:
+ - valid
+ - loss
+ early_stopping_criterion:
+ - valid
+ - loss
+ - min
+ best_model_criterion:
+ -   - valid
+     - loss
+     - min
+ keep_nbest_models: 1
+ nbest_averaging_interval: 0
+ grad_clip: 5.0
+ grad_clip_type: 2.0
+ grad_noise: false
+ accum_grad: 1
+ no_forward_run: false
+ resume: true
+ train_dtype: float32
+ use_amp: false
+ log_interval: null
+ use_matplotlib: true
+ use_tensorboard: true
+ create_graph_in_tensorboard: false
+ use_wandb: false
+ wandb_project: null
+ wandb_id: null
+ wandb_entity: null
+ wandb_name: null
+ wandb_model_log_interval: -1
+ detect_anomaly: false
+ pretrain_path: null
+ init_param: []
+ ignore_init_mismatch: false
+ freeze_param: []
+ num_iters_per_epoch: null
+ batch_size: 256
+ valid_batch_size: null
+ batch_bins: 1000000
+ valid_batch_bins: null
+ train_shape_file:
+ - exp/lm_stats_en_bpe30/train/text_shape.bpe
+ valid_shape_file:
+ - exp/lm_stats_en_bpe30/valid/text_shape.bpe
+ batch_type: folded
+ valid_batch_type: null
+ fold_length:
+ - 150
+ sort_in_batch: descending
+ sort_batch: descending
+ multiple_iterator: false
+ chunk_length: 500
+ chunk_shift_ratio: 0.5
+ num_cache_chunks: 1024
+ train_data_path_and_name_and_type:
+ -   - dump/raw/lm_train.txt
+     - text
+     - text
+ valid_data_path_and_name_and_type:
+ -   - dump/raw/train_dev/text
+     - text
+     - text
+ allow_variable_data_keys: false
+ max_cache_size: 0.0
+ max_cache_fd: 32
+ valid_max_cache_size: null
+ optim: adam
+ optim_conf:
+     lr: 0.1
+ scheduler: null
+ scheduler_conf: {}
+ token_list:
+ - <blank>
+ - <unk>
+ - ▁
+ - T
+ - E
+ - O
+ - R
+ - Y
+ - A
+ - H
+ - U
+ - S
+ - I
+ - F
+ - B
+ - L
+ - P
+ - D
+ - G
+ - M
+ - C
+ - V
+ - X
+ - J
+ - K
+ - Z
+ - W
+ - N
+ - Q
+ - <sos/eos>
+ init: null
+ model_conf:
+     ignore_id: 0
+ use_preprocessor: true
+ token_type: bpe
+ bpemodel: data/en_token_list/bpe_unigram30/bpe.model
+ non_linguistic_symbols: null
+ cleaner: null
+ g2p: null
+ lm: seq_rnn
+ lm_conf:
+     unit: 650
+     nlayers: 2
+ required:
+ - output_dir
+ - token_list
+ version: '202207'
+ distributed: true
exp/lm_train_lm_en_bpe30/images/backward_time.png ADDED
exp/lm_train_lm_en_bpe30/images/forward_time.png ADDED
exp/lm_train_lm_en_bpe30/images/gpu_max_cached_mem_GB.png ADDED
exp/lm_train_lm_en_bpe30/images/iter_time.png ADDED
exp/lm_train_lm_en_bpe30/images/loss.png ADDED
exp/lm_train_lm_en_bpe30/images/optim0_lr0.png ADDED
exp/lm_train_lm_en_bpe30/images/optim_step_time.png ADDED
exp/lm_train_lm_en_bpe30/images/train_time.png ADDED
exp/lm_train_lm_en_bpe30/perplexity_test/ppl ADDED
@@ -0,0 +1 @@
+ 7.336515906190722
meta.yaml ADDED
@@ -0,0 +1,10 @@
+ espnet: '202207'
+ files:
+     asr_model_file: exp/asr_train_asr_transformer_raw_en_bpe30_sp/valid.acc.ave_10best.pth
+     lm_file: exp/lm_train_lm_en_bpe30/23epoch.pth
+ python: "3.9.12 (main, Apr 5 2022, 06:56:58) \n[GCC 7.5.0]"
+ timestamp: 1660898327.418722
+ torch: 1.10.1
+ yaml_files:
+     asr_train_config: exp/asr_train_asr_transformer_raw_en_bpe30_sp/config.yaml
+     lm_train_config: exp/lm_train_lm_en_bpe30/config.yaml
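
`meta.yaml` is what lets ESPnet's model-zoo tooling map this repository back to the checkpoint and config paths listed above. If you only need the raw files (for example, to use them with the loading sketches earlier), a hedged alternative is `huggingface_hub`:

```python
# Hedged sketch: download the whole repository (including LFS weights) locally.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(repo_id="jkang/espnet2_an4_transformer")
print("files downloaded to:", local_dir)
```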