csukuangfj committed on
Commit
8333788
1 Parent(s): 3c6655e

add rnnt from GigaAM

Browse files
GigaAM/rnnt/rnnt_model_config.yaml ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_class: enc_dec_rnnt_bpe
2
+ sample_rate: 16000
3
+ log_prediction: true
4
+ model_defaults:
5
+ enc_hidden: 768
6
+ pred_hidden: 320
7
+ join_hidden: 320
8
+
9
+ preprocessor:
10
+ _target_: __main__.AudioToMelSpectrogramPreprocessor
11
+ sample_rate: 16000
12
+ n_fft: 400
13
+ n_window_size: 400
14
+ window_size: null
15
+ n_window_stride: 160
16
+ window_stride: null
17
+ features: 64
18
+ dither: 0.0
19
+ preemph: null
20
+ log: true
21
+ log_zero_guard_type: clamp
22
+ normalize: null
23
+ pad_to: 0
24
+ mel_norm: null
25
+ window: hann
26
+ log_zero_guard_value: 1e-9
27
+
28
+ tokenizer:
29
+ dir: tokenizer_all_sets/
30
+ type: bpe
31
+
32
+ validation_ds:
33
+ shuffle: False
34
+ manifest_filepath: null
35
+
36
+ encoder:
37
+ _target_: nemo.collections.asr.modules.ConformerEncoder
38
+ feat_in: 64
39
+ feat_out: -1
40
+ n_layers: 16
41
+ d_model: 768
42
+ subsampling: striding
43
+ subsampling_factor: 4
44
+ subsampling_conv_channels: 768
45
+ ff_expansion_factor: 4
46
+ self_attention_model: rel_pos
47
+ pos_emb_max_len: 5000
48
+ n_heads: 16
49
+ xscaling: false
50
+ untie_biases: true
51
+ conv_kernel_size: 31
52
+ dropout: 0.1
53
+ dropout_emb: 0.1
54
+ dropout_att: 0.1
55
+ decoder:
56
+ _target_: nemo.collections.asr.modules.RNNTDecoder
57
+ normalization_mode: null
58
+ random_state_sampling: false
59
+ blank_as_pad: true
60
+ vocab_size: 512
61
+ prednet:
62
+ pred_hidden: 320
63
+ pred_rnn_layers: 1
64
+ t_max: null
65
+ dropout: 0.0
66
+ joint:
67
+ _target_: nemo.collections.asr.modules.RNNTJoint
68
+ log_softmax: null
69
+ fuse_loss_wer: false
70
+ fused_batch_size: 1
71
+ jointnet:
72
+ joint_hidden: 320
73
+ activation: relu
74
+ dropout: 0.0
75
+ encoder_hidden: 768
76
+ optim:
77
+ name: adamw
78
+ lr: 5.0e-05
79
+ betas:
80
+ - 0.9
81
+ - 0.98
82
+ weight_decay: 0.01
83
+ sched:
84
+ name: CosineAnnealing
85
+ warmup_steps: 10000
86
+ warmup_ratio: null
87
+ min_lr: 1.0e-07
88
+ nemo_version: 1.12.0
89
+ decoding:
90
+ strategy: greedy_batch
91
+ preserve_alignments: false
92
+ greedy:
93
+ max_symbols: 3
94
+ beam:
95
+ beam_size: 5
96
+ score_norm: true
97
+
98
+
99
+ loss:
100
+ loss_name: default
101
+ mwer: false
102
+ rnnt_reduction: mean_batch
103
+ wer_coef: false
104
+ subtract_mean: true
105
+ warprnnt_numba_kwargs:
106
+ fastemit_lambda: 0.0
107
+ clamp: -1.0
108
+ rnnt_weight: 0.1
109
+ unique_hyp: true
GigaAM/rnnt/rnnt_model_weights.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9311712a085aba1b103c325f4965faa7b32e950bf0b724720103a94d204d2a9
3
+ size 974419733