ddwkim committed on
Commit
ea0ac2e
·
verified ·
1 Parent(s): a4112b8

Create hyperparams.yaml

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +133 -0
hyperparams.yaml ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Decoding parameters
2
+ # Be sure that the bos and eos index match with the BPEs ones
3
+ blank_index: 0
4
+ bos_index: 0
5
+ eos_index: 0
6
+ lm_weight: 0.1
7
+ beam_size: 4
8
+ nbest: 1
9
+ state_beam: 2.3
10
+ expand_beam: 2.3
11
+
12
+ sample_rate: 16000
13
+ n_fft: 512
14
+ n_mels: 80
15
+ win_length: 32
16
+
17
+ # Transformer
18
+ d_model: 256
19
+ joint_dim: 640
20
+ nhead: 4
21
+ num_encoder_layers: 12
22
+ num_decoder_layers: 0
23
+ d_ffn: 2048
24
+ activation: !name:torch.nn.GELU
25
+ output_neurons: 5000
26
+ dec_dim: 512
27
+
28
+ normalize: !new:speechbrain.processing.features.InputNormalization
29
+ norm_type: global
30
+
31
+ compute_features: !new:speechbrain.lobes.features.Fbank
32
+ sample_rate: !ref <sample_rate>
33
+ n_fft: !ref <n_fft>
34
+ n_mels: !ref <n_mels>
35
+ win_length: !ref <win_length>
36
+
37
+ CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
38
+ input_shape: (8, 10, 80)
39
+ num_blocks: 2
40
+ num_layers_per_block: 1
41
+ out_channels: (64, 32)
42
+ kernel_sizes: (3, 3)
43
+ strides: (2, 2)
44
+ residuals: (False, False)
45
+
46
+ Transformer: !new:speechbrain.lobes.models.transformer.TransformerASR.TransformerASR # yamllint disable-line rule:line-length
47
+ input_size: 640
48
+ tgt_vocab: !ref <output_neurons>
49
+ d_model: !ref <d_model>
50
+ nhead: !ref <nhead>
51
+ num_encoder_layers: !ref <num_encoder_layers>
52
+ num_decoder_layers: !ref <num_decoder_layers>
53
+ d_ffn: !ref <d_ffn>
54
+ activation: !ref <activation>
55
+ encoder_module: conformer
56
+ attention_type: RelPosMHAXL
57
+ normalize_before: True
58
+ causal: False
59
+
60
+ # We must call an encoder wrapper so the decoder isn't run (we don't have any)
61
+ enc: !new:speechbrain.lobes.models.transformer.TransformerASR.EncoderWrapper
62
+ transformer: !ref <Transformer>
63
+
64
+ proj_dec: !new:speechbrain.nnet.linear.Linear
65
+ input_size: !ref <dec_dim>
66
+ n_neurons: !ref <joint_dim>
67
+ bias: False
68
+
69
+ emb: !new:speechbrain.nnet.embedding.Embedding
70
+ num_embeddings: !ref <output_neurons>
71
+ consider_as_one_hot: True
72
+ blank_id: !ref <blank_index>
73
+
74
+ dec: !new:speechbrain.nnet.RNN.LSTM
75
+ input_shape: [null, null, !ref <output_neurons> - 1]
76
+ hidden_size: !ref <dec_dim>
77
+ num_layers: 1
78
+ re_init: True
79
+
80
+ Tjoint: !new:speechbrain.nnet.transducer.transducer_joint.Transducer_joint
81
+ joint: sum # joint [sum | concat]
82
+ nonlinearity: !ref <activation>
83
+
84
+ transducer_lin: !new:speechbrain.nnet.linear.Linear
85
+ input_size: !ref <joint_dim>
86
+ n_neurons: !ref <output_neurons>
87
+ bias: False
88
+
89
+ asr_model: !new:torch.nn.ModuleList
90
+ - [!ref <enc>, !ref <emb>, !ref <dec>, !ref <transducer_lin>]
91
+
92
+ Beamsearcher: !new:speechbrain.decoders.transducer.TransducerBeamSearcher
93
+ decode_network_lst: [!ref <emb>, !ref <dec>, !ref <proj_dec>]
94
+ tjoint: !ref <Tjoint>
95
+ classifier_network: [!ref <transducer_lin>]
96
+ blank_id: !ref <blank_index>
97
+ beam_size: !ref <beam_size>
98
+ nbest: !ref <nbest>
99
+ lm_module: !ref <lm_model>
100
+ lm_weight: !ref <lm_weight>
101
+ state_beam: !ref <state_beam>
102
+ expand_beam: !ref <expand_beam>
103
+
104
+ lm_model: !new:speechbrain.lobes.models.RNNLM.RNNLM
105
+ output_neurons: !ref <output_neurons>
106
+ embedding_dim: 256
107
+ activation: !name:torch.nn.LeakyReLU
108
+ dropout: 0.0
109
+ rnn_layers: 6
110
+ rnn_neurons: 512
111
+ dnn_blocks: 1
112
+ dnn_neurons: 256
113
+ return_hidden: True # For inference
114
+
115
+ tokenizer: !new:sentencepiece.SentencePieceProcessor
116
+
117
+ # We compose the inference (encoder) pipeline.
118
+ encoder: !new:speechbrain.nnet.containers.LengthsCapableSequential
119
+ input_shape: [null, null, !ref <n_mels>]
120
+ compute_features: !ref <compute_features>
121
+ normalize: !ref <normalizer>
122
+ model: !ref <enc>
123
+
124
+ modules:
125
+ normalizer: !ref <normalizer>
126
+ encoder: !ref <encoder>
127
+ decoder: !ref <decoder>
128
+
129
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
130
+ loadables:
131
+ normalizer: !ref <normalizer>
132
+ asr: !ref <asr_model>
133
+ tokenizer: !ref <tokenizer>