Update hyperparams.yaml
Browse files · hyperparams.yaml +33 -5
hyperparams.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
# Hparams NEEDED
|
2 |
HPARAMS_NEEDED: ["wav2vec_output_dim", "emb_size", "dec_neurons", "dec_layers", "output_neurons", "log_softmax", "tokenizer"]
|
3 |
# Modules Needed
|
4 |
-
MODULES_NEEDED: ["encoder_w2v2", "embedding", "ctc_lin", "seq_lin"]
|
5 |
|
6 |
# Pretrain folder (HuggingFace)
|
7 |
output_folder: !ref output_folder_seq2seq_cv_podcast_arhiv_augmentation
|
@@ -25,7 +25,6 @@ bos_index: 0
|
|
25 |
eos_index: 0
|
26 |
unk_index: 0
|
27 |
|
28 |
-
# Decoding parameters
|
29 |
# Decoding parameters
|
30 |
min_decode_ratio: 0.0
|
31 |
max_decode_ratio: 1.0
|
@@ -33,13 +32,28 @@ valid_beam_size: 10
|
|
33 |
test_beam_size: 20
|
34 |
using_eos_threshold: True
|
35 |
eos_threshold: 1.5
|
36 |
-
using_max_attn_shift: True
|
37 |
max_attn_shift: 700
|
38 |
length_normalization: True
|
39 |
temperature: 1.0
|
|
|
40 |
# Scoring parameters
|
41 |
coverage_penalty: 1.5
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
# Wav2vec2 encoder
|
45 |
encoder_w2v2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2
|
@@ -89,6 +103,7 @@ modules:
|
|
89 |
decoder: !ref <decoder>
|
90 |
ctc_lin: !ref <ctc_lin>
|
91 |
seq_lin: !ref <seq_lin>
|
|
|
92 |
|
93 |
model: !new:torch.nn.ModuleList
|
94 |
- [!ref <encoder_w2v2>, !ref <embedding>, !ref <decoder>, !ref <ctc_lin>, !ref <seq_lin>]
|
@@ -97,11 +112,22 @@ model: !new:torch.nn.ModuleList
|
|
97 |
coverage_scorer: !new:speechbrain.decoders.scorer.CoverageScorer
|
98 |
vocab_size: !ref <output_neurons>
|
99 |
|
|
|
|
|
|
|
|
|
100 |
scorer: !new:speechbrain.decoders.scorer.ScorerBuilder
|
101 |
full_scorers: [!ref <coverage_scorer>]
|
102 |
weights:
|
103 |
coverage: !ref <coverage_penalty>
|
104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
test_search: !new:speechbrain.decoders.S2SRNNBeamSearcher
|
106 |
embedding: !ref <embedding>
|
107 |
decoder: !ref <decoder>
|
@@ -115,7 +141,7 @@ test_search: !new:speechbrain.decoders.S2SRNNBeamSearcher
|
|
115 |
using_max_attn_shift: !ref <using_max_attn_shift>
|
116 |
max_attn_shift: !ref <max_attn_shift>
|
117 |
temperature: !ref <temperature>
|
118 |
-
scorer: !ref <scorer>
|
119 |
|
120 |
|
121 |
############################## Logging and Pretrainer ##########################
|
@@ -123,5 +149,7 @@ test_search: !new:speechbrain.decoders.S2SRNNBeamSearcher
|
|
123 |
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
|
124 |
loadables:
|
125 |
model: !ref <model>
|
|
|
126 |
paths:
|
127 |
model: !ref <pretrained_path>/model.ckpt
|
|
|
|
1 |
# Hparams NEEDED
|
2 |
HPARAMS_NEEDED: ["wav2vec_output_dim", "emb_size", "dec_neurons", "dec_layers", "output_neurons", "log_softmax", "tokenizer"]
|
3 |
# Modules Needed
|
4 |
+
MODULES_NEEDED: ["encoder_w2v2", "embedding", "ctc_lin", "seq_lin", "lm_model"]
|
5 |
|
6 |
# Pretrain folder (HuggingFace)
|
7 |
output_folder: !ref output_folder_seq2seq_cv_podcast_arhiv_augmentation
|
|
|
25 |
eos_index: 0
|
26 |
unk_index: 0
|
27 |
|
|
|
28 |
# Decoding parameters
|
29 |
min_decode_ratio: 0.0
|
30 |
max_decode_ratio: 1.0
|
|
|
32 |
test_beam_size: 20
|
33 |
using_eos_threshold: True
|
34 |
eos_threshold: 1.5
|
35 |
+
using_max_attn_shift: False
|
36 |
max_attn_shift: 700
|
37 |
length_normalization: True
|
38 |
temperature: 1.0
|
39 |
+
temperature_lm: 1.4
|
40 |
# Scoring parameters
|
41 |
coverage_penalty: 1.5
|
42 |
+
lm_weight: 0.4
|
43 |
+
|
44 |
+
|
45 |
+
# This is the RNNLM that is used according to the Huggingface repository
|
46 |
+
# NB: It has to match the pre-trained RNNLM!!
|
47 |
+
lm_model: !new:speechbrain.lobes.models.RNNLM.RNNLM
|
48 |
+
output_neurons: !ref <output_neurons>
|
49 |
+
embedding_dim: !ref <emb_size>
|
50 |
+
activation: !name:torch.nn.LeakyReLU
|
51 |
+
dropout: 0.0
|
52 |
+
rnn_layers: 2
|
53 |
+
rnn_neurons: 2048
|
54 |
+
dnn_blocks: 1
|
55 |
+
dnn_neurons: 512
|
56 |
+
return_hidden: True # For inference
|
57 |
|
58 |
# Wav2vec2 encoder
|
59 |
encoder_w2v2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2
|
|
|
103 |
decoder: !ref <decoder>
|
104 |
ctc_lin: !ref <ctc_lin>
|
105 |
seq_lin: !ref <seq_lin>
|
106 |
+
lm_model: !ref <lm_model>
|
107 |
|
108 |
model: !new:torch.nn.ModuleList
|
109 |
- [!ref <encoder_w2v2>, !ref <embedding>, !ref <decoder>, !ref <ctc_lin>, !ref <seq_lin>]
|
|
|
112 |
coverage_scorer: !new:speechbrain.decoders.scorer.CoverageScorer
|
113 |
vocab_size: !ref <output_neurons>
|
114 |
|
115 |
+
rnnlm_scorer: !new:speechbrain.decoders.scorer.RNNLMScorer
|
116 |
+
language_model: !ref <lm_model>
|
117 |
+
temperature: !ref <temperature_lm>
|
118 |
+
|
119 |
scorer: !new:speechbrain.decoders.scorer.ScorerBuilder
|
120 |
full_scorers: [!ref <coverage_scorer>]
|
121 |
weights:
|
122 |
coverage: !ref <coverage_penalty>
|
123 |
|
124 |
+
scorer_lm: !new:speechbrain.decoders.scorer.ScorerBuilder
|
125 |
+
full_scorers: [!ref <rnnlm_scorer>,
|
126 |
+
!ref <coverage_scorer>]
|
127 |
+
weights:
|
128 |
+
rnnlm: !ref <lm_weight>
|
129 |
+
coverage: !ref <coverage_penalty>
|
130 |
+
|
131 |
test_search: !new:speechbrain.decoders.S2SRNNBeamSearcher
|
132 |
embedding: !ref <embedding>
|
133 |
decoder: !ref <decoder>
|
|
|
141 |
using_max_attn_shift: !ref <using_max_attn_shift>
|
142 |
max_attn_shift: !ref <max_attn_shift>
|
143 |
temperature: !ref <temperature>
|
144 |
+
scorer: !ref <scorer_lm>
|
145 |
|
146 |
|
147 |
############################## Logging and Pretrainer ##########################
|
|
|
149 |
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
|
150 |
loadables:
|
151 |
model: !ref <model>
|
152 |
+
lm: !ref <lm_model>
|
153 |
paths:
|
154 |
model: !ref <pretrained_path>/model.ckpt
|
155 |
+
lm: lm.ckpt
|