Porjaz committed
Commit 1d790af
1 Parent(s): 2bdbd64

Update hyperparams.yaml

Files changed (1)
  1. hyperparams.yaml +33 -5
hyperparams.yaml CHANGED
@@ -1,7 +1,7 @@
 # Hparams NEEDED
 HPARAMS_NEEDED: ["wav2vec_output_dim", "emb_size", "dec_neurons", "dec_layers", "output_neurons", "log_softmax", "tokenizer"]
 # Modules Needed
-MODULES_NEEDED: ["encoder_w2v2", "embedding", "ctc_lin", "seq_lin"]
+MODULES_NEEDED: ["encoder_w2v2", "embedding", "ctc_lin", "seq_lin", "lm_model"]
 
 # Pretrain folder (HuggingFace)
 output_folder: !ref output_folder_seq2seq_cv_podcast_arhiv_augmentation
@@ -25,7 +25,6 @@ bos_index: 0
 eos_index: 0
 unk_index: 0
 
-# Decoding parameters
 # Decoding parameters
 min_decode_ratio: 0.0
 max_decode_ratio: 1.0
@@ -33,13 +32,28 @@ valid_beam_size: 10
 test_beam_size: 20
 using_eos_threshold: True
 eos_threshold: 1.5
-using_max_attn_shift: True
+using_max_attn_shift: False
 max_attn_shift: 700
 length_normalization: True
 temperature: 1.0
+temperature_lm: 1.4
 # Scoring parameters
 coverage_penalty: 1.5
-
+lm_weight: 0.4
+
+
+# This is the RNNLM that is used according to the Huggingface repository
+# NB: It has to match the pre-trained RNNLM!!
+lm_model: !new:speechbrain.lobes.models.RNNLM.RNNLM
+    output_neurons: !ref <output_neurons>
+    embedding_dim: !ref <emb_size>
+    activation: !name:torch.nn.LeakyReLU
+    dropout: 0.0
+    rnn_layers: 2
+    rnn_neurons: 2048
+    dnn_blocks: 1
+    dnn_neurons: 512
+    return_hidden: True # For inference
 
 # Wav2vec2 encoder
 encoder_w2v2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2
@@ -89,6 +103,7 @@ modules:
     decoder: !ref <decoder>
     ctc_lin: !ref <ctc_lin>
     seq_lin: !ref <seq_lin>
+    lm_model: !ref <lm_model>
 
 model: !new:torch.nn.ModuleList
     - [!ref <encoder_w2v2>, !ref <embedding>, !ref <decoder>, !ref <ctc_lin>, !ref <seq_lin>]
@@ -97,11 +112,22 @@ model: !new:torch.nn.ModuleList
 coverage_scorer: !new:speechbrain.decoders.scorer.CoverageScorer
     vocab_size: !ref <output_neurons>
 
+rnnlm_scorer: !new:speechbrain.decoders.scorer.RNNLMScorer
+    language_model: !ref <lm_model>
+    temperature: !ref <temperature_lm>
+
 scorer: !new:speechbrain.decoders.scorer.ScorerBuilder
     full_scorers: [!ref <coverage_scorer>]
     weights:
         coverage: !ref <coverage_penalty>
 
+scorer_lm: !new:speechbrain.decoders.scorer.ScorerBuilder
+    full_scorers: [!ref <rnnlm_scorer>,
+                   !ref <coverage_scorer>]
+    weights:
+        rnnlm: !ref <lm_weight>
+        coverage: !ref <coverage_penalty>
+
 test_search: !new:speechbrain.decoders.S2SRNNBeamSearcher
     embedding: !ref <embedding>
     decoder: !ref <decoder>
@@ -115,7 +141,7 @@ test_search: !new:speechbrain.decoders.S2SRNNBeamSearcher
     using_max_attn_shift: !ref <using_max_attn_shift>
     max_attn_shift: !ref <max_attn_shift>
     temperature: !ref <temperature>
-    scorer: !ref <scorer>
+    scorer: !ref <scorer_lm>
 
 
 ############################## Logging and Pretrainer ##########################
@@ -123,5 +149,7 @@ test_search: !new:speechbrain.decoders.S2SRNNBeamSearcher
 pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
     loadables:
         model: !ref <model>
+        lm: !ref <lm_model>
     paths:
         model: !ref <pretrained_path>/model.ckpt
+        lm: lm.ckpt
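
For readers who want to see how the additions fit together at inference time, the sketch below loads the updated hyperparams.yaml with hyperpyyaml, pulls model.ckpt and lm.ckpt through the pretrainer, and decodes one file with test_search, which now applies RNNLM shallow fusion through scorer_lm (lm_weight: 0.4, temperature_lm: 1.4). This is a minimal sketch of the generic SpeechBrain pattern, not this repository's actual inference code: the file paths, the example audio name, and the exact return signature of the beam searcher are assumptions, and the full YAML must define the keys the diff does not show (wav2vec_output_dim, pretrained_path, tokenizer, and so on).

import torch
from hyperpyyaml import load_hyperpyyaml
from speechbrain.dataio.dataio import read_audio

# Load the hyperparameter file; this instantiates lm_model, rnnlm_scorer,
# scorer_lm, test_search and pretrainer exactly as declared in the YAML.
with open("hyperparams.yaml") as fin:          # local copy of the file above (assumed path)
    hparams = load_hyperpyyaml(fin)

# Fetch and load the checkpoints declared under `pretrainer:`:
# model.ckpt into <model> and lm.ckpt into <lm_model>.
# NOTE: the bare "lm.ckpt" path may only resolve if collect_files() is given
# a default_source pointing at the model repository.
pretrainer = hparams["pretrainer"]
pretrainer.collect_files()
pretrainer.load_collected()

hparams["model"].eval()
hparams["lm_model"].eval()

# Encode one waveform with the wav2vec2 front-end, then decode with the
# beam search that now points at scorer_lm (RNNLM + coverage shallow fusion).
wav = read_audio("example.wav").unsqueeze(0)   # [batch, time]; file name is a placeholder
wav_lens = torch.tensor([1.0])                 # relative lengths
with torch.no_grad():
    enc_out = hparams["encoder_w2v2"](wav)
    results = hparams["test_search"](enc_out, wav_lens)

# The first returned element holds the best token hypotheses; the full return
# signature differs across SpeechBrain versions, so only index 0 is used here.
# Detokenization goes through the tokenizer listed in HPARAMS_NEEDED.
print(results[0])

Keeping both scorer (coverage only) and scorer_lm (RNNLM plus coverage) means decoding can be run with or without shallow fusion; only test_search was repointed to scorer_lm, with lm_weight scaling the language-model scores and temperature_lm flattening the LM distribution before fusion. In practice a model hosted this way is normally driven through a SpeechBrain pretrained interface (for example EncoderDecoderASR.from_hparams or a custom interface class), which performs the same YAML loading and pretrainer steps internally; the sketch only makes the role of the new keys explicit.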