Titouan Parcollet committed
Commit 964e281
1 Parent(s): 56773ef

update params

Files changed (1):
    hyperparams.yaml  +13 -15
hyperparams.yaml CHANGED
@@ -10,24 +10,23 @@
 
 # Feature parameters
 sample_rate: 16000
-n_fft: 400
+n_fft: 512
 n_mels: 80
 
 ####################### Model parameters ###########################
 # Transformer
 d_model: 512
-nhead: 4
+nhead: 8
 num_encoder_layers: 12
 num_decoder_layers: 6
 d_ffn: 2048
 transformer_dropout: 0.1
 activation: !name:torch.nn.GELU
 output_neurons: 5000
-vocab_size: 5000
 
 # Outputs
 blank_index: 0
-label_smoothing: 0.0
+label_smoothing: 0.1
 pad_index: 0
 bos_index: 1
 eos_index: 2
@@ -45,16 +44,15 @@ ctc_weight_decode: 0.40
 
 CNN: !new:speechbrain.lobes.models.convolution.ConvolutionFrontEnd
     input_shape: (8, 10, 80)
-    num_blocks: 3
+    num_blocks: 2
     num_layers_per_block: 1
-    out_channels: (64, 64, 64)
-    kernel_sizes: (5, 5, 1)
-    strides: (2, 2, 1)
-    residuals: (False, False, True)
-    norm: !name:speechbrain.nnet.normalization.LayerNorm
-
-Transformer: !new:speechbrain.lobes.models.transformer.TransformerASR.TransformerASR
-    input_size: 1280
+    out_channels: (64, 32)
+    kernel_sizes: (3, 3)
+    strides: (2, 2)
+    residuals: (False, False)
+
+Transformer: !new:speechbrain.lobes.models.transformer.TransformerASR.TransformerASR # yamllint disable-line rule:line-length
+    input_size: 640
     tgt_vocab: !ref <output_neurons>
     d_model: !ref <d_model>
     nhead: !ref <nhead>
@@ -63,8 +61,8 @@ Transformer: !new:speechbrain.lobes.models.transformer.TransformerASR.Transforme
     d_ffn: !ref <d_ffn>
     dropout: !ref <transformer_dropout>
     activation: !ref <activation>
-    encoder_module: transformer
-    attention_type: regularMHA
+    encoder_module: conformer
+    attention_type: RelPosMHAXL
     normalize_before: True
     causal: False
 
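For reference, a SpeechBrain hyperparams file like this one is normally consumed through HyperPyYAML, which resolves the !ref tags and instantiates the !new: objects (the CNN front-end and the TransformerASR) at load time. The lines below are a minimal sketch, assuming speechbrain and hyperpyyaml are installed and that the updated hyperparams.yaml sits in the current directory; the override value is purely illustrative and not part of this commit.

# Minimal sketch: load the updated hyperparams.yaml with HyperPyYAML.
# Assumes `pip install speechbrain hyperpyyaml` and that hyperparams.yaml
# is in the working directory; the override below is an example only.
from hyperpyyaml import load_hyperpyyaml

with open("hyperparams.yaml") as fin:
    # Overrides are applied before !ref resolution and !new: instantiation.
    hparams = load_hyperpyyaml(fin, overrides={"transformer_dropout": 0.2})

print(hparams["n_fft"])        # 512 after this commit
print(hparams["Transformer"])  # an instantiated TransformerASR module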