# Extracted from a file-viewer page. The viewer header reported
# "File size: 2,063 Bytes" and the identifier 8333788; the viewer's
# 1-110 line-number gutter has been dropped. The YAML configuration follows.
# Top-level model settings.
# NOTE(review): `model_class` looks like a selector for the model
# implementation to build — confirm against the loading code.
model_class: enc_dec_rnnt_bpe
# Same value as preprocessor.sample_rate in this file.
sample_rate: 16000
log_prediction: true
# Shared layer widths. The same numbers are written out literally elsewhere
# in this file: 768 as encoder.d_model and joint.jointnet.encoder_hidden,
# 320 as decoder.prednet.pred_hidden and joint.jointnet.joint_hidden.
# Keep them in sync when editing, since no interpolation links them.
model_defaults:
  enc_hidden: 768
  pred_hidden: 320
  join_hidden: 320

# Audio feature-extraction settings.
# NOTE(review): `_target_` points into `__main__`, so the class presumably has
# to be defined by whichever script loads this config — confirm.
preprocessor:
  _target_: __main__.AudioToMelSpectrogramPreprocessor
  # Same value as the top-level sample_rate.
  sample_rate: 16000
  n_fft: 400
  # The n_window_* keys are set while the window_* keys are null.
  # NOTE(review): presumably n_window_* are in samples (400 -> 25 ms and
  # 160 -> 10 ms at 16 kHz) and window_* are the seconds-based alternative —
  # confirm against the preprocessor class.
  n_window_size: 400
  window_size: null
  n_window_stride: 160
  window_stride: null
  # Same value as encoder.feat_in in this file.
  features: 64
  dither: 0.0
  preemph: null
  log: true
  log_zero_guard_type: clamp
  # Written with an explicit mantissa dot: YAML 1.1 loaders such as plain
  # PyYAML read `1e-9` as the string "1e-9" rather than a float. This form
  # also matches the other exponents in this file (5.0e-05, 1.0e-07).
  log_zero_guard_value: 1.0e-09
  normalize: null
  pad_to: 0
  mel_norm: null
  window: hann

# Tokenizer settings.
# NOTE(review): `dir` is a relative path; what it is resolved against
# (working directory, model archive, ...) is not visible here — confirm.
tokenizer:
  dir: tokenizer_all_sets/
  type: bpe

# Validation dataset settings. No manifest is configured here (null), so a
# path has to be supplied elsewhere before validation data can be read.
validation_ds:
  # Lowercase boolean, matching every other boolean in this file.
  shuffle: false
  manifest_filepath: null

# Acoustic encoder settings.
# NOTE(review): presumably instantiated from `_target_` with the remaining
# keys as constructor arguments — confirm against the loading code.
encoder:
  _target_: nemo.collections.asr.modules.ConformerEncoder
  # Same value as preprocessor.features in this file.
  feat_in: 64
  # NOTE(review): -1 looks like a "no separate output projection" sentinel —
  # confirm against the encoder class.
  feat_out: -1
  n_layers: 16
  # Same value as model_defaults.enc_hidden and
  # joint.jointnet.encoder_hidden (768).
  d_model: 768
  subsampling: striding
  subsampling_factor: 4
  subsampling_conv_channels: 768
  ff_expansion_factor: 4
  self_attention_model: rel_pos
  pos_emb_max_len: 5000
  n_heads: 16
  xscaling: false
  untie_biases: true
  conv_kernel_size: 31
  # All three dropout settings use the same rate.
  dropout: 0.1
  dropout_emb: 0.1
  dropout_att: 0.1
# Prediction-network (decoder) settings.
decoder:
  _target_: nemo.collections.asr.modules.RNNTDecoder
  normalization_mode: null
  random_state_sampling: false
  blank_as_pad: true
  # NOTE(review): presumably has to agree with the vocabulary of the BPE
  # tokenizer under tokenizer.dir — confirm.
  vocab_size: 512
  prednet:
    # Same value as model_defaults.pred_hidden (320).
    pred_hidden: 320
    pred_rnn_layers: 1
    t_max: null
    dropout: 0.0
# Joint-network settings.
joint:
  _target_: nemo.collections.asr.modules.RNNTJoint
  log_softmax: null
  # NOTE(review): fused_batch_size presumably only matters when fuse_loss_wer
  # is true — confirm against the joint class.
  fuse_loss_wer: false
  fused_batch_size: 1
  jointnet:
    # Same value as model_defaults.join_hidden (320).
    joint_hidden: 320
    activation: relu
    dropout: 0.0
    # Same value as model_defaults.enc_hidden and encoder.d_model (768).
    encoder_hidden: 768
# Optimizer and learning-rate schedule.
optim:
  name: adamw
  lr: 5.0e-05
  weight_decay: 0.01
  betas: [0.9, 0.98]
  sched:
    name: CosineAnnealing
    min_lr: 1.0e-07
    # Warmup is given as an absolute step count; the ratio form is unset.
    warmup_steps: 10000
    warmup_ratio: null
# Quoted so the value is unmistakably a string, not a number.
nemo_version: '1.12.0'
# Inference-time decoding settings.
# NOTE(review): both a `greedy` and a `beam` sub-section are present;
# presumably only the one matching `strategy` is read — confirm.
decoding:
  strategy: greedy_batch
  preserve_alignments: false
  greedy:
    # NOTE(review): looks like a cap on symbols emitted per encoder step —
    # confirm against the decoding implementation.
    max_symbols: 3
  beam:
    beam_size: 5
    score_norm: true


# Training-loss settings.
# NOTE(review): mwer, wer_coef, subtract_mean, rnnt_weight and unique_hyp
# look like options for a WER-based auxiliary loss; with `mwer: false` they
# are presumably unused — confirm against the loss implementation.
loss:
  loss_name: default
  mwer: false
  rnnt_reduction: mean_batch
  # NOTE(review): a boolean here is surprising for something named like a
  # coefficient — confirm the expected type.
  wer_coef: false
  subtract_mean: true
  warprnnt_numba_kwargs:
    # NOTE(review): 0.0 presumably leaves FastEmit regularisation off, and a
    # negative clamp presumably means "no gradient clamping" — confirm.
    fastemit_lambda: 0.0
    clamp: -1.0
  rnnt_weight: 0.1
  unique_hyp: true