# File size: 3,047 Bytes
# 3eed963
# Top-level run settings for this training configuration.
# NOTE(review): the consumer of these keys is outside this file; the
# descriptions below are read off the key names and values — confirm.
task_name: train
# Run identifier; same string as `data.name`.
run_name: ljspeech
# Labels attached to the run (see `extras.enforce_tags`).
tags:
- ljspeech
# Both stage switches are enabled.
train: true
test: true
# No checkpoint path is supplied.
ckpt_path: null
# Global seed; `data.seed` is interpolated from this value.
seed: 1234
# Data module settings. `_target_` names the class these keys are meant for
# (matcha.data.text_mel_datamodule.TextMelDataModule).
data:
  _target_: matcha.data.text_mel_datamodule.TextMelDataModule
  name: ljspeech
  # File lists for the training and validation splits (relative paths).
  train_filelist_path: data/LJSpeech-1.1/train.txt
  valid_filelist_path: data/LJSpeech-1.1/val.txt
  # Loader settings.
  batch_size: 32
  num_workers: 20
  pin_memory: true
  # Text front-end: list of cleaner names plus a blank-insertion flag.
  cleaners:
  - english_cleaners2
  add_blank: true
  # `model.n_spks` is interpolated from this value.
  # NOTE(review): presumably the number of speakers — confirm.
  n_spks: 1
  # Audio / spectrogram parameters.
  # NOTE(review): presumably n_fft, hop_length and win_length are in samples
  # and sample_rate, f_min and f_max are in Hz — confirm against the
  # data module.
  n_fft: 1024
  n_feats: 80
  sample_rate: 22050
  hop_length: 256
  win_length: 1024
  f_min: 0
  f_max: 8000
  # Mel statistics; `model.data_statistics` interpolates this whole mapping.
  data_statistics:
    mel_mean: -5.536622
    mel_std: 2.116101
  # Takes the value of the top-level `seed` key.
  seed: ${seed}
  # `model.use_precomputed_durations` is interpolated from this flag.
  load_durations: false
# Model settings. `_target_` names the class these keys are meant for
# (matcha.models.matcha_tts.MatchaTTS).
# Values shared with other sections are written as `${...}` interpolations so
# that each setting has a single source of truth and an override in one place
# cannot leave a stale copy elsewhere.
model:
  _target_: matcha.models.matcha_tts.MatchaTTS
  n_vocab: 178
  n_spks: ${data.n_spks}
  spk_emb_dim: 64
  # Interpolated from `data.n_feats` (previously a repeated literal 80), in
  # the same way `n_spks` and `data_statistics` already follow the data
  # section. The resolved value is unchanged.
  # NOTE(review): assumes the model feature size must equal the data
  # module's `n_feats` — confirm.
  n_feats: ${data.n_feats}
  data_statistics: ${data.data_statistics}
  out_size: null
  prior_loss: true
  use_precomputed_durations: ${data.load_durations}
  encoder:
    encoder_type: RoPE Encoder
    encoder_params:
      n_feats: ${model.n_feats}
      n_channels: 192
      filter_channels: 768
      filter_channels_dp: 256
      n_heads: 2
      n_layers: 6
      kernel_size: 3
      p_dropout: 0.1
      # Follow the model-level speaker settings instead of repeating the
      # literals 64 and 1; the resolved values are unchanged.
      spk_emb_dim: ${model.spk_emb_dim}
      n_spks: ${model.n_spks}
      prenet: true
    # The duration predictor reuses the encoder's filter width and dropout.
    duration_predictor_params:
      filter_channels_dp: ${model.encoder.encoder_params.filter_channels_dp}
      kernel_size: 3
      p_dropout: ${model.encoder.encoder_params.p_dropout}
  decoder:
    channels:
    - 256
    - 256
    dropout: 0.05
    attention_head_dim: 64
    n_blocks: 1
    num_mid_blocks: 2
    num_heads: 2
    act_fn: snakebeta
  cfm:
    name: CFM
    solver: euler
    sigma_min: 0.0001
  # Optimizer settings for torch.optim.Adam.
  # NOTE(review): `_partial_: true` presumably defers construction until the
  # model supplies its parameters — confirm against Hydra's instantiate docs.
  optimizer:
    _target_: torch.optim.Adam
    _partial_: true
    lr: 0.0001
    weight_decay: 0.0
# Callback settings; each entry names its class via `_target_`.
callbacks:
  # Checkpointing into `${paths.output_dir}/checkpoints`.
  model_checkpoint:
    _target_: lightning.pytorch.callbacks.ModelCheckpoint
    dirpath: ${paths.output_dir}/checkpoints
    filename: checkpoint_{epoch:03d}
    # The monitored quantity is `epoch` with mode `max` and `save_top_k: 10`.
    # NOTE(review): presumably this retains the 10 most recent periodic
    # checkpoints — confirm against the ModelCheckpoint documentation.
    monitor: epoch
    verbose: false
    save_last: true
    save_top_k: 10
    mode: max
    auto_insert_metric_name: true
    save_weights_only: false
    # Of the three trigger options below, only `every_n_epochs` is set.
    every_n_train_steps: null
    train_time_interval: null
    every_n_epochs: 100
    save_on_train_epoch_end: null
  # Model summary callback, configured with depth 3.
  model_summary:
    _target_: lightning.pytorch.callbacks.RichModelSummary
    max_depth: 3
  # Progress bar callback; no options are set beyond the class itself.
  rich_progress_bar:
    _target_: lightning.pytorch.callbacks.RichProgressBar
# Logger settings; a single TensorBoard logger is configured.
logger:
  tensorboard:
    _target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
    # Logs are written below the run's output directory.
    save_dir: ${paths.output_dir}/tensorboard/
    # No name is set and the prefix is the empty string.
    name: null
    log_graph: false
    default_hp_metric: true
    prefix: ''
# Trainer settings for lightning.pytorch.trainer.Trainer.
trainer:
  _target_: lightning.pytorch.trainer.Trainer
  default_root_dir: ${paths.output_dir}
  # NOTE(review): -1 presumably means no epoch limit — confirm against the
  # Trainer documentation.
  max_epochs: -1
  # GPU accelerator with a one-element device list containing index 0.
  accelerator: gpu
  devices:
  - 0
  precision: 16-mixed
  check_val_every_n_epoch: 1
  deterministic: false
  gradient_clip_val: 5.0
# Filesystem locations referenced by the other sections.
paths:
  # Taken from the PROJECT_ROOT environment variable via the `oc.env`
  # resolver.
  # NOTE(review): no fallback is given, so resolution presumably fails when
  # the variable is unset — confirm.
  root_dir: ${oc.env:PROJECT_ROOT}
  # Both directories are derived from `root_dir`.
  data_dir: ${paths.root_dir}/data/
  log_dir: ${paths.root_dir}/logs/
  # Both values come from the `hydra` resolver (runtime output directory and
  # runtime cwd).
  output_dir: ${hydra:runtime.output_dir}
  work_dir: ${hydra:runtime.cwd}
# Miscellaneous run-time switches.
# NOTE(review): the code that reads these flags is outside this file; the
# key names suggest warning suppression, tag enforcement and config
# printing — confirm.
extras:
  ignore_warnings: false
  enforce_tags: true
  print_config: true