# General experiment settings: run stage, corpus layout, I/O paths and
# feature options.
general:
  stage: "pretrain"
  corpus_type: "multi-unseen"  # options: single, multi-seen, multi-unseen
  # NOTE(review): source_path and aux_path differ only by the "-low" suffix;
  # presumably a degraded copy and the original corpus -- confirm.
  source_path: "./data/jvs_22k-low"
  aux_path: "./data/jvs_22k"
  preprocessed_path: "./preprocessed/jvs"
  output_path: "./output/vocfeats/pretrain"
  test_wav_path: null
  feature_type: "vocfeats"
  hifigan_path: "./hifigan/hifigan_jvs_40d_600k"
  # Booleans are written in canonical lowercase so that YAML 1.1 and
  # YAML 1.2 loaders resolve them identically.
  power_norm: true
  use_gst: false

# Preprocessing settings: data split sizes and acoustic-analysis parameters.
preprocess:
  # Data split sizes.
  # NOTE(review): the counting unit (utterances? per speaker?) is not stated
  # here -- confirm against the preprocessing script.
  n_train: 90
  n_val: 5
  n_test: 5
  # Waveform / spectral analysis parameters.
  # NOTE(review): presumably sampling_rate, fmin and fmax are in Hz, and
  # frame_length, frame_shift and fft_length are in samples -- confirm.
  sampling_rate: 22050
  frame_length: 1024
  frame_shift: 256
  fft_length: 1024
  fmin: 0
  fmax: 8000
  n_mels: 80
  cep_order: 40
  f0_extractor: "dio"
  comp_factor: 1.0
  min_magnitude: 0.00001
  max_wav_value: 32768.0
  # NOTE(review): unit of segment_length is not stated (seconds?) -- confirm.
  segment_length: 2

# Training settings: optimisation hyperparameters, logging, checkpoint
# loading and loss configuration.
# Booleans are written in canonical lowercase so that YAML 1.1 and
# YAML 1.2 loaders resolve them identically.
train:
  batchsize: 8
  epoch: 50
  # NOTE(review): the role of alpha is not visible in this file (presumably
  # a loss weight) -- confirm against the training code.
  alpha: 0.1
  augment: true
  multi_gpu_mode: false
  num_workers: 4
  learning_rate: 0.005
  grad_clip_thresh: 1.0
  logger_step: 1000
  # pretrained_path is null while load_pretrained is false.
  # NOTE(review): presumably a path is required once load_pretrained is
  # enabled -- confirm.
  load_pretrained: false
  pretrained_path: null
  early_stopping: false
  multi_scale_loss:
    use_linear: true
    gamma: 1.0
  feature_loss:
    type: "mae"