---
# Configuration with four top-level sections: general, preprocess, train, dual.
#
# NOTE(review): this file was restored from a copy whose line breaks and
# indentation had been lost. Section nesting was reconstructed from key order;
# in particular, `multi_scale_loss` and `feature_loss` are assumed to belong
# under `train`, and `dual` is assumed to be a top-level section. Confirm
# against the code that reads this file.

general:
  stage: "ssl"
  corpus_type: "single"  # (single, multi-seen, multi-unseen)
  source_path: "./data/jsut_22k-low"
  aux_path: "./data/jsut_22k"
  preprocessed_path: "./preprocessed/jsut-low"
  output_path: "./output/vocfeats/jsut-low"
  test_wav_path: null
  feature_type: "vocfeats"
  hifigan_path: "./hifigan/hifigan_jvs_40d_600k"
  power_norm: true
  use_gst: false

preprocess:
  # Dataset split sizes (4950 + 25 + 25 = 5000 items in total).
  n_train: 4950
  n_val: 25
  n_test: 25
  sampling_rate: 22050
  frame_length: 1024
  frame_shift: 256
  fft_length: 1024
  fmin: 0
  fmax: 8000
  n_mels: 80
  cep_order: 40
  comp_factor: 1.0
  min_magnitude: 0.00001
  # Kept quoted so the value stays a string.
  bitrate: "16k"
  f0_extractor: "harvest"
  max_wav_value: 32768.0
  # NOTE(review): unit is not stated in this file (presumably seconds) - confirm.
  segment_length: 2

train:
  batchsize: 4
  epoch: 50
  epoch_channel: 25
  multi_gpu_mode: false
  num_workers: 4
  learning_rate: 0.001
  alpha: 0.1
  beta: 0.1
  grad_clip_thresh: 1.0
  logger_step: 1000
  # NOTE(review): load_pretrained is enabled while pretrained_path is null;
  # verify how the consumer handles this combination.
  load_pretrained: true
  pretrained_path: null
  fix_channel: false
  early_stopping: false
  multi_scale_loss:
    use_linear: true
    gamma: 1.0
  feature_loss:
    type: "mae"

dual:
  enable: true
  config_path: "./configs/train/vocfeats/dual.yaml"