|
analyzer: exp/autoencoder/symAD_libritts_24000_hop300/checkpoint-500000steps.pkl |
|
batch_length: 9600 |
|
batch_size: 16 |
|
config: config/vocoder/AudioDec_v1_symAD_libritts_24000_hop300_clean.yaml |
|
data: |
|
path: /mnt/home/yichiaowu/datasets/LibriTTS/LibriTTS/24000 |
|
subset: |
|
test: test-clean-1utt |
|
train: train-clean-450 |
|
valid: dev-clean-1utt |
|
disable_cudnn: 'False' |
|
discriminator_adv_loss_params: |
|
average_by_discriminators: false |
|
discriminator_grad_norm: -1 |
|
discriminator_optimizer_params: |
|
betas: |
|
- 0.5 |
|
- 0.9 |
|
lr: 0.0002 |
|
weight_decay: 0.0 |
|
discriminator_optimizer_type: Adam |
|
discriminator_params: |
|
follow_official_norm: true |
|
period_discriminator_params: |
|
bias: true |
|
channels: 32 |
|
downsample_scales: |
|
- 3 |
|
- 3 |
|
- 3 |
|
- 3 |
|
- 1 |
|
in_channels: 1 |
|
kernel_sizes: |
|
- 5 |
|
- 3 |
|
max_downsample_channels: 1024 |
|
nonlinear_activation: LeakyReLU |
|
nonlinear_activation_params: |
|
negative_slope: 0.1 |
|
out_channels: 1 |
|
use_spectral_norm: false |
|
use_weight_norm: true |
|
periods: |
|
- 2 |
|
- 3 |
|
- 5 |
|
- 7 |
|
- 11 |
|
scale_discriminator_params: |
|
bias: true |
|
channels: 128 |
|
downsample_scales: |
|
- 4 |
|
- 4 |
|
- 4 |
|
- 4 |
|
- 1 |
|
in_channels: 1 |
|
kernel_sizes: |
|
- 15 |
|
- 41 |
|
- 5 |
|
- 3 |
|
max_downsample_channels: 1024 |
|
max_groups: 16 |
|
nonlinear_activation: LeakyReLU |
|
nonlinear_activation_params: |
|
negative_slope: 0.1 |
|
out_channels: 1 |
|
scale_downsample_pooling: AvgPool1d |
|
scale_downsample_pooling_params: |
|
kernel_size: 4 |
|
padding: 2 |
|
stride: 2 |
|
scales: 3 |
|
discriminator_scheduler_params: |
|
gamma: 0.5 |
|
milestones: |
|
- 200000 |
|
- 400000 |
|
- 600000 |
|
- 800000 |
|
discriminator_scheduler_type: MultiStepLR |
|
discriminator_train_start_steps: 0 |
|
eval_interval_steps: 1000 |
|
exp_root: exp |
|
feat_match_loss_params: |
|
average_by_discriminators: false |
|
average_by_layers: false |
|
include_final_outputs: false |
|
generator_adv_loss_params: |
|
average_by_discriminators: false |
|
generator_grad_norm: -1 |
|
generator_optimizer_params: |
|
betas: |
|
- 0.5 |
|
- 0.9 |
|
lr: 0.0002 |
|
weight_decay: 0.0 |
|
generator_optimizer_type: Adam |
|
generator_params: |
|
bias: true |
|
channels: 512 |
|
groups: 3 |
|
in_channels: 64 |
|
kernel_size: 7 |
|
nonlinear_activation: LeakyReLU |
|
nonlinear_activation_params: |
|
negative_slope: 0.1 |
|
out_channels: 1 |
|
resblock_dilations: |
|
- - 1 |
|
- 3 |
|
- 5 |
|
resblock_kernel_sizes: |
|
- 11 |
|
stats: stats/symAD_libritts_24000_hop300_clean.npy |
|
upsample_kernel_sizes: |
|
- 10 |
|
- 10 |
|
- 8 |
|
- 6 |
|
upsample_scales: |
|
- 5 |
|
- 5 |
|
- 4 |
|
- 3 |
|
use_additional_convs: true |
|
use_weight_norm: true |
|
generator_scheduler_params: |
|
gamma: 0.5 |
|
milestones: |
|
- 200000 |
|
- 400000 |
|
- 600000 |
|
- 800000 |
|
generator_scheduler_type: MultiStepLR |
|
generator_train_start_steps: 1 |
|
lambda_adv: 1.0 |
|
lambda_feat_match: 2.0 |
|
lambda_mel_loss: 45.0 |
|
lambda_shape_loss: 45.0 |
|
lambda_stft_loss: 45.0 |
|
log_interval_steps: 100 |
|
mel_loss_params: |
|
fft_sizes: |
|
- 2048 |
|
fmax: 12000 |
|
fmin: 0 |
|
fs: 24000 |
|
hop_sizes: |
|
- 300 |
|
log_base: null |
|
num_mels: 80 |
|
win_lengths: |
|
- 2048 |
|
window: hann_window |
|
model_type: HiFiGAN |
|
num_workers: 2 |
|
outdir: exp/vocoder/AudioDec_v1_symAD_libritts_24000_hop300_clean |
|
pin_memory: true |
|
resume: '' |
|
sampling_rate: 24000 |
|
save_interval_steps: 100000 |
|
seed: 1337 |
|
shape_loss_params: |
|
winlen: |
|
- 300 |
|
stft_loss_params: |
|
fft_sizes: |
|
- 1024 |
|
- 2048 |
|
- 512 |
|
hop_sizes: |
|
- 120 |
|
- 240 |
|
- 50 |
|
win_lengths: |
|
- 600 |
|
- 1200 |
|
- 240 |
|
window: hann_window |
|
tag: vocoder/AudioDec_v1_symAD_libritts_24000_hop300_clean |
|
train_max_steps: 500000 |
|
train_mode: vocoder |
|
use_feat_match_loss: true |
|
use_mel_loss: true |
|
use_shape_loss: false |
|
use_stft_loss: false |
|
|