---
# Training configuration for a SoundStream-based audio codec.
#
# NOTE(review): this file was recovered from a copy whose indentation had been
# flattened and whose lines carried stray "|" table residue. Keys and values
# are unchanged; the nesting below is reconstructed from the name/config
# pairing of each component -- confirm against the consuming training script.

# Generator (codec) network.
generator:
  name: SoundStream
  config:
    n_filters: 32
    D: 256
    target_bandwidths:
      - 0.5
      - 1
      - 1.5
      - 2
      - 4
      - 6
    ratios:
      - 8
      - 5
      - 4
      - 2
    sample_rate: 16000
    bins: 1024

# Discriminators selected by name; each entry refers to a top-level section
# below. Only "mfd" is listed here.
d_list:
  - mfd

mfd:
  name: MultiFrequencyDiscriminator
  config:
    hop_lengths:
      - 32
      - 64
      - 128
      - 256
      - 512
      - 1024
    hidden_channels:
      - 64
      - 128
      - 256
      - 512
      - 512
      - 512
    domain: double
    mel_scale: true
    sample_rate: 16000

# NOTE(review): "mpd" and "msd" are not listed in d_list above; presumably
# they are unused in this run -- verify before removing.
mpd:
  name: MultiPeriodDiscriminator
  config:
    period_sizes:
      - 2
      - 3
      - 5
      - 7
      - 11
    period_kernel_size: 5

msd:
  name: MultiScaleDiscriminator
  config:
    num_scales: 3
    pool_kernel_size: 4
    pool_stride: 2

# Optimizer settings; "g" and "d" carry identical values here.
# (presumably generator / discriminator -- confirm against the trainer)
optimizer:
  g:
    name: AdamW
    config:
      lr: 0.0002
      betas:
        - 0.8
        - 0.99
      eps: 1.0e-06
  d:
    name: AdamW
    config:
      lr: 0.0002
      betas:
        - 0.8
        - 0.99
      eps: 1.0e-06

lr_scheduler:
  g:
    name: ExponentialLR
    config:
      gamma: 0.999
  d:
    name: ExponentialLR
    config:
      gamma: 0.999

criterion:
  g_criterion:
    name: losses.generator_loss.GeneratorSTFTLoss
    # NOTE(review): the flags, weights and *_loss sub-sections are assumed to
    # belong to this config mapping -- confirm against the loss constructor.
    config:
      # use_mel_loss is false while mel_loss_weight and mel_scale_loss are
      # still set; presumably those are ignored in this run -- verify.
      use_mel_loss: false
      adv_criterion: MSEGLoss
      mel_loss_weight: 45
      use_feature_match: true
      feat_match_loss_weight: 20
      use_full_stft_loss: true
      use_sub_stft_loss: true
      full_stft_loss_weight: 1
      sub_stft_loss_weight: 1
      mel_scale_loss:
        sampling_rate: 16000
        n_fft: 1024
        num_mels: 80
        hop_size: 160
        win_size: 800
        fmin: 0
      full_multi_scale_stft_loss:
        fft_sizes:
          - 512
          - 1024
          - 2048
        win_sizes:
          - 480
          - 960
          - 1200
        hop_sizes:
          - 120
          - 240
          - 300
      sub_multi_scale_stft_loss:
        num_bands: 6
        fft_sizes:
          - 128
          - 256
          - 256
        win_sizes:
          - 80
          - 120
          - 200
        hop_sizes:
          - 20
          - 40
          - 50
  d_criterion:
    name: losses.discriminator_loss.MSEDiscriminatorLoss
    config: null
  # NOTE(review): these two weights are placed under "criterion" by
  # assumption; they may instead be top-level keys -- confirm with the trainer.
  commit_loss_weight: 1.0
  codebook_loss_weight: 75

# Run-level settings.
# The two list files are absolute paths and will need adjusting per machine.
training_file: /aifs4su/data/zheny/fairseq/vae_v2/codec_final/list/train.txt
validation_file: /aifs4su/data/zheny/fairseq/vae_v2/codec_final/list/valid.txt
seed: 2333
cudnn_deterministic: false
tensorboard: true
checkpoint_interval: 5000
summary_interval: 100
validation_interval: 500
# Key spelling "num_epoches" is kept as-is; the consumer presumably reads this
# exact name.
num_epoches: 20
print_freq: 10
discriminator_iter_start: 0
num_ckpt_keep: 10
segment_size: 16000
audio_norm_scale: 0.95
batch_size: 48
num_workers: 8
num_plots: 8
# NOTE(review): 1000000 looks like a placeholder value rather than a real
# rank -- confirm it is overridden at launch.
local_rank: 1000000
basic_model_config: config/codec_16k_6kbps_v3_vqdp.yaml
exp_model_config: null
log_dir: 0518_20w_ckpts
ngpus_per_node: 8
sample_rate: 16000
model_ckpt_dir: 0518_20w_ckpts/model_ckpts
|