{
    "base_config": "egs/vocoder/gan/exp_config_base.json",
    "exp_name": "bigvgan_large",
    "inference": {
        "batch_size": 1
    },
    "model": {
        "bigvgan": {
            "activation": "snakebeta",
            "resblock": "1",
            "resblock_dilation_sizes": [
                [
                    1,
                    3,
                    5
                ],
                [
                    1,
                    3,
                    5
                ],
                [
                    1,
                    3,
                    5
                ]
            ],
            "resblock_kernel_sizes": [
                3,
                7,
                11
            ],
            "snake_logscale": true,
            "upsample_initial_channel": 1536,
            "upsample_kernel_sizes": [
                8,
                8,
                4,
                4,
                4,
                4
            ],
            "upsample_rates": [
                4,
                4,
                2,
                2,
                2,
                2
            ]
        },
        "discriminators": [
            "mpd",
            "msstftd"
        ],
        "generator": "bigvgan",
        "mpd": {
            "discriminator_channel_multi": 1,
            "mpd_reshapes": [
                2,
                3,
                5,
                7,
                11
            ],
            "use_spectral_norm": false
        },
        "mrd": {
            "discriminator_channel_multi": 1,
            "mrd_override": false,
            "resolutions": [
                [
                    1024,
                    120,
                    600
                ],
                [
                    2048,
                    240,
                    1200
                ],
                [
                    512,
                    50,
                    240
                ]
            ],
            "use_spectral_norm": false
        },
        "msstftd": {
            "filters": 32
        }
    },
    "model_type": "GANVocoder",
    "preprocess": {
        "audio_dir": "audios",
        "bits": 8,
        "contentvec_dir": "contentvec",
        "cut_mel_frame": 32,
        "data_augment": false,
        "dur_dir": "durs",
        "duration_dir": "duration",
        "emo2id": "emo2id.json",
        "energy_dir": "energys",
        "energy_extract_mode": "from_mel",
        "energy_norm": false,
        "extract_audio": true,
        "extract_contentvec_feature": false,
        "extract_duration": false,
        "extract_energy": false,
        "extract_label": false,
        "extract_mcep": false,
        "extract_mel": true,
        "extract_mert_feature": false,
        "extract_one_hot": false,
        "extract_pitch": false,
        "extract_uv": false,
        "extract_wenet_feature": false,
        "extract_whisper_feature": false,
        "f0_max": 1100,
        "f0_min": 50,
        "file_lst": "file.lst",
        "fmax": 12000,
        "fmin": 0,
        "hop_size": 256,
        "is_mu_law": false,
        "lab_dir": "labs",
        "label_dir": "labels",
        "mcep_dir": "mcep",
        "mel_dir": "mels",
        "mel_min_max_norm": false,
        "min_level_db": -115,
        "n_fft": 1024,
        "n_mel": 100,
        "num_silent_frames": 8,
        "phone_seq_file": "phone_seq_file",
        "pitch_bin": 256,
        "pitch_dir": "pitches",
        "pitch_extractor": "parselmouth",
        "pitch_max": 1100.0,
        "pitch_min": 50.0,
        "pitch_norm": false,
        "processed_dir": "processed_data",
        "ref_level_db": 20,
        "sample_rate": 24000,
        "spk2id": "singers.json",
        "train_file": "train.json",
        "trim_fft_size": 512,
        "trim_hop_size": 128,
        "trim_silence": false,
        "trim_top_db": 30,
        "trimmed_wav_dir": "trimmed_wavs",
        "use_audio": true,
        "use_dur": false,
        "use_emoid": false,
        "use_frame_duration": false,
        "use_frame_energy": false,
        "use_frame_pitch": false,
        "use_lab": false,
        "use_label": false,
        "use_log_scale_energy": false,
        "use_log_scale_pitch": false,
        "use_mel": true,
        "use_one_hot": false,
        "use_phn_seq": false,
        "use_phone_duration": false,
        "use_phone_energy": false,
        "use_phone_pitch": false,
        "use_spkid": false,
        "use_uv": false,
        "use_wav": false,
        "use_wenet": false,
        "utt2emo": "utt2emo",
        "utt2spk": "utt2spk",
        "uv_dir": "uvs",
        "valid_file": "test.json",
        "wav_dir": "wavs",
        "wenet_dir": "wenet",
        "win_size": 1024
    },
    "supported_model_type": [
        "GANVocoder",
        "Fastspeech2",
        "DiffSVC",
        "Transformer",
        "EDM",
        "CD"
    ],
    "train": {
        "adamw": {
            "adam_b1": 0.8,
            "adam_b2": 0.99,
            "lr": 0.0002
        },
        "batch_size": 4,
        "criterions": [
            "feature",
            "discriminator",
            "generator",
            "mel"
        ],
        "dataloader": {
            "num_worker": 4,
            "pin_memory": true
        },
        "ddp": true,
        "epochs": 50000,
        "exponential_lr": {
            "lr_decay": 0.999
        },
        "gradient_accumulation_step": 1,
        "keep_checkpoint_max": 5,
        "max_epoch": 1000000,
        "max_steps": 1000000,
        "multi_speaker_training": false,
        "random_seed": 114514,
        "run_eval": [
            true
        ],
        "sampler": {
            "drop_last": true,
            "holistic_shuffle": true
        },
        "save_checkpoint_stride": [
            200
        ],
        "save_checkpoints_steps": 10000,
        "save_summary_steps": 500,
        "total_training_steps": 50000,
        "tracker": [
            "tensorboard"
        ],
        "valid_interval": 10000
    }
}