{
  "datasets": [
    {
      "formatter": "kokoro",
      "path": "DEFINE THIS",
      "meta_file_train": "metadata.csv",
      "meta_file_val": null
    }
  ],
  "audio": {
    "fft_size": 1024,
    "win_length": 1024,
    "hop_length": 256,
    "frame_length_ms": null,
    "frame_shift_ms": null,
    "sample_rate": 22050,
    "preemphasis": 0.0,
    "ref_level_db": 20,
    "do_trim_silence": true,
    "trim_db": 60,
    "power": 1.5,
    "griffin_lim_iters": 60,
    "num_mels": 80,
    "mel_fmin": 50.0,
    "mel_fmax": 7600.0,
    "spec_gain": 1,
    "signal_norm": true,
    "min_level_db": -100,
    "symmetric_norm": true,
    "max_norm": 4.0,
    "clip_norm": true,
    "stats_path": "scale_stats.npy"
  },
  "gst": {
    "gst_style_input": null,
    "gst_embedding_dim": 512,
    "gst_num_heads": 4,
    "gst_style_tokens": 10,
    "gst_use_speaker_embedding": false
  },
  "model": "Tacotron2",
  "run_name": "kokoro-ddc",
  "run_description": "tacotron2 with DDC and differential spectral loss.",
  "batch_size": 32,
  "eval_batch_size": 16,
  "mixed_precision": true,
  "distributed": {
    "backend": "nccl",
    "url": "tcp:\/\/localhost:54321"
  },
  "reinit_layers": [],
  "loss_masking": true,
  "decoder_loss_alpha": 0.5,
  "postnet_loss_alpha": 0.25,
  "postnet_diff_spec_alpha": 0.25,
  "decoder_diff_spec_alpha": 0.25,
  "decoder_ssim_alpha": 0.5,
  "postnet_ssim_alpha": 0.25,
  "ga_alpha": 5.0,
  "stopnet_pos_weight": 15.0,
  "run_eval": true,
  "test_delay_epochs": 10,
  "test_sentences_file": null,
  "noam_schedule": false,
  "grad_clip": 1.0,
  "epochs": 1000,
  "lr": 0.0001,
  "wd": 0.000001,
  "warmup_steps": 4000,
  "seq_len_norm": false,
  "memory_size": -1,
  "prenet_type": "original",
  "prenet_dropout": true,
  "attention_type": "original",
  "windowing": false,
  "use_forward_attn": false,
  "forward_attn_mask": false,
  "transition_agent": false,
  "location_attn": true,
  "bidirectional_decoder": false,
  "double_decoder_consistency": true,
  "ddc_r": 7,
  "attention_heads": 4,
  "attention_norm": "sigmoid",
  "r": 7,
  "gradual_training": [[0, 7, 64], [1, 5, 64], [50000, 3, 32], [130000, 2, 32], [290000, 1, 32]],
  "stopnet": true,
  "separate_stopnet": true,
  "print_step": 25,
  "tb_plot_step": 100,
  "print_eval": false,
  "save_step": 10000,
  "checkpoint": true,
  "keep_all_best": false,
  "keep_after": 10000,
  "tb_model_param_stats": false,
  "text_cleaner": "basic_cleaners",
  "enable_eos_bos_chars": false,
  "num_loader_workers": 4,
  "num_val_loader_workers": 4,
  "batch_group_size": 4,
  "min_seq_len": 6,
  "max_seq_len": 153,
  "compute_input_seq_cache": false,
  "use_noise_augment": true,
  "output_path": "DEFINE THIS",
  "phoneme_cache_path": "DEFINE THIS",
  "use_phonemes": true,
  "phoneme_language": "ja-jp",
  "characters": {
    "pad": "_",
    "eos": "~",
    "bos": "^",
    "characters": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!'(),-.:;? ",
    "punctuations": "!'(),-.:;? ",
    "phonemes": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
  },
  "use_speaker_embedding": false,
  "use_gst": false,
  "use_external_speaker_embedding_file": false,
  "external_speaker_embedding_file": "../../speakers-vctk-en.json"
}