{
"saving_path": "/home/ubuntu/experiments/a2s_mls",
"resume_checkpoint": null,
"vocoder_type": "SPEECHTOKENIZER",
"vocoder_config_path": null,
"vocoder_ckpt_path": null,
"metapath": [
"/var/data_mls/train.json"
],
"val_metapath": [
"/var/data_mls/test.json"
],
"pretrained_path": null,
"speaker_embedding_dir": null,
"sampledir": "/home/ubuntu/experiments/a2s_mls",
"lr": 0.0005,
"batch_size": 100.0,
"train_bucket_size": 8192,
"training_step": 800000,
"optim_flat_percent": 0.0,
"warmup_step": 10000,
"adam_beta1": 0.9,
"adam_beta2": 0.98,
"ffd_size": 1024,
"hidden_size": 1024,
"enc_nlayers": 8,
"dec_nlayers": 6,
"nheads": 8,
"dropout": 0.1,
"depthwise_conv_kernel_size": 5,
"aligner_softmax_temp": 1.0,
"layer_norm_eps": 1e-05,
"use_sem_tokens": true,
"use_spkr_emb": false,
"use_text_emb": false,
"fairseq": false,
"only_inference": false,
"speaker_embed_dropout": 0.05,
"label_smoothing": 0.0,
"val_check_interval": 1,
"max_dataset_samples": -1,
"check_val_every_n_epoch": 1,
"precision": "bf16",
"nworkers": 12,
"distributed": true,
"accelerator": "gpu",
"version": null,
"accumulate_grad_batches": 1,
"sagemaker": false,
"use_repetition_token": false,
"use_repetition_gating": false,
"repetition_penalty": 1.0,
"sampling_temperature": 1.0,
"top_k": -1,
"min_top_k": 3,
"top_p": 0.8,
"sample_num": 4,
"length_penalty_max_length": 150,
"length_penalty_max_prob": 0.95,
"max_input_length": 2048,
"max_output_length": 2000,
"phone_context_window": 3,
"sample_rate": 16000,
"n_codes": 1024,
"n_cluster_groups": 7,
"first_n_lvls": 7,
"use_pretrained_ckpt_cfg": false,
"n_semantic_codes": 1024
}