bambara-tts / config.json
oza75's picture
Rename config_enhanced_01.json to config.json
ddb19a4 verified
raw history blame
No virus
12.2 kB
{
"output_path": "/workspace/coqui-TTS/finetuning/bambara/run/training",
"logger_uri": null,
"run_name": "xtts_lr_8e-06",
"project_name": "BAM_FINE_TUNING_3",
"run_description": "\n GPT XTTS training\n ",
"print_step": 50,
"plot_step": 100,
"model_param_stats": false,
"wandb_entity": null,
"dashboard_logger": "wandb",
"save_on_interrupt": true,
"log_model_step": 1000,
"save_step": 10000,
"save_n_checkpoints": 1,
"save_checkpoints": true,
"save_all_best": false,
"save_best_after": 0,
"target_loss": null,
"print_eval": false,
"test_delay_epochs": 0,
"run_eval": true,
"run_eval_steps": null,
"distributed_backend": "nccl",
"distributed_url": "tcp://localhost:54321",
"mixed_precision": false,
"precision": "fp16",
"epochs": 40,
"batch_size": 3,
"eval_batch_size": 3,
"grad_clip": 0.0,
"scheduler_after_epoch": true,
"lr": 8e-06,
"optimizer": "AdamW",
"optimizer_params": {
"betas": [
0.9,
0.96
],
"eps": 1e-08,
"weight_decay": 0.01
},
"lr_scheduler": "MultiStepLR",
"lr_scheduler_params": {
"milestones": [
900000,
2700000,
5400000
],
"gamma": 0.5,
"last_epoch": -1
},
"use_grad_scaler": false,
"allow_tf32": false,
"cudnn_enable": true,
"cudnn_deterministic": false,
"cudnn_benchmark": false,
"training_seed": 1,
"model": "xtts",
"num_loader_workers": 8,
"num_eval_loader_workers": 0,
"use_noise_augment": false,
"audio": {
"sample_rate": 22050,
"output_sample_rate": 24000,
"dvae_sample_rate": 22050
},
"use_phonemes": false,
"phonemizer": null,
"phoneme_language": null,
"compute_input_seq_cache": false,
"text_cleaner": null,
"enable_eos_bos_chars": false,
"test_sentences_file": "",
"phoneme_cache_path": null,
"characters": null,
"add_blank": false,
"batch_group_size": 48,
"loss_masking": null,
"min_audio_len": 1,
"max_audio_len": Infinity,
"min_text_len": 1,
"max_text_len": Infinity,
"compute_f0": false,
"compute_energy": false,
"compute_linear_spec": false,
"precompute_num_workers": 0,
"start_by_longest": false,
"shuffle": false,
"drop_last": false,
"datasets": [
{
"formatter": "",
"dataset_name": "",
"path": "",
"meta_file_train": "",
"ignored_speakers": null,
"language": "",
"phonemizer": "",
"meta_file_val": "",
"meta_file_attn_mask": ""
}
],
"test_sentences": [
{
"text": "Dumuni b\u025b taa farikolo fan jum\u025bn ?",
"speaker_wav": [
"./reference_audios/bm/speaker_10/0.wav",
"./reference_audios/bm/speaker_10/1.wav",
"./reference_audios/bm/speaker_10/3.wav",
"./reference_audios/bm/speaker_10/4.wav",
"./reference_audios/bm/speaker_10/5.wav",
"./reference_audios/bm/speaker_10/6.wav",
"./reference_audios/bm/speaker_10/7.wav",
"./reference_audios/bm/speaker_10/8.wav",
"./reference_audios/bm/speaker_10/9.wav"
],
"language": "bm"
},
{
"text": "Ni sumaya furak\u025bli damin\u025bna, an ka kan ka to ka fura ta ka taa \u0272\u025b, walima ka to ka pikiri ni s\u0254r\u0254muw k\u025b ka taa \u0272\u025b fo sumaya ka ban pew.",
"speaker_wav": [
"./reference_audios/bm/speaker_14/0.wav",
"./reference_audios/bm/speaker_14/1.wav",
"./reference_audios/bm/speaker_14/2.wav",
"./reference_audios/bm/speaker_14/3.wav",
"./reference_audios/bm/speaker_14/4.wav",
"./reference_audios/bm/speaker_14/5.wav",
"./reference_audios/bm/speaker_14/6.wav",
"./reference_audios/bm/speaker_14/7.wav",
"./reference_audios/bm/speaker_14/8.wav"
],
"language": "bm"
},
{
"text": "A ko k\u025bra degunba ye jamanadenw ma k\u025br\u025bnk\u025br\u025bnna demis\u025bn finitiniw ni m\u0254g\u0254 k\u0254r\u0254baw.",
"speaker_wav": [
"./reference_audios/bm/speaker_15/0.wav",
"./reference_audios/bm/speaker_15/1.wav",
"./reference_audios/bm/speaker_15/2.wav",
"./reference_audios/bm/speaker_15/3.wav",
"./reference_audios/bm/speaker_15/4.wav",
"./reference_audios/bm/speaker_15/6.wav",
"./reference_audios/bm/speaker_15/7.wav"
],
"language": "bm"
},
{
"text": "Silam\u025b dannabaaw Burkina Faso la, u ye Eid El Fitr seli k\u025b seli la min k\u025bra sun kalo laban don na .",
"speaker_wav": [
"./reference_audios/bm/speaker_27/0.wav",
"./reference_audios/bm/speaker_27/1.wav",
"./reference_audios/bm/speaker_27/2.wav",
"./reference_audios/bm/speaker_27/3.wav",
"./reference_audios/bm/speaker_27/7.wav",
"./reference_audios/bm/speaker_27/8.wav",
"./reference_audios/bm/speaker_27/9.wav"
],
"language": "bm"
},
{
"text": "le texte devra attendre l\u2019avis du Conseil constitutionnel avant son examen \u00e0 l\u2019Assembl\u00e9e.",
"speaker_wav": [
"./reference_audios/fr/speaker_100/0.wav",
"./reference_audios/fr/speaker_100/1.wav",
"./reference_audios/fr/speaker_100/2.wav",
"./reference_audios/fr/speaker_100/3.wav",
"./reference_audios/fr/speaker_100/4.wav",
"./reference_audios/fr/speaker_100/5.wav",
"./reference_audios/fr/speaker_100/6.wav",
"./reference_audios/fr/speaker_100/7.wav",
"./reference_audios/fr/speaker_100/8.wav",
"./reference_audios/fr/speaker_100/9.wav"
],
"language": "fr"
},
{
"text": "Below are benchmarks for downsampling and upsampling waveforms between two pairs of sampling rates.",
"speaker_wav": [
"./reference_audios/en/speaker_98/0.wav",
"./reference_audios/en/speaker_98/1.wav",
"./reference_audios/en/speaker_98/2.wav",
"./reference_audios/en/speaker_98/3.wav",
"./reference_audios/en/speaker_98/4.wav",
"./reference_audios/en/speaker_98/5.wav",
"./reference_audios/en/speaker_98/6.wav",
"./reference_audios/en/speaker_98/7.wav",
"./reference_audios/en/speaker_98/8.wav",
"./reference_audios/en/speaker_98/9.wav"
],
"language": "en"
},
{
"text": "La convivencia se asienta en Euskadi con la asignatura pendiente de la memoria",
"speaker_wav": [
"./reference_audios/es/speaker_47/0.wav",
"./reference_audios/es/speaker_47/1.wav",
"./reference_audios/es/speaker_47/2.wav",
"./reference_audios/es/speaker_47/3.wav",
"./reference_audios/es/speaker_47/4.wav",
"./reference_audios/es/speaker_47/5.wav",
"./reference_audios/es/speaker_47/6.wav",
"./reference_audios/es/speaker_47/7.wav",
"./reference_audios/es/speaker_47/8.wav",
"./reference_audios/es/speaker_47/9.wav"
],
"language": "es"
},
{
"text": "Quei mariuoli di troppo alla corte dell\u2019ex sceriffo. Cos\u00ec il sistema Emiliano sta affondando la Puglia",
"speaker_wav": [
"./reference_audios/it/speaker_32/0.wav",
"./reference_audios/it/speaker_32/1.wav",
"./reference_audios/it/speaker_32/2.wav",
"./reference_audios/it/speaker_32/3.wav",
"./reference_audios/it/speaker_32/4.wav",
"./reference_audios/it/speaker_32/5.wav",
"./reference_audios/it/speaker_32/6.wav",
"./reference_audios/it/speaker_32/7.wav",
"./reference_audios/it/speaker_32/8.wav",
"./reference_audios/it/speaker_32/9.wav"
],
"language": "it"
},
{
"text": "Les Insoumis ont obtenu ce mardi 9 avril que le texte soit retir\u00e9 de l\u2019ordre du jour de l\u2019Assembl\u00e9e nationale en attendant un avis du Conseil constitutionnel.",
"speaker_wav": [
"./reference_audios/fr/speaker_100/0.wav",
"./reference_audios/fr/speaker_100/1.wav",
"./reference_audios/fr/speaker_100/2.wav",
"./reference_audios/fr/speaker_100/3.wav",
"./reference_audios/fr/speaker_100/4.wav",
"./reference_audios/fr/speaker_100/5.wav",
"./reference_audios/fr/speaker_100/6.wav",
"./reference_audios/fr/speaker_100/7.wav",
"./reference_audios/fr/speaker_100/8.wav",
"./reference_audios/fr/speaker_100/9.wav"
],
"language": "fr"
}
],
"eval_split_max_size": 256,
"eval_split_size": 0.01,
"use_speaker_weighted_sampler": false,
"speaker_weighted_sampler_alpha": 1.0,
"use_language_weighted_sampler": false,
"language_weighted_sampler_alpha": 1.0,
"use_length_weighted_sampler": false,
"length_weighted_sampler_alpha": 1.0,
"model_args": {
"gpt_batch_size": 1,
"enable_redaction": false,
"kv_cache": true,
"gpt_checkpoint": "",
"clvp_checkpoint": null,
"decoder_checkpoint": null,
"num_chars": 255,
"tokenizer_file": "./vocab.json",
"gpt_max_audio_tokens": 605,
"gpt_max_text_tokens": 402,
"gpt_max_prompt_tokens": 70,
"gpt_layers": 30,
"gpt_n_model_channels": 1024,
"gpt_n_heads": 16,
"gpt_number_text_tokens": 8130,
"gpt_start_text_token": 261,
"gpt_stop_text_token": 0,
"gpt_num_audio_tokens": 1026,
"gpt_start_audio_token": 1024,
"gpt_stop_audio_token": 1025,
"gpt_code_stride_len": 1024,
"gpt_use_masking_gt_prompt_approach": true,
"gpt_use_perceiver_resampler": true,
"input_sample_rate": 22050,
"output_sample_rate": 24000,
"output_hop_length": 256,
"decoder_input_dim": 1024,
"d_vector_dim": 512,
"cond_d_vector_in_each_upsampling_layer": true,
"duration_const": 102400,
"min_conditioning_length": 66150,
"max_conditioning_length": 132300,
"gpt_loss_text_ce_weight": 0.01,
"gpt_loss_mel_ce_weight": 1.0,
"debug_loading_failures": false,
"max_wav_length": 255995,
"max_text_length": 200,
"mel_norm_file": "mel_stats.pth",
"dvae_checkpoint": "dvae.pth",
"xtts_checkpoint": "model.pth",
"vocoder": ""
},
"model_dir": null,
"languages": [
"en",
"es",
"fr",
"de",
"it",
"pt",
"pl",
"tr",
"ru",
"nl",
"cs",
"ar",
"zh-cn",
"hu",
"ko",
"ja",
"hi",
"bm"
],
"temperature": 0.85,
"length_penalty": 1.0,
"repetition_penalty": 2.0,
"top_k": 50,
"top_p": 0.85,
"num_gpt_outputs": 1,
"gpt_cond_len": 12,
"gpt_cond_chunk_len": 4,
"max_ref_len": 10,
"sound_norm_refs": false,
"optimizer_wd_only_on_weights": true,
"weighted_loss_attrs": {},
"weighted_loss_multipliers": {},
"transliterate_bambara": false,
"github_branch": "* dev"
}