{
  "project_name": "vits_kbd",
  "push_to_hub": true,
  "hub_model_id": "mms_finetune_kbd_murat",
  "overwrite_output_dir": false,
  "output_dir": "./tmp/vits_kbd_finetuned_che_model",

  "dataset_name": "anzorq/kbd_speech",
  "audio_column_name": "audio",
  "text_column_name": "transcription",
  "train_split_name": "train",
  "eval_split_name": "train",
  "speaker_id_column_name": "speaker_id",
  "override_speaker_embeddings": true,
  "filter_on_speaker_id": 9769125,

  "full_generation_sample_text": "укъэмыващэрэт тӏэ уэ щӏалэ цӏыкӏур! - жиӏащ лӏыжьым",

  "max_duration_in_seconds": 20,
  "min_duration_in_seconds": 1.0,
  "max_tokens_length": 500,
  "do_lower_case": true,

  "model_name_or_path": "anzorq/mms-tts-kbd-discriminator",

  "preprocessing_num_workers": 4,

  "do_train": true,
  "max_steps": 5100,
  "gradient_accumulation_steps": 1,
  "gradient_checkpointing": false,
  "per_device_train_batch_size": 16,
  "learning_rate": 1e-4,
  "adam_beta1": 0.8,
  "adam_beta2": 0.99,
  "warmup_ratio": 0.01,
  "group_by_length": false,

  "do_eval": true,
  "eval_steps": 100,
  "per_device_eval_batch_size": 16,
  "max_eval_samples": 100,

  "do_step_schedule_per_epoch": true,

  "weight_disc": 3,
  "weight_fmaps": 1,
  "weight_gen": 1,
  "weight_kl": 1.5,
  "weight_duration": 1,
  "weight_mel": 35,

  "fp16": true,
  "seed": 456
}