# Source: fish-speech-1 / fish_speech/configs/text2semantic_sft.yaml
# Last commit: "Init hf space integration" (0a3525d, verified) by lengyue233
# Hydra defaults list: compose `base`, mount the `dual_ar_8_codebook_small`
# option of the `model` config group at the `model.model` package, and apply
# this file last (`_self_`) so the values below override the composed ones.
defaults:
  - base
  - model@model.model: dual_ar_8_codebook_small
  - _self_

project: text2semantic_sft_medium_dual_ar
# Shared sequence length; referenced below via ${max_length}.
max_length: 4096
# Checkpoint used as the starting point for this run.
# NOTE(review): presumably only model weights (not optimizer/step state) are
# restored when resume_weights_only is true -- confirm against the train script.
ckpt_path: results/text2semantic_pretrain_medium_dual_ar/checkpoints/step_000060000.ckpt
resume_weights_only: true
# Lightning Trainer
trainer:
  accumulate_grad_batches: 1
  # Gradient clipping: threshold and the algorithm applied to it.
  gradient_clip_val: 1.0
  gradient_clip_algorithm: 'norm'
  # Written without the `_` separator: `10_000` is only resolved as an
  # integer under YAML 1.1 rules and becomes a string under YAML 1.2.
  # Also referenced by the LR schedule via ${trainer.max_steps}.
  max_steps: 10000
  precision: bf16-true
  limit_val_batches: 10
  val_check_interval: 500
# Tokenizer Configuration
# Instantiated through `_target_`; shared by the datasets and the data module
# below via the ${tokenizer} interpolation.
tokenizer:
  _target_: transformers.AutoTokenizer.from_pretrained
  pretrained_model_name_or_path: fishaudio/speech-lm-v1
# Dataset Configuration
# Training split: reads the listed proto files directly (no data server).
train_dataset:
  _target_: fish_speech.datasets.text.AutoAugTextDataset
  use_data_server: false
  proto_files:
    - data/protos/sft/train_Genshin.protos
    - data/protos/sft/sft.protos
  tokenizer: ${tokenizer}
  max_length: ${max_length}
  # Kept in sync with the model config selected in the defaults list.
  num_codebooks: ${model.model.config.num_codebooks}
  use_speaker: false
  phones_prob: 0.5
  interactive_prob: 0.5
# Validation split: same dataset class and options as train_dataset, but
# reading only the validation proto file.
val_dataset:
  _target_: fish_speech.datasets.text.AutoAugTextDataset
  use_data_server: false
  proto_files:
    - data/protos/sft/val_Genshin.protos
  tokenizer: ${tokenizer}
  max_length: ${max_length}
  # Kept in sync with the model config selected in the defaults list.
  num_codebooks: ${model.model.config.num_codebooks}
  use_speaker: false
  phones_prob: 0.5
  interactive_prob: 0.5
# Data module: wires the two datasets defined above together with the
# loader settings (worker count and batch size).
data:
  _target_: fish_speech.datasets.text.TextDataModule
  train_dataset: ${train_dataset}
  val_dataset: ${val_dataset}
  num_workers: 4
  batch_size: 8
  tokenizer: ${tokenizer}
  max_length: ${max_length}
# Model Configuration
# NOTE(review): optimizer and lr_scheduler are nested under `model` on the
# assumption that they are constructor arguments of TextToSemantic -- confirm
# against fish_speech.models.text2semantic.
model:
  _target_: fish_speech.models.text2semantic.TextToSemantic
  # Filled in by the `model@model.model` entry of the defaults list.
  model: {}

  # `_partial_: true` defers construction until the remaining arguments
  # (the parameters to optimize) are supplied by the caller.
  optimizer:
    _target_: torch.optim.AdamW
    _partial_: true
    # Floats are written with an explicit decimal point: stock PyYAML
    # resolves dot-less forms such as `4e-5` as strings.
    lr: 4.0e-5
    weight_decay: 0
    betas: [0.9, 0.95]
    eps: 1.0e-5

  lr_scheduler:
    _target_: torch.optim.lr_scheduler.LambdaLR
    _partial_: true
    # Multiplier function handed to LambdaLR: warmup followed by cosine
    # decay, spanning the whole run (${trainer.max_steps}).
    lr_lambda:
      _target_: fish_speech.scheduler.get_cosine_schedule_with_warmup_lr_lambda
      _partial_: true
      num_warmup_steps: 100
      num_training_steps: ${trainer.max_steps}
      final_lr_ratio: 0
# Checkpointing overrides: save every 1000 training steps, with save_top_k
# set to 10.
# NOTE(review): the remaining model_checkpoint settings are presumably
# inherited from the `base` config -- confirm there.
callbacks:
  model_checkpoint:
    every_n_train_steps: 1000
    save_top_k: 10