# Spaces: Running on L4
# Hydra configuration for the `text2semantic_sft_medium_dual_ar` run.
#
# NOTE(review): the nesting below was reconstructed after indentation was lost
# in extraction. It follows the interpolation paths used in this file
# (`model.model.config.num_codebooks`, `trainer.max_steps`) and the usual
# `_target_` / `_partial_` grouping; confirm against the upstream file,
# in particular that `optimizer` and `lr_scheduler` belong under `model`.

# Hydra defaults list: compose `base` first, then mount the
# `dual_ar_8_codebook_small` option of the `model` group at package
# `model.model`, and apply this file (`_self_`) last so its values win.
defaults:
  - base
  - model@model.model: dual_ar_8_codebook_small
  - _self_

project: text2semantic_sft_medium_dual_ar
# Shared sequence length; referenced below as ${max_length}.
max_length: 4096
ckpt_path: results/text2semantic_pretrain_medium_dual_ar/checkpoints/step_000060000.ckpt
# NOTE(review): presumably loads only the weights from `ckpt_path` (no
# optimizer/step state); the consumer of this flag is not visible here.
resume_weights_only: true

# Lightning Trainer
trainer:
  accumulate_grad_batches: 1
  gradient_clip_val: 1.0
  gradient_clip_algorithm: 'norm'
  # Also consumed by the LR schedule via ${trainer.max_steps}.
  max_steps: 10_000
  precision: bf16-true
  limit_val_batches: 10
  val_check_interval: 500

# Tokenizer Configuration
# Instantiated once and shared via ${tokenizer} by the datasets and data module.
tokenizer:
  _target_: transformers.AutoTokenizer.from_pretrained
  pretrained_model_name_or_path: fishaudio/speech-lm-v1

# Dataset Configuration
train_dataset:
  _target_: fish_speech.datasets.text.AutoAugTextDataset
  use_data_server: false
  proto_files:
    - data/protos/sft/train_Genshin.protos
    - data/protos/sft/sft.protos
  tokenizer: ${tokenizer}
  max_length: ${max_length}
  # Taken from the model config mounted at `model.model` by the defaults list.
  num_codebooks: ${model.model.config.num_codebooks}
  use_speaker: false
  phones_prob: 0.5
  interactive_prob: 0.5

# Same settings as `train_dataset`, except for the proto file list.
val_dataset:
  _target_: fish_speech.datasets.text.AutoAugTextDataset
  use_data_server: false
  proto_files:
    - data/protos/sft/val_Genshin.protos
  tokenizer: ${tokenizer}
  max_length: ${max_length}
  num_codebooks: ${model.model.config.num_codebooks}
  use_speaker: false
  phones_prob: 0.5
  interactive_prob: 0.5

data:
  _target_: fish_speech.datasets.text.TextDataModule
  train_dataset: ${train_dataset}
  val_dataset: ${val_dataset}
  num_workers: 4
  batch_size: 8
  tokenizer: ${tokenizer}
  max_length: ${max_length}

# Model Configuration
model:
  _target_: fish_speech.models.text2semantic.TextToSemantic
  # Left empty here; populated by `model@model.model` in the defaults list.
  model: {}

  # `_partial_: true` makes Hydra build a functools.partial instead of calling
  # the target, so the remaining arguments can be supplied later by the caller.
  optimizer:
    _target_: torch.optim.AdamW
    _partial_: true
    lr: 4e-5
    weight_decay: 0
    betas: [0.9, 0.95]
    eps: 1e-5

  lr_scheduler:
    _target_: torch.optim.lr_scheduler.LambdaLR
    _partial_: true
    lr_lambda:
      _target_: fish_speech.scheduler.get_cosine_schedule_with_warmup_lr_lambda
      _partial_: true
      num_warmup_steps: 100
      # Kept in sync with the trainer's step budget.
      num_training_steps: ${trainer.max_steps}
      final_lr_ratio: 0

callbacks:
  model_checkpoint:
    every_n_train_steps: 1000
    save_top_k: 10