# (Hugging Face Spaces badge residue from page extraction: "Spaces: Running on A10G")
defaults:
  - base
  - model@model.model: dual_ar_2_codebook_small
  - _self_
project: text2semantic_finetune_dual_ar
max_length: 2048
ckpt_path: checkpoints/text2semantic-medium-v1-2k.pth
resume_weights_only: true
# Lightning Trainer
trainer:
  accumulate_grad_batches: 1
  gradient_clip_val: 1.0
  gradient_clip_algorithm: 'norm'
  max_steps: 1000
  precision: bf16-true
  limit_val_batches: 10
  val_check_interval: 100
# Tokenizer Configuration
tokenizer:
  _target_: transformers.AutoTokenizer.from_pretrained
  pretrained_model_name_or_path: fishaudio/fish-speech-1
# Dataset Configuration
train_dataset:
  _target_: fish_speech.datasets.text.AutoAugTextDataset
  proto_files:
    - data/protos
  tokenizer: ${tokenizer}
  max_length: ${max_length}
  num_codebooks: ${model.model.config.num_codebooks}
  use_speaker: false
val_dataset:
  _target_: fish_speech.datasets.text.AutoAugTextDataset
  proto_files:
    - data/protos
  tokenizer: ${tokenizer}
  max_length: ${max_length}
  num_codebooks: ${model.model.config.num_codebooks}
  use_speaker: false
data:
  _target_: fish_speech.datasets.text.TextDataModule
  train_dataset: ${train_dataset}
  val_dataset: ${val_dataset}
  num_workers: 4
  batch_size: 8
  tokenizer: ${tokenizer}
  max_length: ${max_length}
# Model Configuration
model:
  _target_: fish_speech.models.text2semantic.TextToSemantic
  # Populated by the Hydra defaults-list entry `model@model.model` above.
  model: {}

  optimizer:
    _target_: torch.optim.AdamW
    _partial_: true
    lr: 1e-5
    weight_decay: 0
    betas: [0.9, 0.95]
    eps: 1e-5

  lr_scheduler:
    _target_: torch.optim.lr_scheduler.LambdaLR
    _partial_: true
    lr_lambda:
      _target_: fish_speech.scheduler.get_cosine_schedule_with_warmup_lr_lambda
      _partial_: true
      num_warmup_steps: 100
      num_training_steps: ${trainer.max_steps}
# Callbacks
callbacks:
  model_checkpoint:
    every_n_train_steps: 100