# Source: MelodyFlow/config/solver/magnet/magnet_32khz.yaml
# Commit 9d0d223 ("Initial commit") by Gael Le Lan; file size 1.94 kB
# @package __global__
# Training-loop solver configuration for the base MAGNeT model
# (text-to-music) on monophonic audio sampled at 32 kHz.
# It is layered on top of the MusicGen defaults listed below.
# Hydra defaults list: the configs composed into this one.
# `_self_` is listed last, i.e. this file is composed after the entries above it.
defaults:
  - musicgen/default
  - /model: lm/musicgen_lm
  - override /dset: audio/default
  - _self_
# Language model and solver selection.
lm_model: transformer_lm_magnet
solver: magnet

# Mixed-precision (autocast) settings.
autocast: true
autocast_dtype: float16

# Compression model: EnCodec large, trained on mono-channel music audio
# sampled at 32 kHz. Its total stride is 640, which gives 50 frames/s.
# It uses rvq.n_q=4 and rvq.bins=2048, without quantization dropout.
# (The transformer_lm `card` and n_q must be compatible with it.)
compression_model_checkpoint: //pretrained/facebook/encodec_32khz

# The restricted attention implementation supports only xformers at the moment.
efficient_attention_backend: xformers

# Audio format.
channels: 1
sample_rate: 32000
# Deadlock detection switch. `use` must be nested under `deadlock`;
# left at column 0 it becomes a separate top-level key and `deadlock` is null.
deadlock:
  use: true  # deadlock detection
# Dataset loading options (nested under `dataset`).
dataset:
  batch_size: 192  # 32 GPUs
  sample_on_weight: false  # Uniform sampling all the way
  sample_on_duration: false  # Uniform sampling all the way
# Optimisation settings. The `ema` options form their own sub-mapping so that
# `ema.use` does not collide with other `use` keys in this file.
optim:
  epochs: 500
  optimizer: dadam
  lr: 1
  ema:
    use: true
    updates: 10
    device: cuda
# Logging options (nested under `logging`).
logging:
  log_tensorboard: true
# Learning-rate schedule: `lr_scheduler` names the scheduler, and the
# `cosine` sub-mapping holds the parameters for that scheduler.
schedule:
  lr_scheduler: cosine
  cosine:
    warmup: 4000
    lr_min_ratio: 0.0
    cycle_length: 1.0
# Codebook interleaving pattern: `modeling` names the pattern, and the
# `parallel` sub-mapping holds the options for that pattern.
codebooks_pattern:
  modeling: parallel
  parallel:
    empty_initial: -1
# Transformer LM options. `span_len` here belongs to `transformer_lm` and is
# distinct from `masking.span_len`; both must stay nested to avoid a
# duplicate top-level `span_len` key.
transformer_lm:
  card: 2048
  causal: false
  subcodes_context: 5
  compression_model_framerate: 50  # NOTE: Must match the actual frame rate of the used compression model
  segment_duration: 0
  span_len: -1
# Masking options. `span_len` must be nested under `masking`; at column 0 it
# would duplicate the other `span_len` key in this file.
masking:
  span_len: 3
# Generation options. All LM sampling/decoding parameters live under
# `generate.lm`.
generate:
  lm:
    max_prompt_len: null
    max_gen_len: null
    remove_prompts: false
    use_sampling: true
    temp: 3.0
    top_k: 0
    top_p: 0.9
    max_cfg_coef: 10.0
    min_cfg_coef: 1.0
    decoding_steps: [60, 10, 10, 10]
    anneal_temp: true
    span_scoring: 'max'
    span_arrangement: 'nonoverlap'
    # NOTE(review): nesting of `prompted_samples` and `samples` under `lm` is
    # reconstructed from key order only — confirm against the solver that reads them.
    prompted_samples: false
    samples:
      prompted: false
      unprompted: true