---
# Component configuration with three top-level sections: feature_extractor,
# backbone and head. Each section names a class via `class_path` and lists
# that class's constructor arguments under `init_args`.
# NOTE(review): the class_path/init_args layout looks like the
# jsonargparse / LightningCLI instantiation convention - confirm against
# the loader that consumes this file.

feature_extractor:
  class_path: vocos.feature_extractors.MelSpectrogramFeatures
  init_args:
    sample_rate: 24000
    n_fft: 1024
    hop_length: 256
    n_mels: 100
    padding: center

backbone:
  class_path: vocos.models.VocosBackbone
  init_args:
    # Same value as feature_extractor.init_args.n_mels (100).
    # NOTE(review): presumably these two must stay equal - confirm.
    input_channels: 100
    dim: 768
    intermediate_dim: 2304
    num_layers: 12

head:
  class_path: vocos.heads.ISTFTHead
  init_args:
    # Same value as backbone.init_args.dim (768).
    # NOTE(review): presumably these two must stay equal - confirm.
    dim: 768
    # n_fft, hop_length and padding repeat the feature_extractor values.
    # NOTE(review): presumably they must be changed together - confirm.
    n_fft: 1024
    hop_length: 256
    padding: center