# ############################################################################
# Model: ECAPA pre-trained with SimCLR using VGGSound
# ############################################################################
#
# NOTE(review): this file was recovered from a copy in which all line breaks
# were collapsed and every `!ref <...>` target was stripped. The structure and
# reference targets below were reconstructed from the key names that survive
# in the file; entries marked NOTE(review) should be confirmed against the
# published checkpoint before use.

# Feature parameters
n_mels: 80

# Pretrain folder (HuggingFace)
pretrained_path: gorinars/sb-ecapa-vggsound-simclr

# Output parameters
out_n_neurons: 308

# Model params
compute_features: !new:speechbrain.lobes.features.Fbank
  n_mels: 80
  left_frames: 0
  right_frames: 0
  deltas: false
  sample_rate: 16000
  n_fft: 400
  win_length: 25
  hop_length: 10
  f_min: 0

mean_var_norm: !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false

# Sequential container holding the ECAPA encoder followed by the projector.
# NOTE(review): `embedding` and `projector` are assumed to be named layers of
# this container (they directly follow `input_shape` in the recovered text).
embedding_model: !new:speechbrain.nnet.containers.LengthsCapableSequential
  input_shape: [null, 1, null]
  embedding: !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
    input_size: !ref <n_mels>
    channels: [1024, 1024, 1024, 1024, 3072]
    kernel_sizes: [5, 3, 3, 3, 1]
    dilations: [1, 2, 3, 4, 1]
    groups: [1, 1, 1, 1, 1]
    attention_channels: 128
    lin_neurons: 256
  # This class lives in the `crytorch` package, not in SpeechBrain; that
  # package presumably has to be importable when this file is loaded.
  projector: !new:crytorch.models.components.pann.SimSiamProjector
    input_size: 256
    hidden_size: 256
    output_size: 256
    norm_type: bn

classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
  input_size: 256
  out_neurons: !ref <out_n_neurons>

modules:
  compute_features: !ref <compute_features>
  mean_var_norm: !ref <mean_var_norm>
  embedding_model: !ref <embedding_model>
  classifier: !ref <classifier>

label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder

# Loads the pretrained encoder and projector weights from `pretrained_path`.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    # NOTE(review): targets reconstructed on the assumption that the two
    # layers are nested under `embedding_model`; confirm the exact reference
    # form expected by the installed HyperPyYAML version.
    embedding: !ref <embedding_model[embedding]>
    projector: !ref <embedding_model[projector]>
  paths:
    embedding: !ref <pretrained_path>/embedding_model.ckpt
    projector: !ref <pretrained_path>/projector.ckpt