# ############################################################################
# Model: SSL with WavLM (training from scratch)
# Authors: Artem Ploujnikov, Yingzhi Wang
# ############################################################################
# Seed needs to be set at top of yaml, before objects with parameters are instantiated
seed: 42
# `!apply` calls torch.manual_seed(<seed>) while this file is being loaded.
__set_seed: !apply:torch.manual_seed [!ref <seed>]
# Training / data settings. Nothing else in this file references these keys;
# presumably they are read by the training script -- confirm against the recipe.
train_regression_metric: True
batch_size: 4
num_workers: 4
src_sample_rate: 24000
tgt_sample_rate: 16000
contrastive: False
lr: 0.00001
number_of_epochs: 10
ckpt_interval_minutes: 15

# Hyperparameters of the regression model (passed to the `model` entry).
# `!name` stores the LeakyReLU class itself rather than an instance.
activation: !name:torch.nn.LeakyReLU
d_model: 512
d_ffn: 2048
num_layers: 3
nhead: 4
dropout: 0.5

# Source identifier and save location for the WavLM base model
# (passed to the `wavlm` entry).
wavlm_source: microsoft/wavlm-large
wavlm_save_path: .

# Not referenced elsewhere in this file; presumably dataset preparation
# options consumed by the training script -- TODO confirm.
splits: ["train", "valid", "test"]
subset: "full"
skip_prep: False
# WavLM base model, instantiated at load time from <wavlm_source> and
# given <wavlm_save_path> as its save path.
wavlm: !new:speechbrain.lobes.models.huggingface_transformers.wavlm.WavLM
    source: !ref <wavlm_source>
    output_norm: True
    save_path: !ref <wavlm_save_path>
# Regression model built around the WavLM base model; all sizes and the
# activation come from the top-level hyperparameters of the same name.
model: !new:speechbrain.lobes.models.eval.ssl.TransformerRegression
    base_model: !ref <wavlm>
    d_model: !ref <d_model>
    d_ffn: !ref <d_ffn>
    num_layers: !ref <num_layers>
    nhead: !ref <nhead>
    dropout: !ref <dropout>
    activation: !ref <activation>
# Named modules exposed by this file; the only entry is the regression model.
modules:
    model: !ref <model>
# Pretrainer with the regression model registered as its only loadable.
# No checkpoint paths are configured here; presumably they are supplied by
# the calling script -- TODO confirm.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        model: !ref <model>