# SpeechBrain / HyperPyYAML hyperparameters: x-vector (TDNN) speaker-embedding
# training with waveform augmentation, seed 914.
# Experiment seed; the same value (914) is embedded in every results path below.
seed: 914
# !apply invokes torch.manual_seed(914) when this file is loaded.
__set_seed: !apply:torch.manual_seed [914]
# Root directory for everything this run writes.
output_folder: results/tdnn_augment/914
# Holds the checkpoints and the CSV manifests referenced further down.
save_folder: results/tdnn_augment/914/save
train_log: results/tdnn_augment/914/train_log.txt

# Two dataset roots supplied as ONE comma-separated string (not a YAML list).
# NOTE(review): the consumer has to split on "," itself -- confirm the
# data-preparation code does so.
data_folder: /fastdata/pcp22wc/audio/VoxCeleb2/dev, /fastdata/pcp22wc/audio/VoxCeleb1/test
# Training / validation manifests, both stored under the save folder.
train_annotation: results/tdnn_augment/914/save/train.csv
valid_annotation: results/tdnn_augment/914/save/dev.csv

# Same directory that the EnvCorrupt entries below receive as openrir_folder.
rir_folder: /fastdata/pcp22wc/audio
musan_folder: /fastdata/pcp22wc/audio/musan
# One manifest per MUSAN category; each is consumed as `noise_csv` by the
# matching add_*_musan augmenter below.
music_csv: results/tdnn_augment/914/save/music.csv
noise_csv: results/tdnn_augment/914/save/noise.csv
speech_csv: results/tdnn_augment/914/save/speech.csv

# Remote trial list (URL, not a local path).
verification_file: https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt
# NOTE(review): presumably skips dataset preparation and reuses the existing
# CSV manifests -- confirm against the training script.
skip_prep: true
ckpt_interval_minutes: 15

# Training scalars. The same literals reappear in the objects further down:
# epochs -> epoch_counter.limit / lr_annealing.epoch_count,
# batch_size and shuffle -> dataloader_options,
# lr / lr_final -> opt_class.lr and lr_annealing initial/final values,
# sample_rate -> the TimeDomainSpecAugment entries.
number_of_epochs: 30
batch_size: 512
lr: 0.001
lr_final: 0.0001
# NOTE(review): no other entry in the visible file carries this value.
step_size: 65000
sample_rate: 16000
# Same value as openrir_max_noise_len in the EnvCorrupt entries.
sentence_len: 3.0
shuffle: true
random_chunk: true

# Feature settings; repeated verbatim inside compute_features, and n_mels
# also equals embedding_model.in_channels.
n_mels: 80
deltas: false

# Classifier output width (same value as classifier.out_neurons).
# NOTE(review): presumably the number of training speakers -- confirm.
out_n_neurons: 5994

# Data-loader keyword arguments as a nested mapping.
dataloader_options:
  batch_size: 512
  shuffle: true
  num_workers: 8

# Fbank feature extractor: 80 mel bins, delta features disabled.
# Anchored as id009 so `modules` can alias the same instance.
compute_features: &id009 !new:speechbrain.lobes.features.Fbank
  n_mels: 80
  deltas: false

# X-vector embedding network. The three per-block lists each have five
# entries, matching tdnn_blocks; in_channels matches n_mels (80) and
# lin_neurons matches classifier.input_size (512).
# Anchored as id010 for reuse in `modules` and the checkpointer.
embedding_model: &id010 !new:speechbrain.lobes.models.Xvector.Xvector
  in_channels: 80
  # !name passes the class itself (uninstantiated) as the activation argument.
  activation: !name:torch.nn.LeakyReLU
  tdnn_blocks: 5
  tdnn_channels: [512, 512, 512, 512, 1500]
  tdnn_kernel_sizes: [5, 3, 3, 1, 1]
  tdnn_dilations: [1, 2, 3, 1, 1]
  lin_neurons: 512

# Classification head: consumes the 512-dim embeddings and emits
# out_n_neurons (5994) outputs. Anchored as id011.
classifier: &id011 !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
  input_size: 512
  out_neurons: 5994

# Epoch counter capped at number_of_epochs (30). Anchored as id013 so the
# checkpointer can register it as a recoverable.
epoch_counter: &id013 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: 30

# Time-domain augmenter whose only speed option is 100, in contrast to
# augment_speed which also offers 95 and 105.
# NOTE(review): speeds look like percentages, so [100] would mean no speed
# change -- confirm against the TimeDomainSpecAugment documentation.
augment_wavedrop: &id001 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
  sample_rate: 16000
  speeds: [100]

# Time-domain augmenter offering three speed options (95, 100, 105).
augment_speed: &id002 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
  sample_rate: 16000
  speeds: [95, 100, 105]

# Reverberation-only corruption: reverb probability 1.0, noise probability 0.0.
# The noise_snr_* bounds are still supplied, identical to the sibling entries.
add_rev: &id003 !new:speechbrain.lobes.augment.EnvCorrupt
  openrir_folder: /fastdata/pcp22wc/audio
  openrir_max_noise_len: 3.0
  reverb_prob: 1.0
  noise_prob: 0.0
  noise_snr_low: 0
  noise_snr_high: 15
  rir_scale_factor: 1.0

# Noise-only corruption: noise probability 1.0, reverb probability 0.0.
add_noise: &id004 !new:speechbrain.lobes.augment.EnvCorrupt
  openrir_folder: /fastdata/pcp22wc/audio
  openrir_max_noise_len: 3.0
  reverb_prob: 0.0
  noise_prob: 1.0
  noise_snr_low: 0
  noise_snr_high: 15
  rir_scale_factor: 1.0

# Combined corruption: both reverb and noise are applied with probability 1.0.
add_rev_noise: &id005 !new:speechbrain.lobes.augment.EnvCorrupt
  openrir_folder: /fastdata/pcp22wc/audio
  openrir_max_noise_len: 3.0
  reverb_prob: 1.0
  noise_prob: 1.0
  noise_snr_low: 0
  noise_snr_high: 15
  rir_scale_factor: 1.0

# Additive noise drawn from the noise manifest (same path as `noise_csv`
# above); babble and reverb are disabled. No openrir_folder is passed here.
add_noise_musan: &id006 !new:speechbrain.lobes.augment.EnvCorrupt
  noise_csv: results/tdnn_augment/914/save/noise.csv
  babble_prob: 0.0
  reverb_prob: 0.0
  noise_prob: 1.0
  noise_snr_low: 0
  noise_snr_high: 15

# Additive noise drawn from the music manifest (same path as `music_csv`
# above); babble and reverb are disabled.
add_music_musan: &id007 !new:speechbrain.lobes.augment.EnvCorrupt
  noise_csv: results/tdnn_augment/914/save/music.csv
  babble_prob: 0.0
  reverb_prob: 0.0
  noise_prob: 1.0
  noise_snr_low: 0
  noise_snr_high: 15

# Additive noise drawn from the speech manifest (same path as `speech_csv`
# above); babble and reverb are disabled.
add_speech_musan: &id008 !new:speechbrain.lobes.augment.EnvCorrupt
  noise_csv: results/tdnn_augment/914/save/speech.csv
  babble_prob: 0.0
  reverb_prob: 0.0
  noise_prob: 1.0
  noise_snr_low: 0
  noise_snr_high: 15

# All eight augmenters, aliased in definition order (id001..id008).
augment_pipeline: [*id001, *id002, *id003, *id004, *id005, *id006, *id007, *id008]
# NOTE(review): presumably augmented copies are concatenated to the clean
# batch rather than replacing it -- confirm against the training script.
concat_augment: true

# Input normalisation with norm_type "sentence" and std normalisation off.
# Anchored as id012; the checkpointer stores it under the key `normalizer`.
mean_var_norm: &id012 !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false

# Name -> object mapping. Every value is an alias, so each entry is the very
# same instance that was anchored at its definition above.
modules:
  compute_features: *id009
  augment_wavedrop: *id001
  augment_speed: *id002
  add_rev: *id003
  add_noise: *id004
  add_rev_noise: *id005
  add_noise_musan: *id006
  add_music_musan: *id007
  add_speech_musan: *id008
  embedding_model: *id010
  classifier: *id011
  mean_var_norm: *id012

# Loss: LogSoftmaxWrapper built around an AdditiveAngularMargin loss
# (margin 0.2, scale 30).
# NOTE(review): restored as a top-level key rather than a member of `modules`;
# the flattened source does not show which -- confirm against how the training
# script looks it up.
compute_cost: !new:speechbrain.nnet.losses.LogSoftmaxWrapper
  loss_fn: !new:speechbrain.nnet.losses.AdditiveAngularMargin
    margin: 0.2
    scale: 30

# Optimizer factory: !name binds the keyword arguments without instantiating
# Adam; the caller still has to supply the parameters to optimise.
opt_class: !name:torch.optim.Adam
  lr: 0.001
  weight_decay: 0.000002

# Linear learning-rate schedule from lr (0.001) to lr_final (0.0001) over
# number_of_epochs (30) epochs.
lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
  initial_value: 0.001
  final_value: 0.0001
  epoch_count: 30

# File logger writing to the same path as `train_log` above.
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: results/tdnn_augment/914/train_log.txt

# MetricStats factory (not instantiated here). Its metric is
# classification_error with reduction bound to "batch".
error_stats: !name:speechbrain.utils.metric_stats.MetricStats
  metric: !name:speechbrain.nnet.losses.classification_error
    reduction: batch

# Checkpointer writing into the save folder. The recoverables are aliases of
# the objects defined above; note that the normaliser (id012, defined as
# mean_var_norm) is registered under the key `normalizer`.
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: results/tdnn_augment/914/save
  recoverables:
    embedding_model: *id010
    classifier: *id011
    normalizer: *id012
    counter: *id013