File size: 4,582 Bytes
a370392 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
# Generated 2021-06-27 from:
# /home/mila/s/subakany/speechbrain_new/recipes/LibriMix/separation/hparams/sepformer-libri3mix.yaml
# yamllint disable
# ################################
# Model: SepFormer for source separation
# https://arxiv.org/abs/2010.13154
# Dataset : Libri3Mix
# ################################
#
# Basic parameters
# The seed must be set at the top of the YAML, before any object that takes
# parameters is constructed.
#
seed: 111
# Calls torch.manual_seed with the literal 111. This is a separate copy of the
# value above, so keep the two in sync when changing the seed.
__set_seed: !apply:torch.manual_seed [111]
# Data params
# NOTE: the two folders below are machine-specific absolute paths; override
# them for your environment.
# e.g. '/yourpath/Libri3Mix/train-clean-360/'
# The data folder is needed even if dynamic mixing is applied.
data_folder: /miniscratch/subakany/LibriMixData/Libri3Mix/
# Needed only if dynamic mixing is applied (points at a processed LibriSpeech
# train-clean-360 directory).
base_folder_dm: /miniscratch/subakany/LibriMixData/LibriSpeech/train-clean-360_processed/
experiment_name: sepformer-libri3mix
# Output locations. Every path below is spelled out literally and repeats the
# results/sepformer-libri3mix/111 prefix, so changing output_folder alone does
# not move the log, the save folder, or the CSV files.
output_folder: results/sepformer-libri3mix/111
train_log: results/sepformer-libri3mix/111/train_log.txt
save_folder: results/sepformer-libri3mix/111/save
train_data: results/sepformer-libri3mix/111/save/libri3mix_train-360.csv
valid_data: results/sepformer-libri3mix/111/save/libri3mix_dev.csv
test_data: results/sepformer-libri3mix/111/save/libri3mix_test.csv
# NOTE(review): presumably skips the data-preparation step when true — confirm
# against the recipe script.
skip_prep: false
ckpt_interval_minutes: 60
# Experiment params
auto_mix_prec: true # Set it to True for mixed precision
# NOTE(review): test_only is enabled in this generated snapshot — presumably
# training is skipped and only evaluation runs; confirm against the recipe.
test_only: true
# The same literal 3 is repeated as num_spks in the MaskNet definition.
num_spks: 3
progressbar: true
save_audio: false # Save estimated sources on disk
# The same literal 8000 is repeated in the speedperturb and wavedrop
# definitions.
sample_rate: 8000
# Training parameters
# NOTE: N_epochs, batch_size and lr are repeated as literals further down
# (epoch_counter limit: 200, dataloader_opts batch_size: 1, optimizer
# lr: 0.00015); changing them here alone does not update those copies.
N_epochs: 200
batch_size: 1
lr: 0.00015
clip_grad_norm: 5
loss_upper_lim: 999999 # this is the upper limit for an acceptable loss
# If true, the training sequences are cut to a specified length.
limit_training_signal_len: false
# Length of the training sequences when limit_training_signal_len is enabled
# (it is disabled above).
training_signal_len: 32000000
# Set it to true to dynamically create mixtures at training time.
dynamic_mixing: true
use_wham_noise: false
# Parameters for data augmentation
# NOTE(review): the use_* flags presumably select which of the speedperturb /
# wavedrop objects defined below the recipe applies — confirm against the
# training script.
use_wavedrop: false
use_speedperturb: true
use_speedperturb_sameforeachsource: false
use_rand_shift: false
# NOTE(review): shift bounds, presumably only relevant when use_rand_shift is
# true; units not stated here — confirm.
min_shift: -8000
max_shift: 8000
# Speed-perturbation augmenter. The constructor arguments are nested under the
# tagged key so they are passed to TimeDomainSpecAugment; at column 0 they
# would be parsed as separate top-level keys (and sample_rate would collide
# with the top-level sample_rate).
# perturb_prob is 1.0 while both drop probabilities are 0.0.
speedperturb: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
  perturb_prob: 1.0
  drop_freq_prob: 0.0
  drop_chunk_prob: 0.0
  sample_rate: 8000
  speeds: [95, 100, 105]
# Drop-based augmenter. The constructor arguments are nested under the tagged
# key so they are passed to TimeDomainSpecAugment rather than being parsed as
# separate top-level keys.
# perturb_prob is 0.0 while both drop probabilities are 1.0.
wavedrop: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
  perturb_prob: 0.0
  drop_freq_prob: 1.0
  drop_chunk_prob: 1.0
  sample_rate: 8000
# Loss thresholding -- this thresholds the training loss
threshold_byloss: true
threshold: -30
# Encoder parameters
# NOTE: these scalars are repeated as literals in the Encoder, MaskNet and
# Decoder definitions below (kernel_size: 16, in/out_channels: 256, stride: 8);
# changing them here alone does not update those copies.
N_encoder_out: 256
out_channels: 256
kernel_size: 16
kernel_stride: 8
# Dataloader options
# The options are nested so that dataloader_opts is a mapping; at column 0 the
# key would be null and batch_size would collide with the top-level
# batch_size.
dataloader_opts:
  batch_size: 1
  num_workers: 3
# Specifying the network
# Encoder object, anchored as id003 and aliased by modules and the
# checkpointer. Arguments are nested under the tagged key so they reach the
# constructor instead of becoming top-level keys.
Encoder: &id003 !new:speechbrain.lobes.models.dual_path.Encoder
  kernel_size: 16
  out_channels: 256
# Transformer block anchored as id001; MaskNet aliases it as intra_model.
# Arguments are nested under the tagged key so they reach the constructor.
SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
  num_layers: 8
  d_model: 256
  nhead: 8
  d_ffn: 1024
  dropout: 0
  use_positional_encoding: true
  norm_before: true
# Transformer block anchored as id002; MaskNet aliases it as inter_model.
# Same settings as SBtfintra, but a separate object.
# Arguments are nested under the tagged key so they reach the constructor.
SBtfinter: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
  num_layers: 8
  d_model: 256
  nhead: 8
  d_ffn: 1024
  dropout: 0
  use_positional_encoding: true
  norm_before: true
# Dual-path masking network, anchored as id005. It receives the two
# transformer blocks through the *id001 / *id002 aliases.
# Arguments are nested under the tagged key so they reach the constructor
# instead of becoming (duplicated) top-level keys such as num_spks.
MaskNet: &id005 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
  num_spks: 3
  in_channels: 256
  out_channels: 256
  num_layers: 2
  K: 250
  intra_model: *id001
  inter_model: *id002
  norm: ln
  linear_layer_after_inter_intra: false
  skip_around_intra: true
# Decoder object, anchored as id004 and aliased by modules and the
# checkpointer. kernel_size and stride repeat the encoder scalars (16 and 8).
# Arguments are nested under the tagged key so they reach the constructor.
Decoder: &id004 !new:speechbrain.lobes.models.dual_path.Decoder
  in_channels: 256
  out_channels: 1
  kernel_size: 16
  stride: 8
  bias: false
# Adam optimizer referenced through `!name:`. The keyword arguments are nested
# so they are bound to torch.optim.Adam; at column 0, lr would collide with
# the top-level lr and the optimizer would receive no arguments.
# lr repeats the top-level lr value (0.00015).
optimizer: !name:torch.optim.Adam
  lr: 0.00015
  weight_decay: 0
# Loss function, referenced through `!name:` with no bound arguments.
loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper
# Learning-rate scheduler. Arguments are nested under the tagged key so they
# reach ReduceLROnPlateau instead of becoming top-level keys.
lr_scheduler: !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
  factor: 0.5
  patience: 2
  dont_halve_until_epoch: 5
# Epoch counter, anchored as id006 so the checkpointer can alias it.
# limit repeats the top-level N_epochs value (200).
epoch_counter: &id006 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: 200
# Named modules, aliased to the Encoder / Decoder / MaskNet objects defined
# earlier in the file. Entries are nested so modules is a mapping rather than
# a null key followed by top-level encoder/decoder/masknet keys.
modules:
  encoder: *id003
  decoder: *id004
  masknet: *id005
# Checkpointer writing under the save folder. recoverables is nested beneath
# it and lists the objects by alias; nesting also keeps these entries from
# colliding with the identically named keys under modules.
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: results/sepformer-libri3mix/111/save
  recoverables:
    encoder: *id003
    decoder: *id004
    masknet: *id005
    counter: *id006
    # NOTE(review): disabled entry. The generated file had this comment inside
    # epoch_counter, but its shape matches a recoverable — confirm placement.
    # lr_scheduler: !ref <lr_scheduler>
# File-based training logger. save_file is nested so it is passed to
# FileTrainLogger; it repeats the top-level train_log path.
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: results/sepformer-libri3mix/111/train_log.txt
# Pretrainer whose loadables point at the Encoder / MaskNet / Decoder objects
# via !ref. loadables and its entries are nested so they are passed to the
# Pretrainer and do not collide with the encoder/masknet/decoder keys used
# under modules and recoverables.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    encoder: !ref <Encoder>
    masknet: !ref <MaskNet>
    decoder: !ref <Decoder>
|