# Generated 2023-02-12 from:
# /home/agorin/cssl_sound/hparams/ecapa_vgg.yaml
# yamllint disable
# File : supclr_train.yaml
# Author : Zhepei Wang <zhepeiw2@illinois.edu>
# Date : 27.01.2022
# Last Modified Date: 31.03.2022
# Last Modified By : Zhepei Wang <zhepeiw2@illinois.edu>
seed: 2022
__set_seed: !apply:torch.manual_seed [2022]
np_rng: !new:numpy.random.RandomState [2022]
resume_interrupt: false
resume_task_idx: 0
balanced_cry: false
time_stamp: 2023-02-12+21-11-02
experiment_name: ecapa_vgg
# output_folder: !ref results/<experiment_name>/<seed>
output_base: results #/home/agorin/datasets/results_cssl
output_folder: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg
train_log: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/train_log.txt
save_folder: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save
# Number of classes
n_classes: 308
num_tasks: 1
# continual learning setup
task_classes: &id001 !apply:utils.prepare_task_classes
  num_classes: 308
  num_tasks: 1
  seed: 2022
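# Note: with num_tasks set to 1, the task split above presumably collapses to a
# single offline task covering all 308 VGGSound classes, so no class-incremental
# scheduling takes place in this run.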
replay_num_keep: 0
use_mixup: false
mixup_alpha: 0.4
train_duration: 4.0
# Training parameters
number_of_epochs: 50
batch_size: 128
# lr: 0.001
# base_lr: 0.00000001
# max_lr: !ref <lr>
# step_size: 65000
warmup_epochs: 5
warmup_lr: 0.0
base_lr: 0.015
final_lr: 5e-09
# dataset
sample_rate: 16000
data_folder: /home/agorin/datasets/VGG-Sound
label_encoder_path: ./dataset/label_encoder_vggsound_ordered.txt
prepare_split_csv_fn: !name:dataset.prepare_vggsound2.prepare_split
  root_dir: /home/agorin/datasets/VGG-Sound
  output_dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save
  task_classes: *id001
  train_split: 0.8
  seed: 2022
train_dataloader_opts:
  batch_size: 128
  num_workers: 8
  shuffle: true
  drop_last: true
valid_dataloader_opts:
  batch_size: 32
  num_workers: 8
# Experiment params
auto_mix_prec: false # Set to True for mixed precision
# Feature parameters
n_mels: 80
left_frames: 0
right_frames: 0
deltas: false
amp_to_db: false
normalize: true
win_length: 25
hop_length: 10
n_fft: 400
f_min: 0
use_time_roll: false
use_freq_shift: false
emb_dim: 256
emb_norm_type: bn
proj_norm_type: bn
# augmentation
# time_domain_aug: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
#   sample_rate: !ref <sample_rate>
#   # drop_chunk_count_high: 2
#   # drop_chunk_noise_factor: 0.05
#   speeds: [90, 95, 100, 105, 110]
#   drop_freq_count_high: 4
#   drop_chunk_count_high: 3
#   # drop_chunk_length_low: 1000
#   # drop_chunk_length_high: 5000
spec_domain_aug: !new:augmentation.TFAugmentation
  time_warp: true
  time_warp_window: 8
  freq_mask: true
  freq_mask_width: !tuple (0, 10)
  n_freq_mask: 2
  time_mask: true
  time_mask_width: !tuple (0, 10)
  n_time_mask: 2
  replace_with_zero: true
  time_roll: false
  time_roll_limit: !tuple (0, 200)
  freq_shift: false
  freq_shift_limit: !tuple (-10, 10)
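# Judging by the parameter names, TFAugmentation applies SpecAugment-style
# masking: up to n_freq_mask frequency bands and n_time_mask time spans (with
# widths drawn from the ranges above) are replaced with zeros, plus optional
# time warping; time_roll and freq_shift are left disabled here.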
# Functions
compute_features: &id002 !new:speechbrain.lobes.features.Fbank
  n_mels: 80
  left_frames: 0
  right_frames: 0
  deltas: false
  sample_rate: 16000
  n_fft: 400
  win_length: 25
  hop_length: 10
  f_min: 0
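# In SpeechBrain's Fbank, win_length and hop_length are given in milliseconds,
# so at 16 kHz the 25 ms window spans 16000 * 0.025 = 400 samples (matching
# n_fft above) and the 10 ms hop yields about 100 feature frames per second.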
mean_var_norm: &id007 !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false
embedding_model: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
  input_size: 80
  channels: [1024, 1024, 1024, 1024, 3072]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  groups: [1, 1, 1, 1, 1]
  attention_channels: 128
  lin_neurons: 256
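# The ECAPA-TDNN encoder outputs 256-dimensional embeddings (lin_neurons),
# which matches the input_size of the projector, predictor and classifier below.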
# embedding_model: !new:models.pann.Cnn14
#   mel_bins: !ref <n_mels>
#   emb_dim: !ref <emb_dim>
#   norm_type: !ref <emb_norm_type>
projector: &id005 !new:models.modules.SimSiamProjector
  input_size: 256
  hidden_size: 256
  output_size: 256
  norm_type: bn
predictor: &id006 !new:models.modules.SimSiamPredictor
  input_size: 256
  hidden_size: 128
  norm_type: bn
classifier: &id004 !new:models.modules.Classifier
  input_size: 256
  output_size: 308
modules:
  compute_features: *id002
  embedding_model: *id003
  classifier: *id004
  projector: *id005
  predictor: *id006
  mean_var_norm: *id007
ssl_weight: 1.
compute_simclr_cost: !new:losses.SimCLRLoss
  tau: 0.5
sup_weight: 0.
compute_sup_cost: !new:losses.LogSoftmaxWithProbWrapper
  loss_fn: !new:torch.nn.Identity
dist_weight: 0
compute_dist_cost: !new:losses.SimCLRLoss
  tau: 0.5
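# With ssl_weight = 1.0 and sup_weight = dist_weight = 0, training is presumably
# driven by the SimCLR (NT-Xent, tau = 0.5) objective alone; the supervised and
# distillation losses are defined but carry zero weight in this configuration.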
acc_metric: !name:speechbrain.utils.Accuracy.AccuracyStats
# opt_class: !name:torch.optim.Adam
#   lr: !ref <base_lr>
#   weight_decay: 0.0005
#
# lr_scheduler_fn: !name:speechbrain.nnet.schedulers.CyclicLRScheduler
#   base_lr: !ref <final_lr>
#   max_lr: !ref <base_lr>
#   step_size: 888
opt_class: !name:torch.optim.SGD
  lr: 0.015
  weight_decay: 0.0005
  momentum: 0.9
lr_scheduler_fn: !name:schedulers.SimSiamCosineScheduler
  warmup_epochs: 5
  warmup_lr: 0.0
  num_epochs: 50
  base_lr: 0.015
  final_lr: 5e-09
  steps_per_epoch: 200
  constant_predictor_lr: true
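# Assuming the usual SimSiam-style cosine schedule, the learning rate ramps
# linearly from warmup_lr to base_lr over the first warmup_epochs epochs and
# then decays towards final_lr as
#   lr(t) = final_lr + 0.5 * (base_lr - final_lr) * (1 + cos(pi * t / T)),
# where t counts post-warmup steps and T = (num_epochs - warmup_epochs) * steps_per_epoch.
# constant_predictor_lr keeps the predictor at base_lr throughout, following SimSiam.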
epoch_counter_fn: !name:speechbrain.utils.epoch_loop.EpochCounter
  limit: 50
datapoint_counter: &id008 !new:utils.DatapointCounter
# prev_checkpointer: null
# prev_checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
#   checkpoints_dir: /home/agorin/vgg_offline/2022-04-13+23-33-21_seed_2022+ssl_offline/save/task0
# Logging + checkpoints
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save
  recoverables:
    embedding_model: *id003
    classifier: *id004
    projector: *id005
    predictor: *id006
    normalizer: *id007
    datapoint_counter: *id008
ssl_checkpoints_dir: # /home/agorin/vgg_offline/2022-04-13+23-33-21_seed_2022+ssl_offline/save
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/train_log.txt
# wandb
use_wandb: false
train_log_frequency: 20
wandb_logger_fn: !name:utils.MyWandBLogger
  initializer: !name:wandb.init
  entity: CAL
  project: cssl_sound
  name: 2023-02-12+21-11-02+seed_2022+ecapa_vgg
  dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg
  reinit: true
  yaml_config: hparams/vgg/supclr_train.yaml
  resume: false