# Generated 2023-02-12 from:
# /home/agorin/cssl_sound/hparams/ecapa_vgg.yaml
# yamllint disable
# File : supclr_train.yaml
# Author : Zhepei Wang
# Date : 27.01.2022
# Last Modified Date: 31.03.2022
# Last Modified By : Zhepei Wang

seed: 2022
__set_seed: !apply:torch.manual_seed [2022]
np_rng: !new:numpy.random.RandomState [2022]

resume_interrupt: false
resume_task_idx: 0

balanced_cry: false

time_stamp: 2023-02-12+21-11-02
experiment_name: ecapa_vgg
# output_folder: !ref results//
output_base: results #/home/agorin/datasets/results_cssl
output_folder: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg
train_log: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/train_log.txt
save_folder: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save

# Number of classes
n_classes: 308
num_tasks: 1

# cont learning setup
task_classes: &id001 !apply:utils.prepare_task_classes
  num_classes: 308
  num_tasks: 1
  seed: 2022
replay_num_keep: 0

use_mixup: false
mixup_alpha: 0.4
train_duration: 4.0

# Training parameters
number_of_epochs: 50
batch_size: 128
# lr: 0.001
# base_lr: 0.00000001
# max_lr: !ref
# step_size: 65000
warmup_epochs: 5
warmup_lr: 0.0
base_lr: 0.015
final_lr: 5e-09

# dataset
sample_rate: 16000
data_folder: /home/agorin/datasets/VGG-Sound
label_encoder_path: ./dataset/label_encoder_vggsound_ordered.txt

prepare_split_csv_fn: !name:dataset.prepare_vggsound2.prepare_split
  root_dir: /home/agorin/datasets/VGG-Sound
  output_dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save
  task_classes: *id001
  train_split: 0.8
  seed: 2022

train_dataloader_opts:
  batch_size: 128
  num_workers: 8
  shuffle: true
  drop_last: true

valid_dataloader_opts:
  batch_size: 32
  num_workers: 8

# Experiment params
auto_mix_prec: false # True # True # Set it to True for mixed precision

# Feature parameters
n_mels: 80
left_frames: 0
right_frames: 0
deltas: false
amp_to_db: false
normalize: true
win_length: 25
hop_length: 10
n_fft: 400
f_min: 0

use_time_roll: false
use_freq_shift: false

emb_dim: 256
emb_norm_type: bn
proj_norm_type: bn

# augmentation
# time_domain_aug: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
#   sample_rate: !ref
#   # drop_chunk_count_high: 2
#   # drop_chunk_noise_factor: 0.05
#   speeds: [90, 95, 100, 105, 110]
#   drop_freq_count_high: 4
#   drop_chunk_count_high: 3
#   # drop_chunk_length_low: 1000
#   # drop_chunk_length_high: 5000
spec_domain_aug: !new:augmentation.TFAugmentation
  time_warp: true
  time_warp_window: 8
  freq_mask: true
  freq_mask_width: !tuple (0, 10)
  n_freq_mask: 2
  time_mask: true
  time_mask_width: !tuple (0, 10)
  n_time_mask: 2
  replace_with_zero: true
  time_roll: false
  time_roll_limit: !tuple (0, 200)
  freq_shift: false
  freq_shift_limit: !tuple (-10, 10)

# Functions
compute_features: &id002 !new:speechbrain.lobes.features.Fbank
  n_mels: 80
  left_frames: 0
  right_frames: 0
  deltas: false
  sample_rate: 16000
  n_fft: 400
  win_length: 25
  hop_length: 10
  f_min: 0

mean_var_norm: &id007 !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false

embedding_model: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
  input_size: 80
  channels: [1024, 1024, 1024, 1024, 3072]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  groups: [1, 1, 1, 1, 1]
  attention_channels: 128
  lin_neurons: 256

# embedding_model: !new:models.pann.Cnn14
#   mel_bins: !ref
#   emb_dim: !ref
#   norm_type: !ref

projector: &id005 !new:models.modules.SimSiamProjector
  input_size: 256
  hidden_size: 256
  output_size: 256
  norm_type: bn

predictor: &id006 !new:models.modules.SimSiamPredictor
  input_size: 256
  hidden_size: 128
  norm_type: bn

classifier: &id004 !new:models.modules.Classifier
  input_size: 256
  output_size: 308

modules:
  compute_features: *id002
  embedding_model: *id003
  classifier: *id004
  projector: *id005
  predictor: *id006
  mean_var_norm: *id007

ssl_weight: 1.
compute_simclr_cost: !new:losses.SimCLRLoss
  tau: 0.5
sup_weight: 0.
compute_sup_cost: !new:losses.LogSoftmaxWithProbWrapper
  loss_fn: !new:torch.nn.Identity
dist_weight: 0
compute_dist_cost: !new:losses.SimCLRLoss
  tau: 0.5

acc_metric: !name:speechbrain.utils.Accuracy.AccuracyStats

# opt_class: !name:torch.optim.Adam
#   lr: !ref
#   weight_decay: 0.0005
#
# lr_scheduler_fn: !name:speechbrain.nnet.schedulers.CyclicLRScheduler
#   base_lr: !ref
#   max_lr: !ref
#   step_size: 888

opt_class: !name:torch.optim.SGD
  lr: 0.015
  weight_decay: 0.0005
  momentum: 0.9

lr_scheduler_fn: !name:schedulers.SimSiamCosineScheduler
  warmup_epochs: 5
  warmup_lr: 0.0
  num_epochs: 50
  base_lr: 0.015
  final_lr: 5e-09
  steps_per_epoch: 200
  constant_predictor_lr: true

epoch_counter_fn: !name:speechbrain.utils.epoch_loop.EpochCounter
  limit: 50

datapoint_counter: &id008 !new:utils.DatapointCounter

#prev_checkpointer: null
#prev_checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
#   checkpoints_dir: /home/agorin/vgg_offline/2022-04-13+23-33-21_seed_2022+ssl_offline/save/task0
#
# Logging + checkpoints
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/save
  recoverables:
    embedding_model: *id003
    classifier: *id004
    projector: *id005
    predictor: *id006
    normalizer: *id007
    datapoint_counter: *id008

ssl_checkpoints_dir: # /home/agorin/vgg_offline/2022-04-13+23-33-21_seed_2022+ssl_offline/save

train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg/train_log.txt

# wandb
use_wandb: false
train_log_frequency: 20
wandb_logger_fn: !name:utils.MyWandBLogger
  initializer: !name:wandb.init
  entity: CAL
  project: cssl_sound
  name: 2023-02-12+21-11-02+seed_2022+ecapa_vgg
  dir: results/2023-02-12+21-11-02_seed_2022+ecapa_vgg
  reinit: true
  yaml_config: hparams/vgg/supclr_train.yaml
  resume: false
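# ------------------------------------------------------------------
# Loading this file (illustrative sketch only, not part of the recipe):
# the !new: / !apply: / !name: tags above are resolved by the
# hyperpyyaml package that SpeechBrain uses, so a training script
# typically consumes the file roughly as in the commented Python
# below. The file path and the override shown are assumptions for
# the example, not values taken from this repository.
#
#   from hyperpyyaml import load_hyperpyyaml
#
#   with open("hparams/ecapa_vgg.yaml") as fin:
#       # overrides replace top-level keys before objects are built
#       hparams = load_hyperpyyaml(fin, overrides={"seed": 2022})
#
#   # After loading, !new: keys such as hparams["embedding_model"]
#   # and hparams["spec_domain_aug"] hold instantiated objects,
#   # while !name: keys (e.g. hparams["opt_class"]) stay callable
#   # and are invoked later by the training script.
# ------------------------------------------------------------------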