# File: Universal_Source_Separation / uss_material / ss_model=resunet30,querynet=at_soft,data=full.yaml
# Hosting-page metadata: RSNuts's picture; "Upload 6 files"; 7557af0
---
# Training and evaluation settings for the `audioset` task: a ResUNet30
# separation model with a Cnn14_Wrapper query net using the `at_soft`
# bottleneck, reading the `full_train` index.
#
# NOTE(review): the section nesting below was reconstructed from key order.
# Without it every key sits at one level and `model_type` is defined three
# times (duplicate keys; most parsers silently keep the last one). Confirm
# the grouping against the code that reads this file.
task_name: audioset

# Dataset indexing, sampling and segment settings.
data:
  indexes_dict: "hdf5s/indexes/full_train.h5"
  sampler_type: balanced_sampler
  anchor_segment_detect_mode: max_area  # "max_area" | "random"
  sample_rate: 32000
  frames_per_second: 100
  segment_seconds: 2.0
  classes_num: 527
  augmentation:
    match_energy: true
    mix_num: 2

# Sound event detection model settings.
sound_event_detection:
  model_type: Cnn14_DecisionLevelMax
  freeze: true

# Query network settings.
query_net:
  model_type: Cnn14_Wrapper  # "Cnn14_Wrapper" | "AdaptiveCnn14_Wrapper"
  base_checkpoint_type: "Cnn14"
  freeze_base: true
  freeze_adaptor: false
  bottleneck_type: at_soft  # "embedding" | "at_soft"
  outputs_num: 527  # same value as data.classes_num

# Source separation model settings.
ss_model:
  model_type: ResUNet30
  input_channels: 1
  output_channels: 1

# Training loop settings.
train:
  num_workers: 16
  loss_type: l1_wav
  optimizer:
    optimizer_type: AdamW
    # Written with a decimal point so that YAML 1.1 loaders such as PyYAML
    # resolve it as a float; a bare `1e-3` is loaded as a string there.
    learning_rate: 1.0e-3
    lr_lambda_type: constant_warm_up  # "constant_warm_up" | "linear_warm_up"
    warm_up_steps: 10000
    reduce_lr_steps: 1000000
  batch_size_per_device: 16
  precision: 32
  steps_per_epoch: 10000  # Every 10000 steps is called an epoch
  evaluate_step_frequency: 20000  # Evaluate every #evaluate_step_frequency steps
  save_step_frequency: 100000  # Save every #save_step_frequency steps
  early_stop_steps: 10000001
  random_seed: 1234
  # Empty string; presumably means "do not resume" - confirm with the trainer.
  resume_checkpoint_path: ""

# Evaluation data locations and per-class limit.
evaluate:
  balanced_train_eval_dir: "evaluation/audioset/2s_segments_balanced_train"
  test_eval_dir: "evaluation/audioset/2s_segments_test"
  max_eval_per_class: 10