Spaces:
Running
on
Zero
Running
on
Zero
# @package __global__ | |
defaults: | |
- /solver/default | |
- /augmentations/default | |
- override /dset: audio/example | |
- _self_ | |
solver: watermarking # standard name to load the solver using builders | |
sample_rate: ??? | |
channels: ??? | |
# all the defaults from compression | |
losses: | |
adv: 4. | |
feat: 4. | |
l1: 0.1 | |
mel: 0.0 | |
msspec: 2.0 | |
sisnr: 0.0 | |
wm_detection: 1.0 # weight of the loss on the first 2 bits (detection); cannot be 0 | |
wm_mb: 1.0 # loss for the rest of the bits (wm message) | |
tf_loudnessratio: 10.0 | |
balancer: | |
balance_grads: true | |
ema_decay: 0.999 | |
per_batch_item: true | |
total_norm: 1. | |
crop: | |
prob: 0.4 | |
shuffle_prob: 0.2 | |
pad_prob: 0.2 # shuffle_prob + pad_prob + prob <= 1 | |
size: 0.5 | |
max_n_windows: 5 | |
adversarial: | |
every: 1 | |
adversaries: [msstftd] | |
adv_loss: hinge | |
feat_loss: l1 | |
tf_loudnessratio: | |
sample_rate: ${sample_rate} | |
segment: 0.5 | |
overlap: 0.5 | |
n_bands: 16 | |
temperature: 1.0 | |
# watermarking: audioseal | |
# losses hyperparameters | |
l1: {} | |
l2: {} | |
wm_detection: | |
p_weight: 1 | |
n_weight: 1 | |
wm_mb: | |
loss_type: bce # loss between decoded and original | |
temperature: 0.1 # decoded is divided by temperature before loss computation | |
spec_range: | |
n_fft: 2048 | |
min_frequency: 300.0 | |
max_frequency: 15000.0 | |
sample_rate: ${sample_rate} | |
spec_entropy_range: | |
n_fft: 2048 | |
min_frequency: 300.0 | |
max_frequency: 15000.0 | |
sample_rate: ${sample_rate} | |
mrstft: | |
factor_sc: .5 | |
factor_mag: .5 | |
normalized: false | |
mel: | |
sample_rate: ${sample_rate} | |
n_fft: 1024 | |
hop_length: 256 | |
win_length: 1024 | |
n_mels: 64 | |
f_min: 64 | |
f_max: null | |
normalized: false | |
floor_level: 1e-5 | |
sisnr: | |
sample_rate: ${sample_rate} | |
segment: 5. | |
msspec: | |
sample_rate: ${sample_rate} | |
range_start: 6 | |
range_end: 11 | |
n_mels: 64 | |
f_min: 64 | |
f_max: null | |
normalized: true | |
alphas: false | |
floor_level: 1e-5 | |
# metrics | |
metrics: | |
visqol: | |
mode: audio | |
bin: null # path to visqol install | |
model: tcdaudio14_aacvopus_coresv_svrnsim_n.68_g.01_c1.model # visqol v3 | |
# adversaries hyperparameters | |
msstftd: | |
in_channels: 1 | |
out_channels: 1 | |
filters: 32 | |
norm: weight_norm | |
n_ffts: [1024, 2048, 512, 256, 128] | |
hop_lengths: [256, 512, 128, 64, 32] | |
win_lengths: [1024, 2048, 512, 256, 128] | |
activation: LeakyReLU | |
activation_params: { negative_slope: 0.3 } | |
msd: | |
in_channels: 1 | |
out_channels: 1 | |
scale_norms: [spectral_norm, weight_norm, weight_norm] | |
kernel_sizes: [5, 3] | |
filters: 16 | |
max_filters: 1024 | |
downsample_scales: [4, 4, 4, 4] | |
inner_kernel_sizes: null | |
groups: [4, 4, 4, 4] | |
strides: null | |
paddings: null | |
activation: LeakyReLU | |
activation_params: { negative_slope: 0.3 } | |
mpd: | |
in_channels: 1 | |
out_channels: 1 | |
periods: [2, 3, 5, 7, 11] | |
n_layers: 5 | |
kernel_size: 5 | |
stride: 3 | |
filters: 8 | |
filter_scales: 4 | |
max_filters: 1024 | |
activation: LeakyReLU | |
activation_params: { negative_slope: 0.3 } | |
norm: weight_norm | |
# data hyperparameters | |
dataset: | |
batch_size: 16 | |
num_workers: 10 | |
segment_duration: 1 | |
train: | |
num_samples: 500000 | |
valid: | |
num_samples: 10000 | |
evaluate: | |
batch_size: 16 | |
num_samples: 10000 | |
segment_duration: 10 | |
generate: | |
batch_size: 16 | |
num_samples: 50 | |
segment_duration: 30 | |
# solver hyperparameters | |
evaluate: | |
every: 10 | |
num_workers: 5 | |
metrics: | |
visqol: false | |
sisnr: true | |
generate: | |
every: 10 | |
num_workers: 5 | |
audio: | |
sample_rate: ${sample_rate} | |
# checkpointing schedule | |
checkpoint: | |
save_last: true | |
save_every: 25 | |
keep_last: 10 | |
keep_every_states: null | |
# optimization hyperparameters | |
optim: | |
epochs: 300 | |
updates_per_epoch: 2000 | |
lr: 5e-5 | |
max_norm: 3.0 | |
optimizer: adam | |
adam: | |
betas: [0.5, 0.9] | |
weight_decay: 0. | |
ema: | |
use: true # whether to use EMA or not | |
updates: 1 # update at every step | |
device: ${device} # device for EMA, can be put on GPU if more frequent updates are needed | |
decay: 0.99 # EMA decay value; if null, no EMA is used | |
schedule: | |
lr_scheduler: "cosine" | |
cosine: | |
warmup: 4000 | |
lr_min_ratio: 0.0 | |
cycle_length: 1.0 | |