File size: 467 Bytes
c116427
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# TEXT ENCODER CONFIG
# Text model to load. The value looks like a repo-relative path, so it is
# presumably resolved against the working directory -- TODO confirm.
text_model: 'ldm/modules/encoders/CLAP/bert-base-uncased'
# Presumably the maximum text length in tokens -- TODO confirm with consumer.
text_len: 100
# Presumably the text transformer's hidden size -- TODO confirm.
transformer_embed_dim: 768
freeze_text_encoder_weights: true

# AUDIO ENCODER CONFIG
audioenc_name: 'Cnn14'
# Presumably the audio encoder's output embedding width -- TODO confirm.
out_emb: 2048
sampling_rate: 44100  # presumably Hz -- TODO confirm
duration: 5  # presumably seconds of audio per clip -- TODO confirm
fmin: 50  # presumably lower frequency bound in Hz -- TODO confirm
fmax: 14000  # presumably upper frequency bound in Hz -- TODO confirm
# NOTE(review): n_fft (1028) differs from window_size (1024) and is not a
# power of two. Left as-is because it may have to match the settings the
# weights were trained with -- confirm before changing.
n_fft: 1028
hop_size: 320
mel_bins: 64
window_size: 1024

# PROJECTION SPACE CONFIG
# Presumably the width of the shared text/audio projection -- TODO confirm.
d_proj: 1024
temperature: 0.003

# TRAINING AND EVALUATION CONFIG
num_classes: 527
batch_size: 1024
demo: false