# Source: vta-ldm / tools/base_config.py
# Uploaded to Hugging Face by fffiloni ("Upload 130 files", commit c673f60, verified, 4.51 kB).
import os
def default_vae_config():
    """Return the default configuration dict for the first-stage autoencoder.

    The result is nested as ``model -> params -> first_stage_config`` and
    names ``audioldm.variational_autoencoder.autoencoder.AutoencoderKL`` as
    the target class, together with its constructor parameters.

    A new dict (including every nested dict and list) is built on each call,
    so the caller may modify the result without affecting later calls.
    """
    # Settings under the "ddconfig" key of the autoencoder parameters.
    ddconfig = {
        "double_z": True,
        "z_channels": 8,
        "resolution": 256,
        "downsample_time": False,
        "in_channels": 1,
        "out_ch": 1,
        "ch": 128,
        "ch_mult": [1, 2, 4],
        "num_res_blocks": 2,
        "attn_resolutions": [],
        "dropout": 0.0,
    }

    autoencoder_params = {
        "monitor": "val/rec_loss",
        "image_key": "fbank",
        "subband": 1,
        "embed_dim": 8,
        "time_shuffle": 1,
        "ddconfig": ddconfig,
    }

    # The learning rate sits next to "target"/"params" inside
    # first_stage_config; that placement is kept exactly as it was.
    first_stage_config = {
        "base_learning_rate": 4.5e-05,
        "target": "audioldm.variational_autoencoder.autoencoder.AutoencoderKL",
        "params": autoencoder_params,
    }

    return {"model": {"params": {"first_stage_config": first_stage_config}}}
def default_stft_config():
    """Return the default audio / STFT / mel preprocessing presets.

    The result has one entry per sampling rate, keyed ``"preprocessing_16k"``,
    ``"preprocessing_24k"``, ``"preprocessing_32k"`` and
    ``"preprocessing_48k"``. Each entry holds ``"audio"``, ``"stft"`` and
    ``"mel"`` sub-dicts.

    The presets do not all carry the same keys: only the 16k preset has
    ``freqm``/``timem``/``blur``/``mean``/``std``, and the 48k preset has an
    audio ``duration`` but no mel ``target_length``.

    A new dict is built on each call.
    """

    def stft_settings(window, hop):
        # Every preset uses the same value for filter and window length.
        return {"filter_length": window, "hop_length": hop, "win_length": window}

    preset_16k = {
        "audio": {"sampling_rate": 16000, "max_wav_value": 32768},
        "stft": stft_settings(1024, 160),
        "mel": {
            "n_mel_channels": 64,
            "mel_fmin": 0,
            "mel_fmax": 8000,
            "freqm": 0,
            "timem": 0,
            "blur": False,
            "mean": -4.63,
            "std": 2.74,
            "target_length": 1024,
        },
    }

    preset_24k = {
        "audio": {"sampling_rate": 24000, "max_wav_value": 32768},
        "stft": stft_settings(2048, 240),
        "mel": {
            "n_mel_channels": 64,
            "mel_fmin": 0,
            "mel_fmax": 12000,
            "target_length": 1024,
        },
    }

    preset_32k = {
        "audio": {"sampling_rate": 32000, "max_wav_value": 32768},
        "stft": stft_settings(2048, 320),
        "mel": {
            "n_mel_channels": 64,
            "mel_fmin": 0,
            "mel_fmax": 16000,
            "target_length": 1024,
        },
    }

    preset_48k = {
        "audio": {"sampling_rate": 48000, "max_wav_value": 32768, "duration": 10.00},
        "stft": stft_settings(2048, 480),
        "mel": {
            "n_mel_channels": 64,
            "mel_fmin": 20,
            "mel_fmax": 24000,
        },
    }

    return {
        "preprocessing_16k": preset_16k,
        "preprocessing_24k": preset_24k,
        "preprocessing_32k": preset_32k,
        "preprocessing_48k": preset_48k,
    }
def get_metadata():
    """Return the checkpoint registry for the known AudioLDM models.

    Returns:
        A dict mapping each model name to a dict with two keys:
        ``"path"``, the local checkpoint file ``<cache dir>/<name>.ckpt``,
        and ``"url"``, the address the checkpoint is downloaded from.

    The cache directory is the module-level ``CACHE_DIR`` when one is
    defined. Otherwise it is the ``AUDIOLDM_CACHE_DIR`` environment variable,
    or ``~/.cache/audioldm`` if that is unset.
    """
    # CACHE_DIR is not defined in the visible part of this module, so using
    # the bare name raised NameError. Look it up defensively and fall back.
    # NOTE(review): the fallback follows the upstream AudioLDM convention;
    # confirm it is the location this project expects.
    cache_dir = globals().get("CACHE_DIR")
    if cache_dir is None:
        cache_dir = os.getenv(
            "AUDIOLDM_CACHE_DIR",
            os.path.join(os.path.expanduser("~"), ".cache/audioldm"),
        )

    # Model name -> download URL. The remote file names do not always match
    # the model name (e.g. "audioldm-l-full" vs "audioldm-full-l.ckpt"), so
    # the URLs are kept verbatim rather than derived from the name.
    download_urls = {
        "audioldm-s-full": "https://zenodo.org/record/7600541/files/audioldm-s-full?download=1",
        "audioldm-l-full": "https://zenodo.org/record/7698295/files/audioldm-full-l.ckpt?download=1",
        "audioldm-s-full-v2": "https://zenodo.org/record/7698295/files/audioldm-full-s-v2.ckpt?download=1",
        "audioldm-m-text-ft": "https://zenodo.org/record/7813012/files/audioldm-m-text-ft.ckpt?download=1",
        "audioldm-s-text-ft": "https://zenodo.org/record/7813012/files/audioldm-s-text-ft.ckpt?download=1",
        "audioldm-m-full": "https://zenodo.org/record/7813012/files/audioldm-m-full.ckpt?download=1",
    }

    # Every checkpoint is stored locally as "<model name>.ckpt".
    return {
        name: {"path": os.path.join(cache_dir, name + ".ckpt"), "url": url}
        for name, url in download_urls.items()
    }