mage / config.yaml
ekg15's picture
Upload 2 files
0f5d967
# Training settings. The keys are nested under `train` so they no longer
# sit at the top level of the document.
train:
  epoch: 201
  batchsize: 8
  # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `5e-5` as a string, while `5.0e-5` is a float everywhere.
  lr: 5.0e-5
  # NOTE(review): lr_gamma / lr_steps look like step-decay settings; how they
  # interact with `cos` is decided by the training script - confirm there.
  lr_gamma: 0.1
  lr_steps: [30, 40]
  cos: true  # use cosine lr schedule
  # NOTE(review): unit (steps vs. epochs) is not visible here - confirm.
  checkpoint_every: 3000
# Model definition. Every component is described by a `target` (dotted
# import path) plus a `params` mapping; presumably the project's loader
# instantiates `target` with `params` as keyword arguments.
# NOTE(review): the nesting below is reconstructed from the target/params
# pattern - verify it against the constructor signatures.
model:
  target: modules.mage_model.MAGE
  params:
    codebook_size: 512
    # NOTE(review): frames_length is also set in generate_decoder_config
    # below and in the data section; presumably all three must agree.
    frames_length: 10
    image_resolution: 16
    vision_width: 512
    dropout: 0.2
    use_cids: false
    randomness: true
    auto_beta: true
    v_kl: 100

    # First-stage autoencoder, restored from the checkpoint at `ckpt_path`.
    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        monitor: val/rec_loss
        embed_dim: 4
        ckpt_path: "models/autoencoders/kl_f8_cater/last_caterv2.ckpt"
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 128
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
            - 1
            - 2
            - 4
            - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        # The loss module is replaced by an identity module.
        lossconfig:
          target: torch.nn.Identity

    text_encoder_config:
      target: modules.mage_model.TransformerTextEncoder
      params:
        vocab_size: 50
        context_length: 38
        transformer_width: 512
        transformer_layers: 2
        output_dim: 512
        padding_idx: 0
        dropout: 0.1

    ma_config:
      target: modules.mage_model.MAEncoder
      params:
        layers: 1
        d_model: 512

    generate_decoder_config:
      target: modules.mage_model.FlatAxialDecoder
      params:
        in_channels: 512
        out_channels: 4
        model_channels: 512
        frames_length: 10
        layers: 6
# Dataset definition, using the same `target` / `params` layout as the
# model section.
# NOTE(review): nesting is reconstructed from that pattern - verify it
# against the dataset class's constructor.
data:
  target: dataload.CATER
  params:
    dataset: 'caterv2'
    # Relative path; adjust it to wherever CATER-GEN-v2 is stored.
    data_root: '../datasets/CATER-GEN-v2'
    # NOTE(review): presumably must match the model's frames_length.
    frames_length: 10
    sample_speed: [3.0, 6.0]
    randomness: true