---
# Training configuration for the "ucf101_img" dataset with the
# LatteIMG-XL/2 model. All keys are top-level; sections are separated by
# comments only.
# NOTE(review): key names are presumably consumed verbatim by the training
# script -- confirm against the loader before renaming anything.

# dataset
dataset: "ucf101_img"

# Placeholder paths: replace "/path/to/..." with real locations.
data_path: "/path/to/datasets/UCF101/videos/"
frame_data_txt: "/path/to/datasets/UCF101/train_256_list.txt"
pretrained_model_path: "/path/to/pretrained/Latte/"

# save and load
results_dir: "./results_img"
pretrained: null  # left unset

# model config:
model: LatteIMG-XL/2
num_frames: 16
image_size: 256  # choices=[256, 512]
num_sampling_steps: 250
frame_interval: 3
fixed_spatial: false
attention_bias: true
learn_sigma: true
extras: 2  # [1, 2] 1 unconditional generation, 2 class-conditional generation

# train config:
save_ceph: true  # important
use_image_num: 8  # important
# Written with an explicit decimal point so that YAML 1.1 parsers also
# resolve it as a float (a bare "1e-4" is read as a string by some loaders).
learning_rate: 1.0e-4
ckpt_every: 10000
clip_max_norm: 0.1
start_clip_iter: 100000
local_batch_size: 4  # important
max_train_steps: 1000000
global_seed: 3407
num_workers: 8
log_every: 50
lr_warmup_steps: 0
resume_from_checkpoint: null  # left unset
gradient_accumulation_steps: 1  # TODO
num_classes: 101

# low VRAM and speed up training
use_compile: false
mixed_precision: false
enable_xformers_memory_efficient_attention: false
gradient_checkpointing: false