# Spaces: Running on Zero  (page-status text captured along with this config; not part of the configuration)
# Experiment configuration: image-to-shape diffusion (CLIP + DINO conditioning,
# PixArt-style denoiser, DDIM sampling, lr 2e-4).
#
# This file relies on `${...}` interpolation and on a custom `rmspace` resolver,
# both of which must be provided by the code that loads it.
#
# NOTE(review): the nesting below was reconstructed from the interpolation paths
# used in this file (`system.shape_model.*`, `system.optimizer.args.lr`,
# `system.noise_scheduler.variance_type`, `data.n_views`, `data.n_samples`).
# Placement of `loggers`, `loss`, `scheduler`, `trainer` and `checkpoint` is
# inferred from key order and convention -- confirm against the loader.

exp_root_dir: "outputs"
name: "image-to-shape-diffusion/clip-dino-rgb-pixart-lr2e4-ddim"
# Run tag assembled from other values in this file via the `rmspace` resolver.
tag: "${rmspace:${system.shape_model_type}+n${data.n_samples}+pfeat${system.shape_model.point_feats}+lr${system.optimizer.args.lr},_}"
seed: 0

# ----------------------------------------------------------------------------
# Data
# ----------------------------------------------------------------------------
data_type: "objaverse-datamodule"
data:
  root_dir: ./data/objaverse
  data_type: "sdf"
  sampling_strategy: random
  n_samples: 10240

  # Supervision loading is switched off; the two fields below are left empty/zero.
  load_supervision: False
  supervision_type: ""
  n_supervision: 0

  load_image: True # whether to load images
  image_data_path: ./data/objaverse/render+blender+singleview+nv20
  image_type: "rgb" # rgb, normal
  # NOTE(review): the original comment says "front view" but the list holds 20
  # indices (0-19) -- confirm what `idx` selects.
  idx: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] # front view
  n_views: 1
  background_color: [0.5, 0.5, 0.5]
  # NOTE(review): key looks like a misspelling of "margin"; it is kept verbatim
  # because the consuming code presumably reads this exact name -- verify before renaming.
  marign_pix_dis: 30

  batch_size: 40
  num_workers: 16

# ----------------------------------------------------------------------------
# System (models, schedulers, loss, optimisation)
# ----------------------------------------------------------------------------
system_type: "pixart-diffusion-system"
system:
  val_samples_json: "val_data/images/val_samples_rgb_image.json"
  z_scale_factor: 1.0
  guidance_scale: 7.5
  num_inference_steps: 50
  eta: 0.0
  extract_mesh_func: diffdmc

  # Shape autoencoder; `embed_dim` and `num_latents` are referenced by
  # `denoiser_model` below, `point_feats` by the top-level `tag`.
  shape_model_type: michelangelo-autoencoder
  shape_model:
    # Absolute, machine-specific checkpoint path -- adjust for the local environment.
    pretrained_model_name_or_path: /mnt/cfs/public/native3D/ckpts/michelangelo-autoencoder-l256-e64-ne8-nd16-scaleup.ckpt
    use_downsample: true
    downsample_ratio: 0.0625
    num_latents: 768
    use_multi_reso: false
    resolutions: [4096, 8192, 12288]
    sampling_prob: [0, 0, 1]
    embed_dim: 64
    point_feats: 3
    out_dim: 1
    num_freqs: 8
    include_pi: false
    heads: 12
    width: 768
    num_encoder_layers: 8
    num_decoder_layers: 16
    use_ln_post: true
    init_scale: 0.25
    qkv_bias: false
    use_flash: true
    use_checkpoint: true

  # Image-condition embedder built on pretrained CLIP and DINOv2 checkpoints.
  condition_model_type: "cond-embedder"
  condition_model:
    pretrained_clip_name_or_path: openai/clip-vit-large-patch14
    pretrained_dino_name_or_path: facebook/dinov2-base
    pretrained_tokenizer_name_or_path: openai/clip-vit-large-patch14
    freeze_modulation_clip: true
    freeze_modulation_dino: true
    encode_camera: false
    camera_embeds_dim: 0
    n_views: ${data.n_views}
    empty_embeds_ratio: 0.1
    normalize_embeds: false
    zero_uncond_embeds: true
    linear_proj_init: constant
    image_size_dino: 224
    image_size_clip: 224

  # Denoiser; channel count and context length are tied to the shape model's
  # latent size through interpolation.
  denoiser_model_type: "pixart-denoiser"
  denoiser_model:
    input_channels: ${system.shape_model.embed_dim}
    output_channels: ${system.shape_model.embed_dim}
    n_ctx: ${system.shape_model.num_latents}
    width: 768
    layers: 32
    heads: 12
    context_dim: 1024
    init_scale: 1.0
    skip_ln: true
    variance_type: ${system.noise_scheduler.variance_type}
    use_checkpoint: true
    dit_block: DiTBlock

  # Scheduler entry named "noise" (DDPM class path).
  noise_scheduler_type: "diffusers.schedulers.DDPMScheduler"
  noise_scheduler:
    num_train_timesteps: 1000
    beta_start: 0.00085
    beta_end: 0.012
    beta_schedule: "scaled_linear"
    variance_type: "fixed_small"
    clip_sample: false

  # Scheduler entry named "denoise" (DDIM class path). Its timestep count and
  # beta settings duplicate `noise_scheduler` by value, not by interpolation --
  # change both together.
  denoise_scheduler_type: "diffusers.schedulers.DDIMScheduler"
  denoise_scheduler:
    num_train_timesteps: 1000
    beta_start: 0.00085
    beta_end: 0.012
    beta_schedule: "scaled_linear"
    clip_sample: false # clip sample to -1~1
    set_alpha_to_one: false
    steps_offset: 1

  loggers:
    wandb:
      enable: false
      project: "CraftsMan"
      name: image-to-shape-diffusion+${name}+${tag}

  loss:
    loss_type: "mse"
    lambda_diffusion: 1.

  optimizer:
    name: AdamW
    args:
      lr: 2.e-4
      betas: [0.9, 0.99]
      eps: 1.e-6

  # NOTE(review): placed under `system` next to `optimizer`; confirm the loader
  # reads `system.scheduler`.
  scheduler:
    name: CosineAnnealingLR
    args:
      T_max: 5000
      # Written with an explicit decimal point (was `1e-6`) to match `lr`/`eps`
      # above and so that YAML 1.1 loaders resolve it as a float, not a string.
      eta_min: 1.e-6

# ----------------------------------------------------------------------------
# Trainer / checkpointing
# ----------------------------------------------------------------------------
trainer:
  num_nodes: 1
  max_epochs: 100000
  log_every_n_steps: 5
  num_sanity_val_steps: 1
  check_val_every_n_epoch: 25
  enable_progress_bar: true
  precision: 16-mixed
  strategy: 'ddp_find_unused_parameters_true'

checkpoint:
  save_last: true
  save_top_k: -1
  every_n_train_steps: 5000