# CraftsMan image-to-3D shape diffusion configuration.
#
# NOTE(review): this file had lost its line breaks and indentation (every key
# was on one physical line, which is not parseable YAML and let the inline
# comment on `clip_sample` swallow all keys after it). The nesting below was
# reconstructed from the key names and their order; all values are unchanged.
# Confirm the structure against the code that loads this config.
name: craftsman/craftsman-v1-5
description: 'image to 3d shape diffusion of CraftsMan(https://github.com/wyysf-98/CraftsMan)'

# `system_type` names the system implementation; `system` holds its settings.
system_type: pixart-diffusion-system
system:
  # Sampling settings.
  z_scale_factor: 1.0
  guidance_scale: 7.5
  num_inference_steps: 50
  eta: 0.0
  extract_mesh_func: diffdmc

  # Shape model: `shape_model_type` selects the implementation,
  # `shape_model` holds its parameters.
  shape_model_type: michelangelo-autoencoder
  shape_model:
    n_samples: 16384
    use_downsample: true
    downsample_ratio: 0.0625
    num_latents: 768
    use_multi_reso: false
    # `resolutions` and `sampling_prob` have the same length (3 entries each);
    # presumably paired element-wise — TODO confirm.
    resolutions:
      - 4096
      - 8192
      - 12288
    sampling_prob:
      - 0
      - 0
      - 1
    embed_dim: 64
    point_feats: 3
    out_dim: 1
    num_freqs: 8
    include_pi: false
    heads: 12
    width: 768
    num_encoder_layers: 8
    num_decoder_layers: 16
    use_ln_post: true
    init_scale: 0.25
    qkv_bias: false
    use_flash: true
    use_checkpoint: true

  # Condition model (image conditioning embedder).
  condition_model_type: cond-embedder
  condition_model:
    freeze_modulation_clip: true
    freeze_modulation_dino: true
    encode_camera: false
    camera_embeds_dim: 32
    n_views: 1
    empty_embeds_ratio: 0.1
    normalize_embeds: false
    zero_uncond_embeds: true
    linear_proj_init: constant
    image_size_dino: 224
    image_size_clip: 224

  # Denoiser model. `input_channels`/`output_channels` (64) equal
  # `shape_model.embed_dim`, and `n_ctx` (768) equals
  # `shape_model.num_latents`; presumably these must stay in sync — TODO confirm.
  denoiser_model_type: pixart-denoiser
  denoiser_model:
    input_channels: 64
    output_channels: 64
    n_ctx: 768
    width: 1024
    layers: 32
    heads: 16
    context_dim: 1024
    init_scale: 1.0
    skip_ln: true
    variance_type: FIXED_SMALL
    use_checkpoint: true
    dit_block: DiTBlock

  # Noise scheduler, given as a dotted class path plus its arguments.
  noise_scheduler_type: "diffusers.schedulers.DDPMScheduler"
  noise_scheduler:
    num_train_timesteps: 1000
    beta_start: 0.00085
    beta_end: 0.012
    beta_schedule: "scaled_linear"
    variance_type: "fixed_small"
    clip_sample: false

  # Denoise scheduler; shares the timestep count and beta settings with
  # `noise_scheduler` above.
  denoise_scheduler_type: "diffusers.schedulers.DDIMScheduler"
  denoise_scheduler:
    num_train_timesteps: 1000
    beta_start: 0.00085
    beta_end: 0.012
    beta_schedule: "scaled_linear"
    clip_sample: false  # clip sample to -1~1
    set_alpha_to_one: false
    steps_offset: 1

  val_samples_json: ""

  loggers:
    wandb:
      enable: false
      project: CraftsMan
      name: image-to-shape-diffusion

  loss:
    loss_type: mse
    lambda_diffusion: 1.0

  optimizer:
    name: AdamW
    args:
      lr: 0.0002
      betas:
        - 0.9
        - 0.99
      eps: 1.0e-06

  scheduler:
    name: CosineAnnealingLR
    args:
      T_max: 5
      eta_min: 1.0e-06