File size: 1,520 Bytes
2252f3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f4557da
2252f3d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Base diffusion checkpoint (Hugging Face Hub id) used for multi-view generation.
pretrained_model_name_or_path: 'stabilityai/stable-diffusion-2-1-unclip'
revision: null

# Number of camera views generated per subject; referenced below via ${num_views}.
num_views: 7
with_smpl: false  # NOTE(review): presumably toggles SMPL-conditioned guidance — confirm against the pipeline code
# Dataset settings for the validation/inference images.
validation_dataset:
  prompt_embeds_path: mvdiffusion/data/fixed_prompt_embeds_7view
  root_dir: 'examples/shhq'
  num_views: ${num_views}
  bg_color: 'white'
  img_wh: [768, 768]  # image width/height in pixels
  num_validation_samples: 1000
  crop_size: 740
  margin_size: 50
  # NOTE(review): likely only consulted when with_smpl is true — confirm in the dataset loader
  smpl_folder: 'smpl_image_pymaf'


# Output and runtime settings for the multi-view generation stage.
save_dir: 'mv_results'
save_mode: 'rgba' # 'concat', 'rgba', 'rgb'
seed: 42
validation_batch_size: 1
dataloader_num_workers: 1
local_rank: -1

# Keyword arguments forwarded to the diffusion pipeline constructor.
pipe_kwargs:
  num_views: ${num_views}

# NOTE(review): key name is plural but the value is a single scalar; confirm
# whether the consumer expects a list of guidance scales here.
validation_guidance_scales: 3.0
# Keyword arguments forwarded to the pipeline's validation call.
pipe_validation_kwargs:
  num_inference_steps: 40
  eta: 1.0

# Number of images per row when tiling validation outputs into a grid.
validation_grid_nrow: ${num_views}

# UNet construction arguments (multi-view attention variant).
unet_from_pretrained_kwargs:
  unclip: true
  sdxl: false
  num_views: ${num_views}
  sample_size: 96  # NOTE(review): presumably the latent spatial size (img_wh / 8) — confirm
  zero_init_conv_in: false # modify

  projection_camera_embeddings_input_dim: 2 # 2 for elevation and 6 for focal_length
  zero_init_camera_projection: false
  num_regress_blocks: 3

  cd_attention_last: false
  cd_attention_mid: false
  multiview_attention: true
  sparse_mv_attention: true
  selfattn_block: self_rowwise
  mvcd_attention: true

# Mesh reconstruction / texture optimization settings.
recon_opt:
  res_path: out
  save_glb: true
  # camera setting
  num_view: 6  # NOTE(review): differs from top-level num_views (7) — confirm this is intentional
  scale: 4
  mode: ortho
  resolution: 1024
  cam_path: 'mvdiffusion/data/six_human_pose'
  # optimization
  iters: 700
  clr_iters: 200
  debug: false
  snapshot_step: 50
  # Color-optimization learning rate. Written as 2.0e-3 because YAML 1.1
  # loaders (e.g. PyYAML) only resolve exponent notation as a float when the
  # mantissa contains a dot; a bare 2e-3 would load as the string "2e-3".
  lr_clr: 2.0e-3
  gpu_id: 0

  replace_hand: false

# Use xFormers memory-efficient attention kernels when available.
enable_xformers_memory_efficient_attention: true