File size: 1,939 Bytes
7f51798
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
ldm_configs:

  # scheduler_config:
  #   target: sgm.lr_scheduler.LambdaLinearScheduler
  #   params:
  #     warm_up_steps: [10000]
  #     cycle_lengths: [10000000000000]
  #     f_start: [1.e-6]
  #     f_max: [1.]
  #     f_min: [1.]

  # denoiser_config:
  #   target: sgm.modules.diffusionmodules.denoiser.DiscreteDenoiser
  #   params:
  #     num_idx: 1000

  #     scaling_config:
  #       target: sgm.modules.diffusionmodules.denoiser_scaling.EpsScaling
  #     discretization_config:
  #       target: sgm.modules.diffusionmodules.discretizer.LegacyDDPMDiscretization

  # Conditioning stack. `sgm.modules.GeneralConditioner` receives the list of
  # embedder entries under `emb_models`; each entry gives the implementing
  # class (`target`), its constructor arguments (`params`), and the per-entry
  # settings `is_trainable`, `input_key` and `ucg_rate`.
  # NOTE(review): `ucg_rate` is presumably the probability of dropping this
  # condition during training (unconditional guidance) — confirm against
  # GeneralConditioner.
  conditioner_config:
    target: sgm.modules.GeneralConditioner
    params:
      emb_models:

        # Entry 1: non-trainable embedder keyed on 'img'. The outer MV
        # embedder is handed a nested FrozenOpenCLIPImageEmbedder config
        # (ViT-L-14, 'openai' weights, frozen, with output_tokens enabled).
        - is_trainable: false
          input_key: 'img'
          ucg_rate: 0.32
          target: sgm.modules.encoders.modules.FrozenOpenCLIPImageMVEmbedder
          params:
            open_clip_embedding_config:
              target: sgm.modules.encoders.modules.FrozenOpenCLIPImageEmbedder
              params:
                arch: 'ViT-L-14'
                version: 'openai'
                # TODO: add ModLN later.
                freeze: true
                output_tokens: true

        # Entry 2: trainable embedder keyed on 'img-c'.
        # NOTE(review): the class name starts with "Frozen", yet this entry
        # sets is_trainable: true and freeze: false — looks intentional
        # (fine-tuned encoder), but confirm.
        - is_trainable: true
          input_key: 'img-c'
          ucg_rate: 0.32
          # legacy_ucg_value: None
          target: sgm.modules.encoders.modules.FrozenDinov2ImageEmbedderMVPlucker
          params:
            freeze: false
            enable_bf16: true
            # Original note here read "return pooling".
            # NOTE(review): unclear whether that describes the false setting
            # or the true one — confirm against the embedder implementation.
            output_cls: false
            arch: vitb
            # The first 4 views are used as conditioning.
            n_cond_frames: 4
            modLN: true

  # Training loss: instantiates `sgm.modules.diffusionmodules.loss.FMLoss`.
  # NOTE(review): presumably a flow-matching loss, judging by the class name
  # and the `transport` dependency — confirm.
  loss_fn_config:
    target: sgm.modules.diffusionmodules.loss.FMLoss
    params:
      # Nested config whose target is `transport.create_transport`.
      transport_config:
        target: transport.create_transport
        # Only the keys listed below are set explicitly; every other
        # create_transport argument is left at its default.
        params:
          snr_type: lognorm

  # Sampling guider: instantiates
  # `sgm.modules.diffusionmodules.guiders.VanillaCFG` with `scale: 5.0`.
  # NOTE(review): presumably classifier-free guidance, where a scale of 1.0
  # would apply no extra guidance — confirm against VanillaCFG.
  guider_config:
    target: sgm.modules.diffusionmodules.guiders.VanillaCFG
    params:
      # Alternative value kept for reference (currently disabled):
      # scale: 1.0
      scale: 5.0