File size: 2,017 Bytes
bfa59ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# Seed value; presumably used to seed random number generators so runs are
# reproducible -- confirm against the code that reads this config.
seed: 12345


# Super-resolution settings
# NOTE(review): the key meanings noted below are inferred from their names;
# confirm them against the code that consumes this config.
basesr:
  sf: 4  # presumably the upscaling (scale) factor -- TODO confirm
  chopping:     # for latent diffusion
    pch_size: 128  # presumably the patch size used when chopping -- TODO confirm
    weight_type: Gaussian
    extra_bs: 8      # author's note: a value of 16 needs about 26G of memory

# VAE settings
# tiled_vae, gradient_checkpointing_vae and sliced_vae are boolean switches.
# The two *_tiled_size values are presumably the tile sizes used when
# tiled_vae is enabled (latent-space vs. sample-space) -- TODO confirm units.
tiled_vae: True
latent_tiled_size: 128
sample_tiled_size: 1024
gradient_checkpointing_vae: True
sliced_vae: False

# classifier-free guidance
# NOTE(review): a scale of 1.0 presumably means guidance has no effect --
# confirm with the sampler implementation.
cfg_scale: 1.0

# sampling settings
# presumably the diffusion timestep at which sampling starts -- TODO confirm
start_timesteps: 200

# color fixing
# `~` is YAML null, so no value is set for color_fix in this file.
color_fix: ~

# Stable Diffusion
# Short name of the base model; sd_pipe.params.pretrained_model_name_or_path
# below carries the corresponding repo id (stabilityai/sd-turbo).
base_model: sd-turbo
# Pipeline spec: `target` is the dotted path of the pipeline class, and
# `params` are presumably the keyword arguments handed to its loader
# (e.g. from_pretrained) -- TODO confirm against the consumer.
sd_pipe:
  target: diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline
  enable_grad_checkpoint: True
  params:
    pretrained_model_name_or_path: stabilityai/sd-turbo
    use_safetensors: True
    # NOTE(review): YAML loads this as the plain string "torch.float16", not
    # as a torch dtype object; the consumer has to convert it -- confirm it does.
    torch_dtype: torch.float16

# Network spec for the "start" model: `target` is the dotted path of the class
# to build and `params` lists its constructor settings.
# NOTE(review): confirm that the installed diffusers build actually provides
# diffusers.models.autoencoders.NoisePredictor -- this file cannot vouch for it.
model_start:
  target: diffusers.models.autoencoders.NoisePredictor
  ckpt_path: ~           # checkpoint for initializing; `~` (null) means none is given here
  params:
    in_channels: 3
    # Two down blocks and two up blocks; block_out_channels and
    # layers_per_block below each list one entry per block.
    down_block_types:
      - AttnDownBlock2D
      - AttnDownBlock2D
    up_block_types:
      - AttnUpBlock2D
      - AttnUpBlock2D
    block_out_channels:
      - 256    # 192, 256 (values noted by the author, presumably alternatives)
      - 512    # 384, 512 (values noted by the author, presumably alternatives)
    layers_per_block: 
      - 3
      - 3
    act_fn: silu
    latent_channels: 4
    norm_num_groups: 32
    sample_size: 128
    mid_block_add_attention: True
    resnet_time_scale_shift: default
    temb_channels: 512
    attention_head_dim: 64 
    freq_shift: 0
    flip_sin_to_cos: True
    double_z: True

# Network spec for the "middle" model. It uses the same `target` and the same
# `params` values as model_start, but defines no ckpt_path key.
# NOTE(review): confirm that the installed diffusers build actually provides
# diffusers.models.autoencoders.NoisePredictor -- this file cannot vouch for it.
model_middle:
  target: diffusers.models.autoencoders.NoisePredictor
  params:
    in_channels: 3
    # Two down blocks and two up blocks; block_out_channels and
    # layers_per_block below each list one entry per block.
    down_block_types:
      - AttnDownBlock2D
      - AttnDownBlock2D
    up_block_types:
      - AttnUpBlock2D
      - AttnUpBlock2D
    block_out_channels:
      - 256    # 192, 256 (values noted by the author, presumably alternatives)
      - 512    # 384, 512 (values noted by the author, presumably alternatives)
    layers_per_block: 
      - 3
      - 3
    act_fn: silu
    latent_channels: 4
    norm_num_groups: 32
    sample_size: 128
    mid_block_add_attention: True
    resnet_time_scale_shift: default
    temb_channels: 512
    attention_head_dim: 64 
    freq_shift: 0
    flip_sin_to_cos: True
    double_z: True