chaojiemao committed on
Commit
78e9f55
1 Parent(s): 1b7af3a

Create ace_0.6b_1024.yaml

Browse files
Files changed (1) hide show
  1. config/models/ace_0.6b_1024.yaml +132 -0
config/models/ace_0.6b_1024.yaml ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NAME: ACE_0.6B_1024
2
+ IS_DEFAULT: False
3
+ USE_DYNAMIC_MODEL: False
4
+ DEFAULT_PARAS:
5
+ PARAS:
6
+ #
7
+ INPUT:
8
+ INPUT_IMAGE:
9
+ INPUT_MASK:
10
+ TASK:
11
+ PROMPT: ""
12
+ NEGATIVE_PROMPT: ""
13
+ OUTPUT_HEIGHT: 1024
14
+ OUTPUT_WIDTH: 1024
15
+ SAMPLER: ddim
16
+ SAMPLE_STEPS: 50
17
+ GUIDE_SCALE: 4.5
18
+ GUIDE_RESCALE: 0.5
19
+ SEED: -1
20
+ TAR_INDEX: 0
21
+ REFINER_SCALE: 0.2
22
+ USE_ACE: True
23
+ #REFINER_PROMPT: "High Resolution, Sharpness, Clarity, Detail Enhancement, Noise Reduction, HD, 4k, Image Restoration, HDR"
24
+ REFINER_PROMPT: "High Resolution, Sharpness, Clarity, Detail Enhancement, Noise Reduction, HD, 4k, Image Restoration, HDR"
25
+ OUTPUT:
26
+ LATENT:
27
+ IMAGES:
28
+ SEED:
29
+ MODULES_PARAS:
30
+ FIRST_STAGE_MODEL:
31
+ FUNCTION:
32
+ - NAME: encode
33
+ DTYPE: float16
34
+ INPUT: ["IMAGE"]
35
+ - NAME: decode
36
+ DTYPE: float16
37
+ INPUT: ["LATENT"]
38
+ #
39
+ DIFFUSION_MODEL:
40
+ FUNCTION:
41
+ - NAME: forward
42
+ DTYPE: float16
43
+ INPUT: ["SAMPLE_STEPS", "SAMPLE", "GUIDE_SCALE"]
44
+ #
45
+ COND_STAGE_MODEL:
46
+ FUNCTION:
47
+ - NAME: encode_list_of_list
48
+ DTYPE: bfloat16
49
+ INPUT: ["PROMPT"]
50
+ #
51
+ MODEL:
52
+ NAME: LatentDiffusionACE
53
+ PRETRAINED_MODEL:
54
+ IGNORE_KEYS: [ ]
55
+ SCALE_FACTOR: 0.18215
56
+ SIZE_FACTOR: 8
57
+ DECODER_BIAS: 0.5
58
+ DEFAULT_N_PROMPT: ""
59
+ TEXT_IDENTIFIER: [ '{image}', '{image1}', '{image2}', '{image3}', '{image4}', '{image5}', '{image6}', '{image7}', '{image8}', '{image9}' ]
60
+ USE_TEXT_POS_EMBEDDINGS: True
61
+ #
62
+ DIFFUSION:
63
+ NAME: BaseDiffusion
64
+ PREDICTION_TYPE: eps
65
+ MIN_SNR_GAMMA:
66
+ NOISE_SCHEDULER:
67
+ NAME: LinearScheduler
68
+ NUM_TIMESTEPS: 1000
69
+ BETA_MIN: 0.0001
70
+ BETA_MAX: 0.02
71
+ #
72
+ DIFFUSION_MODEL:
73
+ NAME: ACE
74
+ PRETRAINED_MODEL: hf://scepter-studio/ACE-0.6B-1024px@models/dit/ace_0.6b_1024px.pth
75
+ IGNORE_KEYS: [ ]
76
+ PATCH_SIZE: 2
77
+ IN_CHANNELS: 4
78
+ HIDDEN_SIZE: 1152
79
+ DEPTH: 28
80
+ NUM_HEADS: 16
81
+ MLP_RATIO: 4.0
82
+ PRED_SIGMA: True
83
+ DROP_PATH: 0.0
84
+ WINDOW_SIZE: 0
85
+ Y_CHANNELS: 4096
86
+ MAX_SEQ_LEN: 4096
87
+ QK_NORM: True
88
+ USE_GRAD_CHECKPOINT: True
89
+ ATTENTION_BACKEND: flash_attn
90
+ #
91
+ FIRST_STAGE_MODEL:
92
+ NAME: AutoencoderKL
93
+ EMBED_DIM: 4
94
+ PRETRAINED_MODEL: hf://scepter-studio/ACE-0.6B-1024px@models/vae/vae.bin
95
+ IGNORE_KEYS: []
96
+ #
97
+ ENCODER:
98
+ NAME: Encoder
99
+ CH: 128
100
+ OUT_CH: 3
101
+ NUM_RES_BLOCKS: 2
102
+ IN_CHANNELS: 3
103
+ ATTN_RESOLUTIONS: [ ]
104
+ CH_MULT: [ 1, 2, 4, 4 ]
105
+ Z_CHANNELS: 4
106
+ DOUBLE_Z: True
107
+ DROPOUT: 0.0
108
+ RESAMP_WITH_CONV: True
109
+ #
110
+ DECODER:
111
+ NAME: Decoder
112
+ CH: 128
113
+ OUT_CH: 3
114
+ NUM_RES_BLOCKS: 2
115
+ IN_CHANNELS: 3
116
+ ATTN_RESOLUTIONS: [ ]
117
+ CH_MULT: [ 1, 2, 4, 4 ]
118
+ Z_CHANNELS: 4
119
+ DROPOUT: 0.0
120
+ RESAMP_WITH_CONV: True
121
+ GIVE_PRE_END: False
122
+ TANH_OUT: False
123
+ #
124
+ COND_STAGE_MODEL:
125
+ NAME: T5EmbedderHF
126
+ PRETRAINED_MODEL: hf://scepter-studio/ACE-0.6B-1024px@models/text_encoder/t5-v1_1-xxl/
127
+ TOKENIZER_PATH: hf://scepter-studio/ACE-0.6B-1024px@models/tokenizer/t5-v1_1-xxl
128
+ LENGTH: 120
129
+ T5_DTYPE: bfloat16
130
+ ADDED_IDENTIFIER: [ '{image}', '{caption}', '{mask}', '{ref_image}', '{image1}', '{image2}', '{image3}', '{image4}', '{image5}', '{image6}', '{image7}', '{image8}', '{image9}' ]
131
+ CLEAN: whitespace
132
+ USE_GRAD: False