renyuxi committed on
Commit
831b863
1 Parent(s): bc77397

update readme

Files changed (1)
  1. README.md +185 -15
README.md CHANGED
@@ -31,9 +31,10 @@ In this repository, we release the models distilled from [SDXL Base 1.0](https:/
  * `Hyper-SD15-Nstep-lora.safetensors`: Lora checkpoint, for SD1.5-related models.
  * `Hyper-SDXL-1step-unet.safetensors`: Unet checkpoint distilled from SDXL-Base.
 
- ## SDXL-related models Usage
-
- ### 2-Steps, 4-Steps, 8-steps LoRA
+ ## Text-to-Image Usage
+ ### SDXL-related models
+ #### 2-Steps, 4-Steps, 8-Steps LoRA
+ Take the 2-steps LoRA as an example; you can also use the other LoRAs with their corresponding inference-step settings.
  ```python
  import torch
  from diffusers import DiffusionPipeline, DDIMScheduler
@@ -46,14 +47,15 @@ ckpt_name = "Hyper-SDXL-2steps-lora.safetensors"
  pipe = DiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16, variant="fp16").to("cuda")
  pipe.load_lora_weights(hf_hub_download(repo_name, ckpt_name))
  pipe.fuse_lora()
- # Ensure ddim scheduler timestep spacing set as trailing
+ # Ensure the DDIM scheduler's timestep spacing is set to trailing !!!
  pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
  # lower eta results in more detail
  prompt="a photo of a cat"
  image=pipe(prompt=prompt, num_inference_steps=2, guidance_scale=0).images[0]
  ```
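The 4-steps and 8-steps checkpoints drop in the same way. A minimal sketch of the 4-step variant, assuming the `Hyper-SDXL-Nstep-lora.safetensors` naming pattern listed above and the same setup as the example:

```python
import torch
from diffusers import DiffusionPipeline, DDIMScheduler
from huggingface_hub import hf_hub_download

# Checkpoint name assumed from the Nstep naming pattern listed above
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16"
).to("cuda")
pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-SDXL-4steps-lora.safetensors"))
pipe.fuse_lora()
# Trailing timestep spacing, as in the 2-steps example
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
# Match num_inference_steps to the step count the LoRA was distilled for
image = pipe(prompt="a photo of a cat", num_inference_steps=4, guidance_scale=0).images[0]
```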
 
- ### Unified LoRA (support 1 to 8 steps inference)
+ #### Unified LoRA (supports 1 to 8 inference steps)
+ You can flexibly adjust the number of inference steps and the eta value to achieve the best results.
  ```python
  import torch
  from diffusers import DiffusionPipeline, TCDScheduler
@@ -67,15 +69,14 @@ pipe.load_lora_weights(hf_hub_download(repo_name, ckpt_name))
  pipe.fuse_lora()
  # Use TCD scheduler to achieve better image quality
  pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
- # lower eta results in more detail
+ # Lower eta results in more detail for multi-step inference
  eta=1.0
  prompt="a photo of a cat"
  image=pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0, eta=eta).images[0]
  ```
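Since the unified LoRA covers the whole 1-to-8-step range, the step count and eta can be varied per call. A short sketch of a 4-step run reusing the pipeline configured above; `eta=0.5` is an illustrative value, not a recommendation from this commit:

```python
# Reusing the fused unified-LoRA pipeline and TCD scheduler from the example above;
# lower eta trades smoothness for detail on multi-step runs (eta value is illustrative)
image = pipe(prompt="a photo of a cat", num_inference_steps=4, guidance_scale=0, eta=0.5).images[0]
```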
 
-
- ### 1-step SDXL Unet
-
+ #### 1-step SDXL Unet
+ For single-step inference only.
  ```python
  import torch
  from diffusers import DiffusionPipeline, UNet2DConditionModel, LCMScheduler
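The middle of this code block falls outside the diff context, so only its first and last lines are shown here. A hedged sketch of what the elided UNet loading could look like, assuming `safetensors` and the `Hyper-SDXL-1step-unet.safetensors` checkpoint listed above; the exact lines are not part of this view:

```python
import torch
from diffusers import DiffusionPipeline, UNet2DConditionModel, LCMScheduler
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

base_model_id = "stabilityai/stable-diffusion-xl-base-1.0"
# Build an SDXL UNet from its config and load the distilled 1-step weights (assumed approach)
unet = UNet2DConditionModel.from_config(
    UNet2DConditionModel.load_config(base_model_id, subfolder="unet")
).to("cuda", torch.float16)
unet.load_state_dict(load_file(hf_hub_download("ByteDance/Hyper-SD", "Hyper-SDXL-1step-unet.safetensors")))
pipe = DiffusionPipeline.from_pretrained(
    base_model_id, unet=unet, torch_dtype=torch.float16, variant="fp16"
).to("cuda")
# An LCM-style scheduler for the single-step sample
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
```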
@@ -96,10 +97,10 @@ image=pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0, timesteps=[80
  ```
 
 
- ## SD1.5-related models Usage
-
- ### 2-Steps, 4-Steps, 8-steps LoRA
+ ### SD1.5-related models
+
+ #### 2-Steps, 4-Steps, 8-Steps LoRA
+ Take the 2-steps LoRA as an example; you can also use the other LoRAs with their corresponding inference-step settings.
  ```python
  import torch
  from diffusers import DiffusionPipeline, DDIMScheduler
@@ -112,14 +113,15 @@ ckpt_name = "Hyper-SD15-2steps-lora.safetensors"
  pipe = DiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16, variant="fp16").to("cuda")
  pipe.load_lora_weights(hf_hub_download(repo_name, ckpt_name))
  pipe.fuse_lora()
- # Ensure ddim scheduler timestep spacing set as trailing
+ # Ensure the DDIM scheduler's timestep spacing is set to trailing !!!
  pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
  prompt="a photo of a cat"
  image=pipe(prompt=prompt, num_inference_steps=2, guidance_scale=0).images[0]
  ```
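As with SDXL, the other SD1.5 step counts swap in by checkpoint name. A minimal sketch of an 8-step run, assuming the base model id used in the SD1.5 ControlNet examples below and the `Hyper-SD15-Nstep-lora.safetensors` naming pattern:

```python
import torch
from diffusers import DiffusionPipeline, DDIMScheduler
from huggingface_hub import hf_hub_download

# Base model id assumed from the SD1.5 ControlNet examples below;
# checkpoint name assumed from the Nstep naming pattern
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, variant="fp16"
).to("cuda")
pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-SD15-8steps-lora.safetensors"))
pipe.fuse_lora()
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
image = pipe(prompt="a photo of a cat", num_inference_steps=8, guidance_scale=0).images[0]
```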
 
 
- ### Unified LoRA (support 1 to 8 steps inference)
+ #### Unified LoRA (supports 1 to 8 inference steps)
+ You can flexibly adjust the number of inference steps and the eta value to achieve the best results.
  ```python
  import torch
  from diffusers import DiffusionPipeline, TCDScheduler
@@ -133,12 +135,180 @@ pipe.load_lora_weights(hf_hub_download(repo_name, ckpt_name))
  pipe.fuse_lora()
  # Use TCD scheduler to achieve better image quality
  pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
- # Lower eta results in more detail
+ # Lower eta results in more detail for multi-step inference
  eta=1.0
  prompt="a photo of a cat"
  image=pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0, eta=eta).images[0]
  ```
 
+ ## ControlNet Usage
+ ### SDXL-related models
+
+ #### 2-Steps, 4-Steps, 8-Steps LoRA
+ Take the Canny ControlNet with 2-step inference as an example:
+ ```python
+ import torch
+ from diffusers.utils import load_image
+ import numpy as np
+ import cv2
+ from PIL import Image
+ from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL, DDIMScheduler
+ from huggingface_hub import hf_hub_download
+
+ # Load original image
+ image = load_image("https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png")
+ image = np.array(image)
+ # Prepare Canny control image
+ low_threshold = 100
+ high_threshold = 200
+ image = cv2.Canny(image, low_threshold, high_threshold)
+ image = image[:, :, None]
+ image = np.concatenate([image, image, image], axis=2)
+ control_image = Image.fromarray(image)
+ control_image.save("control.png")
+ control_weight = 0.5  # recommended for good generalization
+
+ # Initialize pipeline
+ controlnet = ControlNetModel.from_pretrained(
+     "diffusers/controlnet-canny-sdxl-1.0",
+     torch_dtype=torch.float16
+ )
+ vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
+ pipe = StableDiffusionXLControlNetPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, vae=vae, torch_dtype=torch.float16).to("cuda")
+
+ pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-SDXL-2steps-lora.safetensors"))
+ # Ensure the DDIM scheduler's timestep spacing is set to trailing !!!
+ pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
+ pipe.fuse_lora()
+ image = pipe("A chocolate cookie", num_inference_steps=2, image=control_image, guidance_scale=0, controlnet_conditioning_scale=control_weight).images[0]
+ image.save('image_out.png')
+ ```
+
+ #### Unified LoRA (supports 1 to 8 inference steps)
+ Take the Canny ControlNet as an example:
+ ```python
+ import torch
+ from diffusers.utils import load_image
+ import numpy as np
+ import cv2
+ from PIL import Image
+ from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL, TCDScheduler
+ from huggingface_hub import hf_hub_download
+
+ # Load original image
+ image = load_image("https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png")
+ image = np.array(image)
+ # Prepare Canny control image
+ low_threshold = 100
+ high_threshold = 200
+ image = cv2.Canny(image, low_threshold, high_threshold)
+ image = image[:, :, None]
+ image = np.concatenate([image, image, image], axis=2)
+ control_image = Image.fromarray(image)
+ control_image.save("control.png")
+ control_weight = 0.5  # recommended for good generalization
+
+ # Initialize pipeline
+ controlnet = ControlNetModel.from_pretrained(
+     "diffusers/controlnet-canny-sdxl-1.0",
+     torch_dtype=torch.float16
+ )
+ vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
+ pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+     "stabilityai/stable-diffusion-xl-base-1.0",
+     controlnet=controlnet, vae=vae, torch_dtype=torch.float16).to("cuda")
+
+ # Load the Hyper-SDXL-1step unified LoRA
+ pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-SDXL-1step-lora.safetensors"))
+ pipe.fuse_lora()
+ # Use TCD scheduler to achieve better image quality
+ pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
+ # Lower eta results in more detail for multi-step inference
+ eta=1.0
+ image = pipe("A chocolate cookie", num_inference_steps=4, image=control_image, guidance_scale=0, controlnet_conditioning_scale=control_weight, eta=eta).images[0]
+ image.save('image_out.png')
+ ```
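With the unified LoRA, the same ControlNet pipeline can be re-run at a different step count and eta; the values below are illustrative picks, not recommendations from this commit:

```python
# Reusing the fused unified-LoRA ControlNet pipeline from above;
# step count and eta are illustrative values within the supported 1-8 range
image = pipe("A chocolate cookie", num_inference_steps=8, image=control_image,
             guidance_scale=0, controlnet_conditioning_scale=control_weight, eta=0.5).images[0]
```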
+
+ ### SD1.5-related models
+
+ #### 2-Steps, 4-Steps, 8-Steps LoRA
+ Take the Canny ControlNet with 2-step inference as an example:
+ ```python
+ import torch
+ from diffusers.utils import load_image
+ import numpy as np
+ import cv2
+ from PIL import Image
+ from diffusers import ControlNetModel, StableDiffusionControlNetPipeline, DDIMScheduler
+ from huggingface_hub import hf_hub_download
+
+ controlnet_checkpoint = "lllyasviel/control_v11p_sd15_canny"
+
+ # Load original image
+ image = load_image("https://huggingface.co/lllyasviel/control_v11p_sd15_canny/resolve/main/images/input.png")
+ image = np.array(image)
+ # Prepare Canny control image
+ low_threshold = 100
+ high_threshold = 200
+ image = cv2.Canny(image, low_threshold, high_threshold)
+ image = image[:, :, None]
+ image = np.concatenate([image, image, image], axis=2)
+ control_image = Image.fromarray(image)
+ control_image.save("control.png")
+
+ # Initialize pipeline
+ controlnet = ControlNetModel.from_pretrained(controlnet_checkpoint, torch_dtype=torch.float16)
+ pipe = StableDiffusionControlNetPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16).to("cuda")
+ pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-SD15-2steps-lora.safetensors"))
+ pipe.fuse_lora()
+ # Ensure the DDIM scheduler's timestep spacing is set to trailing !!!
+ pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
+ image = pipe("a blue paradise bird in the jungle", num_inference_steps=2, image=control_image, guidance_scale=0).images[0]
+ image.save('image_out.png')
+ ```
+
+
+ #### Unified LoRA (supports 1 to 8 inference steps)
+ Take the Canny ControlNet as an example:
+ ```python
+ import torch
+ from diffusers.utils import load_image
+ import numpy as np
+ import cv2
+ from PIL import Image
+ from diffusers import ControlNetModel, StableDiffusionControlNetPipeline, TCDScheduler
+ from huggingface_hub import hf_hub_download
+
+ controlnet_checkpoint = "lllyasviel/control_v11p_sd15_canny"
+
+ # Load original image
+ image = load_image("https://huggingface.co/lllyasviel/control_v11p_sd15_canny/resolve/main/images/input.png")
+ image = np.array(image)
+ # Prepare Canny control image
+ low_threshold = 100
+ high_threshold = 200
+ image = cv2.Canny(image, low_threshold, high_threshold)
+ image = image[:, :, None]
+ image = np.concatenate([image, image, image], axis=2)
+ control_image = Image.fromarray(image)
+ control_image.save("control.png")
+
+ # Initialize pipeline
+ controlnet = ControlNetModel.from_pretrained(controlnet_checkpoint, torch_dtype=torch.float16)
+ pipe = StableDiffusionControlNetPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16).to("cuda")
+ # Load the Hyper-SD15-1step unified LoRA
+ pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-SD15-1step-lora.safetensors"))
+ pipe.fuse_lora()
+ # Use TCD scheduler to achieve better image quality
+ pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
+ # Lower eta results in more detail for multi-step inference
+ eta=1.0
+ image = pipe("a blue paradise bird in the jungle", num_inference_steps=1, image=control_image, guidance_scale=0, eta=eta).images[0]
+ image.save('image_out.png')
+ ```
+
  ## Citation
  ```bibtex
  @article{ren2024hypersd,