yibolu
committed on
Commit
•
b8c960d
1
Parent(s):
26dcb0c
update readme
Browse files- .gitattributes +15 -0
- README.md +110 -33
- control_bird_canny.png +0 -0
- images/model_load_performance.png +3 -0
- images/sd_controlnet_txt2img.png +3 -0
- images/sd_txt2img.png +3 -0
- images/sdxl_controlnet_txt2img.png +3 -0
- images/sdxl_txt2img.png +3 -0
- outputs/res_controlnet_img2img_0.png +0 -0
- outputs/res_controlnet_sdxl_txt2img.png +3 -0
- outputs/res_controlnet_txt2img_0.png +0 -0
- outputs/res_img2img_0.png +0 -0
- outputs/res_sdxl_txt2img_0.png +3 -0
- outputs/res_sdxl_txt2img_lora_0.png +3 -0
- outputs/res_txt2img_0.png +0 -0
- outputs/res_txt2img_lora_0.png +0 -0
.gitattributes
CHANGED
@@ -32,8 +32,23 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
|
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
lyrasd_model/lyrasd_lib/libth_lyrasd_cu11_sm80.so filter=lfs diff=lfs merge=lfs -text
|
37 |
lyrasd_model/lyrasd_lib/libth_lyrasd_cu11_sm86.so filter=lfs diff=lfs merge=lfs -text
|
38 |
lyrasd_model/lyrasd_lib/libth_lyrasd_cu12_sm80.so filter=lfs diff=lfs merge=lfs -text
|
39 |
lyrasd_model/lyrasd_lib/libth_lyrasd_cu12_sm86.so filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
36 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
37 |
lyrasd_model/lyrasd_lib/libth_lyrasd_cu11_sm80.so filter=lfs diff=lfs merge=lfs -text
|
38 |
lyrasd_model/lyrasd_lib/libth_lyrasd_cu11_sm86.so filter=lfs diff=lfs merge=lfs -text
|
39 |
lyrasd_model/lyrasd_lib/libth_lyrasd_cu12_sm80.so filter=lfs diff=lfs merge=lfs -text
|
40 |
lyrasd_model/lyrasd_lib/libth_lyrasd_cu12_sm86.so filter=lfs diff=lfs merge=lfs -text
|
41 |
+
control_bird_canny.png filter=lfs diff=lfs merge=lfs -text
|
42 |
+
images/sdxl_controlnet_txt2img.png filter=lfs diff=lfs merge=lfs -text
|
43 |
+
outputs/res_controlnet_sdxl_txt2img.png filter=lfs diff=lfs merge=lfs -text
|
44 |
+
outputs/res_controlnet_txt2img_0.png filter=lfs diff=lfs merge=lfs -text
|
45 |
+
outputs/res_img2img_0.png filter=lfs diff=lfs merge=lfs -text
|
46 |
+
outputs/res_sdxl_txt2img_0.png filter=lfs diff=lfs merge=lfs -text
|
47 |
+
images/sd_controlnet_txt2img.png filter=lfs diff=lfs merge=lfs -text
|
48 |
+
images/sd_txt2img.png filter=lfs diff=lfs merge=lfs -text
|
49 |
+
outputs/res_controlnet_img2img_0.png filter=lfs diff=lfs merge=lfs -text
|
50 |
+
outputs/res_sdxl_txt2img_lora_0.png filter=lfs diff=lfs merge=lfs -text
|
51 |
+
outputs/res_txt2img_0.png filter=lfs diff=lfs merge=lfs -text
|
52 |
+
outputs/res_txt2img_lora_0.png filter=lfs diff=lfs merge=lfs -text
|
53 |
+
images/model_load_performance.png filter=lfs diff=lfs merge=lfs -text
|
54 |
+
images/sdxl_txt2img.png filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -10,49 +10,61 @@ tags:
|
|
10 |
|
11 |
We consider the Diffusers as the much more extendable framework for the SD ecosystem. Therefore, we have made a **pivot to Diffusers**, leading to a complete update of lyraSD.
|
12 |
|
13 |
-
lyraSD is currently the **fastest Stable Diffusion model** that can 100% align the outputs of **Diffusers** available, boasting an inference cost of only **0.
|
14 |
|
15 |
Among its main features are:
|
16 |
|
17 |
-
- **
|
18 |
-
- **LoRA Hot Swap**: Can hot swap a Lora within 0.5s (0.1s if cached)
|
19 |
-
- 100% likeness to diffusers output
|
20 |
-
- 4 Commonly used Pipelines
|
21 |
- - Text2Img
|
22 |
- - Img2Img
|
|
|
23 |
- - ControlNetText2Img
|
24 |
- - ControlNetImg2Img
|
25 |
-
-
|
|
|
|
|
|
|
|
|
26 |
|
27 |
## Speed
|
28 |
|
29 |
### test environment
|
30 |
|
31 |
-
-
|
32 |
-
-
|
33 |
-
-
|
34 |
-
-
|
35 |
-
-
|
|
|
36 |
|
37 |
-
### Text2Img
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
|
43 |
-
###
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
48 |
|
49 |
## Model Sources
|
50 |
|
|
|
51 |
- **Checkpoint:** https://civitai.com/models/7371/rev-animated
|
52 |
- **ControlNet:** https://huggingface.co/lllyasviel/sd-controlnet-canny
|
53 |
- **Lora:** https://civitai.com/models/18323?modelVersionId=46846
|
54 |
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
```python
|
58 |
import torch
|
@@ -60,7 +72,7 @@ import time
|
|
60 |
|
61 |
from lyrasd_model import LyraSdTxt2ImgPipeline
|
62 |
|
63 |
-
#
|
64 |
# 1. clip 模型
|
65 |
# 2. 转换好的优化后的 unet 模型,放入其中的 unet_bins 文件夹
|
66 |
# 3. vae 模型
|
@@ -75,8 +87,8 @@ lora_path = "./models/lyrasd_xiaorenshu_lora"
|
|
75 |
model = LyraSdTxt2ImgPipeline(model_path, lib_path)
|
76 |
|
77 |
# load lora
|
78 |
-
#
|
79 |
-
model.
|
80 |
|
81 |
# 准备应用的输入和超参数
|
82 |
prompt = "a cat, cute, cartoon, concise, traditional, chinese painting, Tang and Song Dynasties, masterpiece, 4k, 8k, UHD, best quality"
|
@@ -97,36 +109,101 @@ print("image gen cost: ",time.perf_counter() - start)
|
|
97 |
for i, image in enumerate(images):
|
98 |
image.save(f"outputs/res_txt2img_lora_{i}.png")
|
99 |
|
100 |
-
# unload lora
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
|
|
|
|
|
104 |
```
|
|
|
105 |
## Demo output
|
106 |
|
107 |
### Text2Img
|
108 |
-
#### Text2Img
|
109 |
![text2img_demo](./outputs/res_txt2img_0.png)
|
110 |
|
111 |
-
#### Text2Img with Lora
|
112 |
![text2img_demo](./outputs/res_txt2img_lora_0.png)
|
113 |
|
114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
116 |
#### Img2Img input
|
117 |
<img src="https://chuangxin-research-1258344705.cos.ap-guangzhou.myqcloud.com/share/files/seaside_town.png?q-sign-algorithm=sha1&q-ak=AKIDBF6i7GCtKWS8ZkgOtACzX3MQDl37xYty&q-sign-time=1692601590;1865401590&q-key-time=1692601590;1865401590&q-header-list=&q-url-param-list=&q-signature=ca04ca92d990d94813029c0d9ef29537e5f4637c" alt="img2img input" width="512"/>
|
118 |
|
119 |
#### Img2Img output
|
120 |
-
![text2img_demo](./outputs/res_img2img_0.png)
|
121 |
|
122 |
### ControlNet Text2Img
|
123 |
|
124 |
#### Control Image
|
125 |
![text2img_demo](./control_bird_canny.png)
|
126 |
|
127 |
-
#### ControlNet Text2Img Output
|
128 |
![text2img_demo](./outputs/res_controlnet_txt2img_0.png)
|
129 |
|
|
|
|
|
|
|
|
|
130 |
## Docker Environment Recommendation
|
131 |
|
132 |
- For Cuda 11.X: we recommend ```nvcr.io/nvidia/pytorch:22.12-py3```
|
@@ -146,7 +223,7 @@ python txt2img_demo.py
|
|
146 |
author = {Kangjian Wu, Zhengtao Wang, Yibo Lu, Haoxiong Su, Bin Wu},
|
147 |
title = {lyraSD: Accelerating Stable Diffusion with best flexibility},
|
148 |
howpublished = {\url{https://huggingface.co/TMElyralab/lyraSD}},
|
149 |
-
year = {
|
150 |
}
|
151 |
```
|
152 |
|
|
|
10 |
|
11 |
We consider the Diffusers as the much more extendable framework for the SD ecosystem. Therefore, we have made a **pivot to Diffusers**, leading to a complete update of lyraSD.
|
12 |
|
13 |
+
lyraSD is currently the **fastest Stable Diffusion model** that can 100% align the outputs of **Diffusers** available, boasting an inference cost of only **0.36 seconds** for a 512x512 image, accelerating the process up to **50% faster** than the original version.
|
14 |
|
15 |
Among its main features are:
|
16 |
|
17 |
+
- **All Commonly used** SD1.5 and SDXL pipelines
|
|
|
|
|
|
|
18 |
- - Text2Img
|
19 |
- - Img2Img
|
20 |
+
- - Inpainting
|
21 |
- - ControlNetText2Img
|
22 |
- - ControlNetImg2Img
|
23 |
+
- - IpAdapterText2Img
|
24 |
+
- **Fast ControlNet Hot Swap**: Can hot swap a ControlNet model weights within 0.6s
|
25 |
+
- **Fast LoRA Hot Swap**: Can hot swap a Lora within 0.14s
|
26 |
+
- 100% likeness to diffusers output
|
27 |
+
- Supported Devices: Any GPU with SM version >= 80. For example, Nvidia Ampere architecture (A2, A10, A16, A30, A40, A100), RTX 4090, 3080 and etc.
|
28 |
|
29 |
## Speed
|
30 |
|
31 |
### test environment
|
32 |
|
33 |
+
- Device: Nvidia A100 40G
|
34 |
+
- Nvidia driver version: 525.105.17
|
35 |
+
- Nvidia cuda version: 12.0
|
36 |
+
- Precision: fp16
|
37 |
+
- Steps: 20
|
38 |
+
- Sampler: EulerA
|
39 |
|
40 |
+
### SD1.5 Text2Img Performance
|
41 |
+
![Alt text](images/sd_txt2img.png)
|
42 |
+
|
43 |
+
### SD1.5 ControlNet-Text2Img Performance
|
44 |
+
![Alt text](images/sd_controlnet_txt2img.png)
|
45 |
|
46 |
+
### SDXL Text2Img Performance
|
47 |
+
![Alt text](images/sdxl_txt2img.png)
|
48 |
+
|
49 |
+
### SDXL ControlNet-Text2Img Performance
|
50 |
+
![Alt text](images/sdxl_controlnet_txt2img.png)
|
51 |
+
|
52 |
+
### SD Model Load Performance
|
53 |
+
![Alt text](images/model_load_performance.png)
|
54 |
|
55 |
## Model Sources
|
56 |
|
57 |
+
SD1.5
|
58 |
- **Checkpoint:** https://civitai.com/models/7371/rev-animated
|
59 |
- **ControlNet:** https://huggingface.co/lllyasviel/sd-controlnet-canny
|
60 |
- **Lora:** https://civitai.com/models/18323?modelVersionId=46846
|
61 |
|
62 |
+
SDXL
|
63 |
+
- **Checkpoint:** https://civitai.com/models/43977?modelVersionId=227916
|
64 |
+
- **ControlNet:** https://huggingface.co/diffusers/controlnet-canny-sdxl-1.0
|
65 |
+
- **Lora:** https://civitai.com/models/18323?modelVersionId=46846
|
66 |
+
|
67 |
+
## SD1.5 Text2Img Uses
|
68 |
|
69 |
```python
|
70 |
import torch
|
|
|
72 |
|
73 |
from lyrasd_model import LyraSdTxt2ImgPipeline
|
74 |
|
75 |
+
# 存放模型文件的路径,应该包含以下结构(和diffusers一致):
|
76 |
# 1. clip 模型
|
77 |
# 2. 转换好的优化后的 unet 模型,放入其中的 unet_bins 文件夹
|
78 |
# 3. vae 模型
|
|
|
87 |
model = LyraSdTxt2ImgPipeline(model_path, lib_path)
|
88 |
|
89 |
# load lora
|
90 |
+
# lora model path, name,lora strength
|
91 |
+
model.load_lora_v2(lora_path, "xiaorenshu", 0.4)
|
92 |
|
93 |
# 准备应用的输入和超参数
|
94 |
prompt = "a cat, cute, cartoon, concise, traditional, chinese painting, Tang and Song Dynasties, masterpiece, 4k, 8k, UHD, best quality"
|
|
|
109 |
for i, image in enumerate(images):
|
110 |
image.save(f"outputs/res_txt2img_lora_{i}.png")
|
111 |
|
112 |
+
# unload lora, lora's name, clear lora cache
|
113 |
+
model.unload_lora_v2("xiaorenshu", True)
|
114 |
+
```
|
115 |
+
|
116 |
+
## SDXL Text2Img Uses
|
117 |
+
|
118 |
+
```python
|
119 |
+
import torch
|
120 |
+
import time
|
121 |
+
|
122 |
+
from lyrasd_model import LyraSdXLTxt2ImgPipeline
|
123 |
+
|
124 |
+
# 存放模型文件的路径,应该包含以下结构:
|
125 |
+
# 1. clip 模型
|
126 |
+
# 2. 转换好的优化后的 unet 模型,放入其中的 unet_bins 文件夹
|
127 |
+
# 3. vae 模型
|
128 |
+
# 4. scheduler 配置
|
129 |
+
|
130 |
+
# LyraSD 的 C++ 编译动态链接库,其中包含 C++ CUDA 计算的细节
|
131 |
+
lib_path = "./lyrasd_model/lyrasd_lib/libth_lyrasd_cu11_sm80.so"
|
132 |
+
model_path = "./models/lyrasd_helloworldSDXL20Fp16"
|
133 |
+
lora_path = "./models/lyrasd_xiaorenshu_lora"
|
134 |
+
|
135 |
+
# 构建 Txt2Img 的 Pipeline
|
136 |
+
model = LyraSdXLTxt2ImgPipeline(model_path, lib_path)
|
137 |
+
|
138 |
+
# load lora
|
139 |
+
# lora model path, name,lora strength
|
140 |
+
model.load_lora_v2(lora_path, "xiaorenshu", 0.4)
|
141 |
+
|
142 |
+
# 准备应用的输入和超参数
|
143 |
+
prompt = "a cat, cute, cartoon, concise, traditional, chinese painting, Tang and Song Dynasties, masterpiece, 4k, 8k, UHD, best quality"
|
144 |
+
negative_prompt = "(((horrible))), (((scary))), (((naked))), (((large breasts))), high saturation, colorful, human:2, body:2, low quality, bad quality, lowres, out of frame, duplicate, watermark, signature, text, frames, cut, cropped, malformed limbs, extra limbs, (((missing arms))), (((missing legs)))"
|
145 |
+
height, width = 512, 512
|
146 |
+
steps = 30
|
147 |
+
guidance_scale = 7
|
148 |
+
generator = torch.Generator().manual_seed(123)
|
149 |
+
num_images = 1
|
150 |
|
151 |
+
start = time.perf_counter()
|
152 |
+
# 推理生成
|
153 |
+
images = model( prompt,
|
154 |
+
height=height,
|
155 |
+
width=width,
|
156 |
+
num_inference_steps=steps,
|
157 |
+
num_images_per_prompt=1,
|
158 |
+
guidance_scale=guidance_scale,
|
159 |
+
negative_prompt=negative_prompt,
|
160 |
+
generator=generator
|
161 |
+
)
|
162 |
+
print("image gen cost: ",time.perf_counter() - start)
|
163 |
+
# 存储生成的图片
|
164 |
+
for i, image in enumerate(images):
|
165 |
+
image.save(f"outputs/res_txt2img_xl_lora_{i}.png")
|
166 |
|
167 |
+
# unload lora,参数为 lora 的名字,是否清除 lora 缓存
|
168 |
+
model.unload_lora_v2("xiaorenshu", True)
|
169 |
```
|
170 |
+
|
171 |
## Demo output
|
172 |
|
173 |
### Text2Img
|
174 |
+
#### SD1.5 Text2Img
|
175 |
![text2img_demo](./outputs/res_txt2img_0.png)
|
176 |
|
177 |
+
#### SD1.5 Text2Img with Lora
|
178 |
![text2img_demo](./outputs/res_txt2img_lora_0.png)
|
179 |
|
180 |
+
#### SDXL Text2Img
|
181 |
+
![text2img_demo](./outputs/res_sdxl_txt2img_0.png)
|
182 |
+
|
183 |
+
#### SDXL Text2Img with Lora
|
184 |
+
![text2img_demo](./outputs/res_sdxl_txt2img_lora_0.png)
|
185 |
+
|
186 |
+
|
187 |
+
<!-- ### Img2Img
|
188 |
|
189 |
#### Img2Img input
|
190 |
<img src="https://chuangxin-research-1258344705.cos.ap-guangzhou.myqcloud.com/share/files/seaside_town.png?q-sign-algorithm=sha1&q-ak=AKIDBF6i7GCtKWS8ZkgOtACzX3MQDl37xYty&q-sign-time=1692601590;1865401590&q-key-time=1692601590;1865401590&q-header-list=&q-url-param-list=&q-signature=ca04ca92d990d94813029c0d9ef29537e5f4637c" alt="img2img input" width="512"/>
|
191 |
|
192 |
#### Img2Img output
|
193 |
+
![text2img_demo](./outputs/res_img2img_0.png) -->
|
194 |
|
195 |
### ControlNet Text2Img
|
196 |
|
197 |
#### Control Image
|
198 |
![text2img_demo](./control_bird_canny.png)
|
199 |
|
200 |
+
#### SD1.5 ControlNet Text2Img Output
|
201 |
![text2img_demo](./outputs/res_controlnet_txt2img_0.png)
|
202 |
|
203 |
+
#### SDXL ControlNet Text2Img Output
|
204 |
+
![text2img_demo](./outputs/res_controlnet_sdxl_txt2img.png)
|
205 |
+
|
206 |
+
|
207 |
## Docker Environment Recommendation
|
208 |
|
209 |
- For Cuda 11.X: we recommend ```nvcr.io/nvidia/pytorch:22.12-py3```
|
|
|
223 |
author = {Kangjian Wu, Zhengtao Wang, Yibo Lu, Haoxiong Su, Bin Wu},
|
224 |
title = {lyraSD: Accelerating Stable Diffusion with best flexibility},
|
225 |
howpublished = {\url{https://huggingface.co/TMElyralab/lyraSD}},
|
226 |
+
year = {2024}
|
227 |
}
|
228 |
```
|
229 |
|
control_bird_canny.png
CHANGED
Git LFS Details
|
images/model_load_performance.png
ADDED
Git LFS Details
|
images/sd_controlnet_txt2img.png
ADDED
Git LFS Details
|
images/sd_txt2img.png
ADDED
Git LFS Details
|
images/sdxl_controlnet_txt2img.png
ADDED
Git LFS Details
|
images/sdxl_txt2img.png
ADDED
Git LFS Details
|
outputs/res_controlnet_img2img_0.png
CHANGED
Git LFS Details
|
outputs/res_controlnet_sdxl_txt2img.png
ADDED
Git LFS Details
|
outputs/res_controlnet_txt2img_0.png
CHANGED
Git LFS Details
|
outputs/res_img2img_0.png
CHANGED
Git LFS Details
|
outputs/res_sdxl_txt2img_0.png
ADDED
Git LFS Details
|
outputs/res_sdxl_txt2img_lora_0.png
ADDED
Git LFS Details
|
outputs/res_txt2img_0.png
CHANGED
Git LFS Details
|
outputs/res_txt2img_lora_0.png
CHANGED
Git LFS Details
|