Commit 61ad3d2: Add IP-Adapter
adamelliotfields committed (1 parent: c5cf566)

Files changed:
- README.md +6 -5
- app.css +4 -0
- app.py +74 -31
- cli.py +4 -0
- lib/config.py +4 -5
- lib/inference.py +15 -2
- lib/loader.py +49 -11
- usage.md +15 -15
README.md
CHANGED
@@ -16,6 +16,7 @@ license: apache-2.0
 models:
 - ai-forever/Real-ESRGAN
 - fluently/Fluently-v4
+- h94/IP-Adapter
 - Linaqruf/anything-v3-1
 - Lykon/dreamshaper-8
 - prompthero/openjourney-v4
@@ -28,6 +29,9 @@ preload_from_hub:
 - >-
   fluently/Fluently-v4
   text_encoder/model.fp16.safetensors,unet/diffusion_pytorch_model.fp16.safetensors,vae/diffusion_pytorch_model.fp16.safetensors
+- >-
+  h94/IP-Adapter
+  models/ip-adapter-full-face_sd15.safetensors,models/ip-adapter-plus_sd15.safetensors,models/image_encoder/model.safetensors
 - >-
   Linaqruf/anything-v3-1
   text_encoder/model.safetensors,unet/diffusion_pytorch_model.safetensors,vae/diffusion_pytorch_model.safetensors
@@ -48,9 +52,10 @@ preload_from_hub:
 # diffusion
 
 Gradio app for Stable Diffusion 1.5 including:
-* txt2img and img2img pipelines
+* txt2img and img2img pipelines with IP-Adapter
 * Curated models and TI embeddings
 * 100+ styles from sdxl_prompt_styler
+* 150+ prompts from StableStudio
 * Compel prompt weighting
 * Multiple samplers with Karras scheduling
 * DeepCache, FreeU, and Clip Skip available
@@ -80,7 +85,3 @@ python app.py --port 7860
 # cli
 python cli.py 'an astronaut riding a horse on mars'
 ```
-
-## TODO
-
-- [ ] IP-Adapter and T2I-Adapter
app.css
CHANGED
@@ -47,6 +47,10 @@
   max-width: 42px;
 }
 
+.image-container {
+  max-height: 438px;
+}
+
 .popover {
   position: relative;
 }
app.py
CHANGED
@@ -44,27 +44,32 @@ def random_fn():
     return gr.Textbox(value=random.choice(prompts))
 
 
-def gallery_fn(images, image):
-    …
-    if image is not None:
+def create_image_dropdown(images, locked=False):
+    if locked:
         return gr.Dropdown(
             choices=[("🔒", -2)],
             interactive=False,
             value=-2,
         )
-    return gr.Dropdown(
-        choices=[("None", -1)] + [(str(i + 1), i) for i, _ in enumerate(images or [])],
-        interactive=True,
-        filterable=False,
-        value=-1,
+    else:
+        return gr.Dropdown(
+            choices=[("None", -1)] + [(str(i + 1), i) for i, _ in enumerate(images or [])],
+            interactive=True,
+            value=-1,
+        )
+
+
+def gallery_fn(images, image, ip_image):
+    return (
+        create_image_dropdown(images, locked=image is not None),
+        create_image_dropdown(images, locked=ip_image is not None),
     )
 
 
 def image_prompt_fn(images):
-    return …
+    return create_image_dropdown(images)
 
 
-# can't use image input in JS
 def image_select_fn(images, image, i):
     # -2 is the lock icon, -1 is None
     if i == -2:
@@ -278,29 +283,53 @@ with gr.Blocks(
         with gr.TabItem("🖼️ Image"):
             with gr.Row():
                 image_prompt = gr.Image(
+                    show_share_button=False,
                     show_label=False,
                     min_width=320,
                     format="png",
                     type="pil",
-                    scale=0,
                 )
-            image_select = gr.Dropdown(
-                choices=[("None", -1)],
-                label="Gallery Image",
-                interactive=True,
-                filterable=False,
-                value=-1,
-            )
-            denoising_strength = gr.Slider(
-                value=Config.DENOISING_STRENGTH,
-                label="Denoising Strength",
-                minimum=0.0,
-                maximum=1.0,
-                step=0.1,
-            )
+                ip_image = gr.Image(
+                    show_share_button=False,
+                    label="IP-Adapter",
+                    min_width=320,
+                    format="png",
+                    type="pil",
+                )
 
+            with gr.Group():
+                with gr.Row():
+                    image_select = gr.Dropdown(
+                        choices=[("None", -1)],
+                        label="Gallery Image",
+                        interactive=True,
+                        filterable=False,
+                        value=-1,
+                    )
+                    ip_image_select = gr.Dropdown(
+                        choices=[("None", -1)],
+                        label="Gallery Image (IP-Adapter)",
+                        interactive=True,
+                        filterable=False,
+                        value=-1,
+                    )
+
+                with gr.Row():
+                    denoising_strength = gr.Slider(
+                        value=Config.DENOISING_STRENGTH,
+                        label="Denoising Strength",
+                        minimum=0.0,
+                        maximum=1.0,
+                        step=0.1,
+                    )
+
+                with gr.Row():
+                    ip_face = gr.Checkbox(
+                        elem_classes=["checkbox"],
+                        label="IP-Adapter Face",
+                        value=False,
+                    )
+
         with gr.TabItem("ℹ️ Usage"):
             gr.Markdown(read_file("usage.md"), elem_classes=["markdown"])
@@ -358,9 +387,9 @@ with gr.Blocks(
     seed.change(None, inputs=[seed], outputs=[], js=seed_js)
 
     file_format.change(
-        lambda f: (gr.Gallery(format=f), gr.Image(format=f)),
+        lambda f: (gr.Gallery(format=f), gr.Image(format=f), gr.Image(format=f)),
        inputs=[file_format],
-        outputs=[output_images, image_prompt],
+        outputs=[output_images, image_prompt, ip_image],
         show_api=False,
     )
@@ -372,11 +401,11 @@ with gr.Blocks(
         js=aspect_ratio_js,
     )
 
-    # lock the input
+    # lock the input images so you don't lose them when the gallery updates
     output_images.change(
         gallery_fn,
-        inputs=[output_images, image_prompt],
-        outputs=[image_select],
+        inputs=[output_images, image_prompt, ip_image],
+        outputs=[image_select, ip_image_select],
         show_api=False,
     )
@@ -387,6 +416,12 @@ with gr.Blocks(
         outputs=[image_prompt],
         show_api=False,
     )
+    ip_image_select.change(
+        image_select_fn,
+        inputs=[output_images, ip_image, ip_image_select],
+        outputs=[ip_image],
+        show_api=False,
+    )
 
     # reset the dropdown on clear
     image_prompt.clear(
@@ -395,6 +430,12 @@ with gr.Blocks(
         outputs=[image_select],
         show_api=False,
     )
+    ip_image.clear(
+        image_prompt_fn,
+        inputs=[output_images],
+        outputs=[ip_image_select],
+        show_api=False,
+    )
 
     # show "Custom" aspect ratio when manually changing width or height
     gr.on(
@@ -415,6 +456,8 @@ with gr.Blocks(
             prompt,
             negative_prompt,
             image_prompt,
+            ip_image,
+            ip_face,
             embeddings,
             style,
             seed,
cli.py
CHANGED
@@ -31,6 +31,8 @@ def main():
     parser.add_argument("--steps", type=int, metavar="INT", default=Config.INFERENCE_STEPS)
     parser.add_argument("--strength", type=float, metavar="FLOAT", default=Config.DENOISING_STRENGTH)
     parser.add_argument("--image", type=str, metavar="STR")
+    parser.add_argument("--ip-image", type=str, metavar="STR")
+    parser.add_argument("--ip-face", action="store_true")
     parser.add_argument("--taesd", action="store_true")
     parser.add_argument("--clip-skip", action="store_true")
     parser.add_argument("--truncate", action="store_true")
@@ -44,6 +46,8 @@ def main():
         args.prompt,
         args.negative,
         args.image,
+        args.ip_image,
+        args.ip_face,
         args.embedding,
         args.style,
         args.seed,
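The new flags feed straight into `generate()` alongside `--image`, mirroring the web UI's IP-Adapter inputs. A hypothetical invocation, in the style of the README's usage snippet (the reference image path is illustrative):

```
python cli.py 'portrait of a man' --ip-image face.png --ip-face
```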
lib/config.py
CHANGED
@@ -20,12 +20,11 @@ Config = SimpleNamespace(
     ],
     SCHEDULER="DEIS 2M",
     SCHEDULERS=[
+        "DDIM",
         "DEIS 2M",
         "DPM++ 2M",
-        "DPM2 a",
+        "Euler",
         "Euler a",
-        "Heun",
-        "LMS",
         "PNDM",
     ],
     EMBEDDING="fast_negative",
@@ -39,8 +38,8 @@ Config = SimpleNamespace(
     HEIGHT=576,
     NUM_IMAGES=1,
     SEED=-1,
-    GUIDANCE_SCALE=…,
-    INFERENCE_STEPS=…,
+    GUIDANCE_SCALE=6,
+    INFERENCE_STEPS=35,
     DENOISING_STRENGTH=0.6,
     DEEPCACHE_INTERVAL=2,
     SCALE=1,
lib/inference.py
CHANGED
@@ -75,6 +75,8 @@ def generate(
     positive_prompt,
     negative_prompt="",
     image_prompt=None,
+    ip_image=None,
+    ip_face=False,
     embeddings=[],
     style=None,
     seed=None,
@@ -120,11 +122,17 @@ def generate(
 
     KIND = "img2img" if image_prompt is not None else "txt2img"
 
+    IP_ADAPTER = None
+
+    if ip_image:
+        IP_ADAPTER = "full-face" if ip_face else "plus"
+
     with torch.inference_mode():
         start = time.perf_counter()
         loader = Loader()
         pipe, upscaler = loader.load(
             KIND,
+            IP_ADAPTER,
             model,
             scheduler,
             karras,
@@ -146,10 +154,12 @@ def generate(
                 token=f"<{embedding}>",
             )
             negative_prompt = (
-                f"{negative_prompt}, {embedding}" …
+                f"{negative_prompt}, (<{embedding}>)1.1"
+                if negative_prompt
+                else f"(<{embedding}>)1.1"
             )
         except (EnvironmentError, HFValidationError, RepositoryNotFoundError):
-            raise Error(f"Invalid embedding: {embedding}")
+            raise Error(f"Invalid embedding: <{embedding}>")
 
     # prompt embeds
     compel = Compel(
@@ -202,6 +212,9 @@ def generate(
         kwargs["strength"] = denoising_strength
         kwargs["image"] = prepare_image(image_prompt, (width, height))
 
+    if IP_ADAPTER:
+        kwargs["ip_adapter_image"] = prepare_image(ip_image, (width, height))
+
     try:
         image = pipe(**kwargs).images[0]
         if scale > 1:
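The same flow can be reproduced against the public diffusers API. A minimal sketch, assuming a GPU and torch 2; the adapter repo, subfolder, weight name, and scale values are the ones this commit uses, while the checkpoint and input file are illustrative:

```python
import torch
from diffusers import StableDiffusionPipeline
from diffusers.utils import load_image

# load an SD 1.5 checkpoint (any of the models in README.md works)
pipe = StableDiffusionPipeline.from_pretrained(
    "Lykon/dreamshaper-8", torch_dtype=torch.float16
).to("cuda")

# "plus" for general reference images, "full-face" when ip_face is set
pipe.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="models",
    weight_name="ip-adapter-plus_sd15.safetensors",
)
pipe.set_ip_adapter_scale(0.5)  # the loader uses 0.6 for full-face

image = pipe(
    prompt="a portrait, best quality",
    ip_adapter_image=load_image("face.png"),  # illustrative input file
    num_inference_steps=35,
    guidance_scale=6,
).images[0]
image.save("output.png")
```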
lib/loader.py
CHANGED
@@ -1,17 +1,17 @@
 import torch
 from DeepCache import DeepCacheSDHelper
 from diffusers import (
+    DDIMScheduler,
     DEISMultistepScheduler,
     DPMSolverMultistepScheduler,
     EulerAncestralDiscreteScheduler,
-    HeunDiscreteScheduler,
-    KDPM2AncestralDiscreteScheduler,
-    LMSDiscreteScheduler,
+    EulerDiscreteScheduler,
     PNDMScheduler,
     StableDiffusionImg2ImgPipeline,
     StableDiffusionPipeline,
 )
 from diffusers.models import AutoencoderKL, AutoencoderTiny
+from diffusers.models.attention_processor import AttnProcessor2_0, IPAdapterAttnProcessor2_0
 from torch._dynamo import OptimizedModule
 
 from .upscaler import RealESRGAN
@@ -29,6 +29,7 @@ class Loader:
         cls._instance = super(Loader, cls).__new__(cls)
         cls._instance.pipe = None
         cls._instance.upscaler = None
+        cls._instance.ip_adapter = None
         return cls._instance
 
     def _load_upscaler(self, device=None, scale=4):
@@ -61,7 +62,38 @@ class Loader:
         # https://github.com/ChenyangSi/FreeU
         self.pipe.enable_freeu(b1=1.5, b2=1.6, s1=0.9, s2=0.2)
 
+    def _load_ip_adapter(self, ip_adapter=None):
+        if self.ip_adapter is None and self.ip_adapter != ip_adapter:
+            self.pipe.load_ip_adapter(
+                "h94/IP-Adapter",
+                subfolder="models",
+                weight_name=f"ip-adapter-{ip_adapter}_sd15.safetensors",
+            )
+            self.pipe.set_ip_adapter_scale(0.6 if ip_adapter == "full-face" else 0.5)
+            self.ip_adapter = ip_adapter
+
+        if self.ip_adapter is not None and ip_adapter is None:
+            if not isinstance(self.pipe, StableDiffusionImg2ImgPipeline):
+                self.pipe.image_encoder = None
+                self.pipe.register_to_config(image_encoder=[None, None])
+
+            self.pipe.feature_extractor = None
+            self.pipe.unet.encoder_hid_proj = None
+            self.pipe.unet.config.encoder_hid_dim_type = None
+            self.pipe.register_to_config(feature_extractor=[None, None])
+
+            attn_procs = {}
+            for name, value in self.pipe.unet.attn_processors.items():
+                attn_processor_class = AttnProcessor2_0()  # raises if not torch 2
+                attn_procs[name] = (
+                    attn_processor_class
+                    if isinstance(value, IPAdapterAttnProcessor2_0)
+                    else value.__class__()
+                )
+            self.pipe.unet.set_attn_processor(attn_procs)
+            self.pipe.ip_adapter = None
+
-    def _load_vae(…):
+    def _load_vae(self, taesd=False, model_name=None, variant=None):
         vae_type = type(self.pipe.vae)
         is_kl = issubclass(vae_type, (AutoencoderKL, OptimizedModule))
         is_tiny = issubclass(vae_type, AutoencoderTiny)
@@ -97,10 +129,12 @@ class Loader:
         self.pipe = pipelines[kind].from_pretrained(model, **kwargs).to(device, dtype)
         if not isinstance(self.pipe, pipelines[kind]):
             self.pipe = pipelines[kind].from_pipe(self.pipe).to(device, dtype)
+            self.ip_adapter = None
 
     def load(
         self,
         kind,
+        ip_adapter,
         model,
         scheduler,
         karras,
@@ -114,26 +148,29 @@ class Loader:
         model_lower = model.lower()
 
         schedulers = {
+            "DDIM": DDIMScheduler,
             "DEIS 2M": DEISMultistepScheduler,
             "DPM++ 2M": DPMSolverMultistepScheduler,
-            "DPM2 a": KDPM2AncestralDiscreteScheduler,
+            "Euler": EulerDiscreteScheduler,
             "Euler a": EulerAncestralDiscreteScheduler,
-            "Heun": HeunDiscreteScheduler,
-            "LMS": LMSDiscreteScheduler,
             "PNDM": PNDMScheduler,
         }
 
         scheduler_kwargs = {
             "beta_schedule": "scaled_linear",
             "timestep_spacing": "leading",
-            "use_karras_sigmas": karras,
             "beta_start": 0.00085,
             "beta_end": 0.012,
             "steps_offset": 1,
         }
 
-        if scheduler in ["Euler a", "PNDM"]:
-            del scheduler_kwargs["use_karras_sigmas"]
+        if scheduler not in ["DDIM", "Euler a", "PNDM"]:
+            scheduler_kwargs["use_karras_sigmas"] = karras
+
+        # https://github.com/huggingface/diffusers/blob/8a3f0c1/scripts/convert_original_stable_diffusion_to_diffusers.py#L939
+        if scheduler == "DDIM":
+            scheduler_kwargs["clip_sample"] = False
+            scheduler_kwargs["set_alpha_to_one"] = False
 
         # no fp16 variant
         if model_lower not in [
@@ -175,7 +212,8 @@ class Loader:
         self.pipe = None
         self._load_pipeline(kind, model_lower, device, dtype, **pipe_kwargs)
 
-        self._load_vae(…)
+        self._load_ip_adapter(ip_adapter)
+        self._load_vae(taesd, model_lower, variant)
         self._load_freeu(freeu)
         self._load_deepcache(deepcache)
         self._load_upscaler(device, scale)
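The DDIM flags added above come from the conversion script referenced in the diff. A standalone sketch of constructing one of these schedulers with the same kwargs; swapping it into an already-loaded pipeline is shown as a comment, since no pipeline is created here:

```python
from diffusers import DDIMScheduler

scheduler = DDIMScheduler(
    beta_schedule="scaled_linear",
    timestep_spacing="leading",
    beta_start=0.00085,
    beta_end=0.012,
    steps_offset=1,
    clip_sample=False,       # DDIM-specific, per the convert script
    set_alpha_to_one=False,  # DDIM-specific, per the convert script
)
# pipe.scheduler = scheduler  # assumes an existing SD 1.5 pipeline
```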
usage.md
CHANGED
@@ -12,6 +12,8 @@ Positive and negative prompts are embedded by [Compel](https://github.com/damian0815/compel)
 
 Note that `++` is `1.1^2` (and so on). See [syntax features](https://github.com/damian0815/compel/blob/main/doc/syntax.md) to learn more and read [Civitai](https://civitai.com)'s guide on [prompting](https://education.civitai.com/civitais-prompt-crafting-guide-part-1-basics/) for best practices.
 
+You can also press the `🎲` button to generate a random prompt.
+
 #### Arrays
 
 Arrays allow you to generate different images from a single prompt. For example, `[[cat,corgi]]` will expand into 2 separate prompts. Make sure `Images` is set accordingly (e.g., 2). Only works for the positive prompt. Inspired by [Fooocus](https://github.com/lllyasviel/Fooocus/pull/1503).
@@ -30,7 +32,7 @@ Styles are prompt templates from twri's [sdxl_prompt_styler](https://github.com/twri/sdxl_prompt_styler)
 
 ### Scale
 
-Rescale up to 4x using [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN).
+Rescale up to 4x using [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN) (Wang et al. 2021).
 
 ### Models
 
@@ -45,27 +47,25 @@ Each model checkpoint has a different aesthetic:
 
 ### Schedulers
 
-* [DEIS 2M](https://huggingface.co/docs/diffusers/en/api/schedulers/deis) (default)
-* [DPM++ 2M](https://huggingface.co/docs/diffusers/en/api/schedulers/multistep_dpm_solver)
-* [DPM2 a](https://huggingface.co/docs/diffusers/api/schedulers/dpm_discrete_ancestral)
-* [Euler a](https://huggingface.co/docs/diffusers/en/api/schedulers/euler_ancestral)
-* [Heun](https://huggingface.co/docs/diffusers/api/schedulers/heun)
-* [LMS](https://huggingface.co/docs/diffusers/api/schedulers/lms_discrete)
-* [PNDM](https://huggingface.co/docs/diffusers/api/schedulers/pndm)
+The default is [DEIS 2M](https://huggingface.co/docs/diffusers/en/api/schedulers/deis) with [Karras](https://arxiv.org/abs/2206.00364) enabled. The other multistep scheduler, [DPM++ 2M](https://huggingface.co/docs/diffusers/en/api/schedulers/multistep_dpm_solver), is also good. For realism, [DDIM](https://huggingface.co/docs/diffusers/en/api/schedulers/ddim) is recommended. [Euler a](https://huggingface.co/docs/diffusers/en/api/schedulers/euler_ancestral) is worth trying for a different look.
 
 ### Image-to-Image
 
-The `🖼️ Image` tab enables the image-to-image …
-Denoising strength is essentially how much the generation will differ from the i…
+The `🖼️ Image` tab enables the image-to-image and IP-Adapter pipelines. Either use the image input or select a generation from the gallery. To disable, simply clear the image input (the `x` overlay button).
+
+Denoising strength is essentially how much the generation will differ from the input image. A value of `0` will be identical to the original, while `1` will be a completely new image. You may want to also increase the number of inference steps. Only applies to the image-to-image input.
+
+### IP-Adapter
+
+In an image-to-image pipeline, the input image is used as the initial latent. With [IP-Adapter](https://github.com/tencent-ailab/IP-Adapter) (Ye et al. 2023), the input image is processed by a separate image encoder and the encoded features are used as conditioning along with the text prompt.
+
+For capturing faces, enable `IP-Adapter Face` to use the full-face model. You should use an input image that is mostly a face along with the Realistic Vision model. The input image should also be the same aspect ratio as the output to avoid distortion.
 
 ### Advanced
 
 #### DeepCache
 
-[DeepCache](https://github.com/horseee/DeepCache) (Ma et al. 2023) caches lower …
+[DeepCache](https://github.com/horseee/DeepCache) (Ma et al. 2023) caches lower UNet layers and reuses them every `Interval` steps:
 * `1`: no caching
 * `2`: more quality (default)
 * `3`: balanced
@@ -73,7 +73,7 @@ Denoising strength is essentially how much the generation will differ from the input image.
 
 #### FreeU
 
-[FreeU](https://github.com/ChenyangSi/FreeU) (Si et al. 2023) re-weights the contributions sourced from the …
+[FreeU](https://github.com/ChenyangSi/FreeU) (Si et al. 2023) re-weights the contributions sourced from the UNet's skip connections and backbone feature maps to potentially improve image quality.
 
 #### Clip Skip
 
@@ -81,7 +81,7 @@ When enabled, the last CLIP layer is skipped. This can sometimes improve image quality.
 
 #### Tiny VAE
 
-Enable [madebyollin/taesd](https://github.com/madebyollin/taesd) for …
+Enable [madebyollin/taesd](https://github.com/madebyollin/taesd) for near-instant latent decoding with a minor loss in detail. Useful for development.
 
 #### Prompt Truncation
 
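The Compel weighting described in usage.md (and the `(<{embedding}>)1.1` weighting this commit applies to negative embeddings) can be tried standalone. A minimal sketch; the checkpoint and prompt are illustrative:

```python
from compel import Compel
from transformers import CLIPTextModel, CLIPTokenizer

repo = "Lykon/dreamshaper-8"  # illustrative SD 1.5 checkpoint
tokenizer = CLIPTokenizer.from_pretrained(repo, subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained(repo, subfolder="text_encoder")

compel = Compel(tokenizer=tokenizer, text_encoder=text_encoder)

# (word)1.2 scales attention on "word" by 1.2; `word++` is shorthand for (word)1.21
embeds = compel("a (cinematic)1.2 portrait of a corgi, (blurry)0.5")
print(embeds.shape)  # conditioning tensor, passed to the pipeline as prompt_embeds
```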