Spaces: Running on Zero

Commit 60849d7: Image-to-image
adamelliotfields committed · Parent(s): 459aacb
Files changed:
- README.md +2 -1
- app.py +83 -2
- cli.py +4 -0
- lib/config.py +1 -0
- lib/inference.py +31 -32
- lib/loader.py +40 -48
- usage.md +8 -2
README.md CHANGED
@@ -48,6 +48,7 @@ preload_from_hub:
 # diffusion
 
 Gradio app for Stable Diffusion 1.5 including:
+* txt2img and img2img pipelines
 * curated models and TI embeddings
 * multiple samplers with Karras schedule
 * Compel prompting
@@ -84,4 +85,4 @@ python cli.py 'an astronaut riding a horse on mars'
 ## TODO
 
 - [ ] Metadata embed and display
-- [ ]
+- [ ] IP-Adapter and T2I-Adapter
app.py CHANGED
@@ -44,6 +44,34 @@ def random_fn():
     return gr.Textbox(value=random.choice(prompts))
 
 
+# can't toggle interactive in JS
+def gallery_fn(images, image):
+    if image is not None:
+        return gr.Dropdown(
+            choices=[("🔒", -1)],
+            interactive=False,
+            value=-1,
+        )
+
+    return gr.Dropdown(
+        choices=[("None", -1)]
+        + [(str(i + 1), i) for i, _ in enumerate(images if images is not None else [])],
+        interactive=True,
+        value=-1,
+    )
+
+
+def image_prompt_fn(images):
+    return gallery_fn(images, None)
+
+
+# can't use image input in JS
+def image_select_fn(images, image, i):
+    if image is not None and i == -1:
+        return gr.Image(value=image)
+    return gr.Image(value=images[i][0]) if i > -1 else None
+
+
 def generate_fn(*args):
     if len(args) > 0:
         prompt = args[0]
@@ -251,6 +279,33 @@ with gr.Blocks(
                     value=False,
                 )
 
+        # img2img tab
+        with gr.TabItem("🖼️ Image"):
+            with gr.Row():
+                image_prompt = gr.Image(
+                    show_label=False,
+                    min_width=320,
+                    format="png",
+                    type="pil",
+                    scale=0,
+                )
+
+            with gr.Row():
+                image_select = gr.Dropdown(
+                    choices=[("None", -1)],
+                    label="Load from Gallery",
+                    interactive=True,
+                    filterable=False,
+                    value=-1,
+                )
+                denoising_strength = gr.Slider(
+                    value=Config.DENOISING_STRENGTH,
+                    label="Denoising Strength",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.1,
+                )
+
         with gr.TabItem("ℹ️ Usage"):
             gr.Markdown(read_file("usage.md"), elem_classes=["markdown"])
 
@@ -308,9 +363,9 @@ with gr.Blocks(
     seed.change(None, inputs=[seed], outputs=[], js=seed_js)
 
     file_format.change(
-        lambda f: gr.Gallery(format=f),
+        lambda f: (gr.Gallery(format=f), gr.Image(format=f)),
         inputs=[file_format],
-        outputs=[output_images],
+        outputs=[output_images, image_prompt],
        show_api=False,
     )
 
@@ -322,6 +377,30 @@ with gr.Blocks(
         js=aspect_ratio_js,
     )
 
+    # lock the input image so you don't lose it when the gallery updates
+    output_images.change(
+        gallery_fn,
+        inputs=[output_images, image_prompt],
+        outputs=[image_select],
+        show_api=False,
+    )
+
+    # show the selected image in the image input
+    image_select.change(
+        image_select_fn,
+        inputs=[output_images, image_prompt, image_select],
+        outputs=[image_prompt],
+        show_api=False,
+    )
+
+    # reset the dropdown on clear
+    image_prompt.clear(
+        image_prompt_fn,
+        inputs=[output_images],
+        outputs=[image_select],
+        show_api=False,
+    )
+
     # show "Custom" aspect ratio when manually changing width or height
     gr.on(
        triggers=[width.input, height.input],
@@ -340,6 +419,7 @@ with gr.Blocks(
        inputs=[
            prompt,
            negative_prompt,
+           image_prompt,
            embeddings,
            style,
            seed,
@@ -349,6 +429,7 @@ with gr.Blocks(
            height,
            guidance_scale,
            inference_steps,
+           denoising_strength,
            num_images,
            use_karras,
            use_taesd,
cli.py CHANGED
@@ -30,6 +30,8 @@ def main():
     parser.add_argument("--guidance", type=float, metavar="FLOAT", default=Config.GUIDANCE_SCALE)
     parser.add_argument("--steps", type=int, metavar="INT", default=Config.INFERENCE_STEPS)
     parser.add_argument("--tome", type=float, metavar="FLOAT", default=Config.TOME_RATIO)
+    parser.add_argument("--strength", type=float, metavar="FLOAT", default=Config.DENOISING_STRENGTH)
+    parser.add_argument("--image", type=str, metavar="STR")
     parser.add_argument("--taesd", action="store_true")
     parser.add_argument("--clip-skip", action="store_true")
     parser.add_argument("--truncate", action="store_true")
@@ -42,6 +44,7 @@ def main():
     images = generate(
         args.prompt,
         args.negative,
+        args.image,
         args.embedding,
         args.style,
         args.seed,
@@ -51,6 +54,7 @@ def main():
         args.height,
         args.guidance,
         args.steps,
+        args.strength,
         args.images,
         args.karras,
         args.taesd,
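With the new flags, an image-to-image run from the command line looks roughly like this (the input path is illustrative):

    python cli.py 'an astronaut riding a horse on mars' --image input.png --strength 0.6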
lib/config.py CHANGED
@@ -41,6 +41,7 @@ Config = SimpleNamespace(
     SEED=-1,
     GUIDANCE_SCALE=7,
     INFERENCE_STEPS=30,
+    DENOISING_STRENGTH=0.6,
     DEEPCACHE_INTERVAL=2,
     TOME_RATIO=0.0,
     SCALE=1,
lib/inference.py CHANGED
@@ -16,15 +16,9 @@ from huggingface_hub.utils import HFValidationError, RepositoryNotFoundError
 
 from .loader import Loader
 
-__import__("warnings").filterwarnings("ignore", category=FutureWarning, module="diffusers")
 __import__("warnings").filterwarnings("ignore", category=FutureWarning, module="transformers")
 __import__("transformers").logging.set_verbosity_error()
 
-ZERO_GPU = (
-    os.environ.get("SPACES_ZERO_GPU", "").lower() == "true"
-    or os.environ.get("SPACES_ZERO_GPU", "") == "1"
-)
-
 with open("./data/styles.json") as f:
     styles = json.load(f)
 
@@ -76,6 +70,7 @@ def apply_style(prompt, style_id, negative=False):
 def generate(
     positive_prompt,
     negative_prompt="",
+    image_prompt=None,
     embeddings=[],
     style=None,
     seed=None,
@@ -85,6 +80,7 @@ def generate(
     height=512,
     guidance_scale=7.5,
     inference_steps=50,
+    denoising_strength=0.8,
     num_images=1,
     karras=False,
     taesd=False,
@@ -92,7 +88,7 @@ def generate(
     clip_skip=False,
     truncate_prompts=False,
     increment_seed=True,
+    deepcache=1,
     tome_ratio=0,
     scale=1,
     Info: Callable[[str], None] = None,
@@ -119,19 +115,22 @@ def generate(
         else ReturnedEmbeddingsType.LAST_HIDDEN_STATES_NORMALIZED
     )
 
+    KIND = "img2img" if image_prompt is not None else "txt2img"
+
     with torch.inference_mode():
         start = time.perf_counter()
         loader = Loader()
         pipe, upscaler = loader.load(
+            KIND,
             model,
             scheduler,
             karras,
             taesd,
             freeu,
+            deepcache,
             scale,
-            DTYPE,
             DEVICE,
+            DTYPE,
         )
 
         # load embeddings and append to negative prompt
@@ -151,13 +150,13 @@ def generate(
 
         # prompt embeds
         compel = Compel(
+            device=pipe.device,
+            tokenizer=pipe.tokenizer,
+            text_encoder=pipe.text_encoder,
+            truncate_long_prompts=truncate_prompts,
             dtype_for_device_getter=lambda _: DTYPE,
             returned_embeddings_type=EMBEDDINGS_TYPE,
-            text_encoder=pipe.text_encoder,
-            tokenizer=pipe.tokenizer,
-            device=pipe.device,
+            textual_inversion_manager=DiffusersTextualInversionManager(pipe),
         )
 
         images = []
@@ -185,34 +184,34 @@ def generate(
             except PromptParser.ParsingException:
                 raise Error("ParsingException: Invalid prompt")
 
+            kwargs = {
+                "width": width,
+                "height": height,
+                "generator": generator,
+                "prompt_embeds": pos_embeds,
+                "guidance_scale": guidance_scale,
+                "negative_prompt_embeds": neg_embeds,
+                "num_inference_steps": inference_steps,
+                "output_type": "np" if scale > 1 else "pil",
+            }
+
+            if KIND == "img2img":
+                kwargs["image"] = image_prompt
+                kwargs["strength"] = denoising_strength
+
            with token_merging(pipe, tome_ratio=tome_ratio):
                try:
-                    image = pipe(
-                        output_type="np" if scale > 1 else "pil",
-                        num_inference_steps=inference_steps,
-                        negative_prompt_embeds=neg_embeds,
-                        guidance_scale=guidance_scale,
-                        prompt_embeds=pos_embeds,
-                        generator=generator,
-                        height=height,
-                        width=width,
-                    ).images[0]
+                    image = pipe(**kwargs).images[0]
                     if scale > 1:
                         image = upscaler.predict(image)
                     images.append((image, str(current_seed)))
                finally:
+                    pipe.unload_textual_inversion()
                     torch.cuda.empty_cache()
 
            if increment_seed:
                current_seed += 1
 
-            if ZERO_GPU:
-                # spaces always start fresh
-                loader.pipe = None
-                loader.upscaler = None
-
     diff = time.perf_counter() - start
     if Info:
         Info(f"Generated {len(images)} image{'s' if len(images) > 1 else ''} in {diff:.2f}s")
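The same switch can be exercised from Python by passing an input image to `generate`; a minimal sketch using keyword arguments (the input file is illustrative and every other parameter keeps its default):

    from PIL import Image
    from lib.inference import generate

    init = Image.open("input.png")  # illustrative input image
    # image_prompt selects the img2img branch; denoising_strength maps to the pipeline's strength
    images = generate(
        "an astronaut riding a horse on mars",
        image_prompt=init,
        denoising_strength=0.6,
    )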
lib/loader.py CHANGED
@@ -1,5 +1,3 @@
-import os
-
 import torch
 from DeepCache import DeepCacheSDHelper
 from diffusers import (
@@ -10,6 +8,7 @@ from diffusers import (
     KDPM2AncestralDiscreteScheduler,
     LMSDiscreteScheduler,
     PNDMScheduler,
+    StableDiffusionImg2ImgPipeline,
     StableDiffusionPipeline,
 )
 from diffusers.models import AutoencoderKL, AutoencoderTiny
@@ -17,10 +16,7 @@ from torch._dynamo import OptimizedModule
 
 from .upscaler import RealESRGAN
 
-ZERO_GPU = (
-    os.environ.get("SPACES_ZERO_GPU", "").lower() == "true"
-    or os.environ.get("SPACES_ZERO_GPU", "") == "1"
-)
+__import__("warnings").filterwarnings("ignore", category=FutureWarning, module="diffusers")
 
 
 # inspired by ComfyUI
@@ -45,14 +41,12 @@ class Loader:
 
     def _load_deepcache(self, interval=1):
         has_deepcache = hasattr(self.pipe, "deepcache")
-
         if has_deepcache and self.pipe.deepcache.params["cache_interval"] == interval:
             return
         if has_deepcache:
             self.pipe.deepcache.disable()
         else:
             self.pipe.deepcache = DeepCacheSDHelper(pipe=self.pipe)
-
         self.pipe.deepcache.set_params(cache_interval=interval)
         self.pipe.deepcache.enable()
 
@@ -78,35 +72,44 @@ class Loader:
             print("Switching to Tiny VAE...")
             self.pipe.vae = AutoencoderTiny.from_pretrained(
                 pretrained_model_name_or_path="madebyollin/taesd",
-            ).to(device=self.pipe.device)
+            ).to(self.pipe.device)
             return
 
         if is_tiny and not taesd:
             print("Switching to KL VAE...")
             model = AutoencoderKL.from_pretrained(
                 pretrained_model_name_or_path=model_name,
-                use_safetensors=True,
                 subfolder="vae",
                 variant=variant,
-            ).to(
+            ).to(self.pipe.device)
             self.pipe.vae = torch.compile(
                 mode="reduce-overhead",
                 fullgraph=True,
                 model=model,
             )
 
+    def _load_pipeline(self, kind, model, device, dtype, **kwargs):
+        pipelines = {
+            "txt2img": StableDiffusionPipeline,
+            "img2img": StableDiffusionImg2ImgPipeline,
+        }
+        if self.pipe is None:
+            self.pipe = pipelines[kind].from_pretrained(model, **kwargs).to(device, dtype)
+        if not isinstance(self.pipe, pipelines[kind]):
+            self.pipe = pipelines[kind].from_pipe(self.pipe).to(device, dtype)
+
     def load(
         self,
+        kind,
         model,
         scheduler,
         karras,
         taesd,
         freeu,
+        deepcache,
         scale,
-        dtype,
         device,
+        dtype,
     ):
         model_lower = model.lower()
 
@@ -133,7 +136,7 @@ class Loader:
         del scheduler_kwargs["use_karras_sigmas"]
 
         # no fp16 variant
-        if
+        if model_lower not in [
             "sg161222/realistic_vision_v5.1_novae",
             "prompthero/openjourney-v4",
             "linaqruf/anything-v3-1",
@@ -144,48 +147,37 @@ class Loader:
 
         pipe_kwargs = {
             "scheduler": schedulers[scheduler](**scheduler_kwargs),
-            "pretrained_model_name_or_path": model_lower,
             "requires_safety_checker": False,
-            "use_safetensors": True,
             "safety_checker": None,
             "variant": variant,
         }
 
-            model_name = self.pipe.config._name_or_path
-            same_model = model_name.lower() == model_lower
-            same_scheduler = isinstance(self.pipe.scheduler, schedulers[scheduler])
-            same_karras = (
-                not hasattr(self.pipe.scheduler.config, "use_karras_sigmas")
-                or self.pipe.scheduler.config.use_karras_sigmas == karras
-            )
-                self._load_vae(model_lower, taesd, variant)
-                self._load_freeu(freeu)
-                self._load_deepcache(deepcache_interval)
-                self._load_upscaler(device, scale)
-                torch.cuda.empty_cache()
-                return self.pipe, self.upscaler
-            else:
-                print(f"Unloading {model_name.lower()}...")
-                self.pipe = None
-
-        print(f"Loading {model_lower} with {'Tiny' if taesd else 'KL'} VAE...")
-        self.pipe = StableDiffusionPipeline.from_pretrained(**pipe_kwargs).to(
-            device=device,
-            dtype=dtype,
-        )
+        if self.pipe is None:
+            print(f"Loading {model_lower} with {'Tiny' if taesd else 'KL'} VAE...")
+
+        self._load_pipeline(kind, model_lower, device, dtype, **pipe_kwargs)
+        model_name = self.pipe.config._name_or_path
+        same_model = model_name.lower() == model_lower
+        same_scheduler = isinstance(self.pipe.scheduler, schedulers[scheduler])
+        same_karras = (
+            not hasattr(self.pipe.scheduler.config, "use_karras_sigmas")
+            or self.pipe.scheduler.config.use_karras_sigmas == karras
+        )
+
+        if same_model:
+            if not same_scheduler:
+                print(f"Switching to {scheduler}...")
+            if not same_karras:
+                print(f"{'Enabling' if karras else 'Disabling'} Karras sigmas...")
+            if not same_scheduler or not same_karras:
+                self.pipe.scheduler = schedulers[scheduler](**scheduler_kwargs)
+        else:
+            self.pipe = None
+            self._load_pipeline(kind, model_lower, device, dtype, **pipe_kwargs)
+
         self._load_vae(model_lower, taesd, variant)
         self._load_freeu(freeu)
-        self._load_deepcache(
+        self._load_deepcache(deepcache)
         self._load_upscaler(device, scale)
         torch.cuda.empty_cache()
         return self.pipe, self.upscaler
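The new `_load_pipeline` helper switches between the txt2img and img2img pipelines with `from_pipe`, which rebuilds the requested pipeline class around the components already in memory instead of reloading the model. A minimal sketch of the same idea outside the `Loader` (the model id is illustrative):

    from diffusers import StableDiffusionImg2ImgPipeline, StableDiffusionPipeline

    # load once as txt2img...
    txt2img = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    # ...then reuse the same weights for img2img without downloading or loading anything again
    img2img = StableDiffusionImg2ImgPipeline.from_pipe(txt2img)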
usage.md CHANGED
@@ -55,6 +55,12 @@ Optionally, the [Karras](https://arxiv.org/abs/2206.00364) noise schedule can be
 * [LMS](https://huggingface.co/docs/diffusers/api/schedulers/lms_discrete)
 * [PNDM](https://huggingface.co/docs/diffusers/api/schedulers/pndm)
 
+### Image-to-Image
+
+The `🖼️ Image` tab enables the image-to-image pipeline. Either use the image input or select a generation from the gallery and then adjust the denoising strength. To disable, simply clear the image input (the `x` overlay button).
+
+Denoising strength is essentially how much the generation will differ from the input image. A value of `0` will be identical to the original, while `1` will be a completely new image. You may want to also increase the number of inference steps.
+
 ### Advanced
 
 #### DeepCache
@@ -67,7 +73,7 @@ Optionally, the [Karras](https://arxiv.org/abs/2206.00364) noise schedule can be
 
 #### ToMe
 
-[Token merging](https://github.com/dbolya/tomesd) (Bolya & Hoffman 2023) reduces the number of tokens processed by the model. Set `Ratio` to the desired reduction factor.
+[Token merging](https://github.com/dbolya/tomesd) (Bolya & Hoffman 2023) reduces the number of tokens processed by the model. Set `Ratio` to the desired reduction factor. Only necessary to speed up generation on older GPUs.
 
 #### FreeU
 
@@ -75,7 +81,7 @@ Optionally, the [Karras](https://arxiv.org/abs/2206.00364) noise schedule can be
 
 #### Clip Skip
 
-When enabled, the last CLIP layer is skipped. This
+When enabled, the last CLIP layer is skipped. This can sometimes improve image quality with anime models.
 
 #### Tiny VAE
 
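As described in the Image-to-Image section above, the denoising strength setting maps to the `strength` argument of the underlying img2img pipeline; a minimal diffusers sketch under that assumption (the model id and input file are illustrative):

    from diffusers import StableDiffusionImg2ImgPipeline
    from diffusers.utils import load_image

    pipe = StableDiffusionImg2ImgPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    init = load_image("input.png").resize((512, 512))  # illustrative input image

    # img2img only runs about strength * num_inference_steps denoising steps,
    # which is why raising the step count can help at lower strengths
    image = pipe(
        "an astronaut riding a horse on mars",
        image=init,
        strength=0.6,
        num_inference_steps=30,
    ).images[0]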