adamelliotfields committed
Commit 60849d7
1 Parent(s): 459aacb

Image-to-image

Files changed (7)
  1. README.md +2 -1
  2. app.py +83 -2
  3. cli.py +4 -0
  4. lib/config.py +1 -0
  5. lib/inference.py +31 -32
  6. lib/loader.py +40 -48
  7. usage.md +8 -2
README.md CHANGED
@@ -48,6 +48,7 @@ preload_from_hub:
 # diffusion
 
 Gradio app for Stable Diffusion 1.5 including:
+* txt2img and img2img pipelines
 * curated models and TI embeddings
 * multiple samplers with Karras schedule
 * Compel prompting
@@ -84,4 +85,4 @@ python cli.py 'an astronaut riding a horse on mars'
 ## TODO
 
 - [ ] Metadata embed and display
-- [ ] Image-to-image
+- [ ] IP-Adapter and T2I-Adapter
app.py CHANGED
@@ -44,6 +44,34 @@ def random_fn():
     return gr.Textbox(value=random.choice(prompts))
 
 
+# can't toggle interactive in JS
+def gallery_fn(images, image):
+    if image is not None:
+        return gr.Dropdown(
+            choices=[("🔒", -1)],
+            interactive=False,
+            value=-1,
+        )
+
+    return gr.Dropdown(
+        choices=[("None", -1)]
+        + [(str(i + 1), i) for i, _ in enumerate(images if images is not None else [])],
+        interactive=True,
+        value=-1,
+    )
+
+
+def image_prompt_fn(images):
+    return gallery_fn(images, None)
+
+
+# can't use image input in JS
+def image_select_fn(images, image, i):
+    if image is not None and i == -1:
+        return gr.Image(value=image)
+    return gr.Image(value=images[i][0]) if i > -1 else None
+
+
 def generate_fn(*args):
     if len(args) > 0:
         prompt = args[0]
@@ -251,6 +279,33 @@ with gr.Blocks(
                     value=False,
                 )
 
+        # img2img tab
+        with gr.TabItem("🖼️ Image"):
+            with gr.Row():
+                image_prompt = gr.Image(
+                    show_label=False,
+                    min_width=320,
+                    format="png",
+                    type="pil",
+                    scale=0,
+                )
+
+            with gr.Row():
+                image_select = gr.Dropdown(
+                    choices=[("None", -1)],
+                    label="Load from Gallery",
+                    interactive=True,
+                    filterable=False,
+                    value=-1,
+                )
+                denoising_strength = gr.Slider(
+                    value=Config.DENOISING_STRENGTH,
+                    label="Denoising Strength",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.1,
+                )
+
         with gr.TabItem("ℹ️ Usage"):
             gr.Markdown(read_file("usage.md"), elem_classes=["markdown"])
 
@@ -308,9 +363,9 @@ with gr.Blocks(
     seed.change(None, inputs=[seed], outputs=[], js=seed_js)
 
     file_format.change(
-        lambda f: gr.Gallery(format=f),
+        lambda f: (gr.Gallery(format=f), gr.Image(format=f)),
         inputs=[file_format],
-        outputs=[output_images],
+        outputs=[output_images, image_prompt],
        show_api=False,
     )
 
@@ -322,6 +377,30 @@ with gr.Blocks(
         js=aspect_ratio_js,
     )
 
+    # lock the input image so you don't lose it when the gallery updates
+    output_images.change(
+        gallery_fn,
+        inputs=[output_images, image_prompt],
+        outputs=[image_select],
+        show_api=False,
+    )
+
+    # show the selected image in the image input
+    image_select.change(
+        image_select_fn,
+        inputs=[output_images, image_prompt, image_select],
+        outputs=[image_prompt],
+        show_api=False,
+    )
+
+    # reset the dropdown on clear
+    image_prompt.clear(
+        image_prompt_fn,
+        inputs=[output_images],
+        outputs=[image_select],
+        show_api=False,
+    )
+
     # show "Custom" aspect ratio when manually changing width or height
     gr.on(
         triggers=[width.input, height.input],
@@ -340,6 +419,7 @@ with gr.Blocks(
         inputs=[
             prompt,
             negative_prompt,
+            image_prompt,
             embeddings,
             style,
             seed,
@@ -349,6 +429,7 @@
             height,
             guidance_scale,
             inference_steps,
+            denoising_strength,
             num_images,
             use_karras,
             use_taesd,
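The gallery wiring above relies on returning freshly constructed components (`gr.Dropdown(...)`, `gr.Image(...)`) from Python callbacks, since properties like `interactive` can't be toggled from a JS callback. A minimal, self-contained sketch of that Gradio pattern (the component names here are illustrative, not taken from app.py):

```python
import gradio as gr

with gr.Blocks() as demo:
    gallery = gr.Gallery(label="Gallery")
    picker = gr.Dropdown(choices=[("None", -1)], value=-1, label="Load from Gallery")

    # returning a component from a handler updates only the properties passed to it
    def refresh_choices(images):
        images = images or []
        return gr.Dropdown(
            choices=[("None", -1)] + [(str(i + 1), i) for i in range(len(images))],
            value=-1,
            interactive=True,
        )

    gallery.change(refresh_choices, inputs=[gallery], outputs=[picker], show_api=False)

demo.launch()
```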
cli.py CHANGED
@@ -30,6 +30,8 @@ def main():
     parser.add_argument("--guidance", type=float, metavar="FLOAT", default=Config.GUIDANCE_SCALE)
     parser.add_argument("--steps", type=int, metavar="INT", default=Config.INFERENCE_STEPS)
     parser.add_argument("--tome", type=float, metavar="FLOAT", default=Config.TOME_RATIO)
+    parser.add_argument("--strength", type=float, metavar="FLOAT", default=Config.DENOISING_STRENGTH)
+    parser.add_argument("--image", type=str, metavar="STR")
     parser.add_argument("--taesd", action="store_true")
     parser.add_argument("--clip-skip", action="store_true")
     parser.add_argument("--truncate", action="store_true")
@@ -42,6 +44,7 @@
     images = generate(
         args.prompt,
         args.negative,
+        args.image,
         args.embedding,
         args.style,
         args.seed,
@@ -51,6 +54,7 @@
         args.height,
         args.guidance,
         args.steps,
+        args.strength,
         args.images,
         args.karras,
         args.taesd,
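With the new flags, an image-to-image run from the CLI should look something like this (the input path and strength value are illustrative):

```sh
python cli.py 'an astronaut riding a horse on mars' --image ./input.png --strength 0.5
```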
lib/config.py CHANGED
@@ -41,6 +41,7 @@ Config = SimpleNamespace(
     SEED=-1,
     GUIDANCE_SCALE=7,
     INFERENCE_STEPS=30,
+    DENOISING_STRENGTH=0.6,
     DEEPCACHE_INTERVAL=2,
     TOME_RATIO=0.0,
     SCALE=1,
lib/inference.py CHANGED
@@ -16,15 +16,9 @@ from huggingface_hub.utils import HFValidationError, RepositoryNotFoundError
 
 from .loader import Loader
 
-__import__("warnings").filterwarnings("ignore", category=FutureWarning, module="diffusers")
 __import__("warnings").filterwarnings("ignore", category=FutureWarning, module="transformers")
 __import__("transformers").logging.set_verbosity_error()
 
-ZERO_GPU = (
-    os.environ.get("SPACES_ZERO_GPU", "").lower() == "true"
-    or os.environ.get("SPACES_ZERO_GPU", "") == "1"
-)
-
 with open("./data/styles.json") as f:
     styles = json.load(f)
 
@@ -76,6 +70,7 @@ def apply_style(prompt, style_id, negative=False):
 def generate(
     positive_prompt,
     negative_prompt="",
+    image_prompt=None,
     embeddings=[],
     style=None,
     seed=None,
@@ -85,6 +80,7 @@
     height=512,
     guidance_scale=7.5,
     inference_steps=50,
+    denoising_strength=0.8,
     num_images=1,
     karras=False,
     taesd=False,
@@ -92,7 +88,7 @@
     clip_skip=False,
     truncate_prompts=False,
     increment_seed=True,
-    deepcache_interval=1,
+    deepcache=1,
     tome_ratio=0,
     scale=1,
     Info: Callable[[str], None] = None,
@@ -119,19 +115,22 @@
         else ReturnedEmbeddingsType.LAST_HIDDEN_STATES_NORMALIZED
     )
 
+    KIND = "img2img" if image_prompt is not None else "txt2img"
+
    with torch.inference_mode():
        start = time.perf_counter()
        loader = Loader()
        pipe, upscaler = loader.load(
+            KIND,
            model,
            scheduler,
            karras,
            taesd,
            freeu,
-            deepcache_interval,
+            deepcache,
            scale,
-            DTYPE,
            DEVICE,
+            DTYPE,
        )
 
        # load embeddings and append to negative prompt
@@ -151,13 +150,13 @@
 
        # prompt embeds
        compel = Compel(
-            textual_inversion_manager=DiffusersTextualInversionManager(pipe),
+            device=pipe.device,
+            tokenizer=pipe.tokenizer,
+            text_encoder=pipe.text_encoder,
+            truncate_long_prompts=truncate_prompts,
            dtype_for_device_getter=lambda _: DTYPE,
            returned_embeddings_type=EMBEDDINGS_TYPE,
-            truncate_long_prompts=truncate_prompts,
-            text_encoder=pipe.text_encoder,
-            tokenizer=pipe.tokenizer,
-            device=pipe.device,
+            textual_inversion_manager=DiffusersTextualInversionManager(pipe),
        )
 
        images = []
@@ -185,34 +184,34 @@
            except PromptParser.ParsingException:
                raise Error("ParsingException: Invalid prompt")
 
+            kwargs = {
+                "width": width,
+                "height": height,
+                "generator": generator,
+                "prompt_embeds": pos_embeds,
+                "guidance_scale": guidance_scale,
+                "negative_prompt_embeds": neg_embeds,
+                "num_inference_steps": inference_steps,
+                "output_type": "np" if scale > 1 else "pil",
+            }
+
+            if KIND == "img2img":
+                kwargs["image"] = image_prompt
+                kwargs["strength"] = denoising_strength
+
            with token_merging(pipe, tome_ratio=tome_ratio):
                try:
-                    image = pipe(
-                        output_type="np" if scale > 1 else "pil",
-                        num_inference_steps=inference_steps,
-                        negative_prompt_embeds=neg_embeds,
-                        guidance_scale=guidance_scale,
-                        prompt_embeds=pos_embeds,
-                        generator=generator,
-                        height=height,
-                        width=width,
-                    ).images[0]
+                    image = pipe(**kwargs).images[0]
                    if scale > 1:
                        image = upscaler.predict(image)
                    images.append((image, str(current_seed)))
                finally:
-                    if not ZERO_GPU:
-                        pipe.unload_textual_inversion()
-                        torch.cuda.empty_cache()
+                    pipe.unload_textual_inversion()
+                    torch.cuda.empty_cache()
 
            if increment_seed:
                current_seed += 1
 
-        if ZERO_GPU:
-            # spaces always start fresh
-            loader.pipe = None
-            loader.upscaler = None
-
        diff = time.perf_counter() - start
        if Info:
            Info(f"Generated {len(images)} image{'s' if len(images) > 1 else ''} in {diff:.2f}s")
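Stripped of the app's prompt-embedding and upscaling machinery, the img2img branch above boils down to a standard diffusers call. A minimal sketch, assuming an SD 1.5 checkpoint (the model id, input path, and values are illustrative):

```python
import torch
from diffusers import StableDiffusionImg2ImgPipeline
from PIL import Image

pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
).to("cuda")

init_image = Image.open("input.png").convert("RGB").resize((512, 512))

# strength controls how far the input is pushed back into noise before denoising:
# 0.0 keeps it essentially unchanged, 1.0 ignores it entirely
result = pipe(
    prompt="an astronaut riding a horse on mars",
    image=init_image,
    strength=0.6,
    guidance_scale=7.0,
    num_inference_steps=30,
).images[0]
result.save("output.png")
```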
lib/loader.py CHANGED
@@ -1,5 +1,3 @@
-import os
-
 import torch
 from DeepCache import DeepCacheSDHelper
 from diffusers import (
@@ -10,6 +8,7 @@ from diffusers import (
     KDPM2AncestralDiscreteScheduler,
     LMSDiscreteScheduler,
     PNDMScheduler,
+    StableDiffusionImg2ImgPipeline,
     StableDiffusionPipeline,
 )
 from diffusers.models import AutoencoderKL, AutoencoderTiny
@@ -17,10 +16,7 @@ from torch._dynamo import OptimizedModule
 
 from .upscaler import RealESRGAN
 
-ZERO_GPU = (
-    os.environ.get("SPACES_ZERO_GPU", "").lower() == "true"
-    or os.environ.get("SPACES_ZERO_GPU", "") == "1"
-)
+__import__("warnings").filterwarnings("ignore", category=FutureWarning, module="diffusers")
 
 
 # inspired by ComfyUI
@@ -45,14 +41,12 @@ class Loader:
 
     def _load_deepcache(self, interval=1):
         has_deepcache = hasattr(self.pipe, "deepcache")
-
         if has_deepcache and self.pipe.deepcache.params["cache_interval"] == interval:
             return
         if has_deepcache:
             self.pipe.deepcache.disable()
         else:
             self.pipe.deepcache = DeepCacheSDHelper(pipe=self.pipe)
-
         self.pipe.deepcache.set_params(cache_interval=interval)
         self.pipe.deepcache.enable()
 
@@ -78,35 +72,44 @@
             print("Switching to Tiny VAE...")
             self.pipe.vae = AutoencoderTiny.from_pretrained(
                 pretrained_model_name_or_path="madebyollin/taesd",
-                use_safetensors=True,
-            ).to(device=self.pipe.device)
+            ).to(self.pipe.device)
             return
 
         if is_tiny and not taesd:
             print("Switching to KL VAE...")
             model = AutoencoderKL.from_pretrained(
                 pretrained_model_name_or_path=model_name,
-                use_safetensors=True,
                 subfolder="vae",
                 variant=variant,
-            ).to(device=self.pipe.device)
+            ).to(self.pipe.device)
             self.pipe.vae = torch.compile(
                 mode="reduce-overhead",
                 fullgraph=True,
                 model=model,
             )
 
+    def _load_pipeline(self, kind, model, device, dtype, **kwargs):
+        pipelines = {
+            "txt2img": StableDiffusionPipeline,
+            "img2img": StableDiffusionImg2ImgPipeline,
+        }
+        if self.pipe is None:
+            self.pipe = pipelines[kind].from_pretrained(model, **kwargs).to(device, dtype)
+        if not isinstance(self.pipe, pipelines[kind]):
+            self.pipe = pipelines[kind].from_pipe(self.pipe).to(device, dtype)
+
     def load(
         self,
+        kind,
         model,
         scheduler,
         karras,
         taesd,
         freeu,
-        deepcache_interval,
+        deepcache,
         scale,
-        dtype,
         device,
+        dtype,
     ):
         model_lower = model.lower()
 
@@ -133,7 +136,7 @@
         del scheduler_kwargs["use_karras_sigmas"]
 
         # no fp16 variant
-        if not ZERO_GPU and model_lower not in [
+        if model_lower not in [
             "sg161222/realistic_vision_v5.1_novae",
             "prompthero/openjourney-v4",
             "linaqruf/anything-v3-1",
@@ -144,48 +147,37 @@
 
         pipe_kwargs = {
             "scheduler": schedulers[scheduler](**scheduler_kwargs),
-            "pretrained_model_name_or_path": model_lower,
             "requires_safety_checker": False,
-            "use_safetensors": True,
             "safety_checker": None,
             "variant": variant,
         }
 
-        # already loaded
-        if self.pipe is not None:
-            model_name = self.pipe.config._name_or_path
-            same_model = model_name.lower() == model_lower
-            same_scheduler = isinstance(self.pipe.scheduler, schedulers[scheduler])
-            same_karras = (
-                not hasattr(self.pipe.scheduler.config, "use_karras_sigmas")
-                or self.pipe.scheduler.config.use_karras_sigmas == karras
-            )
+        if self.pipe is None:
+            print(f"Loading {model_lower} with {'Tiny' if taesd else 'KL'} VAE...")
 
-            if same_model:
-                if not same_scheduler:
-                    print(f"Switching to {scheduler}...")
-                if not same_karras:
-                    print(f"{'Enabling' if karras else 'Disabling'} Karras sigmas...")
-                if not same_scheduler or not same_karras:
-                    self.pipe.scheduler = schedulers[scheduler](**scheduler_kwargs)
-                self._load_vae(model_lower, taesd, variant)
-                self._load_freeu(freeu)
-                self._load_deepcache(deepcache_interval)
-                self._load_upscaler(device, scale)
-                torch.cuda.empty_cache()
-                return self.pipe, self.upscaler
-            else:
-                print(f"Unloading {model_name.lower()}...")
-                self.pipe = None
-
-        print(f"Loading {model_lower} with {'Tiny' if taesd else 'KL'} VAE...")
-        self.pipe = StableDiffusionPipeline.from_pretrained(**pipe_kwargs).to(
-            device=device,
-            dtype=dtype,
+        self._load_pipeline(kind, model_lower, device, dtype, **pipe_kwargs)
+        model_name = self.pipe.config._name_or_path
+        same_model = model_name.lower() == model_lower
+        same_scheduler = isinstance(self.pipe.scheduler, schedulers[scheduler])
+        same_karras = (
+            not hasattr(self.pipe.scheduler.config, "use_karras_sigmas")
+            or self.pipe.scheduler.config.use_karras_sigmas == karras
         )
+
+        if same_model:
+            if not same_scheduler:
+                print(f"Switching to {scheduler}...")
+            if not same_karras:
+                print(f"{'Enabling' if karras else 'Disabling'} Karras sigmas...")
+            if not same_scheduler or not same_karras:
+                self.pipe.scheduler = schedulers[scheduler](**scheduler_kwargs)
+        else:
+            self.pipe = None
+            self._load_pipeline(kind, model_lower, device, dtype, **pipe_kwargs)
+
         self._load_vae(model_lower, taesd, variant)
         self._load_freeu(freeu)
-        self._load_deepcache(deepcache_interval)
+        self._load_deepcache(deepcache)
         self._load_upscaler(device, scale)
         torch.cuda.empty_cache()
         return self.pipe, self.upscaler
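The new `_load_pipeline` helper leans on `from_pipe`, which in recent diffusers releases rewraps an existing pipeline's components (UNet, VAE, text encoder) in a different pipeline class instead of reloading them from disk. A minimal sketch of the same idea (the model id is illustrative):

```python
import torch
from diffusers import StableDiffusionImg2ImgPipeline, StableDiffusionPipeline

# load the txt2img pipeline once...
txt2img = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
).to("cuda")

# ...then reuse its components for img2img with no extra download or VRAM
img2img = StableDiffusionImg2ImgPipeline.from_pipe(txt2img).to("cuda", torch.float16)
assert img2img.unet is txt2img.unet  # modules are shared, not copied
```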
usage.md CHANGED
@@ -55,6 +55,12 @@ Optionally, the [Karras](https://arxiv.org/abs/2206.00364) noise schedule can be
 * [LMS](https://huggingface.co/docs/diffusers/api/schedulers/lms_discrete)
 * [PNDM](https://huggingface.co/docs/diffusers/api/schedulers/pndm)
 
+### Image-to-Image
+
+The `🖼️ Image` tab enables the image-to-image pipeline. Either use the image input or select a generation from the gallery, then adjust the denoising strength. To disable it, clear the image input (the `x` overlay button).
+
+Denoising strength controls how much the result differs from the input image: `0` is identical to the original, while `1` is a completely new image. You may also want to increase the number of inference steps.
+
 ### Advanced
 
 #### DeepCache
@@ -67,7 +73,7 @@ Optionally, the [Karras](https://arxiv.org/abs/2206.00364) noise schedule can be
 
 #### ToMe
 
-[Token merging](https://github.com/dbolya/tomesd) (Bolya & Hoffman 2023) reduces the number of tokens processed by the model. Set `Ratio` to the desired reduction factor. ToMe's impact is more noticeable on larger images.
+[Token merging](https://github.com/dbolya/tomesd) (Bolya & Hoffman 2023) reduces the number of tokens processed by the model. Set `Ratio` to the desired reduction factor. It is only needed to speed up generation on older GPUs.
 
 #### FreeU
 
@@ -75,7 +81,7 @@ Optionally, the [Karras](https://arxiv.org/abs/2206.00364) noise schedule can be
 
 #### Clip Skip
 
-When enabled, the last CLIP layer is skipped. This _can_ improve image quality with anime models.
+When enabled, the last CLIP layer is skipped. This can sometimes improve image quality with anime models.
 
 #### Tiny VAE
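One reason to raise the step count for img2img, as the new usage note suggests: diffusers truncates the scheduler by the strength, so only roughly steps × strength denoising steps actually run. A rough sketch of that rule of thumb:

```python
# approximate behaviour of diffusers img2img scheduling:
# the effective number of denoising steps is about steps * strength
def effective_steps(num_inference_steps: int, strength: float) -> int:
    return min(int(num_inference_steps * strength), num_inference_steps)

print(effective_steps(30, 0.6))  # -> 18 steps at the app's default strength
```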