Spaces: Running on Zero

Commit 60849d7: Image-to-image
adamelliotfields committed · Parent(s): 459aacb
Files changed:
- README.md +2 -1
- app.py +83 -2
- cli.py +4 -0
- lib/config.py +1 -0
- lib/inference.py +31 -32
- lib/loader.py +40 -48
- usage.md +8 -2
README.md CHANGED
@@ -48,6 +48,7 @@ preload_from_hub:
 # diffusion
 
 Gradio app for Stable Diffusion 1.5 including:
+* txt2img and img2img pipelines
 * curated models and TI embeddings
 * multiple samplers with Karras schedule
 * Compel prompting
@@ -84,4 +85,4 @@ python cli.py 'an astronaut riding a horse on mars'
 ## TODO
 
 - [ ] Metadata embed and display
-- [ ]
+- [ ] IP-Adapter and T2I-Adapter
app.py CHANGED
@@ -44,6 +44,34 @@ def random_fn():
     return gr.Textbox(value=random.choice(prompts))
 
 
+# can't toggle interactive in JS
+def gallery_fn(images, image):
+    if image is not None:
+        return gr.Dropdown(
+            choices=[("🔒", -1)],
+            interactive=False,
+            value=-1,
+        )
+
+    return gr.Dropdown(
+        choices=[("None", -1)]
+        + [(str(i + 1), i) for i, _ in enumerate(images if images is not None else [])],
+        interactive=True,
+        value=-1,
+    )
+
+
+def image_prompt_fn(images):
+    return gallery_fn(images, None)
+
+
+# can't use image input in JS
+def image_select_fn(images, image, i):
+    if image is not None and i == -1:
+        return gr.Image(value=image)
+    return gr.Image(value=images[i][0]) if i > -1 else None
+
+
 def generate_fn(*args):
     if len(args) > 0:
         prompt = args[0]
@@ -251,6 +279,33 @@ with gr.Blocks(
                     value=False,
                 )
 
+        # img2img tab
+        with gr.TabItem("🖼️ Image"):
+            with gr.Row():
+                image_prompt = gr.Image(
+                    show_label=False,
+                    min_width=320,
+                    format="png",
+                    type="pil",
+                    scale=0,
+                )
+
+            with gr.Row():
+                image_select = gr.Dropdown(
+                    choices=[("None", -1)],
+                    label="Load from Gallery",
+                    interactive=True,
+                    filterable=False,
+                    value=-1,
+                )
+                denoising_strength = gr.Slider(
+                    value=Config.DENOISING_STRENGTH,
+                    label="Denoising Strength",
+                    minimum=0.0,
+                    maximum=1.0,
+                    step=0.1,
+                )
+
         with gr.TabItem("ℹ️ Usage"):
             gr.Markdown(read_file("usage.md"), elem_classes=["markdown"])
 
@@ -308,9 +363,9 @@ with gr.Blocks(
     seed.change(None, inputs=[seed], outputs=[], js=seed_js)
 
     file_format.change(
-        lambda f: gr.Gallery(format=f),
+        lambda f: (gr.Gallery(format=f), gr.Image(format=f)),
         inputs=[file_format],
-        outputs=[output_images],
+        outputs=[output_images, image_prompt],
        show_api=False,
     )
 
@@ -322,6 +377,30 @@ with gr.Blocks(
         js=aspect_ratio_js,
     )
 
+    # lock the input image so you don't lose it when the gallery updates
+    output_images.change(
+        gallery_fn,
+        inputs=[output_images, image_prompt],
+        outputs=[image_select],
+        show_api=False,
+    )
+
+    # show the selected image in the image input
+    image_select.change(
+        image_select_fn,
+        inputs=[output_images, image_prompt, image_select],
+        outputs=[image_prompt],
+        show_api=False,
+    )
+
+    # reset the dropdown on clear
+    image_prompt.clear(
+        image_prompt_fn,
+        inputs=[output_images],
+        outputs=[image_select],
+        show_api=False,
+    )
+
     # show "Custom" aspect ratio when manually changing width or height
     gr.on(
        triggers=[width.input, height.input],
@@ -340,6 +419,7 @@ with gr.Blocks(
        inputs=[
            prompt,
            negative_prompt,
+           image_prompt,
            embeddings,
            style,
            seed,
@@ -349,6 +429,7 @@ with gr.Blocks(
            height,
            guidance_scale,
            inference_steps,
+           denoising_strength,
            num_images,
            use_karras,
            use_taesd,
cli.py CHANGED
@@ -30,6 +30,8 @@ def main():
     parser.add_argument("--guidance", type=float, metavar="FLOAT", default=Config.GUIDANCE_SCALE)
     parser.add_argument("--steps", type=int, metavar="INT", default=Config.INFERENCE_STEPS)
     parser.add_argument("--tome", type=float, metavar="FLOAT", default=Config.TOME_RATIO)
+    parser.add_argument("--strength", type=float, metavar="FLOAT", default=Config.DENOISING_STRENGTH)
+    parser.add_argument("--image", type=str, metavar="STR")
     parser.add_argument("--taesd", action="store_true")
     parser.add_argument("--clip-skip", action="store_true")
     parser.add_argument("--truncate", action="store_true")
@@ -42,6 +44,7 @@ def main():
     images = generate(
         args.prompt,
         args.negative,
+        args.image,
         args.embedding,
         args.style,
         args.seed,
@@ -51,6 +54,7 @@ def main():
         args.height,
         args.guidance,
         args.steps,
+        args.strength,
         args.images,
         args.karras,
         args.taesd,
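With the new flags, an image-to-image run from the command line looks roughly like this (the input path is illustrative):

    python cli.py 'an astronaut riding a horse on mars' --image input.png --strength 0.6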
lib/config.py CHANGED
@@ -41,6 +41,7 @@ Config = SimpleNamespace(
     SEED=-1,
     GUIDANCE_SCALE=7,
     INFERENCE_STEPS=30,
+    DENOISING_STRENGTH=0.6,
     DEEPCACHE_INTERVAL=2,
     TOME_RATIO=0.0,
     SCALE=1,
lib/inference.py CHANGED
@@ -16,15 +16,9 @@ from huggingface_hub.utils import HFValidationError, RepositoryNotFoundError
 
 from .loader import Loader
 
-__import__("warnings").filterwarnings("ignore", category=FutureWarning, module="diffusers")
 __import__("warnings").filterwarnings("ignore", category=FutureWarning, module="transformers")
 __import__("transformers").logging.set_verbosity_error()
 
-ZERO_GPU = (
-    os.environ.get("SPACES_ZERO_GPU", "").lower() == "true"
-    or os.environ.get("SPACES_ZERO_GPU", "") == "1"
-)
-
 with open("./data/styles.json") as f:
     styles = json.load(f)
 
@@ -76,6 +70,7 @@ def apply_style(prompt, style_id, negative=False):
 def generate(
     positive_prompt,
     negative_prompt="",
+    image_prompt=None,
     embeddings=[],
     style=None,
     seed=None,
@@ -85,6 +80,7 @@ def generate(
     height=512,
     guidance_scale=7.5,
     inference_steps=50,
+    denoising_strength=0.8,
     num_images=1,
     karras=False,
     taesd=False,
@@ -92,7 +88,7 @@ def generate(
     clip_skip=False,
     truncate_prompts=False,
     increment_seed=True,
+    deepcache=1,
     tome_ratio=0,
     scale=1,
     Info: Callable[[str], None] = None,
@@ -119,19 +115,22 @@ def generate(
         else ReturnedEmbeddingsType.LAST_HIDDEN_STATES_NORMALIZED
     )
 
+    KIND = "img2img" if image_prompt is not None else "txt2img"
+
     with torch.inference_mode():
         start = time.perf_counter()
         loader = Loader()
         pipe, upscaler = loader.load(
+            KIND,
             model,
             scheduler,
             karras,
             taesd,
             freeu,
+            deepcache,
             scale,
-            DTYPE,
             DEVICE,
+            DTYPE,
         )
 
         # load embeddings and append to negative prompt
@@ -151,13 +150,13 @@ def generate(
 
         # prompt embeds
         compel = Compel(
+            device=pipe.device,
+            tokenizer=pipe.tokenizer,
+            text_encoder=pipe.text_encoder,
+            truncate_long_prompts=truncate_prompts,
             dtype_for_device_getter=lambda _: DTYPE,
             returned_embeddings_type=EMBEDDINGS_TYPE,
-            text_encoder=pipe.text_encoder,
-            tokenizer=pipe.tokenizer,
-            device=pipe.device,
+            textual_inversion_manager=DiffusersTextualInversionManager(pipe),
         )
 
         images = []
@@ -185,34 +184,34 @@ def generate(
             except PromptParser.ParsingException:
                 raise Error("ParsingException: Invalid prompt")
 
+            kwargs = {
+                "width": width,
+                "height": height,
+                "generator": generator,
+                "prompt_embeds": pos_embeds,
+                "guidance_scale": guidance_scale,
+                "negative_prompt_embeds": neg_embeds,
+                "num_inference_steps": inference_steps,
+                "output_type": "np" if scale > 1 else "pil",
+            }
+
+            if KIND == "img2img":
+                kwargs["image"] = image_prompt
+                kwargs["strength"] = denoising_strength
+
            with token_merging(pipe, tome_ratio=tome_ratio):
                try:
-                    image = pipe(
-                        output_type="np" if scale > 1 else "pil",
-                        num_inference_steps=inference_steps,
-                        negative_prompt_embeds=neg_embeds,
-                        guidance_scale=guidance_scale,
-                        prompt_embeds=pos_embeds,
-                        generator=generator,
-                        height=height,
-                        width=width,
-                    ).images[0]
+                    image = pipe(**kwargs).images[0]
                     if scale > 1:
                         image = upscaler.predict(image)
                     images.append((image, str(current_seed)))
                finally:
+                    pipe.unload_textual_inversion()
                     torch.cuda.empty_cache()
 
            if increment_seed:
                current_seed += 1
 
-            if ZERO_GPU:
-                # spaces always start fresh
-                loader.pipe = None
-                loader.upscaler = None
-
     diff = time.perf_counter() - start
     if Info:
         Info(f"Generated {len(images)} image{'s' if len(images) > 1 else ''} in {diff:.2f}s")
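The same switch can be exercised from Python by passing an input image to `generate`; a minimal sketch using keyword arguments (the input file is illustrative and every other parameter keeps its default):

    from PIL import Image
    from lib.inference import generate

    init = Image.open("input.png")  # illustrative input image
    # image_prompt selects the img2img branch; denoising_strength maps to the pipeline's strength
    images = generate(
        "an astronaut riding a horse on mars",
        image_prompt=init,
        denoising_strength=0.6,
    )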
lib/loader.py CHANGED
@@ -1,5 +1,3 @@
-import os
-
 import torch
 from DeepCache import DeepCacheSDHelper
 from diffusers import (
@@ -10,6 +8,7 @@ from diffusers import (
     KDPM2AncestralDiscreteScheduler,
     LMSDiscreteScheduler,
     PNDMScheduler,
+    StableDiffusionImg2ImgPipeline,
     StableDiffusionPipeline,
 )
 from diffusers.models import AutoencoderKL, AutoencoderTiny
@@ -17,10 +16,7 @@ from torch._dynamo import OptimizedModule
 
 from .upscaler import RealESRGAN
 
-ZERO_GPU = (
-    os.environ.get("SPACES_ZERO_GPU", "").lower() == "true"
-    or os.environ.get("SPACES_ZERO_GPU", "") == "1"
-)
+__import__("warnings").filterwarnings("ignore", category=FutureWarning, module="diffusers")
 
 
 # inspired by ComfyUI
@@ -45,14 +41,12 @@ class Loader:
 
     def _load_deepcache(self, interval=1):
         has_deepcache = hasattr(self.pipe, "deepcache")
-
         if has_deepcache and self.pipe.deepcache.params["cache_interval"] == interval:
             return
         if has_deepcache:
             self.pipe.deepcache.disable()
         else:
             self.pipe.deepcache = DeepCacheSDHelper(pipe=self.pipe)
-
         self.pipe.deepcache.set_params(cache_interval=interval)
         self.pipe.deepcache.enable()
 
@@ -78,35 +72,44 @@ class Loader:
             print("Switching to Tiny VAE...")
             self.pipe.vae = AutoencoderTiny.from_pretrained(
                 pretrained_model_name_or_path="madebyollin/taesd",
-            ).to(device=self.pipe.device)
+            ).to(self.pipe.device)
             return
 
         if is_tiny and not taesd:
             print("Switching to KL VAE...")
             model = AutoencoderKL.from_pretrained(
                 pretrained_model_name_or_path=model_name,
-                use_safetensors=True,
                 subfolder="vae",
                 variant=variant,
-            ).to(
+            ).to(self.pipe.device)
             self.pipe.vae = torch.compile(
                 mode="reduce-overhead",
                 fullgraph=True,
                 model=model,
             )
 
+    def _load_pipeline(self, kind, model, device, dtype, **kwargs):
+        pipelines = {
+            "txt2img": StableDiffusionPipeline,
+            "img2img": StableDiffusionImg2ImgPipeline,
+        }
+        if self.pipe is None:
+            self.pipe = pipelines[kind].from_pretrained(model, **kwargs).to(device, dtype)
+        if not isinstance(self.pipe, pipelines[kind]):
+            self.pipe = pipelines[kind].from_pipe(self.pipe).to(device, dtype)
+
     def load(
         self,
+        kind,
         model,
         scheduler,
         karras,
         taesd,
         freeu,
+        deepcache,
         scale,
-        dtype,
         device,
+        dtype,
     ):
         model_lower = model.lower()
 
@@ -133,7 +136,7 @@ class Loader:
         del scheduler_kwargs["use_karras_sigmas"]
 
         # no fp16 variant
-        if
+        if model_lower not in [
             "sg161222/realistic_vision_v5.1_novae",
             "prompthero/openjourney-v4",
             "linaqruf/anything-v3-1",
@@ -144,48 +147,37 @@ class Loader:
 
         pipe_kwargs = {
             "scheduler": schedulers[scheduler](**scheduler_kwargs),
-            "pretrained_model_name_or_path": model_lower,
             "requires_safety_checker": False,
-            "use_safetensors": True,
             "safety_checker": None,
             "variant": variant,
         }
 
-            model_name = self.pipe.config._name_or_path
-            same_model = model_name.lower() == model_lower
-            same_scheduler = isinstance(self.pipe.scheduler, schedulers[scheduler])
-            same_karras = (
-                not hasattr(self.pipe.scheduler.config, "use_karras_sigmas")
-                or self.pipe.scheduler.config.use_karras_sigmas == karras
-            )
-                self._load_vae(model_lower, taesd, variant)
-                self._load_freeu(freeu)
-                self._load_deepcache(deepcache_interval)
-                self._load_upscaler(device, scale)
-                torch.cuda.empty_cache()
-                return self.pipe, self.upscaler
-            else:
-                print(f"Unloading {model_name.lower()}...")
-                self.pipe = None
-
-        print(f"Loading {model_lower} with {'Tiny' if taesd else 'KL'} VAE...")
-        self.pipe = StableDiffusionPipeline.from_pretrained(**pipe_kwargs).to(
-            device=device,
-            dtype=dtype,
-        )
+        if self.pipe is None:
+            print(f"Loading {model_lower} with {'Tiny' if taesd else 'KL'} VAE...")
+
+        self._load_pipeline(kind, model_lower, device, dtype, **pipe_kwargs)
+        model_name = self.pipe.config._name_or_path
+        same_model = model_name.lower() == model_lower
+        same_scheduler = isinstance(self.pipe.scheduler, schedulers[scheduler])
+        same_karras = (
+            not hasattr(self.pipe.scheduler.config, "use_karras_sigmas")
+            or self.pipe.scheduler.config.use_karras_sigmas == karras
+        )
+
+        if same_model:
+            if not same_scheduler:
+                print(f"Switching to {scheduler}...")
+            if not same_karras:
+                print(f"{'Enabling' if karras else 'Disabling'} Karras sigmas...")
+            if not same_scheduler or not same_karras:
+                self.pipe.scheduler = schedulers[scheduler](**scheduler_kwargs)
+        else:
+            self.pipe = None
+            self._load_pipeline(kind, model_lower, device, dtype, **pipe_kwargs)
+
         self._load_vae(model_lower, taesd, variant)
         self._load_freeu(freeu)
-        self._load_deepcache(
+        self._load_deepcache(deepcache)
         self._load_upscaler(device, scale)
         torch.cuda.empty_cache()
         return self.pipe, self.upscaler
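The new `_load_pipeline` helper switches between the txt2img and img2img pipelines with `from_pipe`, which rebuilds the requested pipeline class around the components already in memory instead of reloading the model. A minimal sketch of the same idea outside the `Loader` (the model id is illustrative):

    from diffusers import StableDiffusionImg2ImgPipeline, StableDiffusionPipeline

    # load once as txt2img...
    txt2img = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    # ...then reuse the same weights for img2img without downloading or loading anything again
    img2img = StableDiffusionImg2ImgPipeline.from_pipe(txt2img)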
usage.md CHANGED
@@ -55,6 +55,12 @@ Optionally, the [Karras](https://arxiv.org/abs/2206.00364) noise schedule can be
 * [LMS](https://huggingface.co/docs/diffusers/api/schedulers/lms_discrete)
 * [PNDM](https://huggingface.co/docs/diffusers/api/schedulers/pndm)
 
+### Image-to-Image
+
+The `🖼️ Image` tab enables the image-to-image pipeline. Either use the image input or select a generation from the gallery and then adjust the denoising strength. To disable, simply clear the image input (the `x` overlay button).
+
+Denoising strength is essentially how much the generation will differ from the input image. A value of `0` will be identical to the original, while `1` will be a completely new image. You may want to also increase the number of inference steps.
+
 ### Advanced
 
 #### DeepCache
@@ -67,7 +73,7 @@ Optionally, the [Karras](https://arxiv.org/abs/2206.00364) noise schedule can be
 
 #### ToMe
 
-[Token merging](https://github.com/dbolya/tomesd) (Bolya & Hoffman 2023) reduces the number of tokens processed by the model. Set `Ratio` to the desired reduction factor.
+[Token merging](https://github.com/dbolya/tomesd) (Bolya & Hoffman 2023) reduces the number of tokens processed by the model. Set `Ratio` to the desired reduction factor. Only necessary to speed up generation on older GPUs.
 
 #### FreeU
 
@@ -75,7 +81,7 @@ Optionally, the [Karras](https://arxiv.org/abs/2206.00364) noise schedule can be
 
 #### Clip Skip
 
-When enabled, the last CLIP layer is skipped. This
+When enabled, the last CLIP layer is skipped. This can sometimes improve image quality with anime models.
 
 #### Tiny VAE
 
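As described in the Image-to-Image section above, the denoising strength setting maps to the `strength` argument of the underlying img2img pipeline; a minimal diffusers sketch under that assumption (the model id and input file are illustrative):

    from diffusers import StableDiffusionImg2ImgPipeline
    from diffusers.utils import load_image

    pipe = StableDiffusionImg2ImgPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    init = load_image("input.png").resize((512, 512))  # illustrative input image

    # img2img only runs about strength * num_inference_steps denoising steps,
    # which is why raising the step count can help at lower strengths
    image = pipe(
        "an astronaut riding a horse on mars",
        image=init,
        strength=0.6,
        num_inference_steps=30,
    ).images[0]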