Spaces:

adamelliotfields
/

diffusion

Running on Zero

App Files Files Community

adamelliotfields committed on Nov 27, 2024

Commit

7a7cda5

verified ·

1 Parent(s): 98afd85

Move ControlNet to Image tab

Browse files

Files changed (8) hide show

README.md +3 -12
app.py +95 -94
lib/__init__.py +2 -2
lib/annotators.py +3 -1
lib/config.py +10 -4
lib/inference.py +14 -33
lib/loader.py +1 -0
lib/utils.py +65 -51

README.md CHANGED Viewed

@@ -60,25 +60,16 @@ preload_from_hub:  # up to 10
 # diffusion
 Gradio app for Stable Diffusion 1.5 featuring:
-* txt2img and img2img pipelines with IP-Adapter
 * Curated models, LoRAs, and TI embeddings
-* ControlNet with annotators
 * Compel prompt weighting
-* dozens of styles and starter prompts
 * Multiple samplers with Karras scheduling
 * DeepCache, FreeU, and Clip Skip available
 * Real-ESRGAN upscaling
 * Optional tiny autoencoder
-## Motivation
-* host a free and easy-to-use Stable Diffusion UI on ZeroGPU
-* provide the necessary tools for common workflows
-* curate useful models, adapters, and embeddings
-* prefer Diffusers over custom PyTorch
-* be fast on 8GB with no offloading
-* only support CUDA on Linux/WSL
 ## Usage
 See [`DOCS.md`](https://huggingface.co/spaces/adamelliotfields/diffusion/blob/main/DOCS.md).

 # diffusion
 Gradio app for Stable Diffusion 1.5 featuring:
+* txt2img and img2img pipelines with ControlNet and IP-Adapter
+* Canny edge detection (more preprocessors coming soon)
 * Curated models, LoRAs, and TI embeddings
 * Compel prompt weighting
+* Hand-written style templates
 * Multiple samplers with Karras scheduling
 * DeepCache, FreeU, and Clip Skip available
 * Real-ESRGAN upscaling
 * Optional tiny autoencoder
 ## Usage
 See [`DOCS.md`](https://huggingface.co/spaces/adamelliotfields/diffusion/blob/main/DOCS.md).

app.py CHANGED Viewed

@@ -6,16 +6,13 @@ import random
 import gradio as gr
 from lib import (
-    CannyAnnotator,
     Config,
     async_call,
     disable_progress_bars,
     download_civit_file,
     download_repo_files,
     generate,
-    get_valid_size,
     read_file,
-    resize_image,
 )
 # the CSS `content` attribute expects a string so we need to wrap the number in quotes
@@ -45,7 +42,7 @@ aspect_ratio_js = """
 """
-def create_image_dropdown(images, locked=False):
     if locked:
         return gr.Dropdown(
             choices=[("🔒", -2)],
@@ -60,19 +57,17 @@ def create_image_dropdown(images, locked=False):
         )
-async def gallery_fn(images, image, ip_image):
     return (
-        create_image_dropdown(images, locked=image is not None),
-        create_image_dropdown(images, locked=ip_image is not None),
     )
-async def image_prompt_fn(images):
-    return create_image_dropdown(images)
-# handle selecting an image from the gallery
-# -2 is the lock icon, -1 is None
 async def image_select_fn(images, image, i):
     if i == -2:
         return gr.Image(image)
@@ -87,15 +82,6 @@ async def random_fn():
     return gr.Textbox(value=random.choice(prompts))
-# TODO: move this to another file once more annotators are added; will need @GPU decorator
-async def annotate_fn(image, annotator):
-    size = get_valid_size(image)
-    image = resize_image(image, size)
-    if annotator == "canny":
-        canny = CannyAnnotator()
-        return canny(image, size)
 async def generate_fn(*args, progress=gr.Progress(track_tqdm=True)):
     if len(args) > 0:
         prompt = args[0]
@@ -105,17 +91,22 @@ async def generate_fn(*args, progress=gr.Progress(track_tqdm=True)):
         raise gr.Error("You must enter a prompt")
     # always the last arguments
-    DISABLE_IMAGE_PROMPT, DISABLE_IP_IMAGE_PROMPT = args[-2:]
-    gen_args = list(args[:-2])
     if DISABLE_IMAGE_PROMPT:
         gen_args[2] = None
-    if DISABLE_IP_IMAGE_PROMPT:
         gen_args[3] = None
     try:
         if Config.ZERO_GPU:
             progress((0, 100), desc="ZeroGPU init")
         images = await async_call(
             generate,
             *gen_args,
@@ -125,6 +116,7 @@ async def generate_fn(*args, progress=gr.Progress(track_tqdm=True)):
         )
     except RuntimeError:
         raise gr.Error("Error: Please try again")
     return images
@@ -155,6 +147,7 @@ with gr.Blocks(
     # override image inputs without clearing them
     DISABLE_IMAGE_PROMPT = gr.State(False)
     DISABLE_IP_IMAGE_PROMPT = gr.State(False)
     gr.HTML(read_file("./partials/intro.html"))
@@ -212,6 +205,14 @@ with gr.Blocks(
                 image_prompt = gr.Image(
                     show_share_button=False,
                     label="Initial Image",
                     min_width=320,
                     format="png",
                     type="pil",
@@ -226,100 +227,84 @@ with gr.Blocks(
             with gr.Row():
                 image_select = gr.Dropdown(
-                    info="Use an initial image from the gallery",
                     choices=[("None", -1)],
-                    label="Gallery Image",
                     interactive=True,
                     filterable=False,
                     value=-1,
                 )
                 ip_image_select = gr.Dropdown(
-                    info="Use an IP-Adapter image from the gallery",
-                    label="Gallery Image",
                     choices=[("None", -1)],
                     interactive=True,
                     filterable=False,
                     value=-1,
                 )
             with gr.Row():
                 denoising_strength = gr.Slider(
                     value=Config.DENOISING_STRENGTH,
-                    label="Denoising Strength",
                     minimum=0.0,
                     maximum=1.0,
                     step=0.1,
                 )
             with gr.Row():
                 disable_image = gr.Checkbox(
-                    elem_classes=["checkbox"],
                     label="Disable Initial Image",
                     value=False,
                 )
-                disable_ip_image = gr.Checkbox(
                     elem_classes=["checkbox"],
                     label="Disable IP-Adapter Image",
                     value=False,
                 )
                 use_ip_face = gr.Checkbox(
-                    elem_classes=["checkbox"],
                     label="Use IP-Adapter Face",
                     value=False,
                 )
-        # controlnet tab
-        with gr.TabItem("🎮 Control"):
-            with gr.Row():
-                control_image_input = gr.Image(
-                    show_share_button=False,
-                    label="Control Image",
-                    min_width=320,
-                    format="png",
-                    type="pil",
-                )
-                control_image_prompt = gr.Image(
-                    interactive=False,
-                    show_share_button=False,
-                    label="Control Image Output",
-                    show_label=False,
-                    min_width=320,
-                    format="png",
-                    type="pil",
-                )
-            with gr.Row():
-                control_annotator = gr.Dropdown(
-                    choices=[("Canny", "canny")],
-                    label="Annotator",
-                    filterable=False,
-                    value="canny",
-                )
-            with gr.Row():
-                annotate_btn = gr.Button("Annotate", variant="primary")
-                clear_control_btn = gr.ClearButton(
-                    elem_classes=["icon-button", "popover"],
-                    components=[control_image_prompt],
-                    variant="secondary",
-                    elem_id="clear-control",
-                    min_width=0,
-                    value="🗑️",
-                )
         with gr.TabItem("⚙️ Menu"):
             with gr.Group():
                 negative_prompt = gr.Textbox(
-                    value="nsfw+",
                     label="Negative Prompt",
                     lines=2,
                 )
                 with gr.Row():
                     model = gr.Dropdown(
                         choices=Config.MODELS,
-                        filterable=False,
                         value=Config.MODEL,
                         label="Model",
                         min_width=240,
                     )
@@ -489,25 +474,12 @@ with gr.Blocks(
                         value=False,
                     )
-    annotate_btn.click(
-        annotate_fn,
-        inputs=[control_image_input, control_annotator],
-        outputs=[control_image_prompt],
-    )
     random_btn.click(random_fn, inputs=[], outputs=[prompt], show_api=False)
     refresh_btn.click(None, inputs=[], outputs=[seed], js=refresh_seed_js)
     seed.change(None, inputs=[seed], outputs=[], js=seed_js)
-    file_format.change(
-        lambda f: (gr.Gallery(format=f), gr.Image(format=f), gr.Image(format=f)),
-        inputs=[file_format],
-        outputs=[output_images, image_prompt, ip_image_prompt],
-        show_api=False,
-    )
     # input events are only user input; change events are both user and programmatic
     aspect_ratio.input(
         None,
@@ -516,11 +488,23 @@ with gr.Blocks(
         js=aspect_ratio_js,
     )
     # lock the input images so you don't lose them when the gallery updates
     output_images.change(
         gallery_fn,
-        inputs=[output_images, image_prompt, ip_image_prompt],
-        outputs=[image_select, ip_image_select],
         show_api=False,
     )
@@ -531,6 +515,12 @@ with gr.Blocks(
         outputs=[image_prompt],
         show_api=False,
     )
     ip_image_select.change(
         image_select_fn,
         inputs=[output_images, ip_image_prompt, ip_image_select],
@@ -545,6 +535,12 @@ with gr.Blocks(
         outputs=[image_select],
         show_api=False,
     )
     ip_image_prompt.clear(
         image_prompt_fn,
         inputs=[output_images],
@@ -563,10 +559,14 @@ with gr.Blocks(
     # toggle image prompts by updating session state
     gr.on(
-        triggers=[disable_image.input, disable_ip_image.input],
-        fn=lambda disable_image, disable_ip_image: (disable_image, disable_ip_image),
-        inputs=[disable_image, disable_ip_image],
-        outputs=[DISABLE_IMAGE_PROMPT, DISABLE_IP_IMAGE_PROMPT],
     )
     # generate images
@@ -579,8 +579,8 @@ with gr.Blocks(
             prompt,
             negative_prompt,
             image_prompt,
-            ip_image_prompt,
             control_image_prompt,
             lora_1,
             lora_1_weight,
             lora_2,
@@ -605,6 +605,7 @@ with gr.Blocks(
             use_clip_skip,
             use_ip_face,
             DISABLE_IMAGE_PROMPT,
             DISABLE_IP_IMAGE_PROMPT,
         ],
     )

 import gradio as gr
 from lib import (
     Config,
     async_call,
     disable_progress_bars,
     download_civit_file,
     download_repo_files,
     generate,
     read_file,
 )
 # the CSS `content` attribute expects a string so we need to wrap the number in quotes
 """
+def image_prompt_fn(images, locked=False):
     if locked:
         return gr.Dropdown(
             choices=[("🔒", -2)],
         )
+async def gallery_fn(images, image, control_image, ip_image):
     return (
+        image_prompt_fn(images, locked=image is not None),
+        image_prompt_fn(images, locked=control_image is not None),
+        image_prompt_fn(images, locked=ip_image is not None),
     )
+# Handle selecting an image from the gallery:
+# * -2 is the lock icon
+# * -1 is None
 async def image_select_fn(images, image, i):
     if i == -2:
         return gr.Image(image)
     return gr.Textbox(value=random.choice(prompts))
 async def generate_fn(*args, progress=gr.Progress(track_tqdm=True)):
     if len(args) > 0:
         prompt = args[0]
         raise gr.Error("You must enter a prompt")
     # always the last arguments
+    DISABLE_IMAGE_PROMPT, DISABLE_CONTROL_IMAGE_PROMPT, DISABLE_IP_IMAGE_PROMPT = args[-3:]
+    gen_args = list(args[:-3])
+    # the first two arguments are the prompt and negative prompt
     if DISABLE_IMAGE_PROMPT:
         gen_args[2] = None
+    if DISABLE_CONTROL_IMAGE_PROMPT:
         gen_args[3] = None
+    if DISABLE_IP_IMAGE_PROMPT:
+        gen_args[4] = None
     try:
         if Config.ZERO_GPU:
             progress((0, 100), desc="ZeroGPU init")
+        # the remaining arguments are the alert handlers and progress bar
         images = await async_call(
             generate,
             *gen_args,
         )
     except RuntimeError:
         raise gr.Error("Error: Please try again")
     return images
     # override image inputs without clearing them
     DISABLE_IMAGE_PROMPT = gr.State(False)
     DISABLE_IP_IMAGE_PROMPT = gr.State(False)
+    DISABLE_CONTROL_IMAGE_PROMPT = gr.State(False)
     gr.HTML(read_file("./partials/intro.html"))
                 image_prompt = gr.Image(
                     show_share_button=False,
                     label="Initial Image",
+                    min_width=640,
+                    format="png",
+                    type="pil",
+                )
+            with gr.Row():
+                control_image_prompt = gr.Image(
+                    show_share_button=False,
+                    label="Control Image",
                     min_width=320,
                     format="png",
                     type="pil",
             with gr.Row():
                 image_select = gr.Dropdown(
+                    info="Use a gallery image for initial latents",
                     choices=[("None", -1)],
+                    label="Initial Image",
                     interactive=True,
                     filterable=False,
+                    min_width=100,
+                    value=-1,
+                )
+                control_image_select = gr.Dropdown(
+                    info="Use a gallery image for ControlNet",
+                    label="ControlNet Image",
+                    choices=[("None", -1)],
+                    interactive=True,
+                    filterable=False,
+                    min_width=100,
                     value=-1,
                 )
                 ip_image_select = gr.Dropdown(
+                    info="Use a gallery image for IP-Adapter",
+                    label="IP-Adapter Image",
                     choices=[("None", -1)],
                     interactive=True,
                     filterable=False,
+                    min_width=100,
                     value=-1,
                 )
             with gr.Row():
                 denoising_strength = gr.Slider(
+                    label="Initial Image Strength",
                     value=Config.DENOISING_STRENGTH,
                     minimum=0.0,
                     maximum=1.0,
                     step=0.1,
                 )
+                control_annotator = gr.Dropdown(
+                    label="ControlNet Annotator",
+                    # TODO: annotators should be in config with names
+                    choices=[("Canny", "canny")],
+                    value=Config.ANNOTATOR,
+                    filterable=False,
+                )
             with gr.Row():
                 disable_image = gr.Checkbox(
                     label="Disable Initial Image",
+                    elem_classes=["checkbox"],
                     value=False,
                 )
+                disable_control_image = gr.Checkbox(
+                    label="Disable ControlNet Image",
                     elem_classes=["checkbox"],
+                    value=False,
+                )
+                disable_ip_image = gr.Checkbox(
                     label="Disable IP-Adapter Image",
+                    elem_classes=["checkbox"],
                     value=False,
                 )
                 use_ip_face = gr.Checkbox(
                     label="Use IP-Adapter Face",
+                    elem_classes=["checkbox"],
                     value=False,
                 )
         with gr.TabItem("⚙️ Menu"):
             with gr.Group():
                 negative_prompt = gr.Textbox(
                     label="Negative Prompt",
+                    value="nsfw+",
                     lines=2,
                 )
                 with gr.Row():
                     model = gr.Dropdown(
                         choices=Config.MODELS,
                         value=Config.MODEL,
+                        filterable=False,
                         label="Model",
                         min_width=240,
                     )
                         value=False,
                     )
     random_btn.click(random_fn, inputs=[], outputs=[prompt], show_api=False)
     refresh_btn.click(None, inputs=[], outputs=[seed], js=refresh_seed_js)
     seed.change(None, inputs=[seed], outputs=[], js=seed_js)
     # input events are only user input; change events are both user and programmatic
     aspect_ratio.input(
         None,
         js=aspect_ratio_js,
     )
+    file_format.change(
+        lambda f: (
+            gr.Gallery(format=f),
+            gr.Image(format=f),
+            gr.Image(format=f),
+            gr.Image(format=f),
+        ),
+        inputs=[file_format],
+        outputs=[output_images, image_prompt, control_image_prompt, ip_image_prompt],
+        show_api=False,
+    )
     # lock the input images so you don't lose them when the gallery updates
     output_images.change(
         gallery_fn,
+        inputs=[output_images, image_prompt, control_image_prompt, ip_image_prompt],
+        outputs=[image_select, control_image_select, ip_image_select],
         show_api=False,
     )
         outputs=[image_prompt],
         show_api=False,
     )
+    control_image_select.change(
+        image_select_fn,
+        inputs=[output_images, control_image_prompt, control_image_select],
+        outputs=[control_image_prompt],
+        show_api=False,
+    )
     ip_image_select.change(
         image_select_fn,
         inputs=[output_images, ip_image_prompt, ip_image_select],
         outputs=[image_select],
         show_api=False,
     )
+    control_image_prompt.clear(
+        image_prompt_fn,
+        inputs=[output_images],
+        outputs=[control_image_select],
+        show_api=False,
+    )
     ip_image_prompt.clear(
         image_prompt_fn,
         inputs=[output_images],
     # toggle image prompts by updating session state
     gr.on(
+        triggers=[disable_image.input, disable_control_image.input, disable_ip_image.input],
+        fn=lambda disable_image, disable_control_image, disable_ip_image: (
+            disable_image,
+            disable_control_image,
+            disable_ip_image,
+        ),
+        inputs=[disable_image, disable_control_image, disable_ip_image],
+        outputs=[DISABLE_IMAGE_PROMPT, DISABLE_CONTROL_IMAGE_PROMPT, DISABLE_IP_IMAGE_PROMPT],
     )
     # generate images
             prompt,
             negative_prompt,
             image_prompt,
             control_image_prompt,
+            ip_image_prompt,
             lora_1,
             lora_1_weight,
             lora_2,
             use_clip_skip,
             use_ip_face,
             DISABLE_IMAGE_PROMPT,
+            DISABLE_CONTROL_IMAGE_PROMPT,
             DISABLE_IP_IMAGE_PROMPT,
         ],
     )

lib/__init__.py CHANGED Viewed

@@ -5,12 +5,12 @@ from .loader import Loader
 from .logger import Logger
 from .upscaler import RealESRGAN
 from .utils import (
     async_call,
     disable_progress_bars,
     download_civit_file,
     download_repo_files,
     enable_progress_bars,
-    get_valid_size,
     load_json,
     read_file,
     resize_image,
@@ -24,13 +24,13 @@ __all__ = [
     "Loader",
     "Logger",
     "RealESRGAN",
     "async_call",
     "disable_progress_bars",
     "download_civit_file",
     "download_repo_files",
     "enable_progress_bars",
     "generate",
-    "get_valid_size",
     "load_json",
     "read_file",
     "resize_image",

 from .logger import Logger
 from .upscaler import RealESRGAN
 from .utils import (
+    annotate_image,
     async_call,
     disable_progress_bars,
     download_civit_file,
     download_repo_files,
     enable_progress_bars,
     load_json,
     read_file,
     resize_image,
     "Loader",
     "Logger",
     "RealESRGAN",
+    "annotate_image",
     "async_call",
     "disable_progress_bars",
     "download_civit_file",
     "download_repo_files",
     "enable_progress_bars",
     "generate",
     "load_json",
     "read_file",
     "resize_image",

lib/annotators.py CHANGED Viewed

@@ -1,6 +1,8 @@
 from threading import Lock
 from controlnet_aux import CannyDetector
 class CannyAnnotator:
@@ -14,7 +16,7 @@ class CannyAnnotator:
                 cls._instance.model = CannyDetector()
         return cls._instance
-    def __call__(self, img, size):
         resolution = min(*size)
         return self.model(
             img,

 from threading import Lock
+from typing import Tuple
 from controlnet_aux import CannyDetector
+from PIL import Image
 class CannyAnnotator:
                 cls._instance.model = CannyDetector()
         return cls._instance
+    def __call__(self, img: Image.Image, size: Tuple[int, int]) -> Image.Image:
         resolution = min(*size)
         return self.model(
             img,

lib/config.py CHANGED Viewed

@@ -23,9 +23,10 @@ from .pipelines import (
     CustomStableDiffusionPipeline,
 )
-# improved GPU handling and progress bars; set before importing spaces
 os.environ["ZEROGPU_V2"] = "1"
 if find_spec("hf_transfer"):
     os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
@@ -35,7 +36,8 @@ filterwarnings("ignore", category=FutureWarning, module="transformers")
 diffusers_logging.set_verbosity_error()
 transformers_logging.set_verbosity_error()
-_sd_files = [
     "feature_extractor/preprocessor_config.json",
     "safety_checker/config.json",
     "scheduler/scheduler_config.json",
@@ -52,10 +54,12 @@ _sd_files = [
     "model_index.json",
 ]
 Config = SimpleNamespace(
     HF_TOKEN=os.environ.get("HF_TOKEN", None),
     CIVIT_TOKEN=os.environ.get("CIVIT_TOKEN", None),
     ZERO_GPU=import_module("spaces").config.Config.zero_gpu,
     HF_MODELS={
         # downloaded on startup
         "ai-forever/Real-ESRGAN": ["RealESRGAN_x2.pth", "RealESRGAN_x4.pth"],
@@ -64,7 +68,7 @@ Config = SimpleNamespace(
         "fluently/Fluently-v4": ["Fluently-v4.safetensors"],
         "Linaqruf/anything-v3-1": ["anything-v3-2.safetensors"],
         "lllyasviel/control_v11p_sd15_canny": ["diffusion_pytorch_model.fp16.safetensors"],
-        "Lykon/dreamshaper-8": [*_sd_files],
         "madebyollin/taesd": ["diffusion_pytorch_model.safetensors"],
         "prompthero/openjourney-v4": ["openjourney-v4.ckpt"],
         "SG161222/Realistic_Vision_V5.1_noVAE": ["Realistic_Vision_V5.1_fp16-no-ema.safetensors"],
@@ -111,8 +115,9 @@ Config = SimpleNamespace(
         "SG161222/Realistic_Vision_V5.1_noVAE",
         "XpucT/Deliberate",
     ],
     MODEL_CHECKPOINTS={
-        # keep keys lowercase
         "comfy-org/stable-diffusion-v1-5-archive": "v1-5-pruned-emaonly-fp16.safetensors",
         "cyberdelia/cyberrealistic": "CyberRealistic_V5_FP16.safetensors",
         "fluently/fluently-v4": "Fluently-v4.safetensors",
@@ -131,6 +136,7 @@ Config = SimpleNamespace(
         "PNDM": PNDMScheduler,
         "UniPC 2M": UniPCMultistepScheduler,
     },
     ANNOTATORS={
         "canny": "lllyasviel/control_v11p_sd15_canny",
     },

     CustomStableDiffusionPipeline,
 )
+# Improved GPU handling and progress bars; set before importing spaces
 os.environ["ZEROGPU_V2"] = "1"
+# hf_transfer errors if enabled but not installed, so only enable it when the package is found
 if find_spec("hf_transfer"):
     os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 diffusers_logging.set_verbosity_error()
 transformers_logging.set_verbosity_error()
+# Standard Stable Diffusion 1.5 file structure
+sd_files = [
     "feature_extractor/preprocessor_config.json",
     "safety_checker/config.json",
     "scheduler/scheduler_config.json",
     "model_index.json",
 ]
+# Using namespace instead of dataclass for simplicity
 Config = SimpleNamespace(
     HF_TOKEN=os.environ.get("HF_TOKEN", None),
     CIVIT_TOKEN=os.environ.get("CIVIT_TOKEN", None),
     ZERO_GPU=import_module("spaces").config.Config.zero_gpu,
+    # TODO: fix model config redundancy
     HF_MODELS={
         # downloaded on startup
         "ai-forever/Real-ESRGAN": ["RealESRGAN_x2.pth", "RealESRGAN_x4.pth"],
         "fluently/Fluently-v4": ["Fluently-v4.safetensors"],
         "Linaqruf/anything-v3-1": ["anything-v3-2.safetensors"],
         "lllyasviel/control_v11p_sd15_canny": ["diffusion_pytorch_model.fp16.safetensors"],
+        "Lykon/dreamshaper-8": [*sd_files],
         "madebyollin/taesd": ["diffusion_pytorch_model.safetensors"],
         "prompthero/openjourney-v4": ["openjourney-v4.ckpt"],
         "SG161222/Realistic_Vision_V5.1_noVAE": ["Realistic_Vision_V5.1_fp16-no-ema.safetensors"],
         "SG161222/Realistic_Vision_V5.1_noVAE",
         "XpucT/Deliberate",
     ],
+    # Single-file model weights
     MODEL_CHECKPOINTS={
+        # keep keys lowercase for case-insensitive matching in the loader
         "comfy-org/stable-diffusion-v1-5-archive": "v1-5-pruned-emaonly-fp16.safetensors",
         "cyberdelia/cyberrealistic": "CyberRealistic_V5_FP16.safetensors",
         "fluently/fluently-v4": "Fluently-v4.safetensors",
         "PNDM": PNDMScheduler,
         "UniPC 2M": UniPCMultistepScheduler,
     },
+    ANNOTATOR="canny",
     ANNOTATORS={
         "canny": "lllyasviel/control_v11p_sd15_canny",
     },

lib/inference.py CHANGED Viewed

@@ -5,18 +5,22 @@ import time
 from datetime import datetime
 from itertools import product
-import numpy as np
 import torch
 from compel import Compel, DiffusersTextualInversionManager, ReturnedEmbeddingsType
 from compel.prompt_parser import PromptParser
 from huggingface_hub.utils import HFValidationError, RepositoryNotFoundError
-from PIL import Image
 from spaces import GPU
 from .config import Config
 from .loader import Loader
 from .logger import Logger
-from .utils import load_json, safe_progress, timer
 def parse_prompt_with_arrays(prompt: str) -> list[str]:
@@ -58,25 +62,7 @@ def apply_style(positive_prompt, negative_prompt, style_id="none"):
     )
-def prepare_image(input, size=None):
-    image = None
-    if isinstance(input, Image.Image):
-        image = input
-    if isinstance(input, np.ndarray):
-        image = Image.fromarray(input)
-    if isinstance(input, str):
-        if os.path.isfile(input):
-            image = Image.open(input)
-    if image is not None:
-        image = image.convert("RGB")
-    if size is not None:
-        image = image.resize(size, Image.Resampling.LANCZOS)
-    if image is not None:
-        return image
-    else:
-        raise ValueError("Invalid image prompt")
 def gpu_duration(**kwargs):
     loading = 20
     duration = 10
@@ -97,8 +83,8 @@ def generate(
     positive_prompt,
     negative_prompt="",
     image_prompt=None,
-    ip_image_prompt=None,
     control_image_prompt=None,
     lora_1=None,
     lora_1_weight=0.0,
     lora_2=None,
@@ -146,9 +132,6 @@ def generate(
     KIND = "img2img" if image_prompt is not None else "txt2img"
     KIND = f"controlnet_{KIND}" if control_image_prompt is not None else KIND
-    if KIND.startswith("controlnet_") and annotator.lower() not in Config.ANNOTATORS.keys():
-        raise Error(f"Invalid annotator: {annotator}")
     EMBEDDINGS_TYPE = (
         ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NORMALIZED
         if clip_skip
@@ -296,21 +279,19 @@ def generate(
         if progress is not None:
             kwargs["callback_on_step_end"] = callback_on_step_end
         if KIND == "img2img":
             kwargs["strength"] = denoising_strength
-            kwargs["image"] = prepare_image(image_prompt, (width, height))
         if KIND == "controlnet_txt2img":
-            # don't resize controlnet images
-            kwargs["image"] = prepare_image(control_image_prompt, None)
         if KIND == "controlnet_img2img":
-            kwargs["control_image"] = prepare_image(control_image_prompt, None)
         if IP_ADAPTER:
-            # don't resize full-face images since they are usually square crops
-            size = None if ip_face else (width, height)
-            kwargs["ip_adapter_image"] = prepare_image(ip_image_prompt, size)
         try:
             image = pipe(**kwargs).images[0]

 from datetime import datetime
 from itertools import product
 import torch
 from compel import Compel, DiffusersTextualInversionManager, ReturnedEmbeddingsType
 from compel.prompt_parser import PromptParser
 from huggingface_hub.utils import HFValidationError, RepositoryNotFoundError
 from spaces import GPU
 from .config import Config
 from .loader import Loader
 from .logger import Logger
+from .utils import (
+    annotate_image,
+    load_json,
+    resize_image,
+    safe_progress,
+    timer,
+)
 def parse_prompt_with_arrays(prompt: str) -> list[str]:
     )
+# Dynamic signature for the GPU duration function
 def gpu_duration(**kwargs):
     loading = 20
     duration = 10
     positive_prompt,
     negative_prompt="",
     image_prompt=None,
     control_image_prompt=None,
+    ip_image_prompt=None,
     lora_1=None,
     lora_1_weight=0.0,
     lora_2=None,
     KIND = "img2img" if image_prompt is not None else "txt2img"
     KIND = f"controlnet_{KIND}" if control_image_prompt is not None else KIND
     EMBEDDINGS_TYPE = (
         ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NORMALIZED
         if clip_skip
         if progress is not None:
             kwargs["callback_on_step_end"] = callback_on_step_end
+        # Resizing so the initial latents are the same size as the generated image
         if KIND == "img2img":
             kwargs["strength"] = denoising_strength
+            kwargs["image"] = resize_image(image_prompt, (width, height))
         if KIND == "controlnet_txt2img":
+            kwargs["image"] = annotate_image(control_image_prompt, annotator)
         if KIND == "controlnet_img2img":
+            kwargs["control_image"] = annotate_image(control_image_prompt, annotator)
         if IP_ADAPTER:
+            kwargs["ip_adapter_image"] = resize_image(ip_image_prompt)
         try:
             image = pipe(**kwargs).images[0]

lib/loader.py CHANGED Viewed

@@ -372,6 +372,7 @@ class Loader:
             # defaults to float32
             pipe_kwargs["torch_dtype"] = torch.float16
         if kind.startswith("controlnet_"):
             pipe_kwargs["controlnet"] = ControlNetModel.from_pretrained(
                 Config.ANNOTATORS[annotator],

             # defaults to float32
             pipe_kwargs["torch_dtype"] = torch.float16
+        # config maps the annotator ID to the ControlNet repo: canny -> lllyasviel/control_v11p_sd15_canny
         if kind.startswith("controlnet_"):
             pipe_kwargs["controlnet"] = ControlNetModel.from_pretrained(
                 Config.ANNOTATORS[annotator],

lib/utils.py CHANGED Viewed

@@ -4,10 +4,9 @@ import json
 import os
 import time
 from contextlib import contextmanager
-from typing import Callable, TypeVar
 import anyio
-import cv2
 import httpx
 import numpy as np
 from anyio import Semaphore
@@ -18,6 +17,7 @@ from PIL import Image
 from transformers import logging as transformers_logging
 from typing_extensions import ParamSpec
 from .logger import Logger
 T = TypeVar("T")
@@ -110,64 +110,78 @@ def download_civit_file(lora_id, version_id, file_path=".", token=None):
         log.error(f"RequestError: {e}")
-# resize an image while preserving the aspect ratio (size is width-first)
-def resize_image(image, size):
     if isinstance(image, Image.Image):
-        image = np.array(image)
-    H, W, _ = image.shape
-    W = float(W)
-    H = float(H)
-    target_W, target_H = size
-    # Use the smaller scaling factor to maintain the aspect ratio.
-    k_w = float(target_W) / W
-    k_h = float(target_H) / H
-    k = min(k_w, k_h)
-    new_W = int(np.round(W * k / 64.0)) * 64
-    new_H = int(np.round(H * k / 64.0)) * 64
-    img = cv2.resize(
-        image,
-        (new_W, new_H),
-        interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA,
-    )
-    return img
-# ensure image is within bounds
-def get_valid_size(image, step=64, low=512, high=4096):
-    def round_down(x, step=step):
-        return int((x // step) * step)
-    def clamp_range(x, low=low, high=high):
-        return max(low, min(x, high))
-    if isinstance(image, Image.Image):
-        image = np.array(image)
-    H, W = image.shape[:2]
-    ar = W / H
     # try width first
-    if W > H:
-        new_W = round_down(clamp_range(W))
-        new_H = round_down(new_W / ar)
     else:
-        new_H = round_down(clamp_range(H))
-        new_W = round_down(new_H * ar)
-    # if the new size is out of bounds, try the other dimension
-    if new_W < low or new_W > high:
-        new_W = round_down(clamp_range(W))
-        new_H = round_down(new_W / ar)
-    if new_H < low or new_H > high:
-        new_H = round_down(clamp_range(H))
-        new_W = round_down(new_H * ar)
-    return (new_W, new_H)
-# like the original but supports args and kwargs instead of a dict
 # https://github.com/huggingface/huggingface-inference-toolkit/blob/0.2.0/src/huggingface_inference_toolkit/async_utils.py
 async def async_call(fn: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T:
     async with MAX_THREADS_GUARD:

 import os
 import time
 from contextlib import contextmanager
+from typing import Callable, Tuple, TypeVar
 import anyio
 import httpx
 import numpy as np
 from anyio import Semaphore
 from transformers import logging as transformers_logging
 from typing_extensions import ParamSpec
+from .annotators import CannyAnnotator
 from .logger import Logger
 T = TypeVar("T")
         log.error(f"RequestError: {e}")
def image_to_pil(image: Image.Image):
    """Convert a supported image input to an RGB PIL image.

    Args:
        image: A PIL image, a NumPy array accepted by ``Image.fromarray``, or
            a string path to an existing image file.

    Returns:
        A new ``Image.Image`` in RGB mode.

    Raises:
        ValueError: If the input is none of the above, including a string
            that does not point to an existing file.
    """
    if isinstance(image, str) and os.path.isfile(image):
        # Image.open is lazy; convert while the file is open and let the
        # context manager release the handle deterministically.
        with Image.open(image) as opened:
            return opened.convert("RGB")
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)
    if isinstance(image, Image.Image):
        return image.convert("RGB")
    raise ValueError("Invalid image input")
def get_valid_image_size(
    width: int,
    height: int,
    step=64,
    min_size=512,
    max_size=4096,
) -> Tuple[int, int]:
    """Return (width, height) snapped to ``step`` and kept inside the bounds.

    The longer side is clamped to ``[min_size, max_size]`` and rounded down to
    a multiple of ``step``; the other side is derived from the aspect ratio.
    If a derived side falls outside the bounds, the size is recomputed from
    that side instead. When the aspect ratio is too extreme for both sides to
    fit, the bounds win and the aspect ratio is not preserved.

    Args:
        width: Source width in pixels; must be positive.
        height: Source height in pixels; must be positive.
        step: Both returned dimensions are rounded down to a multiple of this.
        min_size: Lower bound applied to each dimension.
        max_size: Upper bound applied to each dimension.

    Returns:
        A ``(new_width, new_height)`` tuple of ints.

    Raises:
        ValueError: If ``width`` or ``height`` is not positive.
    """
    if width <= 0 or height <= 0:
        raise ValueError(f"Invalid image size: {width}x{height}")

    def round_down(x):
        return int((x // step) * step)

    def clamp(x):
        return max(min_size, min(x, max_size))

    aspect_ratio = width / height

    # start from the longer side
    if width > height:
        new_width = round_down(clamp(width))
        new_height = round_down(new_width / aspect_ratio)
    else:
        new_height = round_down(clamp(height))
        new_width = round_down(new_height * aspect_ratio)

    # if a derived side is out of bounds, recompute from that side instead
    if not min_size <= new_width <= max_size:
        new_width = round_down(clamp(width))
        new_height = round_down(new_width / aspect_ratio)
    if not min_size <= new_height <= max_size:
        new_height = round_down(clamp(height))
        new_width = round_down(new_height * aspect_ratio)

    # For extreme aspect ratios the two corrections above undo each other and
    # one side can still exceed the bounds; enforce them as a last resort.
    # This is a no-op for results that were already valid.
    if not min_size <= new_width <= max_size:
        new_width = round_down(clamp(new_width))
    if not min_size <= new_height <= max_size:
        new_height = round_down(clamp(new_height))

    return (new_width, new_height)
def resize_image(
    image: Image.Image,
    size: Tuple[int, int] = None,
    resampling: Image.Resampling = None,
):
    """Resize an image, filling in a valid size and filter when omitted.

    The input is first normalised with ``image_to_pil``. An explicit ``size``
    is used as given; when it is ``None`` the target comes from
    ``get_valid_image_size`` applied to the image's own dimensions. When
    ``resampling`` is ``None``, LANCZOS is used.
    """
    pil_image = image_to_pil(image)
    target_size = get_valid_image_size(*pil_image.size) if size is None else size
    resample_filter = Image.Resampling.LANCZOS if resampling is None else resampling
    return pil_image.resize(target_size, resample_filter)
def annotate_image(image: Image.Image, annotator="canny"):
    """Get the feature map of an image using the specified annotator.

    Args:
        image: Any input accepted by ``image_to_pil``.
        annotator: Annotator name, matched case-insensitively. Only
            ``"canny"`` is handled here.

    Returns:
        Whatever the selected annotator returns for the resized image.

    Raises:
        ValueError: If the image input or the annotator name is not supported.
    """
    # Normalise first: ``.size`` is only a (width, height) tuple on PIL
    # images, so array or path inputs would otherwise fail when unpacked.
    image = image_to_pil(image)
    size = get_valid_image_size(*image.size)
    image = resize_image(image, size)
    if annotator.lower() == "canny":
        canny = CannyAnnotator()
        return canny(image, size)
    raise ValueError(f"Invalid annotator: {annotator}")
+# Like the original but supports args and kwargs instead of a dict
 # https://github.com/huggingface/huggingface-inference-toolkit/blob/0.2.0/src/huggingface_inference_toolkit/async_utils.py
 async def async_call(fn: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T:
     async with MAX_THREADS_GUARD: