Upload files: v0.2.1

Browse files

Files changed (7) hide show

asdff/__init__.py +10 -0
asdff/__version__.py +1 -0
asdff/base.py +174 -0
asdff/sd.py +51 -0
asdff/utils.py +70 -0
asdff/yolo.py +80 -0
pipeline.py +1 -0

asdff/__init__.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from .__version__ import __version__
+from .sd import AdCnPipeline, AdPipeline
+from .yolo import yolo_detector
+# Names exported by `from asdff import *`: the two pipelines, the default
+# detector function and the package version string.
+__all__ = [
+    "AdPipeline",
+    "AdCnPipeline",
+    "yolo_detector",
+    "__version__",
+]

asdff/__version__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ __version__ = "0.2.1"

asdff/base.py ADDED Viewed

	@@ -0,0 +1,174 @@

+from __future__ import annotations
+import inspect
+from abc import ABC, abstractmethod
+from typing import Any, Callable, Iterable, List, Mapping, Optional
+from diffusers.utils import logging
+from PIL import Image
+from asdff.utils import (
+    ADOutput,
+    bbox_padding,
+    composite,
+    mask_dilate,
+    mask_gaussian_blur,
+)
+from asdff.yolo import yolo_detector
+# Messages from this module are emitted through the logger named "diffusers".
+logger = logging.get_logger("diffusers")
+# A detector takes an image and returns a list of mask images, or None when
+# it finds nothing.
+DetectorType = Callable[[Image.Image], Optional[List[Image.Image]]]
+def ordinal(n: int) -> str:
+    d = {1: "st", 2: "nd", 3: "rd"}
+    return str(n) + ("th" if 11 <= n % 100 <= 13 else d.get(n % 10, "th"))
+class AdPipelineBase(ABC):
+    @property
+    @abstractmethod
+    def inpaint_pipeline(self) -> Callable:
+        raise NotImplementedError
+    @property
+    @abstractmethod
+    def txt2img_class(self) -> type:
+        raise NotImplementedError
+    def __call__(  # noqa: C901
+        self,
+        common: Mapping[str, Any] | None = None,
+        txt2img_only: Mapping[str, Any] | None = None,
+        inpaint_only: Mapping[str, Any] | None = None,
+        images: Image.Image | Iterable[Image.Image] | None = None,
+        detectors: DetectorType | Iterable[DetectorType] | None = None,
+        mask_dilation: int = 4,
+        mask_blur: int = 4,
+        mask_padding: int = 32,
+    ):
+        if common is None:
+            common = {}
+        if txt2img_only is None:
+            txt2img_only = {}
+        if inpaint_only is None:
+            inpaint_only = {}
+        if "strength" not in inpaint_only:
+            inpaint_only = {**inpaint_only, "strength": 0.4}
+        if detectors is None:
+            detectors = [self.default_detector]
+        elif not isinstance(detectors, Iterable):
+            detectors = [detectors]
+        if images is None:
+            txt2img_output = self.process_txt2img(common, txt2img_only)
+            txt2img_images = txt2img_output[0]
+        else:
+            if txt2img_only:
+                msg = "Both `images` and `txt2img_only` are specified. if `images` is specified, `txt2img_only` is ignored."
+                logger.warning(msg)
+            txt2img_images = [images] if not isinstance(images, Iterable) else images
+        init_images = []
+        final_images = []
+        for i, init_image in enumerate(txt2img_images):
+            init_images.append(init_image.copy())
+            final_image = None
+            for j, detector in enumerate(detectors):
+                masks = detector(init_image)
+                if masks is None:
+                    logger.info(
+                        f"No object detected on {ordinal(i + 1)} image with {ordinal(j + 1)} detector."
+                    )
+                    continue
+                for k, mask in enumerate(masks):
+                    mask = mask.convert("L")
+                    mask = mask_dilate(mask, mask_dilation)
+                    bbox = mask.getbbox()
+                    if bbox is None:
+                        logger.info(f"No object in {ordinal(k + 1)} mask.")
+                        continue
+                    mask = mask_gaussian_blur(mask, mask_blur)
+                    bbox_padded = bbox_padding(bbox, init_image.size, mask_padding)
+                    inpaint_output = self.process_inpainting(
+                        common,
+                        inpaint_only,
+                        init_image,
+                        mask,
+                        bbox_padded,
+                    )
+                    inpaint_image = inpaint_output[0][0]
+                    final_image = composite(
+                        init_image,
+                        mask,
+                        inpaint_image,
+                        bbox_padded,
+                    )
+                    init_image = final_image
+            if final_image is not None:
+                final_images.append(final_image)
+        return ADOutput(images=final_images, init_images=init_images)
+    @property
+    def default_detector(self) -> Callable[..., list[Image.Image] | None]:
+        return yolo_detector
+    def _get_txt2img_args(
+        self, common: Mapping[str, Any], txt2img_only: Mapping[str, Any]
+    ):
+        return {**common, **txt2img_only, "output_type": "pil"}
+    def _get_inpaint_args(
+        self, common: Mapping[str, Any], inpaint_only: Mapping[str, Any]
+    ):
+        common = dict(common)
+        sig = inspect.signature(self.inpaint_pipeline)
+        if (
+            "control_image" in sig.parameters
+            and "control_image" not in common
+            and "image" in common
+        ):
+            common["control_image"] = common.pop("image")
+        return {
+            **common,
+            **inpaint_only,
+            "num_images_per_prompt": 1,
+            "output_type": "pil",
+        }
+    def process_txt2img(
+        self, common: Mapping[str, Any], txt2img_only: Mapping[str, Any]
+    ):
+        txt2img_args = self._get_txt2img_args(common, txt2img_only)
+        return self.txt2img_class.__call__(self, **txt2img_args)
+    def process_inpainting(
+        self,
+        common: Mapping[str, Any],
+        inpaint_only: Mapping[str, Any],
+        init_image: Image.Image,
+        mask: Image.Image,
+        bbox_padded: tuple[int, int, int, int],
+    ):
+        crop_image = init_image.crop(bbox_padded)
+        crop_mask = mask.crop(bbox_padded)
+        inpaint_args = self._get_inpaint_args(common, inpaint_only)
+        inpaint_args["image"] = crop_image
+        inpaint_args["mask_image"] = crop_mask
+        if "control_image" in inpaint_args:
+            inpaint_args["control_image"] = inpaint_args["control_image"].resize(
+                crop_image.size
+            )
+        return self.inpaint_pipeline(**inpaint_args)

asdff/sd.py ADDED Viewed

	@@ -0,0 +1,51 @@

+from __future__ import annotations
+from functools import cached_property
+from diffusers import (
+    StableDiffusionControlNetInpaintPipeline,
+    StableDiffusionControlNetPipeline,
+    StableDiffusionInpaintPipeline,
+    StableDiffusionPipeline,
+)
+from asdff.base import AdPipelineBase
+class AdPipeline(AdPipelineBase, StableDiffusionPipeline):
+    @cached_property
+    def inpaint_pipeline(self):
+        return StableDiffusionInpaintPipeline(
+            vae=self.vae,
+            text_encoder=self.text_encoder,
+            tokenizer=self.tokenizer,
+            unet=self.unet,
+            scheduler=self.scheduler,
+            safety_checker=self.safety_checker,
+            feature_extractor=self.feature_extractor,
+            requires_safety_checker=self.config.requires_safety_checker,
+        )
+    @property
+    def txt2img_class(self):
+        return StableDiffusionPipeline
+class AdCnPipeline(AdPipelineBase, StableDiffusionControlNetPipeline):
+    @cached_property
+    def inpaint_pipeline(self):
+        return StableDiffusionControlNetInpaintPipeline(
+            vae=self.vae,
+            text_encoder=self.text_encoder,
+            tokenizer=self.tokenizer,
+            unet=self.unet,
+            controlnet=self.controlnet,
+            scheduler=self.scheduler,
+            safety_checker=self.safety_checker,
+            feature_extractor=self.feature_extractor,
+            requires_safety_checker=self.config.requires_safety_checker,
+        )
+    @property
+    def txt2img_class(self):
+        return StableDiffusionControlNetPipeline

asdff/utils.py ADDED Viewed

	@@ -0,0 +1,70 @@

+from __future__ import annotations
+from dataclasses import dataclass
+import cv2
+import numpy as np
+from diffusers.utils import BaseOutput
+from PIL import Image, ImageFilter, ImageOps
+@dataclass
+class ADOutput(BaseOutput):
+    """Output container holding the processed images and the images they started from."""
+    # Images after the inpaint pass.
+    images: list[Image.Image]
+    # Images as they were before the inpaint pass.
+    init_images: list[Image.Image]
+def mask_dilate(image: Image.Image, value: int = 4) -> Image.Image:
+    if value <= 0:
+        return image
+    arr = np.array(image)
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (value, value))
+    dilated = cv2.dilate(arr, kernel, iterations=1)
+    return Image.fromarray(dilated)
+def mask_gaussian_blur(image: Image.Image, value: int = 4) -> Image.Image:
+    if value <= 0:
+        return image
+    blur = ImageFilter.GaussianBlur(value)
+    return image.filter(blur)
+def bbox_padding(
+    bbox: tuple[int, int, int, int], image_size: tuple[int, int], value: int = 32
+) -> tuple[int, int, int, int]:
+    if value <= 0:
+        return bbox
+    arr = np.array(bbox).reshape(2, 2)
+    arr[0] -= value
+    arr[1] += value
+    arr = np.clip(arr, (0, 0), image_size)
+    return tuple(arr.flatten())
+def composite(
+    init: Image.Image,
+    mask: Image.Image,
+    gen: Image.Image,
+    bbox_padded: tuple[int, int, int, int],
+) -> Image.Image:
+    img_masked = Image.new("RGBa", init.size)
+    img_masked.paste(
+        init.convert("RGBA").convert("RGBa"),
+        mask=ImageOps.invert(mask),
+    )
+    img_masked = img_masked.convert("RGBA")
+    size = (
+        bbox_padded[2] - bbox_padded[0],
+        bbox_padded[3] - bbox_padded[1],
+    )
+    resized = gen.resize(size)
+    output = Image.new("RGBA", init.size)
+    output.paste(resized, bbox_padded)
+    output.alpha_composite(img_masked)
+    return output.convert("RGB")

asdff/yolo.py ADDED Viewed

	@@ -0,0 +1,80 @@

+from __future__ import annotations
+from pathlib import Path
+import numpy as np
+import torch
+from huggingface_hub import hf_hub_download
+from PIL import Image, ImageDraw
+from torchvision.transforms.functional import to_pil_image
+try:
+    from ultralytics import YOLO
+except ModuleNotFoundError:
+    print("Please install ultralytics using `pip install ultralytics`")
+    raise
+def create_mask_from_bbox(
+    bboxes: np.ndarray, shape: tuple[int, int]
+) -> list[Image.Image]:
+    """
+    Parameters
+    ----------
+        bboxes: list[list[float]]
+            list of [x1, y1, x2, y2]
+            bounding boxes
+        shape: tuple[int, int]
+            shape of the image (width, height)
+    Returns
+    -------
+        masks: list[Image.Image]
+        A list of masks
+    """
+    masks = []
+    for bbox in bboxes:
+        mask = Image.new("L", shape, "black")
+        mask_draw = ImageDraw.Draw(mask)
+        mask_draw.rectangle(bbox, fill="white")
+        masks.append(mask)
+    return masks
+def mask_to_pil(masks: torch.Tensor, shape: tuple[int, int]) -> list[Image.Image]:
+    """
+    Parameters
+    ----------
+    masks: torch.Tensor, dtype=torch.float32, shape=(N, H, W).
+        The device can be CUDA, but `to_pil_image` takes care of that.
+    shape: tuple[int, int]
+        (width, height) of the original image
+    Returns
+    -------
+    images: list[Image.Image]
+    """
+    n = masks.shape[0]
+    return [to_pil_image(masks[i], mode="L").resize(shape) for i in range(n)]
+def yolo_detector(
+    image: Image.Image, model_path: str | Path | None = None, confidence: float = 0.3
+) -> list[Image.Image] | None:
+    if not model_path:
+        model_path = hf_hub_download("Bingsu/adetailer", "face_yolov8n.pt")
+    model = YOLO(model_path)
+    pred = model(image, conf=confidence)
+    bboxes = pred[0].boxes.xyxy.cpu().numpy()
+    if bboxes.size == 0:
+        return None
+    if pred[0].masks is None:
+        masks = create_mask_from_bbox(bboxes, image.size)
+    else:
+        masks = mask_to_pil(pred[0].masks.data, image.size)
+    return masks

pipeline.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from asdff import AdCnPipeline # noqa: F401