Upload files: v0.1.1

Browse files

Files changed (6) hide show

asdff/__init__.py +9 -0
asdff/__version__.py +1 -0
asdff/sd.py +123 -0
asdff/utils.py +70 -0
asdff/yolo.py +73 -0
pipeline.py +1 -0

asdff/__init__.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from .__version__ import __version__
+from .sd import AdPipeline
+from .yolo import yolo_detector
# Public API of the package, in the order the names are imported above.
__all__ = ["AdPipeline", "yolo_detector", "__version__"]

asdff/__version__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ __version__ = "0.1.1"

asdff/sd.py ADDED Viewed

	@@ -0,0 +1,123 @@

+from __future__ import annotations
+from functools import cached_property
+from typing import Any, Callable, Iterable, List, Mapping, Optional
+from diffusers import StableDiffusionInpaintPipeline, StableDiffusionPipeline
+from diffusers.utils import logging
+from PIL import Image
+from asdff.utils import (
+    ADOutput,
+    bbox_padding,
+    composite,
+    mask_dilate,
+    mask_gaussian_blur,
+)
+from asdff.yolo import yolo_detector
# A detector takes one PIL image and returns a list of mask images, or None
# when it found nothing to mask.
DetectorType = Callable[[Image.Image], Optional[List[Image.Image]]]
# Messages from this module are emitted under the "diffusers" logger name.
logger = logging.get_logger("diffusers")
def ordinal(n: int) -> str:
    """Return ``n`` followed by its English ordinal suffix (``1st``, ``12th``, ...)."""
    # Numbers ending in 11, 12 or 13 are irregular and always take "th".
    if 11 <= n % 100 <= 13:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    return f"{n}{suffix}"
class AdPipeline(StableDiffusionPipeline):
    """Text-to-image pipeline followed by detector-guided inpainting.

    Every image produced by the text-to-image pass is handed to one or more
    detectors. Each mask a detector returns is dilated, blurred, cropped with
    padding and regenerated by an inpainting pipeline that shares this
    pipeline's components; the result is composited back onto the image.
    """

    @cached_property
    def inpaint_pipeline(self):
        """Inpainting pipeline reusing this pipeline's components.

        Built on first access and cached on the instance.
        """
        return StableDiffusionInpaintPipeline(
            vae=self.vae,
            text_encoder=self.text_encoder,
            tokenizer=self.tokenizer,
            unet=self.unet,
            scheduler=self.scheduler,
            safety_checker=self.safety_checker,
            feature_extractor=self.feature_extractor,
            requires_safety_checker=self.config.requires_safety_checker,
        )

    def __call__(  # noqa: C901
        self,
        common: Mapping[str, Any] | None = None,
        txt2img_only: Mapping[str, Any] | None = None,
        inpaint_only: Mapping[str, Any] | None = None,
        detectors: DetectorType | Iterable[DetectorType] | None = None,
        mask_dilation: int = 4,
        mask_blur: int = 4,
        mask_padding: int = 32,
    ):
        """Generate images, then inpaint every region the detectors report.

        Parameters
        ----------
        common: Mapping[str, Any] | None
            Keyword arguments passed to both the text-to-image call and every
            inpainting call.
        txt2img_only: Mapping[str, Any] | None
            Keyword arguments for the text-to-image call only. They take
            precedence over ``common``.
        inpaint_only: Mapping[str, Any] | None
            Keyword arguments for the inpainting calls only. They take
            precedence over ``common``. ``strength`` defaults to 0.4 when
            it is not given here.
        detectors: DetectorType | Iterable[DetectorType] | None
            A single detector, an iterable of detectors, or None to use
            ``default_detector``. Detectors run in order, and each one sees
            the image as modified by the previous inpainting steps.
        mask_dilation: int
            Value passed to ``mask_dilate`` for each mask.
        mask_blur: int
            Value passed to ``mask_gaussian_blur`` for each mask.
        mask_padding: int
            Value passed to ``bbox_padding`` when cropping around a mask.

        Returns
        -------
        ADOutput
            ``init_images`` holds a copy of every text-to-image result.
            ``images`` holds the final image only for inputs where at least
            one mask was inpainted, so it can be shorter than ``init_images``.
        """
        common = {} if common is None else common
        txt2img_only = {} if txt2img_only is None else txt2img_only
        inpaint_only = {} if inpaint_only is None else inpaint_only
        # Explicit "strength" in inpaint_only wins over the 0.4 default.
        inpaint_only = {"strength": 0.4, **inpaint_only}

        if detectors is None:
            detectors = [self.default_detector]
        elif callable(detectors):
            detectors = [detectors]
        else:
            # The detectors are iterated once per generated image; a one-shot
            # iterable (e.g. a generator) would be empty from the second
            # image on.
            detectors = list(detectors)

        # Merge into dicts instead of mixing ``**mapping`` with literal
        # keywords: a key present in ``common`` as well (for example
        # ``num_images_per_prompt`` or ``output_type``) would otherwise raise
        # "got multiple values for keyword argument". Later entries win, and
        # the values this method depends on are always applied last.
        txt2img_args = {**common, **txt2img_only, "output_type": "pil"}
        inpaint_base_args = {
            **common,
            **inpaint_only,
            "num_images_per_prompt": 1,
            "output_type": "pil",
        }

        txt2img_output = super().__call__(**txt2img_args)
        txt2img_images: list[Image.Image] = txt2img_output[0]

        init_images = []
        final_images = []
        for i, init_image in enumerate(txt2img_images):
            init_images.append(init_image.copy())
            final_image = None
            for j, detector in enumerate(detectors):
                masks = detector(init_image)
                if masks is None:
                    logger.info(
                        f"No object detected on {ordinal(i + 1)} image with {ordinal(j + 1)} detector."
                    )
                    continue
                for k, mask in enumerate(masks):
                    mask = mask.convert("L")
                    mask = mask_dilate(mask, mask_dilation)
                    # The bounding box is taken before blurring the mask.
                    bbox = mask.getbbox()
                    if bbox is None:
                        logger.info(f"No object in {ordinal(k + 1)} mask.")
                        continue
                    mask = mask_gaussian_blur(mask, mask_blur)
                    bbox_padded = bbox_padding(bbox, init_image.size, mask_padding)
                    inpaint_args = {
                        **inpaint_base_args,
                        "image": init_image.crop(bbox_padded),
                        "mask_image": mask.crop(bbox_padded),
                    }
                    inpaint_output = self.inpaint_pipeline(**inpaint_args)
                    inpaint_image: Image.Image = inpaint_output[0][0]
                    final_image = composite(
                        init=init_image,
                        mask=mask,
                        gen=inpaint_image,
                        bbox_padded=bbox_padded,
                    )
                    # Later masks and detectors work on the updated image.
                    init_image = final_image
            if final_image is not None:
                final_images.append(final_image)
        return ADOutput(images=final_images, init_images=init_images)

    @property
    def default_detector(self) -> Callable[..., list[Image.Image] | None]:
        """Detector used when ``detectors`` is not given: ``yolo_detector``."""
        return yolo_detector

asdff/utils.py ADDED Viewed

	@@ -0,0 +1,70 @@

+from __future__ import annotations
+from dataclasses import dataclass
+import cv2
+import numpy as np
+from diffusers.utils import BaseOutput
+from PIL import Image, ImageFilter, ImageOps
@dataclass
class ADOutput(BaseOutput):
    """Output container holding two lists of PIL images."""

    # Final images.
    images: list[Image.Image]
    # Initial images, kept alongside the final ones.
    init_images: list[Image.Image]
def mask_dilate(image: Image.Image, value: int = 4) -> Image.Image:
    """Dilate ``image`` once with a ``value`` x ``value`` rectangular kernel.

    A non-positive ``value`` disables dilation and returns ``image`` itself.
    """
    if value > 0:
        structuring_element = cv2.getStructuringElement(
            cv2.MORPH_RECT, (value, value)
        )
        pixels = np.array(image)
        return Image.fromarray(cv2.dilate(pixels, structuring_element, iterations=1))
    return image
def mask_gaussian_blur(image: Image.Image, value: int = 4) -> Image.Image:
    """Apply ``ImageFilter.GaussianBlur(value)`` to ``image``.

    A non-positive ``value`` disables blurring and returns ``image`` itself.
    """
    if value > 0:
        return image.filter(ImageFilter.GaussianBlur(value))
    return image
def bbox_padding(
    bbox: tuple[int, int, int, int], image_size: tuple[int, int], value: int = 32
) -> tuple[int, int, int, int]:
    """Grow ``bbox`` by ``value`` pixels on every side, clipped to the image.

    Parameters
    ----------
    bbox: tuple[int, int, int, int]
        Bounding box as (x1, y1, x2, y2).
    image_size: tuple[int, int]
        (width, height) used as the upper clipping bound.
    value: int
        Padding in pixels. A non-positive value returns ``bbox`` unchanged.

    Returns
    -------
    tuple[int, int, int, int]
        The padded box as native Python numbers.
    """
    if value <= 0:
        return bbox
    # Row 0 is the top-left corner, row 1 the bottom-right corner.
    corners = np.array(bbox).reshape(2, 2)
    corners[0] -= value
    corners[1] += value
    # Both corners are limited to [0, width] x [0, height].
    corners = np.clip(corners, (0, 0), image_size)
    # tolist() yields native Python numbers rather than numpy scalars, so the
    # result matches the annotated return type.
    return tuple(corners.flatten().tolist())
def composite(
    init: Image.Image,
    mask: Image.Image,
    gen: Image.Image,
    bbox_padded: tuple[int, int, int, int],
) -> Image.Image:
    """Paste ``gen`` into the ``bbox_padded`` region of ``init`` through ``mask``.

    ``gen`` is resized to the size of ``bbox_padded`` and placed on an empty
    canvas of ``init``'s size; ``init``, pasted through the inverted ``mask``,
    is then alpha-composited on top. The result is returned in RGB mode.
    """
    left, top, right, bottom = bbox_padded
    patch = gen.resize((right - left, bottom - top))
    canvas = Image.new("RGBA", init.size)
    canvas.paste(patch, bbox_padded)

    # Original image pasted through the inverted mask.
    preserved = Image.new("RGBa", init.size)
    preserved.paste(
        init.convert("RGBA").convert("RGBa"),
        mask=ImageOps.invert(mask),
    )
    canvas.alpha_composite(preserved.convert("RGBA"))
    return canvas.convert("RGB")

asdff/yolo.py ADDED Viewed

	@@ -0,0 +1,73 @@

+from __future__ import annotations
+import numpy as np
+import torch
+from huggingface_hub import hf_hub_download
+from PIL import Image, ImageDraw
+from torchvision.transforms.functional import to_pil_image
+from ultralytics import YOLO
def create_mask_from_bbox(
    bboxes: np.ndarray, shape: tuple[int, int]
) -> list[Image.Image]:
    """
    Draw one rectangular mask per bounding box.

    Parameters
    ----------
        bboxes: np.ndarray
            array (or any iterable) of [x1, y1, x2, y2]
            bounding boxes
        shape: tuple[int, int]
            shape of the image (width, height)
    Returns
    -------
        masks: list[Image.Image]
        A list of "L" mode masks, black with the box area filled white.
        Empty when ``bboxes`` is empty.
    """
    masks = []
    for bbox in bboxes:
        mask = Image.new("L", shape, "black")
        mask_draw = ImageDraw.Draw(mask)
        # Hand Pillow plain Python floats so the drawn rectangle does not
        # depend on how a numpy buffer of a given dtype is interpreted.
        coords = tuple(float(v) for v in bbox)
        mask_draw.rectangle(coords, fill="white")
        masks.append(mask)
    return masks
def mask_to_pil(masks: torch.Tensor, shape: tuple[int, int]) -> list[Image.Image]:
    """
    Convert a stack of masks into PIL images resized to ``shape``.

    Parameters
    ----------
    masks: torch.Tensor, dtype=torch.float32, shape=(N, H, W).
        The device can be CUDA, but `to_pil_image` takes care of that.
    shape: tuple[int, int]
        (width, height) of the original image
    Returns
    -------
    images: list[Image.Image]
        One "L" mode image per mask, in the same order.
    """
    images = []
    for index in range(masks.shape[0]):
        pil_mask = to_pil_image(masks[index], mode="L")
        images.append(pil_mask.resize(shape))
    return images
def yolo_detector(
    image: Image.Image, model_path: str | None = None, confidence: float = 0.3
) -> list[Image.Image] | None:
    """
    Run a YOLO model on ``image`` and return one mask per detection.

    Parameters
    ----------
    image: Image.Image
        The image to run detection on.
    model_path: str | None
        Path of the YOLO weights. When empty or None, ``face_yolov8n.pt`` is
        fetched from the ``Bingsu/adetailer`` repository via `hf_hub_download`.
    confidence: float
        Passed to the model as ``conf``.
    Returns
    -------
    masks: list[Image.Image] | None
        None when no bounding box is predicted. Otherwise the predicted masks
        if the result has any, else rectangular masks built from the boxes.
    """
    weights = model_path or hf_hub_download("Bingsu/adetailer", "face_yolov8n.pt")
    detector = YOLO(weights)
    # Only the first result is used.
    result = detector(image, conf=confidence)[0]
    boxes = result.boxes.xyxy.cpu().numpy()
    if boxes.size == 0:
        return None
    if result.masks is not None:
        return mask_to_pil(result.masks.data, image.size)
    return create_mask_from_bbox(boxes, image.size)

pipeline.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ from asdff import AdPipeline # noqa: F401