Upload 7 files
- config/ace_plus_diffusers_infer.yaml +25 -0
- examples/__init__.py +0 -0
- examples/examples.py +81 -0
- inference/__init__.py +0 -0
- inference/ace_plus_diffusers.py +115 -0
- inference/utils.py +105 -0
- models/model_zoo.yaml +28 -0
config/ace_plus_diffusers_infer.yaml
ADDED
@@ -0,0 +1,25 @@
NAME: ace_plus_diffuser_infer
IS_DEFAULT: True
USE_DYNAMIC_MODEL: False
INFERENCE_TYPE: ACE_DIFFUSER_PLUS
DEFAULT_PARAS:
  PARAS:
    #
  INPUT:
    INPUT_IMAGE:
    INPUT_MASK:
    TASK:
    PROMPT: ""
    OUTPUT_HEIGHT: 1024
    OUTPUT_WIDTH: 1024
    SAMPLER: flow_euler
    SAMPLE_STEPS: 28
    GUIDE_SCALE: 50
    SEED: 42
    MAX_SEQ_LENGTH: 4096
  OUTPUT:
    LATENT:
    IMAGES:
    SEED:
MODEL:
  PRETRAINED_MODEL: ${FLUX_FILL_PATH}
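This config drives ACEPlusDiffuserInference.init_from_cfg in inference/ace_plus_diffusers.py below. A minimal bootstrap sketch, assuming scepter's Config can load the YAML from a cfg_file path and expands ${FLUX_FILL_PATH} from the environment; the example path value is an assumption, not part of this commit:

import os
from scepter.modules.utils.config import Config
from inference.ace_plus_diffusers import ACEPlusDiffuserInference

# Assumed: FLUX_FILL_PATH points at a FLUX.1-Fill-dev checkpoint reachable by scepter's FS.
os.environ.setdefault("FLUX_FILL_PATH", "hf://black-forest-labs/FLUX.1-Fill-dev")

cfg = Config(load=True, cfg_file="config/ace_plus_diffusers_infer.yaml")
model = ACEPlusDiffuserInference()
model.init_from_cfg(cfg)  # loads the base FluxFillPipeline and moves it to CUDA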
examples/__init__.py
ADDED
File without changes
examples/examples.py
ADDED
@@ -0,0 +1,81 @@
all_examples = [
    {
        "input_image": None,
        "input_mask": None,
        "input_reference_image": "assets/samples/portrait/human_1.jpg",
        "save_path": "examples/outputs/portrait_human_1.jpg",
        "instruction": "Maintain the facial features, A girl is wearing a neat police uniform and sporting a badge. She is smiling with a friendly and confident demeanor. The background is blurred, featuring a cartoon logo.",
        "output_h": 1024,
        "output_w": 1024,
        "seed": 4194866942,
        "repainting_scale": 1.0,
        "task_type": "portrait",
        "edit_type": "repainting"
    },
    {
        "input_image": None,
        "input_mask": None,
        "input_reference_image": "assets/samples/subject/subject_1.jpg",
        "save_path": "examples/outputs/subject_subject_1.jpg",
        "instruction": "Display the logo in a minimalist style printed in white on a matte black ceramic coffee mug, alongside a steaming cup of coffee on a cozy cafe table.",
        "output_h": 1024,
        "output_w": 1024,
        "seed": 2935362780,
        "repainting_scale": 1.0,
        "task_type": "subject",
        "edit_type": "repainting"
    },
    {
        "input_image": "assets/samples/local/local_1.webp",
        "input_mask": "assets/samples/local/local_1_m.webp",
        "input_reference_image": None,
        "save_path": "examples/outputs/local_local_1.jpg",
        "instruction": "By referencing the mask, restore a partial image from the doodle {image} that aligns with the textual explanation: \"1 white old owl\".",
        "output_h": -1,
        "output_w": -1,
        "seed": 1159797084,
        "repainting_scale": 0.5,
        "task_type": "local_editing",
        "edit_type": "contour_repainting"
    },
    {
        "input_image": "assets/samples/application/photo_editing/1_1_edit.png",
        "input_mask": "assets/samples/application/photo_editing/1_1_m.png",
        "input_reference_image": "assets/samples/application/photo_editing/1_ref.png",
        "save_path": "examples/outputs/photo_editing_1.jpg",
        "instruction": "The item is put on the ground.",
        "output_h": -1,
        "output_w": -1,
        "seed": 2072028954,
        "repainting_scale": 1.0,
        "task_type": "subject",
        "edit_type": "repainting"
    },
    {
        "input_image": "assets/samples/application/logo_paste/1_1_edit.png",
        "input_mask": "assets/samples/application/logo_paste/1_1_m.png",
        "input_reference_image": "assets/samples/application/logo_paste/1_ref.png",
        "save_path": "examples/outputs/logo_paste_1.jpg",
        "instruction": "The logo is printed on the headphones.",
        "output_h": -1,
        "output_w": -1,
        "seed": 934582264,
        "repainting_scale": 1.0,
        "task_type": "subject",
        "edit_type": "repainting"
    },
    {
        "input_image": "assets/samples/application/movie_poster/1_1_edit.png",
        "input_mask": "assets/samples/application/movie_poster/1_1_m.png",
        "input_reference_image": "assets/samples/application/movie_poster/1_ref.png",
        "save_path": "examples/outputs/movie_poster_1.jpg",
        "instruction": "The man is facing the camera and is smiling.",
        "output_h": -1,
        "output_w": -1,
        "seed": 988183236,
        "repainting_scale": 1.0,
        "task_type": "portrait",
        "edit_type": "repainting"
    }
]
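Each entry is a plain dict whose keys line up with ACEPlusDiffuserInference.__call__ in inference/ace_plus_diffusers.py. A hypothetical driver loop follows; the loading/saving glue is illustrative and not part of this commit, and per-task LoRA selection plus edit-type preprocessing are omitted here (see the sketches after the inference files below):

import os
from PIL import Image
from examples.examples import all_examples

def load(path, mode="RGB"):
    # Helper for this sketch only: examples use None when an input is absent.
    return Image.open(path).convert(mode) if path else None

for ex in all_examples:
    image, seed = model(  # `model` as initialized in the config sketch above
        reference_image=load(ex["input_reference_image"]),
        edit_image=load(ex["input_image"]),
        edit_mask=load(ex["input_mask"], mode="L"),
        prompt=ex["instruction"],
        seed=ex["seed"],
        repainting_scale=ex["repainting_scale"],
    )
    os.makedirs(os.path.dirname(ex["save_path"]), exist_ok=True)
    image.save(ex["save_path"])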
inference/__init__.py
ADDED
File without changes
inference/ace_plus_diffusers.py
ADDED
@@ -0,0 +1,115 @@
# -*- coding: utf-8 -*-
# Copyright (c) Alibaba, Inc. and its affiliates.
import random
from collections import OrderedDict

import torch, os
from diffusers import FluxFillPipeline
from scepter.modules.utils.config import Config
from scepter.modules.utils.distribute import we
from scepter.modules.utils.file_system import FS
from scepter.modules.utils.logger import get_logger
from transformers import T5TokenizerFast
from .utils import ACEPlusImageProcessor


class ACEPlusDiffuserInference():
    def __init__(self, logger=None):
        if logger is None:
            logger = get_logger(name='ace_plus')
        self.logger = logger
        self.input = {}

    def load_default(self, cfg):
        if cfg is not None:
            self.input_cfg = {k.lower(): v for k, v in cfg.INPUT.items()}
            self.input = {
                k.lower(): dict(v).get('DEFAULT', None)
                if isinstance(v, (dict, OrderedDict, Config)) else v
                for k, v in cfg.INPUT.items()
            }
            self.output = {k.lower(): v for k, v in cfg.OUTPUT.items()}

    def init_from_cfg(self, cfg):
        self.max_seq_len = cfg.get("MAX_SEQ_LEN", 4096)
        self.image_processor = ACEPlusImageProcessor(max_seq_len=self.max_seq_len)

        local_folder = FS.get_dir_to_local_dir(cfg.MODEL.PRETRAINED_MODEL)

        self.pipe = FluxFillPipeline.from_pretrained(local_folder, torch_dtype=torch.bfloat16).to("cuda")

        tokenizer_2 = T5TokenizerFast.from_pretrained(os.path.join(local_folder, "tokenizer_2"),
                                                      additional_special_tokens=["{image}"])
        self.pipe.tokenizer_2 = tokenizer_2
        self.load_default(cfg.DEFAULT_PARAS)

    def prepare_input(self,
                      image,
                      mask,
                      batch_size=1,
                      dtype=torch.bfloat16,
                      num_images_per_prompt=1,
                      height=512,
                      width=512,
                      generator=None):
        num_channels_latents = self.pipe.vae.config.latent_channels
        mask, masked_image_latents = self.pipe.prepare_mask_latents(
            mask.unsqueeze(0),
            image.unsqueeze(0).to(we.device_id, dtype=dtype),
            batch_size,
            num_channels_latents,
            num_images_per_prompt,
            height,
            width,
            dtype,
            we.device_id,
            generator,
        )
        masked_image_latents = torch.cat((masked_image_latents, mask), dim=-1)
        return masked_image_latents

    @torch.no_grad()
    def __call__(self,
                 reference_image=None,
                 edit_image=None,
                 edit_mask=None,
                 prompt='',
                 task=None,
                 output_height=1024,
                 output_width=1024,
                 sampler='flow_euler',
                 sample_steps=28,
                 guide_scale=50,
                 lora_path=None,
                 seed=-1,
                 tar_index=0,
                 align=0,
                 repainting_scale=0,
                 **kwargs):
        if isinstance(prompt, str):
            prompt = [prompt]
        seed = seed if seed >= 0 else random.randint(0, 2 ** 32 - 1)
        image, mask, out_h, out_w, slice_w = self.image_processor.preprocess(
            reference_image, edit_image, edit_mask, repainting_scale=repainting_scale)
        h, w = image.shape[1:]
        generator = torch.Generator("cpu").manual_seed(seed)
        masked_image_latents = self.prepare_input(image, mask,
                                                  batch_size=len(prompt), height=h, width=w, generator=generator)

        if lora_path is not None:
            with FS.get_from(lora_path) as local_path:
                self.pipe.load_lora_weights(local_path)

        image = self.pipe(
            prompt=prompt,
            masked_image_latents=masked_image_latents,
            height=h,
            width=w,
            guidance_scale=guide_scale,
            num_inference_steps=sample_steps,
            max_sequence_length=512,
            generator=generator
        ).images[0]
        return self.image_processor.postprocess(image, slice_w, out_w, out_h), seed


if __name__ == '__main__':
    pass
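The per-task ACE++ weights referenced in models/model_zoo.yaml plug in through the lora_path argument, which is fetched via FS.get_from and loaded with load_lora_weights. A single-call sketch, assuming `model` from the earlier bootstrap and a resolvable PORTRAIT_MODEL_PATH environment variable:

import os
from PIL import Image

ref = Image.open("assets/samples/portrait/human_1.jpg").convert("RGB")
image, used_seed = model(
    reference_image=ref,
    prompt="Maintain the facial features, ...",        # full instruction as in examples.py
    lora_path=os.environ.get("PORTRAIT_MODEL_PATH"),   # task-specific ACE++ LoRA (assumed set)
    seed=4194866942,
    repainting_scale=1.0,
)
image.save("examples/outputs/portrait_human_1.jpg")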
inference/utils.py
ADDED
@@ -0,0 +1,105 @@
# -*- coding: utf-8 -*-
# Copyright (c) Alibaba, Inc. and its affiliates.
import math

import torch
import torchvision.transforms as T
import numpy as np
from scepter.modules.annotator.registry import ANNOTATORS
from scepter.modules.utils.config import Config
from PIL import Image


def edit_preprocess(processor, device, edit_image, edit_mask):
    if edit_image is None or processor is None:
        return edit_image
    processor = Config(cfg_dict=processor, load=False)
    processor = ANNOTATORS.build(processor).to(device)
    new_edit_image = processor(np.asarray(edit_image))
    processor = processor.to("cpu")
    del processor
    new_edit_image = Image.fromarray(new_edit_image)
    return Image.composite(new_edit_image, edit_image, edit_mask)


class ACEPlusImageProcessor():
    def __init__(self, max_aspect_ratio=4, d=16, max_seq_len=1024):
        self.max_aspect_ratio = max_aspect_ratio
        self.d = d
        self.max_seq_len = max_seq_len
        self.transforms = T.Compose([
            T.ToTensor(),
            T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])

    def image_check(self, image):
        if image is None:
            return image
        # preprocess
        W, H = image.size
        if H / W > self.max_aspect_ratio:
            image = T.CenterCrop([int(self.max_aspect_ratio * W), W])(image)
        elif W / H > self.max_aspect_ratio:
            image = T.CenterCrop([H, int(self.max_aspect_ratio * H)])(image)
        return self.transforms(image)

    def preprocess(self,
                   reference_image=None,
                   edit_image=None,
                   edit_mask=None,
                   height=1024,
                   width=1024,
                   repainting_scale=1.0):
        reference_image = self.image_check(reference_image)
        edit_image = self.image_check(edit_image)
        # for reference generation
        if edit_image is None:
            edit_image = torch.zeros([3, height, width])
            edit_mask = torch.ones([1, height, width])
        else:
            edit_mask = np.asarray(edit_mask)
            edit_mask = np.where(edit_mask > 128, 1, 0)
            edit_mask = edit_mask.astype(np.float32) if np.any(edit_mask) \
                else np.ones_like(edit_mask).astype(np.float32)
            edit_mask = torch.tensor(edit_mask).unsqueeze(0)

        edit_image = edit_image * (1 - edit_mask * repainting_scale)

        out_h, out_w = edit_image.shape[-2:]

        assert edit_mask is not None
        if reference_image is not None:
            # align height with edit_image
            _, H, W = reference_image.shape
            _, eH, eW = edit_image.shape
            scale = eH / H
            tH, tW = eH, int(W * scale)
            reference_image = T.Resize((tH, tW), interpolation=T.InterpolationMode.BILINEAR, antialias=True)(reference_image)
            edit_image = torch.cat([reference_image, edit_image], dim=-1)
            edit_mask = torch.cat([torch.zeros([1, reference_image.shape[1], reference_image.shape[2]]), edit_mask], dim=-1)
            slice_w = reference_image.shape[-1]
        else:
            slice_w = 0

        H, W = edit_image.shape[-2:]
        scale = min(1.0, math.sqrt(self.max_seq_len * 2 / ((H / self.d) * (W / self.d))))
        rH = int(H * scale) // self.d * self.d  # ensure divisible by self.d
        rW = int(W * scale) // self.d * self.d
        slice_w = int(slice_w * scale) // self.d * self.d

        edit_image = T.Resize((rH, rW), interpolation=T.InterpolationMode.BILINEAR, antialias=True)(edit_image)
        edit_mask = T.Resize((rH, rW), interpolation=T.InterpolationMode.NEAREST_EXACT, antialias=True)(edit_mask)

        return edit_image, edit_mask, out_h, out_w, slice_w

    def postprocess(self, image, slice_w, out_w, out_h):
        w, h = image.size
        if slice_w > 0:
            output_image = image.crop((slice_w + 20, 0, w, h))
            output_image = output_image.resize((out_w, out_h))
        else:
            output_image = image
        return output_image
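The resize rule in preprocess bounds the transformer sequence length: the concatenated (reference | edit) canvas is scaled so that (H/d) * (W/d) stays at or below 2 * max_seq_len patches, then both sides are snapped down to multiples of d. A standalone arithmetic check mirroring that formula:

import math

d, max_seq_len = 16, 4096
H, W = 1536, 2048  # e.g. a reference image concatenated beside an edit image
scale = min(1.0, math.sqrt(max_seq_len * 2 / ((H / d) * (W / d))))  # ~0.816
rH = int(H * scale) // d * d  # 1248
rW = int(W * scale) // d * d  # 1664
assert (rH // d) * (rW // d) <= max_seq_len * 2  # 78 * 104 = 8112 patches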
models/model_zoo.yaml
ADDED
@@ -0,0 +1,28 @@
MODEL:
  PORTRAIT:
    MODEL_PATH: ${PORTRAIT_MODEL_PATH}
  SUBJECT:
    MODEL_PATH: ${SUBJECT_MODEL_PATH}
  LOCAL_EDITING:
    MODEL_PATH: ${LOCAL_MODEL_PATH}
    REPAINTING_SCALE: 0.5
    PREPROCESSOR:
      - NAME: CannyAnnotator
        TYPE: canny_repainting
        LOW_THRESHOLD: 100
        HIGH_THRESHOLD: 200
      - NAME: ColorAnnotator
        TYPE: mosaic_repainting
        RATIO: 64
      - NAME: InfoDrawContourAnnotator
        TYPE: contour_repainting
        INPUT_NC: 3
        OUTPUT_NC: 1
        N_RESIDUAL_BLOCKS: 3
        SIGMOID: True
        PRETRAINED_MODEL: "ms://iic/scepter_annotator@annotator/ckpts/informative_drawing_contour_style.pth"
      - NAME: MidasDetector
        PRETRAINED_MODEL: "ms://iic/scepter_annotator@annotator/ckpts/dpt_hybrid-midas-501f0c75.pt"
        TYPE: depth_repainting
      - NAME: GrayAnnotator
        TYPE: recolorizing