# PowerPaint_v2/main.py
import sys
import cv2
import numpy as np
import torch
from PIL import Image, ImageOps
from transformers import CLIPTextModel, CLIPTokenizer
from diffusers.utils import load_image
from diffusers import DPMSolverMultistepScheduler
from powerpaint_v2.BrushNet_CA import BrushNetModel
from powerpaint_v2.pipeline_PowerPaint_Brushnet_CA import (
StableDiffusionPowerPaintBrushNetPipeline,
)
from powerpaint_v2.power_paint_tokenizer import PowerPaintTokenizer
from powerpaint_v2.unet_2d_condition import UNet2DConditionModel
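
# PowerPaint v2 conditions generation on learned task tokens rather than only
# free-form text: "P_ctxt" (fill from surrounding context), "P_obj" (synthesize
# an object) and "P_shape" (shape-guided synthesis). Each task maps to a pair
# of positive/negative token prompts.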
TASK_PROMPTS = {
    # task: (promptA, promptB, negative_promptA, negative_promptB)
    "object-removal": ("P_ctxt", "P_ctxt", "P_obj", "P_obj"),
    "context-aware": ("P_ctxt", "P_ctxt", "", ""),
    "shape-guided": ("P_shape", "P_ctxt", "P_shape", "P_ctxt"),
    "image-outpainting": ("P_ctxt", "P_ctxt", "P_obj", "P_obj"),
}


def task_to_prompt(control_type):
    """Return (promptA, promptB, negative_promptA, negative_promptB) for a task."""
    # Any other task (e.g. "inpaint") falls back to the object-synthesis tokens.
    return TASK_PROMPTS.get(control_type, ("P_obj", "P_obj", "P_obj", "P_obj"))

@torch.inference_mode()
def predict(
pipe,
input_image,
prompt,
fitting_degree,
ddim_steps,
scale,
negative_prompt,
task,
):
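    """Run a single PowerPaint task and return the resulting PIL image."""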
promptA, promptB, negative_promptA, negative_promptB = task_to_prompt(task)
print(task, promptA, promptB, negative_promptA, negative_promptB)
    img = np.array(input_image["image"].convert("RGB"))
    # Round both sides down to a multiple of 8 (the VAE downsampling factor).
    # np.shape(img)[0] is the height, np.shape(img)[1] the width.
    H = int(np.shape(img)[0] - np.shape(img)[0] % 8)
    W = int(np.shape(img)[1] - np.shape(img)[1] % 8)
    input_image["image"] = input_image["image"].resize((W, H))
    input_image["mask"] = input_image["mask"].resize((W, H))
    # Zero out the masked pixels so the conditioning image shows only context.
    np_inpimg = np.array(input_image["image"])
    np_inmask = np.array(input_image["mask"]) / 255.0
    np_inpimg = np_inpimg * (1 - np_inmask)
    input_image["image"] = Image.fromarray(np_inpimg.astype(np.uint8)).convert("RGB")
result = pipe(
promptA=promptA,
promptB=promptB,
promptU=prompt,
tradoff=fitting_degree,
tradoff_nag=fitting_degree,
image=input_image["image"].convert("RGB"),
mask=input_image["mask"].convert("RGB"),
num_inference_steps=ddim_steps,
brushnet_conditioning_scale=1.0,
negative_promptA=negative_promptA,
negative_promptB=negative_promptB,
negative_promptU=negative_prompt,
guidance_scale=scale,
        width=W,
        height=H,
).images[0]
return result
# Usage: python main.py <base_model_name>
# e.g. base_model_name = "runwayml/stable-diffusion-v1-5"
base_model_name = sys.argv[1]
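
# The PowerPaint-specific weights live in local folders next to this script
# ("text_encoder_brushnet", "./PowerPaint_Brushnet", "./tokenizer"); every
# component is loaded as its fp16 variant.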
text_encoder_brushnet = CLIPTextModel.from_pretrained(
"text_encoder_brushnet",
variant="fp16",
torch_dtype=torch.float16,
)
unet = UNet2DConditionModel.from_pretrained(
base_model_name,
subfolder="unet",
variant="fp16",
torch_dtype=torch.float16,
)
brushnet = BrushNetModel.from_pretrained(
"./PowerPaint_Brushnet",
variant="fp16",
torch_dtype=torch.float16,
)
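
# Assemble the PowerPaint pipeline: the base SD checkpoint supplies the
# remaining components (VAE, standard text encoder, scheduler config), while
# the UNet, BrushNet and PowerPaint text encoder loaded above are swapped in.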
pipe = StableDiffusionPowerPaintBrushNetPipeline.from_pretrained(
base_model_name,
torch_dtype=torch.float16,
safety_checker=None,
unet=unet,
brushnet=brushnet,
text_encoder_brushnet=text_encoder_brushnet,
variant="fp16",
)
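# PowerPaintTokenizer wraps the CLIP tokenizer so the P_ctxt/P_obj/P_shape
# task tokens are expanded into their learned prompt embeddings.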
pipe.tokenizer = PowerPaintTokenizer(CLIPTokenizer.from_pretrained("./tokenizer"))
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
# Targets Apple Silicon (MPS); switch to "cuda" or "cpu" on other hardware.
pipe = pipe.to("mps")
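
# Demo inputs: the standard latent-diffusion inpainting example (an image plus
# a mask covering the subject), resized to 512x512.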
img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
image = load_image(img_url).convert("RGB").resize((512, 512))
mask = load_image(mask_url).convert("RGB").resize((512, 512))
input_image = {"image": image, "mask": mask}
prompt = "Face of a fox sitting on a bench"
negative_prompt = "out of frame, lowres, error, cropped, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, out of frame, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, disfigured, gross proportions, malformed limbs, watermark, signature"
fitting_degree = 1
steps = 30
tasks = [
{
"task": "object-removal",
"guidance_scale": 12,
"prompt": "empty scene blur",
"negative_prompt": "",
},
{
"task": "shape-guided",
"guidance_scale": 7.5,
"prompt": prompt,
"negative_prompt": negative_prompt,
},
{
"task": "context-aware",
"guidance_scale": 7.5,
"prompt": "empty secne",
"negative_prompt": negative_prompt,
},
{
"task": "inpaint",
"guidance_scale": 7.5,
"prompt": prompt,
"negative_prompt": negative_prompt,
},
{
"task": "image-outpainting",
"guidance_scale": 7.5,
"prompt": "",
"negative_prompt": negative_prompt,
},
]
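
# Run every task on the same demo inputs. Outpainting pads the image with gray
# and marks only the new border for filling; it mutates input_image in place,
# which is why it is the last task in the list.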
for task in tasks:
if task["task"] == "image-outpainting":
margin = 128
input_image["image"] = ImageOps.expand(
input_image["image"],
border=(margin, margin, margin, margin),
fill=(127, 127, 127),
)
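        # Reset the interior mask to black (keep) and mark only the new 128 px
        # border white (fill), matching the padded image above.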
outpaint_mask = np.zeros_like(np.asarray(input_image["mask"]))
input_image["mask"] = Image.fromarray(
cv2.copyMakeBorder(
outpaint_mask,
margin,
margin,
margin,
margin,
cv2.BORDER_CONSTANT,
value=(255, 255, 255),
)
)
result_image = predict(
pipe,
input_image,
task["prompt"],
fitting_degree,
steps,
task["guidance_scale"],
task["negative_prompt"],
task["task"],
)
result_image.save(f"{task['task']}_result.png")