import sys

import cv2
import numpy as np
import torch
from PIL import Image, ImageOps
from transformers import CLIPTextModel, CLIPTokenizer
from diffusers.utils import load_image
from diffusers import DPMSolverMultistepScheduler

from powerpaint_v2.BrushNet_CA import BrushNetModel
from powerpaint_v2.pipeline_PowerPaint_Brushnet_CA import (
    StableDiffusionPowerPaintBrushNetPipeline,
)
from powerpaint_v2.power_paint_tokenizer import PowerPaintTokenizer
from powerpaint_v2.unet_2d_condition import UNet2DConditionModel

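# Select the PowerPaint task-prompt token pair (and its negative counterpart)
# for the requested task: P_ctxt, P_obj, and P_shape are the task tokens used
# by the PowerPaint tokenizer.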
def task_to_prompt(control_type):
    if control_type == "object-removal":
        promptA = "P_ctxt"
        promptB = "P_ctxt"
        negative_promptA = "P_obj"
        negative_promptB = "P_obj"
    elif control_type == "context-aware":
        promptA = "P_ctxt"
        promptB = "P_ctxt"
        negative_promptA = ""
        negative_promptB = ""
    elif control_type == "shape-guided":
        promptA = "P_shape"
        promptB = "P_ctxt"
        negative_promptA = "P_shape"
        negative_promptB = "P_ctxt"
    elif control_type == "image-outpainting":
        promptA = "P_ctxt"
        promptB = "P_ctxt"
        negative_promptA = "P_obj"
        negative_promptB = "P_obj"
    else:
        promptA = "P_obj"
        promptB = "P_obj"
        negative_promptA = "P_obj"
        negative_promptB = "P_obj"

    return promptA, promptB, negative_promptA, negative_promptB

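# Resize the image and mask to multiples of 8, blank out the masked region,
# and run the BrushNet pipeline for the given task.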
@torch.inference_mode()
def predict(
    pipe,
    input_image,
    prompt,
    fitting_degree,
    ddim_steps,
    scale,
    negative_prompt,
    task,
):
    promptA, promptB, negative_promptA, negative_promptB = task_to_prompt(task)
    print(task, promptA, promptB, negative_promptA, negative_promptB)
    img = np.array(input_image["image"].convert("RGB"))

    # Round both dimensions down to a multiple of 8, as required by the VAE.
    height = int(img.shape[0] - img.shape[0] % 8)
    width = int(img.shape[1] - img.shape[1] % 8)
    input_image["image"] = input_image["image"].resize((width, height))
    input_image["mask"] = input_image["mask"].resize((width, height))

    np_inpimg = np.array(input_image["image"])
    np_inmask = np.array(input_image["mask"]) / 255.0

    # Zero out the masked region so only the surrounding context is visible.
    np_inpimg = np_inpimg * (1 - np_inmask)

    input_image["image"] = Image.fromarray(np_inpimg.astype(np.uint8)).convert("RGB")

    result = pipe(
        promptA=promptA,
        promptB=promptB,
        promptU=prompt,
        tradoff=fitting_degree,
        tradoff_nag=fitting_degree,
        image=input_image["image"].convert("RGB"),
        mask=input_image["mask"].convert("RGB"),
        num_inference_steps=ddim_steps,
        brushnet_conditioning_scale=1.0,
        negative_promptA=negative_promptA,
        negative_promptB=negative_promptB,
        negative_promptU=negative_prompt,
        guidance_scale=scale,
        width=width,
        height=height,
    ).images[0]
    return result

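# Assemble the PowerPaint pipeline: its text encoder, UNet, and BrushNet weights
# on top of the base Stable Diffusion checkpoint passed on the command line.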
base_model_name = sys.argv[1]
text_encoder_brushnet = CLIPTextModel.from_pretrained(
    "text_encoder_brushnet",
    variant="fp16",
    torch_dtype=torch.float16,
)
unet = UNet2DConditionModel.from_pretrained(
    base_model_name,
    subfolder="unet",
    variant="fp16",
    torch_dtype=torch.float16,
)
brushnet = BrushNetModel.from_pretrained(
    "./PowerPaint_Brushnet",
    variant="fp16",
    torch_dtype=torch.float16,
)
pipe = StableDiffusionPowerPaintBrushNetPipeline.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
    safety_checker=None,
    unet=unet,
    brushnet=brushnet,
    text_encoder_brushnet=text_encoder_brushnet,
    variant="fp16",
)
pipe.tokenizer = PowerPaintTokenizer(CLIPTokenizer.from_pretrained("./tokenizer"))
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
# "mps" targets Apple Silicon; use "cuda" here on an NVIDIA GPU.
pipe = pipe.to("mps")

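# Example image and mask from the latent-diffusion inpainting examples.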
img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"
image = load_image(img_url).convert("RGB").resize((512, 512))
mask = load_image(mask_url).convert("RGB").resize((512, 512))

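# Shared example inputs and sampler settings used for every task below.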
input_image = {"image": image, "mask": mask}
prompt = "Face of a fox sitting on a bench"
negative_prompt = "out of frame, lowres, error, cropped, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, mutation, deformed, blurry, dehydrated, bad anatomy, bad proportions, extra limbs, disfigured, gross proportions, malformed limbs, watermark, signature"
fitting_degree = 1
steps = 30
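# One configuration per PowerPaint task: prompt, negative prompt, and guidance scale.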
tasks = [
    {
        "task": "object-removal",
        "guidance_scale": 12,
        "prompt": "empty scene blur",
        "negative_prompt": "",
    },
    {
        "task": "shape-guided",
        "guidance_scale": 7.5,
        "prompt": prompt,
        "negative_prompt": negative_prompt,
    },
    {
        "task": "context-aware",
        "guidance_scale": 7.5,
        "prompt": "empty scene",
        "negative_prompt": negative_prompt,
    },
    {
        "task": "inpaint",
        "guidance_scale": 7.5,
        "prompt": prompt,
        "negative_prompt": negative_prompt,
    },
    {
        "task": "image-outpainting",
        "guidance_scale": 7.5,
        "prompt": "",
        "negative_prompt": negative_prompt,
    },
]

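# Run every task on the same example. Outpainting first pads the image with a
# neutral gray border and marks the padded area (white in the mask) as the
# region to fill.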
for task in tasks:
    if task["task"] == "image-outpainting":
        margin = 128
        input_image["image"] = ImageOps.expand(
            input_image["image"],
            border=(margin, margin, margin, margin),
            fill=(127, 127, 127),
        )
        outpaint_mask = np.zeros_like(np.asarray(input_image["mask"]))
        input_image["mask"] = Image.fromarray(
            cv2.copyMakeBorder(
                outpaint_mask,
                margin,
                margin,
                margin,
                margin,
                cv2.BORDER_CONSTANT,
                value=(255, 255, 255),
            )
        )

    result_image = predict(
        pipe,
        input_image,
        task["prompt"],
        fitting_degree,
        steps,
        task["guidance_scale"],
        task["negative_prompt"],
        task["task"],
    )

    result_image.save(f"{task['task']}_result.png")