Clint Adams
Try to run on ZERO
072d8d2
raw
history blame
9.51 kB
import cv2
import numpy as np
import torch
import gradio as gr
import random
import spaces
from diffusers import DPMSolverMultistepScheduler, StableDiffusionXLPipeline
DESCRIPTION='''
This uses code lifted almost verbatim from
[Outpainting II - Differential Diffusion](https://huggingface.co/blog/OzzyGT/outpainting-differential-diffusion).
'''
xlp_kwargs = {
'custom_pipeline': 'pipeline_stable_diffusion_xl_differential_img2img'
}
if torch.cuda.is_available():
device = 'cuda'
device_dtype = torch.float16
xlp_kwargs['variant'] = 'fp16'
else:
device = 'cpu'
device_dtype = torch.float32
DESCRIPTION+='''
This Space appears to be running on a CPU; it will take hours to get results. You may [duplicate this space](https://huggingface.co/spaces/clinteroni/outpainting-demo?duplicate=true) and pay for an upgraded runtime instead.
'''
xlp_kwargs['torch_dtype'] = device_dtype
def merge_images(original, new_image, offset, direction):
if direction in ["left", "right"]:
merged_image = np.zeros(
(original.shape[0], original.shape[1] + offset, 3), dtype=np.uint8)
elif direction in ["top", "bottom"]:
merged_image = np.zeros(
(original.shape[0] + offset, original.shape[1], 3), dtype=np.uint8)
if direction == "left":
merged_image[:, offset:] = original
merged_image[:, : new_image.shape[1]] = new_image
elif direction == "right":
merged_image[:, : original.shape[1]] = original
merged_image[:, original.shape[1] + offset -
new_image.shape[1]: original.shape[1] + offset] = new_image
elif direction == "top":
merged_image[offset:, :] = original
merged_image[: new_image.shape[0], :] = new_image
elif direction == "bottom":
merged_image[: original.shape[0], :] = original
merged_image[original.shape[0] + offset - new_image.shape[0]: original.shape[0] + offset, :] = new_image
return merged_image
def slice_image(image):
height, width, _ = image.shape
slice_size = min(width // 2, height // 3)
slices = []
for h in range(3):
for w in range(2):
left = w * slice_size
upper = h * slice_size
right = left + slice_size
lower = upper + slice_size
if w == 1 and right > width:
left -= right - width
right = width
if h == 2 and lower > height:
upper -= lower - height
lower = height
slice = image[upper:lower, left:right]
slices.append(slice)
return slices
def process_image(
image,
fill_color=(0, 0, 0),
mask_offset=50,
blur_radius=500,
expand_pixels=256,
direction="left",
inpaint_mask_color=50,
max_size=1024,
):
height, width = image.shape[:2]
new_height = height + \
(expand_pixels if direction in ["top", "bottom"] else 0)
new_width = width + \
(expand_pixels if direction in ["left", "right"] else 0)
if new_height > max_size:
# If so, crop the image from the opposite side
if direction == "top":
image = image[:max_size, :]
elif direction == "bottom":
image = image[new_height - max_size:, :]
new_height = max_size
if new_width > max_size:
# If so, crop the image from the opposite side
if direction == "left":
image = image[:, :max_size]
elif direction == "right":
image = image[:, new_width - max_size:]
new_width = max_size
height, width = image.shape[:2]
new_image = np.full((new_height, new_width, 3), fill_color, dtype=np.uint8)
mask = np.full_like(new_image, 255, dtype=np.uint8)
inpaint_mask = np.full_like(new_image, 0, dtype=np.uint8)
mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
inpaint_mask = cv2.cvtColor(inpaint_mask, cv2.COLOR_BGR2GRAY)
if direction == "left":
new_image[:, expand_pixels:] = image[:, : max_size - expand_pixels]
mask[:, : expand_pixels + mask_offset] = inpaint_mask_color
inpaint_mask[:, :expand_pixels] = 255
elif direction == "right":
new_image[:, :width] = image
mask[:, width - mask_offset:] = inpaint_mask_color
inpaint_mask[:, width:] = 255
elif direction == "top":
new_image[expand_pixels:, :] = image[: max_size - expand_pixels, :]
mask[: expand_pixels + mask_offset, :] = inpaint_mask_color
inpaint_mask[:expand_pixels, :] = 255
elif direction == "bottom":
new_image[:height, :] = image
mask[height - mask_offset:, :] = inpaint_mask_color
inpaint_mask[height:, :] = 255
# mask blur
if blur_radius % 2 == 0:
blur_radius += 1
mask = cv2.GaussianBlur(mask, (blur_radius, blur_radius), 0)
# telea inpaint
_, mask_np = cv2.threshold(
inpaint_mask, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
inpaint = cv2.inpaint(new_image, mask_np, 3, cv2.INPAINT_TELEA)
# convert image to tensor
inpaint = cv2.cvtColor(inpaint, cv2.COLOR_BGR2RGB)
inpaint = torch.from_numpy(inpaint).permute(2, 0, 1).float()
inpaint = inpaint / 127.5 - 1
inpaint = inpaint.unsqueeze(0).to(device)
# convert mask to tensor
mask = torch.from_numpy(mask)
mask = mask.unsqueeze(0).float() / 255.0
mask = mask.to(device)
return inpaint, mask
def image_resize(image, new_size=1024):
height, width = image.shape[:2]
aspect_ratio = width / height
new_width = new_size
new_height = new_size
if aspect_ratio != 1:
if width > height:
new_height = int(new_size / aspect_ratio)
else:
new_width = int(new_size * aspect_ratio)
image = cv2.resize(image, (new_width, new_height),
interpolation=cv2.INTER_LANCZOS4)
return image
@spaces.GPU
def outpaint(pil_image, direction='right', times_to_expand=4):
if torch.cuda.is_available():
torch.cuda.empty_cache()
pipeline = StableDiffusionXLPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
**xlp_kwargs
).to(device)
pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
pipeline.scheduler.config, use_karras_sigmas=True)
pipeline.load_ip_adapter(
"h94/IP-Adapter",
subfolder="sdxl_models",
weight_name=[
"ip-adapter-plus_sdxl_vit-h.safetensors",
],
image_encoder_folder="models/image_encoder",
)
pipeline.set_ip_adapter_scale(0.1)
def generate_image(prompt, negative_prompt, image, mask, ip_adapter_image, seed: int = None):
if seed is None:
seed = random.randint(0, 2**32 - 1)
generator = torch.Generator(device="cpu").manual_seed(seed)
image = pipeline(
prompt=prompt,
negative_prompt=negative_prompt,
width=1024,
height=1024,
guidance_scale=4.0,
num_inference_steps=25,
original_image=image,
image=image,
strength=1.0,
map=mask,
generator=generator,
ip_adapter_image=[ip_adapter_image],
output_type="np",
).images[0]
image = (image * 255).astype(np.uint8)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
return image
prompt = ""
negative_prompt = ""
inpaint_mask_color = 50 # lighter use more of the Telea inpainting
# I recommend to don't go more than half of the picture so it has context
expand_pixels = 256
original = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
image = image_resize(original)
# image.shape[1] for horizontal, image.shape[0] for vertical
expand_pixels_to_square = 1024 - image.shape[1]
image, mask = process_image(
image, expand_pixels=expand_pixels_to_square, direction=direction, inpaint_mask_color=inpaint_mask_color
)
ip_adapter_image = []
for index, part in enumerate(slice_image(original)):
ip_adapter_image.append(part)
generated = generate_image(
prompt, negative_prompt, image, mask, ip_adapter_image)
final_image = generated
for i in range(times_to_expand):
image, mask = process_image(
final_image, direction=direction, expand_pixels=expand_pixels, inpaint_mask_color=inpaint_mask_color
)
ip_adapter_image = []
for index, part in enumerate(slice_image(generated)):
ip_adapter_image.append(part)
generated = generate_image(
prompt, negative_prompt, image, mask, ip_adapter_image)
final_image = merge_images(final_image, generated, 256, direction)
color_converted = cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB)
return color_converted
gradio_app = gr.Interface(
outpaint,
inputs=[
gr.Image(label="Select start image", sources=[
'upload', 'webcam'], type='pil'),
gr.Radio(["left", "right", "top", 'bottom'], label="Direction",
info="Outward from which edge to paint?", value='right'),
gr.Slider(2, 4, step=1, value=4, label="Times to expand",
info="Choose between 2 and 4"),
],
outputs=[gr.Image(label="Processed Image")],
title="Outpainting with differential diffusion demo",
description=DESCRIPTION
)
if __name__ == "__main__":
gradio_app.queue(max_size=20).launch()