|
--- |
|
license: apache-2.0 |
|
pipeline_tag: text-to-image |
|
--- |
|
|
|
# ***ControlNet Tile SDXL*** |
|
![images](./masonry.webp) |
|
|
|
# Image Deblur Example (Repaint Detail)
|
![images_0](./000118_tile_blur_concat.webp)

![images_1](./000126_tile_blur_concat.webp)

![images_2](./000129_tile_blur_concat.webp)

![images_3](./000132_tile_blur_concat.webp)

![images_4](./000139_tile_blur_concat.webp)
|
|
|
# Image Variation Example (like Midjourney)
|
|
|
![images_5](./000003_tile_var_concat.webp)

![images_6](./000008_tile_var_concat.webp)

![images_7](./000018_tile_var_concat.webp)

![images_8](./000030_tile_var_concat.webp)

![images_9](./000039_tile_var_concat.webp)
|
|
|
# Image Super-resolution (like Real-ESRGAN)
|
|
|
Supports any aspect ratio and any upscale factor; the following examples use 3 * 3 upscaling.
|
|
|
![images_5](./000003.webp)

![images_6](./000003_scribble.webp)

![images_7](./000053.webp)

![images_8](./000053_scribble.webp)
|
|
|
# Code to Use Tile blur |
|
|
|
Code references:

- https://huggingface.co/TTPlanet/TTPLanet_SDXL_Controlnet_Tile_Realistic/blob/main/TTP_tile_preprocessor_v5.py
- https://github.com/lllyasviel/ControlNet-v1-1-nightly/blob/main/gradio_tile.py
|
|
|
```python
import random

import cv2
import numpy as np
import torch
from PIL import Image
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import EulerAncestralDiscreteScheduler
from guided_filter import FastGuidedFilter  # this file is provided in this repo


def resize_image_control(control_image, resolution):
    # randomly crop the control image to the target resolution (not used in this example)
    HH, WW, _ = control_image.shape
    crop_h = random.randint(0, HH - resolution[1])
    crop_w = random.randint(0, WW - resolution[0])
    crop_image = control_image[crop_h:crop_h + resolution[1], crop_w:crop_w + resolution[0], :]
    return crop_image, crop_w, crop_h


def apply_gaussian_blur(image_np, ksize=5, sigmaX=1.0):
    if ksize % 2 == 0:
        ksize += 1  # ksize must be odd
    return cv2.GaussianBlur(image_np, (ksize, ksize), sigmaX=sigmaX)


def apply_guided_filter(image_np, radius, eps, scale):
    gf = FastGuidedFilter(image_np, radius, eps, scale)
    return gf.filter(image_np)


controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it in as much detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16
)

# when testing with another base model, you need to change the vae as well
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)
pipe.to("cuda")

controlnet_img = cv2.imread("your original image path")
height, width, _ = controlnet_img.shape
# rescale so the total pixel count is close to 1024 * 1024 while keeping the aspect ratio
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio), int(height * ratio)
controlnet_img = cv2.resize(controlnet_img, (W, H))

# sample random degradation parameters
blur_strength = random.sample([i / 10. for i in range(10, 201, 2)], k=1)[0]
radius = random.sample([i for i in range(1, 40, 2)], k=1)[0]
eps = random.sample([i / 1000. for i in range(1, 101, 2)], k=1)[0]
scale_factor = random.sample([i / 10. for i in range(10, 181, 5)], k=1)[0]

if random.random() > 0.5:
    controlnet_img = apply_gaussian_blur(controlnet_img, ksize=int(blur_strength), sigmaX=blur_strength / 2)

if random.random() > 0.5:
    # apply guided filter
    controlnet_img = apply_guided_filter(controlnet_img, radius, eps, scale_factor)

# downscale then upscale to discard high-frequency detail
controlnet_img = cv2.resize(controlnet_img, (int(W / scale_factor), int(H / scale_factor)), interpolation=cv2.INTER_AREA)
controlnet_img = cv2.resize(controlnet_img, (W, H), interpolation=cv2.INTER_CUBIC)

controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image to 1024 * 1024 or the same bucket resolution to get the best performance
images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=W,
    height=H,
    num_inference_steps=30,
).images

# png usually preserves quality better than jpg or webp, but the files are much bigger
images[0].save("your image save path")
```
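
The `FastGuidedFilter` import comes from `guided_filter.py`, which is included in this repo. As a minimal sketch (assuming the file keeps that name), you can fetch it with `huggingface_hub` before running the script above:

```python
# minimal sketch: download guided_filter.py from this repo so the import above resolves
# (assumes the file is still named guided_filter.py in xinsir/controlnet-tile-sdxl-1.0)
import shutil
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(repo_id="xinsir/controlnet-tile-sdxl-1.0", filename="guided_filter.py")
shutil.copy(local_path, "guided_filter.py")  # place it next to your script
```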
|
# Code to Use Tile var |
|
|
|
Using a more detailed prompt to regenerate can help!
|
|
|
```python
import cv2
import numpy as np
import torch
from PIL import Image
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import EulerAncestralDiscreteScheduler

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it in as much detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16
)

# when testing with another base model, you need to change the vae as well
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)
pipe.to("cuda")

controlnet_img = cv2.imread("your original image path")
height, width, _ = controlnet_img.shape
# rescale so the total pixel count is close to 1024 * 1024 while keeping the aspect ratio
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio), int(height * ratio)

controlnet_img = cv2.resize(controlnet_img, (W, H))
controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image to 1024 * 1024 or the same bucket resolution to get the best performance
images = pipe(
    prompt,
    negative_prompt=negative_prompt,
    image=controlnet_img,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    width=W,
    height=H,
    num_inference_steps=30,
).images

# png usually preserves quality better than jpg or webp, but the files are much bigger
images[0].save("your image save path")
```
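
To get Midjourney-style variation grids, you can call the same pipeline several times with a different seed each time. A minimal sketch, reusing `pipe`, `controlnet_img`, `W`, and `H` from the script above:

```python
import random

# generate four variations of the same control image, one seed per call
variations = []
for _ in range(4):
    seed = random.randint(0, 2147483647)
    generator = torch.Generator("cuda").manual_seed(seed)
    image = pipe(
        prompt,
        negative_prompt=negative_prompt,
        image=controlnet_img,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        generator=generator,
        width=W,
        height=H,
        num_inference_steps=30,
    ).images[0]
    variations.append(image)
    image.save(f"variation_{seed}.png")
```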
|
|
|
|
|
# Code to Use Tile super |
|
|
|
Performance may be unstable; the next version is being optimized! The snippet below crops the control image into a 3 * 3 grid, repaints each tile at full resolution, and stitches the results into an image 3 times larger on each side.
|
|
|
```python
import random

import cv2
import numpy as np
import torch
from PIL import Image
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import EulerAncestralDiscreteScheduler

controlnet_conditioning_scale = 1.0
prompt = "your prompt, the longer the better, you can describe it in as much detail as possible"
negative_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality'

eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-tile-sdxl-1.0",
    torch_dtype=torch.float16
)

# when testing with another base model, you need to change the vae as well
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    safety_checker=None,
    torch_dtype=torch.float16,
    scheduler=eulera_scheduler,
)
pipe.to("cuda")

controlnet_img = cv2.imread("your original image path")
height, width, _ = controlnet_img.shape
# rescale to roughly 1024 * 1024 pixels and snap to a multiple of 48 so the 3x3 grid divides evenly
ratio = np.sqrt(1024. * 1024. / (width * height))
W, H = int(width * ratio) // 48 * 48, int(height * ratio) // 48 * 48
controlnet_img = cv2.resize(controlnet_img, (W, H))
controlnet_img = cv2.cvtColor(controlnet_img, cv2.COLOR_BGR2RGB)
controlnet_img = Image.fromarray(controlnet_img)

# need to resize the image to 1024 * 1024 or the same bucket resolution to get the best performance
target_width = W // 3
target_height = H // 3

# crop the control image into a 3x3 grid and upscale each tile to full resolution
images = []
for i in range(3):  # rows
    for j in range(3):  # columns
        left = j * target_width
        top = i * target_height
        right = left + target_width
        bottom = top + target_height

        # crop the tile at the computed boundaries
        cropped_image = controlnet_img.crop((left, top, right, bottom))
        cropped_image = cropped_image.resize((W, H))

        images.append(cropped_image)

seed = random.randint(0, 2147483647)
generator = torch.Generator('cuda').manual_seed(seed)

# repaint each tile at full resolution
result_images = []
for sub_img in images:
    out = pipe(
        prompt,
        negative_prompt=negative_prompt,
        image=sub_img,
        generator=generator,
        width=W,
        height=H,
        num_inference_steps=30,
        crops_coords_top_left=(W, H),
        target_size=(W, H),
        original_size=(W * 2, H * 2),
    )
    result_images.append(out.images[0])

# paste the repainted tiles onto a canvas 3 times larger on each side
new_im = Image.new('RGB', (W * 3, H * 3))
for idx, tile in enumerate(result_images):
    new_im.paste(tile, ((idx % 3) * W, (idx // 3) * H))

# png usually preserves quality better than jpg or webp, but the files are much bigger
new_im.save("your image save path")
```
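
The 3 * 3 grid above is one setting; as noted in the super-resolution section, other upscale factors work the same way. A minimal sketch of the same loop generalized to an n * n grid (hypothetical `n`; reuses `pipe`, `controlnet_img`, `W`, `H`, `prompt`, and `negative_prompt` from the script above, and assumes W and H are multiples of `n`):

```python
n = 4  # hypothetical upscale factor; make sure W and H are multiples of n

tile_w, tile_h = W // n, H // n
upscaled = Image.new('RGB', (W * n, H * n))

for i in range(n):      # rows
    for j in range(n):  # columns
        # crop one tile, upscale it to full resolution, and repaint it
        box = (j * tile_w, i * tile_h, (j + 1) * tile_w, (i + 1) * tile_h)
        tile = controlnet_img.crop(box).resize((W, H))
        out = pipe(
            prompt,
            negative_prompt=negative_prompt,
            image=tile,
            width=W,
            height=H,
            num_inference_steps=30,
        ).images[0]
        upscaled.paste(out, (j * W, i * H))

upscaled.save("your upscaled image path")
```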