"""High-resolution (2048x1536) img2img refinement of a photo using SDXL with a
Canny-edge ControlNet, accelerated by HiDiffusion.

Pipeline: load the Canny ControlNet and SDXL base in fp16, patch the pipeline
with HiDiffusion, build a Canny edge map from the source image, then run the
img2img pipeline at high strength so the edge map (not the pixels) carries
most of the structure.
"""
import cv2
import numpy as np
import torch
from PIL import Image
from diffusers import (
    ControlNetModel,
    DDIMScheduler,
    StableDiffusionXLControlNetImg2ImgPipeline,
)
from hidiffusion import apply_hidiffusion, remove_hidiffusion

# Canny-conditioned ControlNet for SDXL (fp16 weights). Note: no .to("cuda")
# here — device placement is handled by enable_model_cpu_offload() below.
controlnet = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
)

# DDIM scheduler from the base model's own config; eta=1.0 at call time makes
# the DDIM sampler fully stochastic (DDPM-like).
scheduler = DDIMScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
)

pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    scheduler=scheduler,
    torch_dtype=torch.float16,
)

# Apply hidiffusion with a single line of code.
apply_hidiffusion(pipe)

# FIX: the original moved the ControlNet and the pipeline to CUDA with
# .to("cuda") *before* enabling model CPU offload. diffusers forbids that
# combination — offloading manages device placement itself, and a pipeline
# already on the GPU either raises or silently defeats the memory savings.
pipe.enable_model_cpu_offload()
pipe.enable_xformers_memory_efficient_attention()

path = './assets/lara.jpeg'
# Force 3-channel RGB: a grayscale or RGBA source would otherwise break the
# [:, :, None] expansion / 3-way concatenation in the Canny step below.
ori_image = Image.open(path).convert("RGB")

# Build the Canny edge map used as the ControlNet conditioning image:
# single-channel edges replicated into a 3-channel PIL image.
image = np.array(ori_image)
image = cv2.Canny(image, 50, 120)
image = image[:, :, None]
image = np.concatenate([image, image, image], axis=2)
canny_image = Image.fromarray(image)

controlnet_conditioning_scale = 0.5  # recommended for good generalization

prompt = "Lara Croft with brown hair, and is wearing a tank top, a brown backpack. The room is dark and has an old-fashioned decor with a patterned floor and a wall featuring a design with arches and a dark area on the right side, muted color, high detail, 8k high definition award winning"
negative_prompt = "underexposed, poorly drawn hands, duplicate hands, overexposed, bad art, beginner, amateur, abstract, disfigured, deformed, close up, weird colors, watermark"

# strength=0.99 nearly discards the input pixels, so composition is driven by
# the Canny control image; HiDiffusion enables the 2048x1536 target size.
image = pipe(
    prompt,
    image=ori_image,
    control_image=canny_image,
    height=1536,
    width=2048,
    strength=0.99,
    num_inference_steps=50,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
    guidance_scale=12.5,
    negative_prompt=negative_prompt,
    eta=1.0,
).images[0]

image.save("lara.jpg")