style-aligned-controlnet

Runtime error

App Files Files Community

style-aligned-controlnet / app.py

ysharma HF staff

Update app.py

5855e29 8 months ago

raw

history blame

No virus

4.08 kB

	import gradio as gr
	from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
	from diffusers.utils import load_image
	from transformers import DPTImageProcessor, DPTForDepthEstimation
	import torch
	import mediapy
	import sa_handler
	import pipeline_calls



	# init models

	# Depth estimation: the DPT model is moved to the GPU; the image processor
	# is loaded from the same checkpoint id.
	depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to("cuda")
	feature_processor = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")

	# Loading options shared by the ControlNet and the SDXL pipeline below.
	_fp16_load_kwargs = dict(
	    variant="fp16",
	    use_safetensors=True,
	    torch_dtype=torch.float16,
	)

	# Depth-conditioned ControlNet for SDXL.
	controlnet = ControlNetModel.from_pretrained(
	    "diffusers/controlnet-depth-sdxl-1.0",
	    **_fp16_load_kwargs,
	).to("cuda")

	# Separate VAE checkpoint, loaded in half precision.
	vae = AutoencoderKL.from_pretrained(
	    "madebyollin/sdxl-vae-fp16-fix",
	    torch_dtype=torch.float16,
	).to("cuda")

	# SDXL base pipeline assembled around the ControlNet and VAE loaded above.
	pipeline = StableDiffusionXLControlNetPipeline.from_pretrained(
	    "stabilityai/stable-diffusion-xl-base-1.0",
	    controlnet=controlnet,
	    vae=vae,
	    **_fp16_load_kwargs,
	).to("cuda")
	pipeline.enable_model_cpu_offload()

	# StyleAligned configuration: attention sharing with AdaIN on queries and
	# keys only; group-norm and layer-norm sharing are switched off.
	sa_args = sa_handler.StyleAlignedArgs(
	    share_group_norm=False,
	    share_layer_norm=False,
	    share_attention=True,
	    adain_queries=True,
	    adain_keys=True,
	    adain_values=False,
	)

	# Register the StyleAligned arguments on the pipeline through the handler.
	handler = sa_handler.Handler(pipeline)
	handler.register(sa_args)


	# get depth maps
	def get_depth_maps(image):
	    """Return the depth map computed for ``image``.

	    ``image`` is first passed through ``load_image``; the result is handed to
	    ``pipeline_calls.get_depth_map`` together with the module-level
	    ``feature_processor`` and ``depth_estimator``.
	    """
	    loaded = load_image(image)
	    return pipeline_calls.get_depth_map(loaded, feature_processor, depth_estimator)



	# run ControlNet depth with StyleAligned
	def style_aligned_controlnet(reference_prompt, target_prompt, image):
	    """Run the depth ControlNet pipeline for a reference and a target prompt.

	    Args:
	        reference_prompt: Prompt passed first to ``controlnet_call``.
	        target_prompt: Prompt passed second to ``controlnet_call``.
	        image: Input forwarded to ``get_depth_maps``; the resulting depth map
	            is used as the ControlNet conditioning image.

	    Returns:
	        The first element of the sequence returned by
	        ``pipeline_calls.controlnet_call``.
	    """
	    controlnet_conditioning_scale = 0.8
	    num_images_per_prompt = 1  # adjust according to VRAM size

	    depth_map = get_depth_maps(image)

	    # One latent for the reference prompt plus one per target image, cast to
	    # the UNet's dtype. A single draw is enough: re-drawing latents[1:] right
	    # after creating the tensor would only replace fresh noise with fresh noise.
	    latents = torch.randn(1 + num_images_per_prompt, 4, 128, 128).to(pipeline.unet.dtype)

	    images = pipeline_calls.controlnet_call(
	        pipeline,
	        [reference_prompt, target_prompt],
	        image=depth_map,  # was the undefined name `deph_map` (NameError on every call)
	        num_inference_steps=50,
	        controlnet_conditioning_scale=controlnet_conditioning_scale,
	        num_images_per_prompt=num_images_per_prompt,
	        latents=latents,
	    )
	    print(f"images -{images}")

	    # NOTE(review): the commented-out visualisation call kept after this
	    # function labels images[0] as "reference" and images[1:] as results.
	    # Confirm that images[0] is really the image the UI should display.
	    return images[0]

	#mediapy.show_images([images[0], deph_map] + images[1:], titles=["reference", "depth"] + [f'result {i}' for i in range(1, len(images))])



	# Gradio UI: the left group collects the reference prompt and the image used
	# for depth estimation, the right group collects the generation prompt and
	# shows the result.
	with gr.Blocks() as demo:
	    with gr.Row(variant='panel'):
	        with gr.Group():
	            gr.Markdown("### <center>Reference Prompt and Image</center>")
	            ref_prompt = gr.Textbox(
	                label="Enter a Prompt describing the reference image",
	                placeholder='a photo of <object> in <style name> style',
	            )
	            # type='pil' so the handler receives a PIL image: the upload is
	            # passed to diffusers' load_image, which does not accept the
	            # numpy array that gr.Image produces by default.
	            depth_map = gr.Image(label="Upload the image to get Depth Map", type='pil')
	        with gr.Group():
	            gr.Markdown("### <center>Prompt for generation and generated Image</center>")
	            prompt = gr.Textbox(
	                label="Enter a Prompt",
	                placeholder='a photo of <object> in <style name> style',
	            )
	            output = gr.Image(label="Style-Aligned ControlNet", type='pil')
	            btn = gr.Button("Generate", size='sm')

	    # Input order matches the handler signature:
	    # (reference_prompt, target_prompt, image).
	    # The handler was previously `greet`, a name that is never defined.
	    btn.click(
	        fn=style_aligned_controlnet,
	        inputs=[ref_prompt, prompt, depth_map],
	        outputs=output,
	        api_name="style_aligned_controlnet",
	    )


	demo.launch()