diffusers
/

tools

stable-diffusion

stable-diffusion-diffusers

absolute-realism

Model card Files Files and versions Community

tools / run_decomposed_if.py

patrickvonplaten's picture

patrickvonplaten

up

e58dd86 over 1 year ago

3.08 kB

	#!/usr/bin/env python3
"""Run the IF text-to-image cascade one component at a time.

The T5 text encoder, the base pipeline and the two super-resolution pipelines
are loaded, used and released one after another instead of being kept on the
GPU together. The output of every stage is written to ``~/images``.
"""
import gc
import os
from pathlib import Path

import torch
from diffusers import IFBasePipeline, IFSuperResolutionPipeline, UNet2DConditionModel
from transformers import T5EncoderModel, T5Tokenizer

prompt = 'a photo of a kangaroo wearing an orange hoodie and blue sunglasses standing in front of the eiffel tower holding a sign that says "very deep learning"'

# The script used to assign "diffusers/if" and then immediately override it
# with the local checkout below. The local path stays the default so existing
# runs behave the same; set IF_MODEL_ID to load from somewhere else.
model_id = os.environ.get("IF_MODEL_ID", "/home/patrick/if")

# Create the output directory up front so a missing folder cannot abort the
# run after a stage has already been generated.
output_dir = Path.home() / "images"
output_dir.mkdir(parents=True, exist_ok=True)


def _encode_text(tokenizer, encoder, text):
    """Return the encoder's last hidden state for ``text``.

    The text is padded/truncated to 77 tokens and encoded on the GPU without
    tracking gradients.
    """
    with torch.no_grad():
        inputs = tokenizer(
            text,
            max_length=77,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
        ).to("cuda")
        return encoder(**inputs).last_hidden_state


def _release_cuda_memory():
    """Collect garbage, then hand cached CUDA blocks back to the driver.

    The collection has to come first: tensors that are only reachable through
    reference cycles still count as in use until the collector frees them, and
    ``empty_cache`` can only return memory that is no longer in use.
    """
    gc.collect()
    torch.cuda.empty_cache()


def _save_stage_output(pipe, image, filename):
    """Convert the first image of a stage's output to PIL and save it under ``output_dir``."""
    pil_image = pipe.numpy_to_pil(pipe.decode_latents(image))[0]
    pil_image.save(output_dir / filename)


# T5
# The tokenizer holds no weights, so the dtype/variant/safetensors arguments
# that the model loaders take are not passed here.
t5_tok = T5Tokenizer.from_pretrained(model_id, subfolder="tokenizer")

t5 = T5EncoderModel.from_pretrained(
    model_id,
    subfolder="text_encoder",
    torch_dtype=torch.float16,
    variant="fp16",
    low_cpu_mem_usage=True,
)
t5.cuda()

prompt = prompt.lower().strip()  # make sure everything is lower-cased
prompt_embeds = _encode_text(t5_tok, t5, prompt)
neg_prompt_embeds = _encode_text(t5_tok, t5, "")

# The text encoder is not needed once both embeddings exist.
del t5
_release_cuda_memory()

# One generator is shared by all three stages, so its state carries over from
# stage to stage.
generator = torch.Generator("cuda").manual_seed(0)

# Stage 1
pipe = IFBasePipeline.from_pretrained(model_id, text_encoder=None, torch_dtype=torch.float16, variant="fp16")
pipe.to("cuda")

image = pipe(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=neg_prompt_embeds,
    output_type="pt",
    num_inference_steps=100,
    generator=generator,
).images

# save image
_save_stage_output(pipe, image, "if_I_0.png")

# offload
del pipe
_release_cuda_memory()

# Stage 2
unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="super_res_1_unet", torch_dtype=torch.float16)
pipe = IFSuperResolutionPipeline.from_pretrained(model_id, unet=unet, text_encoder=None, torch_dtype=torch.float16, variant="fp16")
pipe.to("cuda")

image = pipe(
    image=image,
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=neg_prompt_embeds,
    num_inference_steps=50,
    noise_level=250,
    output_type="pt",
    generator=generator,
).images

# save image
_save_stage_output(pipe, image, "if_II_0.png")

# offload -- `unet` must go too, otherwise that name keeps the stage-2 weights
# alive on the GPU after the pipeline is deleted.
del pipe, unet
_release_cuda_memory()

# Stage 3
unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="super_res_2_unet", torch_dtype=torch.float16)
pipe = IFSuperResolutionPipeline.from_pretrained(model_id, unet=unet, text_encoder=None, torch_dtype=torch.float16, variant="fp16")
pipe.to("cuda")

image = pipe(
    image=image,
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=neg_prompt_embeds,
    num_inference_steps=40,
    noise_level=0,
    output_type="pt",
    generator=generator,
).images

# save image
_save_stage_output(pipe, image, "if_III_0.png")

# offload
del pipe, unet
_release_cuda_memory()