# Spaces: Running on Zero
import gradio as gr | |
from diffusers import StableDiffusionXLPipeline, DDIMScheduler | |
import torch | |
import sa_handler | |
import math | |
# Initialise the models.
# DDIM scheduler with a scaled-linear beta schedule; the pipeline built on it
# is used below both for DDIM inversion of the reference image and for
# sampling the style-aligned images.
scheduler = DDIMScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    clip_sample=False,
    set_alpha_to_one=False,
)
pipeline = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
    scheduler=scheduler,
)
# The pipeline is deliberately NOT moved to CUDA with .to("cuda") first:
# model CPU offload manages device placement itself, and moving the whole
# pipeline to the GPU beforehand only costs an extra transfer (the diffusers
# docs advise against combining the two).
pipeline.enable_model_cpu_offload()
pipeline.enable_vae_slicing()
# DDIM inversion | |
from diffusers.utils import load_image | |
import inversion | |
import numpy as np | |
def run(ref_path, ref_style, ref_prompt, prompt1, prompt2, prompt3):
    """Generate images for three prompts in the style of a reference image.

    The reference image is DDIM-inverted under its own prompt; the inverted
    latent is then used as the first element of the sampling batch while the
    three target prompts are sampled next to it with StyleAligned sharing
    registered on the global ``pipeline``.

    Args:
        ref_path: File path of the reference image.
        ref_style: Style description appended to every prompt.
        ref_prompt: Text describing the content of the reference image.
        prompt1: First target prompt.
        prompt2: Second target prompt.
        prompt3: Third target prompt.

    Returns:
        The ``images`` attribute of the pipeline output for the batch of
        four prompts (reference prompt first, then the three targets).
    """
    src_style = f"{ref_style}"
    src_prompt = f"{ref_prompt}, {src_style}."
    image_path = f"{ref_path}"
    num_inference_steps = 50

    # DDIM inversion of the reference image (resized to 1024x1024).
    x0 = np.array(load_image(image_path).resize((1024, 1024)))
    zts = inversion.ddim_inversion(pipeline, x0, src_prompt, num_inference_steps, 2)

    # Reference prompt first, then every target prompt with the shared style
    # suffix appended.
    prompts = [src_prompt] + [
        f"{prompt}, {src_style}." for prompt in (prompt1, prompt2, prompt3)
    ]

    # Some parameters you can adjust to control fidelity to the reference.
    shared_score_shift = np.log(2)  # higher value induces higher fidelity, set 0 for no shift
    shared_score_scale = 1.0  # higher value induces higher fidelity, set 1 for no rescale
    # For very famous images consider suppressing attention to the reference;
    # here is a configuration example:
    #   shared_score_shift = np.log(1)
    #   shared_score_scale = 0.5

    handler = sa_handler.Handler(pipeline)
    sa_args = sa_handler.StyleAlignedArgs(
        share_group_norm=True,
        share_layer_norm=True,
        share_attention=True,
        adain_queries=True,
        adain_keys=True,
        adain_values=False,
        shared_score_shift=shared_score_shift,
        shared_score_scale=shared_score_scale,
    )
    handler.register(sa_args)
    try:
        zT, inversion_callback = inversion.make_inversion_callback(zts, offset=5)

        # Fixed seed so the random latents of the target prompts are
        # reproducible between calls.
        g_cpu = torch.Generator(device='cpu')
        g_cpu.manual_seed(10)
        latents = torch.randn(
            len(prompts), 4, 128, 128,
            device='cpu',
            generator=g_cpu,
            dtype=pipeline.unet.dtype,
        ).to('cuda:0')
        # The first batch element starts from the inverted reference latent
        # instead of random noise.
        latents[0] = zT

        images_a = pipeline(
            prompts,
            latents=latents,
            callback_on_step_end=inversion_callback,
            num_inference_steps=num_inference_steps,
            guidance_scale=10.0,
        ).images
    finally:
        # Always undo the registration, even when sampling fails, so the
        # shared pipeline is not left modified for the next request.
        handler.remove()
    return images_a
# Gradio UI. The input components are passed to ``run`` positionally, so their
# order must match its signature: reference image, style, reference prompt,
# then the three target prompts.
input_components = [
    gr.Image(type="filepath", value="./example_image/medieval-bed.jpeg"),
    gr.Textbox(value="medieval painting"),
    gr.Textbox(value="Man laying on bed"),
    gr.Textbox(value="A man working on a laptop"),
    gr.Textbox(value="A man eating pizza"),
    gr.Textbox(value="A woman playing on saxophone"),
]
# The generated images are displayed in a single gallery.
output_components = [gr.Gallery()]

demo = gr.Interface(
    fn=run,
    inputs=input_components,
    outputs=output_components,
    title="Style Aligned Image Generation",
)
demo.launch()