Spaces:

fffiloni
/

StyleAligned_Transfer

Running on Zero

App Files Files Community

fffiloni committed on Dec 15, 2023

Commit

ee7720a

1 Parent(s): bd810e1

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -20

app.py CHANGED Viewed

@@ -4,44 +4,87 @@ import spaces
 from diffusers import StableDiffusionXLPipeline, DDIMScheduler
 import torch
 import sa_handler
 # init models
-scheduler = DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False,
-                              set_alpha_to_one=False)
 pipeline = StableDiffusionXLPipeline.from_pretrained(
-    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True,
     scheduler=scheduler
 ).to("cuda")
 pipeline.enable_model_cpu_offload()
 pipeline.enable_vae_slicing()
-handler = sa_handler.Handler(pipeline)
-sa_args = sa_handler.StyleAlignedArgs(share_group_norm=False,
-                                      share_layer_norm=False,
-                                      share_attention=True,
-                                      adain_queries=True,
-                                      adain_keys=True,
-                                      adain_values=False,
-                                     )
-handler.register(sa_args, )
 # run StyleAligned
 @spaces.GPU
 def infer(prompts):
-    sets_of_prompts = [
-      "a toy train. macro photo. 3d game asset",
-      "a toy airplane. macro photo. 3d game asset",
-      "a toy bicycle. macro photo. 3d game asset",
-      "a toy car. macro photo. 3d game asset",
-      "a toy boat. macro photo. 3d game asset",
-    ]
     images = pipeline(sets_of_prompts,).images
-    return images
 gr.Interface(
     fn=infer,

 from diffusers import StableDiffusionXLPipeline, DDIMScheduler
 import torch
 import sa_handler
+import math
 # init models
+scheduler = DDIMScheduler(
+    beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear",
+    clip_sample=False, set_alpha_to_one=False)
 pipeline = StableDiffusionXLPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16",
+    use_safetensors=True,
     scheduler=scheduler
 ).to("cuda")
 pipeline.enable_model_cpu_offload()
 pipeline.enable_vae_slicing()
+# DDIM inversion
+from diffusers.utils import load_image
+import inversion
+import numpy as np
+src_style = "medieval painting"
+src_prompt = f'Man laying in a bed, {src_style}.'
+image_path = './example_image/medieval-bed.jpeg'
+num_inference_steps = 50
+x0 = np.array(load_image(image_path).resize((1024, 1024)))
+zts = inversion.ddim_inversion(pipeline, x0, src_prompt, num_inference_steps, 2)
+#mediapy.show_image(x0, title="input reference image", height=256)
 # run StyleAligned
+prompts = [
+    src_prompt,
+    "A man working on a laptop",
+    "A man eats pizza",
+    "A woman playig on saxophone",
+]
+# some parameters you can adjust to control fidelity to reference
+shared_score_shift = np.log(2)  # higher value induces higher fidelity, set 0 for no shift
+shared_score_scale = 1.0  # higher value induces higher fidelity, set 1 for no rescale
+# for very famous images, consider suppressing attention to the reference; here is a configuration example:
+# shared_score_shift = np.log(1)
+# shared_score_scale = 0.5
+for i in range(1, len(prompts)):
+    prompts[i] = f'{prompts[i]}, {src_style}.'
+handler = sa_handler.Handler(pipeline)
+sa_args = sa_handler.StyleAlignedArgs(
+    share_group_norm=True, share_layer_norm=True, share_attention=True,
+    adain_queries=True, adain_keys=True, adain_values=False,
+    shared_score_shift=shared_score_shift, shared_score_scale=shared_score_scale,)
+handler.register(sa_args)
+zT, inversion_callback = inversion.make_inversion_callback(zts, offset=5)
+g_cpu = torch.Generator(device='cpu')
+g_cpu.manual_seed(10)
+latents = torch.randn(len(prompts), 4, 128, 128, device='cpu', generator=g_cpu,
+                      dtype=pipeline.unet.dtype,).to('cuda:0')
+latents[0] = zT
+images_a = pipeline(prompts, latents=latents,
+                    callback_on_step_end=inversion_callback,
+                    num_inference_steps=num_inference_steps, guidance_scale=10.0).images
+handler.remove()
+mediapy.show_images(images_a, titles=[p[:-(len(src_style) + 3)] for p in prompts])
 @spaces.GPU
 def infer(prompts):
     images = pipeline(sets_of_prompts,).images
+    return images_a
 gr.Interface(
     fn=infer,