fffiloni committed
Commit ee7720a
1 Parent(s): bd810e1

Update app.py: add DDIM inversion of a reference image and StyleAligned attention sharing

Files changed (1): app.py (+63 −20)
app.py CHANGED
@@ -4,44 +4,87 @@ import spaces
 from diffusers import StableDiffusionXLPipeline, DDIMScheduler
 import torch
 import sa_handler
 
 # init models
 
-scheduler = DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False,
-                          set_alpha_to_one=False)
 pipeline = StableDiffusionXLPipeline.from_pretrained(
-    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True,
     scheduler=scheduler
 ).to("cuda")
 
 pipeline.enable_model_cpu_offload()
 pipeline.enable_vae_slicing()
 
-handler = sa_handler.Handler(pipeline)
-sa_args = sa_handler.StyleAlignedArgs(share_group_norm=False,
-                                      share_layer_norm=False,
-                                      share_attention=True,
-                                      adain_queries=True,
-                                      adain_keys=True,
-                                      adain_values=False,
-                                      )
 
-handler.register(sa_args, )
 
 # run StyleAligned
 
 @spaces.GPU
 def infer(prompts):
-    sets_of_prompts = [
-        "a toy train. macro photo. 3d game asset",
-        "a toy airplane. macro photo. 3d game asset",
-        "a toy bicycle. macro photo. 3d game asset",
-        "a toy car. macro photo. 3d game asset",
-        "a toy boat. macro photo. 3d game asset",
-    ]
 
     images = pipeline(sets_of_prompts,).images
-    return images
 
 gr.Interface(
     fn=infer,
 
 from diffusers import StableDiffusionXLPipeline, DDIMScheduler
 import torch
 import sa_handler
+import math
 
 # init models
 
+scheduler = DDIMScheduler(
+    beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear",
+    clip_sample=False, set_alpha_to_one=False)
+
 pipeline = StableDiffusionXLPipeline.from_pretrained(
+    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16",
+    use_safetensors=True,
     scheduler=scheduler
 ).to("cuda")
 
 pipeline.enable_model_cpu_offload()
 pipeline.enable_vae_slicing()
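 # cpu offload keeps idle submodules on CPU between forward passes, and VAE
 # slicing decodes generated images one at a time; both trim peak VRAM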
 
 
+# DDIM inversion
+
+from diffusers.utils import load_image
+import inversion
+import numpy as np
+
+src_style = "medieval painting"
+src_prompt = f'Man laying in a bed, {src_style}.'
+image_path = './example_image/medieval-bed.jpeg'
+
+num_inference_steps = 50
+x0 = np.array(load_image(image_path).resize((1024, 1024)))
+zts = inversion.ddim_inversion(pipeline, x0, src_prompt, num_inference_steps, 2)
+# mediapy.show_image(x0, title="input reference image", height=256)
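+# `zts` holds the DDIM-inverted latent trajectory of the reference image;
+# it is replayed during sampling through the inversion callback created below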
 
 # run StyleAligned
+prompts = [
+    src_prompt,
+    "A man working on a laptop",
+    "A man eats pizza",
+    "A woman playing on saxophone",
+]
+
+# some parameters you can adjust to control fidelity to the reference
+shared_score_shift = np.log(2)  # higher value induces higher fidelity, set 0 for no shift
+shared_score_scale = 1.0  # higher value induces higher fidelity, set 1 for no rescale
+
+# for very famous images consider suppressing attention to the reference; an example configuration:
+# shared_score_shift = np.log(1)
+# shared_score_scale = 0.5
+
+for i in range(1, len(prompts)):
+    prompts[i] = f'{prompts[i]}, {src_style}.'
+
+handler = sa_handler.Handler(pipeline)
+sa_args = sa_handler.StyleAlignedArgs(
+    share_group_norm=True, share_layer_norm=True, share_attention=True,
+    adain_queries=True, adain_keys=True, adain_values=False,
+    shared_score_shift=shared_score_shift, shared_score_scale=shared_score_scale,)
+handler.register(sa_args)
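+# register() patches the UNet attention processors so every image in the batch
+# shares attention with the first (reference) image, with AdaIN on queries and keys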
+
+zT, inversion_callback = inversion.make_inversion_callback(zts, offset=5)
+
+g_cpu = torch.Generator(device='cpu')
+g_cpu.manual_seed(10)
+
+latents = torch.randn(len(prompts), 4, 128, 128, device='cpu', generator=g_cpu,
+                      dtype=pipeline.unet.dtype,).to('cuda:0')
+latents[0] = zT
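+# slot 0 starts from the inverted reference latent, so the first output
+# reconstructs the reference image and the remaining prompts inherit its style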
+
+images_a = pipeline(prompts, latents=latents,
+                    callback_on_step_end=inversion_callback,
+                    num_inference_steps=num_inference_steps, guidance_scale=10.0).images
+
+handler.remove()
+# mediapy.show_images(images_a, titles=[p[:-(len(src_style) + 3)] for p in prompts])
+
 
 @spaces.GPU
 def infer(prompts):
+    images = pipeline(prompts,).images
+    return images
 
 gr.Interface(
     fn=infer,
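
As committed, the StyleAligned + inversion demo runs once at import time while infer still issues a plain pipeline call. Below is a minimal sketch of folding that flow into the Gradio handler; it reuses the sa_args, handler, zts, and num_inference_steps objects defined above, and the newline-splitting of the textbox input is a hypothetical choice, not part of the commit.

@spaces.GPU
def infer(prompts):
    # one prompt per line, each suffixed with the reference style as above
    prompt_list = [src_prompt] + [f'{p.strip()}, {src_style}.'
                                  for p in prompts.splitlines() if p.strip()]
    handler.register(sa_args)
    zT, inversion_callback = inversion.make_inversion_callback(zts, offset=5)
    g_cpu = torch.Generator(device='cpu').manual_seed(10)
    latents = torch.randn(len(prompt_list), 4, 128, 128, generator=g_cpu,
                          dtype=pipeline.unet.dtype).to('cuda')
    latents[0] = zT  # reference slot reconstructs the inverted image
    images = pipeline(prompt_list, latents=latents,
                      callback_on_step_end=inversion_callback,
                      num_inference_steps=num_inference_steps,
                      guidance_scale=10.0).images
    handler.remove()
    return images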