radames committed on
Commit 6772c51
1 Parent(s): 6d61d37
Files changed (2):
  1. README.md +1 -1
  2. app.py +30 -28
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title: InstantStyle + Hyper SDXL
+title: InstantStyle + Hyper SD
 emoji: 🖼🎨⚡️
 colorFrom: blue
 colorTo: purple
app.py CHANGED
@@ -5,15 +5,17 @@ import tempfile
 import numpy as np
 from pathlib import Path
 from PIL import Image
-from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, TCDScheduler
+from diffusers import (
+    ControlNetModel,
+    StableDiffusionControlNetPipeline,
+    TCDScheduler,
+)
 import spaces
 import gradio as gr
 from huggingface_hub import hf_hub_download, snapshot_download
-from ip_adapter import IPAdapterXL
+from ip_adapter import IPAdapter

-snapshot_download(
-    repo_id="h94/IP-Adapter", allow_patterns="sdxl_models/*", local_dir="."
-)
+snapshot_download(repo_id="h94/IP-Adapter", allow_patterns="models/*", local_dir=".")

 # global variable
 MAX_SEED = np.iinfo(np.int32).max
@@ -21,48 +23,44 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32

 # initialization
-base_model_path = "stabilityai/stable-diffusion-xl-base-1.0"
-image_encoder_path = "sdxl_models/image_encoder"
-ip_ckpt = "sdxl_models/ip-adapter_sdxl.bin"
+base_model_path = "runwayml/stable-diffusion-v1-5"
+image_encoder_path = "models/image_encoder"
+ip_ckpt = "models/ip-adapter_sd15.bin"

-controlnet_path = "diffusers/controlnet-canny-sdxl-1.0"
+controlnet_path = "lllyasviel/control_v11p_sd15_canny"
 controlnet = ControlNetModel.from_pretrained(
     controlnet_path, use_safetensors=False, torch_dtype=torch.float16
 ).to(device)

 # load Hyper SD

-pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+pipe = StableDiffusionControlNetPipeline.from_pretrained(
     base_model_path,
     controlnet=controlnet,
     torch_dtype=torch.float16,
     variant="fp16",
-    add_watermarker=False,
 ).to(device)
 pipe.set_progress_bar_config(disable=True)
 pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
 pipe.load_lora_weights(
-    hf_hub_download("ByteDance/Hyper-SD", "Hyper-SDXL-1step-lora.safetensors")
+    hf_hub_download("ByteDance/Hyper-SD", "Hyper-SD15-1step-lora.safetensors")
 )
+pipe.enable_vae_tiling()
+
 eta = 1.0

-# load ip-adapter
-# target_blocks=["block"] for original IP-Adapter
-# target_blocks=["up_blocks.0.attentions.1"] for style blocks only
-# target_blocks = ["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"] # for style+layout blocks
-ip_model = IPAdapterXL(
+ip_model = IPAdapter(
     pipe,
     image_encoder_path,
     ip_ckpt,
     device,
-    target_blocks=["up_blocks.0.attentions.1"],
 )


 def resize_img(
     input_image,
     max_side=1280,
-    min_side=1024,
+    min_side=512,
     size=None,
     pad_to_max_side=False,
     mode=Image.BILINEAR,
@@ -165,42 +163,44 @@ def create_image(
     seed = random.randint(0, MAX_SEED) if seed == -1 else seed
     if target == "Load original IP-Adapter":
         # target_blocks=["blocks"] for original IP-Adapter
-        ip_model = IPAdapterXL(
+        ip_model = IPAdapter(
             pipe, image_encoder_path, ip_ckpt, device, target_blocks=["blocks"]
         )
     elif target == "Load only style blocks":
         # target_blocks=["up_blocks.0.attentions.1"] for style blocks only
-        ip_model = IPAdapterXL(
+        ip_model = IPAdapter(
             pipe,
             image_encoder_path,
             ip_ckpt,
             device,
-            target_blocks=["up_blocks.0.attentions.1"],
+            target_blocks=["up_blocks.1"],
         )
     elif target == "Load style+layout block":
         # target_blocks = ["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"] # for style+layout blocks
-        ip_model = IPAdapterXL(
+        ip_model = IPAdapter(
             pipe,
             image_encoder_path,
             ip_ckpt,
             device,
-            target_blocks=["up_blocks.0.attentions.1", "down_blocks.2.attentions.1"],
+            target_blocks=["down_blocks.2", "mid_block", "up_blocks.1"],
         )

     if input_image is not None:
-        input_image = resize_img(input_image, max_side=1024)
+        input_image = resize_img(input_image, max_side=512)
         cv_input_image = pil_to_cv2(input_image)
         detected_map = cv2.Canny(cv_input_image, 50, 200)
         canny_map = Image.fromarray(cv2.cvtColor(detected_map, cv2.COLOR_BGR2RGB))
     else:
-        canny_map = Image.new("RGB", (1024, 1024), color=(255, 255, 255))
+        canny_map = Image.new("RGB", (512, 512), color=(255, 255, 255))
         control_scale = 0

     if float(control_scale) == 0:
-        canny_map = canny_map.resize((1024, 1024))
+        canny_map = canny_map.resize((512, 512))

     if len(neg_content_prompt) > 0 and neg_content_scale != 0:
         images = ip_model.generate(
+            width=512,
+            height=512,
             pil_image=image_pil,
             prompt=prompt,
             negative_prompt=n_prompt,
@@ -217,6 +217,8 @@ def create_image(
         )
     else:
         images = ip_model.generate(
+            width=512,
+            height=512,
             pil_image=image_pil,
             prompt=prompt,
             negative_prompt=n_prompt,
@@ -340,7 +342,7 @@ with block:
                 minimum=1,
                 maximum=10.0,
                 step=1.0,
-                value=1,
+                value=3,
                 label="num inference steps",
             )
             seed = gr.Slider(
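The commit also retargets InstantStyle's attention-block selection from SDXL block names to their SD 1.5 counterparts. A sketch continuing from the pipeline above, using the Space's bundled ip_adapter package; the style_image path, seed, and num_inference_steps values are illustrative assumptions drawn from the rest of the app rather than from this diff.

from ip_adapter import IPAdapter  # bundled with the Space (InstantStyle)

# SD 1.5's UNet has no "up_blocks.0.attentions.1", so the SDXL names map to:
#   original IP-Adapter -> ["blocks"] (inject into every attention block)
#   style only          -> ["up_blocks.1"]
#   style + layout      -> ["down_blocks.2", "mid_block", "up_blocks.1"]
ip_model = IPAdapter(
    pipe,  # the pipeline assembled above
    "models/image_encoder",
    "models/ip-adapter_sd15.bin",
    device,
    target_blocks=["up_blocks.1"],  # style-only variant
)

style_image = Image.open("style.jpg")  # hypothetical style reference
images = ip_model.generate(
    pil_image=style_image,
    prompt="a cat, masterpiece, best quality",
    negative_prompt="lowres, bad anatomy, worst quality",
    width=512,
    height=512,
    image=canny_map,                    # Canny control image from the sketch above
    controlnet_conditioning_scale=0.0,  # 0 disables the ControlNet branch
    num_inference_steps=3,              # the slider default this commit sets
    seed=42,
)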