feishen29 committed
Commit b91efff · verified · Parent(s): 2afb5eb

Upload app.py

Files changed (1): app.py (+14 −62)
app.py CHANGED
@@ -39,37 +39,12 @@ from insightface.app import FaceAnalysis
 from insightface.utils import face_align
 
 
-# device = 'cuda:2' if torch.cuda.is_available() else 'cpu'
-
 parser = argparse.ArgumentParser(description='IMAGDressing-v1')
-# parser.add_argument('--if_resampler', type=bool, default=True)
 parser.add_argument('--if_ipa', type=bool, default=True)
 parser.add_argument('--if_control', type=bool, default=True)
 
-# parser.add_argument('--pretrained_model_name_or_path',
-#                     default="./ckpt/Realistic_Vision_V4.0_noVAE",
-#                     type=str)
-# parser.add_argument('--ip_ckpt',
-#                     default="./ckpt/ip-adapter-faceid-plus_sd15.bin",
-#                     type=str)
-# parser.add_argument('--pretrained_image_encoder_path',
-#                     default="./ckpt/image_encoder/",
-#                     type=str)
-# parser.add_argument('--pretrained_vae_model_path',
-#                     default="./ckpt/sd-vae-ft-mse/",
-#                     type=str)
-# parser.add_argument('--model_ckpt',
-#                     default="./ckpt/IMAGDressing-v1_512.pt",
-#                     type=str)
-# parser.add_argument('--output_path', type=str, default="./output_ipa_control_resampler")
-# # parser.add_argument('--device', type=str, default="cuda:0")
 args = parser.parse_args()
 
-# save path
-# output_path = args.output_path
-#
-# if not os.path.exists(output_path):
-#     os.makedirs(output_path)
 
 
 args.device = "cuda"
@@ -80,8 +55,6 @@ text_encoder = CLIPTextModel.from_pretrained("SG161222/Realistic_Vision_V4.0_noV
 image_encoder = CLIPVisionModelWithProjection.from_pretrained("h94/IP-Adapter", subfolder="models/image_encoder").to(dtype=torch.float16, device=args.device)
 unet = UNet2DConditionModel.from_pretrained("SG161222/Realistic_Vision_V4.0_noVAE", subfolder="unet").to(dtype=torch.float16, device=args.device)
 
-image_face_fusion = pipeline('face_fusion_torch', model='damo/cv_unet_face_fusion_torch', model_revision='v1.0.3')
-
 # face model
 app = FaceAnalysis(model_path="buffalo_l", providers=[('CUDAExecutionProvider', {"device_id": args.device})])  ## use GPU 0; the default buffalo_l model is sufficient
 app.prepare(ctx_id=0, det_size=(640, 640))
@@ -112,7 +85,7 @@ for name in unet.attn_processors.keys():
     elif name.startswith("down_blocks"):
         block_id = int(name[len("down_blocks.")])
         hidden_size = unet.config.block_out_channels[block_id]
-        # lora_rank = hidden_size // 2  # args.lora_rank
+
     if cross_attention_dim is None:
         attn_procs[name] = RefLoraSAttnProcessor2_0(name, hidden_size)
     else:
@@ -161,18 +134,10 @@ noise_scheduler = DDIMScheduler(
    set_alpha_to_one=False,
    steps_offset=1,
 )
-# noise_scheduler = UniPCMultistepScheduler.from_config(args.pretrained_model_name_or_path, subfolder="scheduler")
 
 control_net_openpose = ControlNetModel.from_pretrained(
     "lllyasviel/control_v11p_sd15_openpose",
     torch_dtype=torch.float16).to(device=args.device)
-# pipe = PipIpaControlNet(unet=unet, reference_unet=ref_unet, vae=vae, tokenizer=tokenizer,
-#                         text_encoder=text_encoder, image_encoder=image_encoder,
-#                         ip_ckpt=args.ip_ckpt,
-#                         ImgProj=image_proj, controlnet=control_net_openpose,
-#                         scheduler=noise_scheduler,
-#                         safety_checker=StableDiffusionSafetyChecker,
-#                         feature_extractor=CLIPImageProcessor)
 
 img_transform = transforms.Compose([
     transforms.Resize([640, 512], interpolation=transforms.InterpolationMode.BILINEAR),
@@ -197,33 +162,27 @@ def resize_img(input_image, max_side=640, min_side=512, size=None,
 @spaces.GPU
 def dress_process(garm_img, face_img, pose_img, prompt, cloth_guidance_scale, caption_guidance_scale,
                   face_guidance_scale, self_guidance_scale, cross_guidance_scale, if_ipa, if_post, if_control, denoise_steps, seed=42):
-    # prompt = prompt + ', confident smile expression, fashion, best quality, amazing quality, very aesthetic'
+
     if prompt is None:
         prompt = "a photography of a model"
     prompt = prompt + ', best quality, high quality'
     print(prompt, cloth_guidance_scale, if_ipa, if_control, denoise_steps, seed)
     clip_image_processor = CLIPImageProcessor()
-    # clothes_img = garm_img.convert("RGB")
+
     if not garm_img:
         raise gr.Error("请上传衣服 / Please upload garment")
     clothes_img = resize_img(garm_img)
     vae_clothes = img_transform(clothes_img).unsqueeze(0)
-    # print(vae_clothes.shape)
     ref_clip_image = clip_image_processor(images=clothes_img, return_tensors="pt").pixel_values
 
     if if_ipa:
-        # image = cv2.imread(face_img)
         faces = app.get(face_img)
-
         if not faces:
             raise gr.Error("人脸检测异常,尝试其他肖像 / Abnormal face detection. Try another portrait")
         faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
         face_image = face_align.norm_crop(face_img, landmark=faces[0].kps, image_size=224)  # you can also segment the face
 
-        # face_img = face_image[:, :, ::-1]
-        # face_img = Image.fromarray(face_image.astype('uint8'))
-        # face_img.save('face.png')
-
+
         face_clip_image = clip_image_processor(images=face_image, return_tensors="pt").pixel_values
     else:
         faceid_embeds = None
@@ -235,9 +194,6 @@ def dress_process(garm_img, face_img, pose_img, prompt, cloth_guidance_scale, ca
         pose_image = diffusers.utils.load_image(pose_img)
     else:
         pose_image = None
-    # print(if_ipa, if_control)
-    # pipe, generator = prepare_pipeline(args, if_ipa, if_control, unet, ref_unet, vae, tokenizer, text_encoder,
-    #                                    image_encoder, image_proj, control_net_openpose)
 
     noise_scheduler = DDIMScheduler(
         num_train_timesteps=1000,
@@ -248,7 +204,7 @@ def dress_process(garm_img, face_img, pose_img, prompt, cloth_guidance_scale, ca
         set_alpha_to_one=False,
         steps_offset=1,
     )
-    # noise_scheduler = UniPCMultistepScheduler.from_config(args.pretrained_model_name_or_path, subfolder="scheduler")
+
    pipe = PipIpaControlNet(unet=unet, reference_unet=ref_unet, vae=vae, tokenizer=tokenizer,
                            text_encoder=text_encoder, image_encoder=image_encoder,
                            ip_ckpt='./ckpt/ip-adapter-faceid-plus_sd15.bin',
@@ -279,11 +235,12 @@ def dress_process(garm_img, face_img, pose_img, prompt, cloth_guidance_scale, ca
     ).images
 
     if if_post and if_ipa:
-        # convert the PIL image to a NumPy array
+        image_face_fusion = pipeline('face_fusion_torch', model='damo/cv_unet_face_fusion_torch',
+                                     model_revision='v1.0.3')
         output_array = np.array(output[0])
-        # convert the RGB image to a BGR image
+
         bgr_array = cv2.cvtColor(output_array, cv2.COLOR_RGB2BGR)
-        # convert the NumPy array to a PIL image
+
         bgr_image = Image.fromarray(bgr_array)
         result = image_face_fusion(dict(template=bgr_image, user=Image.fromarray(face_image.astype('uint8'))))
         return result[OutputKeys.OUTPUT_IMG]
@@ -349,11 +306,8 @@ with image_blocks as demo:
                           outputs=pose_img,
                           examples=pose_list_path)
 
-        # with gr.Column():
-        #     # image_out = gr.Image(label="Output", elem_id="output-img", height=400)
-        #     masked_img = gr.Image(label="Masked image output", elem_id="masked-img", show_share_button=False)
+
         with gr.Column():
-            # image_out = gr.Image(label="Output", elem_id="output-img", height=400)
            image_out = gr.Image(label="Output", elem_id="output-img", show_share_button=False)
            # Add usage tips below the output image
            gr.Markdown("""
@@ -367,19 +321,17 @@
             """)
         with gr.Column():
             try_button = gr.Button(value="Dressing")
-            with gr.Accordion(label="Advanced Settings", open=False):
+            with gr.Accordion(label="Advanced Settings", open=True):
                 with gr.Row(elem_id="prompt-container"):
                     with gr.Row():
                         prompt = gr.Textbox(placeholder="Description of prompt ex) A beautiful woman dress Short Sleeve Round Neck T-shirts", value='A beautiful woman',
                                             show_label=False, elem_id="prompt")
-                # with gr.Row():
-                #     neg_prompt = gr.Textbox(placeholder="Description of neg prompt ex) Short Sleeve Round Neck T-shirts",
-                #                             show_label=False, elem_id="neg_prompt")
+
                 with gr.Row():
-                    cloth_guidance_scale = gr.Slider(label="Cloth guidance Scale", minimum=0.0, maximum=1.0, value=0.9, step=0.1,
+                    cloth_guidance_scale = gr.Slider(label="Cloth guidance Scale", minimum=0.0, maximum=1.0, value=0.85, step=0.1,
                                                      visible=True)
                 with gr.Row():
-                    caption_guidance_scale = gr.Slider(label="Prompt Guidance Scale", minimum=1, maximum=10., value=7.0, step=0.1,
+                    caption_guidance_scale = gr.Slider(label="Prompt Guidance Scale", minimum=1, maximum=10., value=6.5, step=0.1,
                                                        visible=True)
                 with gr.Row():
                     face_guidance_scale = gr.Slider(label="Face Guidance Scale", minimum=0.0, maximum=2.0, value=0.9, step=0.1,
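
Note on the relocated face-fusion pipeline: this commit drops the module-level image_face_fusion = pipeline(...) call and instead builds the ModelScope pipeline inside dress_process, only when the `if_post and if_ipa` branch runs, presumably to keep the fusion model off the import path (module-level code in a @spaces.GPU app executes without a GPU attached). As written, though, the pipeline is rebuilt on every post-processed request. A minimal sketch of caching it after first use; get_face_fusion is a hypothetical helper, not part of this commit:

from modelscope.pipelines import pipeline

_image_face_fusion = None  # cached ModelScope face-fusion pipeline

def get_face_fusion():
    # Hypothetical helper: build the pipeline once on first use, reuse afterwards.
    global _image_face_fusion
    if _image_face_fusion is None:
        _image_face_fusion = pipeline('face_fusion_torch',
                                      model='damo/cv_unet_face_fusion_torch',
                                      model_revision='v1.0.3')
    return _image_face_fusion

With this, the post-processing branch would call get_face_fusion()(dict(template=bgr_image, user=...)) instead of constructing the pipeline inline.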
 