Spaces:
Running
on
Zero
Running
on
Zero
adaface-neurips
committed on
Commit
·
b0b5a77
1
Parent(s):
0b38fab
add link to adaface, various improvements
Browse files
- adaface/adaface_wrapper.py +15 -7
- adaface/util.py +7 -6
- app.py +14 -5
- infer.py +2 -1
adaface/adaface_wrapper.py
CHANGED
@@ -12,6 +12,8 @@ from insightface.app import FaceAnalysis
|
|
12 |
from adaface.arc2face_models import CLIPTextModelWrapper
|
13 |
from adaface.util import get_arc2face_id_prompt_embs
|
14 |
import re, os
|
|
|
|
|
15 |
|
16 |
class AdaFaceWrapper(nn.Module):
|
17 |
def __init__(self, pipeline_name, base_model_path, adaface_ckpt_path, device,
|
@@ -216,7 +218,7 @@ class AdaFaceWrapper(nn.Module):
|
|
216 |
# NOTE: Since return_core_id_embs is True, id_prompt_emb is only the 16 core ID embeddings.
|
217 |
# arc2face prompt template: "photo of a id person"
|
218 |
# ID embeddings start from "id person ...". So there are 3 template tokens before the 16 ID embeddings.
|
219 |
-
faceid_embeds, id_prompt_emb \
|
220 |
= get_arc2face_id_prompt_embs(self.face_app, self.pipeline.tokenizer, self.arc2face_text_encoder,
|
221 |
extract_faceid_embeds=not gen_rand_face,
|
222 |
pre_face_embs=pre_face_embs,
|
@@ -235,6 +237,9 @@ class AdaFaceWrapper(nn.Module):
|
|
235 |
gen_neg_prompt=False,
|
236 |
verbose=True)
|
237 |
|
|
|
|
|
|
|
238 |
# adaface_subj_embs: [1, 1, 16, 768].
|
239 |
# adaface_prompt_embs: [1, 77, 768] (not used).
|
240 |
adaface_subj_embs, adaface_prompt_embs = \
|
@@ -248,7 +253,7 @@ class AdaFaceWrapper(nn.Module):
|
|
248 |
self.update_text_encoder_subj_embs(adaface_subj_embs)
|
249 |
return adaface_subj_embs
|
250 |
|
251 |
-
def encode_prompt(self, prompt, device="cuda", verbose=False):
|
252 |
prompt = self.update_prompt(prompt)
|
253 |
if verbose:
|
254 |
print(f"Prompt: {prompt}")
|
@@ -259,14 +264,16 @@ class AdaFaceWrapper(nn.Module):
|
|
259 |
# prompt_embeds_, negative_prompt_embeds_: [1, 77, 768]
|
260 |
prompt_embeds_, negative_prompt_embeds_ = \
|
261 |
self.pipeline.encode_prompt(prompt, device=device, num_images_per_prompt=1,
|
262 |
-
do_classifier_free_guidance=True, negative_prompt=
|
263 |
return prompt_embeds_, negative_prompt_embeds_
|
264 |
|
265 |
# ref_img_strength is used only in the img2img pipeline.
|
266 |
-
def forward(self, noise, prompt,
|
|
|
|
|
|
|
267 |
# prompt_embeds_, negative_prompt_embeds_: [1, 77, 768]
|
268 |
-
prompt_embeds_, negative_prompt_embeds_ = self.encode_prompt(prompt, device=self.device, verbose=verbose)
|
269 |
-
|
270 |
# Repeat the prompt embeddings for all images in the batch.
|
271 |
prompt_embeds_ = prompt_embeds_.repeat(out_image_count, 1, 1)
|
272 |
negative_prompt_embeds_ = negative_prompt_embeds_.repeat(out_image_count, 1, 1)
|
@@ -280,7 +287,8 @@ class AdaFaceWrapper(nn.Module):
|
|
280 |
num_inference_steps=self.num_inference_steps,
|
281 |
guidance_scale=guidance_scale,
|
282 |
num_images_per_prompt=1,
|
283 |
-
strength=ref_img_strength
|
|
|
284 |
# images: [BS, 3, 512, 512]
|
285 |
return images
|
286 |
|
|
|
12 |
from adaface.arc2face_models import CLIPTextModelWrapper
|
13 |
from adaface.util import get_arc2face_id_prompt_embs
|
14 |
import re, os
|
15 |
+
import sys
|
16 |
+
sys.modules['ldm'] = sys.modules['adaface']
|
17 |
|
18 |
class AdaFaceWrapper(nn.Module):
|
19 |
def __init__(self, pipeline_name, base_model_path, adaface_ckpt_path, device,
|
|
|
218 |
# NOTE: Since return_core_id_embs is True, id_prompt_emb is only the 16 core ID embeddings.
|
219 |
# arc2face prompt template: "photo of a id person"
|
220 |
# ID embeddings start from "id person ...". So there are 3 template tokens before the 16 ID embeddings.
|
221 |
+
face_image_count, faceid_embeds, id_prompt_emb \
|
222 |
= get_arc2face_id_prompt_embs(self.face_app, self.pipeline.tokenizer, self.arc2face_text_encoder,
|
223 |
extract_faceid_embeds=not gen_rand_face,
|
224 |
pre_face_embs=pre_face_embs,
|
|
|
237 |
gen_neg_prompt=False,
|
238 |
verbose=True)
|
239 |
|
240 |
+
if face_image_count == 0:
|
241 |
+
return None
|
242 |
+
|
243 |
# adaface_subj_embs: [1, 1, 16, 768].
|
244 |
# adaface_prompt_embs: [1, 77, 768] (not used).
|
245 |
adaface_subj_embs, adaface_prompt_embs = \
|
|
|
253 |
self.update_text_encoder_subj_embs(adaface_subj_embs)
|
254 |
return adaface_subj_embs
|
255 |
|
256 |
+
def encode_prompt(self, prompt, negative_prompt, device="cuda", verbose=False):
|
257 |
prompt = self.update_prompt(prompt)
|
258 |
if verbose:
|
259 |
print(f"Prompt: {prompt}")
|
|
|
264 |
# prompt_embeds_, negative_prompt_embeds_: [1, 77, 768]
|
265 |
prompt_embeds_, negative_prompt_embeds_ = \
|
266 |
self.pipeline.encode_prompt(prompt, device=device, num_images_per_prompt=1,
|
267 |
+
do_classifier_free_guidance=True, negative_prompt=negative_prompt)
|
268 |
return prompt_embeds_, negative_prompt_embeds_
|
269 |
|
270 |
# ref_img_strength is used only in the img2img pipeline.
|
271 |
+
def forward(self, noise, prompt, negative_prompt=None, guidance_scale=4.0,
|
272 |
+
out_image_count=4, ref_img_strength=0.8, generator=None, verbose=False):
|
273 |
+
if negative_prompt is None:
|
274 |
+
negative_prompt = self.negative_prompt
|
275 |
# prompt_embeds_, negative_prompt_embeds_: [1, 77, 768]
|
276 |
+
prompt_embeds_, negative_prompt_embeds_ = self.encode_prompt(prompt, negative_prompt, device=self.device, verbose=verbose)
|
|
|
277 |
# Repeat the prompt embeddings for all images in the batch.
|
278 |
prompt_embeds_ = prompt_embeds_.repeat(out_image_count, 1, 1)
|
279 |
negative_prompt_embeds_ = negative_prompt_embeds_.repeat(out_image_count, 1, 1)
|
|
|
287 |
num_inference_steps=self.num_inference_steps,
|
288 |
guidance_scale=guidance_scale,
|
289 |
num_images_per_prompt=1,
|
290 |
+
strength=ref_img_strength,
|
291 |
+
generator=generator).images
|
292 |
# images: [BS, 3, 512, 512]
|
293 |
return images
|
294 |
|
adaface/util.py
CHANGED
@@ -246,8 +246,9 @@ def get_arc2face_id_prompt_embs(face_app, clip_tokenizer, arc2face_text_encoder,
|
|
246 |
input_max_length=77, noise_level=0.0,
|
247 |
return_core_id_embs=False,
|
248 |
gen_neg_prompt=False, verbose=False):
|
|
|
|
|
249 |
if extract_faceid_embeds:
|
250 |
-
image_count = 0
|
251 |
faceid_embeds = []
|
252 |
if image_paths is not None:
|
253 |
images_np = []
|
@@ -275,13 +276,13 @@ def get_arc2face_id_prompt_embs(face_app, clip_tokenizer, arc2face_text_encoder,
|
|
275 |
face_info = sorted(face_infos, key=lambda x:(x['bbox'][2]-x['bbox'][0])*x['bbox'][3]-x['bbox'][1])[-1]
|
276 |
# Each faceid_embed: [1, 512]
|
277 |
faceid_embeds.append(torch.from_numpy(face_info.normed_embedding).unsqueeze(0))
|
278 |
-
|
279 |
|
280 |
if verbose:
|
281 |
if image_folder is not None:
|
282 |
-
print(f"Extracted ID embeddings from {
|
283 |
else:
|
284 |
-
print(f"Extracted ID embeddings from {
|
285 |
|
286 |
if len(faceid_embeds) == 0:
|
287 |
print("No face detected. Use a random face instead.")
|
@@ -335,7 +336,7 @@ def get_arc2face_id_prompt_embs(face_app, clip_tokenizer, arc2face_text_encoder,
|
|
335 |
|
336 |
#if extract_faceid_embeds:
|
337 |
# arc2face_neg_prompt_emb = arc2face_neg_prompt_emb.repeat(id_batch_size, 1, 1)
|
338 |
-
return faceid_embeds, arc2face_pos_prompt_emb, arc2face_neg_prompt_emb
|
339 |
else:
|
340 |
-
return faceid_embeds, arc2face_pos_prompt_emb
|
341 |
|
|
|
246 |
input_max_length=77, noise_level=0.0,
|
247 |
return_core_id_embs=False,
|
248 |
gen_neg_prompt=False, verbose=False):
|
249 |
+
face_image_count = 0
|
250 |
+
|
251 |
if extract_faceid_embeds:
|
|
|
252 |
faceid_embeds = []
|
253 |
if image_paths is not None:
|
254 |
images_np = []
|
|
|
276 |
face_info = sorted(face_infos, key=lambda x:(x['bbox'][2]-x['bbox'][0])*x['bbox'][3]-x['bbox'][1])[-1]
|
277 |
# Each faceid_embed: [1, 512]
|
278 |
faceid_embeds.append(torch.from_numpy(face_info.normed_embedding).unsqueeze(0))
|
279 |
+
face_image_count += 1
|
280 |
|
281 |
if verbose:
|
282 |
if image_folder is not None:
|
283 |
+
print(f"Extracted ID embeddings from {face_image_count} images in {image_folder}")
|
284 |
else:
|
285 |
+
print(f"Extracted ID embeddings from {face_image_count} images")
|
286 |
|
287 |
if len(faceid_embeds) == 0:
|
288 |
print("No face detected. Use a random face instead.")
|
|
|
336 |
|
337 |
#if extract_faceid_embeds:
|
338 |
# arc2face_neg_prompt_emb = arc2face_neg_prompt_emb.repeat(id_batch_size, 1, 1)
|
339 |
+
return face_image_count, faceid_embeds, arc2face_pos_prompt_emb, arc2face_neg_prompt_emb
|
340 |
else:
|
341 |
+
return face_image_count, faceid_embeds, arc2face_pos_prompt_emb
|
342 |
|
app.py
CHANGED
@@ -76,8 +76,13 @@ def gen_init_images(uploaded_image_paths, prompt, adaface_id_cfg_scale, out_imag
|
|
76 |
# [('/tmp/gradio/249981e66a7c665aaaf1c7eaeb24949af4366c88/jensen huang.jpg', None)]
|
77 |
# Extract the file paths.
|
78 |
uploaded_image_paths = [path[0] for path in uploaded_image_paths]
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
81 |
# Generate two images each time for the user to select from.
|
82 |
noise = torch.randn(out_image_count, 3, 512, 512)
|
83 |
# samples: A list of PIL Image instances.
|
@@ -163,7 +168,7 @@ def generate_image(image_container, uploaded_image_paths, init_img_file_paths, i
|
|
163 |
save_videos_grid(sample, save_sample_path)
|
164 |
return save_sample_path
|
165 |
|
166 |
-
def
|
167 |
if not prompt:
|
168 |
raise gr.Error("Prompt cannot be blank")
|
169 |
|
@@ -229,7 +234,11 @@ with gr.Blocks(css=css) as demo:
|
|
229 |
❗️❗️❗️**Tips:**
|
230 |
- You can upload one or more subject images for generating ID-specific video.
|
231 |
- Try different parameter combinations for the best generation quality.
|
232 |
-
-
|
|
|
|
|
|
|
|
|
233 |
"""
|
234 |
)
|
235 |
|
@@ -401,7 +410,7 @@ with gr.Blocks(css=css) as demo:
|
|
401 |
outputs=[uploaded_init_img_gallery, init_img_files, init_clear_button_column])
|
402 |
uploaded_init_img_gallery.select(fn=get_clicked_image, inputs=None, outputs=init_img_selected_idx)
|
403 |
|
404 |
-
submit.click(fn=
|
405 |
inputs=[prompt],outputs=None).success(
|
406 |
fn=randomize_seed_fn,
|
407 |
inputs=[seed, randomize_seed],
|
|
|
76 |
# [('/tmp/gradio/249981e66a7c665aaaf1c7eaeb24949af4366c88/jensen huang.jpg', None)]
|
77 |
# Extract the file paths.
|
78 |
uploaded_image_paths = [path[0] for path in uploaded_image_paths]
|
79 |
+
adaface_subj_embs = \
|
80 |
+
adaface.generate_adaface_embeddings(image_folder=None, image_paths=uploaded_image_paths,
|
81 |
+
out_id_embs_scale=adaface_id_cfg_scale, update_text_encoder=True)
|
82 |
+
|
83 |
+
if adaface_subj_embs is None:
|
84 |
+
raise gr.Error(f"Failed to detect any faces! Please try with other images")
|
85 |
+
|
86 |
# Generate two images each time for the user to select from.
|
87 |
noise = torch.randn(out_image_count, 3, 512, 512)
|
88 |
# samples: A list of PIL Image instances.
|
|
|
168 |
save_videos_grid(sample, save_sample_path)
|
169 |
return save_sample_path
|
170 |
|
171 |
+
def validate_prompt(prompt):
|
172 |
if not prompt:
|
173 |
raise gr.Error("Prompt cannot be blank")
|
174 |
|
|
|
234 |
❗️❗️❗️**Tips:**
|
235 |
- You can upload one or more subject images for generating ID-specific video.
|
236 |
- Try different parameter combinations for the best generation quality.
|
237 |
+
- Usage explanations and demos: [Readme](https://huggingface.co/spaces/adaface-neurips/adaface-animate/blob/main/README2.md).
|
238 |
+
- AdaFace Text-to-Image: <a href="https://huggingface.co/spaces/adaface-neurips/adaface" style="display: inline-flex; align-items: center;">
|
239 |
+
AdaFace
|
240 |
+
<img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-yellow" alt="Hugging Face Spaces" style="margin-left: 5px;">
|
241 |
+
</a>
|
242 |
"""
|
243 |
)
|
244 |
|
|
|
410 |
outputs=[uploaded_init_img_gallery, init_img_files, init_clear_button_column])
|
411 |
uploaded_init_img_gallery.select(fn=get_clicked_image, inputs=None, outputs=init_img_selected_idx)
|
412 |
|
413 |
+
submit.click(fn=validate_prompt,
|
414 |
inputs=[prompt],outputs=None).success(
|
415 |
fn=randomize_seed_fn,
|
416 |
inputs=[seed, randomize_seed],
|
infer.py
CHANGED
@@ -64,7 +64,8 @@ def load_model(base_model_type="sar", adaface_base_model_type="sar",
|
|
64 |
# scheduler=DPMSolverMultistepScheduler(**OmegaConf.to_container(inference_config.DPMSolver_scheduler_kwargs)
|
65 |
# scheduler=EulerAncestralDiscreteScheduler(**OmegaConf.to_container(inference_config.noise_scheduler_kwargs)
|
66 |
# scheduler=EulerAncestralDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="linear",steps_offset=1
|
67 |
-
),
|
|
|
68 |
).to(device=device)
|
69 |
|
70 |
pipeline = load_weights(
|
|
|
64 |
# scheduler=DPMSolverMultistepScheduler(**OmegaConf.to_container(inference_config.DPMSolver_scheduler_kwargs)
|
65 |
# scheduler=EulerAncestralDiscreteScheduler(**OmegaConf.to_container(inference_config.noise_scheduler_kwargs)
|
66 |
# scheduler=EulerAncestralDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="linear",steps_offset=1
|
67 |
+
),
|
68 |
+
torch_dtype=torch.float16,
|
69 |
).to(device=device)
|
70 |
|
71 |
pipeline = load_weights(
|