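# Gradio demo for IP-Adapter FaceID Plus: generate images from a text prompt
# plus the identity of a face extracted from a reference photo.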
from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus
from insightface.app import FaceAnalysis
from insightface.utils import face_align
from huggingface_hub import hf_hub_download
import torch
from PIL import Image
import cv2
import gradio as gr
# Download the FaceID Plus checkpoint and the CLIP image encoder it expects.
hf_hub_download(repo_id='h94/IP-Adapter-FaceID', filename='ip-adapter-faceid-plus_sd15.bin', local_dir='IP-Adapter-FaceID')
hf_hub_download(repo_id='h94/IP-Adapter', filename='models/image_encoder/config.json', local_dir='IP-Adapter')
hf_hub_download(repo_id='h94/IP-Adapter', filename='models/image_encoder/pytorch_model.bin', local_dir='IP-Adapter')
def get_ip_model():
    base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
    vae_model_path = "stabilityai/sd-vae-ft-mse"
    image_encoder_path = "IP-Adapter/models/image_encoder"
    ip_ckpt = "IP-Adapter-FaceID/ip-adapter-faceid-plus_sd15.bin"

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    print(f'Using device: {device}')

    # DDIM noise scheduler for the SD 1.5 pipeline.
    noise_scheduler = DDIMScheduler(
        num_train_timesteps=1000, beta_start=0.00085, beta_end=0.012,
        beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False, steps_offset=1,
    )
    vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch_dtype)
    pipe = StableDiffusionPipeline.from_pretrained(
        base_model_path,
        torch_dtype=torch_dtype,
        scheduler=noise_scheduler,
        vae=vae,
        feature_extractor=None,
        safety_checker=None,
    )
    # Wrap the pipeline with the IP-Adapter FaceID Plus adapter.
    ip_model = IPAdapterFaceIDPlus(pipe, image_encoder_path, ip_ckpt, device, num_tokens=4, torch_dtype=torch_dtype)
    return ip_model
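

# The FaceID "Plus" checkpoint conditions on two face signals: the InsightFace ID
# embedding and a CLIP encoding of the aligned face crop, so generate_images below
# passes both faceid_embeds and face_image to the adapter.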
def generate_images(prompt, img_filepath,
                    negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality, blurry",
                    img_prompt_scale=0.5, num_inference_steps=30, seed=None, n_images=1):
    # `ip_model` and `app` are module-level globals created in the __main__ block below.
    # Read the reference image (OpenCV loads it as BGR) and detect the faces in it.
    image = cv2.imread(img_filepath)
    faces = app.get(image)
    # ID embedding of the first detected face, plus an aligned 200x200 crop of it.
    faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
    face_image = face_align.norm_crop(image, landmark=faces[0].kps, image_size=200)
    images = ip_model.generate(
        prompt=prompt, negative_prompt=negative_prompt, face_image=face_image, faceid_embeds=faceid_embeds,
        num_samples=n_images, width=512, height=512, num_inference_steps=num_inference_steps, seed=seed,
        scale=img_prompt_scale,
    )
    # Return the first generated image and the face crop converted from BGR to RGB.
    return [images[0], Image.fromarray(face_image[..., [2, 1, 0]])]
if __name__ == "__main__":
    ip_model = get_ip_model()
    # InsightFace detection + recognition models; det_thresh=0.2 keeps detection permissive.
    app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    app.prepare(ctx_id=0, det_size=(512, 512), det_thresh=0.2)
    with gr.Blocks() as demo:
        gr.Markdown(
            """
            # ✨ Image Prompt Adapter With FaceID 🧙‍♂️
            Unleash the magic of generating whimsical images with just an image and a sprinkle of text! Learn the secrets here: [Magic Link](https://huggingface.co/h94/IP-Adapter-FaceID)
            🚀 This enchanting demo is designed to soar on GPU. While it can still dance on CPU, conjuring just one image might take up to 600 seconds, compared to the blink-of-an-eye magic on GPU! ✨
            """)
        with gr.Row():
            with gr.Column():
                demo_inputs = []
                demo_inputs.append(gr.Textbox(label='text prompt', value='A bold rider on a white horse'))
                demo_inputs.append(gr.Image(type='filepath', label='image prompt'))
                with gr.Accordion(label='Advanced options', open=False):
                    demo_inputs.append(gr.Textbox(
                        label='negative text prompt',
                        value="deformed hands, watermark, text, deformed fingers, blurred faces, irregular face, irregular body shape, ugly eyes, deformed face, squint, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, poorly framed, extra limbs, disfigured, deformed, body out of frame, blurry, bad anatomy, blurred, watermark, grainy, signature, cut off, draft, kitsch, ugly, oversaturated, grain, low-res, mutation, mutated, extra limb, missing limb, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, disgusting, poorly drawn, childish, mutilated, mangled, old, surreal, 2 heads, 2 faces"))
                    demo_inputs.append(gr.Slider(maximum=1, minimum=0, value=0.5, step=0.05, label='image prompt scale'))
                btn = gr.Button("Generate")
            with gr.Column():
                demo_outputs = []
                demo_outputs.append(gr.Image(label='generated image'))
                demo_outputs.append(gr.Image(label='detected face', height=200, width=200))
        btn.click(generate_images, inputs=demo_inputs, outputs=demo_outputs)
        sample_prompts = [
            'A wizard casting spells in a coffee shop',
            'A penguin teaching a yoga class',
            'A robot composing a symphony',
            'A giraffe participating in a slam poetry contest',
            'A bold rider on a white horse',
        ]
        gr.Examples(sample_prompts, inputs=demo_inputs[0], label='Sample prompts')
    demo.launch(share=True, debug=True)