Spaces:

samir-fama
/

Image-Adapter-With-Face-ID

Runtime error

App Files Files Community

Image-Adapter-With-Face-ID / app.py

samir-fama

Create app.py

f15e0fa about 1 year ago

raw

history blame

5.7 kB

	from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
	from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus
	from insightface.app import FaceAnalysis
	from insightface.utils import face_align

	from huggingface_hub import hf_hub_download
	import torch

	from PIL import Image
	import cv2

	import gradio as gr

	# Fetch the adapter checkpoint and the image-encoder files when the module is
	# loaded. Each entry is (repo_id, filename, local_dir); the resulting local
	# paths are the ones get_ip_model() reads from.
	for _repo_id, _filename, _local_dir in (
	    ('h94/IP-Adapter-FaceID', 'ip-adapter-faceid-plus_sd15.bin', 'IP-Adapter-FaceID'),
	    ('h94/IP-Adapter', 'models/image_encoder/config.json', 'IP-Adapter'),
	    ('h94/IP-Adapter', 'models/image_encoder/pytorch_model.bin', 'IP-Adapter'),
	):
	    hf_hub_download(repo_id=_repo_id, filename=_filename, local_dir=_local_dir)

	def get_ip_model():
	    """Assemble the Stable Diffusion pipeline and wrap it in IPAdapterFaceIDPlus.

	    The device is 'cuda' with float16 weights when CUDA is available, otherwise
	    'cpu' with float32 weights. The chosen device is printed to stdout.

	    Returns:
	        The IPAdapterFaceIDPlus instance built around the pipeline.
	    """
	    # Probe CUDA once and derive both the device string and the dtype from it.
	    if torch.cuda.is_available():
	        device, torch_dtype = 'cuda', torch.float16
	    else:
	        device, torch_dtype = 'cpu', torch.float32
	    print(f'Using device: {device}')

	    scheduler_config = dict(
	        num_train_timesteps=1000,
	        beta_start=0.00085,
	        beta_end=0.012,
	        beta_schedule="scaled_linear",
	        clip_sample=False,
	        set_alpha_to_one=False,
	        steps_offset=1,
	    )
	    noise_scheduler = DDIMScheduler(**scheduler_config)

	    # The base checkpoint is a "noVAE" variant, so a separate VAE is loaded
	    # and cast to the same dtype as the rest of the pipeline.
	    vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse").to(dtype=torch_dtype)

	    pipe = StableDiffusionPipeline.from_pretrained(
	        "SG161222/Realistic_Vision_V4.0_noVAE",
	        torch_dtype=torch_dtype,
	        scheduler=noise_scheduler,
	        vae=vae,
	        feature_extractor=None,
	        safety_checker=None,
	    )

	    # Local paths populated by the module-level hf_hub_download calls.
	    image_encoder_path = "IP-Adapter/models/image_encoder"
	    ip_ckpt = "IP-Adapter-FaceID/ip-adapter-faceid-plus_sd15.bin"
	    return IPAdapterFaceIDPlus(
	        pipe, image_encoder_path, ip_ckpt, device, num_tokens=4, torch_dtype=torch_dtype
	    )


	def generate_images(prompt, img_filepath, negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality, blurry",
	                    img_prompt_scale=0.5, num_inference_steps=30, seed=None, n_images=1):
	    """Generate an image from a text prompt, conditioned on the face in a reference photo.

	    Uses the module-level globals ``app`` (face analyser) and ``ip_model``
	    (IP-Adapter model), which must be initialised before this is called.

	    Args:
	        prompt: Text prompt passed to the generator.
	        img_filepath: Path of the reference image on disk.
	        negative_prompt: Text describing what the generator should avoid.
	        img_prompt_scale: Passed to the generator as ``scale``.
	        num_inference_steps: Number of inference steps passed to the generator.
	        seed: Seed passed to the generator (``None`` is forwarded unchanged).
	        n_images: Passed to the generator as ``num_samples``.

	    Returns:
	        A two-element list: the first generated image, and the aligned
	        200x200 face crop as a PIL image with its channel order reversed.
	        Only the first generated image is returned even when ``n_images > 1``.

	    Raises:
	        gr.Error: If no image was supplied, the file cannot be decoded, or no
	            face is detected in it. Gradio shows the message to the user.
	    """
	    # The image input is empty when the user clicks "Generate" without uploading.
	    if not img_filepath:
	        raise gr.Error("Please provide an image prompt containing a face.")

	    image = cv2.imread(img_filepath)
	    if image is None:
	        # cv2.imread reports an unreadable file by returning None, not by raising.
	        raise gr.Error("Could not read the image prompt; please upload a valid image file.")

	    faces = app.get(image)
	    if len(faces) == 0:
	        # Previously this fell through to faces[0] and failed with IndexError.
	        raise gr.Error("No face was detected in the image prompt; please try another photo.")

	    # As before, the first detected face is the one used for conditioning.
	    face = faces[0]
	    faceid_embeds = torch.from_numpy(face.normed_embedding).unsqueeze(0)
	    face_image = face_align.norm_crop(image, landmark=face.kps, image_size=200)

	    images = ip_model.generate(
	        prompt=prompt, negative_prompt=negative_prompt, face_image=face_image, faceid_embeds=faceid_embeds,
	        num_samples=n_images, width=512, height=512, num_inference_steps=num_inference_steps, seed=seed,
	        scale=img_prompt_scale,
	    )
	    # Reverse the channel axis of the crop (OpenCV loads BGR) before handing it to PIL.
	    return [images[0], Image.fromarray(face_image[..., [2, 1, 0]])]

	if __name__ == "__main__":
	    # NOTE(review): the original indentation of this section was lost; the
	    # nesting below is reconstructed from statement order - confirm the layout.

	    # Globals read by generate_images(): the generator and the face analyser.
	    ip_model = get_ip_model()
	    app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
	    app.prepare(ctx_id=0, det_size=(512, 512), det_thresh=0.2)

	    with gr.Blocks() as demo:
	        gr.Markdown(
	            """
	# ✨ Image Prompt Adapter With FaceID 🧙‍♂️

	Unleash the magic of generating whimsical images with just an image and a sprinkle of text! Learn the secrets here: [Magic Link](https://huggingface.co/h94/IP-Adapter-FaceID)

	🚀 This enchanting demo is designed to soar on GPU. While it can still dance on CPU, conjuring just one image might take up to 600 seconds—compared to the blink-of-an-eye magic on GPU! ✨
	""")
	        with gr.Row():
	            # Left column: prompt, reference image, advanced options, trigger button.
	            with gr.Column():
	                prompt_box = gr.Textbox(label='text prompt', value='A bold rider in a white horse')
	                face_input = gr.Image(type='filepath', label='image prompt')
	                with gr.Accordion(label='Advanced options', open=False):
	                    negative_box = gr.Textbox(
	                        label='negative text prompt',
	                        value="deformed hands, watermark, text, deformed fingers, blurred faces, irregular face, irrregular body shape, ugly eyes, deformed face, squint, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, poorly framed, extra limbs, disfigured, deformed, body out of frame, blurry, bad anatomy, blurred, watermark, grainy, signature, cut off, draft, ugly eyes, squint, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, poorly framed, extra limbs, disfigured, deformed, body out of frame, blurry, bad anatomy, blurred, watermark, grainy, signature, cut off, draft, disfigured, kitsch, ugly, oversaturated, grain, low-res, Deformed, blurry, bad anatomy, disfigured, poorly drawn face, mutation, mutated, extra limb, ugly, poorly drawn hands, missing limb, blurry, floating limbs, disconnected limbs, malformed hands, blur, out of focus, long neck, long body, ugly, disgusting, poorly drawn, childish, mutilated, mangled, old, surreal, 2 heads, 2 faces")
	                    scale_slider = gr.Slider(maximum=1, minimum=0, value=0.5, step=0.05, label='image prompt scale')
	                btn = gr.Button("Generate")

	            # Right column: the generated picture and the face crop that was used.
	            with gr.Column():
	                generated_view = gr.Image(label='generated image')
	                face_view = gr.Image(label='detected face', height=200, width=200)

	        # Input order matches the first four positional parameters of
	        # generate_images: prompt, img_filepath, negative_prompt, img_prompt_scale.
	        demo_inputs = [prompt_box, face_input, negative_box, scale_slider]
	        demo_outputs = [generated_view, face_view]
	        btn.click(generate_images, inputs=demo_inputs, outputs=demo_outputs)

	        sample_prompts = [
	            'A wizard casting spells in a coffee shop',
	            'A penguin teaching a yoga class',
	            'A robot composing a symphony',
	            'A giraffe participating in a slam poetry contest',
	            'A bold rider in a white horse'
	        ]
	        # Clicking a sample fills only the text prompt box.
	        gr.Examples(sample_prompts, inputs=demo_inputs[0], label='Sample prompts')

	    demo.launch(share=True, debug=True)