Spaces:

waveydaveygravy
/

IP-Adapter-Face-ID-Plus-Controlnet

Runtime error

App Files Files Community

IP-Adapter-Face-ID-Plus-Controlnet / app.py

waveydaveygravy

Create app.py

8d3fbf3 verified 10 months ago

raw

history blame contribute delete

5.29 kB

	import cv2
	from insightface.app import FaceAnalysis
	from insightface.utils import face_align
	import torch
	import os
	from datetime import datetime
	import torch
	import gradio as gr
	from diffusers import (
	StableDiffusionPipeline,
	DDIMScheduler,
	AutoencoderKL,
	StableDiffusionControlNetPipeline,
	ControlNetModel,
	)
	from PIL import Image
	from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus
	from diffusers.utils import load_image
	import numpy as np

	# date_time = now.strftime("%Y-%m-%d_%H-%M-%S")


	def _safe_filename_stem(text, max_length=150):
	    """Turn free-form prompt text into a string usable inside a file name.

	    Spaces, path separators, characters reserved on common filesystems and
	    control characters are each replaced by ``_``; the result is capped at
	    ``max_length`` characters so the final name stays reasonably short.
	    """
	    unsafe = set(' /\\:*?"<>|')
	    cleaned = "".join("_" if ch in unsafe or ord(ch) < 32 else ch for ch in text)
	    return cleaned[:max_length]


	def _list_depth_maps(depth_map_dir):
	    """Return the .jpg/.jpeg/.png file names in ``depth_map_dir``, sorted.

	    Sorting makes the index embedded in the output file names reproducible;
	    ``os.listdir`` alone returns entries in arbitrary order.
	    """
	    return sorted(
	        name
	        for name in os.listdir(depth_map_dir)
	        if name.lower().endswith((".jpg", ".jpeg", ".png"))
	    )


	def _extract_face(face_reference_image):
	    """Detect the reference face and return ``(faceid_embeds, face_image)``.

	    Raises:
	        ValueError: if the detector finds no face in the image.
	    """
	    app = FaceAnalysis(
	        name="buffalo_l", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
	    )
	    app.prepare(ctx_id=0, det_size=(640, 640))

	    if isinstance(face_reference_image, Image.Image):
	        # The InsightFace / IP-Adapter-FaceID reference usage feeds the detector
	        # the output of cv2.imread, i.e. a 3-channel BGR array, whereas a PIL
	        # image is RGB (possibly RGBA or grayscale) - convert explicitly.
	        face_array = cv2.cvtColor(
	            np.array(face_reference_image.convert("RGB")), cv2.COLOR_RGB2BGR
	        )
	    else:
	        # NOTE(review): non-PIL inputs are passed through unchanged and are
	        # assumed to already be OpenCV-style arrays - confirm with callers.
	        face_array = np.array(face_reference_image)

	    faces = app.get(face_array)
	    if not faces:
	        raise ValueError("No face detected in the face reference image.")

	    # Only the first detected face is used.
	    faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
	    face_image = face_align.norm_crop(
	        face_array, landmark=faces[0].kps, image_size=224
	    )  # you can also segment the face
	    return faceid_embeds, face_image


	def _build_ip_model(v2):
	    """Build the depth-ControlNet pipeline wrapped in the FaceID-Plus adapter."""
	    base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
	    vae_model_path = "stabilityai/sd-vae-ft-mse"
	    image_encoder_path = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
	    # NOTE(review): the v1 checkpoint is an absolute /content path while the v2
	    # checkpoint is relative to the working directory - confirm both files are
	    # really located where this expects them.
	    ip_ckpt = (
	        "/content/ip-adapter-faceid-plus_sd15.bin"
	        if not v2
	        else "ip-adapter-faceid-plusv2_sd15.bin"
	    )
	    device = "cuda"

	    controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
	    controlnet = ControlNetModel.from_pretrained(
	        controlnet_model_path, torch_dtype=torch.float16
	    )

	    noise_scheduler = DDIMScheduler(
	        num_train_timesteps=1000,
	        beta_start=0.00085,
	        beta_end=0.012,
	        beta_schedule="scaled_linear",
	        clip_sample=False,
	        set_alpha_to_one=False,
	        steps_offset=1,
	    )

	    vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)

	    pipe = StableDiffusionControlNetPipeline.from_pretrained(
	        base_model_path,
	        torch_dtype=torch.float16,
	        controlnet=controlnet,
	        scheduler=noise_scheduler,
	        vae=vae,
	        feature_extractor=None,
	        safety_checker=None,
	    )

	    return IPAdapterFaceIDPlus(pipe, image_encoder_path, ip_ckpt, device)


	def generate_image(
	    prompt,
	    negative_prompt,
	    depth_map_dir,
	    face_reference_image,
	    s_scale,
	    num_inference_steps,
	    v2,
	):
	    """Generate one image per depth map, conditioned on a reference face.

	    Every .jpg/.jpeg/.png file in ``depth_map_dir`` is used (in sorted order)
	    as the ControlNet input; each result is saved under ``/content/output``
	    and also collected into the returned list.

	    Args:
	        prompt: Text prompt; also used (sanitised) in the output file names.
	        negative_prompt: Negative text prompt.
	        depth_map_dir: Directory containing the depth map images.
	        face_reference_image: Image holding the reference face (the UI
	            supplies a PIL image).
	        s_scale: Forwarded to ``ip_model.generate`` as ``s_scale``.
	        num_inference_steps: Number of inference steps per image.
	        v2: Selects the v2 adapter checkpoint and is forwarded as ``shortcut``.

	    Returns:
	        A list with one generated image per depth map; an empty list when the
	        directory contains no matching files (no model is loaded then).

	    Raises:
	        ValueError: if no face reference image was supplied, or no face could
	            be detected in it.
	        OSError: if ``depth_map_dir`` cannot be listed.
	    """
	    # Validate the cheap things first so bad input fails before any model loads.
	    if face_reference_image is None:
	        raise ValueError("A face reference image is required.")

	    depth_map_files = _list_depth_maps(depth_map_dir)
	    if not depth_map_files:
	        return []

	    # One timestamp shared by every file written during this call.
	    date_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

	    # Create the output directory if it doesn't exist
	    output_dir = "/content/output"
	    os.makedirs(output_dir, exist_ok=True)

	    faceid_embeds, face_image = _extract_face(face_reference_image)
	    ip_model = _build_ip_model(v2)
	    prompt_stem = _safe_filename_stem(prompt)

	    images = []
	    for idx, filename in enumerate(depth_map_files):
	        depth_map = load_image(os.path.join(depth_map_dir, filename))

	        image = ip_model.generate(
	            prompt=prompt,
	            negative_prompt=negative_prompt,
	            image=depth_map,
	            face_image=face_image,
	            faceid_embeds=faceid_embeds,
	            shortcut=v2,
	            s_scale=s_scale,
	            num_samples=1,  # Generate one image per depth map
	            width=512,
	            height=512,
	            num_inference_steps=num_inference_steps,
	            seed=2023,
	        )[0]

	        # Save the image with the prompt name, date/time, and depth map index
	        image_name = f"{prompt_stem}_{date_time}_{idx}_0.png"
	        image.save(os.path.join(output_dir, image_name))
	        images.append(image)

	    torch.cuda.empty_cache()
	    return images


	# Gradio front end: inputs on the left, tuning sliders and the result gallery
	# on the right, and a single button that runs generate_image.
	with gr.Blocks() as demo:
	    with gr.Row():
	        # Left column: text inputs, reference face and adapter switch.
	        with gr.Column():
	            prompt = gr.Textbox(label="Prompt")
	            negative_prompt = gr.Textbox(label="Negative Prompt")
	            depth_map_dir = gr.Textbox(label="Depth Map Directory")
	            face_reference_image = gr.Image(type="pil", label="Face Reference Image")
	            v2 = gr.Checkbox(value=False, label="Use v2 Adapter")

	        # Right column: generation settings and the output gallery.
	        with gr.Column():
	            s_scale = gr.Slider(
	                minimum=0, maximum=3, step=0.1, value=0.6, label="Face Structure strength"
	            )
	            num_inference_steps = gr.Slider(
	                minimum=1, maximum=50, step=1, value=10, label="steps"
	            )
	            gallery = gr.Gallery(label="Generated Images")

	    # The list order must match generate_image's positional parameters.
	    generation_inputs = [
	        prompt,
	        negative_prompt,
	        depth_map_dir,
	        face_reference_image,
	        s_scale,
	        num_inference_steps,
	        v2,
	    ]
	    generate_btn = gr.Button("Generate Images")
	    generate_btn.click(generate_image, generation_inputs, gallery)

	demo.launch(debug=True, share=True)