# Spaces: Runtime error
import cv2 | |
from insightface.app import FaceAnalysis | |
from insightface.utils import face_align | |
import torch | |
import os | |
from datetime import datetime | |
import torch | |
import gradio as gr | |
from diffusers import ( | |
StableDiffusionPipeline, | |
DDIMScheduler, | |
AutoencoderKL, | |
StableDiffusionControlNetPipeline, | |
ControlNetModel, | |
) | |
from PIL import Image | |
from ip_adapter.ip_adapter_faceid import IPAdapterFaceIDPlus | |
from diffusers.utils import load_image | |
import numpy as np | |
# date_time = now.strftime("%Y-%m-%d_%H-%M-%S") | |
def _prompt_to_filename_stem(prompt, max_length=50):
    """Return *prompt* reduced to characters that are safe inside a file name.

    Alphanumeric characters, ``-`` and ``_`` are kept; everything else
    (spaces, path separators, punctuation) becomes ``_``. The result is
    truncated to *max_length* characters so that the final file name stays
    within common file-system limits.
    """
    safe = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in str(prompt))
    return safe[:max_length]


def generate_image(
    prompt,
    negative_prompt,
    depth_map_dir,
    face_reference_image,
    s_scale,
    num_inference_steps,
    v2,
):
    """Generate one face-conditioned image per depth map found in a directory.

    Each generated image is saved as a PNG under ``/content/output`` and also
    collected into the returned list.

    Args:
        prompt: Text prompt forwarded to the IP-Adapter pipeline; a sanitised
            form of it is also used in the output file names.
        negative_prompt: Negative text prompt forwarded to the pipeline.
        depth_map_dir: Directory scanned (non-recursively) for ``.jpg`` /
            ``.png`` depth maps used as the ControlNet conditioning image.
        face_reference_image: Reference face. The UI supplies an RGB PIL
            image; any object ``np.array`` turns into an H x W x 3 RGB array
            is accepted.
        s_scale: Forwarded to ``ip_model.generate`` as ``s_scale``.
        num_inference_steps: Number of sampling steps (converted to ``int``).
        v2: Selects the "plus v2" adapter checkpoint and is forwarded as the
            ``shortcut`` flag.

    Returns:
        The generated images, in sorted depth-map file name order. An empty
        list if the directory holds no matching files.

    Raises:
        ValueError: No face reference image was given, or no face could be
            detected in it.
        FileNotFoundError: ``depth_map_dir`` is not an existing directory.
    """
    # Validate the cheap inputs first so a bad request fails before any
    # model weights are downloaded or loaded.
    if face_reference_image is None:
        raise ValueError("A face reference image is required.")
    if not depth_map_dir or not os.path.isdir(depth_map_dir):
        raise FileNotFoundError(
            f"Depth map directory not found: {depth_map_dir!r}"
        )

    # Sort so the index embedded in each output name is reproducible
    # (os.listdir order is arbitrary); match extensions case-insensitively.
    depth_map_files = sorted(
        name
        for name in os.listdir(depth_map_dir)
        if name.lower().endswith((".jpg", ".png"))
    )
    if not depth_map_files:
        return []

    date_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_dir = "/content/output"
    os.makedirs(output_dir, exist_ok=True)

    # --- Face embedding and aligned crop ---------------------------------
    app = FaceAnalysis(
        name="buffalo_l", providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
    )
    app.prepare(ctx_id=0, det_size=(640, 640))

    if hasattr(face_reference_image, "convert"):
        # Normalise PIL input (drops alpha / expands grayscale).
        face_reference_image = face_reference_image.convert("RGB")
    face_rgb = np.array(face_reference_image)
    # InsightFace follows the OpenCV convention and expects BGR pixel order,
    # so detection and the identity embedding run on a channel-swapped copy.
    face_bgr = np.ascontiguousarray(face_rgb[:, :, ::-1])
    faces = app.get(face_bgr)
    if not faces:
        raise ValueError("No face detected in the face reference image.")
    # NOTE(review): with several faces the first detection is used, as before.
    reference_face = faces[0]
    faceid_embeds = torch.from_numpy(reference_face.normed_embedding).unsqueeze(0)
    # The crop depends only on the landmarks; it is still taken from the RGB
    # array so the image handed to the adapter is unchanged from before.
    face_image = face_align.norm_crop(
        face_rgb, landmark=reference_face.kps, image_size=224
    )

    # --- Pipeline set-up --------------------------------------------------
    # NOTE(review): every model below is reloaded on each call; caching them
    # at module level would make repeated requests much faster.
    base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
    vae_model_path = "stabilityai/sd-vae-ft-mse"
    image_encoder_path = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
    # NOTE(review): the v2 checkpoint path is relative while the v1 path is
    # absolute under /content -- confirm this is intentional.
    ip_ckpt = (
        "/content/ip-adapter-faceid-plus_sd15.bin"
        if not v2
        else "ip-adapter-faceid-plusv2_sd15.bin"
    )
    device = "cuda"

    controlnet_model_path = "lllyasviel/control_v11f1p_sd15_depth"
    controlnet = ControlNetModel.from_pretrained(
        controlnet_model_path, torch_dtype=torch.float16
    )
    noise_scheduler = DDIMScheduler(
        num_train_timesteps=1000,
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        clip_sample=False,
        set_alpha_to_one=False,
        steps_offset=1,
    )
    vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        base_model_path,
        torch_dtype=torch.float16,
        controlnet=controlnet,
        scheduler=noise_scheduler,
        vae=vae,
        feature_extractor=None,
        safety_checker=None,
    )
    ip_model = IPAdapterFaceIDPlus(pipe, image_encoder_path, ip_ckpt, device)

    # --- Generation -------------------------------------------------------
    # Raw prompts may contain path separators or be very long; sanitise once.
    name_stem = _prompt_to_filename_stem(prompt)
    steps = int(num_inference_steps)  # UI sliders may deliver a float
    images = []
    for idx, filename in enumerate(depth_map_files):
        depth_map = load_image(os.path.join(depth_map_dir, filename))
        image = ip_model.generate(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=depth_map,
            face_image=face_image,
            faceid_embeds=faceid_embeds,
            shortcut=v2,
            s_scale=s_scale,
            num_samples=1,  # one image per depth map
            width=512,
            height=512,
            num_inference_steps=steps,
            seed=2023,
        )[0]
        # File name: sanitised prompt, timestamp, depth-map index.
        image_path = os.path.join(output_dir, f"{name_stem}_{date_time}_{idx}_0.png")
        image.save(image_path)
        images.append(image)

    torch.cuda.empty_cache()
    return images
# Gradio front end: collects the generation settings, runs generate_image
# when the button is clicked and shows the returned images in a gallery.
# NOTE(review): the original indentation was lost; the gallery and button
# are assumed to sit at Blocks level, below the two-column row -- confirm.
with gr.Blocks() as demo:
    with gr.Row():
        # Left column: prompts, depth-map location, reference face, adapter choice.
        with gr.Column():
            prompt = gr.Textbox(label="Prompt")
            negative_prompt = gr.Textbox(label="Negative Prompt")
            depth_map_dir = gr.Textbox(label="Depth Map Directory")
            face_reference_image = gr.Image(type="pil", label="Face Reference Image")
            v2 = gr.Checkbox(value=False, label="Use v2 Adapter")
        # Right column: numeric generation settings.
        with gr.Column():
            s_scale = gr.Slider(
                minimum=0,
                maximum=3,
                step=0.1,
                value=0.6,
                label="Face Structure strength",
            )
            num_inference_steps = gr.Slider(
                minimum=1, maximum=50, step=1, value=10, label="steps"
            )

    gallery = gr.Gallery(label="Generated Images")
    generate_btn = gr.Button("Generate Images")

    # The order here must match generate_image's positional parameters.
    generation_inputs = [
        prompt,
        negative_prompt,
        depth_map_dir,
        face_reference_image,
        s_scale,
        num_inference_steps,
        v2,
    ]
    generate_btn.click(fn=generate_image, inputs=generation_inputs, outputs=gallery)

demo.launch(share=True, debug=True)