Ip-Adapter-FaceID

Running on Zero

File size: 8,893 Bytes

8d02f67
85330fa
aa14900
 
02269e1
85330fa
 
 
 
aa14900
 
 
 
 
5dc51ba
 
cf9fa34
aa14900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf9fa34
aa14900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf9fa34
aa14900
 
 
 
 
 
 
 
 
 
 
 
 
 
cf9fa34
aa14900
 
 
 
cf9fa34
aa14900
 
 
 
 
 
85330fa
 
aa14900
85330fa
 
 
 
 
 
 
 
 
02269e1
aa14900
 
 
 
5dc51ba
 
aa14900
 
a60d15c
aa14900
 
 
 
85330fa
aa14900
 
 
85330fa
aa14900
 
 
 
 
85330fa
aa14900
 
 
 
5dc51ba
85330fa
 
 
 
 
 
aa14900
85330fa
cf9fa34
 
 
 
aa14900
cf9fa34
aa14900
 
 
 
85330fa
 
 
 
 
 
5dc51ba
85330fa
 
 
5dc51ba
85330fa
aa14900
 
85330fa
 
aa14900
cf9fa34
aa14900
 
85330fa
aa14900
 
85330fa
aa14900
 
 
7f4b639
 
 
 
aa14900
85330fa
 
aa14900
85330fa
 
aa14900
 
02269e1

import torch
import spaces
from diffusers import DDIMScheduler, StableDiffusionXLPipeline
import ipown
from huggingface_hub import hf_hub_download
from insightface.app import FaceAnalysis
import gradio as gr
import cv2

# Selectable SDXL checkpoints, as (display name, Hugging Face repo id) pairs.
# Order here is the order of the resulting mapping's keys.
_MODEL_REPOS = (
    ("CyberRealistic", "John6666/cyberrealistic-pony-v61-sdxl"),
    ("StallionDreams", "John6666/stallion-dreams-pony-realistic-v1-sdxl"),
    ("PonyRealism", "John6666/pony-realism-v21main-sdxl"),
)

# Display name -> repo id lookup used when switching models
model_options = dict(_MODEL_REPOS)

# Full style list for applying styles to the prompt.
# Each entry has a display "name", a "prompt" template containing the literal
# placeholder "{prompt}", and a style-specific "negative_prompt".
style_list = [
    {
        "name": "(No style)",
        "prompt": "{prompt}",
        "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
    },
    {
        "name": "Cinematic",
        "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
        "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured",
    },
    {
        "name": "3D Model",
        "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
        "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting",
    },
    {
        "name": "Anime",
        "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime,  highly detailed",
        "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast",
    },
    {
        "name": "Digital Art",
        "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed",
        "negative_prompt": "photo, photorealistic, realism, ugly",
    },
    {
        "name": "Photographic",
        "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed",
        "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly",
    },
    {
        "name": "Pixel art",
        "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics",
        "negative_prompt": "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic",
    },
    {
        "name": "Fantasy art",
        "prompt": "ethereal fantasy concept art of  {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy",
        "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white",
    },
    {
        "name": "Neonpunk",
        "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional",
        "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured",
    },
    {
        "name": "Manga",
        "prompt": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style",
        "negative_prompt": "ugly, deformed, noisy, blurry, low contrast, realism, photorealistic, Western comic style",
    },
]

# Styles dictionary to map style names to (prompt template, negative prompt)
styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
STYLE_NAMES = list(styles.keys())
DEFAULT_STYLE_NAME = "(No style)"


# Function to apply the selected style
def apply_style(style_name: str, positive: str, negative: str = "") -> tuple[str, str]:
    """Expand a user prompt with the selected style.

    Args:
        style_name: Key into ``styles``; unknown names fall back to
            ``DEFAULT_STYLE_NAME``.
        positive: User prompt substituted for the ``{prompt}`` placeholder of
            the style's template. ``None`` is treated as an empty string.
        negative: Optional user negative prompt appended to the style's own
            negative prompt. ``None`` or whitespace-only means "nothing to add".

    Returns:
        A ``(prompt, negative_prompt)`` tuple ready to hand to the generator.
    """
    template, style_negative = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
    user_negative = (negative or "").strip()
    # Join with ", " so the last style tag and the first user tag stay separate
    # terms (plain concatenation produced e.g. "low qualityblurry").
    combined_negative = ", ".join(
        part for part in (style_negative, user_negative) if part
    )
    return template.replace("{prompt}", positive or ""), combined_negative

# Download the IP-Adapter FaceID SDXL checkpoint from the Hugging Face Hub
ip_ckpt = hf_hub_download(
    repo_id="h94/IP-Adapter-FaceID",
    filename="ip-adapter-faceid_sdxl.bin",
    repo_type="model",
)
device = "cuda"

# DDIM noise scheduler settings, kept in one place and expanded into the
# constructor below.
_scheduler_settings = {
    "num_train_timesteps": 1000,
    "beta_start": 0.00085,
    "beta_end": 0.012,
    "beta_schedule": "scaled_linear",
    "clip_sample": False,
    "set_alpha_to_one": False,
    "steps_offset": 1,
}
noise_scheduler = DDIMScheduler(**_scheduler_settings)

# Build a pipeline for the selected model
def get_pipeline(model_path):
    """Load an SDXL pipeline for ``model_path``.

    The pipeline is loaded in float16 from safetensors weights and uses the
    module-level ``noise_scheduler``.

    Args:
        model_path: Model identifier passed straight to ``from_pretrained``.

    Returns:
        The ``StableDiffusionXLPipeline`` returned by ``from_pretrained``.
    """
    load_kwargs = {
        "torch_dtype": torch.float16,
        "scheduler": noise_scheduler,
        "use_safetensors": True,
    }
    return StableDiffusionXLPipeline.from_pretrained(model_path, **load_kwargs)

# Initialize with a default model.
# These three module-level names are declared `global` in generate_image and
# rebound there when a different model is selected.
current_model = model_options["PonyRealism"]  # repo id of the model currently loaded
pipe = get_pipeline(current_model)  # pipeline built for current_model
# IP-Adapter FaceID object built from the pipeline, the downloaded checkpoint
# and the target device; generate_image calls its .generate(...) method.
ip_model = ipown.IPAdapterFaceIDXL(pipe, ip_ckpt, device)

@spaces.GPU()
def generate_image(images, model_choice, style_name, prompt, negative_prompt, face_strength, likeness_strength, num_inference_steps, guidance_scale, width, height):
    """Generate images conditioned on the averaged face identity of the uploads.

    Args:
        images: Uploaded photos; each item is passed directly to
            ``cv2.imread`` (assumes file paths -- TODO confirm for the Gradio
            version in use).
        model_choice: Key into ``model_options`` selecting the base model.
        style_name: Style name forwarded to ``apply_style``.
        prompt: User prompt text.
        negative_prompt: User negative prompt text.
        face_strength: Not used by this function; accepted so the existing UI
            wiring keeps working.
        likeness_strength: Passed to ``ip_model.generate`` as ``scale``.
        num_inference_steps: Passed through to ``ip_model.generate``.
        guidance_scale: Passed through to ``ip_model.generate``.
        width: Passed through to ``ip_model.generate``.
        height: Passed through to ``ip_model.generate``.

    Returns:
        Whatever ``ip_model.generate`` returns (shown in the output gallery).

    Raises:
        gr.Error: If no photo was uploaded, a photo cannot be decoded, or no
            face is detected in a photo.
    """
    global current_model, pipe, ip_model

    # Fail fast with a message the UI can show, before any heavy work; the
    # original crashed with a TypeError when nothing had been uploaded.
    if not images:
        raise gr.Error("Please upload at least one photo of your face.")

    # Update the model if the choice has changed. The globals are committed
    # only after both objects were built, so a failed load cannot leave
    # current_model naming a model that is not actually loaded.
    requested_model = model_options[model_choice]
    if requested_model != current_model:
        new_pipe = get_pipeline(requested_model)
        new_ip_model = ipown.IPAdapterFaceIDXL(new_pipe, ip_ckpt, device)
        pipe, ip_model, current_model = new_pipe, new_ip_model, requested_model

    torch.cuda.empty_cache()

    app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    app.prepare(ctx_id=0, det_size=(512, 512))

    faceid_all_embeds = []
    for index, image in enumerate(images, start=1):
        face = cv2.imread(image)
        # cv2.imread signals an unreadable file by returning None, not raising.
        if face is None:
            raise gr.Error(f"Could not read uploaded photo #{index}.")
        faces = app.get(face)
        # An empty detection result would otherwise raise IndexError on faces[0].
        if not faces:
            raise gr.Error(f"No face detected in uploaded photo #{index}. Please use a clear photo of a face.")
        # Only the first detected face of each photo is used.
        faceid_embed = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
        faceid_all_embeds.append(faceid_embed)

    # Average the per-photo embeddings into a single identity embedding.
    average_embedding = torch.mean(torch.stack(faceid_all_embeds, dim=0), dim=0)

    # Apply the selected style; empty text boxes are normalised to "".
    styled_prompt, styled_negative_prompt = apply_style(style_name, prompt or "", negative_prompt or "")

    image = ip_model.generate(
        prompt=styled_prompt, negative_prompt=styled_negative_prompt, faceid_embeds=average_embedding,
        scale=likeness_strength, width=width, height=height, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps
    )

    return image

def swap_to_gallery(images):
    """Switch the UI from the upload widget to the preview gallery.

    Returns three component updates, in the order of the outputs this handler
    is wired to: the preview gallery (filled with ``images`` and shown), the
    clear-button container (shown) and the file upload widget (hidden).
    """
    show_gallery = gr.update(value=images, visible=True)
    show_clear_button = gr.update(visible=True)
    hide_upload = gr.update(visible=False)
    return show_gallery, show_clear_button, hide_upload

def remove_back_to_files():
    """Switch the UI back from the preview gallery to the upload widget.

    Returns three component updates, in the order of the outputs this handler
    is wired to: the preview gallery (hidden), the clear-button container
    (hidden) and the file upload widget (shown again).
    """
    hide_gallery = gr.update(visible=False)
    hide_clear_button = gr.update(visible=False)
    show_upload = gr.update(visible=True)
    return hide_gallery, hide_clear_button, show_upload

# Custom CSS handed to gr.Blocks: removes the bottom margin of <h1> headings.
css = '''
h1{margin-bottom: 0 !important}
'''

# Build the UI. Components appear in the order they are created, so the
# statement order below is part of the layout.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# IP-Adapter-FaceID SDXL demo")
    gr.Markdown("A simple Demo for the [h94/IP-Adapter-FaceID SDXL model](https://huggingface.co/h94/IP-Adapter-FaceID).")
    with gr.Row():
        # First column: all inputs and the submit button.
        with gr.Column():
            model_dropdown = gr.Dropdown(label="Select Model", choices=list(model_options.keys()), value="PonyRealism")
            style_dropdown = gr.Dropdown(label="Style", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME)
            files = gr.Files(label="Drag 1 or more photos of your face", file_types=["image"])
            # Preview gallery and clear button start hidden; swap_to_gallery
            # shows them after an upload and remove_back_to_files hides them.
            uploaded_files = gr.Gallery(label="Your images", visible=False, columns=5, rows=1, height=250)
            with gr.Column(visible=False) as clear_button:
                remove_and_reupload = gr.ClearButton(value="Remove files and upload new ones", components=files, size="sm")
            prompt = gr.Textbox(label="Prompt", placeholder="A photo of a man/woman/person ...")
            negative_prompt = gr.Textbox(label="Negative Prompt", placeholder="low quality")
            # NOTE(review): face_strength is passed to generate_image, but that
            # function's body never reads it -- confirm whether it should be wired up.
            face_strength = gr.Slider(label="Face Strength", value=7.5, step=0.1, minimum=0, maximum=15)
            # Forwarded by generate_image to ip_model.generate as `scale`.
            likeness_strength = gr.Slider(label="Likeness Strength", value=1.0, step=0.1, minimum=0, maximum=5)
            with gr.Accordion("Advanced Options", open=False):
                num_inference_steps = gr.Slider(label="Number of Inference Steps", value=30, step=1, minimum=10, maximum=100)
                guidance_scale = gr.Slider(label="Guidance Scale", value=7.5, step=0.1, minimum=1, maximum=20)
                width = gr.Slider(label="Width", value=512, step=64, minimum=256, maximum=1024)
                height = gr.Slider(label="Height", value=512, step=64, minimum=256, maximum=1024)
            submit = gr.Button("Submit", variant="primary")
        # Second column: generated results.
        with gr.Column():
            gallery = gr.Gallery(label="Generated Images")
        
        # Event wiring. The `outputs` order must match the order of the values
        # returned by each handler; the `inputs` order must match
        # generate_image's positional parameters.
        files.upload(fn=swap_to_gallery, inputs=files, outputs=[uploaded_files, clear_button, files])
        remove_and_reupload.click(fn=remove_back_to_files, outputs=[uploaded_files, clear_button, files])
        submit.click(fn=generate_image, inputs=[files, model_dropdown, style_dropdown, prompt, negative_prompt, face_strength, likeness_strength, num_inference_steps, guidance_scale, width, height], outputs=gallery)

# Start the app; this runs at import time (there is no __main__ guard).
demo.launch()