"""Headless Gradio endpoint that renders PuLID images from data-URI inputs.

All image inputs arrive as data-URI strings and the generated image is
returned as a base64-encoded PNG string, so the single ``run`` API route can
be driven without any file uploads.
"""
import base64
import hmac
import os
from io import BytesIO

import gradio as gr
import numpy as np
import torch
from PIL import Image

from pulid import attention_processor as attention
from pulid.pipeline import PuLIDPipeline
from pulid.utils import resize_numpy_image_long, seed_everything

torch.set_grad_enabled(False)

pipeline = PuLIDPipeline()

# NOTE(review): when the SECRET_TOKEN environment variable is unset, the
# publicly known fallback value below is accepted as a valid token. Make sure
# the variable is always set in deployment.
SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')

# other params
DEFAULT_NEGATIVE_PROMPT = (
    'flaws in the eyes, flaws in the face, flaws, lowres, non-HDRi, low quality, worst quality,'
    'artifacts noise, text, watermark, glitch, deformed, mutated, ugly, disfigured, hands, '
    'low resolution, partially rendered objects, deformed or partially rendered eyes, '
    'deformed, deformed eyeballs, cross-eyed,blurry'
)

# Values assigned to the attention module for each supported mode, in the
# order (NUM_ZERO, ORTHO, ORTHO_v2).
_MODE_SETTINGS = {
    'fidelity': (8, False, True),
    'extremely style': (16, True, False),
}


def decode_data_uri_to_image(data_uri):
    """Decode a base64 data URI into a PIL image.

    Args:
        data_uri: String of the form ``<header>,<base64 payload>``. The header
            is not inspected; only the payload after the first comma is used.

    Returns:
        The ``PIL.Image.Image`` opened from the decoded bytes.

    Raises:
        ValueError: If the string contains no comma, or if the payload is not
            valid base64 (``binascii.Error`` is a ``ValueError`` subclass).
    """
    _header, separator, encoded = data_uri.partition(",")
    if not separator:
        raise ValueError("Invalid data URI: expected '<header>,<base64 data>'")
    data = base64.b64decode(encoded)
    return Image.open(BytesIO(data))


def _is_valid_token(candidate):
    """Return True when *candidate* equals SECRET_TOKEN (constant-time compare)."""
    if not isinstance(candidate, str):
        return False
    # Compare as bytes: compare_digest only accepts ASCII-only str arguments.
    return hmac.compare_digest(candidate.encode('utf-8'), SECRET_TOKEN.encode('utf-8'))


def _apply_attention_mode(mode):
    """Set the attention module's globals for *mode*; ValueError if unknown."""
    try:
        num_zero, ortho, ortho_v2 = _MODE_SETTINGS[mode]
    except (KeyError, TypeError):
        raise ValueError("Invalid mode") from None
    attention.NUM_ZERO = num_zero
    attention.ORTHO = ortho
    attention.ORTHO_v2 = ortho_v2


def _id_embedding_from_data_uri(data_uri):
    """Decode *data_uri*, resize it to a long side of 1024 and embed it."""
    image = decode_data_uri_to_image(data_uri)
    # Normalise to RGB so grayscale, palette and alpha inputs all become an
    # (H, W, 3) array. Presumably this is what the pipeline expects -- confirm
    # against pulid.
    pixels = np.array(image.convert('RGB'))
    pixels = resize_numpy_image_long(pixels, 1024)
    return pipeline.get_id_embedding(pixels)


def run(*args):
    """Generate one image and return it as a base64-encoded PNG string.

    Positional arguments, in the order of the ``inps`` list wired to the
    button: secret token, main ID image data URI, three auxiliary ID image
    data URIs, prompt, negative prompt, CFG scale, seed, steps, height, width,
    ID scale, mode, ID-mix flag.

    Returns:
        The generated image, PNG-encoded and then base64-encoded, as ``str``.

    Raises:
        gr.Error: If the secret token does not match SECRET_TOKEN.
        ValueError: If the mode is not one of the supported modes, or if a
            provided data URI is malformed.
    """
    secret_token = args[0]
    if not _is_valid_token(secret_token):
        raise gr.Error(
            'Invalid secret token. Please fork the original space if you want to use it for yourself.')

    id_image_data_uri = args[1]
    supp_images_data_uris = args[2:5]
    prompt, neg_prompt, scale, seed, steps, H, W, id_scale, mode, id_mix = args[5:]

    pipeline.debug_img_list = []
    _apply_attention_mode(mode)

    if id_image_data_uri:
        id_embeddings = _id_embedding_from_data_uri(id_image_data_uri)
        # Auxiliary images only contribute when a main ID image was provided.
        for supp_uri in supp_images_data_uris:
            if not supp_uri:
                continue
            supp_embeddings = _id_embedding_from_data_uri(supp_uri)
            if not id_mix:
                # Without ID mix only the first 5 entries along dim 1 are kept.
                supp_embeddings = supp_embeddings[:, :5]
            id_embeddings = torch.cat((id_embeddings, supp_embeddings), dim=1)
    else:
        id_embeddings = None

    seed_everything(seed)
    img = pipeline.inference(prompt, (1, H, W), neg_prompt, id_embeddings, id_scale, scale, steps)[0]
    image = Image.fromarray(np.array(img))

    # Convert image to base64
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


# NOTE(review): the nesting of the layout below was reconstructed from a
# whitespace-flattened source; confirm the Row/Column structure.
with gr.Blocks(title="PuLID") as demo:
    with gr.Row():
        with gr.Column():
            gr.HTML("""

This space is a headless component of the cloud rendering engine used by AiTube.

It is not available for public use, but you can use the original space.

""")
            token = gr.Textbox()
            face_image = gr.Textbox(label="ID image (main)", placeholder="Enter Data URI for the image")
            supp_image1 = gr.Textbox(
                label="Additional ID image (auxiliary)", placeholder="Enter Data URI for the image"
            )
            supp_image2 = gr.Textbox(
                label="Additional ID image (auxiliary)", placeholder="Enter Data URI for the image"
            )
            supp_image3 = gr.Textbox(
                label="Additional ID image (auxiliary)", placeholder="Enter Data URI for the image"
            )
            prompt = gr.Textbox(label="Prompt", value='portrait,cinematic,wolf ears,white hair')
            submit = gr.Button("Generate")
            neg_prompt = gr.Textbox(label="Negative Prompt", value=DEFAULT_NEGATIVE_PROMPT)
            scale = gr.Slider(
                label="CFG, recommend value range [1, 1.5], 1 will be faster ",
                value=1.2,
                minimum=1,
                maximum=1.5,
                step=0.1,
            )
            seed = gr.Slider(
                label="Seed", value=42, minimum=np.iinfo(np.uint32).min, maximum=np.iinfo(np.uint32).max, step=1
            )
            steps = gr.Slider(label="Steps", value=4, minimum=1, maximum=100, step=1)
            with gr.Row():
                H = gr.Slider(label="Height", value=1024, minimum=512, maximum=1280, step=64)
                W = gr.Slider(label="Width", value=768, minimum=512, maximum=1280, step=64)
            with gr.Row():
                id_scale = gr.Slider(label="ID scale", minimum=0, maximum=5, step=0.05, value=0.8, interactive=True)
                mode = gr.Dropdown(label="mode", choices=['fidelity', 'extremely style'], value='fidelity')
            id_mix = gr.Checkbox(
                label="ID Mix (if you want to mix two ID image, please turn this on, otherwise, turn this off)",
                value=False,
            )
            output_b64 = gr.Textbox()

    # Input order must match the positional unpacking performed in run().
    inps = [
        token,
        face_image,
        supp_image1,
        supp_image2,
        supp_image3,
        prompt,
        neg_prompt,
        scale,
        seed,
        steps,
        H,
        W,
        id_scale,
        mode,
        id_mix,
    ]
    submit.click(fn=run, inputs=inps, outputs=output_b64, api_name="run")

demo.queue(max_size=30).launch()