import base64
import hmac
import os
from io import BytesIO

import gradio as gr
import numpy as np
import torch
from PIL import Image

from pulid import attention_processor as attention
from pulid.pipeline import PuLIDPipeline
from pulid.utils import resize_numpy_image_long, seed_everything

# Inference-only process: switch autograd off globally before the pipeline is built.
torch.set_grad_enabled(False)

# Created once at import time and shared by every call to run().
pipeline = PuLIDPipeline()

# Shared secret that run() expects as its first positional input. Read from the
# SECRET_TOKEN environment variable.
# NOTE(review): when the variable is unset the well-known fallback
# 'default_secret' is accepted — confirm every deployment sets SECRET_TOKEN.
SECRET_TOKEN = os.getenv('SECRET_TOKEN', 'default_secret')

# Default value of the "Negative Prompt" textbox.
# The adjacent literals are concatenated as written: the first one has no
# trailing space, so it joins the second as 'worst quality,artifacts noise'.
DEFAULT_NEGATIVE_PROMPT = (
    'flaws in the eyes, flaws in the face, flaws, lowres, non-HDRi, low quality, worst quality,'
    'artifacts noise, text, watermark, glitch, deformed, mutated, ugly, disfigured, hands, '
    'low resolution, partially rendered objects,  deformed or partially rendered eyes, '
    'deformed, deformed eyeballs, cross-eyed,blurry'
)

def decode_data_uri_to_image(data_uri):
    """Decode a base64 data URI into a PIL image.

    Args:
        data_uri: String of the form ``<header>,<base64 payload>``, for example
            ``data:image/png;base64,iVBORw0...``. Only the text after the first
            comma is decoded; the header itself is not inspected.

    Returns:
        The image object produced by ``Image.open`` on the decoded bytes.

    Raises:
        ValueError: If ``data_uri`` contains no comma separating header and
            payload. ``base64.b64decode`` may also raise ``binascii.Error``
            (a ``ValueError`` subclass) for a malformed payload.
    """
    _header, separator, encoded = data_uri.partition(",")
    if not separator:
        # Report the real problem instead of an opaque tuple-unpacking error.
        raise ValueError("Invalid data URI: expected '<header>,<base64 data>'")
    data = base64.b64decode(encoded)
    return Image.open(BytesIO(data))

def _token_is_valid(candidate):
    """Return True when ``candidate`` equals ``SECRET_TOKEN`` (constant-time comparison)."""
    if not isinstance(candidate, str):
        return False
    # compare_digest accepts str only when it is pure ASCII, so compare bytes.
    # 'surrogatepass' keeps odd inputs (lone surrogates) from raising while encoding.
    return hmac.compare_digest(
        candidate.encode('utf-8', 'surrogatepass'),
        SECRET_TOKEN.encode('utf-8', 'surrogatepass'),
    )


def _apply_attention_mode(mode):
    """Set the module-level switches of the attention processor for ``mode``."""
    if mode == 'fidelity':
        attention.NUM_ZERO = 8
        attention.ORTHO = False
        attention.ORTHO_v2 = True
    elif mode == 'extremely style':
        attention.NUM_ZERO = 16
        attention.ORTHO = True
        attention.ORTHO_v2 = False
    else:
        raise ValueError("Invalid mode")


def _embed_identity_image(data_uri):
    """Decode one data-URI image, resize it and return the pipeline's ID embedding."""
    # Data URIs may carry palette, grayscale or alpha images; normalise to 3-channel
    # RGB before handing the array to the pipeline.
    # NOTE(review): assumes get_id_embedding expects an RGB array — TODO confirm.
    rgb_image = decode_data_uri_to_image(data_uri).convert("RGB")
    pixels = resize_numpy_image_long(np.array(rgb_image), 1024)
    return pipeline.get_id_embedding(pixels)


def run(*args):
    """Generate one image and return it as a base64-encoded PNG string.

    Positional inputs, in order:
        0      secret token, must equal ``SECRET_TOKEN``
        1      data URI of the main ID image (falsy value: run without ID embeddings)
        2-4    data URIs of up to three auxiliary ID images (falsy entries are skipped;
               ignored entirely when there is no main ID image)
        5-14   prompt, neg_prompt, scale, seed, steps, H, W, id_scale, mode, id_mix

    Returns:
        The generated image, PNG-encoded and base64-encoded as a ``str``.

    Raises:
        gr.Error: If the secret token does not match.
        ValueError: If ``mode`` is neither ``'fidelity'`` nor ``'extremely style'``.
    """
    if not _token_is_valid(args[0]):
        raise gr.Error(
            'Invalid secret token. Please fork the original space if you want to use it for yourself.')

    id_image_data_uri = args[1]
    supp_images_data_uris = args[2:5]
    prompt, neg_prompt, scale, seed, steps, H, W, id_scale, mode, id_mix = args[5:]

    pipeline.debug_img_list = []
    _apply_attention_mode(mode)

    id_embeddings = None
    if id_image_data_uri:
        id_embeddings = _embed_identity_image(id_image_data_uri)
        for supp_uri in supp_images_data_uris:
            if not supp_uri:
                continue
            supp_id_embeddings = _embed_identity_image(supp_uri)
            if not id_mix:
                # Without ID mixing only the first 5 entries along dim 1 are appended.
                supp_id_embeddings = supp_id_embeddings[:, :5]
            id_embeddings = torch.cat((id_embeddings, supp_id_embeddings), dim=1)

    seed_everything(seed)
    img = pipeline.inference(prompt, (1, H, W), neg_prompt, id_embeddings, id_scale, scale, steps)[0]
    image = Image.fromarray(np.array(img))

    # Serialise the result as PNG and return it base64-encoded.
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")

# UI definition. The components mainly serve as the typed inputs/outputs of the
# "run" API endpoint registered at the bottom of this block.
with gr.Blocks(title="PuLID") as demo:
    with gr.Row():
        with gr.Column():
            # Fixed-position, full-viewport white overlay (z-index 100) that shows a
            # notice on top of the page content.
            gr.HTML("""
                <div style="z-index: 100; position: fixed; top: 0px; right: 0px; left: 0px; bottom: 0px; width: 100%; height: 100%; background: white; display: flex; align-items: center; justify-content: center; color: black;">
                <div style="text-align: center; color: black;">
                <p style="color: black;">This space is a headless component of the cloud rendering engine used by AiTube.</p>
                <p style="color: black;">It is not available for public use, but you can use the <a href="https://huggingface.co/spaces/yanze/PuLID" target="_blank">original space</a>.</p>
                </div>
                </div>""")
            # Secret token checked by run() against SECRET_TOKEN.
            token = gr.Textbox()
            # Images are passed as text (data URIs), not as image components.
            face_image = gr.Textbox(label="ID image (main)", placeholder="Enter Data URI for the image")
            supp_image1 = gr.Textbox(label="Additional ID image (auxiliary)", placeholder="Enter Data URI for the image")
            supp_image2 = gr.Textbox(label="Additional ID image (auxiliary)", placeholder="Enter Data URI for the image")
            supp_image3 = gr.Textbox(label="Additional ID image (auxiliary)", placeholder="Enter Data URI for the image")
            prompt = gr.Textbox(label="Prompt", value='portrait,cinematic,wolf ears,white hair')
            submit = gr.Button("Generate")
            neg_prompt = gr.Textbox(label="Negative Prompt", value=DEFAULT_NEGATIVE_PROMPT)
            scale = gr.Slider(
                label="CFG, recommend value range [1, 1.5], 1 will be faster ",
                value=1.2,
                minimum=1,
                maximum=1.5,
                step=0.1,
            )
            # Seed range spans the full unsigned 32-bit integer range.
            seed = gr.Slider(
                label="Seed", value=42, minimum=np.iinfo(np.uint32).min, maximum=np.iinfo(np.uint32).max, step=1
            )
            steps = gr.Slider(label="Steps", value=4, minimum=1, maximum=100, step=1)
            with gr.Row():
                H = gr.Slider(label="Height", value=1024, minimum=512, maximum=1280, step=64)
                W = gr.Slider(label="Width", value=768, minimum=512, maximum=1280, step=64)
            with gr.Row():
                id_scale = gr.Slider(label="ID scale", minimum=0, maximum=5, step=0.05, value=0.8, interactive=True)
                # The two choices are the only mode values run() accepts.
                mode = gr.Dropdown(label="mode", choices=['fidelity', 'extremely style'], value='fidelity')
                id_mix = gr.Checkbox(
                    label="ID Mix (if you want to mix two ID image, please turn this on, otherwise, turn this off)",
                    value=False,
                )

        # Receives the base64-encoded PNG string returned by run().
        output_b64 = gr.Textbox()

    # Order matters: run() reads its arguments positionally (token, main image,
    # three auxiliary images, then the ten remaining settings).
    inps = [
        token,
        face_image,
        supp_image1,
        supp_image2,
        supp_image3,
        prompt,
        neg_prompt,
        scale,
        seed,
        steps,
        H,
        W,
        id_scale,
        mode,
        id_mix,
    ]
    # Wire the button to run() and name the API endpoint "run".
    submit.click(fn=run, inputs=inps, outputs=output_b64, api_name="run")

# Enable the request queue (max_size=30) and start the server.
demo.queue(max_size=30).launch()