import gradio as gr
import torch
from PIL import Image
from diffusers import AutoPipelineForText2Image, DDIMScheduler
from transformers import CLIPVisionModelWithProjection
import numpy as np
import spaces  # Make sure to import spaces

# Load the ViT-H CLIP image encoder explicitly. Both IP-Adapter checkpoints
# loaded below are "vit-h" variants; if no encoder is supplied,
# `load_ip_adapter` falls back to the encoder stored alongside the weights in
# `sdxl_models/`, which (per the h94/IP-Adapter model card) is a different,
# larger model whose hidden size does not match these checkpoints.
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "h94/IP-Adapter",
    subfolder="models/image_encoder",
    torch_dtype=torch.float16,
)

# Build the SDXL text-to-image pipeline in half precision, wired to the
# image encoder loaded above.
pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    image_encoder=image_encoder,
)

# Swap the default scheduler for DDIM, reusing the existing scheduler config.
pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)

# Load two IP-Adapters: the first is driven by the style reference image, the
# second by the face image. The order here must match the order of the
# `ip_adapter_image` list passed at inference time.
pipeline.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name=[
        "ip-adapter-plus_sdxl_vit-h.safetensors",
        "ip-adapter-plus-face_sdxl_vit-h.safetensors",
    ],
)
# Per-adapter conditioning strength, in the same order as `weight_name`.
pipeline.set_ip_adapter_scale([0.7, 0.5])

# Place the pipeline on the GPU once, at import time. The pipeline stays on
# this device for the lifetime of the process; inference code must not move
# it elsewhere.
pipeline.to("cuda")

# Width/height that both conditioning images are resized to before inference.
desired_size = (1024, 1024)

@spaces.GPU
def transform_image(face_image):
    """Generate a soyjak-styled image from an uploaded face image.

    The face image and a fixed local style reference are both resized to
    ``desired_size`` and passed to the module-level ``pipeline`` as the two
    IP-Adapter conditioning images (style first, face second).

    Args:
        face_image: The uploaded picture, either a ``PIL.Image.Image`` or a
            ``numpy.ndarray``.

    Returns:
        The first image produced by the pipeline.

    Raises:
        ValueError: If ``face_image`` is neither a PIL image nor a NumPy
            array (this includes ``None``, i.e. nothing was uploaded).
    """
    # Validate and normalise the input before touching the GPU so that bad
    # input fails fast.
    if isinstance(face_image, Image.Image):
        processed_face_image = face_image
    elif isinstance(face_image, np.ndarray):
        processed_face_image = Image.fromarray(face_image)
    else:
        raise ValueError("Unsupported image format")

    # Normalise to 3-channel RGB (uploads may be RGBA or greyscale), then
    # resize to the working resolution.
    processed_face_image = processed_face_image.convert("RGB").resize(
        desired_size, Image.LANCZOS
    )

    # Load the style reference from disk; the context manager guarantees the
    # file handle is released once the resized copy has been made.
    style_image_path = "examples/soyjak2.jpeg"
    with Image.open(style_image_path) as style_source:
        style_image = style_source.convert("RGB").resize(
            desired_size, Image.LANCZOS
        )

    # Fixed seed so the same input produces the same output.
    generator = torch.Generator(device="cuda").manual_seed(0)

    # Run the pipeline. The order of `ip_adapter_image` must match the order
    # in which the adapters were loaded.
    image = pipeline(
        prompt="soyjak",
        ip_adapter_image=[style_image, processed_face_image],
        negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
        num_inference_steps=30,
        generator=generator,
    ).images[0]

    # NOTE: the pipeline is intentionally left on the GPU. Moving it to the
    # CPU here would leave the shared module-level pipeline on the wrong
    # device for the next call, since nothing moves it back and the generator
    # above is always created on CUDA.
    return image

# Build the web UI: a single image upload in, a single generated image out.
face_input = gr.Image(label="Upload your face image")
soyjak_output = gr.Image(label="Your Soyjak")

# User-facing blurb shown under the title.
app_description = (
    "All you need to do is upload an image. Please use responsibly. "
    "Please follow me on Twitter if you like this space: "
    "https://twitter.com/angrypenguinPNG. "
    "Idea from Yacine, please give him a follow: https://twitter.com/yacineMTB."
)

demo = gr.Interface(
    title="InstaSoyjak - turn anyone into a Soyjak",
    description=app_description,
    fn=transform_image,
    inputs=face_input,
    outputs=soyjak_output,
)

# Cap the request queue at 20 pending jobs, then start serving.
demo.queue(max_size=20).launch()