import gradio as gr
import torch
from PIL import Image
from diffusers import AutoPipelineForText2Image, DDIMScheduler
import numpy as np
from torchvision import transforms
import spaces  # Make sure to import spaces

# Build the SDXL text-to-image pipeline in half precision. It is created once
# at import time and reused by every request.
pipeline = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
)

# Replace the default scheduler with DDIM, reusing the existing scheduler config.
pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)

# Load two IP-Adapters: a general "plus" adapter (used for the style image)
# and a "plus-face" adapter (used for the uploaded face).
#
# Both checkpoints are the ViT-H variants. Their image encoder lives in
# "models/image_encoder" of the h94/IP-Adapter repo, not in the default
# "sdxl_models/image_encoder" (ViT-bigG), so the encoder folder has to be
# given explicitly or the adapter projection layers will not match the
# encoder's hidden size.
pipeline.load_ip_adapter(
    "h94/IP-Adapter",
    subfolder="sdxl_models",
    weight_name=[
        "ip-adapter-plus_sdxl_vit-h.safetensors",
        "ip-adapter-plus-face_sdxl_vit-h.safetensors",
    ],
    image_encoder_folder="models/image_encoder",
)
# One scale per adapter, in the same order as `weight_name` above.
pipeline.set_ip_adapter_scale([0.7, 0.5])

# Size (width, height) that input images are resized to before inference.
desired_size = (1024, 1024)

@spaces.GPU
def transform_image(face_image):
    """Generate a "soyjak"-styled picture from an uploaded face image.

    The uploaded face and a fixed local style image are fed to the
    module-level ``pipeline`` as IP-Adapter conditioning images (style image
    first, face image second, matching the order the adapters were loaded).

    Args:
        face_image: The uploaded picture, either a ``PIL.Image.Image`` or a
            ``numpy.ndarray`` that ``Image.fromarray`` can interpret.

    Returns:
        The first image produced by the pipeline. With the pipeline's default
        output type this is a PIL image, which Gradio can display directly.

    Raises:
        ValueError: If ``face_image`` is neither a PIL image nor a NumPy
            array (this includes ``None`` when nothing was uploaded).
    """
    # Validate and normalise the input before touching the GPU, so that bad
    # requests fail fast without moving the model around.
    if isinstance(face_image, Image.Image):
        face = face_image
    elif isinstance(face_image, np.ndarray):
        face = Image.fromarray(face_image)
    else:
        raise ValueError("Unsupported image format")

    # Force 3-channel RGB: uploads may be RGBA or greyscale, which the image
    # encoder's preprocessing does not expect.
    face = face.convert("RGB").resize(desired_size, Image.LANCZOS)

    # Load the style reference from disk; the context manager closes the file
    # handle, and convert()/resize() return an independent in-memory copy.
    style_image_path = "examples/soyjak2.jpeg"  # Ensure this path is correct
    with Image.open(style_image_path) as style_file:
        style = style_file.convert("RGB").resize(desired_size, Image.LANCZOS)

    # Move the pipeline to the GPU only for the duration of the request.
    pipeline.to("cuda")
    try:
        # Fixed seed so the same input gives a reproducible result.
        generator = torch.Generator(device="cuda").manual_seed(0)

        # Pass PIL images (not pre-built tensors) so the pipeline's own
        # feature extractor performs the resize/normalisation the image
        # encoder needs. Raw tensors would bypass that preprocessing.
        result = pipeline(
            prompt="soyjak",
            ip_adapter_image=[style, face],
            negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
            num_inference_steps=30,
            generator=generator,
        ).images[0]
    finally:
        # Always move the pipeline back to the CPU, even if inference failed,
        # so GPU resources are released.
        pipeline.to("cpu")

    return result

# Web UI: a single image upload in, a single generated image out, with
# transform_image doing the work for each request.
demo = gr.Interface(
    title="InstaSoyjak - turn anyone into a Soyjak",
    description="All you need to do is upload an image. Please use responsibly.",
    fn=transform_image,
    inputs=gr.Image(label="Upload your face image"),
    outputs=gr.Image(label="Your Soyjak"),
)

# Enable request queuing (max_size=20), then start the server.
demo.queue(max_size=20).launch()