#!/usr/bin/env python
"""Gradio demo: Canny-edge-guided Stable Diffusion (SD 1.5 plus an SDXL path)."""
import math

import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image
from diffusers import (
    AutoencoderKL,
    ControlNetModel,
    DiffusionPipeline,
    StableDiffusionControlNetImg2ImgPipeline,
    StableDiffusionControlNetPipeline,
    StableDiffusionXLControlNetPipeline,
    UniPCMultistepScheduler,
)
from diffusers.utils import load_image

# --- SD 1.5 pipelines --------------------------------------------------------

# Canny ControlNet + Realistic Vision for the base text-to-image pass.
canny_controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16
)
canny_pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V3.0_VAE",
    controlnet=canny_controlnet,
    torch_dtype=torch.float16,
    use_safetensors=True,
)
canny_pipe.scheduler = UniPCMultistepScheduler.from_config(canny_pipe.scheduler.config)
canny_pipe.enable_model_cpu_offload()
canny_pipe.enable_xformers_memory_efficient_attention()

# Tile ControlNet + img2img for the optional high-resolution refinement pass.
tile_controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/control_v11f1e_sd15_tile", torch_dtype=torch.float16
)
canny_pipe_img2img = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
    "SG161222/Realistic_Vision_V3.0_VAE",
    controlnet=tile_controlnet,
    torch_dtype=torch.float16,
    use_safetensors=True,
)
canny_pipe_img2img.enable_model_cpu_offload()
canny_pipe_img2img.enable_xformers_memory_efficient_attention()

# --- SDXL pipelines ----------------------------------------------------------

controlnet_xl = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16
)
vae_xl = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
)
pipe_xl = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet_xl,
    vae=vae_xl,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
pipe_xl.scheduler = UniPCMultistepScheduler.from_config(pipe_xl.scheduler.config)
pipe_xl.enable_xformers_memory_efficient_attention()
pipe_xl.enable_model_cpu_offload()

# The refiner shares the base model's second text encoder and VAE to save memory.
refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=pipe_xl.text_encoder_2,
    vae=pipe_xl.vae,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
refiner.enable_xformers_memory_efficient_attention()
refiner.enable_model_cpu_offload()


def resize_image_output(im, width, height):
    """Resize a PIL image back to the requested (width, height)."""
    arr = np.array(im)
    arr = cv2.resize(arr, (width, height), interpolation=cv2.INTER_CUBIC)
    return Image.fromarray(arr)


def resize_image(im, max_size=590000):
    """Scale a numpy image so its pixel count lies between 262144 (512 * 512)
    and max_size, then snap both sides down to multiples of 8, as the
    diffusion models require."""
    height, width, _ = im.shape
    min_size = 262144
    if height * width > max_size:
        scale_ratio = math.sqrt((height * width) / max_size)
        new_height = int(height / scale_ratio)
        new_width = int(width / scale_ratio)
    elif height * width < min_size:
        scale_ratio = math.sqrt((height * width) / min_size)
        new_height = int(height / scale_ratio)
        new_width = int(width / scale_ratio)
    else:
        new_height, new_width = height, width
    new_height = (new_height // 8) * 8
    new_width = (new_width // 8) * 8
    # cv2.resize takes (width, height), i.e. (columns, rows).
    return cv2.resize(im, (new_width, new_height), interpolation=cv2.INTER_CUBIC)


def process_canny_tile(
    input_image,
    control_image,
    x,
    y,
    prompt,
    a_prompt,
    n_prompt,
    num_samples,
    image_resolution,
    ddim_steps,
    guess_mode,
    strength_conditioning,
    scale,
    seed,
    eta,
    low_threshold,
    high_threshold,
):
    """Tile-ControlNet img2img pass used to refine an upscaled result.
    The denoising settings are deliberately conservative (strength=0.3)
    so the pass adds detail without repainting the image."""
    return canny_pipe_img2img(
        prompt="",
        image=input_image,
        control_image=control_image,
        num_inference_steps=20,
        guidance_scale=4,
        strength=0.3,
        guess_mode=True,
        negative_prompt=n_prompt,
        num_images_per_prompt=1,
        eta=eta,
        generator=torch.Generator(device="cpu").manual_seed(seed),
    )
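
# A minimal sketch of how the tile pass above is intended to be chained:
# upscale the base result conventionally first, then let the tile ControlNet
# restore detail. The helper name and the 2x factor are illustrative
# assumptions, not part of the original app.
def upscale_then_refine(pil_image, n_prompt, seed, eta=0.0, factor=2):
    w, h = pil_image.size
    upscaled = pil_image.resize((w * factor, h * factor), Image.LANCZOS)
    out = process_canny_tile(
        upscaled, upscaled,          # same image serves as input and control
        h * factor, w * factor,
        "", "", n_prompt,
        1, None, 20, False,          # one sample, 20 steps, no guess mode
        1.0, 4, seed, eta, 100, 200,  # strength, scale, seed, eta, thresholds
    )
    return out.images[0]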
def process_canny(
    input_image,
    x,
    y,
    prompt,
    a_prompt,
    n_prompt,
    num_samples,
    image_resolution,
    ddim_steps,
    guess_mode,
    strength,
    scale,
    seed,
    eta,
    low_threshold,
    high_threshold,
):
    """Text-to-image pass conditioned on a Canny edge map (SD 1.5)."""
    return canny_pipe(
        prompt=", ".join([prompt, a_prompt]),
        image=input_image,
        height=x,
        width=y,
        num_inference_steps=ddim_steps,
        guidance_scale=scale,
        negative_prompt=n_prompt,
        num_images_per_prompt=num_samples,
        eta=eta,
        controlnet_conditioning_scale=strength,
        generator=torch.Generator(device="cpu").manual_seed(seed),
    )


def process_canny_sdxl(
    input_image,
    x,
    y,
    prompt,
    a_prompt,
    n_prompt,
    num_samples,
    image_resolution,
    ddim_steps,
    guess_mode,
    strength,
    scale,
    seed,
    eta,
    low_threshold,
    high_threshold,
):
    """SDXL variant: the base ControlNet pipeline emits latents, which the
    refiner then re-denoises from the 80% mark of its schedule."""
    latents = pipe_xl(
        prompt=", ".join([prompt, a_prompt]),
        image=input_image,
        height=x,
        width=y,
        num_inference_steps=ddim_steps,
        guidance_scale=scale,
        negative_prompt=n_prompt,
        num_images_per_prompt=num_samples,
        eta=eta,
        controlnet_conditioning_scale=strength,
        generator=torch.Generator(device="cpu").manual_seed(seed),
        output_type="latent",
    ).images
    return refiner(
        prompt=prompt,
        num_inference_steps=ddim_steps,
        num_images_per_prompt=num_samples,
        denoising_start=0.8,
        image=latents,
    )


def process(
    image,
    prompt,
    a_prompt,
    n_prompt,
    ddim_steps,
    strength,
    scale,
    seed,
    eta,
    low_threshold,
    high_threshold,
):
    # Gradio sliders may deliver floats; the pipelines need ints here.
    seed = int(seed)
    ddim_steps = int(ddim_steps)

    image = load_image(image)
    image = np.array(image)
    height_orig, width_orig, _ = image.shape
    image = resize_image(image)
    height, width, _ = image.shape

    # Build the 3-channel Canny edge map the ControlNet expects.
    image = cv2.Canny(image, low_threshold, high_threshold)
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    image = Image.fromarray(image)

    result = process_canny(
        image,
        height,
        width,
        prompt,
        a_prompt,
        n_prompt,
        1,
        None,
        ddim_steps,
        False,
        float(strength),
        scale,
        seed,
        eta,
        low_threshold,
        high_threshold,
    )
    im = result.images[0]
    im = resize_image_output(im, width_orig, height_orig)

    highres = False  # optional tile-based refinement pass, disabled by default
    if highres:
        result_upscaled = process_canny_tile(
            im,
            im,
            height_orig,
            width_orig,
            prompt,
            a_prompt,
            n_prompt,
            1,
            None,
            ddim_steps,
            False,
            strength,
            scale,
            seed,
            eta,
            low_threshold,
            high_threshold,
        )
        im = result_upscaled.images[0]
    return im
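
# A minimal headless sketch of the full flow, bypassing the Gradio UI.
# "input.jpg", the prompts, and every numeric value below are illustrative
# assumptions; the function is defined here for reference and never called
# by the app itself.
def run_headless_example():
    image = Image.open("input.jpg")
    result = process(
        image,
        prompt="a photo of a modern living room",
        a_prompt="best quality, extremely detailed",
        n_prompt="lowres, bad anatomy, worst quality, low quality",
        ddim_steps=20,
        strength=1.0,
        scale=7.5,
        seed=42,
        eta=0.0,
        low_threshold=100,
        high_threshold=200,
    )
    result.save("output.png")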
demo = gr.Blocks().queue()
with demo:
    with gr.Row():
        gr.Markdown("## Control Stable Diffusion with Canny Edge Maps")
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Input Image")
            input_prompt = gr.Textbox(label="Prompt")
            run_button = gr.Button(value="Run")
            with gr.Accordion("Advanced Options", open=False):
                strength = gr.Slider(
                    label="Control Strength",
                    minimum=0.0,
                    maximum=2.0,
                    value=1.0,
                    step=0.01,
                )
                low_threshold = gr.Slider(
                    label="Canny low threshold",
                    minimum=1,
                    maximum=255,
                    value=100,
                    step=1,
                )
                high_threshold = gr.Slider(
                    label="Canny high threshold",
                    minimum=1,
                    maximum=255,
                    value=200,
                    step=1,
                )
                ddim_steps = gr.Slider(
                    label="Steps", minimum=1, maximum=100, value=20, step=1
                )
                scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=0.1,
                    maximum=30.0,
                    value=7.5,  # default value was 9.0
                    step=0.1,
                )
                seed = gr.Slider(
                    label="Seed",
                    minimum=-1,
                    maximum=2147483647,
                    step=1,
                    randomize=True,
                )
                eta = gr.Number(label="eta (DDIM)", value=0.0)
                a_prompt = gr.Textbox(
                    label="Added Prompt", value="best quality, extremely detailed"
                )
                n_prompt = gr.Textbox(
                    label="Negative Prompt",
                    value="longbody, lowres, bad anatomy, bad hands, "
                    "missing fingers, extra digit, fewer digits, cropped, "
                    "worst quality, low quality",
                )
        with gr.Column():
            result = gr.Image(label="Output", type="pil")
    ips = [
        input_image,
        input_prompt,
        a_prompt,
        n_prompt,
        ddim_steps,
        strength,
        scale,
        seed,
        eta,
        low_threshold,
        high_threshold,
    ]
    run_button.click(fn=process, inputs=ips, outputs=[result])

demo.launch()
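
# Hedged launch variants: demo.launch() above uses Gradio's defaults
# (local-only, blocking). For LAN exposure or a temporary public URL,
# these real Gradio options exist and could replace the call above:
#
#     demo.launch(server_name="0.0.0.0", server_port=7860)  # serve on the LAN
#     demo.launch(share=True)                               # tunnel a public URL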