import spaces
import gradio as gr
import time
import torch
import gc
import tempfile
import numpy as np
import cv2

from diffusers import LTXPipeline
from diffusers.utils import export_to_video


# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the LTX-Video pipeline in bfloat16 once at import time, then move it
# to the selected device so every request reuses the same instance.
pipe = LTXPipeline.from_pretrained(
    "Lightricks/LTX-Video",
    torch_dtype=torch.bfloat16,
)
pipe.to(device)

def create_demo() -> gr.Blocks:
    """Build the Gradio UI for text-to-video generation.

    The layout has a prompt textbox, a negative-prompt textbox and a
    "Generate Video" button in the left column, and the generated video plus
    a read-only timing textbox in the right column.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """

    @spaces.GPU(duration=60)
    def text_to_video(
        prompt: str,
        negative_prompt: str,
        width: int = 768,
        height: int = 512,
        num_frames: int = 121,
        frame_rate: int = 25,
        num_inference_steps: int = 30,
        seed: int = 8,
        progress: gr.Progress = gr.Progress(),
    ):
        """Generate a video from ``prompt`` and write it to a temporary MP4.

        Only ``prompt`` and ``negative_prompt`` are wired to UI components
        below; the remaining parameters run with their defaults.

        Args:
            prompt: Text describing the desired video.
            negative_prompt: Text describing what to avoid.
            width: Output width passed to the pipeline.
            height: Output height passed to the pipeline.
            num_frames: Number of frames to generate.
            frame_rate: Frames per second, used both for generation and for
                encoding the output file.
            num_inference_steps: Number of denoising steps.
            seed: Seed for the torch random generator.
            progress: Gradio progress tracker; accepted in the signature but
                not used directly in this body.

        Returns:
            A ``(output_path, time_cost_str)`` tuple: the path of the MP4
            file and the timing string produced by ``get_time_cost``.
        """
        generator = torch.Generator(device=device).manual_seed(seed)

        # First call records the start timestamp and yields 'start'.
        run_task_time, time_cost_str = get_time_cost(0, '')
        try:
            with torch.no_grad():
                video = pipe(
                    prompt=prompt,
                    negative_prompt=negative_prompt,
                    generator=generator,
                    width=width,
                    height=height,
                    num_frames=num_frames,
                    # Keep generation consistent with the fps used at export.
                    frame_rate=frame_rate,
                    num_inference_steps=num_inference_steps,
                ).frames[0]
        finally:
            # Collect unreachable objects first so the memory they held can
            # be returned when the CUDA cache is emptied.
            gc.collect()
            torch.cuda.empty_cache()
        # Second call appends the elapsed generation time in milliseconds.
        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)

        # mktemp() is deprecated and race-prone; create the file atomically,
        # close it, and keep it on disk (delete=False) so it can be written
        # by the exporter and served afterwards.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            output_path = tmp_file.name
        export_to_video(video, output_path, fps=frame_rate)

        return output_path, time_cost_str

    def get_time_cost(run_task_time, time_cost_str):
        """Advance the step timer and extend the timing string.

        Args:
            run_task_time: Timestamp (ms) of the previous step, or ``0`` to
                start a new measurement.
            time_cost_str: Timing string accumulated so far.

        Returns:
            A ``(now_ms, time_cost_str)`` tuple. When ``run_task_time`` is
            ``0`` the string is reset to ``'start'``; otherwise the elapsed
            milliseconds are appended, separated by ``'-->'``.
        """
        now_time = int(time.time() * 1000)
        if run_task_time == 0:
            time_cost_str = 'start'
        else:
            if time_cost_str:
                time_cost_str += '-->'
            time_cost_str += str(now_time - run_task_time)
        return now_time, time_cost_str

    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                txt2vid_prompt = gr.Textbox(
                    label="Enter Your Prompt",
                    placeholder="Describe the video you want to generate (minimum 50 characters)...",
                    value="A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage.",
                    lines=5,
                )

                txt2vid_negative_prompt = gr.Textbox(
                    label="Enter Negative Prompt",
                    placeholder="Describe what you don't want in the video...",
                    value="low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly",
                    lines=2,
                )

                txt2vid_generate = gr.Button(
                    "Generate Video",
                    variant="primary",
                    size="lg",
                )

            with gr.Column():
                txt2vid_output = gr.Video(label="Generated Output")
                txt2vid_generated_cost = gr.Textbox(label="Time cost by step (ms):", visible=True, interactive=False)

        # Only the two prompts are inputs; all other generation settings use
        # the defaults declared on text_to_video.
        txt2vid_generate.click(
            fn=text_to_video,
            inputs=[txt2vid_prompt, txt2vid_negative_prompt],
            outputs=[txt2vid_output, txt2vid_generated_cost],
        )

    return demo