"""Gradio demo that generates a short video from a text prompt with LTX-Video."""

import gc
import tempfile
import time

# NOTE(review): `spaces` is kept ahead of `torch`, as in the original import
# order — presumably required by the hosting runtime; confirm before moving.
import spaces
import gradio as gr
import torch
import numpy as np
import cv2
from diffusers import LTXPipeline
from diffusers.utils import export_to_video

DEFAULT_PROMPT = (
    "A woman with long brown hair and light skin smiles at another woman "
    "with long blonde hair. The woman with brown hair wears a black jacket "
    "and has a small, barely noticeable mole on her right cheek. The camera "
    "angle is a close-up, focused on the woman with brown hair's face. The "
    "lighting is warm and natural, likely from the setting sun, casting a "
    "soft glow on the scene. The scene appears to be real-life footage."
)

DEFAULT_NEGATIVE_PROMPT = (
    "low quality, worst quality, deformed, distorted, disfigured, "
    "motion smear, motion artifacts, fused fingers, bad anatomy, "
    "weird hand, ugly"
)

device = "cuda" if torch.cuda.is_available() else "cpu"

# The pipeline is loaded once at import time and shared by every request.
pipe = LTXPipeline.from_pretrained(
    "Lightricks/LTX-Video",
    torch_dtype=torch.bfloat16,
)
pipe.to(device)


def create_demo() -> gr.Blocks:
    """Build the text-to-video Gradio interface.

    Returns:
        A ``gr.Blocks`` app with a prompt box, a negative-prompt box, a
        generate button, a video player and a read-only timing field.
    """

    def get_time_cost(run_task_time, time_cost_str):
        """Advance the step timer and extend the human-readable timing log.

        Args:
            run_task_time: Timestamp (ms since the epoch) of the previous
                checkpoint, or ``0`` when no checkpoint has been taken yet.
            time_cost_str: Timing log accumulated so far.

        Returns:
            ``(now_ms, log)``. On the first call the log is reset to
            ``'start'``; on later calls the milliseconds elapsed since the
            previous checkpoint are appended, separated by ``'-->'``.
        """
        now_time = int(time.time() * 1000)
        if run_task_time == 0:
            time_cost_str = 'start'
        else:
            if time_cost_str != '':
                time_cost_str += '-->'
            time_cost_str += f'{now_time - run_task_time}'
        return now_time, time_cost_str

    @spaces.GPU(duration=60)
    def text_to_video(
        prompt: str,
        negative_prompt: str,
        width: int = 768,
        height: int = 512,
        num_frames: int = 121,
        frame_rate: int = 25,
        num_inference_steps: int = 30,
        seed: int = 8,
        progress: gr.Progress = gr.Progress(),
    ):
        """Generate a video for ``prompt`` and write it to a temporary MP4.

        Args:
            prompt: Text description of the desired video.
            negative_prompt: Text description of what to avoid.
            width: Output width passed to the pipeline.
            height: Output height passed to the pipeline.
            num_frames: Number of frames requested from the pipeline.
            frame_rate: Frame rate passed to the pipeline and used as the
                fps of the exported file.
            num_inference_steps: Number of inference steps passed to the
                pipeline.
            seed: Seed for the random generator, for reproducible output.
            progress: Gradio progress tracker (not used directly here).

        Returns:
            ``(output_path, time_cost_str)``: the path of the written MP4 and
            the timing log, e.g. ``'start-->12345'``.
        """
        generator = torch.Generator(device=device).manual_seed(seed)

        run_task_time, time_cost_str = get_time_cost(0, '')

        try:
            with torch.no_grad():
                video = pipe(
                    prompt=prompt,
                    negative_prompt=negative_prompt,
                    generator=generator,
                    width=width,
                    height=height,
                    num_frames=num_frames,
                    # Keep generation consistent with the fps used on export.
                    frame_rate=frame_rate,
                    num_inference_steps=num_inference_steps,
                ).frames[0]
        finally:
            # Collect garbage first so memory held by now-unreachable tensors
            # can actually be handed back by empty_cache().
            gc.collect()
            torch.cuda.empty_cache()

        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)

        # tempfile.mktemp() is deprecated and race-prone: it only returns a
        # name. Create the file atomically and keep it for Gradio to serve.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            output_path = tmp_file.name
        export_to_video(video, output_path, fps=frame_rate)

        del video
        torch.cuda.empty_cache()
        return output_path, time_cost_str

    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                txt2vid_prompt = gr.Textbox(
                    label="Enter Your Prompt",
                    placeholder="Describe the video you want to generate (minimum 50 characters)...",
                    value=DEFAULT_PROMPT,
                    lines=5,
                )

                txt2vid_negative_prompt = gr.Textbox(
                    label="Enter Negative Prompt",
                    placeholder="Describe what you don't want in the video...",
                    value=DEFAULT_NEGATIVE_PROMPT,
                    lines=2,
                )

                txt2vid_generate = gr.Button(
                    "Generate Video",
                    variant="primary",
                    size="lg",
                )

            with gr.Column():
                txt2vid_output = gr.Video(label="Generated Output")
                txt2vid_generated_cost = gr.Textbox(
                    label="Time cost by step (ms):",
                    visible=True,
                    interactive=False,
                )

        # Only the two text boxes are wired in, so every other generation
        # parameter runs with its default value.
        txt2vid_generate.click(
            fn=text_to_video,
            inputs=[txt2vid_prompt, txt2vid_negative_prompt],
            outputs=[txt2vid_output, txt2vid_generated_cost],
        )

    return demo