import gradio as gr
import imageio
import torch
from diffusers import TextToVideoZeroPipeline

from video_diffusion.tuneavideo.util import save_videos_grid
from video_diffusion.utils.model_list import stable_model_list


class ZeroShotText2VideoGenerator:
    """Generate short videos from a text prompt with ``TextToVideoZeroPipeline``.

    The pipeline is loaded lazily on the first request and cached on the
    instance; it is reloaded only when a different ``model_id`` is requested.
    """

    def __init__(self):
        # Cached pipeline and the model id it was built from.
        self.pipe = None
        self.model_id = None

    def load_model(self, model_id):
        """Return a CUDA pipeline for ``model_id``, loading it when needed.

        Args:
            model_id: Model identifier passed to
                ``TextToVideoZeroPipeline.from_pretrained``.

        Returns:
            The cached pipeline, rebuilt if ``model_id`` differs from the one
            currently loaded.
        """
        # ``getattr`` keeps this safe for instances created without __init__.
        loaded_id = getattr(self, "model_id", None)
        if self.pipe is None or loaded_id != model_id:
            if self.pipe is not None:
                # Drop the previous model before loading the next one so both
                # do not have to fit in GPU memory at the same time.
                self.pipe = None
                torch.cuda.empty_cache()

            pipe = TextToVideoZeroPipeline.from_pretrained(
                model_id,
                torch_dtype=torch.float16,
            ).to("cuda")
            pipe.enable_xformers_memory_efficient_attention()
            pipe.enable_attention_slicing()

            self.pipe = pipe
            self.model_id = model_id

        return self.pipe

    def generate_video(
        self,
        prompt,
        negative_prompt,
        model_id,
        height,
        width,
        video_length,
        guidance_scale,
        fps,
        t0,
        t1,
        motion_field_strength_x,
        motion_field_strength_y,
    ):
        """Run the pipeline and write the resulting frames to ``video.mp4``.

        Args:
            prompt: Text prompt forwarded to the pipeline.
            negative_prompt: Negative prompt forwarded to the pipeline.
            model_id: Model to load (see ``load_model``).
            height: Frame height forwarded to the pipeline.
            width: Frame width forwarded to the pipeline.
            video_length: Number of frames requested from the pipeline.
            guidance_scale: Guidance scale forwarded to the pipeline.
            fps: Frame rate used when encoding the video file.
            t0: Forwarded to the pipeline as ``t0``.
            t1: Forwarded to the pipeline as ``t1``.
            motion_field_strength_x: Forwarded to the pipeline unchanged.
            motion_field_strength_y: Forwarded to the pipeline unchanged.

        Returns:
            The path of the written video file, ``"video.mp4"``.
        """
        pipe = self.load_model(model_id)

        frames = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            height=height,
            width=width,
            video_length=video_length,
            guidance_scale=guidance_scale,
            t0=t0,
            t1=t1,
            motion_field_strength_x=motion_field_strength_x,
            motion_field_strength_y=motion_field_strength_y,
        ).images

        # Scale each frame by 255 and store it as uint8 for the encoder
        # (presumably the pipeline yields float arrays in [0, 1] -- confirm).
        frames = [(frame * 255).astype("uint8") for frame in frames]

        # NOTE(review): the output path is fixed, so every call overwrites the
        # previous video in the current working directory.
        imageio.mimsave("video.mp4", frames, fps=fps)
        return "video.mp4"

    @staticmethod
    def app():
        """Build the Gradio controls for zero-shot text-to-video generation.

        Creates the input widgets, the output video widget and wires the
        button to ``generate_video`` on a fresh generator instance. Takes no
        arguments and returns nothing; it is a static method so it can be
        called on the class or on an instance.
        """
        with gr.Blocks():
            with gr.Row():
                with gr.Column():
                    zero_shot_text2video_prompt = gr.Textbox(
                        lines=1,
                        placeholder="Prompt",
                        show_label=False,
                    )
                    zero_shot_text2video_negative_prompt = gr.Textbox(
                        lines=1,
                        placeholder="Negative Prompt",
                        show_label=False,
                    )
                    zero_shot_text2video_model_id = gr.Dropdown(
                        choices=stable_model_list,
                        label="Stable Model List",
                        value=stable_model_list[0],
                    )

                    with gr.Row():
                        with gr.Column():
                            # step=0.5 so the 7.5 default lies on the slider
                            # grid; every integer value is still selectable.
                            zero_shot_text2video_guidance_scale = gr.Slider(
                                label="Guidance Scale",
                                minimum=1,
                                maximum=15,
                                step=0.5,
                                value=7.5,
                            )
                            zero_shot_text2video_video_length = gr.Slider(
                                label="Video Length",
                                minimum=1,
                                maximum=100,
                                step=1,
                                value=10,
                            )
                            zero_shot_text2video_t0 = gr.Slider(
                                label="Timestep T0",
                                minimum=0,
                                maximum=100,
                                step=1,
                                value=44,
                            )
                            zero_shot_text2video_motion_field_strength_x = gr.Slider(
                                label="Motion Field Strength X",
                                minimum=0,
                                maximum=100,
                                step=1,
                                value=12,
                            )
                            zero_shot_text2video_fps = gr.Slider(
                                label="Fps",
                                minimum=1,
                                maximum=60,
                                step=1,
                                value=10,
                            )

                        with gr.Row():
                            with gr.Column():
                                zero_shot_text2video_height = gr.Slider(
                                    label="Height",
                                    minimum=128,
                                    maximum=1280,
                                    step=32,
                                    value=512,
                                )
                                zero_shot_text2video_width = gr.Slider(
                                    label="Width",
                                    minimum=128,
                                    maximum=1280,
                                    step=32,
                                    value=512,
                                )
                                zero_shot_text2video_t1 = gr.Slider(
                                    label="Timestep T1",
                                    minimum=0,
                                    maximum=100,
                                    step=1,
                                    value=47,
                                )
                                zero_shot_text2video_motion_field_strength_y = gr.Slider(
                                    label="Motion Field Strength Y",
                                    minimum=0,
                                    maximum=100,
                                    step=1,
                                    value=12,
                                )

                    zero_shot_text2video_button = gr.Button(value="Generator")

                with gr.Column():
                    zero_shot_text2video_output = gr.Video(label="Output")

            # The input order must match the positional parameters of
            # ``generate_video``.
            zero_shot_text2video_button.click(
                fn=ZeroShotText2VideoGenerator().generate_video,
                inputs=[
                    zero_shot_text2video_prompt,
                    zero_shot_text2video_negative_prompt,
                    zero_shot_text2video_model_id,
                    zero_shot_text2video_height,
                    zero_shot_text2video_width,
                    zero_shot_text2video_video_length,
                    zero_shot_text2video_guidance_scale,
                    zero_shot_text2video_fps,
                    zero_shot_text2video_t0,
                    zero_shot_text2video_t1,
                    zero_shot_text2video_motion_field_strength_x,
                    zero_shot_text2video_motion_field_strength_y,
                ],
                outputs=zero_shot_text2video_output,
            )