"""Gradio demo for image-to-video generation with an ExVideo SVD checkpoint."""

import os
import random
import uuid
from glob import glob
from pathlib import Path
from typing import Optional, Tuple

from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import export_to_video, load_image
import gradio as gr
import numpy as np
from PIL import Image
import spaces
import torch

# from huggingface_hub import hf_hub_download
# os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
# HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Constants
model = "ECNU-CILab/ExVideo-SVD-128f-v1"
MAX_SEED = np.iinfo(np.int32).max

CSS = """
footer {
    visibility: hidden;
}
"""

# Redirects the page to its dark-theme URL unless it is already on it.
JS = """function () {
  gradioURL = window.location.href
  if (!gradioURL.endsWith('?__theme=dark')) {
    window.location.replace(gradioURL + '?__theme=dark');
  }
}"""

# The pipeline is loaded once at import time, and only when CUDA is available.
# `pipe` stays None otherwise so that `generate` can report a clear error
# instead of failing with a NameError.
pipe = None
if torch.cuda.is_available():
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        model,
        torch_dtype=torch.float16,
        variant="fp16",
    ).to("cuda")


# function source codes modified from multimodalart/stable-video-diffusion
@spaces.GPU(duration=120)
def generate(
    image: Image.Image,
    seed: Optional[int] = -1,
    motion_bucket_id: int = 127,
    fps_id: int = 6,
    version: str = "svd_xt",
    cond_aug: float = 0.02,
    decoding_t: int = 1,
    device: str = "cuda",
    output_folder: str = "outputs",
    progress=gr.Progress(track_tqdm=True),
) -> Tuple[str, int]:
    """Generate a video from a single image and write it to an MP4 file.

    Args:
        image: Conditioning image (PIL). RGBA input is converted to RGB.
        seed: Seed for the torch generator. ``-1`` or ``None`` picks a random
            seed in ``[0, MAX_SEED]``.
        motion_bucket_id: Forwarded to the pipeline as ``motion_bucket_id``.
        fps_id: Frames per second used when exporting the video.
        version: Currently unused by this function.
        cond_aug: Currently unused by this function; the pipeline is called
            with a fixed ``noise_aug_strength=0.1``.
        decoding_t: Forwarded to the pipeline as ``decode_chunk_size``.
        device: Currently unused by this function.
        output_folder: Directory the MP4 is written to (created if missing).
        progress: Gradio progress tracker (tracks tqdm progress bars).

    Returns:
        A ``(video_path, seed)`` tuple: the path of the exported MP4 and the
        seed that was actually used.

    Raises:
        gr.Error: If no image was provided or the pipeline is not loaded.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")
    if pipe is None:
        raise gr.Error("The video pipeline is not loaded (CUDA is not available).")

    if seed is None or seed == -1:
        seed = random.randint(0, MAX_SEED)

    if image.mode == "RGBA":
        image = image.convert("RGB")

    generator = torch.manual_seed(seed)

    os.makedirs(output_folder, exist_ok=True)
    # Output files are numbered by how many MP4s already exist in the folder.
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")

    frames = pipe(
        image,
        decode_chunk_size=decoding_t,
        generator=generator,
        motion_bucket_id=motion_bucket_id,
        noise_aug_strength=0.1,
        num_frames=25,
    ).frames[0]
    export_to_video(frames, video_path, fps=fps_id)
    # Re-seed torch's global RNG with the seed being returned (kept from the
    # original implementation).
    torch.manual_seed(seed)

    return video_path, seed


def resize_image(
    image: Optional[Image.Image],
    output_size: Tuple[int, int] = (1024, 576),
) -> Optional[Image.Image]:
    """Resize *image* to cover ``output_size`` and center-crop it to that size.

    The image is scaled (keeping its aspect ratio) so that it fully covers
    the target rectangle, then the overflow is cropped equally from both
    sides of the longer dimension.

    Args:
        image: PIL image to resize; ``None`` is passed through unchanged.
        output_size: Target ``(width, height)`` in pixels.

    Returns:
        The resized and cropped image, or ``None`` if *image* is ``None``.
    """
    if image is None:
        return None

    # Calculate aspect ratios
    target_aspect = output_size[0] / output_size[1]  # Aspect ratio of the desired size
    image_aspect = image.width / image.height  # Aspect ratio of the original image

    if image_aspect > target_aspect:
        # Image is relatively wider than the target: match the target
        # height, then crop the excess width.
        new_height = output_size[1]
        new_width = int(new_height * image_aspect)
        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
        # Calculate coordinates for cropping
        left = (new_width - output_size[0]) / 2
        top = 0
        right = (new_width + output_size[0]) / 2
        bottom = output_size[1]
    else:
        # Image is relatively taller (or equal): match the target width,
        # then crop the excess height.
        new_width = output_size[0]
        new_height = int(new_width / image_aspect)
        resized_image = image.resize((new_width, new_height), Image.LANCZOS)
        # Calculate coordinates for cropping
        left = 0
        top = (new_height - output_size[1]) / 2
        right = output_size[0]
        bottom = (new_height + output_size[1]) / 2

    # Crop the image
    cropped_image = resized_image.crop((left, top, right, bottom))
    return cropped_image


examples = [
    "./train.jpg",
    "./girl.webp",
    "./robo.jpg",
]

# Gradio Interface
with gr.Blocks(css=CSS, js=JS, theme="soft") as demo:
    # NOTE(review): the HTML markup of these two banners appears to have been
    # lost; the visible text is preserved verbatim. Restore the original tags
    # if they are available.
    gr.HTML("\n\nExvideo📽️\n\n")
    gr.HTML("\n\nExVideo image-to-video generation\nUpdate: first version\n\n")
    with gr.Row():
        # type="pil" because resize_image and generate both use the PIL
        # image API (.width, .mode, .resize, .convert).
        image = gr.Image(label='Upload Image', type="pil", height=600, scale=2)
        video = gr.Video(label="Generated Video", height=600, scale=2)
    with gr.Accordion("Advanced Options", open=True):
        with gr.Column(scale=1):
            seed = gr.Slider(
                label="Seed (-1 Random)",
                minimum=-1,
                maximum=MAX_SEED,
                step=1,
                value=-1,
            )
            motion_bucket_id = gr.Slider(
                label="Motion bucket id",
                info="Controls how much motion to add/remove from the image",
                value=127,
                minimum=1,
                maximum=255
            )
            fps_id = gr.Slider(
                label="Frames per second",
                info="The length of your video in seconds will be 25/fps",
                value=6,
                minimum=5,
                maximum=30
            )
    submit_btn = gr.Button("Generate")
    # The first positional argument of ClearButton is the component list, so
    # the label has to be passed as `value`.
    clear_btn = gr.ClearButton([image, video], value="Clear")
    gr.Examples(
        examples=examples,
        inputs=image,
        outputs=[video, seed],
        fn=generate,
        cache_examples="lazy",
        examples_per_page=4,
    )

    image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)
    submit_btn.click(
        fn=generate,
        inputs=[image, seed, motion_bucket_id, fps_id],
        outputs=[video, seed],
        api_name="video",
    )

demo.queue().launch()