# Exvideo📽️

import os
import gradio as gr
import torch
import numpy as np
import spaces
import random
from PIL import Image

from glob import glob
from pathlib import Path
from typing import Optional

from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video

import uuid
# from huggingface_hub import hf_hub_download


# os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
# HF_TOKEN = os.environ.get("HF_TOKEN", None)
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Model identifier handed to StableVideoDiffusionPipeline.from_pretrained.
model = "ECNU-CILab/ExVideo-SVD-128f-v1"

# Upper bound of the seed slider: the largest signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max

# Stylesheet given to gr.Blocks; it hides the page footer.
CSS = "\n".join(
    [
        "",
        "footer {",
        "    visibility: hidden;",
        "}",
        "",
    ]
)

# Script given to gr.Blocks; it reloads the page with "?__theme=dark"
# appended unless the current URL already ends with that suffix.
JS = "\n".join(
    [
        "function () {",
        "  gradioURL = window.location.href",
        "  if (!gradioURL.endsWith('?__theme=dark')) {",
        "    window.location.replace(gradioURL + '?__theme=dark');",
        "  }",
        "}",
    ]
)


# Load the pipeline once, at import time, in half precision and move it to the
# GPU. When CUDA is not available this block is skipped and `pipe` is left
# undefined, so `generate` cannot run.
if torch.cuda.is_available():
    pipe = StableVideoDiffusionPipeline.from_pretrained(
        model,
        torch_dtype=torch.float16,
        variant="fp16",
    )
    pipe = pipe.to("cuda")

# function source codes modified from multimodalart/stable-video-diffusion
@spaces.GPU(duration=120)
def generate(
    image: Image.Image,
    seed: Optional[int] = -1,
    motion_bucket_id: int = 127,
    fps_id: int = 6,
    version: str = "svd_xt",
    cond_aug: float = 0.02,
    decoding_t: int = 1,
    device: str = "cuda",
    output_folder: str = "outputs",
    progress=gr.Progress(track_tqdm=True)):
    """Generate a 25-frame video from a single image and save it as an MP4.

    Args:
        image: Conditioning image. A PIL image is expected; a numpy array
            (what ``gr.Image`` supplies unless ``type="pil"`` is set) is
            converted to a PIL image first.
        seed: Seed for the random generator. ``-1`` or ``None`` picks a
            random seed in ``[0, MAX_SEED]``.
        motion_bucket_id: Forwarded to the pipeline as ``motion_bucket_id``.
        fps_id: Frame rate used when writing the video file.
        version: Unused; kept so existing callers keep working.
        cond_aug: Unused; the pipeline is called with a fixed
            ``noise_aug_strength=0.1``.
        decoding_t: Forwarded to the pipeline as ``decode_chunk_size``.
        device: Unused; the module-level ``pipe`` decides the device.
        output_folder: Directory the MP4 is written to (created if missing).
        progress: Gradio progress tracker (tracks tqdm bars).

    Returns:
        A ``(video_path, seed)`` tuple: the path of the written MP4 and the
        integer seed that was actually used.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Surface a readable message in the UI instead of an AttributeError.
        raise gr.Error("Please upload an image first.")

    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    # Normalise every non-RGB mode (RGBA, P, L, ...) rather than RGBA only.
    if image.mode != "RGB":
        image = image.convert("RGB")

    if seed is None or seed == -1:
        seed = random.randint(0, MAX_SEED)
    # Slider values may arrive as floats; torch.manual_seed requires an int.
    seed = int(seed)

    generator = torch.manual_seed(seed)

    os.makedirs(output_folder, exist_ok=True)
    # A unique name per call: counting the existing files is racy and can
    # overwrite an earlier video when requests overlap or files are deleted.
    video_path = os.path.join(output_folder, f"{uuid.uuid4().hex}.mp4")

    frames = pipe(
        image,
        decode_chunk_size=decoding_t,
        generator=generator,
        motion_bucket_id=motion_bucket_id,
        noise_aug_strength=0.1,
        num_frames=25,
    ).frames[0]
    export_to_video(frames, video_path, fps=fps_id)
    # Reset the global torch RNG to the seed after sampling, as the original
    # implementation did.
    torch.manual_seed(seed)

    return video_path, seed

def resize_image(image, output_size=(1024, 576)):
    """Scale *image* so it covers ``output_size`` and center-crop it to that size.

    The image is resized with its aspect ratio preserved until it is at least
    as large as the target in both dimensions, then the overflow is cropped
    equally from both sides of the longer dimension.

    Args:
        image: A PIL image, or a numpy array (what ``gr.Image`` supplies unless
            ``type="pil"`` is set), which is converted to a PIL image first.
            ``None`` is passed through unchanged.
        output_size: Target ``(width, height)`` in pixels.

    Returns:
        A PIL image of exactly ``output_size``, or ``None`` when *image* is
        ``None``.
    """
    if image is None:
        return None
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image)

    target_width, target_height = output_size
    target_aspect = target_width / target_height
    image_aspect = image.width / image.height

    if image_aspect > target_aspect:
        # Wider than the target: match the height, overflow horizontally.
        new_height = target_height
        new_width = int(new_height * image_aspect)
    else:
        # Taller than (or same shape as) the target: match the width.
        new_width = target_width
        new_height = int(new_width / image_aspect)

    # int() truncation can leave the scaled side one pixel short of the
    # target; never resize below the size that is about to be cropped out.
    new_width = max(new_width, target_width)
    new_height = max(new_height, target_height)
    resized_image = image.resize((new_width, new_height), Image.LANCZOS)

    # Integer crop box: half-pixel float offsets get rounded by Pillow and
    # can produce a result that is one pixel off for odd target sizes.
    left = (new_width - target_width) // 2
    top = (new_height - target_height) // 2
    return resized_image.crop(
        (left, top, left + target_width, top + target_height)
    )


# Sample input images listed in the gr.Examples gallery below.
examples = ["./train.jpg", "./girl.webp", "./robo.jpg"]


# Gradio Interface
#
# Layout: input image and output video side by side with the advanced
# options, then the Generate / Clear buttons and the example gallery.

with gr.Blocks(css=CSS, js=JS, theme="soft") as demo:
    gr.HTML("<h1><center>Exvideo📽️</center></h1>")
    gr.HTML("<p><center><a href='https://huggingface.co/ECNU-CILab/ExVideo-SVD-128f-v1'>ExVideo</a> image-to-video generation<br><b>Update</b>: first version</center></p>")
    with gr.Row():
        # type="pil": resize_image and generate read PIL attributes
        # (.width, .height, .mode); the default would hand them numpy arrays.
        image = gr.Image(label='Upload Image', type="pil", height=600, scale=2)
        video = gr.Video(label="Generated Video", height=600, scale=2)
        with gr.Accordion("Advanced Options", open=True):
            with gr.Column(scale=1):
                seed = gr.Slider(
                    label="Seed (-1 Random)",
                    minimum=-1,
                    maximum=MAX_SEED,
                    step=1,
                    value=-1,
                )
                motion_bucket_id = gr.Slider(
                    label="Motion bucket id",
                    info="Controls how much motion to add/remove from the image",
                    value=127,
                    minimum=1,
                    maximum=255,
                )
                fps_id = gr.Slider(
                    label="Frames per second",
                    info="The length of your video in seconds will be 25/fps",
                    value=6,
                    minimum=5,
                    maximum=30,
                )

    submit_btn = gr.Button("Generate")
    # The first positional argument of ClearButton is the list of components
    # to clear, not the label; the label goes in `value`.
    clear_btn = gr.ClearButton(components=[image, video], value="Clear")
    gr.Examples(
        examples=examples,
        inputs=image,
        outputs=[video, seed],
        fn=generate,
        cache_examples="lazy",
        examples_per_page=4,
    )

    # Fit every uploaded image to the size resize_image targets by default.
    image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)

    # The button is named submit_btn; the seed output reports the seed that
    # was actually used (relevant when -1 asked for a random one).
    submit_btn.click(
        fn=generate,
        inputs=[image, seed, motion_bucket_id, fps_id],
        outputs=[video, seed],
        api_name="video",
    )

demo.queue().launch()