mochi-1

Running on Zero

File size: 2,275 Bytes

3facca5
0d3ff24
3facca5
 
 
 
 
 
 
 
0d3ff24
3facca5
 
 
 
 
 
549018e
3facca5
549018e
 
3facca5
549018e
3facca5
 
 
0d3ff24
2fb1485
0d3ff24
 
 
 
 
 
 
 
 
 
 
3facca5
0d3ff24
3facca5
549018e
3facca5
 
0d3ff24
549018e
3facca5
 
549018e
ace3238
 
0d3ff24
 
549018e
1d7aaae
549018e
 
 
c3dbd3d
3facca5
 
0d3ff24
 
 
 
 
 
 
 
 
 
549018e
3facca5

import os

if os.environ.get("SPACES_ZERO_GPU") is not None:
    # Running on a Hugging Face ZeroGPU Space: use the real package.
    import spaces
else:
    import functools

    class spaces:
        """Local stand-in for the `spaces` package when not running on ZeroGPU.

        Only the `GPU` decorator is provided, and it is a pass-through: the
        decorated function is simply called with the arguments it was given.
        """

        @staticmethod
        def GPU(func=None, **_options):
            """Pass-through replacement for the `spaces.GPU` decorator.

            Supports both usages found in application code:

            * bare: ``@spaces.GPU``
            * parameterised: ``@spaces.GPU(duration=120)``

            Any keyword options (e.g. ``duration``) are accepted and ignored,
            since there is nothing to schedule outside ZeroGPU.
            """

            def decorate(target):
                @functools.wraps(target)
                def wrapper(*args, **kwargs):
                    return target(*args, **kwargs)

                return wrapper

            # Called with options only -> return the decorator itself so the
            # function can be applied in the second call.
            if func is None:
                return decorate
            return decorate(func)

import torch
from diffusers import MochiPipeline
from diffusers.utils import export_to_video
import gradio as gr
import config as cfg

# Build the Mochi pipeline from the checkpoint id configured in `config`,
# requesting the "bf16" weight variant and bfloat16 tensors.
pipe = MochiPipeline.from_pretrained(
    cfg.MODEL_PRE_TRAINED_ID,
    torch_dtype=torch.bfloat16,
    variant="bf16",
)

# Switch on the pipeline's memory-saving options: model CPU offload first,
# then tiled VAE processing.
pipe.enable_model_cpu_offload()
pipe.enable_vae_tiling()


@spaces.GPU(duration=120)
def generate_video(prompt, num_frames=84, fps=30, high_quality=False):
    """Generate a video for `prompt` and return the path of the MP4 file.

    Args:
        prompt: Text prompt passed to the pipeline.
        num_frames: Number of frames to generate. UI sliders may deliver a
            float, so the value is converted to ``int`` before use.
        fps: Frame rate handed to ``export_to_video``.
        high_quality: When true, the pipeline runs inside a CUDA bfloat16
            autocast context with the autocast cache disabled.

    Returns:
        Path of the exported ``.mp4`` file. Each call writes to its own
        temporary file so that one request cannot overwrite another's output.

    Raises:
        RuntimeError: If `high_quality` is requested while the
            ``SPACES_ZERO_GPU`` environment variable is set.
    """
    import contextlib
    import tempfile

    if high_quality:
        print("High quality option selected. Requires 42GB VRAM.")
        # Refuse up front on ZeroGPU instead of starting a run there.
        if os.environ.get("SPACES_ZERO_GPU") is not None:
            raise RuntimeError("High quality option may fail on ZeroGPU environments.")
        precision_ctx = torch.autocast("cuda", torch.bfloat16, cache_enabled=False)
    else:
        print("Standard quality option selected.")
        precision_ctx = contextlib.nullcontext()

    # Single pipeline invocation; only the surrounding context differs
    # between the two quality modes.
    with precision_ctx:
        frames = pipe(prompt, num_frames=int(num_frames)).frames[0]

    # Reserve a unique output path. The file is deliberately kept
    # (delete=False) because the caller needs it after this function returns;
    # nothing here removes it afterwards.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as handle:
        video_path = handle.name

    export_to_video(frames, video_path, fps=fps)
    return video_path


# Gradio UI wired to generate_video. The input widgets are listed in the same
# order as the function's parameters: prompt, num_frames, fps, high_quality.
interface = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(
            placeholder="Enter your text prompt here... 💡",
            lines=2,
        ),
        gr.Slider(
            label="Number of frames 🎞️",
            value=84,
            minimum=1,
            maximum=240,
        ),
        gr.Slider(
            label="FPS (Frames per second) ⏱️",
            value=30,
            minimum=1,
            maximum=60,
        ),
        gr.Checkbox(
            label="High Quality Output (requires 42GB VRAM, may fail on ZeroGPU)",
        ),
    ],
    outputs=gr.Video(),
    # Page text and example prompts all come from the config module.
    article=cfg.BUY_ME_A_COFFE,
    description=cfg.DESCRIPTION,
    examples=cfg.EXAMPLES,
    title=cfg.TITLE,
)

# Center the title and description using custom CSS
# NOTE(review): the stylesheet is assigned after the Interface object has been
# constructed -- confirm the installed Gradio version still applies `css` set
# this way rather than only honouring it when passed in at construction/launch.
# NOTE(review): presumably `.interface-title` / `.interface-description` match
# the classes Gradio renders for the title and description; verify in the
# browser, otherwise these rules have no effect.
interface.css = """  
    .interface-title {  
        text-align: center;  
    }  
    .interface-description {  
        text-align: center;  
    }  
"""

# Launch the application
# Only start the server when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    interface.launch()