import torch
import gradio as gr
from transformers import AutoModel

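# Local helper module providing the video I/O and DINOv2 feature-processing routines.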
import utils

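# Use the GPU when available; the DINOv2 base model is loaded once at startup
# and shared across all requests.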
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModel.from_pretrained('facebook/dinov2-base')
model.to(device)

def app_fn(
    source_video: str,
    batch_size: int,
    threshold: float,
    n_patches: int,
    is_larger: bool,
    interpolate: bool,
) -> str:
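    """Run DINOv2 over the uploaded video and return the processed clip.

    All tuning parameters are forwarded to `utils.process_video`; the returned
    path is rendered in the output video player.
    """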
    frames = utils.load_video_frames(source_video)
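    # Cap processing at the first 120 frames (keeps long uploads from stalling the demo).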
    processed_frames = utils.process_video(
        model=model,
        video=frames[:120],
        batch_size=batch_size,
        threshold=threshold,
        n_patches=n_patches,
        is_larger=is_larger,
        interpolate=interpolate,
        device=device
    )

    output_video = utils.create_video_from_frames_rgb(processed_frames)

    return output_video

if __name__ == "__main__":
    title = "🦖 DINOv2 Video 🦖"
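    # Layout: input/output video players side by side, tuning controls below.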
    with gr.Blocks(title=title) as demo:
        with gr.Row():
            source_video = gr.Video(label="Input Video", sources="upload", format="mp4")
            output_video = gr.Video(label="Output Video")
        with gr.Row():
            batch_size = gr.Slider(minimum=1, maximum=30, step=1, value=4, label="Batch Size")
            threshold = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label="Threshold")
            n_patches = gr.Slider(minimum=20, maximum=40, step=1, value=30, label="Number of Patches")
            is_larger = gr.Checkbox(label="Is Larger", value=True)
            interpolate = gr.Checkbox(label="Interpolate", value=False)
        
        btn = gr.Button("Process Video")
        btn.click(
            fn=app_fn,
            inputs=[source_video, batch_size, threshold, n_patches, is_larger, interpolate],
            outputs=[output_video]
        )
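        # Cached example: rendered once at startup so visitors see a result
        # without uploading a video of their own.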
        examples = gr.Examples(
            examples=[
                ["assets/dog-running.mp4", 30, 0.5, 40, False, True],
            ],
            inputs=[source_video, batch_size, threshold, n_patches, is_larger, interpolate],
            outputs=[output_video],
            fn=app_fn,
            cache_examples=True
        )

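    # Bounded queue (at most 5 pending jobs); share=True serves a temporary public URL.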
    demo.queue(max_size=5).launch(share=True)