Dinov2-Video / app.py
EduardoPacheco's picture
Limited to 120 frames and modified example to is_larger
bb80211
import torch
import gradio as gr
from transformers import AutoModel
import utils
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the pretrained DINOv2 base checkpoint once at import time and move it
# to the selected device so every request reuses the same model instance.
model = AutoModel.from_pretrained('facebook/dinov2-base')
model.to(device)
def app_fn(
    source_video: str,
    batch_size: int,
    threshold: float,
    n_patches: int,
    is_larger: bool,
    interpolate: bool,
    max_frames: int = 120,
) -> str:
    """Run the DINOv2 video pipeline on an uploaded video.

    Loads the frames of ``source_video``, keeps at most ``max_frames`` of
    them, hands them to ``utils.process_video`` together with the module-level
    ``model`` and ``device``, and turns the processed frames back into a video.

    Args:
        source_video: Value of the "Input Video" component (a file path;
            ``None`` when nothing has been uploaded).
        batch_size: Forwarded unchanged to ``utils.process_video``.
        threshold: Forwarded unchanged to ``utils.process_video``.
        n_patches: Forwarded unchanged to ``utils.process_video``.
        is_larger: Forwarded unchanged to ``utils.process_video``.
        interpolate: Forwarded unchanged to ``utils.process_video``.
        max_frames: Maximum number of leading frames to process. Defaults to
            120, the cap that was previously hard-coded.

    Returns:
        Whatever ``utils.create_video_from_frames_rgb`` produces; it is fed to
        the "Output Video" component (presumably a file path -- confirm
        against ``utils``).

    Raises:
        gr.Error: If no video was provided or no frames could be read from it.
    """
    # The button can be clicked before anything is uploaded; report that in
    # the UI instead of letting the loader fail on a missing path.
    if not source_video:
        raise gr.Error("Please upload a video before processing.")

    frames = utils.load_video_frames(source_video)
    # len() rather than truthiness so array-like frame containers work too.
    if len(frames) == 0:
        raise gr.Error("No frames could be read from the uploaded video.")

    processed_frames = utils.process_video(
        model=model,
        # Cap the amount of work per request.
        video=frames[:max_frames],
        batch_size=batch_size,
        threshold=threshold,
        n_patches=n_patches,
        is_larger=is_larger,
        interpolate=interpolate,
        device=device,
    )
    return utils.create_video_from_frames_rgb(processed_frames)
if __name__ == "__main__":
    # Build and launch the Gradio UI only when run as a script.
    # The title is wrapped in T-Rex emoji (U+1F996); written as escapes so the
    # text survives tools that mis-decode the file's encoding.
    title = "\U0001F996 DINOv2 Video \U0001F996"

    with gr.Blocks(title=title) as demo:
        # First row: input video next to the processed result.
        with gr.Row():
            source_video = gr.Video(label="Input Video", sources="upload", format="mp4")
            output_video = gr.Video(label="Output Video")

        # Second row: the processing options passed to app_fn.
        with gr.Row():
            batch_size = gr.Slider(minimum=1, maximum=30, step=1, value=4, label="Batch Size")
            threshold = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label="Threshold")
            n_patches = gr.Slider(minimum=20, maximum=40, step=1, value=30, label="Number of Patches")
            is_larger = gr.Checkbox(label="Is Larger", value=True)
            interpolate = gr.Checkbox(label="Interpolate", value=False)

        # The component order here must match app_fn's positional parameters.
        controls = [source_video, batch_size, threshold, n_patches, is_larger, interpolate]

        btn = gr.Button("Process Video")
        btn.click(
            fn=app_fn,
            inputs=controls,
            outputs=[output_video],
        )

        # Bundled example; with cache_examples=True Gradio runs app_fn on it
        # ahead of time and serves the stored result.
        gr.Examples(
            examples=[
                ["assets/dog-running.mp4", 30, 0.5, 40, False, True],
            ],
            inputs=controls,
            outputs=[output_video],
            fn=app_fn,
            cache_examples=True,
        )

    # Queue at most 5 pending requests, then start the server.
    demo.queue(max_size=5).launch(share=True)