"""Gradio demo that runs a DINOv2 model over an uploaded video."""

import torch
import gradio as gr
from transformers import AutoModel

import utils

# Pick the GPU when one is available; the model is loaded once at import time
# and shared by every request.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModel.from_pretrained('facebook/dinov2-base')
model.to(device)


def app_fn(
    source_video: str,
    batch_size: int,
    threshold: float,
    n_patches: int,
    is_larger: bool,
    interpolate: bool,
) -> str:
    """Process an input video with the module-level model.

    The frames are loaded with ``utils.load_video_frames``, passed through
    ``utils.process_video`` and re-assembled with
    ``utils.create_video_from_frames_rgb``.

    Args:
        source_video: Value of the input ``gr.Video`` component.
        batch_size: Forwarded to ``utils.process_video``.
        threshold: Forwarded to ``utils.process_video``.
        n_patches: Forwarded to ``utils.process_video``.
        is_larger: Forwarded to ``utils.process_video``.
        interpolate: Forwarded to ``utils.process_video``.

    Returns:
        Whatever ``utils.create_video_from_frames_rgb`` returns; it is fed
        to the output ``gr.Video`` component (presumably a file path —
        confirm against ``utils``).

    Raises:
        gr.Error: If no input video was provided.
    """
    # Clicking the button without uploading anything yields an empty value;
    # surface that as a readable UI error instead of failing inside utils.
    if not source_video:
        raise gr.Error("Please upload a video first.")

    frames = utils.load_video_frames(source_video)

    processed_frames = utils.process_video(
        model=model,
        video=frames,
        batch_size=batch_size,
        threshold=threshold,
        n_patches=n_patches,
        is_larger=is_larger,
        interpolate=interpolate,
        device=device,
    )

    output_video = utils.create_video_from_frames_rgb(processed_frames)

    return output_video


if __name__ == "__main__":
    title = "🦖 DINOv2 Video 🦖"

    # The title was previously defined but never used; pass it to Blocks so
    # it becomes the page title.
    with gr.Blocks(title=title) as demo:
        with gr.Row():
            source_video = gr.Video(label="Input Video", sources="upload", format="mp4")
            output_video = gr.Video(label="Output Video")

        with gr.Row():
            batch_size = gr.Slider(minimum=1, maximum=30, step=1, value=4, label="Batch Size")
            threshold = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.5, label="Threshold")
            n_patches = gr.Slider(minimum=20, maximum=40, step=1, value=30, label="Number of Patches")
            is_larger = gr.Checkbox(label="Is Larger", value=True)
            interpolate = gr.Checkbox(label="Interpolate", value=False)

        # The same component list feeds both the button and the examples.
        input_components = [
            source_video,
            batch_size,
            threshold,
            n_patches,
            is_larger,
            interpolate,
        ]

        btn = gr.Button("Process Video")
        btn.click(
            fn=app_fn,
            inputs=input_components,
            outputs=[output_video],
        )

        examples = gr.Examples(
            examples=[
                ["assets/dog-running.mp4", 30, 0.5, 40, True, False],
            ],
            inputs=input_components,
            outputs=[output_video],
            fn=app_fn,
            cache_examples=True,
        )

    # Cap the number of queued requests, then start the server.
    demo.queue(max_size=5).launch()