"""Gradio demo for Depth Pro.

Upload an image, run the model, and view a viridis-colored depth map
together with the focal length reported by the model.
"""

import functools

import depth_pro
import gradio as gr
import matplotlib
import matplotlib.cm as cm
import numpy as np
from depth_pro.depth_pro import DepthProConfig
from PIL import Image

MARKDOWN = """

Depth Pro: Sharp Monocular Metric Depth in Less Than a Second

"""


@functools.lru_cache(maxsize=1)
def _load_model():
    """Build the Depth Pro model and its preprocessing transform once.

    The result is cached so the checkpoint is not re-read on every request;
    the first call to ``run`` pays the loading cost.

    Returns:
        A ``(model, transform)`` tuple as returned by
        ``depth_pro.create_model_and_transforms``, with the model in eval mode.
    """
    config = DepthProConfig(
        patch_encoder_preset="dinov2l16_384",
        image_encoder_preset="dinov2l16_384",
        checkpoint_uri="./depth_pro.pt",
        decoder_features=256,
        use_fov_head=True,
        fov_encoder_preset="dinov2l16_384",
    )
    model, transform = depth_pro.create_model_and_transforms(config=config)
    model.eval()
    return model, transform


def _viridis():
    """Return the viridis colormap on both old and new Matplotlib releases."""
    registry = getattr(matplotlib, "colormaps", None)
    if registry is not None:
        return registry["viridis"]
    # Older Matplotlib without the ``colormaps`` registry; ``cm.get_cmap``
    # was removed in 3.9, so it is only used as a fallback here.
    return cm.get_cmap("viridis")


def _colorize_depth(depth):
    """Min-max normalize a 2-D depth array and render it as an RGB image.

    Args:
        depth: 2-D NumPy array of depth values.

    Returns:
        A ``PIL.Image.Image`` holding the viridis-colored depth map.
    """
    lowest = depth.min()
    span = depth.max() - lowest
    if span > 0:
        normalized = (depth - lowest) / span
    else:
        # Constant depth map: avoid 0/0 producing NaNs that would be cast
        # to arbitrary uint8 values below.
        normalized = np.zeros_like(depth)
    rgba = _viridis()(normalized)
    # Drop the alpha channel and scale [0, 1] floats to 8-bit.
    rgb = (rgba[:, :, :3] * 255).astype(np.uint8)
    return Image.fromarray(rgb)


def run(input_image_path):
    """Estimate depth for one image and return a visualization.

    Args:
        input_image_path: Filesystem path of the image to process, as
            supplied by the ``gr.Image(type="filepath")`` component.

    Returns:
        A ``(depth_image, focal_length)`` tuple: the colorized depth map as a
        PIL image and the ``focallength_px`` value from the prediction as a
        Python number.

    Raises:
        gr.Error: If no image was provided.
    """
    if input_image_path is None:
        raise gr.Error("Please upload an image before running.")

    model, transform = _load_model()

    # Load and preprocess the image.
    image, _, f_px = depth_pro.load_rgb(input_image_path)
    prediction = model.infer(transform(image), f_px=f_px)

    depth = prediction["depth"].squeeze().cpu().numpy()
    focallength_px = prediction["focallength_px"]
    # NOTE(review): the value may not be a tensor when ``f_px`` came from the
    # image metadata -- confirm against depth_pro; handle both to be safe.
    if hasattr(focallength_px, "item"):
        focallength_px = focallength_px.item()
    else:
        focallength_px = float(focallength_px)

    return _colorize_depth(depth), focallength_px


with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        with gr.Column():
            input_image_path = gr.Image(
                label="Input Image", type="filepath", sources=["upload"]
            )
        with gr.Column():
            with gr.Column():
                output_depth_map = gr.Image(label="Depth Map")
                output_focal_length = gr.Number(label="Focal Length")
    with gr.Row():
        btn = gr.Button("Run")
        btn.click(
            run,
            inputs=[input_image_path],
            outputs=[output_depth_map, output_focal_length],
        )
    examples = gr.Examples(
        examples=[
            "assets/input_one.webp",
        ],
        fn=run,
        inputs=[input_image_path],
        outputs=[output_depth_map, output_focal_length],
        cache_examples=True,
    )

demo.launch()