# Hugging Face Space: CharlieAmalet/tools3ox_api
# Status: Running on Zero
# File size: 4,858 Bytes

import torch
torch.jit.script = lambda f: f
from zoedepth.utils.misc import colorize, save_raw_16bit
from zoedepth.utils.geometry import depth_to_points, create_triangles
from marigold_depth_estimation import MarigoldPipeline
import gradio as gr
import spaces

from PIL import Image
import numpy as np
import trimesh
from functools import partial
import tempfile


# Custom stylesheet handed to gr.Blocks(css=css) further down. It caps the
# height of the elements with ids img-display-container, img-display-input
# and img-display-output.
# NOTE(review): no component in the visible file sets these element ids --
# confirm the rules are still needed.
css = """
#img-display-container {
    max-height: 50vh;
    }
#img-display-input {
    max-height: 40vh;
    }

#img-display-output {
    max-height: 40vh;
    }
"""

# DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# NOTE(review): DEVICE is not referenced anywhere in the visible file; the
# request handlers choose their device per call instead -- confirm whether it
# can be removed.
DEVICE = 'cuda'
# ZoeDepth "ZoeD_N" entry point from the isl-org/ZoeDepth hub repository,
# loaded with pretrained=True at import time and kept on the CPU in eval mode.
# predict_depth() moves it to the selected device when a request arrives.
model = torch.hub.load('isl-org/ZoeDepth', "ZoeD_N", pretrained=True).to("cpu").eval()

# Checkpoint identifier passed to MarigoldPipeline.from_pretrained; the
# resulting pipeline is the one process_image() calls.
CHECKPOINT = "prs-eth/marigold-v1-0"
pipe = MarigoldPipeline.from_pretrained(CHECKPOINT)

# ----------- Depth functions
def save_raw_16bit(depth, fpath="raw.png"):
    """Convert a depth map to the raw 16-bit array used for PNG export.

    Despite its name, this function writes nothing to disk: it only returns
    the scaled array and leaves saving to the caller. It also shadows the
    ``save_raw_16bit`` imported from ``zoedepth.utils.misc`` at the top of the
    file.

    The function is pure CPU/NumPy work, so it is intentionally not wrapped in
    ``spaces.GPU``.

    Args:
        depth: Depth map as a ``torch.Tensor`` (squeezed and copied to the CPU
            first) or as a ``numpy.ndarray``. Must be 2D after squeezing.
        fpath: Unused. Kept only so existing call sites stay valid.

    Returns:
        ``numpy.ndarray`` of dtype ``uint16`` holding ``depth * 256``,
        saturated to the ``uint16`` range.

    Raises:
        AssertionError: If ``depth`` is neither a tensor nor an array, or is
            not two-dimensional.
    """
    if isinstance(depth, torch.Tensor):
        depth = depth.squeeze().cpu().numpy()

    assert isinstance(depth, np.ndarray), "Depth must be a torch tensor or numpy array"
    assert depth.ndim == 2, "Depth must be 2D"

    # Scale for 16-bit PNG, then saturate: casting a float outside
    # [0, 65535] straight to uint16 wraps around (or is undefined), which
    # would turn far/negative depths into arbitrary values.
    scaled = np.clip(depth * 256, 0, np.iinfo(np.uint16).max)
    return scaled.astype(np.uint16)

@spaces.GPU(enable_queue=True)
def process_image(image: Image.Image):
    """Estimate a depth map for ``image`` with the Marigold pipeline.

    Args:
        image: Picture supplied by the Gradio image component (PIL image).

    Returns:
        PIL image built from the ``"depth"`` entry of the pipeline output.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        # Without this guard a click on "Generate" with an empty input fails
        # with an opaque AttributeError on ``None.convert``.
        raise gr.Error("Please provide an input image.")
    image = image.convert("RGB")

    # Chosen per call so the function also works when no GPU is attached.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Inference below goes through ``pipe``, so that is the object that has to
    # live on the device. (Previously the ZoeDepth ``model``, which this
    # function never uses, was moved instead and the pipeline stayed on CPU.)
    pipe.to(device)

    # NOTE(review): assumes the pipeline output exposes a "depth" array that
    # Image.fromarray accepts -- confirm against marigold_depth_estimation.
    processed_array = pipe(image)["depth"]

    return Image.fromarray(processed_array)

# ----------- Depth functions

# ----------- Mesh functions
def depth_edges_mask(depth):
    """Return a mask of edges (strong discontinuities) in the depth map.

    This is pure NumPy work, so it is intentionally not wrapped in
    ``spaces.GPU``.

    Args:
        depth: 2D numpy array of shape (H, W).

    Returns:
        2D boolean numpy array of shape (H, W), True where the gradient
        magnitude of ``depth`` exceeds 0.05.
    """
    # np.gradient returns the derivative along axis 0 (rows) first and
    # along axis 1 (columns) second.
    grad_rows, grad_cols = np.gradient(depth)
    # Gradient magnitude per pixel.
    grad_magnitude = np.sqrt(grad_rows ** 2 + grad_cols ** 2)
    # Pixels whose depth changes faster than the threshold count as edges.
    return grad_magnitude > 0.05

@spaces.GPU(enable_queue=True)
def predict_depth(image):
    """Run the module-level ZoeDepth ``model`` on ``image``.

    The model is moved to CUDA when ``torch.cuda.is_available()`` reports a
    device and to the CPU otherwise, then ``model.infer_pil`` is called.

    Args:
        image: Input passed straight through to ``model.infer_pil``.

    Returns:
        Whatever ``model.infer_pil(image)`` returns.
    """
    if torch.cuda.is_available():
        target = "cuda"
    else:
        target = "cpu"
    model.to(target)
    return model.infer_pil(image)

@spaces.GPU(enable_queue=True)
def get_mesh(image: Image.Image, keep_edges=True):
    """Build a coloured 3D mesh from an image and export it as a GLB file.

    Depth is predicted with ``predict_depth``, back-projected to 3D points
    with ``depth_to_points`` and triangulated with ``create_triangles``; the
    image pixels are used as vertex colours.

    Args:
        image: Picture supplied by the Gradio image component (PIL image).
        keep_edges: When True, triangulate every pixel. When False, pass the
            inverse of ``depth_edges_mask(depth)`` as ``mask`` to
            ``create_triangles``.

    Returns:
        Path of the exported ``.glb`` file. The file is created with
        ``delete=False`` and is not removed by this function.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        # Without this guard an empty input fails with an opaque
        # AttributeError on ``None``.
        raise gr.Error("Please provide an input image.")

    # convert() returns a new image, so the in-place thumbnail() below never
    # mutates the caller's object. Forcing RGB also guarantees exactly three
    # colour values per pixel; RGBA or greyscale uploads would otherwise make
    # the (-1, 3) colour reshape disagree with the vertex count.
    image = image.convert("RGB")
    image.thumbnail((1024, 1024))  # limit the size of the input image

    depth = predict_depth(image)
    verts = depth_to_points(depth[None]).reshape(-1, 3)

    # Each pixel becomes one vertex; its colour is the pixel's RGB value.
    pixels = np.array(image)
    height, width = pixels.shape[:2]
    if keep_edges:
        triangles = create_triangles(height, width)
    else:
        triangles = create_triangles(height, width, mask=~depth_edges_mask(depth))

    colors = pixels.reshape(-1, 3)
    mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors)

    # Reserve a unique path, then close the handle before exporting so no
    # file descriptor is leaked and the export can reopen the path.
    with tempfile.NamedTemporaryFile(suffix='.glb', delete=False) as glb_file:
        glb_path = glb_file.name
    mesh.export(glb_path)
    return glb_path

# ----------- Mesh functions

# Markdown shown at the top of the page.
# NOTE(review): both strings mention only ZoeDepth, while the "Depth
# Prediction" tab calls process_image, which runs the Marigold pipeline --
# confirm the wording is still accurate.
title = "# ZoeDepth"
description = """Unofficial demo for **ZoeDepth: Zero-shot Transfer by Combining Relative and Metric Depth**."""

# Build the UI. Components are attached to whichever layout context is active
# when they are created, so the statement order below defines the page layout.
with gr.Blocks(css=css) as API:
    gr.Markdown(title)
    gr.Markdown(description)
    # Tab 1: image in, depth image out, handled by process_image.
    with gr.Tab("Depth Prediction"):
        with gr.Row():
            inputs=gr.Image(label="Input Image", type='pil', height=500)  # Input is an image
            outputs=gr.Image(label="Depth Map", type='pil', height=500)  # Output is also an image
        generate_btn = gr.Button(value="Generate")
        # generate_btn.click(partial(process_image, model), inputs=inputs, outputs=outputs, api_name="generate_depth")
        # Click handler is registered under api_name "generate_depth".
        generate_btn.click(process_image, inputs=inputs, outputs=outputs, api_name="generate_depth")
        
    # Tab 2: image + checkbox in, 3D model out, handled by get_mesh.
    # The names inputs / outputs / generate_btn are rebound here; the first
    # tab's handler was already wired up with the earlier components.
    with gr.Tab("Image to 3D"):
        with gr.Row():
            with gr.Column():
                # The list order matches get_mesh's parameters (image, keep_edges).
                inputs=[gr.Image(label="Input Image", type='pil', height=500), gr.Checkbox(label="Keep occlusion edges", value=True)]
            outputs=gr.Model3D(label="3D Mesh", clear_color=[1.0, 1.0, 1.0, 1.0], height=500)
        generate_btn = gr.Button(value="Generate")
        # generate_btn.click(partial(get_mesh, model), inputs=inputs, outputs=outputs, api_name="generate_mesh")
        # Click handler is registered under api_name "generate_mesh".
        generate_btn.click(get_mesh, inputs=inputs, outputs=outputs, api_name="generate_mesh")

# Start the app only when the file is executed as a script.
if __name__ == '__main__':
    API.launch()