import numpy as np
import torch
import torch.nn.functional as F
import math
import cv2
from scipy.stats import qmc
from easydict import EasyDict as edict

from ..representations.octree import DfsOctree
def intrinsics_to_projection(
    intrinsics: torch.Tensor,
    near: float,
    far: float,
) -> torch.Tensor:
    """
    Convert an OpenCV intrinsics matrix to an OpenGL perspective matrix.

    Args:
        intrinsics (torch.Tensor): [3, 3] OpenCV intrinsics matrix
            (focal lengths and principal point; presumably normalized by
            the image size, given the 0..1 style arithmetic below).
        near (float): distance of the near clipping plane.
        far (float): distance of the far clipping plane.

    Returns:
        (torch.Tensor): [4, 4] OpenGL perspective matrix.
    """
    focal_x, focal_y = intrinsics[0, 0], intrinsics[1, 1]
    center_x, center_y = intrinsics[0, 2], intrinsics[1, 2]

    proj = torch.zeros((4, 4), dtype=intrinsics.dtype, device=intrinsics.device)
    proj[0, 0] = 2 * focal_x
    proj[1, 1] = 2 * focal_y
    # Principal-point offset; the y axis is flipped between OpenCV and OpenGL.
    proj[0, 2] = 2 * center_x - 1
    proj[1, 2] = 1 - 2 * center_y
    # Map z in [near, far] to depth in [0, 1], with a perspective divide by +z.
    proj[2, 2] = far / (far - near)
    proj[2, 3] = near * far / (near - far)
    proj[3, 2] = 1.
    return proj
def render(viewpoint_camera, octree : DfsOctree, pipe, bg_color : torch.Tensor, scaling_modifier = 1.0, used_rank = None, colors_overwrite = None, aux=None, halton_sampler=None):
    """
    Render the scene with the diffoctreerast rasterizer that matches
    ``octree.primitive`` ("voxel", "gaussian", "trivec" or "decoupoly").

    Background tensor (bg_color) must be on GPU!

    Args:
        viewpoint_camera: camera providing FoVx/FoVy, image_height/width,
            world_view_transform, full_proj_transform and camera_center.
        octree (DfsOctree): octree representation to render.
        pipe: pipeline options; with_distloss / jitter / debug are read here.
        bg_color (torch.Tensor): background color, must live on the GPU.
        scaling_modifier (float): global scale multiplier for the primitives.
        used_rank: number of trivec/decoupoly components to rasterize;
            defaults to all (second dimension of the component tensor).
        colors_overwrite: optional per-leaf colors that replace SH shading.
        aux: optional dict of auxiliary tensors forwarded to the rasterizer.
        halton_sampler: optional QMC sampler (trivec rasterizer only).

    Returns:
        edict with 'rgb', 'depth', 'alpha' and, depending on the primitive,
        'distloss' (voxel) or 'percent_depth' (trivec).
    """
    # Lazy import so that importing this module does not require
    # diffoctreerast to be installed (see OctreeRenderer's fallback).
    # NOTE(review): this guard never becomes false — a function-level
    # `from ... import` binds locals, not globals — so the import statement
    # re-runs on every call (cheap after the first time via sys.modules).
    if 'OctreeTrivecRasterizer' not in globals():
        from diffoctreerast import OctreeVoxelRasterizer, OctreeGaussianRasterizer, OctreeTrivecRasterizer, OctreeDecoupolyRasterizer

    # Set up rasterization configuration
    tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
    tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)

    raster_settings = edict(
        image_height=int(viewpoint_camera.image_height),
        image_width=int(viewpoint_camera.image_width),
        tanfovx=tanfovx,
        tanfovy=tanfovy,
        bg=bg_color,
        scale_modifier=scaling_modifier,
        viewmatrix=viewpoint_camera.world_view_transform,
        projmatrix=viewpoint_camera.full_proj_transform,
        sh_degree=octree.active_sh_degree,
        campos=viewpoint_camera.camera_center,
        with_distloss=pipe.with_distloss,
        jitter=pipe.jitter,
        debug=pipe.debug,
    )

    positions = octree.get_xyz

    # Gather the per-primitive attributes required by the matching rasterizer.
    if octree.primitive == "voxel":
        densities = octree.get_density
    elif octree.primitive == "gaussian":
        opacities = octree.get_opacity
    elif octree.primitive == "trivec":
        trivecs = octree.get_trivec
        densities = octree.get_density
        raster_settings.density_shift = octree.density_shift
    elif octree.primitive == "decoupoly":
        decoupolys_V, decoupolys_g = octree.get_decoupoly
        densities = octree.get_density
        raster_settings.density_shift = octree.density_shift
    else:
        raise ValueError(f"Unknown primitive {octree.primitive}")
    depths = octree.get_depth

    # If precomputed colors are provided, use them. Otherwise, if it is desired to precompute colors
    # from SHs in Python, do it. If not, then SH -> RGB conversion will be done by rasterizer.
    colors_precomp = None
    shs = octree.get_features
    if octree.primitive in ["voxel", "gaussian"] and colors_overwrite is not None:
        colors_precomp = colors_overwrite
        shs = None

    ret = edict()

    if octree.primitive == "voxel":
        renderer = OctreeVoxelRasterizer(raster_settings=raster_settings)
        rgb, depth, alpha, distloss = renderer(
            positions = positions,
            densities = densities,
            shs = shs,
            colors_precomp = colors_precomp,
            depths = depths,
            aabb = octree.aabb,
            aux = aux,
        )
        ret['rgb'] = rgb
        ret['depth'] = depth
        ret['alpha'] = alpha
        ret['distloss'] = distloss
    elif octree.primitive == "gaussian":
        renderer = OctreeGaussianRasterizer(raster_settings=raster_settings)
        rgb, depth, alpha = renderer(
            positions = positions,
            opacities = opacities,
            shs = shs,
            colors_precomp = colors_precomp,
            depths = depths,
            aabb = octree.aabb,
            aux = aux,
        )
        ret['rgb'] = rgb
        ret['depth'] = depth
        ret['alpha'] = alpha
    elif octree.primitive == "trivec":
        # Rasterize only the first `used_rank` trivec components
        # (all of them by default).
        raster_settings.used_rank = used_rank if used_rank is not None else trivecs.shape[1]
        renderer = OctreeTrivecRasterizer(raster_settings=raster_settings)
        rgb, depth, alpha, percent_depth = renderer(
            positions = positions,
            trivecs = trivecs,
            densities = densities,
            shs = shs,
            colors_precomp = colors_precomp,
            colors_overwrite = colors_overwrite,
            depths = depths,
            aabb = octree.aabb,
            aux = aux,
            halton_sampler = halton_sampler,
        )
        ret['percent_depth'] = percent_depth
        ret['rgb'] = rgb
        ret['depth'] = depth
        ret['alpha'] = alpha
    elif octree.primitive == "decoupoly":
        raster_settings.used_rank = used_rank if used_rank is not None else decoupolys_V.shape[1]
        renderer = OctreeDecoupolyRasterizer(raster_settings=raster_settings)
        rgb, depth, alpha = renderer(
            positions = positions,
            decoupolys_V = decoupolys_V,
            decoupolys_g = decoupolys_g,
            densities = densities,
            shs = shs,
            colors_precomp = colors_precomp,
            depths = depths,
            aabb = octree.aabb,
            aux = aux,
        )
        ret['rgb'] = rgb
        ret['depth'] = depth
        ret['alpha'] = alpha

    return ret
class OctreeRenderer:
    """
    Renderer for the octree (Voxel) representation.

    Args:
        rendering_options (dict): Rendering options overriding the defaults
            (resolution, near, far, ssaa, bg_color).
    """
    def __init__(self, rendering_options=None) -> None:
        # Probe for the CUDA rasterizer backend; fall back to a disabled
        # renderer that emits a placeholder image when it is missing.
        try:
            import diffoctreerast
        except ImportError:
            print("\033[93m[WARNING] diffoctreerast is not installed. The renderer will be disabled.\033[0m")
            self.unsupported = True
        else:
            self.unsupported = False

        # Pipeline flags forwarded to the rasterizer.
        self.pipe = edict({
            "with_distloss": False,
            "with_aux": False,
            "scale_modifier": 1.0,
            "used_rank": None,
            "jitter": False,
            "debug": False,
        })
        self.rendering_options = edict({
            "resolution": None,
            "near": None,
            "far": None,
            "ssaa": 1,
            "bg_color": 'random',
        })
        # 2D Halton sequence, passed through to the trivec rasterizer.
        self.halton_sampler = qmc.Halton(2, scramble=False)
        # `None` default (instead of a mutable `{}` default argument) avoids
        # sharing one dict object across all instances.
        self.rendering_options.update(rendering_options or {})
        self.bg_color = None

    def render(
        self,
        octree: DfsOctree,
        extrinsics: torch.Tensor,
        intrinsics: torch.Tensor,
        colors_overwrite: torch.Tensor = None,
    ) -> edict:
        """
        Render the octree.

        Args:
            octree (Octree): octree
            extrinsics (torch.Tensor): (4, 4) camera extrinsics
            intrinsics (torch.Tensor): (3, 3) camera intrinsics
            colors_overwrite (torch.Tensor): (N, 3) override color

        Returns:
            edict containing:
                color (torch.Tensor): (3, H, W) rendered color
                depth (torch.Tensor): (H, W) rendered depth
                alpha (torch.Tensor): (H, W) rendered alpha
                distloss (Optional[torch.Tensor]): (H, W) rendered distance loss
                percent_depth (Optional[torch.Tensor]): (H, W) rendered percent depth
                aux (Optional[edict]): auxiliary tensors
        """
        resolution = self.rendering_options["resolution"]
        near = self.rendering_options["near"]
        far = self.rendering_options["far"]
        ssaa = self.rendering_options["ssaa"]

        # Backend unavailable: return an "Unsupported" placeholder image
        # instead of crashing.
        if self.unsupported:
            image = np.zeros((512, 512, 3), dtype=np.uint8)
            text_bbox = cv2.getTextSize("Unsupported", cv2.FONT_HERSHEY_SIMPLEX, 2, 3)[0]
            origin = (512 - text_bbox[0]) // 2, (512 - text_bbox[1]) // 2
            image = cv2.putText(image, "Unsupported", origin, cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 3, cv2.LINE_AA)
            # edict (not a plain dict) for consistency with the normal return.
            return edict({
                'color': torch.tensor(image, dtype=torch.float32).permute(2, 0, 1) / 255,
            })

        # Resolve the background color; 'random' flips between black and white.
        if self.rendering_options["bg_color"] == 'random':
            self.bg_color = torch.zeros(3, dtype=torch.float32, device="cuda")
            if np.random.rand() < 0.5:
                self.bg_color += 1
        else:
            self.bg_color = torch.tensor(self.rendering_options["bg_color"], dtype=torch.float32, device="cuda")

        # Optional auxiliary buffers whose gradients are retained so callers
        # can inspect them after the backward pass.
        if self.pipe["with_aux"]:
            aux = {
                'grad_color2': torch.zeros((octree.num_leaf_nodes, 3), dtype=torch.float32, requires_grad=True, device="cuda") + 0,
                'contributions': torch.zeros((octree.num_leaf_nodes, 1), dtype=torch.float32, requires_grad=True, device="cuda") + 0,
            }
            for k in aux.keys():
                aux[k].requires_grad_()
                aux[k].retain_grad()
        else:
            aux = None

        # Build the camera description expected by `render`.
        view = extrinsics
        perspective = intrinsics_to_projection(intrinsics, near, far)
        camera = torch.inverse(view)[:3, 3]
        focalx = intrinsics[0, 0]
        focaly = intrinsics[1, 1]
        # FoV from focal length; assumes normalized intrinsics (sensor size 1).
        fovx = 2 * torch.atan(0.5 / focalx)
        fovy = 2 * torch.atan(0.5 / focaly)

        camera_dict = edict({
            "image_height": resolution * ssaa,
            "image_width": resolution * ssaa,
            "FoVx": fovx,
            "FoVy": fovy,
            "znear": near,
            "zfar": far,
            "world_view_transform": view.T.contiguous(),
            "projection_matrix": perspective.T.contiguous(),
            "full_proj_transform": (perspective @ view).T.contiguous(),
            "camera_center": camera
        })

        # Render at ssaa-times the target resolution, then downsample below.
        render_ret = render(camera_dict, octree, self.pipe, self.bg_color, aux=aux, colors_overwrite=colors_overwrite, scaling_modifier=self.pipe.scale_modifier, used_rank=self.pipe.used_rank, halton_sampler=self.halton_sampler)

        if ssaa > 1:
            render_ret.rgb = F.interpolate(render_ret.rgb[None], size=(resolution, resolution), mode='bilinear', align_corners=False, antialias=True).squeeze()
            render_ret.depth = F.interpolate(render_ret.depth[None, None], size=(resolution, resolution), mode='bilinear', align_corners=False, antialias=True).squeeze()
            render_ret.alpha = F.interpolate(render_ret.alpha[None, None], size=(resolution, resolution), mode='bilinear', align_corners=False, antialias=True).squeeze()
            if hasattr(render_ret, 'percent_depth'):
                render_ret.percent_depth = F.interpolate(render_ret.percent_depth[None, None], size=(resolution, resolution), mode='bilinear', align_corners=False, antialias=True).squeeze()

        ret = edict({
            'color': render_ret.rgb,
            'depth': render_ret.depth,
            'alpha': render_ret.alpha,
        })
        if self.pipe["with_distloss"] and 'distloss' in render_ret:
            ret['distloss'] = render_ret.distloss
        if self.pipe["with_aux"]:
            ret['aux'] = aux
        if hasattr(render_ret, 'percent_depth'):
            ret['percent_depth'] = render_ret.percent_depth
        return ret