import numpy as np
import torch
import torch.nn.functional as F
import math
import cv2
from scipy.stats import qmc
from easydict import EasyDict as edict

from ..representations.octree import DfsOctree


def intrinsics_to_projection(
    intrinsics: torch.Tensor,
    near: float,
    far: float,
) -> torch.Tensor:
    """
    Convert OpenCV intrinsics to an OpenGL-style perspective matrix.

    Args:
        intrinsics (torch.Tensor): [3, 3] OpenCV intrinsics matrix,
            normalized by image size (fx, fy, cx, cy in [0, 1] units).
        near (float): near clipping plane distance.
        far (float): far clipping plane distance.

    Returns:
        (torch.Tensor): [4, 4] OpenGL perspective matrix.
    """
    fx, fy = intrinsics[0, 0], intrinsics[1, 1]
    cx, cy = intrinsics[0, 2], intrinsics[1, 2]
    ret = torch.zeros((4, 4), dtype=intrinsics.dtype, device=intrinsics.device)
    ret[0, 0] = 2 * fx
    ret[1, 1] = 2 * fy
    ret[0, 2] = 2 * cx - 1
    ret[1, 2] = -2 * cy + 1
    # Depth is mapped so that z = near lands at 0 and z = far at 1 after the
    # perspective divide.
    ret[2, 2] = far / (far - near)
    ret[2, 3] = near * far / (near - far)
    ret[3, 2] = 1.0
    return ret


def render(
    viewpoint_camera,
    octree: DfsOctree,
    pipe,
    bg_color: torch.Tensor,
    scaling_modifier=1.0,
    used_rank=None,
    colors_overwrite=None,
    aux=None,
    halton_sampler=None,
):
    """
    Render the scene.

    Background tensor (bg_color) must be on GPU!
    """
    # Lazy import: diffoctreerast is optional and only needed when rendering.
    if "OctreeTrivecRasterizer" not in globals():
        from diffoctreerast import (
            OctreeVoxelRasterizer,
            OctreeGaussianRasterizer,
            OctreeTrivecRasterizer,
            OctreeDecoupolyRasterizer,
        )

    # Set up rasterization configuration.
    tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
    tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)

    raster_settings = edict(
        image_height=int(viewpoint_camera.image_height),
        image_width=int(viewpoint_camera.image_width),
        tanfovx=tanfovx,
        tanfovy=tanfovy,
        bg=bg_color,
        scale_modifier=scaling_modifier,
        viewmatrix=viewpoint_camera.world_view_transform,
        projmatrix=viewpoint_camera.full_proj_transform,
        sh_degree=octree.active_sh_degree,
        campos=viewpoint_camera.camera_center,
        with_distloss=pipe.with_distloss,
        jitter=pipe.jitter,
        debug=pipe.debug,
    )

    # Gather the per-primitive attributes required by each rasterizer.
    positions = octree.get_xyz
    if octree.primitive == "voxel":
        densities = octree.get_density
    elif octree.primitive == "gaussian":
        opacities = octree.get_opacity
    elif octree.primitive == "trivec":
        trivecs = octree.get_trivec
        densities = octree.get_density
        raster_settings.density_shift = octree.density_shift
    elif octree.primitive == "decoupoly":
        decoupolys_V, decoupolys_g = octree.get_decoupoly
        densities = octree.get_density
        raster_settings.density_shift = octree.density_shift
    else:
        raise ValueError(f"Unknown primitive {octree.primitive}")
    depths = octree.get_depth

    # If override colors are provided (voxel/gaussian primitives), pass them to
    # the rasterizer as precomputed colors; otherwise the rasterizer performs
    # the SH -> RGB conversion itself.
    colors_precomp = None
    shs = octree.get_features
    if octree.primitive in ["voxel", "gaussian"] and colors_overwrite is not None:
        colors_precomp = colors_overwrite
        shs = None

    ret = edict()

    if octree.primitive == "voxel":
        renderer = OctreeVoxelRasterizer(raster_settings=raster_settings)
        rgb, depth, alpha, distloss = renderer(
            positions=positions,
            densities=densities,
            shs=shs,
            colors_precomp=colors_precomp,
            depths=depths,
            aabb=octree.aabb,
            aux=aux,
        )
        ret["rgb"] = rgb
        ret["depth"] = depth
        ret["alpha"] = alpha
        ret["distloss"] = distloss
    elif octree.primitive == "gaussian":
        renderer = OctreeGaussianRasterizer(raster_settings=raster_settings)
        rgb, depth, alpha = renderer(
            positions=positions,
            opacities=opacities,
            shs=shs,
            colors_precomp=colors_precomp,
            depths=depths,
            aabb=octree.aabb,
            aux=aux,
        )
        ret["rgb"] = rgb
        ret["depth"] = depth
        ret["alpha"] = alpha
    elif octree.primitive == "trivec":
        # Default to the full trivec rank unless a lower one is requested.
        raster_settings.used_rank = (
            used_rank if used_rank is not None else trivecs.shape[1]
        )
        renderer = OctreeTrivecRasterizer(raster_settings=raster_settings)
        rgb, depth, alpha, percent_depth = renderer(
            positions=positions,
            trivecs=trivecs,
            densities=densities,
            shs=shs,
            colors_precomp=colors_precomp,
            colors_overwrite=colors_overwrite,
            depths=depths,
            aabb=octree.aabb,
            aux=aux,
            halton_sampler=halton_sampler,
        )
        ret["percent_depth"] = percent_depth
        ret["rgb"] = rgb
        ret["depth"] = depth
        ret["alpha"] = alpha
    elif octree.primitive == "decoupoly":
        raster_settings.used_rank = (
            used_rank if used_rank is not None else decoupolys_V.shape[1]
        )
        renderer = OctreeDecoupolyRasterizer(raster_settings=raster_settings)
        rgb, depth, alpha = renderer(
            positions=positions,
            decoupolys_V=decoupolys_V,
            decoupolys_g=decoupolys_g,
            densities=densities,
            shs=shs,
            colors_precomp=colors_precomp,
            depths=depths,
            aabb=octree.aabb,
            aux=aux,
        )
        ret["rgb"] = rgb
        ret["depth"] = depth
        ret["alpha"] = alpha

    return ret


class OctreeRenderer:
    """
    Renderer for the octree (DfsOctree) representation.

    Args:
        rendering_options (dict): Rendering options.
    """

    def __init__(self, rendering_options={}) -> None:
        try:
            import diffoctreerast
        except ImportError:
            print(
                "\033[93m[WARNING] diffoctreerast is not installed. "
                "The renderer will be disabled.\033[0m"
            )
            self.unsupported = True
        else:
            self.unsupported = False

        self.pipe = edict(
            {
                "with_distloss": False,
                "with_aux": False,
                "scale_modifier": 1.0,
                "used_rank": None,
                "jitter": False,
                "debug": False,
            }
        )
        self.rendering_options = edict(
            {
                "resolution": None,
                "near": None,
                "far": None,
                "ssaa": 1,
                "bg_color": "random",
            }
        )
        # 2D Halton sequence used for jittered sampling in the rasterizer.
        self.halton_sampler = qmc.Halton(2, scramble=False)
        self.rendering_options.update(rendering_options)
        self.bg_color = None

    def render(
        self,
        octree: DfsOctree,
        extrinsics: torch.Tensor,
        intrinsics: torch.Tensor,
        colors_overwrite: torch.Tensor = None,
    ) -> edict:
        """
        Render the octree.

        Args:
            octree (DfsOctree): the octree to render.
            extrinsics (torch.Tensor): (4, 4) camera extrinsics.
            intrinsics (torch.Tensor): (3, 3) camera intrinsics,
                normalized by image size.
            colors_overwrite (torch.Tensor): (N, 3) colors overriding the
                octree's own appearance.

        Returns:
            edict containing:
                color (torch.Tensor): (3, H, W) rendered color.
                depth (torch.Tensor): (H, W) rendered depth.
                alpha (torch.Tensor): (H, W) rendered alpha.
                distloss (Optional[torch.Tensor]): (H, W) rendered distance loss.
                percent_depth (Optional[torch.Tensor]): (H, W) rendered percent depth.
                aux (Optional[edict]): auxiliary tensors.
        """
        resolution = self.rendering_options["resolution"]
        near = self.rendering_options["near"]
        far = self.rendering_options["far"]
        ssaa = self.rendering_options["ssaa"]

        # Fall back to a placeholder image when diffoctreerast is unavailable.
        if self.unsupported:
            image = np.zeros((512, 512, 3), dtype=np.uint8)
            text_bbox = cv2.getTextSize("Unsupported", cv2.FONT_HERSHEY_SIMPLEX, 2, 3)[0]
            # cv2.putText anchors text at its baseline (bottom-left), so center
            # vertically by adding half the text height.
            origin = (512 - text_bbox[0]) // 2, (512 + text_bbox[1]) // 2
            image = cv2.putText(
                image,
                "Unsupported",
                origin,
                cv2.FONT_HERSHEY_SIMPLEX,
                2,
                (255, 255, 255),
                3,
                cv2.LINE_AA,
            )
            return edict(
                {
                    "color": torch.tensor(image, dtype=torch.float32).permute(2, 0, 1) / 255,
                }
            )

        # Randomly pick a black or white background, or use the given color.
        if self.rendering_options["bg_color"] == "random":
            self.bg_color = torch.zeros(3, dtype=torch.float32, device="cuda")
            if np.random.rand() < 0.5:
                self.bg_color += 1
        else:
            self.bg_color = torch.tensor(
                self.rendering_options["bg_color"], dtype=torch.float32, device="cuda"
            )

        if self.pipe["with_aux"]:
            # Gradient buffers for auxiliary outputs; `+ 0` makes them non-leaf
            # tensors whose gradients are kept around via retain_grad().
            aux = {
                "grad_color2": torch.zeros(
                    (octree.num_leaf_nodes, 3),
                    dtype=torch.float32,
                    requires_grad=True,
                    device="cuda",
                ) + 0,
                "contributions": torch.zeros(
                    (octree.num_leaf_nodes, 1),
                    dtype=torch.float32,
                    requires_grad=True,
                    device="cuda",
                ) + 0,
            }
            for k in aux.keys():
                aux[k].requires_grad_()
                aux[k].retain_grad()
        else:
            aux = None

        # Build the camera: view/projection matrices and FoV derived from the
        # normalized intrinsics (half extent of the image plane is 0.5).
        view = extrinsics
        perspective = intrinsics_to_projection(intrinsics, near, far)
        camera = torch.inverse(view)[:3, 3]
        focalx = intrinsics[0, 0]
        focaly = intrinsics[1, 1]
        fovx = 2 * torch.atan(0.5 / focalx)
        fovy = 2 * torch.atan(0.5 / focaly)

        camera_dict = edict(
            {
                "image_height": resolution * ssaa,
                "image_width": resolution * ssaa,
                "FoVx": fovx,
                "FoVy": fovy,
                "znear": near,
                "zfar": far,
                "world_view_transform": view.T.contiguous(),
                "projection_matrix": perspective.T.contiguous(),
                "full_proj_transform": (perspective @ view).T.contiguous(),
                "camera_center": camera,
            }
        )

        # Render
        render_ret = render(
            camera_dict,
            octree,
            self.pipe,
            self.bg_color,
            aux=aux,
            colors_overwrite=colors_overwrite,
            scaling_modifier=self.pipe.scale_modifier,
            used_rank=self.pipe.used_rank,
            halton_sampler=self.halton_sampler,
        )

        # When supersampling, the scene was rendered at resolution * ssaa;
        # downsample each map back to the requested resolution.
        if ssaa > 1:
            render_ret.rgb = F.interpolate(
                render_ret.rgb[None],
                size=(resolution, resolution),
                mode="bilinear",
                align_corners=False,
                antialias=True,
            ).squeeze()
            render_ret.depth = F.interpolate(
                render_ret.depth[None, None],
                size=(resolution, resolution),
                mode="bilinear",
                align_corners=False,
                antialias=True,
            ).squeeze()
            render_ret.alpha = F.interpolate(
                render_ret.alpha[None, None],
                size=(resolution, resolution),
                mode="bilinear",
                align_corners=False,
                antialias=True,
            ).squeeze()
            if hasattr(render_ret, "percent_depth"):
                render_ret.percent_depth = F.interpolate(
                    render_ret.percent_depth[None, None],
                    size=(resolution, resolution),
                    mode="bilinear",
                    align_corners=False,
                    antialias=True,
                ).squeeze()

        ret = edict(
            {
                "color": render_ret.rgb,
                "depth": render_ret.depth,
                "alpha": render_ret.alpha,
            }
        )
        if self.pipe["with_distloss"] and "distloss" in render_ret:
            ret["distloss"] = render_ret.distloss
        if self.pipe["with_aux"]:
            ret["aux"] = aux
        if hasattr(render_ret, "percent_depth"):
            ret["percent_depth"] = render_ret.percent_depth
        return ret
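

# ---------------------------------------------------------------------------
# Sanity-check sketch (editor's addition, not part of the library API): shows
# the depth convention produced by `intrinsics_to_projection`. With normalized
# intrinsics, a camera-space point on the near plane maps to NDC depth 0 and a
# point on the far plane to 1 after the perspective divide. The values below
# are illustrative only.
#
#   K = torch.tensor([[1.0, 0.0, 0.5],
#                     [0.0, 1.0, 0.5],
#                     [0.0, 0.0, 1.0]])
#   P = intrinsics_to_projection(K, near=0.1, far=10.0)
#   p_near = P @ torch.tensor([0.0, 0.0, 0.1, 1.0])   # point on the near plane
#   p_far = P @ torch.tensor([0.0, 0.0, 10.0, 1.0])   # point on the far plane
#   assert abs(p_near[2] / p_near[3]) < 1e-6          # depth -> 0
#   assert abs(p_far[2] / p_far[3] - 1.0) < 1e-6      # depth -> 1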
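
# ---------------------------------------------------------------------------
# Usage sketch (editor's addition): a minimal example of driving the renderer,
# assuming diffoctreerast and a CUDA device are available and `octree` is a
# populated DfsOctree. The module path in the import and the near/far values
# are assumptions; adjust them to your project.
#
#   import torch
#   from trellis.renderers.octree_renderer import OctreeRenderer  # path assumed
#
#   renderer = OctreeRenderer({
#       "resolution": 512,        # output maps are 512 x 512
#       "near": 0.8,              # near/far in the octree's world units
#       "far": 1.6,
#       "ssaa": 2,                # render at 1024^2, then downsample
#       "bg_color": (0, 0, 0),    # or "random" for a random black/white bg
#   })
#
#   # Normalized pinhole intrinsics (fx, fy, cx, cy in units of image size),
#   # matching the 2 * atan(0.5 / focal) FoV computation above.
#   intrinsics = torch.tensor(
#       [[1.0, 0.0, 0.5], [0.0, 1.0, 0.5], [0.0, 0.0, 1.0]],
#       dtype=torch.float32, device="cuda",
#   )
#   extrinsics = torch.eye(4, dtype=torch.float32, device="cuda")  # world-to-camera
#
#   out = renderer.render(octree, extrinsics, intrinsics)
#   color, depth, alpha = out.color, out.depth, out.alpha  # (3, H, W), (H, W), (H, W)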