# (Hosting-page banner captured during extraction: "Spaces: Running on Zero".)
# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
"""
The ray sampler is a module that takes in camera matrices and a resolution and produces batches of rays.
It expects cam2world matrices that use the OpenCV camera coordinate system conventions.
"""
import torch | |
from pdb import set_trace as st | |
import random | |
HUGE_NUMBER = 1e10
TINY_NUMBER = 1e-6  # float32 only has 7 decimal digits precision

######################################################################################
# wrapper to simplify the use of nerfnet
######################################################################################
# https://github.com/Kai-46/nerfplusplus/blob/ebf2f3e75fd6c5dfc8c9d0b533800daaf17bd95f/ddp_model.py#L16
def depth2pts_outside(ray_o, ray_d, depth):
    """Convert inverse-distance samples along rays into (direction, depth) points.

    Each ray is intersected with the unit sphere centred at the origin; the
    intersection point is then rotated (Rodrigues' formula) within the plane
    spanned by ``ray_o`` and that point, by an angle derived from ``depth``.

    Args:
        ray_o, ray_d: [..., 3] ray origins and directions.
        depth: [...]; inverse of distance to sphere origin, inside [0, 1].

    Returns:
        pts: [..., 4], the rotated point renormalised to unit length with
            ``depth`` appended as the last channel.
        depth_real: [...], the conventional depth along the ray.
    """
    # Ray parameter of the point on the ray closest to the origin.
    # note: it becomes negative if this mid point is behind the camera
    t_mid = -torch.sum(ray_d * ray_o, dim=-1) / torch.sum(ray_d * ray_d, dim=-1)
    closest = ray_o + t_mid.unsqueeze(-1) * ray_d
    closest_norm = torch.norm(closest, dim=-1)

    # Converts a Euclidean length along the ray into ray-parameter units.
    inv_dir_norm = 1. / torch.norm(ray_d, dim=-1)

    # Parameter step from the closest point to the unit-sphere intersection.
    t_to_sphere = torch.sqrt(1. - closest_norm * closest_norm) * inv_dir_norm
    on_sphere = ray_o + (t_mid + t_to_sphere).unsqueeze(-1) * ray_d

    # Unit rotation axis, perpendicular to both the origin and the sphere point.
    axis = torch.cross(ray_o, on_sphere, dim=-1)
    axis = axis / torch.norm(axis, dim=-1, keepdim=True)

    phi = torch.asin(closest_norm)
    theta = torch.asin(closest_norm * depth)  # depth is inside [0, 1]
    angle = (phi - theta).unsqueeze(-1)  # [..., 1]
    cos_angle = torch.cos(angle)
    sin_angle = torch.sin(angle)

    # now rotate the sphere point
    # Rodrigues formula: https://en.wikipedia.org/wiki/Rodrigues%27_rotation_formula
    parallel_term = on_sphere * cos_angle
    perpendicular_term = torch.cross(axis, on_sphere, dim=-1) * sin_angle
    axial_term = axis * torch.sum(
        axis * on_sphere, dim=-1, keepdim=True) * (1. - cos_angle)
    rotated = parallel_term + perpendicular_term + axial_term
    rotated = rotated / torch.norm(rotated, dim=-1, keepdim=True)

    pts = torch.cat((rotated, depth.unsqueeze(-1)), dim=-1)

    # now calculate conventional depth
    depth_real = 1. / (depth + TINY_NUMBER) * torch.cos(theta) * inv_dir_norm + t_mid
    return pts, depth_real
class RaySampler(torch.nn.Module):
    """Turn camera poses and intrinsics into per-pixel world-space rays.

    Pixel coordinates are generated as normalized pixel centres
    ``(index + 0.5) / resolution``, lifted through the inverse intrinsics onto
    the camera-space plane z = 1, and transformed by ``cam2world_matrix``.
    """

    def __init__(self):
        super().__init__()
        # Attributes initialised to None; no method of this class reads them.
        self.ray_origins_h = None
        self.ray_directions = None
        self.depths = None
        self.image_coords = None
        self.rendering_options = None

    @staticmethod
    def _pixel_center_uv(rows, cols, resolution):
        """Return a (len(rows) * len(cols), 2) tensor of normalized (x, y) pixel centres."""
        grid = torch.stack(torch.meshgrid(rows, cols, indexing='ij'))
        uv = grid * (1. / resolution) + (0.5 / resolution)
        # Channel 0 holds the row (y) index and channel 1 the column (x) index;
        # flipping puts x first (ij -> xy), then the grid is flattened row-major.
        return uv.flip(0).reshape(2, -1).transpose(1, 0)

    @staticmethod
    def _uv_to_rays(uv, cam2world_matrix, intrinsics):
        """Lift (N, M, 2) normalized uv coordinates to world-space rays.

        Returns ``(ray_origins, ray_dirs)``, both (N, M, 3); directions are
        unit length.
        """
        N = cam2world_matrix.shape[0]
        cam_locs_world = cam2world_matrix[:, :3, 3]

        fx = intrinsics[:, 0, 0].unsqueeze(-1)
        fy = intrinsics[:, 1, 1].unsqueeze(-1)
        cx = intrinsics[:, 0, 2].unsqueeze(-1)
        cy = intrinsics[:, 1, 2].unsqueeze(-1)
        sk = intrinsics[:, 0, 1].unsqueeze(-1)

        x_cam = uv[:, :, 0].view(N, -1)
        y_cam = uv[:, :, 1].view(N, -1)  # [0,1] range
        z_cam = torch.ones_like(x_cam)

        # basically torch.inverse(intrinsics) applied to (x, y, 1), skew included
        x_lift = (x_cam - cx + cy * sk / fy - sk * y_cam / fy) / fx * z_cam
        y_lift = (y_cam - cy) / fy * z_cam

        # Homogeneous camera-space points on the plane z = 1.
        cam_rel_points = torch.stack(
            (x_lift, y_lift, z_cam, torch.ones_like(z_cam)), dim=-1)
        world_rel_points = torch.bmm(
            cam2world_matrix,
            cam_rel_points.permute(0, 2, 1)).permute(0, 2, 1)[:, :, :3]

        ray_dirs = world_rel_points - cam_locs_world[:, None, :]
        ray_dirs = torch.nn.functional.normalize(ray_dirs, dim=2)
        ray_origins = cam_locs_world.unsqueeze(1).repeat(
            1, ray_dirs.shape[1], 1)
        return ray_origins, ray_dirs

    def create_patch_uv(self,
                        patch_resolution,
                        resolution,
                        cam2world_matrix,
                        fg_bbox=None):
        """Sample one square patch of normalized pixel centres per camera.

        Args:
            patch_resolution: patch side length in pixels; must not exceed
                ``resolution``.
            resolution: side length of the full square image in pixels.
            cam2world_matrix: tensor whose ``shape[0]`` gives the batch size and
                whose ``device`` is used for the output; its values are not read.
            fg_bbox: optional 4-column tensor. Columns 0-1 are min-reduced and
                columns 2-3 max-reduced over all rows to form one box.
                NOTE(review): presumably rows are (top, left, bottom, right) in
                pixels - confirm with the caller. When given, each patch is
                placed relative to that box unless a random draw (probability
                0.125) selects the unconstrained placement instead.

        Returns:
            all_uv: (B, patch_resolution**2, 2) float32 tensor in (x, y) order.
            ray_bboxes: list of B tuples ``(top, left, height, width)`` of
                Python ints describing each patch in pixel units.
        """
        assert patch_resolution <= resolution, (
            'patch_resolution must not exceed resolution')
        device = cam2world_matrix.device

        def clamp_end(end):
            # A patch end index must leave room for a whole patch and stay
            # inside the image, so the patch start is never negative.
            return min(max(int(end), patch_resolution), resolution)

        def sample_end(range_start, range_end):
            # random.randint is inclusive on both ends; values beyond the image
            # are clamped, which oversamples the image boundary.
            return clamp_end(random.randint(range_start, range_end))

        def foreground_end(box_start, box_end):
            if box_start + patch_resolution < box_end:
                return sample_end(box_start + patch_resolution, box_end)
            # Box too small to sample within: anchor the patch end on the box
            # end. Plain ints are used here; a uint8 cast would wrap any
            # coordinate above 255.
            return clamp_end(box_end)

        fg_box = None
        if fg_bbox is not None:
            # Converted to Python ints once so random.randint and torch.arange
            # receive plain integers regardless of the tensor dtype.
            top_min, left_min = (
                int(v) for v in fg_bbox[:, :2].min(dim=0)[0].tolist())
            bottom_max, right_max = (
                int(v) for v in fg_bbox[:, 2:].max(dim=0)[0].tolist())
            fg_box = (top_min, left_min, bottom_max, right_max)

        all_uv = []
        ray_bboxes = []
        for _ in range(cam2world_matrix.shape[0]):
            # Mostly sample relative to the foreground box; with probability
            # 0.125 fall through to the unconstrained placement.
            if fg_box is not None and random.random() > 0.125:
                h_end = foreground_end(fg_box[0], fg_box[2])
                w_end = foreground_end(fg_box[1], fg_box[3])
            else:
                h_end = sample_end(patch_resolution,
                                   resolution + patch_resolution)
                w_end = sample_end(patch_resolution,
                                   resolution + patch_resolution)
            h_start = h_end - patch_resolution
            w_start = w_end - patch_resolution
            # Invariant guaranteed by clamp_end.
            assert h_start >= 0 and w_start >= 0

            rows = torch.arange(start=h_start,
                                end=h_end,
                                dtype=torch.float32,
                                device=device)
            cols = torch.arange(start=w_start,
                                end=w_end,
                                dtype=torch.float32,
                                device=device)
            all_uv.append(self._pixel_center_uv(rows, cols, resolution))
            # top: int, left: int, height: int, width: int
            ray_bboxes.append(
                (h_start, w_start, patch_resolution, patch_resolution))

        all_uv = torch.stack(all_uv, 0)  # B patch_res**2 2
        return all_uv, ray_bboxes

    def create_uv(self, resolution, cam2world_matrix):
        """Return (B, resolution**2, 2) normalized (x, y) pixel centres of the full image.

        ``cam2world_matrix`` only supplies the batch size B and the device.
        """
        idx = torch.arange(resolution,
                           dtype=torch.float32,
                           device=cam2world_matrix.device)
        uv = self._pixel_center_uv(idx, idx, resolution)
        return uv.unsqueeze(0).repeat(cam2world_matrix.shape[0], 1, 1)

    def forward(self, cam2world_matrix, intrinsics, resolution, fg_mask=None):
        """
        Create batches of rays and return origins and directions.

        cam2world_matrix: (N, 4, 4)
        intrinsics: (N, 3, 3); NOTE(review): presumably normalized to the
            [0, 1] image range, since uv is normalized - confirm with caller.
        resolution: int
        fg_mask: accepted but not used by this method.

        Returns ``(ray_origins, ray_dirs, None)`` with M = resolution**2:
        ray_origins: (N, M, 3)
        ray_dirs: (N, M, 3), unit length
        The third element is always None.
        """
        uv = self.create_uv(resolution, cam2world_matrix)
        ray_origins, ray_dirs = self._uv_to_rays(uv, cam2world_matrix,
                                                 intrinsics)
        return ray_origins, ray_dirs, None
class PatchRaySampler(RaySampler):
    """Ray sampler that casts rays through one square image patch per camera."""

    def forward(self,
                cam2world_matrix,
                intrinsics,
                patch_resolution,
                resolution,
                fg_bbox=None):
        """
        Create batches of patch rays and return origins, directions and patch boxes.

        cam2world_matrix: (N, 4, 4)
        intrinsics: (N, 3, 3)
        patch_resolution: int, side length of the sampled patch
        resolution: int, side length of the full image
        fg_bbox: optional tensor sliced per camera as ``fg_bbox[i:i + 1]`` and
            forwarded to ``create_patch_uv``

        Returns, with M = patch_resolution**2:
        ray_origins: (N, M, 3)
        ray_dirs: (N, M, 3), unit length
        ray_bboxes: list with the boxes returned by ``create_patch_uv`` for each camera
        """
        num_cams = cam2world_matrix.shape[0]
        rays_per_cam = patch_resolution**2
        cam_centers = cam2world_matrix[:, :3, 3]

        # Per-camera intrinsics as (N, 1) columns so they broadcast over rays.
        focal_x = intrinsics[:, 0, 0].unsqueeze(-1)
        focal_y = intrinsics[:, 1, 1].unsqueeze(-1)
        center_x = intrinsics[:, 0, 2].unsqueeze(-1)
        center_y = intrinsics[:, 1, 2].unsqueeze(-1)
        skew = intrinsics[:, 0, 1].unsqueeze(-1)

        # Sample each camera's patch on its own so every camera is paired with
        # its own slice of fg_bbox.
        patch_uvs = []
        ray_bboxes = []
        for i in range(num_cams):
            bbox_i = None if fg_bbox is None else fg_bbox[i:i + 1]
            uv_i, bboxes_i = self.create_patch_uv(patch_resolution,
                                                  resolution,
                                                  cam2world_matrix[i:i + 1],
                                                  bbox_i)
            patch_uvs.append(uv_i)
            ray_bboxes.extend(bboxes_i)
        all_uv = torch.cat(patch_uvs, 0)

        x_cam = all_uv[:, :, 0].view(num_cams, -1)
        y_cam = all_uv[:, :, 1].view(num_cams, -1)  # [0,1] range
        z_cam = torch.ones((num_cams, rays_per_cam),
                           device=cam2world_matrix.device)

        # basically torch.inverse(intrinsics)
        x_lift = (x_cam - center_x + center_y * skew / focal_y -
                  skew * y_cam / focal_y) / focal_x * z_cam
        y_lift = (y_cam - center_y) / focal_y * z_cam

        # Homogeneous camera-space points, moved to world space.
        cam_points = torch.stack(
            (x_lift, y_lift, z_cam, torch.ones_like(z_cam)), dim=-1)
        world_points = torch.bmm(cam2world_matrix,
                                 cam_points.permute(0, 2, 1))
        world_points = world_points.permute(0, 2, 1)[:, :, :3]

        ray_dirs = torch.nn.functional.normalize(
            world_points - cam_centers[:, None, :], dim=2)
        ray_origins = cam_centers.unsqueeze(1).repeat(1, ray_dirs.shape[1], 1)
        return ray_origins, ray_dirs, ray_bboxes