NIRVANALAN
init
11e6f7b
raw
history blame
13.7 kB
# SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NvidiaProprietary
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
"""
The ray sampler is a module that takes in camera matrices and a resolution and returns batches of rays.
Expects cam2world matrices that use the OpenCV camera coordinate system conventions.
"""
import torch
from pdb import set_trace as st
import random
# Large sentinel value; not referenced by any definition visible in this part
# of the file.
HUGE_NUMBER = 1e10
# Small epsilon added to the inverse depth in depth2pts_outside before it is
# used as a denominator, so a zero inverse depth does not divide by zero.
TINY_NUMBER = 1e-6 # float32 only has 7 decimal digits precision
######################################################################################
# wrapper to simplify the use of nerfnet
######################################################################################
# https://github.com/Kai-46/nerfplusplus/blob/ebf2f3e75fd6c5dfc8c9d0b533800daaf17bd95f/ddp_model.py#L16
def depth2pts_outside(ray_o, ray_d, depth):
    """Turn inverse-depth samples along rays into points outside the unit sphere.

    Each ray is intersected with the unit sphere centred at the origin; the
    intersection point is then rotated (Rodrigues formula) about the axis
    ``ray_o x p_sphere`` so that it points towards the sample located at
    distance ``1 / depth`` from the origin.

    NOTE(review): the square root below is only real when the point of the
    ray closest to the origin lies inside the unit sphere; other rays yield
    NaN. Confirm that callers only pass rays that hit the sphere.

    Args:
        ray_o, ray_d: [..., 3] ray origins and directions.
        depth: [...]; inverse of distance to sphere origin, inside [0, 1].

    Returns:
        pts: [..., 4] unit direction of the sample concatenated with ``depth``.
        depth_real: [...] conventional depth of the sample along the ray.
    """
    # Ray parameter of the point closest to the origin; it becomes negative
    # when that mid point is behind the camera.
    t_mid = -torch.sum(ray_d * ray_o, dim=-1) / torch.sum(ray_d * ray_d, dim=-1)
    closest = ray_o + t_mid.unsqueeze(-1) * ray_d
    closest_norm = torch.norm(closest, dim=-1)
    inv_dir_norm = 1. / torch.norm(ray_d, dim=-1)

    # Extra ray parameter needed to travel from the closest point to the
    # surface of the unit sphere.
    t_to_sphere = torch.sqrt(1. - closest_norm * closest_norm) * inv_dir_norm
    on_sphere = ray_o + (t_mid + t_to_sphere).unsqueeze(-1) * ray_d

    # Unit rotation axis, perpendicular to the plane spanned by the ray
    # origin and the sphere intersection.
    axis = torch.cross(ray_o, on_sphere, dim=-1)
    axis = axis / torch.norm(axis, dim=-1, keepdim=True)

    phi = torch.asin(closest_norm)
    theta = torch.asin(closest_norm * depth)  # depth is inside [0, 1]
    angle = (phi - theta).unsqueeze(-1)  # [..., 1]
    cos_angle = torch.cos(angle)
    sin_angle = torch.sin(angle)

    # Rodrigues formula:
    # https://en.wikipedia.org/wiki/Rodrigues%27_rotation_formula
    rotated = on_sphere * cos_angle + \
        torch.cross(axis, on_sphere, dim=-1) * sin_angle + \
        axis * torch.sum(axis * on_sphere, dim=-1, keepdim=True) * (1. - cos_angle)
    rotated = rotated / torch.norm(rotated, dim=-1, keepdim=True)
    pts = torch.cat((rotated, depth.unsqueeze(-1)), dim=-1)

    # Conventional (metric) depth along the ray for the same sample.
    depth_real = 1. / (depth + TINY_NUMBER) * torch.cos(theta) * inv_dir_norm + t_mid
    return pts, depth_real
class RaySampler(torch.nn.Module):
    """Generate camera rays from cam2world matrices and intrinsics.

    Pixel centres are expressed as UV coordinates in [0, 1] and the
    intrinsics are applied to them directly, so the intrinsics must be given
    in the same normalised image units.
    """

    def __init__(self):
        super().__init__()
        # Placeholder attributes; none of the methods of this class read or
        # write them.
        self.ray_origins_h, self.ray_directions, self.depths, self.image_coords, self.rendering_options = None, None, None, None, None

    def create_patch_uv(self,
                        patch_resolution,
                        resolution,
                        cam2world_matrix,
                        fg_bbox=None):
        """Sample one random square patch of UV coordinates per camera.

        Args:
            patch_resolution: side length of the patch, in pixels.
            resolution: side length of the full square image, in pixels.
            cam2world_matrix: tensor whose first dimension is the batch size;
                only its batch size and device are used here.
            fg_bbox: optional tensor of foreground boxes read as
                ``fg_bbox[:, :2]`` (minimum corner) and ``fg_bbox[:, 2:]``
                (maximum corner). Presumably rows are
                (top, left, bottom, right) in pixels -- TODO confirm against
                the caller. When given, a patch is placed relative to the
                union of the boxes with probability 0.875; otherwise the
                patch is sampled over the whole image.

        Returns:
            all_uv: (B, patch_resolution**2, 2) tensor of (x, y) pixel
                centres divided by ``resolution``.
            ray_bboxes: list of B tuples (top, left, height, width).
        """
        assert patch_resolution <= resolution, \
            'patch_resolution must not exceed resolution'
        device = cam2world_matrix.device

        def sample_patch_end(range_start=None, range_end=None):
            """Draw the exclusive end index of a patch along one axis."""
            if range_start is None:
                range_start = patch_resolution
            if range_end is None:
                range_end = resolution + patch_resolution
            end = random.randint(range_start, range_end)
            # Clip into [patch_resolution, resolution]. With the default
            # range every draw above `resolution` collapses onto it, which
            # oversamples patches touching the far image boundary.
            return min(max(end, patch_resolution), resolution)

        def sample_end_in_box(box_min, box_max):
            """Pick a patch end for one axis of the foreground box."""
            if box_min + patch_resolution < box_max:
                return sample_patch_end(box_min + patch_resolution, box_max)
            # Box is not larger than the patch: align the patch end with the
            # box end, but never start before pixel 0.
            return max(box_max, patch_resolution)

        def sample_patch_uv():
            # Mostly train on the foreground; with probability 0.125 the
            # patch is sampled over the whole image instead.
            if fg_bbox is not None and random.random() > 0.125:
                # Plain Python ints: casting through uint8 would wrap
                # coordinates above 255, and random.randint needs integers.
                top_min, left_min = (
                    int(v) for v in fg_bbox[:, :2].min(dim=0).values.tolist())
                bottom_max, right_max = (
                    int(v) for v in fg_bbox[:, 2:].max(dim=0).values.tolist())
                h_end = sample_end_in_box(top_min, bottom_max)
                w_end = sample_end_in_box(left_min, right_max)
            else:
                h_end = sample_patch_end()
                w_end = sample_patch_end()

            h_start = h_end - patch_resolution
            w_start = w_end - patch_resolution
            assert h_start >= 0 and w_start >= 0, \
                'sampled patch starts outside the image'

            rows = torch.arange(start=h_start,
                                end=h_end,
                                dtype=torch.float32,
                                device=device)
            cols = torch.arange(start=w_start,
                                end=w_end,
                                dtype=torch.float32,
                                device=device)
            # Pixel centres, normalised by the full image resolution.
            uv = torch.stack(torch.meshgrid(
                rows, cols, indexing='ij')) * (1. / resolution) + (0.5 / resolution)
            uv = uv.flip(0).reshape(2, -1).transpose(1, 0)  # ij -> xy
            # top: int, left: int, height: int, width: int
            return uv, (h_start, w_start, patch_resolution, patch_resolution)

        all_uv = []
        ray_bboxes = []
        for _ in range(cam2world_matrix.shape[0]):
            uv, bbox = sample_patch_uv()
            all_uv.append(uv)
            ray_bboxes.append(bbox)
        all_uv = torch.stack(all_uv, 0)  # B patch_res**2 2
        return all_uv, ray_bboxes

    def create_uv(self, resolution, cam2world_matrix):
        """Return the full-image UV grid, repeated for every camera.

        Args:
            resolution: side length of the square image, in pixels.
            cam2world_matrix: tensor whose first dimension is the batch size;
                only its batch size and device are used here.

        Returns:
            (B, resolution**2, 2) tensor of (x, y) pixel centres in [0, 1].
        """
        axis = torch.arange(resolution,
                            dtype=torch.float32,
                            device=cam2world_matrix.device)
        uv = torch.stack(torch.meshgrid(
            axis, axis, indexing='ij')) * (1. / resolution) + (0.5 / resolution)
        # Swap the (row, col) channel order to (x, y) and flatten the grid.
        uv = uv.flip(0).reshape(2, -1).transpose(1, 0)
        uv = uv.unsqueeze(0).repeat(cam2world_matrix.shape[0], 1, 1)
        return uv

    def forward(self, cam2world_matrix, intrinsics, resolution, fg_mask=None):
        """
        Create batches of rays and return origins and directions.

        cam2world_matrix: (N, 4, 4)
        intrinsics: (N, 3, 3)
        resolution: int
        fg_mask: accepted but not used by this method.

        Returns (ray_origins, ray_dirs, None), where M = resolution**2:
        ray_origins: (N, M, 3)
        ray_dirs: (N, M, 3), unit length
        """
        N, M = cam2world_matrix.shape[0], resolution**2
        cam_locs_world = cam2world_matrix[:, :3, 3]
        fx = intrinsics[:, 0, 0]
        fy = intrinsics[:, 1, 1]
        cx = intrinsics[:, 0, 2]
        cy = intrinsics[:, 1, 2]
        sk = intrinsics[:, 0, 1]

        uv = self.create_uv(
            resolution,
            cam2world_matrix,
        )

        x_cam = uv[:, :, 0].view(N, -1)
        y_cam = uv[:, :, 1].view(N, -1)  # [0,1] range
        z_cam = torch.ones((N, M), device=cam2world_matrix.device)

        # Un-project the pixels onto the z = 1 plane; this is basically
        # torch.inverse(intrinsics) applied to (x, y, 1), skew included.
        x_lift = (x_cam - cx.unsqueeze(-1) + cy.unsqueeze(-1) *
                  sk.unsqueeze(-1) / fy.unsqueeze(-1) - sk.unsqueeze(-1) *
                  y_cam / fy.unsqueeze(-1)) / fx.unsqueeze(-1) * z_cam
        y_lift = (y_cam - cy.unsqueeze(-1)) / fy.unsqueeze(-1) * z_cam

        # Homogeneous camera-space points, moved to world space.
        cam_rel_points = torch.stack(
            (x_lift, y_lift, z_cam, torch.ones_like(z_cam)), dim=-1)
        world_rel_points = torch.bmm(cam2world_matrix,
                                     cam_rel_points.permute(0, 2, 1)).permute(
                                         0, 2, 1)[:, :, :3]

        ray_dirs = world_rel_points - cam_locs_world[:, None, :]
        ray_dirs = torch.nn.functional.normalize(ray_dirs, dim=2)

        # Every ray of a camera starts at that camera's centre.
        ray_origins = cam_locs_world.unsqueeze(1).repeat(
            1, ray_dirs.shape[1], 1)

        return ray_origins, ray_dirs, None
class PatchRaySampler(RaySampler):
    """Ray sampler that emits rays for one random square patch per camera."""

    def forward(self,
                cam2world_matrix,
                intrinsics,
                patch_resolution,
                resolution,
                fg_bbox=None):
        """
        Create batches of patch rays and return origins and directions.

        cam2world_matrix: (N, 4, 4)
        intrinsics: (N, 3, 3)
        patch_resolution: int, side length of the sampled patch
        resolution: int, side length of the full image
        fg_bbox: optional tensor indexed per camera along dim 0 and
            forwarded to create_patch_uv

        Returns (ray_origins, ray_dirs, ray_bboxes), where
        M = patch_resolution**2:
        ray_origins: (N, M, 3)
        ray_dirs: (N, M, 3), unit length
        ray_bboxes: list with one (top, left, height, width) tuple per camera
        """
        batch_size = cam2world_matrix.shape[0]
        rays_per_patch = patch_resolution**2
        device = cam2world_matrix.device

        cam_centers = cam2world_matrix[:, :3, 3]
        # Intrinsic entries as (N, 1) columns so they broadcast over rays.
        focal_x = intrinsics[:, 0, 0].unsqueeze(-1)
        focal_y = intrinsics[:, 1, 1].unsqueeze(-1)
        center_x = intrinsics[:, 0, 2].unsqueeze(-1)
        center_y = intrinsics[:, 1, 2].unsqueeze(-1)
        skew = intrinsics[:, 0, 1].unsqueeze(-1)

        # One sampling call per camera, in batch order, so every camera gets
        # its own patch location.
        samples = [
            self.create_patch_uv(
                patch_resolution, resolution, cam2world_matrix[i:i + 1],
                None if fg_bbox is None else fg_bbox[i:i + 1])
            for i in range(batch_size)
        ]
        all_uv = torch.cat([patch_uv for patch_uv, _ in samples], 0)
        ray_bboxes = [box for _, boxes in samples for box in boxes]

        x_cam = all_uv[:, :, 0].view(batch_size, -1)
        y_cam = all_uv[:, :, 1].view(batch_size, -1)  # [0,1] range
        z_cam = torch.ones((batch_size, rays_per_patch), device=device)

        # Un-project onto the z = 1 plane; basically torch.inverse(intrinsics).
        x_lift = (x_cam - center_x + center_y * skew / focal_y -
                  skew * y_cam / focal_y) / focal_x * z_cam
        y_lift = (y_cam - center_y) / focal_y * z_cam

        # Homogeneous camera-space points, moved to world space.
        homogeneous = torch.stack(
            (x_lift, y_lift, z_cam, torch.ones_like(z_cam)), dim=-1)
        world_points = torch.bmm(cam2world_matrix,
                                 homogeneous.transpose(1, 2))
        world_points = world_points.transpose(1, 2)[:, :, :3]

        ray_dirs = torch.nn.functional.normalize(
            world_points - cam_centers.unsqueeze(1), dim=2)

        # Every ray of a camera starts at that camera's centre.
        ray_origins = cam_centers.unsqueeze(1).repeat(1, ray_dirs.shape[1], 1)

        return ray_origins, ray_dirs, ray_bboxes