import cv2 import math import scipy import numpy as np import torch import open3d as o3d from tqdm import tqdm from .camera_util import create_camera_to_world ############################################################################### # Camera Trajectory ############################################################################### def fibonacci_sampling_on_sphere(num_samples=1): points = [] phi = np.pi * (3.0 - np.sqrt(5.0)) # golden angle in radians for i in range(num_samples): y = 1 - (i / float(num_samples - 1)) * 2 # y goes from 1 to -1 radius = np.sqrt(1 - y * y) # radius at y theta = phi * i # golden angle increment x = np.cos(theta) * radius z = np.sin(theta) * radius points.append([x, y, z]) points = np.array(points) return points def get_fibonacci_cameras(N=20, radius=2.0, device='cuda'): def normalize_vecs(vectors): return vectors / (torch.norm(vectors, dim=-1, keepdim=True)) t = torch.linspace(0, 1, N).reshape(-1, 1) cam_pos = fibonacci_sampling_on_sphere(N) cam_pos = torch.from_numpy(cam_pos).float().to(device) cam_pos = cam_pos * radius forward_vector = normalize_vecs(-cam_pos) up_vector = torch.tensor([0, 0, 1], dtype=torch.float, device=device).reshape(-1).expand_as(forward_vector) right_vector = normalize_vecs(torch.cross(forward_vector, up_vector, dim=-1)) up_vector = normalize_vecs(torch.cross(right_vector, forward_vector, dim=-1)) rotate = torch.stack( (right_vector, -up_vector, forward_vector), dim=-1) rotation_matrix = torch.eye(4, device=device).unsqueeze(0).repeat(forward_vector.shape[0], 1, 1) rotation_matrix[:, :3, :3] = rotate translation_matrix = torch.eye(4, device=device).unsqueeze(0).repeat(forward_vector.shape[0], 1, 1) translation_matrix[:, :3, 3] = cam_pos cam2world = translation_matrix @ rotation_matrix return cam2world def get_circular_cameras(N=120, elevation=0, radius=2.0, normalize=True, device='cuda'): camera_positions = [] for i in range(N): azimuth = 2 * np.pi * i / N - np.pi / 2 x = radius * np.cos(elevation) * np.cos(azimuth) y = radius * np.cos(elevation) * np.sin(azimuth) z = radius * np.sin(elevation) camera_positions.append([x, y, z]) camera_positions = np.array(camera_positions) camera_positions = torch.from_numpy(camera_positions).float() c2ws = create_camera_to_world(camera_positions, camera_system='opencv') if normalize: c2ws_first = create_camera_to_world(torch.tensor([0, -2, 0]), camera_system='opencv').unsqueeze(0) c2ws = torch.linalg.inv(c2ws_first) @ c2ws return c2ws ############################################################################### # TSDF Fusion ############################################################################### def rgbd_to_mesh(images, depths, c2ws, fov, mesh_path, cam_elev_thr=0): voxel_length = 2 * 2.0 / 512.0 sdf_trunc = 2 * 0.02 color_type = o3d.pipelines.integration.TSDFVolumeColorType.RGB8 volume = o3d.pipelines.integration.ScalableTSDFVolume( voxel_length=voxel_length, sdf_trunc=sdf_trunc, color_type=color_type, ) for i in tqdm(range(c2ws.shape[0])): camera_to_world = c2ws[i] world_to_camera = np.linalg.inv(camera_to_world) camera_position = camera_to_world[:3, 3] # camera_elevation = np.rad2deg(np.arcsin(camera_position[2])) camera_elevation = np.rad2deg(np.arcsin(camera_position[2] / np.linalg.norm(camera_position))) if camera_elevation < cam_elev_thr: continue color_image = o3d.geometry.Image(np.ascontiguousarray(images[i])) depth_image = o3d.geometry.Image(np.ascontiguousarray(depths[i])) rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth( color_image, depth_image, depth_scale=1.0, depth_trunc=4.0, convert_rgb_to_intensity=False ) camera_intrinsics = o3d.camera.PinholeCameraIntrinsic() fx = fy = images[i].shape[1] / 2. / np.tan(np.deg2rad(fov / 2.0)) cx = cy = images[i].shape[1] / 2. h = images[i].shape[0] w = images[i].shape[1] camera_intrinsics.set_intrinsics( w, h, fx, fy, cx, cy ) volume.integrate( rgbd_image, camera_intrinsics, world_to_camera, ) fused_mesh = volume.extract_triangle_mesh() triangle_clusters, cluster_n_triangles, cluster_area = ( fused_mesh.cluster_connected_triangles()) triangle_clusters = np.asarray(triangle_clusters) cluster_n_triangles = np.asarray(cluster_n_triangles) cluster_area = np.asarray(cluster_area) triangles_to_remove = cluster_n_triangles[triangle_clusters] < 500 fused_mesh.remove_triangles_by_mask(triangles_to_remove) fused_mesh.remove_unreferenced_vertices() fused_mesh = fused_mesh.filter_smooth_simple(number_of_iterations=2) fused_mesh = fused_mesh.compute_vertex_normals() o3d.io.write_triangle_mesh(mesh_path, fused_mesh) ############################################################################### # Visualization ############################################################################### def viewmatrix(lookdir, up, position): """Construct lookat view matrix.""" vec2 = normalize(lookdir) vec0 = normalize(np.cross(up, vec2)) vec1 = normalize(np.cross(vec2, vec0)) m = np.stack([vec0, vec1, vec2, position], axis=1) return m def normalize(x): """Normalization helper function.""" return x / np.linalg.norm(x) def generate_interpolated_path(poses, n_interp, spline_degree=5, smoothness=.03, rot_weight=.1): """Creates a smooth spline path between input keyframe camera poses. Spline is calculated with poses in format (position, lookat-point, up-point). Args: poses: (n, 3, 4) array of input pose keyframes. n_interp: returned path will have n_interp * (n - 1) total poses. spline_degree: polynomial degree of B-spline. smoothness: parameter for spline smoothing, 0 forces exact interpolation. rot_weight: relative weighting of rotation/translation in spline solve. Returns: Array of new camera poses with shape (n_interp * (n - 1), 3, 4). """ def poses_to_points(poses, dist): """Converts from pose matrices to (position, lookat, up) format.""" pos = poses[:, :3, -1] lookat = poses[:, :3, -1] - dist * poses[:, :3, 2] up = poses[:, :3, -1] + dist * poses[:, :3, 1] return np.stack([pos, lookat, up], 1) def points_to_poses(points): """Converts from (position, lookat, up) format to pose matrices.""" return np.array([viewmatrix(p - l, u - p, p) for p, l, u in points]) def interp(points, n, k, s): """Runs multidimensional B-spline interpolation on the input points.""" sh = points.shape pts = np.reshape(points, (sh[0], -1)) k = min(k, sh[0] - 1) tck, _ = scipy.interpolate.splprep(pts.T, k=k, s=s) u = np.linspace(0, 1, n, endpoint=False) new_points = np.array(scipy.interpolate.splev(u, tck)) new_points = np.reshape(new_points.T, (n, sh[1], sh[2])) return new_points points = poses_to_points(poses, dist=rot_weight) new_points = interp(points, n_interp * (points.shape[0] - 1), k=spline_degree, s=smoothness) return points_to_poses(new_points) ############################################################################### # Camera Estimation ############################################################################### def xy_grid(W, H, device=None, origin=(0, 0), unsqueeze=None, cat_dim=-1, homogeneous=False, **arange_kw): """ Output a (H,W,2) array of int32 with output[j,i,0] = i + origin[0] output[j,i,1] = j + origin[1] """ if device is None: # numpy arange, meshgrid, stack, ones = np.arange, np.meshgrid, np.stack, np.ones else: # torch arange = lambda *a, **kw: torch.arange(*a, device=device, **kw) meshgrid, stack = torch.meshgrid, torch.stack ones = lambda *a: torch.ones(*a, device=device) tw, th = [arange(o, o + s, **arange_kw) for s, o in zip((W, H), origin)] grid = meshgrid(tw, th, indexing='xy') if homogeneous: grid = grid + (ones((H, W)),) if unsqueeze is not None: grid = (grid[0].unsqueeze(unsqueeze), grid[1].unsqueeze(unsqueeze)) if cat_dim is not None: grid = stack(grid, cat_dim) return grid def estimate_focal(pts3d, pp=None, mask=None, min_focal=0., max_focal=np.inf): """ Reprojection method, for when the absolute depth is known: 1) estimate the camera focal using a robust estimator 2) reproject points onto true rays, minimizing a certain error """ H, W, THREE = pts3d.shape assert THREE == 3 if pp is None: pp = torch.tensor([W/2, H/2]).to(pts3d) # centered pixel grid pixels = xy_grid(W, H, device=pts3d.device).view(-1, 2) - pp.view(1, 2) # (HW, 2) pts3d = pts3d.view(H*W, 3).contiguous() # (HW, 3) # mask points if provided if mask is not None: mask = mask.to(pts3d.device).ravel().bool() assert len(mask) == pts3d.shape[0] pts3d = pts3d[mask] pixels = pixels[mask] # weiszfeld # init focal with l2 closed form # we try to find focal = argmin Sum | pixel - focal * (x,y)/z| xy_over_z = (pts3d[..., :2] / pts3d[..., 2:3]).nan_to_num(posinf=0, neginf=0) # homogeneous (x,y,1) dot_xy_px = (xy_over_z * pixels).sum(dim=-1) dot_xy_xy = xy_over_z.square().sum(dim=-1) focal = dot_xy_px.mean(dim=0) / dot_xy_xy.mean(dim=0) # iterative re-weighted least-squares for iter in range(10): # re-weighting by inverse of distance dis = (pixels - focal.view(-1, 1) * xy_over_z).norm(dim=-1) # print(dis.nanmean(-1)) w = dis.clip(min=1e-8).reciprocal() # update the scaling with the new weights focal = (w * dot_xy_px).mean(dim=0) / (w * dot_xy_xy).mean(dim=0) focal_base = max(H, W) / (2 * np.tan(np.deg2rad(60) / 2)) # size / 1.1547005383792515 focal = focal.clip(min=min_focal*focal_base, max=max_focal*focal_base) return focal.ravel() def fast_pnp(pts3d, mask, focal=None, pp=None, niter_PnP=10): """ Estimate camera poses and focals with RANSAC-PnP. Inputs: pts3d: H x W x 3 focal: 1 mask: H x W pp """ H, W, _ = pts3d.shape pixels = np.mgrid[:W, :H].T.astype(float) if focal is None: S = max(W, H) tentative_focals = np.geomspace(S/2, S*3, 21) else: tentative_focals = [focal] if pp is None: pp = (W/2, H/2) best = 0, for focal in tentative_focals: K = np.float32([(focal, 0, pp[0]), (0, focal, pp[1]), (0, 0, 1)]) success, R, T, inliers = cv2.solvePnPRansac(pts3d[mask], pixels[mask], K, None, iterationsCount=niter_PnP, reprojectionError=5, flags=cv2.SOLVEPNP_SQPNP) if not success: continue score = len(inliers) if success and score > best[0]: best = score, R, T, focal if not best[0]: return None _, R, T, best_focal = best R = cv2.Rodrigues(R)[0] # world to cam world2cam = np.eye(4).astype(float) world2cam[:3, :3] = R world2cam[:3, 3] = T.reshape(3) cam2world = np.linalg.inv(world2cam) return best_focal, cam2world