# Spaces: Runtime error
# (apparently a status banner captured from the hosting page; not part of the module)
import copy | |
import json | |
import math | |
import os | |
import pathlib | |
from typing import Any, Callable, List, Optional, Text, Tuple, Union | |
import numpy as np | |
import scipy.signal | |
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
from torch import Tensor | |
# Loosely-typed aliases used in annotations throughout this file.
PRNGKey = Any
# A shape is a tuple of ints of any length (``Tuple[int]`` would mean a 1-tuple).
Shape = Tuple[int, ...]
Dtype = Any  # this could be a real type?
Array = Any
# Maps an array to an array.
Activation = Callable[[Array], Array]
# Builds an array from a PRNG key, a shape and a dtype.
Initializer = Callable[[PRNGKey, Shape, Dtype], Array]
# Zero-argument factory returning an array-to-array callable.
Normalizer = Callable[[], Callable[[Array], Array]]
# Anything accepted as a filesystem path by the loaders below.
PathType = Union[Text, pathlib.PurePosixPath]
from pathlib import PurePosixPath as GPath | |
def _compute_residual_and_jacobian( | |
x: np.ndarray, | |
y: np.ndarray, | |
xd: np.ndarray, | |
yd: np.ndarray, | |
k1: float = 0.0, | |
k2: float = 0.0, | |
k3: float = 0.0, | |
p1: float = 0.0, | |
p2: float = 0.0, | |
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, | |
np.ndarray]: | |
"""Auxiliary function of radial_and_tangential_undistort().""" | |
r = x * x + y * y | |
d = 1.0 + r * (k1 + r * (k2 + k3 * r)) | |
fx = d * x + 2 * p1 * x * y + p2 * (r + 2 * x * x) - xd | |
fy = d * y + 2 * p2 * x * y + p1 * (r + 2 * y * y) - yd | |
# Compute derivative of d over [x, y] | |
d_r = (k1 + r * (2.0 * k2 + 3.0 * k3 * r)) | |
d_x = 2.0 * x * d_r | |
d_y = 2.0 * y * d_r | |
# Compute derivative of fx over x and y. | |
fx_x = d + d_x * x + 2.0 * p1 * y + 6.0 * p2 * x | |
fx_y = d_y * x + 2.0 * p1 * x + 2.0 * p2 * y | |
# Compute derivative of fy over x and y. | |
fy_x = d_x * y + 2.0 * p2 * y + 2.0 * p1 * x | |
fy_y = d + d_y * y + 2.0 * p2 * x + 6.0 * p1 * y | |
return fx, fy, fx_x, fx_y, fy_x, fy_y | |
def _radial_and_tangential_undistort(
        xd: np.ndarray,
        yd: np.ndarray,
        k1: float = 0,
        k2: float = 0,
        k3: float = 0,
        p1: float = 0,
        p2: float = 0,
        eps: float = 1e-9,
        max_iterations: int = 10) -> Tuple[np.ndarray, np.ndarray]:
    """Computes undistorted (x, y) from (xd, yd).

    Runs a fixed number of Newton iterations on the residual returned by
    `_compute_residual_and_jacobian`, solving the 2x2 linear system in closed
    form at every step.

    Args:
      xd: distorted x coordinates.
      yd: distorted y coordinates (same shape as `xd`).
      k1: first radial distortion coefficient.
      k2: second radial distortion coefficient.
      k3: third radial distortion coefficient.
      p1: first tangential distortion coefficient.
      p2: second tangential distortion coefficient.
      eps: entries whose Jacobian determinant has magnitude <= eps are not
        updated in that iteration.
      max_iterations: number of Newton iterations to run.

    Returns:
      The tuple (x, y) of undistorted coordinates.
    """
    # Initialize from the distorted point (copies, so inputs stay untouched).
    x = np.array(xd, copy=True)
    y = np.array(yd, copy=True)

    for _ in range(max_iterations):
        fx, fy, fx_x, fx_y, fy_x, fy_y = _compute_residual_and_jacobian(
            x=x, y=y, xd=xd, yd=yd, k1=k1, k2=k2, k3=k3, p1=p1, p2=p2)

        # Negated determinant of the Jacobian; the sign is folded into the
        # numerators so that step = -J^-1 * f.
        denominator = fy_x * fx_y - fx_x * fy_y
        x_numerator = fx * fy_y - fy * fx_y
        y_numerator = fy * fx_x - fx * fy_x

        # Divide by 1 wherever the system is (near-)singular so that the
        # discarded branch of np.where never produces inf/nan or a warning.
        solvable = np.abs(denominator) > eps
        safe_denominator = np.where(
            solvable, denominator, np.ones_like(denominator))
        no_step = np.zeros_like(denominator)
        step_x = np.where(solvable, x_numerator / safe_denominator, no_step)
        step_y = np.where(solvable, y_numerator / safe_denominator, no_step)

        x = x + step_x
        y = y + step_y

    return x, y
class Camera:
    """Class to handle camera geometry.

    Models a pinhole camera with skew, a pixel aspect ratio and radial plus
    tangential lens distortion. `orientation` acts as the world-to-camera
    rotation and `position` as the camera centre in world coordinates: a world
    point `p` is mapped to `orientation @ (p - position)` in the camera frame.
    """

    def __init__(self,
                 orientation: np.ndarray,
                 position: np.ndarray,
                 focal_length: Union[np.ndarray, float],
                 principal_point: np.ndarray,
                 image_size: np.ndarray,
                 skew: Union[np.ndarray, float] = 0.0,
                 pixel_aspect_ratio: Union[np.ndarray, float] = 1.0,
                 radial_distortion: Optional[np.ndarray] = None,
                 tangential_distortion: Optional[np.ndarray] = None,
                 dtype=np.float32):
        """Constructor for camera class.

        Args:
          orientation: 3x3 world-to-camera rotation; its last row is the
            optical axis.
          position: camera centre in world coordinates.
          focal_length: focal length along x, in pixels.
          principal_point: (x, y) principal point, in pixels.
          image_size: (width, height) of the image, in pixels.
          skew: skew coefficient.
          pixel_aspect_ratio: ratio of the y focal length to the x one.
          radial_distortion: (k1, k2, k3); defaults to zeros.
          tangential_distortion: (p1, p2); defaults to zeros.
          dtype: floating point type used for every parameter except
            `image_size`, which is stored as uint32.
        """
        if radial_distortion is None:
            radial_distortion = np.array([0.0, 0.0, 0.0], dtype)
        if tangential_distortion is None:
            tangential_distortion = np.array([0.0, 0.0], dtype)

        self.orientation = np.array(orientation, dtype)
        self.position = np.array(position, dtype)
        self.focal_length = np.array(focal_length, dtype)
        self.principal_point = np.array(principal_point, dtype)
        self.skew = np.array(skew, dtype)
        self.pixel_aspect_ratio = np.array(pixel_aspect_ratio, dtype)
        self.radial_distortion = np.array(radial_distortion, dtype)
        self.tangential_distortion = np.array(tangential_distortion, dtype)
        self.image_size = np.array(image_size, np.uint32)
        self.dtype = dtype

    @classmethod
    def from_json(cls, path: "PathType"):
        """Loads a JSON camera into memory.

        Args:
          path: location of a JSON file holding the keys produced by
            `to_json` (the legacy key 'tangential' is also accepted in place
            of 'tangential_distortion').

        Returns:
          A new camera built from the file contents.
        """
        path = GPath(path)
        with open(path, 'r') as fp:
            camera_json = json.load(fp)

        # Fix old camera JSON.
        if 'tangential' in camera_json:
            camera_json['tangential_distortion'] = camera_json['tangential']

        return cls(
            orientation=np.asarray(camera_json['orientation']),
            position=np.asarray(camera_json['position']),
            focal_length=camera_json['focal_length'],
            principal_point=np.asarray(camera_json['principal_point']),
            skew=camera_json['skew'],
            pixel_aspect_ratio=camera_json['pixel_aspect_ratio'],
            radial_distortion=np.asarray(camera_json['radial_distortion']),
            tangential_distortion=np.asarray(
                camera_json['tangential_distortion']),
            image_size=np.asarray(camera_json['image_size']),
        )

    def to_json(self):
        """Returns the camera parameters as a JSON-serialisable dict."""
        return {
            k: (v.tolist() if hasattr(v, 'tolist') else v)
            for k, v in self.get_parameters().items()
        }

    def get_parameters(self):
        """Returns a dict mapping parameter names to the stored arrays."""
        return {
            'orientation': self.orientation,
            'position': self.position,
            'focal_length': self.focal_length,
            'principal_point': self.principal_point,
            'skew': self.skew,
            'pixel_aspect_ratio': self.pixel_aspect_ratio,
            'radial_distortion': self.radial_distortion,
            'tangential_distortion': self.tangential_distortion,
            'image_size': self.image_size,
        }

    # The accessors below are read as plain attributes by the methods of this
    # class (e.g. `self.principal_point_y`), hence they are properties.

    @property
    def scale_factor_x(self):
        """Focal length along x."""
        return self.focal_length

    @property
    def scale_factor_y(self):
        """Focal length along y (x focal length times pixel aspect ratio)."""
        return self.focal_length * self.pixel_aspect_ratio

    @property
    def principal_point_x(self):
        return self.principal_point[0]

    @property
    def principal_point_y(self):
        return self.principal_point[1]

    @property
    def has_tangential_distortion(self):
        """True if any tangential distortion coefficient is non-zero."""
        return bool(np.any(self.tangential_distortion != 0.0))

    @property
    def has_radial_distortion(self):
        """True if any radial distortion coefficient is non-zero."""
        return bool(np.any(self.radial_distortion != 0.0))

    @property
    def image_size_y(self):
        return self.image_size[1]

    @property
    def image_size_x(self):
        return self.image_size[0]

    @property
    def image_shape(self):
        """The image size ordered as (size_y, size_x)."""
        return self.image_size_y, self.image_size_x

    @property
    def optical_axis(self):
        """Last row of the orientation matrix."""
        return self.orientation[2, :]

    @property
    def translation(self):
        """The vector `-orientation @ position`."""
        return -np.matmul(self.orientation, self.position)

    def pixel_to_local_rays(self, pixels: np.ndarray):
        """Returns the local ray directions for the provided pixels.

        Args:
          pixels: array whose last dimension holds (x, y) pixel positions.

        Returns:
          Unit-length ray directions in the camera frame, with a trailing
          dimension of 3.
        """
        # Invert the intrinsics: y first, because x depends on it via skew.
        y = ((pixels[..., 1] - self.principal_point_y) / self.scale_factor_y)
        x = ((pixels[..., 0] - self.principal_point_x - y * self.skew) /
             self.scale_factor_x)

        if self.has_radial_distortion or self.has_tangential_distortion:
            x, y = _radial_and_tangential_undistort(
                x,
                y,
                k1=self.radial_distortion[0],
                k2=self.radial_distortion[1],
                k3=self.radial_distortion[2],
                p1=self.tangential_distortion[0],
                p2=self.tangential_distortion[1])

        dirs = np.stack([x, y, np.ones_like(x)], axis=-1)
        return dirs / np.linalg.norm(dirs, axis=-1, keepdims=True)

    def pixels_to_rays(self, pixels: np.ndarray) -> np.ndarray:
        """Returns the rays for the provided pixels.

        Args:
          pixels: [A1, ..., An, 2] tensor or np.array containing 2d pixel
            positions.

        Returns:
          An array containing the normalized ray directions in world
          coordinates.

        Raises:
          ValueError: if the last dimension of `pixels` is not 2 or if its
            dtype differs from the camera dtype.
        """
        if pixels.shape[-1] != 2:
            raise ValueError('The last dimension of pixels must be 2.')
        if pixels.dtype != self.dtype:
            raise ValueError(f'pixels dtype ({pixels.dtype!r}) must match camera '
                             f'dtype ({self.dtype!r})')

        batch_shape = pixels.shape[:-1]
        pixels = np.reshape(pixels, (-1, 2))

        local_rays_dir = self.pixel_to_local_rays(pixels)
        # Rotate from the camera frame back to the world frame.
        rays_dir = np.matmul(self.orientation.T,
                             local_rays_dir[..., np.newaxis])
        rays_dir = np.squeeze(rays_dir, axis=-1)

        # Normalize rays.
        rays_dir /= np.linalg.norm(rays_dir, axis=-1, keepdims=True)
        rays_dir = rays_dir.reshape((*batch_shape, 3))
        return rays_dir

    def pixels_to_points(self, pixels: np.ndarray, depth: np.ndarray):
        """Back-projects pixels to world points.

        Args:
          pixels: [A1, ..., An, 2] pixel positions.
          depth: [A1, ..., An] depth measured along the optical axis.

        Returns:
          The [A1, ..., An, 3] world points.
        """
        rays_through_pixels = self.pixels_to_rays(pixels)
        # Cosine between each ray and the optical axis; dividing by it turns
        # the axial depth into a distance along the ray.
        cosa = np.matmul(rays_through_pixels, self.optical_axis)
        points = (
            rays_through_pixels * depth[..., np.newaxis] / cosa[..., np.newaxis]
            + self.position)
        return points

    def points_to_local_points(self, points: np.ndarray):
        """Transforms world points of shape [..., 3] into the camera frame."""
        translated_points = points - self.position
        # Row-vector form of `orientation @ p`; unlike a double transpose it
        # also works for inputs with more than two dimensions.
        local_points = np.matmul(translated_points, self.orientation.T)
        return local_points

    def project(self, points: np.ndarray):
        """Projects a 3D point (x,y,z) to a pixel position (x,y).

        Args:
          points: [A1, ..., An, 3] world points.

        Returns:
          The [A1, ..., An, 2] pixel positions.
        """
        batch_shape = points.shape[:-1]
        points = points.reshape((-1, 3))
        local_points = self.points_to_local_points(points)

        # Get normalized local pixel positions.
        x = local_points[..., 0] / local_points[..., 2]
        y = local_points[..., 1] / local_points[..., 2]
        r2 = x**2 + y**2

        # Apply radial distortion.
        distortion = 1.0 + r2 * (
            self.radial_distortion[0] + r2 *
            (self.radial_distortion[1] + self.radial_distortion[2] * r2))

        # Apply tangential distortion.
        x_times_y = x * y
        x = (
            x * distortion + 2.0 * self.tangential_distortion[0] * x_times_y +
            self.tangential_distortion[1] * (r2 + 2.0 * x**2))
        y = (
            y * distortion + 2.0 * self.tangential_distortion[1] * x_times_y +
            self.tangential_distortion[0] * (r2 + 2.0 * y**2))

        # Map the distorted ray to the image plane.
        pixel_x = (self.focal_length * x + self.skew * y
                   + self.principal_point_x)
        pixel_y = (self.focal_length * self.pixel_aspect_ratio * y
                   + self.principal_point_y)

        pixels = np.stack([pixel_x, pixel_y], axis=-1)
        return pixels.reshape((*batch_shape, 2))

    def get_pixel_centers(self):
        """Returns the pixel centers as a [size_y, size_x, 2] array of (x, y)."""
        xx, yy = np.meshgrid(np.arange(self.image_size_x, dtype=self.dtype),
                             np.arange(self.image_size_y, dtype=self.dtype))
        return np.stack([xx, yy], axis=-1) + 0.5

    def scale(self, scale: float):
        """Scales the camera.

        Args:
          scale: positive factor applied to the focal length, the principal
            point and the image size.

        Returns:
          A new, rescaled camera with the same dtype as this one.

        Raises:
          ValueError: if `scale` is not positive.
        """
        if scale <= 0:
            raise ValueError('scale needs to be positive.')

        # NOTE(review): skew is carried over unscaled; confirm that is intended.
        new_camera = Camera(
            orientation=self.orientation.copy(),
            position=self.position.copy(),
            focal_length=self.focal_length * scale,
            principal_point=self.principal_point.copy() * scale,
            skew=self.skew,
            pixel_aspect_ratio=self.pixel_aspect_ratio,
            radial_distortion=self.radial_distortion.copy(),
            tangential_distortion=self.tangential_distortion.copy(),
            image_size=np.array((int(round(self.image_size[0] * scale)),
                                 int(round(self.image_size[1] * scale)))),
            # Keep the precision of the source camera instead of silently
            # falling back to the constructor default.
            dtype=self.dtype,
        )
        return new_camera

    def look_at(self, position, look_at, up, eps=1e-6):
        """Creates a copy of the camera which looks at a given point.

        Returns a new camera that is positioned at `position` while looking at
        `look_at`. Camera intrinsics are copied by this method. A common value
        for the up vector is (0, 1, 0).

        Args:
          position: A (3,) numpy array representing the position of the camera.
          look_at: A (3,) numpy array representing the location the camera
            looks at.
          up: A (3,) numpy array representing the up direction, whose
            projection is parallel to the y-axis of the image plane.
          eps: a small number to prevent divides by zero.

        Returns:
          A new camera that is copied from the original but is positioned and
          looks at the provided coordinates.

        Raises:
          ValueError: If the camera position and look at position are very
            close to each other or if the up-vector is parallel to the
            requested optical axis.
        """
        position = np.asarray(position)
        look_at = np.asarray(look_at)
        up = np.asarray(up)

        look_at_camera = self.copy()
        optical_axis = look_at - position
        norm = np.linalg.norm(optical_axis)
        if norm < eps:
            raise ValueError(
                'The camera center and look at position are too close.')
        # Out-of-place division: in-place `/=` fails for integer inputs.
        optical_axis = optical_axis / norm

        right_vector = np.cross(optical_axis, up)
        norm = np.linalg.norm(right_vector)
        if norm < eps:
            raise ValueError('The up-vector is parallel to the optical axis.')
        right_vector = right_vector / norm

        # The three directions here are orthogonal to each other and form a
        # right handed coordinate system.
        camera_rotation = np.identity(3)
        camera_rotation[0, :] = right_vector
        camera_rotation[1, :] = np.cross(optical_axis, right_vector)
        camera_rotation[2, :] = optical_axis

        look_at_camera.position = position
        look_at_camera.orientation = camera_rotation
        return look_at_camera

    def crop_image_domain(
            self, left: int = 0, right: int = 0, top: int = 0,
            bottom: int = 0):
        """Returns a copy of the camera with adjusted image bounds.

        Args:
          left: number of pixels by which to reduce (or augment, if negative)
            the image domain at the associated boundary.
          right: likewise.
          top: likewise.
          bottom: likewise.

        The crop parameters may not cause the camera image domain dimensions to
        become non-positive.

        Returns:
          A camera with adjusted image dimensions. The focal length is
          unchanged, and the principal point is updated to preserve the
          original principal axis.

        Raises:
          ValueError: if the crop leaves a non-positive width or height.
        """
        crop_left_top = np.array([left, top])
        crop_right_bottom = np.array([right, bottom])
        # Work in signed integers so an over-sized crop shows up as a
        # non-positive size instead of wrapping around in uint32.
        new_resolution = (self.image_size.astype(np.int64)
                          - crop_left_top - crop_right_bottom)
        if np.any(new_resolution <= 0):
            raise ValueError(
                'Crop would result in non-positive image dimensions.')
        new_principal_point = self.principal_point - crop_left_top

        new_camera = self.copy()
        # Store with the same dtypes the constructor uses.
        new_camera.image_size = np.array(
            [int(new_resolution[0]), int(new_resolution[1])], np.uint32)
        new_camera.principal_point = np.array(
            [new_principal_point[0], new_principal_point[1]], self.dtype)
        return new_camera

    def copy(self):
        """Returns a deep copy of this camera."""
        return copy.deepcopy(self)
# Misc


def mse2psnr(x):
    """Returns `-10 * log10(x)` for a torch tensor `x` (MSE to PSNR)."""
    return -10. * torch.log10(x)


def to8b(x):
    """Clips `x` to [0, 1], multiplies by 255 and casts to uint8 (truncating)."""
    return (255 * np.clip(x, 0, 1)).astype(np.uint8)
''' Checkpoint utils
'''