Spaces:
Running
on
Zero
Running
on
Zero
import torch | |
import numpy as np | |
def normalize_vecs(vectors: torch.Tensor, eps: float = 1e-12) -> torch.Tensor:
    """
    Normalize vectors to unit length along the last dimension.
    vectors: (..., D)
    eps: lower bound applied to the norm before dividing, so that zero-length
        vectors come back as zeros instead of NaN
    return: (..., D), same shape as the input
    """
    lengths = torch.norm(vectors, dim=-1, keepdim=True)
    # Flooring the norm only affects (near-)zero vectors; every other vector
    # is divided by its exact length.
    return vectors / lengths.clamp_min(eps)
def blender_to_opencv(camera_matrix: torch.Tensor):
    """
    Convert a Blender World-to-Camera matrix into OpenCV space by flipping the y and z axes.
    Blender camera system: x-right, y-up, z-backward
    OpenCV camera system: x-right, y-down, z-forward
    camera_matrix: 4x4 matrix, or a batch of them with one leading dimension
    return: tensor of the same shape, dtype and device as camera_matrix
    """
    axis_signs = [
        [1, 0, 0, 0],
        [0, -1, 0, 0],
        [0, 0, -1, 0],
        [0, 0, 0, 1],
    ]
    # Match the dtype and device of the input before multiplying.
    flip = torch.tensor(axis_signs).to(camera_matrix)
    if camera_matrix.ndim == 3:
        # add a leading batch axis so the flip lines up with the batched input
        flip = flip[None]
    # Left-multiplication negates the y and z rows of the matrix.
    return flip @ camera_matrix
def pad_camera_extrinsics_4x4(extrinsics):
    """
    Pad camera extrinsics to homogeneous 4x4 form by appending a [0, 0, 0, 1] row.
    extrinsics: (..., 3, 4) or (..., 4, 4), with any number of leading batch dimensions
    return: (..., 4, 4); the input object itself is returned when it already has 4 rows
    """
    if extrinsics.shape[-2] == 4:
        return extrinsics
    # Build the homogeneous row with the dtype and device of the input.
    bottom_row = torch.tensor([[0, 0, 0, 1]]).to(extrinsics)
    # Broadcast the row over every leading batch dimension (a view, no copy),
    # so unbatched, (N, 3, 4) and deeper-batched inputs all take the same path.
    bottom_row = bottom_row.expand(*extrinsics.shape[:-2], 1, 4)
    return torch.cat([extrinsics, bottom_row], dim=-2)
def create_camera_to_world(camera_position: torch.Tensor, look_at: torch.Tensor = None, up_world: torch.Tensor = None, camera_system: str = 'opencv'):
    """
    Create OpenCV or OpenGL camera extrinsics from camera locations and look-at position.
    camera_position: (M, 3) or (3,)
    look_at: (3), defaults to the world origin
    up_world: (3), defaults to the world z-axis
    camera_system: 'opencv' (x-right, y-down, z-forward) or 'opengl' (x-right, y-up, z-backward)
    return: (M, 4, 4) or (4, 4); the columns are the camera x, y, z axes and the
        camera position, padded with a [0, 0, 0, 1] bottom row

    NOTE: when the viewing direction is parallel to up_world the x-axis cross
    product is zero, so the resulting rotation is degenerate.
    """
    # by default, looking at the origin and world up is z-axis
    if look_at is None:
        look_at = torch.tensor([0, 0, 0], dtype=torch.float32)
    if up_world is None:
        up_world = torch.tensor([0, 0, 1], dtype=torch.float32)
    # Keep the reference vectors on the same device as the camera positions so
    # the CPU-created defaults also work with GPU inputs.
    look_at = look_at.to(camera_position.device)
    up_world = up_world.to(camera_position.device)
    if camera_position.ndim == 2:
        look_at = look_at.unsqueeze(0).repeat(camera_position.shape[0], 1)
        up_world = up_world.unsqueeze(0).repeat(camera_position.shape[0], 1)

    assert camera_system in ['opencv', 'opengl']
    # dim=-1 is passed explicitly to every cross product: without it torch.cross
    # uses the first dimension of size 3, which is the batch axis when M == 3.
    if camera_system == 'opencv':
        # OpenCV camera: z-forward, x-right, y-down
        z_axis = look_at - camera_position
        z_axis = normalize_vecs(z_axis).float()
        x_axis = torch.cross(z_axis, up_world, dim=-1)
        x_axis = normalize_vecs(x_axis).float()
        y_axis = torch.cross(z_axis, x_axis, dim=-1)
        y_axis = normalize_vecs(y_axis).float()
    else:
        # OpenGL camera: z-backward, x-right, y-up
        z_axis = camera_position - look_at
        z_axis = normalize_vecs(z_axis).float()
        x_axis = torch.cross(up_world, z_axis, dim=-1)
        x_axis = normalize_vecs(x_axis).float()
        y_axis = torch.cross(z_axis, x_axis, dim=-1)
        y_axis = normalize_vecs(y_axis).float()

    # Stack the axes and the position as matrix columns, then add the homogeneous row.
    extrinsics = torch.stack([x_axis, y_axis, z_axis, camera_position], dim=-1)
    extrinsics = pad_camera_extrinsics_4x4(extrinsics)
    return extrinsics
def FOV_to_intrinsics(fov, device='cpu'):
    """
    Build a 3x3 camera intrinsics matrix from a field of view given in degrees.
    The matrix is normalized by image size (not in pixel units) and places the
    principal point at the image center (0.5, 0.5).
    fov: field of view in degrees
    device: device on which the returned tensor is created
    return: (3, 3) intrinsics tensor
    """
    half_angle = np.deg2rad(fov) * 0.5
    # focal length in units of the image size
    focal = 0.5 / np.tan(half_angle)
    rows = [
        [focal, 0, 0.5],
        [0, focal, 0.5],
        [0, 0, 1],
    ]
    return torch.tensor(rows, device=device)
def normalize_cameras(extrinsics, camera_position: torch.Tensor = None, camera_system: str = 'opencv', canonical_index=0):
    """
    Normalize the camera at canonical_index to the canonical camera position, and transform other cameras accordingly.
    extrinsics: (N, 4, 4)
    camera_position: canonical camera position, defaults to [[0, -2, 0]]
    camera_system: 'opencv' or 'opengl', forwarded to create_camera_to_world
    canonical_index: index of the camera that is mapped onto the canonical pose
    return: (normalized_extrinsics, scale), where scale is the factor applied to
        every camera translation

    The input tensor is not modified. A canonical camera located exactly at the
    origin has zero distance, so the scale is not finite in that case.
    """
    if camera_position is None:
        camera_position = torch.tensor([[0, -2, 0]]).float()

    assert camera_system in ['opencv', 'opengl']

    # Work on a copy so the caller's tensor is not rescaled in place.
    extrinsics = extrinsics.clone()

    canonical_distance = camera_position.norm()

    # distance of the canonical (conditioning) camera from the origin
    cond_camera_distance = extrinsics[canonical_index, :3, 3].norm(dim=-1, keepdim=False)

    # scale camera distances so the canonical camera sits at the canonical distance
    scale = canonical_distance / cond_camera_distance
    extrinsics[:, :3, 3] = extrinsics[:, :3, 3] * scale

    # rotate all cameras with the transform that maps the canonical camera onto the canonical pose
    canonical_extrinsic = create_camera_to_world(camera_position, camera_system=camera_system).to(extrinsics)
    # Index then unsqueeze rather than slice [i:i+1]: the slice is empty for canonical_index == -1.
    cond_extrinsic = extrinsics[canonical_index].unsqueeze(0)
    transform_matrix = torch.matmul(canonical_extrinsic, torch.linalg.inv(cond_extrinsic))
    normalized_extrinsics = torch.matmul(transform_matrix, extrinsics)

    return normalized_extrinsics, scale