File size: 4,544 Bytes
184193d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import torch
import numpy as np


def normalize_vecs(vectors: torch.Tensor) -> torch.Tensor:
    """
    Scale every vector along the last dimension to unit Euclidean length.

    Zero-length vectors are not special-cased and are divided by a zero norm.
    """
    lengths = vectors.norm(dim=-1, keepdim=True)
    return vectors / lengths


def blender_to_opencv(camera_matrix: torch.Tensor):
    """
    Re-express a Blender World-to-Camera matrix in the OpenCV camera convention.

    Blender cameras are x-right, y-up, z-backward; OpenCV cameras are x-right,
    y-down, z-forward. The conversion left-multiplies by diag(1, -1, -1, 1),
    i.e. it negates the y and z rows of the matrix.

    camera_matrix: (4, 4) or (N, 4, 4)
    return: tensor of the same shape, dtype and device as camera_matrix
    """
    axis_flip = torch.diag(torch.tensor([1, -1, -1, 1]))
    if camera_matrix.ndim == 3:
        # add a leading batch axis so the flip broadcasts over all matrices
        axis_flip = axis_flip[None]
    # .to(camera_matrix) matches both dtype and device of the input
    return torch.matmul(axis_flip.to(camera_matrix), camera_matrix)


def pad_camera_extrinsics_4x4(extrinsics):
    """
    Append the homogeneous row [0, 0, 0, 1] to 3x4 camera extrinsics.

    extrinsics: (..., 3, 4) or (..., 4, 4), with any number of leading batch dimensions
    return: (..., 4, 4); an input that already has 4 rows is returned as-is (same object)
    """
    if extrinsics.shape[-2] == 4:
        return extrinsics
    # .to(extrinsics) matches both dtype and device of the input
    bottom_row = torch.tensor([0, 0, 0, 1]).to(extrinsics)
    # broadcast the row over every leading batch dimension without copying it
    bottom_row = bottom_row.expand(*extrinsics.shape[:-2], 1, 4)
    return torch.cat([extrinsics, bottom_row], dim=-2)


def create_camera_to_world(camera_position: torch.Tensor, look_at: torch.Tensor = None, up_world: torch.Tensor = None, camera_system: str = 'opencv'):
    """
    Create OpenCV or OpenGL camera-to-world extrinsics from camera locations and a look-at position.

    camera_position: (M, 3) or (3,)
    look_at: (3), defaults to the world origin
    up_world: (3), defaults to the world z-axis
    camera_system: 'opencv' (x-right, y-down, z-forward) or 'opengl' (x-right, y-up, z-backward)
    return: (M, 4, 4) or (4, 4); the columns are the camera x/y/z axes and the camera
        position, padded with a [0, 0, 0, 1] bottom row

    The result contains NaNs when the viewing direction is parallel to up_world or when
    camera_position coincides with look_at, because the axes cannot be normalized.
    """
    # by default, looking at the origin and world up is z-axis;
    # the defaults are created on the device of camera_position so they can be combined with it
    if look_at is None:
        look_at = torch.tensor([0, 0, 0], dtype=torch.float32, device=camera_position.device)
    if up_world is None:
        up_world = torch.tensor([0, 0, 1], dtype=torch.float32, device=camera_position.device)
    if camera_position.ndim == 2:
        look_at = look_at.unsqueeze(0).repeat(camera_position.shape[0], 1)
        up_world = up_world.unsqueeze(0).repeat(camera_position.shape[0], 1)

    assert camera_system in ['opencv', 'opengl']
    # dim=-1 is passed explicitly to every cross product: without it torch.cross uses the
    # first dimension of size 3, which is the batch axis when exactly 3 cameras are given
    if camera_system == 'opencv':
        # OpenCV camera: z-forward, x-right, y-down
        z_axis = look_at - camera_position
        z_axis = normalize_vecs(z_axis).float()
        x_axis = torch.cross(z_axis, up_world, dim=-1)
        x_axis = normalize_vecs(x_axis).float()
        y_axis = torch.cross(z_axis, x_axis, dim=-1)
        y_axis = normalize_vecs(y_axis).float()
    else:
        # OpenGL camera: z-backward, x-right, y-up
        z_axis = camera_position - look_at
        z_axis = normalize_vecs(z_axis).float()
        x_axis = torch.cross(up_world, z_axis, dim=-1)
        x_axis = normalize_vecs(x_axis).float()
        y_axis = torch.cross(z_axis, x_axis, dim=-1)
        y_axis = normalize_vecs(y_axis).float()

    # stack the axes and the position as matrix columns, then add the homogeneous row
    extrinsics = torch.stack([x_axis, y_axis, z_axis, camera_position], dim=-1)
    extrinsics = pad_camera_extrinsics_4x4(extrinsics)
    return extrinsics


def FOV_to_intrinsics(fov, device='cpu'):
    """
    Build a 3x3 camera intrinsics matrix from a field of view given in degrees.

    The matrix is normalized by image size instead of being expressed in pixels:
    the same focal length is used for both axes and the principal point is fixed
    at the image center (0.5, 0.5).
    """
    half_angle = np.deg2rad(fov) * 0.5
    focal = 0.5 / np.tan(half_angle)
    rows = [
        [focal, 0, 0.5],
        [0, focal, 0.5],
        [0, 0, 1],
    ]
    return torch.tensor(rows, device=device)


def normalize_cameras(extrinsics, camera_position: torch.Tensor = None, camera_system: str = 'opencv', canonical_index=0):
    """
    Normalize the camera at canonical_index to the canonical camera pose, and transform other cameras accordingly.

    extrinsics: (N, 4, 4); treated as camera-to-world matrices (the translation column is
        used as the camera location and its norm as the camera distance). Not modified.
    camera_position: canonical camera location passed to create_camera_to_world,
        defaults to [[0, -2, 0]]
    camera_system: 'opencv' or 'opengl'
    canonical_index: index of the camera that is mapped onto the canonical pose (may be negative)
    return: (normalized_extrinsics, scale), where scale is the factor that was applied to
        all camera translations
    """
    if camera_position is None:
        camera_position = torch.tensor([[0, -2, 0]]).float()
    assert camera_system in ['opencv', 'opengl']

    canonical_distance = camera_position.norm()

    # work on a copy so the caller's tensor is not rescaled as a side effect
    extrinsics = extrinsics.clone()

    # distance of the conditional camera, measured as the norm of its translation
    cond_camera_distance = extrinsics[canonical_index, :3, 3].norm(dim=-1, keepdim=False)

    # scale camera distances so the conditional camera sits at the canonical distance
    scale = canonical_distance / cond_camera_distance
    extrinsics[:, :3, 3] = extrinsics[:, :3, 3] * scale

    # rotate all cameras by the transform that maps the conditional camera onto the canonical one
    canonical_extrinsic = create_camera_to_world(camera_position, camera_system=camera_system).to(extrinsics)
    # index + unsqueeze rather than a [i:i+1] slice, which is empty for canonical_index == -1
    cond_extrinsic = extrinsics[canonical_index].unsqueeze(0)
    transform_matrix = torch.matmul(canonical_extrinsic, torch.linalg.inv(cond_extrinsic))
    normalized_extrinsics = torch.matmul(transform_matrix, extrinsics)

    return normalized_extrinsics, scale