Spaces:
Runtime error
Runtime error
# TRI-VIDAR - Copyright 2022 Toyota Research Institute. All rights reserved. | |
from copy import deepcopy | |
import cv2 | |
import numpy as np | |
import torch | |
import torchvision.transforms as transforms | |
from torchvision.transforms import InterpolationMode | |
from vidar.utils.data import keys_with | |
from vidar.utils.decorators import iterate1 | |
from vidar.utils.types import is_seq | |
def resize_pil(image, shape, interpolation=InterpolationMode.LANCZOS):
    """
    Resize an image through a torchvision ``Resize`` transform

    Parameters
    ----------
    image : Image PIL
        Input image
    shape : Tuple
        Output shape [H,W]
    interpolation : InterpolationMode
        Interpolation mode forwarded to the transform

    Returns
    -------
    image : Image PIL
        Resized image
    """
    # Build the transform and apply it in a single step
    return transforms.Resize(shape, interpolation=interpolation)(image)
def resize_npy(depth, shape, expand=True):
    """
    Resize a depth map using nearest-neighbor sampling

    Parameters
    ----------
    depth : np.Array
        Depth map [h,w]
    shape : Tuple or number
        Output shape (H,W). A single number is treated as a resize ratio
        applied to the input height and width
    expand : Bool
        Insert a new axis at position 2 of the output ([H,W] -> [H,W,1])

    Returns
    -------
    depth : np.Array
        Resized depth map, [H,W,1] if expand is True and [H,W] otherwise
    """
    # If a single number is provided, use it as a resize ratio. Only the two
    # leading (spatial) dimensions are scaled: cv2.resize needs a 2-element
    # dsize, so scaling a trailing channel dimension as well would fail.
    if not is_seq(shape):
        shape = tuple(int(s * shape) for s in depth.shape[:2])
    # dsize is given as (width, height), hence the reversed (H,W) shape
    resized = cv2.resize(depth, dsize=tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST)
    # Return resized depth map, optionally with the extra axis
    return np.expand_dims(resized, axis=2) if expand else resized
def resize_npy_preserve(depth, shape):
    """
    Resize a depth map while keeping every valid (positive) depth pixel

    Each valid pixel is moved to its scaled coordinate in the output, so
    several input pixels can land on the same output pixel (one of them is
    kept). Output pixels that receive no value stay at zero.

    Parameters
    ----------
    depth : np.Array
        Depth map [h,w] (singleton axes are squeezed away first)
    shape : Tuple or number
        Output shape (H,W). A single number is treated as a resize ratio
        applied to the input height and width

    Returns
    -------
    depth : np.Array
        Resized depth map [H,W,1], allocated with the np.zeros default dtype
    """
    # Squeeze before anything else so that a ratio is applied to (h,w) only;
    # computing it from an unsqueezed [h,w,1] shape would yield three entries
    depth = np.squeeze(depth)
    h, w = depth.shape
    # If a single number is provided, use it as a resize ratio
    if not is_seq(shape):
        shape = tuple(int(s * shape) for s in (h, w))
    # Flatten values and build the matching (row, col) coordinate list
    values = depth.reshape(-1)
    grid = np.mgrid[:h, :w].transpose(1, 2, 0).reshape(-1, 2)
    # Keep valid points only (boolean indexing returns copies, safe to edit)
    valid = values > 0
    crd, val = grid[valid], values[valid]
    # Scale coordinates to the output resolution (truncating to integers)
    crd[:, 0] = (crd[:, 0] * (shape[0] / h)).astype(np.int32)
    crd[:, 1] = (crd[:, 1] * (shape[1] / w)).astype(np.int32)
    # Discard points that fall outside of the output image
    inside = (crd[:, 0] < shape[0]) & (crd[:, 1] < shape[1])
    crd, val = crd[inside], val[inside]
    # Create the output depth image and scatter the surviving points into it
    resized = np.zeros(shape)
    resized[crd[:, 0], crd[:, 1]] = val
    # Return resized depth map with a trailing channel axis
    return np.expand_dims(resized, axis=2)
def resize_torch_preserve(depth, shape):
    """
    Resize a depth map while keeping every valid (positive) depth pixel

    Each valid pixel is moved to its scaled coordinate in the output, so
    several input pixels can land on the same output pixel. Output pixels
    that receive no value stay at zero.

    Parameters
    ----------
    depth : torch.Tensor
        Depth map [B,1,h,w] or [1,h,w]
    shape : Tuple or number
        Output shape (H,W). A single number is treated as a resize ratio
        applied to the input height and width

    Returns
    -------
    depth : torch.Tensor
        Resized depth map [B,1,H,W] or [1,H,W], on the same device and with
        the same dtype as the input
    """
    # Batched input: process each item independently and stack the results
    if depth.dim() == 4:
        return torch.stack([resize_torch_preserve(item, shape) for item in depth], 0)
    # Store dimensions (the leading channel dimension is expected to be 1)
    _, h, w = depth.shape
    # If a single number is provided, use it as a resize ratio on (h,w) only;
    # scaling the channel dimension as well would give a wrong target shape
    if not is_seq(shape):
        shape = tuple(int(s * shape) for s in (h, w))
    # Work on a 2D view; this raises if there is more than one channel
    plane = depth.reshape(h, w)
    # Valid points and their (row, col) coordinates, in row-major order.
    # Everything stays in torch on the input device, so CUDA tensors work
    # (mixing a NumPy grid with a tensor mask needs a host conversion).
    valid = plane > 0
    rows, cols = torch.nonzero(valid, as_tuple=True)
    val = plane[valid]
    # Scale coordinates to the output resolution; double precision matches
    # NumPy float arithmetic and .long() truncates like an integer cast
    rows = (rows.double() * (shape[0] / h)).long()
    cols = (cols.double() * (shape[1] / w)).long()
    # Discard points that fall outside of the output image
    inside = (rows < shape[0]) & (cols < shape[1])
    rows, cols, val = rows[inside], cols[inside], val[inside]
    # Create the output depth image and scatter the surviving points into it
    resized = torch.zeros(tuple(shape), device=depth.device, dtype=depth.dtype)
    resized[rows, cols] = val
    # Return resized depth map with the channel dimension restored
    return resized.unsqueeze(0)
def resize_npy_multiply(data, shape):
    """
    Resize a numpy array and scale its first two channels accordingly

    Parameters
    ----------
    data : np.Array
        Input array; its first two dimensions are compared against shape
    shape : Tuple
        Output shape, indexed as (shape[0], shape[1])

    Returns
    -------
    data : np.Array
        Resized array with channel 0 multiplied by the axis-1 ratio and
        channel 1 multiplied by the axis-0 ratio
    """
    # Output/input size ratios along the first and second array axes
    scale_axis0 = shape[0] / data.shape[0]
    scale_axis1 = shape[1] / data.shape[1]
    # Nearest-neighbor resize without the extra trailing axis
    resized = resize_npy(data, shape, expand=False)
    # Rescale the stored values in place
    # (presumably channel 0 is horizontal and channel 1 vertical displacement — TODO confirm)
    resized[..., 0] *= scale_axis1
    resized[..., 1] *= scale_axis0
    return resized
def resize_intrinsics(intrinsics, original, resized):
    """
    Scale a camera intrinsics matrix to a new image resolution

    Parameters
    ----------
    intrinsics : np.Array
        Original intrinsics matrix [3,3]
    original : Tuple
        Original image resolution [W,H]
    resized : Tuple
        Target image resolution [w,h]

    Returns
    -------
    intrinsics : np.Array
        Resized intrinsics matrix [3,3] (a copy; the input is not modified)
    """
    # Work on a copy so the caller's matrix stays untouched
    scaled = np.copy(intrinsics)
    # Per-axis resolution ratios
    ratio_first = resized[0] / original[0]
    ratio_second = resized[1] / original[1]
    # First row follows the first resolution entry, second row the second one
    scaled[0] *= ratio_first
    scaled[1] *= ratio_second
    return scaled
def resize_sample_input(sample, shape, shape_supervision=None,
                        depth_downsample=1.0, preserve_depth=False,
                        pil_interpolation=InterpolationMode.LANCZOS):
    """
    Resize the input entries of a sample (intrinsics, rgb, masks, input depth)

    Parameters
    ----------
    sample : Dict
        Dictionary with sample values (modified in place)
    shape : tuple (H,W)
        Output shape
    shape_supervision : Tuple
        Output supervision shape (H,W); not used by this function
    depth_downsample: Float
        Resize ratio for depth maps, applied on top of shape
    preserve_depth : Bool
        Preserve depth maps when resizing
    pil_interpolation : Int
        Interpolation mode for rgb entries

    Returns
    -------
    sample : Dict
        Resized sample
    """
    # NOTE(review): sample['rgb'] is read as a dict below, while each matched
    # entry is handed whole to the resize helpers — confirm how dict-valued
    # entries are meant to be handled by those helpers.

    # Intrinsics: back up the original under 'raw_<key>' once, then rescale
    for key in keys_with(sample, 'intrinsics', without='raw'):
        raw_key = f'raw_{key}'
        if raw_key not in sample:
            sample[raw_key] = deepcopy(sample[key])
        # Original resolution comes from the first rgb entry's .size; the
        # (H,W) target is reversed to pair with it
        first_rgb = list(sample['rgb'].values())[0]
        sample[key] = resize_intrinsics(sample[key], first_rgb.size, shape[::-1])

    # RGB: caller-selected interpolation
    for key in keys_with(sample, 'rgb', without='raw'):
        sample[key] = resize_pil(sample[key], shape, interpolation=pil_interpolation)

    # Mask: nearest interpolation
    for key in keys_with(sample, 'mask', without='raw'):
        sample[key] = resize_pil(sample[key], shape, interpolation=InterpolationMode.NEAREST)

    # Input depth: pick the resize routine once, scale the target per entry
    depth_resizer = resize_npy_preserve if preserve_depth else resize_npy
    for key in keys_with(sample, 'input_depth'):
        target = [int(dim * depth_downsample) for dim in shape]
        sample[key] = depth_resizer(sample[key], target)

    return sample
def resize_sample_supervision(sample, shape, depth_downsample=1.0, preserve_depth=False):
    """
    Resize the supervision entries of a sample (depth, semantic, flows)

    Parameters
    ----------
    sample : Dict
        Dictionary with sample values (modified in place)
    shape : Tuple
        Output shape (H,W)
    depth_downsample: Float
        Resize ratio for depth maps, applied on top of shape
    preserve_depth : Bool
        Preserve depth maps when resizing

    Returns
    -------
    sample : Dict
        Resized sample
    """
    # Depth (input depth is excluded): pick the routine once, scale per entry
    depth_resizer = resize_npy_preserve if preserve_depth else resize_npy
    for key in keys_with(sample, 'depth', without='input_depth'):
        target = [int(dim * depth_downsample) for dim in shape]
        sample[key] = depth_resizer(sample[key], target)

    # Semantic: nearest resize, no extra axis
    for key in keys_with(sample, 'semantic'):
        sample[key] = resize_npy(sample[key], shape, expand=False)

    # Optical flow: resize and rescale the stored values
    for key in keys_with(sample, 'optical_flow'):
        sample[key] = resize_npy_multiply(sample[key], shape)

    # Scene flow: nearest resize, values left untouched
    for key in keys_with(sample, 'scene_flow'):
        sample[key] = resize_npy(sample[key], shape, expand=False)

    return sample
def resize_sample(sample, shape, shape_supervision=None, depth_downsample=1.0, preserve_depth=False,
                  pil_interpolation=InterpolationMode.LANCZOS):
    """
    Resizes a sample, including image, intrinsics and depth maps.

    Parameters
    ----------
    sample : Dict
        Dictionary with sample values
    shape : Tuple
        Output shape (H,W) for the input information
    shape_supervision : Tuple
        Output shape (H,W) for the supervision information.
        Defaults to shape when not provided
    depth_downsample: Float
        Resize ratio for depth maps
    preserve_depth : Bool
        Preserve depth maps when resizing
    pil_interpolation : Int
        Interpolation mode

    Returns
    -------
    sample : Dict
        Resized sample
    """
    # Forwarding None would make the supervision step fail as soon as the
    # sample holds any supervision entry, so fall back to the input shape
    if shape_supervision is None:
        shape_supervision = shape
    # Resize input information
    sample = resize_sample_input(sample, shape,
                                 depth_downsample=depth_downsample,
                                 preserve_depth=preserve_depth,
                                 pil_interpolation=pil_interpolation)
    # Resize output information
    sample = resize_sample_supervision(sample, shape_supervision,
                                       depth_downsample=depth_downsample,
                                       preserve_depth=preserve_depth)
    # Return resized sample
    return sample