|
|
|
from typing import List, Tuple |
|
|
|
import numpy as np |
|
import torch |
|
import torch.nn as nn |
|
from PIL import Image |
|
from torch import Tensor |
|
from torch.nn import functional as F |
|
|
|
from mmdet3d.models import Det3DDataPreprocessor |
|
from mmdet3d.models.data_preprocessors.voxelize import dynamic_scatter_3d |
|
from mmdet3d.registry import MODELS |
|
from mmdet3d.structures.det3d_data_sample import SampleList |
|
|
|
|
|
@MODELS.register_module() |
|
class TPVFormerDataPreprocessor(Det3DDataPreprocessor): |
|
|
|
@torch.no_grad() |
|
def voxelize(self, points: List[Tensor], |
|
data_samples: SampleList) -> List[Tensor]: |
|
"""Apply voxelization to point cloud. In TPVFormer, it will get voxel- |
|
wise segmentation label and voxel/point coordinates. |
|
|
|
Args: |
|
points (List[Tensor]): Point cloud in one data batch. |
|
data_samples: (List[:obj:`Det3DDataSample`]): The annotation data |
|
of every samples. Add voxel-wise annotation for segmentation. |
|
|
|
Returns: |
|
List[Tensor]: Coordinates of voxels, shape is Nx3, |
|
""" |
|
for point, data_sample in zip(points, data_samples): |
|
min_bound = point.new_tensor( |
|
self.voxel_layer.point_cloud_range[:3]) |
|
max_bound = point.new_tensor( |
|
self.voxel_layer.point_cloud_range[3:]) |
|
point_clamp = torch.clamp(point, min_bound, max_bound + 1e-6) |
|
coors = torch.floor( |
|
(point_clamp - min_bound) / |
|
point_clamp.new_tensor(self.voxel_layer.voxel_size)).int() |
|
self.get_voxel_seg(coors, data_sample) |
|
data_sample.point_coors = coors |
|
|
|
def get_voxel_seg(self, res_coors: Tensor, data_sample: SampleList): |
|
"""Get voxel-wise segmentation label and point2voxel map. |
|
|
|
Args: |
|
res_coors (Tensor): The voxel coordinates of points, Nx3. |
|
data_sample: (:obj:`Det3DDataSample`): The annotation data of |
|
every samples. Add voxel-wise annotation forsegmentation. |
|
""" |
|
|
|
if self.training: |
|
pts_semantic_mask = data_sample.gt_pts_seg.pts_semantic_mask |
|
pts_semantic_mask = F.one_hot(pts_semantic_mask.long()).float() |
|
voxel_semantic_mask, voxel_coors, point2voxel_map = \ |
|
dynamic_scatter_3d(pts_semantic_mask, res_coors, 'mean', True) |
|
voxel_semantic_mask = torch.argmax(voxel_semantic_mask, dim=-1) |
|
data_sample.gt_pts_seg.voxel_semantic_mask = voxel_semantic_mask |
|
data_sample.point2voxel_map = point2voxel_map |
|
data_sample.voxel_coors = voxel_coors |
|
else: |
|
pseudo_tensor = res_coors.new_ones([res_coors.shape[0], 1]).float() |
|
_, _, point2voxel_map = dynamic_scatter_3d(pseudo_tensor, |
|
res_coors, 'mean', True) |
|
data_sample.point2voxel_map = point2voxel_map |
|
|
|
|
|
@MODELS.register_module() |
|
class GridMask(nn.Module): |
|
"""GridMask data augmentation. |
|
|
|
Modified from https://github.com/dvlab-research/GridMask. |
|
|
|
Args: |
|
use_h (bool): Whether to mask on height dimension. Defaults to True. |
|
use_w (bool): Whether to mask on width dimension. Defaults to True. |
|
rotate (int): Rotation degree. Defaults to 1. |
|
offset (bool): Whether to mask offset. Defaults to False. |
|
ratio (float): Mask ratio. Defaults to 0.5. |
|
mode (int): Mask mode. if mode == 0, mask with square grid. |
|
if mode == 1, mask the rest. Defaults to 0 |
|
prob (float): Probability of applying the augmentation. |
|
Defaults to 1.0. |
|
""" |
|
|
|
def __init__(self, |
|
use_h: bool = True, |
|
use_w: bool = True, |
|
rotate: int = 1, |
|
offset: bool = False, |
|
ratio: float = 0.5, |
|
mode: int = 0, |
|
prob: float = 1.0): |
|
super().__init__() |
|
self.use_h = use_h |
|
self.use_w = use_w |
|
self.rotate = rotate |
|
self.offset = offset |
|
self.ratio = ratio |
|
self.mode = mode |
|
self.prob = prob |
|
|
|
def forward(self, inputs: Tensor, |
|
data_samples: SampleList) -> Tuple[Tensor, SampleList]: |
|
if np.random.rand() > self.prob: |
|
return inputs, data_samples |
|
height, width = inputs.shape[-2:] |
|
mask_height = int(1.5 * height) |
|
mask_width = int(1.5 * width) |
|
distance = np.random.randint(2, min(height, width)) |
|
length = min(max(int(distance * self.ratio + 0.5), 1), distance - 1) |
|
mask = np.ones((mask_height, mask_width), np.float32) |
|
stride_on_height = np.random.randint(distance) |
|
stride_on_width = np.random.randint(distance) |
|
if self.use_h: |
|
for i in range(mask_height // distance): |
|
start = distance * i + stride_on_height |
|
end = min(start + length, mask_height) |
|
mask[start:end, :] *= 0 |
|
if self.use_w: |
|
for i in range(mask_width // distance): |
|
start = distance * i + stride_on_width |
|
end = min(start + length, mask_width) |
|
mask[:, start:end] *= 0 |
|
|
|
|
|
|
|
|
|
|
|
r = np.random.randint(self.rotate) |
|
mask = Image.fromarray(np.uint8(mask)) |
|
|
|
mask = mask.rotate(r) |
|
mask = np.array(mask) |
|
mask = mask[int(0.25 * height):int(0.25 * height) + height, |
|
int(0.25 * width):int(0.25 * width) + width] |
|
|
|
mask = inputs.new_tensor(mask) |
|
if self.mode == 1: |
|
mask = 1 - mask |
|
mask = mask.expand_as(inputs) |
|
if self.offset: |
|
offset = inputs.new_tensor(2 * |
|
(np.random.rand(height, width) - 0.5)) |
|
inputs = inputs * mask + offset * (1 - mask) |
|
else: |
|
inputs = inputs * mask |
|
|
|
return inputs, data_samples |
|
|