# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import copy
import random
import warnings
from itertools import product
from typing import Dict, Iterable, List, Optional, Sequence, Tuple, Union

import mmcv
import mmengine
import numpy as np
from mmcv.image.geometric import _scale_size

from .base import BaseTransform
from .builder import TRANSFORMS
from .utils import cache_randomness
from .wrappers import Compose

Number = Union[int, float]

@TRANSFORMS.register_module()
class Normalize(BaseTransform):
    """Normalize the image.

    Required Keys:

    - img

    Modified Keys:

    - img

    Added Keys:

    - img_norm_cfg

      - mean
      - std
      - to_rgb

    Args:
        mean (sequence): Mean values of 3 channels.
        std (sequence): Std values of 3 channels.
        to_rgb (bool): Whether to convert the image from BGR to RGB before
            normalizing the image. If ``to_rgb=True``, the order of mean and
            std should be RGB. If ``to_rgb=False``, the order of mean and std
            should be the same order of the image. Defaults to True.
    """

    def __init__(self,
                 mean: Sequence[Number],
                 std: Sequence[Number],
                 to_rgb: bool = True) -> None:
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)
        self.to_rgb = to_rgb

    def transform(self, results: dict) -> dict:
        """Function to normalize images.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Normalized results, the 'img_norm_cfg' key is added to the
            result dict.
        """
        results['img'] = mmcv.imnormalize(results['img'], self.mean, self.std,
                                          self.to_rgb)
        results['img_norm_cfg'] = dict(
            mean=self.mean, std=self.std, to_rgb=self.to_rgb)
        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(mean={self.mean}, std={self.std}, to_rgb={self.to_rgb})'
        return repr_str
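
# Usage sketch for ``Normalize`` (illustrative; the ImageNet-style mean/std
# values below are example numbers, not values mandated by this module):
#
#   normalize = Normalize(mean=[123.675, 116.28, 103.53],
#                         std=[58.395, 57.12, 57.375], to_rgb=True)
#   results = dict(img=np.zeros((64, 64, 3), dtype=np.uint8))
#   results = normalize(results)  # BaseTransform makes instances callable
#   # results['img'] is now float32; results['img_norm_cfg'] records the cfg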

@TRANSFORMS.register_module()
class Resize(BaseTransform):
    """Resize images & bbox & seg & keypoints.

    This transform resizes the input image according to ``scale`` or
    ``scale_factor``. Bboxes, seg map and keypoints are then resized with the
    same scale factor. If ``scale`` and ``scale_factor`` are both set, it will
    use ``scale`` to resize.

    Required Keys:

    - img
    - gt_bboxes (optional)
    - gt_seg_map (optional)
    - gt_keypoints (optional)

    Modified Keys:

    - img
    - gt_bboxes
    - gt_seg_map
    - gt_keypoints
    - img_shape

    Added Keys:

    - scale
    - scale_factor
    - keep_ratio

    Args:
        scale (int or tuple): Image scales for resizing. Defaults to None.
        scale_factor (float or tuple[float]): Scale factors for resizing.
            Defaults to None.
        keep_ratio (bool): Whether to keep the aspect ratio when resizing the
            image. Defaults to False.
        clip_object_border (bool): Whether to clip the objects
            outside the border of the image. In some datasets like MOT17, the
            gt bboxes are allowed to cross the border of images. Therefore, we
            don't need to clip the gt bboxes in these cases. Defaults to True.
        backend (str): Image resize backend, choices are 'cv2' and 'pillow'.
            These two backends generate slightly different results. Defaults
            to 'cv2'.
        interpolation (str): Interpolation method, accepted values are
            "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
            backend, "nearest", "bilinear" for 'pillow' backend. Defaults
            to 'bilinear'.
    """

    def __init__(self,
                 scale: Optional[Union[int, Tuple[int, int]]] = None,
                 scale_factor: Optional[Union[float, Tuple[float,
                                                           float]]] = None,
                 keep_ratio: bool = False,
                 clip_object_border: bool = True,
                 backend: str = 'cv2',
                 interpolation='bilinear') -> None:
        assert scale is not None or scale_factor is not None, (
            '`scale` and `scale_factor` can not both be `None`')

        if scale is None:
            self.scale = None
        else:
            if isinstance(scale, int):
                self.scale = (scale, scale)
            else:
                self.scale = scale

        self.backend = backend
        self.interpolation = interpolation
        self.keep_ratio = keep_ratio
        self.clip_object_border = clip_object_border
        if scale_factor is None:
            self.scale_factor = None
        elif isinstance(scale_factor, float):
            self.scale_factor = (scale_factor, scale_factor)
        elif isinstance(scale_factor, tuple):
            assert len(scale_factor) == 2
            self.scale_factor = scale_factor
        else:
            raise TypeError(
                'expected `scale_factor` to be a float or a tuple of float, '
                f'but got {type(scale_factor)}')

    def _resize_img(self, results: dict) -> None:
        """Resize images with ``results['scale']``."""
        if results.get('img', None) is not None:
            if self.keep_ratio:
                img, scale_factor = mmcv.imrescale(
                    results['img'],
                    results['scale'],
                    interpolation=self.interpolation,
                    return_scale=True,
                    backend=self.backend)
                # the w_scale and h_scale have a minor difference;
                # a real fix should be done in mmcv.imrescale in the future
                new_h, new_w = img.shape[:2]
                h, w = results['img'].shape[:2]
                w_scale = new_w / w
                h_scale = new_h / h
            else:
                img, w_scale, h_scale = mmcv.imresize(
                    results['img'],
                    results['scale'],
                    interpolation=self.interpolation,
                    return_scale=True,
                    backend=self.backend)
            results['img'] = img
            results['img_shape'] = img.shape[:2]
            results['scale_factor'] = (w_scale, h_scale)
            results['keep_ratio'] = self.keep_ratio

    def _resize_bboxes(self, results: dict) -> None:
        """Resize bounding boxes with ``results['scale_factor']``."""
        if results.get('gt_bboxes', None) is not None:
            bboxes = results['gt_bboxes'] * np.tile(
                np.array(results['scale_factor']), 2)
            if self.clip_object_border:
                bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0,
                                          results['img_shape'][1])
                bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0,
                                          results['img_shape'][0])
            results['gt_bboxes'] = bboxes

    def _resize_seg(self, results: dict) -> None:
        """Resize semantic segmentation map with ``results['scale']``."""
        if results.get('gt_seg_map', None) is not None:
            if self.keep_ratio:
                gt_seg = mmcv.imrescale(
                    results['gt_seg_map'],
                    results['scale'],
                    interpolation='nearest',
                    backend=self.backend)
            else:
                gt_seg = mmcv.imresize(
                    results['gt_seg_map'],
                    results['scale'],
                    interpolation='nearest',
                    backend=self.backend)
            results['gt_seg_map'] = gt_seg

    def _resize_keypoints(self, results: dict) -> None:
        """Resize keypoints with ``results['scale_factor']``."""
        if results.get('gt_keypoints', None) is not None:
            keypoints = results['gt_keypoints']
            keypoints[:, :, :2] = keypoints[:, :, :2] * np.array(
                results['scale_factor'])
            if self.clip_object_border:
                keypoints[:, :, 0] = np.clip(keypoints[:, :, 0], 0,
                                             results['img_shape'][1])
                keypoints[:, :, 1] = np.clip(keypoints[:, :, 1], 0,
                                             results['img_shape'][0])
            results['gt_keypoints'] = keypoints

    def transform(self, results: dict) -> dict:
        """Transform function to resize images, bounding boxes, semantic
        segmentation map and keypoints.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Resized results, 'img', 'gt_bboxes', 'gt_seg_map',
            'gt_keypoints', 'scale', 'scale_factor', 'img_shape',
            and 'keep_ratio' keys are updated in result dict.
        """
        if self.scale:
            results['scale'] = self.scale
        else:
            img_shape = results['img'].shape[:2]
            results['scale'] = _scale_size(img_shape[::-1],
                                           self.scale_factor)  # type: ignore
        self._resize_img(results)
        self._resize_bboxes(results)
        self._resize_seg(results)
        self._resize_keypoints(results)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(scale={self.scale}, '
        repr_str += f'scale_factor={self.scale_factor}, '
        repr_str += f'keep_ratio={self.keep_ratio}, '
        repr_str += f'clip_object_border={self.clip_object_border}, '
        repr_str += f'backend={self.backend}, '
        repr_str += f'interpolation={self.interpolation})'
        return repr_str
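
# Usage sketch for ``Resize`` (illustrative shapes; any HxWx3 uint8 image and
# (N, 4) float bboxes in (x1, y1, x2, y2) order would do):
#
#   resize = Resize(scale=(1333, 800), keep_ratio=True)
#   results = dict(
#       img=np.zeros((480, 640, 3), dtype=np.uint8),
#       gt_bboxes=np.array([[10., 20., 100., 200.]], dtype=np.float32))
#   results = resize(results)
#   # With keep_ratio=True the image is rescaled to fit within (1333, 800);
#   # results['scale_factor'] holds (w_scale, h_scale) and gt_bboxes are
#   # multiplied by the same factors.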

@TRANSFORMS.register_module()
class Pad(BaseTransform):
    """Pad the image & segmentation map.

    There are three padding modes: (1) pad to a fixed size, (2) pad to the
    minimum size that is divisible by some number, and (3) pad to square.
    Also, pad to square and pad to the minimum size can be used at the same
    time.

    Required Keys:

    - img
    - gt_bboxes (optional)
    - gt_seg_map (optional)

    Modified Keys:

    - img
    - gt_seg_map
    - img_shape

    Added Keys:

    - pad_shape
    - pad_fixed_size
    - pad_size_divisor

    Args:
        size (tuple, optional): Fixed padding size.
            Expected padding shape (w, h). Defaults to None.
        size_divisor (int, optional): The divisor of padded size. Defaults to
            None.
        pad_to_square (bool): Whether to pad the image into a square.
            Currently only used for YOLOX. Defaults to False.
        pad_val (Number | dict[str, Number], optional): Padding value used
            when the padding_mode is "constant". If it is a single number,
            the value to pad the image is the number and to pad the semantic
            segmentation map is 255. If it is a dict, it should have the
            following keys:

            - img: The value to pad the image.
            - seg: The value to pad the semantic segmentation map.

            Defaults to dict(img=0, seg=255).
        padding_mode (str): Type of padding. Should be: constant, edge,
            reflect or symmetric. Defaults to 'constant'.

            - constant: pads with a constant value, this value is specified
              with pad_val.
            - edge: pads with the last value at the edge of the image.
            - reflect: pads with reflection of image without repeating the
              last value on the edge. For example, padding [1, 2, 3, 4] with
              2 elements on both sides in reflect mode will result in
              [3, 2, 1, 2, 3, 4, 3, 2].
            - symmetric: pads with reflection of image repeating the last
              value on the edge. For example, padding [1, 2, 3, 4] with 2
              elements on both sides in symmetric mode will result in
              [2, 1, 1, 2, 3, 4, 4, 3].
    """

    def __init__(self,
                 size: Optional[Tuple[int, int]] = None,
                 size_divisor: Optional[int] = None,
                 pad_to_square: bool = False,
                 pad_val: Union[Number, dict] = dict(img=0, seg=255),
                 padding_mode: str = 'constant') -> None:
        self.size = size
        self.size_divisor = size_divisor
        if isinstance(pad_val, int):
            pad_val = dict(img=pad_val, seg=255)
        assert isinstance(pad_val, dict), \
            f'pad_val should be an int or a dict, but got {type(pad_val)}'
        self.pad_val = pad_val
        self.pad_to_square = pad_to_square

        if pad_to_square:
            assert size is None, \
                'The size must be None when pad_to_square is True'
        else:
            assert size is not None or size_divisor is not None, \
                'one of size and size_divisor must be set'
            assert size is None or size_divisor is None, \
                'only one of size and size_divisor should be set'

        assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric']
        self.padding_mode = padding_mode

    def _pad_img(self, results: dict) -> None:
        """Pad images according to ``self.size``."""
        pad_val = self.pad_val.get('img', 0)

        size = None
        if self.pad_to_square:
            max_size = max(results['img'].shape[:2])
            size = (max_size, max_size)
        if self.size_divisor is not None:
            if size is None:
                size = (results['img'].shape[0], results['img'].shape[1])
            pad_h = int(np.ceil(
                size[0] / self.size_divisor)) * self.size_divisor
            pad_w = int(np.ceil(
                size[1] / self.size_divisor)) * self.size_divisor
            size = (pad_h, pad_w)
        elif self.size is not None:
            size = self.size[::-1]

        if isinstance(pad_val, int) and results['img'].ndim == 3:
            pad_val = tuple(pad_val for _ in range(results['img'].shape[2]))
        padded_img = mmcv.impad(
            results['img'],
            shape=size,
            pad_val=pad_val,
            padding_mode=self.padding_mode)

        results['img'] = padded_img
        results['pad_shape'] = padded_img.shape
        results['pad_fixed_size'] = self.size
        results['pad_size_divisor'] = self.size_divisor
        results['img_shape'] = padded_img.shape[:2]

    def _pad_seg(self, results: dict) -> None:
        """Pad semantic segmentation map according to
        ``results['pad_shape']``."""
        if results.get('gt_seg_map', None) is not None:
            pad_val = self.pad_val.get('seg', 255)
            if isinstance(pad_val, int) and results['gt_seg_map'].ndim == 3:
                pad_val = tuple(
                    pad_val for _ in range(results['gt_seg_map'].shape[2]))
            results['gt_seg_map'] = mmcv.impad(
                results['gt_seg_map'],
                shape=results['pad_shape'][:2],
                pad_val=pad_val,
                padding_mode=self.padding_mode)

    def transform(self, results: dict) -> dict:
        """Call function to pad images, masks, semantic segmentation maps.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Updated result dict.
        """
        self._pad_img(results)
        self._pad_seg(results)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += f'(size={self.size}, '
        repr_str += f'size_divisor={self.size_divisor}, '
        repr_str += f'pad_to_square={self.pad_to_square}, '
        repr_str += f'pad_val={self.pad_val}, '
        repr_str += f'padding_mode={self.padding_mode})'
        return repr_str
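
# Usage sketch for ``Pad`` with ``size_divisor`` (illustrative; 32 is a
# common backbone stride, not a requirement of this class):
#
#   pad = Pad(size_divisor=32)
#   results = dict(img=np.zeros((100, 201, 3), dtype=np.uint8))
#   results = pad(results)
#   # 100 -> 128 and 201 -> 224, the nearest multiples of 32:
#   # results['pad_shape'] == (128, 224, 3)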

@TRANSFORMS.register_module()
class CenterCrop(BaseTransform):
    """Crop the center of the image, segmentation masks, bounding boxes and
    keypoints. If the crop area exceeds the original image and ``auto_pad``
    is True, the original image will be padded before cropping.

    Required Keys:

    - img
    - gt_seg_map (optional)
    - gt_bboxes (optional)
    - gt_keypoints (optional)

    Modified Keys:

    - img
    - img_shape
    - gt_seg_map (optional)
    - gt_bboxes (optional)
    - gt_keypoints (optional)

    Added Key:

    - pad_shape

    Args:
        crop_size (Union[int, Tuple[int, int]]): Expected size after cropping
            with the format of (w, h). If set to an integer, then cropping
            width and height are equal to this integer.
        auto_pad (bool): Whether to pad the image if it's smaller than the
            ``crop_size``. Defaults to False.
        pad_cfg (dict): Base config for padding. Refer to ``mmcv.Pad`` for
            detail. Defaults to ``dict(type='Pad')``.
        clip_object_border (bool): Whether to clip the objects
            outside the border of the image. In some datasets like MOT17, the
            gt bboxes are allowed to cross the border of images. Therefore,
            we don't need to clip the gt bboxes in these cases.
            Defaults to True.
    """

    def __init__(self,
                 crop_size: Union[int, Tuple[int, int]],
                 auto_pad: bool = False,
                 pad_cfg: dict = dict(type='Pad'),
                 clip_object_border: bool = True) -> None:
        super().__init__()
        assert isinstance(crop_size, int) or (
            isinstance(crop_size, tuple) and len(crop_size) == 2
        ), ('The expected crop_size is an integer, or a tuple containing '
            'two integers')

        if isinstance(crop_size, int):
            crop_size = (crop_size, crop_size)
        assert crop_size[0] > 0 and crop_size[1] > 0
        self.crop_size = crop_size
        self.auto_pad = auto_pad

        self.pad_cfg = pad_cfg.copy()
        # size will be overwritten
        if 'size' in self.pad_cfg and auto_pad:
            warnings.warn('``size`` is set in ``pad_cfg``, '
                          'however this argument will be overwritten '
                          'according to crop size and image size')

        self.clip_object_border = clip_object_border

    def _crop_img(self, results: dict, bboxes: np.ndarray) -> None:
        """Crop image.

        Args:
            results (dict): Result dict contains the data to transform.
            bboxes (np.ndarray): Shape (4, ), the crop region in
                (x1, y1, x2, y2) order.
        """
        if results.get('img', None) is not None:
            img = mmcv.imcrop(results['img'], bboxes=bboxes)
            img_shape = img.shape[:2]  # type: ignore
            results['img'] = img
            results['img_shape'] = img_shape
            results['pad_shape'] = img_shape

    def _crop_seg_map(self, results: dict, bboxes: np.ndarray) -> None:
        """Crop semantic segmentation map.

        Args:
            results (dict): Result dict contains the data to transform.
            bboxes (np.ndarray): Shape (4, ), the crop region in
                (x1, y1, x2, y2) order.
        """
        if results.get('gt_seg_map', None) is not None:
            img = mmcv.imcrop(results['gt_seg_map'], bboxes=bboxes)
            results['gt_seg_map'] = img

    def _crop_bboxes(self, results: dict, bboxes: np.ndarray) -> None:
        """Update bounding boxes according to CenterCrop.

        Args:
            results (dict): Result dict contains the data to transform.
            bboxes (np.ndarray): Shape (4, ), the crop region in
                (x1, y1, x2, y2) order.
        """
        if 'gt_bboxes' in results:
            offset_w = bboxes[0]
            offset_h = bboxes[1]
            bbox_offset = np.array([offset_w, offset_h, offset_w, offset_h])

            # gt_bboxes has shape (num_gts, 4) in (tl_x, tl_y, br_x, br_y)
            # order.
            gt_bboxes = results['gt_bboxes'] - bbox_offset
            if self.clip_object_border:
                gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0,
                                             results['img'].shape[1])
                gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0,
                                             results['img'].shape[0])
            results['gt_bboxes'] = gt_bboxes

    def _crop_keypoints(self, results: dict, bboxes: np.ndarray) -> None:
        """Update keypoints according to CenterCrop. Keypoints that are not
        in the cropped image will be set invisible.

        Args:
            results (dict): Result dict contains the data to transform.
            bboxes (np.ndarray): Shape (4, ), the crop region in
                (x1, y1, x2, y2) order.
        """
        if 'gt_keypoints' in results:
            offset_w = bboxes[0]
            offset_h = bboxes[1]
            keypoints_offset = np.array([offset_w, offset_h, 0])

            # gt_keypoints has shape (N, NK, 3) in (x, y, visibility) order,
            # NK = number of points per object
            gt_keypoints = results['gt_keypoints'] - keypoints_offset
            # set gt_keypoints out of the result image invisible
            height, width = results['img'].shape[:2]
            valid_pos = (gt_keypoints[:, :, 0] >=
                         0) * (gt_keypoints[:, :, 0] <
                               width) * (gt_keypoints[:, :, 1] >= 0) * (
                                   gt_keypoints[:, :, 1] < height)
            gt_keypoints[:, :, 2] = np.where(valid_pos, gt_keypoints[:, :, 2],
                                             0)
            gt_keypoints[:, :, 0] = np.clip(gt_keypoints[:, :, 0], 0,
                                            results['img'].shape[1])
            gt_keypoints[:, :, 1] = np.clip(gt_keypoints[:, :, 1], 0,
                                            results['img'].shape[0])
            results['gt_keypoints'] = gt_keypoints

    def transform(self, results: dict) -> dict:
        """Apply center crop on results.

        Args:
            results (dict): Result dict contains the data to transform.

        Returns:
            dict: Results with center-cropped image and semantic segmentation
            map.
        """
        crop_width, crop_height = self.crop_size[0], self.crop_size[1]

        assert 'img' in results, '`img` is not found in results'
        img = results['img']
        # img.shape has length 2 for grayscale, length 3 for color
        img_height, img_width = img.shape[:2]

        if crop_height > img_height or crop_width > img_width:
            if self.auto_pad:
                # pad the area
                img_height = max(img_height, crop_height)
                img_width = max(img_width, crop_width)
                pad_size = (img_width, img_height)
                _pad_cfg = self.pad_cfg.copy()
                _pad_cfg.update(dict(size=pad_size))
                pad_transform = TRANSFORMS.build(_pad_cfg)
                results = pad_transform(results)
            else:
                crop_height = min(crop_height, img_height)
                crop_width = min(crop_width, img_width)

        y1 = max(0, int(round((img_height - crop_height) / 2.)))
        x1 = max(0, int(round((img_width - crop_width) / 2.)))
        y2 = min(img_height, y1 + crop_height) - 1
        x2 = min(img_width, x1 + crop_width) - 1
        bboxes = np.array([x1, y1, x2, y2])

        # crop the image
        self._crop_img(results, bboxes)
        # crop the gt_seg_map
        self._crop_seg_map(results, bboxes)
        # crop the bounding box
        self._crop_bboxes(results, bboxes)
        # crop the keypoints
        self._crop_keypoints(results, bboxes)
        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(crop_size={self.crop_size}'
        repr_str += f', auto_pad={self.auto_pad}'
        repr_str += f', pad_cfg={self.pad_cfg}'
        repr_str += f', clip_object_border={self.clip_object_border})'
        return repr_str
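
# Usage sketch for ``CenterCrop`` with ``auto_pad`` (illustrative sizes; the
# 'Pad' transform is built from ``pad_cfg`` via the TRANSFORMS registry):
#
#   crop = CenterCrop(crop_size=(224, 224), auto_pad=True)
#   results = dict(img=np.zeros((200, 300, 3), dtype=np.uint8))
#   results = crop(results)
#   # The 200-pixel height is first padded to 224, then a centered 224x224
#   # window is cropped: results['img'].shape == (224, 224, 3)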

@TRANSFORMS.register_module()
class RandomGrayscale(BaseTransform):
    """Randomly convert image to grayscale with a probability.

    Required Key:

    - img

    Modified Key:

    - img

    Added Keys:

    - grayscale
    - grayscale_weights

    Args:
        prob (float): Probability that image should be converted to
            grayscale. Defaults to 0.1.
        keep_channels (bool): Whether to keep the number of channels the same
            as the input. Defaults to False.
        channel_weights (tuple): The grayscale weights of each channel,
            and the weights will be normalized. For example, (1, 2, 1)
            will be normalized as (0.25, 0.5, 0.25). Defaults to
            (1., 1., 1.).
        color_format (str): Color format of the input image; one of 'bgr',
            'rgb', 'hsv'. Note: an 'hsv' image will be transformed into 'bgr'
            format no matter whether it is grayscaled. Defaults to 'bgr'.
    """

    def __init__(self,
                 prob: float = 0.1,
                 keep_channels: bool = False,
                 channel_weights: Sequence[float] = (1., 1., 1.),
                 color_format: str = 'bgr') -> None:
        super().__init__()
        assert 0. <= prob <= 1., ('The range of ``prob`` value is [0., 1.],' +
                                  f' but got {prob} instead')
        self.prob = prob
        self.keep_channels = keep_channels
        self.channel_weights = channel_weights
        assert color_format in ['bgr', 'rgb', 'hsv']
        self.color_format = color_format

    @cache_randomness
    def _random_prob(self):
        return random.random()

    def transform(self, results: dict) -> dict:
        """Apply random grayscale on results.

        Args:
            results (dict): Result dict contains the data to transform.

        Returns:
            dict: Results with grayscale image.
        """
        img = results['img']
        # convert hsv to bgr
        if self.color_format == 'hsv':
            img = mmcv.hsv2bgr(img)
        img = img[..., None] if img.ndim == 2 else img
        num_output_channels = img.shape[2]
        if self._random_prob() < self.prob:
            if num_output_channels > 1:
                assert num_output_channels == len(self.channel_weights), (
                    'The length of ``channel_weights`` is supposed to be '
                    f'{num_output_channels}, but got '
                    f'{len(self.channel_weights)} instead.')
                normalized_weights = (
                    np.array(self.channel_weights) /
                    sum(self.channel_weights))
                img = (normalized_weights * img).sum(axis=2)
                img = img.astype('uint8')
                if self.keep_channels:
                    img = img[:, :, None]
                    results['img'] = np.dstack(
                        [img for _ in range(num_output_channels)])
                else:
                    results['img'] = img
                return results
        img = img.astype('uint8')
        results['img'] = img
        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(prob={self.prob}'
        repr_str += f', keep_channels={self.keep_channels}'
        repr_str += f', channel_weights={self.channel_weights}'
        repr_str += f', color_format={self.color_format})'
        return repr_str
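
# Usage sketch for ``RandomGrayscale`` (illustrative; prob=1. forces the
# conversion so the effect is deterministic in this example):
#
#   gray = RandomGrayscale(prob=1., keep_channels=True,
#                          channel_weights=(1., 2., 1.))
#   results = dict(img=np.zeros((32, 32, 3), dtype=np.uint8))
#   results = gray(results)
#   # weights normalize to (0.25, 0.5, 0.25); keep_channels=True stacks the
#   # single gray plane back into 3 identical channels.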

@TRANSFORMS.register_module()
class MultiScaleFlipAug(BaseTransform):
    """Test-time augmentation with multiple scales and flipping.

    An example configuration is as follows:

    .. code-block::

        dict(
            type='MultiScaleFlipAug',
            scales=[(1333, 400), (1333, 800)],
            allow_flip=True,
            transforms=[
                dict(type='Normalize', **img_norm_cfg),
                dict(type='Pad', size_divisor=1),
                dict(type='ImageToTensor', keys=['img']),
                dict(type='Collect', keys=['img'])
            ])

    ``results`` will be resized using all the sizes in ``scales``. If
    ``allow_flip`` is True, then flipped results will also be added into the
    output list.

    For the above configuration, there are four combinations of resize
    and flip:

    - Resize to (1333, 400) + no flip
    - Resize to (1333, 400) + flip
    - Resize to (1333, 800) + no flip
    - Resize to (1333, 800) + flip

    The four results are then transformed with ``transforms`` argument.
    After that, results are wrapped into lists of the same length as below:

    .. code-block::

        dict(
            inputs=[...],
            data_samples=[...]
        )

    The lengths of ``inputs`` and ``data_samples`` are both 4.

    Required Keys:

    - Depending on the requirements of the ``transforms`` parameter.

    Modified Keys:

    - All output keys of each transform.

    Args:
        transforms (list[dict]): Transforms to be applied to each resized
            and flipped data.
        scales (tuple | list[tuple] | None): Image scales for resizing.
        scale_factor (float or tuple[float]): Scale factors for resizing.
            Defaults to None.
        allow_flip (bool): Whether to apply flip augmentation. Defaults to
            False.
        flip_direction (str | list[str]): Flip augmentation directions,
            options are "horizontal", "vertical" and "diagonal". If
            flip_direction is a list, multiple flip augmentations will be
            applied. It has no effect when ``allow_flip`` is False. Defaults
            to "horizontal".
        resize_cfg (dict): Base config for resizing. Defaults to
            ``dict(type='Resize', keep_ratio=True)``.
        flip_cfg (dict): Base config for flipping. Defaults to
            ``dict(type='RandomFlip')``.
    """

    def __init__(
        self,
        transforms: List[dict],
        scales: Optional[Union[Tuple, List[Tuple]]] = None,
        scale_factor: Optional[Union[float, List[float]]] = None,
        allow_flip: bool = False,
        flip_direction: Union[str, List[str]] = 'horizontal',
        resize_cfg: dict = dict(type='Resize', keep_ratio=True),
        flip_cfg: dict = dict(type='RandomFlip')
    ) -> None:
        super().__init__()
        self.transforms = Compose(transforms)  # type: ignore

        if scales is not None:
            self.scales = scales if isinstance(scales, list) else [scales]
            self.scale_key = 'scale'
            assert mmengine.is_list_of(self.scales, tuple)
        else:
            # if ``scales`` and ``scale_factor`` are both ``None``
            if scale_factor is None:
                self.scales = [1.]  # type: ignore
            elif isinstance(scale_factor, list):
                self.scales = scale_factor  # type: ignore
            else:
                self.scales = [scale_factor]  # type: ignore

            self.scale_key = 'scale_factor'

        self.allow_flip = allow_flip
        self.flip_direction = flip_direction if isinstance(
            flip_direction, list) else [flip_direction]
        assert mmengine.is_list_of(self.flip_direction, str)
        if not self.allow_flip and self.flip_direction != ['horizontal']:
            warnings.warn(
                'flip_direction has no effect when allow_flip is set to '
                'False')
        self.resize_cfg = resize_cfg.copy()
        self.flip_cfg = flip_cfg

    def transform(self, results: dict) -> Dict:
        """Apply test time augment transforms on results.

        Args:
            results (dict): Result dict contains the data to transform.

        Returns:
            dict: The augmented data, where each value is wrapped
            into a list.
        """
        data_samples = []
        inputs = []
        flip_args = [(False, '')]
        if self.allow_flip:
            flip_args += [(True, direction)
                          for direction in self.flip_direction]
        for scale in self.scales:
            for flip, direction in flip_args:
                _resize_cfg = self.resize_cfg.copy()
                _resize_cfg.update({self.scale_key: scale})
                _resize_flip = [_resize_cfg]

                if flip:
                    _flip_cfg = self.flip_cfg.copy()
                    _flip_cfg.update(prob=1.0, direction=direction)
                    _resize_flip.append(_flip_cfg)
                else:
                    results['flip'] = False
                    results['flip_direction'] = None

                resize_flip = Compose(_resize_flip)
                _results = resize_flip(results.copy())
                packed_results = self.transforms(_results)  # type: ignore

                inputs.append(packed_results['inputs'])  # type: ignore
                data_samples.append(
                    packed_results['data_samples'])  # type: ignore
        return dict(inputs=inputs, data_samples=data_samples)

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(transforms={self.transforms}'
        repr_str += f', scales={self.scales}'
        repr_str += f', allow_flip={self.allow_flip}'
        repr_str += f', flip_direction={self.flip_direction})'
        return repr_str
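
# Sketch of how the TTA combinations enumerate (illustrative; assumes a
# 'PackDetInputs'-style packer from a downstream codebase is registered and
# produces 'inputs' and 'data_samples' keys):
#
#   tta = MultiScaleFlipAug(
#       transforms=[dict(type='PackDetInputs')],
#       scales=[(1333, 400), (1333, 800)],
#       allow_flip=True)
#   # Each input yields len(scales) * (1 + len(flip_direction)) = 4 variants;
#   # tta(results) returns dict(inputs=[4 items], data_samples=[4 items]).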

@TRANSFORMS.register_module()
class TestTimeAug(BaseTransform):
    """Test-time augmentation transform.

    An example configuration is as follows:

    .. code-block::

        dict(type='TestTimeAug',
             transforms=[
                [dict(type='Resize', scale=(1333, 400), keep_ratio=True),
                 dict(type='Resize', scale=(1333, 800), keep_ratio=True)],
                [dict(type='RandomFlip', prob=1.),
                 dict(type='RandomFlip', prob=0.)],
                [dict(type='PackDetInputs',
                      meta_keys=('img_id', 'img_path', 'ori_shape',
                                 'img_shape', 'scale_factor', 'flip',
                                 'flip_direction'))]])

    ``results`` will be transformed using all transforms defined in the
    ``transforms`` argument.

    For the above configuration, there are four combinations of resize
    and flip:

    - Resize to (1333, 400) + no flip
    - Resize to (1333, 400) + flip
    - Resize to (1333, 800) + no flip
    - Resize to (1333, 800) + flip

    After that, results are wrapped into lists of the same length as below:

    .. code-block::

        dict(
            inputs=[...],
            data_samples=[...]
        )

    The lengths of ``inputs`` and ``data_samples`` are both 4.

    Required Keys:

    - Depending on the requirements of the ``transforms`` parameter.

    Modified Keys:

    - All output keys of each transform.

    Args:
        transforms (list[list[dict]]): Transforms to be applied to data
            sampled from the dataset. ``transforms`` is a list of lists, and
            each list element usually represents a series of transforms with
            the same type and different arguments. Data will be processed by
            each list element sequentially. See more information in
            :meth:`transform`.
    """

    def __init__(self, transforms: list):
        for i, transform_list in enumerate(transforms):
            for j, transform in enumerate(transform_list):
                if isinstance(transform, dict):
                    transform_list[j] = TRANSFORMS.build(transform)
                elif callable(transform):
                    continue
                else:
                    raise TypeError(
                        'transform must be callable or a dict, but got'
                        f' {type(transform)}')
            transforms[i] = transform_list

        self.subroutines = [
            Compose(subroutine) for subroutine in product(*transforms)
        ]

    def transform(self, results: dict) -> dict:
        """Apply all transforms defined in :attr:`transforms` to the results.

        As the example given in :obj:`TestTimeAug`, ``transforms`` consists
        of 2 ``Resize``, 2 ``RandomFlip`` and 1 ``PackDetInputs``.
        The data sampled from the dataset will be processed as follows:

        1. Data will be processed by 2 ``Resize``, returning a list
           of 2 results.
        2. Each result in the list will be further passed to 2
           ``RandomFlip``, aggregating into a list of 4 results.
        3. Each result will be processed by ``PackDetInputs``, returning
           a list of dicts.
        4. The same fields of the results are aggregated and a dict is
           finally returned. Each value of the dict represents the 4
           transformed results.

        Args:
            results (dict): Result dict contains the data to transform.

        Returns:
            dict: The augmented data, where each value is wrapped
            into a list.
        """
        results_list = []  # type: ignore
        for subroutine in self.subroutines:
            result = subroutine(copy.deepcopy(results))
            assert result is not None, (
                f'Data processed by {subroutine} in `TestTimeAug` should not '
                'be None! Please check your validation dataset and the '
                f'transforms in {subroutine}')
            assert isinstance(result, dict), (
                f'Data processed by {subroutine} must return a dict, but got '
                f'{result}')
            results_list.append(result)

        aug_data_dict = {
            key: [item[key] for item in results_list]  # type: ignore
            for key in results_list[0]  # type: ignore
        }
        return aug_data_dict

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += 'transforms=\n'
        for subroutine in self.subroutines:
            repr_str += f'{repr(subroutine)}\n'
        return repr_str
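
# Sketch of the cartesian-product behaviour of ``TestTimeAug`` (illustrative;
# uses plain callables instead of registered transform dicts):
#
#   def keep(results):  # identity stand-in for a real transform
#       return results
#
#   tta = TestTimeAug(transforms=[[keep, keep], [keep]])
#   # product(*transforms) yields 2 * 1 = 2 subroutines; each key of the
#   # returned dict is wrapped into a list of 2 transformed copies.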

@TRANSFORMS.register_module()
class RandomChoiceResize(BaseTransform):
    """Resize images & bbox & mask from a list of multiple scales.

    This transform resizes the input image to some scale. Bboxes and masks
    are then resized with the same scale factor. The resize scale will be
    randomly selected from ``scales``.

    How to choose the target scale to resize the image will follow the rules
    below:

    - if ``scales`` is a list of tuples, the target scale is sampled from the
      list uniformly.
    - if ``scales`` is a tuple, the target scale will be set to the tuple.

    Required Keys:

    - img
    - gt_bboxes (optional)
    - gt_seg_map (optional)
    - gt_keypoints (optional)

    Modified Keys:

    - img
    - img_shape
    - gt_bboxes (optional)
    - gt_seg_map (optional)
    - gt_keypoints (optional)

    Added Keys:

    - scale
    - scale_factor
    - scale_idx
    - keep_ratio

    Args:
        scales (Union[list, Tuple]): Image scales for resizing.
        resize_type (str): The type of resize class to use. Defaults to
            "Resize".
        **resize_kwargs: Other keyword arguments for the ``resize_type``.

    Note:
        By default, the ``resize_type`` is "Resize"; if it's not overwritten
        by your registry, it refers to :class:`mmcv.Resize`. Therefore,
        ``resize_kwargs`` accepts any keyword arguments of it, like
        ``keep_ratio``, ``interpolation`` and so on.

        If you want to use your custom resize class, the class should accept
        a ``scale`` argument and have a ``scale`` attribute which determines
        the resize shape.
    """

    def __init__(
        self,
        scales: Sequence[Union[int, Tuple]],
        resize_type: str = 'Resize',
        **resize_kwargs,
    ) -> None:
        super().__init__()
        if isinstance(scales, list):
            self.scales = scales
        else:
            self.scales = [scales]
        assert mmengine.is_seq_of(self.scales, (tuple, int))

        self.resize_cfg = dict(type=resize_type, **resize_kwargs)
        # create an empty Resize object
        self.resize = TRANSFORMS.build({'scale': 0, **self.resize_cfg})

    @cache_randomness
    def _random_select(self) -> Tuple[int, int]:
        """Randomly select a scale from the given candidates.

        Returns:
            (tuple, int): Returns a tuple ``(scale, scale_idx)``,
            where ``scale`` is the selected image scale and
            ``scale_idx`` is the selected index in the given candidates.
        """
        scale_idx = np.random.randint(len(self.scales))
        scale = self.scales[scale_idx]
        return scale, scale_idx

    def transform(self, results: dict) -> dict:
        """Apply resize transforms on results from a list of scales.

        Args:
            results (dict): Result dict contains the data to transform.

        Returns:
            dict: Resized results, 'img', 'gt_bboxes', 'gt_seg_map',
            'gt_keypoints', 'scale', 'scale_factor', 'img_shape',
            and 'keep_ratio' keys are updated in result dict.
        """
        target_scale, scale_idx = self._random_select()
        self.resize.scale = target_scale
        results = self.resize(results)
        results['scale_idx'] = scale_idx
        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(scales={self.scales}'
        repr_str += f', resize_cfg={self.resize_cfg})'
        return repr_str
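
# Usage sketch for ``RandomChoiceResize`` (illustrative scales):
#
#   resize = RandomChoiceResize(
#       scales=[(1333, 640), (1333, 800)], keep_ratio=True)
#   results = resize(dict(img=np.zeros((480, 640, 3), dtype=np.uint8)))
#   # each call picks one of the two scales uniformly at random and records
#   # the chosen index in results['scale_idx']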

@TRANSFORMS.register_module()
class RandomFlip(BaseTransform):
    """Flip the image & bbox & keypoints & segmentation map. Added or updated
    keys: flip, flip_direction, img, gt_bboxes, gt_seg_map, and gt_keypoints.

    There are 3 flip modes:

    - ``prob`` is float, ``direction`` is string: the image will be flipped
      in ``direction`` with probability ``prob``.
      E.g., ``prob=0.5``, ``direction='horizontal'``, then the image will be
      horizontally flipped with probability of 0.5.
    - ``prob`` is float, ``direction`` is a list of string: the image will be
      flipped in ``direction[i]`` with probability of
      ``prob/len(direction)``.
      E.g., ``prob=0.5``, ``direction=['horizontal', 'vertical']``, then the
      image will be horizontally flipped with probability of 0.25, vertically
      with probability of 0.25.
    - ``prob`` is a list of float, ``direction`` is a list of string: given
      ``len(prob) == len(direction)``, the image will be flipped in
      ``direction[i]`` with probability of ``prob[i]``.
      E.g., ``prob=[0.3, 0.5]``, ``direction=['horizontal', 'vertical']``,
      then the image will be horizontally flipped with probability of 0.3,
      vertically with probability of 0.5.

    Required Keys:

    - img
    - gt_bboxes (optional)
    - gt_seg_map (optional)
    - gt_keypoints (optional)

    Modified Keys:

    - img
    - gt_bboxes (optional)
    - gt_seg_map (optional)
    - gt_keypoints (optional)

    Added Keys:

    - flip
    - flip_direction
    - swap_seg_labels (optional)

    Args:
        prob (float | list[float], optional): The flipping probability.
            Defaults to None.
        direction (str | list[str]): The flipping direction. Options are
            'horizontal', 'vertical' and 'diagonal'. If input is a list, the
            length must equal ``prob``. Each element in ``prob`` indicates
            the flip probability of the corresponding direction. Defaults to
            'horizontal'.
        swap_seg_labels (list, optional): The label pairs that need to be
            swapped in the ground truth, like 'left arm' and 'right arm'
            need to be swapped after horizontal flipping. For example,
            ``[(1, 5)]``, where 1/5 is the label of the left/right arm.
            Defaults to None.
    """

    def __init__(self,
                 prob: Optional[Union[float, Iterable[float]]] = None,
                 direction: Union[str, Sequence[Optional[str]]] = 'horizontal',
                 swap_seg_labels: Optional[Sequence] = None) -> None:
        if isinstance(prob, list):
            assert mmengine.is_list_of(prob, float)
            assert 0 <= sum(prob) <= 1
        elif isinstance(prob, float):
            assert 0 <= prob <= 1
        else:
            raise ValueError('prob must be float or list of float, but '
                             f'got `{type(prob)}`.')
        self.prob = prob
        self.swap_seg_labels = swap_seg_labels

        valid_directions = ['horizontal', 'vertical', 'diagonal']
        if isinstance(direction, str):
            assert direction in valid_directions
        elif isinstance(direction, list):
            assert mmengine.is_list_of(direction, str)
            assert set(direction).issubset(set(valid_directions))
        else:
            raise ValueError('direction must be either str or list of str, '
                             f'but got `{type(direction)}`.')
        self.direction = direction

        if isinstance(prob, list):
            assert len(prob) == len(self.direction)

    def _flip_bbox(self, bboxes: np.ndarray, img_shape: Tuple[int, int],
                   direction: str) -> np.ndarray:
        """Flip bboxes horizontally, vertically or diagonally.

        Args:
            bboxes (numpy.ndarray): Bounding boxes, shape (..., 4*k).
            img_shape (tuple[int]): Image shape (height, width).
            direction (str): Flip direction. Options are 'horizontal',
                'vertical', and 'diagonal'.

        Returns:
            numpy.ndarray: Flipped bounding boxes.
        """
        assert bboxes.shape[-1] % 4 == 0
        flipped = bboxes.copy()
        h, w = img_shape
        if direction == 'horizontal':
            flipped[..., 0::4] = w - bboxes[..., 2::4]
            flipped[..., 2::4] = w - bboxes[..., 0::4]
        elif direction == 'vertical':
            flipped[..., 1::4] = h - bboxes[..., 3::4]
            flipped[..., 3::4] = h - bboxes[..., 1::4]
        elif direction == 'diagonal':
            flipped[..., 0::4] = w - bboxes[..., 2::4]
            flipped[..., 1::4] = h - bboxes[..., 3::4]
            flipped[..., 2::4] = w - bboxes[..., 0::4]
            flipped[..., 3::4] = h - bboxes[..., 1::4]
        else:
            raise ValueError(
                "Flipping direction must be 'horizontal', 'vertical', "
                f"or 'diagonal', but got '{direction}'")
        return flipped

    def _flip_keypoints(
        self,
        keypoints: np.ndarray,
        img_shape: Tuple[int, int],
        direction: str,
    ) -> np.ndarray:
        """Flip keypoints horizontally, vertically or diagonally.

        Args:
            keypoints (numpy.ndarray): Keypoints, where the first two columns
                of the last dimension are the x and y coordinates; any
                trailing columns (e.g. visibility) are kept unchanged.
            img_shape (tuple[int]): Image shape (height, width).
            direction (str): Flip direction. Options are 'horizontal',
                'vertical', and 'diagonal'.

        Returns:
            numpy.ndarray: Flipped keypoints.
        """
        meta_info = keypoints[..., 2:]
        keypoints = keypoints[..., :2]
        flipped = keypoints.copy()
        h, w = img_shape
        if direction == 'horizontal':
            flipped[..., 0::2] = w - keypoints[..., 0::2]
        elif direction == 'vertical':
            flipped[..., 1::2] = h - keypoints[..., 1::2]
        elif direction == 'diagonal':
            flipped[..., 0::2] = w - keypoints[..., 0::2]
            flipped[..., 1::2] = h - keypoints[..., 1::2]
        else:
            raise ValueError(
                "Flipping direction must be 'horizontal', 'vertical', "
                f"or 'diagonal', but got '{direction}'")
        flipped = np.concatenate([flipped, meta_info], axis=-1)
        return flipped

    def _flip_seg_map(self, seg_map: np.ndarray,
                      direction: str) -> np.ndarray:
        """Flip segmentation map horizontally, vertically or diagonally.

        Args:
            seg_map (numpy.ndarray): Segmentation map, shape (H, W).
            direction (str): Flip direction. Options are 'horizontal',
                'vertical', and 'diagonal'.

        Returns:
            numpy.ndarray: Flipped segmentation map.
        """
        seg_map = mmcv.imflip(seg_map, direction=direction)
        if self.swap_seg_labels is not None:
            # to handle datasets with left/right annotations
            # like 'Left-arm' and 'Right-arm' in LIP dataset
            # Modified from https://github.com/openseg-group/openseg.pytorch/blob/master/lib/datasets/tools/cv2_aug_transforms.py  # noqa:E501
            # Licensed under MIT license
            temp = seg_map.copy()
            assert isinstance(self.swap_seg_labels, (tuple, list))
            for pair in self.swap_seg_labels:
                assert isinstance(pair, (tuple, list)) and len(pair) == 2, \
                    'swap_seg_labels must be a sequence with pair, but got ' \
                    f'{self.swap_seg_labels}.'
                seg_map[temp == pair[0]] = pair[1]
                seg_map[temp == pair[1]] = pair[0]
        return seg_map

    @cache_randomness
    def _choose_direction(self) -> str:
        """Choose the flip direction according to ``prob`` and
        ``direction``."""
        if isinstance(self.direction,
                      Sequence) and not isinstance(self.direction, str):
            # None means non-flip
            direction_list: list = list(self.direction) + [None]
        elif isinstance(self.direction, str):
            # None means non-flip
            direction_list = [self.direction, None]

        if isinstance(self.prob, list):
            non_prob: float = 1 - sum(self.prob)
            prob_list = self.prob + [non_prob]
        elif isinstance(self.prob, float):
            non_prob = 1. - self.prob
            # exclude non-flip
            single_ratio = self.prob / (len(direction_list) - 1)
            prob_list = [single_ratio] * (len(direction_list) - 1) + \
                [non_prob]

        cur_dir = np.random.choice(direction_list, p=prob_list)

        return cur_dir

    def _flip(self, results: dict) -> None:
        """Flip images, bounding boxes, semantic segmentation map and
        keypoints."""
        # flip image
        results['img'] = mmcv.imflip(
            results['img'], direction=results['flip_direction'])

        img_shape = results['img'].shape[:2]

        # flip bboxes
        if results.get('gt_bboxes', None) is not None:
            results['gt_bboxes'] = self._flip_bbox(results['gt_bboxes'],
                                                   img_shape,
                                                   results['flip_direction'])

        # flip keypoints
        if results.get('gt_keypoints', None) is not None:
            results['gt_keypoints'] = self._flip_keypoints(
                results['gt_keypoints'], img_shape,
                results['flip_direction'])

        # flip seg map
        if results.get('gt_seg_map', None) is not None:
            results['gt_seg_map'] = self._flip_seg_map(
                results['gt_seg_map'], direction=results['flip_direction'])
            results['swap_seg_labels'] = self.swap_seg_labels

    def _flip_on_direction(self, results: dict) -> None:
        """Function to flip images, bounding boxes, semantic segmentation map
        and keypoints."""
        cur_dir = self._choose_direction()
        if cur_dir is None:
            results['flip'] = False
            results['flip_direction'] = None
        else:
            results['flip'] = True
            results['flip_direction'] = cur_dir
            self._flip(results)

    def transform(self, results: dict) -> dict:
        """Transform function to flip images, bounding boxes, semantic
        segmentation map and keypoints.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Flipped results, 'img', 'gt_bboxes', 'gt_seg_map',
            'gt_keypoints', 'flip', and 'flip_direction' keys are
            updated in result dict.
        """
        self._flip_on_direction(results)

        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(prob={self.prob}, '
        repr_str += f'direction={self.direction})'
        return repr_str
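
# Usage sketch for ``RandomFlip`` with per-direction probabilities
# (illustrative numbers):
#
#   flip = RandomFlip(prob=[0.3, 0.2], direction=['horizontal', 'vertical'])
#   results = flip(dict(img=np.zeros((32, 32, 3), dtype=np.uint8)))
#   # horizontal flip with p=0.3, vertical with p=0.2, no flip with p=0.5;
#   # results['flip'] and results['flip_direction'] record the outcome.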

@TRANSFORMS.register_module()
class RandomResize(BaseTransform):
    """Random resize images & bbox & keypoints.

    How to choose the target scale to resize the image will follow the rules
    below:

    - if ``scale`` is a sequence of tuple

    .. math::

        target\\_scale[0] \\sim Uniform([scale[0][0], scale[1][0]])

    .. math::

        target\\_scale[1] \\sim Uniform([scale[0][1], scale[1][1]])

    Following the resize order of width and height in cv2, ``scale[i][0]``
    is for width, and ``scale[i][1]`` is for height.

    - if ``scale`` is a tuple

    .. math::

        target\\_scale[0] \\sim Uniform([ratio\\_range[0], ratio\\_range[1]])
            * scale[0]

    .. math::

        target\\_scale[1] \\sim Uniform([ratio\\_range[0], ratio\\_range[1]])
            * scale[1]

    Following the resize order of width and height in cv2, ``scale[0]``
    is for width, and ``scale[1]`` is for height.

    - if ``keep_ratio`` is True, the minimum value of ``target_scale`` will
      be used to set the shorter side and the maximum value will be used to
      set the longer side.

    - if ``keep_ratio`` is False, the value of ``target_scale`` will be used
      to resize the width and height accordingly.

    Required Keys:

    - img
    - gt_bboxes
    - gt_seg_map
    - gt_keypoints

    Modified Keys:

    - img
    - gt_bboxes
    - gt_seg_map
    - gt_keypoints
    - img_shape

    Added Keys:

    - scale
    - scale_factor
    - keep_ratio

    Args:
        scale (tuple or Sequence[tuple]): Image scales for resizing.
            Defaults to None.
        ratio_range (tuple[float], optional): (min_ratio, max_ratio).
            Defaults to None.
        resize_type (str): The type of resize class to use. Defaults to
            "Resize".
        **resize_kwargs: Other keyword arguments for the ``resize_type``.

    Note:
        By default, the ``resize_type`` is "Resize"; if it's not overwritten
        by your registry, it refers to :class:`mmcv.Resize`. Therefore,
        ``resize_kwargs`` accepts any keyword arguments of it, like
        ``keep_ratio``, ``interpolation`` and so on.

        If you want to use your custom resize class, the class should accept
        a ``scale`` argument and have a ``scale`` attribute which determines
        the resize shape.
    """

    def __init__(
        self,
        scale: Union[Tuple[int, int], Sequence[Tuple[int, int]]],
        ratio_range: Optional[Tuple[float, float]] = None,
        resize_type: str = 'Resize',
        **resize_kwargs,
    ) -> None:
        self.scale = scale
        self.ratio_range = ratio_range

        self.resize_cfg = dict(type=resize_type, **resize_kwargs)
        # create an empty Resize object
        self.resize = TRANSFORMS.build({'scale': 0, **self.resize_cfg})

    @staticmethod
    def _random_sample(scales: Sequence[Tuple[int, int]]) -> tuple:
        """Private function to randomly sample a scale from a list of tuples.

        Args:
            scales (list[tuple]): Image scale range for sampling.
                There must be two tuples in scales, which specify the lower
                and upper bound of image scales.

        Returns:
            tuple: The targeted scale of the image to be resized.
        """
        assert mmengine.is_list_of(scales, tuple) and len(scales) == 2
        scale_0 = [scales[0][0], scales[1][0]]
        scale_1 = [scales[0][1], scales[1][1]]
        edge_0 = np.random.randint(min(scale_0), max(scale_0) + 1)
        edge_1 = np.random.randint(min(scale_1), max(scale_1) + 1)
        scale = (edge_0, edge_1)
        return scale

    @staticmethod
    def _random_sample_ratio(scale: tuple,
                             ratio_range: Tuple[float, float]) -> tuple:
        """Private function to randomly sample a scale from a tuple.

        A ratio will be randomly sampled from the range specified by
        ``ratio_range``. Then it would be multiplied with ``scale`` to
        generate the sampled scale.

        Args:
            scale (tuple): Image scale base to multiply with ratio.
            ratio_range (tuple[float]): The minimum and maximum ratio to
                scale the ``scale``.

        Returns:
            tuple: The targeted scale of the image to be resized.
        """
        assert isinstance(scale, tuple) and len(scale) == 2
        min_ratio, max_ratio = ratio_range
        assert min_ratio <= max_ratio
        ratio = np.random.random_sample() * (max_ratio -
                                             min_ratio) + min_ratio
        scale = int(scale[0] * ratio), int(scale[1] * ratio)
        return scale

    @cache_randomness
    def _random_scale(self) -> tuple:
        """Private function to randomly sample a scale according to the type
        of ``scale``.

        Returns:
            tuple: The targeted scale of the image to be resized.
        """
        if mmengine.is_tuple_of(self.scale, int):
            assert self.ratio_range is not None and len(self.ratio_range) == 2
            scale = self._random_sample_ratio(
                self.scale,  # type: ignore
                self.ratio_range)
        elif mmengine.is_seq_of(self.scale, tuple):
            scale = self._random_sample(self.scale)  # type: ignore
        else:
            raise NotImplementedError('Do not support sampling function '
                                      f'for "{self.scale}"')

        return scale

    def transform(self, results: dict) -> dict:
        """Transform function to resize images, bounding boxes, semantic
        segmentation map.

        Args:
            results (dict): Result dict from loading pipeline.

        Returns:
            dict: Resized results, ``img``, ``gt_bboxes``, ``gt_seg_map``,
            ``gt_keypoints``, ``scale``, ``scale_factor``, ``img_shape``, and
            ``keep_ratio`` keys are updated in result dict.
        """
        results['scale'] = self._random_scale()
        self.resize.scale = results['scale']
        results = self.resize(results)
        return results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(scale={self.scale}, '
        repr_str += f'ratio_range={self.ratio_range}, '
        repr_str += f'resize_cfg={self.resize_cfg})'
        return repr_str
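
# Usage sketch for ``RandomResize`` with ``ratio_range`` (illustrative):
#
#   resize = RandomResize(scale=(640, 480), ratio_range=(0.5, 2.0),
#                         keep_ratio=True)
#   results = resize(dict(img=np.zeros((480, 640, 3), dtype=np.uint8)))
#   # each call samples ratio ~ Uniform(0.5, 2.0) and resizes toward
#   # (int(640 * ratio), int(480 * ratio)); keep_ratio preserves aspect.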