Spaces:
Running
on
Zero
Running
on
Zero
# Copyright (c) Meta Platforms, Inc. and affiliates. | |
# All rights reserved. | |
# | |
# This source code is licensed under the license found in the | |
# LICENSE file in the root directory of this source tree. | |
import numbers | |
from typing import List, Optional, Tuple, Union, no_type_check | |
import cv2 | |
import numpy as np | |
from mmengine.utils import to_2tuple | |
from .io import imread_backend | |
try: | |
from PIL import Image | |
except ImportError: | |
Image = None | |
def _scale_size( | |
size: Tuple[int, int], | |
scale: Union[float, int, Tuple[float, float], Tuple[int, int]], | |
) -> Tuple[int, int]: | |
"""Rescale a size by a ratio. | |
Args: | |
size (tuple[int]): (w, h). | |
scale (float | int | tuple(float) | tuple(int)): Scaling factor. | |
Returns: | |
tuple[int]: scaled size. | |
""" | |
if isinstance(scale, (float, int)): | |
scale = (scale, scale) | |
w, h = size | |
return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5) | |
cv2_interp_codes = { | |
'nearest': cv2.INTER_NEAREST, | |
'bilinear': cv2.INTER_LINEAR, | |
'bicubic': cv2.INTER_CUBIC, | |
'area': cv2.INTER_AREA, | |
'lanczos': cv2.INTER_LANCZOS4 | |
} | |
cv2_border_modes = { | |
'constant': cv2.BORDER_CONSTANT, | |
'replicate': cv2.BORDER_REPLICATE, | |
'reflect': cv2.BORDER_REFLECT, | |
'wrap': cv2.BORDER_WRAP, | |
'reflect_101': cv2.BORDER_REFLECT_101, | |
'transparent': cv2.BORDER_TRANSPARENT, | |
'isolated': cv2.BORDER_ISOLATED | |
} | |
# Pillow >=v9.1.0 use a slightly different naming scheme for filters. | |
# Set pillow_interp_codes according to the naming scheme used. | |
if Image is not None: | |
if hasattr(Image, 'Resampling'): | |
pillow_interp_codes = { | |
'nearest': Image.Resampling.NEAREST, | |
'bilinear': Image.Resampling.BILINEAR, | |
'bicubic': Image.Resampling.BICUBIC, | |
'box': Image.Resampling.BOX, | |
'lanczos': Image.Resampling.LANCZOS, | |
'hamming': Image.Resampling.HAMMING | |
} | |
else: | |
pillow_interp_codes = { | |
'nearest': Image.NEAREST, | |
'bilinear': Image.BILINEAR, | |
'bicubic': Image.BICUBIC, | |
'box': Image.BOX, | |
'lanczos': Image.LANCZOS, | |
'hamming': Image.HAMMING | |
} | |
def imresize( | |
img: np.ndarray, | |
size: Tuple[int, int], | |
return_scale: bool = False, | |
interpolation: str = 'bilinear', | |
out: Optional[np.ndarray] = None, | |
backend: Optional[str] = None | |
) -> Union[Tuple[np.ndarray, float, float], np.ndarray]: | |
"""Resize image to a given size. | |
Args: | |
img (ndarray): The input image. | |
size (tuple[int]): Target size (w, h). | |
return_scale (bool): Whether to return `w_scale` and `h_scale`. | |
interpolation (str): Interpolation method, accepted values are | |
"nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' | |
backend, "nearest", "bilinear" for 'pillow' backend. | |
out (ndarray): The output destination. | |
backend (str | None): The image resize backend type. Options are `cv2`, | |
`pillow`, `None`. If backend is None, the global imread_backend | |
specified by ``mmcv.use_backend()`` will be used. Default: None. | |
Returns: | |
tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or | |
`resized_img`. | |
""" | |
h, w = img.shape[:2] | |
if backend is None: | |
backend = imread_backend | |
if backend not in ['cv2', 'pillow']: | |
raise ValueError(f'backend: {backend} is not supported for resize.' | |
f"Supported backends are 'cv2', 'pillow'") | |
if backend == 'pillow': | |
assert img.dtype == np.uint8, 'Pillow backend only support uint8 type' | |
pil_image = Image.fromarray(img) | |
pil_image = pil_image.resize(size, pillow_interp_codes[interpolation]) | |
resized_img = np.array(pil_image) | |
else: | |
resized_img = cv2.resize( | |
img, size, dst=out, interpolation=cv2_interp_codes[interpolation]) | |
if not return_scale: | |
return resized_img | |
else: | |
w_scale = size[0] / w | |
h_scale = size[1] / h | |
return resized_img, w_scale, h_scale | |
def imresize_to_multiple( | |
img: np.ndarray, | |
divisor: Union[int, Tuple[int, int]], | |
size: Union[int, Tuple[int, int], None] = None, | |
scale_factor: Union[float, int, Tuple[float, float], Tuple[int, int], | |
None] = None, | |
keep_ratio: bool = False, | |
return_scale: bool = False, | |
interpolation: str = 'bilinear', | |
out: Optional[np.ndarray] = None, | |
backend: Optional[str] = None | |
) -> Union[Tuple[np.ndarray, float, float], np.ndarray]: | |
"""Resize image according to a given size or scale factor and then rounds | |
up the the resized or rescaled image size to the nearest value that can be | |
divided by the divisor. | |
Args: | |
img (ndarray): The input image. | |
divisor (int | tuple): Resized image size will be a multiple of | |
divisor. If divisor is a tuple, divisor should be | |
(w_divisor, h_divisor). | |
size (None | int | tuple[int]): Target size (w, h). Default: None. | |
scale_factor (None | float | int | tuple[float] | tuple[int]): | |
Multiplier for spatial size. Should match input size if it is a | |
tuple and the 2D style is (w_scale_factor, h_scale_factor). | |
Default: None. | |
keep_ratio (bool): Whether to keep the aspect ratio when resizing the | |
image. Default: False. | |
return_scale (bool): Whether to return `w_scale` and `h_scale`. | |
interpolation (str): Interpolation method, accepted values are | |
"nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2' | |
backend, "nearest", "bilinear" for 'pillow' backend. | |
out (ndarray): The output destination. | |
backend (str | None): The image resize backend type. Options are `cv2`, | |
`pillow`, `None`. If backend is None, the global imread_backend | |
specified by ``mmcv.use_backend()`` will be used. Default: None. | |
Returns: | |
tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or | |
`resized_img`. | |
""" | |
h, w = img.shape[:2] | |
if size is not None and scale_factor is not None: | |
raise ValueError('only one of size or scale_factor should be defined') | |
elif size is None and scale_factor is None: | |
raise ValueError('one of size or scale_factor should be defined') | |
elif size is not None: | |
size = to_2tuple(size) | |
if keep_ratio: | |
size = rescale_size((w, h), size, return_scale=False) | |
else: | |
size = _scale_size((w, h), scale_factor) | |
divisor = to_2tuple(divisor) | |
size = tuple(int(np.ceil(s / d)) * d for s, d in zip(size, divisor)) | |
resized_img, w_scale, h_scale = imresize( | |
img, | |
size, | |
return_scale=True, | |
interpolation=interpolation, | |
out=out, | |
backend=backend) | |
if return_scale: | |
return resized_img, w_scale, h_scale | |
else: | |
return resized_img | |
def imresize_like( | |
img: np.ndarray, | |
dst_img: np.ndarray, | |
return_scale: bool = False, | |
interpolation: str = 'bilinear', | |
backend: Optional[str] = None | |
) -> Union[Tuple[np.ndarray, float, float], np.ndarray]: | |
"""Resize image to the same size of a given image. | |
Args: | |
img (ndarray): The input image. | |
dst_img (ndarray): The target image. | |
return_scale (bool): Whether to return `w_scale` and `h_scale`. | |
interpolation (str): Same as :func:`resize`. | |
backend (str | None): Same as :func:`resize`. | |
Returns: | |
tuple or ndarray: (`resized_img`, `w_scale`, `h_scale`) or | |
`resized_img`. | |
""" | |
h, w = dst_img.shape[:2] | |
return imresize(img, (w, h), return_scale, interpolation, backend=backend) | |
def rescale_size(old_size: tuple, | |
scale: Union[float, int, Tuple[int, int]], | |
return_scale: bool = False) -> tuple: | |
"""Calculate the new size to be rescaled to. | |
Args: | |
old_size (tuple[int]): The old size (w, h) of image. | |
scale (float | int | tuple[int]): The scaling factor or maximum size. | |
If it is a float number or an integer, then the image will be | |
rescaled by this factor, else if it is a tuple of 2 integers, then | |
the image will be rescaled as large as possible within the scale. | |
return_scale (bool): Whether to return the scaling factor besides the | |
rescaled image size. | |
Returns: | |
tuple[int]: The new rescaled image size. | |
""" | |
w, h = old_size | |
if isinstance(scale, (float, int)): | |
if scale <= 0: | |
raise ValueError(f'Invalid scale {scale}, must be positive.') | |
scale_factor = scale | |
elif isinstance(scale, tuple): | |
max_long_edge = max(scale) | |
max_short_edge = min(scale) | |
scale_factor = min(max_long_edge / max(h, w), | |
max_short_edge / min(h, w)) | |
else: | |
raise TypeError( | |
f'Scale must be a number or tuple of int, but got {type(scale)}') | |
new_size = _scale_size((w, h), scale_factor) | |
if return_scale: | |
return new_size, scale_factor | |
else: | |
return new_size | |
def imrescale( | |
img: np.ndarray, | |
scale: Union[float, int, Tuple[int, int]], | |
return_scale: bool = False, | |
interpolation: str = 'bilinear', | |
backend: Optional[str] = None | |
) -> Union[np.ndarray, Tuple[np.ndarray, float]]: | |
"""Resize image while keeping the aspect ratio. | |
Args: | |
img (ndarray): The input image. | |
scale (float | int | tuple[int]): The scaling factor or maximum size. | |
If it is a float number or an integer, then the image will be | |
rescaled by this factor, else if it is a tuple of 2 integers, then | |
the image will be rescaled as large as possible within the scale. | |
return_scale (bool): Whether to return the scaling factor besides the | |
rescaled image. | |
interpolation (str): Same as :func:`resize`. | |
backend (str | None): Same as :func:`resize`. | |
Returns: | |
ndarray: The rescaled image. | |
""" | |
h, w = img.shape[:2] | |
new_size, scale_factor = rescale_size((w, h), scale, return_scale=True) | |
rescaled_img = imresize( | |
img, new_size, interpolation=interpolation, backend=backend) | |
if return_scale: | |
return rescaled_img, scale_factor | |
else: | |
return rescaled_img | |
def imflip(img: np.ndarray, direction: str = 'horizontal') -> np.ndarray: | |
"""Flip an image horizontally or vertically. | |
Args: | |
img (ndarray): Image to be flipped. | |
direction (str): The flip direction, either "horizontal" or | |
"vertical" or "diagonal". | |
Returns: | |
ndarray: The flipped image. | |
""" | |
assert direction in ['horizontal', 'vertical', 'diagonal'] | |
if direction == 'horizontal': | |
return np.flip(img, axis=1) | |
elif direction == 'vertical': | |
return np.flip(img, axis=0) | |
else: | |
return np.flip(img, axis=(0, 1)) | |
def imflip_(img: np.ndarray, direction: str = 'horizontal') -> np.ndarray: | |
"""Inplace flip an image horizontally or vertically. | |
Args: | |
img (ndarray): Image to be flipped. | |
direction (str): The flip direction, either "horizontal" or | |
"vertical" or "diagonal". | |
Returns: | |
ndarray: The flipped image (inplace). | |
""" | |
assert direction in ['horizontal', 'vertical', 'diagonal'] | |
if direction == 'horizontal': | |
return cv2.flip(img, 1, img) | |
elif direction == 'vertical': | |
return cv2.flip(img, 0, img) | |
else: | |
return cv2.flip(img, -1, img) | |
def imrotate(img: np.ndarray, | |
angle: float, | |
center: Optional[Tuple[float, float]] = None, | |
scale: float = 1.0, | |
border_value: int = 0, | |
interpolation: str = 'bilinear', | |
auto_bound: bool = False, | |
border_mode: str = 'constant') -> np.ndarray: | |
"""Rotate an image. | |
Args: | |
img (np.ndarray): Image to be rotated. | |
angle (float): Rotation angle in degrees, positive values mean | |
clockwise rotation. | |
center (tuple[float], optional): Center point (w, h) of the rotation in | |
the source image. If not specified, the center of the image will be | |
used. | |
scale (float): Isotropic scale factor. | |
border_value (int): Border value used in case of a constant border. | |
Defaults to 0. | |
interpolation (str): Same as :func:`resize`. | |
auto_bound (bool): Whether to adjust the image size to cover the whole | |
rotated image. | |
border_mode (str): Pixel extrapolation method. Defaults to 'constant'. | |
Returns: | |
np.ndarray: The rotated image. | |
""" | |
if center is not None and auto_bound: | |
raise ValueError('`auto_bound` conflicts with `center`') | |
h, w = img.shape[:2] | |
if center is None: | |
center = ((w - 1) * 0.5, (h - 1) * 0.5) | |
assert isinstance(center, tuple) | |
matrix = cv2.getRotationMatrix2D(center, -angle, scale) | |
if auto_bound: | |
cos = np.abs(matrix[0, 0]) | |
sin = np.abs(matrix[0, 1]) | |
new_w = h * sin + w * cos | |
new_h = h * cos + w * sin | |
matrix[0, 2] += (new_w - w) * 0.5 | |
matrix[1, 2] += (new_h - h) * 0.5 | |
w = int(np.round(new_w)) | |
h = int(np.round(new_h)) | |
rotated = cv2.warpAffine( | |
img, | |
matrix, (w, h), | |
flags=cv2_interp_codes[interpolation], | |
borderMode=cv2_border_modes[border_mode], | |
borderValue=border_value) | |
return rotated | |
def bbox_clip(bboxes: np.ndarray, img_shape: Tuple[int, int]) -> np.ndarray: | |
"""Clip bboxes to fit the image shape. | |
Args: | |
bboxes (ndarray): Shape (..., 4*k) | |
img_shape (tuple[int]): (height, width) of the image. | |
Returns: | |
ndarray: Clipped bboxes. | |
""" | |
assert bboxes.shape[-1] % 4 == 0 | |
cmin = np.empty(bboxes.shape[-1], dtype=bboxes.dtype) | |
cmin[0::2] = img_shape[1] - 1 | |
cmin[1::2] = img_shape[0] - 1 | |
clipped_bboxes = np.maximum(np.minimum(bboxes, cmin), 0) | |
return clipped_bboxes | |
def bbox_scaling(bboxes: np.ndarray, | |
scale: float, | |
clip_shape: Optional[Tuple[int, int]] = None) -> np.ndarray: | |
"""Scaling bboxes w.r.t the box center. | |
Args: | |
bboxes (ndarray): Shape(..., 4). | |
scale (float): Scaling factor. | |
clip_shape (tuple[int], optional): If specified, bboxes that exceed the | |
boundary will be clipped according to the given shape (h, w). | |
Returns: | |
ndarray: Scaled bboxes. | |
""" | |
if float(scale) == 1.0: | |
scaled_bboxes = bboxes.copy() | |
else: | |
w = bboxes[..., 2] - bboxes[..., 0] + 1 | |
h = bboxes[..., 3] - bboxes[..., 1] + 1 | |
dw = (w * (scale - 1)) * 0.5 | |
dh = (h * (scale - 1)) * 0.5 | |
scaled_bboxes = bboxes + np.stack((-dw, -dh, dw, dh), axis=-1) | |
if clip_shape is not None: | |
return bbox_clip(scaled_bboxes, clip_shape) | |
else: | |
return scaled_bboxes | |
def imcrop( | |
img: np.ndarray, | |
bboxes: np.ndarray, | |
scale: float = 1.0, | |
pad_fill: Union[float, list, None] = None | |
) -> Union[np.ndarray, List[np.ndarray]]: | |
"""Crop image patches. | |
3 steps: scale the bboxes -> clip bboxes -> crop and pad. | |
Args: | |
img (ndarray): Image to be cropped. | |
bboxes (ndarray): Shape (k, 4) or (4, ), location of cropped bboxes. | |
scale (float, optional): Scale ratio of bboxes, the default value | |
1.0 means no scaling. | |
pad_fill (Number | list[Number]): Value to be filled for padding. | |
Default: None, which means no padding. | |
Returns: | |
list[ndarray] | ndarray: The cropped image patches. | |
""" | |
chn = 1 if img.ndim == 2 else img.shape[2] | |
if pad_fill is not None: | |
if isinstance(pad_fill, (int, float)): | |
pad_fill = [pad_fill for _ in range(chn)] | |
assert len(pad_fill) == chn | |
_bboxes = bboxes[None, ...] if bboxes.ndim == 1 else bboxes | |
scaled_bboxes = bbox_scaling(_bboxes, scale).astype(np.int32) | |
clipped_bbox = bbox_clip(scaled_bboxes, img.shape) | |
patches = [] | |
for i in range(clipped_bbox.shape[0]): | |
x1, y1, x2, y2 = tuple(clipped_bbox[i, :]) | |
if pad_fill is None: | |
patch = img[y1:y2 + 1, x1:x2 + 1, ...] | |
else: | |
_x1, _y1, _x2, _y2 = tuple(scaled_bboxes[i, :]) | |
patch_h = _y2 - _y1 + 1 | |
patch_w = _x2 - _x1 + 1 | |
if chn == 1: | |
patch_shape = (patch_h, patch_w) | |
else: | |
patch_shape = (patch_h, patch_w, chn) # type: ignore | |
patch = np.array( | |
pad_fill, dtype=img.dtype) * np.ones( | |
patch_shape, dtype=img.dtype) | |
x_start = 0 if _x1 >= 0 else -_x1 | |
y_start = 0 if _y1 >= 0 else -_y1 | |
w = x2 - x1 + 1 | |
h = y2 - y1 + 1 | |
patch[y_start:y_start + h, x_start:x_start + w, | |
...] = img[y1:y1 + h, x1:x1 + w, ...] | |
patches.append(patch) | |
if bboxes.ndim == 1: | |
return patches[0] | |
else: | |
return patches | |
def impad(img: np.ndarray, | |
*, | |
shape: Optional[Tuple[int, int]] = None, | |
padding: Union[int, tuple, None] = None, | |
pad_val: Union[float, List] = 0, | |
padding_mode: str = 'constant') -> np.ndarray: | |
"""Pad the given image to a certain shape or pad on all sides with | |
specified padding mode and padding value. | |
Args: | |
img (ndarray): Image to be padded. | |
shape (tuple[int]): Expected padding shape (h, w). Default: None. | |
padding (int or tuple[int]): Padding on each border. If a single int is | |
provided this is used to pad all borders. If tuple of length 2 is | |
provided this is the padding on left/right and top/bottom | |
respectively. If a tuple of length 4 is provided this is the | |
padding for the left, top, right and bottom borders respectively. | |
Default: None. Note that `shape` and `padding` can not be both | |
set. | |
pad_val (Number | Sequence[Number]): Values to be filled in padding | |
areas when padding_mode is 'constant'. Default: 0. | |
padding_mode (str): Type of padding. Should be: constant, edge, | |
reflect or symmetric. Default: constant. | |
- constant: pads with a constant value, this value is specified | |
with pad_val. | |
- edge: pads with the last value at the edge of the image. | |
- reflect: pads with reflection of image without repeating the last | |
value on the edge. For example, padding [1, 2, 3, 4] with 2 | |
elements on both sides in reflect mode will result in | |
[3, 2, 1, 2, 3, 4, 3, 2]. | |
- symmetric: pads with reflection of image repeating the last value | |
on the edge. For example, padding [1, 2, 3, 4] with 2 elements on | |
both sides in symmetric mode will result in | |
[2, 1, 1, 2, 3, 4, 4, 3] | |
Returns: | |
ndarray: The padded image. | |
""" | |
assert (shape is not None) ^ (padding is not None) | |
if shape is not None: | |
width = max(shape[1] - img.shape[1], 0) | |
height = max(shape[0] - img.shape[0], 0) | |
padding = (0, 0, width, height) | |
# check pad_val | |
if isinstance(pad_val, tuple): | |
assert len(pad_val) == img.shape[-1] | |
elif not isinstance(pad_val, numbers.Number): | |
raise TypeError('pad_val must be a int or a tuple. ' | |
f'But received {type(pad_val)}') | |
# check padding | |
if isinstance(padding, tuple) and len(padding) in [2, 4]: | |
if len(padding) == 2: | |
padding = (padding[0], padding[1], padding[0], padding[1]) | |
elif isinstance(padding, numbers.Number): | |
padding = (padding, padding, padding, padding) | |
else: | |
raise ValueError('Padding must be a int or a 2, or 4 element tuple.' | |
f'But received {padding}') | |
# check padding mode | |
assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric'] | |
border_type = { | |
'constant': cv2.BORDER_CONSTANT, | |
'edge': cv2.BORDER_REPLICATE, | |
'reflect': cv2.BORDER_REFLECT_101, | |
'symmetric': cv2.BORDER_REFLECT | |
} | |
img = cv2.copyMakeBorder( | |
img, | |
padding[1], | |
padding[3], | |
padding[0], | |
padding[2], | |
border_type[padding_mode], | |
value=pad_val) | |
return img | |
def impad_to_multiple(img: np.ndarray, | |
divisor: int, | |
pad_val: Union[float, List] = 0) -> np.ndarray: | |
"""Pad an image to ensure each edge to be multiple to some number. | |
Args: | |
img (ndarray): Image to be padded. | |
divisor (int): Padded image edges will be multiple to divisor. | |
pad_val (Number | Sequence[Number]): Same as :func:`impad`. | |
Returns: | |
ndarray: The padded image. | |
""" | |
pad_h = int(np.ceil(img.shape[0] / divisor)) * divisor | |
pad_w = int(np.ceil(img.shape[1] / divisor)) * divisor | |
return impad(img, shape=(pad_h, pad_w), pad_val=pad_val) | |
def cutout(img: np.ndarray, | |
shape: Union[int, Tuple[int, int]], | |
pad_val: Union[int, float, tuple] = 0) -> np.ndarray: | |
"""Randomly cut out a rectangle from the original img. | |
Args: | |
img (ndarray): Image to be cutout. | |
shape (int | tuple[int]): Expected cutout shape (h, w). If given as a | |
int, the value will be used for both h and w. | |
pad_val (int | float | tuple[int | float]): Values to be filled in the | |
cut area. Defaults to 0. | |
Returns: | |
ndarray: The cutout image. | |
""" | |
channels = 1 if img.ndim == 2 else img.shape[2] | |
if isinstance(shape, int): | |
cut_h, cut_w = shape, shape | |
else: | |
assert isinstance(shape, tuple) and len(shape) == 2, \ | |
f'shape must be a int or a tuple with length 2, but got type ' \ | |
f'{type(shape)} instead.' | |
cut_h, cut_w = shape | |
if isinstance(pad_val, (int, float)): | |
pad_val = tuple([pad_val] * channels) | |
elif isinstance(pad_val, tuple): | |
assert len(pad_val) == channels, \ | |
'Expected the num of elements in tuple equals the channels' \ | |
'of input image. Found {} vs {}'.format( | |
len(pad_val), channels) | |
else: | |
raise TypeError(f'Invalid type {type(pad_val)} for `pad_val`') | |
img_h, img_w = img.shape[:2] | |
y0 = np.random.uniform(img_h) | |
x0 = np.random.uniform(img_w) | |
y1 = int(max(0, y0 - cut_h / 2.)) | |
x1 = int(max(0, x0 - cut_w / 2.)) | |
y2 = min(img_h, y1 + cut_h) | |
x2 = min(img_w, x1 + cut_w) | |
if img.ndim == 2: | |
patch_shape = (y2 - y1, x2 - x1) | |
else: | |
patch_shape = (y2 - y1, x2 - x1, channels) # type: ignore | |
img_cutout = img.copy() | |
patch = np.array( | |
pad_val, dtype=img.dtype) * np.ones( | |
patch_shape, dtype=img.dtype) | |
img_cutout[y1:y2, x1:x2, ...] = patch | |
return img_cutout | |
def _get_shear_matrix(magnitude: Union[int, float], | |
direction: str = 'horizontal') -> np.ndarray: | |
"""Generate the shear matrix for transformation. | |
Args: | |
magnitude (int | float): The magnitude used for shear. | |
direction (str): The flip direction, either "horizontal" | |
or "vertical". | |
Returns: | |
ndarray: The shear matrix with dtype float32. | |
""" | |
if direction == 'horizontal': | |
shear_matrix = np.float32([[1, magnitude, 0], [0, 1, 0]]) | |
elif direction == 'vertical': | |
shear_matrix = np.float32([[1, 0, 0], [magnitude, 1, 0]]) | |
return shear_matrix | |
def imshear(img: np.ndarray, | |
magnitude: Union[int, float], | |
direction: str = 'horizontal', | |
border_value: Union[int, Tuple[int, int]] = 0, | |
interpolation: str = 'bilinear') -> np.ndarray: | |
"""Shear an image. | |
Args: | |
img (ndarray): Image to be sheared with format (h, w) | |
or (h, w, c). | |
magnitude (int | float): The magnitude used for shear. | |
direction (str): The flip direction, either "horizontal" | |
or "vertical". | |
border_value (int | tuple[int]): Value used in case of a | |
constant border. | |
interpolation (str): Same as :func:`resize`. | |
Returns: | |
ndarray: The sheared image. | |
""" | |
assert direction in ['horizontal', | |
'vertical'], f'Invalid direction: {direction}' | |
height, width = img.shape[:2] | |
if img.ndim == 2: | |
channels = 1 | |
elif img.ndim == 3: | |
channels = img.shape[-1] | |
if isinstance(border_value, int): | |
border_value = tuple([border_value] * channels) # type: ignore | |
elif isinstance(border_value, tuple): | |
assert len(border_value) == channels, \ | |
'Expected the num of elements in tuple equals the channels' \ | |
'of input image. Found {} vs {}'.format( | |
len(border_value), channels) | |
else: | |
raise ValueError( | |
f'Invalid type {type(border_value)} for `border_value`') | |
shear_matrix = _get_shear_matrix(magnitude, direction) | |
sheared = cv2.warpAffine( | |
img, | |
shear_matrix, | |
(width, height), | |
# Note case when the number elements in `border_value` | |
# greater than 3 (e.g. shearing masks whose channels large | |
# than 3) will raise TypeError in `cv2.warpAffine`. | |
# Here simply slice the first 3 values in `border_value`. | |
borderValue=border_value[:3], # type: ignore | |
flags=cv2_interp_codes[interpolation]) | |
return sheared | |
def _get_translate_matrix(offset: Union[int, float], | |
direction: str = 'horizontal') -> np.ndarray: | |
"""Generate the translate matrix. | |
Args: | |
offset (int | float): The offset used for translate. | |
direction (str): The translate direction, either | |
"horizontal" or "vertical". | |
Returns: | |
ndarray: The translate matrix with dtype float32. | |
""" | |
if direction == 'horizontal': | |
translate_matrix = np.float32([[1, 0, offset], [0, 1, 0]]) | |
elif direction == 'vertical': | |
translate_matrix = np.float32([[1, 0, 0], [0, 1, offset]]) | |
return translate_matrix | |
def imtranslate(img: np.ndarray, | |
offset: Union[int, float], | |
direction: str = 'horizontal', | |
border_value: Union[int, tuple] = 0, | |
interpolation: str = 'bilinear') -> np.ndarray: | |
"""Translate an image. | |
Args: | |
img (ndarray): Image to be translated with format | |
(h, w) or (h, w, c). | |
offset (int | float): The offset used for translate. | |
direction (str): The translate direction, either "horizontal" | |
or "vertical". | |
border_value (int | tuple[int]): Value used in case of a | |
constant border. | |
interpolation (str): Same as :func:`resize`. | |
Returns: | |
ndarray: The translated image. | |
""" | |
assert direction in ['horizontal', | |
'vertical'], f'Invalid direction: {direction}' | |
height, width = img.shape[:2] | |
if img.ndim == 2: | |
channels = 1 | |
elif img.ndim == 3: | |
channels = img.shape[-1] | |
if isinstance(border_value, int): | |
border_value = tuple([border_value] * channels) | |
elif isinstance(border_value, tuple): | |
assert len(border_value) == channels, \ | |
'Expected the num of elements in tuple equals the channels' \ | |
'of input image. Found {} vs {}'.format( | |
len(border_value), channels) | |
else: | |
raise ValueError( | |
f'Invalid type {type(border_value)} for `border_value`.') | |
translate_matrix = _get_translate_matrix(offset, direction) | |
translated = cv2.warpAffine( | |
img, | |
translate_matrix, | |
(width, height), | |
# Note case when the number elements in `border_value` | |
# greater than 3 (e.g. translating masks whose channels | |
# large than 3) will raise TypeError in `cv2.warpAffine`. | |
# Here simply slice the first 3 values in `border_value`. | |
borderValue=border_value[:3], | |
flags=cv2_interp_codes[interpolation]) | |
return translated | |