import random import warnings from typing import Union import torch from torch import Tensor from torchvision.transforms import RandomCrop, functional as F, CenterCrop, RandomHorizontalFlip, PILToTensor from torchvision.transforms.functional import _get_image_size as get_image_size from taming.data.helper_types import BoundingBox, Image pil_to_tensor = PILToTensor() def convert_pil_to_tensor(image: Image) -> Tensor: with warnings.catch_warnings(): # to filter PyTorch UserWarning as described here: https://github.com/pytorch/vision/issues/2194 warnings.simplefilter("ignore") return pil_to_tensor(image) class RandomCrop1dReturnCoordinates(RandomCrop): def forward(self, img: Image) -> (BoundingBox, Image): """ Additionally to cropping, returns the relative coordinates of the crop bounding box. Args: img (PIL Image or Tensor): Image to be cropped. Returns: Bounding box: x0, y0, w, h PIL Image or Tensor: Cropped image. Based on: torchvision.transforms.RandomCrop, torchvision 1.7.0 """ if self.padding is not None: img = F.pad(img, self.padding, self.fill, self.padding_mode) width, height = get_image_size(img) # pad the width if needed if self.pad_if_needed and width < self.size[1]: padding = [self.size[1] - width, 0] img = F.pad(img, padding, self.fill, self.padding_mode) # pad the height if needed if self.pad_if_needed and height < self.size[0]: padding = [0, self.size[0] - height] img = F.pad(img, padding, self.fill, self.padding_mode) i, j, h, w = self.get_params(img, self.size) bbox = (j / width, i / height, w / width, h / height) # x0, y0, w, h return bbox, F.crop(img, i, j, h, w) class Random2dCropReturnCoordinates(torch.nn.Module): """ Additionally to cropping, returns the relative coordinates of the crop bounding box. Args: img (PIL Image or Tensor): Image to be cropped. Returns: Bounding box: x0, y0, w, h PIL Image or Tensor: Cropped image. Based on: torchvision.transforms.RandomCrop, torchvision 1.7.0 """ def __init__(self, min_size: int): super().__init__() self.min_size = min_size def forward(self, img: Image) -> (BoundingBox, Image): width, height = get_image_size(img) max_size = min(width, height) if max_size <= self.min_size: size = max_size else: size = random.randint(self.min_size, max_size) top = random.randint(0, height - size) left = random.randint(0, width - size) bbox = left / width, top / height, size / width, size / height return bbox, F.crop(img, top, left, size, size) class CenterCropReturnCoordinates(CenterCrop): @staticmethod def get_bbox_of_center_crop(width: int, height: int) -> BoundingBox: if width > height: w = height / width h = 1.0 x0 = 0.5 - w / 2 y0 = 0. else: w = 1.0 h = width / height x0 = 0. y0 = 0.5 - h / 2 return x0, y0, w, h def forward(self, img: Union[Image, Tensor]) -> (BoundingBox, Union[Image, Tensor]): """ Additionally to cropping, returns the relative coordinates of the crop bounding box. Args: img (PIL Image or Tensor): Image to be cropped. Returns: Bounding box: x0, y0, w, h PIL Image or Tensor: Cropped image. Based on: torchvision.transforms.RandomHorizontalFlip (version 1.7.0) """ width, height = get_image_size(img) return self.get_bbox_of_center_crop(width, height), F.center_crop(img, self.size) class RandomHorizontalFlipReturn(RandomHorizontalFlip): def forward(self, img: Image) -> (bool, Image): """ Additionally to flipping, returns a boolean whether it was flipped or not. Args: img (PIL Image or Tensor): Image to be flipped. Returns: flipped: whether the image was flipped or not PIL Image or Tensor: Randomly flipped image. Based on: torchvision.transforms.RandomHorizontalFlip (version 1.7.0) """ if torch.rand(1) < self.p: return True, F.hflip(img) return False, img