Spaces:
Build error
Build error
import random | |
import warnings | |
from typing import Union | |
import torch | |
from torch import Tensor | |
from torchvision.transforms import RandomCrop, functional as F, CenterCrop, RandomHorizontalFlip, PILToTensor | |
from torchvision.transforms.functional import _get_image_size as get_image_size | |
from taming.data.helper_types import BoundingBox, Image | |
# Single shared converter instance, reused by every call below.
pil_to_tensor = PILToTensor()


def convert_pil_to_tensor(image: Image) -> Tensor:
    """
    Convert an image to a tensor with the module-level ``pil_to_tensor`` transform.

    All warnings raised during the conversion are silenced; the previous
    warning filters are restored once the conversion is done.
    Args:
        image: Image to convert.
    Returns:
        Whatever ``pil_to_tensor`` returns for ``image``.
    """
    with warnings.catch_warnings():
        # Filters the PyTorch UserWarning described in
        # https://github.com/pytorch/vision/issues/2194
        warnings.simplefilter("ignore")
        converted = pil_to_tensor(image)
    return converted
class RandomCrop1dReturnCoordinates(RandomCrop):
    def forward(self, img: Image) -> (BoundingBox, Image):
        """
        Additionally to cropping, returns the relative coordinates of the crop bounding box.

        The coordinates are expressed as fractions of the image the crop is
        actually taken from, i.e. the image after any padding applied here.
        Args:
            img (PIL Image or Tensor): Image to be cropped.
        Returns:
            Bounding box: x0, y0, w, h
            PIL Image or Tensor: Cropped image.
        Based on:
            torchvision.transforms.RandomCrop, torchvision 1.7.0
        """
        if self.padding is not None:
            img = F.pad(img, self.padding, self.fill, self.padding_mode)

        width, height = get_image_size(img)
        # pad the width if needed
        if self.pad_if_needed and width < self.size[1]:
            padding = [self.size[1] - width, 0]
            img = F.pad(img, padding, self.fill, self.padding_mode)
        # pad the height if needed
        if self.pad_if_needed and height < self.size[0]:
            padding = [0, self.size[0] - height]
            img = F.pad(img, padding, self.fill, self.padding_mode)

        # The crop offsets below are measured in the (possibly padded) image, so
        # the normalisation has to use that image's size as well. Reusing the
        # pre-padding width/height would yield fractions larger than 1 whenever
        # ``pad_if_needed`` enlarged the image.
        width, height = get_image_size(img)

        i, j, h, w = self.get_params(img, self.size)
        bbox = (j / width, i / height, w / width, h / height)  # x0, y0, w, h
        return bbox, F.crop(img, i, j, h, w)
class Random2dCropReturnCoordinates(torch.nn.Module):
    """
    Crops a random square patch and additionally returns the relative
    coordinates of the crop bounding box.

    The side length is drawn uniformly between ``min_size`` and the shorter
    image edge (both inclusive). If the shorter edge does not exceed
    ``min_size``, the side length is the shorter edge itself.
    Args:
        min_size (int): Lower bound for the randomly drawn side length.
    Forward args:
        img (PIL Image or Tensor): Image to be cropped.
    Forward returns:
        Bounding box: x0, y0, w, h (fractions of the input width / height)
        PIL Image or Tensor: Cropped image.
    Based on:
        torchvision.transforms.RandomCrop, torchvision 1.7.0
    """

    def __init__(self, min_size: int):
        super().__init__()
        self.min_size = min_size

    def forward(self, img: Image) -> (BoundingBox, Image):
        width, height = get_image_size(img)
        shorter_edge = min(width, height)

        # Only draw a side length when there is room above min_size.
        side = shorter_edge
        if shorter_edge > self.min_size:
            side = random.randint(self.min_size, shorter_edge)

        # Place the square anywhere it still fits (vertical offset drawn first).
        y_offset = random.randint(0, height - side)
        x_offset = random.randint(0, width - side)

        relative_box = (
            x_offset / width,
            y_offset / height,
            side / width,
            side / height,
        )
        return relative_box, F.crop(img, y_offset, x_offset, side, side)
class CenterCropReturnCoordinates(CenterCrop):
    # Must be a staticmethod: ``forward`` calls it through ``self`` with two
    # arguments, which would otherwise bind ``self`` as ``width`` and fail.
    @staticmethod
    def get_bbox_of_center_crop(width: int, height: int) -> BoundingBox:
        """
        Relative bounding box of the largest centered square in a width x height image.
        Args:
            width: Image width.
            height: Image height.
        Returns:
            Bounding box: x0, y0, w, h as fractions of the image width / height.
        """
        if width > height:
            # Landscape: full height is kept, the width is trimmed symmetrically.
            w = height / width
            h = 1.0
            x0 = 0.5 - w / 2
            y0 = 0.
        else:
            # Portrait or square: full width is kept, the height is trimmed symmetrically.
            w = 1.0
            h = width / height
            x0 = 0.
            y0 = 0.5 - h / 2
        return x0, y0, w, h

    def forward(self, img: Union[Image, Tensor]) -> (BoundingBox, Union[Image, Tensor]):
        """
        Additionally to cropping, returns the relative coordinates of the crop bounding box.

        NOTE(review): the bounding box is derived from the image dimensions only
        and does not consult ``self.size``; it presumably assumes the crop is a
        square whose side equals the shorter image edge - confirm with callers.
        Args:
            img (PIL Image or Tensor): Image to be cropped.
        Returns:
            Bounding box: x0, y0, w, h
            PIL Image or Tensor: Cropped image.
        Based on:
            torchvision.transforms.CenterCrop (version 1.7.0)
        """
        width, height = get_image_size(img)
        return self.get_bbox_of_center_crop(width, height), F.center_crop(img, self.size)
class RandomHorizontalFlipReturn(RandomHorizontalFlip):
    def forward(self, img: Image) -> (bool, Image):
        """
        Randomly flips the image horizontally and reports whether it did so.

        A single value is drawn with ``torch.rand``; the image is flipped when
        that value is below ``self.p``.
        Args:
            img (PIL Image or Tensor): Image to be flipped.
        Returns:
            flipped: whether the image was flipped or not
            PIL Image or Tensor: The flipped image, or the input unchanged.
        Based on:
            torchvision.transforms.RandomHorizontalFlip (version 1.7.0)
        """
        should_flip = bool(torch.rand(1) < self.p)
        if not should_flip:
            return False, img
        return True, F.hflip(img)