jcarnero committed on
Commit
c16c201
1 Parent(s): 6bd27ab

new gpu_crop transformation with test

deployment/transforms.py CHANGED
@@ -1,9 +1,9 @@
 import math
-from typing import Literal, Union, Tuple
+from typing import Union, Tuple
 
 import torch
 
-# # import torch.nn.functional as F
+import torch.nn.functional as F
 import torchvision.transforms.functional as tvf
 
 # import torchvision.transforms as tvtfms
@@ -99,3 +99,44 @@ def resized_crop_pad(
     resized_image = pad(crop(resized_image, size), size)
 
     return resized_image
+
+
+def gpu_crop(batch: torch.tensor, size: Tuple[int, int]):
+    """
+    Crops each image in `batch` to a particular `size`.
+
+    Args:
+        batch (array of `torch.Tensor`):
+            A batch of images, should be of shape `NxCxWxH`
+        size (`tuple` of integers):
+            A size to pad to, should be in the form
+            of (width, height)
+
+    Returns:
+        A batch of cropped images
+    """
+    # Split into multiple lines for clarity
+    affine_matrix = torch.eye(3, device=batch.device).float()
+    affine_matrix = affine_matrix.unsqueeze(0)
+    affine_matrix = affine_matrix.expand(batch.size(0), 3, 3)
+    affine_matrix = affine_matrix.contiguous()[:, :2]
+
+    coords = F.affine_grid(affine_matrix, batch.shape[:2] + size, align_corners=True)
+
+    top_range, bottom_range = coords.min(), coords.max()
+    zoom = 1 / (bottom_range - top_range).item() * 2
+
+    resizing_limit = (
+        min(batch.shape[-2] / coords.shape[-2], batch.shape[-1] / coords.shape[-1]) / 2
+    )
+
+    if resizing_limit > 1 and resizing_limit > zoom:
+        batch = F.interpolate(
+            batch,
+            scale_factor=1 / resizing_limit,
+            mode="area",
+            recompute_scale_factor=True,
+        )
+    return F.grid_sample(
+        batch, coords, mode="bilinear", padding_mode="reflection", align_corners=True
+    )
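For context, a minimal usage sketch of the new transform, as it might be called from torchvision-only inference code. The image path and variable names are illustrative, not part of this commit:

import torchvision.transforms as tvtfms
from PIL import Image

from deployment.transforms import gpu_crop

# Illustrative input: any RGB image will do.
img = Image.open("path/to/image.jpg").convert("RGB")

# ToTensor yields a CxHxW float tensor in [0, 1]; unsqueeze adds the batch dimension.
batch = tvtfms.ToTensor()(img).unsqueeze(0)

# Crop the batch to 224x224 on whatever device the batch tensor lives on.
cropped = gpu_crop(batch, size=(224, 224))
print(cropped.shape)  # torch.Size([1, 3, 224, 224])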
tests/{test_transforms.py → test_validation_transforms.py} RENAMED
@@ -4,10 +4,12 @@ from pathlib import Path
 from typing import List
 import numpy as np
 from PIL import Image
+import torch
+import torchvision.transforms as tvtfms
 from fastai.vision.data import PILImage
 import fastai.vision.augment as fastai_aug
 
-from deployment.transforms import resized_crop_pad
+from deployment.transforms import resized_crop_pad, gpu_crop
 
 DATA_PATH = "data/200-bird-species-with-11788-images"
 
@@ -43,7 +45,7 @@ class TestTransforms:
         assert (np.array(im_fastai) == np.array(im_pil)).all()
 
     # RandomResizedCrop is not exactly equal to CropPad in validation
-    # # def testRandomResizedCropEqualsCropPadInValidation(self, im_fastai: PILImage):
+    # # def testRandomResizedCropEqualsCropPad(self, im_fastai: PILImage):
     # # crop_fastai = fastai_aug.CropPad((460, 460))
     # # crop_rrc = fastai_aug.RandomResizedCrop((460, 460))
 
@@ -52,7 +54,7 @@ class TestTransforms:
 
     # # assert (np.array(cropped_rrc) == np.array(cropped_fastai)).all()
 
-    def testRandomResizedCropInValidationEqualsCustomResizedCropPad(
+    def testRandomResizedCropEqualsCustomResizedCropPad(
         self, im_fastai: PILImage, im_pil: Image
     ):
         crop_rrc = fastai_aug.RandomResizedCrop((460, 460))
@@ -61,3 +63,20 @@ class TestTransforms:
             np.array(crop_rrc(im_fastai, split_idx=1))
             == np.array(resized_crop_pad(im_pil, (460, 460)))
         ).all()
+
+    def testFlipEqualsCustomGPUCrop(self, im_fastai: PILImage, im_pil: Image):
+        tt_fastai = fastai_aug.ToTensor()
+        i2f_fastai = fastai_aug.IntToFloatTensor()
+        flip = fastai_aug.Flip(size=(224, 224))
+        tt_torch = tvtfms.ToTensor()
+
+        # apply flip augmentation on validation
+        result_im_fastai = flip(
+            i2f_fastai(tt_fastai(im_fastai).unsqueeze(0)), split_idx=1
+        )
+
+        # apply custom gpu crop
+        result_im_tv = gpu_crop(tt_torch(im_pil).unsqueeze(0), size=(224, 224))
+
+        assert torch.allclose(result_im_fastai, result_im_tv)
+        assert (result_im_fastai == result_im_tv).all()
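Assuming pytest is installed and the dataset referenced by DATA_PATH is present, the renamed module can be exercised on its own, e.g. with "pytest tests/test_validation_transforms.py -k testFlipEqualsCustomGPUCrop".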
training/notebooks/transforms-lab.ipynb CHANGED
@@ -181,7 +181,7 @@
    {
     "data": {
      "text/plain": [
-      "<bound method DataBlock.datasets of <fastai.data.block.DataBlock object at 0x7f01c4a27eb0>>"
+      "<bound method DataBlock.datasets of <fastai.data.block.DataBlock object at 0x7fcc180cab90>>"
      ]
     },
     "execution_count": 12,
@@ -287,7 +287,7 @@
   "metadata": {},
   "source": [
    "* IntToFloatTensor seems easy enough and we can probably use the torch version\n",
-    "* Flip and Brightness are RandTransforms and are not applied to validation, but as we are using the size parameter, a RandomResizeCropGPU is done (doing center croping on validation). **WHY?**\n",
+    "* Flip and Brightness are RandTransforms and are not applied to validation, but as we are using the size parameter, a RandomResizeCropGPU is done (doing center croping on validation) using the affine matrices.\n",
    "* Normalize seems easy enough to try replacing it with torch version"
   ]
  },
@@ -563,27 +563,18 @@
    "%cd .."
   ]
  },
- {
-  "cell_type": "code",
-  "execution_count": 28,
-  "metadata": {},
-  "outputs": [],
-  "source": [
-   "from deployment.transforms import ResizedCropPad"
-  ]
- },
 {
  "cell_type": "code",
  "execution_count": 29,
  "metadata": {},
  "outputs": [],
  "source": [
-   "custom_crop = ResizedCropPad((460, 460))"
+   "from deployment.transforms import resized_crop_pad"
  ]
 },
 {
  "cell_type": "code",
-  "execution_count": 30,
+  "execution_count": 32,
  "metadata": {},
  "outputs": [
   {
@@ -592,19 +583,19 @@
      "(460, 460)"
     ]
    },
-   "execution_count": 30,
+   "execution_count": 32,
    "metadata": {},
    "output_type": "execute_result"
   }
  ],
 "source": [
-   "cropped = custom_crop(im)\n",
+   "cropped = resized_crop_pad(im, (460, 460))\n",
   "cropped.shape"
  ]
 },
 {
  "cell_type": "code",
-  "execution_count": 31,
+  "execution_count": 33,
  "metadata": {},
 "outputs": [
  {
@@ -614,7 +605,7 @@
      "<PIL.Image.Image image mode=RGB size=460x460>"
     ]
    },
-   "execution_count": 31,
+   "execution_count": 33,
    "metadata": {},
    "output_type": "execute_result"
   }
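The "using the affine matrices" remark above is the mechanism that gpu_crop reproduces: an identity 2x3 theta passed to F.affine_grid over the target size yields a normalized sampling grid spanning the whole input, and F.grid_sample then resamples the batch onto that grid. A minimal sketch under those assumptions (the input tensor is illustrative):

import torch
import torch.nn.functional as F

batch = torch.rand(1, 3, 460, 460)  # illustrative NxCxHxW batch

# Identity affine transform: theta encodes no rotation, translation, scaling or shear.
theta = torch.eye(3)[:2].unsqueeze(0)  # shape (1, 2, 3)

# Normalized sampling coordinates for a 224x224 output grid.
coords = F.affine_grid(theta, (1, 3, 224, 224), align_corners=True)
print(coords.shape)                              # torch.Size([1, 224, 224, 2])
print(coords.min().item(), coords.max().item())  # -1.0 1.0: the grid spans the full input

out = F.grid_sample(
    batch, coords, mode="bilinear", padding_mode="reflection", align_corners=True
)
print(out.shape)  # torch.Size([1, 3, 224, 224])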