jcarnero committed
Commit 66083a7 · 1 Parent(s): be18511

transforms analysis
training/notebooks/lab.ipynb CHANGED
@@ -251,15 +251,6 @@
  ")"
  ]
  },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [],
- "source": [
- "dls = birds.dataloaders(path, bs=bs)"
- ]
- },
  {
  "cell_type": "code",
  "execution_count": 18,
@@ -538,7 +529,7 @@
  }
  ],
  "source": [
- "interp.plot_confusion_matrix(figsize=(12,12), dpi=100) # dpi adjust the resolution"
+ "# # interp.plot_confusion_matrix(figsize=(12,12), dpi=100) # dpi adjust the resolution"
  ]
  }
 ],
training/notebooks/transforms-lab.ipynb ADDED
@@ -0,0 +1,473 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "%cd .."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "from pathlib import Path\n",
+ "from birds import config\n",
+ "from birds.utils.kaggle import download_dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if download_dataset(config.OWNER, config.DATASET, config.DATA_PATH):\n",
+ " import os\n",
+ " import tarfile\n",
+ "\n",
+ " with tarfile.open(Path(config.DATA_PATH) / \"CUB_200_2011.tgz\", \"r:gz\") as tar:\n",
+ " tar.extractall(path=config.DATA_PATH)\n",
+ "\n",
+ " os.remove(Path(config.DATA_PATH) / \"CUB_200_2011.tgz\")\n",
+ " os.remove(Path(config.DATA_PATH) / \"segmentations.tgz\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from fastai.vision.all import *"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path = Path(config.DATA_PATH) / \"CUB_200_2011\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(11788, '001.Black_footed_Albatross/Black_Footed_Albatross_0016_796067.jpg')"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "with open(path / \"images.txt\", \"r\") as file:\n",
+ " lines = [line.strip().split()[1] for line in file.readlines()]\n",
+ "len(lines), lines[15]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_birds_images(path):\n",
+ " with open(path / \"images.txt\", \"r\") as file:\n",
+ " lines = [path.resolve() / \"images\" / line.strip().split()[1] for line in file.readlines()]\n",
+ " return lines"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(11788, '16 0')"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "with open(path / \"train_test_split.txt\", \"r\") as file:\n",
+ " lines = [line.strip() for line in file.readlines()]\n",
+ "len(lines), lines[15]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def BirdsSplitter(path):\n",
+ " with open(path / \"train_test_split.txt\", \"r\") as file:\n",
+ " valid_idx = [int(line.strip().split()[0]) - 1 for line in file.readlines() if line.strip().split()[1] == '1']\n",
+ " return IndexSplitter(valid_idx)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "item_tfms = RandomResizedCrop(460, min_scale=0.75, ratio=(1.0, 1.0))\n",
+ "batch_tfms = [\n",
+ " *aug_transforms(size=224, max_warp=0),\n",
+ " Normalize.from_stats(*imagenet_stats),\n",
+ "]\n",
+ "bs = 64"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "birds = DataBlock(\n",
+ " blocks=(ImageBlock, CategoryBlock),\n",
+ " get_items=get_birds_images,\n",
+ " splitter=BirdsSplitter(path),\n",
+ " get_y=RegexLabeller(pat=r\"/([^/]+)_\\d+_\\d+\\.jpg\"),\n",
+ " item_tfms=item_tfms,\n",
+ " batch_tfms=batch_tfms,\n",
+ ")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Transformations on validation dataset"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Notes:\n",
+ "* split_idx is set on a transform to indicate that it is executed only on that split. A None value makes the transform execute on every split, doing different things depending on whether split_idx==0 (train) or split_idx==1 (valid).\n",
+ "* RandTransform is only applied to the training set by default (split_idx = 0), unless split_idx is set to None in a child transform."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "<bound method DataBlock.datasets of <fastai.data.block.DataBlock object at 0x7ff82dd75690>>"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "birds.datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ToTensor:\n",
+ "encodes: (PILMask,object) -> encodes\n",
+ "(PILBase,object) -> encodes\n",
+ "decodes: \n",
+ "---------------\n",
+ "RandomResizedCrop -- {'size': (460, 460), 'min_scale': 0.75, 'ratio': (1.0, 1.0), 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'val_xtra': 0.14, 'max_scale': 1.0, 'p': 1.0}:\n",
+ "encodes: (Image,object) -> encodes\n",
+ "(TensorBBox,object) -> encodes\n",
+ "(TensorPoint,object) -> encodes\n",
+ "decodes: \n",
+ "---------------\n"
+ ]
+ }
+ ],
+ "source": [
+ "for tmfs in birds.item_tfms:\n",
+ " print(tmfs)\n",
+ " print(\"---------------\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "* ToTensor seems simple, and we can use the torch transform\n",
+ "* RandomResizedCrop is a RandTransform, but according to its documentation, on the validation set we center crop the image if its ratio isn't in the range (clamping to the minimum or maximum value) and then resize."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "IntToFloatTensor -- {'div': 255.0, 'div_mask': 1}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "(TensorMask,object) -> encodes\n",
+ "decodes: (TensorImage,object) -> decodes\n",
+ "\n",
+ "---------------\n",
+ "Flip -- {'size': 224, 'mode': 'bilinear', 'pad_mode': 'reflection', 'mode_mask': 'nearest', 'align_corners': True, 'p': 0.5}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "(TensorMask,object) -> encodes\n",
+ "(TensorBBox,object) -> encodes\n",
+ "(TensorPoint,object) -> encodes\n",
+ "decodes: \n",
+ "---------------\n",
+ "Brightness -- {'max_lighting': 0.2, 'p': 1.0, 'draw': None, 'batch': False}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "decodes: \n",
+ "---------------\n",
+ "Normalize -- {'mean': tensor([[[[0.4850]],\n",
+ "\n",
+ " [[0.4560]],\n",
+ "\n",
+ " [[0.4060]]]], device='cuda:0'), 'std': tensor([[[[0.2290]],\n",
+ "\n",
+ " [[0.2240]],\n",
+ "\n",
+ " [[0.2250]]]], device='cuda:0'), 'axes': (0, 2, 3)}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "(Tabular,object) -> encodes\n",
+ "decodes: (TensorImage,object) -> decodes\n",
+ "(Tabular,object) -> decodes\n",
+ "\n",
+ "---------------\n"
+ ]
+ }
+ ],
+ "source": [
+ "for tmfs in birds.batch_tfms:\n",
+ " print(tmfs)\n",
+ " print(\"---------------\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "* IntToFloatTensor seems easy enough, and we can probably use the torch version\n",
+ "* Flip and Brightness are RandTransforms and are not applied to validation, but since we are using the size parameter, a RandomResizedCropGPU-equivalent crop is done (center cropping on validation). **WHY?**\n",
+ "* Normalize seems easy enough to try replacing with the torch version"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dls = birds.dataloaders(path, bs=bs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RandomResizedCrop -- {'size': (460, 460), 'min_scale': 0.75, 'ratio': (1.0, 1.0), 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'val_xtra': 0.14, 'max_scale': 1.0, 'p': 1.0}:\n",
+ "encodes: (Image,object) -> encodes\n",
+ "(TensorBBox,object) -> encodes\n",
+ "(TensorPoint,object) -> encodes\n",
+ "decodes: \n",
+ "----------------\n",
+ "ToTensor:\n",
+ "encodes: (PILMask,object) -> encodes\n",
+ "(PILBase,object) -> encodes\n",
+ "decodes: \n",
+ "----------------\n"
+ ]
+ }
+ ],
+ "source": [
+ "for tmfs in dls.valid.after_item:\n",
+ " print(tmfs)\n",
+ " print(\"----------------\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "IntToFloatTensor -- {'div': 255.0, 'div_mask': 1}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "(TensorMask,object) -> encodes\n",
+ "decodes: (TensorImage,object) -> decodes\n",
+ "\n",
+ "----------------\n",
+ "Flip -- {'size': 224, 'mode': 'bilinear', 'pad_mode': 'reflection', 'mode_mask': 'nearest', 'align_corners': True, 'p': 0.5}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "(TensorMask,object) -> encodes\n",
+ "(TensorBBox,object) -> encodes\n",
+ "(TensorPoint,object) -> encodes\n",
+ "decodes: \n",
+ "----------------\n",
+ "Brightness -- {'max_lighting': 0.2, 'p': 1.0, 'draw': None, 'batch': False}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "decodes: \n",
+ "----------------\n",
+ "Normalize -- {'mean': tensor([[[[0.4850]],\n",
+ "\n",
+ " [[0.4560]],\n",
+ "\n",
+ " [[0.4060]]]], device='cuda:0'), 'std': tensor([[[[0.2290]],\n",
+ "\n",
+ " [[0.2240]],\n",
+ "\n",
+ " [[0.2250]]]], device='cuda:0'), 'axes': (0, 2, 3)}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "(Tabular,object) -> encodes\n",
+ "decodes: (TensorImage,object) -> decodes\n",
+ "(Tabular,object) -> decodes\n",
+ "\n",
+ "----------------\n"
+ ]
+ }
+ ],
+ "source": [
+ "for tmfs in dls.valid.after_batch:\n",
+ " print(tmfs)\n",
+ " print(\"----------------\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Test to see that aug_transforms actually do something in validation, even though they are augmentations, due to the size parameter"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from fastcore.transform import Pipeline\n",
+ "from fastai.vision.data import TensorImage\n",
+ "from fastai.vision.augment import TensorImage, aug_transforms, Pipeline\n",
+ "\n",
+ "tfms = aug_transforms(size=224)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\"[Flip -- {'size': 224, 'mode': 'bilinear', 'pad_mode': 'reflection', 'mode_mask': 'nearest', 'align_corners': True, 'p': 0.5}:\\nencodes: (TensorImage,object) -> encodes\\n(TensorMask,object) -> encodes\\n(TensorBBox,object) -> encodes\\n(TensorPoint,object) -> encodes\\ndecodes: , Brightness -- {'max_lighting': 0.2, 'p': 1.0, 'draw': None, 'batch': False}:\\nencodes: (TensorImage,object) -> encodes\\ndecodes: ]\""
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tfms.__repr__()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "t = TensorImage(torch.rand(3,448,448))\n",
+ "p = Pipeline(tfms)\n",
+ "tfmd_tensor = p(t.unsqueeze(0))\n",
+ "# If nothing happened, this would be false\n",
+ "assert tfmd_tensor.shape == torch.Size([1,3,224,224])"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The reason is that AffineCoordTfm transforms use their affine matrices to perform a GPU crop in validation (https://github.com/fastai/fastai/blob/4d1834cb0b6ac20b068de55cf57f40a0c2296cd4/fastai/vision/augment.py#L491), using a different matrix for the validation split. This is equivalent to calling the RandomResizedCropGPU transform on validation."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "fastai",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "dbeaabf96d056229716848a298cd9413f5c098c5e85ebec7037464305d96e83e"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
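A minimal sketch (not in the commit) of the split_idx gating the notebook describes: the same aug_transforms list, run through pipelines marked as train (split_idx=0) and valid (split_idx=1), produces 224-pixel output in both cases, because the AffineCoordTfm step runs on both splits while the random parts only fire on train.

# Sketch only: illustrates the split_idx gating; shapes follow the notebook's own test.
import torch
from fastcore.transform import Pipeline
from fastai.vision.all import TensorImage, aug_transforms

tfms = aug_transforms(size=224)
t = TensorImage(torch.rand(1, 3, 448, 448))

train_pipe = Pipeline(tfms, split_idx=0)  # train split: random Flip/Brightness + resize
valid_pipe = Pipeline(tfms, split_idx=1)  # valid split: randomness skipped, crop/resize kept

# Both splits come out at 224x224: the affine resize also runs on validation,
# just with a deterministic (center-crop) matrix.
assert train_pipe(t).shape == valid_pipe(t).shape == torch.Size([1, 3, 224, 224])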