jcarnero committed
Commit 66083a7 · 1 Parent(s): be18511

transforms analysis
training/notebooks/lab.ipynb CHANGED
@@ -251,15 +251,6 @@
  ")"
  ]
  },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [],
- "source": [
- "dls = birds.dataloaders(path, bs=bs)"
- ]
- },
  {
  "cell_type": "code",
  "execution_count": 18,
@@ -538,7 +529,7 @@
  }
  ],
  "source": [
- "interp.plot_confusion_matrix(figsize=(12,12), dpi=100) # dpi adjust the resolution"
+ "# # interp.plot_confusion_matrix(figsize=(12,12), dpi=100) # dpi adjust the resolution"
  ]
  }
 ],
training/notebooks/transforms-lab.ipynb ADDED
@@ -0,0 +1,473 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "%cd .."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "from pathlib import Path\n",
+ "from birds import config\n",
+ "from birds.utils.kaggle import download_dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if download_dataset(config.OWNER, config.DATASET, config.DATA_PATH):\n",
+ " import os\n",
+ " import tarfile\n",
+ "\n",
+ " with tarfile.open(Path(config.DATA_PATH) / \"CUB_200_2011.tgz\", \"r:gz\") as tar:\n",
+ " tar.extractall(path=config.DATA_PATH)\n",
+ "\n",
+ " os.remove(Path(config.DATA_PATH) / \"CUB_200_2011.tgz\")\n",
+ " os.remove(Path(config.DATA_PATH) / \"segmentations.tgz\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from fastai.vision.all import *"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "path = Path(config.DATA_PATH) / \"CUB_200_2011\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(11788, '001.Black_footed_Albatross/Black_Footed_Albatross_0016_796067.jpg')"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "with open(path / \"images.txt\", \"r\") as file:\n",
+ " lines = [line.strip().split()[1] for line in file.readlines()]\n",
+ "len(lines), lines[15]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_birds_images(path):\n",
+ " with open(path / \"images.txt\", \"r\") as file:\n",
+ " lines = [path.resolve() / \"images\" / line.strip().split()[1] for line in file.readlines()]\n",
+ " return lines"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(11788, '16 0')"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "with open(path / \"train_test_split.txt\", \"r\") as file:\n",
+ " lines = [line.strip() for line in file.readlines()]\n",
+ "len(lines), lines[15]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def BirdsSplitter(path):\n",
+ " with open(path / \"train_test_split.txt\", \"r\") as file:\n",
+ " valid_idx = [int(line.strip().split()[0]) - 1 for line in file.readlines() if line.strip().split()[1] == '1']\n",
+ " return IndexSplitter(valid_idx)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "item_tfms = RandomResizedCrop(460, min_scale=0.75, ratio=(1.0, 1.0))\n",
+ "batch_tfms = [\n",
+ " *aug_transforms(size=224, max_warp=0),\n",
+ " Normalize.from_stats(*imagenet_stats),\n",
+ "]\n",
+ "bs = 64"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "birds = DataBlock(\n",
+ " blocks=(ImageBlock, CategoryBlock),\n",
+ " get_items=get_birds_images,\n",
+ " splitter=BirdsSplitter(path),\n",
+ " get_y=RegexLabeller(pat=r\"/([^/]+)_\\d+_\\d+\\.jpg\"),\n",
+ " item_tfms=item_tfms,\n",
+ " batch_tfms=batch_tfms,\n",
+ ")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Transformations on validation dataset"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Notes:\n",
+ "* split_idx is set on a transform to indicate that it is executed only on that split. A None value makes the transform execute on every split, doing different things depending on whether split_idx==0 (train) or split_idx==1 (valid).\n",
+ "* RandTransform is only applied to the training set by default (split_idx = 0), unless split_idx is set to None in a child transform."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "<bound method DataBlock.datasets of <fastai.data.block.DataBlock object at 0x7ff82dd75690>>"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "birds.datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ToTensor:\n",
+ "encodes: (PILMask,object) -> encodes\n",
+ "(PILBase,object) -> encodes\n",
+ "decodes: \n",
+ "---------------\n",
+ "RandomResizedCrop -- {'size': (460, 460), 'min_scale': 0.75, 'ratio': (1.0, 1.0), 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'val_xtra': 0.14, 'max_scale': 1.0, 'p': 1.0}:\n",
+ "encodes: (Image,object) -> encodes\n",
+ "(TensorBBox,object) -> encodes\n",
+ "(TensorPoint,object) -> encodes\n",
+ "decodes: \n",
+ "---------------\n"
+ ]
+ }
+ ],
+ "source": [
+ "for tmfs in birds.item_tfms:\n",
+ " print(tmfs)\n",
+ " print(\"---------------\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "* ToTensor seems simple, and we can use the torch transform\n",
+ "* RandomResizedCrop is a RandTransform, but according to its documentation, on the validation set we center crop the image if its ratio isn't in the range (clamping to the minimum or maximum value) and then resize."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "IntToFloatTensor -- {'div': 255.0, 'div_mask': 1}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "(TensorMask,object) -> encodes\n",
+ "decodes: (TensorImage,object) -> decodes\n",
+ "\n",
+ "---------------\n",
+ "Flip -- {'size': 224, 'mode': 'bilinear', 'pad_mode': 'reflection', 'mode_mask': 'nearest', 'align_corners': True, 'p': 0.5}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "(TensorMask,object) -> encodes\n",
+ "(TensorBBox,object) -> encodes\n",
+ "(TensorPoint,object) -> encodes\n",
+ "decodes: \n",
+ "---------------\n",
+ "Brightness -- {'max_lighting': 0.2, 'p': 1.0, 'draw': None, 'batch': False}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "decodes: \n",
+ "---------------\n",
+ "Normalize -- {'mean': tensor([[[[0.4850]],\n",
+ "\n",
+ " [[0.4560]],\n",
+ "\n",
+ " [[0.4060]]]], device='cuda:0'), 'std': tensor([[[[0.2290]],\n",
+ "\n",
+ " [[0.2240]],\n",
+ "\n",
+ " [[0.2250]]]], device='cuda:0'), 'axes': (0, 2, 3)}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "(Tabular,object) -> encodes\n",
+ "decodes: (TensorImage,object) -> decodes\n",
+ "(Tabular,object) -> decodes\n",
+ "\n",
+ "---------------\n"
+ ]
+ }
+ ],
+ "source": [
+ "for tmfs in birds.batch_tfms:\n",
+ " print(tmfs)\n",
+ " print(\"---------------\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "* IntToFloatTensor seems easy enough, and we can probably use the torch version\n",
+ "* Flip and Brightness are RandTransforms and are not applied to validation, but since we are using the size parameter, a RandomResizedCropGPU-equivalent crop is done (center cropping on validation). **WHY?**\n",
+ "* Normalize seems easy enough to try replacing with the torch version"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dls = birds.dataloaders(path, bs=bs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RandomResizedCrop -- {'size': (460, 460), 'min_scale': 0.75, 'ratio': (1.0, 1.0), 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'val_xtra': 0.14, 'max_scale': 1.0, 'p': 1.0}:\n",
+ "encodes: (Image,object) -> encodes\n",
+ "(TensorBBox,object) -> encodes\n",
+ "(TensorPoint,object) -> encodes\n",
+ "decodes: \n",
+ "----------------\n",
+ "ToTensor:\n",
+ "encodes: (PILMask,object) -> encodes\n",
+ "(PILBase,object) -> encodes\n",
+ "decodes: \n",
+ "----------------\n"
+ ]
+ }
+ ],
+ "source": [
+ "for tmfs in dls.valid.after_item:\n",
+ " print(tmfs)\n",
+ " print(\"----------------\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "IntToFloatTensor -- {'div': 255.0, 'div_mask': 1}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "(TensorMask,object) -> encodes\n",
+ "decodes: (TensorImage,object) -> decodes\n",
+ "\n",
+ "----------------\n",
+ "Flip -- {'size': 224, 'mode': 'bilinear', 'pad_mode': 'reflection', 'mode_mask': 'nearest', 'align_corners': True, 'p': 0.5}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "(TensorMask,object) -> encodes\n",
+ "(TensorBBox,object) -> encodes\n",
+ "(TensorPoint,object) -> encodes\n",
+ "decodes: \n",
+ "----------------\n",
+ "Brightness -- {'max_lighting': 0.2, 'p': 1.0, 'draw': None, 'batch': False}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "decodes: \n",
+ "----------------\n",
+ "Normalize -- {'mean': tensor([[[[0.4850]],\n",
+ "\n",
+ " [[0.4560]],\n",
+ "\n",
+ " [[0.4060]]]], device='cuda:0'), 'std': tensor([[[[0.2290]],\n",
+ "\n",
+ " [[0.2240]],\n",
+ "\n",
+ " [[0.2250]]]], device='cuda:0'), 'axes': (0, 2, 3)}:\n",
+ "encodes: (TensorImage,object) -> encodes\n",
+ "(Tabular,object) -> encodes\n",
+ "decodes: (TensorImage,object) -> decodes\n",
+ "(Tabular,object) -> decodes\n",
+ "\n",
+ "----------------\n"
+ ]
+ }
+ ],
+ "source": [
+ "for tmfs in dls.valid.after_batch:\n",
+ " print(tmfs)\n",
+ " print(\"----------------\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Test to see that aug_transforms actually do something in validation, even though they are augmentations, due to the size parameter"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import torch\n",
+ "from fastcore.transform import Pipeline\n",
+ "from fastai.vision.data import TensorImage\n",
+ "from fastai.vision.augment import TensorImage, aug_transforms, Pipeline\n",
+ "\n",
+ "tfms = aug_transforms(size=224)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\"[Flip -- {'size': 224, 'mode': 'bilinear', 'pad_mode': 'reflection', 'mode_mask': 'nearest', 'align_corners': True, 'p': 0.5}:\\nencodes: (TensorImage,object) -> encodes\\n(TensorMask,object) -> encodes\\n(TensorBBox,object) -> encodes\\n(TensorPoint,object) -> encodes\\ndecodes: , Brightness -- {'max_lighting': 0.2, 'p': 1.0, 'draw': None, 'batch': False}:\\nencodes: (TensorImage,object) -> encodes\\ndecodes: ]\""
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "tfms.__repr__()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "t = TensorImage(torch.rand(3,448,448))\n",
+ "p = Pipeline(tfms)\n",
+ "tfmd_tensor = p(t.unsqueeze(0))\n",
+ "# If nothing happened, this would be false\n",
+ "assert tfmd_tensor.shape == torch.Size([1,3,224,224])"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The reason is that AffineCoordTfm transforms use their affine matrices to perform a GPU crop in validation (https://github.com/fastai/fastai/blob/4d1834cb0b6ac20b068de55cf57f40a0c2296cd4/fastai/vision/augment.py#L491), using a different matrix for the validation split. This is equivalent to calling the RandomResizedCropGPU transform on validation."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "fastai",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.9"
+ },
+ "orig_nbformat": 4,
+ "vscode": {
+ "interpreter": {
+ "hash": "dbeaabf96d056229716848a298cd9413f5c098c5e85ebec7037464305d96e83e"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
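A minimal sketch (not in the commit) of the split_idx gating the notebook describes: the same aug_transforms list, run through pipelines marked as train (split_idx=0) and valid (split_idx=1), produces 224-pixel output in both cases, because the AffineCoordTfm step runs on both splits while the random parts only fire on train.

# Sketch only: illustrates the split_idx gating; shapes follow the notebook's own test.
import torch
from fastcore.transform import Pipeline
from fastai.vision.all import TensorImage, aug_transforms

tfms = aug_transforms(size=224)
t = TensorImage(torch.rand(1, 3, 448, 448))

train_pipe = Pipeline(tfms, split_idx=0)  # train split: random Flip/Brightness + resize
valid_pipe = Pipeline(tfms, split_idx=1)  # valid split: randomness skipped, crop/resize kept

# Both splits come out at 224x224: the affine resize also runs on validation,
# just with a deterministic (center-crop) matrix.
assert train_pipe(t).shape == valid_pipe(t).shape == torch.Size([1, 3, 224, 224])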