transforms analysis
training/notebooks/lab.ipynb CHANGED
@@ -251,15 +251,6 @@
     ")"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dls = birds.dataloaders(path, bs=bs)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 18,
@@ -538,7 +529,7 @@
    }
   ],
   "source": [
-   "interp.plot_confusion_matrix(figsize=(12,12), dpi=100) # dpi adjust the resolution"
+   "# # interp.plot_confusion_matrix(figsize=(12,12), dpi=100) # dpi adjust the resolution"
   ]
  }
 ],
training/notebooks/transforms-lab.ipynb ADDED
@@ -0,0 +1,473 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture\n",
+    "%cd .."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from pathlib import Path\n",
+    "from birds import config\n",
+    "from birds.utils.kaggle import download_dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if download_dataset(config.OWNER, config.DATASET, config.DATA_PATH):\n",
+    "    import os\n",
+    "    import tarfile\n",
+    "\n",
+    "    with tarfile.open(Path(config.DATA_PATH) / \"CUB_200_2011.tgz\", \"r:gz\") as tar:\n",
+    "        tar.extractall(path=config.DATA_PATH)\n",
+    "\n",
+    "    os.remove(Path(config.DATA_PATH) / \"CUB_200_2011.tgz\")\n",
+    "    os.remove(Path(config.DATA_PATH) / \"segmentations.tgz\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from fastai.vision.all import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "path = Path(config.DATA_PATH) / \"CUB_200_2011\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(11788, '001.Black_footed_Albatross/Black_Footed_Albatross_0016_796067.jpg')"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "with open(path / \"images.txt\", \"r\") as file:\n",
+    "    lines = [line.strip().split()[1] for line in file.readlines()]\n",
+    "len(lines), lines[15]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_birds_images(path):\n",
+    "    with open(path / \"images.txt\", \"r\") as file:\n",
+    "        lines = [path.resolve() / \"images\" / line.strip().split()[1] for line in file.readlines()]\n",
+    "    return lines"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(11788, '16 0')"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "with open(path / \"train_test_split.txt\", \"r\") as file:\n",
+    "    lines = [line.strip() for line in file.readlines()]\n",
+    "len(lines), lines[15]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def BirdsSplitter(path):\n",
+    "    with open(path / \"train_test_split.txt\", \"r\") as file:\n",
+    "        valid_idx = [int(line.strip().split()[0]) - 1 for line in file.readlines() if line.strip().split()[1] == '1']\n",
+    "    return IndexSplitter(valid_idx)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "item_tfms = RandomResizedCrop(460, min_scale=0.75, ratio=(1.0, 1.0))\n",
+    "batch_tfms = [\n",
+    "    *aug_transforms(size=224, max_warp=0),\n",
+    "    Normalize.from_stats(*imagenet_stats),\n",
+    "]\n",
+    "bs = 64"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "birds = DataBlock(\n",
+    "    blocks=(ImageBlock, CategoryBlock),\n",
+    "    get_items=get_birds_images,\n",
+    "    splitter=BirdsSplitter(path),\n",
+    "    get_y=RegexLabeller(pat=r\"/([^/]+)_\\d+_\\d+\\.jpg\"),\n",
+    "    item_tfms=item_tfms,\n",
+    "    batch_tfms=batch_tfms,\n",
+    ")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Transformations on validation dataset"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Notes:\n",
+    "* split_idx is set on a transform to indicate that it is only executed on that split. A None value makes the transform execute on every split, doing different things if split_idx==0 (train) or split_idx==1 (valid).\n",
+    "* RandTransform is only applied to the training set by default (split_idx = 0), unless split_idx is set to None in a child transform."
+   ]
+  },
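
Review note: the split_idx rules summarized in the notes above can be checked outside a DataLoader. Below is a minimal sketch; AddOne is a made-up transform for illustration, while Pipeline and RandTransform are the same fastcore/fastai classes the notebook already relies on.

```python
from fastcore.transform import Pipeline
from fastai.vision.augment import RandTransform

class AddOne(RandTransform):
    # RandTransform inherits split_idx = 0, i.e. "training split only"
    def encodes(self, x: int): return x + 1

train_pipe = Pipeline([AddOne()], split_idx=0)  # behaves like the train DataLoader
valid_pipe = Pipeline([AddOne()], split_idx=1)  # behaves like the valid DataLoader
assert train_pipe(1) == 2  # applied on the training split
assert valid_pipe(1) == 1  # skipped on the validation split
```
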
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<bound method DataBlock.datasets of <fastai.data.block.DataBlock object at 0x7ff82dd75690>>"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "birds.datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ToTensor:\n",
+      "encodes: (PILMask,object) -> encodes\n",
+      "(PILBase,object) -> encodes\n",
+      "decodes: \n",
+      "---------------\n",
+      "RandomResizedCrop -- {'size': (460, 460), 'min_scale': 0.75, 'ratio': (1.0, 1.0), 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'val_xtra': 0.14, 'max_scale': 1.0, 'p': 1.0}:\n",
+      "encodes: (Image,object) -> encodes\n",
+      "(TensorBBox,object) -> encodes\n",
+      "(TensorPoint,object) -> encodes\n",
+      "decodes: \n",
+      "---------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "for tmfs in birds.item_tfms:\n",
+    "    print(tmfs)\n",
+    "    print(\"---------------\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* ToTensor seems simple enough that we can use the torch transform\n",
+    "* RandomResizedCrop is a RandTransform, but according to its documentation, on the validation set we center crop the image if its ratio isn't in the range (clamping to the minimum or maximum value) and then resize."
+   ]
+  },
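
Review note: the validation behaviour of RandomResizedCrop described above can be triggered by hand by passing split_idx explicitly, the same mechanism the DataLoaders use. A small sketch with a dummy image (the sizes are arbitrary):

```python
import numpy as np
from fastai.vision.all import PILImage, RandomResizedCrop

img = PILImage.create(np.zeros((300, 500, 3), dtype=np.uint8))  # dummy 500x300 image
rrc = RandomResizedCrop(460, min_scale=0.75, ratio=(1.0, 1.0))

train_out = rrc(img, split_idx=0)  # random scaled crop, then resize
valid_out = rrc(img, split_idx=1)  # deterministic center crop, then resize
assert train_out.size == valid_out.size == (460, 460)
```
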
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "IntToFloatTensor -- {'div': 255.0, 'div_mask': 1}:\n",
+      "encodes: (TensorImage,object) -> encodes\n",
+      "(TensorMask,object) -> encodes\n",
+      "decodes: (TensorImage,object) -> decodes\n",
+      "\n",
+      "---------------\n",
+      "Flip -- {'size': 224, 'mode': 'bilinear', 'pad_mode': 'reflection', 'mode_mask': 'nearest', 'align_corners': True, 'p': 0.5}:\n",
+      "encodes: (TensorImage,object) -> encodes\n",
+      "(TensorMask,object) -> encodes\n",
+      "(TensorBBox,object) -> encodes\n",
+      "(TensorPoint,object) -> encodes\n",
+      "decodes: \n",
+      "---------------\n",
+      "Brightness -- {'max_lighting': 0.2, 'p': 1.0, 'draw': None, 'batch': False}:\n",
+      "encodes: (TensorImage,object) -> encodes\n",
+      "decodes: \n",
+      "---------------\n",
+      "Normalize -- {'mean': tensor([[[[0.4850]],\n",
+      "\n",
+      "         [[0.4560]],\n",
+      "\n",
+      "         [[0.4060]]]], device='cuda:0'), 'std': tensor([[[[0.2290]],\n",
+      "\n",
+      "         [[0.2240]],\n",
+      "\n",
+      "         [[0.2250]]]], device='cuda:0'), 'axes': (0, 2, 3)}:\n",
+      "encodes: (TensorImage,object) -> encodes\n",
+      "(Tabular,object) -> encodes\n",
+      "decodes: (TensorImage,object) -> decodes\n",
+      "(Tabular,object) -> decodes\n",
+      "\n",
+      "---------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "for tmfs in birds.batch_tfms:\n",
+    "    print(tmfs)\n",
+    "    print(\"---------------\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "* IntToFloatTensor seems easy enough that we can probably use the torch version\n",
+    "* Flip and Brightness are RandTransforms and are not applied to validation, but since we are using the size parameter, a RandomResizedCropGPU is applied (doing a center crop on validation). **WHY?**\n",
+    "* Normalize seems easy enough to try replacing with the torch version"
+   ]
+  },
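
Review note: for the two "seems easy" candidates, the torch-side replacements would look roughly like this. A sketch assuming torchvision is available (the notebook itself never imports it); the statistics are the same ImageNet mean/std shown in the Normalize repr above.

```python
import torch
from torchvision import transforms

imagenet_mean, imagenet_std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
to_float = transforms.ConvertImageDtype(torch.float32)         # ~ IntToFloatTensor (uint8 -> [0, 1])
normalize = transforms.Normalize(imagenet_mean, imagenet_std)  # ~ fastai Normalize

batch = torch.randint(0, 256, (64, 3, 224, 224), dtype=torch.uint8)
out = normalize(to_float(batch))  # float batch, standardized per channel
```
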
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dls = birds.dataloaders(path, bs=bs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "RandomResizedCrop -- {'size': (460, 460), 'min_scale': 0.75, 'ratio': (1.0, 1.0), 'resamples': (<Resampling.BILINEAR: 2>, <Resampling.NEAREST: 0>), 'val_xtra': 0.14, 'max_scale': 1.0, 'p': 1.0}:\n",
+      "encodes: (Image,object) -> encodes\n",
+      "(TensorBBox,object) -> encodes\n",
+      "(TensorPoint,object) -> encodes\n",
+      "decodes: \n",
+      "----------------\n",
+      "ToTensor:\n",
+      "encodes: (PILMask,object) -> encodes\n",
+      "(PILBase,object) -> encodes\n",
+      "decodes: \n",
+      "----------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "for tmfs in dls.valid.after_item:\n",
+    "    print(tmfs)\n",
+    "    print(\"----------------\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "IntToFloatTensor -- {'div': 255.0, 'div_mask': 1}:\n",
+      "encodes: (TensorImage,object) -> encodes\n",
+      "(TensorMask,object) -> encodes\n",
+      "decodes: (TensorImage,object) -> decodes\n",
+      "\n",
+      "----------------\n",
+      "Flip -- {'size': 224, 'mode': 'bilinear', 'pad_mode': 'reflection', 'mode_mask': 'nearest', 'align_corners': True, 'p': 0.5}:\n",
+      "encodes: (TensorImage,object) -> encodes\n",
+      "(TensorMask,object) -> encodes\n",
+      "(TensorBBox,object) -> encodes\n",
+      "(TensorPoint,object) -> encodes\n",
+      "decodes: \n",
+      "----------------\n",
+      "Brightness -- {'max_lighting': 0.2, 'p': 1.0, 'draw': None, 'batch': False}:\n",
+      "encodes: (TensorImage,object) -> encodes\n",
+      "decodes: \n",
+      "----------------\n",
+      "Normalize -- {'mean': tensor([[[[0.4850]],\n",
+      "\n",
+      "         [[0.4560]],\n",
+      "\n",
+      "         [[0.4060]]]], device='cuda:0'), 'std': tensor([[[[0.2290]],\n",
+      "\n",
+      "         [[0.2240]],\n",
+      "\n",
+      "         [[0.2250]]]], device='cuda:0'), 'axes': (0, 2, 3)}:\n",
+      "encodes: (TensorImage,object) -> encodes\n",
+      "(Tabular,object) -> encodes\n",
+      "decodes: (TensorImage,object) -> decodes\n",
+      "(Tabular,object) -> decodes\n",
+      "\n",
+      "----------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "for tmfs in dls.valid.after_batch:\n",
+    "    print(tmfs)\n",
+    "    print(\"----------------\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Test to verify that the aug_transforms actually do something on validation, even though they are augmentations, due to the size parameter"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "from fastcore.transform import Pipeline\n",
+    "from fastai.vision.data import TensorImage\n",
+    "from fastai.vision.augment import TensorImage, aug_transforms, Pipeline\n",
+    "\n",
+    "tfms = aug_transforms(size=224)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "\"[Flip -- {'size': 224, 'mode': 'bilinear', 'pad_mode': 'reflection', 'mode_mask': 'nearest', 'align_corners': True, 'p': 0.5}:\\nencodes: (TensorImage,object) -> encodes\\n(TensorMask,object) -> encodes\\n(TensorBBox,object) -> encodes\\n(TensorPoint,object) -> encodes\\ndecodes: , Brightness -- {'max_lighting': 0.2, 'p': 1.0, 'draw': None, 'batch': False}:\\nencodes: (TensorImage,object) -> encodes\\ndecodes: ]\""
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tfms.__repr__()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t = TensorImage(torch.rand(3,448,448))\n",
+    "p = Pipeline(tfms)\n",
+    "tfmd_tensor = p(t.unsqueeze(0))\n",
+    "# If the transforms did nothing, this comparison would be False\n",
+    "assert tfmd_tensor.shape == torch.Size([1,3,224,224])"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The reason is that AffineCoordTfm transforms use affine matrices to perform a GPU crop on validation (https://github.com/fastai/fastai/blob/4d1834cb0b6ac20b068de55cf57f40a0c2296cd4/fastai/vision/augment.py#L491), applying a different matrix for the validation split. This is equivalent to calling the RandomResizedCropGPU transform on validation."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "fastai",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "dbeaabf96d056229716848a298cd9413f5c098c5e85ebec7037464305d96e83e"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
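
Review note on the conclusion: the notebook's Pipeline test can be repeated with split_idx=1 to confirm that the size-224 crop also fires on the validation split. A sketch under that assumption (the pipeline is built exactly as in the notebook; the assertion follows from the AffineCoordTfm behaviour linked above):

```python
import torch
from fastcore.transform import Pipeline
from fastai.torch_core import TensorImage
from fastai.vision.augment import aug_transforms

t = TensorImage(torch.rand(3, 448, 448))
p_valid = Pipeline(aug_transforms(size=224), split_idx=1)  # validation split
out = p_valid(t.unsqueeze(0))
# Not skipped: the affine transform center-crops to 224 on validation,
# matching what RandomResizedCropGPU would do on that split.
assert out.shape == torch.Size([1, 3, 224, 224])
```
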