sunwaee committed on
Commit
e6ecdf3
·
1 Parent(s): ec6e014

added retinanet repo

Browse files
retinanet/anchors.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+
5
+
6
class Anchors(nn.Module):
    """Build the anchor boxes of every pyramid level for an input image batch.

    Each constructor argument falls back to a default only when it is omitted;
    an explicitly supplied value is stored unchanged.

    Args:
        pyramid_levels: Pyramid level indices. Defaults to ``[3, 4, 5, 6, 7]``.
        strides: Per-level grid stride. Defaults to ``2 ** level``.
        sizes: Per-level base anchor size. Defaults to ``2 ** (level + 2)``.
        ratios: Anchor aspect ratios. Defaults to ``[0.5, 1, 2]``.
        scales: Anchor scales. Defaults to ``[2**0, 2**(1/3), 2**(2/3)]``.
    """

    def __init__(self, pyramid_levels=None, strides=None, sizes=None, ratios=None, scales=None):
        super(Anchors, self).__init__()

        # Always assign every attribute: previously an explicit argument was
        # silently dropped and the attribute was never created.
        self.pyramid_levels = [3, 4, 5, 6, 7] if pyramid_levels is None else pyramid_levels
        self.strides = [2 ** x for x in self.pyramid_levels] if strides is None else strides
        self.sizes = [2 ** (x + 2) for x in self.pyramid_levels] if sizes is None else sizes
        self.ratios = np.array([0.5, 1, 2]) if ratios is None else ratios
        self.scales = (
            np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]) if scales is None else scales
        )

    def forward(self, image):
        """Return all anchors for ``image`` as a float32 tensor of shape (1, N, 4).

        The spatial size is read from ``image.shape[2:]``. The result is moved
        to CUDA whenever ``torch.cuda.is_available()`` is true.
        """
        image_shape = np.array(image.shape[2:])
        # ceil(image_shape / 2**level): feature-map size at each pyramid level
        image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) for x in self.pyramid_levels]

        # compute anchors over all pyramid levels
        all_anchors = np.zeros((0, 4)).astype(np.float32)

        for idx, p in enumerate(self.pyramid_levels):
            anchors = generate_anchors(base_size=self.sizes[idx], ratios=self.ratios, scales=self.scales)
            shifted_anchors = shift(image_shapes[idx], self.strides[idx], anchors)
            all_anchors = np.append(all_anchors, shifted_anchors, axis=0)

        # add the leading batch dimension
        all_anchors = np.expand_dims(all_anchors, axis=0)

        if torch.cuda.is_available():
            return torch.from_numpy(all_anchors.astype(np.float32)).cuda()
        else:
            return torch.from_numpy(all_anchors.astype(np.float32))
41
+
42
def generate_anchors(base_size=16, ratios=None, scales=None):
    """
    Enumerate reference anchor boxes for every (ratio, scale) combination.

    Returns an array of shape (len(ratios) * len(scales), 4) holding
    (x1, y1, x2, y2) boxes centred on the origin. Rows are ordered with the
    scale varying fastest within each ratio.
    """
    if ratios is None:
        ratios = np.array([0.5, 1, 2])

    if scales is None:
        scales = np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])

    n_ratios = len(ratios)
    n_scales = len(scales)

    boxes = np.zeros((n_ratios * n_scales, 4))

    # square boxes first: both sides equal base_size * scale
    boxes[:, 2] = base_size * np.tile(scales, n_ratios)
    boxes[:, 3] = boxes[:, 2]

    # keep the area fixed while reshaping each box to its aspect ratio
    areas = boxes[:, 2] * boxes[:, 3]
    ratio_per_box = np.repeat(ratios, n_scales)
    boxes[:, 2] = np.sqrt(areas / ratio_per_box)
    boxes[:, 3] = boxes[:, 2] * ratio_per_box

    # (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2), centred on (0, 0)
    half_w = boxes[:, 2] * 0.5
    half_h = boxes[:, 3] * 0.5
    boxes[:, 0] -= half_w
    boxes[:, 2] -= half_w
    boxes[:, 1] -= half_h
    boxes[:, 3] -= half_h

    return boxes
74
+
75
def compute_shape(image_shape, pyramid_levels):
    """Compute the feature-map shape at each pyramid level.

    :param image_shape: image shape; only its first two entries are used
    :param pyramid_levels: iterable of level indices; level x has stride 2 ** x
    :return: list with one 2-element array per level, the ceiling of the
        first two image dimensions divided by the level stride
    """
    base = np.array(image_shape[:2])

    level_shapes = []
    for level in pyramid_levels:
        stride = 2 ** level
        # integer ceiling division
        level_shapes.append((base + stride - 1) // stride)
    return level_shapes
85
+
86
+
87
def anchors_for_shape(
    image_shape,
    pyramid_levels=None,
    ratios=None,
    scales=None,
    strides=None,
    sizes=None,
    shapes_callback=None,
):
    """Generate the anchors of all pyramid levels for a given image shape.

    :param image_shape: shape of the image (passed to the shape callback)
    :param pyramid_levels: level indices; defaults to [3, 4, 5, 6, 7]
    :param ratios: anchor aspect ratios; None uses generate_anchors' default
    :param scales: anchor scales; None uses generate_anchors' default
    :param strides: per-level stride; defaults to 2 ** level
    :param sizes: per-level base anchor size; defaults to 2 ** (level + 2)
    :param shapes_callback: callable (image_shape, pyramid_levels) -> list of
        per-level feature-map shapes; defaults to compute_shape
    :return: array of shape (N, 4) with (x1, y1, x2, y2) anchors
    """
    # Fill in the defaults; previously calling with the default None values
    # crashed when iterating / indexing them.
    if pyramid_levels is None:
        pyramid_levels = [3, 4, 5, 6, 7]
    if strides is None:
        strides = [2 ** x for x in pyramid_levels]
    if sizes is None:
        sizes = [2 ** (x + 2) for x in pyramid_levels]
    if shapes_callback is None:
        shapes_callback = compute_shape

    image_shapes = shapes_callback(image_shape, pyramid_levels)

    # compute anchors over all pyramid levels, then join them in one pass
    per_level = [np.zeros((0, 4))]
    for idx, p in enumerate(pyramid_levels):
        anchors = generate_anchors(base_size=sizes[idx], ratios=ratios, scales=scales)
        per_level.append(shift(image_shapes[idx], strides[idx], anchors))

    return np.concatenate(per_level, axis=0)
107
+
108
+
109
def shift(shape, stride, anchors):
    """Replicate the reference anchors at the centre of every feature-map cell.

    ``shape`` is (rows, cols) of the feature map; cell centres are placed at
    ``(index + 0.5) * stride``. Returns an array of shape (K * A, 4), where K
    is the number of cells and A the number of reference anchors, ordered
    cell by cell (row-major), with all A anchors of a cell adjacent.
    """
    centres_x = (np.arange(0, shape[1]) + 0.5) * stride
    centres_y = (np.arange(0, shape[0]) + 0.5) * stride

    # row-major enumeration of all (x, y) cell centres
    xs = np.tile(centres_x, len(centres_y))
    ys = np.repeat(centres_y, len(centres_x))
    offsets = np.stack([xs, ys, xs, ys], axis=1)

    num_anchors = anchors.shape[0]
    num_cells = offsets.shape[0]

    # broadcast (K, 1, 4) offsets against (1, A, 4) anchors -> (K, A, 4)
    shifted = offsets.reshape((num_cells, 1, 4)) + anchors.reshape((1, num_anchors, 4))
    return shifted.reshape((num_cells * num_anchors, 4))
130
+
retinanet/coco_eval.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pycocotools.cocoeval import COCOeval
2
+ import json
3
+ import torch
4
+
5
+
6
def evaluate_coco(dataset, model, threshold=0.05):
    """Run ``model`` over ``dataset`` and report COCO bbox metrics.

    Detections scoring at least ``threshold`` are written to
    ``'<set_name>_bbox_results.json'`` and evaluated with ``COCOeval``, which
    prints its summary. The model is put in eval mode for the run and switched
    back to train mode on every exit path.

    Args:
        dataset: provides ``__len__``, ``__getitem__`` (dict with ``'img'``
            and ``'scale'``), ``image_ids``, ``label_to_coco_label``,
            ``set_name`` and ``coco``.
        model: callable returning ``(scores, labels, boxes)`` for one image.
        threshold: minimum score for a detection to be kept.

    Returns:
        None. Returns before evaluation when there are no detections.
    """
    model.eval()

    try:
        # start collecting results
        results = []
        image_ids = []

        with torch.no_grad():
            for index in range(len(dataset)):
                data = dataset[index]
                scale = data['scale']

                # run network
                if torch.cuda.is_available():
                    scores, labels, boxes = model(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0))
                else:
                    scores, labels, boxes = model(data['img'].permute(2, 0, 1).float().unsqueeze(dim=0))
                scores = scores.cpu()
                labels = labels.cpu()
                boxes = boxes.cpu()

                # correct boxes for image scale
                boxes /= scale

                if boxes.shape[0] > 0:
                    # change to (x, y, w, h) (MS COCO standard)
                    boxes[:, 2] -= boxes[:, 0]
                    boxes[:, 3] -= boxes[:, 1]

                    # compute predicted labels and scores
                    for box_id in range(boxes.shape[0]):
                        score = float(scores[box_id])
                        label = int(labels[box_id])
                        box = boxes[box_id, :]

                        # scores are sorted, so we can break
                        if score < threshold:
                            break

                        # append detection for each positively labeled class
                        results.append({
                            'image_id': dataset.image_ids[index],
                            'category_id': dataset.label_to_coco_label(label),
                            'score': float(score),
                            'bbox': box.tolist(),
                        })

                # append image to list of processed images
                image_ids.append(dataset.image_ids[index])

                # print progress (1-based so the final line reads N/N)
                print('{}/{}'.format(index + 1, len(dataset)), end='\r')

        if not results:
            return

        # write output; the context manager flushes and closes the file
        # before pycocotools reads it back
        results_path = '{}_bbox_results.json'.format(dataset.set_name)
        with open(results_path, 'w') as results_file:
            json.dump(results, results_file, indent=4)

        # load results in COCO evaluation tool
        coco_true = dataset.coco
        coco_pred = coco_true.loadRes(results_path)

        # run COCO evaluation
        coco_eval = COCOeval(coco_true, coco_pred, 'bbox')
        coco_eval.params.imgIds = image_ids
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
    finally:
        # restore training mode even on the early return or on an error
        model.train()

    return
retinanet/csv_eval.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import print_function
2
+
3
+ import numpy as np
4
+ import json
5
+ import os
6
+ import matplotlib.pyplot as plt
7
+ import torch
8
+
9
+
10
+
11
def compute_overlap(a, b):
    """
    Pairwise intersection-over-union between two sets of boxes.

    Parameters
    ----------
    a: (N, >=4) ndarray of float, boxes as (x1, y1, x2, y2, ...)
    b: (K, >=4) ndarray of float, boxes as (x1, y1, x2, y2, ...)
    Returns
    -------
    overlaps: (N, K) ndarray, IoU of every box in a with every box in b
    """
    # column vectors so that every a-box broadcasts against every b-box
    a_x1, a_y1, a_x2, a_y2 = (np.expand_dims(a[:, k], axis=1) for k in range(4))

    inter_w = np.maximum(np.minimum(a_x2, b[:, 2]) - np.maximum(a_x1, b[:, 0]), 0)
    inter_h = np.maximum(np.minimum(a_y2, b[:, 3]) - np.maximum(a_y1, b[:, 1]), 0)
    intersection = inter_w * inter_h

    area_a = (a_x2 - a_x1) * (a_y2 - a_y1)
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    # floor the union at machine epsilon to avoid division by zero
    union = np.maximum(area_a + area_b - intersection, np.finfo(float).eps)

    return intersection / union
36
+
37
+
38
+ def _compute_ap(recall, precision):
39
+ """ Compute the average precision, given the recall and precision curves.
40
+ Code originally from https://github.com/rbgirshick/py-faster-rcnn.
41
+ # Arguments
42
+ recall: The recall curve (list).
43
+ precision: The precision curve (list).
44
+ # Returns
45
+ The average precision as computed in py-faster-rcnn.
46
+ """
47
+ # correct AP calculation
48
+ # first append sentinel values at the end
49
+ mrec = np.concatenate(([0.], recall, [1.]))
50
+ mpre = np.concatenate(([0.], precision, [0.]))
51
+
52
+ # compute the precision envelope
53
+ for i in range(mpre.size - 1, 0, -1):
54
+ mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
55
+
56
+ # to calculate area under PR curve, look for points
57
+ # where X axis (recall) changes value
58
+ i = np.where(mrec[1:] != mrec[:-1])[0]
59
+
60
+ # and sum (\Delta recall) * prec
61
+ ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
62
+ return ap
63
+
64
+
65
+ def _get_detections(dataset, retinanet, score_threshold=0.05, max_detections=100, save_path=None):
66
+ """ Get the detections from the retinanet using the generator.
67
+ The result is a list of lists such that the size is:
68
+ all_detections[num_images][num_classes] = detections[num_detections, 4 + num_classes]
69
+ # Arguments
70
+ dataset : The generator used to run images through the retinanet.
71
+ retinanet : The retinanet to run on the images.
72
+ score_threshold : The score confidence threshold to use.
73
+ max_detections : The maximum number of detections to use per image.
74
+ save_path : The path to save the images with visualized detections to.
75
+ # Returns
76
+ A list of lists containing the detections for each image in the generator.
77
+ """
78
+ all_detections = [[None for i in range(dataset.num_classes())] for j in range(len(dataset))]
79
+
80
+ retinanet.eval()
81
+
82
+ with torch.no_grad():
83
+
84
+ for index in range(len(dataset)):
85
+ data = dataset[index]
86
+ scale = data['scale']
87
+
88
+ # run network
89
+ if torch.cuda.is_available():
90
+ scores, labels, boxes = retinanet(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0))
91
+ else:
92
+ scores, labels, boxes = retinanet(data['img'].permute(2, 0, 1).float().unsqueeze(dim=0))
93
+ scores = scores.cpu().numpy()
94
+ labels = labels.cpu().numpy()
95
+ boxes = boxes.cpu().numpy()
96
+
97
+ # correct boxes for image scale
98
+ boxes /= scale
99
+
100
+ # select indices which have a score above the threshold
101
+ indices = np.where(scores > score_threshold)[0]
102
+ if indices.shape[0] > 0:
103
+ # select those scores
104
+ scores = scores[indices]
105
+
106
+ # find the order with which to sort the scores
107
+ scores_sort = np.argsort(-scores)[:max_detections]
108
+
109
+ # select detections
110
+ image_boxes = boxes[indices[scores_sort], :]
111
+ image_scores = scores[scores_sort]
112
+ image_labels = labels[indices[scores_sort]]
113
+ image_detections = np.concatenate([image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1)
114
+
115
+ # copy detections to all_detections
116
+ for label in range(dataset.num_classes()):
117
+ all_detections[index][label] = image_detections[image_detections[:, -1] == label, :-1]
118
+ else:
119
+ # copy detections to all_detections
120
+ for label in range(dataset.num_classes()):
121
+ all_detections[index][label] = np.zeros((0, 5))
122
+
123
+ print('{}/{}'.format(index + 1, len(dataset)), end='\r')
124
+
125
+ return all_detections
126
+
127
+
128
+ def _get_annotations(generator):
129
+ """ Get the ground truth annotations from the generator.
130
+ The result is a list of lists such that the size is:
131
+ all_detections[num_images][num_classes] = annotations[num_detections, 5]
132
+ # Arguments
133
+ generator : The generator used to retrieve ground truth annotations.
134
+ # Returns
135
+ A list of lists containing the annotations for each image in the generator.
136
+ """
137
+ all_annotations = [[None for i in range(generator.num_classes())] for j in range(len(generator))]
138
+
139
+ for i in range(len(generator)):
140
+ # load the annotations
141
+ annotations = generator.load_annotations(i)
142
+
143
+ # copy detections to all_annotations
144
+ for label in range(generator.num_classes()):
145
+ all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()
146
+
147
+ print('{}/{}'.format(i + 1, len(generator)), end='\r')
148
+
149
+ return all_annotations
150
+
151
+
152
def evaluate(
    generator,
    retinanet,
    iou_threshold=0.5,
    score_threshold=0.05,
    max_detections=100,
    save_path=None
):
    """ Evaluate a given dataset using a given retinanet.
    # Arguments
        generator       : The generator that represents the dataset to evaluate.
        retinanet       : The retinanet to evaluate.
        iou_threshold   : The threshold used to consider when a detection is positive or negative.
        score_threshold : The score confidence threshold to use for detections.
        max_detections  : The maximum number of detections to use per image.
        save_path       : Directory in which to save the precision-recall curve of each label.
    # Returns
        A dict mapping each class label to a tuple (average precision, number of annotations).
    """
    # gather all detections and annotations
    all_detections = _get_detections(generator, retinanet, score_threshold=score_threshold, max_detections=max_detections, save_path=save_path)
    all_annotations = _get_annotations(generator)

    average_precisions = {}
    # label -> (recall curve, precision curve); kept per label so the report
    # below shows each class' own numbers rather than the last class computed
    curves = {}
    empty_curve = np.zeros((0,))

    for label in range(generator.num_classes()):
        false_positives = []
        true_positives = []
        scores = []
        num_annotations = 0.0

        for i in range(len(generator)):
            detections = all_detections[i][label]
            annotations = all_annotations[i][label]
            num_annotations += annotations.shape[0]
            # indices of annotations already matched in this image
            detected_annotations = set()

            for d in detections:
                scores.append(d[4])

                if annotations.shape[0] == 0:
                    false_positives.append(1)
                    true_positives.append(0)
                    continue

                overlaps = compute_overlap(np.expand_dims(d, axis=0), annotations)
                assigned_annotation = int(np.argmax(overlaps, axis=1)[0])
                max_overlap = overlaps[0, assigned_annotation]

                # a detection is a true positive only for the first match of
                # an annotation; later matches count as false positives
                if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                    false_positives.append(0)
                    true_positives.append(1)
                    detected_annotations.add(assigned_annotation)
                else:
                    false_positives.append(1)
                    true_positives.append(0)

        # no annotations -> AP for this class is 0 (is this correct?)
        if num_annotations == 0:
            average_precisions[label] = 0, 0
            curves[label] = (empty_curve, empty_curve)
            continue

        false_positives = np.array(false_positives, dtype=np.float64)
        true_positives = np.array(true_positives, dtype=np.float64)
        scores = np.array(scores, dtype=np.float64)

        # sort by score
        indices = np.argsort(-scores)
        false_positives = false_positives[indices]
        true_positives = true_positives[indices]

        # compute false positives and true positives
        false_positives = np.cumsum(false_positives)
        true_positives = np.cumsum(true_positives)

        # compute recall and precision
        recall = true_positives / num_annotations
        precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)

        # compute average precision
        average_precision = _compute_ap(recall, precision)
        average_precisions[label] = average_precision, num_annotations
        curves[label] = (recall, precision)

    print('\nmAP:')
    for label in range(generator.num_classes()):
        label_name = generator.label_to_name(label)
        recall, precision = curves[label]
        print('{}: {}'.format(label_name, average_precisions[label][0]))
        # a class without detections or annotations has an empty curve;
        # report 0.0 rather than indexing into it
        print("Precision: ", precision[-1] if precision.size else 0.0)
        print("Recall: ", recall[-1] if recall.size else 0.0)

        if save_path is not None:
            # fresh figure per label so curves do not pile up on one plot
            plt.figure()
            plt.plot(recall, precision)
            plt.xlabel('Recall')
            plt.ylabel('Precision')
            plt.title('Precision Recall curve')
            plt.savefig(save_path+'/'+label_name+'_precision_recall.jpg')
            plt.close()

    return average_precisions
259
+
retinanet/dataloader.py ADDED
@@ -0,0 +1,458 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import print_function, division
2
+ import sys
3
+ import os
4
+ import torch
5
+ import numpy as np
6
+ import random
7
+ import csv
8
+
9
+ from torch.utils.data import Dataset, DataLoader
10
+ from torchvision import transforms, utils
11
+ from torch.utils.data.sampler import Sampler
12
+
13
+ from pycocotools.coco import COCO
14
+
15
+ import skimage.io
16
+ import skimage.transform
17
+ import skimage.color
18
+ import skimage
19
+
20
+ from PIL import Image
21
+
22
+
23
class CocoDataset(Dataset):
    """Dataset over a COCO-format annotation file and image directory."""

    def __init__(self, root_dir, set_name='train2017', transform=None):
        """
        Args:
            root_dir (string): COCO directory; annotations are read from
                ``<root_dir>/annotations/instances_<set_name>.json`` and
                images from ``<root_dir>/images/<set_name>/``.
            set_name (string): Name of the split.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.root_dir = root_dir
        self.set_name = set_name
        self.transform = transform

        self.coco = COCO(os.path.join(self.root_dir, 'annotations', 'instances_' + self.set_name + '.json'))
        self.image_ids = self.coco.getImgIds()

        self.load_classes()

    def load_classes(self):
        """Build the name/label/COCO-id lookup tables from the category list."""
        # load class names (name -> label), labels follow ascending COCO id
        categories = self.coco.loadCats(self.coco.getCatIds())
        categories.sort(key=lambda x: x['id'])

        self.classes = {}
        self.coco_labels = {}
        self.coco_labels_inverse = {}
        for c in categories:
            self.coco_labels[len(self.classes)] = c['id']
            self.coco_labels_inverse[c['id']] = len(self.classes)
            self.classes[c['name']] = len(self.classes)

        # also load the reverse (label -> name)
        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return ``{'img', 'annot'}`` for image ``idx``, transformed if configured."""
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def load_image(self, image_index):
        """Read the image as float32 RGB scaled by 1/255."""
        image_info = self.coco.loadImgs(self.image_ids[image_index])[0]
        path = os.path.join(self.root_dir, 'images', self.set_name, image_info['file_name'])
        img = skimage.io.imread(path)

        # grayscale images are expanded to three channels
        if len(img.shape) == 2:
            img = skimage.color.gray2rgb(img)

        return img.astype(np.float32)/255.0

    def load_annotations(self, image_index):
        """Return an (N, 5) array of (x1, y1, x2, y2, label) rows for the image."""
        # get ground truth annotations
        annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)
        annotations = np.zeros((0, 5))

        # some images appear to miss annotations (like image with id 257034)
        if len(annotations_ids) == 0:
            return annotations

        # parse annotations
        coco_annotations = self.coco.loadAnns(annotations_ids)
        for idx, a in enumerate(coco_annotations):

            # some annotations have basically no width / height, skip them
            if a['bbox'][2] < 1 or a['bbox'][3] < 1:
                continue

            annotation = np.zeros((1, 5))
            annotation[0, :4] = a['bbox']
            annotation[0, 4] = self.coco_label_to_label(a['category_id'])
            annotations = np.append(annotations, annotation, axis=0)

        # transform from [x, y, w, h] to [x1, y1, x2, y2]
        annotations[:, 2] = annotations[:, 0] + annotations[:, 2]
        annotations[:, 3] = annotations[:, 1] + annotations[:, 3]

        return annotations

    def coco_label_to_label(self, coco_label):
        """Map a COCO category id to the contiguous label index."""
        return self.coco_labels_inverse[coco_label]

    def label_to_coco_label(self, label):
        """Map a contiguous label index back to the COCO category id."""
        return self.coco_labels[label]

    def label_to_name(self, label):
        """Map a contiguous label index to the category name."""
        return self.labels[label]

    def image_aspect_ratio(self, image_index):
        """Return width / height from the image metadata."""
        image = self.coco.loadImgs(self.image_ids[image_index])[0]
        return float(image['width']) / float(image['height'])

    def num_classes(self):
        """Return the number of categories in the loaded annotation file."""
        # derived from the annotation file instead of a hard-coded 80, so
        # COCO-format datasets with a different category count also work
        return len(self.classes)
124
+
125
+
126
class CSVDataset(Dataset):
    """Dataset described by a CSV annotation file and a CSV class list."""

    def __init__(self, train_file, class_list, transform=None):
        """
        Args:
            train_file (string): CSV file with annotations, one row per box:
                ``img_file,x1,y1,x2,y2,class_name`` (or ``img_file,,,,,``
                for an image without annotations)
            class_list (string): CSV file with ``class_name,class_id`` rows
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.train_file = train_file
        self.class_list = class_list
        self.transform = transform

        # parse the provided class file
        try:
            with self._open_for_csv(self.class_list) as file:
                self.classes = self.load_classes(csv.reader(file, delimiter=','))
        except ValueError as e:
            raise(ValueError('invalid CSV class file: {}: {}'.format(self.class_list, e)))

        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

        # csv with img_path, x1, y1, x2, y2, class_name
        try:
            with self._open_for_csv(self.train_file) as file:
                self.image_data = self._read_annotations(csv.reader(file, delimiter=','), self.classes)
        except ValueError as e:
            raise(ValueError('invalid CSV annotations file: {}: {}'.format(self.train_file, e)))
        self.image_names = list(self.image_data.keys())

    def _parse(self, value, function, fmt):
        """
        Parse a string into a value, and format a nice ValueError if it fails.
        Returns `function(value)`.
        Any `ValueError` raised is caught and a new `ValueError` is raised
        with message `fmt.format(e)`, where `e` is the caught `ValueError`.
        """
        try:
            return function(value)
        except ValueError as e:
            # plain raise: the previously used `raise_from` was never imported
            raise ValueError(fmt.format(e))

    def _open_for_csv(self, path):
        """
        Open a file with flags suitable for csv.reader.
        This is different for python2 it means with mode 'rb',
        for python3 this means 'r' with "universal newlines".
        """
        if sys.version_info[0] < 3:
            return open(path, 'rb')
        else:
            return open(path, 'r', newline='')

    def load_classes(self, csv_reader):
        """Read ``class_name,class_id`` rows into a name -> integer id dict.

        Raises ValueError on a malformed row, a non-integer id or a duplicate
        class name.
        """
        result = {}

        for line, row in enumerate(csv_reader):
            line += 1

            try:
                class_name, class_id = row
            except ValueError:
                raise(ValueError('line {}: format should be \'class_name,class_id\''.format(line)))
            class_id = self._parse(class_id, int, 'line {}: malformed class ID: {{}}'.format(line))

            if class_name in result:
                raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name))
            result[class_name] = class_id
        return result

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        """Return ``{'img', 'annot'}`` for image ``idx``, transformed if configured."""
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def load_image(self, image_index):
        """Read the image as float32 RGB scaled by 1/255."""
        img = skimage.io.imread(self.image_names[image_index])

        # grayscale images are expanded to three channels
        if len(img.shape) == 2:
            img = skimage.color.gray2rgb(img)

        return img.astype(np.float32)/255.0

    def load_annotations(self, image_index):
        """Return an (N, 5) array of (x1, y1, x2, y2, label) rows for the image."""
        # get ground truth annotations
        annotation_list = self.image_data[self.image_names[image_index]]
        annotations = np.zeros((0, 5))

        # images listed without boxes yield an empty array
        if len(annotation_list) == 0:
            return annotations

        # parse annotations
        for idx, a in enumerate(annotation_list):
            x1 = a['x1']
            x2 = a['x2']
            y1 = a['y1']
            y2 = a['y2']

            # some annotations have basically no width / height, skip them
            if (x2-x1) < 1 or (y2-y1) < 1:
                continue

            annotation = np.zeros((1, 5))

            annotation[0, 0] = x1
            annotation[0, 1] = y1
            annotation[0, 2] = x2
            annotation[0, 3] = y2

            annotation[0, 4] = self.name_to_label(a['class'])
            annotations = np.append(annotations, annotation, axis=0)

        return annotations

    def _read_annotations(self, csv_reader, classes):
        """Read annotation rows into ``{img_file: [box dict, ...]}``.

        Each box dict has the keys 'x1', 'x2', 'y1', 'y2' and 'class'. Raises
        ValueError on a malformed row, an invalid box or an unknown class.
        """
        result = {}
        for line, row in enumerate(csv_reader):
            line += 1

            try:
                img_file, x1, y1, x2, y2, class_name = row[:6]
            except ValueError:
                # plain raise: the previously used `raise_from` was never imported
                raise ValueError('line {}: format should be \'img_file,x1,y1,x2,y2,class_name\' or \'img_file,,,,,\''.format(line))

            if img_file not in result:
                result[img_file] = []

            # If a row contains only an image path, it's an image without annotations.
            if (x1, y1, x2, y2, class_name) == ('', '', '', '', ''):
                continue

            x1 = self._parse(x1, int, 'line {}: malformed x1: {{}}'.format(line))
            y1 = self._parse(y1, int, 'line {}: malformed y1: {{}}'.format(line))
            x2 = self._parse(x2, int, 'line {}: malformed x2: {{}}'.format(line))
            y2 = self._parse(y2, int, 'line {}: malformed y2: {{}}'.format(line))

            # Check that the bounding box is valid.
            if x2 <= x1:
                raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
            if y2 <= y1:
                raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))

            # check if the current class name is correctly present
            if class_name not in classes:
                raise ValueError('line {}: unknown class name: \'{}\' (classes: {})'.format(line, class_name, classes))

            result[img_file].append({'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name})
        return result

    def name_to_label(self, name):
        """Map a class name to its integer id."""
        return self.classes[name]

    def label_to_name(self, label):
        """Map an integer class id to its name."""
        return self.labels[label]

    def num_classes(self):
        """Return the largest class id plus one."""
        return max(self.classes.values()) + 1

    def image_aspect_ratio(self, image_index):
        """Return width / height of the image."""
        # context manager so the underlying file handle is released
        with Image.open(self.image_names[image_index]) as image:
            return float(image.width) / float(image.height)
299
+
300
+
301
def collater(data):
    """Collate samples into one zero-padded image batch and a padded annotation tensor.

    Images are placed in the top-left corner of a zero tensor sized to the
    largest first and second image dimension in the batch, then permuted to
    channels-first. Annotations are padded with -1 rows; when no sample has
    annotations a single -1 row per sample is returned.
    """
    images = [sample['img'] for sample in data]
    annots = [sample['annot'] for sample in data]
    scales = [sample['scale'] for sample in data]

    batch_size = len(images)
    max_dim0 = max([int(img.shape[0]) for img in images])
    max_dim1 = max([int(img.shape[1]) for img in images])

    padded_imgs = torch.zeros(batch_size, max_dim0, max_dim1, 3)
    for i, img in enumerate(images):
        padded_imgs[i, :int(img.shape[0]), :int(img.shape[1]), :] = img

    max_num_annots = max(annot.shape[0] for annot in annots)

    if max_num_annots > 0:
        annot_padded = torch.full((len(annots), max_num_annots, 5), -1.0)
        for idx, annot in enumerate(annots):
            count = annot.shape[0]
            if count > 0:
                annot_padded[idx, :count, :] = annot
    else:
        annot_padded = torch.full((len(annots), 1, 5), -1.0)

    # NHWC -> NCHW
    padded_imgs = padded_imgs.permute(0, 3, 1, 2)

    return {'img': padded_imgs, 'annot': annot_padded, 'scale': scales}
338
+
339
class Resizer(object):
    """Rescale a sample's image and boxes, zero-pad the image and convert both to tensors."""

    def __call__(self, sample, min_side=608, max_side=1024):
        image, annots = sample['img'], sample['annot']

        rows, cols, cns = image.shape
        shorter, longer = min(rows, cols), max(rows, cols)

        # scale so the shorter side becomes min_side, unless that would push
        # the longer side past max_side (images with a large aspect ratio)
        scale = min_side / shorter
        if longer * scale > max_side:
            scale = max_side / longer

        target = (int(round(rows*scale)), int(round((cols*scale))))
        image = skimage.transform.resize(image, target)
        rows, cols, cns = image.shape

        # grow each spatial dimension to the next multiple of 32 strictly
        # above its current size (a full 32 is added when already aligned)
        padded_rows = rows + 32 - rows % 32
        padded_cols = cols + 32 - cols % 32

        new_image = np.zeros((padded_rows, padded_cols, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)

        # box coordinates follow the image scale (in place)
        annots[:, :4] *= scale

        return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale}
372
+
373
+
374
class Augmenter(object):
    """Randomly flip a sample's image and boxes horizontally."""

    def __call__(self, sample, flip_x=0.5):
        # flip with probability flip_x; otherwise hand the sample back untouched
        if not (np.random.rand() < flip_x):
            return sample

        image, annots = sample['img'], sample['annot']
        flipped = image[:, ::-1, :]

        _, width, _ = flipped.shape

        left = annots[:, 0].copy()
        right = annots[:, 2].copy()

        # mirrored box: the old right edge becomes the new left edge
        annots[:, 0] = width - right
        annots[:, 2] = width - left

        return {'img': flipped, 'annot': annots}
396
+
397
+
398
class Normalizer(object):
    """Standardise a sample's image with fixed per-channel mean and std."""

    def __init__(self):
        # shaped (1, 1, 3) so they broadcast over an HxWx3 image
        self.mean = np.array([0.485, 0.456, 0.406]).reshape((1, 1, 3))
        self.std = np.array([0.229, 0.224, 0.225]).reshape((1, 1, 3))

    def __call__(self, sample):
        image, annots = sample['img'], sample['annot']

        centred = image.astype(np.float32) - self.mean
        return {'img': centred / self.std, 'annot': annots}
409
+
410
class UnNormalizer(object):
    """Undo per-channel standardisation: ``channel * std + mean``."""

    def __init__(self, mean=None, std=None):
        """
        Args:
            mean: per-channel means; defaults to [0.485, 0.456, 0.406].
            std: per-channel standard deviations; defaults to
                [0.229, 0.224, 0.225].
        """
        # identity check: `== None` is evaluated element-wise on arrays and
        # makes the `if` raise for array arguments
        if mean is None:
            self.mean = [0.485, 0.456, 0.406]
        else:
            self.mean = mean
        if std is None:
            self.std = [0.229, 0.224, 0.225]
        else:
            self.std = std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Normalized tensor image of size (C, H, W).
        Returns:
            Tensor: The same tensor, un-normalized in place.
        """
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor
431
+
432
+
433
class AspectRatioBasedSampler(Sampler):
    """Batch sampler that groups images of similar aspect ratio.

    Images are sorted by ``data_source.image_aspect_ratio(index)`` and cut
    into consecutive batches; the batches (not their contents) are shuffled
    on every iteration.

    Args:
        data_source: dataset providing ``__len__`` and ``image_aspect_ratio``.
        batch_size: number of indices per batch.
        drop_last: when true, a trailing incomplete batch is dropped; when
            false it is filled up by wrapping around to the first indices.
    """

    def __init__(self, data_source, batch_size, drop_last):
        self.data_source = data_source
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.groups = self.group_images()

    def __iter__(self):
        random.shuffle(self.groups)
        for group in self.groups:
            yield group

    def __len__(self):
        if self.drop_last:
            return len(self.data_source) // self.batch_size
        else:
            return (len(self.data_source) + self.batch_size - 1) // self.batch_size

    def group_images(self):
        """Return the list of batches, each a list of dataset indices."""
        # determine the order of the images
        order = list(range(len(self.data_source)))
        order.sort(key=lambda x: self.data_source.image_aspect_ratio(x))

        # divide into groups, one group = one batch; the last group wraps
        # around to the start of the order when it would be incomplete
        groups = [[order[x % len(order)] for x in range(i, i + self.batch_size)] for i in range(0, len(order), self.batch_size)]

        # honour drop_last so that iteration yields exactly __len__ batches
        if self.drop_last and len(order) % self.batch_size != 0:
            groups.pop()

        return groups
retinanet/losses.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+
5
def calc_iou(a, b):
    """Return the pairwise IoU between two sets of ``(x1, y1, x2, y2)`` boxes.

    Args:
        a: Tensor whose rows are boxes; columns 0-3 are read.
        b: Tensor whose rows are boxes; columns 0-3 are read.

    Returns:
        Tensor with one row per box in ``a`` and one column per box in ``b``.
    """
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    # Unsqueezing ``a`` makes the min/max broadcast to every (a, b) pair.
    iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 0])
    ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 1])

    # Disjoint boxes give negative extents; clamp them to an empty overlap.
    iw = torch.clamp(iw, min=0)
    ih = torch.clamp(ih, min=0)

    intersection = iw * ih

    ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - intersection

    # Guard against division by zero for degenerate boxes.
    ua = torch.clamp(ua, min=1e-8)

    return intersection / ua


class FocalLoss(nn.Module):
    """Focal classification loss plus smooth-L1 box regression loss.

    All intermediate tensors are created on the device of the inputs, so the
    module works on CPU even when CUDA is installed.
    """

    def forward(self, classifications, regressions, anchors, annotations):
        """Compute the batch-averaged classification and regression losses.

        Args:
            classifications: Per-image class probabilities, indexed as
                ``[image, anchor, class]``.
            regressions: Per-image box deltas, indexed as
                ``[image, anchor, 4]``.
            anchors: Anchor boxes; only ``anchors[0]`` is used, as
                ``(x1, y1, x2, y2)`` rows.
            annotations: Ground truth, indexed as ``[image, box, 5]`` with
                columns ``(x1, y1, x2, y2, class)``. Rows whose class is
                ``-1`` are ignored.

        Returns:
            tuple: ``(classification_loss, regression_loss)``, each the mean
            over the images of the batch with the reduced dimension kept.
        """
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[0, :, :]

        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            # Keep probabilities away from 0 and 1 so the logs stay finite.
            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            if bbox_annotation.shape[0] == 0:
                # No ground truth: every anchor is a negative for every class.
                alpha_factor = torch.full_like(classification, 1.0 - alpha)
                focal_weight = alpha_factor * torch.pow(classification, gamma)

                bce = -(torch.log(1.0 - classification))

                cls_loss = focal_weight * bce
                classification_losses.append(cls_loss.sum())
                regression_losses.append(torch.tensor(0.0, device=classification.device))

                continue

            IoU = calc_iou(anchor, bbox_annotation[:, :4])  # num_anchors x num_annotations

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # num_anchors x 1

            # compute the loss for classification
            # -1 marks anchors that are ignored (0.4 <= IoU < 0.5).
            targets = torch.full_like(classification, -1.0)

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1

            alpha_factor = torch.full_like(targets, alpha)

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))

            cls_loss = focal_weight * bce

            # Ignored anchors contribute nothing to the loss.
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros_like(cls_loss))

            classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))

            # compute the loss for regression

            if num_positive_anchors > 0:
                assigned_annotations = assigned_annotations[positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # clip widths to 1
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()

                scale = torch.tensor([[0.1, 0.1, 0.2, 0.2]], dtype=targets.dtype, device=targets.device)
                targets = targets / scale

                regression_diff = torch.abs(targets - regression[positive_indices, :])

                # Smooth L1: quadratic below 1/9, linear above.
                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0
                )
                regression_losses.append(regression_loss.mean())
            else:
                regression_losses.append(torch.tensor(0.0, device=classification.device))

        return (
            torch.stack(classification_losses).mean(dim=0, keepdim=True),
            torch.stack(regression_losses).mean(dim=0, keepdim=True),
        )
176
+
177
+
retinanet/model.py ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch.nn as nn
2
+ import torch
3
+ import math
4
+ import torch.utils.model_zoo as model_zoo
5
+ from torchvision.ops import nms
6
+ from retinanet.utils import BasicBlock, Bottleneck, BBoxTransform, ClipBoxes
7
+ from retinanet.anchors import Anchors
8
+ from retinanet import losses
9
+
10
# Download URLs of the ResNet checkpoints loaded by the resnetXX() constructors
# when pretrained=True, keyed by architecture name.
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
17
+
18
+
19
class PyramidFeatures(nn.Module):
    """Feature pyramid producing five maps (P3..P7) from backbone maps C3..C5.

    Args:
        C3_size: Channel count of the C3 input.
        C4_size: Channel count of the C4 input.
        C5_size: Channel count of the C5 input.
        feature_size: Channel count of every pyramid output.
    """

    def __init__(self, C3_size, C4_size, C5_size, feature_size=256):
        super().__init__()

        def lateral(in_channels):
            # 1x1 conv projecting a backbone map to the pyramid width
            return nn.Conv2d(in_channels, feature_size, kernel_size=1, stride=1, padding=0)

        def smooth():
            # 3x3 conv applied to a merged map before it is returned
            return nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=1, padding=1)

        # NOTE: layers are registered in this exact order on purpose; the
        # attribute names and order define the state_dict layout.

        # P5: projected C5, plus a 2x upsampled copy to merge into P4
        self.P5_1 = lateral(C5_size)
        self.P5_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P5_2 = smooth()

        # P4: projected C4 merged with upsampled P5
        self.P4_1 = lateral(C4_size)
        self.P4_upsampled = nn.Upsample(scale_factor=2, mode='nearest')
        self.P4_2 = smooth()

        # P3: projected C3 merged with upsampled P4
        self.P3_1 = lateral(C3_size)
        self.P3_2 = smooth()

        # "P6 is obtained via a 3x3 stride-2 conv on C5"
        self.P6 = nn.Conv2d(C5_size, feature_size, kernel_size=3, stride=2, padding=1)

        # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6"
        self.P7_1 = nn.ReLU()
        self.P7_2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, stride=2, padding=1)

    def forward(self, inputs):
        """Return ``[P3, P4, P5, P6, P7]`` for ``inputs = (C3, C4, C5)``."""
        c3, c4, c5 = inputs

        # Top-down pathway: the un-smoothed merged map is what gets upsampled.
        p5_lateral = self.P5_1(c5)
        p5_up = self.P5_upsampled(p5_lateral)
        p5_out = self.P5_2(p5_lateral)

        p4_merged = p5_up + self.P4_1(c4)
        p4_up = self.P4_upsampled(p4_merged)
        p4_out = self.P4_2(p4_merged)

        p3_out = self.P3_2(self.P3_1(c3) + p4_up)

        # Extra coarse levels computed from C5.
        p6_out = self.P6(c5)
        p7_out = self.P7_2(self.P7_1(p6_out))

        return [p3_out, p4_out, p5_out, p6_out, p7_out]
66
+
67
+
68
class RegressionModel(nn.Module):
    """Box regression head: four 3x3 conv+ReLU stages and a 3x3 output conv.

    Args:
        num_features_in: Channel count of the input feature map.
        num_anchors: Anchors predicted per spatial location.
        feature_size: Channel count of the hidden conv layers.
    """

    def __init__(self, num_features_in, num_anchors=9, feature_size=256):
        super().__init__()

        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()

        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()

        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()

        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()

        # Four regression values per anchor.
        self.output = nn.Conv2d(feature_size, num_anchors * 4, kernel_size=3, padding=1)

    def forward(self, x):
        """Return box deltas reshaped to ``(batch, locations * anchors, 4)``."""
        stages = (
            (self.conv1, self.act1),
            (self.conv2, self.act2),
            (self.conv3, self.act3),
            (self.conv4, self.act4),
        )
        hidden = x
        for conv, act in stages:
            hidden = act(conv(hidden))

        deltas = self.output(hidden)

        # Move channels last so each location's anchors are contiguous, then
        # flatten locations and anchors into one axis.
        deltas = deltas.permute(0, 2, 3, 1)
        return deltas.contiguous().view(deltas.shape[0], -1, 4)
105
+
106
+
107
class ClassificationModel(nn.Module):
    """Classification head: four 3x3 conv+ReLU stages, output conv, sigmoid.

    Args:
        num_features_in: Channel count of the input feature map.
        num_anchors: Anchors predicted per spatial location.
        num_classes: Number of object classes scored per anchor.
        prior: Accepted for interface compatibility; not used by this class.
        feature_size: Channel count of the hidden conv layers.
    """

    def __init__(self, num_features_in, num_anchors=9, num_classes=80, prior=0.01, feature_size=256):
        super().__init__()

        self.num_classes = num_classes
        self.num_anchors = num_anchors

        self.conv1 = nn.Conv2d(num_features_in, feature_size, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()

        self.conv2 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()

        self.conv3 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()

        self.conv4 = nn.Conv2d(feature_size, feature_size, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()

        # One score per (anchor, class) pair at every location.
        self.output = nn.Conv2d(feature_size, num_anchors * num_classes, kernel_size=3, padding=1)
        self.output_act = nn.Sigmoid()

    def forward(self, x):
        """Return scores shaped ``(batch, locations * anchors, num_classes)``."""
        stages = (
            (self.conv1, self.act1),
            (self.conv2, self.act2),
            (self.conv3, self.act3),
            (self.conv4, self.act4),
        )
        hidden = x
        for conv, act in stages:
            hidden = act(conv(hidden))

        scores = self.output_act(self.output(hidden))

        # The conv output has num_anchors * num_classes channels; move them
        # last and split them into separate anchor and class axes.
        scores = scores.permute(0, 2, 3, 1)
        batch_size, dim1, dim2, _ = scores.shape
        scores = scores.view(batch_size, dim1, dim2, self.num_anchors, self.num_classes)

        return scores.contiguous().view(x.shape[0], -1, self.num_classes)
153
+
154
+
155
class ResNet(nn.Module):
    """RetinaNet detector built on a ResNet backbone.

    In training mode ``forward`` returns the focal-loss tuple; in eval mode it
    returns the detections that survive score thresholding and per-class NMS.

    Args:
        num_classes: Number of object classes scored by the classification head.
        block: Residual block class, ``BasicBlock`` or ``Bottleneck``.
        layers: Number of blocks in each of the four residual stages.

    Raises:
        ValueError: If ``block`` is neither ``BasicBlock`` nor ``Bottleneck``.
    """

    def __init__(self, num_classes, block, layers):
        super(ResNet, self).__init__()
        # Running input width for _make_layer; updated as stages are built.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # The pyramid needs the output widths of stages 2-4, read from the
        # last conv of the last block of each stage.
        if block == BasicBlock:
            fpn_sizes = [self.layer2[layers[1] - 1].conv2.out_channels, self.layer3[layers[2] - 1].conv2.out_channels,
                         self.layer4[layers[3] - 1].conv2.out_channels]
        elif block == Bottleneck:
            fpn_sizes = [self.layer2[layers[1] - 1].conv3.out_channels, self.layer3[layers[2] - 1].conv3.out_channels,
                         self.layer4[layers[3] - 1].conv3.out_channels]
        else:
            raise ValueError(f"Block type {block} not understood")

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])

        self.regressionModel = RegressionModel(256)
        self.classificationModel = ClassificationModel(256, num_classes=num_classes)

        self.anchors = Anchors()

        self.regressBoxes = BBoxTransform()

        self.clipBoxes = ClipBoxes()

        self.focalLoss = losses.FocalLoss()

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

        prior = 0.01

        # With zero weights, this bias makes the sigmoid output equal to
        # ``prior`` for every anchor and class at initialization.
        self.classificationModel.output.weight.data.fill_(0)
        self.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))

        self.regressionModel.output.weight.data.fill_(0)
        self.regressionModel.output.bias.data.fill_(0)

        self.freeze_bn()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one residual stage of ``blocks`` blocks as an nn.Sequential."""
        downsample = None
        # A projection shortcut is needed whenever the first block changes the
        # spatial size or the channel count.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def freeze_bn(self):
        '''Freeze BatchNorm layers.'''
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.eval()

    def forward(self, inputs):
        """Run the detector.

        Args:
            inputs: ``(img_batch, annotations)`` in training mode, or just
                ``img_batch`` in eval mode.

        Returns:
            Training: the result of the focal loss module.
            Eval: ``[scores, class_indexes, boxes]`` for the kept detections.
        """
        if self.training:
            img_batch, annotations = inputs
        else:
            img_batch = inputs

        x = self.conv1(img_batch)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x1 = self.layer1(x)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)

        features = self.fpn([x2, x3, x4])

        regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1)

        classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1)

        # Keep the anchors on the same device as the images regardless of
        # where the anchor module created them.
        anchors = self.anchors(img_batch).to(img_batch.device)

        if self.training:
            return self.focalLoss(classification, regression, anchors, annotations)

        transformed_anchors = self.regressBoxes(anchors, regression)
        transformed_anchors = self.clipBoxes(transformed_anchors, img_batch)

        # Result buffers follow the device of the predictions instead of
        # assuming CUDA whenever it is installed.
        device = classification.device
        finalScores = torch.tensor([], device=device)
        finalAnchorBoxesIndexes = torch.tensor([], dtype=torch.long, device=device)
        finalAnchorBoxesCoordinates = torch.tensor([], device=device)

        # NOTE(review): the squeezes below presumably rely on a batch of one
        # image at inference time - confirm against callers.
        all_boxes = torch.squeeze(transformed_anchors)

        for i in range(classification.shape[2]):
            scores = torch.squeeze(classification[:, :, i])
            scores_over_thresh = (scores > 0.05)
            if scores_over_thresh.sum() == 0:
                # no boxes to NMS, just continue
                continue

            scores = scores[scores_over_thresh]
            anchorBoxes = all_boxes[scores_over_thresh]
            anchors_nms_idx = nms(anchorBoxes, scores, 0.5)

            finalScores = torch.cat((finalScores, scores[anchors_nms_idx]))
            finalAnchorBoxesIndexesValue = torch.full(
                (anchors_nms_idx.shape[0],), i, dtype=torch.long, device=device)

            finalAnchorBoxesIndexes = torch.cat((finalAnchorBoxesIndexes, finalAnchorBoxesIndexesValue))
            finalAnchorBoxesCoordinates = torch.cat((finalAnchorBoxesCoordinates, anchorBoxes[anchors_nms_idx]))

        return [finalScores, finalAnchorBoxesIndexes, finalAnchorBoxesCoordinates]
298
+
299
+
300
+
301
def resnet18(num_classes, pretrained=False, **kwargs):
    """Build a detector with a ResNet-18 backbone.

    Args:
        num_classes: Number of classes passed to ``ResNet``.
        pretrained (bool): If True, load the ``resnet18`` checkpoint from
            ``model_urls`` (non-strictly) into the model.
        **kwargs: Forwarded to ``ResNet``.
    """
    net = ResNet(num_classes, BasicBlock, [2, 2, 2, 2], **kwargs)
    if not pretrained:
        return net
    # The checkpoint is cached in the current working directory.
    weights = model_zoo.load_url(model_urls['resnet18'], model_dir='.')
    net.load_state_dict(weights, strict=False)
    return net
310
+
311
+
312
def resnet34(num_classes, pretrained=False, **kwargs):
    """Build a detector with a ResNet-34 backbone.

    Args:
        num_classes: Number of classes passed to ``ResNet``.
        pretrained (bool): If True, load the ``resnet34`` checkpoint from
            ``model_urls`` (non-strictly) into the model.
        **kwargs: Forwarded to ``ResNet``.
    """
    net = ResNet(num_classes, BasicBlock, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net
    # The checkpoint is cached in the current working directory.
    weights = model_zoo.load_url(model_urls['resnet34'], model_dir='.')
    net.load_state_dict(weights, strict=False)
    return net
321
+
322
+
323
def resnet50(num_classes, pretrained=False, **kwargs):
    """Build a detector with a ResNet-50 backbone.

    Args:
        num_classes: Number of classes passed to ``ResNet``.
        pretrained (bool): If True, load the ``resnet50`` checkpoint from
            ``model_urls`` (non-strictly) into the model.
        **kwargs: Forwarded to ``ResNet``.
    """
    net = ResNet(num_classes, Bottleneck, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net
    # The checkpoint is cached in the current working directory.
    weights = model_zoo.load_url(model_urls['resnet50'], model_dir='.')
    net.load_state_dict(weights, strict=False)
    return net
332
+
333
+
334
def resnet101(num_classes, pretrained=False, **kwargs):
    """Build a detector with a ResNet-101 backbone.

    Args:
        num_classes: Number of classes passed to ``ResNet``.
        pretrained (bool): If True, load the ``resnet101`` checkpoint from
            ``model_urls`` (non-strictly) into the model.
        **kwargs: Forwarded to ``ResNet``.
    """
    net = ResNet(num_classes, Bottleneck, [3, 4, 23, 3], **kwargs)
    if not pretrained:
        return net
    # The checkpoint is cached in the current working directory.
    weights = model_zoo.load_url(model_urls['resnet101'], model_dir='.')
    net.load_state_dict(weights, strict=False)
    return net
343
+
344
+
345
def resnet152(num_classes, pretrained=False, **kwargs):
    """Build a detector with a ResNet-152 backbone.

    Args:
        num_classes: Number of classes passed to ``ResNet``.
        pretrained (bool): If True, load the ``resnet152`` checkpoint from
            ``model_urls`` (non-strictly) into the model.
        **kwargs: Forwarded to ``ResNet``.
    """
    net = ResNet(num_classes, Bottleneck, [3, 8, 36, 3], **kwargs)
    if not pretrained:
        return net
    # The checkpoint is cached in the current working directory.
    weights = model_zoo.load_url(model_urls['resnet152'], model_dir='.')
    net.load_state_dict(weights, strict=False)
    return net
retinanet/oid_dataset.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import print_function, division
2
+
3
+ import csv
4
+ import json
5
+ import os
6
+ import warnings
7
+
8
+ import numpy as np
9
+ import skimage
10
+ import skimage.color
11
+ import skimage.io
12
+ import skimage.transform
13
+ from PIL import Image
14
+ from torch.utils.data import Dataset
15
+
16
+
17
def _strip_quote_chars(text):
    """Remove double quotes, single quotes and backticks from ``text``."""
    return text.replace("\"", "").replace("'", "").replace('`', '')


def get_labels(metadata_dir, version='v4'):
    """Read the class tables of an Open Images metadata directory.

    Args:
        metadata_dir: Directory containing the class description files.
        version: ``'v4'`` and ``'challenge2018'`` read a single CSV of
            ``label, description`` rows. Any other value reads
            ``class-descriptions.csv`` together with
            ``classes-bbox-trainable.txt``.

    Returns:
        tuple: ``(id_to_labels, cls_index)`` where ``id_to_labels`` maps a
        consecutive integer id to the cleaned description and ``cls_index``
        maps the label string to that integer id.
    """
    if version == 'v4' or version == 'challenge2018':
        csv_file = 'class-descriptions-boxable.csv' if version == 'v4' else 'challenge-2018-class-descriptions-500.csv'

        boxable_classes_descriptions = os.path.join(metadata_dir, csv_file)
        id_to_labels = {}
        cls_index = {}

        with open(boxable_classes_descriptions) as f:
            # skip empty csv rows (usually the last one) so ids stay consecutive
            rows = (row for row in csv.reader(f) if row)
            for i, row in enumerate(rows):
                id_to_labels[i] = _strip_quote_chars(row[1])
                cls_index[row[0]] = i
    else:
        trainable_classes_path = os.path.join(metadata_dir, 'classes-bbox-trainable.txt')
        description_path = os.path.join(metadata_dir, 'class-descriptions.csv')

        description_table = {}
        with open(description_path) as f:
            for row in csv.reader(f):
                # make sure the csv row is not empty (usually the last one)
                if row:
                    description_table[row[0]] = _strip_quote_chars(row[1])

        # Read as text: splitting bytes on a str separator fails on Python 3.
        # Blank lines (e.g. a trailing newline) are not class labels.
        with open(trainable_classes_path) as f:
            trainable_classes = [line.strip() for line in f if line.strip()]

        id_to_labels = {i: description_table[c] for i, c in enumerate(trainable_classes)}
        cls_index = {c: i for i, c in enumerate(trainable_classes)}

    return id_to_labels, cls_index
55
+
56
+
57
def generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index, version='v4'):
    """Build the per-image annotation dictionary from the bbox CSV.

    Args:
        main_dir: Dataset root; images are read from ``<main_dir>/images/...``
            to obtain their sizes.
        metadata_dir: Directory holding the annotation CSV files.
        subset: Subset name. For ``'challenge2018'`` only ``'train'`` and
            ``'validation'`` are accepted.
        cls_index: Mapping from label name to class id; rows whose label is
            not in it are skipped.
        version: ``'v4'``, ``'challenge2018'`` or anything else (treated as
            the older layout with ``annotations-human-bbox.csv``).

    Returns:
        dict: ``image_id -> {'w': width, 'h': height, 'boxes': [...]}`` where
        each box is ``{'cls_id', 'x1', 'x2', 'y1', 'y2'}`` with the relative
        coordinates read from the CSV.

    Raises:
        NotImplementedError: For ``'challenge2018'`` with an unsupported subset.
        ValueError: If a row has ``x2 <= x1`` or ``y2 <= y1``.
    """
    validation_image_ids = set()

    if version == 'v4':
        annotations_path = os.path.join(metadata_dir, subset, '{}-annotations-bbox.csv'.format(subset))
    elif version == 'challenge2018':
        validation_image_ids_path = os.path.join(metadata_dir, 'challenge-2018-image-ids-valset-od.csv')

        with open(validation_image_ids_path, 'r') as csv_file:
            reader = csv.DictReader(csv_file, fieldnames=['ImageID'])
            # Skip the header row; the builtin next() works where the
            # Python 2 only ``reader.next()`` does not.
            next(reader, None)
            validation_image_ids.update(row['ImageID'] for row in reader)

        annotations_path = os.path.join(metadata_dir, 'challenge-2018-train-annotations-bbox.csv')
    else:
        annotations_path = os.path.join(metadata_dir, subset, 'annotations-human-bbox.csv')

    fieldnames = ['ImageID', 'Source', 'LabelName', 'Confidence',
                  'XMin', 'XMax', 'YMin', 'YMax',
                  'IsOccluded', 'IsTruncated', 'IsGroupOf', 'IsDepiction', 'IsInside']

    id_annotations = dict()
    with open(annotations_path, 'r') as csv_file:
        reader = csv.DictReader(csv_file, fieldnames=fieldnames)
        next(reader, None)  # skip the header row

        # Cache of image sizes so each image file is opened at most once.
        images_sizes = {}
        for line, row in enumerate(reader):
            frame = row['ImageID']

            if version == 'challenge2018':
                # The validation set is carved out of the training annotations.
                if subset == 'train':
                    if frame in validation_image_ids:
                        continue
                elif subset == 'validation':
                    if frame not in validation_image_ids:
                        continue
                else:
                    raise NotImplementedError('This generator handles only the train and validation subsets')

            class_name = row['LabelName']

            if class_name not in cls_index:
                continue

            cls_id = cls_index[class_name]

            if version == 'challenge2018':
                # We recommend participants to use the provided subset of the training set as a validation set.
                # This is preferable over using the V4 val/test sets, as the training set is more densely annotated.
                img_path = os.path.join(main_dir, 'images', 'train', frame + '.jpg')
            else:
                img_path = os.path.join(main_dir, 'images', subset, frame + '.jpg')

            if frame in images_sizes:
                width, height = images_sizes[frame]
            else:
                try:
                    with Image.open(img_path) as img:
                        width, height = img.width, img.height
                        images_sizes[frame] = (width, height)
                except Exception:
                    # Unreadable images are fatal for challenge2018 and
                    # silently skipped for the other versions.
                    if version == 'challenge2018':
                        raise
                    continue

            x1 = float(row['XMin'])
            x2 = float(row['XMax'])
            y1 = float(row['YMin'])
            y2 = float(row['YMax'])

            x1_int = int(round(x1 * width))
            x2_int = int(round(x2 * width))
            y1_int = int(round(y1 * height))
            y2_int = int(round(y2 * height))

            # Check that the bounding box is valid.
            if x2 <= x1:
                raise ValueError('line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1))
            if y2 <= y1:
                raise ValueError('line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1))

            # Boxes that collapse to zero pixels after rounding are dropped.
            if y2_int == y1_int:
                warnings.warn('filtering line {}: rounding y2 ({}) and y1 ({}) makes them equal'.format(line, y2, y1))
                continue

            if x2_int == x1_int:
                warnings.warn('filtering line {}: rounding x2 ({}) and x1 ({}) makes them equal'.format(line, x2, x1))
                continue

            annotation = {'cls_id': cls_id, 'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2}

            if frame in id_annotations:
                id_annotations[frame]['boxes'].append(annotation)
            else:
                id_annotations[frame] = {'w': width, 'h': height, 'boxes': [annotation]}
    return id_annotations
158
+
159
+
160
class OidDataset(Dataset):
    """Oid dataset.

    Args:
        main_dir: Dataset root containing ``images/`` and the metadata folder.
        subset: Subset name, used for the image folder and the cache file name.
        version: ``'v4'``, ``'challenge2018'`` or ``'v3'``.
        annotation_cache_dir: Directory where ``<subset>.json`` is read from,
            or written to when it does not exist yet.
        transform: Optional callable applied to every sample dict.

    Raises:
        NotImplementedError: If ``version`` is not one of the supported values.
    """

    def __init__(self, main_dir, subset, version='v4', annotation_cache_dir='.', transform=None):
        if version == 'v4':
            metadata = '2018_04'
        elif version == 'challenge2018':
            metadata = 'challenge2018'
        elif version == 'v3':
            metadata = '2017_11'
        else:
            raise NotImplementedError('There is currently no implementation for versions older than v3')

        self.transform = transform

        if version == 'challenge2018':
            self.base_dir = os.path.join(main_dir, 'images', 'train')
        else:
            self.base_dir = os.path.join(main_dir, 'images', subset)

        metadata_dir = os.path.join(main_dir, metadata)
        # NOTE(review): the cache name depends on the subset only, so caches
        # of different versions in one directory would collide - confirm.
        annotation_cache_json = os.path.join(annotation_cache_dir, subset + '.json')

        self.id_to_labels, cls_index = get_labels(metadata_dir, version=version)

        if os.path.exists(annotation_cache_json):
            with open(annotation_cache_json, 'r') as f:
                self.annotations = json.loads(f.read())
        else:
            self.annotations = generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index,
                                                                version=version)
            # Context manager so the cache file is flushed and closed.
            with open(annotation_cache_json, "w") as f:
                json.dump(self.annotations, f)

        # Position in the annotation dict -> image id.
        self.id_to_image_id = {i: k for i, k in enumerate(self.annotations)}

        # (label -> name)
        self.labels = self.id_to_labels

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return ``{'img': ..., 'annot': ...}``, transformed if configured."""
        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def image_path(self, image_index):
        """Return the path of the ``.jpg`` file for ``image_index``."""
        path = os.path.join(self.base_dir, self.id_to_image_id[image_index] + '.jpg')
        return path

    def load_image(self, image_index):
        """Load an image as a float32 array scaled by 1/255.

        Raises:
            RuntimeError: If the loaded data cannot be converted; the message
                names the offending file.
        """
        path = self.image_path(image_index)
        img = skimage.io.imread(path)

        # A 1-D result is unwrapped to its first element.
        if len(img.shape) == 1:
            img = img[0]

        # Grayscale images are expanded to three channels.
        if len(img.shape) == 2:
            img = skimage.color.gray2rgb(img)

        try:
            return img.astype(np.float32) / 255.0
        except Exception as err:
            # Fail loudly instead of exiting the process with status 0.
            raise RuntimeError('Could not convert image to float32: {}'.format(path)) from err

    def load_annotations(self, image_index):
        """Return an ``(N, 5)`` array of ``x1, y1, x2, y2, class_id`` rows.

        The stored relative coordinates are scaled by the stored image size.
        """
        # get ground truth annotations
        image_annotations = self.annotations[self.id_to_image_id[image_index]]

        labels = image_annotations['boxes']
        height, width = image_annotations['h'], image_annotations['w']

        boxes = np.zeros((len(labels), 5))
        for idx, ann in enumerate(labels):
            boxes[idx, 0] = ann['x1'] * width
            boxes[idx, 1] = ann['y1'] * height
            boxes[idx, 2] = ann['x2'] * width
            boxes[idx, 3] = ann['y2'] * height
            boxes[idx, 4] = ann['cls_id']

        return boxes

    def image_aspect_ratio(self, image_index):
        """Return width / height from the stored image size."""
        img_annotations = self.annotations[self.id_to_image_id[image_index]]
        height, width = img_annotations['h'], img_annotations['w']
        return float(width) / float(height)

    def num_classes(self):
        return len(self.id_to_labels)
retinanet/utils.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import numpy as np
4
+
5
+
6
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with one pixel of padding."""
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
10
+
11
+
12
class BasicBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages plus a shortcut.

    ``downsample``, when given, is applied to the block input to produce
    the shortcut branch; otherwise the input itself is added back.
    """

    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # Only the first convolution carries the stride; the second one
        # keeps the spatial resolution.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the two conv stages, add the shortcut, and apply ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)
42
+
43
+
44
class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    The final 1x1 convolution widens the features to
    ``planes * expansion`` channels. ``downsample``, when given, is applied
    to the block input to produce the shortcut branch; otherwise the input
    itself is added back.
    """

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        # Derive the output width from ``expansion`` instead of repeating
        # the literal 4, so the attribute and the layers cannot disagree.
        out_planes = planes * self.expansion

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        # The stride is applied by the 3x3 convolution.
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv stages, add the shortcut, and apply ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)
81
+
82
class BBoxTransform(nn.Module):
    """Apply regression deltas to reference boxes to obtain predicted boxes.

    Args:
        mean: four offsets added to the scaled deltas (dx, dy, dw, dh).
            Defaults to zeros.
        std: four scale factors applied to the deltas before the offsets.
            Defaults to ``[0.1, 0.1, 0.2, 0.2]``.

    The defaults are created as float32 tensors, on the CUDA device when
    one is available and on the CPU otherwise.
    """

    def __init__(self, mean=None, std=None):
        super(BBoxTransform, self).__init__()
        default_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        if mean is None:
            mean = torch.tensor([0.0, 0.0, 0.0, 0.0], dtype=torch.float32,
                                device=default_device)
        if std is None:
            std = torch.tensor([0.1, 0.1, 0.2, 0.2], dtype=torch.float32,
                               device=default_device)

        self.mean = mean
        self.std = std

    def forward(self, boxes, deltas):
        """Decode ``deltas`` relative to ``boxes``.

        Both inputs are indexed along three axes; the last axis of
        ``boxes`` holds ``(x1, y1, x2, y2)`` and the last axis of ``deltas``
        holds ``(dx, dy, dw, dh)``.

        Returns:
            A tensor with the same leading axes whose last axis holds the
            predicted ``(x1, y1, x2, y2)``.
        """
        # The stored mean/std may live on a different device than the
        # inputs (e.g. defaults created on CUDA while running on CPU), so
        # align them with ``deltas``. This is a no-op when they already match.
        mean = torch.as_tensor(self.mean, device=deltas.device)
        std = torch.as_tensor(self.std, device=deltas.device)

        widths = boxes[:, :, 2] - boxes[:, :, 0]
        heights = boxes[:, :, 3] - boxes[:, :, 1]
        ctr_x = boxes[:, :, 0] + 0.5 * widths
        ctr_y = boxes[:, :, 1] + 0.5 * heights

        dx = deltas[:, :, 0] * std[0] + mean[0]
        dy = deltas[:, :, 1] * std[1] + mean[1]
        dw = deltas[:, :, 2] * std[2] + mean[2]
        dh = deltas[:, :, 3] * std[3] + mean[3]

        # Centre shifts are relative to the box size; size deltas are
        # applied in log space.
        pred_ctr_x = ctr_x + dx * widths
        pred_ctr_y = ctr_y + dy * heights
        pred_w = torch.exp(dw) * widths
        pred_h = torch.exp(dh) * heights

        half_w = 0.5 * pred_w
        half_h = 0.5 * pred_h

        return torch.stack(
            [
                pred_ctr_x - half_w,
                pred_ctr_y - half_h,
                pred_ctr_x + half_w,
                pred_ctr_y + half_h,
            ],
            dim=2,
        )
127
+
128
+
129
class ClipBoxes(nn.Module):
    """Clamp box corners to the extent of an image batch.

    The ``width`` and ``height`` constructor arguments are accepted but
    not used; the limits are read from the image tensor passed to
    ``forward``.
    """

    def __init__(self, width=None, height=None):
        super(ClipBoxes, self).__init__()

    def forward(self, boxes, img):
        """Clamp ``boxes`` in place and return the same tensor.

        x1 and y1 (columns 0 and 1) are clamped from below at 0; x2 and y2
        (columns 2 and 3) are clamped from above at the image width and
        height respectively. ``img.shape`` must unpack into four values.
        """
        _, _, height, width = img.shape

        for column in (0, 1):
            boxes[:, :, column] = boxes[:, :, column].clamp(min=0)

        for column, upper in ((2, width), (3, height)):
            boxes[:, :, column] = boxes[:, :, column].clamp(max=upper)

        return boxes