""" Creates a Pytorch dataset to load the Pascal VOC & MS COCO datasets """ import config import numpy as np import os import pandas as pd import torch from utils import xywhn2xyxy, xyxy2xywhn import random from PIL import Image, ImageFile from torch.utils.data import Dataset, DataLoader from utils import ( cells_to_bboxes, iou_width_height as iou, non_max_suppression as nms, plot_image ) ImageFile.LOAD_TRUNCATED_IMAGES = True class YOLODataset(Dataset): def __init__( self, csv_file, img_dir, label_dir, anchors, image_size=416, S=[13, 26, 52], C=20, transform=None, ): self.annotations = pd.read_csv(csv_file) self.img_dir = img_dir self.label_dir = label_dir self.image_size = image_size self.mosaic_border = [image_size // 2, image_size // 2] self.transform = transform self.S = S self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2]) # for all 3 scales self.num_anchors = self.anchors.shape[0] self.num_anchors_per_scale = self.num_anchors // 3 self.C = C self.ignore_iou_thresh = 0.5 def __len__(self): return len(self.annotations) def load_image(self, index): label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1]) # Load data from the file data = np.loadtxt(fname=label_path,delimiter=" ", ndmin=2) # Shift the values in each row by 4 positions to the right shifted_data = np.roll(data, 4, axis=1) # Convert the shifted data to a Python list bboxes = shifted_data.tolist() img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0]) image = np.array(Image.open(img_path).convert("RGB")) return image, bboxes def load_mosaic(self, index, p=0.75): ''' loading mosaic augmentation for only 75% times ''' k = np.random.rand(1) if k > p: return self.load_image(index) # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic labels4 = [] s = self.image_size yc, xc = (int(random.uniform(x, 2 * s - x)) for x in self.mosaic_border) # mosaic center x, y indices = [index] + random.choices(range(len(self)), k=3) # 3 additional image indices random.shuffle(indices) for i, index in enumerate(indices): # Load image label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1]) bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist() img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0]) img = np.array(Image.open(img_path).convert("RGB")) h, w = img.shape[0], img.shape[1] labels = np.array(bboxes) # place img in img4 if i == 0: # top left img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image) x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image) elif i == 1: # top right x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h elif i == 2: # bottom left x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h) x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h) elif i == 3: # bottom right x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h) x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h) img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax] padw = x1a - x1b padh = y1a - y1b # Labels if labels.size: labels[:, :-1] = xywhn2xyxy(labels[:, :-1], w, h, padw, padh) # normalized xywh to pixel xyxy format labels4.append(labels) # Concat/clip labels labels4 = np.concatenate(labels4, 0) for x 

    def load_mosaic(self, index, p=0.75):
        """Apply 4-image mosaic augmentation with probability p (default 75%)."""
        if np.random.rand(1) > p:
            return self.load_image(index)

        # YOLOv5 4-mosaic loader: loads 1 image + 3 random images into a 4-image mosaic
        labels4 = []
        s = self.image_size
        yc, xc = (int(random.uniform(x, 2 * s - x)) for x in self.mosaic_border)  # mosaic center x, y
        indices = [index] + random.choices(range(len(self)), k=3)  # 3 additional image indices
        random.shuffle(indices)
        for i, index in enumerate(indices):
            # Load image and labels ([x, y, w, h, class] after the roll)
            label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
            bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
            img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
            img = np.array(Image.open(img_path).convert("RGB"))
            h, w = img.shape[0], img.shape[1]
            labels = np.array(bboxes)

            # Place img in img4
            if i == 0:  # top left
                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
            elif i == 1:  # top right
                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
            elif i == 2:  # bottom left
                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
            elif i == 3:  # bottom right
                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
            padw = x1a - x1b
            padh = y1a - y1b

            # Labels: normalized xywh -> pixel xyxy in the mosaic canvas
            if labels.size:
                labels[:, :-1] = xywhn2xyxy(labels[:, :-1], w, h, padw, padh)
            labels4.append(labels)

        # Concat/clip labels to the 2s x 2s mosaic canvas
        labels4 = np.concatenate(labels4, 0)
        np.clip(labels4[:, :-1], 0, 2 * s, out=labels4[:, :-1])  # clip when using random_perspective()
        # img4, labels4 = replicate(img4, labels4)  # replicate
        labels4[:, :-1] = xyxy2xywhn(labels4[:, :-1], 2 * s, 2 * s)
        labels4[:, :-1] = np.clip(labels4[:, :-1], 0, 1)
        # Drop boxes whose width or height collapsed to zero after clipping
        labels4 = labels4[labels4[:, 2] > 0]
        labels4 = labels4[labels4[:, 3] > 0]
        return img4, labels4

    def __getitem__(self, index):
        # k = np.random.rand(1)
        # if k >= 0.75:
        #     image, (h0, w0), (h, w) = load_image(self, index)
        #     # Letterbox
        #     shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
        #     image, ratio, pad = letterbox(image, shape, auto=False, scaleup=self.augment)
        #     shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling
        #     # Load labels
        #     bboxes = []
        #     x = self.bboxes[index]
        #     if x is not None and x.size > 0:
        #         # Normalized xywh to pixel xyxy format
        #         bboxes = x.copy()
        #         bboxes[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0]  # pad width
        #         bboxes[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1]  # pad height
        #         bboxes[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
        #         bboxes[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
        # else:
        image, bboxes = self.load_mosaic(index)

        if self.transform:
            augmentations = self.transform(image=image, bboxes=bboxes)
            image = augmentations["image"]
            bboxes = augmentations["bboxes"]

        # Below assumes 3 scale predictions (as in the paper) and the same number of anchors per scale
        targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
        for box in bboxes:
            iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
            anchor_indices = iou_anchors.argsort(descending=True, dim=0)
            x, y, width, height, class_label = box
            has_anchor = [False] * 3  # each scale should have one anchor
            for anchor_idx in anchor_indices:
                scale_idx = anchor_idx // self.num_anchors_per_scale
                anchor_on_scale = anchor_idx % self.num_anchors_per_scale
                S = self.S[scale_idx]
                # Which cell; clamp so x == 1.0 or y == 1.0 (possible after clipping) stays in bounds
                i, j = min(int(S * y), S - 1), min(int(S * x), S - 1)
                anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
                if not anchor_taken and not has_anchor[scale_idx]:
                    targets[scale_idx][anchor_on_scale, i, j, 0] = 1
                    x_cell, y_cell = S * x - j, S * y - i  # both in [0, 1]
                    width_cell, height_cell = (
                        width * S,
                        height * S,
                    )  # can be greater than 1 since it's relative to cell
                    box_coordinates = torch.tensor(
                        [x_cell, y_cell, width_cell, height_cell]
                    )
                    targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
                    targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
                    has_anchor[scale_idx] = True
                elif not anchor_taken and iou_anchors[anchor_idx] > self.ignore_iou_thresh:
                    targets[scale_idx][anchor_on_scale, i, j, 0] = -1  # ignore prediction

        return image, tuple(targets)
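
# A worked example of the target encoding in __getitem__ (illustrative numbers):
# a box with (x, y, w, h) = (0.50, 0.42, 0.30, 0.61) on the S = 13 grid falls in
# cell i = int(13 * 0.42) = 5, j = int(13 * 0.50) = 6, with in-cell offsets
# x_cell = 13 * 0.50 - 6 = 0.5 and y_cell = 13 * 0.42 - 5 = 0.46, and
# cell-relative size width_cell = 13 * 0.30 = 3.9, height_cell = 13 * 0.61 = 7.93.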

def load_image(self, index):
    # YOLOv5-style cached loader used by the letterboxed path commented out in
    # __getitem__ above; it expects self.imgs / self.img_files / self.img_hw0 /
    # self.img_hw caches, which YOLODataset does not currently build.
    # Loads 1 image from dataset, returns img, original hw, resized hw
    img = self.imgs[index]
    if img is None:  # not cached
        img_path = self.img_files[index]
        img = cv2.imread(img_path)  # BGR
        assert img is not None, "Image Not Found " + img_path
        h0, w0 = img.shape[:2]  # orig hw
        r = self.img_size / max(h0, w0)  # resize image to img_size
        if r < 1 or (self.augment and r != 1):  # always resize down, only resize up if training with augmentation
            interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
        return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized
    else:
        return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized


def letterbox(img, new_shape=(416, 416), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
    # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
    shape = img.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = max(new_shape) / max(shape)
    if not scaleup:  # only scale down, do not scale up (for better test mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, 64), np.mod(dh, 64)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = new_shape
        ratio = new_shape[0] / shape[1], new_shape[1] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return img, ratio, (dw, dh)


def test():
    anchors = config.ANCHORS
    transform = config.test_transforms

    dataset = YOLODataset(
        "COCO/train.csv",
        "COCO/images/images/",
        "COCO/labels/labels_new/",
        S=[13, 26, 52],
        anchors=anchors,
        transform=transform,
    )
    S = [13, 26, 52]
    # Scale the normalized anchors to grid-cell units on each scale
    scaled_anchors = torch.tensor(anchors) * torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
    loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)

    for x, y in loader:
        boxes = []
        for i in range(y[0].shape[1]):  # 3 anchors per scale == 3 scales here
            anchor = scaled_anchors[i]
            print(anchor.shape)
            print(y[i].shape)
            boxes += cells_to_bboxes(
                y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
            )[0]
        # iou_threshold=1 keeps every ground-truth box; only the objectness threshold filters
        boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
        print(boxes)
        plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)


if __name__ == "__main__":
    test()
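
# For reference, a minimal sketch of the two coordinate helpers imported from
# utils (signatures inferred from how they are called above; the real
# implementations in utils.py may differ):
#
# def xywhn2xyxy(x, w, h, padw=0, padh=0):
#     # normalized [x, y, w, h] -> pixel [x1, y1, x2, y2], shifted by mosaic padding
#     y = np.copy(x)
#     y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw  # top-left x
#     y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh  # top-left y
#     y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw  # bottom-right x
#     y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh  # bottom-right y
#     return y
#
# def xyxy2xywhn(x, w, h):
#     # pixel [x1, y1, x2, y2] -> normalized [x, y, w, h]
#     y = np.copy(x)
#     y[:, 0] = ((x[:, 0] + x[:, 2]) / 2) / w  # x center
#     y[:, 1] = ((x[:, 1] + x[:, 3]) / 2) / h  # y center
#     y[:, 2] = (x[:, 2] - x[:, 0]) / w  # width
#     y[:, 3] = (x[:, 3] - x[:, 1]) / h  # height
#     return y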