"""
Creates a Pytorch dataset to load the Pascal VOC & MS COCO datasets
"""
import os
import random

import cv2
import numpy as np
import pandas as pd
import torch
from PIL import Image, ImageFile
from torch.utils.data import Dataset, DataLoader

import config
from utils import (
    cells_to_bboxes,
    iou_width_height as iou,
    non_max_suppression as nms,
    plot_image,
    xywhn2xyxy,
    xyxy2xywhn,
)
ImageFile.LOAD_TRUNCATED_IMAGES = True  # tolerate truncated image files instead of raising
class YOLODataset(Dataset):
def __init__(
self,
csv_file,
img_dir,
label_dir,
anchors,
image_size=416,
S=[13, 26, 52],
C=20,
transform=None,
):
self.annotations = pd.read_csv(csv_file)
self.img_dir = img_dir
self.label_dir = label_dir
self.image_size = image_size
self.mosaic_border = [image_size // 2, image_size // 2]
self.transform = transform
self.S = S
self.anchors = torch.tensor(anchors[0] + anchors[1] + anchors[2]) # for all 3 scales
self.num_anchors = self.anchors.shape[0]
self.num_anchors_per_scale = self.num_anchors // 3
self.C = C
self.ignore_iou_thresh = 0.5
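        # Anchor bookkeeping: `self.anchors` stacks the three per-scale anchor
        # lists into a single (9, 2) tensor of normalized (w, h) pairs, so an
        # anchor's flat index determines both its scale (index // 3) and its
        # slot within that scale (index % 3).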
def __len__(self):
return len(self.annotations)
    def load_image(self, index):
        label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
        # Load label rows from the file: each row is [class, x, y, w, h] (normalized)
        data = np.loadtxt(fname=label_path, delimiter=" ", ndmin=2)
        # Roll each row 4 positions right so the class index moves to the end:
        # [class, x, y, w, h] -> [x, y, w, h, class]
        shifted_data = np.roll(data, 4, axis=1)
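        # Example of the roll (values illustrative, not from a real label file):
        #   raw row:    [14, 0.52, 0.48, 0.30, 0.25]   # class, x, y, w, h
        #   after roll: [0.52, 0.48, 0.30, 0.25, 14]   # x, y, w, h, class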
# Convert the shifted data to a Python list
bboxes = shifted_data.tolist()
img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
image = np.array(Image.open(img_path).convert("RGB"))
return image, bboxes
    def load_mosaic(self, index, p=0.75):
        """Apply 4-image mosaic augmentation with probability p (default 0.75);
        otherwise fall back to loading the single image at `index`."""
        if np.random.rand() > p:
            return self.load_image(index)
# YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
labels4 = []
s = self.image_size
yc, xc = (int(random.uniform(x, 2 * s - x)) for x in self.mosaic_border) # mosaic center x, y
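        # With mosaic_border = [s // 2, s // 2], the mosaic center (xc, yc) is
        # sampled uniformly from [s / 2, 3s / 2] on the 2s x 2s canvas, so each
        # of the four tiles keeps a usable region around the center.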
indices = [index] + random.choices(range(len(self)), k=3) # 3 additional image indices
random.shuffle(indices)
for i, index in enumerate(indices):
# Load image
label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
bboxes = np.roll(np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1).tolist()
img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
img = np.array(Image.open(img_path).convert("RGB"))
h, w = img.shape[0], img.shape[1]
labels = np.array(bboxes)
# place img in img4
if i == 0: # top left
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
padw = x1a - x1b
padh = y1a - y1b
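            # padw/padh translate box coordinates from the source image's frame
            # into the mosaic canvas: a pixel at (x, y) in the pasted crop lands
            # at (x + padw, y + padh) in img4.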
# Labels
if labels.size:
labels[:, :-1] = xywhn2xyxy(labels[:, :-1], w, h, padw, padh) # normalized xywh to pixel xyxy format
labels4.append(labels)
        # Concat/clip labels
        labels4 = np.concatenate(labels4, 0)
        np.clip(labels4[:, :-1], 0, 2 * s, out=labels4[:, :-1])  # clip xyxy coords to the canvas
        labels4[:, :-1] = xyxy2xywhn(labels4[:, :-1], 2 * s, 2 * s)
        labels4[:, :-1] = np.clip(labels4[:, :-1], 0, 1)
        # Drop boxes whose width or height collapsed to zero after clipping
        labels4 = labels4[labels4[:, 2] > 0]
        labels4 = labels4[labels4[:, 3] > 0]
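        # At this point img4 is a 2s x 2s uint8 canvas (gray-114 padding) and
        # labels4 holds boxes normalized to that canvas in [x, y, w, h, class]
        # order, so downstream transforms treat mosaic and single-image samples alike.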
return img4, labels4
def __getitem__(self, index):
        # NOTE: a YOLOv5-style letterboxed single-image code path previously
        # lived here (commented out); it was removed in favor of load_mosaic,
        # which itself falls back to plain single-image loading 25% of the time.
image, bboxes = self.load_mosaic(index)
if self.transform:
augmentations = self.transform(image=image, bboxes=bboxes)
image = augmentations["image"]
bboxes = augmentations["bboxes"]
        # Below assumes 3 scale predictions (as in the paper) and the same number of anchors per scale
        targets = [torch.zeros((self.num_anchors_per_scale, S, S, 6)) for S in self.S]
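        # Target layout per (anchor, cell): [objectness, x_cell, y_cell, w_cell, h_cell, class];
        # objectness is 1 for the assigned anchor, 0 for negatives, -1 for ignored anchors.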
for box in bboxes:
iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
anchor_indices = iou_anchors.argsort(descending=True, dim=0)
x, y, width, height, class_label = box
has_anchor = [False] * 3 # each scale should have one anchor
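            # Anchors are visited in descending-IoU order; each ground-truth box
            # claims exactly one (the best still-free) anchor per scale, and any
            # other overlapping anchor with IoU above ignore_iou_thresh is marked ignored.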
for anchor_idx in anchor_indices:
scale_idx = anchor_idx // self.num_anchors_per_scale
anchor_on_scale = anchor_idx % self.num_anchors_per_scale
S = self.S[scale_idx]
                i, j = int(S * y), int(S * x)  # which cell: row i from y, column j from x
anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
if not anchor_taken and not has_anchor[scale_idx]:
targets[scale_idx][anchor_on_scale, i, j, 0] = 1
x_cell, y_cell = S * x - j, S * y - i # both between [0,1]
width_cell, height_cell = (
width * S,
height * S,
) # can be greater than 1 since it's relative to cell
box_coordinates = torch.tensor(
[x_cell, y_cell, width_cell, height_cell]
)
targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
has_anchor[scale_idx] = True
elif not anchor_taken and iou_anchors[anchor_idx] > self.ignore_iou_thresh:
targets[scale_idx][anchor_on_scale, i, j, 0] = -1 # ignore prediction
return image, tuple(targets)
    def load_image_cached(self, index):
        # YOLOv5-style cached loader (renamed from `load_image` so it no longer
        # shadows the CSV-based loader above, which broke load_mosaic's fallback).
        # NOTE: it expects `self.imgs`, `self.img_files`, `self.img_size`,
        # `self.augment`, `self.img_hw0`, and `self.img_hw`, none of which this
        # class sets; it is kept for reference only.
        # Loads 1 image from dataset; returns img, original hw, resized hw.
img = self.imgs[index]
if img is None: # not cached
img_path = self.img_files[index]
img = cv2.imread(img_path) # BGR
assert img is not None, 'Image Not Found ' + img_path
h0, w0 = img.shape[:2] # orig hw
r = self.img_size / max(h0, w0) # resize image to img_size
if r < 1 or (self.augment and r != 1): # always resize down, only resize up if training with augmentation
interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized
else:
return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized
def letterbox(img, new_shape=(416, 416), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
    # Resize image to a stride-multiple rectangle (padding is rounded to a
    # multiple of 64 below when auto=True) https://github.com/ultralytics/yolov3/issues/232
shape = img.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = max(new_shape) / max(shape)
if not scaleup: # only scale down, do not scale up (for better test mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, 64), np.mod(dh, 64) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = new_shape
ratio = new_shape[0] / shape[1], new_shape[1] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return img, ratio, (dw, dh)
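# Worked example (values illustrative): letterboxing a 480x640 (h x w) image to
# new_shape=(416, 416) with auto=True gives r = 416 / 640 = 0.65, new_unpad =
# (416, 312), dh = 104 -> mod 64 -> 40 -> 20 px of gray padding on top and
# bottom, for a final 352x416 (h x w) canvas.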
def test():
anchors = config.ANCHORS
transform = config.test_transforms
dataset = YOLODataset(
"COCO/train.csv",
"COCO/images/images/",
"COCO/labels/labels_new/",
S=[13, 26, 52],
anchors=anchors,
transform=transform,
)
    S = [13, 26, 52]
    # Dividing by a reciprocal is just multiplication: scale the normalized
    # anchors by each grid size via broadcasting -> shape (3, 3, 2)
    scaled_anchors = torch.tensor(anchors) * torch.tensor(S).view(-1, 1, 1)
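    # scaled_anchors expresses each anchor's (w, h) in grid-cell units for its
    # scale, matching the cell-relative box sizes produced by __getitem__.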
loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
for x, y in loader:
boxes = []
        for i in range(len(y)):  # one prediction tensor per scale
anchor = scaled_anchors[i]
print(anchor.shape)
print(y[i].shape)
boxes += cells_to_bboxes(
y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
)[0]
        boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")  # iou_threshold=1 effectively disables suppression; this only filters targets by objectness
print(boxes)
plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
if __name__ == "__main__":
test()