Files Commit
Browse files- .gitignore +3 -0
- Jupyternote Cheatsheet.ipynb +1 -0
- PyTorch_Object_Detection.ipynb +0 -0
- PyTorch_Object_Tracking.ipynb +1 -0
- __pycache__/models.cpython-37.pyc +0 -0
- __pycache__/sort.cpython-37.pyc +0 -0
- darknet-coco-object_detection.ipynb +0 -0
- +350 -0
- +110 -0
- +305 -0
- +317 -0
- utils/__pycache__/__init__.cpython-36.pyc +0 -0
- utils/__pycache__/datasets.cpython-36.pyc +0 -0
- utils/__pycache__/parse_config.cpython-36.pyc +0 -0
- utils/__pycache__/parse_config.cpython-37.pyc +0 -0
- utils/__pycache__/utils.cpython-36.pyc +0 -0
- utils/__pycache__/utils.cpython-37.pyc +0 -0
- utils/ +121 -0
- utils/ +36 -0
- utils/ +258 -0
@@ -0,0 +1,3 @@
1 |
2 |
3 |
Jupyternote Cheatsheet.ipynb
@@ -0,0 +1 @@
1 |
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Jupyternote Cheatsheet.ipynb","provenance":[],"mount_file_id":"1rMSETYdooFC6fVgT0PaOovnBrB4ZWoys","authorship_tag":"ABX9TyN4O59ZYPVT0rGiUB3bfznT"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["# Models"],"metadata":{"id":"ODx9TIOB4tCe"}},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"BelRHeLw4qyQ","executionInfo":{"status":"ok","timestamp":1654537166220,"user_tz":-60,"elapsed":22,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"60695f20-3957-4958-aabd-c2ecff870977"},"outputs":[{"output_type":"stream","name":"stdout","text":["Writing\n"]}],"source":["%%writefile\n","from __future__ import division\n","\n","import torch\n","import torch.nn as nn\n","import torch.nn.functional as F\n","from torch.autograd import Variable\n","import numpy as np\n","\n","from PIL import Image\n","\n","from utils.parse_config import *\n","from utils.utils import build_targets\n","from collections import defaultdict\n","\n","##import matplotlib.pyplot as plt\n","##import matplotlib.patches as patches\n","\n","\n","def create_modules(module_defs):\n"," \"\"\"\n"," Constructs module list of layer blocks from module configuration in module_defs\n"," \"\"\"\n"," hyperparams = module_defs.pop(0)\n"," output_filters = [int(hyperparams[\"channels\"])]\n"," module_list = nn.ModuleList()\n"," for i, module_def in enumerate(module_defs):\n"," modules = nn.Sequential()\n","\n"," if module_def[\"type\"] == \"convolutional\":\n"," bn = int(module_def[\"batch_normalize\"])\n"," filters = int(module_def[\"filters\"])\n"," kernel_size = int(module_def[\"size\"])\n"," pad = (kernel_size - 1) // 2 if int(module_def[\"pad\"]) else 0\n"," modules.add_module(\n"," \"conv_%d\" % i,\n"," nn.Conv2d(\n"," in_channels=output_filters[-1],\n"," out_channels=filters,\n"," 
kernel_size=kernel_size,\n"," stride=int(module_def[\"stride\"]),\n"," padding=pad,\n"," bias=not bn,\n"," ),\n"," )\n"," if bn:\n"," modules.add_module(\"batch_norm_%d\" % i, nn.BatchNorm2d(filters))\n"," if module_def[\"activation\"] == \"leaky\":\n"," modules.add_module(\"leaky_%d\" % i, nn.LeakyReLU(0.1))\n","\n"," elif module_def[\"type\"] == \"maxpool\":\n"," kernel_size = int(module_def[\"size\"])\n"," stride = int(module_def[\"stride\"])\n"," if kernel_size == 2 and stride == 1:\n"," padding = nn.ZeroPad2d((0, 1, 0, 1))\n"," modules.add_module(\"_debug_padding_%d\" % i, padding)\n"," maxpool = nn.MaxPool2d(\n"," kernel_size=int(module_def[\"size\"]),\n"," stride=int(module_def[\"stride\"]),\n"," padding=int((kernel_size - 1) // 2),\n"," )\n"," modules.add_module(\"maxpool_%d\" % i, maxpool)\n","\n"," elif module_def[\"type\"] == \"upsample\":\n"," upsample = nn.Upsample(scale_factor=int(module_def[\"stride\"]), mode=\"nearest\")\n"," modules.add_module(\"upsample_%d\" % i, upsample)\n","\n"," elif module_def[\"type\"] == \"route\":\n"," layers = [int(x) for x in module_def[\"layers\"].split(\",\")]\n"," filters = sum([output_filters[layer_i] for layer_i in layers])\n"," modules.add_module(\"route_%d\" % i, EmptyLayer())\n","\n"," elif module_def[\"type\"] == \"shortcut\":\n"," filters = output_filters[int(module_def[\"from\"])]\n"," modules.add_module(\"shortcut_%d\" % i, EmptyLayer())\n","\n"," elif module_def[\"type\"] == \"yolo\":\n"," anchor_idxs = [int(x) for x in module_def[\"mask\"].split(\",\")]\n"," # Extract anchors\n"," anchors = [int(x) for x in module_def[\"anchors\"].split(\",\")]\n"," anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]\n"," anchors = [anchors[i] for i in anchor_idxs]\n"," num_classes = int(module_def[\"classes\"])\n"," img_height = int(hyperparams[\"height\"])\n"," # Define detection layer\n"," yolo_layer = YOLOLayer(anchors, num_classes, img_height)\n"," modules.add_module(\"yolo_%d\" % i, 
yolo_layer)\n"," # Register module list and number of output filters\n"," module_list.append(modules)\n"," output_filters.append(filters)\n","\n"," return hyperparams, module_list\n","\n","\n","class EmptyLayer(nn.Module):\n"," \"\"\"Placeholder for 'route' and 'shortcut' layers\"\"\"\n","\n"," def __init__(self):\n"," super(EmptyLayer, self).__init__()\n","\n","\n","class YOLOLayer(nn.Module):\n"," \"\"\"Detection layer\"\"\"\n","\n"," def __init__(self, anchors, num_classes, img_dim):\n"," super(YOLOLayer, self).__init__()\n"," self.anchors = anchors\n"," self.num_anchors = len(anchors)\n"," self.num_classes = num_classes\n"," self.bbox_attrs = 5 + num_classes\n"," self.image_dim = img_dim\n"," self.ignore_thres = 0.5\n"," self.lambda_coord = 1\n","\n"," self.mse_loss = nn.MSELoss(size_average=True) # Coordinate loss\n"," self.bce_loss = nn.BCELoss(size_average=True) # Confidence loss\n"," self.ce_loss = nn.CrossEntropyLoss() # Class loss\n","\n"," def forward(self, x, targets=None):\n"," nA = self.num_anchors\n"," nB = x.size(0)\n"," nG = x.size(2)\n"," stride = self.image_dim / nG\n","\n"," # Tensors for cuda support\n"," FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor\n"," LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor\n"," ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor\n","\n"," prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()\n","\n"," # Get outputs\n"," x = torch.sigmoid(prediction[..., 0]) # Center x\n"," y = torch.sigmoid(prediction[..., 1]) # Center y\n"," w = prediction[..., 2] # Width\n"," h = prediction[..., 3] # Height\n"," pred_conf = torch.sigmoid(prediction[..., 4]) # Conf\n"," pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.\n","\n"," # Calculate offsets for each grid\n"," grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)\n"," grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, 
nG]).type(FloatTensor)\n"," scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])\n"," anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))\n"," anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))\n","\n"," # Add offset and scale with anchors\n"," pred_boxes = FloatTensor(prediction[..., :4].shape)\n"," pred_boxes[..., 0] = + grid_x\n"," pred_boxes[..., 1] = + grid_y\n"," pred_boxes[..., 2] = torch.exp( * anchor_w\n"," pred_boxes[..., 3] = torch.exp( * anchor_h\n","\n"," # Training\n"," if targets is not None:\n","\n"," if x.is_cuda:\n"," self.mse_loss = self.mse_loss.cuda()\n"," self.bce_loss = self.bce_loss.cuda()\n"," self.ce_loss = self.ce_loss.cuda()\n","\n"," nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(\n"," pred_boxes=pred_boxes.cpu().data,\n"," pred_conf=pred_conf.cpu().data,\n"," pred_cls=pred_cls.cpu().data,\n"," target=targets.cpu().data,\n"," anchors=scaled_anchors.cpu().data,\n"," num_anchors=nA,\n"," num_classes=self.num_classes,\n"," grid_size=nG,\n"," ignore_thres=self.ignore_thres,\n"," img_dim=self.image_dim,\n"," )\n","\n"," nProposals = int((pred_conf > 0.5).sum().item())\n"," recall = float(nCorrect / nGT) if nGT else 1\n"," precision = float(nCorrect / nProposals)\n","\n"," # Handle masks\n"," mask = Variable(mask.type(ByteTensor))\n"," conf_mask = Variable(conf_mask.type(ByteTensor))\n","\n"," # Handle target variables\n"," tx = Variable(tx.type(FloatTensor), requires_grad=False)\n"," ty = Variable(ty.type(FloatTensor), requires_grad=False)\n"," tw = Variable(tw.type(FloatTensor), requires_grad=False)\n"," th = Variable(th.type(FloatTensor), requires_grad=False)\n"," tconf = Variable(tconf.type(FloatTensor), requires_grad=False)\n"," tcls = Variable(tcls.type(LongTensor), requires_grad=False)\n","\n"," # Get conf mask where gt and where there is no gt\n"," conf_mask_true = mask\n"," conf_mask_false = conf_mask - mask\n","\n"," # Mask outputs to ignore non-existing 
objects\n"," loss_x = self.mse_loss(x[mask], tx[mask])\n"," loss_y = self.mse_loss(y[mask], ty[mask])\n"," loss_w = self.mse_loss(w[mask], tw[mask])\n"," loss_h = self.mse_loss(h[mask], th[mask])\n"," loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss(\n"," pred_conf[conf_mask_true], tconf[conf_mask_true]\n"," )\n"," loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))\n"," loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls\n","\n"," return (\n"," loss,\n"," loss_x.item(),\n"," loss_y.item(),\n"," loss_w.item(),\n"," loss_h.item(),\n"," loss_conf.item(),\n"," loss_cls.item(),\n"," recall,\n"," precision,\n"," )\n","\n"," else:\n"," # If not in training phase return predictions\n"," output =\n"," (\n"," pred_boxes.view(nB, -1, 4) * stride,\n"," pred_conf.view(nB, -1, 1),\n"," pred_cls.view(nB, -1, self.num_classes),\n"," ),\n"," -1,\n"," )\n"," return output\n","\n","\n","class Darknet(nn.Module):\n"," \"\"\"YOLOv3 object detection model\"\"\"\n","\n"," def __init__(self, config_path, img_size=416):\n"," super(Darknet, self).__init__()\n"," self.module_defs = parse_model_config(config_path)\n"," self.hyperparams, self.module_list = create_modules(self.module_defs)\n"," self.img_size = img_size\n"," self.seen = 0\n"," self.header_info = np.array([0, 0, 0, self.seen, 0])\n"," self.loss_names = [\"x\", \"y\", \"w\", \"h\", \"conf\", \"cls\", \"recall\", \"precision\"]\n","\n"," def forward(self, x, targets=None):\n"," is_training = targets is not None\n"," output = []\n"," self.losses = defaultdict(float)\n"," layer_outputs = []\n"," for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):\n"," if module_def[\"type\"] in [\"convolutional\", \"upsample\", \"maxpool\"]:\n"," x = module(x)\n"," elif module_def[\"type\"] == \"route\":\n"," layer_i = [int(x) for x in module_def[\"layers\"].split(\",\")]\n"," x =[layer_outputs[i] for i in layer_i], 1)\n"," elif 
module_def[\"type\"] == \"shortcut\":\n"," layer_i = int(module_def[\"from\"])\n"," x = layer_outputs[-1] + layer_outputs[layer_i]\n"," elif module_def[\"type\"] == \"yolo\":\n"," # Train phase: get loss\n"," if is_training:\n"," x, *losses = module[0](x, targets)\n"," for name, loss in zip(self.loss_names, losses):\n"," self.losses[name] += loss\n"," # Test phase: Get detections\n"," else:\n"," x = module(x)\n"," output.append(x)\n"," layer_outputs.append(x)\n","\n"," self.losses[\"recall\"] /= 3\n"," self.losses[\"precision\"] /= 3\n"," return sum(output) if is_training else, 1)\n","\n"," def load_weights(self, weights_path):\n"," \"\"\"Parses and loads the weights stored in 'weights_path'\"\"\"\n","\n"," # Open the weights file\n"," fp = open(weights_path, \"rb\")\n"," header = np.fromfile(fp, dtype=np.int32, count=5) # First five are header values\n","\n"," # Needed to write header when saving weights\n"," self.header_info = header\n","\n"," self.seen = header[3]\n"," weights = np.fromfile(fp, dtype=np.float32) # The rest are weights\n"," fp.close()\n","\n"," ptr = 0\n"," for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):\n"," if module_def[\"type\"] == \"convolutional\":\n"," conv_layer = module[0]\n"," if module_def[\"batch_normalize\"]:\n"," # Load BN bias, weights, running mean and running variance\n"," bn_layer = module[1]\n"," num_b = bn_layer.bias.numel() # Number of biases\n"," # Bias\n"," bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias)\n","\n"," ptr += num_b\n"," # Weight\n"," bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight)\n","\n"," ptr += num_b\n"," # Running Mean\n"," bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean)\n","\n"," ptr += num_b\n"," # Running Var\n"," bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var)\n","\n"," ptr += num_b\n"," else:\n"," # Load conv. 
bias\n"," num_b = conv_layer.bias.numel()\n"," conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias)\n","\n"," ptr += num_b\n"," # Load conv. weights\n"," num_w = conv_layer.weight.numel()\n"," conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight)\n","\n"," ptr += num_w\n","\n"," \"\"\"\n"," @:param path - path of the new weights file\n"," @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)\n"," \"\"\"\n","\n"," def save_weights(self, path, cutoff=-1):\n","\n"," fp = open(path, \"wb\")\n"," self.header_info[3] = self.seen\n"," self.header_info.tofile(fp)\n","\n"," # Iterate through layers\n"," for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):\n"," if module_def[\"type\"] == \"convolutional\":\n"," conv_layer = module[0]\n"," # If batch norm, load bn first\n"," if module_def[\"batch_normalize\"]:\n"," bn_layer = module[1]\n","\n","\n","\n","\n"," # Load conv bias\n"," else:\n","\n"," # Load conv weights\n","\n","\n"," fp.close()"]},{"cell_type":"code","source":["!ls"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ar8FuY3z43Fk","executionInfo":{"status":"ok","timestamp":1654537174809,"user_tz":-60,"elapsed":16,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"ce227d02-75a3-477d-becf-e1c2702c7001"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":[" sample_data\n"]}]},{"cell_type":"code","source":["!pwd"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hRxa6vyoGbla","executionInfo":{"status":"ok","timestamp":1654537258168,"user_tz":-60,"elapsed":26,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"ccaaf1dc-6769-4093-8769-c8aa3b809bdf"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["/content\n"]}]},{"cell_type":"code","source":["%%writefile\n","Are you for 
real!!"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"cVKDwgGtGv7g","executionInfo":{"status":"ok","timestamp":1654537404197,"user_tz":-60,"elapsed":21,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"41cdc392-059d-42be-b267-2a7f66d0a1f6"},"execution_count":4,"outputs":[{"output_type":"stream","name":"stdout","text":["Overwriting\n"]}]},{"cell_type":"code","source":["%cd Computer Vision"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"780vJiykHTmT","executionInfo":{"status":"ok","timestamp":1654537643123,"user_tz":-60,"elapsed":16,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"159eb128-2a7a-41b3-b84c-7d517ff92454"},"execution_count":14,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision\n"]}]},{"cell_type":"code","source":["!pwd"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"WeA417NzHe0W","executionInfo":{"status":"ok","timestamp":1654537646111,"user_tz":-60,"elapsed":408,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"036a3c8e-b106-46a8-b5de-b7adf66938ab"},"execution_count":15,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision\n"]}]},{"cell_type":"code","source":["%%writefile test.and\n","\n","Really I can now write to my drive!"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"hrSVQd-fHzai","executionInfo":{"status":"ok","timestamp":1654537570112,"user_tz":-60,"elapsed":24,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"c58a5849-aaba-4fe3-c596-681a5e7df731"},"execution_count":10,"outputs":[{"output_type":"stream","name":"stdout","text":["Writing 
test.and\n"]}]},{"cell_type":"code","source":["!ls"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"jRtg6b1IH8KV","executionInfo":{"status":"ok","timestamp":1654537654214,"user_tz":-60,"elapsed":24,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"dd49447d-6924-4176-f5a9-ca184b671be8"},"execution_count":16,"outputs":[{"output_type":"stream","name":"stdout","text":["cnn-resnet-CIFAR10 darknet-COCO-object_detection feedforward-cnn-MNIST\n"]}]},{"cell_type":"code","source":["%%bash\n","\n","ls -la\n","python --version"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"iUpVW1oZIQnl","executionInfo":{"status":"ok","timestamp":1654537857269,"user_tz":-60,"elapsed":14,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"ff54c93a-9f2c-4453-d82f-c6c1683f61b8"},"execution_count":19,"outputs":[{"output_type":"stream","name":"stdout","text":["total 12\n","drwx------ 2 root root 4096 May 17 21:02 cnn-resnet-CIFAR10\n","drwx------ 2 root root 4096 Jun 6 16:38 darknet-COCO-object_detection\n","drwx------ 2 root root 4096 May 17 21:01 feedforward-cnn-MNIST\n","Python 3.7.13\n"]}]},{"cell_type":"code","source":["%cd ../"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"NJ7riTtCI2-V","executionInfo":{"status":"ok","timestamp":1654537984381,"user_tz":-60,"elapsed":14,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"713f2de8-ae10-46b9-d5e9-bbfa779de2c8"},"execution_count":21,"outputs":[{"output_type":"stream","name":"stdout","text":["/content\n"]}]},{"cell_type":"code","source":["%%bash\n","\n","cd \"drive/MyDrive/Python/Machine Learning\"\n","ls"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ZAOqxQzPJc1k","executionInfo":{"status":"ok","timestamp":1654538084191,"user_tz":-60,"elapsed":14,"user":{"displayName":"Adejumo 
Daniel","userId":"02925977078148845759"}},"outputId":"7b82c13f-3e14-47b5-bc12-25bdf0dee540"},"execution_count":25,"outputs":[{"output_type":"stream","name":"stdout","text":["Articles\n","Computer Vision\n","Datasets\n","Deep-Learning-with-PyTorch-Jovian\n","Deep RL\n","FastAI Course\n","Generative Models\n","HuggingFace-Deep-RL\n","PyTorch\n","ZeroToGANS_Revision\n"]}]},{"cell_type":"code","source":["%run"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":235},"id":"HvI6SRX8JsS7","executionInfo":{"status":"ok","timestamp":1654538109961,"user_tz":-60,"elapsed":2355,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"08a28f6a-76c2-4eaa-fa36-36d5a8e145ea"},"execution_count":27,"outputs":[{"output_type":"error","ename":"ModuleNotFoundError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)","\u001b[0;32m/content/\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mPIL\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mImage\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparse_config\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mbuild_targets\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mcollections\u001b[0m \u001b[0;32mimport\u001b[0m 
\u001b[0mdefaultdict\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'utils'"]}]},{"cell_type":"code","source":["%edit"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"JLbktoGWJvft","executionInfo":{"status":"ok","timestamp":1654538391516,"user_tz":-60,"elapsed":21,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"cce69d99-b879-4600-a9ba-9afb5a58b76a"},"execution_count":29,"outputs":[{"output_type":"stream","name":"stdout","text":["IPython will make a temporary file named: /tmp/ipython_edit_nffqr1eo/\n"]}]},{"cell_type":"code","source":["%load"],"metadata":{"id":"PI_bYsujKQfx","executionInfo":{"status":"ok","timestamp":1654538646656,"user_tz":-60,"elapsed":443,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":31,"outputs":[]},{"cell_type":"code","source":["%%writefile\n","%run\n","%cd\n","%cat\n","%load [-r, -s]\n","%edit\n","%time, %%time\n","%timeit, %%timeit\n","%%html\n","%env, ...\n","%%file, alias for writefile\n","%%bash\n","%matplotlib [inline, ...]\n","and more\n","%paste, %cpaste\n","%pinfo\n","%who\n","%lsmagic\n","%pwd"],"metadata":{"id":"GdCgR_KCL7MK"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["%quickref\n","%%js\n","%%python[2, 3]\n","%%latex\n","%%shell\n","%%svg"],"metadata":{"id":"B4QAAv64NHRW","executionInfo":{"status":"ok","timestamp":1654539235689,"user_tz":-60,"elapsed":445,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":38,"outputs":[]},{"cell_type":"code","source":["%system, %%system\n","%sx, %%sx"],"metadata":{"id":"psD0AZ7YNJBZ"},"execution_count":null,"outputs":[]}]}
The diff for this file is too large to render.
See raw diff
@@ -0,0 +1 @@
1 |
{"cells":[{"cell_type":"code","source":["from google.colab import drive\n","drive.mount('/content/drive')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"1VkPIQMBmJMO","executionInfo":{"status":"ok","timestamp":1654700494173,"user_tz":-60,"elapsed":3080,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"1e3cd91c-ca69-486b-b182-d2f31583b645"},"execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"]}]},{"cell_type":"code","source":["%cd ./drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"izkEuuuPmTZf","executionInfo":{"status":"ok","timestamp":1654700494174,"user_tz":-60,"elapsed":11,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"daf99ba8-1ed2-4935-e2b9-3481fef9584a"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection\n"]}]},{"cell_type":"code","source":["!pip install filterpy --quiet"],"metadata":{"id":"qXFwvyxqmXDr","executionInfo":{"status":"ok","timestamp":1654700498924,"user_tz":-60,"elapsed":4757,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":3,"outputs":[]},{"cell_type":"code","source":["!pip install lap --quiet"],"metadata":{"id":"zqK3-Fn2oRsc","executionInfo":{"status":"ok","timestamp":1654700503070,"user_tz":-60,"elapsed":4165,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":4,"outputs":[]},{"cell_type":"code","execution_count":5,"metadata":{"id":"kHwKuAkPlviV","executionInfo":{"status":"ok","timestamp":1654700504310,"user_tz":-60,"elapsed":1248,"user":{"displayName":"Adejumo 
Daniel","userId":"02925977078148845759"}}},"outputs":[],"source":["from models import *\n","from utils import *\n","\n","import os, sys, time, datetime, random\n","import torch\n","from import DataLoader\n","from torchvision import datasets, transforms\n","from torch.autograd import Variable\n","\n","import matplotlib.pyplot as plt\n","import matplotlib.patches as patches\n","from PIL import Image"]},{"cell_type":"code","execution_count":6,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"N5uZwVlClvie","executionInfo":{"status":"ok","timestamp":1654700508098,"user_tz":-60,"elapsed":3795,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"3a3e75b1-3379-4e79-f418-0b8a48ffb62f"},"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/torch/nn/ UserWarning: size_average and reduce args will be deprecated, please use reduction='mean' instead.\n"," warnings.warn(warning.format(ret))\n"]}],"source":["config_path='config/yolov3.cfg'\n","weights_path='config/yolov3.weights'\n","class_path='config/coco.names'\n","img_size=416\n","conf_thres=0.8\n","nms_thres=0.4\n","\n","# Load model and weights\n","model = Darknet(config_path, img_size=img_size)\n","model.load_weights(weights_path)\n","model.cuda()\n","model.eval()\n","classes = utils.load_classes(class_path)\n","Tensor = torch.cuda.FloatTensor"]},{"cell_type":"code","execution_count":7,"metadata":{"id":"n4NNQSOYlvij","executionInfo":{"status":"ok","timestamp":1654700508099,"user_tz":-60,"elapsed":9,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"outputs":[],"source":["def detect_image(img):\n"," # scale and pad image\n"," ratio = min(img_size/img.size[0], img_size/img.size[1])\n"," imw = round(img.size[0] * ratio)\n"," imh = round(img.size[1] * ratio)\n"," img_transforms = transforms.Compose([ transforms.Resize((imh, imw)),\n"," transforms.Pad((max(int((imh-imw)/2),0), max(int((imw-imh)/2),0), 
max(int((imh-imw)/2),0), max(int((imw-imh)/2),0)),\n"," (128,128,128)),\n"," transforms.ToTensor(),\n"," ])\n"," # convert image to Tensor\n"," image_tensor = img_transforms(img).float()\n"," image_tensor = image_tensor.unsqueeze_(0)\n"," input_img = Variable(image_tensor.type(Tensor))\n"," # run inference on the model and get detections\n"," with torch.no_grad():\n"," detections = model(input_img)\n"," detections = utils.non_max_suppression(detections, 80, conf_thres, nms_thres)\n"," return detections[0]"]},{"cell_type":"code","execution_count":8,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"7VUHsE2-lvik","executionInfo":{"status":"ok","timestamp":1654700521379,"user_tz":-60,"elapsed":13287,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"6144a350-24e9-4a7c-95c5-96bb66b824e0"},"outputs":[{"output_type":"stream","name":"stdout","text":["Populating the interactive namespace from numpy and matplotlib\n"]},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/IPython/core/magics/ UserWarning: pylab import has clobbered these variables: ['random']\n","`%matplotlib` prevents importing * from pylab and numpy\n"," \"\\n`%matplotlib` prevents importing * from pylab and numpy\"\n"]},{"output_type":"stream","name":"stdout","text":["Video size 1280 720\n"]},{"output_type":"stream","name":"stderr","text":["/content/drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection/ NumbaWarning: \n","Compilation is falling back to object mode WITH looplifting enabled because Function \"iou\" failed type inference due to: non-precise type pyobject\n","During: typing of argument at /content/drive/MyDrive/Python/Machine Learning/Computer Vision/darknet-COCO-object_detection/ (43)\n","\n","File \"\", line 43:\n","def iou(bb_test,bb_gt):\n"," <source elided>\n"," \"\"\"\n"," xx1 = np.maximum(bb_test[0], bb_gt[0])\n"," ^\n","\n"," 
@jit\n","/usr/local/lib/python3.7/dist-packages/numba/core/ NumbaWarning: Function \"iou\" was compiled in object mode without forceobj=True.\n","\n","File \"\", line 39:\n","@jit\n","def iou(bb_test,bb_gt):\n","^\n","\n"," state.func_ir.loc))\n","/usr/local/lib/python3.7/dist-packages/numba/core/ NumbaDeprecationWarning: \n","Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.\n","\n","For more information visit\n","\n","File \"\", line 39:\n","@jit\n","def iou(bb_test,bb_gt):\n","^\n","\n"," state.func_ir.loc))\n"]}],"source":["videopath = './videos/HorseRacing.mp4'\n","\n","%pylab inline \n","import cv2\n","from IPython.display import clear_output\n","\n","cmap = plt.get_cmap('tab20b')\n","colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]\n","\n","# initialize Sort object and video capture\n","from sort import *\n","vid = cv2.VideoCapture(videopath)\n","mot_tracker = Sort()\n","\n","fourcc = cv2.VideoWriter_fourcc(*'XVID')\n","ret,\n","vw = frame.shape[1]\n","vh = frame.shape[0]\n","print (\"Video size\", vw,vh)\n","outvideo = cv2.VideoWriter(videopath.replace(\".mp4\", \"-det.mp4\"),fourcc,20.0,(vw,vh))\n","\n","# while(True):\n","for ii in range(40):\n"," ret, frame =\n"," if not ret:\n"," print(\"Done Procesing Video\")\n"," break\n"," frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n"," pilimg = Image.fromarray(frame)\n"," detections = detect_image(pilimg)\n","\n"," img = np.array(pilimg)\n"," pad_x = max(img.shape[0] - img.shape[1], 0) * (img_size / max(img.shape))\n"," pad_y = max(img.shape[1] - img.shape[0], 0) * (img_size / max(img.shape))\n"," unpad_h = img_size - pad_y\n"," unpad_w = img_size - pad_x\n"," if detections is not None:\n"," tracked_objects = mot_tracker.update(detections.cpu())\n","\n"," unique_labels = detections[:, -1].cpu().unique()\n"," n_cls_preds = len(unique_labels)\n"," for x1, y1, x2, y2, obj_id, cls_pred in tracked_objects:\n"," box_h = int(((y2 - 
y1) / unpad_h) * img.shape[0])\n"," box_w = int(((x2 - x1) / unpad_w) * img.shape[1])\n"," y1 = int(((y1 - pad_y // 2) / unpad_h) * img.shape[0])\n"," x1 = int(((x1 - pad_x // 2) / unpad_w) * img.shape[1])\n","\n"," color = colors[int(obj_id) % len(colors)]\n"," color = [i * 255 for i in color]\n"," cls = classes[int(cls_pred)]\n"," cv2.rectangle(frame, (x1, y1), (x1+box_w, y1+box_h), color, 4)\n"," cv2.rectangle(frame, (x1, y1-35), (x1+len(cls)*19+60, y1), color, -1)\n"," cv2.putText(frame, cls + \"-\" + str(int(obj_id)), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3)\n","\n"," outvideo.write(frame)\n","\n","outvideo.release()"]},{"cell_type":"code","source":["from pathlib import Path\n","from IPython import display as ipythondisplay\n","import base64\n","\n","def show_videos(video_path='', prefix=''):\n"," html = []\n"," for mp4 in Path(video_path).glob(f\"{prefix}*.mp4\"):\n"," video_b64 = base64.b64encode(mp4.read_bytes())\n"," html.append('''<video alt=\"{}\" autoplay \n"," loop controls style=\"height: 400px;\">\n"," <source src=\"data:video/mp4;base64,{}\" type=\"video/mp4\" />\n"," </video>'''.format(mp4, video_b64.decode('ascii')))\n"," break\n"," ipythondisplay.display(ipythondisplay.HTML(data=\"<br>\".join(html)))"],"metadata":{"id":"Xx6d_F3VstfA","executionInfo":{"status":"ok","timestamp":1654700521380,"user_tz":-60,"elapsed":19,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}}},"execution_count":9,"outputs":[]},{"cell_type":"code","source":["video_b64 = base64.b64encode(Path(videopath.replace(\".mp4\", \"-det.mp4\")).read_bytes())\n","html = '''<video alt=\"{}\" autoplay \n"," loop controls style=\"height: 400px;\">\n"," <source src=\"data:video/mp4;base64,{}\" type=\"video/mp4\" />\n"," </video>'''.format(Path(videopath), video_b64.decode('ascii'))\n","ipythondisplay.display(ipythondisplay.HTML(data=html)) 
"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":421,"output_embedded_package_id":"1KE6a6Jf_qBrnIGEjOY8GYXagvaaGt84D"},"id":"K3VrKNb3yUbH","executionInfo":{"status":"ok","timestamp":1654700524974,"user_tz":-60,"elapsed":3611,"user":{"displayName":"Adejumo Daniel","userId":"02925977078148845759"}},"outputId":"92ea1435-9e17-4167-c094-dd1e380b200f"},"execution_count":10,"outputs":[{"output_type":"display_data","data":{"text/plain":"Output hidden; open in to view."},"metadata":{}}]}],"metadata":{"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"},"colab":{"name":"PyTorch_Object_Tracking.ipynb","provenance":[],"collapsed_sections":[]},"accelerator":"GPU"},"nbformat":4,"nbformat_minor":0}
Binary file (9.65 kB). View file
Binary file (10.2 kB). View file
The diff for this file is too large to render.
See raw diff
@@ -0,0 +1,350 @@
1 |
from __future__ import division
2 |
3 |
import torch
4 |
import torch.nn as nn
5 |
import torch.nn.functional as F
6 |
from torch.autograd import Variable
7 |
import numpy as np
8 |
9 |
from PIL import Image
10 |
11 |
from utils.parse_config import *
12 |
from utils.utils import build_targets
13 |
from collections import defaultdict
14 |
15 |
##import matplotlib.pyplot as plt
16 |
##import matplotlib.patches as patches
17 |
18 |
19 |
def create_modules(module_defs):
20 |
21 |
Constructs module list of layer blocks from module configuration in module_defs
22 |
23 |
hyperparams = module_defs.pop(0)
24 |
output_filters = [int(hyperparams["channels"])]
25 |
module_list = nn.ModuleList()
26 |
for i, module_def in enumerate(module_defs):
27 |
modules = nn.Sequential()
28 |
29 |
if module_def["type"] == "convolutional":
30 |
bn = int(module_def["batch_normalize"])
31 |
filters = int(module_def["filters"])
32 |
kernel_size = int(module_def["size"])
33 |
pad = (kernel_size - 1) // 2 if int(module_def["pad"]) else 0
34 |
35 |
"conv_%d" % i,
36 |
37 |
38 |
39 |
40 |
41 |
42 |
bias=not bn,
43 |
44 |
45 |
if bn:
46 |
modules.add_module("batch_norm_%d" % i, nn.BatchNorm2d(filters))
47 |
if module_def["activation"] == "leaky":
48 |
modules.add_module("leaky_%d" % i, nn.LeakyReLU(0.1))
49 |
50 |
elif module_def["type"] == "maxpool":
51 |
kernel_size = int(module_def["size"])
52 |
stride = int(module_def["stride"])
53 |
if kernel_size == 2 and stride == 1:
54 |
padding = nn.ZeroPad2d((0, 1, 0, 1))
55 |
modules.add_module("_debug_padding_%d" % i, padding)
56 |
maxpool = nn.MaxPool2d(
57 |
58 |
59 |
padding=int((kernel_size - 1) // 2),
60 |
61 |
modules.add_module("maxpool_%d" % i, maxpool)
62 |
63 |
elif module_def["type"] == "upsample":
64 |
upsample = nn.Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
65 |
modules.add_module("upsample_%d" % i, upsample)
66 |
67 |
elif module_def["type"] == "route":
68 |
layers = [int(x) for x in module_def["layers"].split(",")]
69 |
filters = sum([output_filters[layer_i] for layer_i in layers])
70 |
modules.add_module("route_%d" % i, EmptyLayer())
71 |
72 |
elif module_def["type"] == "shortcut":
73 |
filters = output_filters[int(module_def["from"])]
74 |
modules.add_module("shortcut_%d" % i, EmptyLayer())
75 |
76 |
elif module_def["type"] == "yolo":
77 |
anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
78 |
# Extract anchors
79 |
anchors = [int(x) for x in module_def["anchors"].split(",")]
80 |
anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
81 |
anchors = [anchors[i] for i in anchor_idxs]
82 |
num_classes = int(module_def["classes"])
83 |
img_height = int(hyperparams["height"])
84 |
# Define detection layer
85 |
yolo_layer = YOLOLayer(anchors, num_classes, img_height)
86 |
modules.add_module("yolo_%d" % i, yolo_layer)
87 |
# Register module list and number of output filters
88 |
89 |
90 |
91 |
return hyperparams, module_list
92 |
93 |
94 |
class EmptyLayer(nn.Module):
    """Placeholder for 'route' and 'shortcut' layers.

    The module owns no parameters or sub-modules and defines no ``forward``
    of its own. ``create_modules`` registers one instance under the names
    ``route_%d`` / ``shortcut_%d`` so that those layer types still occupy a
    slot in the module list.
    """

    def __init__(self):
        super(EmptyLayer, self).__init__()
99 |
100 |
101 |
class YOLOLayer(nn.Module):
102 |
"""Detection layer"""
103 |
104 |
def __init__(self, anchors, num_classes, img_dim):
105 |
super(YOLOLayer, self).__init__()
106 |
self.anchors = anchors
107 |
self.num_anchors = len(anchors)
108 |
self.num_classes = num_classes
109 |
self.bbox_attrs = 5 + num_classes
110 |
self.image_dim = img_dim
111 |
self.ignore_thres = 0.5
112 |
self.lambda_coord = 1
113 |
114 |
self.mse_loss = nn.MSELoss(size_average=True) # Coordinate loss
115 |
self.bce_loss = nn.BCELoss(size_average=True) # Confidence loss
116 |
self.ce_loss = nn.CrossEntropyLoss() # Class loss
117 |
118 |
def forward(self, x, targets=None):
119 |
nA = self.num_anchors
120 |
nB = x.size(0)
121 |
nG = x.size(2)
122 |
stride = self.image_dim / nG
123 |
124 |
# Tensors for cuda support
125 |
FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
126 |
LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor
127 |
ByteTensor = torch.cuda.ByteTensor if x.is_cuda else torch.ByteTensor
128 |
129 |
prediction = x.view(nB, nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()
130 |
131 |
# Get outputs
132 |
x = torch.sigmoid(prediction[..., 0]) # Center x
133 |
y = torch.sigmoid(prediction[..., 1]) # Center y
134 |
w = prediction[..., 2] # Width
135 |
h = prediction[..., 3] # Height
136 |
pred_conf = torch.sigmoid(prediction[..., 4]) # Conf
137 |
pred_cls = torch.sigmoid(prediction[..., 5:]) # Cls pred.
138 |
139 |
# Calculate offsets for each grid
140 |
grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).type(FloatTensor)
141 |
grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).type(FloatTensor)
142 |
scaled_anchors = FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in self.anchors])
143 |
anchor_w = scaled_anchors[:, 0:1].view((1, nA, 1, 1))
144 |
anchor_h = scaled_anchors[:, 1:2].view((1, nA, 1, 1))
145 |
146 |
# Add offset and scale with anchors
147 |
pred_boxes = FloatTensor(prediction[..., :4].shape)
148 |
pred_boxes[..., 0] = + grid_x
149 |
pred_boxes[..., 1] = + grid_y
150 |
pred_boxes[..., 2] = torch.exp( * anchor_w
151 |
pred_boxes[..., 3] = torch.exp( * anchor_h
152 |
153 |
# Training
154 |
if targets is not None:
155 |
156 |
if x.is_cuda:
157 |
self.mse_loss = self.mse_loss.cuda()
158 |
self.bce_loss = self.bce_loss.cuda()
159 |
self.ce_loss = self.ce_loss.cuda()
160 |
161 |
nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls = build_targets(
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
nProposals = int((pred_conf > 0.5).sum().item())
175 |
recall = float(nCorrect / nGT) if nGT else 1
176 |
precision = float(nCorrect / nProposals)
177 |
178 |
# Handle masks
179 |
mask = Variable(mask.type(ByteTensor))
180 |
conf_mask = Variable(conf_mask.type(ByteTensor))
181 |
182 |
# Handle target variables
183 |
tx = Variable(tx.type(FloatTensor), requires_grad=False)
184 |
ty = Variable(ty.type(FloatTensor), requires_grad=False)
185 |
tw = Variable(tw.type(FloatTensor), requires_grad=False)
186 |
th = Variable(th.type(FloatTensor), requires_grad=False)
187 |
tconf = Variable(tconf.type(FloatTensor), requires_grad=False)
188 |
tcls = Variable(tcls.type(LongTensor), requires_grad=False)
189 |
190 |
# Get conf mask where gt and where there is no gt
191 |
conf_mask_true = mask
192 |
conf_mask_false = conf_mask - mask
193 |
194 |
# Mask outputs to ignore non-existing objects
195 |
loss_x = self.mse_loss(x[mask], tx[mask])
196 |
loss_y = self.mse_loss(y[mask], ty[mask])
197 |
loss_w = self.mse_loss(w[mask], tw[mask])
198 |
loss_h = self.mse_loss(h[mask], th[mask])
199 |
loss_conf = self.bce_loss(pred_conf[conf_mask_false], tconf[conf_mask_false]) + self.bce_loss(
200 |
pred_conf[conf_mask_true], tconf[conf_mask_true]
201 |
202 |
loss_cls = (1 / nB) * self.ce_loss(pred_cls[mask], torch.argmax(tcls[mask], 1))
203 |
loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
204 |
205 |
return (
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
# If not in training phase return predictions
219 |
output =
220 |
221 |
pred_boxes.view(nB, -1, 4) * stride,
222 |
pred_conf.view(nB, -1, 1),
223 |
pred_cls.view(nB, -1, self.num_classes),
224 |
225 |
226 |
227 |
return output
228 |
229 |
230 |
class Darknet(nn.Module):
231 |
"""YOLOv3 object detection model"""
232 |
233 |
def __init__(self, config_path, img_size=416):
234 |
super(Darknet, self).__init__()
235 |
self.module_defs = parse_model_config(config_path)
236 |
self.hyperparams, self.module_list = create_modules(self.module_defs)
237 |
self.img_size = img_size
238 |
self.seen = 0
239 |
self.header_info = np.array([0, 0, 0, self.seen, 0])
240 |
self.loss_names = ["x", "y", "w", "h", "conf", "cls", "recall", "precision"]
241 |
242 |
def forward(self, x, targets=None):
243 |
is_training = targets is not None
244 |
output = []
245 |
self.losses = defaultdict(float)
246 |
layer_outputs = []
247 |
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
248 |
if module_def["type"] in ["convolutional", "upsample", "maxpool"]:
249 |
x = module(x)
250 |
elif module_def["type"] == "route":
251 |
layer_i = [int(x) for x in module_def["layers"].split(",")]
252 |
x =[layer_outputs[i] for i in layer_i], 1)
253 |
elif module_def["type"] == "shortcut":
254 |
layer_i = int(module_def["from"])
255 |
x = layer_outputs[-1] + layer_outputs[layer_i]
256 |
elif module_def["type"] == "yolo":
257 |
# Train phase: get loss
258 |
if is_training:
259 |
x, *losses = module[0](x, targets)
260 |
for name, loss in zip(self.loss_names, losses):
261 |
self.losses[name] += loss
262 |
# Test phase: Get detections
263 |
264 |
x = module(x)
265 |
266 |
267 |
268 |
self.losses["recall"] /= 3
269 |
self.losses["precision"] /= 3
270 |
return sum(output) if is_training else, 1)
271 |
272 |
def load_weights(self, weights_path):
273 |
"""Parses and loads the weights stored in 'weights_path'"""
274 |
275 |
# Open the weights file
276 |
fp = open(weights_path, "rb")
277 |
header = np.fromfile(fp, dtype=np.int32, count=5) # First five are header values
278 |
279 |
# Needed to write header when saving weights
280 |
self.header_info = header
281 |
282 |
self.seen = header[3]
283 |
weights = np.fromfile(fp, dtype=np.float32) # The rest are weights
284 |
285 |
286 |
ptr = 0
287 |
for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
288 |
if module_def["type"] == "convolutional":
289 |
conv_layer = module[0]
290 |
if module_def["batch_normalize"]:
291 |
# Load BN bias, weights, running mean and running variance
292 |
bn_layer = module[1]
293 |
num_b = bn_layer.bias.numel() # Number of biases
294 |
# Bias
295 |
bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.bias)
296 |
297 |
ptr += num_b
298 |
# Weight
299 |
bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.weight)
300 |
301 |
ptr += num_b
302 |
# Running Mean
303 |
bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_mean)
304 |
305 |
ptr += num_b
306 |
# Running Var
307 |
bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(bn_layer.running_var)
308 |
309 |
ptr += num_b
310 |
311 |
# Load conv. bias
312 |
num_b = conv_layer.bias.numel()
313 |
conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(conv_layer.bias)
314 |
315 |
ptr += num_b
316 |
# Load conv. weights
317 |
num_w = conv_layer.weight.numel()
318 |
conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(conv_layer.weight)
319 |
320 |
ptr += num_w
321 |
322 |
323 |
@:param path - path of the new weights file
324 |
@:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
325 |
326 |
327 |
def save_weights(self, path, cutoff=-1):
328 |
329 |
fp = open(path, "wb")
330 |
self.header_info[3] = self.seen
331 |
332 |
333 |
# Iterate through layers
334 |
for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
335 |
if module_def["type"] == "convolutional":
336 |
conv_layer = module[0]
337 |
# If batch norm, load bn first
338 |
if module_def["batch_normalize"]:
339 |
bn_layer = module[1]
340 |
341 |
342 |
343 |
344 |
# Load conv bias
345 |
346 |
347 |
# Load conv weights
348 |
349 |
350 |
@@ -0,0 +1,110 @@
1 |
from models import *
2 |
from utils import *
3 |
4 |
import os, sys, time, datetime, random
5 |
import torch
6 |
from import DataLoader
7 |
from torchvision import datasets, transforms
8 |
from torch.autograd import Variable
9 |
10 |
from PIL import Image
11 |
12 |
# load weights and set defaults
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
# load model and put into eval mode
21 |
model = Darknet(config_path, img_size=img_size)
22 |
23 |
24 |
25 |
26 |
classes = utils.load_classes(class_path)
27 |
Tensor = torch.cuda.FloatTensor
28 |
29 |
def detect_image(img):
30 |
# scale and pad image
31 |
ratio = min(img_size/img.size[0], img_size/img.size[1])
32 |
imw = round(img.size[0] * ratio)
33 |
imh = round(img.size[1] * ratio)
34 |
img_transforms = transforms.Compose([ transforms.Resize((imh, imw)),
35 |
transforms.Pad((max(int((imh-imw)/2),0), max(int((imw-imh)/2),0), max(int((imh-imw)/2),0), max(int((imw-imh)/2),0)),
36 |
37 |
38 |
39 |
# convert image to Tensor
40 |
image_tensor = img_transforms(img).float()
41 |
image_tensor = image_tensor.unsqueeze_(0)
42 |
input_img = Variable(image_tensor.type(Tensor))
43 |
# run inference on the model and get detections
44 |
with torch.no_grad():
45 |
detections = model(input_img)
46 |
detections = utils.non_max_suppression(detections, 80, conf_thres, nms_thres)
47 |
return detections[0]
48 |
49 |
videopath = './videos/HorseRacing.mp4'
50 |
51 |
import cv2
52 |
from sort import *
53 |
54 |
55 |
vid = cv2.VideoCapture(videopath)
56 |
mot_tracker = Sort()
57 |
58 |
59 |
cv2.resizeWindow('Stream', (800,600))
60 |
61 |
fourcc = cv2.VideoWriter_fourcc(*'XVID')
62 |
63 |
vw = frame.shape[1]
64 |
vh = frame.shape[0]
65 |
print ("Video size", vw,vh)
66 |
outvideo = cv2.VideoWriter(videopath.replace(".mp4", "-det.mp4"),fourcc,20.0,(vw,vh))
67 |
68 |
frames = 0
69 |
starttime = time.time()
70 |
71 |
ret, frame =
72 |
if not ret:
73 |
74 |
frames += 1
75 |
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
76 |
pilimg = Image.fromarray(frame)
77 |
detections = detect_image(pilimg)
78 |
79 |
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
80 |
img = np.array(pilimg)
81 |
pad_x = max(img.shape[0] - img.shape[1], 0) * (img_size / max(img.shape))
82 |
pad_y = max(img.shape[1] - img.shape[0], 0) * (img_size / max(img.shape))
83 |
unpad_h = img_size - pad_y
84 |
unpad_w = img_size - pad_x
85 |
if detections is not None:
86 |
tracked_objects = mot_tracker.update(detections.cpu())
87 |
88 |
unique_labels = detections[:, -1].cpu().unique()
89 |
n_cls_preds = len(unique_labels)
90 |
for x1, y1, x2, y2, obj_id, cls_pred in tracked_objects:
91 |
box_h = int(((y2 - y1) / unpad_h) * img.shape[0])
92 |
box_w = int(((x2 - x1) / unpad_w) * img.shape[1])
93 |
y1 = int(((y1 - pad_y // 2) / unpad_h) * img.shape[0])
94 |
x1 = int(((x1 - pad_x // 2) / unpad_w) * img.shape[1])
95 |
color = colors[int(obj_id) % len(colors)]
96 |
cls = classes[int(cls_pred)]
97 |
cv2.rectangle(frame, (x1, y1), (x1+box_w, y1+box_h), color, 4)
98 |
cv2.rectangle(frame, (x1, y1-35), (x1+len(cls)*19+80, y1), color, -1)
99 |
cv2.putText(frame, cls + "-" + str(int(obj_id)), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3)
100 |
101 |
cv2.imshow('Stream', frame)
102 |
103 |
ch = 0xFF & cv2.waitKey(1)
104 |
if ch == 27:
105 |
106 |
107 |
totaltime = time.time()-starttime
108 |
print(frames, "frames", totaltime/frames, "s/frame")
109 |
110 |
@@ -0,0 +1,305 @@
1 |
2 |
SORT: A Simple, Online and Realtime Tracker
3 |
Copyright (C) 2016 Alex Bewley
4 |
5 |
This program is free software: you can redistribute it and/or modify
6 |
it under the terms of the GNU General Public License as published by
7 |
the Free Software Foundation, either version 3 of the License, or
8 |
(at your option) any later version.
9 |
10 |
This program is distributed in the hope that it will be useful,
11 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |
GNU General Public License for more details.
14 |
15 |
You should have received a copy of the GNU General Public License
16 |
along with this program. If not, see <http://www.gnu.org/licenses/>.
17 |
18 |
from __future__ import print_function
19 |
20 |
from numba import jit
21 |
import os.path
22 |
import numpy as np
23 |
##import matplotlib.pyplot as plt
24 |
##import matplotlib.patches as patches
25 |
from skimage import io
26 |
# from sklearn.utils.linear_assignment_ import linear_assignment
27 |
import glob
28 |
import time
29 |
import argparse
30 |
from filterpy.kalman import KalmanFilter
31 |
32 |
from scipy.optimize import linear_sum_assignment
33 |
def linear_assignment(x):
    """Solve the linear assignment problem for the cost matrix ``x``.

    Thin wrapper around ``scipy.optimize.linear_sum_assignment`` that
    repackages its ``(row_indices, column_indices)`` result.

    Parameters:
        x: 2-D cost matrix; the total cost of the selected pairs is minimised.

    Returns:
        An ``(N, 2)`` integer array in which every row is one matched
        ``[row_index, column_index]`` pair.
    """
    indices = linear_sum_assignment(x)
    # Stack the two index vectors into shape (2, N) ...
    indices = np.asarray(indices)
    # ... and flip to (N, 2) so callers can slice [:, 0] / [:, 1].
    return np.transpose(indices)
37 |
38 |
39 |
def iou(bb_test,bb_gt):
40 |
41 |
Computes IOU between two bboxes in the form [x1,y1,x2,y2]
42 |
43 |
xx1 = np.maximum(bb_test[0], bb_gt[0])
44 |
yy1 = np.maximum(bb_test[1], bb_gt[1])
45 |
xx2 = np.minimum(bb_test[2], bb_gt[2])
46 |
yy2 = np.minimum(bb_test[3], bb_gt[3])
47 |
w = np.maximum(0., xx2 - xx1)
48 |
h = np.maximum(0., yy2 - yy1)
49 |
wh = w * h
50 |
o = wh / ((bb_test[2]-bb_test[0])*(bb_test[3]-bb_test[1])
51 |
+ (bb_gt[2]-bb_gt[0])*(bb_gt[3]-bb_gt[1]) - wh)
52 |
53 |
54 |
def convert_bbox_to_z(bbox):
    """
    Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
    [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
    the aspect ratio

    Only the first four entries of ``bbox`` are read, so longer sequences
    (extra trailing fields) are accepted. The result is a NumPy column
    vector of shape (4, 1).

    NOTE: a box with zero height (``bbox[3] == bbox[1]``) is not guarded
    against; the aspect-ratio division below divides by zero in that case.
    """
    w = bbox[2] - bbox[0]
    h = bbox[3] - bbox[1]
    x = bbox[0] + w / 2.
    y = bbox[1] + h / 2.
    s = w * h  # scale is just area
    r = w / float(h)
    return np.array([x, y, s, r]).reshape((4, 1))
67 |
68 |
def convert_x_to_bbox(x,score=None):
69 |
70 |
Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
71 |
[x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
72 |
73 |
w = np.sqrt(x[2]*x[3])
74 |
h = x[2]/w
75 |
76 |
return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4))
77 |
78 |
return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5))
79 |
80 |
81 |
class KalmanBoxTracker(object):
82 |
83 |
This class represents the internal state of individual tracked objects observed as bbox.
84 |
85 |
count = 0
86 |
def __init__(self,bbox):
87 |
88 |
Initialises a tracker using initial bounding box.
89 |
90 |
#define constant velocity model
91 |
self.kf = KalmanFilter(dim_x=7, dim_z=4)
92 |
self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0], [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
93 |
self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])
94 |
95 |
self.kf.R[2:,2:] *= 10.
96 |
self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities
97 |
self.kf.P *= 10.
98 |
self.kf.Q[-1,-1] *= 0.01
99 |
self.kf.Q[4:,4:] *= 0.01
100 |
101 |
self.kf.x[:4] = convert_bbox_to_z(bbox)
102 |
self.time_since_update = 0
103 |
+ = KalmanBoxTracker.count
104 |
KalmanBoxTracker.count += 1
105 |
self.history = []
106 |
self.hits = 0
107 |
self.hit_streak = 0
108 |
self.age = 0
109 |
self.objclass = bbox[6]
110 |
111 |
def update(self,bbox):
112 |
113 |
Updates the state vector with observed bbox.
114 |
115 |
self.time_since_update = 0
116 |
self.history = []
117 |
self.hits += 1
118 |
self.hit_streak += 1
119 |
120 |
121 |
def predict(self):
122 |
123 |
Advances the state vector and returns the predicted bounding box estimate.
124 |
125 |
126 |
self.kf.x[6] *= 0.0
127 |
128 |
self.age += 1
129 |
130 |
self.hit_streak = 0
131 |
self.time_since_update += 1
132 |
133 |
return self.history[-1]
134 |
135 |
def get_state(self):
136 |
137 |
Returns the current bounding box estimate.
138 |
139 |
return convert_x_to_bbox(self.kf.x)
140 |
141 |
def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3):
142 |
143 |
Assigns detections to tracked object (both represented as bounding boxes)
144 |
145 |
Returns 3 lists of matches, unmatched_detections and unmatched_trackers
146 |
147 |
148 |
return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)
149 |
iou_matrix = np.zeros((len(detections),len(trackers)),dtype=np.float32)
150 |
151 |
for d,det in enumerate(detections):
152 |
for t,trk in enumerate(trackers):
153 |
iou_matrix[d,t] = iou(det,trk)
154 |
matched_indices = linear_assignment(-iou_matrix)
155 |
156 |
unmatched_detections = []
157 |
for d,det in enumerate(detections):
158 |
if(d not in matched_indices[:,0]):
159 |
160 |
unmatched_trackers = []
161 |
for t,trk in enumerate(trackers):
162 |
if(t not in matched_indices[:,1]):
163 |
164 |
165 |
#filter out matched with low IOU
166 |
matches = []
167 |
for m in matched_indices:
168 |
169 |
170 |
171 |
172 |
173 |
174 |
matches = np.empty((0,2),dtype=int)
175 |
176 |
matches = np.concatenate(matches,axis=0)
177 |
178 |
return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
179 |
180 |
181 |
182 |
class Sort(object):
183 |
def __init__(self,max_age=1,min_hits=3):
184 |
185 |
Sets key parameters for SORT
186 |
187 |
self.max_age = max_age
188 |
self.min_hits = min_hits
189 |
self.trackers = []
190 |
self.frame_count = 0
191 |
192 |
def update(self,dets):
193 |
194 |
195 |
dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
196 |
Requires: this method must be called once for each frame even with empty detections.
197 |
Returns a similar array, where the last column is the object ID.
198 |
199 |
NOTE: The number of objects returned may differ from the number of detections provided.
200 |
201 |
self.frame_count += 1
202 |
#get predicted locations from existing trackers.
203 |
trks = np.zeros((len(self.trackers),5))
204 |
to_del = []
205 |
ret = []
206 |
for t,trk in enumerate(trks):
207 |
pos = self.trackers[t].predict()[0]
208 |
trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
209 |
210 |
211 |
trks =
212 |
for t in reversed(to_del):
213 |
214 |
matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets,trks)
215 |
216 |
#update matched trackers with assigned detections
217 |
for t,trk in enumerate(self.trackers):
218 |
if(t not in unmatched_trks):
219 |
d = matched[np.where(matched[:,1]==t)[0],0]
220 |
221 |
222 |
#create and initialise new trackers for unmatched detections
223 |
for i in unmatched_dets:
224 |
trk = KalmanBoxTracker(dets[i,:])
225 |
226 |
i = len(self.trackers)
227 |
for trk in reversed(self.trackers):
228 |
d = trk.get_state()[0]
229 |
if((trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits)):
230 |
ret.append(np.concatenate((d,[], [trk.objclass])).reshape(1,-1)) # +1 as MOT benchmark requires positive
231 |
i -= 1
232 |
#remove dead tracklet
233 |
if(trk.time_since_update > self.max_age):
234 |
235 |
236 |
return np.concatenate(ret)
237 |
return np.empty((0,5))
238 |
239 |
def parse_args():
    """Parse input arguments.

    Builds the command-line parser for the SORT demo and parses
    ``sys.argv``. The only option is the boolean flag ``--display``, which
    is ``False`` unless given on the command line.

    Returns:
        The ``argparse.Namespace`` holding the parsed ``display`` attribute.
    """
    parser = argparse.ArgumentParser(description='SORT demo')
    parser.add_argument(
        '--display',
        dest='display',
        help='Display online tracker output (slow) [False]',
        action='store_true',
    )
    args = parser.parse_args()
    return args
245 |
246 |
if __name__ == '__main__':
247 |
# all train
248 |
sequences = ['PETS09-S2L1','TUD-Campus','TUD-Stadtmitte','ETH-Bahnhof','ETH-Sunnyday','ETH-Pedcross2','KITTI-13','KITTI-17','ADL-Rundle-6','ADL-Rundle-8','Venice-2']
249 |
args = parse_args()
250 |
display = args.display
251 |
phase = 'train'
252 |
total_time = 0.0
253 |
total_frames = 0
254 |
colours = np.random.rand(32,3) #used only for display
255 |
256 |
if not os.path.exists('mot_benchmark'):
257 |
print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n ( E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n')
258 |
259 |
260 |
fig = plt.figure()
261 |
262 |
if not os.path.exists('output'):
263 |
264 |
265 |
for seq in sequences:
266 |
mot_tracker = Sort() #create instance of the SORT tracker
267 |
seq_dets = np.loadtxt('data/%s/det.txt'%(seq),delimiter=',') #load detections
268 |
with open('output/%s.txt'%(seq),'w') as out_file:
269 |
print("Processing %s."%(seq))
270 |
for frame in range(int(seq_dets[:,0].max())):
271 |
frame += 1 #detection and frame numbers begin at 1
272 |
dets = seq_dets[seq_dets[:,0]==frame,2:7]
273 |
dets[:,2:4] += dets[:,0:2] #convert to [x1,y1,w,h] to [x1,y1,x2,y2]
274 |
total_frames += 1
275 |
276 |
277 |
ax1 = fig.add_subplot(111, aspect='equal')
278 |
fn = 'mot_benchmark/%s/%s/img1/%06d.jpg'%(phase,seq,frame)
279 |
im =io.imread(fn)
280 |
281 |
plt.title(seq+' Tracked Targets')
282 |
283 |
start_time = time.time()
284 |
trackers = mot_tracker.update(dets)
285 |
cycle_time = time.time() - start_time
286 |
total_time += cycle_time
287 |
288 |
for d in trackers:
289 |
290 |
291 |
d = d.astype(np.int32)
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
print("Total Tracking took: %.3f for %d frames or %.1f FPS"%(total_time,total_frames,total_frames/total_time))
301 |
302 |
print("Note: to get real runtime results run without the option: --display")
303 |
304 |
305 |
@@ -0,0 +1,317 @@
1 |
2 |
SORT: A Simple, Online and Realtime Tracker
3 |
Copyright (C) 2016 Alex Bewley
4 |
5 |
This program is free software: you can redistribute it and/or modify
6 |
it under the terms of the GNU General Public License as published by
7 |
the Free Software Foundation, either version 3 of the License, or
8 |
(at your option) any later version.
9 |
10 |
This program is distributed in the hope that it will be useful,
11 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 |
GNU General Public License for more details.
14 |
15 |
You should have received a copy of the GNU General Public License
16 |
along with this program. If not, see <http://www.gnu.org/licenses/>.
17 |
18 |
from __future__ import print_function
19 |
20 |
from numba import jit
21 |
import os.path
22 |
import numpy as np
23 |
##import matplotlib.pyplot as plt
24 |
##import matplotlib.patches as patches
25 |
from skimage import io
26 |
# from scipy.optimize import linear_sum_assignment as linear_assignment
27 |
import glob
28 |
import time
29 |
import argparse
30 |
from filterpy.kalman import KalmanFilter
31 |
32 |
# -------- Fixes the Warning ----------------------
33 |
# def linear_assignment(cost_matrix):
34 |
# try:
35 |
# import lap
36 |
# _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
37 |
# return np.array([[y[i], i] for i in x if i >= 0])
38 |
# except ImportError:
39 |
# from scipy.optimize import linear_sum_assignment
40 |
# x, y = linear_sum_assignment(cost_matrix)
41 |
# return np.array(list(zip(x, y)))
42 |
43 |
# --------------- Fixes the Error
44 |
from scipy.optimize import linear_sum_assignment
45 |
def linear_assignment(x):
    """Solve the linear assignment problem for the cost matrix ``x``.

    Thin wrapper around ``scipy.optimize.linear_sum_assignment`` that
    repackages its ``(row_indices, column_indices)`` result.

    Parameters:
        x: 2-D cost matrix; the total cost of the selected pairs is minimised.

    Returns:
        An ``(N, 2)`` integer array in which every row is one matched
        ``[row_index, column_index]`` pair.
    """
    indices = linear_sum_assignment(x)
    # Stack the two index vectors into shape (2, N) ...
    indices = np.asarray(indices)
    # ... and flip to (N, 2) so callers can slice [:, 0] / [:, 1].
    return np.transpose(indices)
49 |
50 |
51 |
def iou(bb_test,bb_gt):
52 |
53 |
Computes IOU between two bboxes in the form [x1,y1,x2,y2]
54 |
55 |
xx1 = np.maximum(bb_test[0], bb_gt[0])
56 |
yy1 = np.maximum(bb_test[1], bb_gt[1])
57 |
xx2 = np.minimum(bb_test[2], bb_gt[2])
58 |
yy2 = np.minimum(bb_test[3], bb_gt[3])
59 |
w = np.maximum(0., xx2 - xx1)
60 |
h = np.maximum(0., yy2 - yy1)
61 |
wh = w * h
62 |
o = wh / ((bb_test[2]-bb_test[0])*(bb_test[3]-bb_test[1])
63 |
+ (bb_gt[2]-bb_gt[0])*(bb_gt[3]-bb_gt[1]) - wh)
64 |
65 |
66 |
def convert_bbox_to_z(bbox):
    """
    Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
    [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
    the aspect ratio

    Only the first four entries of ``bbox`` are read, so longer sequences
    (extra trailing fields) are accepted. The result is a NumPy column
    vector of shape (4, 1).

    NOTE: a box with zero height (``bbox[3] == bbox[1]``) is not guarded
    against; the aspect-ratio division below divides by zero in that case.
    """
    w = bbox[2] - bbox[0]
    h = bbox[3] - bbox[1]
    x = bbox[0] + w / 2.
    y = bbox[1] + h / 2.
    s = w * h  # scale is just area
    r = w / float(h)
    return np.array([x, y, s, r]).reshape((4, 1))
79 |
80 |
def convert_x_to_bbox(x, score=None):
    """
    Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
    [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right

    Returns an array of shape (1, 4), or (1, 5) with ``score`` appended as the
    last column when ``score`` is given.
    """
    # s = w * h and r = w / h, hence w = sqrt(s * r) and h = s / w.
    w = np.sqrt(x[2] * x[3])
    h = x[2] / w
    if score is None:
        return np.array([x[0] - w / 2., x[1] - h / 2., x[0] + w / 2., x[1] + h / 2.]).reshape((1, 4))
    else:
        return np.array([x[0] - w / 2., x[1] - h / 2., x[0] + w / 2., x[1] + h / 2., score]).reshape((1, 5))
91 |
92 |
93 |
class KalmanBoxTracker(object):
    """
    This class represents the internal state of individual tracked objects observed as bbox.

    The filter state has 7 components. The first four are the measured
    [x, y, s, r] box encoding produced by convert_bbox_to_z; per the
    transition matrix F, components 4-6 are added to x, y and s at every
    prediction step (constant velocity model) while r is carried over unchanged.
    """
    count = 0  # class-wide counter used to hand out tracker ids

    def __init__(self, bbox):
        """
        Initialises a tracker using initial bounding box.

        ``bbox`` must be indexable up to position 6: entries 0-3 are the box
        corners [x1,y1,x2,y2] and entry 6 is stored as the object class.
        """
        # define constant velocity model
        self.kf = KalmanFilter(dim_x=7, dim_z=4)
        self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0], [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
        self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])

        self.kf.R[2:,2:] *= 10.
        self.kf.P[4:,4:] *= 1000.  # give high uncertainty to the unobservable initial velocities
        self.kf.P *= 10.
        self.kf.Q[-1,-1] *= 0.01
        self.kf.Q[4:,4:] *= 0.01

        self.kf.x[:4] = convert_bbox_to_z(bbox)
        self.time_since_update = 0
        # NOTE(review): the assignment target was lost in the reviewed text;
        # `self.id` matches how Sort.update reports tracker ids - confirm.
        self.id = KalmanBoxTracker.count
        KalmanBoxTracker.count += 1
        self.history = []
        self.hits = 0
        self.hit_streak = 0
        self.age = 0
        self.objclass = bbox[6]

    def update(self, bbox):
        """
        Updates the state vector with observed bbox.
        """
        self.time_since_update = 0
        self.history = []
        self.hits += 1
        self.hit_streak += 1
        # NOTE(review): reinstated measurement update (line missing from the
        # reviewed text) - confirm against the reference SORT implementation.
        self.kf.update(convert_bbox_to_z(bbox))

    def predict(self):
        """
        Advances the state vector and returns the predicted bounding box estimate.
        """
        # NOTE(review): the guard and the kf.predict()/history lines below were
        # missing from the reviewed text and are reinstated from the reference
        # SORT implementation - confirm.
        # Zero the area velocity if it would drive the area non-positive.
        if (self.kf.x[6] + self.kf.x[2]) <= 0:
            self.kf.x[6] *= 0.0
        self.kf.predict()
        self.age += 1
        # A prediction made without an intervening update breaks the hit streak.
        if self.time_since_update > 0:
            self.hit_streak = 0
        self.time_since_update += 1
        self.history.append(convert_x_to_bbox(self.kf.x))
        return self.history[-1]

    def get_state(self):
        """
        Returns the current bounding box estimate.
        """
        return convert_x_to_bbox(self.kf.x)
152 |
153 |
def associate_detections_to_trackers(detections, trackers, iou_threshold=0.3):
    """
    Assigns detections to tracked object (both represented as bounding boxes)

    Returns 3 lists of matches, unmatched_detections and unmatched_trackers

    ``matches`` is an int array of shape (K, 2) holding
    [detection_index, tracker_index] pairs. Pairs chosen by the assignment
    whose IoU is below ``iou_threshold`` are reported as unmatched on both sides.
    """
    # NOTE(review): the guard, the append calls and the threshold filter body
    # were missing from the reviewed text; reinstated from the reference SORT
    # implementation - confirm.
    if len(trackers) == 0:
        return np.empty((0, 2), dtype=int), np.arange(len(detections)), np.empty((0, 5), dtype=int)
    iou_matrix = np.zeros((len(detections), len(trackers)), dtype=np.float32)

    for d, det in enumerate(detections):
        for t, trk in enumerate(trackers):
            iou_matrix[d, t] = iou(det, trk)
    # Negate so that minimising total cost maximises total IoU.
    matched_indices = linear_assignment(-iou_matrix)

    unmatched_detections = []
    for d, det in enumerate(detections):
        if d not in matched_indices[:, 0]:
            unmatched_detections.append(d)
    unmatched_trackers = []
    for t, trk in enumerate(trackers):
        if t not in matched_indices[:, 1]:
            unmatched_trackers.append(t)

    # filter out matched with low IOU
    matches = []
    for m in matched_indices:
        if iou_matrix[m[0], m[1]] < iou_threshold:
            unmatched_detections.append(m[0])
            unmatched_trackers.append(m[1])
        else:
            matches.append(m.reshape(1, 2))
    if len(matches) == 0:
        matches = np.empty((0, 2), dtype=int)
    else:
        matches = np.concatenate(matches, axis=0)

    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
191 |
192 |
193 |
194 |
class Sort(object):
    """Multi-object tracker that keeps one KalmanBoxTracker per tracked object."""

    def __init__(self, max_age=1, min_hits=3):
        """
        Sets key parameters for SORT

        max_age  - a tracker is removed once its time_since_update exceeds this.
        min_hits - hit streak a tracker needs before it is reported (waived
                   while frame_count <= min_hits).
        """
        self.max_age = max_age
        self.min_hits = min_hits
        self.trackers = []
        self.frame_count = 0

    def update(self, dets):
        """
        Params:
          dets - a numpy array of detections, one row per detection. Columns 0-3
                 are [x1,y1,x2,y2]; KalmanBoxTracker reads column 6 as the
                 object class, so each row needs at least 7 columns
                 (presumably the 7-column non_max_suppression output - TODO confirm).
        Requires: this method must be called once for each frame even with empty detections.
        Returns an array with rows [x1,y1,x2,y2,id,objclass], or an empty
        (0,5) array when no tracker is reported.

        NOTE: The number of objects returned may differ from the number of detections provided.
        """
        # NOTE(review): several statements of this method (NaN handling, pops,
        # tracker update/append, the id column) were missing from the reviewed
        # text and are reinstated from the reference SORT implementation - confirm.
        self.frame_count += 1
        # get predicted locations from existing trackers.
        trks = np.zeros((len(self.trackers), 5))
        to_del = []
        ret = []
        for t, trk in enumerate(trks):
            pos = self.trackers[t].predict()[0]
            trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
            if np.any(np.isnan(pos)):
                to_del.append(t)
        # Drop rows with invalid predictions, and the trackers that made them.
        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
        for t in reversed(to_del):
            self.trackers.pop(t)
        matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets, trks)

        # update matched trackers with assigned detections
        for t, trk in enumerate(self.trackers):
            if t not in unmatched_trks:
                d = matched[np.where(matched[:, 1] == t)[0], 0]
                trk.update(dets[d, :][0])

        # create and initialise new trackers for unmatched detections
        for i in unmatched_dets:
            trk = KalmanBoxTracker(dets[i, :])
            self.trackers.append(trk)

        # Walk backwards so that pop(i) does not shift trackers not yet visited.
        i = len(self.trackers)
        for trk in reversed(self.trackers):
            d = trk.get_state()[0]
            if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
                ret.append(np.concatenate((d, [trk.id + 1], [trk.objclass])).reshape(1, -1))  # +1 as MOT benchmark requires positive
            i -= 1
            # remove dead tracklet
            if trk.time_since_update > self.max_age:
                self.trackers.pop(i)
        if len(ret) > 0:
            return np.concatenate(ret)
        return np.empty((0, 5))
250 |
251 |
def parse_args():
    """Parse input arguments.

    Returns the argparse namespace; its ``display`` attribute is True only
    when ``--display`` is passed on the command line.
    """
    parser = argparse.ArgumentParser(description='SORT demo')
    parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]', action='store_true')
    args = parser.parse_args()
    return args
257 |
258 |
if __name__ == '__main__':
259 |
# all train
260 |
sequences = ['PETS09-S2L1','TUD-Campus','TUD-Stadtmitte','ETH-Bahnhof','ETH-Sunnyday','ETH-Pedcross2','KITTI-13','KITTI-17','ADL-Rundle-6','ADL-Rundle-8','Venice-2']
261 |
args = parse_args()
262 |
display = args.display
263 |
phase = 'train'
264 |
total_time = 0.0
265 |
total_frames = 0
266 |
colours = np.random.rand(32,3) #used only for display
267 |
268 |
if not os.path.exists('mot_benchmark'):
269 |
print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n ( E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n')
270 |
271 |
272 |
fig = plt.figure()
273 |
274 |
if not os.path.exists('output'):
275 |
276 |
277 |
for seq in sequences:
278 |
mot_tracker = Sort() #create instance of the SORT tracker
279 |
seq_dets = np.loadtxt('data/%s/det.txt'%(seq),delimiter=',') #load detections
280 |
with open('output/%s.txt'%(seq),'w') as out_file:
281 |
print("Processing %s."%(seq))
282 |
for frame in range(int(seq_dets[:,0].max())):
283 |
frame += 1 #detection and frame numbers begin at 1
284 |
dets = seq_dets[seq_dets[:,0]==frame,2:7]
285 |
dets[:,2:4] += dets[:,0:2] #convert to [x1,y1,w,h] to [x1,y1,x2,y2]
286 |
total_frames += 1
287 |
288 |
289 |
ax1 = fig.add_subplot(111, aspect='equal')
290 |
fn = 'mot_benchmark/%s/%s/img1/%06d.jpg'%(phase,seq,frame)
291 |
im =io.imread(fn)
292 |
293 |
plt.title(seq+' Tracked Targets')
294 |
295 |
start_time = time.time()
296 |
trackers = mot_tracker.update(dets)
297 |
cycle_time = time.time() - start_time
298 |
total_time += cycle_time
299 |
300 |
for d in trackers:
301 |
302 |
303 |
d = d.astype(np.int32)
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
print("Total Tracking took: %.3f for %d frames or %.1f FPS"%(total_time,total_frames,total_frames/total_time))
313 |
314 |
print("Note: to get real runtime results run without the option: --display")
315 |
316 |
317 |
Binary file (125 Bytes). View file
Binary file (3.65 kB). View file
Binary file (1.38 kB). View file
Binary file (1.43 kB). View file
Binary file (7.05 kB). View file
Binary file (7.1 kB). View file
@@ -0,0 +1,121 @@
1 |
import glob
2 |
import random
3 |
import os
4 |
import numpy as np
5 |
6 |
import torch
7 |
8 |
from import Dataset
9 |
from PIL import Image
10 |
import torchvision.transforms as transforms
11 |
12 |
##import matplotlib.pyplot as plt
13 |
##import matplotlib.patches as patches
14 |
15 |
from skimage.transform import resize
16 |
17 |
import sys
18 |
19 |
class ImageFolder(Dataset):
    """Dataset over every file matching ``<folder_path>/*.*``, in sorted order.

    Each item is ``(img_path, input_img)`` where ``input_img`` is the image
    padded to a square, resized to ``img_size`` x ``img_size`` and returned as
    a channels-first float tensor.
    """

    def __init__(self, folder_path, img_size=416):
        self.files = sorted(glob.glob('%s/*.*' % folder_path))
        self.img_shape = (img_size, img_size)

    def __getitem__(self, index):
        # Wrap around so any non-negative index is valid.
        img_path = self.files[index % len(self.files)]
        # Extract image
        # NOTE(review): the argument of np.array(...) was truncated in the
        # reviewed text; Image.open(img_path) is the evident intent - confirm.
        img = np.array(Image.open(img_path))
        h, w, _ = img.shape  # requires a 3-dimensional (H, W, C) array
        dim_diff = np.abs(h - w)
        # Upper (left) and lower (right) padding
        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
        # Determine padding
        pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
        # Add padding
        input_img = np.pad(img, pad, 'constant', constant_values=127.5) / 255.
        # Resize and normalize
        input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
        # Channels-first
        input_img = np.transpose(input_img, (2, 0, 1))
        # As pytorch tensor
        input_img = torch.from_numpy(input_img).float()

        return img_path, input_img

    def __len__(self):
        return len(self.files)
47 |
48 |
49 |
class ListDataset(Dataset):
    """Dataset driven by a text file listing one image path per line.

    Label paths are derived from image paths by replacing 'images' with
    'labels' and the '.png'/'.jpg' extension with '.txt'. Each item is
    ``(img_path, input_img, filled_labels)`` where ``filled_labels`` is a
    ``(max_objects, 5)`` tensor, zero-padded after the real label rows.
    """

    def __init__(self, list_path, img_size=416):
        with open(list_path, 'r') as file:
            # Entries keep their trailing newline; it is stripped on access.
            self.img_files = file.readlines()
        self.label_files = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt') for path in self.img_files]
        self.img_shape = (img_size, img_size)
        self.max_objects = 50

    def __getitem__(self, index):

        # ---------
        #  Image
        # ---------

        img_path = self.img_files[index % len(self.img_files)].rstrip()
        # NOTE(review): the argument of np.array(...) was truncated in the
        # reviewed text (here and in the loop below); Image.open(img_path) is
        # the evident intent - confirm.
        img = np.array(Image.open(img_path))

        # Handles images with less than three channels
        # (skips ahead to the next listed image until one is 3-dimensional)
        while len(img.shape) != 3:
            index += 1
            img_path = self.img_files[index % len(self.img_files)].rstrip()
            img = np.array(Image.open(img_path))

        h, w, _ = img.shape
        dim_diff = np.abs(h - w)
        # Upper (left) and lower (right) padding
        pad1, pad2 = dim_diff // 2, dim_diff - dim_diff // 2
        # Determine padding
        pad = ((pad1, pad2), (0, 0), (0, 0)) if h <= w else ((0, 0), (pad1, pad2), (0, 0))
        # Add padding
        input_img = np.pad(img, pad, 'constant', constant_values=128) / 255.
        padded_h, padded_w, _ = input_img.shape
        # Resize and normalize
        input_img = resize(input_img, (*self.img_shape, 3), mode='reflect')
        # Channels-first
        input_img = np.transpose(input_img, (2, 0, 1))
        # As pytorch tensor
        input_img = torch.from_numpy(input_img).float()

        # ---------
        #  Label
        # ---------

        label_path = self.label_files[index % len(self.img_files)].rstrip()

        labels = None
        if os.path.exists(label_path):
            # Rows are reshaped to 5 columns; columns 1-4 are used below as
            # centre x, centre y, width, height relative to the original image.
            labels = np.loadtxt(label_path).reshape(-1, 5)
            # Extract coordinates for unpadded + unscaled image
            x1 = w * (labels[:, 1] - labels[:, 3]/2)
            y1 = h * (labels[:, 2] - labels[:, 4]/2)
            x2 = w * (labels[:, 1] + labels[:, 3]/2)
            y2 = h * (labels[:, 2] + labels[:, 4]/2)
            # Adjust for added padding
            x1 += pad[1][0]
            y1 += pad[0][0]
            x2 += pad[1][0]
            y2 += pad[0][0]
            # Calculate ratios from coordinates
            labels[:, 1] = ((x1 + x2) / 2) / padded_w
            labels[:, 2] = ((y1 + y2) / 2) / padded_h
            labels[:, 3] *= w / padded_w
            labels[:, 4] *= h / padded_h
        # Fill matrix
        filled_labels = np.zeros((self.max_objects, 5))
        if labels is not None:
            # Rows beyond max_objects are silently dropped.
            filled_labels[range(len(labels))[:self.max_objects]] = labels[:self.max_objects]
        filled_labels = torch.from_numpy(filled_labels)

        return img_path, input_img, filled_labels

    def __len__(self):
        return len(self.img_files)
@@ -0,0 +1,36 @@
1 |
2 |
3 |
def parse_model_config(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions

    Returns a list of dicts, one per ``[section]`` header in file order. Each
    dict has a 'type' key (the section name) plus one string-valued entry per
    ``key=value`` line. 'convolutional' sections default 'batch_normalize' to
    the integer 0 when the file does not set it.
    """
    # Context manager so the handle is closed even if reading fails.
    with open(path, 'r') as file:
        lines = file.read().split('\n')
    # Strip first, then filter: whitespace-only lines and indented comments
    # are skipped instead of reaching the key=value split below.
    lines = [x.strip() for x in lines]  # get rid of fringe whitespaces
    lines = [x for x in lines if x and not x.startswith('#')]
    module_defs = []
    for line in lines:
        if line.startswith('['):  # This marks the start of a new block
            module_defs.append({})
            module_defs[-1]['type'] = line[1:-1].rstrip()
            if module_defs[-1]['type'] == 'convolutional':
                module_defs[-1]['batch_normalize'] = 0
        else:
            key, value = line.split("=")
            value = value.strip()
            module_defs[-1][key.rstrip()] = value.strip()

    return module_defs
22 |
23 |
def parse_data_config(path):
    """Parses the data configuration file

    Returns a dict of string options read from ``key=value`` lines. Blank
    lines and lines starting with '#' are ignored. 'gpus' and 'num_workers'
    are pre-populated with defaults that the file may override.
    """
    options = dict()
    options['gpus'] = '0,1,2,3'
    options['num_workers'] = '10'
    with open(path, 'r') as fp:
        lines = fp.readlines()
    for line in lines:
        line = line.strip()
        if line == '' or line.startswith('#'):
            continue
        key, value = line.split('=')
        options[key.strip()] = value.strip()
    return options
@@ -0,0 +1,258 @@
1 |
from __future__ import division
2 |
import math
3 |
import time
4 |
import torch
5 |
import torch.nn as nn
6 |
import torch.nn.functional as F
7 |
from torch.autograd import Variable
8 |
import numpy as np
9 |
10 |
#import matplotlib.pyplot as plt
11 |
#import matplotlib.patches as patches
12 |
13 |
14 |
def load_classes(path):
    """
    Loads class labels at 'path'

    Returns the file's lines as a list of strings, one label per line.
    """
    # Context manager so the handle is closed after reading.
    with open(path, "r") as fp:
        names = fp.read().split("\n")
    # Drop only the empty entry produced by a trailing newline, so a file
    # without a final newline does not lose its last label.
    if names and names[-1] == "":
        names.pop()
    return names
21 |
22 |
23 |
def weights_init_normal(m):
    """Initialise the parameters of module ``m`` in place, based on its class name.

    Classes whose name contains "Conv" get weights drawn from N(0, 0.02);
    classes whose name contains "BatchNorm2d" get weights from N(1, 0.02) and
    a zero bias. Other modules are left untouched.
    """
    # NOTE(review): the tensor arguments were truncated in the reviewed text;
    # m.weight.data / m.bias.data are the evident intent - confirm.
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find("BatchNorm2d") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)
30 |
31 |
32 |
def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.

    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([0.0], precision, [0.0]))

    # compute the precision envelope
    # (right-to-left running maximum, making precision non-increasing in recall)
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap
58 |
59 |
60 |
def bbox_iou(box1, box2, x1y1x2y2=True):
    """
    Returns the IoU of two bounding boxes

    ``box1`` and ``box2`` are 2-D tensors with one box per row. With
    ``x1y1x2y2=True`` the first four columns are corner coordinates; otherwise
    they are (center x, center y, width, height). Widths and heights are
    computed with a +1 offset.
    """
    if not x1y1x2y2:
        # Transform from center and width to exact coordinates
        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
    else:
        # Get the coordinates of bounding boxes
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    # get the coordinates of the intersection rectangle
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)
    # Intersection area (clamped at zero for disjoint boxes)
    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
        inter_rect_y2 - inter_rect_y1 + 1, min=0
    )
    # Union Area
    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    # The small epsilon guards against division by zero.
    iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)

    return iou
91 |
92 |
93 |
def bbox_iou_numpy(box1, box2):
    """Computes IoU between bounding boxes.

    Parameters
    ----------
    box1 : ndarray
        (N, 4) shaped array with bboxes
    box2 : ndarray
        (M, 4) shaped array with bboxes

    Returns
    -------
    : ndarray
        (N, M) shaped array with IoUs
    """
    area = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])

    # Broadcast box1 as a column against box2 as a row to get all N x M pairs.
    iw = np.minimum(np.expand_dims(box1[:, 2], axis=1), box2[:, 2]) - np.maximum(
        np.expand_dims(box1[:, 0], 1), box2[:, 0]
    )
    ih = np.minimum(np.expand_dims(box1[:, 3], axis=1), box2[:, 3]) - np.maximum(
        np.expand_dims(box1[:, 1], 1), box2[:, 1]
    )

    iw = np.maximum(iw, 0)
    ih = np.maximum(ih, 0)

    ua = np.expand_dims((box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1]), axis=1) + area - iw * ih

    # Keep the union strictly positive to avoid division by zero.
    ua = np.maximum(ua, np.finfo(float).eps)

    intersection = iw * ih

    return intersection / ua
125 |
126 |
127 |
def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
    """
    Removes detections with lower object confidence score than 'conf_thres' and performs
    Non-Maximum Suppression to further filter detections.
    Returns detections with shape:
        (x1, y1, x2, y2, object_conf, class_score, class_pred)

    The result is a list with one entry per image: a tensor of kept
    detections, or None when nothing passed the confidence threshold.
    Note: the first four columns of ``prediction`` are overwritten in place
    with corner coordinates.
    """
    # NOTE(review): several expressions in this function (box_corner allocation,
    # the torch.cat calls, append/break/continue) were truncated or missing in
    # the reviewed text and are reinstated from the reference PyTorch-YOLOv3
    # implementation - confirm.

    # From (center x, center y, width, height) to (x1, y1, x2, y2)
    box_corner = prediction.new(prediction.shape)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None for _ in range(len(prediction))]
    for image_i, image_pred in enumerate(prediction):
        # Filter out confidence scores below threshold.
        # image_pred[:, 4] is already 1-D; no squeeze, which would collapse a
        # single-row mask to a scalar and break the row indexing below.
        conf_mask = image_pred[:, 4] >= conf_thres
        image_pred = image_pred[conf_mask]
        # If none are remaining => process next image
        if not image_pred.size(0):
            continue
        # Get score and class with highest confidence
        class_conf, class_pred = torch.max(image_pred[:, 5 : 5 + num_classes], 1, keepdim=True)
        # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
        detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)
        # Iterate through all predicted classes
        unique_labels = detections[:, -1].cpu().unique()
        if prediction.is_cuda:
            unique_labels = unique_labels.cuda()
        for c in unique_labels:
            # Get the detections with the particular class
            detections_class = detections[detections[:, -1] == c]
            # Sort the detections by maximum objectness confidence
            _, conf_sort_index = torch.sort(detections_class[:, 4], descending=True)
            detections_class = detections_class[conf_sort_index]
            # Perform non-maximum suppression
            max_detections = []
            while detections_class.size(0):
                # Get detection with highest confidence and save as max detection
                max_detections.append(detections_class[0].unsqueeze(0))
                # Stop if we're at the last detection
                if len(detections_class) == 1:
                    break
                # Get the IOUs for all boxes with lower confidence
                ious = bbox_iou(max_detections[-1], detections_class[1:])
                # Remove detections with IoU >= NMS threshold
                detections_class = detections_class[1:][ious < nms_thres]

            max_detections = torch.cat(max_detections).data
            # Add max detections to outputs
            output[image_i] = (
                max_detections if output[image_i] is None else torch.cat((output[image_i], max_detections))
            )

    return output
185 |
186 |
187 |
def build_targets(
    pred_boxes, pred_conf, pred_cls, target, anchors, num_anchors, num_classes, grid_size, ignore_thres, img_dim
):
    """Build per-anchor, per-cell training targets from ground-truth labels.

    ``target`` is indexed as ``target[b, t, 0:5]`` = (class, x, y, w, h); the
    box values are multiplied by ``grid_size`` to get grid units, and rows
    summing to zero are treated as padding.

    Returns ``(nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls)``:
    the number of ground-truth boxes, the number of best-anchor predictions
    with IoU > 0.5, matching class and confidence > 0.5, followed by the
    target tensors of shape (nB, nA, nG, nG) (``tcls`` has a trailing class axis).
    """
    nB = target.size(0)
    nA = num_anchors
    nC = num_classes
    nG = grid_size
    mask = torch.zeros(nB, nA, nG, nG)
    conf_mask = torch.ones(nB, nA, nG, nG)
    tx = torch.zeros(nB, nA, nG, nG)
    ty = torch.zeros(nB, nA, nG, nG)
    tw = torch.zeros(nB, nA, nG, nG)
    th = torch.zeros(nB, nA, nG, nG)
    tconf = torch.ByteTensor(nB, nA, nG, nG).fill_(0)
    tcls = torch.ByteTensor(nB, nA, nG, nG, nC).fill_(0)

    nGT = 0
    nCorrect = 0
    for b in range(nB):
        for t in range(target.shape[1]):
            # All-zero rows are padding, not real objects.
            if target[b, t].sum() == 0:
                continue
            nGT += 1
            # Convert to position relative to box
            gx = target[b, t, 1] * nG
            gy = target[b, t, 2] * nG
            gw = target[b, t, 3] * nG
            gh = target[b, t, 4] * nG
            # Get grid box indices
            gi = int(gx)
            gj = int(gy)
            # Get shape of gt box
            gt_box = torch.FloatTensor(np.array([0, 0, gw, gh])).unsqueeze(0)
            # Get shape of anchor box
            anchor_shapes = torch.FloatTensor(np.concatenate((np.zeros((len(anchors), 2)), np.array(anchors)), 1))
            # Calculate iou between gt and anchor shapes
            anch_ious = bbox_iou(gt_box, anchor_shapes)
            # Where the overlap is larger than threshold set mask to zero (ignore)
            conf_mask[b, anch_ious > ignore_thres, gj, gi] = 0
            # Find the best matching anchor box
            best_n = np.argmax(anch_ious)
            # Get ground truth box
            gt_box = torch.FloatTensor(np.array([gx, gy, gw, gh])).unsqueeze(0)
            # Get the best prediction
            pred_box = pred_boxes[b, best_n, gj, gi].unsqueeze(0)
            # Masks
            mask[b, best_n, gj, gi] = 1
            conf_mask[b, best_n, gj, gi] = 1
            # Coordinates (offset of the box centre within its grid cell)
            tx[b, best_n, gj, gi] = gx - gi
            ty[b, best_n, gj, gi] = gy - gj
            # Width and height (log-ratio to the anchor; epsilon avoids log(0))
            tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n][0] + 1e-16)
            th[b, best_n, gj, gi] = math.log(gh / anchors[best_n][1] + 1e-16)
            # One-hot encoding of label
            target_label = int(target[b, t, 0])
            tcls[b, best_n, gj, gi, target_label] = 1
            tconf[b, best_n, gj, gi] = 1

            # Calculate iou between ground truth and best matching prediction
            iou = bbox_iou(gt_box, pred_box, x1y1x2y2=False)
            pred_label = torch.argmax(pred_cls[b, best_n, gj, gi])
            score = pred_conf[b, best_n, gj, gi]
            if iou > 0.5 and pred_label == target_label and score > 0.5:
                nCorrect += 1

    return nGT, nCorrect, mask, conf_mask, tx, ty, tw, th, tconf, tcls
254 |
255 |
256 |
def to_categorical(y, num_classes):
    """ 1-hot encodes a tensor

    ``y`` indexes the rows of a ``num_classes`` x ``num_classes`` identity
    matrix; the result is a uint8 tensor with a trailing axis of length
    ``num_classes``.
    """
    return torch.from_numpy(np.eye(num_classes, dtype="uint8")[y])