darknet-coco-object_detection / object_tracker.py
danieladejumo's picture
Files Commit
1ba06ec
from models import *
from utils import *
import os, sys, time, datetime, random
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.autograd import Variable
from PIL import Image
# load weights and set defaults
config_path='config/yolov3.cfg'
weights_path='config/yolov3.weights'
class_path='config/coco.names'
img_size=416
conf_thres=0.8
nms_thres=0.4
# load model and put into eval mode
model = Darknet(config_path, img_size=img_size)
model.load_weights(weights_path)
model.cuda()
model.eval()
classes = utils.load_classes(class_path)
Tensor = torch.cuda.FloatTensor
def detect_image(img):
# scale and pad image
ratio = min(img_size/img.size[0], img_size/img.size[1])
imw = round(img.size[0] * ratio)
imh = round(img.size[1] * ratio)
img_transforms = transforms.Compose([ transforms.Resize((imh, imw)),
transforms.Pad((max(int((imh-imw)/2),0), max(int((imw-imh)/2),0), max(int((imh-imw)/2),0), max(int((imw-imh)/2),0)),
(128,128,128)),
transforms.ToTensor(),
])
# convert image to Tensor
image_tensor = img_transforms(img).float()
image_tensor = image_tensor.unsqueeze_(0)
input_img = Variable(image_tensor.type(Tensor))
# run inference on the model and get detections
with torch.no_grad():
detections = model(input_img)
detections = utils.non_max_suppression(detections, 80, conf_thres, nms_thres)
return detections[0]
videopath = './videos/HorseRacing.mp4'
import cv2
from sort import *
colors=[(255,0,0),(0,255,0),(0,0,255),(255,0,255),(128,0,0),(0,128,0),(0,0,128),(128,0,128),(128,128,0),(0,128,128)]
vid = cv2.VideoCapture(videopath)
mot_tracker = Sort()
cv2.namedWindow('Stream',cv2.WINDOW_NORMAL)
cv2.resizeWindow('Stream', (800,600))
fourcc = cv2.VideoWriter_fourcc(*'XVID')
ret,frame=vid.read()
vw = frame.shape[1]
vh = frame.shape[0]
print ("Video size", vw,vh)
outvideo = cv2.VideoWriter(videopath.replace(".mp4", "-det.mp4"),fourcc,20.0,(vw,vh))
frames = 0
starttime = time.time()
while(True):
ret, frame = vid.read()
if not ret:
break
frames += 1
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
pilimg = Image.fromarray(frame)
detections = detect_image(pilimg)
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
img = np.array(pilimg)
pad_x = max(img.shape[0] - img.shape[1], 0) * (img_size / max(img.shape))
pad_y = max(img.shape[1] - img.shape[0], 0) * (img_size / max(img.shape))
unpad_h = img_size - pad_y
unpad_w = img_size - pad_x
if detections is not None:
tracked_objects = mot_tracker.update(detections.cpu())
unique_labels = detections[:, -1].cpu().unique()
n_cls_preds = len(unique_labels)
for x1, y1, x2, y2, obj_id, cls_pred in tracked_objects:
box_h = int(((y2 - y1) / unpad_h) * img.shape[0])
box_w = int(((x2 - x1) / unpad_w) * img.shape[1])
y1 = int(((y1 - pad_y // 2) / unpad_h) * img.shape[0])
x1 = int(((x1 - pad_x // 2) / unpad_w) * img.shape[1])
color = colors[int(obj_id) % len(colors)]
cls = classes[int(cls_pred)]
cv2.rectangle(frame, (x1, y1), (x1+box_w, y1+box_h), color, 4)
cv2.rectangle(frame, (x1, y1-35), (x1+len(cls)*19+80, y1), color, -1)
cv2.putText(frame, cls + "-" + str(int(obj_id)), (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 3)
cv2.imshow('Stream', frame)
outvideo.write(frame)
ch = 0xFF & cv2.waitKey(1)
if ch == 27:
break
totaltime = time.time()-starttime
print(frames, "frames", totaltime/frames, "s/frame")
cv2.destroyAllWindows()
outvideo.release()