|
|
|
"""
|
|
Created on Mon Sep 4 16:03:42 2023
|
|
|
|
@author: SABARI
|
|
"""
|
|
import time
|
|
import tensorflow as tf
|
|
import numpy as np
|
|
|
|
|
|
def box_iou(box1, box2, eps=1e-7):
    """
    Calculate pairwise intersection-over-union (IoU) of two sets of boxes.

    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.

    Args:
        box1 (tf.Tensor): A tensor of shape (N, 4) representing N bounding boxes.
        box2 (tf.Tensor): A tensor of shape (M, 4) representing M bounding boxes.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.

    Returns:
        (tf.Tensor): An NxM tensor containing the pairwise IoU values for every element in box1 and box2.
    """
    # Add singleton axes so the corner tensors have shapes (N, 1, 2) and
    # (1, M, 2); every subsequent element-wise op then broadcasts to (N, M, 2).
    # Without this, (N, 2) and (M, 2) only combine when N == M and would give
    # element-wise rather than pairwise IoU.
    a1, a2 = tf.split(tf.expand_dims(box1, 1), 2, axis=2)
    b1, b2 = tf.split(tf.expand_dims(box2, 0), 2, axis=2)

    # Per-axis overlap, clamped at zero for disjoint boxes; the product over
    # the last axis is the intersection area, shape (N, M).
    inter = tf.reduce_prod(tf.maximum(tf.minimum(a2, b2) - tf.maximum(a1, b1), 0), axis=2)

    area1 = tf.reduce_prod(a2 - a1, axis=2)  # (N, 1)
    area2 = tf.reduce_prod(b2 - b1, axis=2)  # (1, M)

    # IoU = intersection / union, with eps guarding against a zero union.
    return inter / (area1 + area2 - inter + eps)
|
|
|
|
|
|
def xywh2xyxy(x):
    """
    Convert boxes from centre format (x, y, width, height) to corner format (x1, y1, x2, y2).

    (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner.
    The input is not modified; the result is a NumPy copy of it with the four
    entries along the last axis overwritten.

    Args:
        x: Box coordinates whose last axis holds (x, y, width, height).

    Returns:
        corners (np.ndarray): The box coordinates in (x1, y1, x2, y2) format.
    """
    corners = np.copy(x)

    # Half extents are shared by both corners of each box.
    half_width = x[..., 2] / 2
    half_height = x[..., 3] / 2
    centre_x = x[..., 0]
    centre_y = x[..., 1]

    corners[..., 0] = centre_x - half_width    # x1
    corners[..., 1] = centre_y - half_height   # y1
    corners[..., 2] = centre_x + half_width    # x2
    corners[..., 3] = centre_y + half_height   # y2
    return corners
|
|
|
|
|
|
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, agnostic=False,
                        multi_label=False, max_det=300, nc=0,
                        max_time_img=0.05,
                        max_nms=100,
                        max_wh=7680):
    """
    Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.

    Arguments:
        prediction: An array/tensor of shape (batch_size, 4 + num_classes + num_masks, num_boxes)
            containing the predicted boxes (x, y, w, h), class scores, and mask values.
            If a list or tuple is given, only its first element is used.
        conf_thres (float): The confidence threshold below which boxes will be filtered out.
            Valid values are between 0.0 and 1.0.
        iou_thres (float): The IoU threshold passed to tf.image.non_max_suppression(); boxes that
            overlap a higher-scoring kept box by more than this are suppressed.
            Valid values are between 0.0 and 1.0.
        agnostic (bool): If True, boxes of all classes compete against each other; otherwise
            NMS is effectively performed per class.
        multi_label (bool): If True, each box may have multiple labels (only honoured when there
            is more than one class).
        max_det (int): The maximum number of boxes to keep after NMS.
        nc (int): (optional) The number of classes output by the model. Any indices after this
            will be considered masks. 0 means "all columns after the 4 box columns are classes".
        max_time_img (float): Per-image time budget in seconds; the overall limit is
            0.5 + max_time_img * batch_size.
        max_nms (int): The maximum number of boxes passed into tf.image.non_max_suppression();
            it is also used as that call's max_output_size.
        max_wh (int): The maximum box width and height in pixels, used as the per-class offset.

    Returns:
        (List[np.ndarray]): A list of length batch_size, where each element is an array of
            shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns
            (x1, y1, x2, y2, confidence, class, mask1, mask2, ...). Images with no detections,
            and images not reached before the time limit, get an empty (0, 6 + num_masks) array.
    """
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
    if isinstance(prediction, (list, tuple)):
        prediction = prediction[0]

    bs = np.shape(prediction)[0]  # batch size
    nc = nc or (np.shape(prediction)[1] - 4)  # number of classes
    nm = np.shape(prediction)[1] - nc - 4  # number of mask columns
    mi = 4 + nc  # index of the first mask column

    # Per-image candidate mask: best class score exceeds the confidence threshold.
    xc = np.amax(prediction[:, 4:mi], axis=1) > conf_thres

    # Plain Python arithmetic; no TensorFlow tensor is needed for a scalar limit.
    time_limit = 0.5 + max_time_img * bs
    multi_label &= nc > 1  # multiple labels only make sense with more than one class

    t = time.time()
    # One independent empty array per image (a `[arr] * bs` list would alias
    # the same array object in every slot).
    output = [np.zeros((0, 6 + nm)) for _ in range(bs)]
    for xi, x in enumerate(prediction):
        x = np.transpose(x)  # (channels, num_boxes) -> (num_boxes, channels)
        x = x[xc[xi]]  # keep only confident candidates

        if np.shape(x)[0] == 0:
            continue

        box = x[:, :4]
        cls = x[:, 4:4 + nc]
        mask = x[:, 4 + nc:]
        box = xywh2xyxy(box)

        if multi_label:
            # One output row per (box, class) pair above the threshold.
            row_idx, cls_idx = np.where(cls > conf_thres)
            x = np.concatenate(
                [box[row_idx],
                 np.expand_dims(cls[row_idx, cls_idx], axis=-1),
                 np.expand_dims(cls_idx, axis=-1).astype(np.float32),
                 mask[row_idx]],
                axis=1)
        else:
            # Best class only.
            conf = np.max(cls, axis=1)
            j = np.argmax(cls, axis=1)
            keep = np.where(conf > conf_thres)[0]
            x = np.concatenate(
                [box[keep],
                 np.expand_dims(conf[keep], axis=-1),
                 np.expand_dims(j[keep], axis=-1).astype(np.float32),
                 mask[keep]],
                axis=1)

        n = np.shape(x)[0]
        if n == 0:
            continue

        # Sort by confidence (descending) and cap the number of NMS candidates.
        sorted_indices = np.argsort(x[:, 4])[::-1]
        x = x[sorted_indices[:max_nms]]

        # Shift each box by class_index * max_wh so boxes of different classes
        # never overlap; the shift is zero for class-agnostic NMS. A Python
        # scalar keeps this arithmetic in NumPy.
        class_offset = x[:, 5:6] * (0.0 if agnostic else float(max_wh))
        # Cast NMS inputs to float32 so float64 predictions are handled the
        # same way as float32 ones.
        boxes = (x[:, :4] + class_offset).astype(np.float32)
        scores = x[:, 4].astype(np.float32)
        selected = tf.image.non_max_suppression(boxes, scores, max_nms, iou_threshold=iou_thres)
        selected = selected.numpy()
        selected = selected[:max_det]  # limit detections

        output[xi] = x[selected, :]

        if (time.time() - t) > time_limit:
            # Time budget exhausted: remaining images keep their empty result.
            break

    return output
|
|
|
|
import numpy as np
|
|
|
|
def optimized_object_detection(prediction, conf_thres=0.25, iou_thres=0.45, agnostic=False,
                               multi_label=False, max_det=300, nc=0, max_time_img=0.05,
                               max_nms=100, max_wh=7680):
    """
    Filter raw detections by confidence and apply non-maximum suppression (NMS) per image.

    Arguments:
        prediction: An array/tensor of shape (batch_size, 4 + num_classes + num_masks, num_boxes)
            holding boxes (x, y, w, h), class scores, and mask values. If a list or tuple
            is given, only its first element is used.
        conf_thres (float): Confidence threshold; candidates at or below it are dropped.
            Valid values are between 0.0 and 1.0.
        iou_thres (float): IoU threshold passed to tf.image.non_max_suppression().
            Valid values are between 0.0 and 1.0.
        agnostic (bool): If True, boxes of all classes compete against each other in NMS.
        multi_label (bool): If True, a box may yield one detection per class above the
            threshold (only honoured when there is more than one class).
        max_det (int): Maximum number of detections kept per image after NMS.
        nc (int): Number of classes; 0 means every column after the 4 box columns is a class.
        max_time_img (float): Per-image time budget in seconds; the overall limit is
            0.5 + max_time_img * batch_size.
        max_nms (int): Maximum number of candidates passed to NMS; also used as the NMS
            max_output_size.
        max_wh (int): Maximum box width/height in pixels, used as the per-class offset.

    Returns:
        (List[np.ndarray]): One array per image of shape (num_kept, 6 + num_masks) with columns
            (x1, y1, x2, y2, confidence, class, mask1, mask2, ...). Images with no detections,
            and images not reached before the time limit, get an empty (0, 6 + num_masks) array.
    """
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    if isinstance(prediction, (list, tuple)):
        prediction = prediction[0]

    bs, _, _ = prediction.shape

    if nc == 0:
        nc = prediction.shape[1] - 4

    nm = prediction.shape[1] - nc - 4  # number of mask columns
    mi = 4 + nc  # index of the first mask column

    # Per-image candidate mask: best class score exceeds the confidence threshold.
    xc = np.amax(prediction[:, 4:mi], axis=1) > conf_thres

    time_limit = 0.5 + max_time_img * bs

    multi_label &= nc > 1  # multiple labels only make sense with more than one class

    t = time.time()
    # One independent empty array per image (a `[arr] * bs` list would alias
    # the same array object in every slot).
    output = [np.zeros((0, 6 + nm)) for _ in range(bs)]

    for xi, x in enumerate(prediction):
        x = np.transpose(x)  # (channels, num_boxes) -> (num_boxes, channels)
        x = x[xc[xi]]  # keep only confident candidates

        if np.shape(x)[0] == 0:
            continue

        box = x[:, :4]
        cls = x[:, 4:4 + nc]
        mask = x[:, 4 + nc:]
        box = xywh2xyxy(box)

        if multi_label:
            # One output row per (box, class) pair above the threshold.
            row_idx, cls_idx = np.where(cls > conf_thres)
            x = np.concatenate(
                [box[row_idx],
                 np.expand_dims(cls[row_idx, cls_idx], axis=-1),
                 np.expand_dims(cls_idx, axis=-1).astype(np.float32),
                 mask[row_idx]],
                axis=1)
        else:
            # Best class only.
            conf = np.max(cls, axis=1)
            j = np.argmax(cls, axis=1)
            keep = np.where(conf > conf_thres)[0]
            x = np.concatenate(
                [box[keep],
                 np.expand_dims(conf[keep], axis=-1),
                 np.expand_dims(j[keep], axis=-1).astype(np.float32),
                 mask[keep]],
                axis=1)

        n = np.shape(x)[0]
        if n == 0:
            continue

        # Sort by confidence (descending) and cap the number of NMS candidates.
        sorted_indices = np.argsort(x[:, 4])[::-1]
        x = x[sorted_indices[:max_nms]]

        # Shift each box by class_index * max_wh so boxes of different classes
        # never overlap; the shift is zero for class-agnostic NMS.
        class_offset = x[:, 5:6] * (0.0 if agnostic else max_wh)
        # Cast NMS inputs to float32 so float64 predictions are handled the
        # same way as float32 ones.
        boxes = (x[:, :4] + class_offset).astype(np.float32)
        scores = x[:, 4].astype(np.float32)
        selected = tf.image.non_max_suppression(boxes, scores, max_nms, iou_threshold=iou_thres)

        selected = selected.numpy()
        selected = selected[:max_det]  # limit detections

        # Index with the NMS result. The pre-NMS `keep` indices refer to rows
        # of the unsorted, untruncated array and do not exist at all in the
        # multi-label branch, so using them here would skip suppression.
        output[xi] = x[selected, :]

        if (time.time() - t) > time_limit:
            # Time budget exhausted: remaining images keep their empty result.
            break

    return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|