# -*- coding: utf-8 -*-
"""
Created on Mon Sep 4 16:03:42 2023
@author: SABARI
"""
import time
import tensorflow as tf
import numpy as np
#from lsnms import nms, wbc
def box_iou(box1, box2, eps=1e-7):
    """
    Calculate intersection-over-union (IoU) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Args:
        box1 (tf.Tensor): A tensor of shape (N, 4) representing N bounding boxes.
        box2 (tf.Tensor): A tensor of shape (M, 4) representing M bounding boxes.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.
    Returns:
        (tf.Tensor): An NxM tensor containing the pairwise IoU values for every element in box1 and box2.
    """
    # Expand dims so the min/max below broadcast to an (N, M, 2) pairwise grid
    a1, a2 = tf.split(tf.expand_dims(box1, 1), 2, axis=2)  # each (N, 1, 2)
    b1, b2 = tf.split(tf.expand_dims(box2, 0), 2, axis=2)  # each (1, M, 2)
    inter = tf.reduce_prod(tf.maximum(tf.minimum(a2, b2) - tf.maximum(a1, b1), 0), axis=2)
    return inter / (tf.reduce_prod(a2 - a1, axis=2) + tf.reduce_prod(b2 - b1, axis=2) - inter + eps)
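# Quick sanity check for box_iou() (illustrative only, the boxes are made up):
# the first pair overlaps in a 5x5 region, so IoU = 25 / (100 + 100 - 25) ~= 0.143.
def _demo_box_iou():
    boxes_a = tf.constant([[0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 30.0, 30.0]])
    boxes_b = tf.constant([[5.0, 5.0, 15.0, 15.0]])
    print(box_iou(boxes_a, boxes_b))  # shape (2, 1): ~[[0.143], [0.0]]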
def xywh2xyxy(x):
    """
    Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
    top-left corner and (x2, y2) is the bottom-right corner.
    Args:
        x (np.ndarray): The input bounding box coordinates in (x, y, width, height) format.
    Returns:
        y (np.ndarray): The bounding box coordinates in (x1, y1, x2, y2) format.
    """
    y = np.copy(x)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # top left x
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # top left y
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # bottom right x
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # bottom right y
    return y
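# Worked example (synthetic numbers, not from the original script): a 20x10 box
# centred at (50, 40) becomes (40, 35, 60, 45) in corner format.
def _demo_xywh2xyxy():
    boxes = np.array([[50.0, 40.0, 20.0, 10.0]])  # (cx, cy, w, h)
    print(xywh2xyxy(boxes))  # [[40. 35. 60. 45.]]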
def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, agnostic=False,
                        multi_label=False, max_det=300, nc=0,  # number of classes (optional)
                        max_time_img=0.05,
                        max_nms=100,
                        max_wh=7680):
    """
    Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.
    Arguments:
        prediction (tf.Tensor): A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
            containing the predicted boxes, classes, and masks. The tensor should be in the format
            output by a model, such as YOLO.
        conf_thres (float): The confidence threshold below which boxes will be filtered out.
            Valid values are between 0.0 and 1.0.
        iou_thres (float): The IoU threshold below which boxes will be filtered out during NMS.
            Valid values are between 0.0 and 1.0.
        agnostic (bool): If True, the model is agnostic to the number of classes, and all
            classes will be considered as one.
        multi_label (bool): If True, each box may have multiple labels.
        max_det (int): The maximum number of boxes to keep after NMS.
        nc (int): (optional) The number of classes output by the model. Any indices after this will be considered masks.
        max_time_img (float): The maximum time (seconds) for processing one image.
        max_nms (int): The maximum number of boxes passed into tf.image.non_max_suppression().
        max_wh (int): The maximum box width and height in pixels.
    Returns:
        (List[np.ndarray]): A list of length batch_size, where each element is an array of
            shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns
            (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
    """
    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
    if isinstance(prediction, (list, tuple)):  # YOLOv8 model in validation mode, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output
    bs = np.shape(prediction)[0]  # batch size
    nc = nc or (np.shape(prediction)[1] - 4)  # number of classes
    nm = np.shape(prediction)[1] - nc - 4  # number of masks
    mi = 4 + nc  # mask start index
    # xc = tf.math.reduce_any(prediction[:, 4:mi] > conf_thres, axis=1)  # candidates
    xc = np.amax(prediction[:, 4:mi], axis=1) > conf_thres  # candidates
    # Settings
    # min_wh = 2  # (pixels) minimum box width and height
    time_limit = 0.5 + max_time_img * bs  # seconds to quit after
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    t = time.time()
    output = [np.zeros((0, 6 + nm))] * bs
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x = tf.where(tf.math.logical_or(x[:, 2:4] < min_wh, x[:, 2:4] > max_wh), tf.constant(0, dtype=tf.float32), x)  # width-height
        # x = tf.boolean_mask(x, xc[xi])
        # x = x.transpose(0, -1)[xc[xi]]  # confidence
        x = np.transpose(x)  # (num_boxes, 4 + nc + nm)
        # x = x.transpose()[:, xc[xi]]
        x = x[xc[xi]]  # keep confident candidates only
        # If none remain, process next image
        if np.shape(x)[0] == 0:
            continue
        # Detections matrix nx6 (xyxy, conf, cls)
        # box, cls, mask = tf.split(x, [4, nc, nm], axis=1)
        box = x[:, :4]
        cls = x[:, 4:4 + nc]
        mask = x[:, 4 + nc:]
        box = xywh2xyxy(box)  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
        if multi_label:
            i, j = np.where(cls > conf_thres)
            x = np.concatenate([box[i], np.expand_dims(cls[i, j], axis=-1), np.expand_dims(j, axis=-1).astype(np.float32), mask[i]], axis=1)
        else:
            conf = np.max(cls, axis=1)
            j = np.argmax(cls, axis=1)
            keep = np.where(conf > conf_thres)[0]
            x = np.concatenate([box[keep], np.expand_dims(conf[keep], axis=-1), np.expand_dims(j[keep], axis=-1).astype(np.float32), mask[keep]], axis=1)
        # Check shape
        n = np.shape(x)[0]  # number of boxes
        if n == 0:  # no boxes
            continue
        # x = x[tf.argsort(x[:, 4], direction='DESCENDING')[:max_nms]]  # sort by confidence and remove excess boxes
        sorted_indices = np.argsort(x[:, 4])[::-1]  # sort indices in descending order of confidence
        x = x[sorted_indices[:max_nms]]  # keep the top max_nms boxes
        # Batched NMS: offset boxes by class index so boxes of different classes never overlap
        c = x[:, 5:6] * (0.0 if agnostic else float(max_wh))  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = tf.image.non_max_suppression(boxes, scores, max_nms, iou_threshold=iou_thres)  # NMS
        i = i.numpy()
        i = i[:max_det]  # limit detections
        output[xi] = x[i, :]
        if (time.time() - t) > time_limit:
            break  # time limit exceeded
    return output
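# Minimal smoke test for non_max_suppression() (not part of the original script):
# random values stand in for a YOLOv8-style head output of shape (batch, 4 + nc, num_boxes),
# so the numbers are meaningless but the shapes and dtypes match what the function expects.
def _demo_non_max_suppression():
    rng = np.random.default_rng(0)
    nc, num_boxes = 3, 50
    pred = rng.random((1, 4 + nc, num_boxes)).astype(np.float32)
    pred[:, :4, :] *= 640.0  # scale the xywh rows into a 640x640 pixel range
    detections = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)[0]
    print(detections.shape)  # (num_kept, 6): x1, y1, x2, y2, confidence, class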
def optimized_object_detection(prediction, conf_thres=0.25, iou_thres=0.45, agnostic=False,
                               multi_label=False, max_det=300, nc=0, max_time_img=0.05,
                               max_nms=100, max_wh=7680):
    """NumPy-based variant of non_max_suppression() above with the same inputs and outputs;
    TensorFlow is only used for the final tf.image.non_max_suppression() call."""
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
    if isinstance(prediction, (list, tuple)):
        prediction = prediction[0]
    bs, _, _ = prediction.shape  # batch size, channels, number of boxes
    if nc == 0:
        nc = prediction.shape[1] - 4
    nm = prediction.shape[1] - nc - 4
    mi = 4 + nc
    xc = np.amax(prediction[:, 4:mi], axis=1) > conf_thres
    time_limit = 0.5 + max_time_img * bs
    multi_label &= nc > 1
    t = time.time()
    output = [np.zeros((0, 6 + nm))] * bs
    for xi, x in enumerate(prediction):
        x = np.transpose(x)
        x = x[xc[xi]]
        if np.shape(x)[0] == 0:
            continue
        box = x[:, :4]
        cls = x[:, 4:4 + nc]
        mask = x[:, 4 + nc:]
        box = xywh2xyxy(box)
        if multi_label:
            i, j = np.where(cls > conf_thres)
            x = np.concatenate([box[i], np.expand_dims(cls[i, j], axis=-1), np.expand_dims(j, axis=-1).astype(np.float32), mask[i]], axis=1)
        else:
            conf = np.max(cls, axis=1)
            j = np.argmax(cls, axis=1)
            keep = np.where(conf > conf_thres)[0]
            x = np.concatenate([box[keep], np.expand_dims(conf[keep], axis=-1), np.expand_dims(j[keep], axis=-1).astype(np.float32), mask[keep]], axis=1)
        n = np.shape(x)[0]
        if n == 0:
            continue
        sorted_indices = np.argsort(x[:, 4])[::-1]
        x = x[sorted_indices[:max_nms]]
        c = x[:, 5:6] * (0.0 if agnostic else max_wh)
        boxes, scores = x[:, :4] + c, x[:, 4]
        i = tf.image.non_max_suppression(boxes, scores, max_nms, iou_threshold=iou_thres)
        # keep = nms(boxes, scores, iou_threshold=iou_thres)
        i = i.numpy()
        i = i[:max_det]
        output[xi] = x[i, :]  # index with the NMS result, not the earlier confidence filter
        if (time.time() - t) > time_limit:
            break
    return output
#output_numpy = np.load(r"D:\object_face_person_detection\yolov8_tf_results\gustavo-alves-YOXSC4zRcxw-unsplash.npy")
#detections = non_max_suppression(output_numpy, conf_thres=0.4, iou_thres=0.4)[0]
#print(detections)
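# Synthetic end-to-end check (assumption: no real model output is available here, so
# random data with the expected (batch, 4 + nc, num_boxes) layout stands in for the
# .npy file referenced in the commented example above).
if __name__ == "__main__":
    rng = np.random.default_rng(42)
    dummy_pred = rng.random((1, 84, 200)).astype(np.float32)  # 4 box coords + 80 classes
    dummy_pred[:, :4, :] *= 640.0
    results = optimized_object_detection(dummy_pred, conf_thres=0.4, iou_thres=0.4)[0]
    print(results.shape)  # (num_kept, 6)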