File size: 9,677 Bytes
623aa4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# -*- coding: utf-8 -*-
"""

Created on Mon Sep  4 16:03:42 2023



@author: SABARI

"""
import time
import tensorflow as tf
import numpy as np
#from lsnms import nms, wbc

def box_iou(box1, box2, eps=1e-7):
    """
    Calculate the pairwise intersection-over-union (IoU) of two sets of boxes.

    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.

    Args:
        box1 (tf.Tensor): A tensor of shape (N, 4) representing N bounding boxes.
        box2 (tf.Tensor): A tensor of shape (M, 4) representing M bounding boxes.
        eps (float, optional): A small value to avoid division by zero. Defaults to 1e-7.

    Returns:
        (tf.Tensor): An NxM tensor whose entry [i, j] is the IoU of box1[i] and box2[j].
    """
    # Add singleton axes so the corner tensors broadcast against each other:
    # box1 -> (N, 1, 4) and box2 -> (1, M, 4). Splitting the raw (N, 4) / (M, 4)
    # inputs instead compares rows one-to-one and breaks whenever N != M.
    a1, a2 = tf.split(tf.expand_dims(box1, 1), 2, axis=2)  # each (N, 1, 2)
    b1, b2 = tf.split(tf.expand_dims(box2, 0), 2, axis=2)  # each (1, M, 2)

    # Overlap extent per axis, clamped at zero for disjoint boxes, then
    # multiplied into an area -> (N, M)
    inter = tf.reduce_prod(tf.maximum(tf.minimum(a2, b2) - tf.maximum(a1, b1), 0), axis=2)

    area1 = tf.reduce_prod(a2 - a1, axis=2)  # (N, 1)
    area2 = tf.reduce_prod(b2 - b1, axis=2)  # (1, M)

    # IoU = intersection / union, with eps guarding against a zero union
    return inter / (area1 + area2 - inter + eps)


def xywh2xyxy(x):
    """
    Convert boxes from centre format (x, y, width, height) to corner format (x1, y1, x2, y2).

    (x1, y1) is the top-left corner and (x2, y2) is the bottom-right corner. Only the first
    four entries along the last axis are rewritten; any further entries are copied through
    unchanged.

    Args:
        x (np.ndarray): Boxes whose last axis starts with (x, y, width, height).

    Returns:
        y (np.ndarray): A copy of `x` whose first four entries along the last axis hold
            (x1, y1, x2, y2).
    """
    corners = np.copy(x)  # work on a copy so the caller's array is left untouched

    centre_x, centre_y = x[..., 0], x[..., 1]
    half_w, half_h = x[..., 2] / 2, x[..., 3] / 2

    corners[..., 0] = centre_x - half_w  # top left x
    corners[..., 1] = centre_y - half_h  # top left y
    corners[..., 2] = centre_x + half_w  # bottom right x
    corners[..., 3] = centre_y + half_h  # bottom right y
    return corners


def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, agnostic=False,
                        multi_label=False, max_det=300, nc=0,  # number of classes (optional)
                        max_time_img=0.05,
                        max_nms=100,
                        max_wh=7680):
    """
    Filter raw detector output by confidence and run non-maximum suppression (NMS) per image.

    Arguments:
        prediction (np.ndarray): Array of shape (batch_size, 4 + num_classes + num_masks, num_boxes)
            whose first four channels are (center_x, center_y, width, height). If a list or
            tuple is passed, only its first element is used.
            NOTE(review): the original docs describe a tf.Tensor; the code indexes it with
            NumPy calls - confirm which input types callers actually pass.
        conf_thres (float): Class scores must be strictly greater than this to be kept.
            Valid values are between 0.0 and 1.0.
        iou_thres (float): IoU threshold passed to tf.image.non_max_suppression().
            Valid values are between 0.0 and 1.0.
        agnostic (bool): If True, no class offset is applied and boxes of all classes are
            suppressed against each other; otherwise boxes are shifted by class index * max_wh
            before NMS.
        multi_label (bool): If True (and there is more than one class), a box produces one
            detection for every class whose score exceeds conf_thres.
        max_det (int): The maximum number of boxes to keep after NMS.
        nc (int): (optional) Number of classes. 0 means every channel after the first four is
            a class score. Channels beyond 4 + nc are carried along as mask values.
        max_time_img (float): Per-image allowance in seconds; the overall budget is
            0.5 + max_time_img * batch_size.
        max_nms (int): Maximum number of highest-confidence boxes passed into
            tf.image.non_max_suppression(); also used as that call's max_output_size.
        max_wh (int): Multiplier applied to the class index to offset boxes of different
            classes (presumably at least the largest coordinate in pixels - verify).

    Returns:
        (List[np.ndarray]): A list of length batch_size. Each processed image holds an array
            of shape (num_kept, 6 + num_masks) with columns
            (x1, y1, x2, y2, confidence, class, mask1, mask2, ...). Images with no detections,
            or not reached before the time budget ran out, hold an empty (0, 6 + num_masks)
            array; all such entries refer to the same array object.
    """
    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
    if isinstance(prediction, (list, tuple)):  # e.g. (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output

    pred_shape = np.shape(prediction)
    batch_size = pred_shape[0]
    nc = nc or (pred_shape[1] - 4)  # number of classes
    num_masks = pred_shape[1] - nc - 4
    mask_start = 4 + nc  # first mask channel
    # Per image, flag the boxes whose best class score clears the threshold
    candidates = np.amax(prediction[:, 4:mask_start], axis=1) > conf_thres

    # Settings
    time_limit = 0.5 + max_time_img * tf.cast(batch_size, tf.float32)  # seconds to quit after
    multi_label &= nc > 1  # multiple labels per box only make sense with several classes

    started = time.time()
    output = [np.zeros((0, 6 + num_masks))] * batch_size
    for img_idx, raw in enumerate(prediction):  # image index, image inference
        # (channels, boxes) -> (boxes, channels), then keep confident boxes only
        dets = np.transpose(raw)[candidates[img_idx]]

        # Nothing left for this image: leave its empty placeholder in place
        if np.shape(dets)[0] == 0:
            continue

        # Split channels; boxes go from (center_x, center_y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(dets[:, :4])
        cls = dets[:, 4:mask_start]
        mask = dets[:, mask_start:]

        # Detections matrix: (xyxy, conf, cls, masks...)
        if multi_label:
            rows, cols = np.where(cls > conf_thres)
            columns = [box[rows],
                       cls[rows, cols][:, np.newaxis],
                       cols[:, np.newaxis].astype(np.float32),
                       mask[rows]]
        else:  # best class only
            conf = np.max(cls, axis=1)
            best = np.argmax(cls, axis=1)
            keep = np.where(conf > conf_thres)[0]
            columns = [box[keep],
                       conf[keep][:, np.newaxis],
                       best[keep][:, np.newaxis].astype(np.float32),
                       mask[keep]]
        dets = np.concatenate(columns, axis=1)

        # Check shape
        if np.shape(dets)[0] == 0:  # no boxes
            continue

        # Highest confidence first, capped at max_nms rows
        order = np.argsort(dets[:, 4])[::-1][:max_nms]
        dets = dets[order]

        # Batched NMS: shifting each box by its class index keeps classes apart
        class_offset = dets[:, 5:6] * (0.0 if agnostic else tf.cast(max_wh, tf.float32))
        boxes = dets[:, :4] + class_offset
        scores = dets[:, 4]
        selected = tf.image.non_max_suppression(boxes, scores, max_nms, iou_threshold=iou_thres)
        selected = selected.numpy()[:max_det]  # limit detections

        output[img_idx] = dets[selected, :]

        if (time.time() - started) > time_limit:
            break  # time limit exceeded

    return output

import numpy as np

def optimized_object_detection(prediction, conf_thres=0.25, iou_thres=0.45, agnostic=False,
                                multi_label=False, max_det=300, nc=0, max_time_img=0.05,
                                max_nms=100, max_wh=7680):
    """
    Filter raw detector output by confidence and run non-maximum suppression (NMS) per image.

    Arguments:
        prediction (np.ndarray): 3-D array of shape (batch_size, 4 + num_classes + num_masks,
            num_boxes) whose first four channels are (center_x, center_y, width, height). If a
            list or tuple is passed, only its first element is used.
        conf_thres (float): Class scores must be strictly greater than this to be kept.
            Valid values are between 0.0 and 1.0.
        iou_thres (float): IoU threshold passed to tf.image.non_max_suppression().
            Valid values are between 0.0 and 1.0.
        agnostic (bool): If True, no class offset is applied and boxes of all classes are
            suppressed against each other; otherwise boxes are shifted by class index * max_wh
            before NMS.
        multi_label (bool): If True (and there is more than one class), a box produces one
            detection for every class whose score exceeds conf_thres.
        max_det (int): The maximum number of boxes to keep after NMS.
        nc (int): (optional) Number of classes. 0 means every channel after the first four is
            a class score. Channels beyond 4 + nc are carried along as mask values.
        max_time_img (float): Per-image allowance in seconds; the overall budget is
            0.5 + max_time_img * batch_size.
        max_nms (int): Maximum number of highest-confidence boxes passed into
            tf.image.non_max_suppression(); also used as that call's max_output_size.
        max_wh (int): Multiplier applied to the class index to offset boxes of different
            classes (presumably at least the largest coordinate in pixels - verify).

    Returns:
        (List[np.ndarray]): A list of length batch_size. Each processed image holds an array
            of shape (num_kept, 6 + num_masks) with columns
            (x1, y1, x2, y2, confidence, class, mask1, mask2, ...). Images with no detections,
            or not reached before the time budget ran out, hold an empty (0, 6 + num_masks)
            array.
    """
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    if isinstance(prediction, (list, tuple)):
        prediction = prediction[0]  # select only inference output

    bs, _, _ = prediction.shape  # batch size; also enforces a 3-D input

    if nc == 0:
        nc = prediction.shape[1] - 4  # every non-box channel is a class score

    nm = prediction.shape[1] - nc - 4  # number of mask channels
    mi = 4 + nc  # mask start index

    # Per image, flag the boxes whose best class score clears the threshold
    xc = np.amax(prediction[:, 4:mi], axis=1) > conf_thres

    time_limit = 0.5 + max_time_img * bs  # seconds to quit after

    multi_label &= nc > 1  # multiple labels per box only make sense with several classes

    t = time.time()
    # One distinct empty array per image, so results never share an object
    output = [np.zeros((0, 6 + nm)) for _ in range(bs)]

    for xi, x in enumerate(prediction):  # image index, image inference
        # (channels, boxes) -> (boxes, channels), then keep confident boxes only
        x = np.transpose(x)
        x = x[xc[xi]]

        if np.shape(x)[0] == 0:
            continue

        box = xywh2xyxy(x[:, :4])  # (center_x, center_y, width, height) to (x1, y1, x2, y2)
        cls = x[:, 4:mi]
        mask = x[:, mi:]

        # Detections matrix: (xyxy, conf, cls, masks...)
        if multi_label:
            rows, cols = np.where(cls > conf_thres)
            x = np.concatenate([box[rows], np.expand_dims(cls[rows, cols], axis=-1), np.expand_dims(cols, axis=-1).astype(np.float32), mask[rows]], axis=1)
        else:  # best class only
            conf = np.max(cls, axis=1)
            j = np.argmax(cls, axis=1)
            keep = np.where(conf > conf_thres)[0]
            x = np.concatenate([box[keep], np.expand_dims(conf[keep], axis=-1), np.expand_dims(j[keep], axis=-1).astype(np.float32), mask[keep]], axis=1)

        if np.shape(x)[0] == 0:
            continue

        # Highest confidence first, capped at max_nms rows
        sorted_indices = np.argsort(x[:, 4])[::-1]
        x = x[sorted_indices[:max_nms]]

        # Batched NMS: shifting each box by its class index keeps classes apart
        c = x[:, 5:6] * (0.0 if agnostic else max_wh)
        boxes, scores = x[:, :4] + c, x[:, 4]
        selected = tf.image.non_max_suppression(boxes, scores, max_nms, iou_threshold=iou_thres)
        selected = selected.numpy()[:max_det]  # limit detections

        # Index with the NMS selection. The pre-sort `keep` indices must not be
        # used here: they would bypass NMS, and `keep` does not exist at all in
        # multi-label mode.
        output[xi] = x[selected]

        if (time.time() - t) > time_limit:
            break  # time limit exceeded

    return output


#output_numpy = np.load(r"D:\object_face_person_detection\yolov8_tf_results\gustavo-alves-YOXSC4zRcxw-unsplash.npy")

#detections = non_max_suppression(output_numpy, conf_thres=0.4, iou_thres=0.4)[0]

#print(detections)