Spaces:
Running
Running
File size: 3,712 Bytes
90b4364 ebc32f0 90b4364 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import math
from typing import List
import numpy as np
from PIL import Image
def _yolo_xywh2xyxy(x: np.ndarray) -> np.ndarray:
"""
Copied from yolov8.
Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
top-left corner and (x2, y2) is the bottom-right corner.
Args:
x (np.ndarray) or (torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
Returns:
y (np.ndarray) or (torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
"""
y = np.copy(x)
y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x
y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y
y[..., 2] = x[..., 0] + x[..., 2] / 2 # bottom right x
y[..., 3] = x[..., 1] + x[..., 3] / 2 # bottom right y
return y
def _yolo_nms(boxes, scores, thresh: float = 0.7) -> List[int]:
"""
dets: ndarray, (num_boxes, 5)
每一行表示一个bounding box:[xmin, ymin, xmax, ymax, score]
其中xmin, ymin, xmax, ymax分别表示框的左上角和右下角坐标,score表示框的分数
thresh: float
两个框的IoU阈值
"""
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
# 按照score降序排列
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
# 计算其他所有框与当前框的IoU
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
iou = inter / (areas[i] + areas[order[1:]] - inter)
# 保留IoU小于阈值的框
inds = np.where(iou <= thresh)[0]
order = order[inds + 1]
return keep
def _image_preprocess(image: Image.Image, max_infer_size: int = 1216, align: int = 32):
    """
    Resize an image so its dimensions are multiples of ``align``.

    If the longer side exceeds ``max_infer_size``, both sides are first scaled
    down by the same factor so the longer side equals ``max_infer_size``.
    Each side is then rounded *up* to the next multiple of ``align`` and the
    image is resized to that size, so the aspect ratio may change slightly.

    Args:
        image: Source image; its ``width``, ``height`` and ``resize`` are used.
        max_infer_size: Upper bound applied to the longer side before alignment.
        align: Both output dimensions are multiples of this value.

    Returns:
        A tuple ``(resized_image, (old_width, old_height), (new_width, new_height))``.
    """
    src_w, src_h = image.width, image.height

    # Only ever shrink: a ratio of 1 or more leaves the size untouched.
    ratio = max_infer_size / max(src_w, src_h)
    if ratio < 1:
        scaled_w, scaled_h = src_w * ratio, src_h * ratio
    else:
        scaled_w, scaled_h = src_w, src_h

    # Round each side up to the alignment grid.
    dst_w = int(math.ceil(scaled_w / align) * align)
    dst_h = int(math.ceil(scaled_h / align) * align)

    resized = image.resize((dst_w, dst_h))
    return resized, (src_w, src_h), (dst_w, dst_h)
def _xy_postprocess(x, y, old_size, new_size):
old_width, old_height = old_size
new_width, new_height = new_size
x, y = x / new_width * old_width, y / new_height * old_height
x = int(np.clip(x, a_min=0, a_max=old_width).round())
y = int(np.clip(y, a_min=0, a_max=old_height).round())
return x, y
def _data_simple_postprocess(output, conf_threshold, iou_threshold, old_size, new_size):
    """
    Turn a raw detector output into a list of boxes in original-image pixels.

    Steps: drop low-confidence columns, rank the rest by descending score,
    convert boxes from centre format to corner format, run NMS, then map the
    surviving corners back to the original image size.

    Args:
        output: 2-D array indexed as ``output[row, candidate]``; rows 0-3 are
            read as the box and row 4 as the score.
        conf_threshold: Candidates are kept only when the value in the *last*
            row is strictly greater than this.
        iou_threshold: IoU threshold forwarded to ``_yolo_nms``.
        old_size: ``(width, height)`` of the original image.
        new_size: ``(width, height)`` of the resized image fed to the model.

    Returns:
        A list of ``((x0, y0, x1, y1), score)`` tuples with int coordinates and
        a float score; empty when no candidate passes the confidence filter.
    """
    # NOTE(review): the filter reads the last row while scores come from row 4;
    # these coincide only when `output` has exactly five rows - confirm.
    confident = output[:, output[-1, :] > conf_threshold]
    candidate_boxes = confident[:4, :].transpose(1, 0)
    candidate_scores = confident[4, :]

    # Stable ranking by descending score.
    ranking = sorted(range(len(candidate_scores)), key=lambda k: -candidate_scores[k])
    if not ranking:
        return []

    ranked_boxes = _yolo_xywh2xyxy(candidate_boxes[ranking])
    ranked_scores = candidate_scores[ranking]

    kept = _yolo_nms(ranked_boxes, ranked_scores, thresh=iou_threshold)
    kept_boxes, kept_scores = ranked_boxes[kept], ranked_scores[kept]

    # Each corner pair is mapped back independently and concatenated into
    # a single (x0, y0, x1, y1) tuple.
    return [
        (
            _xy_postprocess(box[0], box[1], old_size, new_size)
            + _xy_postprocess(box[2], box[3], old_size, new_size),
            float(score),
        )
        for box, score in zip(kept_boxes, kept_scores)
    ]
|