# Multi-scale / horizontal-flip test-time augmentation for bbox detection.
import numpy as np
import torch

from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.data import transforms as T
from maskrcnn_benchmark.layers import nms, soft_nms
from maskrcnn_benchmark.structures.bounding_box import BoxList
from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist
from maskrcnn_benchmark.structures.image_list import to_image_list
def im_detect_bbox_aug(model, images, device, captions=None, positive_map_label_to_token=None):
    """
    Run detection under every configured test-time augmentation (scales from
    cfg.TEST.SCALES, plus horizontal flips when cfg.TEST.FLIP), map all
    predictions back to the original image sizes, concatenate them per image,
    and fuse the result with merge_result_from_multi_scales.
    """
    # One accumulator per input image; each collects the boxlists produced
    # under every augmentation setting.
    per_image_boxlists = [[] for _ in images]

    def accumulate(boxlists_t):
        # Resize each prediction back to its original image resolution
        # before storing it.
        for idx, boxlist_t in enumerate(boxlists_t):
            per_image_boxlists[idx].append(boxlist_t.resize(images[idx].size))

    # A per-scale keep-range is only honored when RANGES lines up 1:1 with SCALES.
    if len(cfg.TEST.RANGES) == len(cfg.TEST.SCALES):
        keep_ranges = cfg.TEST.RANGES
    else:
        keep_ranges = [None] * len(cfg.TEST.SCALES)

    max_size = cfg.TEST.MAX_SIZE
    for scale, keep_range in zip(cfg.TEST.SCALES, keep_ranges):
        flip_options = (False, True) if cfg.TEST.FLIP else (False,)
        for hflip in flip_options:
            boxlists_scl = im_detect_bbox_scale(
                model, images, scale, max_size, device,
                captions=captions,
                positive_map_label_to_token=positive_map_label_to_token,
                hflip=hflip,
            )
            if keep_range is not None:
                boxlists_scl = remove_boxes(boxlists_scl, *keep_range)
            accumulate(boxlists_scl)

    # Concatenate everything collected for each image into one BoxList.
    merged = []
    for boxlist_ts in per_image_boxlists:
        bbox = torch.cat([b.bbox for b in boxlist_ts])
        scores = torch.cat([b.get_field('scores') for b in boxlist_ts])
        labels = torch.cat([b.get_field('labels') for b in boxlist_ts])
        combined = BoxList(bbox, boxlist_ts[0].size, boxlist_ts[0].mode)
        combined.add_field('scores', scores)
        combined.add_field('labels', labels)
        merged.append(combined)
    return merge_result_from_multi_scales(merged)
def im_detect_bbox(model, images, target_scale, target_max_size, device,
                   captions=None,
                   positive_map_label_to_token=None
                   ):
    """
    Performs bbox detection on the original (unflipped) images.

    Arguments:
        model: detector; invoked as model(images) or, when captions are given,
            model(images, captions=..., positive_map=...)
        images: list of images accepted by the transform pipeline
        target_scale: shorter-side target passed to T.Resize
        target_max_size: longer-side cap passed to T.Resize
        device: device the batched image tensor is moved to
        captions: optional captions forwarded to the model
        positive_map_label_to_token: optional map forwarded as positive_map

    Returns:
        The model's raw predictions (one per image), in the resized image space.
    """
    # Fix: the original used `cfg.INPUT.FORMAT is not ''` — identity comparison
    # with a literal relies on string interning (SyntaxWarning on CPython >= 3.8);
    # use equality. Also bind a fallback so `input_format` is always defined
    # (previously a NameError when FORMAT was '' and TO_BGR255 was False).
    if cfg.INPUT.FORMAT != '':
        input_format = cfg.INPUT.FORMAT
    elif cfg.INPUT.TO_BGR255:
        input_format = 'bgr255'
    else:
        input_format = ''  # no channel conversion requested
    transform = T.Compose([
        T.Resize(target_scale, target_max_size),
        T.ToTensor(),
        T.Normalize(
            mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, format=input_format
        )
    ])
    images = [transform(image) for image in images]
    images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY)
    if captions is None:
        return model(images.to(device))
    else:
        return model(images.to(device),
                     captions=captions,
                     positive_map=positive_map_label_to_token
                     )
def im_detect_bbox_hflip(model, images, target_scale, target_max_size, device,
                         captions=None,
                         positive_map_label_to_token=None
                         ):
    """
    Performs bbox detection on the horizontally flipped images.
    Function signature is the same as for im_detect_bbox; detections are
    transposed back so they live in the unflipped image space.
    """
    # Fix: the original used `cfg.INPUT.FORMAT is not ''` — identity comparison
    # with a literal relies on string interning (SyntaxWarning on CPython >= 3.8);
    # use equality. Also bind a fallback so `input_format` is always defined
    # (previously a NameError when FORMAT was '' and TO_BGR255 was False).
    if cfg.INPUT.FORMAT != '':
        input_format = cfg.INPUT.FORMAT
    elif cfg.INPUT.TO_BGR255:
        input_format = 'bgr255'
    else:
        input_format = ''  # no channel conversion requested
    transform = T.Compose([
        T.Resize(target_scale, target_max_size),
        T.RandomHorizontalFlip(1.0),  # probability 1.0 -> always flip
        T.ToTensor(),
        T.Normalize(
            mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, format=input_format
        )
    ])
    images = [transform(image) for image in images]
    images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY)
    if captions is None:
        boxlists = model(images.to(device))
    else:
        boxlists = model(images.to(device),
                         captions=captions,
                         positive_map=positive_map_label_to_token
                         )
    # Invert the detections computed on the flipped image
    boxlists_inv = [boxlist.transpose(0) for boxlist in boxlists]
    return boxlists_inv
def im_detect_bbox_scale(model, images, target_scale, target_max_size, device,
                         captions=None,
                         positive_map_label_to_token=None,
                         hflip=False):
    """
    Computes bbox detections at the given scale, optionally on horizontally
    flipped inputs. Returns predictions in the scaled image space.
    """
    # Dispatch to the flipped or plain detector; both share the same signature.
    detect_fn = im_detect_bbox_hflip if hflip else im_detect_bbox
    return detect_fn(
        model, images, target_scale, target_max_size, device,
        captions=captions,
        positive_map_label_to_token=positive_map_label_to_token,
    )
def remove_boxes(boxlist_ts, min_scale, max_scale):
    """
    Filter each boxlist, keeping only boxes whose area (computed with the
    inclusive +1 pixel convention) is strictly between min_scale**2 and
    max_scale**2. The original box mode of each list is preserved.
    """
    lo, hi = min_scale * min_scale, max_scale * max_scale
    filtered = []
    for boxlist_t in boxlist_ts:
        original_mode = boxlist_t.mode
        xyxy = boxlist_t.convert("xyxy")
        keep = []
        for idx, (x1, y1, x2, y2) in enumerate(xyxy.bbox):
            area = (x2 - x1 + 1) * (y2 - y1 + 1)
            if lo < area < hi:
                keep.append(idx)
        filtered.append(xyxy[keep].convert(original_mode))
    return filtered
def merge_result_from_multi_scales(boxlists):
    """
    Fuse the concatenated multi-scale detections of each image: run per-class
    NMS (type cfg.TEST.SPECIAL_NMS at threshold cfg.TEST.TH), re-attach the
    class labels, concatenate the classes, then cap the per-image detection
    count at cfg.TEST.PRE_NMS_TOP_N (when positive) by score thresholding.
    """
    merged = []
    for boxlist in boxlists:
        scores = boxlist.get_field("scores")
        labels = boxlist.get_field("labels")
        boxes = boxlist.bbox
        # Restrict to an explicit class subset when one is configured.
        if len(cfg.TEST.SELECT_CLASSES):
            class_list = cfg.TEST.SELECT_CLASSES
        else:
            class_list = range(1, cfg.TEST.NUM_CLASSES)
        per_class = []
        for cls in class_list:
            inds = (labels == cls).nonzero().view(-1)
            cls_boxlist = BoxList(boxes[inds, :].view(-1, 4), boxlist.size, mode="xyxy")
            cls_boxlist.add_field("scores", scores[inds])
            cls_boxlist = boxlist_nms(cls_boxlist, cfg.TEST.TH, score_field="scores", nms_type=cfg.TEST.SPECIAL_NMS)
            cls_boxlist.add_field(
                "labels",
                torch.full((len(cls_boxlist),), cls, dtype=torch.int64, device=scores.device),
            )
            per_class.append(cls_boxlist)
        result = cat_boxlist(per_class)
        num_dets = len(result)
        # Limit to at most PRE_NMS_TOP_N detections over all classes by
        # keeping everything at or above the k-th smallest score.
        if num_dets > cfg.TEST.PRE_NMS_TOP_N > 0:
            all_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                all_scores.cpu(),
                num_dets - cfg.TEST.PRE_NMS_TOP_N + 1,
            )
            keep = torch.nonzero(all_scores >= image_thresh.item()).squeeze(1)
            result = result[keep]
        merged.append(result)
    return merged
def boxlist_nms(boxlist, thresh, max_proposals=-1, score_field="scores", nms_type='nms'):
    """
    Suppress overlapping boxes in `boxlist` using the strategy named by
    `nms_type` ('vote', 'soft-vote', 'soft-nms', or plain 'nms') at IoU
    threshold `thresh`. A non-positive threshold disables suppression.
    `max_proposals` (when positive) truncates the kept indices for the
    'soft-nms' and plain 'nms' paths, matching the original behavior.
    """
    if thresh <= 0:
        return boxlist
    original_mode = boxlist.mode
    boxlist = boxlist.convert("xyxy")
    boxes = boxlist.bbox
    box_scores = boxlist.get_field(score_field)
    if nms_type in ('vote', 'soft-vote'):
        vote_fn = bbox_vote if nms_type == 'vote' else soft_bbox_vote
        voted_boxes, voted_scores = vote_fn(boxes, box_scores, thresh)
        # Only overwrite when voting produced something; otherwise the
        # boxlist is returned unchanged.
        if len(voted_boxes) > 0:
            boxlist.bbox = voted_boxes
            boxlist.extra_fields['scores'] = voted_scores
    elif nms_type == 'soft-nms':
        keep, rescored = soft_nms(boxes.cpu(), box_scores.cpu(), thresh, 0.95)
        if max_proposals > 0:
            keep = keep[:max_proposals]
        boxlist = boxlist[keep]
        boxlist.extra_fields['scores'] = rescored
    else:
        keep = nms(boxes, box_scores, thresh)
        if max_proposals > 0:
            keep = keep[:max_proposals]
        boxlist = boxlist[keep]
    return boxlist.convert(original_mode)
def bbox_vote(boxes, scores, vote_thresh):
    """
    Box-voting fusion: greedily take the highest-scoring remaining box, pull
    in every remaining box with IoU >= vote_thresh, and replace the cluster
    with its score-weighted average box carrying the cluster's max score.

    Arguments:
        boxes (Tensor[N, 4]): xyxy boxes
        scores (Tensor[N]): per-box scores
        vote_thresh (float): IoU threshold for clustering

    Returns:
        (boxes, scores) as float CUDA tensors; with <= 1 input box, returns
        empty numpy arrays of shapes (0, 5) and (0, 1) (original behavior,
        preserved for callers that only check len()).
    """
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy().reshape(-1, 1)
    det = np.concatenate((boxes, scores), axis=1)
    if det.shape[0] <= 1:
        return np.zeros((0, 5)), np.zeros((0, 1))
    order = det[:, 4].ravel().argsort()[::-1]
    det = det[order, :]
    # Fix: the original grew `dets` via np.row_stack inside a bare
    # `try/except` (row_stack is a deprecated alias and the bare except
    # swallowed real errors). Accumulate parts in a list instead.
    parts = []
    while det.shape[0] > 0:
        # IoU of the current top-scoring box against all remaining boxes
        # (inclusive +1 pixel convention).
        area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
        xx1 = np.maximum(det[0, 0], det[:, 0])
        yy1 = np.maximum(det[0, 1], det[:, 1])
        xx2 = np.minimum(det[0, 2], det[:, 2])
        yy2 = np.minimum(det[0, 3], det[:, 3])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        o = inter / (area[0] + area[:] - inter)
        # Pull the cluster out of the working set.
        merge_index = np.where(o >= vote_thresh)[0]
        det_accu = det[merge_index, :]
        det = np.delete(det, merge_index, 0)
        if merge_index.shape[0] <= 1:
            # Singleton cluster: keep the box as-is.
            parts.append(det_accu)
        else:
            # Score-weighted average of the cluster's coordinates; the fused
            # box keeps the cluster's maximum score.
            det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
            max_score = np.max(det_accu[:, 4])
            det_accu_sum = np.zeros((1, 5))
            det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:])
            det_accu_sum[:, 4] = max_score
            parts.append(det_accu_sum)
    dets = np.concatenate(parts, axis=0)
    boxes = torch.from_numpy(dets[:, :4]).float().cuda()
    scores = torch.from_numpy(dets[:, 4]).float().cuda()
    return boxes, scores
def soft_bbox_vote(boxes, scores, vote_thresh):
    """
    Soft box-voting: like bbox_vote, but cluster members are additionally
    kept (soft-NMS style) with their scores decayed by (1 - IoU), provided
    the decayed score stays >= cfg.MODEL.RETINANET.INFERENCE_TH. Final
    detections are re-sorted by score, descending.

    Arguments:
        boxes (Tensor[N, 4]): xyxy boxes
        scores (Tensor[N]): per-box scores
        vote_thresh (float): IoU threshold for clustering

    Returns:
        (boxes, scores) as float CUDA tensors; with <= 1 input box, returns
        empty numpy arrays of shapes (0, 5) and (0, 1) (original behavior,
        preserved for callers that only check len()).
    """
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy().reshape(-1, 1)
    det = np.concatenate((boxes, scores), axis=1)
    if det.shape[0] <= 1:
        return np.zeros((0, 5)), np.zeros((0, 1))
    order = det[:, 4].ravel().argsort()[::-1]
    det = det[order, :]
    # Fix: the original grew `dets` via np.row_stack inside a bare
    # `try/except` (row_stack is a deprecated alias and the bare except
    # swallowed real errors). Accumulate parts in a list instead.
    parts = []
    while det.shape[0] > 0:
        # IoU of the current top-scoring box against all remaining boxes
        # (inclusive +1 pixel convention).
        area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
        xx1 = np.maximum(det[0, 0], det[:, 0])
        yy1 = np.maximum(det[0, 1], det[:, 1])
        xx2 = np.minimum(det[0, 2], det[:, 2])
        yy2 = np.minimum(det[0, 3], det[:, 3])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        o = inter / (area[0] + area[:] - inter)
        # Pull the cluster out of the working set.
        merge_index = np.where(o >= vote_thresh)[0]
        det_accu = det[merge_index, :]
        det_accu_iou = o[merge_index]
        det = np.delete(det, merge_index, 0)
        if merge_index.shape[0] <= 1:
            # Singleton cluster: keep the box as-is.
            parts.append(det_accu)
        else:
            # Soft-NMS style survivors: decay each member's score by its IoU
            # with the cluster leader and keep those above the inference
            # threshold alongside the fused box.
            soft_det_accu = det_accu.copy()
            soft_det_accu[:, 4] = soft_det_accu[:, 4] * (1 - det_accu_iou)
            soft_index = np.where(soft_det_accu[:, 4] >= cfg.MODEL.RETINANET.INFERENCE_TH)[0]
            soft_det_accu = soft_det_accu[soft_index, :]
            # Score-weighted average of the cluster's coordinates; the fused
            # box keeps the cluster's maximum score.
            det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
            max_score = np.max(det_accu[:, 4])
            det_accu_sum = np.zeros((1, 5))
            det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4], axis=0) / np.sum(det_accu[:, -1:])
            det_accu_sum[:, 4] = max_score
            if soft_det_accu.shape[0] > 0:
                det_accu_sum = np.vstack((det_accu_sum, soft_det_accu))
            parts.append(det_accu_sum)
    dets = np.concatenate(parts, axis=0)
    # Re-sort by score, descending (soft survivors may be out of order).
    order = dets[:, 4].ravel().argsort()[::-1]
    dets = dets[order, :]
    boxes = torch.from_numpy(dets[:, :4]).float().cuda()
    scores = torch.from_numpy(dets[:, 4]).float().cuda()
    return boxes, scores