import cv2
import numpy as np
import torch
from torch import nn

from maskrcnn_benchmark.structures.bounding_box import BoxList
from maskrcnn_benchmark.structures.keypoint import PersonKeypoints


class KeypointPostProcessor(nn.Module):
    def __init__(self, keypointer=None):
        super(KeypointPostProcessor, self).__init__()
        self.keypointer = keypointer

    def forward(self, x, boxes):
        mask_prob = x

        scores = None
        if self.keypointer:
            mask_prob, scores = self.keypointer(x, boxes)

        assert len(boxes) == 1, "Only non-batched inference supported for now"
        boxes_per_image = [box.bbox.size(0) for box in boxes]
        mask_prob = mask_prob.split(boxes_per_image, dim=0)
        scores = scores.split(boxes_per_image, dim=0)

        results = []
        for prob, box, score in zip(mask_prob, boxes, scores):
            # Copy the input BoxList and attach the decoded keypoints
            # (with their heatmap logits) as an extra field.
            bbox = BoxList(box.bbox, box.size, mode="xyxy")
            for field in box.fields():
                bbox.add_field(field, box.get_field(field))
            prob = PersonKeypoints(prob, box.size)
            prob.add_field("logits", score)
            bbox.add_field("keypoints", prob)
            results.append(bbox)

        return results


def heatmaps_to_keypoints(maps, rois):
    """Extract predicted keypoint locations from heatmaps.

    Returns a tuple (xy_preds, end_scores), where xy_preds has shape
    (#rois, #keypoints, 3) with the last dimension holding (x, y, visibility),
    and end_scores has shape (#rois, #keypoints) with the heatmap logit at the
    predicted location of each keypoint.
    """
    # This function converts a discrete image coordinate in a HEATMAP_SIZE x
    # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain
    # consistency with keypoints_to_heatmap_labels by using the conversion from
    # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a
    # continuous coordinate.
    offset_x = rois[:, 0]
    offset_y = rois[:, 1]

    widths = rois[:, 2] - rois[:, 0]
    heights = rois[:, 3] - rois[:, 1]
    widths = np.maximum(widths, 1)
    heights = np.maximum(heights, 1)
    widths_ceil = np.ceil(widths)
    heights_ceil = np.ceil(heights)

    # NCHW to NHWC for use with OpenCV
    maps = np.transpose(maps, [0, 2, 3, 1])
    min_size = 0  # cfg.KRCNN.INFERENCE_MIN_SIZE
    num_keypoints = maps.shape[3]
    xy_preds = np.zeros((len(rois), 3, num_keypoints), dtype=np.float32)
    end_scores = np.zeros((len(rois), num_keypoints), dtype=np.float32)
    for i in range(len(rois)):
        if min_size > 0:
            roi_map_width = int(np.maximum(widths_ceil[i], min_size))
            roi_map_height = int(np.maximum(heights_ceil[i], min_size))
        else:
            # cv2.resize expects integer sizes
            roi_map_width = int(widths_ceil[i])
            roi_map_height = int(heights_ceil[i])
        width_correction = widths[i] / roi_map_width
        height_correction = heights[i] / roi_map_height
        # Upsample the heatmap to the (rounded) box resolution
        roi_map = cv2.resize(
            maps[i], (roi_map_width, roi_map_height), interpolation=cv2.INTER_CUBIC
        )
        # Bring back to CHW
        roi_map = np.transpose(roi_map, [2, 0, 1])
        # roi_map_probs = scores_to_probs(roi_map.copy())
        w = roi_map.shape[2]
        # argmax over the flattened (H * W) heatmap of each keypoint
        pos = roi_map.reshape(num_keypoints, -1).argmax(axis=1)
        x_int = pos % w
        y_int = (pos - x_int) // w
        # assert (roi_map_probs[k, y_int, x_int] ==
        #         roi_map_probs[k, :, :].max())
        x = (x_int + 0.5) * width_correction
        y = (y_int + 0.5) * height_correction
        xy_preds[i, 0, :] = x + offset_x[i]
        xy_preds[i, 1, :] = y + offset_y[i]
        xy_preds[i, 2, :] = 1
        end_scores[i, :] = roi_map[np.arange(num_keypoints), y_int, x_int]

    # (#rois, 3, #keypoints) -> (#rois, #keypoints, 3)
    return np.transpose(xy_preds, [0, 2, 1]), end_scores


class Keypointer(object):
    """
    Projects a set of keypoint heatmaps in an image onto the locations
    specified by the bounding boxes
    """

    def __init__(self, padding=0):
        self.padding = padding

    def __call__(self, masks, boxes):
        # TODO do this properly
        if isinstance(boxes, BoxList):
            boxes = [boxes]
        assert len(boxes) == 1

        result, scores = heatmaps_to_keypoints(
            masks.detach().cpu().numpy(), boxes[0].bbox.cpu().numpy()
        )
        return (
            torch.from_numpy(result).to(masks.device),
            torch.as_tensor(scores, device=masks.device),
        )


def make_roi_keypoint_post_processor(cfg):
    keypointer = Keypointer()
    keypoint_post_processor = KeypointPostProcessor(keypointer)
    return keypoint_post_processor
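

# Minimal smoke test (a sketch, not part of the original module): exercises
# heatmaps_to_keypoints on random data and checks the output shapes described
# in its docstring. The heatmap size (56) and keypoint count (17, the COCO
# person keypoints) are illustrative assumptions, not values read from a config.
if __name__ == "__main__":
    num_rois, num_keypoints, heatmap_size = 2, 17, 56
    dummy_maps = np.random.rand(
        num_rois, num_keypoints, heatmap_size, heatmap_size
    ).astype(np.float32)
    dummy_rois = np.array(
        [[10.0, 20.0, 110.0, 220.0], [30.0, 40.0, 90.0, 160.0]], dtype=np.float32
    )
    xy_preds, end_scores = heatmaps_to_keypoints(dummy_maps, dummy_rois)
    assert xy_preds.shape == (num_rois, num_keypoints, 3)
    assert end_scores.shape == (num_rois, num_keypoints)
    print("xy_preds:", xy_preds.shape, "end_scores:", end_scores.shape)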