PSHuman

Running

App Files Files Community

PSHuman / lib /pymafx /utils /segms.py

fffiloni

Migrated from GitHub

2252f3d verified about 1 month ago

raw

history blame

9.77 kB

	# Copyright (c) 2017-present, Facebook, Inc.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	##############################################################################
	"""Functions for interacting with segmentation masks in the COCO format.

	The following terms are used in this module
	mask: a binary mask encoded as a 2D numpy array
	segm: a segmentation mask in one of the two COCO formats (polygon or RLE)
	polygon: COCO's polygon format
	RLE: COCO's run length encoding format
	"""

	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function
	from __future__ import unicode_literals

	import numpy as np

	import pycocotools.mask as mask_util


	def GetDensePoseMask(Polys):
	    """Paint per-part RLE masks into a single 256 x 256 label map.

	    The first 14 entries of `Polys` are visited in order. Every truthy entry
	    is decoded with `mask_util.decode` and its positive pixels are labelled
	    with the entry's 1-based position; falsy entries are skipped. Where
	    parts overlap, the later entry overwrites the earlier one.

	    Returns the label map: zeros for background, part ids elsewhere.
	    """
	    label_map = np.zeros([256, 256])
	    for part_idx in range(14):
	        part_rle = Polys[part_idx]
	        if not part_rle:
	            continue
	        # Labels are 1-based so that 0 stays reserved for background
	        label_map[mask_util.decode(part_rle) > 0] = part_idx + 1
	    return label_map


	def flip_segms(segms, height, width):
	    """Left/right flip each mask in a list of masks.

	    Args:
	        segms: list of segmentations. Each entry is either a list of
	            polygons (flat [x0, y0, x1, y1, ...] sequences) or an RLE dict.
	        height: image height, used when converting uncompressed RLEs.
	        width: image width; x coordinates are mirrored as `width - x - 1`.

	    Returns:
	        A new list with one flipped segmentation per input, in input order.
	        Polygon segmentations come back as lists of plain lists. RLE
	        segmentations come back as whatever `mask_util.encode` produces
	        for the decoded mask: uncompressed RLEs are decoded through a
	        one-element list and therefore re-encoded as a one-element list,
	        other RLE dicts as a single RLE.

	    Raises:
	        AssertionError: if an entry is neither a list nor a dict.
	    """
	    def _flip_poly(poly, width):
	        flipped_poly = np.array(poly)
	        # Even positions hold the x coordinates
	        flipped_poly[0::2] = width - np.array(poly[0::2]) - 1
	        return flipped_poly.tolist()

	    def _flip_rle(rle, height, width):
	        if 'counts' in rle and isinstance(rle['counts'], list):
	            # Magic RLE format handling painfully discovered by looking at the
	            # COCO API showAnns function.
	            rle = mask_util.frPyObjects([rle], height, width)
	        mask = mask_util.decode(rle)
	        # Reverse the column axis only. The decoded mask is 3-D when `rle`
	        # is a list and 2-D when it is a single RLE dict, so the trailing
	        # axis must not be indexed explicitly (doing so raised IndexError
	        # for single RLE dicts).
	        mask = mask[:, ::-1]
	        rle = mask_util.encode(np.array(mask, order='F', dtype=np.uint8))
	        return rle

	    flipped_segms = []
	    for segm in segms:
	        if isinstance(segm, list):
	            # Polygon format
	            flipped_segms.append([_flip_poly(poly, width) for poly in segm])
	        else:
	            # RLE format
	            assert isinstance(segm, dict)
	            flipped_segms.append(_flip_rle(segm, height, width))
	    return flipped_segms


	def polys_to_mask(polygons, height, width):
	    """Rasterize a COCO polygon segmentation into one binary mask.

	    The polygons are interpreted inside a height x width image and converted
	    through `mask_util.frPyObjects` / `mask_util.decode`. The per-polygon
	    layers are then merged, so a pixel is 1.0 when any polygon covers it
	    and 0.0 otherwise.

	    Returns a 2D numpy.float32 array.
	    """
	    encoded = mask_util.frPyObjects(polygons, height, width)
	    layers = np.array(mask_util.decode(encoded), dtype=np.float32)
	    # Collapse the per-polygon axis: covered anywhere -> foreground
	    covered = layers.sum(axis=2) > 0
	    return np.array(covered, dtype=np.float32)


	def mask_to_bbox(mask):
	    """Compute the tight bounding box of a binary mask.

	    Returns a numpy.float32 array (x0, y0, x1, y1) holding the first and last
	    column and row whose sum is positive (both ends inclusive), or None when
	    no column or no row has a positive sum.
	    """
	    col_hits = np.nonzero(np.sum(mask, axis=0) > 0)[0]
	    row_hits = np.nonzero(np.sum(mask, axis=1) > 0)[0]
	    if col_hits.size == 0 or row_hits.size == 0:
	        return None

	    corners = (col_hits[0], row_hits[0], col_hits[-1], row_hits[-1])
	    return np.array(corners, dtype=np.float32)


	def polys_to_mask_wrt_box(polygons, box, M):
	    """Rasterize a COCO polygon segmentation relative to a box.

	    Each polygon is shifted so that (box[0], box[1]) becomes the origin and
	    scaled so that the box extent maps onto M units per axis. The result is
	    rasterized onto an M x M grid, and a pixel is 1.0 when any polygon
	    covers it.

	    Returns a 2D numpy.float32 array of shape (M, M).
	    """
	    x_origin, y_origin = box[0], box[1]
	    # Extents are floored at 1 so the scaling below never divides by zero
	    box_w = np.maximum(box[2] - x_origin, 1)
	    box_h = np.maximum(box[3] - y_origin, 1)

	    def _into_box_frame(poly):
	        pts = np.array(poly, dtype=np.float32)
	        pts[0::2] = (pts[0::2] - x_origin) * M / box_w
	        pts[1::2] = (pts[1::2] - y_origin) * M / box_h
	        return pts

	    scaled_polys = [_into_box_frame(poly) for poly in polygons]
	    encoded = mask_util.frPyObjects(scaled_polys, M, M)
	    layers = np.array(mask_util.decode(encoded), dtype=np.float32)
	    # Collapse the per-polygon axis: covered anywhere -> foreground
	    return np.array(layers.sum(axis=2) > 0, dtype=np.float32)


	def polys_to_boxes(polys):
	    """Convert a list of polygons into an array of tight bounding boxes.

	    Each entry of `polys` is itself a list of flat [x0, y0, x1, y1, ...]
	    coordinate sequences; the box spans all of them.

	    Returns a (len(polys), 4) numpy.float32 array of (x0, y0, x1, y1) rows.
	    """
	    boxes = np.zeros((len(polys), 4), dtype=np.float32)
	    for row, parts in enumerate(polys):
	        # Even positions are x coordinates, odd positions are y coordinates
	        x_coords = [part[0::2] for part in parts]
	        y_coords = [part[1::2] for part in parts]
	        boxes[row, :] = [
	            min(map(min, x_coords)),
	            min(map(min, y_coords)),
	            max(map(max, x_coords)),
	            max(map(max, y_coords)),
	        ]
	    return boxes


	def rle_mask_voting(top_masks, all_masks, all_dets, iou_thresh, binarize_thresh, method='AVG'):
	    """Returns new masks (in correspondence with `top_masks`) by combining
	    multiple overlapping masks coming from the pool of `all_masks`. Two methods
	    for combining masks are supported: 'AVG' uses a weighted average of
	    overlapping mask pixels; 'UNION' takes the union of all mask pixels.

	    Args:
	        top_masks: list of RLE masks to refine.
	        all_masks: list of RLE masks that may vote.
	        all_dets: array with one row per entry of `all_masks`; columns 0-3
	            are read as an (x0, y0, x1, y1) box and column 4 as the score.
	        iou_thresh: minimum IoU with a top mask for a pool mask to vote.
	        binarize_thresh: cut-off applied to the weighted average ('AVG' only).
	        method: 'AVG' or 'UNION'.

	    Returns:
	        A list with one RLE per entry of `top_masks`. A top mask that is
	        empty, or that has at most one voter, is passed through unchanged.
	        An empty `top_masks` yields an empty list.

	    Raises:
	        NotImplementedError: if a top mask needs voting and `method` is
	            neither 'AVG' nor 'UNION'.
	    """
	    if len(top_masks) == 0:
	        # Keep the return type a list, matching the non-empty case
	        return []

	    all_not_crowd = [False] * len(all_masks)
	    top_to_all_overlaps = mask_util.iou(top_masks, all_masks, all_not_crowd)
	    decoded_all_masks = [np.array(mask_util.decode(rle), dtype=np.float32) for rle in all_masks]
	    decoded_top_masks = [np.array(mask_util.decode(rle), dtype=np.float32) for rle in top_masks]
	    all_boxes = all_dets[:, :4].astype(np.int32)
	    all_scores = all_dets[:, 4]

	    # Fill box support with weights: each pool mask weighs its detection
	    # score inside its (image-clipped) box ...
	    mask_shape = decoded_all_masks[0].shape
	    mask_weights = np.zeros((len(all_masks), mask_shape[0], mask_shape[1]))
	    for k in range(len(all_masks)):
	        ref_box = all_boxes[k]
	        x_0 = max(ref_box[0], 0)
	        x_1 = min(ref_box[2] + 1, mask_shape[1])
	        y_0 = max(ref_box[1], 0)
	        y_1 = min(ref_box[3] + 1, mask_shape[0])
	        mask_weights[k, y_0:y_1, x_0:x_1] = all_scores[k]
	    # ... and a small positive floor everywhere else, so the weights of a
	    # pixel never sum to zero in the weighted average
	    mask_weights = np.maximum(mask_weights, 1e-5)

	    top_segms_out = []
	    for k in range(len(top_masks)):
	        # Corner case of empty mask
	        if decoded_top_masks[k].sum() == 0:
	            top_segms_out.append(top_masks[k])
	            continue

	        inds_to_vote = np.where(top_to_all_overlaps[k] >= iou_thresh)[0]
	        # Nothing to combine: a single voter (typically the mask itself) or
	        # no voter at all. The zero-voter case used to fall through and
	        # crash while combining an empty list of masks.
	        if len(inds_to_vote) <= 1:
	            top_segms_out.append(top_masks[k])
	            continue

	        masks_to_vote = [decoded_all_masks[i] for i in inds_to_vote]
	        if method == 'AVG':
	            ws = mask_weights[inds_to_vote]
	            soft_mask = np.average(masks_to_vote, axis=0, weights=ws)
	            mask = np.array(soft_mask > binarize_thresh, dtype=np.uint8)
	        elif method == 'UNION':
	            # Any pixel that's on joins the mask
	            soft_mask = np.sum(masks_to_vote, axis=0)
	            mask = np.array(soft_mask > 1e-5, dtype=np.uint8)
	        else:
	            raise NotImplementedError('Method {} is unknown'.format(method))
	        # Encode through a trailing singleton axis and unwrap the single RLE
	        rle = mask_util.encode(np.array(mask[:, :, np.newaxis], order='F'))[0]
	        top_segms_out.append(rle)

	    return top_segms_out


	def rle_mask_nms(masks, dets, thresh, mode='IOU'):
	    """Greedy non-maximum suppression driven by the overlap between masks.

	    Masks are visited in order of decreasing score (column 4 of `dets`). A
	    visited mask is kept, and every remaining mask whose overlap with it
	    exceeds `thresh` is dropped. `mode` selects the overlap measure:

	    'IOU': standard intersection over union.
	    'IOMA': intersection over minimum area.
	    'CONTAINMENT': the asymmetric crowd-style measure from `mask_util.iou`,
	        i.e. intersection over the area of one of the two masks.

	    Returns the kept indices into `masks`, highest score first. Zero or one
	    input mask short-circuits to [] or [0] before `mode` is looked at.
	    Raises NotImplementedError for any other `mode`.
	    """
	    num_masks = len(masks)
	    if num_masks == 0:
	        return []
	    if num_masks == 1:
	        return [0]

	    if mode == 'IOU':
	        # overlaps[m1, m2] = area(intersect(m1, m2)) / area(union(m1, m2))
	        overlaps = mask_util.iou(masks, masks, [False] * num_masks)
	    elif mode in ('IOMA', 'CONTAINMENT'):
	        # Flagging every mask as crowd makes the measure normalise the
	        # intersection by a single mask's area instead of the union
	        overlaps = mask_util.iou(masks, masks, [True] * num_masks)
	        if mode == 'IOMA':
	            # The larger of the two one-sided ratios is the one divided by
	            # the smaller area: intersection / min(area(m1), area(m2))
	            overlaps = np.maximum(overlaps, overlaps.transpose())
	    else:
	        raise NotImplementedError('Mode {} is unknown'.format(mode))

	    remaining = np.argsort(-dets[:, 4])
	    keep = []
	    while remaining.size > 0:
	        best, rest = remaining[0], remaining[1:]
	        keep.append(best)
	        # Survivors are the candidates that do not overlap `best` too much
	        remaining = rest[overlaps[best, rest] <= thresh]

	    return keep


	def rle_masks_to_boxes(masks):
	    """Computes the bounding box of each mask in a list of RLE encoded masks.

	    Returns a pair (boxes, keep). `boxes` is a (len(masks), 4) array whose
	    row i holds the inclusive (x0, y0, x1, y1) extent of mask i; rows that
	    belong to empty masks are left as zeros. `keep` is the array of indices
	    of the non-empty masks.

	    NOTE: for an empty `masks` the function returns a bare empty list, not
	    a pair.
	    """
	    num_masks = len(masks)
	    if num_masks == 0:
	        return []

	    boxes = np.zeros((num_masks, 4))
	    non_empty = np.zeros(num_masks, dtype=bool)
	    for idx, rle in enumerate(masks):
	        decoded = np.array(mask_util.decode(rle), dtype=np.float32)
	        if decoded.sum() == 0:
	            # Empty mask: keep the zero row and leave it out of `keep`
	            continue
	        # Columns / rows containing at least one foreground pixel
	        cols = np.where(decoded.sum(axis=0) > 0)[0]
	        rows = np.where(decoded.sum(axis=1) > 0)[0]
	        boxes[idx, :] = (cols.min(), rows.min(), cols.max(), rows.max())
	        non_empty[idx] = True

	    return boxes, np.where(non_empty)[0]