zuv0
/

SDXLF17

Transformers

Model card Files Files and versions Community

SDXLF17 / extensions-builtin /forge_legacy_preprocessors /annotator /densepose /densepose.py

Viperboy123

auto

8d7ec14 10 months ago

raw

history blame contribute delete

11.9 kB

	from typing import Tuple
	import math
	import numpy as np
	from enum import IntEnum
	from typing import List, Tuple, Union
	import torch
	from torch.nn import functional as F
	import logging
	import cv2

	# Type aliases used by the annotations in this module.
	# Images are handled as NumPy arrays.
	Image = np.ndarray
	# Boxes are handled as a torch tensor.
	Boxes = torch.Tensor
	# A pair of ints describing an image size (not referenced by the code visible here).
	ImageSizeType = Tuple[int, int]
	# Every container type accepted (and returned) by BoxMode.convert.
	_RawBoxType = Union[List[float], Tuple[float, ...], torch.Tensor, np.ndarray]
	# A box given as four ints; make_int_box produces it and the resampling
	# helpers unpack it as (x, y, w, h).
	IntTupleBox = Tuple[int, int, int, int]

	class BoxMode(IntEnum):
	    """
	    Enum of different ways to represent a box.
	    """

	    # (x0, y0, x1, y1) in absolute floating points coordinates.
	    # The coordinates in range [0, width or height].
	    XYXY_ABS = 0
	    # (x0, y0, w, h) in absolute floating points coordinates.
	    XYWH_ABS = 1
	    # Not yet supported!
	    # (x0, y0, x1, y1) in range [0, 1]. They are relative to the size of the image.
	    XYXY_REL = 2
	    # Not yet supported!
	    # (x0, y0, w, h) in range [0, 1]. They are relative to the size of the image.
	    XYWH_REL = 3
	    # (xc, yc, w, h, a) in absolute floating points coordinates.
	    # (xc, yc) is the center of the rotated box, and the angle a is in degrees ccw.
	    XYWHA_ABS = 4

	    @staticmethod
	    def convert(box: _RawBoxType, from_mode: "BoxMode", to_mode: "BoxMode") -> _RawBoxType:
	        """
	        Convert a box (or a batch of boxes) between representations.

	        Supported conversions are XYXY_ABS <-> XYWH_ABS, XYWH_ABS -> XYWHA_ABS
	        and XYWHA_ABS -> XYXY_ABS.

	        Args:
	            box: can be a k-tuple, k-list or an Nxk array/tensor, where k = 4 or 5
	            from_mode, to_mode (BoxMode)

	        Returns:
	            The converted box of the same type. When both modes are equal the
	            input object itself is returned; otherwise the input is left
	            unmodified and a new object is returned.

	        Raises:
	            AssertionError: if a tuple/list box does not have 4 or 5 elements,
	                if a relative mode is requested, or if an XYWHA_ABS input
	                does not have 5 values per box.
	            NotImplementedError: for any other unsupported mode pair.
	        """
	        if from_mode == to_mode:
	            return box

	        original_type = type(box)
	        is_numpy = isinstance(box, np.ndarray)
	        single_box = isinstance(box, (list, tuple))
	        if single_box:
	            assert len(box) == 4 or len(box) == 5, (
	                "BoxMode.convert takes either a k-tuple/list or an Nxk array/tensor,"
	                " where k == 4 or 5"
	            )
	            # Work on a 1xk tensor so that the column indexing below applies.
	            arr = torch.tensor(box)[None, :]
	        else:
	            # avoid modifying the input box
	            if is_numpy:
	                arr = torch.from_numpy(np.asarray(box)).clone()
	            else:
	                arr = box.clone()

	        assert to_mode not in [BoxMode.XYXY_REL, BoxMode.XYWH_REL] and from_mode not in [
	            BoxMode.XYXY_REL,
	            BoxMode.XYWH_REL,
	        ], "Relative mode not yet supported!"

	        if from_mode == BoxMode.XYWHA_ABS and to_mode == BoxMode.XYXY_ABS:
	            assert (
	                arr.shape[-1] == 5
	            ), "The last dimension of input shape must be 5 for XYWHA format"
	            original_dtype = arr.dtype
	            # Do the trigonometry in double precision, cast back at the end.
	            arr = arr.double()

	            w = arr[:, 2]
	            h = arr[:, 3]
	            a = arr[:, 4]
	            c = torch.abs(torch.cos(a * math.pi / 180.0))
	            s = torch.abs(torch.sin(a * math.pi / 180.0))
	            # This basically computes the horizontal bounding rectangle of the rotated box
	            new_w = c * w + s * h
	            new_h = c * h + s * w

	            # convert center to top-left corner
	            arr[:, 0] -= new_w / 2.0
	            arr[:, 1] -= new_h / 2.0
	            # bottom-right corner
	            arr[:, 2] = arr[:, 0] + new_w
	            arr[:, 3] = arr[:, 1] + new_h

	            arr = arr[:, :4].to(dtype=original_dtype)
	        elif from_mode == BoxMode.XYWH_ABS and to_mode == BoxMode.XYWHA_ABS:
	            original_dtype = arr.dtype
	            arr = arr.double()
	            # move the top-left corner to the box center
	            arr[:, 0] += arr[:, 2] / 2.0
	            arr[:, 1] += arr[:, 3] / 2.0
	            # The angle column must live on the same device as the boxes,
	            # otherwise torch.cat fails for non-CPU tensors.
	            angles = torch.zeros((arr.shape[0], 1), dtype=arr.dtype, device=arr.device)
	            arr = torch.cat((arr, angles), dim=1).to(dtype=original_dtype)
	        else:
	            if to_mode == BoxMode.XYXY_ABS and from_mode == BoxMode.XYWH_ABS:
	                arr[:, 2] += arr[:, 0]
	                arr[:, 3] += arr[:, 1]
	            elif from_mode == BoxMode.XYXY_ABS and to_mode == BoxMode.XYWH_ABS:
	                arr[:, 2] -= arr[:, 0]
	                arr[:, 3] -= arr[:, 1]
	            else:
	                raise NotImplementedError(
	                    "Conversion from BoxMode {} to {} is not supported yet".format(
	                        from_mode, to_mode
	                    )
	                )

	        if single_box:
	            # Rebuild the caller's container type (list or tuple).
	            return original_type(arr.flatten().tolist())
	        if is_numpy:
	            return arr.numpy()
	        else:
	            return arr

	class MatrixVisualizer:
	    """
	    Base visualizer for matrix data

	    Renders a 2-D matrix as a color-mapped overlay and alpha-blends it into
	    the bounding-box region of a BGR image. Pixels whose mask value is zero
	    keep the content of the target image.
	    """

	    def __init__(
	        self,
	        inplace=True,
	        cmap=cv2.COLORMAP_PARULA,
	        val_scale=1.0,
	        alpha=0.7,
	        interp_method_matrix=cv2.INTER_LINEAR,
	        interp_method_mask=cv2.INTER_NEAREST,
	    ):
	        """
	        Args:
	            inplace: if True, draw into the image passed to `visualize`;
	                otherwise draw onto a zeroed array derived from it
	            cmap: colormap id handed to `cv2.applyColorMap`
	            val_scale: factor the matrix is multiplied by before it is
	                clipped to [0, 255]
	            alpha: blending weight of the color-mapped overlay
	            interp_method_matrix: interpolation flag used when resizing the matrix
	            interp_method_mask: interpolation flag used when resizing the mask
	        """
	        self.inplace = inplace
	        self.cmap = cmap
	        self.val_scale = val_scale
	        self.alpha = alpha
	        self.interp_method_matrix = interp_method_matrix
	        self.interp_method_mask = interp_method_mask

	    def visualize(self, image_bgr, mask, matrix, bbox_xywh):
	        """
	        Blend the color-mapped matrix into the box region of the image.

	        Args:
	            image_bgr: uint8 array with 3 dimensions and 3 channels
	            mask: 2-D uint8 array; zero marks pixels that are left untouched
	            matrix: 2-D array of values to color-map
	            bbox_xywh: (x, y, w, h) of the target region; values are cast to int

	        Returns:
	            A uint8 copy of the target image. If the box has a non-positive
	            width or height, the input `image_bgr` is returned unchanged.
	        """
	        self._check_image(image_bgr)
	        self._check_mask_matrix(mask, matrix)
	        if self.inplace:
	            image_target_bgr = image_bgr
	        else:
	            image_target_bgr = image_bgr * 0
	        x, y, w, h = [int(v) for v in bbox_xywh]
	        if w <= 0 or h <= 0:
	            return image_bgr
	        mask, matrix = self._resize(mask, matrix, w, h)
	        # Boolean background selector replicated over the 3 color channels.
	        mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3])
	        matrix_scaled = matrix.astype(np.float32) * self.val_scale
	        _EPSILON = 1e-6
	        if np.any(matrix_scaled > 255 + _EPSILON):
	            logger = logging.getLogger(__name__)
	            logger.warning(
	                f"Matrix has values > {255 + _EPSILON} after " f"scaling, clipping to [0..255]"
	            )
	        matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8)
	        matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap)
	        # Background pixels take the target's own colors, so the blend below
	        # leaves them as they were.
	        matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg]
	        image_target_bgr[y : y + h, x : x + w, :] = (
	            image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha) + matrix_vis * self.alpha
	        )
	        return image_target_bgr.astype(np.uint8)

	    def _resize(self, mask, matrix, w, h):
	        """
	        Resize mask and matrix to (h, w) when their shapes differ from it.

	        The interpolation flag is passed by keyword: the third positional
	        parameter of `cv2.resize` is `dst`, so a positional flag would not
	        select the interpolation method.
	        """
	        if (w != mask.shape[1]) or (h != mask.shape[0]):
	            mask = cv2.resize(mask, (w, h), interpolation=self.interp_method_mask)
	        if (w != matrix.shape[1]) or (h != matrix.shape[0]):
	            matrix = cv2.resize(matrix, (w, h), interpolation=self.interp_method_matrix)
	        return mask, matrix

	    def _check_image(self, image_rgb):
	        """Assert that the image is a 3-channel, 3-dimensional uint8 array."""
	        assert len(image_rgb.shape) == 3
	        assert image_rgb.shape[2] == 3
	        assert image_rgb.dtype == np.uint8

	    def _check_mask_matrix(self, mask, matrix):
	        """Assert that mask and matrix are 2-D and that the mask is uint8."""
	        assert len(matrix.shape) == 2
	        assert len(mask.shape) == 2
	        assert mask.dtype == np.uint8

	class DensePoseResultsVisualizer:
	    """
	    Base class that packs each result into an IUV array and passes it to
	    `visualize_iuv_arr`, which does nothing here and is meant to be
	    overridden. The context hooks are identity functions in this class.
	    """

	    def visualize(self, image_bgr: Image, results) -> Image:
	        """
	        Draw every result onto the image.

	        Args:
	            image_bgr: image used to create the visualization context
	            results: iterable of (bbox_xywh, labels, uv) triples, where
	                labels and uv are torch tensors

	        Returns:
	            The image obtained from the context after all results were drawn.
	        """
	        context = self.create_visualization_context(image_bgr)
	        for bbox_xywh, part_labels, uv_coords in results:
	            # Stack labels and UV (scaled by 255) along the first axis, as uint8.
	            label_plane = part_labels[None].type(torch.float32)
	            stacked = torch.cat((label_plane, uv_coords * 255.0))
	            iuv_uint8 = stacked.type(torch.uint8)
	            self.visualize_iuv_arr(context, iuv_uint8.cpu().numpy(), bbox_xywh)
	        return self.context_to_image_bgr(context)

	    def create_visualization_context(self, image_bgr: Image):
	        """Return the drawing context; here it is the image itself."""
	        return image_bgr

	    def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh) -> None:
	        """Hook for drawing one IUV array; intentionally a no-op here."""
	        pass

	    def context_to_image_bgr(self, context):
	        """Return the final image for a context; here the context itself."""
	        return context

	    def get_image_bgr_from_context(self, context):
	        """Return the image to draw on for a context; here the context itself."""
	        return context

	class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer):
	    """
	    Results visualizer that color-maps one quantity extracted from the IUV
	    array, restricted to the pixels selected by a second extractor.
	    """

	    def __init__(
	        self,
	        data_extractor,
	        segm_extractor,
	        inplace=True,
	        cmap=cv2.COLORMAP_PARULA,
	        alpha=0.7,
	        val_scale=1.0,
	        **kwargs,
	    ):
	        """
	        Args:
	            data_extractor: callable mapping an IUV array to the matrix to draw
	            segm_extractor: callable mapping an IUV array to a segmentation;
	                values > 0 mark the pixels that get drawn
	            inplace, cmap, alpha, val_scale: forwarded to MatrixVisualizer
	            **kwargs: accepted and ignored
	        """
	        self.data_extractor = data_extractor
	        self.segm_extractor = segm_extractor
	        self.mask_visualizer = MatrixVisualizer(
	            inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
	        )

	    def context_to_image_bgr(self, context):
	        """Return the context unchanged."""
	        return context

	    def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh) -> None:
	        """
	        Draw one IUV array into the image held by the context.

	        The value returned by the matrix visualizer is not kept, so the
	        drawing reaches the caller only through the visualizer's
	        modification of the context image itself.
	        """
	        target_bgr = self.get_image_bgr_from_context(context)
	        values = self.data_extractor(iuv_arr)
	        foreground = self.segm_extractor(iuv_arr) > 0
	        # uint8 mask with the matrix's shape: 1 on foreground, 0 elsewhere.
	        mask = np.zeros(values.shape, dtype=np.uint8)
	        mask[foreground] = 1
	        self.mask_visualizer.visualize(target_bgr, mask, values, bbox_xywh)


	def _extract_i_from_iuvarr(iuv_arr):
	return iuv_arr[0, :, :]


	def _extract_u_from_iuvarr(iuv_arr):
	return iuv_arr[1, :, :]


	def _extract_v_from_iuvarr(iuv_arr):
	return iuv_arr[2, :, :]

	def make_int_box(box: torch.Tensor) -> IntTupleBox:
	    """
	    Convert a 4-element box tensor into a tuple of four Python ints.

	    Values are cast with `Tensor.long()` before being converted.
	    """
	    first, second, third, fourth = box.long().tolist()
	    return first, second, third, fourth

	def densepose_chart_predictor_output_to_result_with_confidences(
	    boxes: Boxes,
	    coarse_segm,
	    fine_segm,
	    u, v

	):
	    """
	    Resample chart predictor outputs to the first box in `boxes`.

	    Args:
	        boxes: box tensor in XYXY_ABS format; only the first row is used
	        coarse_segm: coarse segmentation scores
	        fine_segm: fine segmentation scores
	        u, v: U and V coordinate estimates

	    Returns:
	        (box_xywh, labels, uv): the integer XYWH box, the per-pixel labels
	        and the resampled UV tensor. Despite the function name, no
	        confidence values are returned.
	    """
	    xywh_boxes = BoxMode.convert(boxes.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
	    first_box_xywh = make_int_box(xywh_boxes[0])

	    part_labels = resample_fine_and_coarse_segm_tensors_to_bbox(
	        fine_segm, coarse_segm, first_box_xywh
	    )
	    # Drop the leading size-1 dimension of the label tensor.
	    part_labels = part_labels.squeeze(0)
	    uv_coords = resample_uv_tensors_to_bbox(u, v, part_labels, first_box_xywh)
	    return first_box_xywh, part_labels, uv_coords

	def resample_fine_and_coarse_segm_tensors_to_bbox(
	    fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox
	):
	    """
	    Resize the fine and coarse segmentation scores to the bounding box
	    size and combine them into one label per pixel of the box.

	    Args:
	        fine_segm: float tensor of shape [1, C, Hout, Wout]
	        coarse_segm: float tensor of shape [1, K, Hout, Wout]
	        box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
	            corner coordinates, width (W) and height (H); only W and H are
	            used, each clamped to at least 1
	    Return:
	        Labels for each pixel of the bounding box, a long tensor of size
	        [1, H, W]. A pixel keeps its fine label only where the coarse
	        argmax is non-zero; elsewhere the label is 0.
	    """
	    _, _, box_w, box_h = box_xywh_abs
	    target_size = (max(int(box_h), 1), max(int(box_w), 1))

	    def _resampled_argmax(scores):
	        # Bilinear resize to the box size, then the winning channel per pixel.
	        resized = F.interpolate(scores, target_size, mode="bilinear", align_corners=False)
	        return resized.argmax(dim=1)

	    foreground = (_resampled_argmax(coarse_segm) > 0).long()
	    return _resampled_argmax(fine_segm) * foreground

	def resample_uv_tensors_to_bbox(
	    u: torch.Tensor,
	    v: torch.Tensor,
	    labels: torch.Tensor,
	    box_xywh_abs: IntTupleBox,
	) -> torch.Tensor:
	    """
	    Resize the U and V estimates to the bounding box size and, for every
	    pixel, pick the value from the channel named by that pixel's label.

	    Args:
	        u (tensor [1, C, H, W] of float): U coordinates
	        v (tensor [1, C, H, W] of float): V coordinates
	        labels (tensor [H, W] of long): labels obtained by resampling segmentation
	            outputs for the given bounding box
	        box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor
	            outputs; only width and height are used, each clamped to at least 1
	    Return:
	        Resampled U and V coordinates - a tensor [2, H, W] of float. Pixels
	        with label 0 (or a label outside 1..C-1) stay 0.
	    """
	    _, _, box_w, box_h = box_xywh_abs
	    out_h = max(int(box_h), 1)
	    out_w = max(int(box_w), 1)
	    size = (out_h, out_w)
	    u_resized = F.interpolate(u, size, mode="bilinear", align_corners=False)
	    v_resized = F.interpolate(v, size, mode="bilinear", align_corners=False)

	    uv = torch.zeros([2, out_h, out_w], dtype=torch.float32, device=u.device)
	    # Views into the output; writing through them fills `uv`.
	    u_out, v_out = uv[0], uv[1]
	    # Channel 0 is skipped: label 0 pixels keep the zero initialisation.
	    for part_id in range(1, u_resized.size(1)):
	        part_mask = labels == part_id
	        u_out[part_mask] = u_resized[0, part_id][part_mask]
	        v_out[part_mask] = v_resized[0, part_id][part_mask]
	    return uv