|
import torch
import numpy as np
import logging
from copy import deepcopy

from .utils.libkdtree import KDTree

logger_py = logging.getLogger(__name__)
|
|
|
|
|
def compute_iou(occ1, occ2):
    ''' Computes the Intersection over Union (IoU) value for two sets of
    occupancy values.

    Args:
        occ1 (tensor): first set of occupancy values
        occ2 (tensor): second set of occupancy values
    '''
    occ1 = np.asarray(occ1)
    occ2 = np.asarray(occ2)

    # Flatten everything except the batch dimension
    if occ1.ndim >= 2:
        occ1 = occ1.reshape(occ1.shape[0], -1)
    if occ2.ndim >= 2:
        occ2 = occ2.reshape(occ2.shape[0], -1)

    # Convert to boolean occupancies
    occ1 = (occ1 >= 0.5)
    occ2 = (occ2 >= 0.5)

    # Compute IoU per batch element
    area_union = (occ1 | occ2).astype(np.float32).sum(axis=-1)
    area_intersect = (occ1 & occ2).astype(np.float32).sum(axis=-1)

    iou = (area_intersect / area_union)

    return iou
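
# Usage sketch (illustrative): occupancies are thresholded at 0.5 internally,
# so soft values work as well:
#   occ_a = np.array([[1., 0., 1., 1.]])
#   occ_b = np.array([[1., 1., 0., 1.]])
#   compute_iou(occ_a, occ_b)  # -> array([0.5], dtype=float32)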
|
|
|
|
|
def rgb2gray(rgb):
    ''' Converts an RGB image batch of size B x H x W x 3 to grayscale
    using the standard luminance weights.
    '''
    r, g, b = rgb[:, :, :, 0], rgb[:, :, :, 1], rgb[:, :, :, 2]
    gray = 0.2989 * r + 0.5870 * g + 0.1140 * b

    return gray
|
|
|
|
|
def sample_patch_points(
    batch_size, n_points, patch_size=1, image_resolution=(128, 128), continuous=True
):
    ''' Returns sampled points in the range [-1, 1].

    Args:
        batch_size (int): required batch size
        n_points (int): number of points to sample
        patch_size (int): size of patch; if > 1, patches of size
            patch_size x patch_size are sampled instead of individual points
        image_resolution (tuple): image resolution (required for calculating
            the pixel distances)
        continuous (bool): whether to sample continuously or only at pixel
            locations
    '''
    assert (patch_size > 0)

    # Pixel step sizes in the [0, 1] coordinate frame
    h_step = 1. / image_resolution[0]
    w_step = 1. / image_resolution[1]

    patch_size_squared = patch_size ** 2
    n_patches = int(n_points / patch_size_squared)
    if continuous:
        p = torch.rand(batch_size, n_patches, 2)
    else:
        px = torch.randint(0, image_resolution[1],
                           size=(batch_size, n_patches, 1)).float() / (image_resolution[1] - 1)
        py = torch.randint(0, image_resolution[0],
                           size=(batch_size, n_patches, 1)).float() / (image_resolution[0] - 1)
        p = torch.cat([px, py], dim=-1)

    # Shrink the range so that a whole patch anchored at p stays inside the image
    p[:, :, 0] *= 1 - (patch_size - 1) * w_step
    p[:, :, 1] *= 1 - (patch_size - 1) * h_step

    # Offsets of the individual pixels within a patch
    patch_arange = torch.arange(patch_size)
    x_offset, y_offset = torch.meshgrid(patch_arange, patch_arange)
    patch_offsets = torch.stack([x_offset.reshape(-1), y_offset.reshape(-1)],
                                dim=1).view(1, 1, -1, 2).repeat(batch_size, n_patches, 1, 1).float()

    patch_offsets[:, :, :, 0] *= w_step
    patch_offsets[:, :, :, 1] *= h_step

    # Add the pixel offsets to the sampled patch anchor points
    p = p.view(batch_size, n_patches, 1, 2) + patch_offsets

    # Scale from [0, 1] to [-1, 1]
    p = p * 2 - 1

    p = p.view(batch_size, -1, 2)

    amax, amin = p.max(), p.min()
    assert (amax <= 1. and amin >= -1.)

    return p
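
# Usage sketch (illustrative): 1024 points sampled as 64 patches of size
# 4 x 4 (64 * 4 ** 2 == 1024):
#   p = sample_patch_points(batch_size=2, n_points=1024, patch_size=4)
#   # p.shape == (2, 1024, 2), all values in [-1, 1]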
|
|
|
|
|
def get_proposal_points_in_unit_cube(ray0, ray_direction, padding=0.1, eps=1e-6, n_steps=40):
    ''' Returns n_steps equally spaced points inside the unit cube on the rays
    cast from ray0 with direction ray_direction.

    This function is used to get the ray marching points {p^ray_j} for a
    given camera position ray0 and a given ray direction ray_direction which
    goes from the camera position to the pixel location.

    NOTE: The returned values d_proposal are the lengths of the ray:
        p^ray_j = ray0 + d_proposal_j * ray_direction

    Args:
        ray0 (tensor): start positions of the rays
        ray_direction (tensor): directions of the rays
        padding (float): padding which is applied to the unit cube
        eps (float): epsilon value for numerical stability
        n_steps (int): number of proposal steps per ray
    '''
    device = ray0.device

    # Find the entry and exit points of each ray with the (padded) unit cube
    p_intervals, d_intervals, mask_inside_cube = \
        check_ray_intersection_with_unit_cube(ray0, ray_direction, padding,
                                              eps)
    # Linearly interpolate n_steps ray lengths between entry and exit
    d_proposal = d_intervals[:, :, 0].unsqueeze(-1) + \
        torch.linspace(0, 1, steps=n_steps).to(device).view(1, 1, -1) * \
        (d_intervals[:, :, 1] - d_intervals[:, :, 0]).unsqueeze(-1)
    d_proposal = d_proposal.unsqueeze(-1)

    return d_proposal, mask_inside_cube
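
# Usage sketch (illustrative, rays of shape (B, N, 3)):
#   d_proposal, mask = get_proposal_points_in_unit_cube(ray0, ray_direction)
#   # d_proposal.shape == (B, N, 40, 1); the proposal points are
#   # ray0 + d_proposal * ray_direction, e.g. for evaluating an occupancy
#   # network along each ray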
|
|
|
|
|
def check_ray_intersection_with_unit_cube(ray0, ray_direction, padding=0.1, eps=1e-6, scale=2.0):
    ''' Checks if rays ray0 + d * ray_direction intersect with the
    axis-aligned cube of side length scale with padding applied.

    It returns the two intersection points as well as the sorted ray lengths d.

    Args:
        ray0 (tensor): start positions of the rays
        ray_direction (tensor): directions of the rays
        padding (float): padding which is applied to the cube
        eps (float): epsilon value for numerical stability
        scale (float): cube side length
    '''
    batch_size, n_pts, _ = ray0.shape
    device = ray0.device

    # Plane offsets of the six axis-aligned cube faces;
    # p_distance is the half-extent of the padded cube
    p_distance = (scale * 0.5) + padding / 2
    p_e = torch.ones(batch_size, n_pts, 6).to(device) * p_distance
    p_e[:, :, 3:] *= -1.

    # Solve ray0 + d * ray_direction = p_e for each of the six planes
    numerator = p_e - ray0.repeat(1, 1, 2)
    denominator = ray_direction.repeat(1, 1, 2)
    d_intersect = numerator / denominator
    p_intersect = ray0.unsqueeze(-2) + d_intersect.unsqueeze(-1) * \
        ray_direction.unsqueeze(-2)

    # Check which of the plane intersections lie on the cube surface
    p_mask_inside_cube = (
        (p_intersect[:, :, :, 0] <= p_distance + eps) &
        (p_intersect[:, :, :, 1] <= p_distance + eps) &
        (p_intersect[:, :, :, 2] <= p_distance + eps) &
        (p_intersect[:, :, :, 0] >= -(p_distance + eps)) &
        (p_intersect[:, :, :, 1] >= -(p_distance + eps)) &
        (p_intersect[:, :, :, 2] >= -(p_distance + eps))
    ).cpu()

    # A ray intersects the cube iff exactly two of its six plane
    # intersections lie on the cube surface
    mask_inside_cube = p_mask_inside_cube.sum(-1) == 2

    # Gather the two valid intersection points per intersecting ray
    p_intervals = p_intersect[mask_inside_cube][p_mask_inside_cube[mask_inside_cube]].view(-1, 2, 3)
    p_intervals_batch = torch.zeros(batch_size, n_pts, 2, 3).to(device)
    p_intervals_batch[mask_inside_cube] = p_intervals

    # Convert the intersection points to ray lengths d
    d_intervals_batch = torch.zeros(batch_size, n_pts, 2).to(device)
    norm_ray = torch.norm(ray_direction[mask_inside_cube], dim=-1)
    d_intervals_batch[mask_inside_cube] = torch.stack(
        [
            torch.norm(p_intervals[:, 0] - ray0[mask_inside_cube], dim=-1) / norm_ray,
            torch.norm(p_intervals[:, 1] - ray0[mask_inside_cube], dim=-1) / norm_ray,
        ],
        dim=-1
    )

    # Sort the ray lengths (entry point first) and reorder the
    # intersection points accordingly
    d_intervals_batch, indices_sort = d_intervals_batch.sort()
    p_intervals_batch = p_intervals_batch[torch.arange(batch_size).view(-1, 1, 1),
                                          torch.arange(n_pts).view(1, -1, 1), indices_sort]

    return p_intervals_batch, d_intervals_batch, mask_inside_cube
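
# Usage sketch (illustrative): a ray starting outside the cube along +x.
# With the defaults, the half-extent is scale / 2 + padding / 2 = 1.05:
#   ray0 = torch.tensor([[[-2., 0., 0.]]])
#   ray_dir = torch.tensor([[[1., 0., 0.]]])
#   _, d_iv, mask = check_ray_intersection_with_unit_cube(ray0, ray_dir)
#   # mask[0, 0] is True and d_iv[0, 0] is approximately [0.95, 3.05]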
|
|
|
|
|
def intersect_camera_rays_with_unit_cube(
    pixels, camera_mat, world_mat, scale_mat, padding=0.1, eps=1e-6, use_ray_length_as_depth=True
):
    ''' Returns the intersection points of rays cast from the camera origin
    through pixel points p on the image plane.

    The function returns the intersection points as well as the depth values
    and a mask specifying which rays intersect the unit cube.

    Args:
        pixels (tensor): pixel points on the image plane (range [-1, 1])
        camera_mat (tensor): camera matrix
        world_mat (tensor): world matrix
        scale_mat (tensor): scale matrix
        padding (float): padding which is applied to the unit cube
        eps (float): epsilon value for numerical stability
        use_ray_length_as_depth (bool): whether to return the ray length
            instead of the camera-space z-value as depth
    '''
    batch_size, n_points, _ = pixels.shape

    # Cast rays from the camera origin through the pixels in world space
    pixel_world = image_points_to_world(pixels, camera_mat, world_mat, scale_mat)
    camera_world = origin_to_world(n_points, camera_mat, world_mat, scale_mat)
    ray_vector = (pixel_world - camera_world)

    p_cube, d_cube, mask_cube = check_ray_intersection_with_unit_cube(
        camera_world, ray_vector, padding=padding, eps=eps
    )
    if not use_ray_length_as_depth:
        # Convert ray lengths to camera-space depth (z-coordinate)
        p_cam = transform_to_camera_space(
            p_cube.view(batch_size, -1, 3), camera_mat, world_mat, scale_mat
        ).view(batch_size, n_points, -1, 3)
        d_cube = p_cam[:, :, :, -1]
    return p_cube, d_cube, mask_cube
|
|
|
|
|
def arange_pixels(resolution=(128, 128), batch_size=1, image_range=(-1., 1.), subsample_to=None):
    ''' Arranges pixels for given resolution in range image_range.

    The function returns the unscaled pixel locations as integers and the
    scaled float values.

    Args:
        resolution (tuple): image resolution
        batch_size (int): batch size
        image_range (tuple): range of output points (default [-1, 1]);
            assumed to be symmetric around zero
        subsample_to (int): if integer and > 0, the points are randomly
            subsampled to this value
    '''
    h, w = resolution
    n_points = h * w

    # Arrange pixel locations in a grid
    pixel_locations = torch.meshgrid(torch.arange(0, w), torch.arange(0, h))
    pixel_locations = torch.stack([pixel_locations[0], pixel_locations[1]],
                                  dim=-1).long().view(1, -1, 2).repeat(batch_size, 1, 1)
    pixel_scaled = pixel_locations.clone().float()

    # Shift and scale points to image_range (the shift loc is correct for
    # ranges symmetric around zero such as the default [-1, 1])
    scale = (image_range[1] - image_range[0])
    loc = scale / 2
    pixel_scaled[:, :, 0] = scale * pixel_scaled[:, :, 0] / (w - 1) - loc
    pixel_scaled[:, :, 1] = scale * pixel_scaled[:, :, 1] / (h - 1) - loc

    # Subsample points if subsample_to is not None and > 0
    if (subsample_to is not None and subsample_to > 0 and subsample_to < n_points):
        idx = np.random.choice(pixel_scaled.shape[1], size=(subsample_to,), replace=False)
        pixel_scaled = pixel_scaled[:, idx]
        pixel_locations = pixel_locations[:, idx]

    return pixel_locations, pixel_scaled
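
# Usage sketch (illustrative):
#   pix, pix_scaled = arange_pixels(resolution=(4, 4))
#   # pix.shape == (1, 16, 2) with integer coordinates in [0, 3];
#   # pix_scaled holds the same locations scaled to [-1, 1]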
|
|
|
|
|
def to_pytorch(tensor, return_type=False):
    ''' Converts the input tensor to a PyTorch tensor.

    Args:
        tensor (tensor): NumPy or PyTorch tensor
        return_type (bool): whether to also return the input type
    '''
    is_numpy = False
    if isinstance(tensor, np.ndarray):
        tensor = torch.from_numpy(tensor)
        is_numpy = True
    tensor = tensor.clone()
    if return_type:
        return tensor, is_numpy
    return tensor
|
|
|
|
|
def get_mask(tensor):
    ''' Returns a mask of the valid (finite and non-NaN) values in tensor.

    Args:
        tensor (tensor): NumPy or PyTorch tensor
    '''
    tensor, is_numpy = to_pytorch(tensor, True)
    mask = (abs(tensor) != np.inf) & (~torch.isnan(tensor))
    mask = mask.to(torch.bool)
    if is_numpy:
        mask = mask.numpy()

    return mask
|
|
|
|
|
def transform_mesh(mesh, transform):
    ''' Transforms a mesh with the given transformation.

    Args:
        mesh (trimesh mesh): mesh
        transform (tensor): transformation matrix of size 4 x 4
    '''
    mesh = deepcopy(mesh)
    v = np.asarray(mesh.vertices).astype(np.float32)
    v_transformed = transform_pointcloud(v, transform)
    mesh.vertices = v_transformed
    return mesh
|
|
|
|
|
def transform_pointcloud(pointcloud, transform):
    ''' Transforms a point cloud with the given transformation.

    Args:
        pointcloud (tensor): tensor of size N x 3
        transform (tensor): transformation of size 4 x 4
    '''
    assert (transform.shape == (4, 4) and pointcloud.shape[-1] == 3)

    pcl, is_numpy = to_pytorch(pointcloud, True)
    transform = to_pytorch(transform)

    # Transform to homogeneous coordinates of shape 4 x N
    pcl_hom = torch.cat([pcl, torch.ones(pcl.shape[0], 1)], dim=-1).transpose(1, 0)

    # Apply the transformation
    pcl_hom_transformed = transform @ pcl_hom

    # Transform back to 3D coordinates of shape N x 3
    pcl_out = pcl_hom_transformed[:3].transpose(1, 0)
    if is_numpy:
        pcl_out = pcl_out.numpy()

    return pcl_out
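
# Usage sketch (illustrative): translating a point cloud by (1, 0, 0):
#   T = np.eye(4, dtype=np.float32)
#   T[0, 3] = 1.
#   transform_pointcloud(np.zeros((5, 3), dtype=np.float32), T)
#   # -> 5 x 3 array with every point at (1, 0, 0)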
|
|
|
|
|
def transform_points_batch(p, transform):
    ''' Transforms a batch of points with the given batch of transforms.

    Args:
        p (tensor): tensor of size B x N x 3
        transform (tensor): transformation of size B x 4 x 4
    '''
    device = p.device
    assert (transform.shape[1:] == (4, 4) and p.shape[-1] == 3 and p.shape[0] == transform.shape[0])

    # Transform to homogeneous coordinates of shape B x 4 x N
    pcl_hom = torch.cat([p, torch.ones(p.shape[0], p.shape[1], 1).to(device)],
                        dim=-1).transpose(2, 1)

    # Apply the transformation batch-wise
    pcl_hom_transformed = transform @ pcl_hom

    # Transform back to 3D coordinates of shape B x N x 3
    pcl_out = pcl_hom_transformed[:, :3].transpose(2, 1)
    return pcl_out
|
|
|
|
|
def get_tensor_values(
    tensor, p, grid_sample=True, mode='nearest', with_mask=False, squeeze_channel_dim=False
):
    ''' Returns values from tensor at the given locations p.

    Args:
        tensor (tensor): tensor of size B x C x H x W
        p (tensor): position values scaled between [-1, 1] and
            of size B x N x 2
        grid_sample (bool): whether to use grid sampling
        mode (string): which mode to perform grid sampling in
        with_mask (bool): whether to return the mask for invalid values
        squeeze_channel_dim (bool): whether to squeeze the channel dimension
            (only applicable to 1D data)
    '''
    p = to_pytorch(p)
    tensor, is_numpy = to_pytorch(tensor, True)
    batch_size, _, h, w = tensor.shape

    if grid_sample:
        p = p.unsqueeze(1)
        values = torch.nn.functional.grid_sample(tensor, p, mode=mode)
        values = values.squeeze(2)
        values = values.permute(0, 2, 1)
    else:
        # Convert from [-1, 1] to integer pixel indices; clamp to guard the
        # boundary case p == 1, which would otherwise index out of range
        p[:, :, 0] = (p[:, :, 0] + 1) * w / 2
        p[:, :, 1] = (p[:, :, 1] + 1) * h / 2
        p = p.long()
        p[:, :, 0] = p[:, :, 0].clamp(0, w - 1)
        p[:, :, 1] = p[:, :, 1].clamp(0, h - 1)
        values = tensor[torch.arange(batch_size).unsqueeze(-1), :, p[:, :, 1], p[:, :, 0]]

    if with_mask:
        mask = get_mask(values)
        if squeeze_channel_dim:
            mask = mask.squeeze(-1)
        if is_numpy:
            mask = mask.numpy()

    if squeeze_channel_dim:
        values = values.squeeze(-1)

    if is_numpy:
        values = values.numpy()

    if with_mask:
        return values, mask
    return values
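
# Usage sketch (illustrative): sampling an image batch at continuous pixel
# locations given in [-1, 1]:
#   img = torch.rand(2, 3, 32, 32)
#   p = torch.rand(2, 100, 2) * 2 - 1
#   vals = get_tensor_values(img, p)  # vals.shape == (2, 100, 3)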
|
|
|
|
|
def transform_to_world(pixels, depth, camera_mat, world_mat, scale_mat, invert=True):
    ''' Transforms pixel positions p with given depth value d to world coordinates.

    Args:
        pixels (tensor): pixel tensor of size B x N x 2
        depth (tensor): depth tensor of size B x N x 1
        camera_mat (tensor): camera matrix
        world_mat (tensor): world matrix
        scale_mat (tensor): scale matrix
        invert (bool): whether to invert the matrices (default: True)
    '''
    assert (pixels.shape[-1] == 2)

    # Convert everything to PyTorch tensors
    pixels, is_numpy = to_pytorch(pixels, True)
    depth = to_pytorch(depth)
    camera_mat = to_pytorch(camera_mat)
    world_mat = to_pytorch(world_mat)
    scale_mat = to_pytorch(scale_mat)

    # Invert the camera, world and scale matrices if necessary
    if invert:
        camera_mat = torch.inverse(camera_mat)
        world_mat = torch.inverse(world_mat)
        scale_mat = torch.inverse(scale_mat)

    # Transform pixels to homogeneous coordinates of shape B x 4 x N
    pixels = pixels.permute(0, 2, 1)
    pixels = torch.cat([pixels, torch.ones_like(pixels)], dim=1)

    # Project pixels into camera space by multiplying with the depth
    pixels[:, :3] = pixels[:, :3] * depth.permute(0, 2, 1)

    # Transform pixels to world space
    p_world = scale_mat @ world_mat @ camera_mat @ pixels

    # Transform back to 3D coordinates of shape B x N x 3
    p_world = p_world[:, :3].permute(0, 2, 1)

    if is_numpy:
        p_world = p_world.numpy()
    return p_world
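
# Usage sketch (illustrative, assumes (B, N, 2) pixels, (B, N, 1) depths and
# (B, 4, 4) camera/world/scale matrices):
#   p_world = transform_to_world(pixels, depth, camera_mat, world_mat,
#                                scale_mat)
#   # p_world.shape == (B, N, 3)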
|
|
|
|
|
def transform_to_camera_space(p_world, camera_mat, world_mat, scale_mat):
    ''' Transforms world points to camera space.

    Args:
        p_world (tensor): world points tensor of size B x N x 3
        camera_mat (tensor): camera matrix
        world_mat (tensor): world matrix
        scale_mat (tensor): scale matrix
    '''
    batch_size, n_p, _ = p_world.shape
    device = p_world.device

    # Transform world points to homogeneous coordinates of shape B x 4 x N
    p_world = torch.cat([p_world, torch.ones(batch_size, n_p, 1).to(device)],
                        dim=-1).permute(0, 2, 1)

    # Project points into camera space
    p_cam = camera_mat @ world_mat @ scale_mat @ p_world

    # Transform back to 3D coordinates of shape B x N x 3
    p_cam = p_cam[:, :3].permute(0, 2, 1)
    return p_cam
|
|
|
|
|
def origin_to_world(n_points, camera_mat, world_mat, scale_mat, invert=True):
    ''' Transforms the origin (camera location) to world coordinates.

    Args:
        n_points (int): how often the transformed origin is repeated in the
            form (batch_size, n_points, 3)
        camera_mat (tensor): camera matrix
        world_mat (tensor): world matrix
        scale_mat (tensor): scale matrix
        invert (bool): whether to invert the matrices (default: True)
    '''
    batch_size = camera_mat.shape[0]
    device = camera_mat.device

    # Create the origin in homogeneous coordinates of shape B x 4 x n_points
    p = torch.zeros(batch_size, 4, n_points).to(device)
    p[:, -1] = 1.

    # Invert the camera, world and scale matrices if necessary
    if invert:
        camera_mat = torch.inverse(camera_mat)
        world_mat = torch.inverse(world_mat)
        scale_mat = torch.inverse(scale_mat)

    # Transform the origin to world coordinates
    p_world = scale_mat @ world_mat @ camera_mat @ p

    # Transform back to 3D coordinates of shape B x n_points x 3
    p_world = p_world[:, :3].permute(0, 2, 1)
    return p_world
|
|
|
|
|
def image_points_to_world(image_points, camera_mat, world_mat, scale_mat, invert=True):
    ''' Transforms points on the image plane to world coordinates.

    In contrast to transform_to_world, no depth value is needed as points on
    the image plane have a fixed depth of 1.

    Args:
        image_points (tensor): image points tensor of size B x N x 2
        camera_mat (tensor): camera matrix
        world_mat (tensor): world matrix
        scale_mat (tensor): scale matrix
        invert (bool): whether to invert the matrices (default: True)
    '''
    batch_size, n_pts, dim = image_points.shape
    assert (dim == 2)
    device = image_points.device

    # Use a fixed depth of 1 for all points on the image plane
    d_image = torch.ones(batch_size, n_pts, 1).to(device)
    return transform_to_world(
        image_points, d_image, camera_mat, world_mat, scale_mat, invert=invert
    )
|
|
|
|
|
def check_weights(params):
    ''' Checks weights for illegal values.

    Args:
        params (dict): model parameter dictionary
    '''
    for k, v in params.items():
        if torch.isnan(v).any():
            logger_py.warning('NaN values detected in model weight %s.' % k)
|
|
|
|
|
def check_tensor(tensor, tensorname='', input_tensor=None):
    ''' Checks tensor for illegal values.

    Args:
        tensor (tensor): tensor
        tensorname (string): name of the tensor
        input_tensor (tensor): previous input
    '''
    if torch.isnan(tensor).any():
        logger_py.warning('Tensor %s contains NaN values.' % tensorname)
        if input_tensor is not None:
            logger_py.warning(f'Input was: {input_tensor}')
|
|
|
|
|
def get_prob_from_logits(logits):
    ''' Returns probabilities for logits (element-wise sigmoid).

    Args:
        logits (tensor): logits
    '''
    odds = np.exp(logits)
    probs = odds / (1 + odds)
    return probs
|
|
|
|
|
def get_logits_from_prob(probs, eps=1e-4):
    ''' Returns logits for probabilities.

    Args:
        probs (tensor): probability tensor
        eps (float): epsilon value for numerical stability
    '''
    probs = np.clip(probs, a_min=eps, a_max=1 - eps)
    logits = np.log(probs / (1 - probs))
    return logits
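
# Sanity-check sketch (illustrative): the two conversions invert each other
# up to the clipping epsilon:
#   probs = get_prob_from_logits(np.array([0., 2.]))  # -> [0.5, ~0.881]
#   get_logits_from_prob(probs)                       # -> [0., ~2.]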
|
|
|
|
|
def chamfer_distance(points1, points2, use_kdtree=True, give_id=False):
    ''' Returns the Chamfer distance for two batches of point sets.

    Args:
        points1 (tensor): first point set of size B x T x 3
        points2 (tensor): second point set of size B x T x 3
        use_kdtree (bool): whether to use a kd-tree
        give_id (bool): whether to return the IDs of the nearest points
    '''
    if use_kdtree:
        return chamfer_distance_kdtree(points1, points2, give_id=give_id)
    else:
        return chamfer_distance_naive(points1, points2)
|
|
|
|
|
def chamfer_distance_naive(points1, points2):
    ''' Naive implementation of the Chamfer distance.

    Computes the full B x T x T pairwise distance matrix, so memory grows
    quadratically with the number of points.

    Args:
        points1 (tensor): first point set of size B x T x 3
        points2 (tensor): second point set of size B x T x 3
    '''
    assert (points1.size() == points2.size())
    batch_size, T, _ = points1.size()

    points1 = points1.view(batch_size, T, 1, 3)
    points2 = points2.view(batch_size, 1, T, 3)

    # Pairwise squared distances between all points
    distances = (points1 - points2).pow(2).sum(-1)

    # For each point, take the squared distance to its nearest neighbor in
    # the other set, then average over the set
    chamfer1 = distances.min(dim=1)[0].mean(dim=1)
    chamfer2 = distances.min(dim=2)[0].mean(dim=1)

    chamfer = chamfer1 + chamfer2
    return chamfer
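
# Usage sketch (illustrative): identical point sets give zero distance:
#   pts = torch.rand(2, 100, 3)
#   chamfer_distance_naive(pts, pts)  # -> tensor([0., 0.])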
|
|
|
|
|
def chamfer_distance_kdtree(points1, points2, give_id=False):
    ''' KD-tree based implementation of the Chamfer distance.

    Args:
        points1 (tensor): first point set of size B x T x 3
        points2 (tensor): second point set of size B x T x 3
        give_id (bool): whether to return the IDs of the nearest points
    '''
    batch_size = points1.size(0)

    # The kd-tree runs on the CPU, so detach and convert to NumPy
    points1_np = points1.detach().cpu().numpy()
    points2_np = points2.detach().cpu().numpy()

    # For each point in points1, find its nearest neighbor in points2
    idx_nn_12, _ = get_nearest_neighbors_indices_batch(points1_np, points2_np)
    idx_nn_12 = torch.LongTensor(idx_nn_12).to(points1.device)
    idx_nn_12_expand = idx_nn_12.view(batch_size, -1, 1).expand_as(points1)

    # For each point in points2, find its nearest neighbor in points1
    idx_nn_21, _ = get_nearest_neighbors_indices_batch(points2_np, points1_np)
    idx_nn_21 = torch.LongTensor(idx_nn_21).to(points1.device)
    idx_nn_21_expand = idx_nn_21.view(batch_size, -1, 1).expand_as(points2)

    # Gather the nearest-neighbor coordinates; gradients flow through the
    # gathered points
    points_12 = torch.gather(points2, dim=1, index=idx_nn_12_expand)
    points_21 = torch.gather(points1, dim=1, index=idx_nn_21_expand)

    # Mean squared distances in both directions
    chamfer1 = (points1 - points_12).pow(2).sum(2).mean(1)
    chamfer2 = (points2 - points_21).pow(2).sum(2).mean(1)

    chamfer = chamfer1 + chamfer2

    if give_id:
        return chamfer1, chamfer2, idx_nn_12, idx_nn_21

    return chamfer
|
|
|
|
|
def get_nearest_neighbors_indices_batch(points_src, points_tgt, k=1):
    ''' Returns the nearest neighbors for point sets batchwise.

    Args:
        points_src (numpy array): source points
        points_tgt (numpy array): target points
        k (int): number of nearest neighbors to return
    '''
    indices = []
    distances = []

    for (p1, p2) in zip(points_src, points_tgt):
        kdtree = KDTree(p2)
        dist, idx = kdtree.query(p1, k=k)
        indices.append(idx)
        distances.append(dist)

    return indices, distances
|
|
|
|
|
def normalize_imagenet(x):
    ''' Normalizes input images according to ImageNet statistics
    (per-channel mean and standard deviation).

    Args:
        x (tensor): input images of size B x 3 x H x W
    '''
    x = x.clone()
    x[:, 0] = (x[:, 0] - 0.485) / 0.229
    x[:, 1] = (x[:, 1] - 0.456) / 0.224
    x[:, 2] = (x[:, 2] - 0.406) / 0.225
    return x
|
|
|
|
|
def make_3d_grid(bb_min, bb_max, shape):
    ''' Makes a 3D grid of points.

    Args:
        bb_min (tuple): bounding box minimum
        bb_max (tuple): bounding box maximum
        shape (tuple): output shape
    '''
    size = shape[0] * shape[1] * shape[2]

    pxs = torch.linspace(bb_min[0], bb_max[0], shape[0])
    pys = torch.linspace(bb_min[1], bb_max[1], shape[1])
    pzs = torch.linspace(bb_min[2], bb_max[2], shape[2])

    # Expand each axis to the full grid and flatten to a size x 3 point list
    pxs = pxs.view(-1, 1, 1).expand(*shape).contiguous().view(size)
    pys = pys.view(1, -1, 1).expand(*shape).contiguous().view(size)
    pzs = pzs.view(1, 1, -1).expand(*shape).contiguous().view(size)
    p = torch.stack([pxs, pys, pzs], dim=1)

    return p
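
# Usage sketch (illustrative): a 32^3 grid spanning [-0.5, 0.5]^3, e.g. for
# querying an occupancy network on a regular grid:
#   p = make_3d_grid((-0.5,) * 3, (0.5,) * 3, (32,) * 3)
#   # p.shape == (32 ** 3, 3)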
|
|
|
|
|
def get_occupancy_loss_points(
    pixels,
    camera_mat,
    world_mat,
    scale_mat,
    depth_image=None,
    use_cube_intersection=True,
    occupancy_random_normal=False,
    depth_range=[0, 2.4]
):
    ''' Returns 3D points for the occupancy loss.

    Args:
        pixels (tensor): sampled pixels in range [-1, 1]
        camera_mat (tensor): camera matrix
        world_mat (tensor): world matrix
        scale_mat (tensor): scale matrix
        depth_image (tensor): if not None, these depth values are used for
            initialization (e.g. depth or visual hull depth)
        use_cube_intersection (bool): whether to check unit cube intersection
        occupancy_random_normal (bool): whether to sample from a Normal
            distribution instead of a uniform one
        depth_range (list): depth range; important when no cube
            intersection is used
    '''
    device = pixels.device
    batch_size, n_points, _ = pixels.shape

    if use_cube_intersection:
        _, d_cube_intersection, mask_cube = \
            intersect_camera_rays_with_unit_cube(
                pixels, camera_mat, world_mat, scale_mat, padding=0.,
                use_ray_length_as_depth=False)
        d_cube = d_cube_intersection[mask_cube]

    # Sample depth values uniformly in the full depth range
    d_occupancy = torch.rand(batch_size, n_points).to(device) * depth_range[1]

    # For rays that hit the cube, sample uniformly inside the intersection
    # interval instead
    if use_cube_intersection:
        d_occupancy[mask_cube] = d_cube[:, 0] + \
            torch.rand(d_cube.shape[0]).to(
                device) * (d_cube[:, 1] - d_cube[:, 0])
    if occupancy_random_normal:
        # Sample from a Normal distribution centered in the depth range
        d_occupancy = torch.randn(batch_size, n_points).to(device) \
            * (depth_range[1] / 8) + depth_range[1] / 2
        if use_cube_intersection:
            mean_cube = d_cube.sum(-1) / 2
            std_cube = (d_cube[:, 1] - d_cube[:, 0]) / 8
            d_occupancy[mask_cube] = mean_cube + \
                torch.randn(mean_cube.shape[0]).to(device) * std_cube

    # If a depth image is given, use its valid depth values instead
    if depth_image is not None:
        depth_gt, mask_gt_depth = get_tensor_values(
            depth_image, pixels, squeeze_channel_dim=True, with_mask=True
        )
        d_occupancy[mask_gt_depth] = depth_gt[mask_gt_depth]

    # Back-project the sampled depth values to world coordinates
    p_occupancy = transform_to_world(
        pixels, d_occupancy.unsqueeze(-1), camera_mat, world_mat, scale_mat
    )
    return p_occupancy
|
|
|
|
|
def get_freespace_loss_points(
    pixels, camera_mat, world_mat, scale_mat, use_cube_intersection=True, depth_range=[0, 2.4]
):
    ''' Returns 3D points for the freespace loss.

    Args:
        pixels (tensor): sampled pixels in range [-1, 1]
        camera_mat (tensor): camera matrix
        world_mat (tensor): world matrix
        scale_mat (tensor): scale matrix
        use_cube_intersection (bool): whether to check unit cube intersection
        depth_range (list): depth range; important when no cube
            intersection is used
    '''
    device = pixels.device
    batch_size, n_points, _ = pixels.shape

    # Sample depth values uniformly in the full depth range
    d_freespace = torch.rand(batch_size, n_points).to(device) * \
        depth_range[1]

    # For rays that hit the cube, sample uniformly inside the intersection
    # interval instead
    if use_cube_intersection:
        _, d_cube_intersection, mask_cube = \
            intersect_camera_rays_with_unit_cube(
                pixels, camera_mat, world_mat, scale_mat,
                use_ray_length_as_depth=False)
        d_cube = d_cube_intersection[mask_cube]
        d_freespace[mask_cube] = d_cube[:, 0] + \
            torch.rand(d_cube.shape[0]).to(
                device) * (d_cube[:, 1] - d_cube[:, 0])

    # Back-project the sampled depth values to world coordinates
    p_freespace = transform_to_world(
        pixels, d_freespace.unsqueeze(-1), camera_mat, world_mat, scale_mat
    )
    return p_freespace
|
|
|
|
|
def normalize_tensor(tensor, min_norm=1e-5, feat_dim=-1):
    ''' Normalizes the tensor along the feature dimension.

    Args:
        tensor (tensor): tensor
        min_norm (float): minimum norm for numerical stability
        feat_dim (int): feature dimension in tensor (default: -1)
    '''
    norm_tensor = torch.clamp(torch.norm(tensor, dim=feat_dim, keepdim=True), min=min_norm)
    normed_tensor = tensor / norm_tensor
    return normed_tensor
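
# Usage sketch (illustrative): normalizing predicted surface normals:
#   n = normalize_tensor(torch.randn(2, 100, 3))
#   # torch.norm(n, dim=-1) is approximately 1 everywhere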
|
|