Spaces:

fffiloni
/

PSHuman

Running on L40S

App Files Files Community

PSHuman / lib /pymafx /utils /common.py

fffiloni

Migrated from GitHub

2252f3d verified 15 days ago

raw

history blame

28.7 kB

	import torch
	import numpy as np
	import logging
	from copy import deepcopy
	from .utils.libkdtree import KDTree

	# Module-level logger; check_weights and check_tensor below report NaN values through it.
	logger_py = logging.getLogger(__name__)


	def compute_iou(occ1, occ2):
	    ''' Computes the Intersection over Union (IoU) of two occupancy sets.

	    Both inputs are converted with np.asarray and binarised with the
	    threshold 0.5. Inputs with two or more dimensions are flattened to
	    (first_dim, -1) and one IoU value is produced per row; 1-dimensional
	    inputs produce a single value.

	    Args:
	        occ1 (array-like): first set of occupancy values
	        occ2 (array-like): second set of occupancy values

	    Returns:
	        float32 numpy value(s) holding intersection / union. A row whose
	        union is empty evaluates 0 / 0 and therefore yields NaN.
	    '''
	    occ1 = np.asarray(occ1)
	    occ2 = np.asarray(occ2)

	    # Put all data in the second dimension (1-dimensional data is kept as is)
	    if occ1.ndim >= 2:
	        occ1 = occ1.reshape(occ1.shape[0], -1)
	    if occ2.ndim >= 2:
	        occ2 = occ2.reshape(occ2.shape[0], -1)

	    # Convert to boolean occupancy
	    occ1 = (occ1 >= 0.5)
	    occ2 = (occ2 >= 0.5)

	    # Element-wise OR / AND give the union / intersection membership
	    area_union = (occ1 | occ2).astype(np.float32).sum(axis=-1)
	    area_intersect = (occ1 & occ2).astype(np.float32).sum(axis=-1)

	    iou = (area_intersect / area_union)

	    return iou


	def rgb2gray(rgb):
	    ''' Converts a batch of RGB images to grayscale.

	    Args:
	        rgb: images of size B x h x w x 3 (channels in the last dimension)

	    Returns:
	        The weighted channel sum 0.2989 * R + 0.5870 * G + 0.1140 * B of
	        size B x h x w.
	    '''
	    red = rgb[:, :, :, 0]
	    green = rgb[:, :, :, 1]
	    blue = rgb[:, :, :, 2]
	    return 0.2989 * red + 0.5870 * green + 0.1140 * blue


	def sample_patch_points(
	    batch_size, n_points, patch_size=1, image_resolution=(128, 128), continuous=True
	):
	    ''' Returns sampled points in the range [-1, 1].

	    Args:
	        batch_size (int): required batch size
	        n_points (int): number of points to sample
	        patch_size (int): size of patch; if > 1, patches of size patch_size
	            are sampled instead of individual points
	        image_resolution (tuple): image resolution (required for calculating
	            the pixel distances)
	        continuous (bool): whether to sample continuously or only on pixel
	            locations

	    Returns:
	        Tensor of size batch_size x (n_patches * patch_size ** 2) x 2 with
	        n_patches = int(n_points / patch_size ** 2). The first channel is
	        stepped with the image_resolution[1] step, the second channel with
	        the image_resolution[0] step.
	    '''
	    assert (patch_size > 0)
	    # Calculate step size for [-1, 1] that is equivalent to a pixel in
	    # original resolution
	    h_step = 1. / image_resolution[0]
	    w_step = 1. / image_resolution[1]
	    # Get number of patches
	    patch_size_squared = patch_size**2
	    n_patches = int(n_points / patch_size_squared)
	    if continuous:
	        p = torch.rand(batch_size, n_patches, 2)    # [0, 1]
	    else:
	        px = torch.randint(0, image_resolution[1],
	                           size=(batch_size, n_patches, 1)).float() / (image_resolution[1] - 1)
	        py = torch.randint(0, image_resolution[0],
	                           size=(batch_size, n_patches, 1)).float() / (image_resolution[0] - 1)
	        p = torch.cat([px, py], dim=-1)
	    # Scale p to [0, (1 - (patch_size - 1) * step)] so that a full patch
	    # anchored at p still fits into [0, 1]
	    p[:, :, 0] *= 1 - (patch_size - 1) * w_step
	    p[:, :, 1] *= 1 - (patch_size - 1) * h_step

	    # Build the per-patch pixel offsets
	    patch_arange = torch.arange(patch_size)
	    x_offset, y_offset = torch.meshgrid(patch_arange, patch_arange)
	    patch_offsets = torch.stack([x_offset.reshape(-1), y_offset.reshape(-1)],
	                                dim=1).view(1, 1, -1, 2).repeat(batch_size, n_patches, 1, 1).float()

	    patch_offsets[:, :, :, 0] *= w_step
	    patch_offsets[:, :, :, 1] *= h_step

	    # Add patch_offsets to points
	    p = p.view(batch_size, n_patches, 1, 2) + patch_offsets

	    # Scale from [0, 1] to [-1, 1]
	    p = p * 2 - 1

	    p = p.view(batch_size, -1, 2)

	    amax, amin = p.max(), p.min()
	    assert (amax <= 1. and amin >= -1.)

	    return p


	def get_proposal_points_in_unit_cube(ray0, ray_direction, padding=0.1, eps=1e-6, n_steps=40):
	    ''' Returns n_steps equally spaced ray lengths between the two cube hits.

	    The rays are cast from ray0 in direction ray_direction; the entry and
	    exit lengths are obtained from check_ray_intersection_with_unit_cube and
	    the proposals are linearly interpolated between them.

	    NOTE: The returned values d_proposal are the lengths of the ray:
	        p^ray_j = ray0 + d_proposal_j * ray_direction

	    Args:
	        ray0 (tensor): Start positions of the rays (B x N x 3)
	        ray_direction (tensor): Directions of rays
	        padding (float): Padding which is applied to the unit cube
	        eps (float): The epsilon value for numerical stability
	        n_steps (int): number of steps

	    Returns:
	        Tuple (d_proposal, mask_inside_cube): d_proposal has size
	        B x N x n_steps x 1, mask_inside_cube is the mask returned by the
	        intersection check.
	    '''
	    _n_batch, _n_rays, _ = ray0.shape
	    device = ray0.device

	    _, d_intervals, mask_inside_cube = check_ray_intersection_with_unit_cube(
	        ray0, ray_direction, padding, eps
	    )

	    d_near = d_intervals[:, :, 0]
	    d_far = d_intervals[:, :, 1]
	    # Interpolation weights in [0, 1], broadcast over batch and rays
	    fractions = torch.linspace(0, 1, steps=n_steps).to(device).view(1, 1, -1)
	    d_proposal = d_near.unsqueeze(-1) + fractions * (d_far - d_near).unsqueeze(-1)

	    return d_proposal.unsqueeze(-1), mask_inside_cube


	def check_ray_intersection_with_unit_cube(ray0, ray_direction, padding=0.1, eps=1e-6, scale=2.0):
	    ''' Checks if rays ray0 + d * ray_direction intersect the padded cube.

	    The cube is axis aligned, centred at the origin and its faces lie at
	    +/- (scale * 0.5 + padding / 2). A ray counts as intersecting when
	    exactly two of its six plane hits lie on the cube (within eps).

	    Args:
	        ray0 (tensor): Start positions of the rays (B x N x 3)
	        ray_direction (tensor): Directions of rays (B x N x 3)
	        padding (float): Padding which is applied to the unit cube
	        eps (float): The epsilon value for numerical stability
	        scale (float): cube size

	    Returns:
	        Tuple (p_intervals, d_intervals, mask_inside_cube):
	        p_intervals (B x N x 2 x 3) holds the two intersection points,
	        d_intervals (B x N x 2) the matching ray lengths sorted ascending,
	        and mask_inside_cube (B x N, bool, on the CPU) marks valid rays.
	        Entries of rays that do not intersect are zero.
	    '''
	    batch_size, n_pts, _ = ray0.shape
	    device = ray0.device

	    # Intersections with the six face planes (< . , . > is the dot product):
	    # <n, ray0 + d * ray_direction - p_e> = 0
	    # => d = - <n, ray0 - p_e> / <n, ray_direction>

	    # Plane offsets: +half_extent for the first three, -half_extent for the rest
	    half_extent = (scale * 0.5) + padding / 2
	    plane_offsets = torch.ones(batch_size, n_pts, 6).to(device) * half_extent
	    plane_offsets[:, :, 3:] *= -1.

	    # Ray length at each of the six planes and the corresponding 3D points
	    d_intersect = (plane_offsets - ray0.repeat(1, 1, 2)) / ray_direction.repeat(1, 1, 2)
	    p_intersect = ray0.unsqueeze(-2) + d_intersect.unsqueeze(-1) * ray_direction.unsqueeze(-2)

	    # A plane hit is valid when all three coordinates lie within the cube
	    upper = half_extent + eps
	    lower = -(half_extent + eps)
	    p_mask_inside_cube = ((p_intersect <= upper) & (p_intersect >= lower)).all(dim=-1).cpu()

	    # Correct rays are those which hit the cube exactly twice
	    mask_inside_cube = p_mask_inside_cube.sum(-1) == 2

	    # Collect the two hit points of every valid ray
	    valid_hits = p_mask_inside_cube[mask_inside_cube]
	    p_intervals = p_intersect[mask_inside_cube][valid_hits].view(-1, 2, 3)
	    p_intervals_batch = torch.zeros(batch_size, n_pts, 2, 3).to(device)
	    p_intervals_batch[mask_inside_cube] = p_intervals

	    # Ray lengths of the hit points, measured in units of |ray_direction|
	    d_intervals_batch = torch.zeros(batch_size, n_pts, 2).to(device)
	    origin_valid = ray0[mask_inside_cube]
	    norm_ray = torch.norm(ray_direction[mask_inside_cube], dim=-1)
	    d_first = torch.norm(p_intervals[:, 0] - origin_valid, dim=-1) / norm_ray
	    d_second = torch.norm(p_intervals[:, 1] - origin_valid, dim=-1) / norm_ray
	    d_intervals_batch[mask_inside_cube] = torch.stack([d_first, d_second], dim=-1)

	    # Sort the ray lengths and reorder the points accordingly
	    d_intervals_batch, indices_sort = torch.sort(d_intervals_batch)
	    batch_idx = torch.arange(batch_size).view(-1, 1, 1)
	    point_idx = torch.arange(n_pts).view(1, -1, 1)
	    p_intervals_batch = p_intervals_batch[batch_idx, point_idx, indices_sort]

	    return p_intervals_batch, d_intervals_batch, mask_inside_cube


	def intersect_camera_rays_with_unit_cube(
	    pixels, camera_mat, world_mat, scale_mat, padding=0.1, eps=1e-6, use_ray_length_as_depth=True
	):
	    ''' Intersects the camera rays through the given pixels with the cube.

	    The rays start at the camera origin (in world space) and pass through
	    the pixel points on the image plane.

	    Args:
	        pixels (tensor): Pixel points on image plane (range [-1, 1])
	        camera_mat (tensor): camera matrix
	        world_mat (tensor): world matrix
	        scale_mat (tensor): scale matrix
	        padding (float): Padding which is applied to the unit cube
	        eps (float): The epsilon value for numerical stability
	        use_ray_length_as_depth (bool): if True the returned depths are the
	            ray lengths; otherwise the intersection points are transformed
	            to camera space and their last coordinate is returned

	    Returns:
	        Tuple (p_cube, d_cube, mask_cube) with the intersection points, the
	        depth values and the mask of rays that intersect the cube.
	    '''
	    batch_size, n_points, _ = pixels.shape
	    matrices = (camera_mat, world_mat, scale_mat)

	    # Ray through each pixel, expressed in world coordinates
	    pixel_world = image_points_to_world(pixels, *matrices)
	    camera_world = origin_to_world(n_points, *matrices)
	    ray_vector = pixel_world - camera_world

	    p_cube, d_cube, mask_cube = check_ray_intersection_with_unit_cube(
	        camera_world, ray_vector, padding=padding, eps=eps
	    )
	    if use_ray_length_as_depth:
	        return p_cube, d_cube, mask_cube

	    # Replace the ray lengths by the last camera-space coordinate
	    p_cam = transform_to_camera_space(p_cube.view(batch_size, -1, 3), *matrices)
	    p_cam = p_cam.view(batch_size, n_points, -1, 3)
	    return p_cube, p_cam[:, :, :, -1], mask_cube


	def arange_pixels(resolution=(128, 128), batch_size=1, image_range=(-1., 1.), subsample_to=None):
	    ''' Arranges pixels for given resolution in range image_range.

	    The function returns the unscaled pixel locations as integers and the
	    scaled float values.

	    Args:
	        resolution (tuple): image resolution (h, w)
	        batch_size (int): batch size
	        image_range (tuple): range of output points (default [-1, 1])
	        subsample_to (int): if integer and > 0, the points are randomly
	            subsampled to this value

	    Returns:
	        Tuple (pixel_locations, pixel_scaled), both of size
	        batch_size x n x 2 with (x, y) in the last dimension. The points are
	        enumerated with x as the slow and y as the fast index. The same
	        random subset is used for every batch entry when subsampling.
	    '''
	    h, w = resolution
	    n_points = h * w

	    # Arrange pixel location in scale resolution
	    grid_x, grid_y = torch.meshgrid(torch.arange(0, w), torch.arange(0, h))
	    pixel_locations = torch.stack([grid_x, grid_y],
	                                  dim=-1).long().view(1, -1, 2).repeat(batch_size, 1, 1)
	    pixel_scaled = pixel_locations.clone().float()

	    # Map [0, w - 1] / [0, h - 1] linearly onto image_range. The lower
	    # bound itself is used as the shift so that ranges which are not
	    # centred around zero (e.g. (0, 1)) are mapped correctly as well.
	    range_min = image_range[0]
	    scale = (image_range[1] - range_min)
	    pixel_scaled[:, :, 0] = scale * pixel_scaled[:, :, 0] / (w - 1) + range_min
	    pixel_scaled[:, :, 1] = scale * pixel_scaled[:, :, 1] / (h - 1) + range_min

	    # Subsample points if subsample_to is not None and > 0
	    if (subsample_to is not None and subsample_to > 0 and subsample_to < n_points):
	        idx = np.random.choice(pixel_scaled.shape[1], size=(subsample_to, ), replace=False)
	        pixel_scaled = pixel_scaled[:, idx]
	        pixel_locations = pixel_locations[:, idx]

	    return pixel_locations, pixel_scaled


	def to_pytorch(tensor, return_type=False):
	    ''' Converts input tensor to pytorch.

	    The result is always a clone, so modifying it never changes the input.

	    Args:
	        tensor (tensor): Numpy or Pytorch tensor
	        return_type (bool): whether to return input type

	    Returns:
	        The cloned torch tensor, or the tuple (tensor, is_numpy) if
	        return_type is True; is_numpy tells whether the input was a numpy
	        array.
	    '''
	    # isinstance (instead of an exact type comparison) also accepts ndarray
	    # subclasses, which would otherwise fail on the .clone() call below.
	    is_numpy = isinstance(tensor, np.ndarray)
	    if is_numpy:
	        tensor = torch.from_numpy(tensor)
	    tensor = tensor.clone()
	    if return_type:
	        return tensor, is_numpy
	    return tensor


	def get_mask(tensor):
	    ''' Returns a boolean mask of the legal (finite) entries of tensor.

	    An entry is legal when it is neither +/- infinity nor NaN.

	    Args:
	        tensor (tensor): Numpy or Pytorch tensor

	    Returns:
	        Boolean mask of the same shape; a numpy array if the input was a
	        numpy array, a torch tensor otherwise.
	    '''
	    values, was_numpy = to_pytorch(tensor, True)
	    not_inf = torch.abs(values) != np.inf
	    not_nan = ~torch.isnan(values)
	    mask = (not_inf & not_nan).to(torch.bool)
	    return mask.numpy() if was_numpy else mask


	def transform_mesh(mesh, transform):
	    ''' Returns a transformed deep copy of the mesh.

	    The input mesh is left untouched; the vertices of the copy are cast to
	    float32 and passed through transform_pointcloud.

	    Args:
	        mesh (trimesh mesh): mesh
	        transform (tensor): transformation matrix of size 4 x 4
	    '''
	    transformed = deepcopy(mesh)
	    vertices = np.asarray(transformed.vertices).astype(np.float32)
	    transformed.vertices = transform_pointcloud(vertices, transform)
	    return transformed


	def transform_pointcloud(pointcloud, transform):
	    ''' Transforms a point cloud with given transformation.

	    Args:
	        pointcloud (tensor): tensor of size N x 3
	        transform (tensor): transformation of size 4 x 4

	    Returns:
	        The transformed points of size N x 3; a numpy array if pointcloud
	        was a numpy array, a torch tensor otherwise.
	    '''

	    assert (transform.shape == (4, 4) and pointcloud.shape[-1] == 3)

	    pcl, is_numpy = to_pytorch(pointcloud, True)
	    transform = to_pytorch(transform)

	    # Transform point cloud to homogeneous coordinates. The column of ones
	    # is created on the device of the points so that torch.cat also works
	    # for tensors that do not live on the CPU.
	    ones = torch.ones(pcl.shape[0], 1, device=pcl.device)
	    pcl_hom = torch.cat([pcl, ones], dim=-1).transpose(1, 0)

	    # Apply transformation to point cloud (4 x 4 @ 4 x N)
	    pcl_hom_transformed = transform @ pcl_hom

	    # Drop the homogeneous row and go back to N x 3
	    pcl_out = pcl_hom_transformed[:3].transpose(1, 0)
	    if is_numpy:
	        pcl_out = pcl_out.numpy()

	    return pcl_out


	def transform_points_batch(p, transform):
	    ''' Applies a batch of 4 x 4 transformations to a batch of points.

	    Args:
	        p (tensor): tensor of size B x N x 3
	        transform (tensor): transformation of size B x 4 x 4

	    Returns:
	        Transformed points of size B x N x 3.
	    '''
	    device = p.device
	    assert (transform.shape[1:] == (4, 4) and p.shape[-1] == 3 and p.shape[0] == transform.shape[0])

	    n_batch, n_pts = p.shape[0], p.shape[1]

	    # Append the homogeneous coordinate and move it to B x 4 x N
	    ones = torch.ones(n_batch, n_pts, 1).to(device)
	    points_hom = torch.cat([p, ones], dim=-1).transpose(2, 1)

	    # Apply transformation
	    transformed_hom = transform @ points_hom

	    # Drop the homogeneous row and return to B x N x 3
	    return transformed_hom[:, :3].transpose(2, 1)


	def get_tensor_values(
	    tensor, p, grid_sample=True, mode='nearest', with_mask=False, squeeze_channel_dim=False
	):
	    '''
	    Returns values from tensor at given location p.

	    Args:
	        tensor (tensor): tensor of size B x C x H x W
	        p (tensor): position values scaled between [-1, 1] and
	            of size B x N x 2
	        grid_sample (boolean): whether to use grid sampling
	        mode (string): what mode to perform grid sampling in
	        with_mask (bool): whether to return the mask for invalid values
	        squeeze_channel_dim (bool): whether to squeeze the channel dimension
	            (only applicable to 1D data)

	    Returns:
	        The sampled values of size B x N x C (B x N if the channel dimension
	        is squeezed), as numpy array if tensor was a numpy array. If
	        with_mask is True, the tuple (values, mask) is returned, where mask
	        marks entries that are neither infinite nor NaN.
	    '''
	    p = to_pytorch(p)
	    tensor, is_numpy = to_pytorch(tensor, True)
	    batch_size, _, h, w = tensor.shape

	    if grid_sample:
	        # grid_sample expects a B x H_out x W_out x 2 grid; use H_out = 1
	        p = p.unsqueeze(1)
	        values = torch.nn.functional.grid_sample(tensor, p, mode=mode)
	        values = values.squeeze(2)
	        values = values.permute(0, 2, 1)
	    else:
	        # Map [-1, 1] to pixel indices (p is a clone, so the in-place
	        # writes do not affect the caller)
	        p[:, :, 0] = (p[:, :, 0] + 1) * (w) / 2
	        p[:, :, 1] = (p[:, :, 1] + 1) * (h) / 2
	        p = p.long()
	        # p == 1 maps to index w (or h), one past the last pixel, and
	        # values slightly out of range would wrap around through negative
	        # indices; clamp both to the image border instead.
	        idx_x = p[:, :, 0].clamp(min=0, max=w - 1)
	        idx_y = p[:, :, 1].clamp(min=0, max=h - 1)
	        values = tensor[torch.arange(batch_size).unsqueeze(-1), :, idx_y, idx_x]

	    if with_mask:
	        mask = get_mask(values)
	        if squeeze_channel_dim:
	            mask = mask.squeeze(-1)
	        if is_numpy:
	            mask = mask.numpy()

	    if squeeze_channel_dim:
	        values = values.squeeze(-1)

	    if is_numpy:
	        values = values.numpy()

	    if with_mask:
	        return values, mask
	    return values


	def transform_to_world(pixels, depth, camera_mat, world_mat, scale_mat, invert=True):
	    ''' Lifts pixel positions with given depth values to world coordinates.

	    Args:
	        pixels (tensor): pixel tensor of size B x N x 2
	        depth (tensor): depth tensor of size B x N x 1
	        camera_mat (tensor): camera matrix
	        world_mat (tensor): world matrix
	        scale_mat (tensor): scale matrix
	        invert (bool): whether to invert matrices (default: true)

	    Returns:
	        World points of size B x N x 3; a numpy array if pixels was a numpy
	        array, a torch tensor otherwise.
	    '''
	    assert (pixels.shape[-1] == 2)

	    # Work on torch clones of all inputs
	    pixels, is_numpy = to_pytorch(pixels, True)
	    depth = to_pytorch(depth)
	    camera_mat, world_mat, scale_mat = (
	        to_pytorch(mat) for mat in (camera_mat, world_mat, scale_mat)
	    )

	    # Invert camera matrices
	    if invert:
	        camera_mat, world_mat, scale_mat = (
	            torch.inverse(mat) for mat in (camera_mat, world_mat, scale_mat)
	        )

	    # Homogeneous pixel coordinates of size B x 4 x N: (x, y, 1, 1)
	    pix_hom = pixels.permute(0, 2, 1)
	    pix_hom = torch.cat([pix_hom, torch.ones_like(pix_hom)], dim=1)

	    # Scale the first three rows by the depth to project into camera space
	    pix_hom[:, :3] = pix_hom[:, :3] * depth.permute(0, 2, 1)

	    # Transform to world space and drop the homogeneous row
	    world_hom = scale_mat @ world_mat @ camera_mat @ pix_hom
	    p_world = world_hom[:, :3].permute(0, 2, 1)

	    return p_world.numpy() if is_numpy else p_world


	def transform_to_camera_space(p_world, camera_mat, world_mat, scale_mat):
	    ''' Transforms world points to camera space.

	    Args:
	        p_world (tensor): world points tensor of size B x N x 3
	        camera_mat (tensor): camera matrix
	        world_mat (tensor): world matrix
	        scale_mat (tensor): scale matrix

	    Returns:
	        Camera space points of size B x N x 3, computed as
	        camera_mat @ world_mat @ scale_mat @ p (in homogeneous coordinates).
	    '''
	    n_batch, n_pts, _ = p_world.shape
	    device = p_world.device

	    # Homogeneous world points of size B x 4 x N
	    ones = torch.ones(n_batch, n_pts, 1).to(device)
	    world_hom = torch.cat([p_world, ones], dim=-1).permute(0, 2, 1)

	    # Apply the matrices and drop the homogeneous row again
	    cam_hom = camera_mat @ world_mat @ scale_mat @ world_hom
	    return cam_hom[:, :3].permute(0, 2, 1)


	def origin_to_world(n_points, camera_mat, world_mat, scale_mat, invert=True):
	    ''' Transforms origin (camera location) to world coordinates.

	    Args:
	        n_points (int): how often the transformed origin is repeated in the
	            form (batch_size, n_points, 3)
	        camera_mat (tensor): camera matrix
	        world_mat (tensor): world matrix
	        scale_mat (tensor): scale matrix
	        invert (bool): whether to invert the matrices (default: true)

	    Returns:
	        Tensor of size batch_size x n_points x 3 holding the transformed
	        origin; batch_size is taken from camera_mat.
	    '''
	    n_batch = camera_mat.shape[0]
	    device = camera_mat.device

	    # Origin in homogeneous coordinates: (0, 0, 0, 1) for every point
	    origin_hom = torch.zeros(n_batch, 4, n_points).to(device)
	    origin_hom[:, -1] = 1.

	    # Invert matrices
	    if invert:
	        camera_mat, world_mat, scale_mat = (
	            torch.inverse(mat) for mat in (camera_mat, world_mat, scale_mat)
	        )

	    # Apply transformation and drop the homogeneous row
	    world_hom = scale_mat @ world_mat @ camera_mat @ origin_hom
	    return world_hom[:, :3].permute(0, 2, 1)


	def image_points_to_world(image_points, camera_mat, world_mat, scale_mat, invert=True):
	    ''' Transforms points on image plane to world coordinates.

	    This is transform_to_world with a fixed depth of 1 for every point, so
	    no depth values have to be supplied.

	    Args:
	        image_points (tensor): image points tensor of size B x N x 2
	        camera_mat (tensor): camera matrix
	        world_mat (tensor): world matrix
	        scale_mat (tensor): scale matrix
	        invert (bool): whether to invert matrices (default: true)
	    '''
	    n_batch, n_pts, dim = image_points.shape
	    assert (dim == 2)

	    unit_depth = torch.ones(n_batch, n_pts, 1).to(image_points.device)
	    return transform_to_world(
	        image_points, unit_depth, camera_mat, world_mat, scale_mat, invert=invert
	    )


	def check_weights(params):
	    ''' Checks weights for illegal values.

	    A warning is logged for every entry that contains at least one NaN.

	    Args:
	        params (dict): mapping from parameter name to parameter tensor
	    '''
	    for name, weight in params.items():
	        if torch.isnan(weight).any():
	            # Logger.warn is a deprecated alias; warning() is the supported
	            # call. The name is passed as a lazy logging argument.
	            logger_py.warning('NaN Values detected in model weight %s.', name)


	def check_tensor(tensor, tensorname='', input_tensor=None):
	    ''' Checks tensor for illegal values.

	    If the tensor contains NaN values a warning is logged; when
	    input_tensor is given it is logged as well.

	    Args:
	        tensor (tensor): tensor
	        tensorname (string): name of tensor
	        input_tensor (tensor): previous input
	    '''
	    if torch.isnan(tensor).any():
	        # Logger.warn is a deprecated alias; warning() is the supported call.
	        logger_py.warning('Tensor %s contains nan values.', tensorname)
	        if input_tensor is not None:
	            logger_py.warning(f'Input was: {input_tensor}')


	def get_prob_from_logits(logits):
	    ''' Returns probabilities for logits (the logistic sigmoid).

	    Args:
	        logits (tensor): logits (scalar or numpy array)

	    Returns:
	        Probabilities 1 / (1 + exp(-logits)) with the shape of logits.
	    '''
	    # exp(x) / (1 + exp(x)) overflows to inf / inf = NaN for large logits.
	    # sigmoid(x) = exp(-log(1 + exp(-x))) is evaluated through logaddexp,
	    # which stays finite for large positive and negative logits.
	    probs = np.exp(-np.logaddexp(0, -logits))
	    return probs


	def get_logits_from_prob(probs, eps=1e-4):
	    ''' Returns logits for probabilities.

	    The probabilities are clipped to [eps, 1 - eps] before the log-odds
	    log(p / (1 - p)) are computed, so the result is always finite.

	    Args:
	        probs (tensor): probability tensor
	        eps (float): epsilon value for numerical stability
	    '''
	    clipped = np.clip(probs, eps, 1 - eps)
	    odds = clipped / (1 - clipped)
	    return np.log(odds)


	def chamfer_distance(points1, points2, use_kdtree=True, give_id=False):
	    ''' Returns the chamfer distance for the sets of points.

	    Dispatches to the KD-tree based or the naive implementation. give_id is
	    only forwarded to the KD-tree implementation.

	    Args:
	        points1 (numpy array): first point set
	        points2 (numpy array): second point set
	        use_kdtree (bool): whether to use a kdtree
	        give_id (bool): whether to return the IDs of nearest points
	    '''
	    if not use_kdtree:
	        return chamfer_distance_naive(points1, points2)
	    return chamfer_distance_kdtree(points1, points2, give_id=give_id)


	def chamfer_distance_naive(points1, points2):
	    ''' Naive implementation of the Chamfer distance.

	    All pairwise squared distances are computed, so memory grows with T^2.

	    Args:
	        points1 (tensor): first point set of size B x T x 3
	        points2 (tensor): second point set of size B x T x 3

	    Returns:
	        Tensor of size B: mean squared distance from every point to its
	        nearest neighbour in the other set, summed over both directions.
	    '''
	    assert (points1.size() == points2.size())
	    n_batch, n_pts, _ = points1.size()

	    # sq_dist[b, i, j] = squared distance between points1[b, i] and points2[b, j]
	    first = points1.view(n_batch, n_pts, 1, 3)
	    second = points2.view(n_batch, 1, n_pts, 3)
	    sq_dist = (first - second).pow(2).sum(-1)

	    # Nearest neighbour in points1 for every point of points2, and vice versa
	    nearest_for_2 = torch.min(sq_dist, dim=1)[0]
	    nearest_for_1 = torch.min(sq_dist, dim=2)[0]

	    return nearest_for_2.mean(dim=1) + nearest_for_1.mean(dim=1)


	def chamfer_distance_kdtree(points1, points2, give_id=False):
	    ''' KD-tree based implementation of the Chamfer distance.

	    The nearest neighbour search runs on detached numpy copies; the
	    distances themselves are computed on the original tensors.

	    Args:
	        points1 (tensor): first point set of size batch_size x T x 3
	        points2 (tensor): second point set of size batch_size x T x 3
	        give_id (bool): whether to return the IDs of the nearest points

	    Returns:
	        The summed chamfer distance per batch entry, or the tuple
	        (chamfer1, chamfer2, idx_nn_12, idx_nn_21) if give_id is True.
	    '''
	    batch_size = points1.size(0)
	    device = points1.device

	    # Numpy copies for the KD-tree queries
	    np_points1 = points1.detach().cpu().numpy()
	    np_points2 = points2.detach().cpu().numpy()

	    # For every point in points1: index of its nearest neighbour in points2
	    idx_nn_12, _ = get_nearest_neighbors_indices_batch(np_points1, np_points2)
	    idx_nn_12 = torch.LongTensor(idx_nn_12).to(device)
	    gather_idx_12 = idx_nn_12.view(batch_size, -1, 1).expand_as(points1)

	    # For every point in points2: index of its nearest neighbour in points1
	    idx_nn_21, _ = get_nearest_neighbors_indices_batch(np_points2, np_points1)
	    idx_nn_21 = torch.LongTensor(idx_nn_21).to(device)
	    gather_idx_21 = idx_nn_21.view(batch_size, -1, 1).expand_as(points2)

	    # nearest_in_2[i, j, k] = points2[i, gather_idx_12[i, j, k], k]
	    nearest_in_2 = torch.gather(points2, dim=1, index=gather_idx_12)
	    # nearest_in_1[i, j, k] = points1[i, gather_idx_21[i, j, k], k]
	    nearest_in_1 = torch.gather(points1, dim=1, index=gather_idx_21)

	    # Mean squared nearest neighbour distance in both directions
	    chamfer1 = (points1 - nearest_in_2).pow(2).sum(2).mean(1)
	    chamfer2 = (points2 - nearest_in_1).pow(2).sum(2).mean(1)

	    # If required, return both directions together with the neighbour IDs
	    if give_id:
	        return chamfer1, chamfer2, idx_nn_12, idx_nn_21

	    return chamfer1 + chamfer2


	def get_nearest_neighbors_indices_batch(points_src, points_tgt, k=1):
	    ''' Returns the nearest neighbors for point sets batchwise.

	    For every batch entry a KD-tree is built on the target points and
	    queried with the source points.

	    Args:
	        points_src (numpy array): source points
	        points_tgt (numpy array): target points
	        k (int): number of nearest neighbors to return

	    Returns:
	        Tuple (indices, distances) of two lists with one query result per
	        batch entry.
	    '''
	    queries = [
	        KDTree(tgt).query(src, k=k) for src, tgt in zip(points_src, points_tgt)
	    ]
	    distances = [dist for dist, _ in queries]
	    indices = [idx for _, idx in queries]
	    return indices, distances


	def normalize_imagenet(x):
	    ''' Normalize input images according to ImageNet standards.

	    The first three channels (dimension 1) are shifted and scaled with
	    fixed per-channel mean / std values; the input is not modified.

	    Args:
	        x (tensor): input images with the channels in dimension 1
	    '''
	    # (mean, std) for the channels 0, 1 and 2
	    channel_stats = ((0.485, 0.229), (0.456, 0.224), (0.406, 0.225))

	    normalized = x.clone()
	    for channel, (mean, std) in enumerate(channel_stats):
	        normalized[:, channel] = (normalized[:, channel] - mean) / std
	    return normalized


	def make_3d_grid(bb_min, bb_max, shape):
	    ''' Makes a 3D grid.

	    Args:
	        bb_min (tuple): bounding box minimum
	        bb_max (tuple): bounding box maximum
	        shape (tuple): output shape

	    Returns:
	        Tensor of size (shape[0] * shape[1] * shape[2]) x 3 with the grid
	        points; the last coordinate varies fastest.
	    '''
	    n_total = shape[0] * shape[1] * shape[2]

	    coords = []
	    for axis in range(3):
	        ticks = torch.linspace(bb_min[axis], bb_max[axis], shape[axis])
	        # Place the ticks along their own axis and broadcast over the others
	        view_shape = [1, 1, 1]
	        view_shape[axis] = -1
	        coords.append(ticks.view(*view_shape).expand(*shape).contiguous().view(n_total))

	    return torch.stack(coords, dim=1)


	def get_occupancy_loss_points(
	    pixels,
	    camera_mat,
	    world_mat,
	    scale_mat,
	    depth_image=None,
	    use_cube_intersection=True,
	    occupancy_random_normal=False,
	    depth_range=[0, 2.4]
	):
	    ''' Returns 3D points for occupancy loss.

	    A depth value is drawn for every pixel and the pixel is lifted to world
	    space with it. Rays that hit the cube sample between the two hits; all
	    others sample from depth_range.

	    Args:
	        pixels (tensor): sampled pixels in range [-1, 1]
	        camera_mat (tensor): camera matrix
	        world_mat (tensor): world matrix
	        scale_mat (tensor): scale matrix
	        depth_image (tensor): if not None, these depth values are used for
	            initialization (e.g. depth or visual hull depth)
	        use_cube_intersection (bool): whether to check unit cube intersection
	        occupancy_random_normal (bool): whether to sample from a Normal
	            distribution instead of a uniform one
	        depth_range (list): depth range; only its second entry (the maximum
	            depth) is read. Important when no cube intersection is used
	    '''
	    device = pixels.device
	    batch_size, n_points, _ = pixels.shape
	    max_depth = depth_range[1]

	    if use_cube_intersection:
	        _, d_cube_all, mask_cube = intersect_camera_rays_with_unit_cube(
	            pixels, camera_mat, world_mat, scale_mat, padding=0.,
	            use_ray_length_as_depth=False
	        )
	        d_cube = d_cube_all[mask_cube]

	    # Uniform samples in [0, max_depth) as the fallback for every pixel
	    d_occupancy = torch.rand(batch_size, n_points).to(device) * max_depth

	    if use_cube_intersection:
	        # Uniform samples between the two cube hits
	        span = d_cube[:, 1] - d_cube[:, 0]
	        d_occupancy[mask_cube] = d_cube[:, 0] + torch.rand(d_cube.shape[0]).to(device) * span

	    if occupancy_random_normal:
	        # Normal samples centred in the middle of the respective interval
	        d_occupancy = torch.randn(batch_size, n_points).to(device) * (max_depth / 8) + max_depth / 2
	        if use_cube_intersection:
	            mean_cube = d_cube.sum(-1) / 2
	            std_cube = (d_cube[:, 1] - d_cube[:, 0]) / 8
	            d_occupancy[mask_cube] = mean_cube + torch.randn(mean_cube.shape[0]).to(device) * std_cube

	    if depth_image is not None:
	        # Valid depth values from the image override the sampled ones
	        depth_gt, mask_gt_depth = get_tensor_values(
	            depth_image, pixels, squeeze_channel_dim=True, with_mask=True
	        )
	        d_occupancy[mask_gt_depth] = depth_gt[mask_gt_depth]

	    return transform_to_world(
	        pixels, d_occupancy.unsqueeze(-1), camera_mat, world_mat, scale_mat
	    )


	def get_freespace_loss_points(
	    pixels, camera_mat, world_mat, scale_mat, use_cube_intersection=True, depth_range=[0, 2.4]
	):
	    ''' Returns 3D points for freespace loss.

	    A uniform depth value is drawn for every pixel and the pixel is lifted
	    to world space with it. Rays that hit the cube sample between the two
	    hits; all others sample from depth_range.

	    Args:
	        pixels (tensor): sampled pixels in range [-1, 1]
	        camera_mat (tensor): camera matrix
	        world_mat (tensor): world matrix
	        scale_mat (tensor): scale matrix
	        use_cube_intersection (bool): whether to check unit cube intersection
	        depth_range (list): depth range; only its second entry (the maximum
	            depth) is read. Important when no cube intersection is used
	    '''
	    device = pixels.device
	    batch_size, n_points, _ = pixels.shape

	    # Uniform samples in [0, depth_range[1]) as the fallback for every pixel
	    d_freespace = torch.rand(batch_size, n_points).to(device) * depth_range[1]

	    if use_cube_intersection:
	        _, d_cube_all, mask_cube = intersect_camera_rays_with_unit_cube(
	            pixels, camera_mat, world_mat, scale_mat, use_ray_length_as_depth=False
	        )
	        d_cube = d_cube_all[mask_cube]
	        # Uniform samples between the two cube hits
	        span = d_cube[:, 1] - d_cube[:, 0]
	        d_freespace[mask_cube] = d_cube[:, 0] + torch.rand(d_cube.shape[0]).to(device) * span

	    return transform_to_world(
	        pixels, d_freespace.unsqueeze(-1), camera_mat, world_mat, scale_mat
	    )


	def normalize_tensor(tensor, min_norm=1e-5, feat_dim=-1):
	    ''' Normalizes the tensor along the feature dimension.

	    The norm is clamped from below by min_norm before dividing, so vectors
	    with a (near) zero norm do not cause a division by zero.

	    Args:
	        tensor (tensor): tensor
	        min_norm (float): minimum norm for numerical stability
	        feat_dim (int): feature dimension in tensor (default: -1)
	    '''
	    norms = tensor.norm(dim=feat_dim, keepdim=True).clamp(min=min_norm)
	    return tensor / norms