Spaces:

merve
/

grounding_sam_inpainting

Runtime error

App Files Files Community

grounding_sam_inpainting / voxelnext_3d_box /models /spconv_backbone_voxelnext.py

merve HF staff

Upload 219 files

9cc3ad8 10 months ago

raw

history blame

12.2 kB

	from functools import partial
	import torch
	import torch.nn as nn

	import spconv.pytorch as spconv
	from spconv.core import ConvAlgo


	def replace_feature(out, new_features):
	    """Return ``out`` with its feature matrix swapped for ``new_features``.

	    Thin wrapper that delegates to the ``replace_feature`` method of ``out``
	    and hands back whatever that method returns.
	    """
	    updated = out.replace_feature(new_features)
	    return updated


	def post_act_block(in_channels, out_channels, kernel_size, indice_key=None, stride=1, padding=0,
	                   conv_type='subm', norm_fn=None):
	    """Build a sparse ``conv -> norm -> ReLU`` stack.

	    Args:
	        in_channels: number of input feature channels.
	        out_channels: number of output feature channels.
	        kernel_size: kernel size handed to the convolution.
	        indice_key: spconv indice key forwarded to the convolution.
	        stride: stride, only used when ``conv_type == 'spconv'``.
	        padding: padding, only used when ``conv_type == 'spconv'``.
	        conv_type: one of ``'subm'``, ``'spconv'`` or ``'inverseconv'``.
	        norm_fn: callable that takes the channel count and returns the
	            normalisation layer. It is called unconditionally, so callers
	            must supply one despite the ``None`` default.

	    Returns:
	        A ``spconv.SparseSequential`` holding the three layers.

	    Raises:
	        NotImplementedError: if ``conv_type`` is not a supported value.
	    """
	    # Reject unknown kinds before any layer is constructed.
	    if conv_type not in ('subm', 'spconv', 'inverseconv'):
	        raise NotImplementedError

	    if conv_type == 'subm':
	        layer = spconv.SubMConv3d(
	            in_channels, out_channels, kernel_size,
	            bias=False, indice_key=indice_key, algo=ConvAlgo.Native,
	        )
	    elif conv_type == 'spconv':
	        layer = spconv.SparseConv3d(
	            in_channels, out_channels, kernel_size,
	            stride=stride, padding=padding,
	            bias=False, indice_key=indice_key, algo=ConvAlgo.Native,
	        )
	    else:
	        layer = spconv.SparseInverseConv3d(
	            in_channels, out_channels, kernel_size,
	            indice_key=indice_key, bias=False, algo=ConvAlgo.Native,
	        )

	    return spconv.SparseSequential(layer, norm_fn(out_channels), nn.ReLU())


	class SparseBasicBlock(spconv.SparseModule):
	    """Residual block made of two kernel-size-3 submanifold 3D convolutions.

	    Each convolution is followed by a normalisation layer; a ReLU sits after
	    the first normalisation and after the residual addition.
	    """

	    expansion = 1

	    def __init__(self, inplanes, planes, stride=1, norm_fn=None, downsample=None, indice_key=None):
	        """
	        Args:
	            inplanes: channel count of the incoming features.
	            planes: channel count produced by both convolutions.
	            stride: stride forwarded to both convolutions.
	            norm_fn: callable mapping a channel count to a normalisation
	                layer; must not be ``None``.
	            downsample: optional module applied to the input to produce the
	                shortcut branch.
	            indice_key: spconv indice key shared by both convolutions.
	        """
	        super().__init__()

	        assert norm_fn is not None
	        # True whenever the assertion above holds.
	        use_bias = norm_fn is not None
	        conv_options = dict(
	            kernel_size=3, stride=stride, padding=1, bias=use_bias,
	            indice_key=indice_key, algo=ConvAlgo.Native,
	        )

	        self.conv1 = spconv.SubMConv3d(inplanes, planes, **conv_options)
	        self.bn1 = norm_fn(planes)
	        self.relu = nn.ReLU()
	        self.conv2 = spconv.SubMConv3d(planes, planes, **conv_options)
	        self.bn2 = norm_fn(planes)
	        self.downsample = downsample
	        self.stride = stride

	    def forward(self, x):
	        """Run the block on sparse tensor ``x`` and return the resulting sparse tensor."""
	        # First conv -> norm -> ReLU; norm and activation act on the raw features.
	        out = self.conv1(x)
	        out = replace_feature(out, self.relu(self.bn1(out.features)))

	        # Second conv -> norm; the activation is deferred until after the skip add.
	        out = self.conv2(out)
	        out = replace_feature(out, self.bn2(out.features))

	        # Shortcut branch is the input itself unless a downsample module was given.
	        shortcut = x if self.downsample is None else self.downsample(x)

	        return replace_feature(out, self.relu(out.features + shortcut.features))


	class VoxelResBackBone8xVoxelNeXt(nn.Module):
	    """Sparse residual 3D backbone that ends in a sparse 2D feature map.

	    Six sparse stages are run in sequence (``conv1`` .. ``conv6``); every stage
	    from ``conv2`` on starts with a stride-2 sparse convolution. In ``forward``
	    the voxels of ``conv5`` and ``conv6`` are rescaled onto the ``conv4`` index
	    grid, merged with the ``conv4`` voxels, collapsed along the first spatial
	    axis and refined by two 2D sparse convolutions.
	    """

	    def __init__(self, input_channels, grid_size, **kwargs):
	        """
	        Args:
	            input_channels: number of feature channels per input voxel.
	            grid_size: extent of the voxel grid. It is reversed to form the
	                sparse shape (presumably given as (x, y, z) -- confirm against
	                the caller). A numpy array, list or tuple is accepted.
	            **kwargs: accepted for call-site compatibility and ignored.
	        """
	        super().__init__()
	        norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01)

	        spconv_kernel_sizes = [3, 3, 3, 3]
	        channels = [16, 32, 64, 128, 128]
	        out_channel = 128

	        self.sparse_shape = self._padded_sparse_shape(grid_size)

	        def residual(width, key):
	            # Residual block that keeps the channel count unchanged.
	            return SparseBasicBlock(width, width, norm_fn=norm_fn, indice_key=key)

	        def downsample(c_in, c_out, kernel, key):
	            # Stride-2 sparse conv (padding = kernel // 2) followed by norm + ReLU.
	            return post_act_block(
	                c_in, c_out, kernel, norm_fn=norm_fn, stride=2,
	                padding=int(kernel // 2), indice_key=key, conv_type='spconv',
	            )

	        self.conv_input = spconv.SparseSequential(
	            spconv.SubMConv3d(input_channels, channels[0], 3, padding=1, bias=False,
	                              indice_key='subm1', algo=ConvAlgo.Native),
	            norm_fn(channels[0]),
	            nn.ReLU(),
	        )

	        # Stage 1 keeps the input resolution (stride 1).
	        self.conv1 = spconv.SparseSequential(
	            residual(channels[0], 'res1'),
	            residual(channels[0], 'res1'),
	        )

	        # Stage 2: cumulative stride 2.
	        self.conv2 = spconv.SparseSequential(
	            downsample(channels[0], channels[1], spconv_kernel_sizes[0], 'spconv2'),
	            residual(channels[1], 'res2'),
	            residual(channels[1], 'res2'),
	        )

	        # Stage 3: cumulative stride 4.
	        self.conv3 = spconv.SparseSequential(
	            downsample(channels[1], channels[2], spconv_kernel_sizes[1], 'spconv3'),
	            residual(channels[2], 'res3'),
	            residual(channels[2], 'res3'),
	        )

	        # Stage 4: cumulative stride 8.
	        self.conv4 = spconv.SparseSequential(
	            downsample(channels[2], channels[3], spconv_kernel_sizes[2], 'spconv4'),
	            residual(channels[3], 'res4'),
	            residual(channels[3], 'res4'),
	        )

	        # Stage 5: cumulative stride 16.
	        self.conv5 = spconv.SparseSequential(
	            downsample(channels[3], channels[4], spconv_kernel_sizes[3], 'spconv5'),
	            residual(channels[4], 'res5'),
	            residual(channels[4], 'res5'),
	        )

	        # Stage 6: cumulative stride 32; reuses the last configured kernel size.
	        self.conv6 = spconv.SparseSequential(
	            downsample(channels[4], channels[4], spconv_kernel_sizes[3], 'spconv6'),
	            residual(channels[4], 'res6'),
	            residual(channels[4], 'res6'),
	        )

	        # 2D conv over the collapsed map. Its input width is channels[3]; the
	        # merged conv4/conv5/conv6 features fit because channels[3] == channels[4].
	        self.conv_out = spconv.SparseSequential(
	            spconv.SparseConv2d(channels[3], out_channel, 3, stride=1, padding=1, bias=False,
	                                indice_key='spconv_down2', algo=ConvAlgo.Native),
	            norm_fn(out_channel),
	            nn.ReLU(),
	        )

	        self.shared_conv = spconv.SparseSequential(
	            spconv.SubMConv2d(out_channel, out_channel, 3, stride=1, padding=1, bias=True,
	                              algo=ConvAlgo.Native),
	            nn.BatchNorm1d(out_channel),
	            nn.ReLU(True),
	        )

	        self.forward_ret_dict = {}
	        self.num_point_features = out_channel
	        self.backbone_channels = {
	            'x_conv1': channels[0],
	            'x_conv2': channels[1],
	            'x_conv3': channels[2],
	            'x_conv4': channels[3],
	        }

	    @staticmethod
	    def _padded_sparse_shape(grid_size):
	        """Reverse ``grid_size`` and add one to the first reversed axis.

	        For a numpy array this is the broadcast ``grid_size[::-1] + [1, 0, 0]``
	        and an array is returned. Lists and tuples are handled element-wise
	        and a list is returned, because ``+`` would concatenate a list (giving
	        a six-element shape) and raise ``TypeError`` for a tuple.
	        """
	        flipped = grid_size[::-1]
	        if isinstance(flipped, (list, tuple)):
	            first, *rest = flipped
	            return [first + 1, *rest]
	        return flipped + [1, 0, 0]

	    def bev_out(self, x_conv, index):
	        """Collapse a 3D sparse tensor along its first spatial axis.

	        Voxels whose index columns 0, 2 and 3 coincide are merged and their
	        features summed.

	        Args:
	            x_conv: 3D sparse tensor; per the ``forward`` docstring its index
	                columns are [batch_idx, z_idx, y_idx, x_idx].
	            index: tensor with one row per voxel of ``x_conv``.

	        Returns:
	            Tuple ``(x_out, index_out)``: the 2D sparse tensor and, for every
	            merged cell, the ``index`` row of one of the voxels that fell into
	            it (which one is not controlled by this code).
	        """
	        features_cat = x_conv.features
	        # Drop index column 1 and the matching first spatial dimension.
	        indices_cat = x_conv.indices[:, [0, 2, 3]]
	        spatial_shape = x_conv.spatial_shape[1:]

	        indices_unique, inverse = torch.unique(indices_cat, dim=0, return_inverse=True)
	        features_unique = features_cat.new_zeros((indices_unique.shape[0], features_cat.shape[1]))
	        # Sum the features of all voxels that map to the same merged cell.
	        features_unique.index_add_(0, inverse, features_cat)

	        # For each merged cell remember the row number of one contributing voxel.
	        rows = torch.arange(inverse.size(0), dtype=inverse.dtype, device=inverse.device)
	        perm = inverse.new_empty(indices_unique.size(0)).scatter_(0, inverse, rows)
	        index_out = index[perm]

	        x_out = spconv.SparseConvTensor(
	            features=features_unique,
	            indices=indices_unique,
	            spatial_shape=spatial_shape,
	            batch_size=x_conv.batch_size,
	        )
	        return x_out, index_out

	    def track_voxels_2d(self, x, x_downsample, index, kernel_size=3):
	        """Carry per-voxel ``index`` values from ``x`` over to ``x_downsample``.

	        Every voxel of ``x`` writes its ``index`` row to all in-bounds positions
	        of a ``kernel_size`` x ``kernel_size`` window centred on it; the values
	        are then read back at the voxel positions of ``x_downsample``.

	        Args:
	            x: 2D sparse tensor whose rows correspond to the rows of ``index``.
	            x_downsample: 2D sparse tensor whose voxel positions are looked up.
	            index: tensor with one row per voxel of ``x`` (a single column is
	                assumed by the reshape below -- confirm against the caller).
	            kernel_size: window size used to spread each voxel.

	        Returns:
	            Whatever ``index_from_sparse`` gathers for the voxels of
	            ``x_downsample``.
	        """
	        half = int(kernel_size // 2)
	        # Every offset of the window, centre offset (0, 0) included.
	        kernel_offsets = [[i, j] for i in range(-half, half + 1) for j in range(-half, half + 1)]
	        kernel_offsets = torch.Tensor(kernel_offsets).to(x.indices.device)
	        num_offsets = kernel_offsets.shape[0]

	        index_batch = []
	        indices_batch = []

	        for b in range(x.batch_size):
	            in_sample = x.indices[:, 0] == b
	            coords = x.indices[in_sample]
	            tracked = index[in_sample]

	            # Shift every voxel position by every kernel offset.
	            offsets = kernel_offsets.unsqueeze(0).repeat(tracked.shape[0], 1, 1)
	            centres = coords[:, 1:].unsqueeze(1).repeat(1, num_offsets, 1)
	            candidates = centres + offsets
	            tracked = tracked.repeat(1, num_offsets)

	            # Keep only shifted positions that stay inside the spatial extent of x.
	            in_bounds = (
	                (candidates[:, :, 0] >= 0)
	                & (candidates[:, :, 1] >= 0)
	                & (candidates[:, :, 0] < x.spatial_shape[0])
	                & (candidates[:, :, 1] < x.spatial_shape[1])
	            )
	            candidates = candidates[in_bounds]
	            tracked = tracked[in_bounds].view(-1, 1)

	            # Prepend the batch id as the first index column.
	            batch_column = torch.ones((candidates.shape[0], 1), device=tracked.device) * b
	            candidates = torch.cat([batch_column, candidates], dim=1)

	            index_batch.append(tracked)
	            indices_batch.append(candidates)

	        index_batch = torch.cat(index_batch)
	        indices_batch = torch.cat(indices_batch)

	        return self.index_from_sparse(index_batch, indices_batch, x_downsample, True)

	    def index_from_sparse(self, feature, indices, x_target, _2d=False):
	        """Scatter ``feature`` at ``indices`` and read it back at ``x_target``'s voxels.

	        Args:
	            feature: values to place, one row per row of ``indices``.
	            indices: index rows (batch id first); cast to int before use.
	            x_target: sparse tensor supplying the spatial shape, the batch size
	                and the positions to read.
	            _2d: ``True`` when the index rows hold two spatial columns,
	                ``False`` for three.

	        Returns:
	            One row per voxel of ``x_target``, taken from the densified tensor
	            at that voxel's position.
	        """
	        sparse_index = spconv.SparseConvTensor(
	            features=feature,
	            indices=indices.int(),
	            spatial_shape=x_target.spatial_shape,
	            batch_size=x_target.batch_size,
	        )
	        dense_index = sparse_index.dense()
	        target = x_target.indices.long()
	        # The dense tensor is indexed as [batch, channel, *spatial].
	        if _2d:
	            return dense_index[target[:, 0], :, target[:, 1], target[:, 2]]
	        return dense_index[target[:, 0], :, target[:, 1], target[:, 2], target[:, 3]]

	    def forward(self, batch_dict):
	        """
	        Args:
	            batch_dict:
	                batch_size: int
	                voxel_features: (num_voxels, C)
	                voxel_coords: (num_voxels, 4), [batch_idx, z_idx, y_idx, x_idx]
	        Returns:
	            batch_dict, updated in place with:
	                encoded_spconv_tensor: sparse tensor (output of ``shared_conv``)
	                encoded_spconv_tensor_stride: 8
	                out_voxels: rows of the merged conv4/5/6 indices selected by
	                    the tracked voxel index
	                multi_scale_3d_features: dict with x_conv1 .. x_conv4
	                multi_scale_3d_strides: dict with the matching strides
	        """
	        voxel_features, voxel_coords = batch_dict['voxel_features'], batch_dict['voxel_coords']
	        batch_size = batch_dict['batch_size']
	        input_sp_tensor = spconv.SparseConvTensor(
	            features=voxel_features,
	            indices=voxel_coords.int(),
	            spatial_shape=self.sparse_shape,
	            batch_size=batch_size,
	        )
	        x = self.conv_input(input_sp_tensor)

	        x_conv1 = self.conv1(x)
	        x_conv2 = self.conv2(x_conv1)
	        x_conv3 = self.conv3(x_conv2)
	        x_conv4 = self.conv4(x_conv3)
	        x_conv5 = self.conv5(x_conv4)
	        x_conv6 = self.conv6(x_conv5)

	        # Rescale the coarser stages onto the conv4 index grid (in place), then
	        # merge all three stages into x_conv4. Note that x_conv4 is modified:
	        # the tensor reported under 'x_conv4' below is the merged one.
	        x_conv5.indices[:, 1:] *= 2
	        x_conv6.indices[:, 1:] *= 4
	        x_conv4 = x_conv4.replace_feature(torch.cat([x_conv4.features, x_conv5.features, x_conv6.features]))
	        x_conv4.indices = torch.cat([x_conv4.indices, x_conv5.indices, x_conv6.indices])

	        # Row numbers of the merged voxels, tracked through the 2D head so the
	        # surviving output voxels can be mapped back to merged index rows.
	        index6_out = torch.arange(x_conv4.indices.shape[0], device=x_conv4.indices.device).unsqueeze(-1)
	        out_bevout, index_bevout = self.bev_out(x_conv4, index6_out)

	        out = self.conv_out(out_bevout)
	        index_out = self.track_voxels_2d(out_bevout, out, index_bevout)

	        out = self.shared_conv(out)

	        batch_dict.update({
	            'encoded_spconv_tensor': out,
	            'encoded_spconv_tensor_stride': 8,
	            'out_voxels': x_conv4.indices[index_out.squeeze(-1)],
	            'multi_scale_3d_features': {
	                'x_conv1': x_conv1,
	                'x_conv2': x_conv2,
	                'x_conv3': x_conv3,
	                'x_conv4': x_conv4,
	            },
	            'multi_scale_3d_strides': {
	                'x_conv1': 1,
	                'x_conv2': 2,
	                'x_conv3': 4,
	                'x_conv4': 8,
	            },
	        })

	        return batch_dict