Spaces:

zino36
/

Image-Matching-app

Sleeping

App Files Files Community

Image-Matching-app / viz.py

zino36

Upload 7 files

20a4a01 verified 5 months ago

raw

history blame

13.7 kB

	# Copyright (C) 2024-present Naver Corporation. All rights reserved.
	# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
	#
	# --------------------------------------------------------
	# Visualization utilities using trimesh
	# --------------------------------------------------------
	import PIL.Image
	import numpy as np
	from scipy.spatial.transform import Rotation
	import torch

	from dust3r.utils.geometry import geotrf, get_med_dist_between_poses, depthmap_to_absolute_camera_coordinates
	from dust3r.utils.device import to_numpy
	from dust3r.utils.image import rgb, img_to_arr

	try:
	import trimesh
	except ImportError:
	print('/!\\ module trimesh is not installed, cannot visualize results /!\\')



	def cat_3d(vecs):
	if isinstance(vecs, (np.ndarray, torch.Tensor)):
	vecs = [vecs]
	return np.concatenate([p.reshape(-1, 3) for p in to_numpy(vecs)])


	def show_raw_pointcloud(pts3d, colors, point_size=2):
	scene = trimesh.Scene()

	pct = trimesh.PointCloud(cat_3d(pts3d), colors=cat_3d(colors))
	scene.add_geometry(pct)

	scene.show(line_settings={'point_size': point_size})


	def pts3d_to_trimesh(img, pts3d, valid=None):
	H, W, THREE = img.shape
	assert THREE == 3
	assert img.shape == pts3d.shape

	vertices = pts3d.reshape(-1, 3)

	# make squares: each pixel == 2 triangles
	idx = np.arange(len(vertices)).reshape(H, W)
	idx1 = idx[:-1, :-1].ravel() # top-left corner
	idx2 = idx[:-1, +1:].ravel() # right-left corner
	idx3 = idx[+1:, :-1].ravel() # bottom-left corner
	idx4 = idx[+1:, +1:].ravel() # bottom-right corner
	faces = np.concatenate((
	np.c_[idx1, idx2, idx3],
	np.c_[idx3, idx2, idx1], # same triangle, but backward (cheap solution to cancel face culling)
	np.c_[idx2, idx3, idx4],
	np.c_[idx4, idx3, idx2], # same triangle, but backward (cheap solution to cancel face culling)
	), axis=0)

	# prepare triangle colors
	face_colors = np.concatenate((
	img[:-1, :-1].reshape(-1, 3),
	img[:-1, :-1].reshape(-1, 3),
	img[+1:, +1:].reshape(-1, 3),
	img[+1:, +1:].reshape(-1, 3)
	), axis=0)

	# remove invalid faces
	if valid is not None:
	assert valid.shape == (H, W)
	valid_idxs = valid.ravel()
	valid_faces = valid_idxs[faces].all(axis=-1)
	faces = faces[valid_faces]
	face_colors = face_colors[valid_faces]

	assert len(faces) == len(face_colors)
	return dict(vertices=vertices, face_colors=face_colors, faces=faces)


	def cat_meshes(meshes):
	vertices, faces, colors = zip(*[(m['vertices'], m['faces'], m['face_colors']) for m in meshes])
	n_vertices = np.cumsum([0]+[len(v) for v in vertices])
	for i in range(len(faces)):
	faces[i][:] += n_vertices[i]

	vertices = np.concatenate(vertices)
	colors = np.concatenate(colors)
	faces = np.concatenate(faces)
	return dict(vertices=vertices, face_colors=colors, faces=faces)


	def show_duster_pairs(view1, view2, pred1, pred2):
	import matplotlib.pyplot as pl
	pl.ion()

	for e in range(len(view1['instance'])):
	i = view1['idx'][e]
	j = view2['idx'][e]
	img1 = rgb(view1['img'][e])
	img2 = rgb(view2['img'][e])
	conf1 = pred1['conf'][e].squeeze()
	conf2 = pred2['conf'][e].squeeze()
	score = conf1.mean()*conf2.mean()
	print(f">> Showing pair #{e} {i}-{j} {score=:g}")
	pl.clf()
	pl.subplot(221).imshow(img1)
	pl.subplot(223).imshow(img2)
	pl.subplot(222).imshow(conf1, vmin=1, vmax=30)
	pl.subplot(224).imshow(conf2, vmin=1, vmax=30)
	pts1 = pred1['pts3d'][e]
	pts2 = pred2['pts3d_in_other_view'][e]
	pl.subplots_adjust(0, 0, 1, 1, 0, 0)
	if input('show pointcloud? (y/n) ') == 'y':
	show_raw_pointcloud(cat(pts1, pts2), cat(img1, img2), point_size=5)


	def auto_cam_size(im_poses):
	return 0.1 * get_med_dist_between_poses(im_poses)


	class SceneViz:
	def __init__(self):
	self.scene = trimesh.Scene()

	def add_rgbd(self, image, depth, intrinsics=None, cam2world=None, zfar=np.inf, mask=None):
	image = img_to_arr(image)

	# make up some intrinsics
	if intrinsics is None:
	H, W, THREE = image.shape
	focal = max(H, W)
	intrinsics = np.float32([[focal, 0, W/2], [0, focal, H/2], [0, 0, 1]])

	# compute 3d points
	pts3d = depthmap_to_pts3d(depth, intrinsics, cam2world=cam2world)

	return self.add_pointcloud(pts3d, image, mask=(depth<zfar) if mask is None else mask)

	def add_pointcloud(self, pts3d, color=(0,0,0), mask=None, denoise=False):
	pts3d = to_numpy(pts3d)
	mask = to_numpy(mask)
	if not isinstance(pts3d, list):
	pts3d = [pts3d.reshape(-1,3)]
	if mask is not None:
	mask = [mask.ravel()]
	if not isinstance(color, (tuple,list)):
	color = [color.reshape(-1,3)]
	if mask is None:
	mask = [slice(None)] * len(pts3d)

	pts = np.concatenate([p[m] for p,m in zip(pts3d,mask)])
	pct = trimesh.PointCloud(pts)

	if isinstance(color, (list, np.ndarray, torch.Tensor)):
	color = to_numpy(color)
	col = np.concatenate([p[m] for p,m in zip(color,mask)])
	assert col.shape == pts.shape, bb()
	pct.visual.vertex_colors = uint8(col.reshape(-1,3))
	else:
	assert len(color) == 3
	pct.visual.vertex_colors = np.broadcast_to(uint8(color), pts.shape)

	if denoise:
	# remove points which are noisy
	centroid = np.median(pct.vertices, axis=0)
	dist_to_centroid = np.linalg.norm( pct.vertices - centroid, axis=-1)
	dist_thr = np.quantile(dist_to_centroid, 0.99)
	valid = (dist_to_centroid < dist_thr)
	# new cleaned pointcloud
	pct = trimesh.PointCloud(pct.vertices[valid], color=pct.visual.vertex_colors[valid])

	self.scene.add_geometry(pct)
	return self

	def add_rgbd(self, image, depth, intrinsics=None, cam2world=None, zfar=np.inf, mask=None):
	# make up some intrinsics
	if intrinsics is None:
	H, W, THREE = image.shape
	focal = max(H, W)
	intrinsics = np.float32([[focal, 0, W/2], [0, focal, H/2], [0, 0, 1]])

	# compute 3d points
	pts3d, mask2 = depthmap_to_absolute_camera_coordinates(depth, intrinsics, cam2world)
	mask2 &= (depth<zfar)

	# combine with provided mask if any
	if mask is not None:
	mask2 &= mask

	return self.add_pointcloud(pts3d, image, mask=mask2)

	def add_camera(self, pose_c2w, focal=None, color=(0, 0, 0), image=None, imsize=None, cam_size=0.03):
	pose_c2w, focal, color, image = to_numpy((pose_c2w, focal, color, image))
	image = img_to_arr(image)
	if isinstance(focal, np.ndarray) and focal.shape == (3,3):
	intrinsics = focal
	focal = (intrinsics[0,0] * intrinsics[1,1]) ** 0.5
	if imsize is None:
	imsize = (2intrinsics[0,2], 2intrinsics[1,2])

	add_scene_cam(self.scene, pose_c2w, color, image, focal, imsize=imsize, screen_width=cam_size, marker=None)
	return self

	def add_cameras(self, poses, focals=None, images=None, imsizes=None, colors=None, **kw):
	get = lambda arr,idx: None if arr is None else arr[idx]
	for i, pose_c2w in enumerate(poses):
	self.add_camera(pose_c2w, get(focals,i), image=get(images,i), color=get(colors,i), imsize=get(imsizes,i), **kw)
	return self

	def show(self, point_size=2):
	self.scene.show(line_settings= {'point_size': point_size})


	def show_raw_pointcloud_with_cams(imgs, pts3d, mask, focals, cams2world,
	point_size=2, cam_size=0.05, cam_color=None):
	""" Visualization of a pointcloud with cameras
	imgs = (N, H, W, 3) or N-size list of [(H,W,3), ...]
	pts3d = (N, H, W, 3) or N-size list of [(H,W,3), ...]
	focals = (N,) or N-size list of [focal, ...]
	cams2world = (N,4,4) or N-size list of [(4,4), ...]
	"""
	assert len(pts3d) == len(mask) <= len(imgs) <= len(cams2world) == len(focals)
	pts3d = to_numpy(pts3d)
	imgs = to_numpy(imgs)
	focals = to_numpy(focals)
	cams2world = to_numpy(cams2world)

	scene = trimesh.Scene()

	# full pointcloud
	pts = np.concatenate([p[m] for p, m in zip(pts3d, mask)])
	col = np.concatenate([p[m] for p, m in zip(imgs, mask)])
	pct = trimesh.PointCloud(pts.reshape(-1, 3), colors=col.reshape(-1, 3))
	scene.add_geometry(pct)

	# add each camera
	for i, pose_c2w in enumerate(cams2world):
	if isinstance(cam_color, list):
	camera_edge_color = cam_color[i]
	else:
	camera_edge_color = cam_color or CAM_COLORS[i % len(CAM_COLORS)]
	add_scene_cam(scene, pose_c2w, camera_edge_color,
	imgs[i] if i < len(imgs) else None, focals[i], screen_width=cam_size)

	scene.show(line_settings={'point_size': point_size})


	def add_scene_cam(scene, pose_c2w, edge_color, image=None, focal=None, imsize=None,
	screen_width=0.03, marker=None):
	if image is not None:
	image = np.asarray(image)
	H, W, THREE = image.shape
	assert THREE == 3
	if image.dtype != np.uint8:
	image = np.uint8(255*image)
	elif imsize is not None:
	W, H = imsize
	elif focal is not None:
	H = W = focal / 1.1
	else:
	H = W = 1

	if isinstance(focal, np.ndarray):
	focal = focal[0]
	if not focal:
	focal = min(H,W) * 1.1 # default value

	# create fake camera
	height = max( screen_width/10, focal * screen_width / H )
	width = screen_width * 0.5**0.5
	rot45 = np.eye(4)
	rot45[:3, :3] = Rotation.from_euler('z', np.deg2rad(45)).as_matrix()
	rot45[2, 3] = -height # set the tip of the cone = optical center
	aspect_ratio = np.eye(4)
	aspect_ratio[0, 0] = W/H
	transform = pose_c2w @ OPENGL @ aspect_ratio @ rot45
	cam = trimesh.creation.cone(width, height, sections=4) # , transform=transform)

	# this is the image
	if image is not None:
	vertices = geotrf(transform, cam.vertices[[4, 5, 1, 3]])
	faces = np.array([[0, 1, 2], [0, 2, 3], [2, 1, 0], [3, 2, 0]])
	img = trimesh.Trimesh(vertices=vertices, faces=faces)
	uv_coords = np.float32([[0, 0], [1, 0], [1, 1], [0, 1]])
	img.visual = trimesh.visual.TextureVisuals(uv_coords, image=PIL.Image.fromarray(image))
	scene.add_geometry(img)

	# this is the camera mesh
	rot2 = np.eye(4)
	rot2[:3, :3] = Rotation.from_euler('z', np.deg2rad(2)).as_matrix()
	vertices = np.r_[cam.vertices, 0.95*cam.vertices, geotrf(rot2, cam.vertices)]
	vertices = geotrf(transform, vertices)
	faces = []
	for face in cam.faces:
	if 0 in face:
	continue
	a, b, c = face
	a2, b2, c2 = face + len(cam.vertices)
	a3, b3, c3 = face + 2*len(cam.vertices)

	# add 3 pseudo-edges
	faces.append((a, b, b2))
	faces.append((a, a2, c))
	faces.append((c2, b, c))

	faces.append((a, b, b3))
	faces.append((a, a3, c))
	faces.append((c3, b, c))

	# no culling
	faces += [(c, b, a) for a, b, c in faces]

	cam = trimesh.Trimesh(vertices=vertices, faces=faces)
	cam.visual.face_colors[:, :3] = edge_color
	scene.add_geometry(cam)

	if marker == 'o':
	marker = trimesh.creation.icosphere(3, radius=screen_width/4)
	marker.vertices += pose_c2w[:3,3]
	marker.visual.face_colors[:,:3] = edge_color
	scene.add_geometry(marker)


	def cat(a, b):
	return np.concatenate((a.reshape(-1, 3), b.reshape(-1, 3)))


	OPENGL = np.array([[1, 0, 0, 0],
	[0, -1, 0, 0],
	[0, 0, -1, 0],
	[0, 0, 0, 1]])


	CAM_COLORS = [(255, 0, 0), (0, 0, 255), (0, 255, 0), (255, 0, 255), (255, 204, 0), (0, 204, 204),
	(128, 255, 255), (255, 128, 255), (255, 255, 128), (0, 0, 0), (128, 128, 128)]


	def uint8(colors):
	if not isinstance(colors, np.ndarray):
	colors = np.array(colors)
	if np.issubdtype(colors.dtype, np.floating):
	colors *= 255
	assert 0 <= colors.min() and colors.max() < 256
	return np.uint8(colors)


	def segment_sky(image):
	import cv2
	from scipy import ndimage

	# Convert to HSV
	image = to_numpy(image)
	if np.issubdtype(image.dtype, np.floating):
	image = np.uint8(255*image.clip(min=0, max=1))
	hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

	# Define range for blue color and create mask
	lower_blue = np.array([0, 0, 100])
	upper_blue = np.array([30, 255, 255])
	mask = cv2.inRange(hsv, lower_blue, upper_blue).view(bool)

	# add luminous gray
	mask \|= (hsv[:, :, 1] < 10) & (hsv[:, :, 2] > 150)
	mask \|= (hsv[:, :, 1] < 30) & (hsv[:, :, 2] > 180)
	mask \|= (hsv[:, :, 1] < 50) & (hsv[:, :, 2] > 220)

	# Morphological operations
	kernel = np.ones((5, 5), np.uint8)
	mask2 = ndimage.binary_opening(mask, structure=kernel)

	# keep only largest CC
	_, labels, stats, _ = cv2.connectedComponentsWithStats(mask2.view(np.uint8), connectivity=8)
	cc_sizes = stats[1:, cv2.CC_STAT_AREA]
	order = cc_sizes.argsort()[::-1] # bigger first
	i = 0
	selection = []
	while i < len(order) and cc_sizes[order[i]] > cc_sizes[order[0]] / 2:
	selection.append(1 + order[i])
	i += 1
	mask3 = np.in1d(labels, selection).reshape(labels.shape)

	# Apply mask
	return torch.from_numpy(mask3)