import math
import warnings

import numpy as np
import torch
import skimage.filters.rank
import skimage.morphology
from scipy import ndimage
from PIL import Image
from pytorch3d.structures import Pointclouds

from .models import infer_with_zoe_dc


def nearest_neighbor_fill(img, mask, erosion=0):
    """Fill pixels outside `mask` by propagating nearby valid values inward."""
    img_ = np.copy(img.cpu().numpy())
    if erosion > 0:
        eroded_mask = skimage.morphology.binary_erosion(
            mask.cpu().numpy(), footprint=skimage.morphology.disk(erosion)
        )
    else:
        eroded_mask = mask.cpu().numpy()
    # Mark invalid pixels so they are ignored by nanmax below.
    img_[eroded_mask <= 0] = np.nan
    # Pixel distance (4-connected) from every invalid pixel to the valid region.
    distance_to_boundary = ndimage.distance_transform_bf(~eroded_mask > 0, metric="cityblock")
    # Fill layer by layer, moving outward from the valid region, so each pixel can
    # take the maximum over its already-filled 3x3 neighborhood.
    for current_dist in np.unique(distance_to_boundary)[1:]:
        ii, jj = np.where(distance_to_boundary == current_dist)
        ii_ = np.array([ii - 1, ii, ii + 1, ii - 1, ii, ii + 1, ii - 1, ii, ii + 1]).reshape(9, -1)
        jj_ = np.array([jj - 1, jj - 1, jj - 1, jj, jj, jj, jj + 1, jj + 1, jj + 1]).reshape(9, -1)
        ii_ = ii_.clip(0, img_.shape[0] - 1)
        jj_ = jj_.clip(0, img_.shape[1] - 1)
        img_[ii, jj] = np.nanmax(img_[ii_, jj_], axis=0)
    return torch.from_numpy(img_).to(img.device)


def snap_high_gradients_to_nn(depth, threshold=20):
    """Replace depth values at high-gradient pixels with nearest-neighbor fills."""
    grad_depth = np.copy(depth.cpu().numpy())
    # Normalize to [0, 1] so the rank gradient filter's internal integer conversion is well-defined.
    grad_depth = grad_depth - grad_depth.min()
    grad_depth = grad_depth / grad_depth.max()
    grad = skimage.filters.rank.gradient(grad_depth, skimage.morphology.disk(1))
    return nearest_neighbor_fill(depth, torch.from_numpy(grad < threshold), erosion=3)


def project_points(cameras, depth, use_pixel_centers=True):
    """Unproject a depth map into world-space 3D points using a single PyTorch3D camera."""
    if len(cameras) > 1:
        warnings.warn("project_points assumes only a single camera is used")
    depth_t = torch.from_numpy(depth) if isinstance(depth, np.ndarray) else depth
    depth_t = depth_t.to(cameras.device)
    pixel_center = 0.5 if use_pixel_centers else 0
    fx, fy = cameras.focal_length[0, 1], cameras.focal_length[0, 0]
    cx, cy = cameras.principal_point[0, 1], cameras.principal_point[0, 0]
    i, j = torch.meshgrid(
        torch.arange(cameras.image_size[0][0], dtype=torch.float32, device=cameras.device) + pixel_center,
        torch.arange(cameras.image_size[0][1], dtype=torch.float32, device=cameras.device) + pixel_center,
        indexing="xy",
    )
    # PyTorch3D cameras use a +X-left, +Y-up convention, hence the negated x/y terms.
    directions = torch.stack(
        [-(i - cx) * depth_t / fx, -(j - cy) * depth_t / fy, depth_t], -1
    )
    xy_depth_world = (
        cameras.get_world_to_view_transform()
        .inverse()
        .transform_points(directions.view(-1, 3))
        .unsqueeze(0)
    )
    return xy_depth_world


def get_pointcloud(xy_depth_world, device="cpu", features=None):
    """Wrap world-space points (and optional per-point features) into a Pointclouds object."""
    point_cloud = Pointclouds(
        points=[xy_depth_world.to(device)],
        features=[features] if features is not None else None,
    )
    return point_cloud


def merge_pointclouds(point_clouds):
    """Concatenate several single-batch point clouds into one Pointclouds object."""
    points = torch.cat([pc.points_padded() for pc in point_clouds], dim=1)
    features = torch.cat([pc.features_padded() for pc in point_clouds], dim=1)
    return Pointclouds(points=[points[0]], features=[features[0]])

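
# --- Example usage (sketch) -------------------------------------------------
# A minimal illustration of how the helpers above compose: unproject a depth map
# through a single PyTorch3D camera and wrap it, together with per-pixel colors,
# into a Pointclouds object. The function name and the `colors` argument are
# hypothetical and not part of this module's API; `colors` is assumed to be an
# (H, W, C) tensor laid out in the same row order as the depth map.
def example_depth_to_pointcloud(cameras, depth, colors, device="cpu"):
    # (1, H*W, 3) world-space points, one per pixel of the depth map.
    xy_depth_world = project_points(cameras, depth)
    # Flatten per-pixel colors to (H*W, C) and move them to the target device,
    # since get_pointcloud only moves the points.
    features = colors.reshape(-1, colors.shape[-1]).float().to(device)
    return get_pointcloud(xy_depth_world[0], device=device, features=features)
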

def outpaint_with_depth_estimation(image, mask, previous_depth, h, w, pipe, zoe_dc, prompt, cameras,
                                   dilation_size: int = 2, depth_scaling: float = 1, generator=None):
    """Inpaint the unobserved region of `image` with a diffusion pipeline and estimate its depth."""
    img_input = Image.fromarray((255 * image[..., :3].cpu().numpy()).astype(np.uint8))
    # Slightly dilate the inpainting mask, as aliasing might cause PyTorch3D to hand us a mask
    # that is too small.
    img_mask = Image.fromarray(
        (255 * skimage.morphology.isotropic_dilation((~mask).cpu().numpy(), radius=dilation_size)).astype(np.uint8)
    )
    out_image = pipe(prompt=prompt, image=img_input, mask_image=img_mask, height=h, width=w,
                     generator=generator).images[0]
    # Depth completion: condition the model on the known depth inside the visible region.
    out_depth = infer_with_zoe_dc(
        zoe_dc,
        torch.from_numpy(np.asarray(out_image) / 255.).permute(2, 0, 1).float().to(zoe_dc.device),
        (previous_depth * mask).to(zoe_dc.device),
        scaling=depth_scaling,
    ).cpu().numpy()
    return out_image, out_depth


def fov2focal(fov, pixels):
    """Convert a field of view (in radians) to a focal length in pixels."""
    return pixels / (2 * math.tan(fov / 2))


def focal2fov(focal, pixels):
    """Convert a focal length in pixels to a field of view in radians."""
    return 2 * math.atan(pixels / (2 * focal))
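
# --- Sanity check (sketch) ---------------------------------------------------
# `fov2focal` and `focal2fov` are inverses of each other for a fixed image side
# length; the numbers below are made-up example values, not taken from the repo.
if __name__ == "__main__":
    width = 512
    fov_x = math.radians(60.0)           # horizontal field of view in radians
    focal_px = fov2focal(fov_x, width)   # ~443.4 px
    assert math.isclose(focal2fov(focal_px, width), fov_x)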