Apply both the COLMAP depth map and the monocular depth map.
Browse files- handcrafted_solution.py +41 -24
handcrafted_solution.py
CHANGED
@@ -12,7 +12,7 @@ from scipy.spatial.distance import cdist
|
|
12 |
from hoho.read_write_colmap import read_cameras_binary, read_images_binary, read_points3D_binary
|
13 |
from hoho.color_mappings import gestalt_color_mapping, ade20k_color_mapping
|
14 |
|
15 |
-
DUMP_IMG =
|
16 |
if DUMP_IMG:
|
17 |
from scipy.sparse import random
|
18 |
|
@@ -140,27 +140,32 @@ def get_smooth_uv_depth(vertices, depth, gest_seg_np, sfm_depth_np):
|
|
140 |
def get_local_depth(x,y, H, W, depth, r=5):
|
141 |
'''return a smooth version of depth in radius r'''
|
142 |
local_depths = []
|
143 |
-
for i in range(max(0, x - r), min(W
|
144 |
-
for j in range(max(0, y - r), min(H
|
145 |
if np.sqrt((i - x)**2 + (j - y)**2) <= r:
|
146 |
-
if sfm_depth_np
|
147 |
-
|
|
|
|
|
|
|
148 |
else:
|
149 |
local_depths.append(depth[j, i])
|
|
|
150 |
return local_depths
|
151 |
|
152 |
vertex_depth = []
|
153 |
for x,y in zip(a,b):
|
154 |
local_depths = get_local_depth(x,y, H, W, depth, 5)
|
155 |
#print(f'local_depths={local_depths}')
|
156 |
-
local_mean = np.mean(local_depths)
|
|
|
157 |
vertex_depth.append(local_mean)
|
158 |
|
159 |
vertex_depth = np.array(vertex_depth)
|
160 |
return uv, vertex_depth
|
161 |
|
162 |
# TODO: timeit
|
163 |
-
def get_SfM_depth(points3D, depth_np, gest_seg_np, K, R, t):
|
164 |
'''Project 3D sfm pointcloud to the image plane '''
|
165 |
H, W = depth_np.shape[:2]
|
166 |
sfm_depth_np = np.zeros(depth_np.shape)
|
@@ -174,8 +179,18 @@ def get_SfM_depth(points3D, depth_np, gest_seg_np, K, R, t):
|
|
174 |
Rt = np.concatenate( (R, t.reshape((3,1))), axis=1)
|
175 |
world_to_cam = K @ Rt
|
176 |
xyz = world_to_cam @ XYZ1.transpose()
|
177 |
-
xyz = np.transpose(xyz)
|
178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
for pt, c in zip(xyz, rgb):
|
180 |
x, y, z = pt
|
181 |
u, v = x/z, y/z
|
@@ -183,16 +198,12 @@ def get_SfM_depth(points3D, depth_np, gest_seg_np, K, R, t):
|
|
183 |
v = v.astype(np.int32)
|
184 |
for i in range(max(0, u - dilate_r), min(W, u + dilate_r)):
|
185 |
for j in range(max(0, v - dilate_r), min(H, v + dilate_r)):
|
186 |
-
if z > 0:
|
187 |
-
sfm_depth_np[j, i]
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
if sfm_depth_np[v, u] == 0 or z < sfm_depth_np[v, u]:
|
193 |
-
sfm_depth_np[v, u] = z
|
194 |
-
sfm_color_np[v, u] = c
|
195 |
-
'''
|
196 |
if DUMP_IMG:
|
197 |
filename_sfm_depth = 'sfm_depth.png'
|
198 |
cv2.imwrite(filename_sfm_depth, sfm_depth_np/100)
|
@@ -340,6 +351,8 @@ def get_vertices_and_edges_from_two_segmentations(ade_seg_np, gest_seg_np, edge_
|
|
340 |
if DUMP_IMG:
|
341 |
filename_edges_map = f'edges_map_{rid}.jpg'
|
342 |
cv2.imwrite(filename_edges_map, line_img)
|
|
|
|
|
343 |
return vertices, connections
|
344 |
|
345 |
def get_uv_dept_category(vertices, depth, ade_seg):
|
@@ -464,6 +477,7 @@ def predict(entry, visualize=False) -> Tuple[np.ndarray, List[int]]:
|
|
464 |
elif i==2: # only visualize view 0,1
|
465 |
continue
|
466 |
'''
|
|
|
467 |
depth_scale = 2.5
|
468 |
|
469 |
|
@@ -475,17 +489,20 @@ def predict(entry, visualize=False) -> Tuple[np.ndarray, List[int]]:
|
|
475 |
depth_np = np.array(depth) / depth_scale # / 2.5 # 2.5 is the scale estimation coefficient # don't use 2.5...
|
476 |
#vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th = 20.)
|
477 |
vertices, connections = get_vertices_and_edges_from_two_segmentations(ade_seg_np, gest_seg_np, edge_th = 20.)
|
|
|
478 |
|
479 |
if (len(vertices) < 2) or (len(connections) < 1):
|
480 |
-
print (f'Not enough vertices or connections in image {i}')
|
481 |
vert_edge_per_image[i] = np.empty((0, 2)), [], np.empty((0, 3))
|
482 |
continue
|
483 |
|
484 |
|
485 |
-
sfm_depth_np = get_SfM_depth(points3D, depth_np, gest_seg_np, K, R, t)
|
486 |
-
#uv, depth_vert = get_uv_depth(vertices, depth_np)
|
487 |
-
uv, depth_vert = get_smooth_uv_depth(vertices, depth_np, gest_seg_np, sfm_depth_np)
|
488 |
|
|
|
|
|
|
|
|
|
|
|
489 |
# Normalize the uv to the camera intrinsics
|
490 |
xy_local = np.ones((len(uv), 3))
|
491 |
xy_local[:, 0] = (uv[:, 0] - K[0,2]) / K[0,0]
|
@@ -499,7 +516,7 @@ def predict(entry, visualize=False) -> Tuple[np.ndarray, List[int]]:
|
|
499 |
vertices_3d = cv2.transform(cv2.convertPointsToHomogeneous(vertices_3d_local), cam_to_world)
|
500 |
vertices_3d = cv2.convertPointsFromHomogeneous(vertices_3d).reshape(-1, 3)
|
501 |
vert_edge_per_image[i] = vertices, connections, vertices_3d
|
502 |
-
all_3d_vertices, connections_3d = merge_vertices_3d(vert_edge_per_image, 3.0) # TODO: 3cm looks too small
|
503 |
#print(f'after merge, {len(all_3d_vertices)} 3d vertices and {len(connections_3d)} 3d connections')
|
504 |
#all_3d_vertices_clean, connections_3d_clean = prune_not_connected(all_3d_vertices, connections_3d)
|
505 |
all_3d_vertices_clean, connections_3d_clean = all_3d_vertices, connections_3d # don't prune -> cost:2.0
|
|
|
12 |
from hoho.read_write_colmap import read_cameras_binary, read_images_binary, read_points3D_binary
|
13 |
from hoho.color_mappings import gestalt_color_mapping, ade20k_color_mapping
|
14 |
|
15 |
+
DUMP_IMG = False
|
16 |
if DUMP_IMG:
|
17 |
from scipy.sparse import random
|
18 |
|
|
|
140 |
def get_local_depth(x, y, H, W, depth, r=5):
    '''Return the depth samples within radius r of pixel (x, y).

    Prefers the (sparse but metric) SfM-projected depth where a value is
    available and falls back to the dense monocular depth map elsewhere.

    Args:
        x, y: integer pixel coordinates of the query point (x = column,
            y = row, matching the depth[j, i] indexing below).
        H, W: image height and width, used to clamp the search window.
        depth: dense (monocular) depth map indexed as depth[row, col].
        r: neighbourhood radius in pixels.

    Returns:
        list of depth values collected inside the disc of radius r.

    NOTE(review): reads ``sfm_depth_np`` from the enclosing scope; it is
    either None or an H x W array where 0 marks "no SfM point projected
    to this pixel" -- confirm against get_SfM_depth.
    '''
    local_depths = []
    # Compare squared distances: equivalent to sqrt(d2) <= r for r >= 0,
    # and avoids a per-pixel np.sqrt call in the double loop.
    r2 = r * r
    for i in range(max(0, x - r), min(W, x + r)):
        for j in range(max(0, y - r), min(H, y + r)):
            if (i - x) ** 2 + (j - y) ** 2 <= r2:
                if sfm_depth_np is not None:
                    # SfM depth wins when present (non-zero); otherwise
                    # fall back to the mono estimate at this pixel.
                    if sfm_depth_np[j, i] != 0:
                        local_depths.append(sfm_depth_np[j, i])
                    else:
                        local_depths.append(depth[j, i])
                else:
                    local_depths.append(depth[j, i])
    return local_depths
|
155 |
|
156 |
vertex_depth = []
|
157 |
for x,y in zip(a,b):
|
158 |
local_depths = get_local_depth(x,y, H, W, depth, 5)
|
159 |
#print(f'local_depths={local_depths}')
|
160 |
+
#local_mean = np.mean(local_depths)
|
161 |
+
local_mean = np.min(local_depths)
|
162 |
vertex_depth.append(local_mean)
|
163 |
|
164 |
vertex_depth = np.array(vertex_depth)
|
165 |
return uv, vertex_depth
|
166 |
|
167 |
# TODO: timeit
|
168 |
+
def get_SfM_depth(points3D, depth_np, gest_seg_np, K, R, t, dilate_r = 5):
|
169 |
'''Project 3D sfm pointcloud to the image plane '''
|
170 |
H, W = depth_np.shape[:2]
|
171 |
sfm_depth_np = np.zeros(depth_np.shape)
|
|
|
179 |
Rt = np.concatenate( (R, t.reshape((3,1))), axis=1)
|
180 |
world_to_cam = K @ Rt
|
181 |
xyz = world_to_cam @ XYZ1.transpose()
|
182 |
+
xyz = np.transpose(xyz)
|
183 |
+
us, vs, zs = xyz[0]/xyz[2], xyz[1]/xyz[2], xyz[2]
|
184 |
+
us = us.astype(np.int32)
|
185 |
+
vs = vs.astype(np.int32)
|
186 |
+
for u,v,z,c in zip(us,vs,zs, rgb):
|
187 |
+
for i in range(max(0, u - dilate_r), min(W, u + dilate_r)):
|
188 |
+
for j in range(max(0, v - dilate_r), min(H, v + dilate_r)):
|
189 |
+
if z > 0:
|
190 |
+
if (sfm_depth_np[j, i]!=0 and z < sfm_depth_np[j, i]) or (sfm_depth_np[j, i]==0):
|
191 |
+
sfm_depth_np[j, i] = z
|
192 |
+
sfm_color_np[j, i] = c
|
193 |
+
'''
|
194 |
for pt, c in zip(xyz, rgb):
|
195 |
x, y, z = pt
|
196 |
u, v = x/z, y/z
|
|
|
198 |
v = v.astype(np.int32)
|
199 |
for i in range(max(0, u - dilate_r), min(W, u + dilate_r)):
|
200 |
for j in range(max(0, v - dilate_r), min(H, v + dilate_r)):
|
201 |
+
if z > 0:
|
202 |
+
if (sfm_depth_np[j, i]!=0 and z < sfm_depth_np[j, i]) or (sfm_depth_np[j, i]==0):
|
203 |
+
sfm_depth_np[j, i] = z
|
204 |
+
sfm_color_np[j, i] = c
|
205 |
+
'''
|
206 |
+
|
|
|
|
|
|
|
|
|
207 |
if DUMP_IMG:
|
208 |
filename_sfm_depth = 'sfm_depth.png'
|
209 |
cv2.imwrite(filename_sfm_depth, sfm_depth_np/100)
|
|
|
351 |
if DUMP_IMG:
|
352 |
filename_edges_map = f'edges_map_{rid}.jpg'
|
353 |
cv2.imwrite(filename_edges_map, line_img)
|
354 |
+
|
355 |
+
|
356 |
return vertices, connections
|
357 |
|
358 |
def get_uv_dept_category(vertices, depth, ade_seg):
|
|
|
477 |
elif i==2: # only visualize view 0,1
|
478 |
continue
|
479 |
'''
|
480 |
+
|
481 |
depth_scale = 2.5
|
482 |
|
483 |
|
|
|
489 |
depth_np = np.array(depth) / depth_scale # / 2.5 # 2.5 is the scale estimation coefficient # don't use 2.5...
|
490 |
#vertices, connections = get_vertices_and_edges_from_segmentation(gest_seg_np, edge_th = 20.)
|
491 |
vertices, connections = get_vertices_and_edges_from_two_segmentations(ade_seg_np, gest_seg_np, edge_th = 20.)
|
492 |
+
#vertices, connections = get_vertices_and_edges_from_two_segmentations(ade_seg_np, gest_seg_np, edge_th = 50.)
|
493 |
|
494 |
if (len(vertices) < 2) or (len(connections) < 1):
|
495 |
+
print (f'Not enough vertices ({len(vertices)}) or connections ({len(connections)}) in image {i}')
|
496 |
vert_edge_per_image[i] = np.empty((0, 2)), [], np.empty((0, 3))
|
497 |
continue
|
498 |
|
499 |
|
|
|
|
|
|
|
500 |
|
501 |
+
#uv, depth_vert = get_uv_depth(vertices, depth_np)
|
502 |
+
sfm_depth_np = get_SfM_depth(points3D, depth_np, gest_seg_np, K, R, t, 10)
|
503 |
+
uv, depth_vert = get_smooth_uv_depth(vertices, depth_np, gest_seg_np, sfm_depth_np)
|
504 |
+
#uv, depth_vert = get_smooth_uv_depth(vertices, depth_np, gest_seg_np, None)
|
505 |
+
|
506 |
# Normalize the uv to the camera intrinsics
|
507 |
xy_local = np.ones((len(uv), 3))
|
508 |
xy_local[:, 0] = (uv[:, 0] - K[0,2]) / K[0,0]
|
|
|
516 |
vertices_3d = cv2.transform(cv2.convertPointsToHomogeneous(vertices_3d_local), cam_to_world)
|
517 |
vertices_3d = cv2.convertPointsFromHomogeneous(vertices_3d).reshape(-1, 3)
|
518 |
vert_edge_per_image[i] = vertices, connections, vertices_3d
|
519 |
+
all_3d_vertices, connections_3d = merge_vertices_3d(vert_edge_per_image, 3.0) # TODO: 3cm looks too small
|
520 |
#print(f'after merge, {len(all_3d_vertices)} 3d vertices and {len(connections_3d)} 3d connections')
|
521 |
#all_3d_vertices_clean, connections_3d_clean = prune_not_connected(all_3d_vertices, connections_3d)
|
522 |
all_3d_vertices_clean, connections_3d_clean = all_3d_vertices, connections_3d # don't prune -> cost:2.0
|