Siromanec committed
Commit 14542ca • 1 Parent(s): 76ee08d

Added missed-edge detection, but it is not much better

Files changed (3):
  1. handcrafted_solution.py +127 -65
  2. script.py +5 -3
  3. test_solution.ipynb +21 -20
handcrafted_solution.py CHANGED
@@ -26,7 +26,29 @@ def empty_solution():
     return np.zeros((2, 3)), [(0, 1)]
 
 
-def undesired_objects(image):
+def convert_entry_to_human_readable(entry):
+    out = {}
+    already_good = {'__key__', 'wf_vertices', 'wf_edges', 'edge_semantics', 'mesh_vertices', 'mesh_faces',
+                    'face_semantics', 'K', 'R', 't'}
+    for k, v in entry.items():
+        if k in already_good:
+            out[k] = v
+            continue
+        match k:
+            case 'points3d':
+                out[k] = read_points3D_binary(fid=io.BytesIO(v))
+            case 'cameras':
+                out[k] = read_cameras_binary(fid=io.BytesIO(v))
+            case 'images':
+                out[k] = read_images_binary(fid=io.BytesIO(v))
+            case 'ade20k' | 'gestalt':
+                out[k] = [PImage.open(io.BytesIO(x)).convert('RGB') for x in v]
+            case 'depthcm':
+                out[k] = [PImage.open(io.BytesIO(x)) for x in entry['depthcm']]
+    return out
+
+
+def remove_undesired_objects(image):
     image = image.astype('uint8')
     nb_components, output, stats, centroids = cv2.connectedComponentsWithStats(image, connectivity=4)
     sizes = stats[:, -1]
@@ -47,7 +69,7 @@ def clean_image(image_gestalt) -> np.ndarray:
     image_gestalt = np.array(image_gestalt)
     unclassified_mask = cv2.inRange(image_gestalt, unclassified + 0.0, unclassified + 0.8)
     unclassified_mask = cv2.bitwise_not(unclassified_mask)
-    mask = undesired_objects(unclassified_mask).astype(np.uint8)
+    mask = remove_undesired_objects(unclassified_mask).astype(np.uint8)
     mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, np.ones((11, 11), np.uint8), iterations=11)
     mask = cv2.morphologyEx(mask, cv2.MORPH_DILATE, np.ones((11, 11), np.uint8), iterations=2)
 
@@ -105,32 +127,65 @@ def get_missed_vertices(vertices, inferred_centroids, *, min_missing_distance=20
     vertices = KDTree(vertices)
     closest = vertices.query(inferred_centroids, k=1, distance_upper_bound=min_missing_distance)
     missed_points = inferred_centroids[closest[1] == len(vertices.data)]
+
     return missed_points
 
 
-def convert_entry_to_human_readable(entry):
-    out = {}
-    already_good = {'__key__', 'wf_vertices', 'wf_edges', 'edge_semantics', 'mesh_vertices', 'mesh_faces',
-                    'face_semantics', 'K', 'R', 't'}
-    for k, v in entry.items():
-        if k in already_good:
-            out[k] = v
-            continue
-        match k:
-            case 'points3d':
-                out[k] = read_points3D_binary(fid=io.BytesIO(v))
-            case 'cameras':
-                out[k] = read_cameras_binary(fid=io.BytesIO(v))
-            case 'images':
-                out[k] = read_images_binary(fid=io.BytesIO(v))
-            case 'ade20k' | 'gestalt':
-                out[k] = [PImage.open(io.BytesIO(x)).convert('RGB') for x in v]
-            case 'depthcm':
-                out[k] = [PImage.open(io.BytesIO(x)) for x in entry['depthcm']]
-    return out
+def get_lines_and_directions(gest_seg_np, edge_class, *, color_range=4., rho, theta, threshold, min_line_length,
+                             max_line_gap, extend, **kwargs):
+    edge_color = np.array(gestalt_color_mapping[edge_class])
+
+    mask = cv2.inRange(gest_seg_np,
+                       edge_color - color_range,
+                       edge_color + color_range)
+    mask = cv2.morphologyEx(mask,
+                            cv2.MORPH_DILATE, np.ones((3, 3)), iterations=1)
+
+    if not np.any(mask):
+        return [], []
+
+    # Run Hough on edge detected image
+    # Output "lines" is an array containing endpoints of detected line segments
+    cv2.GaussianBlur(mask, (11, 11), 0, mask)
+    lines = cv2.HoughLinesP(mask, rho, theta, threshold, np.array([]),
+                            min_line_length, max_line_gap)
+
+    if lines is None:
+        return [], []
+
+    line_directions = []
+    edges = []
+    for line_idx, line in enumerate(lines):
+        for x1, y1, x2, y2 in line:
+            if x1 < x2:
+                x1, y1, x2, y2 = x2, y2, x1, y1
+            direction = (np.array([x2 - x1, y2 - y1]))
+            direction = direction / np.linalg.norm(direction)
+            line_directions.append(direction)
+
+            direction = extend * direction
+
+            x1, y1 = (-direction + (x1, y1)).astype(np.int32)
+            x2, y2 = (+ direction + (x2, y2)).astype(np.int32)
 
+            edges.append((x1, y1, x2, y2))
+    return edges, line_directions
 
-def get_vertices_and_edges_from_segmentation(gest_seg_np, *, color_range=4., point_radius=30, max_angle=5., extend=35,
+
+def infer_missing_vertices(ridge_edges, rake_edges):
+    ridge_edges = np.array(ridge_edges)
+    rake_edges = np.array(rake_edges)
+    ridge_ends = np.concatenate([ridge_edges[:, 2:], ridge_edges[:, :2]])
+    rake_ends = np.concatenate([rake_edges[:, 2:], rake_edges[:, :2]])
+    ridge_ends = KDTree(ridge_ends)
+    rake_ends = KDTree(rake_ends)
+    missing_candidates = rake_ends.query_ball_tree(ridge_ends, 5)
+    missing_candidates = np.concatenate([*missing_candidates])
+    missing_candidates = np.unique(missing_candidates).astype(np.int32)
+    return ridge_ends.data[missing_candidates]
+
+
+def get_vertices_and_edges_from_segmentation(gest_seg_np, *, point_radius=30, max_angle=5.,
                                              **kwargs):
     '''Get the vertices and edges from the gestalt segmentation mask of the house'''
     # Apex
@@ -144,7 +199,8 @@ def get_vertices_and_edges_from_segmentation(gest_seg_np, *, color_range=4., poi
     # missed_vertices = get_missed_vertices(vertices, inferred_vertices, **kwargs)
     # vertices = np.concatenate([vertices, missed_vertices])
 
-    vertices = KDTree(vertices)
+    if len(vertices) < 2:
+        return [], []
 
     # scale = 1
     # vertex_size = np.zeros(vertices.shape[0])
@@ -153,8 +209,6 @@ def get_vertices_and_edges_from_segmentation(gest_seg_np, *, color_range=4., poi
     # radius = point_radius # np.clip(int(max_depth//2 + depth_np[coords[1], coords[0]]), 10, 30)#int(np.clip(max_depth - depth_np[coords[1], coords[0]], 10, 20))
     # vertex_size[i] = (scale * radius) ** 2 # because we are using squared distances
 
-    if len(vertices.data) < 2:
-        return [], []
     edges = []
     line_directions = []
 
@@ -163,46 +217,54 @@ def get_vertices_and_edges_from_segmentation(gest_seg_np, *, color_range=4., poi
    threshold = 20  # minimum number of votes (intersections in Hough grid cell)
    min_line_length = 60  # minimum number of pixels making up a line
    max_line_gap = 40  # maximum gap in pixels between connectable line segments
+    ridge_edges, ridge_directions = get_lines_and_directions(gest_seg_np, "ridge",
+                                                             rho=rho,
+                                                             theta=theta,
+                                                             threshold=threshold,
+                                                             min_line_length=min_line_length,
+                                                             max_line_gap=max_line_gap,
+                                                             **kwargs)
+
+    rake_edges, rake_directions = get_lines_and_directions(gest_seg_np, "rake",
+                                                           rho=rho,
+                                                           theta=theta,
+                                                           threshold=threshold,
+                                                           min_line_length=min_line_length,
+                                                           max_line_gap=max_line_gap,
+                                                           **kwargs)
+
+    if len(ridge_edges) > 0:
+        edges.append(ridge_edges)
+        line_directions.append(ridge_directions)
+
+    if len(rake_edges) > 0:
+        edges.append(rake_edges)
+        line_directions.append(rake_directions)
+
+    missed_vertices = []
+    if len(ridge_edges) > 0 and len(rake_edges) > 0:
+
+        inferred_vertices = infer_missing_vertices(ridge_edges, rake_edges)
+        missed_vertices = get_missed_vertices(vertices, inferred_vertices, **kwargs)
+        vertices = np.concatenate([vertices, missed_vertices])
 
-    for edge_class in ['eave', 'ridge', 'rake', 'valley', 'flashing', 'step_flashing', 'hip']:
-
-        edge_color = np.array(gestalt_color_mapping[edge_class])
-
-        mask = cv2.inRange(gest_seg_np,
-                           edge_color - color_range,
-                           edge_color + color_range)
-        mask = cv2.morphologyEx(mask,
-                                cv2.MORPH_DILATE, np.ones((3, 3)), iterations=1)
-
-        if not np.any(mask):
-            continue
-
-        # Run Hough on edge detected image
-        # Output "lines" is an array containing endpoints of detected line segments
-        cv2.GaussianBlur(mask, (11, 11), 0, mask)
-        lines = cv2.HoughLinesP(mask, rho, theta, threshold, np.array([]),
-                                min_line_length, max_line_gap)
-
-        if lines is None:
-            continue
-
-        for line_idx, line in enumerate(lines):
-            for x1, y1, x2, y2 in line:
-                if x1 < x2:
-                    x1, y1, x2, y2 = x2, y2, x1, y1
-                direction = (np.array([x2 - x1, y2 - y1]))
-                direction = direction / np.linalg.norm(direction)
-                line_directions.append(direction)
-
-                direction = extend * direction
-
-                x1, y1 = (-direction + (x1, y1)).astype(np.int32)
-                x2, y2 = (+ direction + (x2, y2)).astype(np.int32)
-
-                edges.append((x1, y1, x2, y2))
+    vertices = KDTree(vertices)
 
-    edges = np.array(edges).astype(np.float64)
-    line_directions = np.array(line_directions).astype(np.float64)
+    for edge_class in ['eave', 'valley', 'flashing', 'step_flashing', 'hip']:
+        class_edges, class_directions = get_lines_and_directions(gest_seg_np, edge_class,
+                                                                 rho=rho,
+                                                                 theta=theta,
+                                                                 threshold=threshold,
+                                                                 min_line_length=min_line_length,
+                                                                 max_line_gap=max_line_gap,
+                                                                 **kwargs)
+
+        if len(class_edges) > 0:
+            edges.append(class_edges)
+            line_directions.append(class_directions)
+
+    edges = np.concatenate(edges).astype(np.float64)
+    line_directions = np.concatenate(line_directions).astype(np.float64)
     if len(edges) < 1:
         return [], []
     # calculate the distances between the vertices and the edge ends
@@ -264,7 +326,7 @@ def get_vertices_and_edges_from_segmentation(gest_seg_np, *, color_range=4., poi
     connections.append(possible_connections[:, fitted_line_idx])
 
     vertices = [{"xy": v, "type": "apex"} for v in apex_centroids]
-    # vertices += [{"xy": v, "type": "apex"} for v in missed_vertices]
+    vertices += [{"xy": v, "type": "apex"} for v in missed_vertices]
     vertices += [{"xy": v, "type": "eave_end_point"} for v in eave_end_point_centroids]
     return vertices, connections
script.py CHANGED
@@ -127,13 +127,15 @@ if __name__ == "__main__":
     with ProcessPoolExecutor(max_workers=8) as pool:
         results = []
         for i, sample in enumerate(tqdm(dataset)):
-            results.append(pool.submit(predict, sample, visualize=False,
+            results.append(pool.submit(predict, sample,
+                                       visualize=False,
                                        point_radius=25,
                                        max_angle=15,
                                        extend=30,
                                        merge_th=3.0,
-                                       min_missing_distance=1000000.0,
-                                       scale_estimation_coefficient=4.5))
+                                       min_missing_distance=300.0,
+                                       scale_estimation_coefficient=4.5,
+                                       ))
 
     for i, result in enumerate(tqdm(results)):
         key, pred_vertices, pred_edges = result.result()
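The substantive change here is min_missing_distance dropping from 1000000.0 to 300.0. Assuming predict forwards its keyword arguments down to get_missed_vertices (as the **kwargs chain in handcrafted_solution.py suggests), this value is the distance_upper_bound of the KDTree query against the already-detected vertices: with the old, effectively infinite bound almost every inferred centroid matched some existing vertex and no missed vertices were ever added, while 300.0 makes the filter selective. A small sketch with invented coordinates:

import numpy as np
from scipy.spatial import KDTree

detected = KDTree(np.array([[0., 0.], [500., 500.]]))  # vertices already found
inferred = np.array([[400., 100.]])                    # ~412 px from both

for bound in (1_000_000.0, 300.0):
    _, idx = detected.query(inferred, k=1, distance_upper_bound=bound)
    print(bound, inferred[idx == len(detected.data)])
# 1000000.0 -> []              the centroid "matches" an existing vertex
# 300.0     -> [[400. 100.]]   the centroid is kept as a missed vertex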
test_solution.ipynb CHANGED
@@ -6,8 +6,8 @@
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
-    "end_time": "2024-05-30T20:07:32.989151Z",
-    "start_time": "2024-05-30T20:07:28.709056Z"
+    "end_time": "2024-05-31T20:17:44.768535Z",
+    "start_time": "2024-05-31T20:17:41.704428Z"
    }
   },
   "source": [
@@ -44,8 +44,8 @@
  {
   "metadata": {
    "ExecuteTime": {
-    "end_time": "2024-05-30T20:07:32.997502Z",
-    "start_time": "2024-05-30T20:07:32.991160Z"
+    "end_time": "2024-05-31T20:17:44.774691Z",
+    "start_time": "2024-05-31T20:17:44.769543Z"
    }
   },
   "cell_type": "code",
@@ -64,17 +64,17 @@
  {
   "metadata": {
    "ExecuteTime": {
-    "end_time": "2024-05-30T20:07:33.002189Z",
-    "start_time": "2024-05-30T20:07:32.998509Z"
+    "end_time": "2024-05-31T20:17:44.779814Z",
+    "start_time": "2024-05-31T20:17:44.775700Z"
    }
   },
   "cell_type": "code",
   "source": [
    "\n",
    "\n",
-   "#for i, sample in tqdm(enumerate(dataset)):\n",
+   "# for i, sample in tqdm(enumerate(dataset)):\n",
    "# #     if i > 170:\n",
-   "#     predict(sample, visualize=False, point_radius=40, max_angle=5)"
+   "#     predict(sample, visualize=False, point_radius=40, max_angle=5, extend=30)"
   ],
   "id": "f36ee7b8f0427f72",
   "outputs": [],
@@ -83,8 +83,8 @@
  {
   "metadata": {
    "ExecuteTime": {
-    "end_time": "2024-05-30T20:09:14.932688Z",
-    "start_time": "2024-05-30T20:07:33.003197Z"
+    "end_time": "2024-05-31T20:30:50.830835Z",
+    "start_time": "2024-05-31T20:29:08.806688Z"
    }
   },
   "cell_type": "code",
@@ -101,8 +101,9 @@
    "                                       max_angle=15, \n",
    "                                       extend=30, \n",
    "                                       merge_th=3.0, \n",
-   "                                       min_missing_distance=10000.0, \n",
-   "                                       scale_estimation_coefficient=4.5))\n",
+   "                                       min_missing_distance=300.0, \n",
+   "                                       scale_estimation_coefficient=4.5,\n",
+   "                                       ))\n",
    "\n",
    "    for i, result in enumerate(tqdm(results)):\n",
    "        key, pred_vertices, pred_edges = result.result()\n",
@@ -122,18 +123,18 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-     "346it [00:11, 29.46it/s] \n",
-     "100%|██████████| 346/346 [01:28<00:00,  3.89it/s]\n"
+     "346it [00:12, 27.31it/s] \n",
+     "100%|██████████| 346/346 [01:28<00:00,  3.93it/s]\n"
    ]
   }
  ],
-  "execution_count": 4
+  "execution_count": 15
 },
 {
  "metadata": {
   "ExecuteTime": {
-   "end_time": "2024-05-30T20:09:16.762781Z",
-   "start_time": "2024-05-30T20:09:14.933694Z"
+   "end_time": "2024-05-31T20:46:13.489460Z",
+   "start_time": "2024-05-31T20:46:13.290404Z"
   }
  },
  "cell_type": "code",
@@ -163,15 +164,15 @@
  {
   "data": {
    "text/plain": [
-    "DescribeResult(nobs=173, minmax=(1.017917771309308, 3.4203176014390544), mean=2.1252280986193086, variance=0.18178457466035677, skewness=0.3534767409028922, kurtosis=-0.13765543977621153)"
+    "DescribeResult(nobs=173, minmax=(1.1048503424519986, 3.2776734068655204), mean=2.123907112995204, variance=0.1767523302203136, skewness=0.3492868616641026, kurtosis=-0.24007805364057333)"
    ]
   },
-  "execution_count": 5,
+  "execution_count": 17,
   "metadata": {},
   "output_type": "execute_result"
  }
 ],
-"execution_count": 5
+"execution_count": 17
},
{
 "metadata": {},