JianyuanWang committed on
Commit 471bf0d
1 Parent(s): c956e19

fix color visual

Files changed (2):
  1. app.py +5 -85
  2. gradio_util.py +297 -0
app.py CHANGED
@@ -1,4 +1,3 @@
-
 import os
 import cv2
 import torch
@@ -6,6 +5,8 @@ import numpy as np
 import gradio as gr
 import spaces
 
+
+from gradio_util import vggsfm_predictions_to_glb
 import trimesh
 import sys
 import os
@@ -18,9 +19,7 @@ from vggsfm_code.hf_demo import demo_fn
 from omegaconf import DictConfig, OmegaConf
 from viz_utils.viz_fn import add_camera, apply_density_filter_np
 import glob
-#
 from scipy.spatial.transform import Rotation
-# import PIL
 import gc
 import open3d as o3d
 import time
@@ -34,8 +33,6 @@ def vggsfm_demo(
     query_frame_num,
     max_query_pts=4096,
 ):
-
-
     start_time = time.time()
     gc.collect()
     torch.cuda.empty_cache()
@@ -116,7 +113,6 @@ def vggsfm_demo(
     # except:
    #     return None, "Something seems to be incorrect. Please verify that your inputs are formatted correctly. If the issue persists, kindly create a GitHub issue for further assistance."
 
-    print(predictions.keys())
     glbscene = vggsfm_predictions_to_glb(predictions)
 
     glbfile = target_dir + "/glbscene.glb"
@@ -133,89 +129,13 @@ def vggsfm_demo(
     end_time = time.time()
     execution_time = end_time - start_time
     print(f"Execution time: {execution_time} seconds")
-
-    # recon_num
-    return glbfile, f"Reconstruction complete ({recon_num} frames)"
-
-
-
-
-def vggsfm_predictions_to_glb(predictions, sphere=False):
-    # del predictions['reconstruction']
-    # torch.save(predictions, "predictions_scene2.pth")
-    # learned from https://github.com/naver/dust3r/blob/main/dust3r/viz.py
-    points3D = predictions["points3D"].cpu().numpy()
-    points3D_rgb = predictions["points3D_rgb"].cpu().numpy()
-    points3D_rgb = (points3D_rgb*255).astype(np.uint8)
-
-    extrinsics_opencv = predictions["extrinsics_opencv"].cpu().numpy()
-    intrinsics_opencv = predictions["intrinsics_opencv"].cpu().numpy()
-
-
-    raw_image_paths = predictions["raw_image_paths"]
-    images = predictions["images"].permute(0,2,3,1).cpu().numpy()
-    images = (images*255).astype(np.uint8)
-
-    glbscene = trimesh.Scene()
-
-    if True:
-        pcd = o3d.geometry.PointCloud()
-        pcd.points = o3d.utility.Vector3dVector(points3D)
-        pcd.colors = o3d.utility.Vector3dVector(points3D_rgb)
-
-        cl, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=1.0)
-        filtered_pcd = pcd.select_by_index(ind)
-
-        print(f"Filter out {len(points3D) - len(filtered_pcd.points)} 3D points")
-        points3D = np.asarray(filtered_pcd.points)
-        points3D_rgb = np.asarray(filtered_pcd.colors)
-
-    if sphere:
-        # TOO SLOW
-        print("testing sphere")
-        # point_size = 0.02
-    else:
-        point_cloud = trimesh.PointCloud(points3D, colors=points3D_rgb)
-        glbscene.add_geometry(point_cloud)
-
-    camera_edge_colors = [(255, 0, 0), (0, 0, 255), (0, 255, 0), (255, 0, 255), (255, 204, 0), (0, 204, 204),
-                          (128, 255, 255), (255, 128, 255), (255, 255, 128), (0, 0, 0), (128, 128, 128)]
-
-    frame_num = len(extrinsics_opencv)
-    extrinsics_opencv_4x4 = np.zeros((frame_num, 4, 4))
-    extrinsics_opencv_4x4[:, :3, :4] = extrinsics_opencv
-    extrinsics_opencv_4x4[:, 3, 3] = 1
-
-    for idx in range(frame_num):
-        cam_from_world = extrinsics_opencv_4x4[idx]
-        cam_to_world = np.linalg.inv(cam_from_world)
-        cur_cam_color = camera_edge_colors[idx % len(camera_edge_colors)]
-        cur_focal = intrinsics_opencv[idx, 0, 0]
-
-        add_camera(glbscene, cam_to_world, cur_cam_color, image=None, imsize=(1024, 1024),
-                   focal=None, screen_width=0.35)
-
-    opengl_mat = np.array([[1, 0, 0, 0],
-                           [0, -1, 0, 0],
-                           [0, 0, -1, 0],
-                           [0, 0, 0, 1]])
-
-    rot = np.eye(4)
-    rot[:3, :3] = Rotation.from_euler('y', np.deg2rad(180)).as_matrix()
-    glbscene.apply_transform(np.linalg.inv(np.linalg.inv(extrinsics_opencv_4x4[0]) @ opengl_mat @ rot))
-
-    # Calculate the bounding box center and apply the translation
-    # bounding_box = glbscene.bounds
-    # center = (bounding_box[0] + bounding_box[1]) / 2
-    # translation = np.eye(4)
-    # translation[:3, 3] = -center
-
-    # glbscene.apply_transform(translation)
-    # glbfile = "glbscene.glb"
-    # glbscene.export(file_obj=glbfile)
-    return glbscene
 
 
+    # glbscene.geometry['geometry_0'].colors.max()
+    # recon_num
+    return glbfile, f"Reconstruction complete ({recon_num} frames)"
+
+
+
+
 statue_video = "vggsfm_code/examples/videos/statue_video.mp4"
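
Net effect on app.py: the inline GLB-conversion helper is deleted and replaced by the import from the new gradio_util module. A minimal sketch of the new call path, using a hypothetical stand-in `predictions` dict (the keys and shapes follow what gradio_util.py reads; real values come from the VGGSfM demo):

import torch
from gradio_util import vggsfm_predictions_to_glb

# Hypothetical stand-in for VGGSfM output: random points/colors and two
# identity cameras, shaped like the tensors gradio_util.py expects.
predictions = {
    "points3D": torch.rand(1000, 3),                        # (N, 3) xyz
    "points3D_rgb": torch.rand(1000, 3),                    # (N, 3) in [0, 1]
    "extrinsics_opencv": torch.eye(4)[:3].repeat(2, 1, 1),  # (B, 3, 4) world-to-cam
}

glbscene = vggsfm_predictions_to_glb(predictions)  # trimesh.Scene
glbscene.export(file_obj="glbscene.glb")           # same export app.py performs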
gradio_util.py ADDED
@@ -0,0 +1,297 @@
+try:
+    import os
+
+    import trimesh
+    import open3d as o3d
+
+    import gradio as gr
+    import numpy as np
+    import matplotlib
+    from scipy.spatial.transform import Rotation
+
+    print("Successfully imported the packages for Gradio visualization")
+except:
+    print(
+        f"Failed to import packages for Gradio visualization. Please disable gradio visualization"
+    )
+
+
+def visualize_by_gradio(glbfile):
+    """
+    Set up and launch a Gradio interface to visualize a GLB file.
+
+    Args:
+        glbfile (str): Path to the GLB file to be visualized.
+    """
+
+    def load_glb_file(glb_path):
+        # Check if the file exists and return the path or error message
+        if os.path.exists(glb_path):
+            return glb_path, "3D Model Loaded Successfully"
+        else:
+            return None, "File not found"
+
+    # Load the GLB file initially to check if it's valid
+    initial_model, log_message = load_glb_file(glbfile)
+
+    # Create the Gradio interface
+    with gr.Blocks() as demo:
+        gr.Markdown("# GLB File Viewer")
+
+        # 3D Model viewer component
+        model_viewer = gr.Model3D(
+            label="3D Model Viewer", height=600, value=initial_model
+        )
+
+        # Textbox for log output
+        log_output = gr.Textbox(label="Log", lines=2, value=log_message)
+
+    # Launch the Gradio interface
+    demo.launch(share=True)
+
+
+def vggsfm_predictions_to_glb(predictions) -> trimesh.Scene:
+    """
+    Converts VGG SFM predictions to a 3D scene represented as a GLB.
+
+    Args:
+        predictions (dict): A dictionary containing model predictions.
+
+    Returns:
+        trimesh.Scene: A 3D scene object.
+    """
+    # Convert predictions to numpy arrays
+    vertices_3d = predictions["points3D"].cpu().numpy()
+    colors_rgb = (predictions["points3D_rgb"].cpu().numpy() * 255).astype(
+        np.uint8
+    )
+
+    if True:
+        pcd = o3d.geometry.PointCloud()
+        pcd.points = o3d.utility.Vector3dVector(vertices_3d)
+        pcd.colors = o3d.utility.Vector3dVector(colors_rgb)
+
+        cl, ind = pcd.remove_statistical_outlier(nb_neighbors=20, std_ratio=1.0)
+        filtered_pcd = pcd.select_by_index(ind)
+
+        print(f"Filter out {len(vertices_3d) - len(filtered_pcd.points)} 3D points")
+        vertices_3d = np.asarray(filtered_pcd.points)
+        colors_rgb = np.asarray(filtered_pcd.colors).astype(np.uint8)
+
+    camera_matrices = predictions["extrinsics_opencv"].cpu().numpy()
+
+    # Calculate the 5th and 95th percentiles along each axis
+    lower_percentile = np.percentile(vertices_3d, 5, axis=0)
+    upper_percentile = np.percentile(vertices_3d, 95, axis=0)
+
+    # Calculate the diagonal length of the percentile bounding box
+    scene_scale = np.linalg.norm(upper_percentile - lower_percentile)
+
+    colormap = matplotlib.colormaps.get_cmap("gist_rainbow")
+
+    # Initialize a 3D scene
+    scene_3d = trimesh.Scene()
+
+    # Add point cloud data to the scene
+    point_cloud_data = trimesh.PointCloud(
+        vertices=vertices_3d, colors=colors_rgb
+    )
+
+    scene_3d.add_geometry(point_cloud_data)
+
+    # Prepare 4x4 matrices for camera extrinsics
+    num_cameras = len(camera_matrices)
+    extrinsics_matrices = np.zeros((num_cameras, 4, 4))
+    extrinsics_matrices[:, :3, :4] = camera_matrices
+    extrinsics_matrices[:, 3, 3] = 1
+
+    # Add camera models to the scene
+    for i in range(num_cameras):
+        world_to_camera = extrinsics_matrices[i]
+        camera_to_world = np.linalg.inv(world_to_camera)
+        rgba_color = colormap(i / num_cameras)
+        current_color = tuple(int(255 * x) for x in rgba_color[:3])
+
+        integrate_camera_into_scene(
+            scene_3d, camera_to_world, current_color, scene_scale
+        )
+
+    # Align scene to the observation of the first camera
+    scene_3d = apply_scene_alignment(scene_3d, extrinsics_matrices)
+
+    return scene_3d
+
+
+def apply_scene_alignment(
+    scene_3d: trimesh.Scene, extrinsics_matrices: np.ndarray
+) -> trimesh.Scene:
+    """
+    Aligns the 3D scene based on the extrinsics of the first camera.
+
+    Args:
+        scene_3d (trimesh.Scene): The 3D scene to be aligned.
+        extrinsics_matrices (np.ndarray): Camera extrinsic matrices.
+
+    Returns:
+        trimesh.Scene: Aligned 3D scene.
+    """
+    # Set transformations for scene alignment
+    opengl_conversion_matrix = get_opengl_conversion_matrix()
+
+    # Rotation matrix for alignment (180 degrees around the y-axis)
+    align_rotation = np.eye(4)
+    align_rotation[:3, :3] = Rotation.from_euler(
+        "y", 180, degrees=True
+    ).as_matrix()
+
+    # Apply transformation
+    initial_transformation = (
+        np.linalg.inv(extrinsics_matrices[0])
+        @ opengl_conversion_matrix
+        @ align_rotation
+    )
+    scene_3d.apply_transform(initial_transformation)
+    return scene_3d
+
+
+def integrate_camera_into_scene(
+    scene: trimesh.Scene,
+    transform: np.ndarray,
+    face_colors: tuple,
+    scene_scale: float,
+):
+    """
+    Integrates a fake camera mesh into the 3D scene.
+
+    Args:
+        scene (trimesh.Scene): The 3D scene to add the camera model.
+        transform (np.ndarray): Transformation matrix for camera positioning.
+        face_colors (tuple): Color of the camera face.
+        scene_scale (float): Scale of the scene.
+    """
+
+    cam_width = scene_scale * 0.05
+    cam_height = scene_scale * 0.1
+
+    # Create cone shape for camera
+    rot_45_degree = np.eye(4)
+    rot_45_degree[:3, :3] = Rotation.from_euler(
+        "z", 45, degrees=True
+    ).as_matrix()
+    rot_45_degree[2, 3] = -cam_height
+
+    opengl_transform = get_opengl_conversion_matrix()
+    # Combine transformations
+    complete_transform = transform @ opengl_transform @ rot_45_degree
+    camera_cone_shape = trimesh.creation.cone(cam_width, cam_height, sections=4)
+
+    # Generate mesh for the camera
+    slight_rotation = np.eye(4)
+    slight_rotation[:3, :3] = Rotation.from_euler(
+        "z", 2, degrees=True
+    ).as_matrix()
+
+    vertices_combined = np.concatenate(
+        [
+            camera_cone_shape.vertices,
+            0.95 * camera_cone_shape.vertices,
+            transform_points(slight_rotation, camera_cone_shape.vertices),
+        ]
+    )
+    vertices_transformed = transform_points(
+        complete_transform, vertices_combined
+    )
+
+    mesh_faces = compute_camera_faces(camera_cone_shape)
+
+    # Add the camera mesh to the scene
+    camera_mesh = trimesh.Trimesh(
+        vertices=vertices_transformed, faces=mesh_faces
+    )
+    camera_mesh.visual.face_colors[:, :3] = face_colors
+    scene.add_geometry(camera_mesh)
+
+
+def compute_camera_faces(cone_shape: trimesh.Trimesh) -> np.ndarray:
+    """
+    Computes the faces for the camera mesh.
+
+    Args:
+        cone_shape (trimesh.Trimesh): The shape of the camera cone.
+
+    Returns:
+        np.ndarray: Array of faces for the camera mesh.
+    """
+    # Create pseudo cameras
+    faces_list = []
+    num_vertices_cone = len(cone_shape.vertices)
+
+    for face in cone_shape.faces:
+        if 0 in face:
+            continue
+        v1, v2, v3 = face
+        v1_offset, v2_offset, v3_offset = face + num_vertices_cone
+        v1_offset_2, v2_offset_2, v3_offset_2 = face + 2 * num_vertices_cone
+
+        faces_list.extend(
+            [
+                (v1, v2, v2_offset),
+                (v1, v1_offset, v3),
+                (v3_offset, v2, v3),
+                (v1, v2, v2_offset_2),
+                (v1, v1_offset_2, v3),
+                (v3_offset_2, v2, v3),
+            ]
+        )
+
+    faces_list += [(v3, v2, v1) for v1, v2, v3 in faces_list]
+    return np.array(faces_list)
+
+
+def transform_points(
+    transformation: np.ndarray, points: np.ndarray, dim: int = None
+) -> np.ndarray:
+    """
+    Applies a 4x4 transformation to a set of points.
+
+    Args:
+        transformation (np.ndarray): Transformation matrix.
+        points (np.ndarray): Points to be transformed.
+        dim (int, optional): Dimension for reshaping the result.
+
+    Returns:
+        np.ndarray: Transformed points.
+    """
+    points = np.asarray(points)
+    initial_shape = points.shape[:-1]
+    dim = dim or points.shape[-1]
+
+    # Apply transformation
+    transformation = transformation.swapaxes(
+        -1, -2
+    )  # Transpose the transformation matrix
+    points = points @ transformation[..., :-1, :] + transformation[..., -1:, :]
+
+    # Reshape the result
+    result = points[..., :dim].reshape(*initial_shape, dim)
+    return result
+
+
+def get_opengl_conversion_matrix() -> np.ndarray:
+    """
+    Constructs and returns the OpenGL conversion matrix.
+
+    Returns:
+        numpy.ndarray: A 4x4 OpenGL conversion matrix.
+    """
+    # Create an identity matrix
+    matrix = np.identity(4)
+
+    # Flip the y and z axes
+    matrix[1, 1] = -1
+    matrix[2, 2] = -1
+
+    return matrix
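
On the "fix color visual" itself: a plausible reading of the change is the `.astype(np.uint8)` cast after outlier filtering. Open3D's `Vector3dVector` stores the 0-255 uint8 colors as float64, so `np.asarray(filtered_pcd.colors)` comes back as floats in the 0-255 range; trimesh assumes float colors lie in [0, 1] and rescales them, which garbles the point-cloud colors in the exported GLB (hence the leftover `glbscene.geometry['geometry_0'].colors.max()` debug comment in app.py). A small sketch of the failure mode and the fix:

import numpy as np
import trimesh

points = np.zeros((2, 3))
# float64 colors in the 0-255 range, as returned by
# np.asarray(filtered_pcd.colors) after the Open3D round trip
float_colors = np.array([[200.0, 30.0, 30.0], [30.0, 200.0, 30.0]])

bad = trimesh.PointCloud(points, colors=float_colors)
print(bad.colors)   # wrong: trimesh rescales floats as if they were in [0, 1]

good = trimesh.PointCloud(points, colors=float_colors.astype(np.uint8))
print(good.colors)  # correct: uint8 values pass through unscaled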
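
For reference, `transform_points` is just the usual homogeneous-coordinate product with the w = 1 column left implicit. A quick self-check against the explicit form, with hypothetical values:

import numpy as np
from scipy.spatial.transform import Rotation
from gradio_util import transform_points

# Arbitrary rigid transform: 30-degree rotation about y plus a translation
T = np.eye(4)
T[:3, :3] = Rotation.from_euler("y", 30, degrees=True).as_matrix()
T[:3, 3] = [0.5, -1.0, 2.0]

pts = np.random.rand(100, 3)

# Explicit homogeneous form: append w = 1, multiply, drop w
homogeneous = np.hstack([pts, np.ones((100, 1))])
expected = (T @ homogeneous.T).T[:, :3]

assert np.allclose(transform_points(T, pts), expected)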