radames committed on
Commit
932c3e0
·
1 Parent(s): 8f5830b
Files changed (2) hide show
  1. .gitignore +46 -0
  2. app.py +71 -43
.gitignore ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python build
2
+ .eggs/
3
+ gradio.egg-info/*
4
+ !gradio.egg-info/requires.txt
5
+ !gradio.egg-info/PKG-INFO
6
+ dist/
7
+ *.pyc
8
+ __pycache__/
9
+ *.py[cod]
10
+ *$py.class
11
+ build/
12
+
13
+ # JS build
14
+ gradio/templates/frontend
15
+ # Secrets
16
+ .env
17
+
18
+ # Gradio run artifacts
19
+ *.db
20
+ *.sqlite3
21
+ gradio/launches.json
22
+ flagged/
23
+ gradio_cached_examples/
24
+
25
+ # Tests
26
+ .coverage
27
+ coverage.xml
28
+ test.txt
29
+
30
+ # Demos
31
+ demo/tmp.zip
32
+ demo/files/*.avi
33
+ demo/files/*.mp4
34
+
35
+ # Etc
36
+ .idea/*
37
+ .DS_Store
38
+ *.bak
39
+ workspace.code-workspace
40
+ *.h5
41
+ .vscode/
42
+
43
+ # log files
44
+ .pnpm-debug.log
45
+ venv/
46
+ *.db-journal
app.py CHANGED
@@ -4,72 +4,100 @@ import torch
4
  import numpy as np
5
  from PIL import Image
6
  import open3d as o3d
7
-
8
# Fetch the sample picture that the interface offers as its example input.
torch.hub.download_url_to_file(
    'http://images.cocodataset.org/val2017/000000039769.jpg',
    'cats.jpg',
)

# Both the pre-processor and the depth model come from the same checkpoint.
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
12
 
13
def process_image(image):
    """Predict a depth map for a PIL image and export a mesh built from it.

    Args:
        image: Input picture as a PIL image.

    Returns:
        The fixed filename ``"output.gltf"`` that ``create_obj_2`` writes.
    """
    model_inputs = feature_extractor(image, return_tensors="pt")

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        raw_depth = model(**model_inputs).predicted_depth

    # Resize the prediction back to the input resolution. PIL reports
    # (width, height) while interpolate expects (height, width).
    target_hw = tuple(reversed(image.size))
    resized = torch.nn.functional.interpolate(
        raw_depth.unsqueeze(1),
        size=target_hw,
        mode="bicubic",
        align_corners=False,
    )
    depth = resized.squeeze().cpu().numpy()

    # Rescale so the largest depth maps to 255, then truncate to 8 bits.
    depth_image = (depth * 255 / np.max(depth)).astype('uint8')
    create_obj_2(np.array(image), depth_image)
    return "output.gltf"
37
 
38
- # gradio.inputs.Image3D(self, label=None, optional=False)
39
 
40
def create_obj_2(rgb_image, depth_image):
    """Reconstruct a mesh from a colour image plus depth map and save it.

    Args:
        rgb_image: Colour image as a NumPy array.
        depth_image: Depth map as a NumPy array, indexed (row, column).

    Returns:
        The fixed output filename ``"output.gltf"``.
    """
    depth_o3d = o3d.geometry.Image(depth_image)
    image_o3d = o3d.geometry.Image(rgb_image)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d)

    # NumPy shape is (rows, columns): rows are the height, columns the width.
    # The previous code had these two swapped, which skewed the intrinsics
    # for every non-square image.
    h = int(depth_image.shape[0])
    w = int(depth_image.shape[1])

    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
    # Argument order: width, height, fx, fy, cx, cy.
    camera_intrinsic.set_intrinsics(w, h, w*0.5, h*0.5, w*0.5, h*0.5)

    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
        rgbd_image, camera_intrinsic)
    print('normals')
    pcd.normals = o3d.utility.Vector3dVector(
        np.zeros((1, 3)))  # invalidate existing normals
    pcd.estimate_normals()

    print('run Poisson surface reconstruction')
    with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug):
        mesh, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
            pcd, depth=9)
    print(mesh)
    o3d.io.write_triangle_mesh("output.gltf", mesh, write_triangle_uvs=True)
    return "output.gltf"
62
-
63
# Text shown at the top of the demo page.
title = "Interactive demo: DPT + 3D"
description = (
    "Demo for Intel's DPT, a Dense Prediction Transformer for "
    "state-of-the-art dense prediction tasks such as semantic segmentation and depth estimation."
)
examples = [['cats.jpg']]

# One PIL image in, one 3D model viewer (white background) out.
image_input = gr.inputs.Image(type="pil")
mesh_output = gr.outputs.Image3D(
    label="predicted depth",
    clear_color=[1.0, 1.0, 1.0, 1.0],
)

iface = gr.Interface(
    fn=process_image,
    inputs=image_input,
    outputs=mesh_output,
    title=title,
    description=description,
    examples=examples,
    allow_flagging="never",
    enable_queue=True,
)
iface.launch(debug=True)
 
4
  import numpy as np
5
  from PIL import Image
6
  import open3d as o3d
7
+ from pathlib import Path
 
8
 
9
# Both the pre-processor and the depth model come from the same checkpoint.
feature_extractor = DPTFeatureExtractor.from_pretrained(
    "Intel/dpt-large",
)
model = DPTForDepthEstimation.from_pretrained(
    "Intel/dpt-large",
)
11
 
12
+
13
def process_image(image_path):
    """Estimate depth for one image file and reconstruct a 3D mesh from it.

    Args:
        image_path: Location of the input image on disk (str or ``Path``).

    Returns:
        A three-item list: the depth map as an 8-bit PIL image, followed by
        the path of the written glTF file twice (the interface shows it both
        in the 3D viewer and as a downloadable file).
    """
    image_path = Path(image_path)
    print(image_path)

    # Open inside a context manager so the file handle is released, and
    # convert to RGB so the model and Open3D always get a 3-channel image
    # regardless of the uploaded file's mode.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    # prepare image for the model
    encoding = feature_extractor(image, return_tensors="pt")

    # forward pass
    with torch.no_grad():
        outputs = model(**encoding)
        predicted_depth = outputs.predicted_depth

    # interpolate to original size; PIL reports (width, height) while
    # interpolate expects (height, width), hence the reversal
    prediction = torch.nn.functional.interpolate(
        predicted_depth.unsqueeze(1),
        size=image.size[::-1],
        mode="bicubic",
        align_corners=False,
    ).squeeze()
    output = prediction.cpu().numpy()

    # Rescale so the largest value maps to 255. A prediction whose maximum
    # is exactly zero would otherwise divide by zero and yield NaNs.
    peak = np.max(output)
    divisor = peak if peak != 0 else 1
    depth_image = (output * 255 / divisor).astype('uint8')

    gltf_path = create_3d_obj(np.array(image), depth_image, image_path)
    img = Image.fromarray(depth_image)

    return [img, gltf_path, gltf_path]
38
 
39
+
40
def create_3d_obj(rgb_image, depth_image, image_path):
    """Build a mesh from a colour image and its depth map; save it as glTF.

    Args:
        rgb_image: Colour image as a NumPy array.
        depth_image: Depth map as a NumPy array, indexed (row, column).
        image_path: Path of the source image (str or ``Path``); its stem
            names the output file.

    Returns:
        Relative path (str) of the written ``.gltf`` file.

    Raises:
        RuntimeError: If Open3D reports that the mesh could not be written.
    """
    depth_o3d = o3d.geometry.Image(depth_image)
    image_o3d = o3d.geometry.Image(rgb_image)
    rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(
        image_o3d, depth_o3d, convert_rgb_to_intensity=False)

    # NumPy shape is (rows, columns): rows are the height, columns the width.
    h = int(depth_image.shape[0])
    w = int(depth_image.shape[1])

    camera_intrinsic = o3d.camera.PinholeCameraIntrinsic()
    # Argument order: width, height, fx, fy, cx, cy.
    camera_intrinsic.set_intrinsics(w, h, 500, 500, w/2, h/2)

    pcd = o3d.geometry.PointCloud.create_from_rgbd_image(
        rgbd_image, camera_intrinsic)

    print('normals')
    pcd.normals = o3d.utility.Vector3dVector(
        np.zeros((1, 3)))  # invalidate existing normals
    pcd.estimate_normals(
        search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.01, max_nn=30))
    # Negate the Y axis of every point.
    pcd.transform([[1, 0, 0, 0],
                   [0, -1, 0, 0],
                   [0, 0, 1, 0],
                   [0, 0, 0, 1]])

    print('run Poisson surface reconstruction')
    with o3d.utility.VerbosityContextManager(o3d.utility.VerbosityLevel.Debug):
        mesh_raw, _ = o3d.geometry.TriangleMesh.create_from_point_cloud_poisson(
            pcd, depth=10, width=0, scale=1.1, linear_fit=True)

    # Simplify on a grid of 1/128th of the mesh's longest extent.
    voxel_size = max(mesh_raw.get_max_bound() - mesh_raw.get_min_bound()) / 128
    print(f'voxel_size = {voxel_size:e}')
    mesh = mesh_raw.simplify_vertex_clustering(
        voxel_size=voxel_size,
        contraction=o3d.geometry.SimplificationContraction.Average)

    # Keep only the part of the mesh inside the point cloud's bounding box.
    bbox = pcd.get_axis_aligned_bounding_box()
    mesh_crop = mesh.crop(bbox)
    # Log the mesh that is actually exported, not the uncropped one.
    print(mesh_crop)

    # Path() accepts both str and Path, so either kind of caller works.
    gltf_path = f'./{Path(image_path).stem}.gltf'
    written = o3d.io.write_triangle_mesh(
        gltf_path, mesh_crop, write_triangle_uvs=True)
    if not written:
        # Without this check a failed export would hand the caller a path
        # to a file that does not exist.
        raise RuntimeError(f'failed to write mesh to {gltf_path}')
    return gltf_path
85
+
86
+
87
# Text shown at the top of the demo page.
title = "Demo: zero-shot depth estimation with DPT + 3D Point Cloud"
description = (
    "This demo is a variation from the original "
    "<a href='https://huggingface.co/spaces/nielsr/dpt-depth-estimation' target='_blank'>DPT Demo</a>. "
    "It uses the DPT model to predict the depth of an image and then uses 3D Point Cloud to create a 3D object."
)
examples = [
    ['./examples/jonathan-borba-CgWTqYxHEkg-unsplash.jpeg'],
    ['./examples/amber-kipp-75715CVEJhI-unsplash.jpeg'],
]

# The handler receives the upload as a file path and returns three values:
# the depth image, the mesh for the 3D viewer, and the mesh file to download.
input_components = [
    gr.inputs.Image(type="filepath", label="Input Image"),
]
output_components = [
    gr.outputs.Image(label="predicted depth", type="pil"),
    gr.outputs.Image3D(
        label="3d mesh reconstruction",
        clear_color=[1.0, 1.0, 1.0, 1.0],
    ),
    gr.outputs.File(label="3d gLTF"),
]

iface = gr.Interface(
    fn=process_image,
    inputs=input_components,
    outputs=output_components,
    title=title,
    description=description,
    examples=examples,
    allow_flagging="never",
)
iface.launch(debug=True, enable_queue=True, cache_examples=True)