hysts (HF staff) committed
Commit 6a6f2a6
1 Parent(s): 117b175
Files changed (5)
  1. app_image_to_3d.py +8 -17
  2. app_text_to_3d.py +2 -14
  3. model.py +29 -43
  4. requirements.txt +1 -1
  5. style.css +0 -8
app_image_to_3d.py CHANGED
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+import pathlib
 import shlex
 import subprocess
 
@@ -11,14 +12,15 @@ from utils import randomize_seed_fn
 
 
 def create_demo(model: Model) -> gr.Blocks:
-    subprocess.run(
-        shlex.split(
-            'wget https://raw.githubusercontent.com/openai/shap-e/d99cedaea18e0989e340163dbaeb4b109fa9e8ec/shap_e/examples/example_data/corgi.png -O corgi.png'
-        ))
+    if not pathlib.Path('corgi.png').exists():
+        subprocess.run(
+            shlex.split(
+                'wget https://raw.githubusercontent.com/openai/shap-e/d99cedaea18e0989e340163dbaeb4b109fa9e8ec/shap_e/examples/example_data/corgi.png -O corgi.png'
+            ))
     examples = ['corgi.png']
 
     def process_example_fn(image_path: str) -> str:
-        return model.run_image(image_path, output_image_size=128)
+        return model.run_image(image_path)
 
     with gr.Blocks() as demo:
         with gr.Box():
@@ -26,7 +28,7 @@ def create_demo(model: Model) -> gr.Blocks:
                              show_label=False,
                              type='filepath')
             run_button = gr.Button('Run')
-            result = gr.Video(label='Result', elem_id='result-2')
+            result = gr.Model3D(label='Result', show_label=False)
             with gr.Accordion('Advanced options', open=False):
                 seed = gr.Slider(label='Seed',
                                  minimum=0,
@@ -46,15 +48,6 @@ def create_demo(model: Model) -> gr.Blocks:
                                                 maximum=100,
                                                 step=1,
                                                 value=64)
-                image_size = gr.Slider(label='Image size',
-                                       minimum=64,
-                                       maximum=256,
-                                       step=64,
-                                       value=128)
-                render_mode = gr.Dropdown(label='Render mode',
-                                          choices=['nerf', 'stf'],
-                                          value='nerf',
-                                          visible=False)
 
         gr.Examples(examples=examples,
                     inputs=image,
@@ -67,8 +60,6 @@ def create_demo(model: Model) -> gr.Blocks:
             seed,
             guidance_scale,
             num_inference_steps,
-            image_size,
-            render_mode,
         ]
 
         run_button.click(
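
Note: with the guard added above, the example image is fetched only when corgi.png is not already on disk. A minimal standalone sketch of the same download-once pattern, using urllib from the standard library instead of shelling out to wget (the helper name is illustrative, not part of the Space's code):

import pathlib
import urllib.request

CORGI_URL = ('https://raw.githubusercontent.com/openai/shap-e/'
             'd99cedaea18e0989e340163dbaeb4b109fa9e8ec/'
             'shap_e/examples/example_data/corgi.png')


def ensure_example_image(path: str = 'corgi.png') -> str:
    # Download the example image only if it is not already present.
    if not pathlib.Path(path).exists():
        urllib.request.urlretrieve(CORGI_URL, path)
    return path
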
app_text_to_3d.py CHANGED
@@ -21,7 +21,7 @@ def create_demo(model: Model) -> gr.Blocks:
     ]
 
     def process_example_fn(prompt: str) -> str:
-        return model.run_text(prompt, output_image_size=128)
+        return model.run_text(prompt)
 
     with gr.Blocks() as demo:
         with gr.Box():
@@ -32,7 +32,7 @@ def create_demo(model: Model) -> gr.Blocks:
                                  max_lines=1,
                                  placeholder='Enter your prompt').style(container=False)
                 run_button = gr.Button('Run').style(full_width=False)
-            result = gr.Video(label='Result', elem_id='result-1')
+            result = gr.Model3D(label='Result', show_label=False)
             with gr.Accordion('Advanced options', open=False):
                 seed = gr.Slider(label='Seed',
                                  minimum=0,
@@ -52,15 +52,6 @@ def create_demo(model: Model) -> gr.Blocks:
                                                 maximum=100,
                                                 step=1,
                                                 value=64)
-                image_size = gr.Slider(label='Image size',
-                                       minimum=64,
-                                       maximum=256,
-                                       step=64,
-                                       value=128)
-                render_mode = gr.Dropdown(label='Render mode',
-                                          choices=['nerf', 'stf'],
-                                          value='nerf',
-                                          visible=False)
 
         gr.Examples(examples=examples,
                     inputs=prompt,
@@ -73,8 +64,6 @@ def create_demo(model: Model) -> gr.Blocks:
             seed,
             guidance_scale,
             num_inference_steps,
-            image_size,
-            render_mode,
         ]
         prompt.submit(
             fn=randomize_seed_fn,
@@ -86,7 +75,6 @@ def create_demo(model: Model) -> gr.Blocks:
             inputs=inputs,
             outputs=result,
         )
-
         run_button.click(
             fn=randomize_seed_fn,
             inputs=[seed, randomize_seed],
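
Note: in both demos the gr.Video output is replaced by gr.Model3D, which takes a path to a mesh file (.glb/.obj/.gltf) and shows it in an interactive viewer, so no turntable video needs to be encoded. A minimal sketch of that wiring, assuming model.py from this commit is importable (prompt-only inputs here; the real demos also pass seed and sampler settings):

import gradio as gr

from model import Model  # Model.run_text now returns a path to a .glb file

model = Model()

with gr.Blocks() as demo:
    prompt = gr.Text(label='Prompt')
    run_button = gr.Button('Run')
    result = gr.Model3D(label='Result', show_label=False)
    # The .glb path returned by run_text is displayed directly by gr.Model3D.
    run_button.click(fn=model.run_text, inputs=prompt, outputs=result)

demo.launch()
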
model.py CHANGED
@@ -1,15 +1,15 @@
 import tempfile
 
-import imageio
 import numpy as np
-import PIL.Image
 import torch
+import trimesh
 from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
 from shap_e.diffusion.sample import sample_latents
 from shap_e.models.download import load_config, load_model
 from shap_e.models.nn.camera import (DifferentiableCameraBatch,
                                      DifferentiableProjectiveCamera)
 from shap_e.models.transmitter.base import Transmitter, VectorDecoder
+from shap_e.rendering.torch_mesh import TorchMesh
 from shap_e.util.collections import AttrDict
 from shap_e.util.image_util import load_image
 
@@ -47,23 +47,20 @@ def create_pan_cameras(size: int,
     )
 
 
-# Copied from https://github.com/openai/shap-e/blob/d99cedaea18e0989e340163dbaeb4b109fa9e8ec/shap_e/util/notebooks.py#L45-L60
+# Copied from https://github.com/openai/shap-e/blob/8625e7c15526d8510a2292f92165979268d0e945/shap_e/util/notebooks.py#LL64C1-L76C33
 @torch.no_grad()
-def decode_latent_images(
+def decode_latent_mesh(
     xm: Transmitter | VectorDecoder,
     latent: torch.Tensor,
-    cameras: DifferentiableCameraBatch,
-    rendering_mode: str = 'stf',
-):
+) -> TorchMesh:
     decoded = xm.renderer.render_views(
-        AttrDict(cameras=cameras),
+        AttrDict(cameras=create_pan_cameras(
+            2, latent.device)),  # lowest resolution possible
         params=(xm.encoder if isinstance(xm, Transmitter) else
                 xm).bottleneck_to_params(latent[None]),
-        options=AttrDict(rendering_mode=rendering_mode,
-                         render_with_direction=False),
+        options=AttrDict(rendering_mode='stf', render_with_direction=False),
     )
-    arr = decoded.channels.clamp(0, 255).to(torch.uint8)[0].cpu().numpy()
-    return [PIL.Image.fromarray(x) for x in arr]
+    return decoded.raw_meshes[0]
 
 
 class Model:
@@ -82,24 +79,29 @@ class Model:
         self.model = load_model(model_name, device=self.device)
         self.model_name = model_name
 
-    @staticmethod
-    def to_video(frames: list[PIL.Image.Image], fps: int = 5) -> str:
-        out_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
-        writer = imageio.get_writer(out_file.name, format='FFMPEG', fps=fps)
-        for frame in frames:
-            writer.append_data(np.asarray(frame))
-        writer.close()
-        return out_file.name
+    def to_glb(self, latent: torch.Tensor) -> str:
+        ply_path = tempfile.NamedTemporaryFile(suffix='.ply',
+                                               delete=False,
+                                               mode='w+b')
+        decode_latent_mesh(self.xm, latent).tri_mesh().write_ply(ply_path)
+
+        mesh = trimesh.load(ply_path.name)
+        rot = trimesh.transformations.rotation_matrix(-np.pi / 2, [1, 0, 0])
+        mesh = mesh.apply_transform(rot)
+        rot = trimesh.transformations.rotation_matrix(np.pi, [0, 1, 0])
+        mesh = mesh.apply_transform(rot)
+
+        mesh_path = tempfile.NamedTemporaryFile(suffix='.glb', delete=False)
+        mesh.export(mesh_path.name, file_type='glb')
+
+        return mesh_path.name
 
     def run_text(self,
                  prompt: str,
                  seed: int = 0,
                  guidance_scale: float = 15.0,
-                 num_steps: int = 64,
-                 output_image_size: int = 64,
-                 render_mode: str = 'nerf') -> str:
+                 num_steps: int = 64) -> str:
         self.load_model('text300M')
-
         torch.manual_seed(seed)
 
         latents = sample_latents(
@@ -117,27 +119,17 @@ class Model:
             sigma_max=160,
             s_churn=0,
         )
-
-        cameras = create_pan_cameras(output_image_size, self.device)
-        frames = decode_latent_images(self.xm,
-                                      latents[0],
-                                      cameras,
-                                      rendering_mode=render_mode)
-        return self.to_video(frames)
+        return self.to_glb(latents[0])
 
     def run_image(self,
                   image_path: str,
                   seed: int = 0,
                   guidance_scale: float = 3.0,
-                  num_steps: int = 64,
-                  output_image_size: int = 64,
-                  render_mode: str = 'nerf') -> str:
+                  num_steps: int = 64) -> str:
         self.load_model('image300M')
-
         torch.manual_seed(seed)
 
         image = load_image(image_path)
-
         latents = sample_latents(
             batch_size=1,
             model=self.model,
@@ -153,10 +145,4 @@ class Model:
             sigma_max=160,
             s_churn=0,
        )
-
-        cameras = create_pan_cameras(output_image_size, self.device)
-        frames = decode_latent_images(self.xm,
-                                      latents[0],
-                                      cameras,
-                                      rendering_mode=render_mode)
-        return self.to_video(frames)
+        return self.to_glb(latents[0])
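
Note: the video pipeline (imageio writer over PIL frames) is replaced by a mesh pipeline: decode_latent_mesh renders the latent in 'stf' mode at the lowest possible resolution, the raw mesh is written to a temporary .ply, reoriented with two trimesh rotations (-90 degrees about X, then 180 degrees about Y, presumably to match the viewer's expected orientation), and exported as .glb for gr.Model3D. A rough usage sketch, assuming a GPU environment where the shap-e weights can be downloaded (the prompt is illustrative):

from model import Model

model = Model()

# Text-to-3D: samples a latent with text300M and returns the path of a
# temporary .glb file produced by Model.to_glb.
glb_path = model.run_text('a shark', seed=0, guidance_scale=15.0, num_steps=64)
print(glb_path)

# Image-to-3D works the same way via image300M.
glb_path = model.run_image('corgi.png', seed=0, guidance_scale=3.0)
print(glb_path)
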
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,5 +1,5 @@
 git+https://github.com/openai/shap-e@8625e7c
 gradio==3.28.3
-imageio[ffmpeg]==2.28.1
 torch==2.0.0
 torchvision==0.15.1
+trimesh==3.21.5
style.css CHANGED
@@ -8,14 +8,6 @@ h1 {
   padding-top: 1.5rem;
 }
 
-#result-1 video {
-  object-fit: scale-down;
-}
-
-#result-2 video {
-  object-fit: scale-down;
-}
-
 #prompt-container {
   gap: 0;
 }