SerdarHelli committed on
Commit
594d040
1 Parent(s): 11c8995

Upload 2 files

Files changed (2)
  1. app.py +710 -0
  2. requirements.txt +16 -0
app.py ADDED
@@ -0,0 +1,710 @@
+
+import sys
+import os
+
+os.system("git clone https://github.com/royorel/StyleSDF.git")
+sys.path.append("StyleSDF")
+
+os.system(f"{sys.executable} -m pip install -U fvcore")
+
+import torch
+
+# Build the PyTorch3D wheel tag from the local Python / CUDA / PyTorch versions
+pyt_version_str = torch.__version__.split("+")[0].replace(".", "")
+version_str = "".join([
+    f"py3{sys.version_info.minor}_cu",
+    torch.version.cuda.replace(".", ""),
+    f"_pyt{pyt_version_str}"
+])
+
+os.system(f"{sys.executable} -m pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html")
+
+from download_models import download_pretrained_models
+
+download_pretrained_models()
+
+import trimesh
+import numpy as np
+from munch import *
+from PIL import Image
+from tqdm import tqdm
+from torch.nn import functional as F
+from torch.utils import data
+from torchvision import utils
+from torchvision import transforms
+from skimage.measure import marching_cubes
+from scipy.spatial import Delaunay
+from options import BaseOptions
+from model import Generator
+from utils import (
+    generate_camera_params, align_volume, extract_mesh_with_marching_cubes,
+    xyz2mesh, create_cameras, create_mesh_renderer, add_textures,
+)
+from pytorch3d.structures import Meshes
+from pdb import set_trace as st
+import skvideo.io
+
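+# Generates RGB renders for each sampled identity and, when surface renderings are
+# enabled, extracts a depth-map mesh and a frustum-aligned marching-cubes mesh.
+# The demo returns the meshes of the first rendered view for display in the UI.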
+def generate(opt, g_ema, surface_g_ema, device, mean_latent, surface_mean_latent):
+    g_ema.eval()
+    if not opt.no_surface_renderings:
+        surface_g_ema.eval()
+
+    # set camera angles
+    if opt.fixed_camera_angles:
+        # These can be changed to any other specific viewpoints.
+        # You can add or remove viewpoints as you wish
+        locations = torch.tensor([[0, 0],
+                                  [-1.5 * opt.camera.azim, 0],
+                                  [-1 * opt.camera.azim, 0],
+                                  [-0.5 * opt.camera.azim, 0],
+                                  [0.5 * opt.camera.azim, 0],
+                                  [1 * opt.camera.azim, 0],
+                                  [1.5 * opt.camera.azim, 0],
+                                  [0, -1.5 * opt.camera.elev],
+                                  [0, -1 * opt.camera.elev],
+                                  [0, -0.5 * opt.camera.elev],
+                                  [0, 0.5 * opt.camera.elev],
+                                  [0, 1 * opt.camera.elev],
+                                  [0, 1.5 * opt.camera.elev]], device=device)
+        # For zooming in/out change the values of fov
+        # (This can be defined for each view separately via a custom tensor
+        # like the locations tensor above. Tensor shape should be [locations.shape[0], 1])
+        # reasonable values are [0.75 * opt.camera.fov, 1.25 * opt.camera.fov]
+        fov = opt.camera.fov * torch.ones((locations.shape[0], 1), device=device)
+        num_viewdirs = locations.shape[0]
+    else:  # draw random camera angles
+        locations = None
+        # fov = None
+        fov = opt.camera.fov
+        num_viewdirs = opt.num_views_per_id
+
+    # generate images
+    for i in tqdm(range(opt.identities)):
+        with torch.no_grad():
+            chunk = 8
+            sample_z = torch.randn(1, opt.style_dim, device=device).repeat(num_viewdirs, 1)
+            sample_cam_extrinsics, sample_focals, sample_near, sample_far, sample_locations = \
+                generate_camera_params(opt.renderer_output_size, device, batch=num_viewdirs,
+                                       locations=locations,  # input_fov=fov,
+                                       uniform=opt.camera.uniform, azim_range=opt.camera.azim,
+                                       elev_range=opt.camera.elev, fov_ang=fov,
+                                       dist_radius=opt.camera.dist_radius)
+            rgb_images = torch.Tensor(0, 3, opt.size, opt.size)
+            rgb_images_thumbs = torch.Tensor(0, 3, opt.renderer_output_size, opt.renderer_output_size)
+            for j in range(0, num_viewdirs, chunk):
+                out = g_ema([sample_z[j:j+chunk]],
+                            sample_cam_extrinsics[j:j+chunk],
+                            sample_focals[j:j+chunk],
+                            sample_near[j:j+chunk],
+                            sample_far[j:j+chunk],
+                            truncation=opt.truncation_ratio,
+                            truncation_latent=mean_latent)
+
+                rgb_images = torch.cat([rgb_images, out[0].cpu()], 0)
+                rgb_images_thumbs = torch.cat([rgb_images_thumbs, out[1].cpu()], 0)
+
+            utils.save_image(rgb_images,
+                             os.path.join(opt.results_dst_dir, 'images', '{}.png'.format(str(i).zfill(7))),
+                             nrow=num_viewdirs,
+                             normalize=True,
+                             padding=0,
+                             value_range=(-1, 1),)
+
+            utils.save_image(rgb_images_thumbs,
+                             os.path.join(opt.results_dst_dir, 'images', '{}_thumb.png'.format(str(i).zfill(7))),
+                             nrow=num_viewdirs,
+                             normalize=True,
+                             padding=0,
+                             value_range=(-1, 1),)
+
+            # this is done to fit to RTX2080 RAM size (11GB)
+            del out
+            torch.cuda.empty_cache()
+
+            if not opt.no_surface_renderings:
+                surface_chunk = 1
+                scale = surface_g_ema.renderer.out_im_res / g_ema.renderer.out_im_res
+                surface_sample_focals = sample_focals * scale
+                for j in range(0, num_viewdirs, surface_chunk):
+                    surface_out = surface_g_ema([sample_z[j:j+surface_chunk]],
+                                                sample_cam_extrinsics[j:j+surface_chunk],
+                                                surface_sample_focals[j:j+surface_chunk],
+                                                sample_near[j:j+surface_chunk],
+                                                sample_far[j:j+surface_chunk],
+                                                truncation=opt.truncation_ratio,
+                                                truncation_latent=surface_mean_latent,
+                                                return_sdf=True,
+                                                return_xyz=True)
+
+                    xyz = surface_out[2].cpu()
+                    sdf = surface_out[3].cpu()
+
+                    # this is done to fit to RTX2080 RAM size (11GB)
+                    del surface_out
+                    torch.cuda.empty_cache()
+
+                    # mesh extractions are done one at a time
+                    for k in range(surface_chunk):
+                        curr_locations = sample_locations[j:j+surface_chunk]
+                        loc_str = '_azim{}_elev{}'.format(int(curr_locations[k, 0] * 180 / np.pi),
+                                                          int(curr_locations[k, 1] * 180 / np.pi))
+
+                        # Save depth outputs as meshes
+                        depth_mesh_filename = os.path.join(opt.results_dst_dir, 'depth_map_meshes', 'sample_{}_depth_mesh{}.obj'.format(i, loc_str))
+                        depth_mesh = xyz2mesh(xyz[k:k+surface_chunk])
+                        if depth_mesh is not None:
+                            with open(depth_mesh_filename, 'w') as f:
+                                depth_mesh.export(f, file_type='obj')
+
+                        # extract full geometry with marching cubes
+                        if j == 0:
+                            try:
+                                frostum_aligned_sdf = align_volume(sdf)
+                                marching_cubes_mesh = extract_mesh_with_marching_cubes(frostum_aligned_sdf[k:k+surface_chunk])
+                            except ValueError:
+                                marching_cubes_mesh = None
+                                print('Marching cubes extraction failed.')
+                                print('Please check whether the SDF values are all larger (or all smaller) than 0.')
+
+                        return depth_mesh, marching_cubes_mesh
+
+
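+# Builds the inference options for still image + mesh generation, loads the selected
+# pretrained generator (FFHQ 1024 or AFHQ 512) and, unless surface renderings are
+# disabled, a second low-resolution generator used only for surface extraction.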
+# User options
+
+
+def get_generate_vars(model_type):
+
+    opt = BaseOptions().parse()
+    opt.camera.uniform = True
+    opt.model.is_test = True
+    opt.model.freeze_renderer = False
+    opt.rendering.offset_sampling = True
+    opt.rendering.static_viewdirs = True
+    opt.rendering.force_background = True
+    opt.rendering.perturb = 0
+    opt.inference.renderer_output_size = opt.model.renderer_spatial_output_dim
+    opt.inference.style_dim = opt.model.style_dim
+    opt.inference.project_noise = opt.model.project_noise
+
+    # User options
+    opt.inference.no_surface_renderings = False  # When true, only RGB images will be created
+    opt.inference.fixed_camera_angles = False  # When true, each identity will be rendered from a specific set of 13 viewpoints. Otherwise, random views are generated
+    opt.inference.identities = 1  # Number of identities to generate
+    opt.inference.num_views_per_id = 1  # Number of viewpoints generated per identity. This option is ignored if opt.inference.fixed_camera_angles is true.
+    opt.inference.camera = opt.camera
+
+    # Load saved model
+    if model_type == 'ffhq':
+        model_path = 'ffhq1024x1024.pt'
+        opt.model.size = 1024
+        opt.experiment.expname = 'ffhq1024x1024'
+    else:
+        opt.inference.camera.azim = 0.15
+        model_path = 'afhq512x512.pt'
+        opt.model.size = 512
+        opt.experiment.expname = 'afhq512x512'
+
+    # Create results directory
+    result_model_dir = 'final_model'
+    results_dir_basename = os.path.join(opt.inference.results_dir, opt.experiment.expname)
+    opt.inference.results_dst_dir = os.path.join(results_dir_basename, result_model_dir)
+    if opt.inference.fixed_camera_angles:
+        opt.inference.results_dst_dir = os.path.join(opt.inference.results_dst_dir, 'fixed_angles')
+    else:
+        opt.inference.results_dst_dir = os.path.join(opt.inference.results_dst_dir, 'random_angles')
+
+    os.makedirs(opt.inference.results_dst_dir, exist_ok=True)
+    os.makedirs(os.path.join(opt.inference.results_dst_dir, 'images'), exist_ok=True)
+
+    if not opt.inference.no_surface_renderings:
+        os.makedirs(os.path.join(opt.inference.results_dst_dir, 'depth_map_meshes'), exist_ok=True)
+        os.makedirs(os.path.join(opt.inference.results_dst_dir, 'marching_cubes_meshes'), exist_ok=True)
+
+    opt.inference.size = opt.model.size
+    checkpoint_path = os.path.join('full_models', model_path)
+    checkpoint = torch.load(checkpoint_path)
+
+    # Load image generation model
+    g_ema = Generator(opt.model, opt.rendering).to(device)
+    pretrained_weights_dict = checkpoint["g_ema"]
+    model_dict = g_ema.state_dict()
+    for k, v in pretrained_weights_dict.items():
+        if v.size() == model_dict[k].size():
+            model_dict[k] = v
+
+    g_ema.load_state_dict(model_dict)
+
+    # Load a second volume renderer that extracts surfaces at 128x128x128 (or higher) for better surface resolution
+    if not opt.inference.no_surface_renderings:
+        opt['surf_extraction'] = Munch()
+        opt.surf_extraction.rendering = opt.rendering
+        opt.surf_extraction.model = opt.model.copy()
+        opt.surf_extraction.model.renderer_spatial_output_dim = 128
+        opt.surf_extraction.rendering.N_samples = opt.surf_extraction.model.renderer_spatial_output_dim
+        opt.surf_extraction.rendering.return_xyz = True
+        opt.surf_extraction.rendering.return_sdf = True
+        surface_g_ema = Generator(opt.surf_extraction.model, opt.surf_extraction.rendering, full_pipeline=False).to(device)
+
+        # Load weights to surface extractor
+        surface_extractor_dict = surface_g_ema.state_dict()
+        for k, v in pretrained_weights_dict.items():
+            if k in surface_extractor_dict.keys() and v.size() == surface_extractor_dict[k].size():
+                surface_extractor_dict[k] = v
+
+        surface_g_ema.load_state_dict(surface_extractor_dict)
+    else:
+        surface_g_ema = None
+
+    # Get the mean latent vector for g_ema
+    if opt.inference.truncation_ratio < 1:
+        with torch.no_grad():
+            mean_latent = g_ema.mean_latent(opt.inference.truncation_mean, device)
+    else:
+        mean_latent = None
+
+    # Get the mean latent vector for surface_g_ema
+    if not opt.inference.no_surface_renderings:
+        surface_mean_latent = mean_latent[0]
+    else:
+        surface_mean_latent = None
+
+    return opt.inference, g_ema, surface_g_ema, mean_latent, surface_mean_latent, opt.inference.results_dst_dir
+
+
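+# Same setup as above, but configured for video rendering: results go to a 'videos'
+# directory and the volume renderer uses 64 samples per ray.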
+def get_rendervideo_vars(model_type, number_frames):
+    opt = BaseOptions().parse()
+    opt.model.is_test = True
+    opt.model.style_dim = 256
+    opt.model.freeze_renderer = False
+    opt.inference.size = opt.model.size
+    opt.inference.camera = opt.camera
+    opt.inference.renderer_output_size = opt.model.renderer_spatial_output_dim
+    opt.inference.style_dim = opt.model.style_dim
+    opt.inference.project_noise = opt.model.project_noise
+    opt.rendering.perturb = 0
+    opt.rendering.force_background = True
+    opt.rendering.static_viewdirs = True
+    opt.rendering.return_sdf = True
+    opt.rendering.N_samples = 64
+    opt.inference.identities = 1
+
+    # Load saved model
+    if model_type == 'ffhq':
+        model_path = 'ffhq1024x1024.pt'
+        opt.model.size = 1024
+        opt.experiment.expname = 'ffhq1024x1024'
+    else:
+        opt.inference.camera.azim = 0.15
+        model_path = 'afhq512x512.pt'
+        opt.model.size = 512
+        opt.experiment.expname = 'afhq512x512'
+
+    opt.inference.size = opt.model.size
+
+    # Create results directory
+    result_model_dir = 'final_model'
+    results_dir_basename = os.path.join(opt.inference.results_dir, opt.experiment.expname)
+    opt.inference.results_dst_dir = os.path.join(results_dir_basename, result_model_dir)
+
+    os.makedirs(opt.inference.results_dst_dir, exist_ok=True)
+    os.makedirs(os.path.join(opt.inference.results_dst_dir, 'videos'), exist_ok=True)
+
+    checkpoints_dir = './full_models'
+    checkpoint_path = os.path.join('full_models', model_path)
+
+    if os.path.isfile(checkpoint_path):
+        # define results directory name
+        result_model_dir = 'final_model'
+
+        results_dir_basename = os.path.join(opt.inference.results_dir, opt.experiment.expname)
+        opt.inference.results_dst_dir = os.path.join(results_dir_basename, result_model_dir, 'videos')
+        if opt.model.project_noise:
+            opt.inference.results_dst_dir = os.path.join(opt.inference.results_dst_dir, 'with_noise_projection')
+
+        os.makedirs(opt.inference.results_dst_dir, exist_ok=True)
+        print(checkpoint_path)
+        # load saved model
+        checkpoint = torch.load(checkpoint_path)
+
+        # load image generation model
+        g_ema = Generator(opt.model, opt.rendering).to(device)
+
+        # temp fix because of wrong noise sizes
+        pretrained_weights_dict = checkpoint["g_ema"]
+        model_dict = g_ema.state_dict()
+        for k, v in pretrained_weights_dict.items():
+            if v.size() == model_dict[k].size():
+                model_dict[k] = v
+
+        g_ema.load_state_dict(model_dict)
+
+        # load a second volume renderer that extracts surfaces at 128x128x128
+        if not opt.inference.no_surface_renderings or opt.model.project_noise:
+            opt['surf_extraction'] = Munch()
+            opt.surf_extraction.rendering = opt.rendering
+            opt.surf_extraction.model = opt.model.copy()
+            opt.surf_extraction.model.renderer_spatial_output_dim = 128
+            opt.surf_extraction.rendering.N_samples = opt.surf_extraction.model.renderer_spatial_output_dim
+            opt.surf_extraction.rendering.return_xyz = True
+            opt.surf_extraction.rendering.return_sdf = True
+            opt.inference.surf_extraction_output_size = opt.surf_extraction.model.renderer_spatial_output_dim
+            surface_g_ema = Generator(opt.surf_extraction.model, opt.surf_extraction.rendering, full_pipeline=False).to(device)
+
+            # Load weights to surface extractor
+            surface_extractor_dict = surface_g_ema.state_dict()
+            for k, v in pretrained_weights_dict.items():
+                if k in surface_extractor_dict.keys() and v.size() == surface_extractor_dict[k].size():
+                    surface_extractor_dict[k] = v
+
+            surface_g_ema.load_state_dict(surface_extractor_dict)
+        else:
+            surface_g_ema = None
+
+        # get the mean latent vector for g_ema
+        if opt.inference.truncation_ratio < 1:
+            with torch.no_grad():
+                mean_latent = g_ema.mean_latent(opt.inference.truncation_mean, device)
+        else:
+            mean_latent = None
+
+        # get the mean latent vector for surface_g_ema
+        if not opt.inference.no_surface_renderings or opt.model.project_noise:
+            surface_mean_latent = mean_latent[0]
+        else:
+            surface_mean_latent = None
+
+        return opt.inference, g_ema, surface_g_ema, mean_latent, surface_mean_latent, opt.inference.results_dst_dir
+
+
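+# Renders a camera trajectory (azimuth sweep or ellipsoid sweep) around a single
+# sampled identity and writes an RGB video plus, optionally, a depth-mesh video
+# rendered with PyTorch3D.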
+def render_video(opt, g_ema, surface_g_ema, device, mean_latent, surface_mean_latent, numberofframes):
+    g_ema.eval()
+    if not opt.no_surface_renderings or opt.project_noise:
+        surface_g_ema.eval()
+
+    images = torch.Tensor(0, 3, opt.size, opt.size)
+    num_frames = numberofframes
+    # Generate video trajectory
+    trajectory = np.zeros((num_frames, 3), dtype=np.float32)
+
+    # set camera trajectory
+    # sweep azimuth angles (4 seconds)
+    if opt.azim_video:
+        t = np.linspace(0, 1, num_frames)
+        elev = 0
+        fov = opt.camera.fov
+        if opt.camera.uniform:
+            azim = opt.camera.azim * np.cos(t * 2 * np.pi)
+        else:
+            azim = 1.5 * opt.camera.azim * np.cos(t * 2 * np.pi)
+
+        trajectory[:num_frames, 0] = azim
+        trajectory[:num_frames, 1] = elev
+        trajectory[:num_frames, 2] = fov
+
+    # elipsoid sweep (4 seconds)
+    else:
+        t = np.linspace(0, 1, num_frames)
+        fov = opt.camera.fov  # + 1 * np.sin(t * 2 * np.pi)
+        if opt.camera.uniform:
+            elev = opt.camera.elev / 2 + opt.camera.elev / 2 * np.sin(t * 2 * np.pi)
+            azim = opt.camera.azim * np.cos(t * 2 * np.pi)
+        else:
+            elev = 1.5 * opt.camera.elev * np.sin(t * 2 * np.pi)
+            azim = 1.5 * opt.camera.azim * np.cos(t * 2 * np.pi)
+
+        trajectory[:num_frames, 0] = azim
+        trajectory[:num_frames, 1] = elev
+        trajectory[:num_frames, 2] = fov
+
+    trajectory = torch.from_numpy(trajectory).to(device)
+
+    # generate input parameters for the camera trajectory
+    # sample_cam_poses, sample_focals, sample_near, sample_far = \
+    #     generate_camera_params(trajectory, opt.renderer_output_size, device, dist_radius=opt.camera.dist_radius)
+
+    sample_cam_extrinsics, sample_focals, sample_near, sample_far, _ = \
+        generate_camera_params(opt.renderer_output_size, device, locations=trajectory[:, :2],
+                               fov_ang=trajectory[:, 2:], dist_radius=opt.camera.dist_radius)
+
+    # In case of noise projection, generate input parameters for the frontal position.
+    # The reference mesh for the noise projection is extracted from the frontal position.
+    # For more details see section C.1 in the supplementary material.
+    if opt.project_noise:
+        frontal_pose = torch.tensor([[0.0, 0.0, opt.camera.fov]]).to(device)
+        # frontal_cam_pose, frontal_focals, frontal_near, frontal_far = \
+        #     generate_camera_params(frontal_pose, opt.surf_extraction_output_size, device, dist_radius=opt.camera.dist_radius)
+        frontal_cam_pose, frontal_focals, frontal_near, frontal_far, _ = \
+            generate_camera_params(opt.surf_extraction_output_size, device, locations=frontal_pose[:, :2],
+                                   fov_ang=frontal_pose[:, 2:], dist_radius=opt.camera.dist_radius)
+
+    # create geometry renderer (renders the depth maps)
+    cameras = create_cameras(azim=np.rad2deg(trajectory[0, 0].cpu().numpy()),
+                             elev=np.rad2deg(trajectory[0, 1].cpu().numpy()),
+                             dist=1, device=device)
+    renderer = create_mesh_renderer(cameras, image_size=512, specular_color=((0, 0, 0),),
+                                    ambient_color=((0.1, .1, .1),), diffuse_color=((0.75, .75, .75),),
+                                    device=device)
+
+    suffix = '_azim' if opt.azim_video else '_elipsoid'
+
+    # generate videos
+    for i in range(opt.identities):
+        print('Processing identity {}/{}...'.format(i + 1, opt.identities))
+        chunk = 1
+        sample_z = torch.randn(1, opt.style_dim, device=device).repeat(chunk, 1)
+        video_filename = 'sample_video_{}{}.mp4'.format(i, suffix)
+        writer = skvideo.io.FFmpegWriter(os.path.join(opt.results_dst_dir, video_filename),
+                                         outputdict={'-pix_fmt': 'yuv420p', '-crf': '10'})
+        if not opt.no_surface_renderings:
+            depth_video_filename = 'sample_depth_video_{}{}.mp4'.format(i, suffix)
+            depth_writer = skvideo.io.FFmpegWriter(os.path.join(opt.results_dst_dir, depth_video_filename),
+                                                   outputdict={'-pix_fmt': 'yuv420p', '-crf': '1'})
+
+        ####################### Extract initial surface mesh from the frontal viewpoint #############
+        # For more details see section C.1 in the supplementary material.
+        if opt.project_noise:
+            with torch.no_grad():
+                frontal_surface_out = surface_g_ema([sample_z],
+                                                    frontal_cam_pose,
+                                                    frontal_focals,
+                                                    frontal_near,
+                                                    frontal_far,
+                                                    truncation=opt.truncation_ratio,
+                                                    truncation_latent=surface_mean_latent,
+                                                    return_sdf=True)
+                frontal_sdf = frontal_surface_out[2].cpu()
+
+            print('Extracting Identity {} Frontal view Marching Cubes for consistent video rendering'.format(i))
+
+            frostum_aligned_frontal_sdf = align_volume(frontal_sdf)
+            del frontal_sdf
+
+            try:
+                frontal_marching_cubes_mesh = extract_mesh_with_marching_cubes(frostum_aligned_frontal_sdf)
+            except ValueError:
+                frontal_marching_cubes_mesh = None
+
+            if frontal_marching_cubes_mesh is not None:
+                frontal_marching_cubes_mesh_filename = os.path.join(opt.results_dst_dir, 'sample_{}_frontal_marching_cubes_mesh{}.obj'.format(i, suffix))
+                with open(frontal_marching_cubes_mesh_filename, 'w') as f:
+                    frontal_marching_cubes_mesh.export(f, file_type='obj')
+
+            del frontal_surface_out
+            torch.cuda.empty_cache()
+        #############################################################################################
+
+        for j in tqdm(range(0, num_frames, chunk)):
+            with torch.no_grad():
+                out = g_ema([sample_z],
+                            sample_cam_extrinsics[j:j+chunk],
+                            sample_focals[j:j+chunk],
+                            sample_near[j:j+chunk],
+                            sample_far[j:j+chunk],
+                            truncation=opt.truncation_ratio,
+                            truncation_latent=mean_latent,
+                            randomize_noise=False,
+                            project_noise=opt.project_noise,
+                            mesh_path=frontal_marching_cubes_mesh_filename if opt.project_noise else None)
+
+                rgb = out[0].cpu()
+                utils.save_image(rgb,
+                                 os.path.join(opt.results_dst_dir, '{}.png'.format(str(i).zfill(7))),
+                                 nrow=trajectory[:, :2].shape[0],
+                                 normalize=True,
+                                 padding=0,
+                                 value_range=(-1, 1),)
+
+                # this is done to fit to RTX2080 RAM size (11GB)
+                del out
+                torch.cuda.empty_cache()
+
+                # Convert RGB from [-1, 1] to [0, 255]
+                rgb = 127.5 * (rgb.clamp(-1, 1).permute(0, 2, 3, 1).cpu().numpy() + 1)
+
+                # Add RGB frame to video
+                for k in range(chunk):
+                    writer.writeFrame(rgb[k])
+
+                ########## Extract surface ##########
+                if not opt.no_surface_renderings:
+                    scale = surface_g_ema.renderer.out_im_res / g_ema.renderer.out_im_res
+                    surface_sample_focals = sample_focals * scale
+                    surface_out = surface_g_ema([sample_z],
+                                                sample_cam_extrinsics[j:j+chunk],
+                                                surface_sample_focals[j:j+chunk],
+                                                sample_near[j:j+chunk],
+                                                sample_far[j:j+chunk],
+                                                truncation=opt.truncation_ratio,
+                                                truncation_latent=surface_mean_latent,
+                                                return_xyz=True)
+                    xyz = surface_out[2].cpu()
+
+                    # this is done to fit to RTX2080 RAM size (11GB)
+                    del surface_out
+                    torch.cuda.empty_cache()
+
+                    # Render mesh for video
+                    depth_mesh = xyz2mesh(xyz)
+                    mesh = Meshes(
+                        verts=[torch.from_numpy(np.asarray(depth_mesh.vertices)).to(torch.float32).to(device)],
+                        faces=[torch.from_numpy(np.asarray(depth_mesh.faces)).to(torch.float32).to(device)],
+                        textures=None,
+                        verts_normals=[torch.from_numpy(np.copy(np.asarray(depth_mesh.vertex_normals))).to(torch.float32).to(device)],
+                    )
+                    mesh = add_textures(mesh)
+                    cameras = create_cameras(azim=np.rad2deg(trajectory[j, 0].cpu().numpy()),
+                                             elev=np.rad2deg(trajectory[j, 1].cpu().numpy()),
+                                             fov=2 * trajectory[j, 2].cpu().numpy(),
+                                             dist=1, device=device)
+                    renderer = create_mesh_renderer(cameras, image_size=512,
+                                                    light_location=((0.0, 1.0, 5.0),), specular_color=((0.2, 0.2, 0.2),),
+                                                    ambient_color=((0.1, 0.1, 0.1),), diffuse_color=((0.65, .65, .65),),
+                                                    device=device)
+
+                    mesh_image = 255 * renderer(mesh).cpu().numpy()
+                    mesh_image = mesh_image[..., :3]
+
+                    # Add depth frame to video
+                    for k in range(chunk):
+                        depth_writer.writeFrame(mesh_image[k])
+
+        # Close video writers
+        writer.close()
+        if not opt.no_surface_renderings:
+            depth_writer.close()
+
+    return video_filename
+
+
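+# Gradio UI: two buttons drive the pipeline. "Generate Mesh" runs generate() and shows
+# the extracted mesh as an interactive Plotly figure; "Generate Video" runs render_video()
+# and shows the resulting mp4. Both also display the first rendered RGB image.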
+import gradio as gr
+import plotly.graph_objects as go
+from PIL import Image
+
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+
+def get_video(model_type, numberofframes, mesh_type):
+    options, g_ema, surface_g_ema, mean_latent, surface_mean_latent, result_filename = get_rendervideo_vars(model_type, numberofframes)
+    render_video(options, g_ema, surface_g_ema, device, mean_latent, surface_mean_latent, numberofframes)
+    torch.cuda.empty_cache()
+    del options, g_ema, surface_g_ema, mean_latent, surface_mean_latent
+    path_img = os.path.join(result_filename, "0000000.png")
+    image = Image.open(path_img)
+
+    if mesh_type == "DepthMesh":
+        path = os.path.join(result_filename, "sample_depth_video_0_elipsoid.mp4")
+    else:
+        path = os.path.join(result_filename, "sample_video_0_elipsoid.mp4")
+
+    return path, image
+
+def get_mesh(model_type, mesh_type):
+    options, g_ema, surface_g_ema, mean_latent, surface_mean_latent, result_filename = get_generate_vars(model_type)
+    depth_mesh, mc_mesh = generate(options, g_ema, surface_g_ema, device, mean_latent, surface_mean_latent)
+    torch.cuda.empty_cache()
+    del options, g_ema, surface_g_ema, mean_latent, surface_mean_latent
+    if mesh_type == "DepthMesh":
+        mesh = depth_mesh
+    else:
+        mesh = mc_mesh
+
+    x = np.asarray(mesh.vertices).T[0]
+    y = np.asarray(mesh.vertices).T[1]
+    z = np.asarray(mesh.vertices).T[2]
+
+    i = np.asarray(mesh.faces).T[0]
+    j = np.asarray(mesh.faces).T[1]
+    k = np.asarray(mesh.faces).T[2]
+    fig = go.Figure(go.Mesh3d(x=x, y=y, z=z,
+                              i=i, j=j, k=k,
+                              colorscale="Viridis",
+                              colorbar_len=0.75,
+                              flatshading=True,
+                              lighting=dict(ambient=0.5,
+                                            diffuse=1,
+                                            fresnel=4,
+                                            specular=0.5,
+                                            roughness=0.05,
+                                            facenormalsepsilon=0,
+                                            vertexnormalsepsilon=0),
+                              lightposition=dict(x=100,
+                                                 y=100,
+                                                 z=1000)))
+    path = os.path.join(result_filename, "images/0000000.png")
+
+    image = Image.open(path)
+
+    return fig, image
+
+markdown = f'''
+# StyleSDF: High-Resolution 3D-Consistent Image and Geometry Generation
+
+[The Space demo for the CVPR 2022 paper "StyleSDF: High-Resolution 3D-Consistent Image and Geometry Generation".](https://arxiv.org/abs/2112.11427)
+
+[Official implementation.](https://github.com/royorel/StyleSDF)
+
+### Future work based on interest
+- Adding new models for new object types
+- New customization options
+
+It is running on {device}.
+
+The process can take a long time, especially for video generation; the runtime depends on the number of frames and the current device.
+
+Note: for an RGB video, choose the Marching Cubes mesh type.
+
+'''
+with gr.Blocks() as demo:
+    with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown(markdown)
+                with gr.Column():
+                    with gr.Row():
+                        with gr.Column():
+                            image = gr.Image(type="pil", shape=(512, 512))
+                        with gr.Column():
+                            mesh = gr.Plot()
+                        with gr.Column():
+                            video = gr.Video()
+            with gr.Row():
+                numberoframes = gr.Slider(minimum=30, maximum=250, label='Number Of Frames For Video Generation')
+                model_name = gr.Dropdown(choices=["ffhq", "afhq"], label="Choose Model Type")
+                mesh_type = gr.Dropdown(choices=["DepthMesh", "Marching Cubes"], label="Choose Mesh Type")
+
+            with gr.Row():
+                btn = gr.Button(value="Generate Mesh")
+                btn_2 = gr.Button(value="Generate Video")
+
+            btn.click(get_mesh, [model_name, mesh_type], [mesh, image])
+            btn_2.click(get_video, [model_name, numberoframes, mesh_type], [video, image])
+
+demo.launch(debug=True)
+
requirements.txt ADDED
@@ -0,0 +1,16 @@
+torch==1.13.0+cu116
+torchvision==0.14.0+cu116
+plotly
+lmdb
+numpy
+ninja
+pillow
+requests
+tqdm
+scipy
+scikit-image
+scikit-video
+trimesh[easy]
+configargparse
+munch
+wandb