SerdarHelli committed on
Commit
594d040
1 Parent(s): 11c8995

Upload 2 files

Files changed (2)
  1. app.py +710 -0
  2. requirements.txt +16 -0
app.py ADDED
@@ -0,0 +1,710 @@
+
+import sys
+import os
+
+os.system("git clone https://github.com/royorel/StyleSDF.git")
+sys.path.append("StyleSDF")
+
+os.system(f"{sys.executable} -m pip install -U fvcore")
+
+import torch
+
+# Build the PyTorch3D wheel tag from the local Python / CUDA / PyTorch versions
+pyt_version_str = torch.__version__.split("+")[0].replace(".", "")
+version_str = "".join([
+    f"py3{sys.version_info.minor}_cu",
+    torch.version.cuda.replace(".", ""),
+    f"_pyt{pyt_version_str}"
+])
+
+os.system(f"{sys.executable} -m pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html")
+
+from download_models import download_pretrained_models
+
+download_pretrained_models()
+
+import trimesh
+import numpy as np
+from munch import *
+from PIL import Image
+from tqdm import tqdm
+from torch.nn import functional as F
+from torch.utils import data
+from torchvision import utils
+from torchvision import transforms
+from skimage.measure import marching_cubes
+from scipy.spatial import Delaunay
+from options import BaseOptions
+from model import Generator
+from utils import (
+    generate_camera_params, align_volume, extract_mesh_with_marching_cubes,
+    xyz2mesh, create_cameras, create_mesh_renderer, add_textures,
+)
+from pytorch3d.structures import Meshes
+from pdb import set_trace as st
+import skvideo.io
+
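+# Generates RGB renders for each sampled identity and, when surface renderings are
+# enabled, extracts a depth-map mesh and a frustum-aligned marching-cubes mesh.
+# The demo returns the meshes of the first rendered view for display in the UI.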
+def generate(opt, g_ema, surface_g_ema, device, mean_latent, surface_mean_latent):
+    g_ema.eval()
+    if not opt.no_surface_renderings:
+        surface_g_ema.eval()
+
+    # set camera angles
+    if opt.fixed_camera_angles:
+        # These can be changed to any other specific viewpoints.
+        # You can add or remove viewpoints as you wish
+        locations = torch.tensor([[0, 0],
+                                  [-1.5 * opt.camera.azim, 0],
+                                  [-1 * opt.camera.azim, 0],
+                                  [-0.5 * opt.camera.azim, 0],
+                                  [0.5 * opt.camera.azim, 0],
+                                  [1 * opt.camera.azim, 0],
+                                  [1.5 * opt.camera.azim, 0],
+                                  [0, -1.5 * opt.camera.elev],
+                                  [0, -1 * opt.camera.elev],
+                                  [0, -0.5 * opt.camera.elev],
+                                  [0, 0.5 * opt.camera.elev],
+                                  [0, 1 * opt.camera.elev],
+                                  [0, 1.5 * opt.camera.elev]], device=device)
+        # For zooming in/out change the values of fov
+        # (This can be defined for each view separately via a custom tensor
+        # like the locations tensor above. Tensor shape should be [locations.shape[0], 1])
+        # reasonable values are [0.75 * opt.camera.fov, 1.25 * opt.camera.fov]
+        fov = opt.camera.fov * torch.ones((locations.shape[0], 1), device=device)
+        num_viewdirs = locations.shape[0]
+    else:  # draw random camera angles
+        locations = None
+        # fov = None
+        fov = opt.camera.fov
+        num_viewdirs = opt.num_views_per_id
+
+    # generate images
+    for i in tqdm(range(opt.identities)):
+        with torch.no_grad():
+            chunk = 8
+            sample_z = torch.randn(1, opt.style_dim, device=device).repeat(num_viewdirs, 1)
+            sample_cam_extrinsics, sample_focals, sample_near, sample_far, sample_locations = \
+                generate_camera_params(opt.renderer_output_size, device, batch=num_viewdirs,
+                                       locations=locations,  # input_fov=fov,
+                                       uniform=opt.camera.uniform, azim_range=opt.camera.azim,
+                                       elev_range=opt.camera.elev, fov_ang=fov,
+                                       dist_radius=opt.camera.dist_radius)
+            rgb_images = torch.Tensor(0, 3, opt.size, opt.size)
+            rgb_images_thumbs = torch.Tensor(0, 3, opt.renderer_output_size, opt.renderer_output_size)
+            for j in range(0, num_viewdirs, chunk):
+                out = g_ema([sample_z[j:j+chunk]],
+                            sample_cam_extrinsics[j:j+chunk],
+                            sample_focals[j:j+chunk],
+                            sample_near[j:j+chunk],
+                            sample_far[j:j+chunk],
+                            truncation=opt.truncation_ratio,
+                            truncation_latent=mean_latent)
+
+                rgb_images = torch.cat([rgb_images, out[0].cpu()], 0)
+                rgb_images_thumbs = torch.cat([rgb_images_thumbs, out[1].cpu()], 0)
+
+            utils.save_image(rgb_images,
+                             os.path.join(opt.results_dst_dir, 'images', '{}.png'.format(str(i).zfill(7))),
+                             nrow=num_viewdirs,
+                             normalize=True,
+                             padding=0,
+                             value_range=(-1, 1),)
+
+            utils.save_image(rgb_images_thumbs,
+                             os.path.join(opt.results_dst_dir, 'images', '{}_thumb.png'.format(str(i).zfill(7))),
+                             nrow=num_viewdirs,
+                             normalize=True,
+                             padding=0,
+                             value_range=(-1, 1),)
+
+            # this is done to fit to RTX2080 RAM size (11GB)
+            del out
+            torch.cuda.empty_cache()
+
+            if not opt.no_surface_renderings:
+                surface_chunk = 1
+                scale = surface_g_ema.renderer.out_im_res / g_ema.renderer.out_im_res
+                surface_sample_focals = sample_focals * scale
+                for j in range(0, num_viewdirs, surface_chunk):
+                    surface_out = surface_g_ema([sample_z[j:j+surface_chunk]],
+                                                sample_cam_extrinsics[j:j+surface_chunk],
+                                                surface_sample_focals[j:j+surface_chunk],
+                                                sample_near[j:j+surface_chunk],
+                                                sample_far[j:j+surface_chunk],
+                                                truncation=opt.truncation_ratio,
+                                                truncation_latent=surface_mean_latent,
+                                                return_sdf=True,
+                                                return_xyz=True)
+
+                    xyz = surface_out[2].cpu()
+                    sdf = surface_out[3].cpu()
+
+                    # this is done to fit to RTX2080 RAM size (11GB)
+                    del surface_out
+                    torch.cuda.empty_cache()
+
+                    # mesh extractions are done one at a time
+                    for k in range(surface_chunk):
+                        curr_locations = sample_locations[j:j+surface_chunk]
+                        loc_str = '_azim{}_elev{}'.format(int(curr_locations[k, 0] * 180 / np.pi),
+                                                          int(curr_locations[k, 1] * 180 / np.pi))
+
+                        # Save depth outputs as meshes
+                        depth_mesh_filename = os.path.join(opt.results_dst_dir, 'depth_map_meshes', 'sample_{}_depth_mesh{}.obj'.format(i, loc_str))
+                        depth_mesh = xyz2mesh(xyz[k:k+surface_chunk])
+                        if depth_mesh is not None:
+                            with open(depth_mesh_filename, 'w') as f:
+                                depth_mesh.export(f, file_type='obj')
+
+                        # extract full geometry with marching cubes
+                        if j == 0:
+                            try:
+                                frostum_aligned_sdf = align_volume(sdf)
+                                marching_cubes_mesh = extract_mesh_with_marching_cubes(frostum_aligned_sdf[k:k+surface_chunk])
+                            except ValueError:
+                                marching_cubes_mesh = None
+                                print('Marching cubes extraction failed.')
+                                print('Please check whether the SDF values are all larger (or all smaller) than 0.')
+
+                        return depth_mesh, marching_cubes_mesh
+
+
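+# Builds the inference options for still image + mesh generation, loads the selected
+# pretrained generator (FFHQ 1024 or AFHQ 512) and, unless surface renderings are
+# disabled, a second low-resolution generator used only for surface extraction.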
+# User options
+
+
+def get_generate_vars(model_type):
+
+    opt = BaseOptions().parse()
+    opt.camera.uniform = True
+    opt.model.is_test = True
+    opt.model.freeze_renderer = False
+    opt.rendering.offset_sampling = True
+    opt.rendering.static_viewdirs = True
+    opt.rendering.force_background = True
+    opt.rendering.perturb = 0
+    opt.inference.renderer_output_size = opt.model.renderer_spatial_output_dim
+    opt.inference.style_dim = opt.model.style_dim
+    opt.inference.project_noise = opt.model.project_noise
+
+    # User options
+    opt.inference.no_surface_renderings = False  # When true, only RGB images will be created
+    opt.inference.fixed_camera_angles = False  # When true, each identity will be rendered from a specific set of 13 viewpoints. Otherwise, random views are generated
+    opt.inference.identities = 1  # Number of identities to generate
+    opt.inference.num_views_per_id = 1  # Number of viewpoints generated per identity. This option is ignored if opt.inference.fixed_camera_angles is true.
+    opt.inference.camera = opt.camera
+
+    # Load saved model
+    if model_type == 'ffhq':
+        model_path = 'ffhq1024x1024.pt'
+        opt.model.size = 1024
+        opt.experiment.expname = 'ffhq1024x1024'
+    else:
+        opt.inference.camera.azim = 0.15
+        model_path = 'afhq512x512.pt'
+        opt.model.size = 512
+        opt.experiment.expname = 'afhq512x512'
+
+    # Create results directory
+    result_model_dir = 'final_model'
+    results_dir_basename = os.path.join(opt.inference.results_dir, opt.experiment.expname)
+    opt.inference.results_dst_dir = os.path.join(results_dir_basename, result_model_dir)
+    if opt.inference.fixed_camera_angles:
+        opt.inference.results_dst_dir = os.path.join(opt.inference.results_dst_dir, 'fixed_angles')
+    else:
+        opt.inference.results_dst_dir = os.path.join(opt.inference.results_dst_dir, 'random_angles')
+
+    os.makedirs(opt.inference.results_dst_dir, exist_ok=True)
+    os.makedirs(os.path.join(opt.inference.results_dst_dir, 'images'), exist_ok=True)
+
+    if not opt.inference.no_surface_renderings:
+        os.makedirs(os.path.join(opt.inference.results_dst_dir, 'depth_map_meshes'), exist_ok=True)
+        os.makedirs(os.path.join(opt.inference.results_dst_dir, 'marching_cubes_meshes'), exist_ok=True)
+
+    opt.inference.size = opt.model.size
+    checkpoint_path = os.path.join('full_models', model_path)
+    checkpoint = torch.load(checkpoint_path)
+
+    # Load image generation model
+    g_ema = Generator(opt.model, opt.rendering).to(device)
+    pretrained_weights_dict = checkpoint["g_ema"]
+    model_dict = g_ema.state_dict()
+    for k, v in pretrained_weights_dict.items():
+        if v.size() == model_dict[k].size():
+            model_dict[k] = v
+
+    g_ema.load_state_dict(model_dict)
+
+    # Load a second volume renderer that extracts surfaces at 128x128x128 (or higher) for better surface resolution
+    if not opt.inference.no_surface_renderings:
+        opt['surf_extraction'] = Munch()
+        opt.surf_extraction.rendering = opt.rendering
+        opt.surf_extraction.model = opt.model.copy()
+        opt.surf_extraction.model.renderer_spatial_output_dim = 128
+        opt.surf_extraction.rendering.N_samples = opt.surf_extraction.model.renderer_spatial_output_dim
+        opt.surf_extraction.rendering.return_xyz = True
+        opt.surf_extraction.rendering.return_sdf = True
+        surface_g_ema = Generator(opt.surf_extraction.model, opt.surf_extraction.rendering, full_pipeline=False).to(device)
+
+        # Load weights to surface extractor
+        surface_extractor_dict = surface_g_ema.state_dict()
+        for k, v in pretrained_weights_dict.items():
+            if k in surface_extractor_dict.keys() and v.size() == surface_extractor_dict[k].size():
+                surface_extractor_dict[k] = v
+
+        surface_g_ema.load_state_dict(surface_extractor_dict)
+    else:
+        surface_g_ema = None
+
+    # Get the mean latent vector for g_ema
+    if opt.inference.truncation_ratio < 1:
+        with torch.no_grad():
+            mean_latent = g_ema.mean_latent(opt.inference.truncation_mean, device)
+    else:
+        mean_latent = None
+
+    # Get the mean latent vector for surface_g_ema
+    if not opt.inference.no_surface_renderings:
+        surface_mean_latent = mean_latent[0]
+    else:
+        surface_mean_latent = None
+
+    return opt.inference, g_ema, surface_g_ema, mean_latent, surface_mean_latent, opt.inference.results_dst_dir
+
+
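+# Same setup as above, but configured for video rendering: results go to a 'videos'
+# directory and the volume renderer uses 64 samples per ray.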
+def get_rendervideo_vars(model_type, number_frames):
+    opt = BaseOptions().parse()
+    opt.model.is_test = True
+    opt.model.style_dim = 256
+    opt.model.freeze_renderer = False
+    opt.inference.size = opt.model.size
+    opt.inference.camera = opt.camera
+    opt.inference.renderer_output_size = opt.model.renderer_spatial_output_dim
+    opt.inference.style_dim = opt.model.style_dim
+    opt.inference.project_noise = opt.model.project_noise
+    opt.rendering.perturb = 0
+    opt.rendering.force_background = True
+    opt.rendering.static_viewdirs = True
+    opt.rendering.return_sdf = True
+    opt.rendering.N_samples = 64
+    opt.inference.identities = 1
+
+    # Load saved model
+    if model_type == 'ffhq':
+        model_path = 'ffhq1024x1024.pt'
+        opt.model.size = 1024
+        opt.experiment.expname = 'ffhq1024x1024'
+    else:
+        opt.inference.camera.azim = 0.15
+        model_path = 'afhq512x512.pt'
+        opt.model.size = 512
+        opt.experiment.expname = 'afhq512x512'
+
+    opt.inference.size = opt.model.size
+
+    # Create results directory
+    result_model_dir = 'final_model'
+    results_dir_basename = os.path.join(opt.inference.results_dir, opt.experiment.expname)
+    opt.inference.results_dst_dir = os.path.join(results_dir_basename, result_model_dir)
+
+    os.makedirs(opt.inference.results_dst_dir, exist_ok=True)
+    os.makedirs(os.path.join(opt.inference.results_dst_dir, 'videos'), exist_ok=True)
+
+    checkpoints_dir = './full_models'
+    checkpoint_path = os.path.join('full_models', model_path)
+
+    if os.path.isfile(checkpoint_path):
+        # define results directory name
+        result_model_dir = 'final_model'
+
+        results_dir_basename = os.path.join(opt.inference.results_dir, opt.experiment.expname)
+        opt.inference.results_dst_dir = os.path.join(results_dir_basename, result_model_dir, 'videos')
+        if opt.model.project_noise:
+            opt.inference.results_dst_dir = os.path.join(opt.inference.results_dst_dir, 'with_noise_projection')
+
+        os.makedirs(opt.inference.results_dst_dir, exist_ok=True)
+        print(checkpoint_path)
+        # load saved model
+        checkpoint = torch.load(checkpoint_path)
+
+        # load image generation model
+        g_ema = Generator(opt.model, opt.rendering).to(device)
+
+        # temp fix because of wrong noise sizes
+        pretrained_weights_dict = checkpoint["g_ema"]
+        model_dict = g_ema.state_dict()
+        for k, v in pretrained_weights_dict.items():
+            if v.size() == model_dict[k].size():
+                model_dict[k] = v
+
+        g_ema.load_state_dict(model_dict)
+
+        # load a second volume renderer that extracts surfaces at 128x128x128
+        if not opt.inference.no_surface_renderings or opt.model.project_noise:
+            opt['surf_extraction'] = Munch()
+            opt.surf_extraction.rendering = opt.rendering
+            opt.surf_extraction.model = opt.model.copy()
+            opt.surf_extraction.model.renderer_spatial_output_dim = 128
+            opt.surf_extraction.rendering.N_samples = opt.surf_extraction.model.renderer_spatial_output_dim
+            opt.surf_extraction.rendering.return_xyz = True
+            opt.surf_extraction.rendering.return_sdf = True
+            opt.inference.surf_extraction_output_size = opt.surf_extraction.model.renderer_spatial_output_dim
+            surface_g_ema = Generator(opt.surf_extraction.model, opt.surf_extraction.rendering, full_pipeline=False).to(device)
+
+            # Load weights to surface extractor
+            surface_extractor_dict = surface_g_ema.state_dict()
+            for k, v in pretrained_weights_dict.items():
+                if k in surface_extractor_dict.keys() and v.size() == surface_extractor_dict[k].size():
+                    surface_extractor_dict[k] = v
+
+            surface_g_ema.load_state_dict(surface_extractor_dict)
+        else:
+            surface_g_ema = None
+
+        # get the mean latent vector for g_ema
+        if opt.inference.truncation_ratio < 1:
+            with torch.no_grad():
+                mean_latent = g_ema.mean_latent(opt.inference.truncation_mean, device)
+        else:
+            mean_latent = None
+
+        # get the mean latent vector for surface_g_ema
+        if not opt.inference.no_surface_renderings or opt.model.project_noise:
+            surface_mean_latent = mean_latent[0]
+        else:
+            surface_mean_latent = None
+
+        return opt.inference, g_ema, surface_g_ema, mean_latent, surface_mean_latent, opt.inference.results_dst_dir
+
+
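+# Renders a camera trajectory (azimuth sweep or ellipsoid sweep) around a single
+# sampled identity and writes an RGB video plus, optionally, a depth-mesh video
+# rendered with PyTorch3D.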
+def render_video(opt, g_ema, surface_g_ema, device, mean_latent, surface_mean_latent, numberofframes):
+    g_ema.eval()
+    if not opt.no_surface_renderings or opt.project_noise:
+        surface_g_ema.eval()
+
+    images = torch.Tensor(0, 3, opt.size, opt.size)
+    num_frames = numberofframes
+    # Generate video trajectory
+    trajectory = np.zeros((num_frames, 3), dtype=np.float32)
+
+    # set camera trajectory
+    # sweep azimuth angles (4 seconds)
+    if opt.azim_video:
+        t = np.linspace(0, 1, num_frames)
+        elev = 0
+        fov = opt.camera.fov
+        if opt.camera.uniform:
+            azim = opt.camera.azim * np.cos(t * 2 * np.pi)
+        else:
+            azim = 1.5 * opt.camera.azim * np.cos(t * 2 * np.pi)
+
+        trajectory[:num_frames, 0] = azim
+        trajectory[:num_frames, 1] = elev
+        trajectory[:num_frames, 2] = fov
+
+    # elipsoid sweep (4 seconds)
+    else:
+        t = np.linspace(0, 1, num_frames)
+        fov = opt.camera.fov  # + 1 * np.sin(t * 2 * np.pi)
+        if opt.camera.uniform:
+            elev = opt.camera.elev / 2 + opt.camera.elev / 2 * np.sin(t * 2 * np.pi)
+            azim = opt.camera.azim * np.cos(t * 2 * np.pi)
+        else:
+            elev = 1.5 * opt.camera.elev * np.sin(t * 2 * np.pi)
+            azim = 1.5 * opt.camera.azim * np.cos(t * 2 * np.pi)
+
+        trajectory[:num_frames, 0] = azim
+        trajectory[:num_frames, 1] = elev
+        trajectory[:num_frames, 2] = fov
+
+    trajectory = torch.from_numpy(trajectory).to(device)
+
+    # generate input parameters for the camera trajectory
+    # sample_cam_poses, sample_focals, sample_near, sample_far = \
+    #     generate_camera_params(trajectory, opt.renderer_output_size, device, dist_radius=opt.camera.dist_radius)
+
+    sample_cam_extrinsics, sample_focals, sample_near, sample_far, _ = \
+        generate_camera_params(opt.renderer_output_size, device, locations=trajectory[:, :2],
+                               fov_ang=trajectory[:, 2:], dist_radius=opt.camera.dist_radius)
+
+    # In case of noise projection, generate input parameters for the frontal position.
+    # The reference mesh for the noise projection is extracted from the frontal position.
+    # For more details see section C.1 in the supplementary material.
+    if opt.project_noise:
+        frontal_pose = torch.tensor([[0.0, 0.0, opt.camera.fov]]).to(device)
+        # frontal_cam_pose, frontal_focals, frontal_near, frontal_far = \
+        #     generate_camera_params(frontal_pose, opt.surf_extraction_output_size, device, dist_radius=opt.camera.dist_radius)
+        frontal_cam_pose, frontal_focals, frontal_near, frontal_far, _ = \
+            generate_camera_params(opt.surf_extraction_output_size, device, locations=frontal_pose[:, :2],
+                                   fov_ang=frontal_pose[:, 2:], dist_radius=opt.camera.dist_radius)
+
+    # create geometry renderer (renders the depth maps)
+    cameras = create_cameras(azim=np.rad2deg(trajectory[0, 0].cpu().numpy()),
+                             elev=np.rad2deg(trajectory[0, 1].cpu().numpy()),
+                             dist=1, device=device)
+    renderer = create_mesh_renderer(cameras, image_size=512, specular_color=((0, 0, 0),),
+                                    ambient_color=((0.1, .1, .1),), diffuse_color=((0.75, .75, .75),),
+                                    device=device)
+
+    suffix = '_azim' if opt.azim_video else '_elipsoid'
+
+    # generate videos
+    for i in range(opt.identities):
+        print('Processing identity {}/{}...'.format(i + 1, opt.identities))
+        chunk = 1
+        sample_z = torch.randn(1, opt.style_dim, device=device).repeat(chunk, 1)
+        video_filename = 'sample_video_{}{}.mp4'.format(i, suffix)
+        writer = skvideo.io.FFmpegWriter(os.path.join(opt.results_dst_dir, video_filename),
+                                         outputdict={'-pix_fmt': 'yuv420p', '-crf': '10'})
+        if not opt.no_surface_renderings:
+            depth_video_filename = 'sample_depth_video_{}{}.mp4'.format(i, suffix)
+            depth_writer = skvideo.io.FFmpegWriter(os.path.join(opt.results_dst_dir, depth_video_filename),
+                                                   outputdict={'-pix_fmt': 'yuv420p', '-crf': '1'})
+
+        ####################### Extract initial surface mesh from the frontal viewpoint #############
+        # For more details see section C.1 in the supplementary material.
+        if opt.project_noise:
+            with torch.no_grad():
+                frontal_surface_out = surface_g_ema([sample_z],
+                                                    frontal_cam_pose,
+                                                    frontal_focals,
+                                                    frontal_near,
+                                                    frontal_far,
+                                                    truncation=opt.truncation_ratio,
+                                                    truncation_latent=surface_mean_latent,
+                                                    return_sdf=True)
+                frontal_sdf = frontal_surface_out[2].cpu()
+
+            print('Extracting Identity {} Frontal view Marching Cubes for consistent video rendering'.format(i))
+
+            frostum_aligned_frontal_sdf = align_volume(frontal_sdf)
+            del frontal_sdf
+
+            try:
+                frontal_marching_cubes_mesh = extract_mesh_with_marching_cubes(frostum_aligned_frontal_sdf)
+            except ValueError:
+                frontal_marching_cubes_mesh = None
+
+            if frontal_marching_cubes_mesh is not None:
+                frontal_marching_cubes_mesh_filename = os.path.join(opt.results_dst_dir, 'sample_{}_frontal_marching_cubes_mesh{}.obj'.format(i, suffix))
+                with open(frontal_marching_cubes_mesh_filename, 'w') as f:
+                    frontal_marching_cubes_mesh.export(f, file_type='obj')
+
+            del frontal_surface_out
+            torch.cuda.empty_cache()
+        #############################################################################################
+
+        for j in tqdm(range(0, num_frames, chunk)):
+            with torch.no_grad():
+                out = g_ema([sample_z],
+                            sample_cam_extrinsics[j:j+chunk],
+                            sample_focals[j:j+chunk],
+                            sample_near[j:j+chunk],
+                            sample_far[j:j+chunk],
+                            truncation=opt.truncation_ratio,
+                            truncation_latent=mean_latent,
+                            randomize_noise=False,
+                            project_noise=opt.project_noise,
+                            mesh_path=frontal_marching_cubes_mesh_filename if opt.project_noise else None)
+
+                rgb = out[0].cpu()
+                utils.save_image(rgb,
+                                 os.path.join(opt.results_dst_dir, '{}.png'.format(str(i).zfill(7))),
+                                 nrow=trajectory[:, :2].shape[0],
+                                 normalize=True,
+                                 padding=0,
+                                 value_range=(-1, 1),)
+
+                # this is done to fit to RTX2080 RAM size (11GB)
+                del out
+                torch.cuda.empty_cache()
+
+                # Convert RGB from [-1, 1] to [0, 255]
+                rgb = 127.5 * (rgb.clamp(-1, 1).permute(0, 2, 3, 1).cpu().numpy() + 1)
+
+                # Add RGB frame to video
+                for k in range(chunk):
+                    writer.writeFrame(rgb[k])
+
+                ########## Extract surface ##########
+                if not opt.no_surface_renderings:
+                    scale = surface_g_ema.renderer.out_im_res / g_ema.renderer.out_im_res
+                    surface_sample_focals = sample_focals * scale
+                    surface_out = surface_g_ema([sample_z],
+                                                sample_cam_extrinsics[j:j+chunk],
+                                                surface_sample_focals[j:j+chunk],
+                                                sample_near[j:j+chunk],
+                                                sample_far[j:j+chunk],
+                                                truncation=opt.truncation_ratio,
+                                                truncation_latent=surface_mean_latent,
+                                                return_xyz=True)
+                    xyz = surface_out[2].cpu()
+
+                    # this is done to fit to RTX2080 RAM size (11GB)
+                    del surface_out
+                    torch.cuda.empty_cache()
+
+                    # Render mesh for video
+                    depth_mesh = xyz2mesh(xyz)
+                    mesh = Meshes(
+                        verts=[torch.from_numpy(np.asarray(depth_mesh.vertices)).to(torch.float32).to(device)],
+                        faces=[torch.from_numpy(np.asarray(depth_mesh.faces)).to(torch.float32).to(device)],
+                        textures=None,
+                        verts_normals=[torch.from_numpy(np.copy(np.asarray(depth_mesh.vertex_normals))).to(torch.float32).to(device)],
+                    )
+                    mesh = add_textures(mesh)
+                    cameras = create_cameras(azim=np.rad2deg(trajectory[j, 0].cpu().numpy()),
+                                             elev=np.rad2deg(trajectory[j, 1].cpu().numpy()),
+                                             fov=2 * trajectory[j, 2].cpu().numpy(),
+                                             dist=1, device=device)
+                    renderer = create_mesh_renderer(cameras, image_size=512,
+                                                    light_location=((0.0, 1.0, 5.0),), specular_color=((0.2, 0.2, 0.2),),
+                                                    ambient_color=((0.1, 0.1, 0.1),), diffuse_color=((0.65, .65, .65),),
+                                                    device=device)
+
+                    mesh_image = 255 * renderer(mesh).cpu().numpy()
+                    mesh_image = mesh_image[..., :3]
+
+                    # Add depth frame to video
+                    for k in range(chunk):
+                        depth_writer.writeFrame(mesh_image[k])
+
+        # Close video writers
+        writer.close()
+        if not opt.no_surface_renderings:
+            depth_writer.close()
+
+    return video_filename
+
+
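+# Gradio UI: two buttons drive the pipeline. "Generate Mesh" runs generate() and shows
+# the extracted mesh as an interactive Plotly figure; "Generate Video" runs render_video()
+# and shows the resulting mp4. Both also display the first rendered RGB image.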
+import gradio as gr
+import plotly.graph_objects as go
+from PIL import Image
+
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+
+def get_video(model_type, numberofframes, mesh_type):
+    options, g_ema, surface_g_ema, mean_latent, surface_mean_latent, result_filename = get_rendervideo_vars(model_type, numberofframes)
+    render_video(options, g_ema, surface_g_ema, device, mean_latent, surface_mean_latent, numberofframes)
+    torch.cuda.empty_cache()
+    del options, g_ema, surface_g_ema, mean_latent, surface_mean_latent
+    path_img = os.path.join(result_filename, "0000000.png")
+    image = Image.open(path_img)
+
+    if mesh_type == "DepthMesh":
+        path = os.path.join(result_filename, "sample_depth_video_0_elipsoid.mp4")
+    else:
+        path = os.path.join(result_filename, "sample_video_0_elipsoid.mp4")
+
+    return path, image
+
+def get_mesh(model_type, mesh_type):
+    options, g_ema, surface_g_ema, mean_latent, surface_mean_latent, result_filename = get_generate_vars(model_type)
+    depth_mesh, mc_mesh = generate(options, g_ema, surface_g_ema, device, mean_latent, surface_mean_latent)
+    torch.cuda.empty_cache()
+    del options, g_ema, surface_g_ema, mean_latent, surface_mean_latent
+    if mesh_type == "DepthMesh":
+        mesh = depth_mesh
+    else:
+        mesh = mc_mesh
+
+    x = np.asarray(mesh.vertices).T[0]
+    y = np.asarray(mesh.vertices).T[1]
+    z = np.asarray(mesh.vertices).T[2]
+
+    i = np.asarray(mesh.faces).T[0]
+    j = np.asarray(mesh.faces).T[1]
+    k = np.asarray(mesh.faces).T[2]
+    fig = go.Figure(go.Mesh3d(x=x, y=y, z=z,
+                              i=i, j=j, k=k,
+                              colorscale="Viridis",
+                              colorbar_len=0.75,
+                              flatshading=True,
+                              lighting=dict(ambient=0.5,
+                                            diffuse=1,
+                                            fresnel=4,
+                                            specular=0.5,
+                                            roughness=0.05,
+                                            facenormalsepsilon=0,
+                                            vertexnormalsepsilon=0),
+                              lightposition=dict(x=100,
+                                                 y=100,
+                                                 z=1000)))
+    path = os.path.join(result_filename, "images/0000000.png")
+
+    image = Image.open(path)
+
+    return fig, image
+
+markdown = f'''
+# StyleSDF: High-Resolution 3D-Consistent Image and Geometry Generation
+
+[The Space demo for the CVPR 2022 paper "StyleSDF: High-Resolution 3D-Consistent Image and Geometry Generation".](https://arxiv.org/abs/2112.11427)
+
+[Official implementation.](https://github.com/royorel/StyleSDF)
+
+### Future work based on interest
+- Adding new models for new object types
+- New customization options
+
+It is running on {device}.
+
+The process can take a long time, especially for video generation; the runtime depends on the number of frames and the current device.
+
+Note: for an RGB video, choose the Marching Cubes mesh type.
+
+'''
+with gr.Blocks() as demo:
+    with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown(markdown)
+                with gr.Column():
+                    with gr.Row():
+                        with gr.Column():
+                            image = gr.Image(type="pil", shape=(512, 512))
+                        with gr.Column():
+                            mesh = gr.Plot()
+                        with gr.Column():
+                            video = gr.Video()
+            with gr.Row():
+                numberoframes = gr.Slider(minimum=30, maximum=250, label='Number Of Frames For Video Generation')
+                model_name = gr.Dropdown(choices=["ffhq", "afhq"], label="Choose Model Type")
+                mesh_type = gr.Dropdown(choices=["DepthMesh", "Marching Cubes"], label="Choose Mesh Type")
+
+            with gr.Row():
+                btn = gr.Button(value="Generate Mesh")
+                btn_2 = gr.Button(value="Generate Video")
+
+            btn.click(get_mesh, [model_name, mesh_type], [mesh, image])
+            btn_2.click(get_video, [model_name, numberoframes, mesh_type], [video, image])
+
+demo.launch(debug=True)
+
requirements.txt ADDED
@@ -0,0 +1,16 @@
+torch==1.13.0+cu116
+torchvision==0.14.0+cu116
+plotly
+lmdb
+numpy
+ninja
+pillow
+requests
+tqdm
+scipy
+scikit-image
+scikit-video
+trimesh[easy]
+configargparse
+munch
+wandb