Spaces:

hongfz16
/

EVA3D

Runtime error

File size: 16,372 Bytes

import sys
import os 

# Runtime bootstrap: fetch the EVA3D sources and install extra dependencies
# before the imports further down are executed. The statements are
# order-dependent. os.system() return codes are not checked, so a failed
# clone/copy/install does not stop the script here.
os.system("git clone https://github.com/hongfz16/EVA3D.git")
# Make the cloned checkout importable (presumably where download_models,
# options, model, dataset and utils are resolved from -- confirm).
sys.path.append("EVA3D")
os.system("cp -r EVA3D/assets .")

os.system(f"{sys.executable} -m pip install -U fvcore plotly")

# Build the wheel tag "py3<minor>_cu<cuda>_pyt<torch>" from the running
# interpreter and the installed torch build; it selects the pytorch3d wheel
# index used below.
import torch
pyt_version_str=torch.__version__.split("+")[0].replace(".", "")
# NOTE(review): torch.version.cuda is expected to be None on CPU-only torch
# builds, in which case .replace() would raise AttributeError -- confirm the
# Space always runs a CUDA build.
version_str="".join([
    f"py3{sys.version_info.minor}_cu",
    torch.version.cuda.replace(".",""),
    f"_pyt{pyt_version_str}"
])

# --no-index restricts pip to the wheel index page given with -f.
os.system(f"{sys.executable} -m pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html")

import os
import html
import glob
import uuid
import hashlib
import requests
from tqdm import tqdm
from pdb import set_trace as st

from download_models import download_file
# Descriptors (URL, expected size and MD5, destination path) for the legacy
# Google Drive download route via download_file(). The active download path in
# this script uses the Hugging Face Hub instead.
eva3d_deepfashion_model = dict(
    file_url='https://drive.google.com/uc?id=1SYPjxnHz3XPRhTarx_Lw8SG_iz16QUMU',
    alt_url='',
    file_size=160393221,
    file_md5='d0fae86edf76c52e94223bd3f39b2157',
    file_path='checkpoint/512x256_deepfashion/volume_renderer/models_0420000.pt',
)

# The Drive file id comes from the `smpl_link` environment variable. Use
# .get() so that a missing variable does not abort the whole script at import
# time with a KeyError; the URL is then simply left without an id.
smpl_model = dict(
    file_url='https://drive.google.com/uc?id={}'.format(os.environ.get('smpl_link', '')),
    alt_url='',
    file_size=39001280,
    file_md5='65dc7f162f3ef21a38637663c57e14a7',
    file_path='smpl_models/smpl/SMPL_NEUTRAL.pkl',
)

from huggingface_hub import hf_hub_download

def download_pretrained_models():
    """Fetch the EVA3D checkpoint and the SMPL model from the Hugging Face Hub.

    Both files are downloaded from the ``hongfz16/EVA3D`` repo with
    ``hf_hub_download`` (authenticated with the ``hf_token`` environment
    variable) and then copied to fixed relative paths:

    * ``checkpoint/512x256_deepfashion/volume_renderer/models_0420000.pt``
    * ``smpl_models/smpl/SMPL_NEUTRAL.pkl``

    Directory creation and copying use ``os.makedirs`` / ``shutil.copyfile``
    rather than shelling out, so a failure raises instead of being silently
    ignored and the cache path is never interpolated into a shell command.

    Raises:
        KeyError: if the ``hf_token`` environment variable is not set.
        OSError: if a directory cannot be created or a file cannot be copied.
    """
    import shutil  # local import so the block does not depend on file-level imports

    ckpt_dir = 'checkpoint/512x256_deepfashion/volume_renderer'
    smpl_dir = 'smpl_models/smpl'

    print('Downloading EVA3D model pretrained on DeepFashion.')
    eva3d_ckpt = hf_hub_download(repo_id="hongfz16/EVA3D", filename="models_0420000.pt", token=os.environ['hf_token'])
    os.makedirs(ckpt_dir, exist_ok=True)
    os.makedirs(smpl_dir, exist_ok=True)
    # copyfile follows symlinks, so the file content is copied even when the
    # returned cache path is a link.
    shutil.copyfile(eva3d_ckpt, os.path.join(ckpt_dir, 'models_0420000.pt'))

    print('Downloading SMPL model.')
    smpl_pkl = hf_hub_download(repo_id="hongfz16/EVA3D", filename="SMPL_NEUTRAL.pkl", token=os.environ['hf_token'])
    shutil.copyfile(smpl_pkl, os.path.join(smpl_dir, 'SMPL_NEUTRAL.pkl'))

# Runs at import time, so the checkpoint file that setup() later loads is
# already in place before any UI callback fires.
download_pretrained_models()

import os
import torch
import trimesh
import imageio
import pickle
import numpy as np
from munch import *
from PIL import Image
from tqdm import tqdm
from torch.nn import functional as F
from torch.utils import data
from torchvision import utils
from torchvision import transforms
from skimage.measure import marching_cubes
from scipy.spatial import Delaunay
from scipy.spatial.transform import Rotation as R
from options import BaseOptions
from model import VoxelHumanGenerator as Generator
from dataset import DeepFashionDataset, DemoDataset
from utils import (
    generate_camera_params,
    align_volume,
    extract_mesh_with_marching_cubes,
    xyz2mesh,
    requires_grad,
    create_mesh_renderer,
    create_cameras
)
from pytorch3d.io import load_objs_as_meshes, load_obj
from pytorch3d.structures import Meshes
from pytorch3d.renderer import (
    FoVPerspectiveCameras, look_at_view_transform, look_at_rotation, 
    RasterizationSettings, MeshRenderer, MeshRasterizer, BlendParams,
    SoftSilhouetteShader, HardPhongShader, PointLights, TexturesVertex,
)

# Fix the seeds of torch's and Python's random number generators.
torch.random.manual_seed(8888)
import random
random.seed(8888)

# Total yaw sweep in radians for generate_video(): the rendered subject is
# rotated between +panning_angle/2 and -panning_angle/2.
panning_angle = np.pi / 3

def sample_latent(opt, device):
    """Placeholder: ignores ``opt`` and ``device`` and returns ``None``."""
    return None

def generate_rgb(opt, g_ema, device, mean_latent, sample_z, sample_trans, sample_beta, sample_theta, sample_cam_extrinsics, sample_focals):
    """Render the first sample from three yaw angles and save them as one PNG.

    The first three components of ``sample_theta[0]`` are read as a rotation
    vector and composed with an extra rotation about the y axis of +pi/8, 0
    and -pi/8 in turn. Each resulting pose is rendered with ``g_ema`` and the
    three renders are written side by side (``nrow=3``) to
    ``<opt.results_dst_dir>/images_paper_fig/0000000.png``.

    Args:
        opt: options object; ``truncation_ratio`` and ``results_dst_dir`` are read.
        g_ema: generator. Its gradients are disabled via ``requires_grad`` and
            its ``is_train`` / ``train_renderer`` flags are set to ``False``.
        device: device the modified pose tensor is moved to.
        mean_latent: forwarded to the generator as ``truncation_latent``.
        sample_z, sample_trans, sample_beta, sample_theta,
        sample_cam_extrinsics, sample_focals: batched inputs; only the first
            entry (``[:1]``) of each is used.

    Returns:
        None. The image file is the only output; its directory is expected to
        exist already.
    """
    requires_grad(g_ema, False)
    g_ema.is_train = False
    g_ema.train_renderer = False

    y_axis = np.array([0, 1, 0])
    # The base orientation does not change between views, so build it once.
    base_rot = R.from_rotvec(sample_theta[0, :3].cpu().numpy())

    img_list = []
    for yaw in (np.pi / 8, 0, -np.pi / 8):
        new_r = R.from_rotvec(yaw * y_axis) * base_rot
        new_sample_theta = sample_theta.clone()
        new_sample_theta[0, :3] = torch.from_numpy(new_r.as_rotvec()).to(device)

        with torch.no_grad():
            out = g_ema([sample_z[:1]],
                        sample_cam_extrinsics[:1],
                        sample_focals[:1],
                        sample_beta[:1],
                        new_sample_theta[:1],
                        sample_trans[:1],
                        truncation=opt.truncation_ratio,
                        truncation_latent=mean_latent,
                        return_eikonal=False,
                        return_normal=False,
                        return_mask=False,
                        fix_viewdir=True)

        # out[1] is presumably a channels-last image batch; keep the first
        # three channels and move them to channels-first for save_image.
        rgb_images_thumbs = out[1].detach().cpu()[..., :3].permute(0, 3, 1, 2)
        g_ema.zero_grad()
        img_list.append(rgb_images_thumbs)

    # Write the grid once, after all three views exist. Saving inside the loop
    # only rewrote the same file with partial content on every iteration.
    # NOTE(review): `range=` is the older spelling of make_grid's
    # `value_range=`; newer torchvision releases may reject it -- confirm
    # against the installed version before renaming.
    utils.save_image(torch.cat(img_list, 0),
                     os.path.join(opt.results_dst_dir, 'images_paper_fig', '{}.png'.format(str(0).zfill(7))),
                     nrow=3,
                     normalize=True,
                     range=(-1, 1),
                     padding=0,)

def generate_mesh(opt, g_ema, device, mean_latent, sample_z, sample_trans, sample_beta, sample_theta, sample_cam_extrinsics, sample_focals):
    """Build a smoothed marching-cubes mesh for the first latent in ``sample_z``.

    The latent is mapped through ``g_ema.styles_and_noise_forward`` (with
    truncation ``opt.truncation_ratio`` towards ``mean_latent``), a volume is
    queried from ``g_ema.renderer.marching_cube_posed`` for the given
    ``sample_beta`` / ``sample_theta``, the level-0 surface is extracted and
    the result is smoothed with trimesh's Humphrey filter.

    ``device``, ``sample_trans``, ``sample_cam_extrinsics`` and
    ``sample_focals`` are accepted for signature parity with the other
    ``generate_*`` functions but are not used. Nothing is written to disk.

    Returns:
        The smoothed mesh returned by ``trimesh.smoothing.filter_humphrey``.
    """
    styles = g_ema.styles_and_noise_forward(
        sample_z[:1],
        None,
        opt.truncation_ratio,
        mean_latent,
        False,
    )

    posed_volume = g_ema.renderer.marching_cube_posed(
        styles[0],
        sample_beta,
        sample_theta,
        resolution=350,
        size=1.4,
    )
    posed_volume = posed_volume.detach()

    # Only the first of the three returned values (the mesh) is needed.
    raw_mesh, _, _ = extract_mesh_with_marching_cubes(posed_volume, level_set=0)
    return trimesh.smoothing.filter_humphrey(raw_mesh, beta=0.2, iterations=5)

def generate_video(opt, g_ema, device, mean_latent, sample_z, sample_trans, sample_beta, sample_theta, sample_cam_extrinsics, sample_focals):
    """Render a 120-frame panning video of the first sample and save it as MP4.

    For every frame the first three components of ``sample_theta[0]`` (read
    as a rotation vector) are composed with a rotation about the y axis. The
    yaw starts at 0, rises to ``+panning_angle / 2`` over frames 0-29, sweeps
    down to ``-panning_angle / 2`` over frames 30-89 and returns towards 0
    over frames 90-119. The frames are written at 30 fps to
    ``<opt.results_dst_dir>/images_paper_video/video_0000000.mp4``.

    Args:
        opt: options object; ``truncation_ratio`` and ``results_dst_dir`` are read.
        g_ema: generator used to render each frame.
        device: device the modified pose tensor is moved to.
        mean_latent: forwarded to the generator as ``truncation_latent``.
        sample_z, sample_trans, sample_beta, sample_theta,
        sample_cam_extrinsics, sample_focals: batched inputs; only the first
            entry (``[:1]``) of each is used.

    Returns:
        None. The video file is the only output; its directory is expected to
        exist already.
    """
    y_axis = np.array([0, 1, 0])
    # The base orientation is the same for every frame, so build it once.
    base_rot = R.from_rotvec(sample_theta[0, :3].cpu().numpy())

    video_list = []
    for k in tqdm(range(120)):
        if k < 30:
            angle = (panning_angle / 2) * (k / 30)
        elif k < 90:
            angle = panning_angle / 2 - panning_angle * ((k - 30) / 60)
        else:
            angle = -panning_angle / 2 * ((120 - k) / 30)
        new_r = R.from_rotvec(angle * y_axis) * base_rot
        new_sample_theta = sample_theta.clone()
        new_sample_theta[0, :3] = torch.from_numpy(new_r.as_rotvec()).to(device)
        with torch.no_grad():
            out = g_ema([sample_z[:1]],
                        sample_cam_extrinsics[:1],
                        sample_focals[:1],
                        sample_beta[:1],
                        new_sample_theta[:1],
                        sample_trans[:1],
                        truncation=opt.truncation_ratio,
                        truncation_latent=mean_latent,
                        return_eikonal=False,
                        return_normal=False,
                        return_mask=False,
                        fix_viewdir=True)
        rgb_images_thumbs = out[1].detach().cpu()[..., :3]
        g_ema.zero_grad()
        # Map the nominal [-1, 1] output range to [0, 255]; +0.5 rounds to
        # nearest when the array is later truncated to uint8.
        video_list.append((rgb_images_thumbs.numpy() + 1) / 2. * 255. + 0.5)

    # Clip before the cast: any value outside [0, 255] would otherwise wrap
    # around in uint8 and show up as speckle artefacts in the video.
    all_img = np.clip(np.concatenate(video_list, 0), 0, 255).astype(np.uint8)
    imageio.mimwrite(os.path.join(opt.results_dst_dir, 'images_paper_video', 'video_{}.mp4'.format(str(0).zfill(7))), all_img, fps=30, quality=8)

def setup():
    """Build everything the ``generate_*`` functions need for one sample.

    Parses ``BaseOptions``, overrides the fields below for the
    ``512x256_deepfashion`` experiment, creates the result directories, loads
    the checkpoint ``checkpoint/512x256_deepfashion/volume_renderer/models_0420000.pt``
    into a ``Generator``, picks a dataset, computes the mean latent when
    truncation is enabled, and samples one set of SMPL and camera parameters
    plus one latent code.

    Returns:
        A 10-tuple in the positional order expected by ``generate_rgb``,
        ``generate_mesh`` and ``generate_video``:
        ``(opt.inference, g_ema, device, mean_latent, sample_z, sample_trans,
        sample_beta, sample_theta, sample_cam_extrinsics, sample_focals)``.
    """
    device='cuda' if torch.cuda.is_available() else 'cpu'
    opt = BaseOptions().parse()

    # Hard-coded configuration for the pretrained DeepFashion model.
    opt.training.batch = 1
    opt.training.chunk = 1
    opt.experiment.expname = '512x256_deepfashion'
    opt.dataset.dataset_path = 'demodataset'
    opt.rendering.depth = 5
    opt.rendering.width = 128
    opt.model.style_dim = 128
    opt.model.renderer_spatial_output_dim = [512, 256]
    opt.training.no_sphere_init = True
    opt.rendering.input_ch_views = 3
    opt.rendering.white_bg = True
    opt.model.voxhuman_name = 'eva3d_deepfashion'
    opt.training.deltasdf = True
    opt.rendering.N_samples = 28
    opt.experiment.ckpt = '420000'
    opt.inference.identities = 1
    opt.inference.truncation_ratio = 0.6

    # Inference-mode switches, then mirror selected model/rendering/camera
    # settings onto opt.inference, which is the object handed to generate_*.
    opt.model.is_test = True
    opt.model.freeze_renderer = False
    opt.rendering.no_features_output = True
    opt.rendering.offset_sampling = True
    opt.rendering.static_viewdirs = True
    opt.rendering.force_background = True
    opt.rendering.perturb = 0
    opt.inference.size = opt.model.size
    opt.inference.camera = opt.camera
    opt.inference.renderer_output_size = opt.model.renderer_spatial_output_dim
    opt.inference.style_dim = opt.model.style_dim
    opt.inference.project_noise = opt.model.project_noise
    opt.inference.return_xyz = opt.rendering.return_xyz
    
    # '420000'.zfill(7) -> '0420000', i.e. models_0420000.pt
    checkpoints_dir = os.path.join('checkpoint', opt.experiment.expname, 'volume_renderer')
    checkpoint_path = os.path.join(checkpoints_dir,
                                    'models_{}.pt'.format(opt.experiment.ckpt.zfill(7)))
    # define results directory name
    result_model_dir = 'iter_{}'.format(opt.experiment.ckpt.zfill(7))

    # create results directory
    results_dir_basename = os.path.join(opt.inference.results_dir, opt.experiment.expname)
    opt.inference.results_dst_dir = os.path.join(results_dir_basename, result_model_dir)
    if opt.inference.fixed_camera_angles:
        opt.inference.results_dst_dir = os.path.join(opt.inference.results_dst_dir, 'fixed_angles')
    else:
        opt.inference.results_dst_dir = os.path.join(opt.inference.results_dst_dir, 'random_angles')
    os.makedirs(opt.inference.results_dst_dir, exist_ok=True)
    os.makedirs(os.path.join(opt.inference.results_dst_dir, 'images_paper_fig'), exist_ok=True)
    os.makedirs(os.path.join(opt.inference.results_dst_dir, 'images_paper_video'), exist_ok=True)
    os.makedirs(os.path.join(opt.inference.results_dst_dir, 'marching_cubes_meshes_posed'), exist_ok=True)
    # The identity map_location keeps tensors on the storage they were
    # deserialised to (presumably CPU) instead of their saved device.
    checkpoint = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)

    # load generation model
    g_ema = Generator(opt.model, opt.rendering, full_pipeline=False, voxhuman_name=opt.model.voxhuman_name).to(device)
    pretrained_weights_dict = checkpoint["g_ema"]
    model_dict = g_ema.state_dict()
    # Copy only the weights whose shape matches the freshly built model; the
    # names of mismatching entries are printed and the model's own value kept.
    # NOTE(review): a checkpoint key that is absent from the model would raise
    # KeyError at model_dict[k].
    for k, v in pretrained_weights_dict.items():
        if v.size() == model_dict[k].size():
            model_dict[k] = v
        else:
            print(k)

    g_ema.load_state_dict(model_dict)

    # Only consumed by DeepFashionDataset below.
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)])
    
    # With dataset_path == 'demodataset' (set above) neither substring matches,
    # so file_list is None and DemoDataset is used.
    if 'deepfashion' in opt.dataset.dataset_path:
        file_list = '/mnt/lustre/fzhong/smplify-x/deepfashion_train_list/deepfashion_train_list_MAN.txt'
    elif '20w_fashion' in opt.dataset.dataset_path:
        file_list = '/mnt/lustre/fzhong/mmhuman3d/20w_fashion_result/nondress_flist.txt'
    else:
        file_list = None
    if file_list:
        dataset = DeepFashionDataset(opt.dataset.dataset_path, transform, opt.model.size,
                                     opt.model.renderer_spatial_output_dim, file_list)
    else:
        dataset = DemoDataset()

    # get the mean latent vector for g_ema
    if opt.inference.truncation_ratio < 1:
        with torch.no_grad():
            mean_latent = g_ema.mean_latent(opt.inference.truncation_mean, device)
    else:
        mean_latent = None

    g_ema.renderer.is_train = False
    g_ema.renderer.perturb = 0

    # generate(opt.inference, dataset, g_ema, device, mean_latent, opt.rendering.render_video)

    # One set of SMPL parameters and one camera for the single demo sample.
    sample_trans, sample_beta, sample_theta = dataset.sample_smpl_param(1, device, val=False)
    sample_cam_extrinsics, sample_focals = dataset.get_camera_extrinsics(1, device, val=False)

    # NOTE(review): this result is discarded. The call still advances the
    # torch RNG, so removing it would change the latent returned below.
    torch.randn(1, opt.inference.style_dim, device=device)

    return opt.inference, g_ema, device, mean_latent, torch.randn(1, opt.inference.style_dim, device=device), \
           sample_trans, sample_beta, sample_theta, sample_cam_extrinsics, sample_focals

import gradio as gr
import plotly.graph_objects as go
from PIL import Image

# Lazily filled cache of setup()'s return values (as a mutable list), shared
# by get_video() and get_mesh() so the model is built only once.
setup_list = None

def get_video():
    """Gradio callback: render the panning video and return its file path.

    Runs ``setup()`` on first use and caches the result in the module-level
    ``setup_list``. The video is rendered for whatever latent is currently
    stored in ``setup_list[4]``.

    Returns:
        Path of the MP4 written by ``generate_video``.
    """
    global setup_list
    if setup_list is None:
        setup_list = list(setup())
    generate_video(*setup_list)
    torch.cuda.empty_cache()
    # Derive the path from the same results directory generate_video() writes
    # into, instead of hard-coding a copy that can drift from the options.
    return os.path.join(setup_list[0].results_dst_dir, 'images_paper_video', 'video_0000000.mp4')

def get_mesh():
    """Gradio callback: sample a new human and return its mesh plot and renders.

    Runs ``setup()`` on first use and caches the result in the module-level
    ``setup_list``. A fresh latent code is drawn and stored in
    ``setup_list[4]`` (so a later ``get_video`` call renders the same
    identity), then the three-view image and the mesh are generated.

    Returns:
        ``(fig, image)``: a plotly ``Figure`` holding the mesh as a ``Mesh3d``
        trace, and the PIL image opened from the PNG written by
        ``generate_rgb``.
    """
    global setup_list
    if setup_list is None:
        setup_list = list(setup())
    opt, device = setup_list[0], setup_list[2]
    setup_list[4] = torch.randn(1, opt.style_dim, device=device)
    generate_rgb(*setup_list)
    mesh = generate_mesh(*setup_list)
    torch.cuda.empty_cache()

    # Transposing turns the per-vertex / per-face rows into three coordinate
    # (resp. index) arrays, which is the layout Mesh3d takes.
    x, y, z = np.asarray(mesh.vertices).T
    i, j, k = np.asarray(mesh.faces).T
    fig = go.Figure(go.Mesh3d(x=x, y=y, z=z,
                    i=i, j=j, k=k,
                    color="lightpink",
                    lighting=dict(ambient=0.5,
                                    diffuse=1,
                                    fresnel=4,
                                    specular=0.5,
                                    roughness=0.05,
                                    facenormalsepsilon=0,
                                    vertexnormalsepsilon=0),))

    # Derive the path from the same results directory generate_rgb() writes
    # into, instead of hard-coding a copy that can drift from the options.
    path = os.path.join(opt.results_dst_dir, 'images_paper_fig', '0000000.png')
    image = Image.open(path)

    return fig, image
    
# Introductory text rendered in the left column of the demo page. A plain
# string is enough: the text contains no placeholders to interpolate.
markdown='''
  # EVA3D: Compositional 3D Human Generation from 2D Image Collections
  
  Authored by Fangzhou Hong, Zhaoxi Chen, Yushi Lan, Liang Pan, Ziwei Liu

  The space demo for the ICLR 2023 Spotlight paper "EVA3D: Compositional 3D Human Generation from 2D Image Collections".

  ### Useful links:
  - [Official Github Repo](https://github.com/hongfz16/EVA3D)
  - [Project Page](https://hongfz16.github.io/projects/EVA3D.html)
  - [arXiv Link](https://arxiv.org/abs/2210.04888)

  Licensed under the S-Lab License.

  First use button "Generate RGB & Mesh" to randomly sample a 3D human. Then push button "Generate Video" to generate a panning video of the generated human.
'''

# Page layout: description on the left; on the right the three-view image on
# top and, below it, the mesh plot next to the video. Two buttons underneath
# trigger the callbacks.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown(markdown)
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    # NOTE(review): the `shape` argument may not be accepted by
                    # newer Gradio releases -- confirm against the pinned version.
                    image=gr.Image(type="pil",shape=(512,256*3))
            with gr.Row():
                with gr.Column():
                    mesh = gr.Plot()
                with gr.Column():
                    video=gr.Video()
    # with gr.Row():
    #   numberoframes = gr.Slider( minimum=30, maximum=250,label='Number Of Frame For Video Generation')
    #   model_name=gr.Dropdown(choices=["ffhq","afhq"],label="Choose Model Type")
    #   mesh_type=gr.Dropdown(choices=["DepthMesh","Marching Cubes"],label="Choose Mesh Type")
    with gr.Row():
        btn = gr.Button(value="Generate RGB & Mesh")
        btn_2=gr.Button(value="Generate Video")

    # Neither callback takes inputs; get_mesh fills the plot and the image,
    # get_video fills the video player.
    btn.click(get_mesh,[],[mesh,image])
    btn_2.click(get_video,[],[video])

demo.launch()