import sys
import os

# Fetch the EVA3D source tree so its modules (options, model, dataset, utils) can be imported,
# and copy its assets next to this script.
os.system("git clone https://github.com/hongfz16/EVA3D.git")
sys.path.append("EVA3D")
os.system("cp -r EVA3D/assets .")

os.system(f"{sys.executable} -m pip install -U fvcore plotly")

# Build the PyTorch3D wheel tag from the running Python, CUDA and PyTorch versions,
# then install the matching prebuilt wheel.
import torch
pyt_version_str = torch.__version__.split("+")[0].replace(".", "")
version_str = "".join([
    f"py3{sys.version_info.minor}_cu",
    torch.version.cuda.replace(".", ""),
    f"_pyt{pyt_version_str}"
])

os.system(f"{sys.executable} -m pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html")

import os
import html
import glob
import uuid
import hashlib
import requests
from tqdm import tqdm
from pdb import set_trace as st

from download_models import download_file

# Metadata (Google Drive URL, expected size and MD5) for the released files; the actual
# download below goes through the Hugging Face Hub instead.
eva3d_deepfashion_model = dict(file_url='https://drive.google.com/uc?id=1SYPjxnHz3XPRhTarx_Lw8SG_iz16QUMU',
                               alt_url='', file_size=160393221, file_md5='d0fae86edf76c52e94223bd3f39b2157',
                               file_path='checkpoint/512x256_deepfashion/volume_renderer/models_0420000.pt',)

smpl_model = dict(file_url='https://drive.google.com/uc?id={}'.format(os.environ.get('smpl_link', '')),
                  alt_url='', file_size=39001280, file_md5='65dc7f162f3ef21a38637663c57e14a7',
                  file_path='smpl_models/smpl/SMPL_NEUTRAL.pkl',)

from huggingface_hub import hf_hub_download


def download_pretrained_models():
    """Fetch the EVA3D DeepFashion checkpoint and the SMPL neutral body model from the
    Hugging Face Hub (using the access token in the hf_token environment variable) and
    copy them to the paths the EVA3D code expects."""
    print('Downloading EVA3D model pretrained on DeepFashion.')
    eva3d_ckpt = hf_hub_download(repo_id="hongfz16/EVA3D", filename="models_0420000.pt", token=os.environ['hf_token'])
    os.system("mkdir -p checkpoint/512x256_deepfashion/volume_renderer")
    os.system("mkdir -p smpl_models/smpl")
    os.system(f"cp {eva3d_ckpt} checkpoint/512x256_deepfashion/volume_renderer/models_0420000.pt")

    print('Downloading SMPL model.')
    smpl_pkl = hf_hub_download(repo_id="hongfz16/EVA3D", filename="SMPL_NEUTRAL.pkl", token=os.environ['hf_token'])
    os.system(f"cp {smpl_pkl} smpl_models/smpl/SMPL_NEUTRAL.pkl")


download_pretrained_models()
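

# Optional sanity check (a sketch, not part of the original download flow): the metadata dicts
# above already record the expected size and MD5 of each file, so the copied checkpoints could
# be verified with the imported hashlib module. verify_download is a hypothetical helper and is
# not called anywhere in this demo.
def verify_download(meta):
    """Return True if the file at meta['file_path'] matches the recorded size and MD5 checksum."""
    path = meta['file_path']
    if os.path.getsize(path) != meta['file_size']:
        return False
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        for block in iter(lambda: f.read(1 << 20), b''):
            digest.update(block)
    return digest.hexdigest() == meta['file_md5']
# Example usage: assert verify_download(eva3d_deepfashion_model) and verify_download(smpl_model)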


import os
import torch
import trimesh
import imageio
import pickle
import numpy as np
from munch import *
from PIL import Image
from tqdm import tqdm
from torch.nn import functional as F
from torch.utils import data
from torchvision import utils
from torchvision import transforms
from skimage.measure import marching_cubes
from scipy.spatial import Delaunay
from scipy.spatial.transform import Rotation as R

# EVA3D modules, importable because the cloned repo was appended to sys.path above.
from options import BaseOptions
from model import VoxelHumanGenerator as Generator
from dataset import DeepFashionDataset, DemoDataset
from utils import (
    generate_camera_params,
    align_volume,
    extract_mesh_with_marching_cubes,
    xyz2mesh,
    requires_grad,
    create_mesh_renderer,
    create_cameras
)
from pytorch3d.io import load_objs_as_meshes, load_obj
from pytorch3d.structures import Meshes
from pytorch3d.renderer import (
    FoVPerspectiveCameras, look_at_view_transform, look_at_rotation,
    RasterizationSettings, MeshRenderer, MeshRasterizer, BlendParams,
    SoftSilhouetteShader, HardPhongShader, PointLights, TexturesVertex,
)

# Fix the random seeds so repeated runs of the demo produce the same samples.
torch.random.manual_seed(8888)
import random
random.seed(8888)

# Total horizontal camera sweep used for the panning video (60 degrees, i.e. +/- 30 degrees).
panning_angle = np.pi / 3
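# Frame schedule in generate_video below (120 frames at 30 fps, i.e. a 4 s clip): frames 0-29 pan
# from the un-rotated view to +panning_angle/2, frames 30-89 sweep across to -panning_angle/2,
# and frames 90-119 return to the un-rotated view.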


def sample_latent(opt, device):
    # Placeholder; latent codes are sampled directly with torch.randn in setup() and get_mesh().
    return


def generate_rgb(opt, g_ema, device, mean_latent, sample_z, sample_trans, sample_beta, sample_theta, sample_cam_extrinsics, sample_focals):
    requires_grad(g_ema, False)
    g_ema.is_train = False
    g_ema.train_renderer = False
    img_list = []
    # Render the same identity from three viewpoints: root orientation rotated by +pi/8, 0 and -pi/8 about the y axis.
    for k in range(3):
        if k == 0:
            delta = R.from_rotvec(np.pi/8 * np.array([0, 1, 0]))
        elif k == 2:
            delta = R.from_rotvec(-np.pi/8 * np.array([0, 1, 0]))
        else:
            delta = R.from_rotvec(0 * np.array([0, 1, 0]))
        r = R.from_rotvec(sample_theta[0, :3].cpu().numpy())
        new_r = delta * r
        new_sample_theta = sample_theta.clone()
        new_sample_theta[0, :3] = torch.from_numpy(new_r.as_rotvec()).to(device)

        with torch.no_grad():
            j = 0
            chunk = 1
            out = g_ema([sample_z[j:j+chunk]],
                        sample_cam_extrinsics[j:j+chunk],
                        sample_focals[j:j+chunk],
                        sample_beta[j:j+chunk],
                        new_sample_theta[j:j+chunk],
                        sample_trans[j:j+chunk],
                        truncation=opt.truncation_ratio,
                        truncation_latent=mean_latent,
                        return_eikonal=False,
                        return_normal=False,
                        return_mask=False,
                        fix_viewdir=True)

        rgb_images_thumbs = out[1].detach().cpu()[..., :3].permute(0, 3, 1, 2)
        g_ema.zero_grad()
        img_list.append(rgb_images_thumbs)

    # Save the three views side by side as a single figure.
    utils.save_image(torch.cat(img_list, 0),
                     os.path.join(opt.results_dst_dir, 'images_paper_fig', '{}.png'.format(str(0).zfill(7))),
                     nrow=3,
                     normalize=True,
                     range=(-1, 1),
                     padding=0,)


def generate_mesh(opt, g_ema, device, mean_latent, sample_z, sample_trans, sample_beta, sample_theta, sample_cam_extrinsics, sample_focals):
    # Map the latent code through the style network, query the posed SDF on a 350^3 grid,
    # run marching cubes and lightly smooth the result.
    latent = g_ema.styles_and_noise_forward(sample_z[:1], None, opt.truncation_ratio,
                                            mean_latent, False)

    sdf = g_ema.renderer.marching_cube_posed(latent[0], sample_beta, sample_theta, resolution=350, size=1.4).detach()
    marching_cubes_mesh, _, _ = extract_mesh_with_marching_cubes(sdf, level_set=0)
    marching_cubes_mesh = trimesh.smoothing.filter_humphrey(marching_cubes_mesh, beta=0.2, iterations=5)

    return marching_cubes_mesh
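# Note: setup() also creates a 'marching_cubes_meshes_posed' results folder, so the extracted mesh
# could additionally be written to disk for download (a possible extension, not done in this demo),
# e.g. inside generate_mesh:
#   marching_cubes_mesh.export(os.path.join(opt.results_dst_dir, 'marching_cubes_meshes_posed', 'mesh_0000000.obj'))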


def generate_video(opt, g_ema, device, mean_latent, sample_z, sample_trans, sample_beta, sample_theta, sample_cam_extrinsics, sample_focals):
    video_list = []
    for k in tqdm(range(120)):
        # Panning schedule: sweep the root orientation between +/- panning_angle / 2.
        if k < 30:
            angle = (panning_angle / 2) * (k / 30)
        elif k >= 30 and k < 90:
            angle = panning_angle / 2 - panning_angle * ((k - 30) / 60)
        else:
            angle = -panning_angle / 2 * ((120 - k) / 30)
        delta = R.from_rotvec(angle * np.array([0, 1, 0]))
        r = R.from_rotvec(sample_theta[0, :3].cpu().numpy())
        new_r = delta * r
        new_sample_theta = sample_theta.clone()
        new_sample_theta[0, :3] = torch.from_numpy(new_r.as_rotvec()).to(device)
        with torch.no_grad():
            j = 0
            chunk = 1
            out = g_ema([sample_z[j:j+chunk]],
                        sample_cam_extrinsics[j:j+chunk],
                        sample_focals[j:j+chunk],
                        sample_beta[j:j+chunk],
                        new_sample_theta[j:j+chunk],
                        sample_trans[j:j+chunk],
                        truncation=opt.truncation_ratio,
                        truncation_latent=mean_latent,
                        return_eikonal=False,
                        return_normal=False,
                        return_mask=False,
                        fix_viewdir=True)
        rgb_images_thumbs = out[1].detach().cpu()[..., :3]
        g_ema.zero_grad()
        # Convert from [-1, 1] to [0, 255] for the video writer.
        video_list.append((rgb_images_thumbs.numpy() + 1) / 2. * 255. + 0.5)
    all_img = np.concatenate(video_list, 0).astype(np.uint8)
    imageio.mimwrite(os.path.join(opt.results_dst_dir, 'images_paper_video', 'video_{}.mp4'.format(str(0).zfill(7))), all_img, fps=30, quality=8)


def setup():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    opt = BaseOptions().parse()

    # Inference configuration matching the released 512x256 DeepFashion checkpoint.
    opt.training.batch = 1
    opt.training.chunk = 1
    opt.experiment.expname = '512x256_deepfashion'
    opt.dataset.dataset_path = 'demodataset'
    opt.rendering.depth = 5
    opt.rendering.width = 128
    opt.model.style_dim = 128
    opt.model.renderer_spatial_output_dim = [512, 256]
    opt.training.no_sphere_init = True
    opt.rendering.input_ch_views = 3
    opt.rendering.white_bg = True
    opt.model.voxhuman_name = 'eva3d_deepfashion'
    opt.training.deltasdf = True
    opt.rendering.N_samples = 28
    opt.experiment.ckpt = '420000'
    opt.inference.identities = 1
    opt.inference.truncation_ratio = 0.6

    opt.model.is_test = True
    opt.model.freeze_renderer = False
    opt.rendering.no_features_output = True
    opt.rendering.offset_sampling = True
    opt.rendering.static_viewdirs = True
    opt.rendering.force_background = True
    opt.rendering.perturb = 0
    opt.inference.size = opt.model.size
    opt.inference.camera = opt.camera
    opt.inference.renderer_output_size = opt.model.renderer_spatial_output_dim
    opt.inference.style_dim = opt.model.style_dim
    opt.inference.project_noise = opt.model.project_noise
    opt.inference.return_xyz = opt.rendering.return_xyz

    checkpoints_dir = os.path.join('checkpoint', opt.experiment.expname, 'volume_renderer')
    checkpoint_path = os.path.join(checkpoints_dir,
                                   'models_{}.pt'.format(opt.experiment.ckpt.zfill(7)))

    result_model_dir = 'iter_{}'.format(opt.experiment.ckpt.zfill(7))

    # Output layout: <results_dir>/<expname>/iter_0420000/{random,fixed}_angles/...
    results_dir_basename = os.path.join(opt.inference.results_dir, opt.experiment.expname)
    opt.inference.results_dst_dir = os.path.join(results_dir_basename, result_model_dir)
    if opt.inference.fixed_camera_angles:
        opt.inference.results_dst_dir = os.path.join(opt.inference.results_dst_dir, 'fixed_angles')
    else:
        opt.inference.results_dst_dir = os.path.join(opt.inference.results_dst_dir, 'random_angles')
    os.makedirs(opt.inference.results_dst_dir, exist_ok=True)
    os.makedirs(os.path.join(opt.inference.results_dst_dir, 'images_paper_fig'), exist_ok=True)
    os.makedirs(os.path.join(opt.inference.results_dst_dir, 'images_paper_video'), exist_ok=True)
    os.makedirs(os.path.join(opt.inference.results_dst_dir, 'marching_cubes_meshes_posed'), exist_ok=True)
    checkpoint = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)

    # Build the generator and load the pretrained weights, skipping any tensor whose shape does not match.
    g_ema = Generator(opt.model, opt.rendering, full_pipeline=False, voxhuman_name=opt.model.voxhuman_name).to(device)
    pretrained_weights_dict = checkpoint["g_ema"]
    model_dict = g_ema.state_dict()
    for k, v in pretrained_weights_dict.items():
        if v.size() == model_dict[k].size():
            model_dict[k] = v
        else:
            print(k)

    g_ema.load_state_dict(model_dict)

    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)])

    # These file lists point to the original training cluster; with dataset_path set to
    # 'demodataset' above, the demo falls back to DemoDataset.
    if 'deepfashion' in opt.dataset.dataset_path:
        file_list = '/mnt/lustre/fzhong/smplify-x/deepfashion_train_list/deepfashion_train_list_MAN.txt'
    elif '20w_fashion' in opt.dataset.dataset_path:
        file_list = '/mnt/lustre/fzhong/mmhuman3d/20w_fashion_result/nondress_flist.txt'
    else:
        file_list = None
    if file_list:
        dataset = DeepFashionDataset(opt.dataset.dataset_path, transform, opt.model.size,
                                     opt.model.renderer_spatial_output_dim, file_list)
    else:
        dataset = DemoDataset()

    # Mean latent used for truncation.
    if opt.inference.truncation_ratio < 1:
        with torch.no_grad():
            mean_latent = g_ema.mean_latent(opt.inference.truncation_mean, device)
    else:
        mean_latent = None

    g_ema.renderer.is_train = False
    g_ema.renderer.perturb = 0

    # Sample one set of SMPL parameters and one camera for the demo identity.
    sample_trans, sample_beta, sample_theta = dataset.sample_smpl_param(1, device, val=False)
    sample_cam_extrinsics, sample_focals = dataset.get_camera_extrinsics(1, device, val=False)

    # Discarded draw; only advances the random number generator state.
    torch.randn(1, opt.inference.style_dim, device=device)

    return opt.inference, g_ema, device, mean_latent, torch.randn(1, opt.inference.style_dim, device=device), \
        sample_trans, sample_beta, sample_theta, sample_cam_extrinsics, sample_focals
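# setup() returns, in order: (0) inference options, (1) g_ema, (2) device, (3) mean_latent,
# (4) a latent code z, (5) SMPL translation, (6) SMPL betas, (7) SMPL thetas,
# (8) camera extrinsics, (9) focal lengths. get_mesh() below relies on this order when it
# overwrites index 4 with a freshly sampled latent.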


import gradio as gr
import plotly.graph_objects as go
from PIL import Image

# Lazily-initialised list holding the outputs of setup(); shared by both button callbacks
# so the model is only loaded once.
setup_list = None


def get_video():
    global setup_list
    if setup_list is None:
        setup_list = list(setup())
    generate_video(*setup_list)
    torch.cuda.empty_cache()
    # Hard-coded path where generate_video writes the clip (matches the directories created in setup()).
    path = 'evaluations/512x256_deepfashion/iter_0420000/random_angles/images_paper_video/video_0000000.mp4'
    return path


def get_mesh():
    global setup_list
    if setup_list is None:
        setup_list = list(setup())
    # Resample the latent code (index 4) so every click yields a new identity.
    setup_list[4] = torch.randn(1, setup_list[0].style_dim, device=setup_list[2])
    generate_rgb(*setup_list)
    mesh = generate_mesh(*setup_list)
    torch.cuda.empty_cache()

    # Split vertex coordinates and triangle indices for the plotly Mesh3d trace.
    x = np.asarray(mesh.vertices).T[0]
    y = np.asarray(mesh.vertices).T[1]
    z = np.asarray(mesh.vertices).T[2]

    i = np.asarray(mesh.faces).T[0]
    j = np.asarray(mesh.faces).T[1]
    k = np.asarray(mesh.faces).T[2]
    fig = go.Figure(go.Mesh3d(x=x, y=y, z=z,
                              i=i, j=j, k=k,
                              color="lightpink",
                              lighting=dict(ambient=0.5,
                                            diffuse=1,
                                            fresnel=4,
                                            specular=0.5,
                                            roughness=0.05,
                                            facenormalsepsilon=0,
                                            vertexnormalsepsilon=0),))

    # Multi-view figure written by generate_rgb.
    path = 'evaluations/512x256_deepfashion/iter_0420000/random_angles/images_paper_fig/0000000.png'

    image = Image.open(path)

    return fig, image


markdown = '''
# EVA3D: Compositional 3D Human Generation from 2D Image Collections

Authored by Fangzhou Hong, Zhaoxi Chen, Yushi Lan, Liang Pan, Ziwei Liu

A Hugging Face Space demo for the ICLR 2023 Spotlight paper "EVA3D: Compositional 3D Human Generation from 2D Image Collections".

### Useful links:
- [Official GitHub Repo](https://github.com/hongfz16/EVA3D)
- [Project Page](https://hongfz16.github.io/projects/EVA3D.html)
- [arXiv Link](https://arxiv.org/abs/2210.04888)

Licensed under the S-Lab License.

First, click "Generate RGB & Mesh" to randomly sample a 3D human. Then click "Generate Video" to render a panning video of the sampled human.
'''


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown(markdown)
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    image = gr.Image(type="pil", shape=(512, 256 * 3))
            with gr.Row():
                with gr.Column():
                    mesh = gr.Plot()
                with gr.Column():
                    video = gr.Video()

    with gr.Row():
        btn = gr.Button(value="Generate RGB & Mesh")
        btn_2 = gr.Button(value="Generate Video")

    # Both callbacks take no inputs; outputs go to the plot/image and video components.
    btn.click(get_mesh, [], [mesh, image])
    btn_2.click(get_video, [], [video])
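# For a hosted deployment one might enable request queuing so long renders are serialized rather
# than timing out, e.g. demo.queue().launch() in place of the call below (a suggestion, not part
# of the original demo).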

demo.launch()