# Hugging Face Space page banner (scrape residue): Running on L40S
# Open Source Model Licensed under the Apache License Version 2.0 | |
# and Other Licenses of the Third-Party Components therein: | |
# The below Model in this distribution may have been modified by THL A29 Limited | |
# ("Tencent Modifications"). All Tencent Modifications are Copyright (C) 2024 THL A29 Limited. | |
# Copyright (C) 2024 THL A29 Limited, a Tencent company. All rights reserved. | |
# The below software and/or models in this distribution may have been | |
# modified by THL A29 Limited ("Tencent Modifications"). | |
# All Tencent Modifications are Copyright (C) THL A29 Limited. | |
# Hunyuan 3D is licensed under the TENCENT HUNYUAN NON-COMMERCIAL LICENSE AGREEMENT | |
# except for the third-party components listed below. | |
# Hunyuan 3D does not impose any additional limitations beyond what is outlined | |
# in the respective licenses of these third-party components.
# Users must comply with all terms and conditions of original licenses of these third-party | |
# components and must ensure that the usage of the third party components adheres to | |
# all relevant laws and regulations. | |
# For avoidance of doubts, Hunyuan 3D means the large language models and | |
# their software and algorithms, including trained model weights, parameters (including | |
# optimizer states), machine-learning model code, inference-enabling code, training-enabling code, | |
# fine-tuning enabling code and other elements of the foregoing made publicly available | |
# by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. | |
import spaces | |
import os | |
import warnings | |
import argparse | |
import gradio as gr | |
from glob import glob | |
import shutil | |
import torch | |
import numpy as np | |
from PIL import Image | |
from einops import rearrange | |
import pandas as pd | |
from huggingface_hub import snapshot_download | |
import sys | |
import subprocess | |
from glob import glob | |
# @spaces.GPU | |
# def check_env(): | |
# print(glob("/usr/local/cuda/*")) | |
# print(torch.cuda.is_available()) | |
# print(os.environ.get('CUDA_HOME', None)) | |
# os.environ['CUDA_HOME'] = '/usr/local/cuda' | |
# # Optionally, update PATH and LD_LIBRARY_PATH if needed | |
# os.environ['PATH'] = os.environ['CUDA_HOME'] + '/bin:' + os.environ['PATH'] | |
# os.environ['LD_LIBRARY_PATH'] = os.environ['CUDA_HOME'] + '/lib64:' + os.environ.get('LD_LIBRARY_PATH', '') | |
# check_env() | |
def install_cuda_toolkit():
    """Download and install the CUDA 12.2 toolkit, then export its environment.

    The NVIDIA runfile installer is fetched into ``/tmp`` with ``wget``, made
    executable and run with ``--silent --toolkit``. Afterwards ``CUDA_HOME``,
    ``PATH``, ``LD_LIBRARY_PATH`` and ``TORCH_CUDA_ARCH_LIST`` are set in
    ``os.environ``.

    Installation is best-effort: when a step exits with a non-zero status the
    failure is printed and the remaining installer steps are skipped, but the
    environment variables are still exported.

    Raises:
        OSError: if one of the external commands cannot be started.
        KeyError: if ``PATH`` is not set in the environment.
    """
    # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
    CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run"
    CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)

    install_steps = (
        ["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE],
        ["chmod", "+x", CUDA_TOOLKIT_FILE],
        [CUDA_TOOLKIT_FILE, "--silent", "--toolkit"],
    )
    for command in install_steps:
        return_code = subprocess.call(command)
        if return_code != 0:
            # Later steps depend on this one, so running them would only
            # hide the real cause of the failure.
            print("CUDA toolkit setup step failed (exit code %d): %s"
                  % (return_code, " ".join(command)))
            break

    cuda_home = "/usr/local/cuda"
    os.environ["CUDA_HOME"] = cuda_home
    os.environ["PATH"] = "%s/bin:%s" % (cuda_home, os.environ["PATH"])

    # lib64 is where the Linux x86_64 toolkit places its shared libraries;
    # the original "lib" entry is kept for backward compatibility.
    library_dirs = ["%s/lib64" % cuda_home, "%s/lib" % cuda_home]
    previous_library_path = os.environ.get("LD_LIBRARY_PATH")
    if previous_library_path:
        # Append only when non-empty: an empty entry (trailing ':') makes the
        # dynamic loader search the current working directory.
        library_dirs.append(previous_library_path)
    os.environ["LD_LIBRARY_PATH"] = ":".join(library_dirs)

    # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
    os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
# The toolkit installation runs before the project imports below; this
# statement order is kept from the original file.
install_cuda_toolkit()

from infer import seed_everything, save_gif
from infer import Text2Image, Removebg, Image2Views, Views2Mesh, GifRenderer
from third_party.check import check_bake_available

# Silence the noisy warning categories for the whole process.
for _ignored_category in (UserWarning, FutureWarning, DeprecationWarning):
    warnings.simplefilter('ignore', category=_ignored_category)

# Command-line options; every option has a default so the app starts without arguments.
parser = argparse.ArgumentParser()
parser.add_argument("--use_lite", action="store_true", default=False)
parser.add_argument("--mv23d_cfg_path", type=str, default="./svrm/configs/svrm.yaml")
parser.add_argument("--mv23d_ckt_path", type=str, default="weights/svrm/svrm.safetensors")
parser.add_argument("--text2image_path", type=str, default="weights/hunyuanDiT")
parser.add_argument("--save_memory", action="store_true", default=False)
parser.add_argument("--device", type=str, default="cuda:0")
args = parser.parse_args()
def download_models():
    """Fetch the three pretrained checkpoints with ``snapshot_download``.

    The target directories are created first. Each repository is then
    downloaded into its directory; a failing download is reported on stdout
    and does not prevent the remaining downloads from being attempted.
    """
    for folder in (
        "weights",
        "weights/hunyuanDiT",
        "third_party/weights/DUSt3R_ViTLarge_BaseDecoder_512_dpt",
    ):
        os.makedirs(folder, exist_ok=True)

    # (name used in the log messages, hub repository, local target directory)
    checkpoints = (
        ("Hunyuan3D-1", "tencent/Hunyuan3D-1", "./weights"),
        (
            "HunyuanDiT",
            "Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers-Distilled",
            "./weights/hunyuanDiT",
        ),
        (
            "DUSt3R",
            "naver/DUSt3R_ViTLarge_BaseDecoder_512_dpt",
            "./third_party/weights/DUSt3R_ViTLarge_BaseDecoder_512_dpt",
        ),
    )
    for label, repo_id, local_dir in checkpoints:
        try:
            snapshot_download(
                repo_id=repo_id,
                local_dir=local_dir,
                resume_download=True
            )
            print(f"Successfully downloaded {label} model")
        except Exception as e:
            print(f"Error downloading {label}: {e}")
download_models()

# Baking is optional: when the baker cannot be imported or the availability
# check fails, the reason is printed and the app runs without baking.
try:
    from third_party.mesh_baker import MeshBaker
    # Explicit check instead of `assert`, which is removed under `python -O`
    # and would then report baking as available unconditionally.
    if not check_bake_available():
        raise RuntimeError("check_bake_available() reported that baking is unavailable")
    BAKE_AVAILEBLE = True
except Exception as err:
    print(err)
    print("import baking related files fail, running without baking")
    BAKE_AVAILEBLE = False
################################################################ | |
# initial setting | |
################################################################ | |
# HTML shown at the top of the page: project title plus report/code links.
CONST_HEADER = '''
<h2><a href='https://github.com/tencent/Hunyuan3D-1' target='_blank'><b>Tencent Hunyuan3D-1.0: A Unified Framework for Text-to-3D and Image-to-3D Generation</b></a></h2>
⭐️Technical report: <a href='https://arxiv.org/pdf/2411.02293' target='_blank'>ArXiv</a>. ⭐️Code: <a href='https://github.com/tencent/Hunyuan3D-1' target='_blank'>GitHub</a>.
'''

# Usage notes shown below the input tabs (typos in the user-facing text fixed).
CONST_NOTE = '''
❗️❗️❗️Usage❗️❗️❗️<br>
Limited by format, the model can only export *.obj mesh with vertex colors. The "texture" mode can only work on *.glb.<br>
Please click "Do Rendering" to export a GIF.<br>
You can click "Do Baking" to bake multi-view images onto the shape.<br>
If the results aren't satisfactory, please try a different random seed (default is 0).
'''
################################################################ | |
# prepare text examples and image examples | |
################################################################ | |
def get_example_img_list():
    """Return the paths matching ``./demos/example_*.png`` in sorted order."""
    print('Loading example img list ...')
    image_paths = glob('./demos/example_*.png')
    image_paths.sort()
    return image_paths
def get_example_txt_list():
    """Return the example prompts stored in ``./demos/example_list.txt``.

    Every line of the file becomes one list entry with surrounding
    whitespace removed.

    Raises:
        OSError: if the file cannot be opened.
    """
    print('Loading example txt list ...')
    # The context manager closes the handle; UTF-8 is explicit so non-ASCII
    # prompts do not depend on the process locale.
    with open('./demos/example_list.txt', encoding='utf-8') as handle:
        return [line.strip() for line in handle]
example_is = get_example_img_list()
example_ts = get_example_txt_list()

################################################################
# initial models
################################################################

print(f"loading {args.text2image_path}")

# Keyword arguments shared by the workers that take both a device and the
# memory-saving switch.
_runtime_opts = dict(device=args.device, save_memory=args.save_memory)

worker_t2i = Text2Image(pretrain=args.text2image_path, **_runtime_opts)
worker_xbg = Removebg()
worker_i2v = Image2Views(use_lite=args.use_lite, **_runtime_opts)
worker_v23 = Views2Mesh(
    args.mv23d_cfg_path,
    args.mv23d_ckt_path,
    use_lite=args.use_lite,
    **_runtime_opts,
)
worker_gif = GifRenderer(device=args.device)

# The baker only exists when its import and availability check succeeded.
if BAKE_AVAILEBLE:
    worker_baker = MeshBaker(device=args.device)
### functional modules | |
def stage_0_t2i(text, image, seed, step):
    """Allocate an output folder and produce the initial image for the pipeline.

    Output folders live under ``./outputs/app_output`` and are named ``0`` to
    ``29``. The lowest free number is used (``0`` is recycled when all are
    taken), and the folder following the chosen one is removed so that a free
    slot exists for the next request.

    Args:
        text: prompt; when empty or ``None`` the uploaded ``image`` is used.
        image: uploaded image (object with ``save(path)``), or ``None``.
        seed: seed forwarded to ``worker_t2i``.
        step: step count forwarded to ``worker_t2i``.

    Returns:
        ``(dst, save_folder)``. Without text, ``dst`` is the path of the saved
        upload (``<save_folder>/img.png``); with text, ``dst`` is whatever
        ``worker_xbg`` returns for the generated image.

    Raises:
        gr.Error: if neither text nor image is provided.
    """
    # Validate before touching the disk. The original `raise` sat behind a
    # `return` and was unreachable, so a path to a missing file was returned.
    if not text and image is None:
        raise gr.Error("Upload image or provide text ...")

    output_root = './outputs/app_output'
    slot_count = 30
    os.makedirs(output_root, exist_ok=True)

    # Only numeric entries are slots; anything else (hidden files, stray
    # directories) is ignored instead of crashing int().
    used_slots = {int(name) for name in os.listdir(output_root) if name.isdecimal()}
    free_slots = set(range(slot_count)) - used_slots
    if free_slots:
        cur_id = min(free_slots)
    else:
        shutil.rmtree(f"{output_root}/0")
        cur_id = 0

    next_folder = f"{output_root}/{(cur_id + 1) % slot_count}"
    if os.path.exists(next_folder):
        shutil.rmtree(next_folder)

    save_folder = f'{output_root}/{cur_id}'
    os.makedirs(save_folder, exist_ok=True)
    dst = save_folder + '/img.png'

    if not text:
        image.save(dst)
        return dst, save_folder

    image = worker_t2i(text, seed, step)
    image.save(dst)
    # NOTE(review): save_folder is passed positionally here, while stage_1_xbg
    # calls worker_xbg(image, force=...) -- confirm the intended second argument.
    dst = worker_xbg(image, save_folder)
    return dst, save_folder
def stage_1_xbg(image, save_folder, force_remove):
    """Run ``worker_xbg`` on the input and save the result as ``img_nobg.png``.

    Args:
        image: an image object, or a file path that is opened with PIL.
        save_folder: folder that receives ``img_nobg.png``.
        force_remove: forwarded to ``worker_xbg`` as ``force``.

    Returns:
        Path of the saved image.
    """
    source = Image.open(image) if isinstance(image, str) else image
    output_path = save_folder + '/img_nobg.png'
    worker_xbg(source, force=force_remove).save(output_path)
    return output_path
def stage_2_i2v(image, seed, step, save_folder):
    """Generate the multi-view images and a preview sheet for display.

    Args:
        image: an image object, or a file path that is opened with PIL.
        seed: seed forwarded to ``worker_i2v``.
        step: step count forwarded to ``worker_i2v``.
        save_folder: folder that receives ``views.gif``.

    Returns:
        ``(views_img, cond_img, show_img)``: the first two elements of the
        worker's result plus the re-tiled preview as a PIL image.
    """
    if isinstance(image, str):
        image = Image.open(image)

    res_img, pils = worker_i2v(image, seed, step)
    save_gif(pils, save_folder + '/views.gif')
    views_img, cond_img = res_img[0], res_img[1]

    # Cut the sheet into tiles (3 rows x 2 columns), reorder them with
    # worker_i2v.order, then lay them out again as 2 rows x 3 columns.
    tiles = rearrange(
        np.asarray(views_img, dtype=np.uint8),
        '(n h) (m w) c -> (n m) h w c', n=3, m=2,
    )
    tiles = tiles[worker_i2v.order, ...]
    sheet = rearrange(tiles, '(n m) h w c -> (n h) (m w) c', n=2, m=3)
    show_img = Image.fromarray(sheet)
    return views_img, cond_img, show_img
def stage_3_v23(
    views_pil,
    cond_pil,
    seed,
    save_folder,
    target_face_count = 30000,
    texture_color = 'texture'
):
    """Run ``worker_v23`` on the views and return the mesh paths to display.

    Args:
        views_pil: multi-view image passed to ``worker_v23``.
        cond_pil: condition image passed to ``worker_v23``.
        seed: seed forwarded to ``worker_v23``.
        save_folder: folder forwarded to ``worker_v23``; the returned paths
            point into it (presumably the worker writes the meshes there --
            confirm against Views2Mesh).
        target_face_count: forwarded to ``worker_v23``.
        texture_color: ``'texture'`` enables texture mapping; any other value
            disables it.

    Returns:
        ``(obj_dst, glb_dst)``: the vertex-color OBJ path, and the GLB path or
        ``None`` when texture mapping is disabled.
    """
    do_texture_mapping = texture_color == 'texture'
    worker_v23(
        views_pil,
        cond_pil,
        seed = seed,
        save_folder = save_folder,
        target_face_count = target_face_count,
        do_texture_mapping = do_texture_mapping
    )
    glb_dst = save_folder + '/mesh.glb' if do_texture_mapping else None
    # The dead assignment to '/mesh.obj' was dropped: it was overwritten
    # immediately. Gradio can only show vertex shading for OBJ files.
    obj_dst = save_folder + '/mesh_vertex_colors.obj'
    return obj_dst, glb_dst
def stage_3p_baking(save_folder, color, bake):
    """Bake the mesh in ``save_folder`` when texture mode and baking are both on.

    Returns:
        The path returned by ``worker_baker`` with ``.obj`` replaced by
        ``.glb``, or ``None`` when baking is not requested.
    """
    if not (color == "texture" and bake):
        return None
    baked_obj = worker_baker(save_folder)
    return baked_obj.replace(".obj", ".glb")
def stage_4_gif(save_folder, color, bake, render):
    """Render a GIF of the first mesh found in ``save_folder``.

    Candidates are checked in order: ``view_1/bake/mesh.obj``,
    ``view_0/bake/mesh.obj``, then ``mesh.obj``. ``color`` and ``bake`` are
    accepted but not used.

    Returns:
        Path of the GIF (the mesh path with ``.obj`` replaced by ``.gif``),
        or ``None`` when ``render`` is falsy.

    Raises:
        FileNotFoundError: if none of the candidate meshes exists.
    """
    if not render:
        return None

    candidates = ('/view_1/bake/mesh.obj', '/view_0/bake/mesh.obj', '/mesh.obj')
    # The generator stops at the first existing candidate, like the original
    # if/elif chain.
    obj_dst = next(
        (save_folder + suffix for suffix in candidates
         if os.path.exists(save_folder + suffix)),
        None,
    )
    if obj_dst is None:
        print(save_folder)
        raise FileNotFoundError("mesh obj file not found")

    gif_dst = obj_dst.replace(".obj", ".gif")
    worker_gif(obj_dst, gif_dst_path=gif_dst)
    return gif_dst
def check_image_available(image):
    """Inspect the uploaded image and configure the background-removal checkbox.

    Args:
        image: PIL image from the input component, or ``None`` when the
            input is empty.

    Returns:
        ``(message, checkbox_update)``. Background removal is forced on
        (checked, not editable) for RGB images and for RGBA images whose alpha
        channel holds a single value; otherwise it is left to the user.

    Raises:
        Exception: if the image mode is neither ``RGBA`` nor ``RGB``.
    """
    if image is None:
        # An empty/cleared input previously crashed on `image.mode`; clear the
        # message and restore the checkbox to its initial state instead.
        return "", gr.update(value=True, interactive=True)

    forced_msg = "The alpha channel is missing or invalid. The background removal option is selected for you."

    if image.mode == "RGB":
        return forced_msg, gr.update(value=True, interactive=False)
    if image.mode != "RGBA":
        raise Exception("Image Error")

    alpha_channel = np.array(image)[:, :, 3]
    if len(np.unique(alpha_channel)) == 1:
        # A constant alpha channel carries no mask information.
        return forced_msg, gr.update(value=True, interactive=False)

    msg = "The image has four channels, and you can choose to remove the background or not."
    return msg, gr.update(value=False, interactive=True)
def update_bake_render(color):
    """Return updates for the (baking, rendering) checkboxes after a color change.

    Args:
        color: selected color mode, ``"vertex"`` or ``"texture"``.

    Returns:
        Two ``gr.update`` objects, for the baking and the rendering checkbox
        in that order. ``"vertex"`` unchecks and locks both; any other value
        unlocks rendering, and unlocks baking only if the baker is available.
    """
    if color == "vertex":
        locked_off = dict(value=False, interactive=False)
        return gr.update(**locked_off), gr.update(**locked_off)
    # Unlocking baking without a loaded baker would let the user request a
    # bake that fails on the undefined `worker_baker`.
    return gr.update(interactive=BAKE_AVAILEBLE), gr.update(interactive=True)
# =============================================================== | |
# gradio display | |
# =============================================================== | |
# NOTE(review): the nesting of the layout containers below was reconstructed
# from statement order -- verify it against the rendered layout.
with gr.Blocks() as demo:
    gr.Markdown(CONST_HEADER)
    with gr.Row(variant="panel"):

        ###### Input region
        with gr.Column(scale=2):

            ### Text input region
            with gr.Tab("Text to 3D"):
                with gr.Column():
                    text = gr.TextArea(
                        '一只黑白相间的熊猫在白色背景上居中坐着，呈现出卡通风格和可爱氛围。',
                        lines=3,
                        max_lines=20,
                        label='Input text',
                    )
                    with gr.Row():
                        textgen_color = gr.Radio(
                            choices=["vertex", "texture"], label="Color", value="texture"
                        )
                    with gr.Row():
                        textgen_render = gr.Checkbox(label="Do Rendering", value=True, interactive=True)
                        # Checked and editable only when the baker is available.
                        textgen_bake = gr.Checkbox(
                            label="Do Baking", value=BAKE_AVAILEBLE, interactive=BAKE_AVAILEBLE
                        )
                    textgen_color.change(
                        fn=update_bake_render,
                        inputs=textgen_color,
                        outputs=[textgen_bake, textgen_render],
                    )
                    with gr.Row():
                        textgen_seed = gr.Number(
                            value=0, label="T2I seed", precision=0, interactive=True
                        )
                        textgen_step = gr.Number(
                            value=25, label="T2I steps", precision=0,
                            minimum=10, maximum=50, interactive=True,
                        )
                        textgen_SEED = gr.Number(
                            value=0, label="Gen seed", precision=0, interactive=True
                        )
                        textgen_STEP = gr.Number(
                            value=50, label="Gen steps", precision=0,
                            minimum=40, maximum=100, interactive=True,
                        )
                        textgen_max_faces = gr.Number(
                            value=90000, label="Face number", precision=0,
                            minimum=5000, maximum=1000000, interactive=True,
                        )
                    with gr.Row():
                        textgen_submit = gr.Button("Generate", variant="primary")
                    with gr.Row():
                        gr.Examples(
                            examples=example_ts,
                            inputs=[text],
                            label="Text examples",
                            examples_per_page=10,
                        )

            ### Image input region
            with gr.Tab("Image to 3D"):
                with gr.Row():
                    input_image = gr.Image(
                        label="Input image", width=256, height=256, type="pil",
                        image_mode="RGBA", sources="upload", interactive=True,
                    )
                with gr.Row():
                    alert_message = gr.Markdown("")  # for warning
                with gr.Row():
                    imggen_color = gr.Radio(
                        choices=["vertex", "texture"], label="Color", value="texture"
                    )
                with gr.Row():
                    imggen_removebg = gr.Checkbox(label="Remove Background", value=True, interactive=True)
                    imggen_render = gr.Checkbox(label="Do Rendering", value=True, interactive=True)
                    # Checked and editable only when the baker is available.
                    imggen_bake = gr.Checkbox(
                        label="Do Baking", value=BAKE_AVAILEBLE, interactive=BAKE_AVAILEBLE
                    )
                input_image.change(
                    fn=check_image_available,
                    inputs=input_image,
                    outputs=[alert_message, imggen_removebg],
                )
                imggen_color.change(
                    fn=update_bake_render,
                    inputs=imggen_color,
                    outputs=[imggen_bake, imggen_render],
                )
                with gr.Row():
                    imggen_SEED = gr.Number(
                        value=0, label="Gen seed", precision=0, interactive=True
                    )
                    imggen_STEP = gr.Number(
                        value=50, label="Gen steps", precision=0,
                        minimum=40, maximum=100, interactive=True,
                    )
                    imggen_max_faces = gr.Number(
                        value=90000, label="Face number", precision=0,
                        minimum=5000, maximum=1000000, interactive=True,
                    )
                with gr.Row():
                    imggen_submit = gr.Button("Generate", variant="primary")
                with gr.Row():
                    gr.Examples(
                        examples=example_is,
                        inputs=[input_image],
                        label="Img examples",
                        examples_per_page=10,
                    )

            gr.Markdown(CONST_NOTE)

        ###### Output region
        with gr.Column(scale=3):
            with gr.Row():
                with gr.Column(scale=2):
                    rem_bg_image = gr.Image(
                        label="Image without background",
                        type="pil",
                        image_mode="RGBA",
                        interactive=False,
                    )
                with gr.Column(scale=3):
                    result_image = gr.Image(
                        label="Multi-view images",
                        type="pil",
                        interactive=False,
                    )
            with gr.Row():
                result_3dobj = gr.Model3D(
                    clear_color=[0.0, 0.0, 0.0, 0.0],
                    label="OBJ vertex color",
                    show_label=True,
                    visible=True,
                    camera_position=[90, 90, None],
                    interactive=False,
                )
                result_gif = gr.Image(label="GIF", interactive=False)
            with gr.Row():
                result_3dglb_texture = gr.Model3D(
                    clear_color=[0.0, 0.0, 0.0, 0.0],
                    label="GLB texture color",
                    show_label=True,
                    visible=True,
                    camera_position=[90, 90, None],
                    interactive=False,
                )
                result_3dglb_baked = gr.Model3D(
                    clear_color=[0.0, 0.0, 0.0, 0.0],
                    label="GLB baked color",
                    show_label=True,
                    visible=True,
                    camera_position=[90, 90, None],
                    interactive=False,
                )
            with gr.Row():
                gr.Markdown(
                    "Due to Gradio limitations, OBJ files are displayed with vertex shading only, "
                    "while GLB files can be viewed with texture shading. <br>For the best experience, "
                    "we recommend downloading the GLB files and opening them with 3D software "
                    "like Blender or MeshLab."
                )

    #===============================================================
    # gradio running code
    #===============================================================

    # Per-session values handed from one pipeline stage to the next.
    none = gr.State(None)
    save_folder = gr.State()
    cond_image = gr.State()
    views_image = gr.State()
    text_image = gr.State()

    # Text pipeline: text-to-image -> multi-view -> mesh -> baking -> GIF.
    # Each stage runs only if the previous one succeeded.
    textgen_submit.click(
        fn=stage_0_t2i,
        inputs=[text, none, textgen_seed, textgen_step],
        outputs=[rem_bg_image, save_folder],
    ).success(
        fn=stage_2_i2v,
        inputs=[rem_bg_image, textgen_SEED, textgen_STEP, save_folder],
        outputs=[views_image, cond_image, result_image],
    ).success(
        fn=stage_3_v23,
        inputs=[views_image, cond_image, textgen_SEED, save_folder, textgen_max_faces, textgen_color],
        outputs=[result_3dobj, result_3dglb_texture],
    ).success(
        fn=stage_3p_baking,
        inputs=[save_folder, textgen_color, textgen_bake],
        outputs=[result_3dglb_baked],
    ).success(
        fn=stage_4_gif,
        inputs=[save_folder, textgen_color, textgen_bake, textgen_render],
        outputs=[result_gif],
    ).success(lambda: print('Text_to_3D Done ...'))

    # Image pipeline: store upload -> background removal -> multi-view ->
    # mesh -> baking -> GIF.
    imggen_submit.click(
        fn=stage_0_t2i,
        inputs=[none, input_image, textgen_seed, textgen_step],
        outputs=[text_image, save_folder],
    ).success(
        fn=stage_1_xbg,
        inputs=[text_image, save_folder, imggen_removebg],
        outputs=[rem_bg_image],
    ).success(
        fn=stage_2_i2v,
        inputs=[rem_bg_image, imggen_SEED, imggen_STEP, save_folder],
        outputs=[views_image, cond_image, result_image],
    ).success(
        fn=stage_3_v23,
        inputs=[views_image, cond_image, imggen_SEED, save_folder, imggen_max_faces, imggen_color],
        outputs=[result_3dobj, result_3dglb_texture],
    ).success(
        fn=stage_3p_baking,
        inputs=[save_folder, imggen_color, imggen_bake],
        outputs=[result_3dglb_baked],
    ).success(
        fn=stage_4_gif,
        inputs=[save_folder, imggen_color, imggen_bake, imggen_render],
        outputs=[result_gif],
    ).success(lambda: print('Image_to_3D Done ...'))

#===============================================================
# start gradio server
#===============================================================

demo.queue()
demo.launch()