import gradio as gr
import spaces
from gradio_litmodel3d import LitModel3D
import os
import time
from os import path
import shutil
from datetime import datetime
from typing import Tuple
from safetensors.torch import load_file
from huggingface_hub import hf_hub_download
import torch
import numpy as np
import imageio
import uuid
from easydict import EasyDict as edict
from PIL import Image
from trellis.pipelines import TrellisImageTo3DPipeline
from trellis.representations import Gaussian, MeshExtractResult
from trellis.utils import render_utils, postprocessing_utils
from diffusers import FluxPipeline
# Renamed import: the bare name `pipeline` is reserved for the Trellis
# pipeline assigned in __main__ below.
from transformers import pipeline as transformers_pipeline

# Hugging Face token setup
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None:
    raise ValueError("HF_TOKEN environment variable is not set")

MAX_SEED = np.iinfo(np.int32).max
TMP_DIR = "/tmp/Trellis-demo"
os.makedirs(TMP_DIR, exist_ok=True)

# Setup and initialization code
cache_path = path.join(path.dirname(path.abspath(__file__)), "models")
PERSISTENT_DIR = os.environ.get("PERSISTENT_DIR", ".")
gallery_path = path.join(PERSISTENT_DIR, "gallery")
os.environ["TRANSFORMERS_CACHE"] = cache_path
os.environ["HF_HUB_CACHE"] = cache_path
os.environ["HF_HOME"] = cache_path
os.environ['SPCONV_ALGO'] = 'native'
torch.backends.cuda.matmul.allow_tf32 = True

# Initialize the Korean -> English translator
translator = transformers_pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")


class timer:
    """Context manager that prints how long the wrapped block took."""
    def __init__(self, method_name="timed process"):
        self.method = method_name

    def __enter__(self):
        self.start = time.time()
        print(f"{self.method} starts")

    def __exit__(self, exc_type, exc_val, exc_tb):
        end = time.time()
        print(f"{self.method} took {str(round(end - self.start, 2))}s")


def preprocess_image(image: Image.Image) -> Tuple[str, Image.Image]:
    # `pipeline` is the module-global TrellisImageTo3DPipeline created in __main__.
    trial_id = str(uuid.uuid4())
    processed_image = pipeline.preprocess_image(image)
    processed_image.save(f"{TMP_DIR}/{trial_id}.png")
    return trial_id, processed_image


def pack_state(gs: Gaussian, mesh: MeshExtractResult, trial_id: str) -> dict:
    # Move everything to CPU NumPy arrays so the dict can be held in gr.State.
    return {
        'gaussian': {
            **gs.init_params,
            '_xyz': gs._xyz.cpu().numpy(),
            '_features_dc': gs._features_dc.cpu().numpy(),
            '_scaling': gs._scaling.cpu().numpy(),
            '_rotation': gs._rotation.cpu().numpy(),
            '_opacity': gs._opacity.cpu().numpy(),
        },
        'mesh': {
            'vertices': mesh.vertices.cpu().numpy(),
            'faces': mesh.faces.cpu().numpy(),
        },
        'trial_id': trial_id,
    }


def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
    gs = Gaussian(
        aabb=state['gaussian']['aabb'],
        sh_degree=state['gaussian']['sh_degree'],
        # Spelling matches the upstream TRELLIS Gaussian API.
        mininum_kernel_size=state['gaussian']['mininum_kernel_size'],
        scaling_bias=state['gaussian']['scaling_bias'],
        opacity_bias=state['gaussian']['opacity_bias'],
        scaling_activation=state['gaussian']['scaling_activation'],
    )
    gs._xyz = torch.tensor(state['gaussian']['_xyz'], device='cuda')
    gs._features_dc = torch.tensor(state['gaussian']['_features_dc'], device='cuda')
    gs._scaling = torch.tensor(state['gaussian']['_scaling'], device='cuda')
    gs._rotation = torch.tensor(state['gaussian']['_rotation'], device='cuda')
    gs._opacity = torch.tensor(state['gaussian']['_opacity'], device='cuda')
    mesh = edict(
        vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
        faces=torch.tensor(state['mesh']['faces'], device='cuda'),
    )
    return gs, mesh, state['trial_id']
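# pack_state/unpack_state exist so the heavy GPU objects can survive a round
# trip through gr.State between separate @spaces.GPU calls: tensors are moved
# to CPU NumPy arrays on the way out and rebuilt on 'cuda' on the way in.
# A minimal round-trip sketch (hypothetical usage, not wired into the UI):
#
#     state = pack_state(gs, mesh, trial_id)    # end of image_to_3d
#     gs2, mesh2, tid = unpack_state(state)     # start of extract_glb
#     assert tid == trial_id and gs2._xyz.device.type == 'cuda'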
@spaces.GPU
def image_to_3d(trial_id: str, seed: int, randomize_seed: bool,
                ss_guidance_strength: float, ss_sampling_steps: int,
                slat_guidance_strength: float, slat_sampling_steps: int) -> Tuple[dict, str]:
    if randomize_seed:
        seed = np.random.randint(0, MAX_SEED)
    outputs = pipeline.run(
        Image.open(f"{TMP_DIR}/{trial_id}.png"),
        seed=seed,
        formats=["gaussian", "mesh"],
        preprocess_image=False,
        sparse_structure_sampler_params={
            "steps": ss_sampling_steps,
            "cfg_strength": ss_guidance_strength,
        },
        slat_sampler_params={
            "steps": slat_sampling_steps,
            "cfg_strength": slat_guidance_strength,
        },
    )
    # Preview video: Gaussian color render on the left, mesh normals on the right.
    video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
    video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
    video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
    trial_id = str(uuid.uuid4())  # was a bare uuid.UUID; keep it a str like everywhere else
    video_path = f"{TMP_DIR}/{trial_id}.mp4"
    os.makedirs(os.path.dirname(video_path), exist_ok=True)
    imageio.mimsave(video_path, video, fps=15)
    state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
    return state, video_path


@spaces.GPU
def extract_glb(state: dict, mesh_simplify: float, texture_size: int) -> Tuple[str, str]:
    gs, mesh, trial_id = unpack_state(state)
    glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify,
                                      texture_size=texture_size, verbose=False)
    glb_path = f"{TMP_DIR}/{trial_id}.glb"
    glb.export(glb_path)
    # Returned twice: once for the 3D viewer, once for the download button.
    return glb_path, glb_path


def activate_button() -> gr.Button:
    return gr.Button(interactive=True)


def deactivate_button() -> gr.Button:
    return gr.Button(interactive=False)


@spaces.GPU
def text_to_image(prompt: str, height: int, width: int, steps: int, scales: float, seed: int) -> Image.Image:
    # Detect Korean text
    def contains_korean(text):
        return any(ord('가') <= ord(c) <= ord('힣') for c in text)

    # Prompt preprocessing: translate Korean prompts to English first
    if contains_korean(prompt):
        translated = translator(prompt)[0]['translation_text']
        prompt = translated

    # Enforce the prompt format for white-background 3D-asset renders
    formatted_prompt = f"wbgmsst, 3D, {prompt}, white background"

    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
        try:
            generated_image = pipe(
                prompt=[formatted_prompt],
                generator=torch.Generator().manual_seed(int(seed)),
                num_inference_steps=int(steps),
                guidance_scale=float(scales),
                height=int(height),
                width=int(width),
                max_sequence_length=256
            ).images[0]
            trial_id = str(uuid.uuid4())
            generated_image.save(f"{TMP_DIR}/{trial_id}.png")
            return generated_image
        except Exception as e:
            print(f"Error in image generation: {str(e)}")
            return None
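# contains_korean() only scans the Hangul syllables block (U+AC00 '가' ..
# U+D7A3 '힣'), so isolated jamo are not caught; for whole-word prompts that
# is sufficient. Illustrative behavior (assumed inputs, not part of the app):
#
#     contains_korean("고양이")   # True  -> translated to English first
#     contains_korean("a cat")   # False -> used as-is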
# Gradio Interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""## Craft3D""")

    with gr.Row():
        with gr.Column():
            text_prompt = gr.Textbox(
                label="Text Prompt",
                placeholder="Describe what you want to create...",
                lines=3
            )

            with gr.Accordion("Image Generation Settings", open=False):
                with gr.Row():
                    height = gr.Slider(label="Height", minimum=256, maximum=1152, step=64, value=1024)
                    width = gr.Slider(label="Width", minimum=256, maximum=1152, step=64, value=1024)
                with gr.Row():
                    steps = gr.Slider(label="Inference Steps", minimum=6, maximum=25, step=1, value=8)
                    scales = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=5.0, step=0.1, value=3.5)
                seed = gr.Number(
                    label="Seed",
                    value=lambda: torch.randint(0, MAX_SEED, (1,)).item(),
                    precision=0
                )
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)

            generate_image_btn = gr.Button("Generate Image")
            image_prompt = gr.Image(label="Image Prompt", image_mode="RGBA", type="pil", height=300)

            with gr.Accordion("3D Generation Settings", open=False):
                ss_guidance_strength = gr.Slider(0.0, 10.0, label="Structure Guidance Strength", value=7.5, step=0.1)
                ss_sampling_steps = gr.Slider(1, 50, label="Structure Sampling Steps", value=12, step=1)
                slat_guidance_strength = gr.Slider(0.0, 10.0, label="Latent Guidance Strength", value=3.0, step=0.1)
                slat_sampling_steps = gr.Slider(1, 50, label="Latent Sampling Steps", value=12, step=1)

            generate_3d_btn = gr.Button("Generate 3D")

            with gr.Accordion("GLB Extraction Settings", open=False):
                mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
                texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)

            extract_glb_btn = gr.Button("Extract GLB", interactive=False)

        with gr.Column():
            video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
            model_output = LitModel3D(label="Extracted GLB", exposure=20.0, height=300)
            download_glb = gr.DownloadButton(label="Download GLB", interactive=False)

    trial_id = gr.Textbox(visible=False)
    output_buf = gr.State()

    # Handlers
    generate_image_btn.click(
        text_to_image,
        inputs=[text_prompt, height, width, steps, scales, seed],
        outputs=[image_prompt]
    ).then(
        preprocess_image,
        inputs=[image_prompt],
        outputs=[trial_id, image_prompt]
    )

    # Remaining handlers
    image_prompt.upload(
        preprocess_image,
        inputs=[image_prompt],
        outputs=[trial_id, image_prompt],
    )
    image_prompt.clear(
        lambda: '',
        outputs=[trial_id],
    )

    generate_3d_btn.click(
        image_to_3d,
        inputs=[trial_id, seed, randomize_seed, ss_guidance_strength, ss_sampling_steps,
                slat_guidance_strength, slat_sampling_steps],
        outputs=[output_buf, video_output],
    ).then(
        activate_button,
        outputs=[extract_glb_btn],
    )

    video_output.clear(
        deactivate_button,
        outputs=[extract_glb_btn],
    )

    extract_glb_btn.click(
        extract_glb,
        inputs=[output_buf, mesh_simplify, texture_size],
        outputs=[model_output, download_glb],
    ).then(
        activate_button,
        outputs=[download_glb],
    )

    model_output.clear(
        deactivate_button,
        outputs=[download_glb],
    )


if __name__ == "__main__":
    # 3D generation pipeline (assigned to the module-global `pipeline` used above)
    pipeline = TrellisImageTo3DPipeline.from_pretrained(
        "JeffreyXiang/TRELLIS-image-large",
        use_auth_token=HF_TOKEN
    )
    pipeline.cuda()

    # Image generation pipeline
    pipe = FluxPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-dev",
        torch_dtype=torch.bfloat16,
        use_auth_token=HF_TOKEN
    )

    # Load the Hyper-SD LoRA for few-step FLUX inference
    pipe.load_lora_weights(
        hf_hub_download(
            "ByteDance/Hyper-SD",
            "Hyper-FLUX.1-dev-8steps-lora.safetensors",
            use_auth_token=HF_TOKEN
        )
    )
    pipe.fuse_lora(lora_scale=0.125)
    pipe.to(device="cuda", dtype=torch.bfloat16)

    # Warm up the image preprocessor (background removal) on a dummy image
    try:
        pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))
    except Exception:
        pass

    demo.launch(allowed_paths=[PERSISTENT_DIR])
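# Typical launch (assumed entry-point name; not specified in this file):
#
#     HF_TOKEN=hf_xxx python app.py
#
# HF_TOKEN must grant access to the gated black-forest-labs/FLUX.1-dev
# weights; PERSISTENT_DIR (default ".") is whitelisted via allowed_paths.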