aiqtech committed on
Commit a135ad5 • 1 Parent(s): 3c5364f

Update app.py

Files changed (1)
  1. app.py +99 -90
app.py CHANGED
@@ -2,8 +2,6 @@ import gradio as gr
 import spaces
 from gradio_litmodel3d import LitModel3D
 import os
-os.environ['SPCONV_ALGO'] = 'native'
-from typing import *
 import torch
 import numpy as np
 import imageio
@@ -15,6 +13,11 @@ from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
 from transformers import pipeline as translation_pipeline
 from diffusers import FluxPipeline
+from typing import *
+
+# Set environment variables
+os.environ['SPCONV_ALGO'] = 'native'
+os.environ['WARP_USE_CPU'] = '1'  # Force Warp into CPU mode
 
 MAX_SEED = np.iinfo(np.int32).max
 TMP_DIR = "/tmp/Trellis-demo"
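
Note: settings like SPCONV_ALGO are usually read once, when the consuming library is first imported, so where they sit relative to the imports matters. A minimal sketch of the order-sensitive pattern (assuming spconv consults SPCONV_ALGO at import time; the module path is illustrative):

import os

# Set the variables before importing the code that reads them,
# or the values may be silently ignored.
os.environ['SPCONV_ALGO'] = 'native'
os.environ['WARP_USE_CPU'] = '1'

import spconv.pytorch as spconv  # SPCONV_ALGO is consulted here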
@@ -24,37 +27,27 @@ def initialize_models():
     global pipeline, translator, flux_pipe
 
     try:
-        # Clear GPU memory
-        torch.cuda.empty_cache()
-
-        # Check whether a GPU is available
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-
-        # Initialize the Trellis pipeline
+        # Initialize the Trellis pipeline (on CPU)
         pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
-        pipeline.to(device)
 
-        # Initialize the translator
+        # Initialize the translator (on CPU)
         translator = translation_pipeline(
             "translation",
             model="Helsinki-NLP/opus-mt-ko-en",
-            device=0 if device=="cuda" else -1
+            device=-1
         )
 
-        # Initialize the Flux pipeline
+        # Initialize the Flux pipeline (on CPU)
         flux_pipe = FluxPipeline.from_pretrained(
             "black-forest-labs/FLUX.1-dev",
-            torch_dtype=torch.float16 if device=="cuda" else torch.float32
+            torch_dtype=torch.float32
         )
 
-        if device == "cuda":
-            flux_pipe.enable_model_cpu_offload()
-
+        print("Models initialized successfully")
         return True
 
     except Exception as e:
         print(f"Model initialization error: {str(e)}")
-        torch.cuda.empty_cache()
         return False
 
 def translate_if_korean(text):
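
The hunk above loads all three models on the CPU at startup; the handlers later in the diff move a pipeline onto the GPU for the duration of one request and back again. A minimal sketch of that borrow-and-return pattern for any object with a .to() method (run_on_gpu is a hypothetical helper, not part of app.py):

import torch

def run_on_gpu(pipe, *args, **kwargs):
    # Borrow the GPU for a single call, then always release it.
    try:
        if torch.cuda.is_available():
            pipe.to("cuda")
        return pipe(*args, **kwargs)
    finally:
        pipe.to("cpu")  # runs on success and on error alike
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

A try/finally like this expresses the intent that the handlers below implement with a duplicated pipeline.to("cpu") call in both the success path and the except path.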
@@ -63,11 +56,25 @@ def translate_if_korean(text):
         return translated
     return text
 
+@spaces.GPU
 def preprocess_image(image: Image.Image) -> Tuple[str, Image.Image]:
-    trial_id = str(uuid.uuid4())
-    processed_image = pipeline.preprocess_image(image)
-    processed_image.save(f"{TMP_DIR}/{trial_id}.png")
-    return trial_id, processed_image
+    try:
+        trial_id = str(uuid.uuid4())
+
+        # Resize the image if it is too small
+        min_size = 64
+        if image.size[0] < min_size or image.size[1] < min_size:
+            ratio = min_size / min(image.size)
+            new_size = tuple(int(dim * ratio) for dim in image.size)
+            image = image.resize(new_size, Image.LANCZOS)
+
+        processed_image = pipeline.preprocess_image(image)
+        processed_image.save(f"{TMP_DIR}/{trial_id}.png")
+        return trial_id, processed_image
+
+    except Exception as e:
+        print(f"Error in preprocess_image: {str(e)}")
+        return None, None
 
 def pack_state(gs: Gaussian, mesh: MeshExtractResult, trial_id: str) -> dict:
     return {
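
The new upscaling branch in preprocess_image scales the shorter side up to min_size while preserving the aspect ratio. A quick standalone check of the arithmetic:

from PIL import Image

image = Image.new("RGB", (40, 100))  # shorter side below the minimum
min_size = 64
ratio = min_size / min(image.size)                        # 64 / 40 = 1.6
new_size = tuple(int(dim * ratio) for dim in image.size)  # (64, 160)
image = image.resize(new_size, Image.LANCZOS)
assert min(image.size) >= min_size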
@@ -86,7 +93,6 @@ def pack_state(gs: Gaussian, mesh: MeshExtractResult, trial_id: str) -> dict:
         'trial_id': trial_id,
     }
 
-
 def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
     gs = Gaussian(
         aabb=state['gaussian']['aabb'],
@@ -113,31 +119,32 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
 def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
                 ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int):
     try:
-        torch.cuda.empty_cache()
-
         if randomize_seed:
             seed = np.random.randint(0, MAX_SEED)
 
         input_image = Image.open(f"{TMP_DIR}/{trial_id}.png")
 
-        with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
-            with torch.no_grad():
-                outputs = pipeline.run(
-                    input_image,
-                    seed=seed,
-                    formats=["gaussian", "mesh"],
-                    preprocess_image=False,
-                    sparse_structure_sampler_params={
-                        "steps": ss_sampling_steps,
-                        "cfg_strength": ss_guidance_strength,
-                    },
-                    slat_sampler_params={
-                        "steps": slat_sampling_steps,
-                        "cfg_strength": slat_guidance_strength,
-                    }
-                )
+        # GPU setup
+        if torch.cuda.is_available():
+            pipeline.to("cuda")
+            pipeline.to(torch.float16)
+
+        with torch.no_grad():
+            outputs = pipeline.run(
+                input_image,
+                seed=seed,
+                formats=["gaussian", "mesh"],
+                preprocess_image=False,
+                sparse_structure_sampler_params={
+                    "steps": ss_sampling_steps,
+                    "cfg_strength": ss_guidance_strength,
+                },
+                slat_sampler_params={
+                    "steps": slat_sampling_steps,
+                    "cfg_strength": slat_guidance_strength,
+                }
+            )
 
-        # Render the video
         video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
         video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
         video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
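
This hunk trades torch.cuda.amp.autocast for an explicit half-precision cast of the pipeline. The two are not equivalent: autocast keeps fp32 weights and runs selected ops in fp16, while .to(torch.float16) converts the weights themselves (whether the Trellis pipeline's .to() accepts a dtype depends on that class). A minimal sketch of the difference on a plain module, assuming a CUDA device is available:

import torch

model = torch.nn.Linear(8, 8).cuda()
x = torch.randn(1, 8, device="cuda")

# Autocast: weights stay fp32; eligible ops run in fp16.
with torch.no_grad(), torch.autocast("cuda", dtype=torch.float16):
    y1 = model(x)

# Weight cast, as image_to_3d now does: parameters become fp16.
model = model.half()
with torch.no_grad():
    y2 = model(x.half())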
@@ -149,37 +156,51 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
 
         state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
 
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-
+        # Move back to the CPU
+        pipeline.to("cpu")
+
         return state, video_path
 
     except Exception as e:
         print(f"Error in image_to_3d: {str(e)}")
-        torch.cuda.empty_cache()
+        pipeline.to("cpu")
         raise e
 
 @spaces.GPU
 def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
-    # Add the base prompt
-    base_prompt = "wbgmsst, 3D, white background"
-
-    # Translate the user prompt (if it is Korean)
-    translated_prompt = translate_if_korean(prompt)
-
-    # Compose the final prompt
-    final_prompt = f"{translated_prompt}, {base_prompt}"
-
-    with torch.inference_mode():
-        image = flux_pipe(
-            prompt=[final_prompt],
-            height=height,
-            width=width,
-            guidance_scale=guidance_scale,
-            num_inference_steps=num_steps
-        ).images[0]
+    try:
+        # GPU setup
+        if torch.cuda.is_available():
+            flux_pipe.to("cuda")
+            flux_pipe.to(torch.float16)
+
+        # Add the base prompt
+        base_prompt = "wbgmsst, 3D, white background"
+
+        # Translate the user prompt (if it is Korean)
+        translated_prompt = translate_if_korean(prompt)
+
+        # Compose the final prompt
+        final_prompt = f"{translated_prompt}, {base_prompt}"
+
+        with torch.inference_mode():
+            image = flux_pipe(
+                prompt=[final_prompt],
+                height=height,
+                width=width,
+                guidance_scale=guidance_scale,
+                num_inference_steps=num_steps
+            ).images[0]
+
+        # Move back to the CPU
+        flux_pipe.to("cpu")
 
         return image
+
+    except Exception as e:
+        print(f"Error in generate_image_from_text: {str(e)}")
+        flux_pipe.to("cpu")
+        raise e
 
 @spaces.GPU
 def extract_glb(state: dict, mesh_simplify: float, texture_size: int) -> Tuple[str, str]:
@@ -195,14 +216,13 @@ def activate_button() -> gr.Button:
 def deactivate_button() -> gr.Button:
     return gr.Button(interactive=False)
 
-
 css = """
 footer {
     visibility: hidden;
 }
 """
 
-
+# Define the Gradio interface
 with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
     gr.Markdown("""
     # Craft3D : 3D Asset Creation & Text-to-Image Generation
@@ -278,7 +298,7 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
         examples_per_page=64,
     )
 
-    # Handlers
+    # Handlers
     image_prompt.upload(
         preprocess_image,
         inputs=[image_prompt],
@@ -292,59 +312,48 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
 
     generate_btn.click(
         image_to_3d,
-        inputs=[trial_id, seed, randomize_seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
+        inputs=[trial_id, seed, randomize_seed, ss_guidance_strength, ss_sampling_steps,
+                slat_guidance_strength, slat_sampling_steps],
         outputs=[output_buf, video_output],
+        concurrency_limit=1
     ).then(
         activate_button,
-        outputs=[extract_glb_btn],
-    )
-
-    video_output.clear(
-        deactivate_button,
-        outputs=[extract_glb_btn],
+        outputs=[extract_glb_btn]
     )
 
     extract_glb_btn.click(
         extract_glb,
         inputs=[output_buf, mesh_simplify, texture_size],
         outputs=[model_output, download_glb],
+        concurrency_limit=1
     ).then(
         activate_button,
-        outputs=[download_glb],
+        outputs=[download_glb]
    )
 
-    model_output.clear(
-        deactivate_button,
-        outputs=[download_glb],
-    )
-
-    # Text-to-image handler
     generate_txt2img_btn.click(
         generate_image_from_text,
         inputs=[text_prompt, txt2img_height, txt2img_width, guidance_scale, num_steps],
-        outputs=[txt2img_output]
+        outputs=[txt2img_output],
+        concurrency_limit=1
     )
 
 if __name__ == "__main__":
-    # Initial GPU memory cleanup
-    if torch.cuda.is_available():
-        torch.cuda.empty_cache()
-
-    # Verify model initialization
+    # Initialize the models
     if not initialize_models():
         print("Failed to initialize models")
         exit(1)
 
     try:
         # Try to preload rembg
-        test_image = Image.fromarray(np.zeros((256, 256, 3), dtype=np.uint8))
+        test_image = Image.fromarray(np.ones((256, 256, 3), dtype=np.uint8) * 255)
         pipeline.preprocess_image(test_image)
     except Exception as e:
         print(f"Warning: Failed to preload rembg: {str(e)}")
 
     # Launch the Gradio app
-    demo.queue(concurrency_count=1).launch(
+    demo.queue(max_size=20).launch(
         share=True,
-        enable_queue=True,
-        max_threads=1
+        max_threads=4,
+        show_error=True
     )
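
The launch changes track Gradio 4.x, which removed queue(concurrency_count=...) and launch(enable_queue=...): concurrency is now capped per event with concurrency_limit, while queue(max_size=...) bounds how many requests may wait. A minimal self-contained sketch (slow_fn stands in for the real handlers):

import gradio as gr

def slow_fn(text):
    return text  # stand-in for image_to_3d / generate_image_from_text

with gr.Blocks() as demo:
    inp = gr.Textbox()
    out = gr.Textbox()
    btn = gr.Button("Run")
    # At most one concurrent worker for this event:
    btn.click(slow_fn, inputs=inp, outputs=out, concurrency_limit=1)

# At most 20 requests wait in the queue; errors surface in the UI.
demo.queue(max_size=20).launch(show_error=True)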