SORA-3D

Running on L40S

App Files Files Community

aiqtech committed on Dec 8, 2024

Commit

b209823

verified ·

1 Parent(s): a135ad5

Update app.py

Browse files

Files changed (1) hide show

app.py +94 -37

app.py CHANGED Viewed

@@ -15,6 +15,15 @@ from transformers import pipeline as translation_pipeline
 from diffusers import FluxPipeline
 from typing import *
 # 환경 변수 설정
 os.environ['SPCONV_ALGO'] = 'native'
 os.environ['WARP_USE_CPU'] = '1'  # Warp를 CPU 모드로 강제
@@ -27,27 +36,43 @@ def initialize_models():
     global pipeline, translator, flux_pipe
     try:
-        # Trellis 파이프라인 초기화 (CPU 모드로)
-        pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
-        # 번역기 초기화 (CPU 모드로)
         translator = translation_pipeline(
             "translation",
             model="Helsinki-NLP/opus-mt-ko-en",
-            device=-1
         )
-        # Flux 파이프라인 초기화 (CPU 모드로)
         flux_pipe = FluxPipeline.from_pretrained(
             "black-forest-labs/FLUX.1-dev",
-            torch_dtype=torch.float32
         )
         print("Models initialized successfully")
         return True
     except Exception as e:
         print(f"Model initialization error: {str(e)}")
         return False
 def translate_if_korean(text):
@@ -119,34 +144,46 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
 def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
                 ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int):
     try:
         if randomize_seed:
             seed = np.random.randint(0, MAX_SEED)
         input_image = Image.open(f"{TMP_DIR}/{trial_id}.png")
-        # GPU 설정
-        if torch.cuda.is_available():
-            pipeline.to("cuda")
-            pipeline.to(torch.float16)
-        with torch.no_grad():
-            outputs = pipeline.run(
-                input_image,
-                seed=seed,
-                formats=["gaussian", "mesh"],
-                preprocess_image=False,
-                sparse_structure_sampler_params={
-                    "steps": ss_sampling_steps,
-                    "cfg_strength": ss_guidance_strength,
-                },
-                slat_sampler_params={
-                    "steps": slat_sampling_steps,
-                    "cfg_strength": slat_guidance_strength,
-                }
             )
-        video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
-        video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
         video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
         trial_id = str(uuid.uuid4())
@@ -156,14 +193,12 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
         state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
-        # CPU 모드로 돌아가기
-        pipeline.to("cpu")
         return state, video_path
     except Exception as e:
         print(f"Error in image_to_3d: {str(e)}")
-        pipeline.to("cpu")
         raise e
 @spaces.GPU
@@ -221,7 +256,23 @@ footer {
     visibility: hidden;
 }
 """
 # Gradio 인터페이스 정의
 with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
     gr.Markdown("""
@@ -339,21 +390,27 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
     )
 if __name__ == "__main__":
     # 모델 초기화
     if not initialize_models():
         print("Failed to initialize models")
         exit(1)
     try:
-        # rembg 사전 로드 시도
-        test_image = Image.fromarray(np.ones((256, 256, 3), dtype=np.uint8) * 255)
         pipeline.preprocess_image(test_image)
     except Exception as e:
         print(f"Warning: Failed to preload rembg: {str(e)}")
     # Gradio 앱 실행
-    demo.queue(max_size=20).launch(
         share=True,
-        max_threads=4,
-        show_error=True
     )

 from diffusers import FluxPipeline
 from typing import *
+# 메모리 관련 환경 변수
+os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
+os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
+os.environ['TRANSFORMERS_CACHE'] = '/tmp/transformers_cache'
+os.environ['HF_HOME'] = '/tmp/huggingface'
 # 환경 변수 설정
 os.environ['SPCONV_ALGO'] = 'native'
 os.environ['WARP_USE_CPU'] = '1'  # Warp를 CPU 모드로 강제
     global pipeline, translator, flux_pipe
     try:
+        # Trellis 파이프라인 초기화 (더 강화된 메모리 최적화)
+        pipeline = TrellisImageTo3DPipeline.from_pretrained(
+            "JeffreyXiang/TRELLIS-image-large",
+            device_map="auto",
+            low_cpu_mem_usage=True,
+            torch_dtype=torch.float16  # 반정밀도 사용
+        )
+        # 번역기 초기화 (더 작은 모델 사용)
         translator = translation_pipeline(
             "translation",
             model="Helsinki-NLP/opus-mt-ko-en",
+            device="cpu",
+            model_kwargs={
+                "low_cpu_mem_usage": True,
+                "torch_dtype": torch.float16
+            }
         )
+        # Flux 파이프라인 초기화 (메모리 최적화)
         flux_pipe = FluxPipeline.from_pretrained(
             "black-forest-labs/FLUX.1-dev",
+            device_map="auto",
+            low_cpu_mem_usage=True,
+            torch_dtype=torch.float16,
+            variant="fp16"
         )
+        # 불필요한 캐시 정리
+        free_memory()
         print("Models initialized successfully")
         return True
     except Exception as e:
         print(f"Model initialization error: {str(e)}")
+        free_memory()
         return False
 def translate_if_korean(text):
 def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
                 ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int):
     try:
+        free_memory()
         if randomize_seed:
             seed = np.random.randint(0, MAX_SEED)
         input_image = Image.open(f"{TMP_DIR}/{trial_id}.png")
+        # GPU 메모리 사용량 제한
+        torch.cuda.set_per_process_memory_fraction(0.6)
+        # 더 작은 이미지 크기 사용
+        max_size = 512
+        if max(input_image.size) > max_size:
+            ratio = max_size / max(input_image.size)
+            input_image = input_image.resize(
+                (int(input_image.size[0] * ratio),
+                 int(input_image.size[1] * ratio)),
+                Image.LANCZOS
             )
+        with torch.cuda.amp.autocast():
+            with torch.no_grad():
+                outputs = pipeline.run(
+                    input_image,
+                    seed=seed,
+                    formats=["gaussian", "mesh"],
+                    preprocess_image=False,
+                    sparse_structure_sampler_params={
+                        "steps": min(ss_sampling_steps, 15),
+                        "cfg_strength": ss_guidance_strength,
+                    },
+                    slat_sampler_params={
+                        "steps": min(slat_sampling_steps, 15),
+                        "cfg_strength": slat_guidance_strength,
+                    }
+                )
+        # 더 적은 프레임으로 비디오 생성
+        video = render_utils.render_video(outputs['gaussian'][0], num_frames=30)['color']
+        video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=30)['normal']
         video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
         trial_id = str(uuid.uuid4())
         state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
+        free_memory()
         return state, video_path
     except Exception as e:
         print(f"Error in image_to_3d: {str(e)}")
+        free_memory()
         raise e
 @spaces.GPU
     visibility: hidden;
 }
 """
+def free_memory():
+    """Release memory held by Python and, when available, CUDA.
+
+    Runs a garbage-collection pass, then empties the CUDA cache and
+    synchronizes if CUDA is available, and finally invokes the ``sync``
+    shell command on POSIX systems.
+    """
+    import gc
+    import psutil
+    # Force a Python garbage-collection pass
+    gc.collect()
+    # Clean up CUDA memory
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        torch.cuda.synchronize()
+    # Attempt to clear the RAM cache
+    # NOTE(review): `sync` presumably only flushes pending filesystem writes
+    # to disk - confirm that it actually frees RAM as intended here.
+    if psutil.POSIX:
+        import os
+        os.system('sync')
 # Gradio 인터페이스 정의
 with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
     gr.Markdown("""
     )
 if __name__ == "__main__":
+    free_memory()
     # 모델 초기화
     if not initialize_models():
         print("Failed to initialize models")
         exit(1)
     try:
+        # 최소 크기 이미지로 rembg 테스트
+        test_image = Image.fromarray(np.ones((64, 64, 3), dtype=np.uint8) * 255)
         pipeline.preprocess_image(test_image)
     except Exception as e:
         print(f"Warning: Failed to preload rembg: {str(e)}")
     # Gradio 앱 실행
+    demo.queue(max_size=5).launch(
         share=True,
+        max_threads=2,
+        show_error=True,
+        cache_examples=False,
+        enable_queue=True,
+        server_port=7860,
+        server_name="0.0.0.0"
     )