SORA-3D

Running on L40S

App Files Files Community

aiqtech committed on Dec 9, 2024

Commit

1f3fd7c

verified ·

1 Parent(s): 7619202

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -16

app.py CHANGED Viewed

@@ -81,14 +81,15 @@ torch.backends.cuda.matmul.allow_tf32 = True
 torch.backends.cudnn.benchmark = True
 # 환경 변수 설정
-# 환경 변수 설정
-os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512,garbage_collection_threshold:0.6"
 os.environ['SPCONV_ALGO'] = 'native'
 os.environ['SPARSE_BACKEND'] = 'native'
 os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
 os.environ['XFORMERS_FORCE_DISABLE_TRITON'] = '1'
 os.environ['XFORMERS_ENABLE_FLASH_ATTENTION'] = '1'
 os.environ['TORCH_CUDA_MEMORY_ALLOCATOR'] = 'native'
 # CUDA 초기화 방지
 torch.set_grad_enabled(False)
@@ -208,6 +209,7 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
     try:
         # CUDA 메모리 초기화
         torch.cuda.empty_cache()
         if randomize_seed:
             seed = np.random.randint(0, MAX_SEED)
@@ -222,37 +224,60 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
         image = Image.open(image_path)
         print(f"Successfully loaded image with size: {image.size}")
         # GPU 작업 시작
         with torch.cuda.device(0):
             try:
                 # 모델을 GPU로 이동
-                g.trellis_pipeline.to('cuda')
                 torch.cuda.synchronize()
-                with torch.inference_mode():
-                    # 첫 번째 단계: 3D 생성
                     outputs = g.trellis_pipeline.run(
                         image,
                         seed=seed,
                         formats=["gaussian", "mesh"],
                         preprocess_image=False,
                         sparse_structure_sampler_params={
-                            "steps": ss_sampling_steps,
                             "cfg_strength": ss_guidance_strength,
                         },
                         slat_sampler_params={
-                            "steps": slat_sampling_steps,
                             "cfg_strength": slat_guidance_strength,
                         },
                     )
                     torch.cuda.synchronize()
-                    # 두 번째 단계: 비디오 렌더링
-                    video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
-                    torch.cuda.synchronize()
-                    video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
-                    torch.cuda.synchronize()
                     # CPU로 데이터 이동 및 후처리
                     video = [v.cpu().numpy() if torch.is_tensor(v) else v for v in video]
@@ -271,15 +296,14 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
             finally:
                 # 정리 작업
-                g.trellis_pipeline.to('cpu')
                 torch.cuda.empty_cache()
                 torch.cuda.synchronize()
     except Exception as e:
         print(f"Error in image_to_3d: {str(e)}")
-        # 에러 발생 시 정리
         if hasattr(g.trellis_pipeline, 'to'):
-            g.trellis_pipeline.to('cpu')
         torch.cuda.empty_cache()
         torch.cuda.synchronize()
         return None, None
@@ -289,14 +313,17 @@ def clear_gpu_memory():
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
         torch.cuda.synchronize()
 def move_to_device(model, device):
     """모델을 안전하게 디바이스로 이동하는 함수"""
     try:
         if hasattr(model, 'to'):
             model.to(device)
             if device == 'cuda':
                 torch.cuda.synchronize()
     except Exception as e:
         print(f"Error moving model to {device}: {str(e)}")

 torch.backends.cudnn.benchmark = True
 # 환경 변수 설정
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:256,garbage_collection_threshold:0.8"
 os.environ['SPCONV_ALGO'] = 'native'
 os.environ['SPARSE_BACKEND'] = 'native'
 os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
 os.environ['XFORMERS_FORCE_DISABLE_TRITON'] = '1'
 os.environ['XFORMERS_ENABLE_FLASH_ATTENTION'] = '1'
 os.environ['TORCH_CUDA_MEMORY_ALLOCATOR'] = 'native'
+os.environ['PYTORCH_NO_CUDA_MEMORY_CACHING'] = '1'
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 # CUDA 초기화 방지
 torch.set_grad_enabled(False)
     try:
         # CUDA 메모리 초기화
         torch.cuda.empty_cache()
+        torch.cuda.synchronize()
         if randomize_seed:
             seed = np.random.randint(0, MAX_SEED)
         image = Image.open(image_path)
         print(f"Successfully loaded image with size: {image.size}")
+        # 이미지 크기 제한
+        max_size = 512
+        if max(image.size) > max_size:
+            ratio = max_size / max(image.size)
+            new_size = tuple(int(dim * ratio) for dim in image.size)
+            image = image.resize(new_size, Image.LANCZOS)
+            print(f"Resized image to: {image.size}")
         # GPU 작업 시작
         with torch.cuda.device(0):
             try:
                 # 모델을 GPU로 이동
+                move_to_device(g.trellis_pipeline, 'cuda')
                 torch.cuda.synchronize()
+                with torch.inference_mode(), torch.cuda.amp.autocast():
+                    # 메모리 사용량 최적화를 위한 배치 크기 설정
+                    torch.cuda.set_per_process_memory_fraction(0.8)  # GPU 메모리 사용량 제한
+                    # 3D 생성
                     outputs = g.trellis_pipeline.run(
                         image,
                         seed=seed,
                         formats=["gaussian", "mesh"],
                         preprocess_image=False,
                         sparse_structure_sampler_params={
+                            "steps": min(ss_sampling_steps, 20),  # 스텝 수 제한
                             "cfg_strength": ss_guidance_strength,
                         },
                         slat_sampler_params={
+                            "steps": min(slat_sampling_steps, 20),  # 스텝 수 제한
                             "cfg_strength": slat_guidance_strength,
                         },
                     )
                     torch.cuda.synchronize()
+                    # 비디오 렌더링을 위한 메모리 확보
+                    torch.cuda.empty_cache()
+                    # 비디오 렌더링
+                    with torch.cuda.amp.autocast():
+                        video = render_utils.render_video(
+                            outputs['gaussian'][0],
+                            num_frames=60,  # 프레임 수 감소
+                            resolution=512  # 해상도 제한
+                        )['color']
+                        torch.cuda.synchronize()
+                        video_geo = render_utils.render_video(
+                            outputs['mesh'][0],
+                            num_frames=60,  # 프레임 수 감소
+                            resolution=512  # 해상도 제한
+                        )['normal']
+                        torch.cuda.synchronize()
                     # CPU로 데이터 이동 및 후처리
                     video = [v.cpu().numpy() if torch.is_tensor(v) else v for v in video]
             finally:
                 # 정리 작업
+                move_to_device(g.trellis_pipeline, 'cpu')
                 torch.cuda.empty_cache()
                 torch.cuda.synchronize()
     except Exception as e:
         print(f"Error in image_to_3d: {str(e)}")
         if hasattr(g.trellis_pipeline, 'to'):
+            move_to_device(g.trellis_pipeline, 'cpu')
         torch.cuda.empty_cache()
         torch.cuda.synchronize()
         return None, None
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
         torch.cuda.synchronize()
+        gc.collect()  # 가비지 컬렉션 실행
 def move_to_device(model, device):
     """모델을 안전하게 디바이스로 이동하는 함수"""
     try:
         if hasattr(model, 'to'):
+            clear_gpu_memory()
             model.to(device)
             if device == 'cuda':
                 torch.cuda.synchronize()
+            clear_gpu_memory()
     except Exception as e:
         print(f"Error moving model to {device}: {str(e)}")