SORA-3D

Running

App Files Files Community

aiqtech committed on Dec 8, 2024

Commit

a7544c9

verified ·

1 Parent(s): 1f5cf77

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -31

app.py CHANGED Viewed

@@ -32,27 +32,32 @@ def initialize_models():
     try:
         import torch
-        # A100 최적화 설정
-        torch.backends.cudnn.benchmark = True  # A100에서는 성능 향상을 위해 활성화
-        torch.backends.cuda.matmul.allow_tf32 = True  # TF32 허용
         torch.backends.cudnn.allow_tf32 = True
         print("Initializing Trellis pipeline...")
         pipeline = TrellisImageTo3DPipeline.from_pretrained(
-            "JeffreyXiang/TRELLIS-image-large",
-            torch_dtype=torch.float16  # A100에서 FP16 사용
         )
         if torch.cuda.is_available():
             pipeline = pipeline.to("cuda")
         print("Initializing translator...")
         translator = translation_pipeline(
             "translation",
             model="Helsinki-NLP/opus-mt-ko-en",
-            device="cuda"  # 번역기도 GPU 사용
         )
         print("Models initialized successfully")
         return True
@@ -68,17 +73,15 @@ def get_flux_pipe():
             free_memory()
             flux_pipe = FluxPipeline.from_pretrained(
                 "black-forest-labs/FLUX.1-dev",
-                torch_dtype=torch.float16,  # A100에서 FP16 사용
                 use_safetensors=True
             ).to("cuda")
         except Exception as e:
             print(f"Error loading Flux pipeline: {e}")
             return None
     return flux_pipe
 def free_memory():
     """강화된 메모리 정리 함수"""
     import gc
@@ -108,7 +111,7 @@ def free_memory():
             except:
                 pass
-@spaces.GPU
 def setup_gpu_model(model):
     """GPU 설정이 필요한 모델을 처리하는 함수"""
     if torch.cuda.is_available():
@@ -122,7 +125,7 @@ def translate_if_korean(text):
         return translated
     return text
-@spaces.GPU
 def preprocess_image(image: Image.Image) -> Tuple[str, Image.Image]:
     try:
         if pipeline is None:
@@ -192,7 +195,6 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
     return gs, mesh, state['trial_id']
-@spaces.GPU
 def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
                 ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int):
     try:
@@ -201,8 +203,8 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
         input_image = Image.open(f"{TMP_DIR}/{trial_id}.png")
-        # 이미지 크기 제한
-        max_size = 512
         if max(input_image.size) > max_size:
             ratio = max_size / max(input_image.size)
             input_image = input_image.resize(
@@ -214,31 +216,31 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
         if torch.cuda.is_available():
             pipeline.to("cuda")
-        with torch.no_grad():
             outputs = pipeline.run(
                 input_image,
                 seed=seed,
                 formats=["gaussian", "mesh"],
                 preprocess_image=False,
                 sparse_structure_sampler_params={
-                    "steps": min(ss_sampling_steps, 15),
                     "cfg_strength": ss_guidance_strength,
                 },
                 slat_sampler_params={
-                    "steps": min(slat_sampling_steps, 15),
                     "cfg_strength": slat_guidance_strength,
                 }
             )
-        # 비디오 프레임 수 감소
-        video = render_utils.render_video(outputs['gaussian'][0], num_frames=30)['color']
-        video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=30)['normal']
         video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
         trial_id = str(uuid.uuid4())
         video_path = f"{TMP_DIR}/{trial_id}.mp4"
         os.makedirs(os.path.dirname(video_path), exist_ok=True)
-        imageio.mimsave(video_path, video, fps=15)
         state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
@@ -253,26 +255,23 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
             pipeline.to("cpu")
         raise e
-@spaces.GPU
 def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
     try:
         free_memory()
-        # Flux 파이프라인 가져오기
         flux_pipe = get_flux_pipe()
         if flux_pipe is None:
             raise Exception("Failed to load Flux pipeline")
-        # 이미지 크기 제한
-        height = min(height, 1024)  # A100에서는 더 큰 이미지 허용
         width = min(width, 1024)
-        # 프롬프트 처리
-        base_prompt = "wbgmsst, 3D, white background"
         translated_prompt = translate_if_korean(prompt)
-        final_prompt = f"{translated_prompt}, {base_prompt}"
-        with torch.cuda.amp.autocast():  # A100에서 자동 혼합 정밀도 사용
             output = flux_pipe(
                 prompt=[final_prompt],
                 height=height,
@@ -292,7 +291,7 @@ def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
         free_memory()
         raise e
-@spaces.GPU
 def extract_glb(state: dict, mesh_simplify: float, texture_size: int) -> Tuple[str, str]:
     gs, mesh, trial_id = unpack_state(state)
     glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)

     try:
         import torch
+        # L40S GPU 최적화 설정
+        torch.backends.cudnn.benchmark = True
+        torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cudnn.allow_tf32 = True
         print("Initializing Trellis pipeline...")
         pipeline = TrellisImageTo3DPipeline.from_pretrained(
+            "JeffreyXiang/TRELLIS-image-large"
         )
         if torch.cuda.is_available():
             pipeline = pipeline.to("cuda")
+            # 모델을 FP16으로 변환
+            for param in pipeline.parameters():
+                param.data = param.data.half()
         print("Initializing translator...")
         translator = translation_pipeline(
             "translation",
             model="Helsinki-NLP/opus-mt-ko-en",
+            device="cuda"
         )
+        # Flux 파이프라인은 나중에 초기화
+        flux_pipe = None
         print("Models initialized successfully")
         return True
             free_memory()
             flux_pipe = FluxPipeline.from_pretrained(
                 "black-forest-labs/FLUX.1-dev",
                 use_safetensors=True
             ).to("cuda")
+            # FP16으로 변환
+            flux_pipe.to(torch.float16)
         except Exception as e:
             print(f"Error loading Flux pipeline: {e}")
             return None
     return flux_pipe
 def free_memory():
     """강화된 메모리 정리 함수"""
     import gc
             except:
                 pass
 def setup_gpu_model(model):
     """GPU 설정이 필요한 모델을 처리하는 함수"""
     if torch.cuda.is_available():
         return translated
     return text
 def preprocess_image(image: Image.Image) -> Tuple[str, Image.Image]:
     try:
         if pipeline is None:
     return gs, mesh, state['trial_id']
 def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
                 ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int):
     try:
         input_image = Image.open(f"{TMP_DIR}/{trial_id}.png")
+        # L40S에 맞게 이미지 크기 제한 조정
+        max_size = 768  # L40S는 더 큰 이미지 처리 가능
         if max(input_image.size) > max_size:
             ratio = max_size / max(input_image.size)
             input_image = input_image.resize(
         if torch.cuda.is_available():
             pipeline.to("cuda")
+        with torch.cuda.amp.autocast():  # 자동 혼합 정밀도 사용
             outputs = pipeline.run(
                 input_image,
                 seed=seed,
                 formats=["gaussian", "mesh"],
                 preprocess_image=False,
                 sparse_structure_sampler_params={
+                    "steps": min(ss_sampling_steps, 20),  # L40S에서 더 많은 스텝 허용
                     "cfg_strength": ss_guidance_strength,
                 },
                 slat_sampler_params={
+                    "steps": min(slat_sampling_steps, 20),
                     "cfg_strength": slat_guidance_strength,
                 }
             )
+        # 비디오 생성
+        video = render_utils.render_video(outputs['gaussian'][0], num_frames=40)['color']
+        video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=40)['normal']
         video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
         trial_id = str(uuid.uuid4())
         video_path = f"{TMP_DIR}/{trial_id}.mp4"
         os.makedirs(os.path.dirname(video_path), exist_ok=True)
+        imageio.mimsave(video_path, video, fps=20)
         state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
             pipeline.to("cpu")
         raise e
 def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
     try:
         free_memory()
         flux_pipe = get_flux_pipe()
         if flux_pipe is None:
             raise Exception("Failed to load Flux pipeline")
+        # L40S에 맞게 크기 제한 조정
+        height = min(height, 1024)
         width = min(width, 1024)
         translated_prompt = translate_if_korean(prompt)
+        final_prompt = f"{translated_prompt}, wbgmsst, 3D, white background"
+        with torch.cuda.amp.autocast():
             output = flux_pipe(
                 prompt=[final_prompt],
                 height=height,
         free_memory()
         raise e
 def extract_glb(state: dict, mesh_simplify: float, texture_size: int) -> Tuple[str, str]:
     gs, mesh, trial_id = unpack_state(state)
     glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)