aiqtech committed on
Commit 868eab9
• 1 Parent(s): 0ab2174

Update app.py

Files changed (1)
  1. app.py +108 -55
app.py CHANGED
@@ -23,30 +23,45 @@ os.makedirs(TMP_DIR, exist_ok=True)
 def initialize_models():
     global pipeline, translator, flux_pipe
 
-    # Check for the Hugging Face token
-    hf_token = os.getenv("HF_TOKEN")
-    if not hf_token:
-        raise ValueError("HF_TOKEN environment variable is not set. Please set your Hugging Face token.")
 
-    # Initialize the Trellis pipeline
-    pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
-    pipeline.cuda()
-
-    # Initialize the translator
-    translator = translation_pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
-
-    # Initialize the Flux pipeline, passing the token
-    flux_pipe = FluxPipeline.from_pretrained(
-        "black-forest-labs/FLUX.1-dev",
-        torch_dtype=torch.bfloat16,
-        use_auth_token=hf_token  # apply the Hugging Face token
-    )
-    flux_pipe.load_lora_weights(
-        "gokaygokay/Flux-Game-Assets-LoRA-v2",
-        use_auth_token=hf_token  # apply the token when loading the LoRA weights as well
-    )
-    flux_pipe.fuse_lora(lora_scale=1.0)
-    flux_pipe.to(device="cuda", dtype=torch.bfloat16)
 
 def translate_if_korean(text):
     if any(ord('가') <= ord(char) <= ord('힣') for char in text):
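Aside: the initialization removed above passed `use_auth_token=` to `from_pretrained`, a keyword that recent diffusers releases deprecate in favor of `token=`. A minimal sketch of the equivalent gated-model load under that assumption (our illustration, not part of this commit):

```python
import os

import torch
from diffusers import FluxPipeline

# Same load as the removed code, but with the newer `token=` keyword
# in place of the deprecated `use_auth_token=`.
hf_token = os.getenv("HF_TOKEN")
flux_pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.bfloat16,
    token=hf_token,  # FLUX.1-dev is gated; an authorized token is required
)
```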
@@ -101,32 +116,56 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
     return gs, mesh, state['trial_id']
 
 @spaces.GPU
-def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float, ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int) -> Tuple[dict, str]:
-    if randomize_seed:
-        seed = np.random.randint(0, MAX_SEED)
-    outputs = pipeline.run(
-        Image.open(f"{TMP_DIR}/{trial_id}.png"),
-        seed=seed,
-        formats=["gaussian", "mesh"],
-        preprocess_image=False,
-        sparse_structure_sampler_params={
-            "steps": ss_sampling_steps,
-            "cfg_strength": ss_guidance_strength,
-        },
-        slat_sampler_params={
-            "steps": slat_sampling_steps,
-            "cfg_strength": slat_guidance_strength,
-        },
-    )
-    video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
-    video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
-    video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
-    trial_id = uuid.uuid4()
-    video_path = f"{TMP_DIR}/{trial_id}.mp4"
-    os.makedirs(os.path.dirname(video_path), exist_ok=True)
-    imageio.mimsave(video_path, video, fps=15)
-    state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
-    return state, video_path
 
 @spaces.GPU
 def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
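In the body above, the `axis=1` concatenation stitches the Gaussian color render and the mesh normal render side by side within each preview frame. A tiny sketch of that layout:

```python
import numpy as np

# Two frames of equal height: concatenating along axis=1 (width) gives
# one combined color|normal frame, as in the preview video.
color = np.zeros((256, 256, 3), dtype=np.uint8)
normal = np.full((256, 256, 3), 255, dtype=np.uint8)
frame = np.concatenate([color, normal], axis=1)
print(frame.shape)  # (256, 512, 3)
```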
@@ -165,9 +204,16 @@ def deactivate_button() -> gr.Button:
     return gr.Button(interactive=False)
 
 
-with gr.Blocks() as demo:
     gr.Markdown("""
-    # 3D Asset Creation & Text-to-Image Generation
     """)
 
     with gr.Tabs():
@@ -287,11 +333,18 @@ with gr.Blocks() as demo:
         outputs=[txt2img_output]
     )
 
-# Launch the Gradio app
 if __name__ == "__main__":
-    initialize_models()  # initialize all models
     try:
-        pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))  # Preload rembg
     except:
         pass
-    demo.launch()
 def initialize_models():
     global pipeline, translator, flux_pipe
 
+    # Clear CUDA memory
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
 
+    try:
+        # Initialize the Trellis pipeline
+        pipeline = TrellisImageTo3DPipeline.from_pretrained(
+            "JeffreyXiang/TRELLIS-image-large",
+            device_map="auto"  # automatic device mapping for the Zero GPU environment
+        )
+
+        # Initialize the translator
+        translator = translation_pipeline(
+            "translation",
+            model="Helsinki-NLP/opus-mt-ko-en",
+            device_map="auto"
+        )
+
+        # Initialize the Flux pipeline
+        flux_pipe = FluxPipeline.from_pretrained(
+            "black-forest-labs/FLUX.1-dev",
+            torch_dtype=torch.float16,  # use float16 instead of bfloat16
+            device_map="auto"
+        )
+
+        # Load the LoRA weights
+        flux_pipe.load_lora_weights(
+            "gokaygokay/Flux-Game-Assets-LoRA-v2",
+            device_map="auto"
+        )
+        flux_pipe.fuse_lora(lora_scale=1.0)
+
+    except Exception as e:
+        print(f"Error initializing models: {str(e)}")
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        raise e
+
+
 
 def translate_if_korean(text):
     if any(ord('가') <= ord(char) <= ord('힣') for char in text):
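For reference, `translate_if_korean` works because precomposed Hangul syllables occupy one contiguous Unicode block, U+AC00 ('가') through U+D7A3 ('힣'), so a single range test per character detects Korean text. A self-contained sketch of the same check (the name `contains_korean` is ours):

```python
# Any character inside the Hangul-syllables block marks the text as Korean.
def contains_korean(text: str) -> bool:
    return any('\uac00' <= ch <= '\ud7a3' for ch in text)

print(contains_korean("hello"))      # False
print(contains_korean("안녕하세요"))   # True
```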
 
     return gs, mesh, state['trial_id']
 
 @spaces.GPU
+def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float,
+                ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int):
+    try:
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
+        if randomize_seed:
+            seed = np.random.randint(0, MAX_SEED)
+
+        input_image = Image.open(f"{TMP_DIR}/{trial_id}.png")
+
+        # Use an autocast context manager for memory optimization
+        with torch.cuda.amp.autocast(enabled=True):
+            outputs = pipeline.run(
+                input_image,
+                seed=seed,
+                formats=["gaussian", "mesh"],
+                preprocess_image=False,
+                sparse_structure_sampler_params={
+                    "steps": ss_sampling_steps,
+                    "cfg_strength": ss_guidance_strength,
+                },
+                slat_sampler_params={
+                    "steps": slat_sampling_steps,
+                    "cfg_strength": slat_guidance_strength,
+                }
+            )
+
+        # Render the videos
+        video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
+        video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
+        video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
+
+        trial_id = str(uuid.uuid4())
+        video_path = f"{TMP_DIR}/{trial_id}.mp4"
+        os.makedirs(os.path.dirname(video_path), exist_ok=True)
+        imageio.mimsave(video_path, video, fps=15)
+
+        state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
+
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
+        return state, video_path
+
+    except Exception as e:
+        print(f"Error in image_to_3d: {str(e)}")
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        raise e
 
 @spaces.GPU
 def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
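One note on the mixed-precision wrapper used in `image_to_3d` above: `torch.cuda.amp.autocast` is the legacy spelling, and recent PyTorch releases emit a deprecation warning for it. A sketch of the device-agnostic equivalent, assuming PyTorch 1.10 or later:

```python
import torch

# Equivalent to torch.cuda.amp.autocast(enabled=True) on recent PyTorch;
# eligible ops inside the block run in reduced precision.
with torch.amp.autocast("cuda", enabled=True):
    ...  # pipeline.run(...) would go here
```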
 
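Separately, the clear-cache / log / re-raise pattern now repeated in `initialize_models` and `image_to_3d` could be factored into a decorator. A minimal sketch of that refactor; it is our own suggestion, not part of this commit, and `with_cuda_cleanup` is a hypothetical name:

```python
import functools

import torch

def with_cuda_cleanup(fn):
    """Empty the CUDA cache before fn runs and again afterwards,
    including when fn raises."""
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        try:
            return fn(*args, **kwargs)
        finally:
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
    return wrapper
```

Stacked under `@spaces.GPU`, such a decorator would replace the duplicated `empty_cache` call sites with a single definition.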
204
  return gr.Button(interactive=False)
205
 
206
 
207
+ css = """
208
+ footer {
209
+ visibility: hidden;
210
+ }
211
+ """
212
+
213
+
214
+ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
215
  gr.Markdown("""
216
+ # Craft3D : 3D Asset Creation & Text-to-Image Generation
217
  """)
218
 
219
  with gr.Tabs():
 
         outputs=[txt2img_output]
     )
 
 if __name__ == "__main__":
+    # Clear the CUDA memory cache
+    torch.cuda.empty_cache()
+
+    # Initialize all models
+    initialize_models()
+
     try:
+        # Preload rembg
+        pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))
     except:
         pass
+
+    # Launch the Gradio app
+    demo.launch(share=True)  # added share=True
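A caveat on the launch change: `share=True` asks Gradio to open a public gradio.live tunnel, which is redundant on a Hugging Face Space where the app is already served publicly (Gradio typically ignores the flag there and prints a warning). One guarded alternative, assuming the `SPACE_ID` environment variable that Spaces sets:

```python
import os

# Request a share link only when running outside a Hugging Face Space.
demo.launch(share=os.getenv("SPACE_ID") is None)
```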