SORA-3D

Paused

App Files Files Community

aiqtech committed on Dec 8, 2024

Commit

ee210e2

verified ·

1 Parent(s): c260a18

Update app.py

Browse files

Files changed (1) hide show

app.py +91 -90

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import gradio as gr
 import spaces
 from gradio_litmodel3d import LitModel3D
 import os
 os.environ['SPCONV_ALGO'] = 'native'
 from typing import *
@@ -14,12 +13,13 @@ from PIL import Image
 from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
-# Add below the existing import statements
 from transformers import pipeline as translation_pipeline
 from diffusers import FluxPipeline
-# Add to the initialization section
 def initialize_models():
     global pipeline, translator, flux_pipe
@@ -35,30 +35,19 @@ def initialize_models():
     flux_pipe.load_lora_weights("gokaygokay/Flux-Game-Assets-LoRA-v2")
     flux_pipe.fuse_lora(lora_scale=1.0)
     flux_pipe.to(device="cuda", dtype=torch.bfloat16)
-MAX_SEED = np.iinfo(np.int32).max
-TMP_DIR = "/tmp/Trellis-demo"
-os.makedirs(TMP_DIR, exist_ok=True)
 def preprocess_image(image: Image.Image) -> Tuple[str, Image.Image]:
-    """
-    Preprocess the input image.
-    Args:
-        image (Image.Image): The input image.
-    Returns:
-        str: uuid of the trial.
-        Image.Image: The preprocessed image.
-    """
     trial_id = str(uuid.uuid4())
     processed_image = pipeline.preprocess_image(image)
     processed_image.save(f"{TMP_DIR}/{trial_id}.png")
     return trial_id, processed_image
 def pack_state(gs: Gaussian, mesh: MeshExtractResult, trial_id: str) -> dict:
     return {
         'gaussian': {
@@ -75,8 +64,8 @@ def pack_state(gs: Gaussian, mesh: MeshExtractResult, trial_id: str) -> dict:
         },
         'trial_id': trial_id,
     }
 def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
     gs = Gaussian(
         aabb=state['gaussian']['aabb'],
@@ -99,25 +88,8 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
     return gs, mesh, state['trial_id']
 @spaces.GPU
 def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float, ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int) -> Tuple[dict, str]:
-    """
-    Convert an image to a 3D model.
-    Args:
-        trial_id (str): The uuid of the trial.
-        seed (int): The random seed.
-        randomize_seed (bool): Whether to randomize the seed.
-        ss_guidance_strength (float): The guidance strength for sparse structure generation.
-        ss_sampling_steps (int): The number of sampling steps for sparse structure generation.
-        slat_guidance_strength (float): The guidance strength for structured latent generation.
-        slat_sampling_steps (int): The number of sampling steps for structured latent generation.
-    Returns:
-        dict: The information of the generated 3D model.
-        str: The path to the video of the 3D model.
-    """
     if randomize_seed:
         seed = np.random.randint(0, MAX_SEED)
     outputs = pipeline.run(
@@ -144,75 +116,98 @@ def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_stre
     state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
     return state, video_path
 @spaces.GPU
 def extract_glb(state: dict, mesh_simplify: float, texture_size: int) -> Tuple[str, str]:
-    """
-    Extract a GLB file from the 3D model.
-    Args:
-        state (dict): The state of the generated 3D model.
-        mesh_simplify (float): The mesh simplification factor.
-        texture_size (int): The texture resolution.
-    Returns:
-        str: The path to the extracted GLB file.
-    """
     gs, mesh, trial_id = unpack_state(state)
     glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
     glb_path = f"{TMP_DIR}/{trial_id}.glb"
     glb.export(glb_path)
     return glb_path, glb_path
 def activate_button() -> gr.Button:
     """Build a Button with ``interactive=True``, i.e. one that can be clicked."""
     enabled_button = gr.Button(interactive=True)
     return enabled_button
 def deactivate_button() -> gr.Button:
     """Build a Button with ``interactive=False``, i.e. one that cannot be clicked."""
     disabled_button = gr.Button(interactive=False)
     return disabled_button
 with gr.Blocks() as demo:
     gr.Markdown("""
-    ## Image to 3D Asset with [TRELLIS](https://trellis3d.github.io/)
-    * Upload an image and click "Generate" to create a 3D asset. If the image has alpha channel, it be used as the mask. Otherwise, we use `rembg` to remove the background.
-    * If you find the generated 3D asset satisfactory, click "Extract GLB" to extract the GLB file and download it.
     """)
-    with gr.Row():
-        with gr.Column():
-            image_prompt = gr.Image(label="Image Prompt", image_mode="RGBA", type="pil", height=300)
-            with gr.Accordion(label="Generation Settings", open=False):
-                seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
-                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
-                gr.Markdown("Stage 1: Sparse Structure Generation")
-                with gr.Row():
-                    ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
-                    ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
-                gr.Markdown("Stage 2: Structured Latent Generation")
-                with gr.Row():
-                    slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
-                    slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
-            generate_btn = gr.Button("Generate")
-            with gr.Accordion(label="GLB Extraction Settings", open=False):
-                mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
-                texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
-            extract_glb_btn = gr.Button("Extract GLB", interactive=False)
-        with gr.Column():
-            video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
-            model_output = LitModel3D(label="Extracted GLB", exposure=20.0, height=300)
-            download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
     trial_id = gr.Textbox(visible=False)
     output_buf = gr.State()
-    # Example images at the bottom of the page
     with gr.Row():
         examples = gr.Examples(
             examples=[
@@ -226,12 +221,13 @@ with gr.Blocks() as demo:
             examples_per_page=64,
         )
-    # Handlers
     image_prompt.upload(
         preprocess_image,
         inputs=[image_prompt],
         outputs=[trial_id, image_prompt],
     )
     image_prompt.clear(
         lambda: '',
         outputs=[trial_id],
@@ -264,14 +260,19 @@ with gr.Blocks() as demo:
         deactivate_button,
         outputs=[download_glb],
     )
 # Launch the Gradio app
 if __name__ == "__main__":
-    pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
-    pipeline.cuda()
     try:
         pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))    # Preload rembg
     except:
         pass
-    demo.launch()

 import gradio as gr
 import spaces
 from gradio_litmodel3d import LitModel3D
 import os
 os.environ['SPCONV_ALGO'] = 'native'
 from typing import *
 from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
 from transformers import pipeline as translation_pipeline
 from diffusers import FluxPipeline
+MAX_SEED = np.iinfo(np.int32).max  # largest int32 value; upper bound for the seed slider and the random seed draw
+TMP_DIR = "/tmp/Trellis-demo"  # scratch directory for per-trial files (preprocessed PNGs, exported GLBs)
+os.makedirs(TMP_DIR, exist_ok=True)  # executed at import time; exist_ok avoids an error when the directory already exists
 def initialize_models():
     global pipeline, translator, flux_pipe
     flux_pipe.load_lora_weights("gokaygokay/Flux-Game-Assets-LoRA-v2")
     flux_pipe.fuse_lora(lora_scale=1.0)
     flux_pipe.to(device="cuda", dtype=torch.bfloat16)
+def translate_if_korean(text):
+    if any(ord('가') <= ord(char) <= ord('힣') for char in text):
+        translated = translator(text)[0]['translation_text']
+        return translated
+    return text
 def preprocess_image(image: Image.Image) -> Tuple[str, Image.Image]:
     """
     Preprocess the input image and save the result under a fresh trial id.
     Args:
         image (Image.Image): The input image.
     Returns:
         str: uuid of the trial.
         Image.Image: The preprocessed image.
     """
     new_trial = str(uuid.uuid4())
     prepared = pipeline.preprocess_image(image)
     # The preprocessed image is written to TMP_DIR, named after the trial id.
     png_path = f"{TMP_DIR}/{new_trial}.png"
     prepared.save(png_path)
     return new_trial, prepared
 def pack_state(gs: Gaussian, mesh: MeshExtractResult, trial_id: str) -> dict:
     return {
         'gaussian': {
         },
         'trial_id': trial_id,
     }
 def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
     gs = Gaussian(
         aabb=state['gaussian']['aabb'],
     return gs, mesh, state['trial_id']
 @spaces.GPU
 def image_to_3d(trial_id: str, seed: int, randomize_seed: bool, ss_guidance_strength: float, ss_sampling_steps: int, slat_guidance_strength: float, slat_sampling_steps: int) -> Tuple[dict, str]:
     if randomize_seed:
         seed = np.random.randint(0, MAX_SEED)
     outputs = pipeline.run(
     state = pack_state(outputs['gaussian'][0], outputs['mesh'][0], trial_id)
     return state, video_path
+@spaces.GPU
+def generate_image_from_text(prompt, height, width, guidance_scale, num_steps):
+    translated_prompt = translate_if_korean(prompt)
+    with torch.inference_mode():
+        image = flux_pipe(
+            prompt=[translated_prompt],
+            height=height,
+            width=width,
+            guidance_scale=guidance_scale,
+            num_inference_steps=num_steps
+        ).images[0]
+        return image
 @spaces.GPU
 def extract_glb(state: dict, mesh_simplify: float, texture_size: int) -> Tuple[str, str]:
     """
     Extract a GLB file from the packed 3D model state.
     Args:
         state (dict): The packed state, as accepted by unpack_state.
         mesh_simplify (float): The mesh simplification factor.
         texture_size (int): The texture resolution.
     Returns:
         Tuple[str, str]: The path of the exported GLB file, returned twice.
     """
     gaussian, mesh_result, trial = unpack_state(state)
     # The file is written to TMP_DIR, named after the trial id.
     out_path = f"{TMP_DIR}/{trial}.glb"
     postprocessing_utils.to_glb(
         gaussian,
         mesh_result,
         simplify=mesh_simplify,
         texture_size=texture_size,
         verbose=False,
     ).export(out_path)
     return out_path, out_path
 def activate_button() -> gr.Button:
     """Build a Button with ``interactive=True``, i.e. one that can be clicked."""
     enabled_button = gr.Button(interactive=True)
     return enabled_button
 def deactivate_button() -> gr.Button:
     """Build a Button with ``interactive=False``, i.e. one that cannot be clicked."""
     disabled_button = gr.Button(interactive=False)
     return disabled_button
 with gr.Blocks() as demo:
     gr.Markdown("""
+    # 3D Asset Creation & Text-to-Image Generation
     """)
+    with gr.Tabs():
+        with gr.TabItem("Image to 3D"):
+            with gr.Row():
+                with gr.Column():
+                    image_prompt = gr.Image(label="Image Prompt", image_mode="RGBA", type="pil", height=300)
+                    with gr.Accordion(label="Generation Settings", open=False):
+                        seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
+                        randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+                        gr.Markdown("Stage 1: Sparse Structure Generation")
+                        with gr.Row():
+                            ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
+                            ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
+                        gr.Markdown("Stage 2: Structured Latent Generation")
+                        with gr.Row():
+                            slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
+                            slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
+                    generate_btn = gr.Button("Generate")
+                    with gr.Accordion(label="GLB Extraction Settings", open=False):
+                        mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
+                        texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
+                    extract_glb_btn = gr.Button("Extract GLB", interactive=False)
+                with gr.Column():
+                    video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
+                    model_output = LitModel3D(label="Extracted GLB", exposure=20.0, height=300)
+                    download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
+        with gr.TabItem("Text to Image"):
+            with gr.Row():
+                with gr.Column():
+                    text_prompt = gr.Textbox(
+                        label="Text Prompt",
+                        placeholder="Enter your image description...",
+                        lines=3
+                    )
+                    with gr.Row():
+                        txt2img_height = gr.Slider(256, 1024, value=512, step=64, label="Height")
+                        txt2img_width = gr.Slider(256, 1024, value=512, step=64, label="Width")
+                    with gr.Row():
+                        guidance_scale = gr.Slider(1.0, 20.0, value=7.5, label="Guidance Scale")
+                        num_steps = gr.Slider(1, 50, value=20, label="Number of Steps")
+                    generate_txt2img_btn = gr.Button("Generate Image")
+                with gr.Column():
+                    txt2img_output = gr.Image(label="Generated Image")
     trial_id = gr.Textbox(visible=False)
     output_buf = gr.State()
+    # Example images
     with gr.Row():
         examples = gr.Examples(
             examples=[
             examples_per_page=64,
         )
+# Handlers
     image_prompt.upload(
         preprocess_image,
         inputs=[image_prompt],
         outputs=[trial_id, image_prompt],
     )
     image_prompt.clear(
         lambda: '',
         outputs=[trial_id],
         deactivate_button,
         outputs=[download_glb],
     )
+    # Text to Image handler
+    generate_txt2img_btn.click(
+        generate_image_from_text,
+        inputs=[text_prompt, txt2img_height, txt2img_width, guidance_scale, num_steps],
+        outputs=[txt2img_output]
+    )
 # Launch the Gradio app
 if __name__ == "__main__":
+    initialize_models()  # 모든 모델 초기화
     try:
         pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))    # Preload rembg
     except:
         pass
+    demo.launch()