azizinaghsh committed
Commit f5aaf3c · Parent(s): 727445c

add character position input

Files changed (1): app.py (+11, -15)
app.py CHANGED
@@ -94,7 +94,7 @@ def generate(
     prompt: str,
     seed: int,
     guidance_weight: float,
-    sample_label: str,
+    character_position: list,
     # ----------------------- #
     dataset: MultimodalDataset,
     device: torch.device,
@@ -110,10 +110,12 @@ def generate(
     diffuser.guidance_weight = guidance_weight
 
     # Inference
-    sample_id = SAMPLE_IDS[LABEL_TO_IDS[sample_label]]
+    sample_id = SAMPLE_IDS[0]  # Default to the first sample ID
     seq_feat = diffuser.net.model.clip_sequential
 
     batch = get_batch(prompt, sample_id, clip_model, dataset, seq_feat, device)
+    batch["character_position"] = torch.tensor(character_position, device=device)
+
     with torch.no_grad():
         out = diffuser.predict_step(batch, 0)
 
@@ -158,17 +160,17 @@ def launch_app(gen_fn: Callable):
        with gr.Row():
            with gr.Column(scale=3):
                with gr.Column(scale=2):
-                    sample_str = gr.Dropdown(
-                        choices=["static", "right", "complex"],
-                        label="Character trajectory",
-                        value="right",
-                        interactive=True,
+                    char_position = gr.Textbox(
+                        placeholder="Enter character position as [x, y, z]",
+                        show_label=True,
+                        label="Character Position (3D vector)",
+                        value="[0.0, 0.0, 0.0]",
                     )
                     text = gr.Textbox(
                         placeholder="Type the camera motion you want to generate",
                         show_label=True,
                         label="Text prompt",
-                        value=DEFAULT_TEXT[LABEL_TO_IDS[sample_str.value]],
+                        value=DEFAULT_TEXT[0],
                     )
                     seed = gr.Number(value=33, label="Seed")
                     guidance = gr.Slider(0, 10, value=1.4, label="Guidance", step=0.1)
@@ -189,13 +191,7 @@ def launch_app(gen_fn: Callable):
            processed_example = examples.non_none_processed_examples[example_id]
            return gr.utils.resolve_singleton(processed_example)
 
-        def change_fn(change):
-            sample_index = LABEL_TO_IDS[change]
-            return gr.update(value=DEFAULT_TEXT[sample_index])
-
-        sample_str.change(fn=change_fn, inputs=[sample_str], outputs=[text])
-
-        inputs = [text, seed, guidance, sample_str]
+        inputs = [text, seed, guidance, char_position]
         examples.dataset.click(
             load_example,
             inputs=[examples.dataset],
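
Note on the new input: gr.Textbox hands its value to the callback as a raw string (here "[0.0, 0.0, 0.0]"), so the string would need to be parsed into a list of floats before torch.tensor(character_position, device=device) can consume it. Below is a minimal sketch of such a parsing step, assuming the bracketed format shown in the textbox default; parse_character_position is a hypothetical helper and is not part of this commit.

import ast

import torch


def parse_character_position(raw: str, device: torch.device) -> torch.Tensor:
    # The gr.Textbox delivers its value as a string such as "[0.0, 0.0, 0.0]",
    # so it has to be turned into numbers before building the tensor.
    values = ast.literal_eval(raw)           # e.g. [0.0, 0.0, 0.0]
    position = [float(v) for v in values]    # coerce to floats, fail early on bad input
    if len(position) != 3:
        raise ValueError("Character position must be a 3D vector [x, y, z]")
    return torch.tensor(position, device=device)

Under this assumption, the batch line in generate() would become batch["character_position"] = parse_character_position(character_position, device).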
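
For context, the inputs list assembled above fixes the argument order the UI passes to the generate callback. A self-contained sketch of that wiring pattern under stated assumptions: the gr.Blocks layout, the Generate button, and the stand-in fake_generate callback are illustrative and not taken from app.py.

import gradio as gr


def fake_generate(prompt, seed, guidance, character_position):
    # Stand-in for gen_fn: echo the arguments it would receive from the UI.
    return f"prompt={prompt!r}, seed={seed}, guidance={guidance}, position={character_position}"


with gr.Blocks() as demo:
    char_position = gr.Textbox(value="[0.0, 0.0, 0.0]", label="Character Position (3D vector)")
    text = gr.Textbox(label="Text prompt")
    seed = gr.Number(value=33, label="Seed")
    guidance = gr.Slider(0, 10, value=1.4, label="Guidance", step=0.1)
    output = gr.Textbox(label="Output")
    gr.Button("Generate").click(
        fake_generate, inputs=[text, seed, guidance, char_position], outputs=[output]
    )

# demo.launch()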