Spaces:
Running
Running
smellslikeml
committed on
Commit
·
6229c52
1
Parent(s):
56ccf9e
update app.py
Browse files
app.py
CHANGED
@@ -47,9 +47,15 @@ class Llava:
|
|
47 |
)
|
48 |
return res["choices"][0]["message"]["content"]
|
49 |
|
50 |
-
# Initialize the model
|
51 |
llm_model = Llava()
|
52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
title_and_links_markdown = """
|
54 |
# 🛸SpaceLLaVA🌋: A spatial reasoning multi-modal model
|
55 |
This space hosts our initial release of LLaVA 1.5 LoRA tuned for spatial reasoning using data generated with [VQASynth](https://github.com/remyxai/VQASynth).
|
@@ -58,26 +64,20 @@ Upload an image and ask a question.
|
|
58 |
[Model](https://huggingface.co/remyxai/SpaceLLaVA) | [Code](https://github.com/remyxai/VQASynth) | [Paper](https://spatial-vlm.github.io)
|
59 |
"""
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
64 |
|
65 |
image_input = gr.Image(type="pil", label="Input Image")
|
66 |
text_input = gr.Textbox(label="Prompt")
|
67 |
-
|
68 |
-
# Initialize interface with examples
|
69 |
iface = gr.Interface(
|
70 |
-
fn=predict,
|
71 |
-
inputs=[image_input, text_input],
|
72 |
-
outputs="text",
|
73 |
-
title="Llava Model Inference",
|
74 |
-
description="Input an image and a prompt to receive a description."
|
75 |
)
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
["examples/warehouse_2.jpg", "Is the forklift taller than the shelves of boxes?"],
|
80 |
-
]
|
81 |
|
82 |
-
iface.examples = examples
|
83 |
iface.launch()
|
|
|
47 |
)
|
48 |
return res["choices"][0]["message"]["content"]
|
49 |
|
|
|
50 |
llm_model = Llava()
|
51 |
|
52 |
+
def predict(image, prompt):
|
53 |
+
result = llm_model.run_inference(image, prompt)
|
54 |
+
return result
|
55 |
+
|
56 |
+
image_input = gr.Image(type="pil", label="Input Image")
|
57 |
+
text_input = gr.Textbox(label="Prompt")
|
58 |
+
|
59 |
title_and_links_markdown = """
|
60 |
# 🛸SpaceLLaVA🌋: A spatial reasoning multi-modal model
|
61 |
This space hosts our initial release of LLaVA 1.5 LoRA tuned for spatial reasoning using data generated with [VQASynth](https://github.com/remyxai/VQASynth).
|
|
|
64 |
[Model](https://huggingface.co/remyxai/SpaceLLaVA) | [Code](https://github.com/remyxai/VQASynth) | [Paper](https://spatial-vlm.github.io)
|
65 |
"""
|
66 |
|
67 |
+
examples = [
|
68 |
+
["examples/warehouse_1.jpg", "Is the man wearing gray pants to the left of the pile of boxes on a pallet?"],
|
69 |
+
["examples/warehouse_2.jpg", "Is the forklift taller than the shelves of boxes?"],
|
70 |
+
]
|
71 |
|
72 |
image_input = gr.Image(type="pil", label="Input Image")
|
73 |
text_input = gr.Textbox(label="Prompt")
|
|
|
|
|
74 |
iface = gr.Interface(
|
75 |
+
fn=predict,
|
76 |
+
inputs=[image_input, text_input],
|
77 |
+
outputs="text",
|
|
|
|
|
78 |
)
|
79 |
|
80 |
+
iface.add_component(gr.Markdown(title_and_links_markdown), "header")
|
81 |
+
iface.set_examples(examples)
|
|
|
|
|
82 |
|
|
|
83 |
iface.launch()
|