Spaces:
Configuration error
Configuration error
update
Browse files
app.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import os
|
|
|
2 |
os.system('pip install flash-attn --no-build-isolation')
|
3 |
os.system("pip install gradio==4.44.1")
|
4 |
|
@@ -11,9 +12,6 @@ from PIL import Image
|
|
11 |
from transformers import AutoModelForCausalLM, AutoProcessor
|
12 |
import re
|
13 |
import random
|
14 |
-
from vlms.magma import MagmaAgent
|
15 |
-
from vlms.llavaov import LLaVAOVAgent
|
16 |
-
from vlms.qwen2vl import Qwen2VLAgent
|
17 |
|
18 |
pygame.mixer.quit() # Disable sound
|
19 |
|
@@ -36,9 +34,12 @@ STATIC = (0, 0)
|
|
36 |
|
37 |
ACTIONS = ["up", "down", "left", "right", "static"]
|
38 |
|
|
|
39 |
dtype = torch.bfloat16
|
40 |
-
|
41 |
-
|
|
|
|
|
42 |
|
43 |
magma_img = pygame.image.load("./assets/images/magma_game_thin.png")
|
44 |
magma_img = pygame.transform.scale(magma_img, (GRID_SIZE, GRID_SIZE))
|
@@ -76,14 +77,14 @@ class MagmaFindGPU:
|
|
76 |
elif action == "static":
|
77 |
self.direction = STATIC
|
78 |
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
|
83 |
new_head = (self.snake[0][0] + self.direction[0], self.snake[0][1] + self.direction[1])
|
84 |
|
85 |
if new_head[0] < 0 or new_head[1] < 0 or new_head[0] >= WIDTH // GRID_SIZE or new_head[1] >= HEIGHT // GRID_SIZE:
|
86 |
-
|
87 |
return self.render(), self.score
|
88 |
|
89 |
self.snake = [new_head] # Keep only the head (single block snake)
|
@@ -132,15 +133,32 @@ class MagmaFindGPU:
|
|
132 |
def get_state(self):
|
133 |
return self.render()
|
134 |
|
135 |
-
|
136 |
-
game_2 = MagmaFindGPU()
|
137 |
|
138 |
-
def play_game(
|
139 |
state, state_som = game.get_state()
|
140 |
pil_img = Image.fromarray(state_som)
|
141 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
# extract the mark id from the action using re
|
143 |
-
# print(agent.__class__.__name__, action)
|
144 |
match = re.search(r'\d+', action)
|
145 |
if match:
|
146 |
action = match.group(0)
|
@@ -153,46 +171,42 @@ def play_game(game, agent):
|
|
153 |
action = random.choice(ACTIONS[:-1])
|
154 |
|
155 |
img, score = game.step(action)
|
156 |
-
|
157 |
-
|
158 |
-
def play_game_1():
|
159 |
-
return play_game(game_1, agent_1)
|
160 |
|
161 |
-
def
|
162 |
-
|
|
|
163 |
|
164 |
-
def reset_games():
|
165 |
-
game_1.reset()
|
166 |
-
game_2.reset()
|
167 |
-
return game_1.render()[0], "Score: 0", game_2.render()[0], "Score: 0"
|
168 |
MARKDOWN = """
|
169 |
<div align="center">
|
170 |
<h2>Magma: A Foundation Model for Multimodal AI Agents</h2>
|
171 |
|
172 |
\[[arXiv Paper](https://www.arxiv.org/pdf/2502.13130)\] \[[Project Page](https://microsoft.github.io/Magma/)\] \[[Github Repo](https://github.com/microsoft/Magma)\] \[[Hugging Face Model](https://huggingface.co/microsoft/Magma-8B)\]
|
173 |
|
174 |
-
<h3>Magma Arena: A battle between two agents to collect the green blocks by automatically moving up, down, left and right.</h3>
|
175 |
-
|
176 |
This demo is powered by [Gradio](https://gradio.app/).
|
177 |
|
|
|
|
|
178 |
</div>
|
179 |
"""
|
180 |
|
181 |
with gr.Blocks() as interface:
|
182 |
gr.Markdown(MARKDOWN)
|
183 |
with gr.Row():
|
|
|
184 |
with gr.Column():
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
start_btn.click(fn=
|
197 |
-
|
198 |
-
interface.launch(
|
|
|
1 |
import os
|
2 |
+
# add a command for installing flash-attn
|
3 |
os.system('pip install flash-attn --no-build-isolation')
|
4 |
os.system("pip install gradio==4.44.1")
|
5 |
|
|
|
12 |
from transformers import AutoModelForCausalLM, AutoProcessor
|
13 |
import re
|
14 |
import random
|
|
|
|
|
|
|
15 |
|
16 |
pygame.mixer.quit() # Disable sound
|
17 |
|
|
|
34 |
|
35 |
ACTIONS = ["up", "down", "left", "right", "static"]
|
36 |
|
37 |
+
# Load AI Model
|
38 |
dtype = torch.bfloat16
|
39 |
+
magma_model_id = "microsoft/Magma-8B"
|
40 |
+
magam_model = AutoModelForCausalLM.from_pretrained(magma_model_id, trust_remote_code=True, torch_dtype=dtype)
|
41 |
+
magma_processor = AutoProcessor.from_pretrained(magma_model_id, trust_remote_code=True)
|
42 |
+
magam_model.to("cuda")
|
43 |
|
44 |
magma_img = pygame.image.load("./assets/images/magma_game_thin.png")
|
45 |
magma_img = pygame.transform.scale(magma_img, (GRID_SIZE, GRID_SIZE))
|
|
|
77 |
elif action == "static":
|
78 |
self.direction = STATIC
|
79 |
|
80 |
+
if self.game_over:
|
81 |
+
self.reset()
|
82 |
+
return self.render(), self.score
|
83 |
|
84 |
new_head = (self.snake[0][0] + self.direction[0], self.snake[0][1] + self.direction[1])
|
85 |
|
86 |
if new_head[0] < 0 or new_head[1] < 0 or new_head[0] >= WIDTH // GRID_SIZE or new_head[1] >= HEIGHT // GRID_SIZE:
|
87 |
+
self.game_over = True
|
88 |
return self.render(), self.score
|
89 |
|
90 |
self.snake = [new_head] # Keep only the head (single block snake)
|
|
|
133 |
def get_state(self):
|
134 |
return self.render()
|
135 |
|
136 |
+
game = MagmaFindGPU()
|
|
|
137 |
|
138 |
+
def play_game():
|
139 |
state, state_som = game.get_state()
|
140 |
pil_img = Image.fromarray(state_som)
|
141 |
+
convs = [
|
142 |
+
{"role": "system", "content": "You are an agent that can see, talk, and act. Avoid hitting the wall."},
|
143 |
+
{"role": "user", "content": "<image_start><image><image_end>\nWhich mark is closer to green block? Answer with a single number."},
|
144 |
+
]
|
145 |
+
prompt = magma_processor.tokenizer.apply_chat_template(convs, tokenize=False, add_generation_prompt=True)
|
146 |
+
inputs = magma_processor(images=[pil_img], texts=prompt, return_tensors="pt")
|
147 |
+
inputs['pixel_values'] = inputs['pixel_values'].unsqueeze(0)
|
148 |
+
inputs['image_sizes'] = inputs['image_sizes'].unsqueeze(0)
|
149 |
+
inputs = inputs.to("cuda").to(dtype)
|
150 |
+
generation_args = {
|
151 |
+
"max_new_tokens": 10,
|
152 |
+
"temperature": 0.3,
|
153 |
+
"do_sample": True,
|
154 |
+
"use_cache": True,
|
155 |
+
"num_beams": 1,
|
156 |
+
}
|
157 |
+
with torch.inference_mode():
|
158 |
+
generate_ids = magam_model.generate(**inputs, **generation_args)
|
159 |
+
generate_ids = generate_ids[:, inputs["input_ids"].shape[-1] :]
|
160 |
+
action = magma_processor.decode(generate_ids[0], skip_special_tokens=True).strip()
|
161 |
# extract the mark id from the action using re
|
|
|
162 |
match = re.search(r'\d+', action)
|
163 |
if match:
|
164 |
action = match.group(0)
|
|
|
171 |
action = random.choice(ACTIONS[:-1])
|
172 |
|
173 |
img, score = game.step(action)
|
174 |
+
img = img[0]
|
175 |
+
return img, f"Score: {score}"
|
|
|
|
|
176 |
|
177 |
+
def reset_game():
|
178 |
+
game.reset()
|
179 |
+
return game.render()[0], "Score: 0"
|
180 |
|
|
|
|
|
|
|
|
|
181 |
MARKDOWN = """
|
182 |
<div align="center">
|
183 |
<h2>Magma: A Foundation Model for Multimodal AI Agents</h2>
|
184 |
|
185 |
\[[arXiv Paper](https://www.arxiv.org/pdf/2502.13130)\] \[[Project Page](https://microsoft.github.io/Magma/)\] \[[Github Repo](https://github.com/microsoft/Magma)\] \[[Hugging Face Model](https://huggingface.co/microsoft/Magma-8B)\]
|
186 |
|
|
|
|
|
187 |
This demo is powered by [Gradio](https://gradio.app/).
|
188 |
|
189 |
+
<b>Goal: Collects the green blocks by automatically moving up, down, left and right.</b>
|
190 |
+
|
191 |
</div>
|
192 |
"""
|
193 |
|
194 |
with gr.Blocks() as interface:
|
195 |
gr.Markdown(MARKDOWN)
|
196 |
with gr.Row():
|
197 |
+
image_output = gr.Image(label="Game Screen")
|
198 |
with gr.Column():
|
199 |
+
score_output = gr.Text(label="Score", elem_classes="large-text")
|
200 |
+
gr.HTML("""
|
201 |
+
<style>
|
202 |
+
.large-text textarea {
|
203 |
+
font-size: 24px !important;
|
204 |
+
}
|
205 |
+
</style>
|
206 |
+
""")
|
207 |
+
start_btn = gr.Button("Start/Reset Game")
|
208 |
+
|
209 |
+
interface.load(fn=play_game, every=1, inputs=[], outputs=[image_output, score_output])
|
210 |
+
start_btn.click(fn=reset_game, inputs=[], outputs=[image_output, score_output])
|
211 |
+
|
212 |
+
interface.launch()
|
vlms/__pycache__/llavanext.cpython-310.pyc
DELETED
Binary file (1.79 kB)
|
|
vlms/__pycache__/llavaov.cpython-310.pyc
DELETED
Binary file (1.81 kB)
|
|
vlms/__pycache__/magma.cpython-310.pyc
DELETED
Binary file (1.93 kB)
|
|
vlms/__pycache__/qwen25vl.cpython-310.pyc
DELETED
Binary file (2.05 kB)
|
|
vlms/__pycache__/qwen2vl.cpython-310.pyc
DELETED
Binary file (2.01 kB)
|
|