Spaces:
Sleeping
Sleeping
Upload 11 files
Browse files- LICENSE +21 -0
- README.md +1 -13
- app.py +26 -0
- grip_env/environment.py +172 -0
- grip_env/layout.py +157 -0
- grip_env/pieces.py +104 -0
- grip_env/test_env.py +34 -0
- requirements.txt +17 -0
- src/boards.py +43 -0
- src/test_easy.json +0 -0
- src/utils.py +0 -0
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2024 kushal-10
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
CHANGED
@@ -1,13 +1 @@
|
|
1 |
-
|
2 |
-
title: Pento LLaVA
|
3 |
-
emoji: 🤖
|
4 |
-
colorFrom: purple
|
5 |
-
colorTo: gray
|
6 |
-
sdk: gradio
|
7 |
-
sdk_version: 5.5.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
short_description: 'Demo Gameplay for Pento-LLaVA '
|
11 |
-
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
# pento-llava
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import numpy as np
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
|
5 |
+
from src.boards import GenerateBoard
|
6 |
+
|
7 |
+
# HTML heading shown at the top of the demo page.
TITLE = """<h1 align="center" id="space-title"> Pento-LLaVA 🤖🎯🎮</h1>"""

# Load one 'easy' board on an 18x18 grid and render its initial state.
board_generator = GenerateBoard('easy', 18)
initial_board_image, target_positions, info = board_generator.setup_initial_board()

# Wrap the rendered board image in a matplotlib figure without axes so it
# can be handed to gr.Plot.
fig, ax = plt.subplots()
ax.imshow(initial_board_image)
ax.axis('off')

# Page layout: the title followed by the board figure.
with gr.Blocks() as pento_llava_app:
    gr.HTML(TITLE)
    gr.Plot(fig)

# NOTE(review): load() is called without a function, so it presumably
# registers no listener - confirm whether it can be dropped.
pento_llava_app.load()

pento_llava_app.queue()
pento_llava_app.launch()
|
grip_env/environment.py
ADDED
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Working Grid environment
|
2 |
+
|
3 |
+
import gym
|
4 |
+
from gym import spaces
|
5 |
+
import pygame
|
6 |
+
import numpy as np
|
7 |
+
|
8 |
+
|
9 |
+
class GridWorldEnv(gym.Env):
    """Square grid world with pentomino pieces and a single gripper agent.

    Args:
        render_mode: ``None``, ``"human"`` (PyGame window) or ``"rgb_array"``
            (frames returned as arrays by ``render()``).
        size: Number of cells along one side of the square grid.
        grid_info: Iterable of piece dictionaries. Rendering reads the keys
            ``"piece_grids"`` (cells occupied by the piece) and
            ``"piece_colour"`` (colour passed to PyGame). ``None`` draws no
            pieces.
        agent_pos: ``[x, y]`` start cell of the agent.
        target_pos: Either a single ``[x, y]`` cell or a sequence of such
            cells; the episode terminates when the agent stands on any of
            them.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 2}

    def __init__(self, render_mode=None, size=5, grid_info=None, agent_pos=None, target_pos=None):
        self.size = size  # The size of the square grid
        self.window_size = 500  # The size of the PyGame window
        self.grid_info = grid_info
        self.agent_pos = agent_pos
        self.target_pos = target_pos

        # Observations are dictionaries with the agent's and the target's location.
        # Each coordinate lies in {0, ..., size - 1}.
        self.observation_space = spaces.Dict(
            {
                "agent": spaces.Box(0, size - 1, shape=(2,), dtype=int),
                "target": spaces.Box(0, size - 1, shape=(2,), dtype=int),
            }
        )

        # We have 6 actions: "right", "down", "left", "up", "wait" and "grip".
        self.action_space = spaces.Discrete(6)

        # Maps each abstract action from `self.action_space` to the
        # displacement applied to the agent when that action is taken.
        self._action_to_direction = {
            0: np.array([1, 0]),   # Right
            1: np.array([0, 1]),   # Down (y grows downwards on screen)
            2: np.array([-1, 0]),  # Left
            3: np.array([0, -1]),  # Up
            4: np.array([0, 0]),   # Wait
            # NOTE(review): "grip" currently moves the agent diagonally by
            # (1, 1) - confirm this is the intended placeholder.
            5: np.array([1, 1]),
        }

        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode

        # `window` and `clock` are created lazily the first time a frame is
        # rendered in human mode and stay `None` until then.
        self.window = None
        self.clock = None

    def _target_cells(self):
        """Return the target location(s) as a 2-D array with one row per cell."""
        return np.atleast_2d(self._target_location)

    def _on_target(self):
        """Return True when the agent's current cell equals any target cell."""
        matches = (self._target_cells() == self._agent_location).all(axis=1)
        return bool(matches.any())

    def _get_obs(self):
        """Return the observation dictionary with agent and target locations."""
        return {"agent": self._agent_location, "target": self._target_location}

    def _get_info(self):
        """Return the Manhattan distance from the agent to the nearest target cell."""
        offsets = np.abs(self._target_cells() - self._agent_location)
        return {"distance": float(offsets.sum(axis=1).min())}

    def reset(self, seed=None, options=None):
        """Move the agent back to ``agent_pos`` and return ``(observation, info)``.

        Raises:
            ValueError: If ``agent_pos`` or ``target_pos`` was not supplied.
        """
        # NOTE: `super().reset(seed=seed)` stays disabled as in the original
        # code; this environment draws no random numbers itself.
        if self.agent_pos is None or self.target_pos is None:
            raise ValueError("agent_pos and target_pos must be set before calling reset()")

        # Copy so later steps can never modify the constructor arguments.
        self._agent_location = np.array(self.agent_pos)
        self._target_location = np.array(self.target_pos)

        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, info

    def step(self, action):
        """Apply ``action`` and return ``(observation, reward, terminated, info)``.

        The return value keeps the original 4-tuple layout, which has no
        separate ``truncated`` flag.
        """
        direction = self._action_to_direction[action]
        # `np.clip` keeps the agent inside the grid
        self._agent_location = np.clip(
            self._agent_location + direction, 0, self.size - 1
        )
        # An episode is done once the agent's *current* cell is a target cell.
        terminated = self._on_target()
        reward = 1 if terminated else 0  # Binary sparse rewards
        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, reward, terminated, info

    def render(self):
        """Return the current frame in ``rgb_array`` mode, otherwise ``None``."""
        if self.render_mode == "rgb_array":
            return self._render_frame()

    def _draw_rect(self, canvas, color, pos, pix_square_size):
        """Fill the grid cell ``pos`` on ``canvas`` with ``color``."""
        top_left = pix_square_size * np.asarray(pos)
        pygame.draw.rect(
            canvas,
            color,
            pygame.Rect(top_left, (pix_square_size, pix_square_size)),
        )

    def _render_frame(self):
        """Draw pieces and agent; show the frame (human) or return it (rgb_array)."""
        if self.window is None and self.render_mode == "human":
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode((self.window_size, self.window_size))
        if self.clock is None and self.render_mode == "human":
            self.clock = pygame.time.Clock()

        canvas = pygame.Surface((self.window_size, self.window_size))
        canvas.fill((255, 255, 255))
        # The size of a single grid square in pixels
        pix_square_size = int(self.window_size / self.size)

        # Draw pieces (nothing to draw when no grid_info was supplied)
        if self.grid_info is not None:
            for piece in self.grid_info:
                for pos in piece["piece_grids"]:
                    self._draw_rect(canvas, piece["piece_colour"], pos, pix_square_size)

        # Draw the agent as a black circle centred in its cell
        pygame.draw.circle(
            canvas,
            (0, 0, 0),
            (self._agent_location + 0.5) * pix_square_size,
            pix_square_size / 3,
        )

        if self.render_mode == "human":
            # Copy the drawing from `canvas` to the visible window
            self.window.blit(canvas, canvas.get_rect())
            pygame.event.pump()
            pygame.display.update()

            # Add a delay so human rendering runs at the configured framerate
            self.clock.tick(self.metadata["render_fps"])
        else:  # rgb_array
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)
            )

    def close(self):
        """Shut down PyGame if a window was opened."""
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()
            # Forget the stale handles so a later render can reopen a window.
            self.window = None
            self.clock = None
|
167 |
+
|
168 |
+
if __name__ == '__main__':
    # Minimal smoke run. reset() needs agent and target positions, so they
    # are supplied explicitly here; no pieces are placed on the grid.
    demo_size = 20
    centre = demo_size // 2
    env = GridWorldEnv(
        size=demo_size,
        grid_info=[],
        agent_pos=np.array([centre, centre]),
        target_pos=np.array([0, 0]),
    )
    env.reset()
    env.render()
    env.close()
|
grip_env/layout.py
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
|
3 |
+
from grip_env.pieces import PentominoPiece, COLOURS, COLOUR_NAMES, PIECE_NAMES
|
4 |
+
from utils import layout_utils
|
5 |
+
import math
|
6 |
+
|
7 |
+
# Setting the layout fixed for now i.e start positions of each piece are at every 5x5 grid
|
8 |
+
class BoardLayout():
    '''
    This class is used to generate a random layout of pentomino pieces on a board.
    Args:
        board_size: The number of grids in one dimension of the Pentomino Board
        num_pieces: The number of pieces to be placed on the board, including the target piece
        shapes: A list of the pentomino shapes to be selected from
        colours: A list of the pentomino colours to be selected from
        seed: The seed for the random number generator (seeds NumPy's global RNG)
    '''
    def __init__(self, board_size: int, num_pieces: int, shapes: list, colours: list, seed: int):
        self.board_size = board_size
        self.num_pieces = num_pieces
        self.shapes = shapes
        self.colours = colours
        self.mapped_regions = layout_utils.map_regions(self.board_size)
        np.random.seed(seed)

    def _try_spawn_pieces(self):
        '''
        Make one attempt at drawing a valid start position for every piece.
        Returns:
            The list of start positions, or None when some piece ran out of
            candidate cells before a valid one was found.
        '''
        # Every cell of the board is a candidate spawn location
        spawn_choices = [[x, y] for x in range(self.board_size) for y in range(self.board_size)]
        placed = []
        for _ in range(self.num_pieces):
            # Draw randomly until layout_utils.valid accepts a candidate
            while spawn_choices:
                choice_index = np.random.randint(0, len(spawn_choices))
                candidate = spawn_choices[choice_index]
                if layout_utils.valid(self.board_size, candidate, placed):
                    placed.append(candidate)
                    break
                # Rejected candidates are never offered again in this attempt
                spawn_choices.pop(choice_index)
            else:
                # Candidates exhausted without placing this piece
                return None
        return placed

    def set_start_positions(self) -> list:
        '''
        Get start positions of all pieces on the board
        Returns:
            all_start_positions - [[p1x, p1y], [p2x, p2y], ....]
            One start position per piece; the agent position is not included.
        Raises:
            RuntimeError: If no valid layout is found within the maximum number of tries
        '''
        max_tries = 100  # Maximum number of full layout attempts
        for _ in range(max_tries):
            all_start_positions = self._try_spawn_pieces()
            if all_start_positions is not None:
                return all_start_positions

        raise RuntimeError(
            "Max tries exceeded - Restart Board Layout - "
            "Try increasing the board size or reducing the number of pieces"
        )

    def set_board_layout(self, target_shape=None, target_colour=None, level=None):
        '''
        Place all pieces and assign each a shape, colour and rotation.
        Args:
            target_shape: Shape forced onto the first (target) piece, if given
            target_colour: Colour forced onto the first (target) piece, if given
            level: "easy"/"sample" (unique shapes and colours, no rotation),
                   "medium" (unique shapes and colours, random rotation);
                   any other value removes only the shape or only the colour
                   after each piece and uses a random rotation
        Returns:
            agent_start_pos: Agent start cell at the center of the board
            target_pos: Start position of the first piece (None if there are no pieces)
            grid_info: A list with one dictionary of piece data per piece
        '''
        # Get all start positions for all pieces on the board
        all_start_positions = self.set_start_positions()

        # Set agent start position at the center of the board
        center_sq = math.ceil(self.board_size / 2)
        agent_start_pos = np.array([center_sq, center_sq], dtype=np.int64)

        grid_info = []
        target_pos = None
        available_shapes = list(self.shapes)    # Shapes still selectable
        available_colours = list(self.colours)  # Colours still selectable

        for i, piece_position in enumerate(all_start_positions):
            # Draw shape first, then colour, from what is still available
            piece_shape = np.random.choice(available_shapes)
            colour_name = np.random.choice(available_colours)

            # The first piece is the target piece
            if i == 0:
                target_pos = piece_position
                if target_shape:
                    piece_shape = target_shape  # Overwrite target shape if specified
                if target_colour:
                    colour_name = target_colour  # Overwrite target colour if specified

            if level == "easy" or level == "sample":
                # Each shape and colour may be used only once; no rotation
                available_shapes.remove(piece_shape)
                available_colours.remove(colour_name)
                piece_rotation = 0
            elif level == "medium":
                # Same uniqueness as easy, but with a random rotation
                available_shapes.remove(piece_shape)
                available_colours.remove(colour_name)
                piece_rotation = np.random.randint(0, 4)
            else:
                # Hard level: randomly retire either the colour or the shape,
                # so the other attribute may repeat on later pieces
                if np.random.randint(0, 2):
                    available_colours.remove(colour_name)
                else:
                    available_shapes.remove(piece_shape)
                piece_rotation = np.random.randint(0, 4)

            piece = PentominoPiece(piece_shape, piece_rotation, piece_position)
            grid_info.append({
                "piece_grids": piece.get_grid_locations(),
                "piece_colour": colour_name,
                "colour_value": COLOURS[colour_name],
                "start_position": piece_position,
                "piece_shape": piece_shape,
                "piece_rotation": piece_rotation,
                "piece_region": layout_utils.get_region(piece_position, self.mapped_regions),
            })

        return agent_start_pos, target_pos, grid_info
|
149 |
+
|
150 |
+
|
151 |
+
if __name__ == '__main__':
    # Build a sample easy-level board with a red 'P' piece as the target.
    target_spec = {
        'target_shape': 'P',
        'target_colour': 'red',
        'level': 'easy',
    }
    board1 = BoardLayout(
        board_size=18,
        num_pieces=4,
        shapes=PIECE_NAMES,
        colours=COLOUR_NAMES,
        seed=640,
    )
    agent_start_pos, target_pos, info = board1.set_board_layout(**target_spec)
|
157 |
+
|
grip_env/pieces.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Define the pentomino pieces and handle their rotations
|
2 |
+
# Output should be an array/list of [x, y] grid coordinates to be marked in the pentomino grid environment
|
3 |
+
|
4 |
+
import numpy as np
|
5 |
+
|
6 |
+
# Piece definitions for 0 rotation
# Pieces are P, T, U, W, X, Z
# Each piece is a 3x3 mask (shifted from a 5x5 grid) with 1 marking an occupied cell

P = [
    [0, 1, 1],
    [0, 1, 1],
    [0, 1, 0]]

T = [
    [1, 1, 1],
    [0, 1, 0],
    [0, 1, 0]]

U = [
    [1, 0, 1],
    [1, 1, 1],
    [0, 0, 0]]

W = [
    [1, 0, 0],
    [1, 1, 0],
    [0, 1, 1]]

X = [
    [0, 1, 0],
    [1, 1, 1],
    [0, 1, 0]]

Z = [
    [1, 1, 0],
    [0, 1, 0],
    [0, 1, 1]]

# Create a dictionary item for the pieces
pieces_dict = {
    'P': P,
    'T': T,
    'U': U,
    'W': W,
    'X': X,
    'Z': Z
}

# Define a list of piece names from above dict - to select a piece randomly
PIECE_NAMES = ['P', 'T', 'U', 'W', 'X', 'Z']

# Define colours
# Define a dict of colours in RGB format - str: tuple

COLOURS = {
    'red': (255, 0, 0),
    'blue': (0, 0, 255),
    'green': (0, 255, 0),
    'yellow': (255, 255, 0),
    'cyan': (0, 255, 255),  # was (0, 0, 255), which duplicated blue
    'magenta': (255, 0, 255)
}

# Define a list of colour names from above dict - to select a colour randomly
COLOUR_NAMES = ['red', 'blue', 'green', 'yellow', 'cyan', 'magenta']
|
67 |
+
|
68 |
+
class PentominoPiece():
    '''
    Initialize a pentomino piece with a symbol, rotation and position
    Args:
        symbol: A single letter string corresponding to the piece shape
        rotation: An integer r in {0, 1, 2, 3} to define the angle of rotation corresponding to r*pi/2
        position: The block in the Pentomino Board Grid corresponding to the center of the Piece Grid
    '''
    def __init__(self, symbol: str, rotation: int, position: np.ndarray):
        self.symbol = symbol
        self.rotation = rotation
        # Shift by one cell in x and y: center of the 3x3 piece grid -> its top-left corner
        centre_x, centre_y = position[0], position[1]
        self.position = [centre_x - 1, centre_y - 1]

    def get_grid_locations(self) -> np.ndarray:
        '''
        Get the locations of blocks to mark as a piece in the Pentomino Board Grid
        Returns:
            grid_marks: An np.array containing multiple vectors of length 2 for a single piece
                        (co-ordinates to be marked in the Pentomino Board Grid)
        '''
        mask = np.rot90(pieces_dict[self.symbol], self.rotation)
        n_rows, n_cols = mask.shape[0], mask.shape[1]

        # Gym coordinate system has an inverted Y axis: the column index is
        # the x offset and the row index is the y offset from the top-left corner.
        occupied_cells = [
            self.position + np.array([col, row])
            for row in range(n_rows)
            for col in range(n_cols)
            if mask[row][col] == 1
        ]
        return np.array(occupied_cells)
|
101 |
+
|
102 |
+
if __name__ == '__main__':
    # Print the cells covered by an unrotated 'P' piece centred on (1, 1).
    demo_piece = PentominoPiece(symbol='P', rotation=0, position=np.array([1, 1]))
    demo_cells = demo_piece.get_grid_locations()
    print(demo_cells)
|
grip_env/test_env.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from grip_env.environment import GridWorldEnv
|
2 |
+
from grip_env.layout import BoardLayout
|
3 |
+
from grip_env.pieces import PIECE_NAMES, COLOUR_NAMES
|
4 |
+
|
5 |
+
if __name__ == '__main__':
    # Manual visual check: build an easy board and drive the agent around a
    # fixed loop in a PyGame window.
    board1 = BoardLayout(board_size=18, num_pieces=4, shapes=PIECE_NAMES, colours=COLOUR_NAMES, seed=640)
    agent_start_pos, target_pos, info = board1.set_board_layout(
        target_shape='P',
        target_colour='red',
        level='easy')

    env = GridWorldEnv(render_mode="human", size=18, grid_info=info, agent_pos=agent_start_pos, target_pos=target_pos)

    # Action ids: 0 = RIGHT, 1 = DOWN, 2 = LEFT, 3 = UP
    action_cycle = (0, 0, 0, 1, 1, 2, 2, 3, 3)

    try:
        env.reset()
        env.render()
        for _ in range(1000):
            for action in action_cycle:
                env.step(action)
                env.render()
    finally:
        # Always release the PyGame window, even if the run is interrupted
        env.close()
|
requirements.txt
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch
|
2 |
+
lightning
|
3 |
+
sentence-transformers
|
4 |
+
tqdm
|
5 |
+
adapters
|
6 |
+
transformers
|
7 |
+
gym==0.26.2
|
8 |
+
rich==13.4.2
|
9 |
+
pygame==2.5.2
|
10 |
+
progressbar==2.5
|
11 |
+
protobuf==5.27.0
|
12 |
+
peft==0.11.1
|
13 |
+
accelerate==0.31.0
|
14 |
+
bitsandbytes
|
15 |
+
datasets==3.0.1
|
16 |
+
gradio==5.5.0
|
17 |
+
matplotlib==3.9.2
|
src/boards.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from PIL import Image
|
2 |
+
from grip_env.environment import GridWorldEnv
|
3 |
+
import os
|
4 |
+
import json
|
5 |
+
import numpy as np
|
6 |
+
|
7 |
+
|
8 |
+
class GenerateBoard():
    """Pick a random stored board for a level and render its initial state.

    Args:
        level: Level name; selects the metadata file ``test_{level}.json``.
        board_size: Number of cells along one side of the board.

    Raises:
        ValueError: If the metadata file contains no boards.
    """

    def __init__(self, level: str, board_size: int):
        self.level = level
        self.board_size = board_size

        file_name = f'test_{level}.json'
        # Keep the original working-directory-relative location first, and
        # fall back to the directory of this module so the class also works
        # when the process is not started from the repository root.
        metadata_path = os.path.join('src', file_name)
        if not os.path.isfile(metadata_path):
            module_dir = os.path.dirname(os.path.abspath(__file__))
            metadata_path = os.path.join(module_dir, file_name)

        with open(metadata_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)

        num_boards = len(metadata)
        if num_boards == 0:
            raise ValueError(f"No boards found in {metadata_path}")

        random_board_num = np.random.randint(0, num_boards)
        self.board_data = metadata[random_board_num]

    def setup_initial_board(self):
        """Render the selected board before any move is made.

        Returns:
            image: PIL image of the rendered board.
            target_options: One "<colour> <shape> at <region>" string per piece.
            info: The list of piece dictionaries read from the metadata.
        """
        # The last entry of the board data is read as the metadata object
        # holding start position, target position and piece info.
        metadata_obj = self.board_data[-1]
        default_start_pos = np.array(metadata_obj['agent_start_pos'])
        default_target_pos = np.array(metadata_obj['target_pos'])

        info = metadata_obj['info']
        target_options = [
            f"{piece['piece_colour']} {piece['piece_shape']} at {piece['piece_region']}"
            for piece in info
        ]

        env = GridWorldEnv(render_mode="rgb_array", size=self.board_size, grid_info=info, agent_pos=default_start_pos, target_pos=default_target_pos)
        env.reset()
        image = Image.fromarray(env.render())

        return image, target_options, info
|
43 |
+
|
src/test_easy.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
src/utils.py
ADDED
File without changes
|