Koshti10 commited on
Commit
9aed787
·
verified ·
1 Parent(s): 53f05d8

Upload 11 files

Browse files
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 kushal-10
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,13 +1 @@
1
- ---
2
- title: Pento LLaVA
3
- emoji: 🤖
4
- colorFrom: purple
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.5.0
8
- app_file: app.py
9
- pinned: false
10
- short_description: 'Demo Gameplay for Pento-LLaVA '
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # pento-llava
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+
5
+ from src.boards import GenerateBoard
6
+
7
# Heading shown at the top of the demo page.
TITLE = """<h1 align="center" id="space-title"> Pento-LLaVA 🤖🎯🎮</h1>"""

# Build one board for level 'easy' with board size 18 when the module loads.
# setup_initial_board() returns the rendered image, the list of target
# descriptions and the per-piece info.
initial_board_image, target_positions, info = GenerateBoard('easy', 18).setup_initial_board()

# Put the board image on an axis-free matplotlib figure so gr.Plot can show it.
fig, ax = plt.subplots()
ax.imshow(initial_board_image)
ax.axis('off')

# Assemble the UI: title on top, board figure below.
with gr.Blocks() as pento_llava_app:
    gr.HTML(TITLE)
    gr.Plot(fig)

    pento_llava_app.load()

pento_llava_app.queue()
pento_llava_app.launch()
grip_env/environment.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Working Grid environment
2
+
3
+ import gym
4
+ from gym import spaces
5
+ import pygame
6
+ import numpy as np
7
+
8
+
9
class GridWorldEnv(gym.Env):
    """Square grid world with static pentomino pieces and a movable agent.

    Args:
        render_mode: ``None``, ``"human"`` (draw into a PyGame window) or
            ``"rgb_array"`` (return frames as arrays).
        size: Number of cells along one side of the square grid.
        grid_info: Iterable of piece dicts. Each dict must provide
            ``"piece_grids"`` (the cells covered by the piece) and
            ``"piece_colour"`` (a colour value PyGame accepts). ``None`` is
            rendered as an empty board.
        agent_pos: Start cell ``[x, y]`` of the agent.
        target_pos: Target cell ``[x, y]``, or a sequence of such cells.
    """

    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 2}

    def __init__(self, render_mode=None, size=5, grid_info=None, agent_pos=None, target_pos=None):
        self.size = size  # The size of the square grid
        self.window_size = 500  # The size of the PyGame window
        self.grid_info = grid_info
        self.agent_pos = agent_pos
        self.target_pos = target_pos

        # Observations are dictionaries with the agent's and the target's location.
        # Each coordinate lies in {0, ..., size - 1}.
        self.observation_space = spaces.Dict(
            {
                "agent": spaces.Box(0, size - 1, shape=(2,), dtype=int),
                "target": spaces.Box(0, size - 1, shape=(2,), dtype=int),
            }
        )

        # We have 6 actions: "right", "down", "left", "up", "wait" and "grip".
        self.action_space = spaces.Discrete(6)

        # Maps each abstract action to the displacement applied to the agent.
        # The y axis points down, so [0, 1] moves the agent one row down.
        # NOTE(review): action 5 ("grip") displaces the agent by [1, 1];
        # confirm that a grip is really meant to move the agent diagonally.
        self._action_to_direction = {
            0: np.array([1, 0]),  # Right
            1: np.array([0, 1]),  # Down
            2: np.array([-1, 0]),  # Left
            3: np.array([0, -1]),  # Up
            4: np.array([0, 0]),  # Wait
            5: np.array([1, 1]),  # Grip
        }

        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode

        # `self.window` / `self.clock` are created lazily the first time a
        # frame is rendered in human mode and stay `None` until then.
        self.window = None
        self.clock = None

    def _get_obs(self):
        """Return the current agent and target locations as a dict."""
        return {"agent": self._agent_location, "target": self._target_location}

    def _get_info(self):
        """Return the L1 norm of (agent location - target location)."""
        return {"distance": np.linalg.norm(self._agent_location - self._target_location, ord=1)}

    def _on_target(self):
        """Return True when the agent's current cell equals a target cell."""
        # `atleast_2d` lets a single [x, y] target and a list of target cells
        # share one code path.
        targets = np.atleast_2d(np.asarray(self._target_location))
        return any(np.array_equal(self._agent_location, cell) for cell in targets)

    def reset(self, seed=None, options=None):
        """Move the agent back to its start cell.

        Returns:
            ``(observation, info)``.

        Raises:
            ValueError: If the environment was built without ``agent_pos`` or
                ``target_pos``.
        """
        # Seeds self.np_random as expected by the gym API.
        super().reset(seed=seed)

        if self.agent_pos is None or self.target_pos is None:
            raise ValueError("GridWorldEnv needs both agent_pos and target_pos to reset")

        # Work on arrays so the arithmetic in step/render also works when the
        # positions were passed as plain lists.
        self._agent_location = np.asarray(self.agent_pos)
        self._target_location = np.asarray(self.target_pos)

        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, info

    def step(self, action):
        """Apply one action and report the outcome.

        Returns:
            ``(observation, reward, terminated, info)``. This 4-tuple (without
            a ``truncated`` flag) is kept for existing callers.
        """
        direction = self._action_to_direction[action]
        # We use `np.clip` to make sure we don't leave the grid
        self._agent_location = np.clip(
            self._agent_location + direction, 0, self.size - 1
        )
        # An episode is done once the agent's *current* cell is a target cell.
        terminated = 1 if self._on_target() else 0
        reward = 1 if terminated else 0  # Binary sparse rewards
        observation = self._get_obs()
        info = self._get_info()

        if self.render_mode == "human":
            self._render_frame()

        return observation, reward, terminated, info

    def render(self):
        """Return the current frame as an array in ``"rgb_array"`` mode."""
        if self.render_mode == "rgb_array":
            return self._render_frame()

    def _draw_rect(self, canvas, color, pos, pix_square_size):
        """Fill the grid cell at ``pos`` on ``canvas`` with ``color``."""
        left, top = (int(v) for v in pix_square_size * np.asarray(pos))
        pygame.draw.rect(
            canvas,
            color,
            pygame.Rect((left, top), (pix_square_size, pix_square_size)),
        )

    def _render_frame(self):
        """Draw pieces and agent; show the frame (human) or return it (rgb_array)."""
        if self.window is None and self.render_mode == "human":
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode((self.window_size, self.window_size))
        if self.clock is None and self.render_mode == "human":
            self.clock = pygame.time.Clock()

        canvas = pygame.Surface((self.window_size, self.window_size))
        canvas.fill((255, 255, 255))
        # The size of a single grid square in pixels
        pix_square_size = int(self.window_size / self.size)

        # Draw pieces; a missing grid_info is treated as an empty board.
        pieces = self.grid_info if self.grid_info is not None else []
        for piece in pieces:
            for pos in piece["piece_grids"]:
                self._draw_rect(canvas, piece["piece_colour"], pos, pix_square_size)

        # Now we draw the agent as a black circle centred in its cell
        pygame.draw.circle(
            canvas,
            (0, 0, 0),
            (self._agent_location + 0.5) * pix_square_size,
            pix_square_size / 3,
        )

        if self.render_mode == "human":
            # Copy our drawings from `canvas` to the visible window
            self.window.blit(canvas, canvas.get_rect())
            pygame.event.pump()
            pygame.display.update()

            # Delay as needed so human rendering runs at the predefined framerate.
            self.clock.tick(self.metadata["render_fps"])
        else:  # rgb_array
            # Swap the first two axes of the surface pixel array.
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)
            )

    def close(self):
        """Shut PyGame down if a window was opened."""
        if self.window is not None:
            pygame.display.quit()
            pygame.quit()
            # Forget the stale handles so a later render re-creates them.
            self.window = None
            self.clock = None
167
+
168
if __name__ == '__main__':
    # Smoke test: an empty 20x20 board with explicit agent/target cells.
    # reset() computes the agent-target distance, so both positions must be
    # given; grid_info=[] means there are no pieces to draw.
    env = GridWorldEnv(
        render_mode="rgb_array",
        size=20,
        grid_info=[],
        agent_pos=np.array([10, 10]),
        target_pos=np.array([0, 0]),
    )
    env.reset()
    frame = env.render()
    print(frame.shape)
    env.close()
grip_env/layout.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+ from grip_env.pieces import PentominoPiece, COLOURS, COLOUR_NAMES, PIECE_NAMES
4
+ from utils import layout_utils
5
+ import math
6
+
7
+ # Setting the layout fixed for now i.e start positions of each piece are at every 5x5 grid
8
class BoardLayout():
    '''
    This class is used to generate a random layout of pentomino pieces on a board.
    Args:
        board_size: The number of grids in one dimension of the Pentomino Board
        num_pieces: The number of pieces to be placed on the board, including the target piece
        shapes: A list of the pentomino shapes to be selected from
        colours: A list of the pentomino colours to be selected from
        seed: The seed for the random number generator (seeds the global np.random state)
    '''
    def __init__(self, board_size: int, num_pieces: int, shapes: np.array, colours: np.array, seed: int):
        self.board_size = board_size
        self.num_pieces = num_pieces
        self.shapes = shapes
        self.colours = colours
        self.mapped_regions = layout_utils.map_regions(self.board_size)
        np.random.seed(seed)

    def _place_all_pieces(self):
        '''
        Make one attempt at drawing a start position for every piece.
        Returns:
            The list of start positions, or None when the candidate cells ran
            out before every piece could be placed.
        '''
        # Possible spawn locations across the whole board
        spawn_choices = [[x, y] for x in range(self.board_size) for y in range(self.board_size)]
        placed = []
        for _ in range(self.num_pieces):
            # Draw randomly until a valid cell is found; rejected cells are
            # dropped so they are never drawn again in this attempt.
            while spawn_choices:
                candidate = spawn_choices[np.random.randint(0, len(spawn_choices))]
                if layout_utils.valid(self.board_size, candidate, placed):
                    placed.append(candidate)
                    break
                spawn_choices.remove(candidate)
            else:
                # Search space exhausted: never record the rejected cell.
                return None
        return placed

    def set_start_positions(self) -> np.array:
        '''
        Get start positions of all pieces on the board
        Returns:
            all_start_positions - [[p1x, p1y], [p2x, p2y], ....]
            One start position per piece, as accepted by layout_utils.valid
        Raises:
            AssertionError: If no complete layout was found within the maximum number of tries
        '''
        max_tries = 100  # Maximum number of tries
        for _ in range(max_tries):
            all_start_positions = self._place_all_pieces()
            if all_start_positions is not None:
                return all_start_positions
            # else try again with a fresh set of spawn choices

        print("Max tries exceeded - Restart Board Layout - Try increasing the board size or reducing the number of piecess")
        # Raised explicitly (not via `assert`) so the check survives `python -O`
        # while callers still see the same exception type as before.
        raise AssertionError("Number of pieces spawned is not equal to the number of pieces specified")

    def set_board_layout(self, target_shape=None, target_colour=None, level=None):
        '''
        Build the full board description.
        Args:
            target_shape: Shape forced onto the first (target) piece, if given
            target_colour: Colour forced onto the first (target) piece, if given
            level: "easy"/"sample" (unique shapes and colours, no rotation),
                   "medium" (unique shapes and colours, random rotation),
                   anything else is treated as hard (random rotation, and per
                   piece either its shape or its colour stays reusable)
        Returns:
            agent_start_pos, target_pos, grid_info
            target_pos is None when there are no pieces
        '''
        # Get all start positions for all pieces on the board
        all_start_positions = self.set_start_positions()

        # Set agent start position at the center of the board
        center_sq = math.ceil((self.board_size)/2)
        agent_start_pos = np.array([center_sq, center_sq], dtype=np.int64)

        grid_info = []
        target_pos = None
        available_shapes = list(self.shapes)  # List of available shapes
        available_colours = list(self.colours)  # List of available colours

        for i, piece_position in enumerate(all_start_positions):
            # Select a random shape and colour from what is still available
            piece_shape = np.random.choice(available_shapes)
            colour_name = np.random.choice(available_colours)

            # The first piece is the target
            if i == 0:
                target_pos = piece_position
                if target_shape:
                    piece_shape = target_shape  # Overwrite target shape if specified
                if target_colour:
                    colour_name = target_colour  # Overwrite target colour if specified

            if level in ("easy", "sample", "medium"):
                # Every shape and every colour is used at most once
                available_shapes.remove(piece_shape)
                available_colours.remove(colour_name)
                # Only the medium level rotates pieces
                piece_rotation = np.random.randint(0, 4) if level == "medium" else 0
            else:
                # Hard level, allow same shape or colour repetition, based on randomness
                if np.random.randint(0, 2):
                    available_colours.remove(colour_name)
                else:
                    available_shapes.remove(piece_shape)
                piece_rotation = np.random.randint(0, 4)  # Random rotation

            piece = PentominoPiece(piece_shape, piece_rotation, piece_position)
            grid_info.append({
                "piece_grids": piece.get_grid_locations(),
                "piece_colour": colour_name,
                "colour_value": COLOURS[colour_name],
                "start_position": piece_position,
                "piece_shape": piece_shape,
                "piece_rotation": piece_rotation,
                "piece_region": layout_utils.get_region(piece_position, self.mapped_regions)
            })

        return agent_start_pos, target_pos, grid_info
149
+
150
+
151
if __name__ == '__main__':
    # Demo: four pieces on an 18x18 board with a red 'P' as the target piece.
    demo_layout = BoardLayout(
        board_size=18,
        num_pieces=4,
        shapes=PIECE_NAMES,
        colours=COLOUR_NAMES,
        seed=640,
    )
    agent_start_pos, target_pos, info = demo_layout.set_board_layout(
        target_shape='P', target_colour='red', level='easy'
    )
157
+
grip_env/pieces.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Define the pentomino pieces and handle their rotations
2
+ # Output should be an array/list of 1x2 blocks to be marked in the pentomino grid environment
3
+
4
+ import numpy as np
5
+
6
# Piece definitions for 0 rotation
# Pieces are P, T, U, W, X, Z
# Each piece is a 3x3 mask: 1 marks a cell covered by the piece.

P = [
    [0, 1, 1],
    [0, 1, 1],
    [0, 1, 0]]

T = [
    [1, 1, 1],
    [0, 1, 0],
    [0, 1, 0]]

U = [
    [1, 0, 1],
    [1, 1, 1],
    [0, 0, 0]]

W = [
    [1, 0, 0],
    [1, 1, 0],
    [0, 1, 1]]

X = [
    [0, 1, 0],
    [1, 1, 1],
    [0, 1, 0]]

Z = [
    [1, 1, 0],
    [0, 1, 0],
    [0, 1, 1]]

# Map each piece symbol to its mask
pieces_dict = {
    'P': P,
    'T': T,
    'U': U,
    'W': W,
    'X': X,
    'Z': Z
}

# Piece names in dict order - used to select a piece randomly
PIECE_NAMES = list(pieces_dict)

# Colours in RGB format - str: tuple
COLOURS = {
    'red': (255, 0, 0),
    'blue': (0, 0, 255),
    'green': (0, 255, 0),
    'yellow': (255, 255, 0),
    'cyan': (0, 255, 255),  # was (0, 0, 255), i.e. indistinguishable from blue
    'magenta': (255, 0, 255)
}

# Colour names in dict order - used to select a colour randomly
COLOUR_NAMES = list(COLOURS)
67
+
68
class PentominoPiece():
    '''
    A pentomino piece described by its symbol, rotation and board position
    Args:
        symbol: A single letter string naming the piece shape (a key of pieces_dict)
        rotation: An integer r in {0, 1, 2, 3}; the shape mask is rotated r times by 90 degrees
        position: The board cell [x, y] the piece is placed at; the stored
                  position is shifted by -1 in both coordinates
    '''
    def __init__(self, symbol: str, rotation: int, position: np.array):
        self.symbol = symbol
        self.rotation = rotation
        # Shift by one cell in x and y: the result is used as the origin of the shape mask
        x, y = position[0], position[1]
        self.position = [x - 1, y - 1]

    def get_grid_locations(self) -> np.array:
        '''
        Compute the board cells covered by this piece
        Returns:
            grid_marks: An np.array holding one [x, y] vector per covered cell
        '''
        mask = np.rot90(pieces_dict[self.symbol], self.rotation)

        # Rows of the mask run along y and columns along x, so a set cell at
        # (row, col) becomes the offset [col, row] from the stored position.
        covered = [
            self.position + np.array([col, row])
            for row, cells in enumerate(mask)
            for col, cell in enumerate(cells)
            if cell == 1
        ]
        return np.array(covered)
101
+
102
if __name__ == '__main__':
    # Demo: print the cells covered by an unrotated 'P' piece placed at [1, 1].
    demo_piece = PentominoPiece('P', 0, np.array([1, 1]))
    demo_cells = demo_piece.get_grid_locations()
    print(demo_cells)
grip_env/test_env.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from grip_env.environment import GridWorldEnv
2
+ from grip_env.layout import BoardLayout
3
+ from grip_env.pieces import PIECE_NAMES, COLOUR_NAMES
4
+
5
if __name__ == '__main__':
    # Manual visual check: build a fixed board and drive the agent around it
    # in a PyGame window.
    board1 = BoardLayout(board_size=18, num_pieces=4, shapes=PIECE_NAMES, colours=COLOUR_NAMES, seed=640)
    agent_start_pos, target_pos, info = board1.set_board_layout(
        target_shape='P',
        target_colour='red',
        level='easy')

    env = GridWorldEnv(render_mode="human", size=18, grid_info=info, agent_pos=agent_start_pos, target_pos=target_pos)

    # Action ids: 0 = RIGHT, 1 = DOWN, 2 = LEFT, 3 = UP
    action_cycle = (0, 0, 0, 1, 1, 2, 2, 3, 3)
    try:
        env.reset()
        env.render()
        for _ in range(1000):
            for action in action_cycle:
                env.step(action)
                env.render()
    finally:
        # Always release the PyGame window, even on Ctrl-C or an error.
        env.close()
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ lightning
3
+ sentence-transformers
4
+ tqdm
5
+ adapters
6
+ transformers
7
+ gym==0.26.2
8
+ rich==13.4.2
9
+ pygame==2.5.2
10
+ progressbar==2.5
11
+ protobuf==5.27.0
12
+ peft==0.11.1
13
+ accelerate==0.31.0
14
+ bitsandbytes
15
+ datasets==3.0.1
16
+ gradio==5.5.0
17
+ matplotlib==3.9.2
src/boards.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+ from grip_env.environment import GridWorldEnv
3
+ import os
4
+ import json
5
+ import numpy as np
6
+
7
+
8
class GenerateBoard():
    """Pick a random pre-generated board for a level and render its initial state.

    Args:
        level: Level name; the boards are read from ``test_{level}.json``.
        board_size: Number of cells along one side of the board.

    Raises:
        FileNotFoundError: If the metadata file cannot be found.
        ValueError: If the metadata file contains no boards.
    """

    def __init__(self, level: str, board_size: int):
        self.level = level
        self.board_size = board_size

        metadata_name = f'test_{level}.json'
        metadata_path = os.path.join('src', metadata_name)
        if not os.path.exists(metadata_path):
            # The relative path only resolves from the repository root; fall
            # back to the file next to this module for any other working dir.
            module_dir = os.path.dirname(os.path.abspath(__file__))
            metadata_path = os.path.join(module_dir, metadata_name)

        with open(metadata_path, 'r', encoding='utf-8') as f:
            metadata = json.load(f)

        if not metadata:
            raise ValueError(f"No boards found in {metadata_path}")

        # Choose one of the stored boards uniformly at random
        random_board_num = np.random.randint(0, len(metadata))
        self.board_data = metadata[random_board_num]

    def setup_initial_board(self):
        """Render the chosen board.

        Returns:
            ``(image, target_options, info)``: the rendered board as a PIL
            image, one "<colour> <shape> at <region>" string per piece, and
            the list of piece dicts read from the metadata.
        """
        # The last entry of the board data carries the positions and piece info used here.
        metadata_obj = self.board_data[-1]
        default_start_pos = np.array(metadata_obj['agent_start_pos'])
        default_target_pos = np.array(metadata_obj['target_pos'])

        info = metadata_obj['info']
        target_options = [
            f"{piece['piece_colour']} {piece['piece_shape']} at {piece['piece_region']}"
            for piece in info
        ]

        env = GridWorldEnv(render_mode="rgb_array", size=self.board_size, grid_info=info, agent_pos=default_start_pos, target_pos=default_target_pos)
        try:
            env.reset()
            image = Image.fromarray(env.render())
        finally:
            # Release the environment once the frame has been captured.
            env.close()

        return image, target_options, info
43
+
src/test_easy.json ADDED
The diff for this file is too large to render. See raw diff
 
src/utils.py ADDED
File without changes