Spaces:

Koshti10
/

Pento-LLaVA

Sleeping

App Files Files Community

Koshti10 committed on Nov 6, 2024

Commit

9aed787

verified ·

1 Parent(s): 53f05d8

Upload 11 files

Browse files

Files changed (11) hide show

LICENSE +21 -0
README.md +1 -13
app.py +26 -0
grip_env/environment.py +172 -0
grip_env/layout.py +157 -0
grip_env/pieces.py +104 -0
grip_env/test_env.py +34 -0
requirements.txt +17 -0
src/boards.py +43 -0
src/test_easy.json +0 -0
src/utils.py +0 -0

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2024 kushal-10
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,13 +1 @@
----
-title: Pento LLaVA
-emoji: 🤖
-colorFrom: purple
-colorTo: gray
-sdk: gradio
-sdk_version: 5.5.0
-app_file: app.py
-pinned: false
-short_description: 'Demo Gameplay for Pento-LLaVA '
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference


1	+ # pento-llava

app.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import gradio as gr
+import numpy as np
+import matplotlib.pyplot as plt
+from src.boards import GenerateBoard
+# Page header rendered at the top of the Space.
+TITLE = """<h1 align="center" id="space-title"> Pento-LLaVA 🤖🎯🎮</h1>"""
+# Load one 'easy' board (picked at random by GenerateBoard) with board size 18.
+board = GenerateBoard('easy', 18)
+initial_board_image, target_positions, info = board.setup_initial_board()
+# Wrap the rendered board image in a matplotlib figure with the axes hidden.
+fig, ax = plt.subplots()
+ax.imshow(initial_board_image)
+ax.axis('off')
+# Assemble the UI: the title followed by the board plot.
+with gr.Blocks() as pento_llava_app:
+    gr.HTML(TITLE)
+    gr.Plot(fig)
+    pento_llava_app.load()
+pento_llava_app.queue()
+pento_llava_app.launch()

grip_env/environment.py ADDED Viewed

	@@ -0,0 +1,172 @@

+## Working Grid environment
+import gym
+from gym import spaces
+import pygame
+import numpy as np
+class GridWorldEnv(gym.Env):
+    """Square grid world with pentomino pieces, a movable agent and a target.
+    Args:
+        render_mode: None, "human" (PyGame window) or "rgb_array" (frames returned as arrays).
+        size: Number of cells along one side of the square grid.
+        grid_info: Iterable of piece dicts; rendering reads "piece_grids" and
+            "piece_colour" from each entry. None is treated as an empty board.
+        agent_pos: Start cell [x, y] of the agent. Defaults to [0, 0].
+        target_pos: Target cell [x, y]; a sequence of [x, y] cells is also accepted
+            for the termination check. Defaults to [size - 1, size - 1].
+    """
+    metadata = {"render_modes": ["human", "rgb_array"], "render_fps": 2}
+    def __init__(self, render_mode=None, size=5, grid_info=None, agent_pos=None, target_pos=None):
+        self.size = size  # The size of the square grid
+        self.window_size = 500  # The size of the PyGame window
+        # Fall back to usable defaults so reset() and render() work on a bare env.
+        self.grid_info = [] if grid_info is None else grid_info
+        self.agent_pos = np.zeros(2, dtype=int) if agent_pos is None else np.asarray(agent_pos)
+        self.target_pos = np.full(2, size - 1, dtype=int) if target_pos is None else np.asarray(target_pos)
+        # Observations are dictionaries with the agent's and the target's location.
+        # Each coordinate lies in {0, ..., size - 1}.
+        self.observation_space = spaces.Dict(
+            {
+                "agent": spaces.Box(0, size - 1, shape=(2,), dtype=int),
+                "target": spaces.Box(0, size - 1, shape=(2,), dtype=int),
+            }
+        )
+        # We have 6 actions: "right", "down", "left", "up", "wait" and "grip".
+        self.action_space = spaces.Discrete(6)
+        # Maps each abstract action to the [dx, dy] offset applied to the agent.
+        # The y axis points down on screen, so +1 in y is "down".
+        self._action_to_direction = {
+            0: np.array([1, 0]),  # Right
+            1: np.array([0, 1]),  # Down
+            2: np.array([-1, 0]),  # Left
+            3: np.array([0, -1]),  # Up
+            4: np.array([0, 0]),  # Wait
+            # NOTE(review): "grip" currently moves the agent diagonally by [1, 1];
+            # confirm whether it should leave the position unchanged instead.
+            5: np.array([1, 1]),
+        }
+        assert render_mode is None or render_mode in self.metadata["render_modes"]
+        self.render_mode = render_mode
+        # In human mode `self.window` references the PyGame window and
+        # `self.clock` paces the frame rate. Both stay None until the first
+        # human-mode frame is rendered.
+        self.window = None
+        self.clock = None
+    def _target_cells(self):
+        """Return the current target(s) as an array of [x, y] rows."""
+        return np.asarray(self._target_location).reshape(-1, 2)
+    def _get_obs(self):
+        """Return the observation dict with the agent and target locations."""
+        return {"agent": self._agent_location, "target": self._target_location}
+    def _get_info(self):
+        """Return the L1 (Manhattan) distance from the agent to the nearest target cell."""
+        offsets = self._target_cells() - self._agent_location
+        return {"distance": np.linalg.norm(offsets, ord=1, axis=1).min()}
+    def reset(self, seed=None, options=None):
+        """Put the agent and target back at their configured positions.
+        Returns:
+            (observation, info) for the initial state.
+        """
+        # NOTE(review): `seed` is accepted but unused because super().reset(seed=seed)
+        # is not called; confirm whether seeding self.np_random is wanted.
+        self._agent_location = self.agent_pos
+        self._target_location = self.target_pos
+        observation = self._get_obs()
+        info = self._get_info()
+        if self.render_mode == "human":
+            self._render_frame()
+        return observation, info
+    def step(self, action):
+        """Apply one action and report the outcome.
+        Args:
+            action: Key of `self._action_to_direction` (0-5).
+        Returns:
+            (observation, reward, terminated, info). `terminated` is 1 once the
+            agent stands on a target cell, else 0; `reward` mirrors it.
+            NOTE(review): this is a 4-tuple without a separate `truncated` flag.
+        """
+        direction = self._action_to_direction[action]
+        # We use `np.clip` to make sure we don't leave the grid
+        self._agent_location = np.clip(
+            self._agent_location + direction, 0, self.size - 1
+        )
+        # Compare the agent's *current* location (not its start position) with
+        # every target cell.
+        on_target = any(
+            np.array_equal(self._agent_location, cell) for cell in self._target_cells()
+        )
+        terminated = 1 if on_target else 0
+        reward = 1 if terminated else 0  # Binary sparse rewards
+        observation = self._get_obs()
+        info = self._get_info()
+        if self.render_mode == "human":
+            self._render_frame()
+        return observation, reward, terminated, info
+    def render(self):
+        """Return the current frame as an array in "rgb_array" mode, else None."""
+        if self.render_mode == "rgb_array":
+            return self._render_frame()
+    def _draw_rect(self, canvas, color, pos, pix_square_size):
+        """Fill the grid cell at `pos` ([x, y] in cells) on `canvas` with `color`."""
+        # Normalise array-like positions to a plain list before scaling
+        if not isinstance(pos, (tuple, list)):
+            pos = list(pos)
+        pygame.draw.rect(
+            canvas,
+            color,
+            pygame.Rect(
+                pix_square_size * np.array(pos),  # Cell coordinates -> pixel coordinates
+                (pix_square_size, pix_square_size),
+            ),
+        )
+    def _render_frame(self):
+        """Draw all pieces and the agent; show the frame or return it as an array."""
+        if self.window is None and self.render_mode == "human":
+            pygame.init()
+            pygame.display.init()
+            self.window = pygame.display.set_mode((self.window_size, self.window_size))
+        if self.clock is None and self.render_mode == "human":
+            self.clock = pygame.time.Clock()
+        canvas = pygame.Surface((self.window_size, self.window_size))
+        canvas.fill((255, 255, 255))
+        pix_square_size = int(
+                self.window_size / self.size
+        )  # The size of a single grid square in pixels
+        # Draw Pieces
+        for piece in self.grid_info:
+            for pos in piece["piece_grids"]:
+                self._draw_rect(canvas, piece["piece_colour"], pos, pix_square_size)
+        # Now we draw the agent as a black circle centred in its cell
+        pygame.draw.circle(
+            canvas,
+            (0, 0, 0),
+            (self._agent_location + 0.5) * pix_square_size,
+            pix_square_size / 3,
+        )
+        if self.render_mode == "human":
+            # The following line copies our drawings from `canvas` to the visible window
+            self.window.blit(canvas, canvas.get_rect())
+            pygame.event.pump()
+            pygame.display.update()
+            # Keep human rendering at the framerate declared in `metadata`.
+            self.clock.tick(self.metadata["render_fps"])
+        else:  # rgb_array
+            # Swap the first two axes of the surface's pixel array
+            return np.transpose(
+                np.array(pygame.surfarray.pixels3d(canvas)), axes=(1, 0, 2)
+            )
+    def close(self):
+        """Shut down the PyGame window, if one was opened."""
+        if self.window is not None:
+            pygame.display.quit()
+            pygame.quit()
+            # Drop stale handles so a later human-mode render re-creates them
+            self.window = None
+            self.clock = None
+if __name__ == '__main__':
+    # Smoke test with explicit positions and an empty board.
+    env = GridWorldEnv(size=20, grid_info=[], agent_pos=np.array([0, 0]), target_pos=np.array([19, 19]))
+    try:
+        env.reset()
+        env.render()
+    finally:
+        # Always release PyGame resources, even if reset/render fails
+        env.close()

grip_env/layout.py ADDED Viewed

	@@ -0,0 +1,157 @@

+import numpy as np
+from grip_env.pieces import PentominoPiece, COLOURS, COLOUR_NAMES, PIECE_NAMES
+from utils import layout_utils
+import math
+# Setting the layout fixed for now i.e start positions of each piece are at every 5x5 grid
+class BoardLayout():
+    '''
+    Generate a random layout of pentomino pieces on a square board.
+    Args:
+        board_size: The number of grids in one dimension of the Pentomino Board
+        num_pieces: The number of pieces to be placed on the board, including the target piece
+        shapes: A list of the pentomino shapes to be selected from
+        colours: A list of the pentomino colours to be selected from
+        seed: The seed for the (global) numpy random number generator
+    '''
+    def __init__(self, board_size: int, num_pieces: int, shapes: np.array, colours: np.array, seed: int):
+        self.board_size = board_size
+        self.num_pieces = num_pieces
+        self.shapes = shapes
+        self.colours = colours
+        # Region lookup built by layout_utils (defined outside this file)
+        self.mapped_regions = layout_utils.map_regions(self.board_size)
+        np.random.seed(seed)
+    def _try_place_all(self):
+        '''
+        Make one attempt at drawing a start position for every piece.
+        Returns:
+            The list of [x, y] start positions, or None when the candidate
+            cells ran out before all pieces were placed.
+        '''
+        # Every cell of the board is a candidate spawn location
+        spawn_choices = [[x, y] for x in range(self.board_size) for y in range(self.board_size)]
+        placed = []
+        for _ in range(self.num_pieces):
+            candidate = spawn_choices[np.random.randint(0, len(spawn_choices))]
+            # Redraw until layout_utils.valid accepts the candidate
+            # (presumably it rejects overlaps and out-of-board pieces - confirm in layout_utils)
+            while not layout_utils.valid(self.board_size, candidate, placed):
+                spawn_choices.remove(candidate)
+                if not spawn_choices:
+                    return None  # Search space exhausted for this attempt
+                candidate = spawn_choices[np.random.randint(0, len(spawn_choices))]
+            placed.append(candidate)
+        return placed
+    def set_start_positions(self, max_tries: int = 100) -> list:
+        '''
+        Get start positions of all the pieces on the board
+        Args:
+            max_tries: Number of full placement attempts before giving up
+        Returns:
+            all_start_positions - [[p1x, p1y], [p2x, p2y], ....]
+            One start position per piece; the agent is not included
+        Raises:
+            RuntimeError: If no attempt manages to place every piece
+        '''
+        for _ in range(max_tries):
+            all_start_positions = self._try_place_all()
+            if all_start_positions is not None:
+                return all_start_positions
+            # else try again with fresh random draws
+        raise RuntimeError(
+            "Max tries exceeded - Restart Board Layout - Try increasing the board size or reducing the number of pieces"
+        )
+    def set_board_layout(self, target_shape=None, target_colour=None, level=None):
+        '''
+        Build the complete board: agent start, target position and per-piece data
+        Args:
+            target_shape: Shape forced onto the first (target) piece, if given
+            target_colour: Colour forced onto the first (target) piece, if given
+            level: "easy"/"sample" (unique shapes and colours, no rotation),
+                   "medium" (unique shapes and colours, random rotation),
+                   anything else (only one of shape/colour is retired per piece, random rotation)
+        Returns:
+            agent_start_pos, target_pos, grid_info
+            target_pos is the start position of the first piece (None if there are no pieces)
+        '''
+        # Get all start positions for all pieces on the board
+        all_start_positions = self.set_start_positions()
+        # Set agent start position at the center of the board
+        center_sq = math.ceil((self.board_size)/2)
+        agent_start_pos = np.array([center_sq, center_sq], dtype=np.int64)
+        grid_info = []
+        available_shapes = list(self.shapes)  # List of available shapes
+        available_colours = list(self.colours)  # List of available colours
+        target_pos = None
+        for i, piece_position in enumerate(all_start_positions):
+            # Draw a random shape and colour from what is still available
+            piece_shape = np.random.choice(available_shapes)
+            colour_name = np.random.choice(available_colours)
+            # The first piece is the target
+            if i == 0:
+                target_pos = piece_position
+                if target_shape:
+                    piece_shape = target_shape  # Overwrite target shape if specified
+                if target_colour:
+                    colour_name = target_colour  # Overwrite target colour if specified
+            if level in ("easy", "sample", "medium"):
+                # Shapes and colours are never repeated on these levels
+                available_shapes.remove(piece_shape)
+                available_colours.remove(colour_name)
+                # Only medium introduces rotation
+                piece_rotation = np.random.randint(0, 4) if level == "medium" else 0
+            else:
+                # Hard level: retire either the colour or the shape at random,
+                # so the other attribute may repeat on a later piece
+                if np.random.randint(0, 2):
+                    available_colours.remove(colour_name)
+                else:
+                    available_shapes.remove(piece_shape)
+                piece_rotation = np.random.randint(0, 4)  # Random rotation
+            piece = PentominoPiece(piece_shape, piece_rotation, piece_position)
+            grid_info.append({
+                "piece_grids": piece.get_grid_locations(),
+                "piece_colour": colour_name,
+                "colour_value": COLOURS[colour_name],
+                "start_position": piece_position,
+                "piece_shape": piece_shape,
+                "piece_rotation": piece_rotation,
+                "piece_region": layout_utils.get_region(piece_position, self.mapped_regions),
+            })
+        return agent_start_pos, target_pos, grid_info
+if __name__ == '__main__':
+    # Demo: lay out four pieces on a board of size 18 with a red 'P' as the target.
+    demo_layout = BoardLayout(board_size=18, num_pieces=4, shapes=PIECE_NAMES, colours=COLOUR_NAMES, seed=640)
+    agent_start_pos, target_pos, info = demo_layout.set_board_layout(target_shape='P', target_colour='red', level='easy')

grip_env/pieces.py ADDED Viewed

	@@ -0,0 +1,104 @@

+# Define the pentomino pieces and handle their rotations
+# Output should be an array/list of 1x2 blocks to be marked in the pentomino grid environment
+import numpy as np
+# Piece definitions for 0 rotation
+# Pieces are P, T, U, W, X, Z
+# Each piece is a 3x3 occupancy grid (1 = filled cell) - shifted from 5x5 to 3x3
+P = [
+    [0, 1, 1],
+    [0, 1, 1],
+    [0, 1, 0]]
+T = [
+    [1, 1, 1],
+    [0, 1, 0],
+    [0, 1, 0]]
+U = [
+    [1, 0, 1],
+    [1, 1, 1],
+    [0, 0, 0]]
+W = [
+    [1, 0, 0],
+    [1, 1, 0],
+    [0, 1, 1]]
+X = [
+    [0, 1, 0],
+    [1, 1, 1],
+    [0, 1, 0]]
+Z = [
+    [1, 1, 0],
+    [0, 1, 0],
+    [0, 1, 1]]
+# Create a dictionary item for the pieces
+pieces_dict = {
+    'P': P,
+    'T': T,
+    'U': U,
+    'W': W,
+    'X': X,
+    'Z': Z,
+}
+# Piece names in dict order - to select a piece randomly
+PIECE_NAMES = list(pieces_dict)
+# Define colours
+# Define a dict of colours in RGB format- str: tuple
+COLOURS = {
+    'red': (255, 0, 0),
+    'blue': (0, 0, 255),
+    'green': (0, 255, 0),
+    'yellow': (255, 255, 0),
+    'cyan': (0, 255, 255),  # green + blue; previously duplicated the value of 'blue'
+    'magenta': (255, 0, 255),
+}
+# Colour names in dict order - to select a colour randomly
+COLOUR_NAMES = list(COLOURS)
+class PentominoPiece():
+    '''
+    A pentomino piece described by its shape symbol, rotation and board position
+    Args:
+        symbol: A single letter string used as the key into pieces_dict
+        rotation: An integer r in {0, 1, 2, 3}; passed to np.rot90 as the number of quarter turns (r*pi/2)
+        position: The block in the Pentomino Board Grid corresponding to the center of the Piece Grid
+    '''
+    def __init__(self, symbol: str, rotation: int, position: np.array):
+        self.symbol = symbol
+        self.rotation = rotation
+        # Step back one cell on each axis: from the center of the piece grid to its top-left corner
+        centre_x, centre_y = position[0], position[1]
+        self.position = [centre_x - 1, centre_y - 1]
+    def get_grid_locations(self) -> np.array:
+        '''
+        Get the locations of blocks to mark as a piece in the Pentomino Board Grid
+        Returns:
+            An np.array with one [x, y] row per filled cell of the rotated piece,
+            offset by the top-left corner of the piece (x = column index, y = row index)
+        '''
+        rotated_grid = np.rot90(pieces_dict[self.symbol], self.rotation)
+        # Walk the grid row by row; a filled cell at (row, col) becomes the board cell top-left + [col, row]
+        marks = [
+            self.position + np.array([col, row])
+            for row, cells in enumerate(rotated_grid)
+            for col, cell in enumerate(cells)
+            if cell == 1
+        ]
+        return np.array(marks)
+if __name__ == '__main__':
+    # Demo: print the board cells covered by an unrotated 'P' positioned at [1, 1]
+    demo_piece = PentominoPiece('P', 0, np.array([1, 1]))
+    print(demo_piece.get_grid_locations())

grip_env/test_env.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from grip_env.environment import GridWorldEnv
+from grip_env.layout import BoardLayout
+from grip_env.pieces import PIECE_NAMES, COLOUR_NAMES
+if __name__ == '__main__':
+    # Build an easy board of size 18 with four pieces and a red 'P' as the target
+    layout = BoardLayout(board_size=18, num_pieces=4, shapes=PIECE_NAMES, colours=COLOUR_NAMES, seed=640)
+    agent_start_pos, target_pos, info = layout.set_board_layout(target_shape='P', target_colour='red', level='easy')
+    env = GridWorldEnv(render_mode="human", size=18, grid_info=info, agent_pos=agent_start_pos, target_pos=target_pos)
+    env.reset()
+    env.render()
+    # Action ids: 0 = RIGHT, 1 = DOWN, 2 = LEFT, 3 = UP
+    action_cycle = (0, 0, 0, 1, 1, 2, 2, 3, 3)
+    for _ in range(1000):
+        for action in action_cycle:
+            env.step(action)
+            env.render()

requirements.txt ADDED Viewed

	@@ -0,0 +1,17 @@

+torch
+lightning
+sentence-transformers
+tqdm
+adapters
+transformers
+gym==0.26.2
+rich==13.4.2
+pygame==2.5.2
+progressbar==2.5
+protobuf==5.27.0
+peft==0.11.1
+accelerate==0.31.0
+bitsandbytes
+datasets==3.0.1
+gradio==5.5.0
+matplotlib==3.9.2

src/boards.py ADDED Viewed

	@@ -0,0 +1,43 @@

+from PIL import Image
+from grip_env.environment import GridWorldEnv
+import os
+import json
+import numpy as np
+class GenerateBoard():
+    """Load a stored board for a difficulty level and render its initial state.
+    Args:
+        level: Difficulty name; selects the metadata file test_<level>.json.
+        board_size: Number of cells along one side of the grid.
+    Raises:
+        ValueError: If the metadata file contains no boards.
+    """
+    def __init__(self, level: str, board_size: int):
+        self.level = level
+        self.board_size = board_size
+        file_name = f'test_{level}.json'
+        metadata_path = os.path.join('src', file_name)
+        if not os.path.exists(metadata_path):
+            # Not launched from the repository root: fall back to the file
+            # that sits next to this module.
+            metadata_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), file_name)
+        with open(metadata_path, 'r', encoding='utf-8') as f:
+            metadata = json.load(f)
+        if not metadata:
+            raise ValueError(f"No boards found in {metadata_path}")
+        # Pick one of the stored boards at random
+        self.board_data = metadata[np.random.randint(0, len(metadata))]
+    def setup_initial_board(self):
+        """Render the selected board and list its pieces as target options.
+        Returns:
+            (image, target_options, info): the rendered board as a PIL image,
+            one "<colour> <shape> at <region>" string per piece, and the raw
+            piece info list.
+        """
+        # The last entry of the board record carries the positions and piece info
+        # (presumably earlier entries hold other per-board data - confirm against the JSON)
+        metadata_obj = self.board_data[-1]
+        default_start_pos = np.array(metadata_obj['agent_start_pos'])
+        default_target_pos = np.array(metadata_obj['target_pos'])
+        info = metadata_obj['info']
+        target_options = [
+            f"{piece['piece_colour']} {piece['piece_shape']} at {piece['piece_region']}"
+            for piece in info
+        ]
+        env = GridWorldEnv(render_mode="rgb_array", size=self.board_size, grid_info=info, agent_pos=default_start_pos, target_pos=default_target_pos)
+        env.reset()
+        image = Image.fromarray(env.render())
+        return image, target_options, info

src/test_easy.json ADDED Viewed

The diff for this file is too large to render. See raw diff

src/utils.py ADDED Viewed

File without changes