{ "cells": [ { "cell_type": "code", "execution_count": 3, "id": "385eab34-79dd-41a6-821f-f84fdd558cfb", "metadata": { "collapsed": true, "jupyter": { "outputs_hidden": true } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting ultralytics\n", " Obtaining dependency information for ultralytics from https://files.pythonhosted.org/packages/75/ec/9f9f64358fe1a93bf4bb64903f379f498fdc145d5a7e875775cd26b8ed89/ultralytics-8.0.142-py3-none-any.whl.metadata\n", " Downloading ultralytics-8.0.142-py3-none-any.whl.metadata (28 kB)\n", "Requirement already satisfied: matplotlib>=3.2.2 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (3.7.2)\n", "Collecting opencv-python>=4.6.0 (from ultralytics)\n", " Obtaining dependency information for opencv-python>=4.6.0 from https://files.pythonhosted.org/packages/34/7c/8a5043f362b0a55f07812a0db3f86092cdbd0fe41b933d7bc6fce3ab6c15/opencv_python-4.8.0.74-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata\n", " Downloading opencv_python-4.8.0.74-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)\n", "Requirement already satisfied: pillow>=7.1.2 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (10.0.0)\n", "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (6.0)\n", "Requirement already satisfied: requests>=2.23.0 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (2.28.1)\n", "Requirement already satisfied: scipy>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (1.11.1)\n", "Requirement already satisfied: torch>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (2.0.1+cu117)\n", "Requirement already satisfied: torchvision>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (0.15.2+cu117)\n", "Requirement already satisfied: tqdm>=4.64.0 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (4.65.0)\n", "Requirement already 
satisfied: pandas>=1.1.4 in /usr/local/lib/python3.10/dist-packages (from ultralytics) (2.0.3)\n", "Collecting seaborn>=0.11.0 (from ultralytics)\n", " Downloading seaborn-0.12.2-py3-none-any.whl (293 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m293.3/293.3 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from ultralytics) (5.9.5)\n", "Requirement already satisfied: py-cpuinfo in /usr/local/lib/python3.10/dist-packages (from ultralytics) (9.0.0)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.2.2->ultralytics) (1.1.0)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.2.2->ultralytics) (0.11.0)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.2.2->ultralytics) (4.41.1)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.2.2->ultralytics) (1.4.4)\n", "Requirement already satisfied: numpy>=1.20 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.2.2->ultralytics) (1.24.1)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.2.2->ultralytics) (23.1)\n", "Requirement already satisfied: pyparsing<3.1,>=2.3.1 in /usr/lib/python3/dist-packages (from matplotlib>=3.2.2->ultralytics) (2.4.7)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.2.2->ultralytics) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=1.1.4->ultralytics) (2023.3)\n", "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from 
pandas>=1.1.4->ultralytics) (2023.3)\n", "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.23.0->ultralytics) (2.1.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.23.0->ultralytics) (3.4)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.23.0->ultralytics) (1.26.13)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.23.0->ultralytics) (2022.12.7)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.7.0->ultralytics) (3.9.0)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.7.0->ultralytics) (4.7.1)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.7.0->ultralytics) (1.11.1)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.7.0->ultralytics) (3.0)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.7.0->ultralytics) (3.1.2)\n", "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.7.0->ultralytics) (2.0.0)\n", "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.7.0->ultralytics) (3.25.0)\n", "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.7.0->ultralytics) (15.0.7)\n", "Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.7->matplotlib>=3.2.2->ultralytics) (1.16.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.7.0->ultralytics) (2.1.2)\n", "Requirement already 
satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.7.0->ultralytics) (1.2.1)\n", "Downloading ultralytics-8.0.142-py3-none-any.whl (607 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m608.0/608.0 kB\u001b[0m \u001b[31m24.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading opencv_python-4.8.0.74-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (61.7 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.7/61.7 MB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m:00:01\u001b[0m\n", "\u001b[?25hInstalling collected packages: opencv-python, seaborn, ultralytics\n", "Successfully installed opencv-python-4.8.0.74 seaborn-0.12.2 ultralytics-8.0.142\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m" ] } ], "source": [ "#!pip install ultralytics\n", "#!git clone LearnItAnyway/YOLO_LLaMaVisNav" ] }, { "cell_type": "code", "execution_count": 5, "id": "1fda72f5-de91-4b55-b2aa-7296b30d81f1", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "You are using a model of type llava to instantiate a model of type llama. 
This is not supported for all configurations of models and can yield errors.\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a3d7a03e147f447480c54fcdc47f1f01", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at ./YOLO_LLaMa_VisNav were not used when initializing LlamaForCausalLM: ['model.mm_projector.weight', 'model.mm_projector.bias']\n", "- This IS expected if you are initializing LlamaForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing LlamaForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" ] } ], "source": [ "import torch\n", "\n", "import io\n", "from PIL import Image\n", "import json\n", "from llava import LlavaLlamaForCausalLM\n", "from transformers import LlamaForCausalLM, AutoTokenizer\n", "from transformers import CLIPImageProcessor\n", "from ultralytics import YOLO\n", "import pandas as pd\n", "\n", "# Label name -> integer class id; the ids are contiguous and run from 0 to 26.\n", "class_name_to_id_mapping = {'kiosk': 0, 'person': 1, 'motorcycle': 2,\n", " 'potted_plant': 3, 'movable_signage': 4, 'truck': 5, 'stop': 6,\n", " 'cat': 7, 'bollard': 8, 'traffic_sign': 9, 'tree_trunk': 10, 'car': 11,\n", " 'wheelchair': 12, 'fire_hydrant': 13, 'bicycle': 14, 'chair': 15,\n", " 'traffic_light': 16, 'pole': 17, 'parking_meter': 18, 'dog': 19,\n", " 'table': 20, 'scooter': 21, 'bench': 22, 'barricade': 23,\n", " 'stroller': 24, 'bus': 25, 'carrier': 26}\n", "# Class names ordered by id, so class_ids[i] is the name for id i.\n", "# Sorted on the id instead of trusting dict insertion order, so editing the\n", "# mapping above cannot silently mislabel detections.\n", "class_ids = sorted(class_name_to_id_mapping, key=class_name_to_id_mapping.get)\n", "# Fail fast if the ids stop being a contiguous 0..N-1 range, since class_ids is indexed by id.\n", "assert [class_name_to_id_mapping[name] for name in class_ids] == list(range(len(class_ids))), 'class ids must be contiguous and start at 0'\n", "\n", "## Load Model\n", "# Local checkpoint directory; presumably passed to from_pretrained further down - confirm.\n", "llm = './YOLO_LLaMa_VisNav'\n", "# NOTE(review): hard-coded to CUDA; presumably a CPU-only host fails once models or tensors are moved to it - confirm downstream.\n", "device='cuda'\n", 
"yolo_model = YOLO('YOLO_LLaMa_VisNav/yolo.pt')\n", "\n", "def image_to_table(imgs):\n", " output = yolo_model.predict(imgs)[0]\n", " xyxy = output.boxes.xyxy.cpu().numpy()\n", " y_, x_ = output.orig_shape\n", "\n", " xyxy[:, 0] = xyxy[:, 0]/x_\n", " xyxy[:, 1] = xyxy[:, 1]/y_\n", " xyxy[:, 2] = xyxy[:, 2]/x_\n", " xyxy[:, 3] = xyxy[:, 3]/y_\n", "\n", " xyxy = xyxy.round(3)\n", "\n", " cc = pd.DataFrame.from_dict({'class':[class_ids[a] for a in output.boxes.cls.int().cpu().numpy()],\n", " 'xmin': xyxy[:, 0], 'ymin': xyxy[:, 1],\n", " 'xmax': xyxy[:, 2], 'ymax': xyxy[:, 3],\n", " 'size': (((xyxy[:, 2]-xyxy[:, 0])*(xyxy[:, 3]-xyxy[:, 1]))**0.5).round(2)})\n", " dd = cc.iloc[cc['size'].argsort().values][['class', 'xmin', 'ymin', 'xmax', 'ymax', 'size']]\n", " str_ = '