Upload 10 files
- Dockerfile +30 -0
- app.py +87 -0
- classes.py +15 -0
- coco.py +519 -0
- config.py +172 -0
- model.py +0 -0
- parallel_model.py +173 -0
- requirements.txt +20 -0
- shapes.py +184 -0
- utils.py +736 -0
Dockerfile
ADDED
@@ -0,0 +1,30 @@
# Use the official Python 3.7 image
FROM python:3.7

# Set the working directory to /code
WORKDIR /code

# Copy the requirements file into the container at /code
COPY ./requirements.txt /code/requirements.txt

# Install dependencies from requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user
# Switch to the "user" user
USER user
# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Copy the current directory contents into the container at $HOME/app, setting the owner to the user
COPY --chown=user . $HOME/app

# Start the FastAPI app on port 7860, the default port expected by Spaces
# CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

CMD gunicorn -k uvicorn.workers.UvicornWorker --workers 2 --threads=2 --max-requests 512 --bind 0.0.0.0:7860 app:app
app.py
ADDED
@@ -0,0 +1,87 @@
import warnings
warnings.filterwarnings("ignore")
# Import necessary packages

import os
import io
import sys
import base64
import random
import argparse
import math
import numpy as np

from typing import Any, Union, Dict, List
import requests
from PIL import Image
from imageio import imread
from keras import backend as K

import coco
import utils
import model as modellib
import visualize
from classes import class_names
from fastapi import FastAPI

# Create a new FastAPI app instance
app = FastAPI()

# Root directory of the project
ROOT_DIR = os.getcwd()

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
os.system("pip install pycocotools==2.0.0")
K.clear_session()

if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

class InferenceConfig(coco.CocoConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
config = InferenceConfig()

model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)
model.load_weights(COCO_MODEL_PATH, by_name=True)


# Define a function to handle the GET request at `/generate`.
# The generate() function is a FastAPI route that takes a string
# parameter called `path` (the URL of an image), runs Mask R-CNN
# instance segmentation on that image, and returns a JSON response
# containing the annotated image under the key "output".
@app.get("/generate")
def generate(path: str):
    """
    Download the image at the given URL, run Mask R-CNN instance
    segmentation on it, and return the visualized detections
    (bounding boxes, masks, class names, and scores) as an image.
    """
    # Download the image from the given URL
    r = requests.get(path, stream=True)
    img = Image.open(io.BytesIO(r.content)).convert('RGB')
    open_cv_image = np.array(img)
    image = open_cv_image

    results = model.detect([image], verbose=1)

    # Get results and save them
    r = results[0]
    output_image = visualize.display_instances_and_save(image,
        r['rois'], r['masks'], r['class_ids'], class_names, r['scores'])

    image = Image.fromarray(output_image)
    im_file = io.BytesIO()
    image.save(im_file, format="JPEG")
    # Base64-encode the JPEG bytes so they are JSON-serializable
    im_bytes = base64.b64encode(im_file.getvalue()).decode("utf-8")
    # Return the encoded image in a JSON response
    return {"output": im_bytes}
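For reference, a minimal client sketch for the `/generate` route above. The Space URL and the sample image URL are placeholders, not values from this repository, and the sketch assumes the response carries the base64-encoded JPEG produced by the route.

# Minimal client sketch for the /generate endpoint (hypothetical URLs).
import base64
import requests

SPACE_URL = "http://localhost:7860"              # placeholder: deployed Space URL
IMAGE_URL = "https://example.com/street.jpg"     # placeholder: any reachable image

resp = requests.get(f"{SPACE_URL}/generate", params={"path": IMAGE_URL}, timeout=120)
resp.raise_for_status()

# "output" is assumed to hold the base64-encoded annotated JPEG (see app.py above).
with open("segmented.jpg", "wb") as f:
    f.write(base64.b64decode(resp.json()["output"]))
print("Saved annotated image to segmented.jpg")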
classes.py
ADDED
@@ -0,0 +1,15 @@
class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
               'bus', 'train', 'truck', 'boat', 'traffic light',
               'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
               'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
               'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
               'kite', 'baseball bat', 'baseball glove', 'skateboard',
               'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
               'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
               'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
               'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
               'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
               'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
               'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
               'teddy bear', 'hair drier', 'toothbrush']
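A small sketch of how this list is used: the class IDs returned by model.detect() index directly into class_names, with index 0 reserved for the background.

# Sketch: mapping detected class IDs back to names.
from classes import class_names

assert class_names[0] == 'BG'
assert class_names.index('person') == 1
# For a detection result r from model.detect():
# labels = [class_names[i] for i in r['class_ids']]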
coco.py
ADDED
@@ -0,0 +1,519 @@
"""
Mask R-CNN
Configurations and data loading code for MS COCO.

Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla

------------------------------------------------------------

Usage: import the module (see Jupyter notebooks for examples), or run from
       the command line as such:

    # Train a new model starting from pre-trained COCO weights
    python3 coco.py train --dataset=/path/to/coco/ --model=coco

    # Train a new model starting from ImageNet weights
    python3 coco.py train --dataset=/path/to/coco/ --model=imagenet

    # Continue training a model that you had trained earlier
    python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5

    # Continue training the last model you trained
    python3 coco.py train --dataset=/path/to/coco/ --model=last

    # Run COCO evaluation on the last model you trained
    python3 coco.py evaluate --dataset=/path/to/coco/ --model=last
"""

import os
import time
import numpy as np

# Download and install the Python COCO tools from https://github.com/waleedka/coco
# That's a fork from the original https://github.com/pdollar/coco with a bug
# fix for Python 3.
# I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50
# If the PR is merged then use the original repo.
# Note: Edit PythonAPI/Makefile and replace "python" with "python3".
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from pycocotools import mask as maskUtils

import zipfile
import urllib.request
import shutil

from config import Config
import utils
import model as modellib

# Root directory of the project
ROOT_DIR = os.getcwd()

# Path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

# Directory to save logs and model checkpoints, if not provided
# through the command line argument --logs
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
DEFAULT_DATASET_YEAR = "2014"

############################################################
#  Configurations
############################################################


class CocoConfig(Config):
    """Configuration for training on MS COCO.
    Derives from the base Config class and overrides values specific
    to the COCO dataset.
    """
    # Give the configuration a recognizable name
    NAME = "coco"

    # We use a GPU with 12GB memory, which can fit two images.
    # Adjust down if you use a smaller GPU.
    IMAGES_PER_GPU = 2

    # Uncomment to train on 8 GPUs (default is 1)
    # GPU_COUNT = 8

    # Number of classes (including background)
    NUM_CLASSES = 1 + 80  # COCO has 80 classes


############################################################
#  Dataset
############################################################

class CocoDataset(utils.Dataset):
    def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None,
                  class_map=None, return_coco=False, auto_download=False):
        """Load a subset of the COCO dataset.
        dataset_dir: The root directory of the COCO dataset.
        subset: What to load (train, val, minival, valminusminival)
        year: What dataset year to load (2014, 2017) as a string, not an integer
        class_ids: If provided, only loads images that have the given classes.
        class_map: TODO: Not implemented yet. Supports mapping classes from
            different datasets to the same class ID.
        return_coco: If True, returns the COCO object.
        auto_download: Automatically download and unzip MS-COCO images and annotations
        """

        if auto_download is True:
            self.auto_download(dataset_dir, subset, year)

        coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year))
        if subset == "minival" or subset == "valminusminival":
            subset = "val"
        image_dir = "{}/{}{}".format(dataset_dir, subset, year)

        # Load all classes or a subset?
        if not class_ids:
            # All classes
            class_ids = sorted(coco.getCatIds())

        # All images or a subset?
        if class_ids:
            image_ids = []
            for id in class_ids:
                image_ids.extend(list(coco.getImgIds(catIds=[id])))
            # Remove duplicates
            image_ids = list(set(image_ids))
        else:
            # All images
            image_ids = list(coco.imgs.keys())

        # Add classes
        for i in class_ids:
            self.add_class("coco", i, coco.loadCats(i)[0]["name"])

        # Add images
        for i in image_ids:
            self.add_image(
                "coco", image_id=i,
                path=os.path.join(image_dir, coco.imgs[i]['file_name']),
                width=coco.imgs[i]["width"],
                height=coco.imgs[i]["height"],
                annotations=coco.loadAnns(coco.getAnnIds(
                    imgIds=[i], catIds=class_ids, iscrowd=None)))
        if return_coco:
            return coco

    def auto_download(self, dataDir, dataType, dataYear):
        """Download the COCO dataset/annotations if requested.
        dataDir: The root directory of the COCO dataset.
        dataType: What to load (train, val, minival, valminusminival)
        dataYear: What dataset year to load (2014, 2017) as a string, not an integer
        Note:
            For 2014, use "train", "val", "minival", or "valminusminival"
            For 2017, only "train" and "val" annotations are available
        """

        # Setup paths and file names
        if dataType == "minival" or dataType == "valminusminival":
            imgDir = "{}/{}{}".format(dataDir, "val", dataYear)
            imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear)
            imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear)
        else:
            imgDir = "{}/{}{}".format(dataDir, dataType, dataYear)
            imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear)
            imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear)
        # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL)

        # Create main folder if it doesn't exist yet
        if not os.path.exists(dataDir):
            os.makedirs(dataDir)

        # Download images if not available locally
        if not os.path.exists(imgDir):
            os.makedirs(imgDir)
            print("Downloading images to " + imgZipFile + " ...")
            with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out:
                shutil.copyfileobj(resp, out)
            print("... done downloading.")
            print("Unzipping " + imgZipFile)
            with zipfile.ZipFile(imgZipFile, "r") as zip_ref:
                zip_ref.extractall(dataDir)
            print("... done unzipping")
        print("Will use images in " + imgDir)

        # Setup annotations data paths
        annDir = "{}/annotations".format(dataDir)
        if dataType == "minival":
            annZipFile = "{}/instances_minival2014.json.zip".format(dataDir)
            annFile = "{}/instances_minival2014.json".format(annDir)
            annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0"
            unZipDir = annDir
        elif dataType == "valminusminival":
            annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir)
            annFile = "{}/instances_valminusminival2014.json".format(annDir)
            annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0"
            unZipDir = annDir
        else:
            annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear)
            annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear)
            annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear)
            unZipDir = dataDir
        # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL)

        # Download annotations if not available locally
        if not os.path.exists(annDir):
            os.makedirs(annDir)
        if not os.path.exists(annFile):
            if not os.path.exists(annZipFile):
                print("Downloading zipped annotations to " + annZipFile + " ...")
                with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out:
                    shutil.copyfileobj(resp, out)
                print("... done downloading.")
            print("Unzipping " + annZipFile)
            with zipfile.ZipFile(annZipFile, "r") as zip_ref:
                zip_ref.extractall(unZipDir)
            print("... done unzipping")
        print("Will use annotations in " + annFile)

    def load_mask(self, image_id):
        """Load instance masks for the given image.

        Different datasets use different ways to store masks. This
        function converts the different mask format to one format
        in the form of a bitmap [height, width, instances].

        Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        # If not a COCO image, delegate to parent class.
        image_info = self.image_info[image_id]
        if image_info["source"] != "coco":
            return super(CocoDataset, self).load_mask(image_id)

        instance_masks = []
        class_ids = []
        annotations = self.image_info[image_id]["annotations"]
        # Build mask of shape [height, width, instance_count] and list
        # of class IDs that correspond to each channel of the mask.
        for annotation in annotations:
            class_id = self.map_source_class_id(
                "coco.{}".format(annotation['category_id']))
            if class_id:
                m = self.annToMask(annotation, image_info["height"],
                                   image_info["width"])
                # Some objects are so small that they're less than 1 pixel area
                # and end up rounded out. Skip those objects.
                if m.max() < 1:
                    continue
                # Is it a crowd? If so, use a negative class ID.
                if annotation['iscrowd']:
                    # Use negative class ID for crowds
                    class_id *= -1
                    # For crowd masks, annToMask() sometimes returns a mask
                    # smaller than the given dimensions. If so, resize it.
                    if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
                        m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
                instance_masks.append(m)
                class_ids.append(class_id)

        # Pack instance masks into an array
        if class_ids:
            mask = np.stack(instance_masks, axis=2)
            class_ids = np.array(class_ids, dtype=np.int32)
            return mask, class_ids
        else:
            # Call super class to return an empty mask
            return super(CocoDataset, self).load_mask(image_id)

    def image_reference(self, image_id):
        """Return a link to the image in the COCO Website."""
        info = self.image_info[image_id]
        if info["source"] == "coco":
            return "http://cocodataset.org/#explore?id={}".format(info["id"])
        else:
            super(CocoDataset, self).image_reference(image_id)

    # The following two functions are from pycocotools with a few changes.

    def annToRLE(self, ann, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE to RLE.
        :return: binary mask (numpy 2D array)
        """
        segm = ann['segmentation']
        if isinstance(segm, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(segm, height, width)
            rle = maskUtils.merge(rles)
        elif isinstance(segm['counts'], list):
            # uncompressed RLE
            rle = maskUtils.frPyObjects(segm, height, width)
        else:
            # rle
            rle = ann['segmentation']
        return rle

    def annToMask(self, ann, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
        :return: binary mask (numpy 2D array)
        """
        rle = self.annToRLE(ann, height, width)
        m = maskUtils.decode(rle)
        return m


############################################################
#  COCO Evaluation
############################################################

def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
    """Arrange results to match COCO specs in http://cocodataset.org/#format
    """
    # If no results, return an empty list
    if rois is None:
        return []

    results = []
    for image_id in image_ids:
        # Loop through detections
        for i in range(rois.shape[0]):
            class_id = class_ids[i]
            score = scores[i]
            bbox = np.around(rois[i], 1)
            mask = masks[:, :, i]

            result = {
                "image_id": image_id,
                "category_id": dataset.get_source_class_id(class_id, "coco"),
                "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]],
                "score": score,
                "segmentation": maskUtils.encode(np.asfortranarray(mask))
            }
            results.append(result)
    return results


def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
    """Runs official COCO evaluation.
    dataset: A Dataset object with validation data
    eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
    limit: if not 0, it's the number of images to use for evaluation
    """
    # Pick COCO images from the dataset
    image_ids = image_ids or dataset.image_ids

    # Limit to a subset
    if limit:
        image_ids = image_ids[:limit]

    # Get corresponding COCO image IDs.
    coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids]

    t_prediction = 0
    t_start = time.time()

    results = []
    for i, image_id in enumerate(image_ids):
        # Load image
        image = dataset.load_image(image_id)

        # Run detection
        t = time.time()
        r = model.detect([image], verbose=0)[0]
        t_prediction += (time.time() - t)

        # Convert results to COCO format
        image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
                                           r["rois"], r["class_ids"],
                                           r["scores"], r["masks"])
        results.extend(image_results)

    # Load results. This modifies results with additional attributes.
    coco_results = coco.loadRes(results)

    # Evaluate
    cocoEval = COCOeval(coco, coco_results, eval_type)
    cocoEval.params.imgIds = coco_image_ids
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    print("Prediction time: {}. Average {}/image".format(
        t_prediction, t_prediction / len(image_ids)))
    print("Total time: ", time.time() - t_start)


############################################################
#  Training
############################################################


if __name__ == '__main__':
    import argparse

    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Train Mask R-CNN on MS COCO.')
    parser.add_argument("command",
                        metavar="<command>",
                        help="'train' or 'evaluate' on MS COCO")
    parser.add_argument('--dataset', required=True,
                        metavar="/path/to/coco/",
                        help='Directory of the MS-COCO dataset')
    parser.add_argument('--year', required=False,
                        default=DEFAULT_DATASET_YEAR,
                        metavar="<year>",
                        help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)')
    parser.add_argument('--model', required=True,
                        metavar="/path/to/weights.h5",
                        help="Path to weights .h5 file or 'coco'")
    parser.add_argument('--logs', required=False,
                        default=DEFAULT_LOGS_DIR,
                        metavar="/path/to/logs/",
                        help='Logs and checkpoints directory (default=logs/)')
    parser.add_argument('--limit', required=False,
                        default=500,
                        metavar="<image count>",
                        help='Images to use for evaluation (default=500)')
    parser.add_argument('--download', required=False,
                        default=False,
                        metavar="<True|False>",
                        help='Automatically download and unzip MS-COCO files (default=False)',
                        type=bool)
    args = parser.parse_args()
    print("Command: ", args.command)
    print("Model: ", args.model)
    print("Dataset: ", args.dataset)
    print("Year: ", args.year)
    print("Logs: ", args.logs)
    print("Auto Download: ", args.download)

    # Configurations
    if args.command == "train":
        config = CocoConfig()
    else:
        class InferenceConfig(CocoConfig):
            # Set batch size to 1 since we'll be running inference on
            # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
            GPU_COUNT = 1
            IMAGES_PER_GPU = 1
            DETECTION_MIN_CONFIDENCE = 0
        config = InferenceConfig()
    config.display()

    # Create model
    if args.command == "train":
        model = modellib.MaskRCNN(mode="training", config=config,
                                  model_dir=args.logs)
    else:
        model = modellib.MaskRCNN(mode="inference", config=config,
                                  model_dir=args.logs)

    # Select weights file to load
    if args.model.lower() == "coco":
        model_path = COCO_MODEL_PATH
    elif args.model.lower() == "last":
        # Find last trained weights
        model_path = model.find_last()[1]
    elif args.model.lower() == "imagenet":
        # Start from ImageNet trained weights
        model_path = model.get_imagenet_weights()
    else:
        model_path = args.model

    # Load weights
    print("Loading weights ", model_path)
    model.load_weights(model_path, by_name=True)

    # Train or evaluate
    if args.command == "train":
        # Training dataset. Use the training set and 35K from the
        # validation set, as in the Mask RCNN paper.
        dataset_train = CocoDataset()
        dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download)
        dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download)
        dataset_train.prepare()

        # Validation dataset
        dataset_val = CocoDataset()
        dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download)
        dataset_val.prepare()

        # *** This training schedule is an example. Update to your needs ***

        # Training - Stage 1
        print("Training network heads")
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE,
                    epochs=40,
                    layers='heads')

        # Training - Stage 2
        # Finetune layers from ResNet stage 4 and up
        print("Fine tune Resnet stage 4 and up")
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE,
                    epochs=120,
                    layers='4+')

        # Training - Stage 3
        # Fine tune all layers
        print("Fine tune all layers")
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE / 10,
                    epochs=160,
                    layers='all')

    elif args.command == "evaluate":
        # Validation dataset
        dataset_val = CocoDataset()
        coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download)
        dataset_val.prepare()
        print("Running COCO evaluation on {} images.".format(args.limit))
        evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit))
    else:
        print("'{}' is not recognized. "
              "Use 'train' or 'evaluate'".format(args.command))
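A short sketch of driving the same evaluation path programmatically, mirroring the `evaluate` branch above. It assumes the COCO weights file and a local "minival" split already exist; the dataset path is a placeholder.

# Sketch: programmatic COCO evaluation using the classes defined above.
import coco
import model as modellib

class EvalConfig(coco.CocoConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

config = EvalConfig()
m = modellib.MaskRCNN(mode="inference", config=config, model_dir="logs")
m.load_weights("mask_rcnn_coco.h5", by_name=True)   # assumes the weights file is present

dataset = coco.CocoDataset()
coco_obj = dataset.load_coco("/path/to/coco", "minival", year="2014", return_coco=True)
dataset.prepare()
coco.evaluate_coco(m, dataset, coco_obj, "bbox", limit=50)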
config.py
ADDED
@@ -0,0 +1,172 @@
"""
Mask R-CNN
Base Configurations class.

Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
"""

import math
import numpy as np


# Base Configuration Class
# Don't use this class directly. Instead, sub-class it and override
# the configurations you need to change.

class Config(object):
    """Base configuration class. For custom configurations, create a
    sub-class that inherits from this one and override properties
    that need to be changed.
    """
    # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
    # Useful if your code needs to do things differently depending on which
    # experiment is running.
    NAME = None  # Override in sub-classes

    # NUMBER OF GPUs to use. For CPU training, use 1
    GPU_COUNT = 1

    # Number of images to train with on each GPU. A 12GB GPU can typically
    # handle 2 images of 1024x1024px.
    # Adjust based on your GPU memory and image sizes. Use the highest
    # number that your GPU can handle for best performance.
    IMAGES_PER_GPU = 2

    # Number of training steps per epoch
    # This doesn't need to match the size of the training set. Tensorboard
    # updates are saved at the end of each epoch, so setting this to a
    # smaller number means getting more frequent TensorBoard updates.
    # Validation stats are also calculated at each epoch end and they
    # might take a while, so don't set this too small to avoid spending
    # a lot of time on validation stats.
    STEPS_PER_EPOCH = 1000

    # Number of validation steps to run at the end of every training epoch.
    # A bigger number improves accuracy of validation stats, but slows
    # down the training.
    VALIDATION_STEPS = 50

    # Backbone network architecture
    # Supported values are: resnet50, resnet101
    BACKBONE = "resnet101"

    # The strides of each layer of the FPN Pyramid. These values
    # are based on a Resnet101 backbone.
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]

    # Number of classification classes (including background)
    NUM_CLASSES = 1  # Override in sub-classes

    # Length of square anchor side in pixels
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Ratios of anchors at each cell (width/height)
    # A value of 1 represents a square anchor, and 0.5 is a wide anchor
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]

    # Anchor stride
    # If 1 then anchors are created for each cell in the backbone feature map.
    # If 2, then anchors are created for every other cell, and so on.
    RPN_ANCHOR_STRIDE = 1

    # Non-max suppression threshold to filter RPN proposals.
    # You can reduce this during training to generate more proposals.
    RPN_NMS_THRESHOLD = 0.7

    # How many anchors per image to use for RPN training
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256

    # ROIs kept after non-maximum suppression (training and inference)
    POST_NMS_ROIS_TRAINING = 2000
    POST_NMS_ROIS_INFERENCE = 1000

    # If enabled, resizes instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    # Input image resizing
    # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and
    # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't
    # be satisfied together the IMAGE_MAX_DIM is enforced.
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1024
    # If True, pad images with zeros such that they're (max_dim by max_dim)
    IMAGE_PADDING = True  # currently, the False option is not supported

    # Image mean (RGB)
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])

    # Number of ROIs per image to feed to classifier/mask heads
    # The Mask RCNN paper uses 512 but often the RPN doesn't generate
    # enough positive proposals to fill this and keep a positive:negative
    # ratio of 1:3. You can increase the number of proposals by adjusting
    # the RPN NMS threshold.
    TRAIN_ROIS_PER_IMAGE = 200

    # Percent of positive ROIs used to train classifier/mask heads
    ROI_POSITIVE_RATIO = 0.33

    # Pooled ROIs
    POOL_SIZE = 7
    MASK_POOL_SIZE = 14
    MASK_SHAPE = [28, 28]

    # Maximum number of ground truth instances to use in one image
    MAX_GT_INSTANCES = 100

    # Bounding box refinement standard deviation for RPN and final detections.
    RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])

    # Max number of final detections
    DETECTION_MAX_INSTANCES = 100

    # Minimum probability value to accept a detected instance
    # ROIs below this threshold are skipped
    DETECTION_MIN_CONFIDENCE = 0.7

    # Non-maximum suppression threshold for detection
    DETECTION_NMS_THRESHOLD = 0.3

    # Learning rate and momentum
    # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
    # weights to explode. Likely due to differences in optimizer
    # implementation.
    LEARNING_RATE = 0.001
    LEARNING_MOMENTUM = 0.9

    # Weight decay regularization
    WEIGHT_DECAY = 0.0001

    # Use RPN ROIs or externally generated ROIs for training
    # Keep this True for most situations. Set to False if you want to train
    # the head branches on ROI generated by code rather than the ROIs from
    # the RPN. For example, to debug the classifier head without having to
    # train the RPN.
    USE_RPN_ROIS = True

    def __init__(self):
        """Set values of computed attributes."""
        # Effective batch size
        self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT

        # Input image size
        self.IMAGE_SHAPE = np.array(
            [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3])

        # Compute backbone size from input image size
        self.BACKBONE_SHAPES = np.array(
            [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)),
              int(math.ceil(self.IMAGE_SHAPE[1] / stride))]
             for stride in self.BACKBONE_STRIDES])

    def display(self):
        """Display Configuration values."""
        print("\nConfigurations:")
        for a in dir(self):
            if not a.startswith("__") and not callable(getattr(self, a)):
                print("{:30} {}".format(a, getattr(self, a)))
        print("\n")
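As the comments above recommend, Config is meant to be subclassed rather than used directly. A minimal sketch; the experiment name and values below are illustrative only, not part of this repository.

# Sketch: defining a custom configuration by subclassing Config.
from config import Config

class BalloonConfig(Config):      # hypothetical experiment name
    NAME = "balloon"
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1            # effective batch size becomes 1
    NUM_CLASSES = 1 + 1           # background + 1 object class
    STEPS_PER_EPOCH = 100

cfg = BalloonConfig()
cfg.display()                     # prints all resolved settings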
model.py
ADDED
The diff for this file is too large to render.
See raw diff
parallel_model.py
ADDED
@@ -0,0 +1,173 @@
"""
Mask R-CNN
Multi-GPU Support for Keras.

Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla

Ideas and small code snippets from these sources:
https://github.com/fchollet/keras/issues/2436
https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012
https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/
https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py
"""

import tensorflow as tf
import keras.backend as K
import keras.layers as KL
import keras.models as KM


class ParallelModel(KM.Model):
    """Subclasses the standard Keras Model and adds multi-GPU support.
    It works by creating a copy of the model on each GPU. Then it slices
    the inputs and sends a slice to each copy of the model, and then
    merges the outputs together and applies the loss on the combined
    outputs.
    """

    def __init__(self, keras_model, gpu_count):
        """Class constructor.
        keras_model: The Keras model to parallelize
        gpu_count: Number of GPUs. Must be > 1
        """
        self.inner_model = keras_model
        self.gpu_count = gpu_count
        merged_outputs = self.make_parallel()
        super(ParallelModel, self).__init__(inputs=self.inner_model.inputs,
                                            outputs=merged_outputs)

    def __getattribute__(self, attrname):
        """Redirect loading and saving methods to the inner model. That's where
        the weights are stored."""
        if 'load' in attrname or 'save' in attrname:
            return getattr(self.inner_model, attrname)
        return super(ParallelModel, self).__getattribute__(attrname)

    def summary(self, *args, **kwargs):
        """Override summary() to display summaries of both, the wrapper
        and inner models."""
        super(ParallelModel, self).summary(*args, **kwargs)
        self.inner_model.summary(*args, **kwargs)

    def make_parallel(self):
        """Creates a new wrapper model that consists of multiple replicas of
        the original model placed on different GPUs.
        """
        # Slice inputs. Slice inputs on the CPU to avoid sending a copy
        # of the full inputs to all GPUs. Saves on bandwidth and memory.
        input_slices = {name: tf.split(x, self.gpu_count)
                        for name, x in zip(self.inner_model.input_names,
                                           self.inner_model.inputs)}

        output_names = self.inner_model.output_names
        outputs_all = []
        for i in range(len(self.inner_model.outputs)):
            outputs_all.append([])

        # Run the model call() on each GPU to place the ops there
        for i in range(self.gpu_count):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('tower_%d' % i):
                    # Run a slice of inputs through this replica
                    zipped_inputs = zip(self.inner_model.input_names,
                                        self.inner_model.inputs)
                    inputs = [
                        KL.Lambda(lambda s: input_slices[name][i],
                                  output_shape=lambda s: (None,) + s[1:])(tensor)
                        for name, tensor in zipped_inputs]
                    # Create the model replica and get the outputs
                    outputs = self.inner_model(inputs)
                    if not isinstance(outputs, list):
                        outputs = [outputs]
                    # Save the outputs for merging back together later
                    for l, o in enumerate(outputs):
                        outputs_all[l].append(o)

        # Merge outputs on CPU
        with tf.device('/cpu:0'):
            merged = []
            for outputs, name in zip(outputs_all, output_names):
                # If outputs are numbers without dimensions, add a batch dim.
                def add_dim(tensor):
                    """Add a dimension to tensors that don't have any."""
                    if K.int_shape(tensor) == ():
                        return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor)
                    return tensor
                outputs = list(map(add_dim, outputs))

                # Concatenate
                merged.append(KL.Concatenate(axis=0, name=name)(outputs))
        return merged


if __name__ == "__main__":
    # Testing code below. It creates a simple model to train on MNIST and
    # tries to run it on 2 GPUs. It saves the graph so it can be viewed
    # in TensorBoard. Run it as:
    #
    # python3 parallel_model.py

    import os
    import numpy as np
    import keras.optimizers
    from keras.datasets import mnist
    from keras.preprocessing.image import ImageDataGenerator

    GPU_COUNT = 2

    # Root directory of the project
    ROOT_DIR = os.getcwd()

    # Directory to save logs and trained model
    MODEL_DIR = os.path.join(ROOT_DIR, "logs/parallel")

    def build_model(x_train, num_classes):
        # Reset default graph. Keras leaves old ops in the graph,
        # which are ignored for execution but clutter graph
        # visualization in TensorBoard.
        tf.reset_default_graph()

        inputs = KL.Input(shape=x_train.shape[1:], name="input_image")
        x = KL.Conv2D(32, (3, 3), activation='relu', padding="same",
                      name="conv1")(inputs)
        x = KL.Conv2D(64, (3, 3), activation='relu', padding="same",
                      name="conv2")(x)
        x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x)
        x = KL.Flatten(name="flat1")(x)
        x = KL.Dense(128, activation='relu', name="dense1")(x)
        x = KL.Dense(num_classes, activation='softmax', name="dense2")(x)

        return KM.Model(inputs, x, "digit_classifier_model")

    # Load MNIST Data
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = np.expand_dims(x_train, -1).astype('float32') / 255
    x_test = np.expand_dims(x_test, -1).astype('float32') / 255

    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)

    # Build data generator and model
    datagen = ImageDataGenerator()
    model = build_model(x_train, 10)

    # Add multi-GPU support.
    model = ParallelModel(model, GPU_COUNT)

    optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0)

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizer, metrics=['accuracy'])

    model.summary()

    # Train
    model.fit_generator(
        datagen.flow(x_train, y_train, batch_size=64),
        steps_per_epoch=50, epochs=10, verbose=1,
        validation_data=(x_test, y_test),
        callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR,
                                               write_graph=True)]
    )
requirements.txt
ADDED
@@ -0,0 +1,20 @@
numpy==1.21.6
scipy==1.2.2
Pillow==9.5.0
Cython==0.29.34
matplotlib==3.5.3
scikit-image==0.19.3
tensorflow==1.13.1
keras==2.0.8
opencv-python-headless==4.7.0.72
h5py==2.10.0
imgaug==0.4.0
ipython==7.34.0

imageio==2.9.0
requests==2.27.*
uvloop==0.15.2
uvicorn==0.13.4
httptools==0.2.0
fastapi==0.74.*
gunicorn==20.1.0
shapes.py
ADDED
@@ -0,0 +1,184 @@
"""
Mask R-CNN
Configurations and data loading code for the synthetic Shapes dataset.
This is a duplicate of the code in the notebook train_shapes.ipynb for easy
import into other notebooks, such as inspect_model.ipynb.

Copyright (c) 2017 Matterport, Inc.
Licensed under the MIT License (see LICENSE for details)
Written by Waleed Abdulla
"""

import math
import random
import numpy as np
import cv2

from config import Config
import utils


class ShapesConfig(Config):
    """Configuration for training on the toy shapes dataset.
    Derives from the base Config class and overrides values specific
    to the toy shapes dataset.
    """
    # Give the configuration a recognizable name
    NAME = "shapes"

    # Train on 1 GPU and 8 images per GPU. We can put multiple images on each
    # GPU because the images are small. Batch size is 8 (GPUs * images/GPU).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 8

    # Number of classes (including background)
    NUM_CLASSES = 1 + 3  # background + 3 shapes

    # Use small images for faster training. Set the limits of the small side
    # and the large side, and that determines the image shape.
    IMAGE_MIN_DIM = 128
    IMAGE_MAX_DIM = 128

    # Use smaller anchors because our image and objects are small
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)  # anchor side in pixels

    # Reduce training ROIs per image because the images are small and have
    # few objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 32

    # Use a small epoch since the data is simple
    STEPS_PER_EPOCH = 100

    # use small validation steps since the epoch is small
    VALIDATION_STEPS = 5


class ShapesDataset(utils.Dataset):
    """Generates the shapes synthetic dataset. The dataset consists of simple
    shapes (triangles, squares, circles) placed randomly on a blank surface.
    The images are generated on the fly. No file access required.
    """

    def load_shapes(self, count, height, width):
        """Generate the requested number of synthetic images.
        count: number of images to generate.
        height, width: the size of the generated images.
        """
        # Add classes
        self.add_class("shapes", 1, "square")
        self.add_class("shapes", 2, "circle")
        self.add_class("shapes", 3, "triangle")

        # Add images
        # Generate random specifications of images (i.e. color and
        # list of shapes sizes and locations). This is more compact than
        # actual images. Images are generated on the fly in load_image().
        for i in range(count):
            bg_color, shapes = self.random_image(height, width)
            self.add_image("shapes", image_id=i, path=None,
                           width=width, height=height,
                           bg_color=bg_color, shapes=shapes)

    def load_image(self, image_id):
        """Generate an image from the specs of the given image ID.
        Typically this function loads the image from a file, but
        in this case it generates the image on the fly from the
        specs in image_info.
        """
        info = self.image_info[image_id]
        bg_color = np.array(info['bg_color']).reshape([1, 1, 3])
        image = np.ones([info['height'], info['width'], 3], dtype=np.uint8)
        image = image * bg_color.astype(np.uint8)
        for shape, color, dims in info['shapes']:
            image = self.draw_shape(image, shape, dims, color)
        return image

    def image_reference(self, image_id):
        """Return the shapes data of the image."""
        info = self.image_info[image_id]
        if info["source"] == "shapes":
            return info["shapes"]
        else:
            super(self.__class__).image_reference(self, image_id)

    def load_mask(self, image_id):
        """Generate instance masks for shapes of the given image ID.
        """
        info = self.image_info[image_id]
        shapes = info['shapes']
        count = len(shapes)
        mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8)
        for i, (shape, _, dims) in enumerate(info['shapes']):
            mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(),
                                                  shape, dims, 1)
        # Handle occlusions
        occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
        for i in range(count - 2, -1, -1):
            mask[:, :, i] = mask[:, :, i] * occlusion
            occlusion = np.logical_and(
                occlusion, np.logical_not(mask[:, :, i]))
        # Map class names to class IDs.
        class_ids = np.array([self.class_names.index(s[0]) for s in shapes])
        return mask, class_ids.astype(np.int32)

    def draw_shape(self, image, shape, dims, color):
        """Draws a shape from the given specs."""
        # Get the center x, y and the size s
        x, y, s = dims
        if shape == 'square':
            image = cv2.rectangle(image, (x - s, y - s),
                                  (x + s, y + s), color, -1)
        elif shape == "circle":
            image = cv2.circle(image, (x, y), s, color, -1)
        elif shape == "triangle":
            points = np.array([[(x, y - s),
                                (x - s / math.sin(math.radians(60)), y + s),
                                (x + s / math.sin(math.radians(60)), y + s),
                                ]], dtype=np.int32)
            image = cv2.fillPoly(image, points, color)
        return image

    def random_shape(self, height, width):
        """Generates specifications of a random shape that lies within
        the given height and width boundaries.
        Returns a tuple of three values:
        * The shape name (square, circle, ...)
        * Shape color: a tuple of 3 values, RGB.
        * Shape dimensions: A tuple of values that define the shape size
          and location. Differs per shape type.
        """
        # Shape
        shape = random.choice(["square", "circle", "triangle"])
        # Color
        color = tuple([random.randint(0, 255) for _ in range(3)])
        # Center x, y
        buffer = 20
        y = random.randint(buffer, height - buffer - 1)
        x = random.randint(buffer, width - buffer - 1)
        # Size
        s = random.randint(buffer, height // 4)
        return shape, color, (x, y, s)

    def random_image(self, height, width):
        """Creates random specifications of an image with multiple shapes.
        Returns the background color of the image and a list of shape
        specifications that can be used to draw the image.
        """
        # Pick random background color
        bg_color = np.array([random.randint(0, 255) for _ in range(3)])
        # Generate a few random shapes and record their
        # bounding boxes
        shapes = []
        boxes = []
        N = random.randint(1, 4)
        for _ in range(N):
            shape, color, dims = self.random_shape(height, width)
            shapes.append((shape, color, dims))
            x, y, s = dims
            boxes.append([y - s, x - s, y + s, x + s])
        # Apply non-max suppression with 0.3 threshold to avoid
        # shapes covering each other
        keep_ixs = utils.non_max_suppression(
            np.array(boxes), np.arange(N), 0.3)
        shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
        return bg_color, shapes
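A minimal sketch of generating and inspecting this synthetic dataset, following the usage described in the module docstring (the train_shapes notebook); the image count of 50 is arbitrary.

# Sketch: generating and inspecting the synthetic shapes dataset defined above.
from shapes import ShapesConfig, ShapesDataset

config = ShapesConfig()
dataset = ShapesDataset()
dataset.load_shapes(50, config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1])
dataset.prepare()

image = dataset.load_image(0)           # (128, 128, 3) uint8 image drawn on the fly
masks, class_ids = dataset.load_mask(0) # one mask channel per shape instance
print(image.shape, masks.shape, class_ids)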
utils.py
ADDED
@@ -0,0 +1,736 @@
+ """
+ Mask R-CNN
+ Common utility functions and classes.
+ 
+ Copyright (c) 2017 Matterport, Inc.
+ Licensed under the MIT License (see LICENSE for details)
+ Written by Waleed Abdulla
+ """
+ 
+ import sys
+ import os
+ import math
+ import random
+ import numpy as np
+ import tensorflow as tf
+ import scipy.misc
+ import skimage.color
+ import skimage.io
+ import urllib.request
+ import shutil
+ 
+ # URL from which to download the latest COCO trained weights
+ COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
+ 
+ 
+ ############################################################
+ # Bounding Boxes
+ ############################################################
+ 
+ def extract_bboxes(mask):
+     """Compute bounding boxes from masks.
+     mask: [height, width, num_instances]. Mask pixels are either 1 or 0.
+ 
+     Returns: bbox array [num_instances, (y1, x1, y2, x2)].
+     """
+     boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)
+     for i in range(mask.shape[-1]):
+         m = mask[:, :, i]
+         # Bounding box.
+         horizontal_indicies = np.where(np.any(m, axis=0))[0]
+         vertical_indicies = np.where(np.any(m, axis=1))[0]
+         if horizontal_indicies.shape[0]:
+             x1, x2 = horizontal_indicies[[0, -1]]
+             y1, y2 = vertical_indicies[[0, -1]]
+             # x2 and y2 should not be part of the box. Increment by 1.
+             x2 += 1
+             y2 += 1
+         else:
+             # No mask for this instance. Might happen due to
+             # resizing or cropping. Set bbox to zeros
+             x1, x2, y1, y2 = 0, 0, 0, 0
+         boxes[i] = np.array([y1, x1, y2, x2])
+     return boxes.astype(np.int32)
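
Aside (editorial, not part of utils.py): a quick sanity check of what extract_bboxes() returns for a toy mask; coordinates are (y1, x1, y2, x2) with exclusive y2/x2.

import numpy as np
import utils

# One instance: a 2x3 block of ones inside a 6x6 canvas.
mask = np.zeros([6, 6, 1], dtype=np.uint8)
mask[2:4, 1:4, 0] = 1
print(utils.extract_bboxes(mask))  # -> [[2 1 4 4]]
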
+ 
+ 
+ def compute_iou(box, boxes, box_area, boxes_area):
+     """Calculates IoU of the given box with the array of the given boxes.
+     box: 1D vector [y1, x1, y2, x2]
+     boxes: [boxes_count, (y1, x1, y2, x2)]
+     box_area: float. the area of 'box'
+     boxes_area: array of length boxes_count.
+ 
+     Note: the areas are passed in rather than calculated here for
+     efficiency. Calculate once in the caller to avoid duplicate work.
+     """
+     # Calculate intersection areas
+     y1 = np.maximum(box[0], boxes[:, 0])
+     y2 = np.minimum(box[2], boxes[:, 2])
+     x1 = np.maximum(box[1], boxes[:, 1])
+     x2 = np.minimum(box[3], boxes[:, 3])
+     intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
+     union = box_area + boxes_area[:] - intersection[:]
+     iou = intersection / union
+     return iou
+ 
+ 
+ def compute_overlaps(boxes1, boxes2):
+     """Computes IoU overlaps between two sets of boxes.
+     boxes1, boxes2: [N, (y1, x1, y2, x2)].
+ 
+     For better performance, pass the largest set first and the smaller second.
+     """
+     # Areas of anchors and GT boxes
+     area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
+     area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
+ 
+     # Compute overlaps to generate matrix [boxes1 count, boxes2 count]
+     # Each cell contains the IoU value.
+     overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0]))
+     for i in range(overlaps.shape[1]):
+         box2 = boxes2[i]
+         overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1)
+     return overlaps
+ 
+ 
+ def compute_overlaps_masks(masks1, masks2):
+     '''Computes IoU overlaps between two sets of masks.
+     masks1, masks2: [Height, Width, instances]
+     '''
+     # flatten masks
+     masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
+     masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
+     area1 = np.sum(masks1, axis=0)
+     area2 = np.sum(masks2, axis=0)
+ 
+     # intersections and union
+     intersections = np.dot(masks1.T, masks2)
+     union = area1[:, None] + area2[None, :] - intersections
+     overlaps = intersections / union
+ 
+     return overlaps
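
Aside (editorial, not part of utils.py): compute_overlaps() returns an [N1, N2] IoU matrix, e.g.:

import numpy as np
import utils

a = np.array([[0, 0, 10, 10]])            # one 10x10 box
b = np.array([[0, 0, 10, 10],             # identical box
              [5, 5, 15, 15]])            # half-overlapping box
print(utils.compute_overlaps(a, b))       # -> [[1.0, 0.1428...]]
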
+ 
+ 
+ def non_max_suppression(boxes, scores, threshold):
+     """Performs non-maximum suppression and returns indices of kept boxes.
+     boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lies outside the box.
+     scores: 1-D array of box scores.
+     threshold: Float. IoU threshold to use for filtering.
+     """
+     assert boxes.shape[0] > 0
+     if boxes.dtype.kind != "f":
+         boxes = boxes.astype(np.float32)
+ 
+     # Compute box areas
+     y1 = boxes[:, 0]
+     x1 = boxes[:, 1]
+     y2 = boxes[:, 2]
+     x2 = boxes[:, 3]
+     area = (y2 - y1) * (x2 - x1)
+ 
+     # Get indices of boxes sorted by scores (highest first)
+     ixs = scores.argsort()[::-1]
+ 
+     pick = []
+     while len(ixs) > 0:
+         # Pick top box and add its index to the list
+         i = ixs[0]
+         pick.append(i)
+         # Compute IoU of the picked box with the rest
+         iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]])
+         # Identify boxes with IoU over the threshold. This
+         # returns indices into ixs[1:], so add 1 to get
+         # indices into ixs.
+         remove_ixs = np.where(iou > threshold)[0] + 1
+         # Remove indices of the picked and overlapped boxes.
+         ixs = np.delete(ixs, remove_ixs)
+         ixs = np.delete(ixs, 0)
+     return np.array(pick, dtype=np.int32)
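
Aside (editorial, not part of utils.py): a small NMS example; the two heavily overlapping boxes collapse to the higher-scoring one.

import numpy as np
import utils

boxes = np.array([[0, 0, 10, 10],      # score 0.9
                  [1, 1, 11, 11],      # score 0.8, IoU ~0.68 with the first
                  [20, 20, 30, 30]])   # score 0.7, disjoint
scores = np.array([0.9, 0.8, 0.7])
print(utils.non_max_suppression(boxes, scores, 0.3))  # -> [0 2]
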
+ 
+ 
+ def apply_box_deltas(boxes, deltas):
+     """Applies the given deltas to the given boxes.
+     boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box.
+     deltas: [N, (dy, dx, log(dh), log(dw))]
+     """
+     boxes = boxes.astype(np.float32)
+     # Convert to y, x, h, w
+     height = boxes[:, 2] - boxes[:, 0]
+     width = boxes[:, 3] - boxes[:, 1]
+     center_y = boxes[:, 0] + 0.5 * height
+     center_x = boxes[:, 1] + 0.5 * width
+     # Apply deltas
+     center_y += deltas[:, 0] * height
+     center_x += deltas[:, 1] * width
+     height *= np.exp(deltas[:, 2])
+     width *= np.exp(deltas[:, 3])
+     # Convert back to y1, x1, y2, x2
+     y1 = center_y - 0.5 * height
+     x1 = center_x - 0.5 * width
+     y2 = y1 + height
+     x2 = x1 + width
+     return np.stack([y1, x1, y2, x2], axis=1)
+ 
+ 
+ def box_refinement_graph(box, gt_box):
+     """Compute refinement needed to transform box to gt_box.
+     box and gt_box are [N, (y1, x1, y2, x2)]
+     """
+     box = tf.cast(box, tf.float32)
+     gt_box = tf.cast(gt_box, tf.float32)
+ 
+     height = box[:, 2] - box[:, 0]
+     width = box[:, 3] - box[:, 1]
+     center_y = box[:, 0] + 0.5 * height
+     center_x = box[:, 1] + 0.5 * width
+ 
+     gt_height = gt_box[:, 2] - gt_box[:, 0]
+     gt_width = gt_box[:, 3] - gt_box[:, 1]
+     gt_center_y = gt_box[:, 0] + 0.5 * gt_height
+     gt_center_x = gt_box[:, 1] + 0.5 * gt_width
+ 
+     dy = (gt_center_y - center_y) / height
+     dx = (gt_center_x - center_x) / width
+     dh = tf.log(gt_height / height)
+     dw = tf.log(gt_width / width)
+ 
+     result = tf.stack([dy, dx, dh, dw], axis=1)
+     return result
+ 
+ 
+ def box_refinement(box, gt_box):
+     """Compute refinement needed to transform box to gt_box.
+     box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
+     assumed to be outside the box.
+     """
+     box = box.astype(np.float32)
+     gt_box = gt_box.astype(np.float32)
+ 
+     height = box[:, 2] - box[:, 0]
+     width = box[:, 3] - box[:, 1]
+     center_y = box[:, 0] + 0.5 * height
+     center_x = box[:, 1] + 0.5 * width
+ 
+     gt_height = gt_box[:, 2] - gt_box[:, 0]
+     gt_width = gt_box[:, 3] - gt_box[:, 1]
+     gt_center_y = gt_box[:, 0] + 0.5 * gt_height
+     gt_center_x = gt_box[:, 1] + 0.5 * gt_width
+ 
+     dy = (gt_center_y - center_y) / height
+     dx = (gt_center_x - center_x) / width
+     dh = np.log(gt_height / height)
+     dw = np.log(gt_width / width)
+ 
+     return np.stack([dy, dx, dh, dw], axis=1)
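
Aside (editorial, not part of utils.py): box_refinement() and apply_box_deltas() are inverses up to floating-point error, which is easy to verify:

import numpy as np
import utils

anchors = np.array([[10., 10., 50., 50.]])
gt = np.array([[12., 8., 60., 52.]])

deltas = utils.box_refinement(anchors, gt)            # (dy, dx, log(dh), log(dw))
recovered = utils.apply_box_deltas(anchors, deltas)   # should reproduce gt
print(np.allclose(recovered, gt))                     # -> True
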
+ 
+ 
+ ############################################################
+ # Dataset
+ ############################################################
+ 
+ class Dataset(object):
+     """The base class for dataset classes.
+     To use it, create a new class that adds functions specific to the dataset
+     you want to use. For example:
+ 
+     class CatsAndDogsDataset(Dataset):
+         def load_cats_and_dogs(self):
+             ...
+         def load_mask(self, image_id):
+             ...
+         def image_reference(self, image_id):
+             ...
+ 
+     See COCODataset and ShapesDataset as examples.
+     """
+ 
+     def __init__(self, class_map=None):
+         self._image_ids = []
+         self.image_info = []
+         # Background is always the first class
+         self.class_info = [{"source": "", "id": 0, "name": "BG"}]
+         self.source_class_ids = {}
+ 
+     def add_class(self, source, class_id, class_name):
+         assert "." not in source, "Source name cannot contain a dot"
+         # Does the class exist already?
+         for info in self.class_info:
+             if info['source'] == source and info["id"] == class_id:
+                 # source.class_id combination already available, skip
+                 return
+         # Add the class
+         self.class_info.append({
+             "source": source,
+             "id": class_id,
+             "name": class_name,
+         })
+ 
+     def add_image(self, source, image_id, path, **kwargs):
+         image_info = {
+             "id": image_id,
+             "source": source,
+             "path": path,
+         }
+         image_info.update(kwargs)
+         self.image_info.append(image_info)
+ 
+     def image_reference(self, image_id):
+         """Return a link to the image in its source Website or details about
+         the image that help looking it up or debugging it.
+ 
+         Override for your dataset, but pass to this function
+         if you encounter images not in your dataset.
+         """
+         return ""
+ 
+     def prepare(self, class_map=None):
+         """Prepares the Dataset class for use.
+ 
+         TODO: class map is not supported yet. When done, it should handle mapping
+               classes from different datasets to the same class ID.
+         """
+ 
+         def clean_name(name):
+             """Returns a shorter version of object names for cleaner display."""
+             return ",".join(name.split(",")[:1])
+ 
+         # Build (or rebuild) everything else from the info dicts.
+         self.num_classes = len(self.class_info)
+         self.class_ids = np.arange(self.num_classes)
+         self.class_names = [clean_name(c["name"]) for c in self.class_info]
+         self.num_images = len(self.image_info)
+         self._image_ids = np.arange(self.num_images)
+ 
+         self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
+                                       for info, id in zip(self.class_info, self.class_ids)}
+ 
+         # Map sources to class_ids they support
+         self.sources = list(set([i['source'] for i in self.class_info]))
+         self.source_class_ids = {}
+         # Loop over datasets
+         for source in self.sources:
+             self.source_class_ids[source] = []
+             # Find classes that belong to this dataset
+             for i, info in enumerate(self.class_info):
+                 # Include BG class in all datasets
+                 if i == 0 or source == info['source']:
+                     self.source_class_ids[source].append(i)
+ 
+     def map_source_class_id(self, source_class_id):
+         """Takes a source class ID and returns the int class ID assigned to it.
+ 
+         For example:
+         dataset.map_source_class_id("coco.12") -> 23
+         """
+         return self.class_from_source_map[source_class_id]
+ 
+     def get_source_class_id(self, class_id, source):
+         """Map an internal class ID to the corresponding class ID in the source dataset."""
+         info = self.class_info[class_id]
+         assert info['source'] == source
+         return info['id']
+ 
+     def append_data(self, class_info, image_info):
+         self.external_to_class_id = {}
+         for i, c in enumerate(self.class_info):
+             for ds, id in c["map"]:
+                 self.external_to_class_id[ds + str(id)] = i
+ 
+         # Map external image IDs to internal ones.
+         self.external_to_image_id = {}
+         for i, info in enumerate(self.image_info):
+             self.external_to_image_id[info["ds"] + str(info["id"])] = i
+ 
+     @property
+     def image_ids(self):
+         return self._image_ids
+ 
+     def source_image_link(self, image_id):
+         """Returns the path or URL to the image.
+         Override this to return a URL to the image if it's available online for easy
+         debugging.
+         """
+         return self.image_info[image_id]["path"]
+ 
+     def load_image(self, image_id):
+         """Load the specified image and return a [H,W,3] Numpy array.
+         """
+         # Load image
+         image = skimage.io.imread(self.image_info[image_id]['path'])
+         # If grayscale. Convert to RGB for consistency.
+         if image.ndim != 3:
+             image = skimage.color.gray2rgb(image)
+         return image
+ 
+     def load_mask(self, image_id):
+         """Load instance masks for the given image.
+ 
+         Different datasets use different ways to store masks. Override this
+         method to load instance masks and return them in the form of an
+         array of binary masks of shape [height, width, instances].
+ 
+         Returns:
+             masks: A bool array of shape [height, width, instance count] with
+                 a binary mask per instance.
+             class_ids: a 1D array of class IDs of the instance masks.
+         """
+         # Override this function to load a mask from your dataset.
+         # Otherwise, it returns an empty mask.
+         mask = np.empty([0, 0, 0])
+         class_ids = np.empty([0], np.int32)
+         return mask, class_ids
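
Aside (editorial, not part of utils.py): a minimal, hypothetical subclass showing the intended override points. TinyDataset and its names are illustrative only.

import numpy as np
import utils

class TinyDataset(utils.Dataset):
    """Toy dataset: one class, masks stored directly in image_info."""

    def load_tiny(self, images):
        # images: dict mapping an id to a [H, W, num_instances] bool mask array
        self.add_class("tiny", 1, "blob")
        for image_id, masks in images.items():
            self.add_image("tiny", image_id=image_id, path=None, masks=masks)

    def load_mask(self, image_id):
        masks = self.image_info[image_id]["masks"]
        class_ids = np.ones([masks.shape[-1]], dtype=np.int32)
        return masks, class_ids

dataset = TinyDataset()
dataset.load_tiny({0: np.ones([8, 8, 1], dtype=bool)})
dataset.prepare()
print(dataset.class_names, dataset.num_images)  # -> ['BG', 'blob'] 1
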
+ 
+ 
+ def resize_image(image, min_dim=None, max_dim=None, padding=False):
+     """
+     Resizes an image keeping the aspect ratio.
+ 
+     min_dim: if provided, resizes the image such that its smaller
+         dimension == min_dim
+     max_dim: if provided, ensures that the image longest side doesn't
+         exceed this value.
+     padding: If true, pads image with zeros so its size is max_dim x max_dim
+ 
+     Returns:
+     image: the resized image
+     window: (y1, x1, y2, x2). If max_dim is provided, padding might
+         be inserted in the returned image. If so, this window is the
+         coordinates of the image part of the full image (excluding
+         the padding). The x2, y2 pixels are not included.
+     scale: The scale factor used to resize the image
+     padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
+     """
+     # Default window (y1, x1, y2, x2) and default scale == 1.
+     h, w = image.shape[:2]
+     window = (0, 0, h, w)
+     scale = 1
+ 
+     # Scale?
+     if min_dim:
+         # Scale up but not down
+         scale = max(1, min_dim / min(h, w))
+     # Does it exceed max dim?
+     if max_dim:
+         image_max = max(h, w)
+         if round(image_max * scale) > max_dim:
+             scale = max_dim / image_max
+     # Resize image and mask
+     if scale != 1:
+         image = scipy.misc.imresize(
+             image, (round(h * scale), round(w * scale)))
+     # Need padding?
+     if padding:
+         # Get new height and width
+         h, w = image.shape[:2]
+         top_pad = (max_dim - h) // 2
+         bottom_pad = max_dim - h - top_pad
+         left_pad = (max_dim - w) // 2
+         right_pad = max_dim - w - left_pad
+         padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
+         image = np.pad(image, padding, mode='constant', constant_values=0)
+         window = (top_pad, left_pad, h + top_pad, w + left_pad)
+     return image, window, scale, padding
+ 
+ 
+ def resize_mask(mask, scale, padding):
+     """Resizes a mask using the given scale and padding.
+     Typically, you get the scale and padding from resize_image() to
+     ensure both, the image and the mask, are resized consistently.
+ 
+     scale: mask scaling factor
+     padding: Padding to add to the mask in the form
+             [(top, bottom), (left, right), (0, 0)]
+     """
+     h, w = mask.shape[:2]
+     mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
+     mask = np.pad(mask, padding, mode='constant', constant_values=0)
+     return mask
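
Aside (editorial, not part of utils.py): a typical resize_image() call. Note these helpers depend on scipy.misc.imresize, which only exists in older SciPy releases (it was removed in SciPy 1.3), so this sketch assumes such an environment.

import numpy as np
import utils

image = np.zeros([200, 300, 3], dtype=np.uint8)
resized, window, scale, padding = utils.resize_image(
    image, min_dim=256, max_dim=512, padding=True)
print(resized.shape)  # -> (512, 512, 3)
print(window)         # -> (128, 64, 384, 448): the un-padded region
print(scale)          # -> 1.28
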
+ 
+ 
+ def minimize_mask(bbox, mask, mini_shape):
+     """Resize masks to a smaller version to cut memory load.
+     Mini-masks can then be resized back to image scale using expand_mask()
+ 
+     See inspect_data.ipynb notebook for more details.
+     """
+     mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
+     for i in range(mask.shape[-1]):
+         m = mask[:, :, i]
+         y1, x1, y2, x2 = bbox[i][:4]
+         m = m[y1:y2, x1:x2]
+         if m.size == 0:
+             raise Exception("Invalid bounding box with area of zero")
+         m = scipy.misc.imresize(m.astype(float), mini_shape, interp='bilinear')
+         mini_mask[:, :, i] = np.where(m >= 128, 1, 0)
+     return mini_mask
+ 
+ 
+ def expand_mask(bbox, mini_mask, image_shape):
+     """Resizes mini masks back to image size. Reverses the change
+     of minimize_mask().
+ 
+     See inspect_data.ipynb notebook for more details.
+     """
+     mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
+     for i in range(mask.shape[-1]):
+         m = mini_mask[:, :, i]
+         y1, x1, y2, x2 = bbox[i][:4]
+         h = y2 - y1
+         w = x2 - x1
+         m = scipy.misc.imresize(m.astype(float), (h, w), interp='bilinear')
+         mask[y1:y2, x1:x2, i] = np.where(m >= 128, 1, 0)
+     return mask
+ 
+ 
+ # TODO: Build and use this function to reduce code duplication
+ def mold_mask(mask, config):
+     pass
+ 
+ 
+ def unmold_mask(mask, bbox, image_shape):
+     """Converts a mask generated by the neural network into a format similar
+     to its original shape.
+     mask: [height, width] of type float. A small, typically 28x28 mask.
+     bbox: [y1, x1, y2, x2]. The box to fit the mask in.
+ 
+     Returns a binary mask with the same size as the original image.
+     """
+     threshold = 0.5
+     y1, x1, y2, x2 = bbox
+     mask = scipy.misc.imresize(
+         mask, (y2 - y1, x2 - x1), interp='bilinear').astype(np.float32) / 255.0
+     mask = np.where(mask >= threshold, 1, 0).astype(np.uint8)
+ 
+     # Put the mask in the right location.
+     full_mask = np.zeros(image_shape[:2], dtype=np.uint8)
+     full_mask[y1:y2, x1:x2] = mask
+     return full_mask
+ 
+ 
+ ############################################################
+ # Anchors
+ ############################################################
+ 
+ def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
+     """
+     scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
+     ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
+     shape: [height, width] spatial shape of the feature map over which
+            to generate anchors.
+     feature_stride: Stride of the feature map relative to the image in pixels.
+     anchor_stride: Stride of anchors on the feature map. For example, if the
+         value is 2 then generate anchors for every other feature map pixel.
+     """
+     # Get all combinations of scales and ratios
+     scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
+     scales = scales.flatten()
+     ratios = ratios.flatten()
+ 
+     # Enumerate heights and widths from scales and ratios
+     heights = scales / np.sqrt(ratios)
+     widths = scales * np.sqrt(ratios)
+ 
+     # Enumerate shifts in feature space
+     shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
+     shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
+     shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
+ 
+     # Enumerate combinations of shifts, widths, and heights
+     box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
+     box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
+ 
+     # Reshape to get a list of (y, x) and a list of (h, w)
+     box_centers = np.stack(
+         [box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
+     box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])
+ 
+     # Convert to corner coordinates (y1, x1, y2, x2)
+     boxes = np.concatenate([box_centers - 0.5 * box_sizes,
+                             box_centers + 0.5 * box_sizes], axis=1)
+     return boxes
+ 
+ 
+ def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
+                              anchor_stride):
+     """Generate anchors at different levels of a feature pyramid. Each scale
+     is associated with a level of the pyramid, but each ratio is used in
+     all levels of the pyramid.
+ 
+     Returns:
+     anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted
+         with the same order of the given scales. So, anchors of scale[0] come
+         first, then anchors of scale[1], and so on.
+     """
+     # Anchors
+     # [anchor_count, (y1, x1, y2, x2)]
+     anchors = []
+     for i in range(len(scales)):
+         anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
+                                         feature_strides[i], anchor_stride))
+     return np.concatenate(anchors, axis=0)
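
Aside (editorial, not part of utils.py): an illustrative call. The scales, ratios, and strides below are example values of the kind config.py defines, not values read from it.

import numpy as np
import utils

strides = [4, 8, 16, 32, 64]                                 # FPN feature strides
shapes = np.array([[256 // s, 256 // s] for s in strides])   # for a 256x256 input
anchors = utils.generate_pyramid_anchors(
    scales=(32, 64, 128, 256, 512),
    ratios=[0.5, 1, 2],
    feature_shapes=shapes,
    feature_strides=strides,
    anchor_stride=1)
print(anchors.shape)  # -> (16368, 4): 3 anchors per feature-map cell over all levels
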
+ 
+ 
+ ############################################################
+ # Miscellaneous
+ ############################################################
+ 
+ def trim_zeros(x):
+     """It's common to have tensors larger than the available data and
+     pad with zeros. This function removes rows that are all zeros.
+ 
+     x: [rows, columns].
+     """
+     assert len(x.shape) == 2
+     return x[~np.all(x == 0, axis=1)]
+ 
+ 
+ def compute_ap(gt_boxes, gt_class_ids, gt_masks,
+                pred_boxes, pred_class_ids, pred_scores, pred_masks,
+                iou_threshold=0.5):
+     """Compute Average Precision at a set IoU threshold (default 0.5).
+ 
+     Returns:
+     mAP: Mean Average Precision
+     precisions: List of precisions at different class score thresholds.
+     recalls: List of recall values at different class score thresholds.
+     overlaps: [pred_boxes, gt_boxes] IoU overlaps.
+     """
+     # Trim zero padding and sort predictions by score from high to low
+     # TODO: cleaner to do zero unpadding upstream
+     gt_boxes = trim_zeros(gt_boxes)
+     gt_masks = gt_masks[..., :gt_boxes.shape[0]]
+     pred_boxes = trim_zeros(pred_boxes)
+     pred_scores = pred_scores[:pred_boxes.shape[0]]
+     indices = np.argsort(pred_scores)[::-1]
+     pred_boxes = pred_boxes[indices]
+     pred_class_ids = pred_class_ids[indices]
+     pred_scores = pred_scores[indices]
+     pred_masks = pred_masks[..., indices]
+ 
+     # Compute IoU overlaps [pred_masks, gt_masks]
+     overlaps = compute_overlaps_masks(pred_masks, gt_masks)
+ 
+     # Loop through ground truth boxes and find matching predictions
+     match_count = 0
+     pred_match = np.zeros([pred_boxes.shape[0]])
+     gt_match = np.zeros([gt_boxes.shape[0]])
+     for i in range(len(pred_boxes)):
+         # Find best matching ground truth box
+         sorted_ixs = np.argsort(overlaps[i])[::-1]
+         for j in sorted_ixs:
+             # If ground truth box is already matched, go to next one
+             if gt_match[j] == 1:
+                 continue
+             # If we reach IoU smaller than the threshold, end the loop
+             iou = overlaps[i, j]
+             if iou < iou_threshold:
+                 break
+             # Do we have a match?
+             if pred_class_ids[i] == gt_class_ids[j]:
+                 match_count += 1
+                 gt_match[j] = 1
+                 pred_match[i] = 1
+                 break
+ 
+     # Compute precision and recall at each prediction box step
+     precisions = np.cumsum(pred_match) / (np.arange(len(pred_match)) + 1)
+     recalls = np.cumsum(pred_match).astype(np.float32) / len(gt_match)
+ 
+     # Pad with start and end values to simplify the math
+     precisions = np.concatenate([[0], precisions, [0]])
+     recalls = np.concatenate([[0], recalls, [1]])
+ 
+     # Ensure precision values decrease but don't increase. This way, the
+     # precision value at each recall threshold is the maximum it can be
+     # for all following recall thresholds, as specified by the VOC paper.
+     for i in range(len(precisions) - 2, -1, -1):
+         precisions[i] = np.maximum(precisions[i], precisions[i + 1])
+ 
+     # Compute mean AP over recall range
+     indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
+     mAP = np.sum((recalls[indices] - recalls[indices - 1]) *
+                  precisions[indices])
+ 
+     return mAP, precisions, recalls, overlaps
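
Aside (editorial, not part of utils.py): a degenerate but easy-to-check case of compute_ap(); one ground-truth instance matched perfectly by one prediction gives an AP of 1.0.

import numpy as np
import utils

gt_masks = np.zeros([32, 32, 1])
gt_masks[8:16, 8:16, 0] = 1
gt_boxes = utils.extract_bboxes(gt_masks)
gt_class_ids = np.array([1])

# Prediction identical to the ground truth.
pred_masks = gt_masks.copy()
pred_boxes = gt_boxes.copy()
pred_class_ids = np.array([1])
pred_scores = np.array([0.95])

mAP, precisions, recalls, overlaps = utils.compute_ap(
    gt_boxes, gt_class_ids, gt_masks,
    pred_boxes, pred_class_ids, pred_scores, pred_masks)
print(mAP)  # -> 1.0
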
+ 
+ 
+ def compute_recall(pred_boxes, gt_boxes, iou):
+     """Compute the recall at the given IoU threshold. It's an indication
+     of how many GT boxes were found by the given prediction boxes.
+ 
+     pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
+     gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates
+     """
+     # Measure overlaps
+     overlaps = compute_overlaps(pred_boxes, gt_boxes)
+     iou_max = np.max(overlaps, axis=1)
+     iou_argmax = np.argmax(overlaps, axis=1)
+     positive_ids = np.where(iou_max >= iou)[0]
+     matched_gt_boxes = iou_argmax[positive_ids]
+ 
+     recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0]
+     return recall, positive_ids
+ 
+ 
+ # ## Batch Slicing
+ # Some custom layers support a batch size of 1 only, and require a lot of work
+ # to support batches greater than 1. This function slices an input tensor
+ # across the batch dimension and feeds batches of size 1. Effectively,
+ # an easy way to support batches > 1 quickly with little code modification.
+ # In the long run, it's more efficient to modify the code to support large
+ # batches and get rid of this function. Consider this a temporary solution.
+ def batch_slice(inputs, graph_fn, batch_size, names=None):
+     """Splits inputs into slices and feeds each slice to a copy of the given
+     computation graph and then combines the results. It allows you to run a
+     graph on a batch of inputs even if the graph is written to support one
+     instance only.
+ 
+     inputs: list of tensors. All must have the same first dimension length
+     graph_fn: A function that returns a TF tensor that's part of a graph.
+     batch_size: number of slices to divide the data into.
+     names: If provided, assigns names to the resulting tensors.
+     """
+     if not isinstance(inputs, list):
+         inputs = [inputs]
+ 
+     outputs = []
+     for i in range(batch_size):
+         inputs_slice = [x[i] for x in inputs]
+         output_slice = graph_fn(*inputs_slice)
+         if not isinstance(output_slice, (tuple, list)):
+             output_slice = [output_slice]
+         outputs.append(output_slice)
+     # Change outputs from a list of slices where each is
+     # a list of outputs to a list of outputs and each has
+     # a list of slices
+     outputs = list(zip(*outputs))
+ 
+     if names is None:
+         names = [None] * len(outputs)
+ 
+     result = [tf.stack(o, axis=0, name=n)
+               for o, n in zip(outputs, names)]
+     if len(result) == 1:
+         result = result[0]
+ 
+     return result
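
Aside (editorial, not part of utils.py): a small batch_slice() sketch. It only relies on tf.stack, but the function is written for TF 1.x graph building, so the result here is a symbolic tensor rather than a computed value.

import numpy as np
import tensorflow as tf
import utils

# Apply a per-sample function (keep the first two rows) to a [batch, N, 4] tensor.
boxes = tf.constant(np.arange(24, dtype=np.float32).reshape([2, 3, 4]))
trimmed = utils.batch_slice(boxes, lambda x: x[:2], batch_size=2, names=["trimmed"])
print(trimmed.shape)  # -> (2, 2, 4)
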
+ 
+ 
+ def download_trained_weights(coco_model_path, verbose=1):
+     """Download COCO trained weights from Releases.
+ 
+     coco_model_path: local path of COCO trained weights
+     """
+     if verbose > 0:
+         print("Downloading pretrained model to " + coco_model_path + " ...")
+     with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out:
+         shutil.copyfileobj(resp, out)
+     if verbose > 0:
+         print("... done downloading pretrained model!")
+ 
+ 
+ def resize_image_with_scale(h1, w1, h2_max, w2_max):
+     """Scale an h1 x w1 image so it fits inside an h2_max x w2_max rectangle, keeping the aspect ratio."""
+     if h1 == w1: return h2_max, h2_max  # square image
+     elif h1 < w1: return int(h1/(w1/w2_max)), int(w2_max)  # horizontal image
+     else: return int(h2_max), int(w1/(h1/h2_max))  # vertical image
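
Aside (editorial, not part of utils.py): resize_image_with_scale() works on plain (height, width) integers, e.g. fitting a 1080x1920 frame into a 512x512 box:

import utils

print(utils.resize_image_with_scale(1080, 1920, 512, 512))  # -> (288, 512)

# Fetch the COCO weights once if they are not already present (network call, so commented out):
# utils.download_trained_weights("mask_rcnn_coco.h5")
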