Kroy committed on
Commit
e80fad1
·
1 Parent(s): 162c0bc

Upload 10 files

Browse files
Files changed (10) hide show
  1. Dockerfile +30 -0
  2. app.py +87 -0
  3. classes.py +15 -0
  4. coco.py +519 -0
  5. config.py +172 -0
  6. model.py +0 -0
  7. parallel_model.py +173 -0
  8. requirements.txt +20 -0
  9. shapes.py +184 -0
  10. utils.py +736 -0
Dockerfile ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official Python 3.7 image
FROM python:3.7

# Set the working directory to /code
WORKDIR /code

# Copy only requirements.txt into the container at /code, so the dependency
# layer below is rebuilt only when the requirements change
COPY ./requirements.txt /code/requirements.txt

# Install requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user
# Switch to the "user" user
USER user
# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Copy the current directory contents into the container at $HOME/app setting the owner to the user
COPY --chown=user . $HOME/app

# Start the FastAPI app on port 7860, the default port expected by Spaces
# CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

# Exec (JSON array) form: gunicorn is started directly instead of through
# `/bin/sh -c`, so it receives the container's stop signals itself.
CMD ["gunicorn", "-k", "uvicorn.workers.UvicornWorker", "--workers", "2", "--threads=2", "--max-requests", "512", "--bind", "0.0.0.0:7860", "app:app"]
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ warnings.filterwarnings("ignore")
3
+ ## import necessary packages
4
+
5
+ import os
6
+ import io
7
+ import sys
8
+ import base64
9
+ import random
10
+ import argparse
11
+ import math
12
+ import numpy as np
13
+
14
+ from typing import Any, Union,Dict, List
15
+ import numpy as np
16
+ import requests
17
+ from PIL import Image
18
+ from imageio import imread
19
+ from keras import backend as K
20
+
21
+ import coco
22
+ import utils
23
+ import model as modellib
24
+ import visualize
25
+ from classes import class_names
26
+ from fastapi import FastAPI
27
+
28
# Create a new FastAPI app instance
app = FastAPI()

# Root directory of the project (the working directory at import time)
ROOT_DIR = os.getcwd()

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# NOTE(review): this shells out to pip every time the module is imported, and
# it runs only after `import coco` (which itself imports pycocotools) has
# already executed. Presumably the pin belongs in requirements.txt -- confirm.
os.system("pip install pycocotools==2.0.0")
# Clear the Keras backend session before the model is built
K.clear_session()

# Fetch the weights file if it is not already on disk
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

# One image per batch: Config computes BATCH_SIZE as IMAGES_PER_GPU * GPU_COUNT
class InferenceConfig(coco.CocoConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
config = InferenceConfig()

# Build the model in inference mode and load the weights once, at import
# time, so the request handler reuses the same instance
model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)
model.load_weights(COCO_MODEL_PATH, by_name=True)
52
+
53
+
54
# GET /generate
# Downloads the image at the URL given in `path`, runs the Mask R-CNN model
# loaded above on it, and returns the annotated image as a base64-encoded
# JPEG string under the JSON key "output".
@app.get("/generate")
def generate(path: str):
    """Run instance segmentation on the image at URL ``path``.

    Args:
        path: URL of the input image. It is fetched with an HTTP GET.

    Returns:
        A dict ``{"output": <str>}`` where the value is the annotated image,
        JPEG-encoded and then base64-encoded as ASCII text.

    Raises:
        requests.RequestException: if the download fails, times out or the
            server answers with an error status.
    """
    # A timeout keeps a stalled remote server from blocking the worker forever.
    response = requests.get(path, timeout=30)
    response.raise_for_status()

    # Decode the payload and force 3-channel RGB before handing it to the model.
    image = np.array(Image.open(io.BytesIO(response.content)).convert('RGB'))

    results = model.detect([image], verbose=1)

    # A single image was submitted, so only the first result is used.
    detection = results[0]
    output_image = visualize.display_instances_and_save(
        image, detection['rois'], detection['masks'], detection['class_ids'],
        class_names, detection['scores'])

    # Serialize the rendered image to JPEG in memory.
    buffer = io.BytesIO()
    Image.fromarray(output_image).save(buffer, format="JPEG")

    # Raw JPEG bytes are not valid UTF-8 and cannot be placed in a JSON
    # response as-is; base64 turns them into plain ASCII text.
    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    return {"output": encoded}
86
+
87
+
classes.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Class labels passed to the visualizer together with the detected class ids.
# The order is significant and must not change; entry 0 is the background.
class_names = [
    # background
    'BG',
    # people and vehicles
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
    'truck', 'boat',
    # street objects
    'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
    # animals
    'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
    'zebra', 'giraffe',
    # carried items
    'backpack', 'umbrella', 'handbag', 'tie', 'suitcase',
    # sports gear
    'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    # tableware
    'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    # food
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog',
    'pizza', 'donut', 'cake',
    # furniture
    'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
    # electronics
    'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    # appliances
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator',
    # household items
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
    'toothbrush',
]
coco.py ADDED
@@ -0,0 +1,519 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mask R-CNN
3
+ Configurations and data loading code for MS COCO.
4
+
5
+ Copyright (c) 2017 Matterport, Inc.
6
+ Licensed under the MIT License (see LICENSE for details)
7
+ Written by Waleed Abdulla
8
+
9
+ ------------------------------------------------------------
10
+
11
+ Usage: import the module (see Jupyter notebooks for examples), or run from
12
+ the command line as such:
13
+
14
+ # Train a new model starting from pre-trained COCO weights
15
+ python3 coco.py train --dataset=/path/to/coco/ --model=coco
16
+
17
+ # Train a new model starting from ImageNet weights
18
+ python3 coco.py train --dataset=/path/to/coco/ --model=imagenet
19
+
20
+ # Continue training a model that you had trained earlier
21
+ python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5
22
+
23
+ # Continue training the last model you trained
24
+ python3 coco.py train --dataset=/path/to/coco/ --model=last
25
+
26
+ # Run COCO evaluation on the last model you trained
27
+ python3 coco.py evaluate --dataset=/path/to/coco/ --model=last
28
+ """
29
+
30
+ import os
31
+ import time
32
+ import numpy as np
33
+
34
+ # Download and install the Python COCO tools from https://github.com/waleedka/coco
35
+ # That's a fork from the original https://github.com/pdollar/coco with a bug
36
+ # fix for Python 3.
37
+ # I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50
38
+ # If the PR is merged then use the original repo.
39
+ # Note: Edit PythonAPI/Makefile and replace "python" with "python3".
40
+ from pycocotools.coco import COCO
41
+ from pycocotools.cocoeval import COCOeval
42
+ from pycocotools import mask as maskUtils
43
+
44
+ import zipfile
45
+ import urllib.request
46
+ import shutil
47
+
48
+ from config import Config
49
+ import utils
50
+ import model as modellib
51
+
52
# Root directory of the project (the working directory at import time)
ROOT_DIR = os.getcwd()

# Path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

# Directory to save logs and model checkpoints, if not provided
# through the command line argument --logs
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
# Dataset year used when none is given (default of --year and of load_coco)
DEFAULT_DATASET_YEAR = "2014"
62
+
63
+ ############################################################
64
+ # Configurations
65
+ ############################################################
66
+
67
+
68
class CocoConfig(Config):
    """Configuration for training on MS COCO.
    Derives from the base Config class and overrides values specific
    to the COCO dataset.
    """
    # Give the configuration a recognizable name
    NAME = "coco"

    # We use a GPU with 12GB memory, which can fit two images.
    # Adjust down if you use a smaller GPU.
    IMAGES_PER_GPU = 2

    # Uncomment to train on 8 GPUs (default is 1)
    # GPU_COUNT = 8

    # Number of classes (including background)
    NUM_CLASSES = 1 + 80  # COCO has 80 classes
85
+
86
+
87
+ ############################################################
88
+ # Dataset
89
+ ############################################################
90
+
91
class CocoDataset(utils.Dataset):
    """Dataset that registers MS COCO classes, images and annotations."""

    def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None,
                  class_map=None, return_coco=False, auto_download=False):
        """Load a subset of the COCO dataset.
        dataset_dir: The root directory of the COCO dataset.
        subset: What to load (train, val, minival, valminusminival)
        year: What dataset year to load (2014, 2017) as a string, not an integer
        class_ids: If provided, only loads images that have the given classes.
        class_map: TODO: Not implemented yet. Supports mapping classes from
            different datasets to the same class ID.
        return_coco: If True, returns the COCO object.
        auto_download: Automatically download and unzip MS-COCO images and annotations
        """

        if auto_download is True:
            self.auto_download(dataset_dir, subset, year)

        coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year))
        # minival and valminusminival only have their own annotation files;
        # their images are read from the "val" image directory.
        if subset == "minival" or subset == "valminusminival":
            subset = "val"
        image_dir = "{}/{}{}".format(dataset_dir, subset, year)

        # Load all classes or a subset?
        if not class_ids:
            # All classes
            class_ids = sorted(coco.getCatIds())

        # All images or a subset?
        if class_ids:
            image_ids = []
            for class_id in class_ids:
                image_ids.extend(list(coco.getImgIds(catIds=[class_id])))
            # Remove duplicates
            image_ids = list(set(image_ids))
        else:
            # All images
            image_ids = list(coco.imgs.keys())

        # Add classes
        for i in class_ids:
            self.add_class("coco", i, coco.loadCats(i)[0]["name"])

        # Add images
        for i in image_ids:
            self.add_image(
                "coco", image_id=i,
                path=os.path.join(image_dir, coco.imgs[i]['file_name']),
                width=coco.imgs[i]["width"],
                height=coco.imgs[i]["height"],
                annotations=coco.loadAnns(coco.getAnnIds(
                    imgIds=[i], catIds=class_ids, iscrowd=None)))
        if return_coco:
            return coco

    def auto_download(self, dataDir, dataType, dataYear):
        """Download the COCO dataset/annotations if requested.
        dataDir: The root directory of the COCO dataset.
        dataType: What to load (train, val, minival, valminusminival)
        dataYear: What dataset year to load (2014, 2017) as a string, not an integer
        Note:
            For 2014, use "train", "val", "minival", or "valminusminival"
            For 2017, only "train" and "val" annotations are available
        """

        # Setup paths and file names
        if dataType == "minival" or dataType == "valminusminival":
            imgDir = "{}/{}{}".format(dataDir, "val", dataYear)
            imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear)
            imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear)
        else:
            imgDir = "{}/{}{}".format(dataDir, dataType, dataYear)
            imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear)
            imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear)
        # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL)

        # Create main folder if it doesn't exist yet
        if not os.path.exists(dataDir):
            os.makedirs(dataDir)

        # Download images if not available locally
        if not os.path.exists(imgDir):
            os.makedirs(imgDir)
            print("Downloading images to " + imgZipFile + " ...")
            with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out:
                shutil.copyfileobj(resp, out)
            print("... done downloading.")
            print("Unzipping " + imgZipFile)
            with zipfile.ZipFile(imgZipFile, "r") as zip_ref:
                zip_ref.extractall(dataDir)
            print("... done unzipping")
        print("Will use images in " + imgDir)

        # Setup annotations data paths
        annDir = "{}/annotations".format(dataDir)
        if dataType == "minival":
            annZipFile = "{}/instances_minival2014.json.zip".format(dataDir)
            annFile = "{}/instances_minival2014.json".format(annDir)
            annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0"
            unZipDir = annDir
        elif dataType == "valminusminival":
            annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir)
            annFile = "{}/instances_valminusminival2014.json".format(annDir)
            annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0"
            unZipDir = annDir
        else:
            annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear)
            annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear)
            annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear)
            unZipDir = dataDir
        # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL)

        # Download annotations if not available locally
        if not os.path.exists(annDir):
            os.makedirs(annDir)
        if not os.path.exists(annFile):
            if not os.path.exists(annZipFile):
                print("Downloading zipped annotations to " + annZipFile + " ...")
                with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out:
                    shutil.copyfileobj(resp, out)
                print("... done downloading.")
            print("Unzipping " + annZipFile)
            with zipfile.ZipFile(annZipFile, "r") as zip_ref:
                zip_ref.extractall(unZipDir)
            print("... done unzipping")
        print("Will use annotations in " + annFile)

    def load_mask(self, image_id):
        """Load instance masks for the given image.

        Different datasets use different ways to store masks. This
        function converts the different mask format to one format
        in the form of a bitmap [height, width, instances].

        Returns:
        masks: An array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
            Crowd annotations get a negative class ID.
        """
        # If not a COCO image, delegate to parent class.
        image_info = self.image_info[image_id]
        if image_info["source"] != "coco":
            return super(CocoDataset, self).load_mask(image_id)

        instance_masks = []
        class_ids = []
        annotations = self.image_info[image_id]["annotations"]
        # Build mask of shape [height, width, instance_count] and list
        # of class IDs that correspond to each channel of the mask.
        for annotation in annotations:
            class_id = self.map_source_class_id(
                "coco.{}".format(annotation['category_id']))
            if class_id:
                m = self.annToMask(annotation, image_info["height"],
                                   image_info["width"])
                # Some objects are so small that they're less than 1 pixel area
                # and end up rounded out. Skip those objects.
                if m.max() < 1:
                    continue
                # Is it a crowd? If so, use a negative class ID.
                if annotation['iscrowd']:
                    # Use negative class ID for crowds
                    class_id *= -1
                    # For crowd masks, annToMask() sometimes returns a mask
                    # smaller than the given dimensions. If so, replace it
                    # with an all-ones mask of the full image size.
                    if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
                        m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
                instance_masks.append(m)
                class_ids.append(class_id)

        # Pack instance masks into an array
        if class_ids:
            mask = np.stack(instance_masks, axis=2)
            class_ids = np.array(class_ids, dtype=np.int32)
            return mask, class_ids
        else:
            # Call super class to return an empty mask
            return super(CocoDataset, self).load_mask(image_id)

    def image_reference(self, image_id):
        """Return a link to the image in the COCO Website.

        For images that did not come from the "coco" source, the result of
        the parent class implementation is returned.
        """
        info = self.image_info[image_id]
        if info["source"] == "coco":
            return "http://cocodataset.org/#explore?id={}".format(info["id"])
        else:
            # The parent's value must be returned; without the `return`
            # this branch silently produced None.
            return super(CocoDataset, self).image_reference(image_id)

    # The following two functions are from pycocotools with a few changes.

    def annToRLE(self, ann, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE to RLE.
        :return: the RLE-encoded segmentation
        """
        segm = ann['segmentation']
        if isinstance(segm, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(segm, height, width)
            rle = maskUtils.merge(rles)
        elif isinstance(segm['counts'], list):
            # uncompressed RLE
            rle = maskUtils.frPyObjects(segm, height, width)
        else:
            # rle
            rle = ann['segmentation']
        return rle

    def annToMask(self, ann, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
        :return: binary mask (numpy 2D array)
        """
        rle = self.annToRLE(ann, height, width)
        m = maskUtils.decode(rle)
        return m
306
+
307
+
308
+ ############################################################
309
+ # COCO Evaluation
310
+ ############################################################
311
+
312
def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
    """Arrange results to match COCO specs in http://cocodataset.org/#format

    Emits one result dict per (image id, detection) pair with the keys
    "image_id", "category_id", "bbox", "score" and "segmentation".
    Returns an empty list when `rois` is None.
    """
    # Nothing detected: nothing to report
    if rois is None:
        return []

    coco_results = []
    for image_id in image_ids:
        # One entry for every detection row
        for det in range(rois.shape[0]):
            box = np.around(rois[det], 1)
            detection_mask = masks[:, :, det]
            # The output box is [box[1], box[0], extent along box[3]-box[1],
            # extent along box[2]-box[0]] -- presumably an input of
            # (y1, x1, y2, x2) turned into (x, y, width, height); confirm.
            coco_results.append({
                "image_id": image_id,
                "category_id": dataset.get_source_class_id(class_ids[det], "coco"),
                "bbox": [box[1], box[0], box[3] - box[1], box[2] - box[0]],
                "score": scores[det],
                "segmentation": maskUtils.encode(np.asfortranarray(detection_mask)),
            })
    return coco_results
337
+
338
+
339
def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
    """Runs official COCO evaluation.
    model: object whose detect() is called with one image at a time
    dataset: A Dataset object with validation data
    coco: COCO object holding the ground truth; results are loaded into it
    eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
    limit: if not 0, it's the number of images to use for evaluation
    image_ids: optional sequence of dataset image IDs to evaluate; when
        omitted or empty, all of dataset.image_ids are used
    """
    # Pick COCO images from the dataset. An explicit None/length test is
    # used because truth-testing a multi-element numpy array raises.
    if image_ids is None or len(image_ids) == 0:
        image_ids = dataset.image_ids

    # Limit to a subset
    if limit:
        image_ids = image_ids[:limit]

    # Get corresponding COCO image IDs.
    coco_image_ids = [dataset.image_info[image_id]["id"] for image_id in image_ids]

    t_prediction = 0
    t_start = time.time()

    results = []
    for i, image_id in enumerate(image_ids):
        # Load image
        image = dataset.load_image(image_id)

        # Run detection (only this call counts towards prediction time)
        t = time.time()
        r = model.detect([image], verbose=0)[0]
        t_prediction += (time.time() - t)

        # Convert results to COCO format
        image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
                                           r["rois"], r["class_ids"],
                                           r["scores"], r["masks"])
        results.extend(image_results)

    # Load results. This modifies results with additional attributes.
    coco_results = coco.loadRes(results)

    # Evaluate
    cocoEval = COCOeval(coco, coco_results, eval_type)
    cocoEval.params.imgIds = coco_image_ids
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

    # Guard the average against an empty image list
    image_count = len(image_ids)
    average = t_prediction / image_count if image_count else 0.0
    print("Prediction time: {}. Average {}/image".format(
        t_prediction, average))
    print("Total time: ", time.time() - t_start)
387
+
388
+
389
+ ############################################################
390
+ # Training
391
+ ############################################################
392
+
393
+
394
if __name__ == '__main__':
    import argparse

    def _str2bool(value):
        """Parse a command-line boolean such as 'True' or 'False'.

        argparse's `type=bool` calls bool() on the raw string, so any
        non-empty text -- including 'False' -- becomes True.
        """
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ("true", "t", "yes", "y", "1"):
            return True
        if text in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected True or False, got '{}'".format(value))

    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Train Mask R-CNN on MS COCO.')
    parser.add_argument("command",
                        metavar="<command>",
                        help="'train' or 'evaluate' on MS COCO")
    parser.add_argument('--dataset', required=True,
                        metavar="/path/to/coco/",
                        help='Directory of the MS-COCO dataset')
    parser.add_argument('--year', required=False,
                        default=DEFAULT_DATASET_YEAR,
                        metavar="<year>",
                        help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)')
    parser.add_argument('--model', required=True,
                        metavar="/path/to/weights.h5",
                        help="Path to weights .h5 file or 'coco'")
    parser.add_argument('--logs', required=False,
                        default=DEFAULT_LOGS_DIR,
                        metavar="/path/to/logs/",
                        help='Logs and checkpoints directory (default=logs/)')
    parser.add_argument('--limit', required=False,
                        default=500,
                        metavar="<image count>",
                        help='Images to use for evaluation (default=500)')
    parser.add_argument('--download', required=False,
                        default=False,
                        metavar="<True|False>",
                        help='Automatically download and unzip MS-COCO files (default=False)',
                        type=_str2bool)
    args = parser.parse_args()
    print("Command: ", args.command)
    print("Model: ", args.model)
    print("Dataset: ", args.dataset)
    print("Year: ", args.year)
    print("Logs: ", args.logs)
    print("Auto Download: ", args.download)

    # Configurations
    if args.command == "train":
        config = CocoConfig()
    else:
        class InferenceConfig(CocoConfig):
            # Set batch size to 1 since we'll be running inference on
            # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
            GPU_COUNT = 1
            IMAGES_PER_GPU = 1
            DETECTION_MIN_CONFIDENCE = 0
        config = InferenceConfig()
    config.display()

    # Create model
    if args.command == "train":
        model = modellib.MaskRCNN(mode="training", config=config,
                                  model_dir=args.logs)
    else:
        model = modellib.MaskRCNN(mode="inference", config=config,
                                  model_dir=args.logs)

    # Select weights file to load
    if args.model.lower() == "coco":
        model_path = COCO_MODEL_PATH
    elif args.model.lower() == "last":
        # Find last trained weights
        model_path = model.find_last()[1]
    elif args.model.lower() == "imagenet":
        # Start from ImageNet trained weights
        model_path = model.get_imagenet_weights()
    else:
        model_path = args.model

    # Load weights
    print("Loading weights ", model_path)
    model.load_weights(model_path, by_name=True)

    # Train or evaluate
    if args.command == "train":
        # Training dataset. Use the training set and 35K from the
        # validation set, as in the Mask RCNN paper.
        dataset_train = CocoDataset()
        dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download)
        dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download)
        dataset_train.prepare()

        # Validation dataset
        dataset_val = CocoDataset()
        dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download)
        dataset_val.prepare()

        # *** This training schedule is an example. Update to your needs ***

        # Training - Stage 1
        print("Training network heads")
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE,
                    epochs=40,
                    layers='heads')

        # Training - Stage 2
        # Finetune layers from ResNet stage 4 and up
        print("Fine tune Resnet stage 4 and up")
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE,
                    epochs=120,
                    layers='4+')

        # Training - Stage 3
        # Fine tune all layers
        print("Fine tune all layers")
        model.train(dataset_train, dataset_val,
                    learning_rate=config.LEARNING_RATE / 10,
                    epochs=160,
                    layers='all')

    elif args.command == "evaluate":
        # Validation dataset
        dataset_val = CocoDataset()
        coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download)
        dataset_val.prepare()
        print("Running COCO evaluation on {} images.".format(args.limit))
        evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit))
    else:
        print("'{}' is not recognized. "
              "Use 'train' or 'evaluate'".format(args.command))
config.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mask R-CNN
3
+ Base Configurations class.
4
+
5
+ Copyright (c) 2017 Matterport, Inc.
6
+ Licensed under the MIT License (see LICENSE for details)
7
+ Written by Waleed Abdulla
8
+ """
9
+
10
+ import math
11
+ import numpy as np
12
+
13
+
14
+ # Base Configuration Class
15
+ # Don't use this class directly. Instead, sub-class it and override
16
+ # the configurations you need to change.
17
+
18
class Config(object):
    """Base configuration shared by all Mask R-CNN setups.

    Not meant to be used as-is: derive a sub-class and override the class
    attributes that differ. Derived values (BATCH_SIZE, IMAGE_SHAPE,
    BACKBONE_SHAPES) are computed when an instance is created.
    """
    # Label for this configuration, e.g. 'COCO' or 'Experiment 3'. Handy
    # when code has to behave differently per experiment.
    NAME = None  # Override in sub-classes

    # How many GPUs to use. Use 1 when training on CPU.
    GPU_COUNT = 1

    # Images processed per GPU in one step. A 12GB GPU typically fits
    # 2 images of 1024x1024px; pick the largest value your GPU memory
    # and image sizes allow for best performance.
    IMAGES_PER_GPU = 2

    # Training steps per epoch. Does not have to equal the training set
    # size. TensorBoard updates are written at the end of each epoch, so a
    # smaller value gives more frequent updates -- but validation stats are
    # also computed at each epoch end and can be slow, so don't go too low.
    STEPS_PER_EPOCH = 1000

    # Validation steps run at the end of every training epoch. More steps
    # give more accurate validation stats at the cost of slower training.
    VALIDATION_STEPS = 50

    # Backbone network architecture. Supported values: resnet50, resnet101
    BACKBONE = "resnet101"

    # Stride of each FPN pyramid layer, based on a Resnet101 backbone.
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]

    # Number of classification classes (including background)
    NUM_CLASSES = 1  # Override in sub-classes

    # Side length, in pixels, of the square anchors
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Anchor ratios at each cell (width/height).
    # 1 is a square anchor, 0.5 is a wide anchor.
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]

    # Anchor stride: 1 creates anchors for each cell of the backbone
    # feature map, 2 for every other cell, and so on.
    RPN_ANCHOR_STRIDE = 1

    # Non-max suppression threshold used to filter RPN proposals.
    # Can be reduced during training to generate more proposals.
    RPN_NMS_THRESHOLD = 0.7

    # Anchors per image used for RPN training
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256

    # ROIs kept after non-maximum suppression (training and inference)
    POST_NMS_ROIS_TRAINING = 2000
    POST_NMS_ROIS_INFERENCE = 1000

    # When enabled, instance masks are resized to a smaller size to reduce
    # memory load. Recommended for high-resolution images.
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    # Input image resizing: the smallest side ends up >= IMAGE_MIN_DIM and
    # the longest side <= IMAGE_MAX_DIM. When both cannot hold at once,
    # IMAGE_MAX_DIM wins.
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1024
    # If True, pad images with zeros such that they're (max_dim by max_dim)
    IMAGE_PADDING = True  # currently, the False option is not supported

    # Image mean (RGB)
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])

    # ROIs per image fed to the classifier/mask heads. The Mask RCNN paper
    # uses 512, but the RPN often doesn't generate enough positive
    # proposals to fill that while keeping a 1:3 positive:negative ratio.
    # Adjusting the RPN NMS threshold yields more proposals.
    TRAIN_ROIS_PER_IMAGE = 200

    # Percent of positive ROIs used to train classifier/mask heads
    ROI_POSITIVE_RATIO = 0.33

    # Pooled ROIs
    POOL_SIZE = 7
    MASK_POOL_SIZE = 14
    MASK_SHAPE = [28, 28]

    # Cap on ground truth instances used from one image
    MAX_GT_INSTANCES = 100

    # Bounding box refinement standard deviation for RPN and final detections.
    RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])

    # Cap on the number of final detections
    DETECTION_MAX_INSTANCES = 100

    # Minimum probability for a detected instance to be accepted;
    # ROIs below this threshold are skipped
    DETECTION_MIN_CONFIDENCE = 0.7

    # Non-maximum suppression threshold for detection
    DETECTION_NMS_THRESHOLD = 0.3

    # Learning rate and momentum. The Mask RCNN paper uses lr=0.02, but on
    # TensorFlow that makes weights explode, likely due to differences in
    # optimizer implementation.
    LEARNING_RATE = 0.001
    LEARNING_MOMENTUM = 0.9

    # Weight decay regularization
    WEIGHT_DECAY = 0.0001

    # Train on RPN ROIs (True) or on externally generated ROIs (False).
    # Keep True in most situations; False is useful e.g. to debug the
    # classifier head on code-generated ROIs without training the RPN.
    USE_RPN_ROIS = True

    def __init__(self):
        """Compute the attributes that are derived from the class constants."""
        # Effective batch size across all GPUs
        self.BATCH_SIZE = self.GPU_COUNT * self.IMAGES_PER_GPU

        # Input image size: square with side IMAGE_MAX_DIM, 3 channels
        side = self.IMAGE_MAX_DIM
        self.IMAGE_SHAPE = np.array([side, side, 3])

        # Backbone feature map size per stride: image size divided by the
        # stride, rounded up
        height, width = self.IMAGE_SHAPE[0], self.IMAGE_SHAPE[1]
        shapes = []
        for stride in self.BACKBONE_STRIDES:
            shapes.append([int(math.ceil(height / stride)),
                           int(math.ceil(width / stride))])
        self.BACKBONE_SHAPES = np.array(shapes)

    def display(self):
        """Print every non-dunder, non-callable attribute and its value."""
        print("\nConfigurations:")
        for attr_name in dir(self):
            if attr_name.startswith("__"):
                continue
            value = getattr(self, attr_name)
            if callable(value):
                continue
            print("{:30} {}".format(attr_name, value))
        print("\n")
model.py ADDED
The diff for this file is too large to render. See raw diff
 
parallel_model.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mask R-CNN
3
+ Multi-GPU Support for Keras.
4
+
5
+ Copyright (c) 2017 Matterport, Inc.
6
+ Licensed under the MIT License (see LICENSE for details)
7
+ Written by Waleed Abdulla
8
+
9
+ Ideas and small code snippets from these sources:
10
+ https://github.com/fchollet/keras/issues/2436
11
+ https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012
12
+ https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/
13
+ https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py
14
+ """
15
+
16
+ import tensorflow as tf
17
+ import keras.backend as K
18
+ import keras.layers as KL
19
+ import keras.models as KM
20
+
21
+
22
class ParallelModel(KM.Model):
    """Subclasses the standard Keras Model and adds multi-GPU support.

    It works by creating a copy of the model on each GPU. Then it slices
    the inputs and sends a slice to each copy of the model, and then
    merges the outputs together and applies the loss on the combined
    outputs.
    """

    def __init__(self, keras_model, gpu_count):
        """Class constructor.

        keras_model: The Keras model to parallelize
        gpu_count: Number of GPUs. Must be > 1
        """
        # These attributes are set before the base constructor runs because
        # make_parallel() needs them to build the merged outputs.
        self.inner_model = keras_model
        self.gpu_count = gpu_count
        merged_outputs = self.make_parallel()
        super(ParallelModel, self).__init__(inputs=self.inner_model.inputs,
                                            outputs=merged_outputs)

    def __getattribute__(self, attrname):
        """Redirect loading and saving methods to the inner model.

        Any attribute whose name contains 'load' or 'save' is looked up on
        the inner model, which is where the weights are stored.
        """
        if 'load' in attrname or 'save' in attrname:
            return getattr(self.inner_model, attrname)
        return super(ParallelModel, self).__getattribute__(attrname)

    def summary(self, *args, **kwargs):
        """Display summaries of both the wrapper and the inner model."""
        super(ParallelModel, self).summary(*args, **kwargs)
        self.inner_model.summary(*args, **kwargs)

    def make_parallel(self):
        """Build one replica of the inner model per GPU and merge outputs.

        Returns a list of tensors, one per output of the inner model, each
        being the concatenation along axis 0 of that output from all
        replicas.
        """
        input_names = self.inner_model.input_names
        input_tensors = self.inner_model.inputs

        # Split every input into gpu_count slices up front so each replica
        # only receives its own share of the batch.
        input_slices = {name: tf.split(x, self.gpu_count)
                        for name, x in zip(input_names, input_tensors)}

        output_names = self.inner_model.output_names
        outputs_all = [[] for _ in self.inner_model.outputs]

        def slice_layer(piece):
            """Return a Lambda layer that always yields the given slice.

            Binding the slice as an argument (instead of closing over the
            loop variables) keeps each layer tied to its own tensor even if
            the layer function is invoked again after the loops finished.
            """
            return KL.Lambda(lambda s: piece,
                             output_shape=lambda s: (None,) + s[1:])

        def add_dim(tensor):
            """Add a dimension to tensors that don't have any."""
            if K.int_shape(tensor) == ():
                return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor)
            return tensor

        # Run the model call() on each GPU to place the ops there
        for i in range(self.gpu_count):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('tower_%d' % i):
                    # Run a slice of inputs through this replica
                    inputs = [
                        slice_layer(input_slices[name][i])(tensor)
                        for name, tensor in zip(input_names, input_tensors)]
                    # Create the model replica and get the outputs
                    outputs = self.inner_model(inputs)
                    if not isinstance(outputs, list):
                        outputs = [outputs]
                    # Save the outputs for merging back together later
                    for l, o in enumerate(outputs):
                        outputs_all[l].append(o)

        # Merge outputs on CPU
        with tf.device('/cpu:0'):
            merged = []
            for outputs, name in zip(outputs_all, output_names):
                # If outputs are numbers without dimensions, add a batch dim.
                outputs = list(map(add_dim, outputs))
                # Concatenate
                merged.append(KL.Concatenate(axis=0, name=name)(outputs))
        return merged
103
+
104
+
105
if __name__ == "__main__":
    # Manual smoke test: builds a small MNIST classifier, wraps it in
    # ParallelModel for 2 GPUs and trains it while logging the graph for
    # TensorBoard. Run it as:
    #
    #     python3 parallel_model.py

    import os
    import numpy as np
    import keras.optimizers
    from keras.datasets import mnist
    from keras.preprocessing.image import ImageDataGenerator

    GPU_COUNT = 2

    # Logs and trained models go under the current working directory.
    ROOT_DIR = os.getcwd()
    MODEL_DIR = os.path.join(ROOT_DIR, "logs/parallel")

    def build_model(x_train, num_classes):
        """Return a small convolutional classifier for the given data."""
        # Start from an empty graph so ops left over from earlier models
        # do not clutter the TensorBoard visualization.
        tf.reset_default_graph()

        image_input = KL.Input(shape=x_train.shape[1:], name="input_image")
        features = KL.Conv2D(32, (3, 3), activation='relu', padding="same",
                             name="conv1")(image_input)
        features = KL.Conv2D(64, (3, 3), activation='relu', padding="same",
                             name="conv2")(features)
        features = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(features)
        features = KL.Flatten(name="flat1")(features)
        features = KL.Dense(128, activation='relu', name="dense1")(features)
        probabilities = KL.Dense(num_classes, activation='softmax',
                                 name="dense2")(features)

        return KM.Model(image_input, probabilities, "digit_classifier_model")

    # Load MNIST and scale the pixels to [0, 1] with a trailing channel axis.
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = np.expand_dims(x_train, -1).astype('float32') / 255
    x_test = np.expand_dims(x_test, -1).astype('float32') / 255

    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)

    # Data generator and single-device model
    datagen = ImageDataGenerator()
    model = build_model(x_train, 10)

    # Wrap the model so it is replicated across the GPUs.
    model = ParallelModel(model, GPU_COUNT)

    optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0)

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=optimizer, metrics=['accuracy'])

    model.summary()

    # Train
    tensorboard = keras.callbacks.TensorBoard(log_dir=MODEL_DIR,
                                              write_graph=True)
    model.fit_generator(
        datagen.flow(x_train, y_train, batch_size=64),
        steps_per_epoch=50, epochs=10, verbose=1,
        validation_data=(x_test, y_test),
        callbacks=[tensorboard]
    )
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ numpy==1.21.6
2
+ scipy==1.2.2
3
+ Pillow==9.5.0
4
+ Cython==0.29.34
5
+ matplotlib==3.5.3
6
+ scikit-image==0.19.3
7
+ tensorflow==1.13.1
8
+ keras==2.0.8
9
+ opencv-python-headless==4.7.0.72
10
+ h5py==2.10.0
11
+ imgaug==0.4.0
12
+ ipython==7.34.0
13
+
14
+ imageio==2.9.0
15
+ requests==2.27.*
16
+ uvloop==0.15.2
17
+ uvicorn==0.13.4
18
+ httptools==0.2.0
19
+ fastapi==0.74.*
20
+ gunicorn==20.1.0
shapes.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mask R-CNN
3
+ Configurations and data loading code for the synthetic Shapes dataset.
4
+ This is a duplicate of the code in the notebook train_shapes.ipynb for easy
5
+ import into other notebooks, such as inspect_model.ipynb.
6
+
7
+ Copyright (c) 2017 Matterport, Inc.
8
+ Licensed under the MIT License (see LICENSE for details)
9
+ Written by Waleed Abdulla
10
+ """
11
+
12
+ import math
13
+ import random
14
+ import numpy as np
15
+ import cv2
16
+
17
+ from config import Config
18
+ import utils
19
+
20
+
21
class ShapesConfig(Config):
    """Settings for training on the synthetic shapes dataset.

    Overrides the base Config values that need to differ for the small,
    generated images of this toy dataset.
    """
    # Name used to identify this configuration
    NAME = "shapes"

    # One GPU with 8 images on it. The images are small, so several fit on
    # a GPU at once. Batch size is GPU_COUNT * IMAGES_PER_GPU = 8.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 8

    # Background plus the three shape classes
    NUM_CLASSES = 1 + 3  # background + 3 shapes

    # Small images train faster. These are the limits for the short side
    # and the long side; together they determine the image shape.
    IMAGE_MIN_DIM = 128
    IMAGE_MAX_DIM = 128

    # Image and objects are small, so use small anchors
    RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)  # anchor side in pixels

    # Fewer training ROIs per image, since images are small and hold few
    # objects. Aim to allow ROI sampling to pick 33% positive ROIs.
    TRAIN_ROIS_PER_IMAGE = 32

    # The data is simple, so a short epoch is enough
    STEPS_PER_EPOCH = 100

    # Short epochs only need a few validation steps
    VALIDATION_STEPS = 5
54
+
55
+
56
class ShapesDataset(utils.Dataset):
    """Generates the shapes synthetic dataset.

    The dataset consists of simple shapes (triangles, squares, circles)
    placed randomly on a blank surface. The images are generated on the
    fly. No file access required.
    """

    def load_shapes(self, count, height, width):
        """Generate the requested number of synthetic images.

        count: number of images to generate.
        height, width: the size of the generated images.
        """
        # Add classes
        self.add_class("shapes", 1, "square")
        self.add_class("shapes", 2, "circle")
        self.add_class("shapes", 3, "triangle")

        # Add images
        # Only the random specification of each image (background color and
        # list of shapes with sizes and locations) is stored. This is more
        # compact than actual images, which are rendered in load_image().
        for i in range(count):
            bg_color, shapes = self.random_image(height, width)
            self.add_image("shapes", image_id=i, path=None,
                           width=width, height=height,
                           bg_color=bg_color, shapes=shapes)

    def load_image(self, image_id):
        """Generate an image from the specs of the given image ID.

        Typically this function loads the image from a file, but
        in this case it generates the image on the fly from the
        specs in image_info.
        """
        info = self.image_info[image_id]
        bg_color = np.array(info['bg_color']).reshape([1, 1, 3])
        # Fill the canvas with the background color, then paint the shapes
        # in order so later shapes cover earlier ones.
        image = np.ones([info['height'], info['width'], 3], dtype=np.uint8)
        image = image * bg_color.astype(np.uint8)
        for shape, color, dims in info['shapes']:
            image = self.draw_shape(image, shape, dims, color)
        return image

    def image_reference(self, image_id):
        """Return the shapes data of the image.

        Images from any other source are handed to the base class, and its
        result is returned.
        """
        info = self.image_info[image_id]
        if info["source"] == "shapes":
            return info["shapes"]
        # A bound super() call is required here: an unbound super object
        # exposes no image_reference attribute, and the result must be
        # returned to the caller.
        return super(ShapesDataset, self).image_reference(image_id)

    def load_mask(self, image_id):
        """Generate instance masks for shapes of the given image ID.

        Returns a [height, width, count] uint8 mask array and an int32
        array with the class ID of each instance.
        """
        info = self.image_info[image_id]
        shapes = info['shapes']
        count = len(shapes)
        mask = np.zeros([info['height'], info['width'], count], dtype=np.uint8)
        for i, (shape, _, dims) in enumerate(info['shapes']):
            mask[:, :, i:i + 1] = self.draw_shape(mask[:, :, i:i + 1].copy(),
                                                  shape, dims, 1)
        # Handle occlusions: walk from the last-drawn shape backwards and
        # clear, in every earlier mask, the pixels covered by later shapes.
        occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
        for i in range(count - 2, -1, -1):
            mask[:, :, i] = mask[:, :, i] * occlusion
            occlusion = np.logical_and(
                occlusion, np.logical_not(mask[:, :, i]))
        # Map class names to class IDs.
        class_ids = np.array([self.class_names.index(s[0]) for s in shapes])
        return mask, class_ids.astype(np.int32)

    def draw_shape(self, image, shape, dims, color):
        """Draw a filled shape on the image and return the image.

        shape: "square", "circle" or "triangle"; other values leave the
            image untouched.
        dims: (x, y, s) with the center coordinates and the size.
        color: fill color passed through to OpenCV.
        """
        # Get the center x, y and the size s
        x, y, s = dims
        if shape == 'square':
            image = cv2.rectangle(image, (x - s, y - s),
                                  (x + s, y + s), color, -1)
        elif shape == "circle":
            image = cv2.circle(image, (x, y), s, color, -1)
        elif shape == "triangle":
            points = np.array([[(x, y - s),
                                (x - s / math.sin(math.radians(60)), y + s),
                                (x + s / math.sin(math.radians(60)), y + s),
                                ]], dtype=np.int32)
            image = cv2.fillPoly(image, points, color)
        return image

    def random_shape(self, height, width):
        """Generate specifications of a random shape.

        The center is kept at least 20 pixels away from the borders given
        by height and width.

        Returns a tuple of three values:
        * The shape name (square, circle, ...)
        * Shape color: a tuple of 3 values, RGB.
        * Shape dimensions: A tuple of values that define the shape size
          and location. Differs per shape type.
        """
        # Shape
        shape = random.choice(["square", "circle", "triangle"])
        # Color
        color = tuple([random.randint(0, 255) for _ in range(3)])
        # Center x, y
        buffer = 20
        y = random.randint(buffer, height - buffer - 1)
        x = random.randint(buffer, width - buffer - 1)
        # Size. NOTE(review): random.randint needs height // 4 >= buffer,
        # i.e. a height of at least 80 - confirm callers respect this.
        s = random.randint(buffer, height // 4)
        return shape, color, (x, y, s)

    def random_image(self, height, width):
        """Create random specifications of an image with multiple shapes.

        Returns the background color of the image and a list of shape
        specifications that can be used to draw the image.
        """
        # Pick random background color
        bg_color = np.array([random.randint(0, 255) for _ in range(3)])
        # Generate a few random shapes and record their
        # bounding boxes
        shapes = []
        boxes = []
        N = random.randint(1, 4)
        for _ in range(N):
            shape, color, dims = self.random_shape(height, width)
            shapes.append((shape, color, dims))
            x, y, s = dims
            boxes.append([y - s, x - s, y + s, x + s])
        # Apply non-max suppression with 0.3 threshold to avoid
        # shapes covering each other
        keep_ixs = utils.non_max_suppression(
            np.array(boxes), np.arange(N), 0.3)
        shapes = [s for i, s in enumerate(shapes) if i in keep_ixs]
        return bg_color, shapes
utils.py ADDED
@@ -0,0 +1,736 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Mask R-CNN
3
+ Common utility functions and classes.
4
+
5
+ Copyright (c) 2017 Matterport, Inc.
6
+ Licensed under the MIT License (see LICENSE for details)
7
+ Written by Waleed Abdulla
8
+ """
9
+
10
+ import sys
11
+ import os
12
+ import math
13
+ import random
14
+ import numpy as np
15
+ import tensorflow as tf
16
+ import scipy.misc
17
+ import skimage.color
18
+ import skimage.io
19
+ import urllib.request
20
+ import shutil
21
+
22
+ # URL from which to download the latest COCO trained weights
23
+ COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
24
+
25
+
26
+ ############################################################
27
+ # Bounding Boxes
28
+ ############################################################
29
+
30
def extract_bboxes(mask):
    """Compute one bounding box per instance mask.

    mask: [height, width, num_instances]. Mask pixels are either 1 or 0.

    Returns: int32 array [num_instances, (y1, x1, y2, x2)]. (y2, x2) lies
    just outside the box. Instances with an empty mask get an all-zero box.
    """
    num_instances = mask.shape[-1]
    boxes = np.zeros([num_instances, 4], dtype=np.int32)
    for idx in range(num_instances):
        instance = mask[:, :, idx]
        # Columns / rows that contain at least one mask pixel.
        cols = np.where(np.any(instance, axis=0))[0]
        rows = np.where(np.any(instance, axis=1))[0]
        if not cols.shape[0]:
            # Empty mask, e.g. after resizing or cropping.
            boxes[idx] = np.array([0, 0, 0, 0])
            continue
        # The far corner is exclusive, hence the + 1.
        x1, x2 = cols[0], cols[-1] + 1
        y1, y2 = rows[0], rows[-1] + 1
        boxes[idx] = np.array([y1, x1, y2, x2])
    return boxes.astype(np.int32)
54
+
55
+
56
def compute_iou(box, boxes, box_area, boxes_area):
    """Return the IoU of one box against each box in an array.

    box: 1D vector [y1, x1, y2, x2]
    boxes: [boxes_count, (y1, x1, y2, x2)]
    box_area: float. the area of 'box'
    boxes_area: array of length boxes_count.

    Note: areas are supplied by the caller so they are computed only once
    when this function is called repeatedly.
    """
    # Corners of the intersection rectangles
    top = np.maximum(box[0], boxes[:, 0])
    bottom = np.minimum(box[2], boxes[:, 2])
    left = np.maximum(box[1], boxes[:, 1])
    right = np.minimum(box[3], boxes[:, 3])
    # Negative extents mean no overlap; clamp them to zero.
    overlap_w = np.maximum(right - left, 0)
    overlap_h = np.maximum(bottom - top, 0)
    intersection = overlap_w * overlap_h
    union = box_area + boxes_area[:] - intersection[:]
    return intersection / union
75
+
76
+
77
def compute_overlaps(boxes1, boxes2):
    """Compute the IoU matrix between two sets of boxes.

    boxes1, boxes2: [N, (y1, x1, y2, x2)].

    Returns an array [boxes1 count, boxes2 count] of IoU values.
    The loop runs over boxes2, so pass the larger set first and the
    smaller one second for better performance.
    """
    # Box areas
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

    # Fill the matrix one column (one box of boxes2) at a time.
    overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0]))
    for col in range(overlaps.shape[1]):
        overlaps[:, col] = compute_iou(boxes2[col], boxes1, area2[col], area1)
    return overlaps
94
+
95
+
96
def compute_overlaps_masks(masks1, masks2):
    '''Compute the IoU matrix between two sets of masks.
    masks1, masks2: [Height, Width, instances]

    Returns an array [instances of masks1, instances of masks2].
    '''
    # Threshold at 0.5 and flatten each instance into one column
    flat1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
    flat2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
    pixels1 = np.sum(flat1, axis=0)
    pixels2 = np.sum(flat2, axis=0)

    # The dot product of the 0/1 columns counts the shared pixels.
    intersections = np.dot(flat1.T, flat2)
    union = pixels1[:, None] + pixels2[None, :] - intersections
    return intersections / union
112
+
113
+
114
def non_max_suppression(boxes, scores, threshold):
    """Run non-maximum suppression and return the indices of kept boxes.

    boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box.
    scores: 1-D array of box scores.
    threshold: Float. IoU threshold to use for filtering.

    Returns an int32 array of indices into boxes, highest score first.
    """
    assert boxes.shape[0] > 0
    if boxes.dtype.kind != "f":
        boxes = boxes.astype(np.float32)

    # Box areas
    heights = boxes[:, 2] - boxes[:, 0]
    widths = boxes[:, 3] - boxes[:, 1]
    area = heights * widths

    # Candidate indices, best score first
    remaining = scores.argsort()[::-1]

    kept = []
    while len(remaining) > 0:
        # The best remaining box is always kept.
        best = remaining[0]
        others = remaining[1:]
        kept.append(best)
        iou = compute_iou(boxes[best], boxes[others], area[best], area[others])
        # Drop the picked box and every box overlapping it by more than
        # the threshold; everything else stays for the next round.
        remaining = others[np.logical_not(iou > threshold)]
    return np.array(kept, dtype=np.int32)
149
+
150
+
151
def apply_box_deltas(boxes, deltas):
    """Shift and scale boxes by the given deltas.

    boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box.
    deltas: [N, (dy, dx, log(dh), log(dw))]

    Returns the refined boxes as [N, (y1, x1, y2, x2)].
    """
    boxes = boxes.astype(np.float32)
    # Corner form -> center form
    h = boxes[:, 2] - boxes[:, 0]
    w = boxes[:, 3] - boxes[:, 1]
    cy = boxes[:, 0] + 0.5 * h
    cx = boxes[:, 1] + 0.5 * w
    # Centers move by a fraction of the box size; sizes scale
    # exponentially. The updates are in place to keep the float32 dtype.
    cy += deltas[:, 0] * h
    cx += deltas[:, 1] * w
    h *= np.exp(deltas[:, 2])
    w *= np.exp(deltas[:, 3])
    # Center form -> corner form
    top = cy - 0.5 * h
    left = cx - 0.5 * w
    bottom = top + h
    right = left + w
    return np.stack([top, left, bottom, right], axis=1)
173
+
174
+
175
def box_refinement_graph(box, gt_box):
    """Build the ops computing the refinement that maps box onto gt_box.

    box and gt_box are [N, (y1, x1, y2, x2)]

    Returns a tensor [N, (dy, dx, log(dh), log(dw))].
    """
    box = tf.cast(box, tf.float32)
    gt_box = tf.cast(gt_box, tf.float32)

    def center_form(b):
        """Return (center_y, center_x, height, width) of the boxes."""
        h = b[:, 2] - b[:, 0]
        w = b[:, 3] - b[:, 1]
        cy = b[:, 0] + 0.5 * h
        cx = b[:, 1] + 0.5 * w
        return cy, cx, h, w

    center_y, center_x, height, width = center_form(box)
    gt_center_y, gt_center_x, gt_height, gt_width = center_form(gt_box)

    # Center offsets are relative to the box size; size changes are logs
    # of the size ratios.
    dy = (gt_center_y - center_y) / height
    dx = (gt_center_x - center_x) / width
    dh = tf.log(gt_height / height)
    dw = tf.log(gt_width / width)

    return tf.stack([dy, dx, dh, dw], axis=1)
199
+
200
+
201
def box_refinement(box, gt_box):
    """Compute the refinement that maps box onto gt_box.

    box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
    assumed to be outside the box.

    Returns an array [N, (dy, dx, log(dh), log(dw))].
    """
    box = box.astype(np.float32)
    gt_box = gt_box.astype(np.float32)

    def center_form(b):
        """Return (center_y, center_x, height, width) of the boxes."""
        h = b[:, 2] - b[:, 0]
        w = b[:, 3] - b[:, 1]
        return b[:, 0] + 0.5 * h, b[:, 1] + 0.5 * w, h, w

    center_y, center_x, height, width = center_form(box)
    gt_center_y, gt_center_x, gt_height, gt_width = center_form(gt_box)

    # Center offsets are relative to the box size; size changes are logs
    # of the size ratios.
    deltas = [
        (gt_center_y - center_y) / height,
        (gt_center_x - center_x) / width,
        np.log(gt_height / height),
        np.log(gt_width / width),
    ]
    return np.stack(deltas, axis=1)
225
+
226
+
227
+ ############################################################
228
+ # Dataset
229
+ ############################################################
230
+
231
class Dataset(object):
    """The base class for dataset classes.

    To use it, create a new class that adds functions specific to the dataset
    you want to use. For example:

    class CatsAndDogsDataset(Dataset):
        def load_cats_and_dogs(self):
            ...
        def load_mask(self, image_id):
            ...
        def image_reference(self, image_id):
            ...

    See COCODataset and ShapesDataset as examples.
    """

    def __init__(self, class_map=None):
        """Create an empty dataset holding only the background class.

        class_map: accepted but not used by this constructor.
        """
        self._image_ids = []
        self.image_info = []
        # Background is always the first class
        self.class_info = [{"source": "", "id": 0, "name": "BG"}]
        self.source_class_ids = {}

    def add_class(self, source, class_id, class_name):
        """Register a class unless the (source, class_id) pair exists.

        source: name of the originating dataset; must not contain a dot.
        class_id: ID of the class within that source.
        class_name: display name of the class.
        """
        assert "." not in source, "Source name cannot contain a dot"
        # Does the class exist already?
        for info in self.class_info:
            if info['source'] == source and info["id"] == class_id:
                # source.class_id combination already available, skip
                return
        # Add the class
        self.class_info.append({
            "source": source,
            "id": class_id,
            "name": class_name,
        })

    def add_image(self, source, image_id, path, **kwargs):
        """Append an image record; extra keyword arguments are stored too."""
        image_info = {
            "id": image_id,
            "source": source,
            "path": path,
        }
        image_info.update(kwargs)
        self.image_info.append(image_info)

    def image_reference(self, image_id):
        """Return a link to the image in its source Website or details about
        the image that help looking it up or debugging it.

        Override for your dataset, but pass to this function
        if you encounter images not in your dataset.
        """
        return ""

    def prepare(self, class_map=None):
        """Prepares the Dataset class for use.

        Builds the derived attributes (class and image counts and IDs, class
        names and the source lookup tables) from class_info and image_info.

        TODO: class map is not supported yet. When done, it should handle mapping
              classes from different datasets to the same class ID.
        """

        def clean_name(name):
            """Returns a shorter version of object names for cleaner display."""
            return ",".join(name.split(",")[:1])

        # Build (or rebuild) everything else from the info dicts.
        self.num_classes = len(self.class_info)
        self.class_ids = np.arange(self.num_classes)
        self.class_names = [clean_name(c["name"]) for c in self.class_info]
        self.num_images = len(self.image_info)
        self._image_ids = np.arange(self.num_images)

        # "source.id" string -> internal class ID
        self.class_from_source_map = {
            "{}.{}".format(info['source'], info['id']): class_id
            for info, class_id in zip(self.class_info, self.class_ids)}

        # Map sources to class_ids they support
        self.sources = list(set([i['source'] for i in self.class_info]))
        self.source_class_ids = {}
        # Loop over datasets
        for source in self.sources:
            self.source_class_ids[source] = []
            # Find classes that belong to this dataset
            for i, info in enumerate(self.class_info):
                # Include BG class in all datasets
                if i == 0 or source == info['source']:
                    self.source_class_ids[source].append(i)

    def map_source_class_id(self, source_class_id):
        """Takes a source class ID and returns the int class ID assigned to it.

        For example:
        dataset.map_source_class_id("coco.12") -> 23
        """
        return self.class_from_source_map[source_class_id]

    def get_source_class_id(self, class_id, source):
        """Map an internal class ID to the corresponding class ID in the source dataset."""
        info = self.class_info[class_id]
        assert info['source'] == source
        return info['id']

    def append_data(self, class_info, image_info):
        """Build lookup tables from external IDs to internal indices.

        NOTE(review): the class_info and image_info arguments are not used,
        and the "map" and "ds" keys read below are not set by add_class() or
        add_image() in this class - confirm whether any caller populates
        them before relying on this method.
        """
        self.external_to_class_id = {}
        for i, c in enumerate(self.class_info):
            for ds, id in c["map"]:
                self.external_to_class_id[ds + str(id)] = i

        # Map external image IDs to internal ones.
        self.external_to_image_id = {}
        for i, info in enumerate(self.image_info):
            self.external_to_image_id[info["ds"] + str(info["id"])] = i

    @property
    def image_ids(self):
        """Internal image IDs; an index range once prepare() has run."""
        return self._image_ids

    def source_image_link(self, image_id):
        """Returns the path or URL to the image.

        Override this to return a URL to the image if it's available online
        for easy debugging.
        """
        return self.image_info[image_id]["path"]

    def load_image(self, image_id):
        """Load the specified image and return a [H,W,3] Numpy array.
        """
        # Load image
        image = skimage.io.imread(self.image_info[image_id]['path'])
        # If grayscale. Convert to RGB for consistency.
        if image.ndim != 3:
            image = skimage.color.gray2rgb(image)
        # If the file carries an alpha channel, drop it so the result
        # really has the 3 channels promised above.
        if image.shape[-1] == 4:
            image = image[..., :3]
        return image

    def load_mask(self, image_id):
        """Load instance masks for the given image.

        Different datasets use different ways to store masks. Override this
        method to load instance masks and return them in the form of an
        array of binary masks of shape [height, width, instances].

        Returns:
            masks: A bool array of shape [height, width, instance count] with
                a binary mask per instance.
            class_ids: a 1D array of class IDs of the instance masks.
        """
        # Override this function to load a mask from your dataset.
        # Otherwise, it returns an empty mask.
        mask = np.empty([0, 0, 0])
        class_ids = np.empty([0], np.int32)
        return mask, class_ids
382
+
383
+
384
def resize_image(image, min_dim=None, max_dim=None, padding=False):
    """
    Resizes an image keeping the aspect ratio.

    min_dim: if provided, scales the image up (never down) so that its
        smaller side reaches min_dim
    max_dim: if provided, ensures that the image longest side doesn't
        exceed this value.
    padding: If true, pads image with zeros so its size is max_dim x max_dim

    Returns:
    image: the resized image
    window: (y1, x1, y2, x2). If max_dim is provided, padding might
        be inserted in the returned image. If so, this window is the
        coordinates of the image part of the full image (excluding
        the padding). The x2, y2 pixels are not included.
    scale: The scale factor used to resize the image
    padding: Padding added to the image [(top, bottom), (left, right), (0, 0)],
        or the padding argument unchanged when no padding was requested
    """
    # Without resizing or padding the window is the whole image.
    h, w = image.shape[:2]
    window = (0, 0, h, w)
    scale = 1

    if min_dim:
        # Scale up but not down
        scale = max(1, min_dim / min(h, w))
    if max_dim:
        # Shrink the scale if the long side would exceed max_dim.
        longest = max(h, w)
        if round(longest * scale) > max_dim:
            scale = max_dim / longest
    if scale != 1:
        target = (round(h * scale), round(w * scale))
        image = scipy.misc.imresize(image, target)

    if not padding:
        return image, window, scale, padding

    # Center the (possibly resized) image in a max_dim x max_dim canvas.
    h, w = image.shape[:2]
    top_pad = (max_dim - h) // 2
    left_pad = (max_dim - w) // 2
    bottom_pad = max_dim - h - top_pad
    right_pad = max_dim - w - left_pad
    padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
    image = np.pad(image, padding, mode='constant', constant_values=0)
    window = (top_pad, left_pad, h + top_pad, w + left_pad)
    return image, window, scale, padding
433
+
434
+
435
def resize_mask(mask, scale, padding):
    """Resizes a mask using the given scale and padding.

    Typically, you get the scale and padding from resize_image() to
    ensure both, the image and the mask, are resized consistently.

    mask: array whose first two axes are height and width, followed by
        one more axis
    scale: mask scaling factor
    padding: Padding to add to the mask in the form
        [(top, bottom), (left, right), (0, 0)]

    Returns the zoomed and zero-padded mask.
    """
    # Import the submodule explicitly: the file only imports scipy.misc,
    # which does not guarantee that scipy.ndimage is loaded.
    import scipy.ndimage

    # order=0 (nearest neighbour) keeps the mask values unchanged; the
    # last axis is left unscaled.
    mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
    mask = np.pad(mask, padding, mode='constant', constant_values=0)
    return mask
448
+
449
+
450
def minimize_mask(bbox, mask, mini_shape):
    """Resize masks to a smaller version to cut memory load.
    Mini-masks can then be resized back to image scale using expand_mask().

    bbox: per-instance boxes; the first four values of bbox[i] are read as
        (y1, x1, y2, x2) and used to crop instance i out of `mask`.
    mask: array indexed as [height, width, instance].
    mini_shape: (height, width) of each mini-mask.

    Returns a bool array of shape mini_shape + (instance count,).
    Raises Exception if a box crops to an empty region.

    See inspect_data.ipynb notebook for more details.
    """
    # Function-scope import so this block does not rely on how the module
    # happens to import scipy.
    import scipy.ndimage

    mini_shape = tuple(mini_shape)
    mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
    for i in range(mask.shape[-1]):
        y1, x1, y2, x2 = bbox[i][:4]
        m = mask[y1:y2, x1:x2, i]
        if m.size == 0:
            raise Exception("Invalid bounding box with area of zero")
        # scipy.misc.imresize is gone from SciPy >= 1.3, and it min/max
        # byte-scaled its input, turning a crop that is entirely foreground
        # into all zeros. Bilinear zoom on the 0/1 float crop keeps values in
        # [0, 1], so 0.5 is the equivalent of the old 128-of-255 cut-off.
        factors = [mini_shape[0] / m.shape[0], mini_shape[1] / m.shape[1]]
        resized = scipy.ndimage.zoom(
            m.astype(float), factors, order=1, mode='nearest')
        mini_mask[:, :, i] = resized >= 0.5
    return mini_mask
466
+
467
+
468
def expand_mask(bbox, mini_mask, image_shape):
    """Resizes mini masks back to image size. Reverses the change
    of minimize_mask().

    bbox: per-instance boxes; the first four values of bbox[i] are read as
        (y1, x1, y2, x2), the region of the image that mini-mask i covers.
    mini_mask: array indexed as [mini_height, mini_width, instance].
    image_shape: shape of the target image; only the first two entries
        (height, width) are used.

    Returns a bool array of shape (height, width, instance count).

    See inspect_data.ipynb notebook for more details.
    """
    # Function-scope import so this block does not rely on how the module
    # happens to import scipy.
    import scipy.ndimage

    mask = np.zeros(
        tuple(image_shape[:2]) + (mini_mask.shape[-1],), dtype=bool)
    for i in range(mask.shape[-1]):
        m = mini_mask[:, :, i]
        y1, x1, y2, x2 = bbox[i][:4]
        h = y2 - y1
        w = x2 - x1
        # scipy.misc.imresize is gone from SciPy >= 1.3, and it min/max
        # byte-scaled its input, so a fully-set mini-mask came back empty.
        # Bilinear zoom keeps values in [0, 1]; 0.5 replaces the old
        # 128-of-255 cut-off.
        factors = [h / m.shape[0], w / m.shape[1]]
        resized = scipy.ndimage.zoom(
            m.astype(float), factors, order=1, mode='nearest')
        mask[y1:y2, x1:x2, i] = resized >= 0.5
    return mask
483
+
484
+
485
+ # TODO: Build and use this function to reduce code duplication
486
def mold_mask(mask, config):
    """Placeholder, not implemented yet: ignores both arguments and
    returns None.
    """
488
+
489
+
490
def unmold_mask(mask, bbox, image_shape):
    """Converts a mask generated by the neural network into a format similar
    to its original shape.
    mask: [height, width] of type float. A small, typically 28x28 mask.
    bbox: [y1, x1, y2, x2]. The box to fit the mask in.
    image_shape: shape of the target image; only the first two entries
        (height, width) are used.

    Returns a binary uint8 mask with the same height and width as the
    original image.
    """
    # Function-scope import so this block does not rely on how the module
    # happens to import scipy.
    import scipy.ndimage

    threshold = 0.5
    y1, x1, y2, x2 = bbox
    # scipy.misc.imresize is gone from SciPy >= 1.3. It also stretched the
    # input's min..max range onto 0..255 before resizing, so the threshold
    # was applied to re-normalised values rather than to the network's
    # scores (a uniformly confident mask came back empty). Bilinear zoom
    # keeps the original value range.
    factors = [(y2 - y1) / mask.shape[0], (x2 - x1) / mask.shape[1]]
    resized = scipy.ndimage.zoom(
        np.asarray(mask, dtype=np.float32), factors, order=1, mode='nearest')
    binary = np.where(resized >= threshold, 1, 0).astype(np.uint8)

    # Put the mask in the right location.
    full_mask = np.zeros(image_shape[:2], dtype=np.uint8)
    full_mask[y1:y2, x1:x2] = binary
    return full_mask
508
+
509
+
510
+ ############################################################
511
+ # Anchors
512
+ ############################################################
513
+
514
def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
    """Build the anchor boxes for a single feature map.

    scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
    ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
    shape: [height, width] spatial shape of the feature map over which
        to generate anchors.
    feature_stride: Stride of the feature map relative to the image in pixels.
    anchor_stride: Stride of anchors on the feature map. For example, if the
        value is 2 then generate anchors for every other feature map pixel.

    Returns an array of boxes, one row per anchor, as (y1, x1, y2, x2).
    """
    # Every (scale, ratio) pairing, flattened to 1D.
    scale_grid, ratio_grid = np.meshgrid(np.array(scales), np.array(ratios))
    flat_scales = scale_grid.flatten()
    flat_ratios = ratio_grid.flatten()

    # ratio = width / height, so dividing by sqrt(ratio) gives the height
    # and multiplying gives the width.
    anchor_heights = flat_scales / np.sqrt(flat_ratios)
    anchor_widths = flat_scales * np.sqrt(flat_ratios)

    # Anchor centre positions, in image pixels.
    rows = np.arange(0, shape[0], anchor_stride) * feature_stride
    cols = np.arange(0, shape[1], anchor_stride) * feature_stride
    grid_x, grid_y = np.meshgrid(cols, rows)

    # Pair every centre with every anchor size.
    width_grid, center_x = np.meshgrid(anchor_widths, grid_x)
    height_grid, center_y = np.meshgrid(anchor_heights, grid_y)

    # Flatten into one (y, x) row and one (h, w) row per anchor.
    centers = np.stack([center_y, center_x], axis=2).reshape([-1, 2])
    sizes = np.stack([height_grid, width_grid], axis=2).reshape([-1, 2])

    # Centre +/- half the size gives the two corners.
    top_left = centers - 0.5 * sizes
    bottom_right = centers + 0.5 * sizes
    return np.concatenate([top_left, bottom_right], axis=1)
551
+
552
+
553
def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
                             anchor_stride):
    """Generate anchors at different levels of a feature pyramid. Each scale
    is associated with a level of the pyramid, but each ratio is used in
    all levels of the pyramid.

    Level i is built by generate_anchors() from scales[i], feature_shapes[i]
    and feature_strides[i]; `ratios` and `anchor_stride` are shared by every
    level.

    Returns:
    anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted
        with the same order of the given scales. So, anchors of scale[0] come
        first, then anchors of scale[1], and so on.
    """
    # One [anchor_count, (y1, x1, y2, x2)] array per pyramid level.
    per_level = [
        generate_anchors(level_scale, ratios, feature_shapes[level],
                         feature_strides[level], anchor_stride)
        for level, level_scale in enumerate(scales)
    ]
    return np.concatenate(per_level, axis=0)
571
+
572
+
573
+ ############################################################
574
+ # Miscellaneous
575
+ ############################################################
576
+
577
def trim_zeros(x):
    """Drop the all-zero rows of a 2D array.

    Tensors are often allocated larger than the available data and padded
    with zeros; this removes those padding rows.

    x: [rows, columns].
    """
    assert len(x.shape) == 2
    # A row is kept as soon as it holds at least one non-zero entry.
    has_data = np.any(x != 0, axis=1)
    return x[has_data]
585
+
586
+
587
def compute_ap(gt_boxes, gt_class_ids, gt_masks,
               pred_boxes, pred_class_ids, pred_scores, pred_masks,
               iou_threshold=0.5):
    """Compute Average Precision at a set IoU threshold (default 0.5).

    Zero-padded rows are trimmed from the box arrays, predictions are
    ranked by score, and each prediction is greedily matched to the
    not-yet-matched ground truth instance it overlaps most, provided the
    mask IoU reaches `iou_threshold` and the class ids agree.

    Returns:
    mAP: Mean Average Precision
    precisions: List of precisions at different class score thresholds.
    recalls: List of recall values at different class score thresholds.
    overlaps: [pred_boxes, gt_boxes] IoU overlaps.
    """
    # Strip zero padding, then rank predictions from highest to lowest score.
    # TODO: cleaner to do zero unpadding upstream
    gt_boxes = trim_zeros(gt_boxes)
    num_gt = gt_boxes.shape[0]
    gt_masks = gt_masks[..., :num_gt]

    pred_boxes = trim_zeros(pred_boxes)
    pred_scores = pred_scores[:pred_boxes.shape[0]]
    ranking = np.argsort(pred_scores)[::-1]
    pred_boxes = pred_boxes[ranking]
    pred_class_ids = pred_class_ids[ranking]
    pred_scores = pred_scores[ranking]
    pred_masks = pred_masks[..., ranking]
    num_pred = pred_boxes.shape[0]

    # IoU between every predicted mask and every ground truth mask.
    overlaps = compute_overlaps_masks(pred_masks, gt_masks)

    # Greedy matching, walking predictions in score order.
    pred_match = np.zeros([num_pred])
    gt_match = np.zeros([num_gt])
    for p in range(num_pred):
        # Candidate ground truths, best overlap first.
        for g in np.argsort(overlaps[p])[::-1]:
            if gt_match[g] == 1:
                # Already claimed by a higher-scoring prediction.
                continue
            if overlaps[p, g] < iou_threshold:
                # Candidates are sorted, so nothing further can qualify.
                break
            if pred_class_ids[p] == gt_class_ids[g]:
                gt_match[g] = 1
                pred_match[p] = 1
                break

    # Precision and recall after each prediction in the ranking.
    hits = np.cumsum(pred_match)
    precisions = hits / (np.arange(len(pred_match)) + 1)
    recalls = hits.astype(np.float32) / len(gt_match)

    # Sentinel values at both ends simplify the area computation below.
    precisions = np.concatenate([[0], precisions, [0]])
    recalls = np.concatenate([[0], recalls, [1]])

    # Sweep right-to-left so each precision becomes the maximum of itself
    # and everything after it (the envelope described in the VOC paper).
    for k in reversed(range(len(precisions) - 1)):
        precisions[k] = np.maximum(precisions[k], precisions[k + 1])

    # Sum precision * recall-step over the points where recall changes.
    steps = np.where(recalls[:-1] != recalls[1:])[0] + 1
    recall_deltas = recalls[steps] - recalls[steps - 1]
    mAP = np.sum(recall_deltas * precisions[steps])

    return mAP, precisions, recalls, overlaps
655
+
656
+
657
def compute_recall(pred_boxes, gt_boxes, iou):
    """Compute the recall at the given IoU threshold. It's an indication
    of how many GT boxes were found by the given prediction boxes.

    pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
    gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates

    Returns (recall, positive_ids), where positive_ids are the indices of
    the predictions whose best overlap reaches `iou`.
    """
    overlaps = compute_overlaps(pred_boxes, gt_boxes)

    # For every prediction: its best IoU and which GT box produced it.
    best_iou = np.max(overlaps, axis=1)
    best_gt = np.argmax(overlaps, axis=1)

    positive_ids = np.where(best_iou >= iou)[0]
    # Several predictions may hit the same GT box; count each box once.
    found_gt = set(best_gt[positive_ids])

    return len(found_gt) / gt_boxes.shape[0], positive_ids
673
+
674
+
675
+ # ## Batch Slicing
676
+ # Some custom layers support a batch size of 1 only, and require a lot of work
677
+ # to support batches greater than 1. This function slices an input tensor
678
+ # across the batch dimension and feeds batches of size 1. Effectively,
679
+ # an easy way to support batches > 1 quickly with little code modification.
680
+ # In the long run, it's more efficient to modify the code to support large
681
+ # batches and getting rid of this function. Consider this a temporary solution
682
def batch_slice(inputs, graph_fn, batch_size, names=None):
    """Run a single-instance graph function over each item of a batch.

    Each input is indexed along its first dimension, `graph_fn` is called
    once per index, and the per-item results are stacked back together.
    This allows a graph written for one instance to be used on a batch.

    inputs: list of tensors. All must have the same first dimension length
    graph_fn: A function that returns a TF tensor that's part of a graph.
    batch_size: number of slices to divide the data into.
    names: If provided, assigns names to the resulting tensors.

    Returns a list with one stacked tensor per graph_fn output, or the
    tensor itself when there is exactly one output.
    """
    tensors = inputs if isinstance(inputs, list) else [inputs]

    per_item = []
    for idx in range(batch_size):
        produced = graph_fn(*[t[idx] for t in tensors])
        if isinstance(produced, (tuple, list)):
            per_item.append(produced)
        else:
            # Normalise a lone output to a one-element list.
            per_item.append([produced])

    # Transpose: from "per item, a list of outputs" to
    # "per output, the values from every item".
    per_output = list(zip(*per_item))

    labels = [None] * len(per_output) if names is None else names
    stacked = [tf.stack(group, axis=0, name=label)
               for group, label in zip(per_output, labels)]

    return stacked[0] if len(stacked) == 1 else stacked
717
+
718
+
719
def download_trained_weights(coco_model_path, verbose=1):
    """Fetch the file at COCO_MODEL_URL and write it to disk.

    coco_model_path: local path of COCO trained weights
    verbose: progress messages are printed when this is greater than 0
    """
    announce = verbose > 0
    if announce:
        print("Downloading pretrained model to " + coco_model_path + " ...")
    # Copy the response straight into the target file.
    with urllib.request.urlopen(COCO_MODEL_URL) as response:
        with open(coco_model_path, 'wb') as target:
            shutil.copyfileobj(response, target)
    if announce:
        print("... done downloading pretrained model!")
730
+
731
+
732
def resize_image_with_scale(h1, w1, h2_max, w2_max):
    """Return the (height, width) of an h1 x w1 image after scaling it,
    with its aspect ratio preserved, to fit inside an h2_max x w2_max
    rectangle.

    h1, w1: current image height and width
    h2_max, w2_max: height and width of the bounding rectangle

    Both returned values are truncated to int. When the rectangle is square
    the results match the original orientation-based branches exactly;
    non-square rectangles, which those branches could overflow, are now
    respected.
    """
    if h1 == w1:
        # A square image is limited by the shorter side of the rectangle.
        side = int(min(h2_max, w2_max))
        return side, side
    # Cross-multiplied comparison of h1 / w1 against h2_max / w2_max: when
    # it holds, stretching the width to w2_max keeps the height within
    # h2_max, so the width is the binding side.
    if h1 * w2_max <= w1 * h2_max:
        return int(h1 / (w1 / w2_max)), int(w2_max)
    # Otherwise the height is the binding side.
    return int(h2_max), int(w1 / (h1 / h2_max))