|
""" |
|
Mask R-CNN |
|
Configurations and data loading code for MS COCO. |
|
|
|
Copyright (c) 2017 Matterport, Inc. |
|
Licensed under the MIT License (see LICENSE for details) |
|
Written by Waleed Abdulla |
|
|
|
------------------------------------------------------------ |
|
|
|
Usage: import the module (see Jupyter notebooks for examples), or run from |
|
the command line as such: |
|
|
|
# Train a new model starting from pre-trained COCO weights |
|
python3 coco.py train --dataset=/path/to/coco/ --model=coco |
|
|
|
# Train a new model starting from ImageNet weights |
|
python3 coco.py train --dataset=/path/to/coco/ --model=imagenet |
|
|
|
# Continue training a model that you had trained earlier |
|
python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5 |
|
|
|
# Continue training the last model you trained |
|
python3 coco.py train --dataset=/path/to/coco/ --model=last |
|
|
|
    # Run COCO evaluation on the last model you trained
|
python3 coco.py evaluate --dataset=/path/to/coco/ --model=last |
|
""" |
|
|
|
import os |
|
import time |
|
import numpy as np |
|
|
from pycocotools.coco import COCO |
|
from pycocotools.cocoeval import COCOeval |
|
from pycocotools import mask as maskUtils |
|
|
|
import zipfile |
|
import urllib.request |
|
import shutil |
|
|
|
from config import Config |
|
import utils |
|
import model as modellib |
|
|
|
|
|
# Root directory of the project
ROOT_DIR = os.getcwd()
|
|
|
|
|
# Path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
|
|
|
|
|
|
|
# Directory to save logs and model checkpoints, if not provided
# through the command line argument --logs
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
DEFAULT_DATASET_YEAR = "2014"
|
|
class CocoConfig(Config): |
|
"""Configuration for training on MS COCO. |
|
Derives from the base Config class and overrides values specific |
|
to the COCO dataset. |
|
""" |
|
|
|
    # Give the configuration a recognizable name
    NAME = "coco"
|
|
|
|
|
|
|
    # We use a GPU with 12GB memory, which can fit two images.
    # Adjust down if you use a smaller GPU.
    IMAGES_PER_GPU = 2
|
|
|
|
|
|
|
|
|
|
|
    # Number of classes (including background)
    NUM_CLASSES = 1 + 80  # COCO has 80 classes
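
    # Example (illustrative sketch): to adapt this configuration, subclass it
    # and override only what you need; the class name below is hypothetical.
    #
    #   class SmallGPUCocoConfig(CocoConfig):
    #       IMAGES_PER_GPU = 1
    #
    #   config = SmallGPUCocoConfig()
    #   config.display()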
|
|
class CocoDataset(utils.Dataset): |
|
def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None, |
|
class_map=None, return_coco=False, auto_download=False): |
|
"""Load a subset of the COCO dataset. |
|
dataset_dir: The root directory of the COCO dataset. |
|
subset: What to load (train, val, minival, valminusminival) |
|
year: What dataset year to load (2014, 2017) as a string, not an integer |
|
class_ids: If provided, only loads images that have the given classes. |
|
        class_map: TODO: Not implemented yet. Supports mapping classes from
|
different datasets to the same class ID. |
|
return_coco: If True, returns the COCO object. |
|
auto_download: Automatically download and unzip MS-COCO images and annotations |
|
""" |
|
|
|
if auto_download is True: |
|
self.auto_download(dataset_dir, subset, year) |
|
|
|
coco = COCO("{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year)) |
|
if subset == "minival" or subset == "valminusminival": |
|
subset = "val" |
|
image_dir = "{}/{}{}".format(dataset_dir, subset, year) |
|
|
|
|
|
if not class_ids: |
|
|
|
class_ids = sorted(coco.getCatIds()) |
|
|
|
|
|
if class_ids: |
|
image_ids = [] |
|
for id in class_ids: |
|
image_ids.extend(list(coco.getImgIds(catIds=[id]))) |
|
|
|
image_ids = list(set(image_ids)) |
|
else: |
|
|
|
image_ids = list(coco.imgs.keys()) |
|
|
|
|
|
for i in class_ids: |
|
self.add_class("coco", i, coco.loadCats(i)[0]["name"]) |
|
|
|
|
|
for i in image_ids: |
|
self.add_image( |
|
"coco", image_id=i, |
|
path=os.path.join(image_dir, coco.imgs[i]['file_name']), |
|
width=coco.imgs[i]["width"], |
|
height=coco.imgs[i]["height"], |
|
annotations=coco.loadAnns(coco.getAnnIds( |
|
imgIds=[i], catIds=class_ids, iscrowd=None))) |
|
if return_coco: |
|
return coco |
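
    # Example (sketch): load the 2017 training split, keeping only images that
    # contain the "person" class (COCO category ID 1). Assumes the dataset
    # already exists under /path/to/coco.
    #
    #   dataset = CocoDataset()
    #   dataset.load_coco("/path/to/coco", "train", year="2017", class_ids=[1])
    #   dataset.prepare()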
|
|
|
def auto_download(self, dataDir, dataType, dataYear): |
|
"""Download the COCO dataset/annotations if requested. |
|
dataDir: The root directory of the COCO dataset. |
|
dataType: What to load (train, val, minival, valminusminival) |
|
dataYear: What dataset year to load (2014, 2017) as a string, not an integer |
|
Note: |
|
For 2014, use "train", "val", "minival", or "valminusminival" |
|
For 2017, only "train" and "val" annotations are available |
|
""" |
|
|
|
|
|
if dataType == "minival" or dataType == "valminusminival": |
|
imgDir = "{}/{}{}".format(dataDir, "val", dataYear) |
|
imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) |
|
imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format("val", dataYear) |
|
else: |
|
imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) |
|
imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) |
|
imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format(dataType, dataYear) |
|
|
|
|
|
|
|
if not os.path.exists(dataDir): |
|
os.makedirs(dataDir) |
|
|
|
|
|
if not os.path.exists(imgDir): |
|
os.makedirs(imgDir) |
|
print("Downloading images to " + imgZipFile + " ...") |
|
with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: |
|
shutil.copyfileobj(resp, out) |
|
print("... done downloading.") |
|
print("Unzipping " + imgZipFile) |
|
with zipfile.ZipFile(imgZipFile, "r") as zip_ref: |
|
zip_ref.extractall(dataDir) |
|
print("... done unzipping") |
|
print("Will use images in " + imgDir) |
|
|
|
|
|
annDir = "{}/annotations".format(dataDir) |
|
if dataType == "minival": |
|
annZipFile = "{}/instances_minival2014.json.zip".format(dataDir) |
|
annFile = "{}/instances_minival2014.json".format(annDir) |
|
annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0" |
|
unZipDir = annDir |
|
elif dataType == "valminusminival": |
|
annZipFile = "{}/instances_valminusminival2014.json.zip".format(dataDir) |
|
annFile = "{}/instances_valminusminival2014.json".format(annDir) |
|
annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0" |
|
unZipDir = annDir |
|
else: |
|
annZipFile = "{}/annotations_trainval{}.zip".format(dataDir, dataYear) |
|
annFile = "{}/instances_{}{}.json".format(annDir, dataType, dataYear) |
|
annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format(dataYear) |
|
unZipDir = dataDir |
|
|
|
|
|
|
|
if not os.path.exists(annDir): |
|
os.makedirs(annDir) |
|
if not os.path.exists(annFile): |
|
if not os.path.exists(annZipFile): |
|
print("Downloading zipped annotations to " + annZipFile + " ...") |
|
with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out: |
|
shutil.copyfileobj(resp, out) |
|
print("... done downloading.") |
|
print("Unzipping " + annZipFile) |
|
with zipfile.ZipFile(annZipFile, "r") as zip_ref: |
|
zip_ref.extractall(unZipDir) |
|
print("... done unzipping") |
|
print("Will use annotations in " + annFile) |
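
    # Example (sketch): auto-download is normally triggered through load_coco();
    # this fetches the val2017 images and annotations into /path/to/coco if they
    # are not already on disk.
    #
    #   dataset = CocoDataset()
    #   dataset.load_coco("/path/to/coco", "val", year="2017", auto_download=True)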
|
|
|
def load_mask(self, image_id): |
|
"""Load instance masks for the given image. |
|
|
|
Different datasets use different ways to store masks. This |
|
        function converts the different mask formats into one format:
        a bitmap of shape [height, width, instances].
|
|
|
Returns: |
|
masks: A bool array of shape [height, width, instance count] with |
|
one mask per instance. |
|
class_ids: a 1D array of class IDs of the instance masks. |
|
""" |
|
|
|
image_info = self.image_info[image_id] |
|
if image_info["source"] != "coco": |
|
return super(CocoDataset, self).load_mask(image_id) |
|
|
|
instance_masks = [] |
|
class_ids = [] |
|
annotations = self.image_info[image_id]["annotations"] |
|
|
|
|
|
for annotation in annotations: |
|
class_id = self.map_source_class_id( |
|
"coco.{}".format(annotation['category_id'])) |
|
if class_id: |
|
m = self.annToMask(annotation, image_info["height"], |
|
image_info["width"]) |
|
|
|
|
|
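                # Some objects are so small that they're less than 1 pixel area
                # and end up rounded out. Skip those objects.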
if m.max() < 1: |
|
continue |
|
|
|
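                # Crowd annotations ("iscrowd": 1) are marked with a negative
                # class ID.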
if annotation['iscrowd']: |
|
|
|
class_id *= -1 |
|
|
|
|
|
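                # annToMask() sometimes returns a mask smaller than the given
                # dimensions (typically for crowd annotations); use a
                # full-image mask in that case.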
if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]: |
|
m = np.ones([image_info["height"], image_info["width"]], dtype=bool) |
|
instance_masks.append(m) |
|
class_ids.append(class_id) |
|
|
|
|
|
if class_ids: |
|
mask = np.stack(instance_masks, axis=2) |
|
class_ids = np.array(class_ids, dtype=np.int32) |
|
return mask, class_ids |
|
else: |
|
|
|
return super(CocoDataset, self).load_mask(image_id) |
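
    # Example (sketch): on a prepared dataset, load_mask() returns one boolean
    # mask per instance plus the matching class IDs.
    #
    #   masks, class_ids = dataset.load_mask(image_id)
    #   # masks.shape     -> (height, width, num_instances)
    #   # class_ids.shape -> (num_instances,)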
|
|
|
    def image_reference(self, image_id):
        """Return a link to the image on the COCO website."""
|
info = self.image_info[image_id] |
|
if info["source"] == "coco": |
|
return "http://cocodataset.org/#explore?id={}".format(info["id"]) |
|
else: |
|
            return super(CocoDataset, self).image_reference(image_id)
|
|
|
|
|
|
|
    def annToRLE(self, ann, height, width):
        """
        Convert an annotation, which can be polygons, uncompressed RLE, or RLE,
        to RLE.
        :return: RLE (run-length encoding) of the annotation's mask
        """
|
segm = ann['segmentation'] |
|
if isinstance(segm, list): |
|
|
|
|
|
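            # Polygon: a single object can consist of multiple parts;
            # merge all part RLEs into one.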
rles = maskUtils.frPyObjects(segm, height, width) |
|
rle = maskUtils.merge(rles) |
|
elif isinstance(segm['counts'], list): |
|
|
|
rle = maskUtils.frPyObjects(segm, height, width) |
|
else: |
|
|
|
rle = ann['segmentation'] |
|
return rle |
|
|
|
    def annToMask(self, ann, height, width):
        """
        Convert an annotation, which can be polygons, uncompressed RLE, or RLE,
        to a binary mask.
        :return: binary mask (numpy 2D array)
        """
|
rle = self.annToRLE(ann, height, width) |
|
m = maskUtils.decode(rle) |
|
return m |
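
    # Example (sketch): converting a hand-made polygon annotation to a mask.
    # "segmentation" is a list of [x1, y1, x2, y2, ...] polygons, as in the
    # COCO annotation format; the values here are made up.
    #
    #   ann = {"segmentation": [[10, 10, 50, 10, 50, 50, 10, 50]]}
    #   m = dataset.annToMask(ann, height=100, width=100)  # 100x100 uint8 array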
|
|
def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
    """Arrange results to match the COCO specs in http://cocodataset.org/#format
    """
|
|
|
if rois is None: |
|
return [] |
|
|
|
results = [] |
|
for image_id in image_ids: |
|
|
|
for i in range(rois.shape[0]): |
|
class_id = class_ids[i] |
|
score = scores[i] |
|
bbox = np.around(rois[i], 1) |
|
mask = masks[:, :, i] |
|
|
|
result = { |
|
"image_id": image_id, |
|
"category_id": dataset.get_source_class_id(class_id, "coco"), |
|
"bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]], |
|
"score": score, |
|
"segmentation": maskUtils.encode(np.asfortranarray(mask)) |
|
} |
|
results.append(result) |
|
return results |
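
# Example (sketch) of one entry in the list returned above; "bbox" follows the
# COCO convention [x, y, width, height] and "segmentation" is compressed RLE
# (the values shown are made up):
#
#   {"image_id": 42, "category_id": 1, "bbox": [10.0, 20.0, 30.0, 40.0],
#    "score": 0.9, "segmentation": {"size": [480, 640], "counts": "..."}}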
|
|
|
|
|
def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None): |
|
"""Runs official COCO evaluation. |
|
    dataset: A Dataset object with validation data
|
eval_type: "bbox" or "segm" for bounding box or segmentation evaluation |
|
limit: if not 0, it's the number of images to use for evaluation |
|
""" |
|
|
|
image_ids = image_ids or dataset.image_ids |
|
|
|
|
|
if limit: |
|
image_ids = image_ids[:limit] |
|
|
|
|
|
coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids] |
|
|
|
t_prediction = 0 |
|
t_start = time.time() |
|
|
|
results = [] |
|
for i, image_id in enumerate(image_ids): |
|
|
|
image = dataset.load_image(image_id) |
|
|
|
|
|
t = time.time() |
|
r = model.detect([image], verbose=0)[0] |
|
t_prediction += (time.time() - t) |
|
|
|
|
|
image_results = build_coco_results(dataset, coco_image_ids[i:i + 1], |
|
r["rois"], r["class_ids"], |
|
r["scores"], r["masks"]) |
|
results.extend(image_results) |
|
|
|
|
|
coco_results = coco.loadRes(results) |
|
|
|
|
|
cocoEval = COCOeval(coco, coco_results, eval_type) |
|
cocoEval.params.imgIds = coco_image_ids |
|
cocoEval.evaluate() |
|
cocoEval.accumulate() |
|
cocoEval.summarize() |
|
|
|
print("Prediction time: {}. Average {}/image".format( |
|
t_prediction, t_prediction / len(image_ids))) |
|
print("Total time: ", time.time() - t_start) |
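
# Example (sketch): segmentation-mAP evaluation on 50 minival images, given an
# inference-mode `model` with weights already loaded (mirrors the "evaluate"
# branch below).
#
#   dataset_val = CocoDataset()
#   coco = dataset_val.load_coco("/path/to/coco", "minival", return_coco=True)
#   dataset_val.prepare()
#   evaluate_coco(model, dataset_val, coco, "segm", limit=50)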
|
|
if __name__ == '__main__': |
|
import argparse |
|
|
|
|
|
parser = argparse.ArgumentParser( |
|
description='Train Mask R-CNN on MS COCO.') |
|
parser.add_argument("command", |
|
metavar="<command>", |
|
help="'train' or 'evaluate' on MS COCO") |
|
parser.add_argument('--dataset', required=True, |
|
metavar="/path/to/coco/", |
|
help='Directory of the MS-COCO dataset') |
|
parser.add_argument('--year', required=False, |
|
default=DEFAULT_DATASET_YEAR, |
|
metavar="<year>", |
|
help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)') |
|
parser.add_argument('--model', required=True, |
|
metavar="/path/to/weights.h5", |
|
help="Path to weights .h5 file or 'coco'") |
|
parser.add_argument('--logs', required=False, |
|
default=DEFAULT_LOGS_DIR, |
|
metavar="/path/to/logs/", |
|
help='Logs and checkpoints directory (default=logs/)') |
|
parser.add_argument('--limit', required=False, |
|
default=500, |
|
metavar="<image count>", |
|
help='Images to use for evaluation (default=500)') |
|
    # Note: a bare type=bool would treat any non-empty string (e.g. "False")
    # as True, so parse the flag value explicitly.
    parser.add_argument('--download', required=False,
                        default=False,
                        metavar="<True|False>",
                        help='Automatically download and unzip MS-COCO files (default=False)',
                        type=lambda v: str(v).lower() in ('true', '1', 'yes'))
|
args = parser.parse_args() |
|
print("Command: ", args.command) |
|
print("Model: ", args.model) |
|
print("Dataset: ", args.dataset) |
|
print("Year: ", args.year) |
|
print("Logs: ", args.logs) |
|
print("Auto Download: ", args.download) |
|
|
|
|
|
if args.command == "train": |
|
config = CocoConfig() |
|
else: |
|
class InferenceConfig(CocoConfig): |
|
|
|
|
|
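            # Run detection on one image at a time:
            # batch size = GPU_COUNT * IMAGES_PER_GPU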
GPU_COUNT = 1 |
|
IMAGES_PER_GPU = 1 |
|
DETECTION_MIN_CONFIDENCE = 0 |
|
config = InferenceConfig() |
|
config.display() |
|
|
|
|
|
if args.command == "train": |
|
model = modellib.MaskRCNN(mode="training", config=config, |
|
model_dir=args.logs) |
|
else: |
|
model = modellib.MaskRCNN(mode="inference", config=config, |
|
model_dir=args.logs) |
|
|
|
|
|
if args.model.lower() == "coco": |
|
model_path = COCO_MODEL_PATH |
|
elif args.model.lower() == "last": |
|
|
|
model_path = model.find_last()[1] |
|
elif args.model.lower() == "imagenet": |
|
|
|
model_path = model.get_imagenet_weights() |
|
else: |
|
model_path = args.model |
|
|
|
|
|
print("Loading weights ", model_path) |
|
model.load_weights(model_path, by_name=True) |
|
|
|
|
|
if args.command == "train": |
|
|
|
|
|
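        # Training dataset: the training set plus most of the validation set
        # (the valminusminival subset).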
dataset_train = CocoDataset() |
|
dataset_train.load_coco(args.dataset, "train", year=args.year, auto_download=args.download) |
|
dataset_train.load_coco(args.dataset, "valminusminival", year=args.year, auto_download=args.download) |
|
dataset_train.prepare() |
|
|
|
|
|
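        # Validation dataset (the minival subset)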
dataset_val = CocoDataset() |
|
dataset_val.load_coco(args.dataset, "minival", year=args.year, auto_download=args.download) |
|
dataset_val.prepare() |
|
|
|
|
|
|
|
|
|
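        # This training schedule is an example. Adjust it to your needs.

        # Training - Stage 1: train the network heads only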
print("Training network heads") |
|
model.train(dataset_train, dataset_val, |
|
learning_rate=config.LEARNING_RATE, |
|
epochs=40, |
|
layers='heads') |
|
|
|
|
|
|
|
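        # Training - Stage 2: fine-tune ResNet stage 4 and up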
        print("Fine tune ResNet stage 4 and up")
|
model.train(dataset_train, dataset_val, |
|
learning_rate=config.LEARNING_RATE, |
|
epochs=120, |
|
layers='4+') |
|
|
|
|
|
|
|
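        # Training - Stage 3: fine-tune all layers at a lower learning rate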
print("Fine tune all layers") |
|
model.train(dataset_train, dataset_val, |
|
learning_rate=config.LEARNING_RATE / 10, |
|
epochs=160, |
|
layers='all') |
|
|
|
elif args.command == "evaluate": |
|
|
|
dataset_val = CocoDataset() |
|
coco = dataset_val.load_coco(args.dataset, "minival", year=args.year, return_coco=True, auto_download=args.download) |
|
dataset_val.prepare() |
|
print("Running COCO evaluation on {} images.".format(args.limit)) |
|
evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit)) |
|
else: |
|
print("'{}' is not recognized. " |
|
"Use 'train' or 'evaluate'".format(args.command)) |
|
|