""" | |
Mask R-CNN | |
Base Configurations class. | |
Copyright (c) 2017 Matterport, Inc. | |
Licensed under the MIT License (see LICENSE for details) | |
Written by Waleed Abdulla | |
""" | |
import math | |
import numpy as np | |
# Base Configuration Class
# Don't use this class directly. Instead, sub-class it and override
# the configurations you need to change.
class Config(object):
    """Base configuration class.

    For custom configurations, create a sub-class that inherits from this
    one and override the class attributes that need to change. Derived
    values (BATCH_SIZE, IMAGE_SHAPE, BACKBONE_SHAPES) are computed in
    ``__init__`` from the class attributes, so overrides must be made on
    the sub-class rather than on an instance after construction.

    Comments marked ``NOTE(review)`` describe settings for which this
    class defines no attribute.
    """
    # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
    # Useful if your code needs to do things differently depending on which
    # experiment is running.
    NAME = None  # Override in sub-classes

    # Number of GPUs to use. For CPU training, use 1
    GPU_COUNT = 1

    # Number of images to train with on each GPU. A 12GB GPU can typically
    # handle 2 images of 1024x1024px.
    # Adjust based on your GPU memory and image sizes. Use the highest
    # number that your GPU can handle for best performance.
    # This attribute is read by __init__ to compute BATCH_SIZE, so it must
    # always be defined.
    IMAGES_PER_GPU = 2

    # Number of training steps per epoch
    # This doesn't need to match the size of the training set. Tensorboard
    # updates are saved at the end of each epoch, so setting this to a
    # smaller number means getting more frequent TensorBoard updates.
    # Validation stats are also calculated at each epoch end and they
    # might take a while, so don't set this too small to avoid spending
    # a lot of time on validation stats.
    STEPS_PER_EPOCH = 1000

    # Number of validation steps to run at the end of every training epoch.
    # A bigger number improves accuracy of validation stats, but slows
    # down the training.
    # NOTE(review): no attribute is defined here for this setting
    # (presumably VALIDATION_STEPS) -- TODO confirm and restore.

    # Backbone network architecture
    # Supported values are: resnet50, resnet101
    BACKBONE = "resnet101"

    # The strides of each layer of the FPN Pyramid. These values
    # are based on a Resnet101 backbone.
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]

    # Number of classification classes (including background)
    NUM_CLASSES = 1  # Override in sub-classes

    # Length of square anchor side in pixels
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Ratios of anchors at each cell (width/height)
    # A value of 1 represents a square anchor, and 0.5 is a wide anchor
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]

    # Anchor stride
    # If 1 then anchors are created for each cell in the backbone feature map.
    # If 2, then anchors are created for every other cell, and so on.
    # NOTE(review): no attribute is defined here for this setting
    # (presumably RPN_ANCHOR_STRIDE) -- TODO confirm and restore.

    # Non-max suppression threshold to filter RPN proposals.
    # You can reduce this during training to generate more proposals.
    # NOTE(review): no attribute is defined here for this setting
    # (presumably RPN_NMS_THRESHOLD) -- TODO confirm and restore.

    # How many anchors per image to use for RPN training
    # NOTE(review): no attribute is defined here for this setting
    # (presumably RPN_TRAIN_ANCHORS_PER_IMAGE) -- TODO confirm and restore.

    # ROIs kept after non-maximum suppression (training and inference)
    # NOTE(review): no attributes are defined here for these settings
    # (presumably POST_NMS_ROIS_TRAINING / POST_NMS_ROIS_INFERENCE)
    # -- TODO confirm and restore.

    # If enabled, resizes instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    # Input image resizing
    # Images are resized such that the smallest side is >= IMAGE_MIN_DIM and
    # the longest side is <= IMAGE_MAX_DIM. In case both conditions can't
    # be satisfied together the IMAGE_MAX_DIM is enforced.
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1024

    # If True, pad images with zeros such that they're (max_dim by max_dim)
    IMAGE_PADDING = True  # currently, the False option is not supported

    # Image mean (RGB)
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])

    # Number of ROIs per image to feed to classifier/mask heads
    # The Mask RCNN paper uses 512 but often the RPN doesn't generate
    # enough positive proposals to fill this and keep a positive:negative
    # ratio of 1:3. You can increase the number of proposals by adjusting
    # the RPN NMS threshold.
    # NOTE(review): no attribute is defined here for this setting
    # (presumably TRAIN_ROIS_PER_IMAGE) -- TODO confirm and restore.

    # Percent of positive ROIs used to train classifier/mask heads
    # NOTE(review): no attribute is defined here for this setting
    # (presumably ROI_POSITIVE_RATIO) -- TODO confirm and restore.

    # Pooled ROIs
    POOL_SIZE = 7
    MASK_SHAPE = [28, 28]

    # Maximum number of ground truth instances to use in one image
    # NOTE(review): no attribute is defined here for this setting
    # (presumably MAX_GT_INSTANCES) -- TODO confirm and restore.

    # Bounding box refinement standard deviation for RPN and final detections.
    RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])

    # Max number of final detections
    # NOTE(review): no attribute is defined here for this setting
    # (presumably DETECTION_MAX_INSTANCES) -- TODO confirm and restore.

    # Minimum probability value to accept a detected instance
    # ROIs below this threshold are skipped
    # NOTE(review): no attribute is defined here for this setting
    # (presumably DETECTION_MIN_CONFIDENCE) -- TODO confirm and restore.

    # Non-maximum suppression threshold for detection
    # NOTE(review): no attribute is defined here for this setting
    # (presumably DETECTION_NMS_THRESHOLD) -- TODO confirm and restore.

    # Learning rate and momentum
    # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
    # weights to explode. Likely due to differences in optimizer
    # implementation.
    # NOTE(review): only the learning rate is defined here; no momentum
    # attribute (presumably LEARNING_MOMENTUM) exists -- TODO confirm.
    LEARNING_RATE = 0.001

    # Weight decay regularization
    WEIGHT_DECAY = 0.0001

    # Use RPN ROIs or externally generated ROIs for training
    # Keep this True for most situations. Set to False if you want to train
    # the head branches on ROI generated by code rather than the ROIs from
    # the RPN. For example, to debug the classifier head without having to
    # train the RPN.
    USE_RPN_ROIS = True

    def __init__(self):
        """Set values of computed attributes.

        Derives BATCH_SIZE, IMAGE_SHAPE and BACKBONE_SHAPES from the class
        attributes (including any overridden by a sub-class) and stores
        them on the instance.
        """
        # Effective batch size
        self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT

        # Input image size: IMAGE_MAX_DIM on both sides, 3 as last dimension
        self.IMAGE_SHAPE = np.array(
            [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3])

        # Compute backbone size from input image size: one [rows, cols]
        # pair per stride, rounded up so partial cells are counted.
        self.BACKBONE_SHAPES = np.array(
            [[int(math.ceil(self.IMAGE_SHAPE[0] / stride)),
              int(math.ceil(self.IMAGE_SHAPE[1] / stride))]
             for stride in self.BACKBONE_STRIDES])

    def display(self):
        """Display Configuration values.

        Prints every attribute whose name does not start with a double
        underscore and whose value is not callable, one per line.
        """
        print("\nConfigurations:")
        for a in dir(self):
            if not a.startswith("__") and not callable(getattr(self, a)):
                print("{:30} {}".format(a, getattr(self, a)))
        print("\n")