File size: 9,392 Bytes
0b8359d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 |
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/anchor_generator.proto";
import "object_detection/protos/box_coder.proto";
import "object_detection/protos/box_predictor.proto";
import "object_detection/protos/hyperparams.proto";
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/losses.proto";
import "object_detection/protos/matcher.proto";
import "object_detection/protos/post_processing.proto";
import "object_detection/protos/region_similarity_calculator.proto";
// Configuration for Single Shot Detection (SSD) models.
// Next id: 27
message Ssd {
  // Number of classes to predict.
  optional int32 num_classes = 1;

  // Image resizer for preprocessing the input image.
  optional ImageResizer image_resizer = 2;

  // Feature extractor config.
  optional SsdFeatureExtractor feature_extractor = 3;

  // Box coder to encode the boxes.
  optional BoxCoder box_coder = 4;

  // Matcher to match groundtruth with anchors.
  optional Matcher matcher = 5;

  // Region similarity calculator to compute similarity of boxes.
  optional RegionSimilarityCalculator similarity_calculator = 6;

  // Whether background targets are to be encoded as an all
  // zeros vector or a one-hot vector (where background is the 0th class).
  optional bool encode_background_as_zeros = 12 [default = false];

  // Classification weight to be associated with negative
  // anchors (default: 1.0). The weight must be in [0., 1.].
  optional float negative_class_weight = 13 [default = 1.0];

  // Box predictor to attach to the features.
  optional BoxPredictor box_predictor = 7;

  // Anchor generator to compute anchors.
  optional AnchorGenerator anchor_generator = 8;

  // Post processing to apply on the predictions.
  optional PostProcessing post_processing = 9;

  // Whether to normalize the loss by number of groundtruth boxes that match to
  // the anchors.
  optional bool normalize_loss_by_num_matches = 10 [default = true];

  // Whether to normalize the localization loss by the code size of the box
  // encodings. This is applied along with other normalization factors.
  optional bool normalize_loc_loss_by_codesize = 14 [default = false];

  // Loss configuration for training.
  optional Loss loss = 11;

  // Whether to update batch norm parameters during training or not.
  // When training with a relatively small batch size (e.g. 1), it is
  // desirable to disable batch norm update and use pretrained batch norm
  // params.
  //
  // Note: Some feature extractors are used with canned arg_scopes
  // (e.g. resnet arg scopes). In these cases training behavior of batch norm
  // variables may depend on both values of `batch_norm_trainable` and
  // `is_training`.
  //
  // When canned arg_scopes are used with feature extractors `conv_hyperparams`
  // will apply only to the additional layers that are added and are outside the
  // canned arg_scope.
  optional bool freeze_batchnorm = 16 [default = false];

  // Whether to update batch_norm inplace during training. This is required
  // for batch norm to work correctly on TPUs. When this is false, user must add
  // a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in order
  // to update the batch norm moving average parameters.
  optional bool inplace_batchnorm_update = 15 [default = false];

  // Whether to add an implicit background class to one-hot encodings of
  // groundtruth labels. Set to false if training a single
  // class model or using an explicit background class.
  optional bool add_background_class = 21 [default = true];

  // Whether to use an explicit background class. Set to true if using
  // groundtruth labels with an explicit background class, as in multiclass
  // scores.
  optional bool explicit_background_class = 24 [default = false];

  // NOTE(review): undocumented in this file. Presumably selects groundtruth
  // confidences as the classification targets during training -- confirm
  // against the code that consumes this config.
  optional bool use_confidences_as_targets = 22 [default = false];

  // NOTE(review): undocumented in this file. Presumably the loss weight given
  // to implicitly labeled examples -- confirm against the consumer.
  optional float implicit_example_weight = 23 [default = 1.0];

  // NOTE(review): undocumented in this file. Presumably makes predict also
  // return the raw detections -- confirm against the consumer.
  optional bool return_raw_detections_during_predict = 26 [default = false];

  // Configuration proto for MaskHead.
  // Next id: 11
  message MaskHead {
    // The height and the width of the predicted mask. Only used when
    // predict_instance_masks is true.
    optional int32 mask_height = 1 [default = 15];
    optional int32 mask_width = 2 [default = 15];

    // Whether to predict class agnostic masks. Only used when
    // predict_instance_masks is true.
    optional bool masks_are_class_agnostic = 3 [default = true];

    // The depth for the first conv2d_transpose op applied to the
    // image_features in the mask prediction branch. If set to 0, the value
    // will be set automatically based on the number of channels in the image
    // features and the number of classes.
    optional int32 mask_prediction_conv_depth = 4 [default = 256];

    // The number of convolutions applied to image_features in the mask
    // prediction branch.
    optional int32 mask_prediction_num_conv_layers = 5 [default = 2];

    // Whether to apply convolutions on mask features before upsampling using
    // nearest neighbor resizing.
    // By default, mask features are resized to [`mask_height`, `mask_width`]
    // before applying convolutions and predicting masks.
    optional bool convolve_then_upsample_masks = 6 [default = false];

    // Mask loss weight.
    optional float mask_loss_weight = 7 [default = 5.0];

    // Number of boxes to be generated at training time for computing mask
    // loss.
    optional int32 mask_loss_sample_size = 8 [default = 16];

    // Hyperparameters for convolution ops used in the box predictor.
    optional Hyperparams conv_hyperparams = 9;

    // Output size (width and height are set to be the same) of the initial
    // bilinear interpolation based cropping during ROI pooling. Only used when
    // we have second stage prediction head enabled (e.g. mask head).
    optional int32 initial_crop_size = 10 [default = 15];
  }

  // Configs for mask head.
  optional MaskHead mask_head_config = 25;
}
// Configuration for the SSD feature extractor.
// Next id: 18.
// NOTE(review): ids 13-17 are not used by any field visible here; presumably
// they are held by fields defined elsewhere -- confirm before assigning them.
message SsdFeatureExtractor {
  reserved 6;

  // Type of ssd feature extractor.
  optional string type = 1;

  // The factor to alter the depth of the channels in the feature extractor.
  optional float depth_multiplier = 2 [default = 1.0];

  // Minimum number of the channels in the feature extractor.
  optional int32 min_depth = 3 [default = 16];

  // Hyperparameters that affect the layers of feature extractor added on top
  // of the base feature extractor.
  optional Hyperparams conv_hyperparams = 4;

  // Normally, SSD feature extractors are constructed by reusing an existing
  // base feature extractor (that has its own hyperparams) and adding new layers
  // on top of it. `conv_hyperparams` above normally applies only to the new
  // layers while base feature extractor uses its own default hyperparams. If
  // this value is set to true, the base feature extractor's hyperparams will be
  // overridden with the `conv_hyperparams`.
  optional bool override_base_feature_extractor_hyperparams = 9
      [default = false];

  // The nearest multiple to zero-pad the input height and width dimensions to.
  // For example, if pad_to_multiple = 2, input dimensions are zero-padded
  // until the resulting dimensions are even.
  optional int32 pad_to_multiple = 5 [default = 1];

  // Whether to use explicit padding when extracting SSD multiresolution
  // features. This will also apply to the base feature extractor if a MobileNet
  // architecture is used.
  optional bool use_explicit_padding = 7 [default = false];

  // Whether to use depthwise separable convolutions to extract the additional
  // feature maps added by SSD.
  optional bool use_depthwise = 8 [default = false];

  // Feature Pyramid Networks config.
  optional FeaturePyramidNetworks fpn = 10;

  // If true, replace preprocess function of feature extractor with a
  // placeholder. This should only be used if all the image preprocessing steps
  // happen outside the graph.
  optional bool replace_preprocessor_with_placeholder = 11 [default = false];

  // The number of SSD layers.
  optional int32 num_layers = 12 [default = 6];
}
// Configuration for Feature Pyramid Networks.
message FeaturePyramidNetworks {
// We recommend to use multi_resolution_feature_map_generator with FPN, and
// the levels there must match the levels defined below for better
// performance.
// Correspondence from FPN levels to Resnet/Mobilenet V1 feature maps:
// FPN Level   Resnet Feature Map   Mobilenet-V1 Feature Map
//     2       Block 1              Conv2d_3_pointwise
//     3       Block 2              Conv2d_5_pointwise
//     4       Block 3              Conv2d_11_pointwise
//     5       Block 4              Conv2d_13_pointwise
//     6       Bottomup_5           bottom_up_Conv2d_14
//     7       Bottomup_6           bottom_up_Conv2d_15
//     8       Bottomup_7           bottom_up_Conv2d_16
//     9       Bottomup_8           bottom_up_Conv2d_17
// minimum level in feature pyramid
optional int32 min_level = 1 [default = 3];
// maximum level in feature pyramid
optional int32 max_level = 2 [default = 7];
// channel depth for additional coarse feature layers.
optional int32 additional_layer_depth = 3 [default = 256];