syntax = "proto2"; | |
package object_detection.protos; | |
import "object_detection/protos/anchor_generator.proto"; | |
import "object_detection/protos/box_coder.proto"; | |
import "object_detection/protos/box_predictor.proto"; | |
import "object_detection/protos/hyperparams.proto"; | |
import "object_detection/protos/image_resizer.proto"; | |
import "object_detection/protos/losses.proto"; | |
import "object_detection/protos/matcher.proto"; | |
import "object_detection/protos/post_processing.proto"; | |
import "object_detection/protos/region_similarity_calculator.proto"; | |
// Configuration for Single Shot Detection (SSD) models.
// Next id: 27
// NOTE(review): field numbers 17-20 are neither declared nor reserved in this
// message; confirm their history before ever assigning them.
message Ssd {
  // Number of classes to predict.
  optional int32 num_classes = 1;

  // Image resizer for preprocessing the input image.
  optional ImageResizer image_resizer = 2;

  // Feature extractor config.
  optional SsdFeatureExtractor feature_extractor = 3;

  // Box coder to encode the boxes.
  optional BoxCoder box_coder = 4;

  // Matcher to match groundtruth with anchors.
  optional Matcher matcher = 5;

  // Region similarity calculator to compute similarity of boxes.
  optional RegionSimilarityCalculator similarity_calculator = 6;

  // Whether background targets are to be encoded as an all
  // zeros vector or a one-hot vector (where background is the 0th class).
  optional bool encode_background_as_zeros = 12 [default = false];

  // Classification weight to be associated with negative
  // anchors (default: 1.0). The weight must be in [0., 1.].
  optional float negative_class_weight = 13 [default = 1.0];

  // Box predictor to attach to the features.
  optional BoxPredictor box_predictor = 7;

  // Anchor generator to compute anchors.
  optional AnchorGenerator anchor_generator = 8;

  // Post processing to apply on the predictions.
  optional PostProcessing post_processing = 9;

  // Whether to normalize the loss by number of groundtruth boxes that match to
  // the anchors.
  optional bool normalize_loss_by_num_matches = 10 [default = true];

  // Whether to normalize the localization loss by the code size of the box
  // encodings. This is applied along with other normalization factors.
  optional bool normalize_loc_loss_by_codesize = 14 [default = false];

  // Loss configuration for training.
  optional Loss loss = 11;

  // Whether to update batch norm parameters during training or not.
  // When training with a relatively small batch size (e.g. 1), it is
  // desirable to disable batch norm update and use pretrained batch norm
  // params.
  // NOTE(review): the field name suggests that `true` means "do NOT update"
  // (freeze), i.e. the inverse of the first sentence above. Confirm the
  // polarity against the code that consumes this field.
  //
  // Note: Some feature extractors are used with canned arg_scopes
  // (e.g. resnet arg scopes). In these cases training behavior of batch norm
  // variables may depend on both values of `batch_norm_trainable` and
  // `is_training`.
  // NOTE(review): no field named `batch_norm_trainable` is declared in this
  // message; presumably it is derived from `freeze_batchnorm`. Confirm.
  //
  // When canned arg_scopes are used with feature extractors `conv_hyperparams`
  // will apply only to the additional layers that are added and are outside the
  // canned arg_scope.
  optional bool freeze_batchnorm = 16 [default = false];

  // Whether to update batch_norm inplace during training. This is required
  // for batch norm to work correctly on TPUs. When this is false, user must add
  // a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in order
  // to update the batch norm moving average parameters.
  optional bool inplace_batchnorm_update = 15 [default = false];

  // Whether to add an implicit background class to one-hot encodings of
  // groundtruth labels. Set to false if training a single
  // class model or using an explicit background class.
  optional bool add_background_class = 21 [default = true];

  // Whether to use an explicit background class. Set to true if using
  // groundtruth labels with an explicit background class, as in multiclass
  // scores.
  optional bool explicit_background_class = 24 [default = false];

  // NOTE(review): undocumented in this file. Judging by the name, this selects
  // groundtruth confidences as the classification targets; confirm against
  // the code that consumes this field.
  optional bool use_confidences_as_targets = 22 [default = false];

  // NOTE(review): undocumented in this file. Presumably a weight applied to
  // implicitly labeled examples; confirm the exact semantics.
  optional float implicit_example_weight = 23 [default = 1.0];

  // NOTE(review): undocumented in this file. Presumably makes the predict
  // step also return detections before post-processing; confirm.
  optional bool return_raw_detections_during_predict = 26 [default = false];

  // Configuration proto for MaskHead.
  // Next id: 11
  message MaskHead {
    // The height and the width of the predicted mask. Only used when
    // predict_instance_masks is true.
    optional int32 mask_height = 1 [default = 15];
    optional int32 mask_width = 2 [default = 15];

    // Whether to predict class agnostic masks. Only used when
    // predict_instance_masks is true.
    optional bool masks_are_class_agnostic = 3 [default = true];

    // The depth for the first conv2d_transpose op applied to the
    // image_features in the mask prediction branch. If set to 0, the value
    // will be set automatically based on the number of channels in the image
    // features and the number of classes.
    optional int32 mask_prediction_conv_depth = 4 [default = 256];

    // The number of convolutions applied to image_features in the mask
    // prediction branch.
    optional int32 mask_prediction_num_conv_layers = 5 [default = 2];

    // Whether to apply convolutions on mask features before upsampling using
    // nearest neighbor resizing.
    // By default, mask features are resized to [`mask_height`, `mask_width`]
    // before applying convolutions and predicting masks.
    optional bool convolve_then_upsample_masks = 6 [default = false];

    // Mask loss weight.
    optional float mask_loss_weight = 7 [default = 5.0];

    // Number of boxes to be generated at training time for computing mask
    // loss.
    optional int32 mask_loss_sample_size = 8 [default = 16];

    // Hyperparameters for convolution ops used in the box predictor.
    // NOTE(review): the wording mentions the box predictor although this
    // field belongs to MaskHead; confirm which ops actually consume it.
    optional Hyperparams conv_hyperparams = 9;

    // Output size (width and height are set to be the same) of the initial
    // bilinear interpolation based cropping during ROI pooling. Only used when
    // we have second stage prediction head enabled (e.g. mask head).
    optional int32 initial_crop_size = 10 [default = 15];
  }

  // Configs for mask head.
  optional MaskHead mask_head_config = 25;
}
// Configuration for the feature extractor used by an SSD model.
// Next id: 18.
// NOTE(review): the highest field number declared below is 12, and 13-17 are
// neither declared nor reserved in this file. Presumably they are taken
// elsewhere; confirm before allocating a new number.
message SsdFeatureExtractor {
  // Field number 6 is reserved and must not be reused.
  reserved 6;

  // Type of ssd feature extractor.
  optional string type = 1;

  // The factor to alter the depth of the channels in the feature extractor.
  optional float depth_multiplier = 2 [default = 1.0];

  // Minimum number of the channels in the feature extractor.
  optional int32 min_depth = 3 [default = 16];

  // Hyperparameters that affect the layers of feature extractor added on top
  // of the base feature extractor.
  optional Hyperparams conv_hyperparams = 4;

  // Normally, SSD feature extractors are constructed by reusing an existing
  // base feature extractor (that has its own hyperparams) and adding new layers
  // on top of it. `conv_hyperparams` above normally applies only to the new
  // layers while base feature extractor uses its own default hyperparams. If
  // this value is set to true, the base feature extractor's hyperparams will be
  // overridden with the `conv_hyperparams`.
  optional bool override_base_feature_extractor_hyperparams = 9
      [default = false];

  // The nearest multiple to zero-pad the input height and width dimensions to.
  // For example, if pad_to_multiple = 2, input dimensions are zero-padded
  // until the resulting dimensions are even.
  optional int32 pad_to_multiple = 5 [default = 1];

  // Whether to use explicit padding when extracting SSD multiresolution
  // features. This will also apply to the base feature extractor if a MobileNet
  // architecture is used.
  optional bool use_explicit_padding = 7 [default = false];

  // Whether to use depthwise separable convolutions to extract additional
  // feature maps added by SSD.
  optional bool use_depthwise = 8 [default = false];

  // Feature Pyramid Networks config.
  optional FeaturePyramidNetworks fpn = 10;

  // If true, replace preprocess function of feature extractor with a
  // placeholder. This should only be used if all the image preprocessing steps
  // happen outside the graph.
  optional bool replace_preprocessor_with_placeholder = 11 [default = false];

  // The number of SSD layers.
  optional int32 num_layers = 12 [default = 6];
}
// Configuration for Feature Pyramid Networks.
message FeaturePyramidNetworks {
  // We recommend to use multi_resolution_feature_map_generator with FPN, and
  // the levels there must match the levels defined below for better
  // performance.
  // Correspondence from FPN levels to Resnet/Mobilenet V1 feature maps:
  // FPN Level   Resnet Feature Map   Mobilenet-V1 Feature Map
  //     2       Block 1              Conv2d_3_pointwise
  //     3       Block 2              Conv2d_5_pointwise
  //     4       Block 3              Conv2d_11_pointwise
  //     5       Block 4              Conv2d_13_pointwise
  //     6       Bottomup_5           bottom_up_Conv2d_14
  //     7       Bottomup_6           bottom_up_Conv2d_15
  //     8       Bottomup_7           bottom_up_Conv2d_16
  //     9       Bottomup_8           bottom_up_Conv2d_17

  // Minimum level in feature pyramid.
  optional int32 min_level = 1 [default = 3];

  // Maximum level in feature pyramid.
  optional int32 max_level = 2 [default = 7];

  // Channel depth for additional coarse feature layers.
  optional int32 additional_layer_depth = 3 [default = 256];
}