Spaces:

NCTCMumbai
/

NCTC

Running

App Files Files Community

NCTC / models /research /object_detection /protos /faster_rcnn.proto

NCTCMumbai

Upload 2571 files

0b8359d over 1 year ago

raw

history blame

9.34 kB

	syntax = "proto2";

	package object_detection.protos;

	import "object_detection/protos/anchor_generator.proto";
	import "object_detection/protos/box_predictor.proto";
	import "object_detection/protos/hyperparams.proto";
	import "object_detection/protos/image_resizer.proto";
	import "object_detection/protos/losses.proto";
	import "object_detection/protos/post_processing.proto";

	// Configuration for Faster R-CNN models.
	// See meta_architectures/faster_rcnn_meta_arch.py and builders/model_builder.py
	//
	// Naming conventions:
	// Faster R-CNN models have two stages: a first stage region proposal network
	// (or RPN) and a second stage box classifier. We thus use the prefixes
	// `first_stage_` and `second_stage_` to indicate the stage to which each
	// parameter pertains when relevant.
	message FasterRcnn {
	  // Field number 2 is not assigned to any field of this message. It is
	  // reserved so that a future field cannot claim it by accident.
	  // NOTE(review): presumably it belonged to a removed field -- confirm.
	  reserved 2;

	  // Number of stages of the model to construct. The default of 2 builds both
	  // the first stage RPN and the second stage box classifier.
	  // NOTE(review): a value of 1 presumably builds only the Region Proposal
	  // Network (RPN); confirm the accepted values in
	  // meta_architectures/faster_rcnn_meta_arch.py.
	  optional int32 number_of_stages = 1 [default = 2];

	  // Number of classes to predict.
	  optional int32 num_classes = 3;

	  // Image resizer for preprocessing the input image.
	  optional ImageResizer image_resizer = 4;

	  // Feature extractor config.
	  optional FasterRcnnFeatureExtractor feature_extractor = 5;

	  // (First stage) region proposal network (RPN) parameters.

	  // Anchor generator to compute RPN anchors.
	  optional AnchorGenerator first_stage_anchor_generator = 6;

	  // Atrous rate for the convolution op applied to the
	  // `first_stage_features_to_crop` tensor to obtain box predictions.
	  optional int32 first_stage_atrous_rate = 7 [default = 1];

	  // Hyperparameters for the convolutional RPN box predictor.
	  optional Hyperparams first_stage_box_predictor_conv_hyperparams = 8;

	  // Kernel size to use for the convolution op just prior to RPN box
	  // predictions.
	  optional int32 first_stage_box_predictor_kernel_size = 9 [default = 3];

	  // Output depth for the convolution op just prior to RPN box predictions.
	  optional int32 first_stage_box_predictor_depth = 10 [default = 512];

	  // The batch size to use for computing the first stage objectness and
	  // location losses.
	  optional int32 first_stage_minibatch_size = 11 [default = 256];

	  // Fraction of positive examples per image for the RPN.
	  optional float first_stage_positive_balance_fraction = 12 [default = 0.5];

	  // Non max suppression score threshold applied to first stage RPN proposals.
	  optional float first_stage_nms_score_threshold = 13 [default = 0.0];

	  // Non max suppression IOU threshold applied to first stage RPN proposals.
	  optional float first_stage_nms_iou_threshold = 14 [default = 0.7];

	  // Maximum number of RPN proposals retained after first stage
	  // postprocessing.
	  optional int32 first_stage_max_proposals = 15 [default = 300];

	  // First stage RPN localization loss weight.
	  optional float first_stage_localization_loss_weight = 16 [default = 1.0];

	  // First stage RPN objectness loss weight.
	  optional float first_stage_objectness_loss_weight = 17 [default = 1.0];

	  // Per-region cropping parameters.
	  // Note that if a R-FCN model is constructed the per region cropping
	  // parameters below are ignored.

	  // Output size (width and height are set to be the same) of the initial
	  // bilinear interpolation based cropping during ROI pooling.
	  optional int32 initial_crop_size = 18;

	  // Kernel size of the max pool op on the cropped feature map during
	  // ROI pooling.
	  optional int32 maxpool_kernel_size = 19;

	  // Stride of the max pool op on the cropped feature map during ROI pooling.
	  optional int32 maxpool_stride = 20;

	  // (Second stage) box classifier parameters.

	  // Hyperparameters for the second stage box predictor. If box predictor
	  // type is set to rfcn_box_predictor, a R-FCN model is constructed,
	  // otherwise a Faster R-CNN model is constructed.
	  optional BoxPredictor second_stage_box_predictor = 21;

	  // The batch size per image used for computing the classification and
	  // refined location loss of the box classifier.
	  // Note that this field is ignored if `hard_example_miner` is configured.
	  optional int32 second_stage_batch_size = 22 [default = 64];

	  // Fraction of positive examples to use per image for the box classifier.
	  optional float second_stage_balance_fraction = 23 [default = 0.25];

	  // Post processing to apply on the second stage box classifier predictions.
	  // Note: the `score_converter` provided to the FasterRCNNMetaArch
	  // constructor is taken from this `second_stage_post_processing` proto.
	  optional PostProcessing second_stage_post_processing = 24;

	  // Second stage refined localization loss weight.
	  optional float second_stage_localization_loss_weight = 25 [default = 1.0];

	  // Second stage classification loss weight.
	  optional float second_stage_classification_loss_weight = 26 [default = 1.0];

	  // Second stage instance mask loss weight. Note that this is only
	  // applicable when `MaskRCNNBoxPredictor` is selected for second stage and
	  // configured to predict instance masks.
	  optional float second_stage_mask_prediction_loss_weight = 27 [default = 1.0];

	  // If not left to default, applies hard example mining only to the
	  // classification and localization losses.
	  optional HardExampleMiner hard_example_miner = 28;

	  // Loss for second stage box classifiers, supports Softmax and Sigmoid.
	  // Note that score converter must be consistent with loss type.
	  // When there are multiple labels assigned to the same boxes, it is
	  // recommended to use sigmoid loss and enable merge_multiple_label_boxes.
	  // If not specified, Softmax loss is used as default.
	  optional ClassificationLoss second_stage_classification_loss = 29;

	  // Whether to update batch_norm inplace during training. This is required
	  // for batch norm to work correctly on TPUs. When this is false, user must
	  // add a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in
	  // order to update the batch norm moving average parameters.
	  optional bool inplace_batchnorm_update = 30 [default = false];

	  // Force the use of matrix multiplication based crop and resize instead of
	  // standard tf.image.crop_and_resize while computing second stage input
	  // feature maps.
	  optional bool use_matmul_crop_and_resize = 31 [default = false];

	  // Normally, anchors generated for a given image size are pruned during
	  // training if they lie outside the image window. Setting this option to
	  // true clips the anchors to be within the image instead of pruning them.
	  optional bool clip_anchors_to_image = 32 [default = false];

	  // After performing matching between anchors and targets, in order to pull
	  // out targets for training the Faster R-CNN meta architecture we perform a
	  // gather operation. This option specifies whether to use an alternate
	  // implementation of tf.gather that is faster on TPUs.
	  optional bool use_matmul_gather_in_matcher = 33 [default = false];

	  // Whether to use the balanced positive negative sampler implementation
	  // with static shape guarantees.
	  optional bool use_static_balanced_label_sampler = 34 [default = false];

	  // If true, uses implementation of ops with static shape guarantees.
	  optional bool use_static_shapes = 35 [default = false];

	  // Whether the masks present in groundtruth should be resized in the model
	  // to match the image size.
	  optional bool resize_masks = 36 [default = true];

	  // If true, uses implementation of ops with static shape guarantees when
	  // running evaluation (i.e. when `is_training` is False).
	  optional bool use_static_shapes_for_eval = 37 [default = false];

	  // If true, uses implementation of partitioned_non_max_suppression in first
	  // stage.
	  optional bool use_partitioned_nms_in_first_stage = 38 [default = true];

	  // Whether to return raw detections (pre NMS).
	  optional bool return_raw_detections_during_predict = 39 [default = false];

	  // Whether to use tf.image.combined_non_max_suppression.
	  optional bool use_combined_nms_in_first_stage = 40 [default = false];

	  // Whether to output final box feature. If true, it will crop the feature
	  // map in the postprocess() method based on the final predictions.
	  optional bool output_final_box_features = 42 [default = false];

	  // Configs for context model.
	  optional Context context_config = 41;
	}

	message Context {
	  // Configuration proto for Context.
	  // Next id: 5

	  // The maximum number of contextual features per-image, used for padding.
	  optional int32 max_num_context_features = 1 [default = 2000];

	  // The bottleneck feature dimension of the attention block.
	  optional int32 attention_bottleneck_dimension = 2 [default = 2048];

	  // The attention temperature.
	  optional float attention_temperature = 3 [default = 0.01];

	  // The context feature length.
	  optional int32 context_feature_length = 4 [default = 2057];
	}

	message FasterRcnnFeatureExtractor {
	  // Identifier of the Faster R-CNN model type, for example
	  // 'faster_rcnn_resnet101'. The expected values are listed in
	  // builders/model_builder.py.
	  optional string type = 1;

	  // Output stride of the feature map extracted for the RPN.
	  optional int32 first_stage_features_stride = 2 [default = 16];

	  // Controls whether batch norm parameters are updated during training.
	  // Enabling the update can be desirable when training with a relatively
	  // large batch size (e.g. 8).
	  optional bool batch_norm_trainable = 3 [default = false];
	}