# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides flags that are common to scripts. | |
Common flags from train/vis_video.py are collected in this script. | |
""" | |
import tensorflow as tf | |
from deeplab import common | |
flags = tf.app.flags | |

flags.DEFINE_enum(
    'classification_loss', 'softmax_with_attention',
    ['softmax', 'triplet', 'softmax_with_attention'],
    'Type of loss function used for classifying pixels. Can be softmax, '
    'triplet, or softmax_with_attention.')

flags.DEFINE_integer('k_nearest_neighbors', 1,
                     'The number of nearest neighbors to use.')

flags.DEFINE_integer('embedding_dimension', 100, 'The dimension used for the '
                     'learned embedding.')

flags.DEFINE_boolean('use_softmax_feedback', True,
                     'Whether to give the softmax predictions of the last '
                     'frame as additional input to the segmentation head.')

flags.DEFINE_boolean('sample_adjacent_and_consistent_query_frames', True,
                     'If true, the query frames (all but the first frame '
                     'which is the reference frame) will be sampled such '
                     'that they are adjacent video frames and have the same '
                     'crop coordinates and flip augmentation. Note that if '
                     'use_softmax_feedback is True, this option will '
                     'automatically be activated.')

flags.DEFINE_integer('embedding_seg_feature_dimension', 256,
                     'The dimensionality used in the segmentation head layers.')

flags.DEFINE_integer('embedding_seg_n_layers', 4, 'The number of layers in the '
                     'segmentation head.')

flags.DEFINE_integer('embedding_seg_kernel_size', 7, 'The kernel size used in '
                     'the segmentation head.')

flags.DEFINE_multi_integer('embedding_seg_atrous_rates', [],
                           'The atrous rates to use for the segmentation head.')
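
# Note (illustrative): multi_integer flags are supplied by repeating the flag
# on the command line, e.g.
#   --embedding_seg_atrous_rates=6 --embedding_seg_atrous_rates=12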

flags.DEFINE_boolean('normalize_nearest_neighbor_distances', True,
                     'Whether to normalize the nearest neighbor distances '
                     'to [0,1] using sigmoid, scale and shift.')
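
# Illustrative sketch only (not the FEELVOS implementation): one plausible
# reading of the "sigmoid, scale and shift" normalization described above.
# A sigmoid squashes non-negative distances into [0.5, 1); scaling and
# shifting then maps them onto [0, 1). The constants here are assumptions.
#
#   def normalize_distances_sketch(dists):
#     return (tf.sigmoid(dists) - 0.5) * 2.0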

flags.DEFINE_boolean('also_attend_to_previous_frame', True, 'Whether to also '
                     'use nearest neighbor attention with respect to the '
                     'previous frame.')

flags.DEFINE_boolean('use_local_previous_frame_attention', True,
                     'Whether to restrict the previous frame attention to a '
                     'local search window. Only has an effect if '
                     'also_attend_to_previous_frame is True.')

flags.DEFINE_integer('previous_frame_attention_window_size', 15,
                     'The window size used for local previous frame attention, '
                     'if use_local_previous_frame_attention is True.')

flags.DEFINE_boolean('use_first_frame_matching', True, 'Whether to extract '
                     'features by matching to the reference frame. This should '
                     'always be true except for ablation experiments.')

FLAGS = flags.FLAGS
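
# Example (illustrative): these flags are parsed by the entry-point scripts
# (e.g. train.py / vis_video.py) via TF1-style tf.app.run(). After parsing,
# each flag is read as an attribute of FLAGS:
#
#   python train.py --classification_loss=triplet --embedding_dimension=128
#
#   if FLAGS.use_softmax_feedback:
#     ...  # feed back the previous frame's softmax predictions.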

# Constants.

# Perform semantic segmentation predictions.
OUTPUT_TYPE = common.OUTPUT_TYPE

# Semantic segmentation item names.
LABELS_CLASS = common.LABELS_CLASS
IMAGE = common.IMAGE
HEIGHT = common.HEIGHT
WIDTH = common.WIDTH
IMAGE_NAME = common.IMAGE_NAME
SOURCE_ID = 'source_id'
VIDEO_ID = 'video_id'
LABEL = common.LABEL
ORIGINAL_IMAGE = common.ORIGINAL_IMAGE
PRECEDING_FRAME_LABEL = 'preceding_frame_label'

# Test set name.
TEST_SET = common.TEST_SET

# Internal constants.
OBJECT_LABEL = 'object_label'


class VideoModelOptions(common.ModelOptions):
  """Internal version of immutable class to hold model options."""

  def __new__(cls,
              outputs_to_num_classes,
              crop_size=None,
              atrous_rates=None,
              output_stride=8):
    """Constructor to set default values.

    Args:
      outputs_to_num_classes: A dictionary from output type to the number of
        classes. For example, for the task of semantic segmentation with 21
        semantic classes, we would have outputs_to_num_classes['semantic'] =
        21.
      crop_size: A tuple [crop_height, crop_width].
      atrous_rates: A list of atrous convolution rates for ASPP.
      output_stride: The ratio of input to output spatial resolution.

    Returns:
      A new VideoModelOptions instance.
    """
    self = super(VideoModelOptions, cls).__new__(
        cls,
        outputs_to_num_classes,
        crop_size,
        atrous_rates,
        output_stride)
    # Add internal flags.
    self.classification_loss = FLAGS.classification_loss
    return self
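
# Illustrative usage (hypothetical values, mirroring the docstring above):
#
#   model_options = VideoModelOptions(
#       outputs_to_num_classes={'semantic': 21},
#       crop_size=[465, 465],
#       atrous_rates=[12, 24, 36],
#       output_stride=8)
#   model_options.classification_loss  # -> FLAGS.classification_loss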


def parse_decoder_output_stride():
  """Parses the decoder output stride.

  FEELVOS assumes decoder_output_stride = 4. This function exists to check
  that assumption and return the stride as a single integer.

  Returns:
    An integer specifying the decoder_output_stride.

  Raises:
    ValueError: If decoder_output_stride is None or contains more than one
      element.
  """
  if FLAGS.decoder_output_stride:
    decoder_output_stride = [
        int(x) for x in FLAGS.decoder_output_stride]
    if len(decoder_output_stride) != 1:
      raise ValueError('Expected decoder_output_stride to have exactly one '
                       'element.')
    decoder_output_stride = decoder_output_stride[0]
  else:
    raise ValueError('Expected flag decoder_output_stride not to be None.')
  return decoder_output_stride
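
# Illustrative usage (assumes the deeplab flag --decoder_output_stride=4 was
# passed on the command line, as FEELVOS expects):
#
#   stride = parse_decoder_output_stride()  # -> 4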