# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Prepare the data used for FEELVOS training/evaluation.""" | |
import tensorflow as tf | |
from deeplab.core import feature_extractor | |
from deeplab.core import preprocess_utils | |
# The probability of flipping the images and labels | |
# left-right during training | |
_PROB_OF_FLIP = 0.5 | |
get_random_scale = preprocess_utils.get_random_scale | |
randomly_scale_image_and_label = ( | |
preprocess_utils.randomly_scale_image_and_label) | |
def preprocess_image_and_label(image,
                               label,
                               crop_height,
                               crop_width,
                               min_resize_value=None,
                               max_resize_value=None,
                               resize_factor=None,
                               min_scale_factor=1.,
                               max_scale_factor=1.,
                               scale_factor_step_size=0,
                               ignore_label=255,
                               is_training=True,
                               model_variant=None):
  """Preprocesses the image and label.

  Args:
    image: Input image.
    label: Ground truth annotation label.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: Whether the preprocessing is used for training.
    model_variant: Model variant (string) for choosing how to mean-subtract
      the images. See feature_extractor.network_map for supported model
      variants.

  Returns:
    original_image: Original image (could be resized).
    processed_image: Preprocessed image.
    label: Preprocessed ground truth segmentation label.

  Raises:
    ValueError: Ground truth label not provided during training.
  """
  if is_training and label is None:
    raise ValueError('During training, label must be provided.')
  if model_variant is None:
    tf.logging.warning('Default mean-subtraction is performed. Please specify '
                       'a model_variant. See feature_extractor.network_map for '
                       'supported model variants.')

  # Keep reference to original image.
  original_image = image

  processed_image = tf.cast(image, tf.float32)

  if label is not None:
    label = tf.cast(label, tf.int32)

  # Resize image and label to the desired range.
  if min_resize_value is not None or max_resize_value is not None:
    [processed_image, label] = (
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True))
    # The `original_image` becomes the resized image.
    original_image = tf.identity(processed_image)

  # Data augmentation by randomly scaling the inputs.
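  # In DeepLab's preprocess_utils, a scale_factor_step_size of 0 samples the
  # scale uniformly from [min_scale_factor, max_scale_factor], while a
  # positive step size samples from the evenly spaced discrete values
  # {min, min + step, ..., max}.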
  scale = get_random_scale(
      min_scale_factor, max_scale_factor, scale_factor_step_size)
  processed_image, label = randomly_scale_image_and_label(
      processed_image, label, scale)
  processed_image.set_shape([None, None, 3])

  if crop_height is not None and crop_width is not None:
    # Pad image and label to have dimensions >= [crop_height, crop_width].
    image_shape = tf.shape(processed_image)
    image_height = image_shape[0]
    image_width = image_shape[1]
    target_height = image_height + tf.maximum(crop_height - image_height, 0)
    target_width = image_width + tf.maximum(crop_width - image_width, 0)

    # Pad image with mean pixel value.
    mean_pixel = tf.reshape(
        feature_extractor.mean_pixel(model_variant), [1, 1, 3])
    processed_image = preprocess_utils.pad_to_bounding_box(
        processed_image, 0, 0, target_height, target_width, mean_pixel)

    if label is not None:
      label = preprocess_utils.pad_to_bounding_box(
          label, 0, 0, target_height, target_width, ignore_label)

    # Randomly crop the image and label.
    if is_training and label is not None:
      processed_image, label = preprocess_utils.random_crop(
          [processed_image, label], crop_height, crop_width)

    processed_image.set_shape([crop_height, crop_width, 3])

    if label is not None:
      label.set_shape([crop_height, crop_width, 1])

  if is_training:
    # Randomly left-right flip the image and label.
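    # flip_dim returns the (possibly) flipped tensors plus the random flip
    # decision as the last element, which is discarded here.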
    processed_image, label, _ = preprocess_utils.flip_dim(
        [processed_image, label], _PROB_OF_FLIP, dim=1)

  return original_image, processed_image, label
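

# A minimal usage sketch (illustrative only): the function name below is
# hypothetical, and the shapes, crop size, and scale range are assumptions
# for demonstration, not values taken from this file or from the FEELVOS
# training configuration.
def _example_preprocess_single():
  """Builds preprocessing ops for one image/label pair (sketch)."""
  image = tf.placeholder(tf.uint8, shape=[None, None, 3])
  label = tf.placeholder(tf.uint8, shape=[None, None, 1])
  return preprocess_image_and_label(
      image,
      label,
      crop_height=465,
      crop_width=465,
      min_scale_factor=0.5,
      max_scale_factor=2.0,
      scale_factor_step_size=0.25,
      model_variant='xception_65')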


def preprocess_images_and_labels_consistently(images,
                                              labels,
                                              crop_height,
                                              crop_width,
                                              min_resize_value=None,
                                              max_resize_value=None,
                                              resize_factor=None,
                                              min_scale_factor=1.,
                                              max_scale_factor=1.,
                                              scale_factor_step_size=0,
                                              ignore_label=255,
                                              is_training=True,
                                              model_variant=None):
  """Preprocesses images and labels in a consistent way.

  Similar to preprocess_image_and_label, but works on a list of images
  and a list of labels, uses the same crop coordinates for all of them,
  and either flips all images and labels or none of them.

  Args:
    images: List of input images.
    labels: List of ground truth annotation labels.
    crop_height: The height value used to crop the image and label.
    crop_width: The width value used to crop the image and label.
    min_resize_value: Desired size of the smaller image side.
    max_resize_value: Maximum allowed size of the larger image side.
    resize_factor: Resized dimensions are multiple of factor plus one.
    min_scale_factor: Minimum scale factor value.
    max_scale_factor: Maximum scale factor value.
    scale_factor_step_size: The step size from min scale factor to max scale
      factor. The input is randomly scaled based on the value of
      (min_scale_factor, max_scale_factor, scale_factor_step_size).
    ignore_label: The label value which will be ignored for training and
      evaluation.
    is_training: Whether the preprocessing is used for training.
    model_variant: Model variant (string) for choosing how to mean-subtract
      the images. See feature_extractor.network_map for supported model
      variants.

  Returns:
    original_images: Original images (could be resized).
    processed_images: Preprocessed images.
    labels: Preprocessed ground truth segmentation labels.

  Raises:
    ValueError: Ground truth labels not provided during training.
  """
  if is_training and labels is None:
    raise ValueError('During training, labels must be provided.')
  if model_variant is None:
    tf.logging.warning('Default mean-subtraction is performed. Please specify '
                       'a model_variant. See feature_extractor.network_map for '
                       'supported model variants.')
  if labels is not None:
    assert len(images) == len(labels)
  num_imgs = len(images)

  # Keep reference to original images.
  original_images = images

  processed_images = [tf.cast(image, tf.float32) for image in images]

  if labels is not None:
    labels = [tf.cast(label, tf.int32) for label in labels]

  # Resize images and labels to the desired range.
  if min_resize_value is not None or max_resize_value is not None:
    processed_images, labels = zip(*[
        preprocess_utils.resize_to_range(
            image=processed_image,
            label=label,
            min_size=min_resize_value,
            max_size=max_resize_value,
            factor=resize_factor,
            align_corners=True) for processed_image, label
        in zip(processed_images, labels)])
    # The `original_images` becomes the resized images.
    original_images = [tf.identity(processed_image)
                       for processed_image in processed_images]

  # Data augmentation by randomly scaling the inputs.
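  # A single scale is drawn and applied to every image/label pair so that all
  # frames of the sequence stay spatially aligned with each other.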
  scale = get_random_scale(
      min_scale_factor, max_scale_factor, scale_factor_step_size)
  processed_images, labels = zip(
      *[randomly_scale_image_and_label(processed_image, label, scale)
        for processed_image, label in zip(processed_images, labels)])

  for processed_image in processed_images:
    processed_image.set_shape([None, None, 3])

  if crop_height is not None and crop_width is not None:
    # Pad image and label to have dimensions >= [crop_height, crop_width].
    image_shape = tf.shape(processed_images[0])
    image_height = image_shape[0]
    image_width = image_shape[1]
    target_height = image_height + tf.maximum(crop_height - image_height, 0)
    target_width = image_width + tf.maximum(crop_width - image_width, 0)

    # Pad image with mean pixel value.
    mean_pixel = tf.reshape(
        feature_extractor.mean_pixel(model_variant), [1, 1, 3])
    processed_images = [preprocess_utils.pad_to_bounding_box(
        processed_image, 0, 0, target_height, target_width, mean_pixel)
                        for processed_image in processed_images]

    if labels is not None:
      labels = [preprocess_utils.pad_to_bounding_box(
          label, 0, 0, target_height, target_width, ignore_label)
                for label in labels]

    # Randomly crop the images and labels.
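    # Passing all tensors to random_crop in one list makes it use the same
    # crop window for every frame and mask, keeping them aligned.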
    if is_training and labels is not None:
      cropped = preprocess_utils.random_crop(
          processed_images + labels, crop_height, crop_width)
      assert len(cropped) == 2 * num_imgs
      processed_images = cropped[:num_imgs]
      labels = cropped[num_imgs:]

    for processed_image in processed_images:
      processed_image.set_shape([crop_height, crop_width, 3])

    if labels is not None:
      for label in labels:
        label.set_shape([crop_height, crop_width, 1])

  if is_training:
    # Randomly left-right flip the images and labels.
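    # All frames and masks go through one flip_dim call so that a single
    # random draw decides the flip for the whole sequence; flip_dim appends
    # that decision as the last element of its result, hence res[:-1] below.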
    res = preprocess_utils.flip_dim(
        list(processed_images + labels), _PROB_OF_FLIP, dim=1)
    maybe_flipped = res[:-1]
    assert len(maybe_flipped) == 2 * num_imgs
    processed_images = maybe_flipped[:num_imgs]
    labels = maybe_flipped[num_imgs:]

  return original_images, processed_images, labels
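

# A minimal sketch of preprocessing several video frames consistently
# (illustrative only): the function name is hypothetical, and the number of
# frames, shapes, and flag values are assumptions for demonstration, not
# values taken from this file.
def _example_preprocess_sequence(num_frames=3):
  """Builds preprocessing ops that augment a frame sequence identically."""
  frames = [
      tf.placeholder(tf.uint8, shape=[None, None, 3])
      for _ in range(num_frames)
  ]
  masks = [
      tf.placeholder(tf.uint8, shape=[None, None, 1])
      for _ in range(num_frames)
  ]
  return preprocess_images_and_labels_consistently(
      frames,
      masks,
      crop_height=465,
      crop_width=465,
      min_scale_factor=0.5,
      max_scale_factor=2.0,
      scale_factor_step_size=0.25,
      model_variant='xception_65')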