Spaces:

NCTCMumbai
/

NCTC

Running

App Files Files Community

NCTC / models /research /cognitive_planning /preprocessing /inception_preprocessing.py

NCTCMumbai

Upload 2571 files

0b8359d over 1 year ago

raw

history blame

14.1 kB

	# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ==============================================================================
	"""Provides utilities to preprocess images for the Inception networks."""

	from __future__ import absolute_import
	from __future__ import division
	from __future__ import print_function

	import tensorflow as tf

	from tensorflow.python.ops import control_flow_ops


	def apply_with_random_selector(x, func, num_cases):
	  """Applies `func` to `x` using a randomly drawn case index.

	  A scalar int32 selector is sampled uniformly from [0, num_cases). For each
	  candidate case, `x` is routed through a `switch` op keyed on whether the
	  selector equals that case, `func` is applied to the switched tensor, and
	  the per-case results are combined with a `merge` op.

	  Args:
	    x: input Tensor.
	    func: Python callable invoked as `func(tensor, case)`, where `case` is a
	      Python int in [0, num_cases).
	    num_cases: Python int, number of cases to sample the selector from.

	  Returns:
	    The first output of the `merge` over every `func(x, case)` branch, i.e.
	    the result of `func` for the case that was sampled at run time.
	  """
	  selector = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)

	  branch_outputs = []
	  for case in range(num_cases):
	    # Only the branch whose case matches the sampled selector receives the
	    # real `x`; index 1 picks the output of `switch` used for a true
	    # predicate.
	    is_selected = tf.equal(selector, case)
	    routed_x = control_flow_ops.switch(x, is_selected)[1]
	    branch_outputs.append(func(routed_x, case))

	  return control_flow_ops.merge(branch_outputs)[0]


	def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
	  """Applies a fixed sequence of random color distortions to an image.

	  The individual color distortions do not commute, so the order in which
	  they are applied matters. Rather than randomly permuting the ops, the
	  caller picks one of a few fixed orderings through `color_ordering`.

	  Args:
	    image: 3-D Tensor containing single image in [0, 1].
	    color_ordering: Python int selecting the op ordering. With `fast_mode`
	      off the valid values are 0-3. With `fast_mode` on, 0 applies
	      brightness then saturation and any other value applies saturation
	      then brightness.
	    fast_mode: If True, only brightness and saturation are distorted (the
	      slower random_hue and random_contrast ops are skipped).
	    scope: Optional scope for name_scope.

	  Returns:
	    3-D Tensor color-distorted image clipped to the range [0, 1].

	  Raises:
	    ValueError: if `fast_mode` is False and color_ordering is not in [0, 3].
	  """

	  def brightness(img):
	    return tf.image.random_brightness(img, max_delta=32. / 255.)

	  def saturation(img):
	    return tf.image.random_saturation(img, lower=0.5, upper=1.5)

	  def hue(img):
	    return tf.image.random_hue(img, max_delta=0.2)

	  def contrast(img):
	    return tf.image.random_contrast(img, lower=0.5, upper=1.5)

	  # Full (slow) orderings, keyed by the color_ordering value that selects them.
	  full_orderings = (
	      (0, (brightness, saturation, hue, contrast)),
	      (1, (saturation, brightness, contrast, hue)),
	      (2, (contrast, hue, brightness, saturation)),
	      (3, (hue, saturation, contrast, brightness)),
	  )

	  with tf.name_scope(scope, 'distort_color', [image]):
	    if fast_mode:
	      # The fast path never validates color_ordering: anything other than 0
	      # simply swaps the two cheap ops.
	      if color_ordering == 0:
	        pipeline = (brightness, saturation)
	      else:
	        pipeline = (saturation, brightness)
	    else:
	      for ordering, ops in full_orderings:
	        if color_ordering == ordering:
	          pipeline = ops
	          break
	      else:
	        raise ValueError('color_ordering must be in [0, 3]')

	    for distort in pipeline:
	      image = distort(image)

	    # The random_* ops do not necessarily clamp.
	    return tf.clip_by_value(image, 0.0, 1.0)


	def distorted_bounding_box_crop(image,
	                                bbox,
	                                min_object_covered=0.1,
	                                aspect_ratio_range=(0.75, 1.33),
	                                area_range=(0.05, 1.0),
	                                max_attempts=100,
	                                scope=None):
	  """Crops an image to a randomly distorted version of one of its bboxes.

	  See `tf.image.sample_distorted_bounding_box` for more documentation.

	  Args:
	    image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
	    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
	      where each coordinate is [0, 1) and the coordinates are arranged
	      as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then the whole image is
	      used.
	    min_object_covered: An optional `float`. Defaults to `0.1`. The cropped
	      area of the image must contain at least this fraction of any bounding
	      box supplied.
	    aspect_ratio_range: An optional list of `floats`. The cropped area of the
	      image must have an aspect ratio = width / height within this range.
	    area_range: An optional list of `floats`. The cropped area of the image
	      must contain a fraction of the supplied image within this range.
	    max_attempts: An optional `int`. Number of attempts at generating a
	      cropped region of the image of the specified constraints. After
	      `max_attempts` failures, return the entire image.
	    scope: Optional scope for name_scope.

	  Returns:
	    A tuple, a 3-D Tensor cropped_image and the distorted bbox.
	  """
	  with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]):
	    # Many image datasets ship a human-annotated bounding box around the
	    # object of interest. A new box is sampled here as a random distortion of
	    # that annotation, subject to the allowed aspect ratios, sizes and overlap
	    # with the annotated box. When no box is supplied the whole image acts as
	    # the bounding box (use_image_if_no_bounding_boxes=True).
	    crop_begin, crop_size, distorted_bbox = (
	        tf.image.sample_distorted_bounding_box(
	            tf.shape(image),
	            bounding_boxes=bbox,
	            min_object_covered=min_object_covered,
	            aspect_ratio_range=aspect_ratio_range,
	            area_range=area_range,
	            max_attempts=max_attempts,
	            use_image_if_no_bounding_boxes=True))

	    # Slice the sampled window out of the image.
	    return tf.slice(image, crop_begin, crop_size), distorted_bbox


	def preprocess_for_train(image, height, width, bbox,
	                         fast_mode=True,
	                         scope=None,
	                         add_image_summaries=True):
	  """Distorts one image for training a network.

	  Distorting images is a useful way of augmenting the data set during
	  training so that the network becomes invariant to aspects of the image
	  that do not affect the label. The image is randomly cropped, resized to
	  `[height, width]`, randomly flipped left/right, color-distorted and finally
	  rescaled.

	  Optionally image summaries are emitted to display the intermediate
	  transformations applied to the image.

	  Args:
	    image: 3-D Tensor of image. If dtype is tf.float32 then the range should
	      be [0, 1], otherwise it is converted to tf.float32 assuming that the
	      range is [0, MAX], where MAX is largest positive representable number
	      for int(8/16/32) data type (see `tf.image.convert_image_dtype` for
	      details).
	    height: integer
	    width: integer
	    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
	      where each coordinate is [0, 1) and the coordinates are arranged
	      as [ymin, xmin, ymax, xmax]. If None, a single box covering the whole
	      image is used.
	    fast_mode: Optional boolean, if True avoids slower transformations (i.e.
	      bi-cubic resizing, random_hue or random_contrast).
	    scope: Optional scope for name_scope.
	    add_image_summaries: Enable image summaries.

	  Returns:
	    3-D float Tensor of distorted image used for training with range [-1, 1].
	  """

	  def resize_with_method(x, method):
	    return tf.image.resize_images(x, [height, width], method)

	  def distort_with_ordering(x, ordering):
	    return distort_color(x, ordering, fast_mode)

	  with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]):
	    if bbox is None:
	      # No annotation supplied: fall back to one box spanning the full image.
	      bbox = tf.constant([0.0, 0.0, 1.0, 1.0],
	                         dtype=tf.float32,
	                         shape=[1, 1, 4])
	    if image.dtype != tf.float32:
	      image = tf.image.convert_image_dtype(image, dtype=tf.float32)

	    # Each bounding box has shape [1, num_boxes, box coords] and
	    # the coordinates are ordered [ymin, xmin, ymax, xmax].
	    annotated = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), bbox)
	    if add_image_summaries:
	      tf.summary.image('image_with_bounding_boxes', annotated)

	    cropped, cropped_bbox = distorted_bounding_box_crop(image, bbox)
	    # The dynamic slice based upon the sampled size loses the static channel
	    # dimension, so restore it here.
	    cropped.set_shape([None, None, 3])
	    annotated_crop = tf.image.draw_bounding_boxes(
	        tf.expand_dims(image, 0), cropped_bbox)
	    if add_image_summaries:
	      tf.summary.image('images_with_distorted_bounding_box', annotated_crop)

	    # Resizing may distort the image because the aspect ratio is not
	    # respected. The resize method is drawn at random by
	    # apply_with_random_selector: ResizeMethod has 4 enumerated methods, and
	    # fast_mode restricts the draw to a single case (bilinear).
	    resize_cases = 1 if fast_mode else 4
	    resized = apply_with_random_selector(
	        cropped, resize_with_method, num_cases=resize_cases)

	    if add_image_summaries:
	      tf.summary.image('cropped_resized_image', tf.expand_dims(resized, 0))

	    # Randomly flip the image horizontally.
	    flipped = tf.image.random_flip_left_right(resized)

	    # Randomly distort the colors. There are 1 or 4 orderings to choose from.
	    color_cases = 1 if fast_mode else 4
	    distorted = apply_with_random_selector(
	        flipped, distort_with_ordering, num_cases=color_cases)

	    if add_image_summaries:
	      tf.summary.image('final_distorted_image', tf.expand_dims(distorted, 0))

	    # Map [0, 1] onto [-1, 1].
	    centered = tf.subtract(distorted, 0.5)
	    return tf.multiply(centered, 2.0)


	def preprocess_for_eval(image, height, width,
	                        central_fraction=0.875, scope=None):
	  """Prepares one image for evaluation.

	  If `central_fraction` is truthy, the central fraction of the input image is
	  cropped first.

	  If both `height` and `width` are truthy, the image is then resized to that
	  size with resize_bilinear.

	  Finally the values are shifted by -0.5 and scaled by 2, which maps an input
	  range of [0, 1] onto [-1, 1].

	  Args:
	    image: 3-D Tensor of image. If dtype is tf.float32 then the range should
	      be [0, 1], otherwise it is converted to tf.float32 assuming that the
	      range is [0, MAX], where MAX is largest positive representable number
	      for int(8/16/32) data type (see `tf.image.convert_image_dtype` for
	      details).
	    height: integer
	    width: integer
	    central_fraction: Optional Float, fraction of the image to crop.
	    scope: Optional scope for name_scope.

	  Returns:
	    3-D float Tensor of prepared image.
	  """
	  with tf.name_scope(scope, 'eval_image', [image, height, width]):
	    if image.dtype != tf.float32:
	      image = tf.image.convert_image_dtype(image, dtype=tf.float32)

	    # Keep only the central region of the image (skipped when
	    # central_fraction is None or 0).
	    if central_fraction:
	      image = tf.image.central_crop(image, central_fraction=central_fraction)

	    if height and width:
	      # resize_bilinear is applied to a batch, so add a leading batch
	      # dimension for the resize and drop it again afterwards.
	      batched = tf.expand_dims(image, 0)
	      resized = tf.image.resize_bilinear(batched, [height, width],
	                                         align_corners=False)
	      image = tf.squeeze(resized, [0])

	    centered = tf.subtract(image, 0.5)
	    return tf.multiply(centered, 2.0)


	def preprocess_image(image, height, width,
	                     is_training=False,
	                     bbox=None,
	                     fast_mode=True,
	                     add_image_summaries=True):
	  """Pre-processes one image for training or evaluation.

	  Dispatches to `preprocess_for_train` when `is_training` is truthy and to
	  `preprocess_for_eval` otherwise. `bbox`, `fast_mode` and
	  `add_image_summaries` are only forwarded on the training path.

	  Args:
	    image: 3-D Tensor [height, width, channels] with the image. If dtype is
	      tf.float32 then the range should be [0, 1], otherwise it is converted
	      to tf.float32 assuming that the range is [0, MAX], where MAX is largest
	      positive representable number for int(8/16/32) data type (see
	      `tf.image.convert_image_dtype` for details).
	    height: integer, image expected height.
	    width: integer, image expected width.
	    is_training: Boolean. If true the image is transformed for training,
	      otherwise it is transformed for evaluation.
	    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
	      where each coordinate is [0, 1) and the coordinates are arranged as
	      [ymin, xmin, ymax, xmax]. May be None.
	    fast_mode: Optional boolean, if True avoids slower transformations.
	    add_image_summaries: Enable image summaries.

	  Returns:
	    3-D float Tensor containing an appropriately scaled image.
	  """
	  if not is_training:
	    return preprocess_for_eval(image, height, width)

	  return preprocess_for_train(
	      image, height, width, bbox, fast_mode,
	      add_image_summaries=add_image_summaries)