Spaces:
Running
Running
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to preprocess images for the Inception networks."""
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
import tensorflow as tf | |
from tensorflow.python.ops import control_flow_ops | |
def apply_with_random_selector(x, func, num_cases):
  """Applies `func` to `x` using a selector sampled when the graph runs.

  One branch `func(..., case)` is built for every `case` in
  `range(num_cases)`. A scalar int32 selector is drawn uniformly from
  `[0, num_cases)` and `x` is routed only to the branch whose `case` equals
  the selector; the branch outputs are then merged into a single result.

  Args:
    x: input Tensor.
    func: Python callable invoked as `func(tensor, case)`, where `case` is a
      Python int.
    num_cases: Python int, number of selector values to sample from.

  Returns:
    The result of `func(x, sel)` for the sampled `sel`. `func` always receives
    the selector as a Python int even though it is sampled dynamically.
  """
  selector = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)

  branch_outputs = []
  for case in range(num_cases):
    # Element [1] of the switch output is the side that receives x when the
    # predicate holds, so only the selected branch gets the real input.
    routed = control_flow_ops.switch(x, tf.equal(selector, case))
    branch_outputs.append(func(routed[1], case))

  # Only the merged tensor (element [0]) is returned to the caller.
  merged = control_flow_ops.merge(branch_outputs)
  return merged[0]
def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
  """Distort the color of a Tensor image.

  Each color distortion is non-commutative and thus the ordering of the color
  ops matters. Ideally we would randomly permute the ordering of the color
  ops. Rather than adding that level of complication, the caller selects one
  of a small set of fixed orderings via `color_ordering`.

  In fast mode only brightness and saturation are applied: ordering 0 applies
  brightness then saturation, every other valid ordering applies saturation
  then brightness. In slow mode all four ops (brightness, saturation, hue,
  contrast) are applied in one of four fixed orders.

  Args:
    image: 3-D Tensor containing single image in [0, 1].
    color_ordering: Python int, a type of distortion (valid values: 0-3).
    fast_mode: Avoids slower ops (random_hue and random_contrast).
    scope: Optional scope for name_scope.

  Returns:
    3-D Tensor color-distorted image on range [0, 1].

  Raises:
    ValueError: if color_ordering not in [0, 3]. This is checked in both fast
      and slow mode, before any ops are added to the graph.
  """
  # Validate up front so fast mode honours the documented contract too;
  # previously an out-of-range ordering was silently accepted in fast mode.
  if color_ordering not in (0, 1, 2, 3):
    raise ValueError('color_ordering must be in [0, 3]')

  def _brightness(img):
    return tf.image.random_brightness(img, max_delta=32. / 255.)

  def _saturation(img):
    return tf.image.random_saturation(img, lower=0.5, upper=1.5)

  def _hue(img):
    return tf.image.random_hue(img, max_delta=0.2)

  def _contrast(img):
    return tf.image.random_contrast(img, lower=0.5, upper=1.5)

  if fast_mode:
    # Only two orderings exist in fast mode; 1, 2 and 3 share the second one.
    if color_ordering == 0:
      pipeline = (_brightness, _saturation)
    else:
      pipeline = (_saturation, _brightness)
  else:
    pipeline = {
        0: (_brightness, _saturation, _hue, _contrast),
        1: (_saturation, _brightness, _contrast, _hue),
        2: (_contrast, _hue, _brightness, _saturation),
        3: (_hue, _saturation, _contrast, _brightness),
    }[color_ordering]

  with tf.name_scope(scope, 'distort_color', [image]):
    for distort in pipeline:
      image = distort(image)
    # The random_* ops do not necessarily clamp.
    return tf.clip_by_value(image, 0.0, 1.0)
def distorted_bounding_box_crop(image,
                                bbox,
                                min_object_covered=0.1,
                                aspect_ratio_range=(0.75, 1.33),
                                area_range=(0.05, 1.0),
                                max_attempts=100,
                                scope=None):
  """Crops `image` to a randomly distorted version of one of the bboxes.

  See `tf.image.sample_distorted_bounding_box` for more documentation.

  Args:
    image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged
      as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the
      whole image.
    min_object_covered: An optional `float`. Defaults to `0.1`. The cropped
      area of the image must contain at least this fraction of any bounding
      box supplied.
    aspect_ratio_range: An optional list of `floats`. The cropped area of the
      image must have an aspect ratio = width / height within this range.
    area_range: An optional list of `floats`. The cropped area of the image
      must contain a fraction of the supplied image within this range.
    max_attempts: An optional `int`. Number of attempts at generating a
      cropped region of the image of the specified constraints. After
      `max_attempts` failures, return the entire image.
    scope: Optional scope for name_scope.

  Returns:
    A tuple, a 3-D Tensor cropped_image and the distorted bbox.
  """
  with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]):
    # Many image datasets ship a human-annotated box around the object of
    # interest. A new box is sampled here as a random distortion of that
    # annotation, constrained by the allowed aspect ratios, areas and overlap
    # with the annotation. When no box is supplied the whole image is used.
    # Boxes are shaped [1, num_boxes, coords] with coords ordered
    # [ymin, xmin, ymax, xmax].
    crop_begin, crop_size, distorted_bbox = (
        tf.image.sample_distorted_bounding_box(
            tf.shape(image),
            bounding_boxes=bbox,
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=True))

    # Cut the sampled window out of the image.
    cropped = tf.slice(image, crop_begin, crop_size)
    return cropped, distorted_bbox
def preprocess_for_train(image, height, width, bbox,
                         fast_mode=True,
                         scope=None,
                         add_image_summaries=True):
  """Distort one image for training a network.

  Distorting images provides a useful technique for augmenting the data
  set during training in order to make the network invariant to aspects
  of the image that do not affect the label.

  The pipeline is: random bounding-box crop, resize to `[height, width]`,
  random horizontal flip, random color distortion, then rescale from [0, 1]
  to [-1, 1]. Optionally image summaries are emitted to display the
  intermediate transformations.

  Args:
    image: 3-D Tensor of image. If dtype is tf.float32 then the range should
      be [0, 1], otherwise it would be converted to tf.float32 assuming that
      the range is [0, MAX], where MAX is largest positive representable
      number for int(8/16/32) data type (see `tf.image.convert_image_dtype`
      for details).
    height: integer
    width: integer
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged
      as [ymin, xmin, ymax, xmax]. If None, a single box covering the whole
      image is used.
    fast_mode: Optional boolean, if True avoids slower transformations (i.e.
      bi-cubic resizing, random_hue or random_contrast).
    scope: Optional scope for name_scope.
    add_image_summaries: Enable image summaries.

  Returns:
    3-D float Tensor of distorted image used for training with range [-1, 1].
  """
  # Fast mode uses a single resize method and a single color ordering;
  # otherwise one of four is picked at random for each.
  num_cases = 1 if fast_mode else 4

  def _resize(tensor, method):
    return tf.image.resize_images(tensor, [height, width], method)

  def _distort(tensor, ordering):
    return distort_color(tensor, ordering, fast_mode)

  with tf.name_scope(scope, 'distort_image', [image, height, width, bbox]):
    if bbox is None:
      # No annotation supplied: fall back to a box spanning the full image.
      bbox = tf.constant([0.0, 0.0, 1.0, 1.0],
                         dtype=tf.float32,
                         shape=[1, 1, 4])
    if image.dtype != tf.float32:
      image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    # Boxes are shaped [1, num_boxes, coords] with coords ordered
    # [ymin, xmin, ymax, xmax].
    annotated = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0), bbox)
    if add_image_summaries:
      tf.summary.image('image_with_bounding_boxes', annotated)

    result, sampled_bbox = distorted_bounding_box_crop(image, bbox)
    # The dynamic slice based upon the bbox size loses the third dimension,
    # so the static channel count is restored here.
    result.set_shape([None, None, 3])
    annotated_distorted = tf.image.draw_bounding_boxes(
        tf.expand_dims(image, 0), sampled_bbox)
    if add_image_summaries:
      tf.summary.image('images_with_distorted_bounding_box',
                       annotated_distorted)

    # Resizing may distort the image because the aspect ratio is not
    # respected. The selector index is passed straight through as the resize
    # method; fast mode only ever uses method 0.
    result = apply_with_random_selector(result, _resize, num_cases=num_cases)
    if add_image_summaries:
      tf.summary.image('cropped_resized_image', tf.expand_dims(result, 0))

    # Randomly flip the image horizontally.
    result = tf.image.random_flip_left_right(result)

    # Randomly distort the colors using one of the available orderings.
    result = apply_with_random_selector(result, _distort, num_cases=num_cases)
    if add_image_summaries:
      tf.summary.image('final_distorted_image', tf.expand_dims(result, 0))

    # Map [0, 1] onto [-1, 1].
    centered = tf.subtract(result, 0.5)
    return tf.multiply(centered, 2.0)
def preprocess_for_eval(image, height, width,
                        central_fraction=0.875, scope=None):
  """Prepare one image for evaluation.

  If central_fraction is specified (truthy), the central fraction of the
  input image is cropped first. If both height and width are specified
  (truthy), the image is then resized to that size with resize_bilinear.
  Finally the values are rescaled by subtracting 0.5 and multiplying by 2.

  Args:
    image: 3-D Tensor of image. If dtype is tf.float32 then the range should
      be [0, 1], otherwise it would be converted to tf.float32 assuming that
      the range is [0, MAX], where MAX is largest positive representable
      number for int(8/16/32) data type (see `tf.image.convert_image_dtype`
      for details).
    height: integer
    width: integer
    central_fraction: Optional Float, fraction of the image to crop.
    scope: Optional scope for name_scope.

  Returns:
    3-D float Tensor of prepared image.
  """
  with tf.name_scope(scope, 'eval_image', [image, height, width]):
    prepared = image
    if prepared.dtype != tf.float32:
      prepared = tf.image.convert_image_dtype(prepared, dtype=tf.float32)

    # Keep only the central region when a crop fraction was requested.
    if central_fraction:
      prepared = tf.image.central_crop(
          prepared, central_fraction=central_fraction)

    if height and width:
      # resize_bilinear is fed a batch here, so add a leading batch dimension
      # for the resize and remove it again afterwards.
      batched = tf.expand_dims(prepared, 0)
      resized = tf.image.resize_bilinear(batched, [height, width],
                                         align_corners=False)
      prepared = tf.squeeze(resized, [0])

    # Shift and scale: (x - 0.5) * 2.
    centered = tf.subtract(prepared, 0.5)
    return tf.multiply(centered, 2.0)
def preprocess_image(image, height, width,
                     is_training=False,
                     bbox=None,
                     fast_mode=True,
                     add_image_summaries=True):
  """Pre-process one image for training or evaluation.

  Dispatches to `preprocess_for_train` when `is_training` is true and to
  `preprocess_for_eval` otherwise. The `bbox`, `fast_mode` and
  `add_image_summaries` arguments are only forwarded on the training path.

  Args:
    image: 3-D Tensor [height, width, channels] with the image. If dtype is
      tf.float32 then the range should be [0, 1], otherwise it would be
      converted to tf.float32 assuming that the range is [0, MAX], where MAX
      is largest positive representable number for int(8/16/32) data type
      (see `tf.image.convert_image_dtype` for details).
    height: integer, image expected height.
    width: integer, image expected width.
    is_training: Boolean. If true it would transform an image for train,
      otherwise it would transform it for evaluation.
    bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
      where each coordinate is [0, 1) and the coordinates are arranged as
      [ymin, xmin, ymax, xmax]. May be None, in which case
      `preprocess_for_train` uses a box covering the whole image.
    fast_mode: Optional boolean, if True avoids slower transformations.
    add_image_summaries: Enable image summaries.

  Returns:
    3-D float Tensor containing an appropriately scaled image.
  """
  if not is_training:
    return preprocess_for_eval(image, height, width)

  return preprocess_for_train(
      image, height, width, bbox, fast_mode,
      add_image_summaries=add_image_summaries)