# Lint as: python2, python3 | |
# Copyright 2018 The TensorFlow Authors All Rights Reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# ============================================================================== | |
"""Utility functions related to preprocessing inputs.""" | |
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
from six.moves import range | |
from six.moves import zip | |
import tensorflow as tf | |
def flip_dim(tensor_list, prob=0.5, dim=1):
  """Randomly flips a dimension of the given tensors.

  The decision to randomly flip the `Tensors` is made together. In other
  words, all or none of the images passed in are flipped.

  Note that tf.random_flip_left_right and tf.random_flip_up_down are not used
  so that we can control for the probability as well as ensure the same
  decision is applied across the images.

  Args:
    tensor_list: A list of `Tensors` with the same number of dimensions.
    prob: The probability of a left-right flip.
    dim: The dimension to flip, 0, 1, ..

  Returns:
    outputs: A list of the possibly flipped `Tensors` as well as an indicator
      `Tensor` at the end whose value is `True` if the inputs were flipped and
      `False` otherwise.

  Raises:
    ValueError: If dim is negative or greater than the dimension of a
      `Tensor`.
  """
  coin = tf.random_uniform([])

  def _reverse_all():
    # Reverse every tensor along `dim`, validating the dimension per tensor.
    reversed_tensors = []
    for tensor in tensor_list:
      rank = len(tensor.get_shape().as_list())
      if not 0 <= dim < rank:
        raise ValueError('dim must represent a valid dimension.')
      reversed_tensors.append(tf.reverse_v2(tensor, [dim]))
    return reversed_tensors

  is_flipped = tf.less_equal(coin, prob)
  outputs = tf.cond(is_flipped, _reverse_all, lambda: tensor_list)
  if not isinstance(outputs, (list, tuple)):
    outputs = [outputs]
  # Expose the flip decision to the caller as the last element.
  outputs.append(is_flipped)
  return outputs
def _image_dimensions(image, rank):
  """Returns the dimensions of an image tensor.

  Args:
    image: A rank-D Tensor. For 3-D of shape: `[height, width, channels]`.
    rank: The expected rank of the image.

  Returns:
    A list corresponding to the dimensions of the input image. Dimensions
    that are statically known are python integers, otherwise they are integer
    scalar tensors.
  """
  static_shape = image.get_shape()
  if static_shape.is_fully_defined():
    return static_shape.as_list()
  # Fall back to dynamic values only for the statically-unknown dimensions.
  known_dims = static_shape.with_rank(rank).as_list()
  dynamic_dims = tf.unstack(tf.shape(image), rank)
  dims = []
  for known, dynamic in zip(known_dims, dynamic_dims):
    dims.append(dynamic if known is None else known)
  return dims
def get_label_resize_method(label):
  """Returns the resize method of labels depending on label dtype.

  Args:
    label: Groundtruth label tensor.

  Returns:
    tf.image.ResizeMethod.BILINEAR, if label dtype is floating.
    tf.image.ResizeMethod.NEAREST_NEIGHBOR, if label dtype is integer.

  Raises:
    ValueError: If label is neither floating nor integer.
  """
  dtype = label.dtype
  # A dtype is never both integer and floating, so branch order is arbitrary.
  if dtype.is_integer:
    return tf.image.ResizeMethod.NEAREST_NEIGHBOR
  if dtype.is_floating:
    return tf.image.ResizeMethod.BILINEAR
  raise ValueError('Label type must be either floating or integer.')
def pad_to_bounding_box(image, offset_height, offset_width, target_height,
                        target_width, pad_value):
  """Pads the given image with the given pad_value.

  Works like tf.image.pad_to_bounding_box, except it can pad the image
  with any given arbitrary pad value and also handle images whose sizes are
  not known during graph construction.

  Args:
    image: 3-D tensor with shape [height, width, channels]
    offset_height: Number of rows of zeros to add on top.
    offset_width: Number of columns of zeros to add on the left.
    target_height: Height of output image.
    target_width: Width of output image.
    pad_value: Value to pad the image tensor with.

  Returns:
    3-D tensor of shape [target_height, target_width, channels].

  Raises:
    ValueError: If the shape of image is incompatible with the offset_* or
      target_* arguments.
  """
  with tf.name_scope(None, 'pad_to_bounding_box', [image]):
    image = tf.convert_to_tensor(image, name='image')
    original_dtype = image.dtype
    if original_dtype != tf.float32 and original_dtype != tf.float64:
      # If image dtype is not float, we convert it to int32 to avoid overflow.
      image = tf.cast(image, tf.int32)
    image_rank_assert = tf.Assert(
        tf.logical_or(
            tf.equal(tf.rank(image), 3),
            tf.equal(tf.rank(image), 4)),
        ['Wrong image tensor rank.'])
    with tf.control_dependencies([image_rank_assert]):
      # Shift values down by pad_value so that tf.pad's zero padding becomes
      # pad_value once pad_value is added back to the result at the end.
      image -= pad_value
    image_shape = image.get_shape()
    is_batch = True
    if image_shape.ndims == 3:
      # Unbatched input: add a temporary batch dimension, removed at the end.
      is_batch = False
      image = tf.expand_dims(image, 0)
    elif image_shape.ndims is None:
      # Rank unknown statically: assume unbatched and pin the rank to 4.
      is_batch = False
      image = tf.expand_dims(image, 0)
      image.set_shape([None] * 4)
    elif image.get_shape().ndims != 4:
      raise ValueError('Input image must have either 3 or 4 dimensions.')
    # height/width may be python ints or scalar tensors (see _image_dimensions).
    _, height, width, _ = _image_dimensions(image, rank=4)
    target_width_assert = tf.Assert(
        tf.greater_equal(
            target_width, width),
        ['target_width must be >= width'])
    target_height_assert = tf.Assert(
        tf.greater_equal(target_height, height),
        ['target_height must be >= height'])
    # The control dependencies guarantee the size checks run before the
    # padding amounts are used.
    with tf.control_dependencies([target_width_assert]):
      after_padding_width = target_width - offset_width - width
    with tf.control_dependencies([target_height_assert]):
      after_padding_height = target_height - offset_height - height
    offset_assert = tf.Assert(
        tf.logical_and(
            tf.greater_equal(after_padding_width, 0),
            tf.greater_equal(after_padding_height, 0)),
        ['target size not possible with the given target offsets'])
    # Paddings are [before, after] pairs per dimension; batch and channel
    # dimensions are never padded.
    batch_params = tf.stack([0, 0])
    height_params = tf.stack([offset_height, after_padding_height])
    width_params = tf.stack([offset_width, after_padding_width])
    channel_params = tf.stack([0, 0])
    with tf.control_dependencies([offset_assert]):
      paddings = tf.stack([batch_params, height_params, width_params,
                           channel_params])
    padded = tf.pad(image, paddings)
    if not is_batch:
      # Drop the temporary batch dimension added above.
      padded = tf.squeeze(padded, axis=[0])
    # Undo the earlier subtraction so padded regions hold pad_value.
    outputs = padded + pad_value
    if outputs.dtype != original_dtype:
      outputs = tf.cast(outputs, original_dtype)
    return outputs
def _crop(image, offset_height, offset_width, crop_height, crop_width):
  """Crops the given image using the provided offsets and sizes.

  Note that the method doesn't assume we know the input image size but it
  does assume we know the input image rank.

  Args:
    image: an image of shape [height, width, channels].
    offset_height: a scalar tensor indicating the height offset.
    offset_width: a scalar tensor indicating the width offset.
    crop_height: the height of the cropped image.
    crop_width: the width of the cropped image.

  Returns:
    The cropped (and resized) image.

  Raises:
    ValueError: if `image` doesn't have rank of 3.
    InvalidArgumentError: if the rank is not 3 or if the image dimensions are
      less than the crop size.
  """
  dynamic_shape = tf.shape(image)
  static_shape = image.get_shape().as_list()
  if len(static_shape) != 3:
    raise ValueError('input must have rank of 3')
  num_channels = static_shape[2]

  rank_assertion = tf.Assert(
      tf.equal(tf.rank(image), 3),
      ['Rank of image must be equal to 3.'])
  with tf.control_dependencies([rank_assertion]):
    cropped_shape = tf.stack([crop_height, crop_width, dynamic_shape[2]])

  size_assertion = tf.Assert(
      tf.logical_and(
          tf.greater_equal(dynamic_shape[0], crop_height),
          tf.greater_equal(dynamic_shape[1], crop_width)),
      ['Crop size greater than the image size.'])
  offsets = tf.cast(tf.stack([offset_height, offset_width, 0]), tf.int32)

  # Use tf.slice instead of crop_to_bounding_box as it accepts tensors to
  # define the crop size.
  with tf.control_dependencies([size_assertion]):
    cropped = tf.slice(image, offsets, cropped_shape)
  cropped = tf.reshape(cropped, cropped_shape)
  # Restore as much static shape information as we have.
  cropped.set_shape([crop_height, crop_width, num_channels])
  return cropped
def random_crop(image_list, crop_height, crop_width):
  """Crops the given list of images.

  The function applies the same crop to each image in the list. This can be
  effectively applied when there are multiple image inputs of the same
  dimension such as:

    image, depths, normals = random_crop([image, depths, normals], 120, 150)

  Args:
    image_list: a list of image tensors of the same dimension but possibly
      varying channel.
    crop_height: the new height.
    crop_width: the new width.

  Returns:
    the image_list with cropped images.

  Raises:
    ValueError: if there are multiple image inputs provided with different
      size or the images are smaller than the crop dimensions.
  """
  if not image_list:
    raise ValueError('Empty image_list.')

  # Compute the rank assertions.
  rank_assertions = []
  for i in range(len(image_list)):
    image_rank = tf.rank(image_list[i])
    rank_assert = tf.Assert(
        tf.equal(image_rank, 3),
        ['Wrong rank for tensor %s [expected] [actual]',
         image_list[i].name, 3, image_rank])
    rank_assertions.append(rank_assert)

  # The first image defines the reference height/width; its shape is only
  # read after its rank assertion has run.
  with tf.control_dependencies([rank_assertions[0]]):
    image_shape = tf.shape(image_list[0])
  image_height = image_shape[0]
  image_width = image_shape[1]
  crop_size_assert = tf.Assert(
      tf.logical_and(
          tf.greater_equal(image_height, crop_height),
          tf.greater_equal(image_width, crop_width)),
      ['Crop size greater than the image size.'])

  asserts = [rank_assertions[0], crop_size_assert]

  # Every remaining image must match the reference height/width exactly.
  for i in range(1, len(image_list)):
    image = image_list[i]
    asserts.append(rank_assertions[i])
    with tf.control_dependencies([rank_assertions[i]]):
      shape = tf.shape(image)
    height = shape[0]
    width = shape[1]

    height_assert = tf.Assert(
        tf.equal(height, image_height),
        ['Wrong height for tensor %s [expected][actual]',
         image.name, height, image_height])
    width_assert = tf.Assert(
        tf.equal(width, image_width),
        ['Wrong width for tensor %s [expected][actual]',
         image.name, width, image_width])
    asserts.extend([height_assert, width_assert])

  # Create a random bounding box.
  #
  # Use tf.random_uniform and not numpy.random.rand as doing the former would
  # generate random numbers at graph eval time, unlike the latter which
  # generates random numbers at graph definition time.
  with tf.control_dependencies(asserts):
    # +1 because maxval is exclusive; offset may be anywhere in
    # [0, image_dim - crop_dim].
    max_offset_height = tf.reshape(image_height - crop_height + 1, [])
    max_offset_width = tf.reshape(image_width - crop_width + 1, [])
  offset_height = tf.random_uniform(
      [], maxval=max_offset_height, dtype=tf.int32)
  offset_width = tf.random_uniform(
      [], maxval=max_offset_width, dtype=tf.int32)

  # Apply the single random offset to every image in the list.
  return [_crop(image, offset_height, offset_width,
                crop_height, crop_width) for image in image_list]
def get_random_scale(min_scale_factor, max_scale_factor, step_size):
  """Gets a random scale value.

  Args:
    min_scale_factor: Minimum scale value.
    max_scale_factor: Maximum scale value.
    step_size: The step size from minimum to maximum value.

  Returns:
    A scalar float32 tensor holding a random scale value selected between
    minimum and maximum value.

  Raises:
    ValueError: min_scale_factor has unexpected value.
  """
  if min_scale_factor < 0 or min_scale_factor > max_scale_factor:
    raise ValueError('Unexpected value of min_scale_factor.')

  if min_scale_factor == max_scale_factor:
    return tf.cast(min_scale_factor, tf.float32)

  # When step_size = 0, we sample the value uniformly from [min, max).
  if step_size == 0:
    # Fix: sample with shape [] so this branch returns a scalar like the
    # other two branches (previously it returned a shape-[1] tensor, which
    # broadcast the same way downstream but was inconsistent with the
    # documented scalar return).
    return tf.random_uniform([],
                             minval=min_scale_factor,
                             maxval=max_scale_factor)

  # When step_size != 0, we randomly select one discrete value from
  # [min, max] on an evenly spaced grid.
  num_steps = int((max_scale_factor - min_scale_factor) / step_size + 1)
  scale_factors = tf.lin_space(min_scale_factor, max_scale_factor, num_steps)
  shuffled_scale_factors = tf.random_shuffle(scale_factors)
  return shuffled_scale_factors[0]
def randomly_scale_image_and_label(image, label=None, scale=1.0):
  """Randomly scales image and label.

  Args:
    image: Image with shape [height, width, 3].
    label: Label with shape [height, width, 1].
    scale: The value to scale image and label.

  Returns:
    Scaled image and label.
  """
  # A scale of exactly 1 is a no-op.
  if scale == 1.0:
    return image, label

  shape = tf.shape(image)
  new_dim = tf.cast(
      tf.cast([shape[0], shape[1]], tf.float32) * scale,
      tf.int32)

  # resize_bilinear operates on 4-D batches, hence the expand/squeeze pair.
  resized_image = tf.image.resize_bilinear(
      tf.expand_dims(image, 0), new_dim, align_corners=True)
  image = tf.squeeze(resized_image, [0])

  if label is not None:
    # Nearest neighbor for integer labels, bilinear for floating ones.
    label = tf.image.resize(
        label,
        new_dim,
        method=get_label_resize_method(label),
        align_corners=True)

  return image, label
def resolve_shape(tensor, rank=None, scope=None):
  """Fully resolves the shape of a Tensor.

  Use as much as possible the shape components already known during graph
  creation and resolve the remaining ones during runtime.

  Args:
    tensor: Input tensor whose shape we query.
    rank: The rank of the tensor, provided that we know it.
    scope: Optional name scope.

  Returns:
    shape: The full shape of the tensor.
  """
  with tf.name_scope(scope, 'resolve_shape', [tensor]):
    if rank is None:
      shape = tensor.get_shape().as_list()
    else:
      shape = tensor.get_shape().with_rank(rank).as_list()

    # Substitute dynamic dimensions only where static information is missing.
    if None in shape:
      dynamic_shape = tf.shape(tensor)
      shape = [
          dynamic_shape[i] if dim is None else dim
          for i, dim in enumerate(shape)
      ]
    return shape
def resize_to_range(image,
                    label=None,
                    min_size=None,
                    max_size=None,
                    factor=None,
                    keep_aspect_ratio=True,
                    align_corners=True,
                    label_layout_is_chw=False,
                    scope=None,
                    method=tf.image.ResizeMethod.BILINEAR):
  """Resizes image or label so their sides are within the provided range.

  The output size can be described by two cases:
  1. If the image can be rescaled so its minimum size is equal to min_size
     without the other side exceeding max_size, then do so.
  2. Otherwise, resize so the largest side is equal to max_size.

  An integer in `range(factor)` is added to the computed sides so that the
  final dimensions are multiples of `factor` plus one.

  Args:
    image: A 3D tensor of shape [height, width, channels].
    label: (optional) A 3D tensor of shape [height, width, channels]
      (default) or [channels, height, width] when label_layout_is_chw = True.
    min_size: (scalar) desired size of the smaller image side.
    max_size: (scalar) maximum allowed size of the larger image side. Note
      that the output dimension is no larger than max_size and may be
      slightly smaller than max_size when factor is not None.
    factor: Make output size multiple of factor plus one.
    keep_aspect_ratio: Boolean, keep aspect ratio or not. If True, the input
      will be resized while keeping the original aspect ratio. If False, the
      input will be resized to [max_resize_value, max_resize_value] without
      keeping the original aspect ratio.
    align_corners: If True, exactly align all 4 corners of input and output.
    label_layout_is_chw: If true, the label has shape
      [channel, height, width]. We support this case because for some
      instance segmentation dataset, the instance segmentation is saved as
      [num_instances, height, width].
    scope: Optional name scope.
    method: Image resize method. Defaults to tf.image.ResizeMethod.BILINEAR.

  Returns:
    A 3-D tensor of shape [new_height, new_width, channels], where the image
    has been resized (with the specified method) so that
    min(new_height, new_width) == ceil(min_size) or
    max(new_height, new_width) == ceil(max_size).

  Raises:
    ValueError: If the image is not a 3D tensor.
  """
  with tf.name_scope(scope, 'resize_to_range', [image]):
    new_tensor_list = []
    min_size = tf.cast(min_size, tf.float32)
    if max_size is not None:
      max_size = tf.cast(max_size, tf.float32)
      # Modify the max_size to be a multiple of factor plus 1 and make sure
      # the max dimension after resizing is no larger than max_size.
      if factor is not None:
        max_size = (max_size - (max_size - 1) % factor)

    [orig_height, orig_width, _] = resolve_shape(image, rank=3)
    orig_height = tf.cast(orig_height, tf.float32)
    orig_width = tf.cast(orig_width, tf.float32)
    orig_min_size = tf.minimum(orig_height, orig_width)

    # Calculate the larger of the possible sizes: scale so the smaller side
    # hits min_size.
    large_scale_factor = min_size / orig_min_size
    large_height = tf.cast(tf.floor(orig_height * large_scale_factor),
                           tf.int32)
    large_width = tf.cast(tf.floor(orig_width * large_scale_factor), tf.int32)
    large_size = tf.stack([large_height, large_width])

    new_size = large_size
    if max_size is not None:
      # Calculate the smaller of the possible sizes, use that if the larger
      # is too big: scale so the larger side hits max_size.
      orig_max_size = tf.maximum(orig_height, orig_width)
      small_scale_factor = max_size / orig_max_size
      small_height = tf.cast(
          tf.floor(orig_height * small_scale_factor), tf.int32)
      small_width = tf.cast(tf.floor(orig_width * small_scale_factor),
                            tf.int32)
      small_size = tf.stack([small_height, small_width])
      # Choose between the two candidates at graph-eval time.
      new_size = tf.cond(
          tf.cast(tf.reduce_max(large_size), tf.float32) > max_size,
          lambda: small_size,
          lambda: large_size)
    # Ensure that both output sides are multiples of factor plus one.
    if factor is not None:
      new_size += (factor - (new_size - 1) % factor) % factor
    if not keep_aspect_ratio:
      # If not keep the aspect ratio, we resize everything to max_size,
      # allowing us to do pre-processing without extra padding.
      new_size = [tf.reduce_max(new_size), tf.reduce_max(new_size)]
    new_tensor_list.append(tf.image.resize(
        image, new_size, method=method, align_corners=align_corners))
    if label is not None:
      if label_layout_is_chw:
        # Input label has shape [channel, height, width]; resize operates on
        # the trailing spatial dims, so add and later remove a unit axis.
        resized_label = tf.expand_dims(label, 3)
        resized_label = tf.image.resize(
            resized_label,
            new_size,
            method=get_label_resize_method(label),
            align_corners=align_corners)
        resized_label = tf.squeeze(resized_label, 3)
      else:
        # Input label has shape [height, width, channel].
        resized_label = tf.image.resize(
            label,
            new_size,
            method=get_label_resize_method(label),
            align_corners=align_corners)
      new_tensor_list.append(resized_label)
    else:
      new_tensor_list.append(None)
    return new_tensor_list