File size: 6,415 Bytes
2252f3d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
"""blob helper functions."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from six.moves import cPickle as pickle
import numpy as np
import cv2
from models.core.config import cfg
def get_image_blob(im, target_scale, target_max_size):
"""Convert an image into a network input.
Arguments:
im (ndarray): a color image in BGR order
Returns:
blob (ndarray): a data blob holding an image pyramid
im_scale (float): image scale (target size) / (original size)
im_info (ndarray)
"""
processed_im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, [target_scale], target_max_size)
blob = im_list_to_blob(processed_im)
# NOTE: this height and width may be larger than actual scaled input image
# due to the FPN.COARSEST_STRIDE related padding in im_list_to_blob. We are
# maintaining this behavior for now to make existing results exactly
# reproducible (in practice using the true input image height and width
# yields nearly the same results, but they are sometimes slightly different
# because predictions near the edge of the image will be pruned more
# aggressively).
height, width = blob.shape[2], blob.shape[3]
im_info = np.hstack((height, width, im_scale))[np.newaxis, :]
return blob, im_scale, im_info.astype(np.float32)
def im_list_to_blob(ims):
"""Convert a list of images into a network input. Assumes images were
prepared using prep_im_for_blob or equivalent: i.e.
- BGR channel order
- pixel means subtracted
- resized to the desired input size
- float32 numpy ndarray format
Output is a 4D HCHW tensor of the images concatenated along axis 0 with
shape.
"""
if not isinstance(ims, list):
ims = [ims]
max_shape = get_max_shape([im.shape[:2] for im in ims])
num_images = len(ims)
blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), dtype=np.float32)
for i in range(num_images):
im = ims[i]
blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
# Move channels (axis 3) to axis 1
# Axis order will become: (batch elem, channel, height, width)
channel_swap = (0, 3, 1, 2)
blob = blob.transpose(channel_swap)
return blob
def get_max_shape(im_shapes):
"""Calculate max spatial size (h, w) for batching given a list of image shapes
"""
max_shape = np.array(im_shapes).max(axis=0)
assert max_shape.size == 2
# Pad the image so they can be divisible by a stride
if cfg.FPN.FPN_ON:
stride = float(cfg.FPN.COARSEST_STRIDE)
max_shape[0] = int(np.ceil(max_shape[0] / stride) * stride)
max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride)
return max_shape
def prep_im_for_blob(im, pixel_means, target_sizes, max_size):
"""Prepare an image for use as a network input blob. Specially:
- Subtract per-channel pixel mean
- Convert to float32
- Rescale to each of the specified target size (capped at max_size)
Returns a list of transformed images, one for each target size. Also returns
the scale factors that were used to compute each returned image.
"""
im = im.astype(np.float32, copy=False)
im -= pixel_means
im_shape = im.shape
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
ims = []
im_scales = []
for target_size in target_sizes:
im_scale = get_target_scale(im_size_min, im_size_max, target_size, max_size)
im_resized = cv2.resize(
im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR
)
ims.append(im_resized)
im_scales.append(im_scale)
return ims, im_scales
def get_im_blob_sizes(im_shape, target_sizes, max_size):
"""Calculate im blob size for multiple target_sizes given original im shape
"""
im_size_min = np.min(im_shape)
im_size_max = np.max(im_shape)
im_sizes = []
for target_size in target_sizes:
im_scale = get_target_scale(im_size_min, im_size_max, target_size, max_size)
im_sizes.append(np.round(im_shape * im_scale))
return np.array(im_sizes)
def get_target_scale(im_size_min, im_size_max, target_size, max_size):
"""Calculate target resize scale
"""
im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than max_size
if np.round(im_scale * im_size_max) > max_size:
im_scale = float(max_size) / float(im_size_max)
return im_scale
def zeros(shape, int32=False):
"""Return a blob of all zeros of the given shape with the correct float or
int data type.
"""
return np.zeros(shape, dtype=np.int32 if int32 else np.float32)
def ones(shape, int32=False):
"""Return a blob of all ones of the given shape with the correct float or
int data type.
"""
return np.ones(shape, dtype=np.int32 if int32 else np.float32)
def serialize(obj):
"""Serialize a Python object using pickle and encode it as an array of
float32 values so that it can be feed into the workspace. See deserialize().
"""
return np.fromstring(pickle.dumps(obj), dtype=np.uint8).astype(np.float32)
def deserialize(arr):
"""Unserialize a Python object from an array of float32 values fetched from
a workspace. See serialize().
"""
return pickle.loads(arr.astype(np.uint8).tobytes())
|