|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""blob helper functions.""" |
|
|
|
from __future__ import absolute_import |
|
from __future__ import division |
|
from __future__ import print_function |
|
from __future__ import unicode_literals |
|
|
|
from six.moves import cPickle as pickle |
|
import numpy as np |
|
import cv2 |
|
|
|
from models.core.config import cfg |
|
|
|
|
|
def get_image_blob(im, target_scale, target_max_size):
    """Turn a single image into a network input blob plus its metadata.

    Arguments:
        im (ndarray): a color image in BGR order
        target_scale: target size passed (as a one-element list) to
            prep_im_for_blob
        target_max_size: cap on the resized size, forwarded to
            prep_im_for_blob

    Returns:
        blob (ndarray): 4D blob built by im_list_to_blob from the resized image
        im_scale (list): the scale list returned by prep_im_for_blob; it holds
            one entry because exactly one target scale is requested
        im_info (ndarray): float32 array [[blob height, blob width, scale]]
    """
    resized_ims, scales = prep_im_for_blob(
        im, cfg.PIXEL_MEANS, [target_scale], target_max_size
    )
    blob = im_list_to_blob(resized_ims)

    # Height and width are read from the blob itself, so they include any
    # padding that im_list_to_blob / get_max_shape added to the image.
    blob_height = blob.shape[2]
    blob_width = blob.shape[3]
    info_row = np.hstack((blob_height, blob_width, scales))
    im_info = info_row[np.newaxis, :]
    return blob, scales, im_info.astype(np.float32)
|
|
|
|
|
def im_list_to_blob(ims):
    """Pack one image or a list of images into a single network input blob.

    The images are expected to come from prep_im_for_blob or an equivalent
    routine, i.e. they are:
      - in BGR channel order
      - mean-subtracted
      - already resized to the desired input size
      - float32 numpy ndarrays

    Every image is copied into the top-left corner of a zero-filled canvas
    whose spatial size is given by get_max_shape, so smaller images end up
    zero-padded on the bottom and right.

    Returns a 4D NCHW float32 tensor of shape (num_images, 3, height, width),
    with the images stacked along axis 0.
    """
    if not isinstance(ims, list):
        ims = [ims]

    padded_hw = get_max_shape([img.shape[:2] for img in ims])
    batch = np.zeros(
        (len(ims), padded_hw[0], padded_hw[1], 3), dtype=np.float32
    )
    for idx, img in enumerate(ims):
        rows, cols = img.shape[0], img.shape[1]
        batch[idx, :rows, :cols, :] = img

    # Reorder axes from (batch, height, width, channel)
    # to (batch, channel, height, width).
    return batch.transpose((0, 3, 1, 2))
|
|
|
|
|
def get_max_shape(im_shapes):
    """Return the largest (h, w) over a list of image shapes, for batching.

    When cfg.FPN.FPN_ON is set, each dimension is additionally rounded up to
    the next multiple of cfg.FPN.COARSEST_STRIDE.
    """
    largest = np.array(im_shapes).max(axis=0)
    assert largest.size == 2

    if not cfg.FPN.FPN_ON:
        return largest

    stride = float(cfg.FPN.COARSEST_STRIDE)
    for axis in (0, 1):
        # Round this dimension up to a whole number of strides.
        largest[axis] = int(np.ceil(largest[axis] / stride) * stride)
    return largest
|
|
|
|
|
def prep_im_for_blob(im, pixel_means, target_sizes, max_size):
    """Prepare an image for use as a network input blob.

    Processing steps:
      - Convert to float32 (always on a private copy of the input)
      - Subtract the per-channel pixel mean
      - Rescale once per entry of target_sizes (each scale capped via max_size,
        see get_target_scale)

    Arguments:
        im (ndarray): input image; it is never modified
        pixel_means: values subtracted from the image (broadcast by numpy)
        target_sizes (iterable): target sizes, one output image per entry
        max_size: upper bound forwarded to get_target_scale

    Returns:
        ims (list): resized images, one per target size
        im_scales (list): scale factor used for each returned image
    """
    # astype() copies by default. The previous copy=False form returned the
    # caller's own array whenever it was already float32, so the in-place
    # mean subtraction below silently altered the caller's image.
    im = im.astype(np.float32)
    im -= pixel_means  # in-place keeps the float32 dtype

    im_size_min = np.min(im.shape[0:2])
    im_size_max = np.max(im.shape[0:2])

    ims = []
    im_scales = []
    for target_size in target_sizes:
        im_scale = get_target_scale(
            im_size_min, im_size_max, target_size, max_size
        )
        im_resized = cv2.resize(
            im,
            None,
            None,
            fx=im_scale,
            fy=im_scale,
            interpolation=cv2.INTER_LINEAR,
        )
        ims.append(im_resized)
        im_scales.append(im_scale)
    return ims, im_scales
|
|
|
|
|
def get_im_blob_sizes(im_shape, target_sizes, max_size):
    """Calculate the im blob size for each target size given the image shape.

    Arguments:
        im_shape: original image shape; an ndarray, tuple or list is accepted
        target_sizes (iterable): target sizes to evaluate
        max_size: upper bound forwarded to get_target_scale

    Returns:
        ndarray with one row per target size holding the rounded, rescaled
        shape.
    """
    # Coerce to an ndarray so the elementwise multiply below also works for
    # plain tuples/lists (e.g. im.shape[:2]); sequence * float raises
    # TypeError otherwise. ndarray inputs pass through unchanged.
    im_shape = np.asarray(im_shape)
    im_size_min = np.min(im_shape)
    im_size_max = np.max(im_shape)

    im_sizes = []
    for target_size in target_sizes:
        im_scale = get_target_scale(
            im_size_min, im_size_max, target_size, max_size
        )
        im_sizes.append(np.round(im_shape * im_scale))
    return np.array(im_sizes)
|
|
|
|
|
def get_target_scale(im_size_min, im_size_max, target_size, max_size):
    """Compute the resize factor for an image.

    The factor first maps the smaller side (im_size_min) onto target_size. If
    that would make the larger side (im_size_max), after rounding, exceed
    max_size, the factor that maps the larger side onto max_size is returned
    instead.
    """
    scale = float(target_size) / float(im_size_min)
    scaled_long_side = np.round(scale * im_size_max)
    if scaled_long_side > max_size:
        # Larger side would overshoot the cap: fit it to max_size instead.
        return float(max_size) / float(im_size_max)
    return scale
|
|
|
|
|
def zeros(shape, int32=False):
    """Create a zero-filled blob of the requested shape.

    The element type is int32 when ``int32`` is truthy, float32 otherwise.
    """
    if int32:
        return np.zeros(shape, dtype=np.int32)
    return np.zeros(shape, dtype=np.float32)
|
|
|
|
|
def ones(shape, int32=False):
    """Create a blob of the requested shape with every element set to one.

    The element type is int32 when ``int32`` is truthy, float32 otherwise.
    """
    if int32:
        return np.ones(shape, dtype=np.int32)
    return np.ones(shape, dtype=np.float32)
|
|
|
|
|
def serialize(obj):
    """Serialize a Python object using pickle and encode it as an array of
    float32 values so that it can be fed into the workspace. See deserialize().

    Arguments:
        obj: any picklable Python object

    Returns:
        1D float32 ndarray with one element per byte of the pickle stream.
    """
    pickled = pickle.dumps(obj)
    # np.frombuffer replaces the binary mode of np.fromstring, which is
    # deprecated and rejected by recent NumPy releases. frombuffer yields a
    # read-only view of the bytes; astype() then makes a fresh writable copy.
    return np.frombuffer(pickled, dtype=np.uint8).astype(np.float32)
|
|
|
|
|
def deserialize(arr):
    """Rebuild a Python object from an array of float32 values fetched from a
    workspace. This is the inverse of serialize().

    NOTE: the bytes are handed to pickle.loads, which can execute arbitrary
    code; only pass arrays that originate from serialize().
    """
    # Each array element carries one byte of the pickle stream.
    as_bytes = arr.astype(np.uint8)
    payload = as_bytes.tobytes()
    return pickle.loads(payload)
|
|