# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
"""blob helper functions.""" | |
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
from __future__ import unicode_literals | |
from six.moves import cPickle as pickle | |
import numpy as np | |
import cv2 | |
from models.core.config import cfg | |


def get_image_blob(im, target_scale, target_max_size):
    """Convert an image into a network input.

    Arguments:
        im (ndarray): a color image in BGR order

    Returns:
        blob (ndarray): a data blob holding an image pyramid
        im_scale (list): one-element list with the image scale factor,
            (target size) / (original size)
        im_info (ndarray): shape (1, 3) array of [blob height, blob width, scale]
    """
    processed_im, im_scale = prep_im_for_blob(
        im, cfg.PIXEL_MEANS, [target_scale], target_max_size
    )
    blob = im_list_to_blob(processed_im)
    # NOTE: this height and width may be larger than the actual scaled input
    # image due to the FPN.COARSEST_STRIDE related padding in im_list_to_blob.
    # We are maintaining this behavior for now to make existing results exactly
    # reproducible (in practice using the true input image height and width
    # yields nearly the same results, but they are sometimes slightly different
    # because predictions near the edge of the image will be pruned more
    # aggressively).
    height, width = blob.shape[2], blob.shape[3]
    im_info = np.hstack((height, width, im_scale))[np.newaxis, :]
    return blob, im_scale, im_info.astype(np.float32)
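

# Hedged usage sketch (added, not part of the original module): shows how
# get_image_blob is typically called at test time. The function name, the
# image path, and the scale values (800 / 1333) are illustrative assumptions,
# not values defined by this module or its config.
def _example_get_image_blob(image_path='example.jpg'):
    im = cv2.imread(image_path)  # BGR uint8 image, as get_image_blob expects
    blob, im_scale, im_info = get_image_blob(im, 800, 1333)
    # blob: (1, 3, H, W) float32; im_info: (1, 3) array of [H, W, scale]
    return blob, im_scale, im_info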


def im_list_to_blob(ims):
    """Convert a list of images into a network input. Assumes images were
    prepared using prep_im_for_blob or equivalent, i.e.:
      - BGR channel order
      - pixel means subtracted
      - resized to the desired input size
      - float32 numpy ndarray format
    Output is a 4D NCHW tensor of the images concatenated along axis 0, with
    each image zero-padded on the bottom and right to the max height and
    width in the list.
    """
    if not isinstance(ims, list):
        ims = [ims]
    max_shape = get_max_shape([im.shape[:2] for im in ims])

    num_images = len(ims)
    blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), dtype=np.float32)
    for i in range(num_images):
        im = ims[i]
        blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
    # Move channels (axis 3) to axis 1
    # Axis order will become: (batch elem, channel, height, width)
    channel_swap = (0, 3, 1, 2)
    blob = blob.transpose(channel_swap)
    return blob
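

# Illustrative sketch (added, hypothetical name): two differently sized images
# are zero-padded to a common spatial size and stacked into one NCHW blob.
# With cfg.FPN.FPN_ON disabled the blob below has shape (2, 3, 600, 800); with
# FPN on, the spatial size is further padded up to cfg.FPN.COARSEST_STRIDE.
def _example_im_list_to_blob():
    ims = [
        np.random.rand(600, 800, 3).astype(np.float32),
        np.random.rand(480, 640, 3).astype(np.float32),
    ]
    blob = im_list_to_blob(ims)
    # The smaller image occupies blob[1, :, :480, :640]; the rest is zeros
    return blob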


def get_max_shape(im_shapes):
    """Calculate the max spatial size (h, w) for batching given a list of
    image shapes.
    """
    max_shape = np.array(im_shapes).max(axis=0)
    assert max_shape.size == 2
    # Pad the blob so its spatial dimensions are divisible by the coarsest
    # FPN stride
    if cfg.FPN.FPN_ON:
        stride = float(cfg.FPN.COARSEST_STRIDE)
        max_shape[0] = int(np.ceil(max_shape[0] / stride) * stride)
        max_shape[1] = int(np.ceil(max_shape[1] / stride) * stride)
    return max_shape
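

# Worked example (added for clarity, hypothetical name): for shapes (600, 797)
# and (480, 640), the elementwise max is (600, 797). With cfg.FPN.FPN_ON False
# that is returned as-is; with FPN on and cfg.FPN.COARSEST_STRIDE == 32, both
# dimensions are rounded up to the next multiple of 32, giving [608, 800].
def _example_get_max_shape():
    return get_max_shape([(600, 797), (480, 640)])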


def prep_im_for_blob(im, pixel_means, target_sizes, max_size):
    """Prepare an image for use as a network input blob. Specifically:
      - Subtract the per-channel pixel mean
      - Convert to float32
      - Rescale to each of the specified target sizes (capped at max_size)
    Returns a list of transformed images, one for each target size. Also
    returns the scale factors that were used to compute each returned image.
    """
    im = im.astype(np.float32, copy=False)
    im -= pixel_means

    im_shape = im.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])

    ims = []
    im_scales = []
    for target_size in target_sizes:
        im_scale = get_target_scale(im_size_min, im_size_max, target_size, max_size)
        im_resized = cv2.resize(
            im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR
        )
        ims.append(im_resized)
        im_scales.append(im_scale)
    return ims, im_scales
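

# Hedged sketch (added, not in the original module): prepares one image at two
# target scales. cfg.PIXEL_MEANS is the per-channel BGR mean from the config;
# the target sizes 600/800 and max_size 1333 are illustrative assumptions.
def _example_prep_im_for_blob(im):
    ims, im_scales = prep_im_for_blob(im, cfg.PIXEL_MEANS, [600, 800], 1333)
    # len(ims) == len(im_scales) == 2; each ims[i] is float32, mean-subtracted,
    # and resized by the matching im_scales[i]
    return ims, im_scales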


def get_im_blob_sizes(im_shape, target_sizes, max_size):
    """Calculate im blob sizes for multiple target_sizes given the original
    im shape.
    """
    im_size_min = np.min(im_shape)
    im_size_max = np.max(im_shape)
    im_sizes = []
    for target_size in target_sizes:
        im_scale = get_target_scale(im_size_min, im_size_max, target_size, max_size)
        im_sizes.append(np.round(im_shape * im_scale))
    return np.array(im_sizes)


def get_target_scale(im_size_min, im_size_max, target_size, max_size):
    """Calculate the target resize scale."""
    im_scale = float(target_size) / float(im_size_min)
    # Prevent the biggest axis from being more than max_size
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    return im_scale
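

# Worked example (added, hypothetical name): for a 480x640 image with
# target_size=800 and max_size=1333, the scale is 800 / 480 ~= 1.667 and the
# longer side becomes round(640 * 1.667) = 1067 <= 1333, so the scale is kept.
# If max_size were 1000 instead, the scale would be capped at 1000 / 640 = 1.5625.
def _example_get_target_scale():
    return get_target_scale(480, 640, 800, 1333)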


def zeros(shape, int32=False):
    """Return a blob of all zeros of the given shape with the correct float or
    int data type.
    """
    return np.zeros(shape, dtype=np.int32 if int32 else np.float32)


def ones(shape, int32=False):
    """Return a blob of all ones of the given shape with the correct float or
    int data type.
    """
    return np.ones(shape, dtype=np.int32 if int32 else np.float32)


def serialize(obj):
    """Serialize a Python object using pickle and encode it as an array of
    float32 values so that it can be fed into the workspace. See deserialize().
    """
    # Interpret the pickled bytes as uint8 values, then cast to float32
    return np.frombuffer(pickle.dumps(obj), dtype=np.uint8).astype(np.float32)


def deserialize(arr):
    """Deserialize a Python object from an array of float32 values fetched
    from a workspace. See serialize().
    """
    return pickle.loads(arr.astype(np.uint8).tobytes())
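

# Round-trip sketch (added, not part of the original module): any picklable
# Python object should survive serialize() followed by deserialize(); the
# object contents below are illustrative only.
def _example_serialize_roundtrip():
    obj = {'boxes': np.zeros((2, 4), dtype=np.float32), 'label': 'person'}
    arr = serialize(obj)          # float32 ndarray of the pickled bytes
    restored = deserialize(arr)   # same structure and values as obj
    return restored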