import face_alignment
import skimage.io
import numpy
from argparse import ArgumentParser
from skimage import img_as_ubyte
from skimage.transform import resize
from tqdm import tqdm
import os
import imageio
import numpy as np
import warnings
warnings.filterwarnings("ignore")


def extract_bbox(frame, fa):
    """Detect face bounding boxes in a single frame, downscaling large frames to at most 640 px first."""
    if max(frame.shape[0], frame.shape[1]) > 640:
        scale_factor = max(frame.shape[0], frame.shape[1]) / 640.0
        frame = resize(frame, (int(frame.shape[0] / scale_factor), int(frame.shape[1] / scale_factor)))
        frame = img_as_ubyte(frame)
    else:
        scale_factor = 1
    # Keep only the first three (color) channels.
    frame = frame[..., :3]
    # The detector is given the channel-reversed (RGB -> BGR) image.
    bboxes = fa.face_detector.detect_from_image(frame[..., ::-1])
    if len(bboxes) == 0:
        return []
    # Drop the trailing confidence column and rescale boxes back to the original resolution.
    return np.array(bboxes)[:, :-1] * scale_factor


def bb_intersection_over_union(boxA, boxB):
    """Intersection-over-union of two boxes given as (left, top, right, bottom)."""
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1)
    boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
    iou = interArea / float(boxAArea + boxBArea - interArea)
    return iou
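# Illustrative sanity checks for bb_intersection_over_union (values chosen purely
# for demonstration): identical boxes give an IoU of 1.0, disjoint boxes give 0.0.
#   bb_intersection_over_union((0, 0, 10, 10), (0, 0, 10, 10))   -> 1.0
#   bb_intersection_over_union((0, 0, 10, 10), (20, 20, 30, 30)) -> 0.0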
def join(tube_bbox, bbox):
    """Return the smallest box that encloses both tube_bbox and bbox."""
    xA = min(tube_bbox[0], bbox[0])
    yA = min(tube_bbox[1], bbox[1])
    xB = max(tube_bbox[2], bbox[2])
    yB = max(tube_bbox[3], bbox[3])
    return (xA, yA, xB, yB)


def compute_bbox(start, end, fps, tube_bbox, frame_shape, inp, image_shape, increase_area=0.1):
    """Expand a trajectory bounding box and build the ffmpeg command that crops it out."""
    left, top, right, bot = tube_bbox
    width = right - left
    height = bot - top

    # Expand the box so it becomes square, padded on each side by increase_area.
    width_increase = max(increase_area, ((1 + 2 * increase_area) * height - width) / (2 * width))
    height_increase = max(increase_area, ((1 + 2 * increase_area) * width - height) / (2 * height))

    left = int(left - width_increase * width)
    top = int(top - height_increase * height)
    right = int(right + width_increase * width)
    bot = int(bot + height_increase * height)

    # Clamp the expanded box to the frame boundaries.
    top, bot, left, right = max(0, top), min(bot, frame_shape[0]), max(0, left), min(right, frame_shape[1])
    h, w = bot - top, right - left

    # Convert frame indices to timestamps in seconds.
    start = start / fps
    end = end / fps
    time = end - start

    scale = f'{image_shape[0]}:{image_shape[1]}'

    return f'ffmpeg -i {inp} -ss {start} -t {time} -filter:v "crop={w}:{h}:{left}:{top}, scale={scale}" crop.mp4'
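# For reference, a command produced by compute_bbox looks like the following; the
# numbers are illustrative placeholders, not output from a real run:
#   ffmpeg -i input.mp4 -ss 1.2 -t 5.0 -filter:v "crop=310:310:120:45, scale=256:256" crop.mp4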
def compute_bbox_trajectories(trajectories, fps, frame_shape, args):
    """Turn every trajectory longer than args.min_frames into an ffmpeg crop command."""
    commands = []
    for i, (bbox, tube_bbox, start, end) in enumerate(trajectories):
        if (end - start) > args.min_frames:
            command = compute_bbox(start, end, fps, tube_bbox, frame_shape, inp=args.inp,
                                   image_shape=args.image_shape, increase_area=args.increase)
            commands.append(command)
    return commands


def process_video(args):
    """Track face bounding boxes across the video and return one ffmpeg crop command per trajectory."""
    device = 'cpu' if args.cpu else 'cuda'
    fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=False, device=device)
    video = imageio.get_reader(args.inp)

    trajectories = []
    previous_frame = None
    fps = video.get_meta_data()['fps']
    commands = []
    try:
        for i, frame in tqdm(enumerate(video)):
            frame_shape = frame.shape
            bboxes = extract_bbox(frame, fa)

            # Keep trajectories whose initial box still overlaps some detection; finish the rest.
            not_valid_trajectories = []
            valid_trajectories = []

            for trajectory in trajectories:
                tube_bbox = trajectory[0]
                intersection = 0
                for bbox in bboxes:
                    intersection = max(intersection, bb_intersection_over_union(tube_bbox, bbox))
                if intersection > args.iou_with_initial:
                    valid_trajectories.append(trajectory)
                else:
                    not_valid_trajectories.append(trajectory)

            # Trajectories that lost their face are converted to crop commands immediately.
            commands += compute_bbox_trajectories(not_valid_trajectories, fps, frame_shape, args)
            trajectories = valid_trajectories

            # Assign each detection to the best-overlapping trajectory, or start a new one.
            for bbox in bboxes:
                intersection = 0
                current_trajectory = None
                for trajectory in trajectories:
                    tube_bbox = trajectory[0]
                    current_intersection = bb_intersection_over_union(tube_bbox, bbox)
                    if intersection < current_intersection and current_intersection > args.iou_with_initial:
                        intersection = current_intersection
                        current_trajectory = trajectory

                if current_trajectory is None:
                    # A trajectory is stored as [initial_bbox, tube_bbox, start_frame, end_frame].
                    trajectories.append([bbox, bbox, i, i])
                else:
                    current_trajectory[3] = i
                    current_trajectory[1] = join(current_trajectory[1], bbox)

    except IndexError:
        raise

    # Emit commands for the trajectories that were still active at the end of the video.
    commands += compute_bbox_trajectories(trajectories, fps, frame_shape, args)
    return commands


if __name__ == "__main__": |
|
parser = ArgumentParser() |
|
|
|
parser.add_argument("--image_shape", default=(256, 256), type=lambda x: tuple(map(int, x.split(','))), |
|
help="Image shape") |
|
parser.add_argument("--increase", default=0.1, type=float, help='Increase bbox by this amount') |
|
parser.add_argument("--iou_with_initial", type=float, default=0.25, help="The minimal allowed iou with inital bbox") |
|
parser.add_argument("--inp", required=True, help='Input image or video') |
|
parser.add_argument("--min_frames", type=int, default=150, help='Minimum number of frames') |
|
parser.add_argument("--cpu", dest="cpu", action="store_true", help="cpu mode.") |
|
|
|
|
|
args = parser.parse_args() |
|
|
|
commands = process_video(args) |
|
for command in commands: |
|
print (command) |
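# Example invocation (the script name and input path are placeholders); the script
# prints the ffmpeg crop commands rather than executing them:
#   python crop-video.py --inp some_video.mp4 --min_frames 100 --cpu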