#
# Project FrameVis - Video Frame Visualizer Script
# @author David Madison
# @link github.com/dmadison/FrameVis
# @version v1.0.1
# @license MIT - Copyright (c) 2019 David Madison
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
import cv2
import numpy as np
import argparse
from enum import Enum, auto
import time
class FrameVis:
"""
	Reads a video file and outputs an image composed of n resized frames, spread evenly throughout the file.
"""
default_frame_height = None # auto, or in pixels
default_frame_width = None # auto, or in pixels
	default_concat_size = 1 # per-frame size along the concatenation axis when auto-calculated, in pixels
default_direction = "horizontal" # left to right
def visualize(self, source, nframes, height=default_frame_height, width=default_frame_width, \
direction=default_direction, trim=False, quiet=True):
"""
		Reads a video file and outputs an image composed of n resized frames, spread evenly throughout the file.
Parameters:
source (str): filepath to source video file
nframes (int): number of frames to process from the video
height (int): height of each frame, in pixels
width (int): width of each frame, in pixels
			direction (str): direction to concatenate frames ("horizontal" or "vertical")
			trim (bool): detect and crop out any hard matting (letterboxing or pillarboxing)
			quiet (bool): suppress console messages
Returns:
visualization image as numpy array
"""
video = cv2.VideoCapture(source) # open video file
if not video.isOpened():
raise FileNotFoundError("Source Video Not Found")
if not quiet:
print("") # create space from script call line
# calculate keyframe interval
video_total_frames = video.get(cv2.CAP_PROP_FRAME_COUNT) # retrieve total frame count from metadata
if not isinstance(nframes, int) or nframes < 1:
raise ValueError("Number of frames must be a positive integer")
elif nframes > video_total_frames:
raise ValueError("Requested frame count larger than total available ({})".format(video_total_frames))
keyframe_interval = video_total_frames / nframes # calculate number of frames between captures
# grab frame for dimension calculations
		success, image = video.read() # get first frame
if not success:
raise IOError("Cannot read from video file")
# calculate letterbox / pillarbox trimming, if specified
matte_type = 0
		if trim:
if not quiet:
print("Trimming enabled, checking matting... ", end="", flush=True)
			# sample 10 frames; an axis is treated as matted if its average channel value is 3/255 or lower
success, cropping_bounds = MatteTrimmer.determine_video_bounds(source, 10, 3)
matte_type = 0
if success: # only calculate cropping if bounds are valid
crop_width = cropping_bounds[1][0] - cropping_bounds[0][0] + 1
crop_height = cropping_bounds[1][1] - cropping_bounds[0][1] + 1
if crop_height != image.shape[0]: # letterboxing
matte_type += 1
if crop_width != image.shape[1]: # pillarboxing
					matte_type += 2
if not quiet:
if matte_type == 0:
print("no matting detected")
elif matte_type == 1:
print("letterboxing detected, cropping {} px from the top and bottom".format(int((image.shape[0] - crop_height) / 2)))
elif matte_type == 2:
print("pillarboxing detected, trimming {} px from the sides".format(int((image.shape[1] - crop_width) / 2)))
elif matte_type == 3:
print("multiple matting detected - cropping ({}, {}) to ({}, {})".format(image.shape[1], image.shape[0], crop_width, crop_height))
# calculate height
if height is None: # auto-calculate
if direction == "horizontal": # non-concat, use video size
if matte_type & 1 == 1: # letterboxing present
height = crop_height
else:
height = image.shape[0] # save frame height
else: # concat, use default value
height = FrameVis.default_concat_size
elif not isinstance(height, int) or height < 1:
raise ValueError("Frame height must be a positive integer")
# calculate width
if width is None: # auto-calculate
if direction == "vertical": # non-concat, use video size
if matte_type & 2 == 2: # pillarboxing present
width = crop_width
else:
width = image.shape[1] # save frame width
else: # concat, use default value
width = FrameVis.default_concat_size
elif not isinstance(width, int) or width < 1:
raise ValueError("Frame width must be a positive integer")
# assign direction function and calculate output size
if direction == "horizontal":
concatenate = cv2.hconcat
output_width = width * nframes
output_height = height
elif direction == "vertical":
concatenate = cv2.vconcat
output_width = width
output_height = height * nframes
else:
raise ValueError("Invalid direction specified")
if not quiet:
aspect_ratio = output_width / output_height
print("Visualizing \"{}\" - {} by {} ({:.2f}), from {} frames (every {:.2f} seconds)"\
.format(source, output_width, output_height, aspect_ratio, nframes, FrameVis.interval_from_nframes(source, nframes)))
# set up for the frame processing loop
next_keyframe = keyframe_interval / 2 # frame number for the next frame grab, starting evenly offset from start/end
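		# e.g. a 1000-frame video with nframes = 4 gives keyframe_interval = 250, so
		# captures land on frames 125, 375, 625, and 875 - centered in each interval,
		# with the first and last samples half an interval from the video's ends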
finished_frames = 0 # counter for number of processed frames
output_image = None
progress = ProgressBar("Processing:")
while True:
if finished_frames == nframes:
break # done!
video.set(cv2.CAP_PROP_POS_FRAMES, int(next_keyframe)) # move cursor to next sampled frame
			success, image = video.read() # read the next frame
if not success:
raise IOError("Cannot read from video file (frame {} out of {})".format(int(next_keyframe), video_total_frames))
if matte_type != 0: # crop out matting, if specified and matting is present
image = MatteTrimmer.crop_image(image, cropping_bounds)
image = cv2.resize(image, (width, height)) # resize to output size
# save to output image
if output_image is None:
output_image = image
else:
				output_image = concatenate([output_image, image]) # append the new frame in the chosen direction
finished_frames += 1
next_keyframe += keyframe_interval # set next frame capture time, maintaining floats
if not quiet:
progress.write(finished_frames / nframes) # print progress bar to the console
video.release() # close video capture
return output_image
@staticmethod
def average_image(image, direction):
"""
Averages the colors in an axis across an entire image
Parameters:
image (arr x.y.c): image as 3-dimensional numpy array
direction (str): direction to average frames ("horizontal" or "vertical")
Returns:
image, with pixel data averaged along provided axis
"""
height, width, depth = image.shape
if direction == "horizontal":
scale_height = 1
scale_width = width
elif direction == "vertical":
scale_height = height
scale_width = 1
else:
raise ValueError("Invalid direction specified")
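		# Collapsing one axis to a single pixel and stretching it back out averages the
		# colors along that axis without an explicit loop; note that cv2.resize's default
		# interpolation approximates, rather than exactly computes, the per-axis mean.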
image = cv2.resize(image, (scale_width, scale_height)) # scale down to '1', averaging values
image = cv2.resize(image, (width, height)) # scale back up to size
return image
@staticmethod
def motion_blur(image, direction, blur_amount):
"""
Blurs the pixels in a given axis across an entire image.
Parameters:
image (arr x.y.c): image as 3-dimensional numpy array
direction (str): direction of stacked images for blurring ("horizontal" or "vertical")
blur_amount (int): how much to blur the image, as the convolution kernel size
Returns:
image, with pixel data blurred along provided axis
"""
kernel = np.zeros((blur_amount, blur_amount)) # create convolution kernel
# fill group with '1's
if direction == "horizontal":
kernel[:, int((blur_amount - 1)/2)] = np.ones(blur_amount) # fill center column (blurring vertically for horizontal concat)
elif direction == "vertical":
kernel[int((blur_amount - 1)/2), :] = np.ones(blur_amount) # fill center row (blurring horizontally for vertical concat)
else:
raise ValueError("Invalid direction specified")
kernel /= blur_amount # normalize kernel matrix
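		# The kernel is now a single line of ones through the center, scaled to sum to 1,
		# so the convolution is a 1D box blur along that line; e.g. blur_amount = 5 with
		# direction = "horizontal" gives a 5x5 kernel whose center column is five 0.2s,
		# averaging each pixel with its four vertical neighbors.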
return cv2.filter2D(image, -1, kernel) # filter using kernel with same depth as source
@staticmethod
def nframes_from_interval(source, interval):
"""
Calculates the number of frames available in a video file for a given capture interval
Parameters:
source (str): filepath to source video file
interval (float): capture frame every i seconds
Returns:
			number of frames to capture over the video's duration (int)
"""
video = cv2.VideoCapture(source) # open video file
if not video.isOpened():
raise FileNotFoundError("Source Video Not Found")
frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT) # total number of frames
fps = video.get(cv2.CAP_PROP_FPS) # framerate of the video
duration = frame_count / fps # duration of the video, in seconds
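		# e.g. 7200 frames at 24 fps is a 300-second video, so an interval of
		# 2.5 seconds yields int(round(300 / 2.5)) = 120 frames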
video.release() # close video capture
		return int(round(duration / interval)) # total number of captures at this interval
@staticmethod
def interval_from_nframes(source, nframes):
"""
Calculates the capture interval, in seconds, for a video file given the
number of frames to capture
Parameters:
source (str): filepath to source video file
nframes (int): number of frames to capture from the video file
Returns:
time interval (seconds) between frame captures (float)
"""
video = cv2.VideoCapture(source) # open video file
if not video.isOpened():
raise FileNotFoundError("Source Video Not Found")
frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT) # total number of frames
fps = video.get(cv2.CAP_PROP_FPS) # framerate of the video
keyframe_interval = frame_count / nframes # calculate number of frames between captures
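		# e.g. capturing 120 frames from that same 7200-frame, 24 fps video gives a
		# keyframe_interval of 60 frames, or one capture every 60 / 24 = 2.5 seconds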
video.release() # close video capture
return keyframe_interval / fps # seconds between captures
class MatteTrimmer:
"""
Functions for finding and removing black mattes around video frames
"""
@staticmethod
def find_matrix_edges(matrix, threshold):
"""
Finds the start and end points of a 1D array above a given threshold
Parameters:
matrix (arr, 1.x): 1D array of data to check
threshold (value): valid data is above this trigger level
Returns:
tuple with the array indices of data bounds, start and end
"""
		if not isinstance(matrix, np.ndarray) or matrix.ndim != 1:
			raise ValueError("Provided matrix is not the right size (must be 1D)")
data_start = None
data_end = None
for value_id, value in enumerate(matrix):
if value > threshold:
if data_start is None:
data_start = value_id
data_end = value_id
return (data_start, data_end)
@staticmethod
def find_larger_bound(first, second):
"""
Takes two sets of diagonal rectangular boundary coordinates and determines
the set of rectangular boundary coordinates that contains both
Parameters:
first (arr, 1.2.2): pair of rectangular coordinates, in the form [(X,Y), (X,Y)]
second (arr, 1.2.2): pair of rectangular coordinates, in the form [(X,Y), (X,Y)]
Where for both arrays the first coordinate is in the top left-hand corner,
and the second coordinate is in the bottom right-hand corner.
Returns:
numpy coordinate matrix containing both of the provided boundaries
"""
left_edge = first[0][0] if first[0][0] <= second[0][0] else second[0][0]
right_edge = first[1][0] if first[1][0] >= second[1][0] else second[1][0]
top_edge = first[0][1] if first[0][1] <= second[0][1] else second[0][1]
bottom_edge = first[1][1] if first[1][1] >= second[1][1] else second[1][1]
return np.array([[left_edge, top_edge], [right_edge, bottom_edge]])
@staticmethod
def valid_bounds(bounds):
"""
Checks if the frame bounds are a valid format
Parameters:
bounds (arr, 1.2.2): pair of rectangular coordinates, in the form [(X,Y), (X,Y)]
Returns:
True or False
"""
		for coordinate_pair in bounds:
			for value in coordinate_pair:
				if value is None:
					return False # missing value
if bounds[0][0] > bounds[1][0] or \
bounds[0][1] > bounds[1][1]:
return False # left > right or top > bottom
return True
@staticmethod
def determine_image_bounds(image, threshold):
"""
Determines if there are any hard mattes (black bars) surrounding
an image on either the top (letterboxing) or the sides (pillarboxing)
Parameters:
image (arr, x.y.c): image as 3-dimensional numpy array
threshold (8-bit int): min color channel value to judge as 'image present'
Returns:
success (bool): True or False if the bounds are valid
image_bounds: numpy coordinate matrix with the two opposite corners of the
image bounds, in the form [(X,Y), (X,Y)]
"""
height, width, depth = image.shape
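		# A row (or column) counts as matte only if its summed value stays at or below
		# what every pixel holding channel value 'threshold' would produce; e.g. for a
		# 1920-wide, 3-channel frame with threshold = 3, a row must sum above
		# 3 * 1920 * 3 = 17280 to register as image content.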
# check for letterboxing
horizontal_sums = np.sum(image, axis=(1,2)) # sum all color channels across all rows
hthreshold = (threshold * width * depth) # must be below every pixel having a value of "threshold" in every channel
vertical_edges = MatteTrimmer.find_matrix_edges(horizontal_sums, hthreshold)
# check for pillarboxing
vertical_sums = np.sum(image, axis=(0,2)) # sum all color channels across all columns
vthreshold = (threshold * height * depth) # must be below every pixel having a value of "threshold" in every channel
horizontal_edges = MatteTrimmer.find_matrix_edges(vertical_sums, vthreshold)
image_bounds = np.array([[horizontal_edges[0], vertical_edges[0]], [horizontal_edges[1], vertical_edges[1]]])
return MatteTrimmer.valid_bounds(image_bounds), image_bounds
@staticmethod
def determine_video_bounds(source, nsamples, threshold):
"""
Determines if any matting exists in a video source
Parameters:
source (str): filepath to source video file
nsamples (int): number of frames from the video to determine bounds,
evenly spaced throughout the video
threshold (8-bit int): min color channel value to judge as 'image present'
Returns:
success (bool): True or False if the bounds are valid
video_bounds: numpy coordinate matrix with the two opposite corners of the
video bounds, in the form [(X,Y), (X,Y)]
"""
video = cv2.VideoCapture(source) # open video file
if not video.isOpened():
raise FileNotFoundError("Source Video Not Found")
video_total_frames = video.get(cv2.CAP_PROP_FRAME_COUNT) # retrieve total frame count from metadata
if not isinstance(nsamples, int) or nsamples < 1:
raise ValueError("Number of samples must be a positive integer")
keyframe_interval = video_total_frames / nsamples # calculate number of frames between captures
		# read the first frame so results stay consistent with the visualizer
# (this also GREATLY increases the read speed? no idea why)
		success, image = video.read() # get first frame
if not success:
raise IOError("Cannot read from video file")
next_keyframe = keyframe_interval / 2 # frame number for the next frame grab, starting evenly offset from start/end
video_bounds = None
for frame_number in range(nsamples):
video.set(cv2.CAP_PROP_POS_FRAMES, int(next_keyframe)) # move cursor to next sampled frame
			success, image = video.read() # read the next frame
if not success:
raise IOError("Cannot read from video file")
success, frame_bounds = MatteTrimmer.determine_image_bounds(image, threshold)
if not success:
continue # don't compare bounds, frame bounds are invalid
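			# union with the bounds from earlier samples, so a single dark frame
			# (a fade or credits screen) cannot shrink the detected image area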
video_bounds = frame_bounds if video_bounds is None else MatteTrimmer.find_larger_bound(video_bounds, frame_bounds)
next_keyframe += keyframe_interval # set next frame capture time, maintaining floats
		video.release() # close video capture
		if video_bounds is None:
			return False, None # no sampled frame produced usable bounds
		return MatteTrimmer.valid_bounds(video_bounds), video_bounds
@staticmethod
def crop_image(image, bounds):
"""
Crops a provided image by the coordinate bounds pair provided.
Parameters:
image (arr, x.y.c): image as 3-dimensional numpy array
			bounds (arr, 1.2.2): pair of rectangular coordinates, in the form [(X,Y), (X,Y)]
Returns:
image as 3-dimensional numpy array, cropped to the coordinate bounds
"""
return image[bounds[0][1]:bounds[1][1], bounds[0][0]:bounds[1][0]]
class ProgressBar:
"""
Generates a progress bar for the console output
Args:
pre (str): string to prepend before the progress bar
bar_length (int): length of the progress bar itself, in characters
print_elapsed (bool): option to print time elapsed or not
Attributes:
pre (str): string to prepend before the progress bar
bar_length (int): length of the progress bar itself, in characters
		print_elapsed (bool): option to print time elapsed or not
		__start_time (float): starting time for the progress bar, in unix seconds
"""
def __init__(self, pre="", bar_length=25, print_elapsed=True):
pre = (pre + '\t') if pre != "" else pre # append separator if string present
self.pre = pre
self.bar_length = bar_length
self.print_elapsed = print_elapsed
if self.print_elapsed:
self.__start_time = time.time() # store start time as unix
def write(self, percent):
"""Prints a progress bar to the console based on the input percentage (float)."""
term_char = '\r' if percent < 1.0 else '\n' # rewrite the line unless finished
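		# '\r' returns the cursor to the start of the line without advancing it, so each
		# call overdraws the previous bar in place until the job completes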
filled_size = int(round(self.bar_length * percent)) # number of 'filled' characters in the bar
progress_bar = "#" * filled_size + " " * (self.bar_length - filled_size) # progress bar characters, as a string
time_string = ""
if self.print_elapsed:
time_elapsed = time.time() - self.__start_time
time_string = "\tTime Elapsed: {}".format(time.strftime("%H:%M:%S", time.gmtime(time_elapsed)))
print("{}[{}]\t{:.2%}{}".format(self.pre, progress_bar, percent, time_string), end=term_char, flush=True)
def main():
parser = argparse.ArgumentParser(description="video frame visualizer and movie barcode generator", add_help=False) # removing help so I can use '-h' for height
parser.add_argument("source", help="file path for the video file to be visualized", type=str)
parser.add_argument("destination", help="file path output for the final image", type=str)
parser.add_argument("-n", "--nframes", help="the number of frames in the visualization", type=int)
parser.add_argument("-i", "--interval", help="interval between frames for the visualization", type=float)
parser.add_argument("-h", "--height", help="the height of each frame, in pixels", type=int, default=FrameVis.default_frame_height)
parser.add_argument("-w", "--width", help="the output width of each frame, in pixels", type=int, default=FrameVis.default_frame_width)
parser.add_argument("-d", "--direction", help="direction to concatenate frames, horizontal or vertical", type=str, \
choices=["horizontal", "vertical"], default=FrameVis.default_direction)
parser.add_argument("-t", "--trim", help="detect and trim any hard matting (letterboxing or pillarboxing)", action='store_true', default=False)
parser.add_argument("-a", "--average", help="average colors for each frame", action='store_true', default=False)
parser.add_argument("-b", "--blur", help="apply motion blur to the frames (kernel size)", type=int, nargs='?', const=100, default=0)
parser.add_argument("-q", "--quiet", help="mute console outputs", action='store_true', default=False)
parser.add_argument("--help", action="help", help="show this help message and exit")
args = parser.parse_args()
# check number of frames arguments
if args.nframes is None:
if args.interval is not None: # calculate nframes from interval
args.nframes = FrameVis.nframes_from_interval(args.source, args.interval)
else:
parser.error("You must provide either an --(n)frames or --(i)nterval argument")
# check postprocessing arguments
if args.average is True and args.blur != 0:
parser.error("Cannot (a)verage and (b)lur, you must choose one or the other")
fv = FrameVis()
output_image = fv.visualize(args.source, args.nframes, height=args.height, width=args.width, \
direction=args.direction, trim=args.trim, quiet=args.quiet)
# postprocess
if args.average or args.blur != 0:
if args.average:
if not args.quiet:
print("Averaging frame colors... ", end="", flush=True)
output_image = fv.average_image(output_image, args.direction)
if args.blur != 0:
if not args.quiet:
print("Adding motion blur to final frame... ", end="", flush=True)
output_image = fv.motion_blur(output_image, args.direction, args.blur)
if not args.quiet:
print("done")
cv2.imwrite(args.destination, output_image) # save visualization to file
if not args.quiet:
print("Visualization saved to {}".format(args.destination))
if __name__ == "__main__":
main()