# File size: 6,239 Bytes
# dde56f0
"""Utility functions for videos, plotting and computing performance metrics."""
import os
import typing
import cv2 # pytype: disable=attribute-error
import matplotlib
import numpy as np
import torch
import tqdm
from . import video
from . import segmentation
def loadvideo(filename: str) -> np.ndarray:
    """Loads a video from a file.

    Args:
        filename (str): filename of video

    Returns:
        A np.ndarray with dimensions (channels=3, frames, height, width). The
        values will be uint8's ranging from 0 to 255.

    Raises:
        FileNotFoundError: Could not find `filename`
        ValueError: An error occurred while reading the video
    """
    if not os.path.exists(filename):
        raise FileNotFoundError(filename)

    capture = cv2.VideoCapture(filename)
    try:
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Frames are gathered as (frames, height, width, channels) and only
        # moved to channels-first once every frame has been read.
        v = np.zeros((frame_count, frame_height, frame_width, 3), np.uint8)

        for count in range(frame_count):
            ret, frame = capture.read()
            if not ret:
                raise ValueError("Failed to load frame #{} of {}.".format(count, filename))

            # Frames are converted from BGR to RGB channel order before storing.
            v[count, :, :] = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    finally:
        # Always hand the capture back, including when a frame fails to decode.
        capture.release()

    return v.transpose((3, 0, 1, 2))
def savevideo(filename: str, array: np.ndarray, fps: typing.Union[float, int] = 1):
    """Saves a video to a file.

    Args:
        filename (str): filename of video
        array (np.ndarray): video of uint8's with shape (channels=3, frames, height, width)
        fps (float or int): frames per second

    Returns:
        None

    Raises:
        ValueError: The first axis of `array` does not have size 3.
    """
    c, _, height, width = array.shape

    if c != 3:
        raise ValueError("savevideo expects array of shape (channels=3, frames, height, width), got shape ({})".format(", ".join(map(str, array.shape))))

    fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
    out = cv2.VideoWriter(filename, fourcc, fps, (width, height))
    try:
        # Iterate frame by frame in (height, width, channels) layout.
        for frame in array.transpose((1, 2, 3, 0)):
            # Frames are converted from RGB to BGR channel order and then
            # actually written; without the write the output stays empty.
            out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    finally:
        # Release the writer so the file is finalised even if a frame fails.
        out.release()
def get_mean_and_std(dataset: torch.utils.data.Dataset,
                     samples: int = 128,
                     batch_size: int = 8,
                     num_workers: int = 4):
    """Computes mean and std from samples from a Pytorch dataset.

    Args:
        dataset (torch.utils.data.Dataset): A Pytorch dataset.
            ``dataset[i][0]'' is expected to be the i-th video in the dataset, which
            should be a ``torch.Tensor'' of dimensions (channels=3, frames, height, width)
        samples (int or None, optional): Number of samples to take from dataset. If ``None'', mean and
            standard deviation are computed over all elements.
            Defaults to 128.
        batch_size (int, optional): how many samples per batch to load
            Defaults to 8.
        num_workers (int, optional): how many subprocesses to use for data
            loading. If 0, the data will be loaded in the main process.
            Defaults to 4.

    Returns:
        A tuple of the mean and standard deviation. Both are represented as np.array's of dimension (channels,).
    """
    if samples is not None and len(dataset) > samples:
        # Draw a random subset without replacement so no video is counted twice.
        indices = np.random.choice(len(dataset), samples, replace=False)
        dataset = torch.utils.data.Subset(dataset, indices)
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)

    n = 0  # number of elements taken (should be equal to samples by end of for loop)
    s1 = 0.  # sum of elements along channels (ends up as np.array of dimension (channels,))
    s2 = 0.  # sum of squares of elements along channels (ends up as np.array of dimension (channels,))
    for (x, *_) in tqdm.tqdm(dataloader):
        # Swap the first two axes, then flatten everything but the leading
        # axis into one row per channel (3 rows).
        x = x.transpose(0, 1).contiguous().view(3, -1)
        n += x.shape[1]
        s1 += torch.sum(x, dim=1).numpy()
        s2 += torch.sum(x ** 2, dim=1).numpy()
    mean = s1 / n  # type: np.ndarray
    # Var[x] = E[x^2] - E[x]^2, computed from the running sums.
    std = np.sqrt(s2 / n - mean ** 2)  # type: np.ndarray
    mean = mean.astype(np.float32)
    std = std.astype(np.float32)

    return mean, std
def bootstrap(a, b, func, samples=10000):
    """Computes a bootstrapped confidence intervals for ``func(a, b)''.

    Args:
        a (array_like): first argument to `func`.
        b (array_like): second argument to `func`.
        func (callable): Function to compute confidence intervals for.
            It is called as ``func(a[ind], b[ind])'' on paired resamples of
            `a` and `b`, and its results must be sortable.
        samples (int, optional): Number of samples to compute.
            Defaults to 10000.

    Returns:
        A tuple of (`func(a, b)`, estimated 5-th percentile, estimated 95-th percentile).
    """
    a = np.array(a)
    b = np.array(b)
    size = len(a)

    bootstraps = []
    for _ in range(samples):
        # Resample with replacement, using the same indices for both arrays
        # so that the pairing between a[i] and b[i] is preserved.
        ind = np.random.choice(size, size)
        bootstraps.append(func(a[ind], b[ind]))
    bootstraps = sorted(bootstraps)

    # round() can land exactly on len(bootstraps) (e.g. round(9.5) == 10 for
    # 10 samples), which is one past the end; clamp to the last valid index.
    last = len(bootstraps) - 1
    lower = bootstraps[min(round(0.05 * len(bootstraps)), last)]
    upper = bootstraps[min(round(0.95 * len(bootstraps)), last)]

    return func(a, b), lower, upper
def latexify():
    """Sets matplotlib params to appear more like LaTeX.

    Based on an external recipe; the reference link is missing from the
    reviewed copy of this file.

    Returns:
        None. The settings are applied to the global ``matplotlib.rcParams''.
    """
    params = {'backend': 'pdf',
              'axes.titlesize': 8,
              'axes.labelsize': 8,
              'font.size': 8,
              'legend.fontsize': 8,
              'xtick.labelsize': 8,
              'ytick.labelsize': 8,
              # NOTE(review): this key was blank in the reviewed copy;
              # 'font.family' is presumed because the value is a font name
              # and the next key is 'font.serif' -- confirm.
              'font.family': 'DejaVu Serif',
              'font.serif': 'Computer Modern',
              }
    # NOTE(review): the reviewed copy ended before the params were applied;
    # updating rcParams is presumed from the docstring -- confirm.
    matplotlib.rcParams.update(params)
def dice_similarity_coefficient(inter, union):
    """Computes the dice similarity coefficient.

    Args:
        inter (iterable): iterable of the intersections
        union (iterable): iterable of the unions

    Returns:
        ``2 * sum(inter) / (sum(union) + sum(inter))''.

    Raises:
        ZeroDivisionError: If both sums are zero (for plain Python numbers).
    """
    # Sum the intersections once: a one-shot iterator (e.g. a generator)
    # would be exhausted by the first pass and contribute 0 to the denominator.
    inter_total = sum(inter)
    return 2 * inter_total / (sum(union) + inter_total)
# Names exported by ``from <package> import *'': the two submodules followed
# by the helper functions defined in this file.
__all__ = [
    "video",
    "segmentation",
    "loadvideo",
    "savevideo",
    "get_mean_and_std",
    "bootstrap",
    "latexify",
    "dice_similarity_coefficient",
]