# python3.7
"""Utility functions for visualizing results."""

import base64
import os.path

import cv2
import numpy as np
from bs4 import BeautifulSoup

__all__ = [
    'get_grid_shape', 'get_blank_image', 'load_image', 'save_image',
    'resize_image', 'postprocess_image', 'add_text_to_image',
    'parse_image_size', 'fuse_images', 'HtmlPageVisualizer', 'HtmlPageReader',
    'VideoReader', 'VideoWriter'
]


def get_grid_shape(size, row=0, col=0, is_portrait=False):
    """Gets the shape of a grid based on the size.

    This function makes greatest effort on making the output grid square if
    neither `row` nor `col` is set. If `is_portrait` is set as `False`, the
    height will always be equal to or smaller than the width. For example, if
    input `size = 16`, output shape will be `(4, 4)`; if input `size = 15`,
    output shape will be `(3, 5)`. Otherwise, the height will always be equal
    to or larger than the width.

    Args:
        size: Size (height * width) of the target grid.
        row: Requested number of rows. Ignored if non-positive, if it does not
            divide `size`, or if `row * col != size` when both are given.
            (default: 0)
        col: Requested number of columns. Same validity rules as `row`.
            (default: 0)
        is_portrait: Whether to return a portrait size or a landscape size.
            Only used when the shape is derived automatically.
            (default: False)

    Returns:
        A two-element tuple, representing height and width respectively.
    """
    assert isinstance(size, int)
    assert isinstance(row, int)
    assert isinstance(col, int)
    if size == 0:
        return (0, 0)

    # An inconsistent (row, col) request is discarded entirely.
    if row > 0 and col > 0 and row * col != size:
        row = 0
        col = 0

    if row > 0 and size % row == 0:
        return (row, size // row)
    if col > 0 and size % col == 0:
        return (size // col, col)

    # Search downwards from sqrt(size) for the largest divisor, which gives
    # the most square-like factorization.
    row = int(np.sqrt(size))
    while row > 0:
        if size % row == 0:
            col = size // row
            break
        row = row - 1

    return (col, row) if is_portrait else (row, col)


def get_blank_image(height, width, channels=3, is_black=True):
    """Gets a blank image, either white or black.

    NOTE: This function will always return an image with `RGB` channel order
    for color image and pixel range [0, 255].

    Args:
        height: Height of the returned image.
        width: Width of the returned image.
        channels: Number of channels. (default: 3)
        is_black: Whether to return a black image. (default: True)

    Returns:
        A `uint8` array with shape `(height, width, channels)`.
    """
    shape = (height, width, channels)
    if is_black:
        return np.zeros(shape, dtype=np.uint8)
    return np.ones(shape, dtype=np.uint8) * 255


def load_image(path, image_channels=3):
    """Loads an image from disk.

    NOTE: This function will always return an image with `RGB` channel order
    for color image and pixel range [0, 255].

    Args:
        path: Path to load the image from.
        image_channels: Number of image channels of returned image. This field
            is employed since `cv2.imread()` will always return a 3-channel
            image, even for grayscale image.

    Returns:
        An image with dtype `np.ndarray`, or `None` if `path` does not exist
        or the file cannot be decoded as an image.
    """
    if not os.path.isfile(path):
        return None

    assert image_channels in [1, 3]

    image = cv2.imread(path)
    # `cv2.imread()` signals a decoding failure by returning `None`.
    if image is None:
        return None
    assert image.ndim == 3 and image.shape[2] == 3
    if image_channels == 1:
        return image[:, :, 0:1]
    # BGR (OpenCV) -> RGB.
    return image[:, :, ::-1]


def save_image(path, image):
    """Saves an image to disk.

    NOTE: The input image (if colorful) is assumed to be with `RGB` channel
    order and pixel range [0, 255].

    Args:
        path: Path to save the image to.
        image: Image to save. Nothing is written if this is `None`.
    """
    if image is None:
        return

    assert image.ndim == 3 and image.shape[2] in [1, 3]
    # RGB -> BGR (OpenCV); a no-op for single-channel images.
    cv2.imwrite(path, image[:, :, ::-1])


def resize_image(image, *args, **kwargs):
    """Resizes image.

    This is a wrap of `cv2.resize()`.

    NOTE: The channel order of the input image will not be changed.

    Args:
        image: Image to resize.
        *args: Positional arguments forwarded to `cv2.resize()`.
        **kwargs: Keyword arguments forwarded to `cv2.resize()`.

    Returns:
        The resized image with shape `(height, width, channels)`, or `None` if
        the input is `None`.
    """
    if image is None:
        return None

    assert image.ndim == 3 and image.shape[2] in [1, 3]
    image = cv2.resize(image, *args, **kwargs)
    # `cv2.resize()` drops the channel axis of single-channel images.
    if image.ndim == 2:
        return image[:, :, np.newaxis]
    return image


def postprocess_image(image, min_val=-1.0, max_val=1.0, data_format='NCHW'):
    """Post-processes image to pixel range [0, 255] with dtype `uint8`.

    NOTE: The returned image will always be with `HWC` format (or `NHWC` for
    batched input).

    Args:
        image: The input image(s), as a `np.ndarray`.
        min_val: Minimum value of the input image.
        max_val: Maximum value of the input image.
        data_format: Data format of the input image. Supporting `NCHW`, `NHWC`,
            `CHW`, `HWC` (case-insensitive).

    Returns:
        The post-processed image.

    Raises:
        NotImplementedError: If the input `data_format` is not supported.
    """
    assert isinstance(image, np.ndarray)

    image = image.astype(np.float64)
    image = (image - min_val) * 255 / (max_val - min_val)
    # Adding 0.5 before truncation rounds to the nearest integer.
    image = np.clip(image + 0.5, 0, 255).astype(np.uint8)

    data_format = data_format.upper()
    if data_format == 'NCHW':
        assert image.ndim == 4 and image.shape[1] in [1, 3]
        return image.transpose(0, 2, 3, 1)
    if data_format == 'NHWC':
        assert image.ndim == 4 and image.shape[3] in [1, 3]
        return image
    if data_format == 'CHW':
        assert image.ndim == 3 and image.shape[0] in [1, 3]
        return image.transpose(1, 2, 0)
    if data_format == 'HWC':
        assert image.ndim == 3 and image.shape[2] in [1, 3]
        return image
    raise NotImplementedError(f'Data format `{data_format}` is not supported!')


def add_text_to_image(image,
                      text='',
                      position=None,
                      font=cv2.FONT_HERSHEY_TRIPLEX,
                      font_size=1.0,
                      line_type=cv2.LINE_8,
                      line_width=1,
                      color=(255, 255, 255)):
    """Overlays text on given image.

    NOTE: The input image is assumed to be with `RGB` channel order. The image
    is drawn on in place.

    Args:
        image: The image to overlay text on.
        text: Text content to overlay on the image. (default: '')
        position: Target position (bottom-left corner) to add text. If not set,
            center of the image will be used by default. (default: None)
        font: Font of the text added. (default: cv2.FONT_HERSHEY_TRIPLEX)
        font_size: Font size of the text added. (default: 1.0)
        line_type: Line type used to depict the text. (default: cv2.LINE_8)
        line_width: Line width used to depict the text. (default: 1)
        color: Color of the text added in `RGB` channel order. (default:
            (255, 255, 255))

    Returns:
        An image with target text overlayed on.
    """
    if image is None or not text:
        return image

    # `cv2.putText()` requires an explicit origin, so honor the documented
    # default here instead of forwarding `None`.
    if position is None:
        position = (image.shape[1] // 2, image.shape[0] // 2)

    cv2.putText(img=image,
                text=text,
                org=position,
                fontFace=font,
                fontScale=font_size,
                color=color,
                thickness=line_width,
                lineType=line_type,
                bottomLeftOrigin=False)

    return image


def parse_image_size(obj):
    """Parses object to a pair of image size, i.e., (width, height).

    Accepted inputs are `None` or an empty string (meaning "no size"), a single
    integer (square size), a list/tuple/array with up to two elements, or a
    comma-separated string such as `'256'` or `'512, 256'`.

    Args:
        obj: The input object to parse image size from.

    Returns:
        A two-element tuple, indicating image width and height respectively.
        Negative values are clamped to 0.

    Raises:
        ValueError: If the input has an unsupported type, holds more than two
            elements, or is a string that cannot be parsed as integers.
    """
    if obj is None:
        return (0, 0)

    # Strings are handled before any `==` comparison so that array inputs are
    # never compared element-wise against `''`.
    if isinstance(obj, str):
        compact = obj.replace(' ', '')
        if not compact:
            return (0, 0)
        numbers = tuple(map(int, compact.split(',')))
    elif isinstance(obj, (int, np.integer)):
        numbers = (obj,)
    elif isinstance(obj, (list, tuple, np.ndarray)):
        numbers = tuple(obj)
    else:
        raise ValueError(f'Invalid type of input: {type(obj)}!')

    if len(numbers) == 0:
        width = height = 0
    elif len(numbers) == 1:
        width = height = numbers[0]
    elif len(numbers) == 2:
        width, height = numbers
    else:
        raise ValueError('At most two elements for image size.')

    return (max(0, width), max(0, height))


def fuse_images(images,
                image_size=None,
                row=0,
                col=0,
                is_row_major=True,
                is_portrait=False,
                row_spacing=0,
                col_spacing=0,
                border_left=0,
                border_right=0,
                border_top=0,
                border_bottom=0,
                black_background=True):
    """Fuses a collection of images into an entire image.

    Args:
        images: A collection of images to fuse. Should be with shape [num,
            height, width, channels].
        image_size: This field is used to resize the image before fusion. `0`
            disables resizing. (default: None)
        row: Number of rows used for image fusion. If not set, this field will
            be automatically assigned based on `col` and total number of
            images. (default: 0)
        col: Number of columns used for image fusion. If not set, this field
            will be automatically assigned based on `row` and total number of
            images. (default: 0)
        is_row_major: Whether the input images should be arranged row-major or
            column-major. (default: True)
        is_portrait: Only active when both `row` and `col` should be assigned
            automatically. (default: False)
        row_spacing: Space between rows. (default: 0)
        col_spacing: Space between columns. (default: 0)
        border_left: Width of left border. (default: 0)
        border_right: Width of right border. (default: 0)
        border_top: Width of top border. (default: 0)
        border_bottom: Width of bottom border. (default: 0)
        black_background: Whether spacing and borders are filled with black
            (otherwise white). (default: True)

    Returns:
        The fused image, or `None` if `images` is `None`.

    Raises:
        ValueError: If the input `images` is not with shape [num, height,
            width, channels].
    """
    if images is None:
        return images

    if images.ndim != 4:
        raise ValueError(f'Input `images` should be with shape [num, height, '
                         f'width, channels], but {images.shape} is received!')

    num, image_height, image_width, channels = images.shape
    width, height = parse_image_size(image_size)
    height = height or image_height
    width = width or image_width
    row, col = get_grid_shape(num, row=row, col=col, is_portrait=is_portrait)
    # `max(..., 0)` keeps the canvas size non-negative for an empty grid.
    fused_height = (height * row + row_spacing * max(row - 1, 0) +
                    border_top + border_bottom)
    fused_width = (width * col + col_spacing * max(col - 1, 0) +
                   border_left + border_right)
    fused_image = get_blank_image(
        fused_height, fused_width, channels=channels, is_black=black_background)

    if is_row_major:
        images = images.reshape(row, col, image_height, image_width, channels)
    else:
        # Column-major input enumerates one column after another, so group by
        # column first and then swap axes to end up with `[row, col]` indexing.
        images = images.reshape(col, row, image_height, image_width, channels)
        images = images.transpose(1, 0, 2, 3, 4)

    need_resize = height != image_height or width != image_width
    for i in range(row):
        y = border_top + i * (height + row_spacing)
        for j in range(col):
            x = border_left + j * (width + col_spacing)
            image = images[i, j]
            if need_resize:
                image = cv2.resize(image, (width, height))
                # `cv2.resize()` drops the axis of single-channel images.
                if image.ndim == 2:
                    image = image[:, :, np.newaxis]
            fused_image[y:y + height, x:x + width] = image

    return fused_image


def get_sortable_html_header(column_name_list, sort_by_ascending=False):
    """Gets header for sortable html page.

    Basically, the html page contains a sortable table, where user can sort the
    rows by a particular column by clicking the column head.

    Example:

        column_name_list = [name_1, name_2, name_3]
        header = get_sortable_html_header(column_name_list)
        footer = get_sortable_html_footer()
        sortable_table = ...
        html_page = header + sortable_table + footer

    Args:
        column_name_list: List of column header names.
        sort_by_ascending: Default sorting order. If set as `True`, the html
            page will be sorted by ascending order when the header is clicked
            for the first time.

    Returns:
        A string, which represents for the header for a sortable html page.
    """
    # NOTE(review): the literal markup of this function was missing from the
    # reviewed copy of the file (all tags had been stripped), so the script and
    # style below are a reconstruction. The element structure (`thead`, `th`,
    # `tbody`) is what `HtmlPageReader` parses; compare the script/style
    # details against version control before relying on them.
    ascending = str(bool(sort_by_ascending)).lower()
    lines = [
        '<html>',
        '<head>',
        '<meta charset="utf-8">',
        '<script type="text/javascript">',
        'var column_idx;',
        f'var sort_by_ascending = {ascending};',
        '',
        'function sorting(tbody, idx) {',
        '  column_idx = idx;',
        '  Array.from(tbody.rows)',
        '       .sort(compareCells)',
        '       .forEach(function(row) { tbody.appendChild(row); });',
        '  sort_by_ascending = !sort_by_ascending;',
        '}',
        '',
        'function compareCells(row_a, row_b) {',
        '  var val_a = row_a.cells[column_idx].innerText;',
        '  var val_b = row_b.cells[column_idx].innerText;',
        '  var flag = sort_by_ascending ? 1 : -1;',
        '  return flag * (val_a > val_b ? 1 : -1);',
        '}',
        '</script>',
        '<style>',
        '  table {',
        '    border-spacing: 0;',
        '    border: 1px solid black;',
        '  }',
        '  th {',
        '    cursor: pointer;',
        '  }',
        '  th, td {',
        '    text-align: left;',
        '    vertical-align: middle;',
        '    border-collapse: collapse;',
        '    border: 0.5px solid black;',
        '    padding: 8px;',
        '  }',
        '  tr:nth-child(even) {',
        '    background-color: #d2d2d2;',
        '  }',
        '</style>',
        '</head>',
        '',
        '<body>',
        '',
        '<table>',
        '<thead>',
        '<tr>',
    ]
    # One clickable header cell per column; `idx` tells the script which column
    # to sort by.
    lines.extend(
        f'  <th onclick="sorting(tbody, {idx})">{name}</th>'
        for idx, name in enumerate(column_name_list))
    lines.extend([
        '</tr>',
        '</thead>',
        # The element id doubles as the global `tbody` used in `onclick`.
        '<tbody id="tbody">',
    ])
    return '\n'.join(lines) + '\n'


def get_sortable_html_footer():
    """Gets footer for sortable html page.

    Check function `get_sortable_html_header()` for more details.

    Returns:
        A string closing the elements opened by the header.
    """
    # NOTE(review): closing tags reconstructed (they were stripped from the
    # reviewed copy); they mirror the elements opened by the header.
    return '</tbody>\n</table>\n\n</body>\n</html>\n'


def encode_image_to_html_str(image, image_size=None):
    """Encodes an image to html language.

    NOTE: Input image is always assumed to be with `RGB` channel order.

    Args:
        image: The input image to encode. Should be with `RGB` channel order.
        image_size: This field is used to resize the image before encoding. `0`
            disables resizing. (default: None)

    Returns:
        A string which represents the encoded image (an `img` element whose
        `src` is a base64 JPEG data URI), or an empty string if `image` is
        `None`.
    """
    if image is None:
        return ''

    assert image.ndim == 3 and image.shape[2] in [1, 3]

    # Change channel order to `BGR`, which is opencv-friendly.
    image = image[:, :, ::-1]

    # Resize the image if needed.
    width, height = parse_image_size(image_size)
    if height or width:
        height = height or image.shape[0]
        width = width or image.shape[1]
        image = cv2.resize(image, (width, height))

    # Encode the image to html-format string. `tobytes()` replaces the
    # deprecated `tostring()` alias.
    encoded_image = cv2.imencode('.jpg', image)[1].tobytes()
    encoded_image_base64 = base64.b64encode(encoded_image).decode('utf-8')
    # NOTE(review): tag reconstructed (stripped from the reviewed copy). The
    # decoder takes everything after the last comma of `src` as the payload.
    html_str = f'<img src="data:image/jpeg;base64,{encoded_image_base64}"/>'

    return html_str


def decode_html_str_to_image(html_str, image_size=None):
    """Decodes image from html.

    Args:
        html_str: Image string parsed from html.
        image_size: This field is used to resize the image after decoding. `0`
            disables resizing. (default: None)

    Returns:
        An image with `RGB` channel order, or `None` if `html_str` is empty.
    """
    if not html_str:
        return None

    assert isinstance(html_str, str)
    # Drop the data-URI prefix; the base64 payload follows the last comma.
    image_str = html_str.split(',')[-1]
    encoded_image = base64.b64decode(image_str)
    encoded_image_numpy = np.frombuffer(encoded_image, dtype=np.uint8)
    image = cv2.imdecode(encoded_image_numpy, flags=cv2.IMREAD_COLOR)

    # Resize the image if needed.
    width, height = parse_image_size(image_size)
    if height or width:
        height = height or image.shape[0]
        width = width or image.shape[1]
        image = cv2.resize(image, (width, height))

    # BGR (OpenCV) -> RGB.
    return image[:, :, ::-1]


class HtmlPageVisualizer(object):
    """Defines the html page visualizer.

    This class can be used to visualize image results as html page. Basically,
    it is based on an html-format sorted table with helper functions
    `get_sortable_html_header()`, `get_sortable_html_footer()`, and
    `encode_image_to_html_str()`. To simplify the usage, specifying the
    following fields are enough to create a visualization page:

    (1) num_rows: Number of rows of the table (header-row exclusive).
    (2) num_cols: Number of columns of the table.
    (3) header contents (optional): Title of each column.

    NOTE: `grid_size` can be used to assign `num_rows` and `num_cols`
    automatically.

    Example:

        html = HtmlPageVisualizer(num_rows, num_cols)
        html.set_headers([...])
        for i in range(num_rows):
            for j in range(num_cols):
                html.set_cell(i, j, text=..., image=..., highlight=False)
        html.save('visualize.html')
    """

    def __init__(self,
                 num_rows=0,
                 num_cols=0,
                 grid_size=0,
                 is_portrait=True,
                 viz_size=None):
        """Initializes an empty table.

        Args:
            num_rows: Number of rows (header-row exclusive). (default: 0)
            num_cols: Number of columns. (default: 0)
            grid_size: If positive, total number of cells, from which the table
                shape is derived via `get_grid_shape()`. (default: 0)
            is_portrait: Forwarded to `get_grid_shape()`. (default: True)
            viz_size: Size images are resized to when added to a cell, parsed
                by `parse_image_size()`. (default: None)
        """
        if grid_size > 0:
            num_rows, num_cols = get_grid_shape(
                grid_size, row=num_rows, col=num_cols, is_portrait=is_portrait)
        assert num_rows > 0 and num_cols > 0

        self.num_rows = num_rows
        self.num_cols = num_cols
        self.viz_size = parse_image_size(viz_size)
        self.headers = ['' for _ in range(self.num_cols)]
        self.cells = [[{
            'text': '',
            'image': '',
            'highlight': False,
        } for _ in range(self.num_cols)] for _ in range(self.num_rows)]

    def set_header(self, col_idx, content):
        """Sets the content of a particular header by column index."""
        self.headers[col_idx] = content

    def set_headers(self, contents):
        """Sets the contents of all headers."""
        if isinstance(contents, str):
            contents = [contents]
        assert isinstance(contents, (list, tuple))
        assert len(contents) == self.num_cols
        for col_idx, content in enumerate(contents):
            self.set_header(col_idx, content)

    def set_cell(self, row_idx, col_idx, text='', image=None, highlight=False):
        """Sets the content of a particular cell.

        Basically, a cell contains some text as well as an image. Both text and
        image can be empty.

        Args:
            row_idx: Row index of the cell to edit.
            col_idx: Column index of the cell to edit.
            text: Text to add into the target cell. (default: '')
            image: Image to show in the target cell. Should be with `RGB`
                channel order. (default: None)
            highlight: Whether to highlight this cell. (default: False)
        """
        cell = self.cells[row_idx][col_idx]
        cell['text'] = text
        cell['image'] = encode_image_to_html_str(image, self.viz_size)
        cell['highlight'] = bool(highlight)

    def save(self, save_path):
        """Saves the html page.

        Args:
            save_path: Path of the html file to write.
        """
        # NOTE(review): the `tr`/`td`/`br` tags below were stripped from the
        # reviewed copy and are reconstructed to match what `HtmlPageReader`
        # parses.
        pieces = []
        for i in range(self.num_rows):
            pieces.append('<tr>\n')
            for j in range(self.num_cols):
                cell = self.cells[i][j]
                text = cell['text']
                image = cell['image']
                color = ' bgcolor="#FF8888"' if cell['highlight'] else ''
                if text:
                    pieces.append(
                        f'  <td{color}>{text}<br><br>{image}</td>\n')
                else:
                    pieces.append(f'  <td{color}>{image}</td>\n')
            pieces.append('</tr>\n')
        html = ''.join(pieces)

        header = get_sortable_html_header(self.headers)
        footer = get_sortable_html_footer()

        # Explicit encoding so cell text round-trips regardless of locale.
        with open(save_path, 'w', encoding='utf-8') as f:
            f.write(header + html + footer)


class HtmlPageReader(object):
    """Defines the html page reader.

    This class can be used to parse results from the visualization page
    generated by `HtmlPageVisualizer`.

    Example:

        html = HtmlPageReader(html_path)
        for j in range(html.num_cols):
            header = html.get_header(j)
        for i in range(html.num_rows):
            for j in range(html.num_cols):
                text = html.get_text(i, j)
                image = html.get_image(i, j, image_size=None)
    """

    def __init__(self, html_path):
        """Initializes by loading the content from file.

        Args:
            html_path: Path to the html file to parse.

        Raises:
            ValueError: If `html_path` does not exist.
        """
        self.html_path = html_path
        if not os.path.isfile(html_path):
            raise ValueError(f'File `{html_path}` does not exist!')

        # Load content.
        with open(html_path, 'r', encoding='utf-8') as f:
            self.html = BeautifulSoup(f, 'html.parser')

        # Parse headers.
        thead = self.html.find('thead')
        self.headers = [header.text for header in thead.find_all('th')]
        self.num_cols = len(self.headers)

        # Parse cells.
        tbody = self.html.find('tbody')
        self.cells = []
        for row in tbody.find_all('tr'):
            parsed_row = []
            for cell in row.find_all('td'):
                # Cells saved without an image carry no `img` element; store
                # an empty string, which `get_image()` decodes to `None`.
                img = cell.find('img')
                parsed_row.append({
                    'text': cell.text,
                    'image': img['src'] if img is not None else '',
                })
            assert len(parsed_row) == self.num_cols
            self.cells.append(parsed_row)
        self.num_rows = len(self.cells)

    def get_header(self, j):
        """Gets header for a particular column."""
        return self.headers[j]

    def get_text(self, i, j):
        """Gets text from a particular cell."""
        return self.cells[i][j]['text']

    def get_image(self, i, j, image_size=None):
        """Gets image from a particular cell."""
        return decode_html_str_to_image(self.cells[i][j]['image'], image_size)


class VideoReader(object):
    """Defines the video reader.

    This class can be used to read frames from a given video.
    """

    def __init__(self, path):
        """Initializes the video reader by loading the video from disk.

        Args:
            path: Path to the video file.

        Raises:
            ValueError: If `path` does not exist.
        """
        if not os.path.isfile(path):
            raise ValueError(f'Video `{path}` does not exist!')

        self.path = path
        self.video = cv2.VideoCapture(path)
        assert self.video.isOpened()
        self.position = 0

        self.length = int(self.video.get(cv2.CAP_PROP_FRAME_COUNT))
        self.frame_height = int(self.video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        self.frame_width = int(self.video.get(cv2.CAP_PROP_FRAME_WIDTH))
        self.fps = self.video.get(cv2.CAP_PROP_FPS)

    def __del__(self):
        """Releases the opened video."""
        # `video` is missing if `__init__` raised before opening the capture.
        video = getattr(self, 'video', None)
        if video is not None:
            video.release()

    def read(self, position=None):
        """Reads a certain frame.

        NOTE: The returned frame is assumed to be with `RGB` channel order.

        Args:
            position: Optional. If set, the reader will read frames from the
                exact position. Otherwise, the reader will read next frames.
                Positions not smaller than the video length are ignored.
                (default: None)

        Returns:
            The frame, or `None` if no frame could be read.
        """
        if position is not None and position < self.length:
            self.video.set(cv2.CAP_PROP_POS_FRAMES, position)
            self.position = position

        success, frame = self.video.read()
        self.position = self.position + 1

        # BGR (OpenCV) -> RGB.
        return frame[:, :, ::-1] if success else None


class VideoWriter(object):
    """Defines the video writer.

    This class can be used to create a video.

    NOTE: `.avi` and `DIVX` is the most recommended codec format since it does
    not rely on other dependencies.
    """

    def __init__(self, path, frame_height, frame_width, fps=24, codec='DIVX'):
        """Creates the video writer.

        Args:
            path: Path of the video file to write.
            frame_height: Height of each frame.
            frame_width: Width of each frame.
            fps: Frames per second. (default: 24)
            codec: Four-character codec code. (default: 'DIVX')
        """
        self.path = path
        self.frame_height = frame_height
        self.frame_width = frame_width
        self.fps = fps
        self.codec = codec

        self.video = cv2.VideoWriter(filename=path,
                                     fourcc=cv2.VideoWriter_fourcc(*codec),
                                     fps=fps,
                                     frameSize=(frame_width, frame_height))

    def __del__(self):
        """Releases the opened video."""
        # `video` is missing if `__init__` raised before creating the writer.
        video = getattr(self, 'video', None)
        if video is not None:
            video.release()

    def write(self, frame):
        """Writes a target frame.

        NOTE: The input frame is assumed to be with `RGB` channel order.
        """
        # RGB -> BGR (OpenCV).
        self.video.write(frame[:, :, ::-1])