# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import csv
import os.path as osp
from collections import defaultdict
from typing import Dict, List, Optional

import numpy as np
from mmengine.fileio import get_local_path, load
from mmengine.utils import is_abs

from mmdet.registry import DATASETS
from .base_det_dataset import BaseDetDataset


@DATASETS.register_module()
class OpenImagesDataset(BaseDetDataset):
    """Open Images dataset for detection.

    Args:
        ann_file (str): Annotation file path.
        label_file (str): File path of the label description file that
            maps the classes names in MID format to their short
            descriptions.
        meta_file (str): File path to get image metas.
        hierarchy_file (str): The file path of the class hierarchy.
        image_level_ann_file (str): Human-verified image level annotation,
            which is used in evaluation.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.
    """

    METAINFO: dict = dict(dataset_type='oid_v6')

    def __init__(self,
                 label_file: str,
                 meta_file: str,
                 hierarchy_file: str,
                 image_level_ann_file: Optional[str] = None,
                 **kwargs) -> None:
        # These paths must be set before ``super().__init__`` because the
        # base class is handed the remaining kwargs and may use them during
        # its own initialisation.
        self.label_file = label_file
        self.meta_file = meta_file
        self.hierarchy_file = hierarchy_file
        self.image_level_ann_file = image_level_ann_file
        super().__init__(**kwargs)

    def load_data_list(self) -> List[dict]:
        """Load annotations from an annotation file named as ``self.ann_file``

        The CSV is expected to list all rows of one image contiguously; a
        new record is started whenever the image id in column 0 changes.

        Returns:
            List[dict]: A list of annotation.
        """
        img_level_anns = self._load_label_metainfo()

        data_list = []
        with get_local_path(
                self.ann_file, backend_args=self.backend_args) as local_path:
            with open(local_path, 'r') as f:
                reader = csv.reader(f)
                last_img_id = None
                instances = []
                for i, line in enumerate(reader):
                    if i == 0:
                        # skip the header row
                        continue
                    img_id = line[0]
                    if last_img_id is None:
                        last_img_id = img_id
                    label_id = line[2]
                    assert label_id in self.label_id_mapping
                    label = int(self.label_id_mapping[label_id])
                    # Columns 4-7 are read as xmin, xmax, ymin, ymax and
                    # reordered here to (xmin, ymin, xmax, ymax).
                    bbox = [
                        float(line[4]),  # xmin
                        float(line[6]),  # ymin
                        float(line[5]),  # xmax
                        float(line[7])  # ymax
                    ]
                    instance = dict(
                        bbox=bbox,
                        bbox_label=label,
                        ignore_flag=0,
                        is_occluded=int(line[8]) == 1,
                        is_truncated=int(line[9]) == 1,
                        is_group_of=int(line[10]) == 1,
                        is_depiction=int(line[11]) == 1,
                        is_inside=int(line[12]) == 1)

                    if img_id != last_img_id:
                        # switch to a new image, record previous image's
                        # data.
                        data_list.append(
                            self._make_data_info(last_img_id, instances))
                        instances = []
                    instances.append(instance)
                    last_img_id = img_id

                # Flush the last image. The path is derived from the id at
                # this point so it always matches ``img_id`` (also when the
                # final row is the only row of its image), and nothing is
                # emitted when the file holds no annotation rows.
                if last_img_id is not None:
                    data_list.append(
                        self._make_data_info(last_img_id, instances))

        # add image metas to data list
        img_metas = load(
            self.meta_file, file_format='pkl', backend_args=self.backend_args)
        assert len(img_metas) == len(data_list)
        for data_info, meta in zip(data_list, img_metas):
            img_id = data_info['img_id']
            assert f'{img_id}.jpg' == osp.split(meta['filename'])[-1]
            self._merge_img_meta(data_info, meta, img_id, img_level_anns)

        return data_list

    def _load_label_metainfo(self) -> Optional[Dict[str, List[dict]]]:
        """Parse the label, image-level and hierarchy files.

        Sets ``self._metainfo['classes']``, ``self.label_id_mapping`` and
        ``self._metainfo['RELATION_MATRIX']``.

        Returns:
            Optional[Dict[str, List[dict]]]: Parsed image-level annotations
            keyed by image id, or None when ``self.image_level_ann_file``
            is None.
        """
        classes_names, label_id_mapping = self._parse_label_file(
            self.label_file)
        self._metainfo['classes'] = classes_names
        self.label_id_mapping = label_id_mapping

        if self.image_level_ann_file is not None:
            img_level_anns = self._parse_img_level_ann(
                self.image_level_ann_file)
        else:
            img_level_anns = None

        # OpenImagesMetric can get the relation matrix from the dataset meta
        relation_matrix = self._get_relation_matrix(self.hierarchy_file)
        self._metainfo['RELATION_MATRIX'] = relation_matrix
        return img_level_anns

    def _make_data_info(self, img_id: str, instances: List[dict]) -> dict:
        """Build the record of one image from its id and instances.

        Args:
            img_id (str): Image id; the image file is ``{img_id}.jpg``
                under ``self.data_prefix['img']``.
            instances (List[dict]): Instance annotations of the image.

        Returns:
            dict: Record with keys ``img_path``, ``img_id``, ``instances``.
        """
        return dict(
            img_path=osp.join(self.data_prefix['img'], f'{img_id}.jpg'),
            img_id=img_id,
            instances=instances,
        )

    def _merge_img_meta(
            self, data_info: dict, meta: dict, img_id: str,
            img_level_anns: Optional[Dict[str, List[dict]]]) -> None:
        """Merge image size and image-level labels into ``data_info``.

        ``data_info`` is modified in place: ``height``, ``width`` and
        ``img_id`` are set, every instance bbox is scaled from normalized
        to absolute coordinates, and, when ``img_level_anns`` is given,
        ``image_level_labels`` and ``confidences`` arrays are added.

        Args:
            data_info (dict): Record of one image holding ``instances``.
            meta (dict): Image meta; ``meta['ori_shape'][:2]`` is read as
                (height, width).
            img_id (str): Image id used to look up image-level labels.
            img_level_anns (dict, optional): Image-level annotations keyed
                by image id, or None to skip them.
        """
        h, w = meta['ori_shape'][:2]
        data_info['height'] = h
        data_info['width'] = w
        data_info['img_id'] = img_id

        # denormalize bboxes
        for instance in data_info['instances']:
            bbox = instance['bbox']
            bbox[0] *= w
            bbox[2] *= w
            bbox[1] *= h
            bbox[3] *= h

        # add image-level annotation
        if img_level_anns is not None:
            img_ann_list = img_level_anns.get(img_id, [])
            img_labels = [
                int(ann['image_level_label']) for ann in img_ann_list
            ]
            confidences = [float(ann['confidence']) for ann in img_ann_list]
            data_info['image_level_labels'] = np.array(
                img_labels, dtype=np.int64)
            data_info['confidences'] = np.array(
                confidences, dtype=np.float32)

    def _parse_label_file(self, label_file: str) -> tuple:
        """Get classes name and index mapping from cls-label-description file.

        Args:
            label_file (str): File path of the label description file that
                maps the classes names in MID format to their short
                descriptions.

        Returns:
            tuple: ``(classes_names, index_mapping)`` where
            ``classes_names`` is the list of second-column values in file
            order and ``index_mapping`` maps each first-column value to
            its row index.
        """
        index_list = []
        classes_names = []
        with get_local_path(
                label_file, backend_args=self.backend_args) as local_path:
            with open(local_path, 'r') as f:
                reader = csv.reader(f)
                for line in reader:
                    classes_names.append(line[1])
                    index_list.append(line[0])
        index_mapping = {index: i for i, index in enumerate(index_list)}
        return classes_names, index_mapping

    def _parse_img_level_ann(self,
                             img_level_ann_file: str) -> Dict[str, List[dict]]:
        """Parse image level annotations from csv style ann_file.

        Args:
            img_level_ann_file (str): CSV style image level annotation
                file path.

        Returns:
            Dict[str, List[dict]]: Annotations where item of the defaultdict
            indicates an image, each of which has (n) dicts.
            Keys of dicts are:

                - `image_level_label` (int): Label id.
                - `confidence` (float): Labels that are human-verified to be
                  present in an image have confidence = 1 (positive labels).
                  Labels that are human-verified to be absent from an image
                  have confidence = 0 (negative labels). Machine-generated
                  labels have fractional confidences, generally >= 0.5.
                  The higher the confidence, the smaller the chance for
                  the label to be a false positive.
        """
        item_lists = defaultdict(list)
        with get_local_path(
                img_level_ann_file,
                backend_args=self.backend_args) as local_path:
            with open(local_path, 'r') as f:
                reader = csv.reader(f)
                for i, line in enumerate(reader):
                    if i == 0:
                        # skip the header row
                        continue
                    img_id = line[0]
                    item_lists[img_id].append(
                        dict(
                            image_level_label=int(
                                self.label_id_mapping[line[2]]),
                            confidence=float(line[3])))
        return item_lists

    def _get_relation_matrix(self, hierarchy_file: str) -> np.ndarray:
        """Get the matrix of class hierarchy from the hierarchy file.

        Hierarchy for 600 classes can be found at
        https://storage.googleapis.com/openimages/2018_04/bbox_labels_600_hierarchy_visualizer/circle.html.

        Args:
            hierarchy_file (str): File path to the hierarchy for classes.

        Returns:
            np.ndarray: The matrix of the corresponding relationship between
            the parent class and the child class, of shape
            (class_num, class_num).
        """  # noqa

        hierarchy = load(
            hierarchy_file, file_format='json', backend_args=self.backend_args)
        class_num = len(self._metainfo['classes'])
        # Start from the identity: every class is related to itself.
        relation_matrix = np.eye(class_num, class_num)
        relation_matrix = self._convert_hierarchy_tree(hierarchy,
                                                       relation_matrix)
        return relation_matrix

    def _convert_hierarchy_tree(self,
                                hierarchy_map: dict,
                                relation_matrix: np.ndarray,
                                parents: Optional[list] = None,
                                get_all_parents: bool = True) -> np.ndarray:
        """Get matrix of the corresponding relationship between the parent
        class and the child class.

        Args:
            hierarchy_map (dict): Including label name and corresponding
                subcategory. Keys of dicts are:

                - `LabelName` (str): Name of the label.
                - `Subcategory` (dict | list): Corresponding subcategory(ies).
            relation_matrix (ndarray): The matrix of the corresponding
                relationship between the parent class and the child class,
                of shape (class_num, class_num). It is updated in place.
            parents (list, optional): Corresponding parent class indices.
                Defaults to None, which is treated as no parents.
            get_all_parents (bool): Whether get all parent names.
                Default: True

        Returns:
            ndarray: The matrix of the corresponding relationship between
            the parent class and the child class, of shape
            (class_num, class_num).
        """
        if parents is None:
            parents = []

        if 'Subcategory' in hierarchy_map:
            for node in hierarchy_map['Subcategory']:
                if 'LabelName' not in node:
                    continue
                children_name = node['LabelName']
                children_index = self.label_id_mapping[children_name]
                # ``children`` becomes the parent list of the next level:
                # the node itself plus, optionally, all of its ancestors.
                children = [children_index]
                for parent_index in parents:
                    if get_all_parents:
                        children.append(parent_index)
                    relation_matrix[children_index, parent_index] = 1
                # Forward the flag so it also applies below the top level.
                relation_matrix = self._convert_hierarchy_tree(
                    node,
                    relation_matrix,
                    parents=children,
                    get_all_parents=get_all_parents)
        return relation_matrix

    def _join_prefix(self):
        """Join ``self.data_root`` with annotation path."""
        super()._join_prefix()
        # Only non-empty, relative paths are joined with the data root.
        if not is_abs(self.label_file) and self.label_file:
            self.label_file = osp.join(self.data_root, self.label_file)
        if not is_abs(self.meta_file) and self.meta_file:
            self.meta_file = osp.join(self.data_root, self.meta_file)
        if not is_abs(self.hierarchy_file) and self.hierarchy_file:
            self.hierarchy_file = osp.join(self.data_root, self.hierarchy_file)
        if self.image_level_ann_file and not is_abs(self.image_level_ann_file):
            self.image_level_ann_file = osp.join(self.data_root,
                                                 self.image_level_ann_file)


@DATASETS.register_module()
class OpenImagesChallengeDataset(OpenImagesDataset):
    """Open Images Challenge dataset for detection.

    Args:
        ann_file (str): Open Images Challenge box annotation in txt format.
    """

    METAINFO: dict = dict(dataset_type='oid_challenge')

    def __init__(self, ann_file: str, **kwargs) -> None:
        if not ann_file.endswith('txt'):
            raise TypeError('The annotation file of Open Images Challenge '
                            'should be a txt file.')

        super().__init__(ann_file=ann_file, **kwargs)

    def load_data_list(self) -> List[dict]:
        """Load annotations from an annotation file named as ``self.ann_file``

        Each image entry in the txt file is read as: a filename line, one
        line that is skipped, a line holding the number of boxes, then
        that many box lines of the form
        ``label x1 y1 x2 y2 is_group_of``.

        Returns:
            List[dict]: A list of annotation.
        """
        img_level_anns = self._load_label_metainfo()

        data_list = []
        with get_local_path(
                self.ann_file, backend_args=self.backend_args) as local_path:
            with open(local_path, 'r') as f:
                lines = f.readlines()
        i = 0
        while i < len(lines):
            instances = []
            filename = lines[i].rstrip()
            # skip the filename line and the line that follows it
            i += 2
            img_gt_size = int(lines[i])
            i += 1
            for j in range(img_gt_size):
                sp = lines[i + j].split()
                # NOTE(review): the key is ``is_group_ofs`` here while the
                # parent class emits ``is_group_of``; kept unchanged since
                # consumers outside this file may rely on it.
                instances.append(
                    dict(
                        bbox=[
                            float(sp[1]),
                            float(sp[2]),
                            float(sp[3]),
                            float(sp[4])
                        ],
                        bbox_label=int(sp[0]) - 1,  # labels begin from 1
                        ignore_flag=0,
                        is_group_ofs=int(sp[5]) == 1))
            i += img_gt_size
            data_list.append(
                dict(
                    img_path=osp.join(self.data_prefix['img'], filename),
                    instances=instances,
                ))

        # add image metas to data list
        img_metas = load(
            self.meta_file, file_format='pkl', backend_args=self.backend_args)
        assert len(img_metas) == len(data_list)
        for data_info, meta in zip(data_list, img_metas):
            # The id is the file name without its last four characters
            # (presumably a 3-letter extension such as ``.jpg``).
            img_id = osp.split(data_info['img_path'])[-1][:-4]
            assert img_id == osp.split(meta['filename'])[-1][:-4]
            self._merge_img_meta(data_info, meta, img_id, img_level_anns)

        return data_list

    def _parse_label_file(self, label_file: str) -> tuple:
        """Get classes name and index mapping from cls-label-description file.

        Args:
            label_file (str): File path of the label description file that
                maps the classes names in MID format to their short
                descriptions.

        Returns:
            tuple: ``(classes_names, index_mapping)`` where
            ``classes_names`` holds the second-column values ordered by
            the integer id in the third column and ``index_mapping`` maps
            each first-column value to that id minus one.
        """
        label_list = []
        id_list = []
        index_mapping = {}
        with get_local_path(
                label_file, backend_args=self.backend_args) as local_path:
            with open(local_path, 'r') as f:
                reader = csv.reader(f)
                for line in reader:
                    label_name = line[0]
                    label_id = int(line[2])
                    label_list.append(line[1])
                    id_list.append(label_id)
                    # ids in the file begin from 1
                    index_mapping[label_name] = label_id - 1
        indexes = np.argsort(id_list)
        classes_names = [label_list[index] for index in indexes]
        return classes_names, index_mapping

    def _parse_img_level_ann(
            self, image_level_ann_file: str) -> Dict[str, List[dict]]:
        """Parse image level annotations from csv style ann_file.

        Args:
            image_level_ann_file (str): CSV style image level annotation
                file path.

        Returns:
            defaultdict[list[dict]]: Annotations where item of the defaultdict
            indicates an image, each of which has (n) dicts.
            Keys of dicts are:

                - `image_level_label` (int): of shape 1.
                - `confidence` (float): of shape 1.
        """
        item_lists = defaultdict(list)
        with get_local_path(
                image_level_ann_file,
                backend_args=self.backend_args) as local_path:
            with open(local_path, 'r') as f:
                reader = csv.reader(f)
                for i, line in enumerate(reader):
                    if i == 0:
                        # skip the header row
                        continue
                    img_id = line[0]
                    label_id = line[1]
                    assert label_id in self.label_id_mapping
                    image_level_label = int(self.label_id_mapping[label_id])
                    confidence = float(line[2])
                    item_lists[img_id].append(
                        dict(
                            image_level_label=image_level_label,
                            confidence=confidence))
        return item_lists

    def _get_relation_matrix(self, hierarchy_file: str) -> np.ndarray:
        """Get the matrix of class hierarchy from the hierarchy file.

        Args:
            hierarchy_file (str): File path to the hierarchy for classes.

        Returns:
            np.ndarray: The matrix of the corresponding relationship between
            the parent class and the child class, of shape
            (class_num, class_num).
        """
        with get_local_path(
                hierarchy_file, backend_args=self.backend_args) as local_path:
            # NOTE(review): ``allow_pickle=True`` can execute arbitrary code
            # when loading an untrusted file; only use trusted hierarchy
            # files here.
            class_label_tree = np.load(local_path, allow_pickle=True)
        # Drop the first row and column of the stored matrix.
        return class_label_tree[1:, 1:]