# Copyright (c) OpenMMLab. All rights reserved. """This script helps to convert labelme-style dataset to the coco format. Usage: $ python labelme2coco.py \ --img-dir /path/to/images \ --labels-dir /path/to/labels \ --out /path/to/coco_instances.json \ [--class-id-txt /path/to/class_with_id.txt] Note: Labels dir file structure: . └── PATH_TO_LABELS ├── image1.json ├── image2.json └── ... Images dir file structure: . └── PATH_TO_IMAGES ├── image1.jpg ├── image2.png └── ... If user set `--class-id-txt` then will use it in `categories` field, if not set, then will generate auto base on the all labelme label files to `class_with_id.json`. class_with_id.txt example, each line is "id class_name": ```text 1 cat 2 dog 3 bicycle 4 motorcycle ``` """ import argparse import json from pathlib import Path from typing import Optional import numpy as np from mmengine import track_iter_progress from mmyolo.utils.misc import IMG_EXTENSIONS def parse_args(): parser = argparse.ArgumentParser() parser.add_argument('--img-dir', type=str, help='Dataset image directory') parser.add_argument( '--labels-dir', type=str, help='Dataset labels directory') parser.add_argument('--out', type=str, help='COCO label json output path') parser.add_argument( '--class-id-txt', default=None, type=str, help='All class id txt path') args = parser.parse_args() return args def format_coco_annotations(points: list, image_id: int, annotations_id: int, category_id: int) -> dict: """Gen COCO annotations format label from labelme format label. Args: points (list): Coordinates of four vertices of rectangle bbox. image_id (int): Image id. annotations_id (int): Annotations id. category_id (int): Image dir path. Return: annotation_info (dict): COCO annotation data. """ annotation_info = dict() annotation_info['iscrowd'] = 0 annotation_info['category_id'] = category_id annotation_info['id'] = annotations_id annotation_info['image_id'] = image_id # bbox is [x1, y1, w, h] annotation_info['bbox'] = [ points[0][0], points[0][1], points[1][0] - points[0][0], points[1][1] - points[0][1] ] annotation_info['area'] = annotation_info['bbox'][2] * annotation_info[ 'bbox'][3] # bbox w * h segmentation_points = np.asarray(points).copy() segmentation_points[1, :] = np.asarray(points)[2, :] segmentation_points[2, :] = np.asarray(points)[1, :] annotation_info['segmentation'] = [list(segmentation_points.flatten())] return annotation_info def parse_labelme_to_coco( image_dir: str, labels_root: str, all_classes_id: Optional[dict] = None) -> (dict, dict): """Gen COCO json format label from labelme format label. Args: image_dir (str): Image dir path. labels_root (str): Image label root path. all_classes_id (Optional[dict]): All class with id. Default None. Return: coco_json (dict): COCO json data. category_to_id (dict): category id and name. COCO json example: { "images": [ { "height": 3000, "width": 4000, "id": 1, "file_name": "IMG_20210627_225110.jpg" }, ... ], "categories": [ { "id": 1, "name": "cat" }, ... ], "annotations": [ { "iscrowd": 0, "category_id": 1, "id": 1, "image_id": 1, "bbox": [ 1183.7313232421875, 1230.0509033203125, 1270.9998779296875, 927.0848388671875 ], "area": 1178324.7170306593, "segmentation": [ [ 1183.7313232421875, 1230.0509033203125, 1183.7313232421875, 2157.1357421875, 2454.731201171875, 2157.1357421875, 2454.731201171875, 1230.0509033203125 ] ] }, ... ] } """ # init coco json field coco_json = {'images': [], 'categories': [], 'annotations': []} image_id = 0 annotations_id = 0 if all_classes_id is None: category_to_id = dict() categories_labels = [] else: category_to_id = all_classes_id categories_labels = list(all_classes_id.keys()) # add class_ids and class_names to the categories list in coco_json for class_name, class_id in category_to_id.items(): coco_json['categories'].append({ 'id': class_id, 'name': class_name }) # filter incorrect image file img_file_list = [ img_file for img_file in Path(image_dir).iterdir() if img_file.suffix.lower() in IMG_EXTENSIONS ] for img_file in track_iter_progress(img_file_list): # get label file according to the image file name label_path = Path(labels_root).joinpath( img_file.stem).with_suffix('.json') if not label_path.exists(): print(f'Can not find label file: {label_path}, skip...') continue # load labelme label with open(label_path, encoding='utf-8') as f: labelme_data = json.load(f) image_id = image_id + 1 # coco id begin from 1 # update coco 'images' field coco_json['images'].append({ 'height': labelme_data['imageHeight'], 'width': labelme_data['imageWidth'], 'id': image_id, 'file_name': Path(labelme_data['imagePath']).name }) for label_shapes in labelme_data['shapes']: # Update coco 'categories' field class_name = label_shapes['label'] if (all_classes_id is None) and (class_name not in categories_labels): # only update when not been added before coco_json['categories'].append({ 'id': len(categories_labels) + 1, # categories id start with 1 'name': class_name }) categories_labels.append(class_name) category_to_id[class_name] = len(categories_labels) elif (all_classes_id is not None) and (class_name not in categories_labels): # check class name raise ValueError(f'Got unexpected class name {class_name}, ' 'which is not in your `--class-id-txt`.') # get shape type and convert it to coco format shape_type = label_shapes['shape_type'] if shape_type != 'rectangle': print(f'not support `{shape_type}` yet, skip...') continue annotations_id = annotations_id + 1 # convert point from [xmin, ymin, xmax, ymax] to [x1, y1, w, h] (x1, y1), (x2, y2) = label_shapes['points'] x1, x2 = sorted([x1, x2]) # xmin, xmax y1, y2 = sorted([y1, y2]) # ymin, ymax points = [[x1, y1], [x2, y2], [x1, y2], [x2, y1]] coco_annotations = format_coco_annotations( points, image_id, annotations_id, category_to_id[class_name]) coco_json['annotations'].append(coco_annotations) print(f'Total image = {image_id}') print(f'Total annotations = {annotations_id}') print(f'Number of categories = {len(categories_labels)}, ' f'which is {categories_labels}') return coco_json, category_to_id def convert_labelme_to_coco(image_dir: str, labels_dir: str, out_path: str, class_id_txt: Optional[str] = None): """Convert labelme format label to COCO json format label. Args: image_dir (str): Image dir path. labels_dir (str): Image label path. out_path (str): COCO json file save path. class_id_txt (Optional[str]): All class id txt file path. Default None. """ assert Path(out_path).suffix == '.json' if class_id_txt is not None: assert Path(class_id_txt).suffix == '.txt' all_classes_id = dict() with open(class_id_txt, encoding='utf-8') as f: txt_lines = f.read().splitlines() assert len(txt_lines) > 0 for txt_line in txt_lines: class_info = txt_line.split(' ') if len(class_info) != 2: raise ValueError('Error parse "class_id_txt" file ' f'{class_id_txt}, please check if some of ' 'the class names is blank, like "1 " -> ' '"1 blank", or class name has space between' ' words, like "1 Big house" -> "1 ' 'Big-house".') v, k = class_info all_classes_id.update({k: int(v)}) else: all_classes_id = None # convert to coco json coco_json_data, category_to_id = parse_labelme_to_coco( image_dir, labels_dir, all_classes_id) # save json result Path(out_path).parent.mkdir(exist_ok=True, parents=True) print(f'Saving json to {out_path}') json.dump(coco_json_data, open(out_path, 'w'), indent=2) if class_id_txt is None: category_to_id_path = Path(out_path).with_name('class_with_id.txt') print(f'Saving class id txt to {category_to_id_path}') with open(category_to_id_path, 'w', encoding='utf-8') as f: for k, v in category_to_id.items(): f.write(f'{v} {k}\n') else: print('Not Saving new class id txt, user should using ' f'{class_id_txt} for training config') def main(): args = parse_args() convert_labelme_to_coco(args.img_dir, args.labels_dir, args.out, args.class_id_txt) print('All done!') if __name__ == '__main__': main()