# Lint as: python2, python3
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data from semantic segmentation datasets.

The SegmentationDataset class provides both images and annotations (semantic
segmentation and/or instance segmentation) for TensorFlow. Currently, we
support the following datasets:

1. PASCAL VOC 2012 (http://host.robots.ox.ac.uk/pascal/VOC/voc2012/).
PASCAL VOC 2012 semantic segmentation dataset annotates 20 foreground objects
(e.g., bike, person, and so on) and leaves all the other semantic classes as
one background class. The dataset contains 1464, 1449, and 1456 annotated
images for the training, validation and test respectively.

2. Cityscapes dataset (https://www.cityscapes-dataset.com)
The Cityscapes dataset contains 19 semantic labels (such as road, person, car,
and so on) for urban street scenes.

3. ADE20K dataset (http://groups.csail.mit.edu/vision/datasets/ADE20K)
The ADE20K dataset contains 150 semantic labels both urban street scenes and
indoor scenes.

References:
  M. Everingham, S. M. A. Eslami, L. V. Gool, C. K. I. Williams, J. Winn,
  and A. Zisserman, The pascal visual object classes challenge a retrospective.
  IJCV, 2014.

  M. Cordts, M. Omran, S. Ramos, T. Rehfeld, M. Enzweiler, R. Benenson,
  U. Franke, S. Roth, and B. Schiele, "The cityscapes dataset for semantic urban
  scene understanding," In Proc. of CVPR, 2016.

  B. Zhou, H. Zhao, X. Puig, S. Fidler, A. Barriuso, A. Torralba, "Scene Parsing
  through ADE20K dataset", In Proc. of CVPR, 2017.
"""
import collections
import os.path
import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim

# Short aliases for the tf.contrib.slim dataset utilities used below.
slim = contrib_slim
dataset = slim.dataset
tfexample_decoder = slim.tfexample_decoder

# Human-readable descriptions of the items produced by a decoded TF-Example.
_ITEMS_TO_DESCRIPTIONS = {
    'image': 'A color image of varying height and width.',
    'labels_class': ('A semantic segmentation label whose size matches image.'
                     'Its values range from 0 (background) to num_classes.'),
}

# Named tuple to describe the dataset properties.
DatasetDescriptor = collections.namedtuple(
    'DatasetDescriptor',
    ['splits_to_sizes',   # Splits of the dataset into training, val, and test.
     'num_classes',   # Number of semantic classes, including the background
                      # class (if exists). For example, there are 20
                      # foreground classes + 1 background class in the PASCAL
                      # VOC 2012 dataset. Thus, we set num_classes=21.
     'ignore_label',  # Ignore label value.
    ]
)

_CITYSCAPES_INFORMATION = DatasetDescriptor(
    splits_to_sizes={
        'train': 2975,
        'val': 500,
    },
    num_classes=19,
    ignore_label=255,
)

_PASCAL_VOC_SEG_INFORMATION = DatasetDescriptor(
    splits_to_sizes={
        'train': 1464,
        'train_aug': 10582,
        'trainval': 2913,
        'val': 1449,
    },
    num_classes=21,
    ignore_label=255,
)

# These number (i.e., 'train'/'test') seems to have to be hard coded
# You are required to figure it out for your training/testing example.
_ADE20K_INFORMATION = DatasetDescriptor(
    splits_to_sizes={
        'train': 20210,  # num of samples in images/training
        'val': 2000,  # num of samples in images/validation
    },
    num_classes=151,
    ignore_label=0,
)

# Registry mapping the user-facing dataset name to its descriptor.
_DATASETS_INFORMATION = {
    'cityscapes': _CITYSCAPES_INFORMATION,
    'pascal_voc_seg': _PASCAL_VOC_SEG_INFORMATION,
    'ade20k': _ADE20K_INFORMATION,
}

# Default file pattern of TFRecord of TensorFlow Example.
_FILE_PATTERN = '%s-*'
def get_cityscapes_dataset_name():
  """Returns the registry name under which Cityscapes is registered."""
  return 'cityscapes'
def get_dataset(dataset_name, split_name, dataset_dir):
  """Gets an instance of slim Dataset.

  Args:
    dataset_name: Dataset name (a key of _DATASETS_INFORMATION, e.g.
      'cityscapes', 'pascal_voc_seg', or 'ade20k').
    split_name: A train/val Split name.
    dataset_dir: The directory of the dataset sources.

  Returns:
    An instance of slim Dataset.

  Raises:
    ValueError: if the dataset_name or split_name is not recognized.
  """
  if dataset_name not in _DATASETS_INFORMATION:
    raise ValueError(
        'The specified dataset %s is not supported yet.' % dataset_name)
  # Look up the descriptor once instead of re-indexing the registry for each
  # field below.
  dataset_information = _DATASETS_INFORMATION[dataset_name]

  splits_to_sizes = dataset_information.splits_to_sizes
  if split_name not in splits_to_sizes:
    raise ValueError('data split name %s not recognized' % split_name)

  # Prepare the variables for different datasets.
  num_classes = dataset_information.num_classes
  ignore_label = dataset_information.ignore_label

  file_pattern = os.path.join(dataset_dir, _FILE_PATTERN % split_name)

  # Specify how the TF-Examples are decoded.
  keys_to_features = {
      'image/encoded': tf.FixedLenFeature(
          (), tf.string, default_value=''),
      'image/filename': tf.FixedLenFeature(
          (), tf.string, default_value=''),
      'image/format': tf.FixedLenFeature(
          (), tf.string, default_value='jpeg'),
      'image/height': tf.FixedLenFeature(
          (), tf.int64, default_value=0),
      'image/width': tf.FixedLenFeature(
          (), tf.int64, default_value=0),
      'image/segmentation/class/encoded': tf.FixedLenFeature(
          (), tf.string, default_value=''),
      'image/segmentation/class/format': tf.FixedLenFeature(
          (), tf.string, default_value='png'),
  }
  # Map the decoded features onto the items consumed downstream: the RGB
  # image (3 channels) and the single-channel class-label map.
  items_to_handlers = {
      'image': tfexample_decoder.Image(
          image_key='image/encoded',
          format_key='image/format',
          channels=3),
      'image_name': tfexample_decoder.Tensor('image/filename'),
      'height': tfexample_decoder.Tensor('image/height'),
      'width': tfexample_decoder.Tensor('image/width'),
      'labels_class': tfexample_decoder.Image(
          image_key='image/segmentation/class/encoded',
          format_key='image/segmentation/class/format',
          channels=1),
  }

  decoder = tfexample_decoder.TFExampleDecoder(
      keys_to_features, items_to_handlers)

  return dataset.Dataset(
      data_sources=file_pattern,
      reader=tf.TFRecordReader,
      decoder=decoder,
      num_samples=splits_to_sizes[split_name],
      items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
      ignore_label=ignore_label,
      num_classes=num_classes,
      name=dataset_name,
      multi_label=True)