# projects/CENet/configs/cenet-64x512_4xb4_semantickitti.py
_base_ = ['../../../configs/_base_/default_runtime.py']
custom_imports = dict(
imports=['projects.CENet.cenet'], allow_failed_imports=False)
# For SemanticKITTI we usually do 19-class segmentation.
# For labels_map we follow the uniform format of MMDetection & MMSegmentation,
# i.e. we consider the unlabeled class as the last one, which is different
# from the original implementation of some methods, e.g. Cylinder3D.
dataset_type = 'SemanticKittiDataset'
data_root = 'data/semantickitti/'
class_names = [
    'car', 'bicycle', 'motorcycle', 'truck', 'other-vehicle', 'person',
    'bicyclist', 'motorcyclist', 'road', 'parking', 'sidewalk', 'other-ground',
    'building', 'fence', 'vegetation', 'trunk', 'terrain', 'pole',
    'traffic-sign'
]
labels_map = {
0: 19, # "unlabeled"
1: 19, # "outlier" mapped to "unlabeled" --------------mapped
10: 0, # "car"
11: 1, # "bicycle"
13: 4, # "bus" mapped to "other-vehicle" --------------mapped
15: 2, # "motorcycle"
16: 4, # "on-rails" mapped to "other-vehicle" ---------mapped
18: 3, # "truck"
20: 4, # "other-vehicle"
30: 5, # "person"
31: 6, # "bicyclist"
32: 7, # "motorcyclist"
40: 8, # "road"
44: 9, # "parking"
48: 10, # "sidewalk"
49: 11, # "other-ground"
50: 12, # "building"
51: 13, # "fence"
52: 19, # "other-structure" mapped to "unlabeled" ------mapped
60: 8, # "lane-marking" to "road" ---------------------mapped
70: 14, # "vegetation"
71: 15, # "trunk"
72: 16, # "terrain"
80: 17, # "pole"
81: 18, # "traffic-sign"
99: 19, # "other-object" to "unlabeled" ----------------mapped
252: 0, # "moving-car" to "car" ------------------------mapped
253: 6, # "moving-bicyclist" to "bicyclist" ------------mapped
254: 5, # "moving-person" to "person" ------------------mapped
255: 7, # "moving-motorcyclist" to "motorcyclist" ------mapped
256: 4, # "moving-on-rails" mapped to "other-vehic------mapped
257: 4, # "moving-bus" mapped to "other-vehicle" -------mapped
258: 3, # "moving-truck" to "truck" --------------------mapped
259: 4 # "moving-other"-vehicle to "other-vehicle"-----mapped
}
metainfo = dict(
classes=class_names, seg_label_mapping=labels_map, max_label=259)
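# Illustrative only (not part of the config): a minimal sketch of how a label
# mapping like `labels_map` can be applied with a lookup table, which is what
# `PointSegClassMapping` does in spirit. The names `lut`, `raw_labels` and
# `train_labels` are hypothetical.
# import numpy as np
# lut = np.full(259 + 1, 19, dtype=np.int64)  # max_label + 1 entries, default=ignore
# for raw_id, train_id in labels_map.items():
#     lut[raw_id] = train_id
# raw_labels = np.array([10, 252, 0, 81])  # car, moving-car, unlabeled, traffic-sign
# train_labels = lut[raw_labels]           # -> [0, 0, 19, 18]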
input_modality = dict(use_lidar=True, use_camera=False)
# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from the prefix (LMDB and Memcache are not supported yet)
# data_root = 's3://openmmlab/datasets/detection3d/semantickitti/'
# Method 2: Use backend_args, file_client_args in versions before 1.1.0
# backend_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/': 's3://openmmlab/datasets/detection3d/',
# 'data/': 's3://openmmlab/datasets/detection3d/'
# }))
backend_args = None
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_seg_3d=True,
seg_3d_dtype='np.int32',
seg_offset=2**16,
dataset_type='semantickitti',
backend_args=backend_args),
dict(type='PointSegClassMapping'),
dict(type='PointSample', num_points=0.9),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-3.1415929, 3.1415929],
scale_ratio_range=[0.95, 1.05],
translation_std=[0.1, 0.1, 0.1],
),
dict(
type='SemkittiRangeView',
H=64,
W=512,
fov_up=3.0,
fov_down=-25.0,
means=(11.71279, -0.1023471, 0.4952, -1.0545, 0.2877),
stds=(10.24, 12.295865, 9.4287, 0.8643, 0.1450),
ignore_index=19),
dict(type='Pack3DDetInputs', keys=['img', 'gt_semantic_seg'])
]
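# Illustrative only (not part of the config): a minimal sketch of the spherical
# projection behind a range-view transform such as `SemkittiRangeView`. Points
# (x, y, z) are mapped to pixel coordinates on an H x W range image using the
# vertical field of view. The names `points`, `proj_x` and `proj_y` are
# hypothetical.
# import numpy as np
# H, W = 64, 512
# fov_up, fov_down = np.deg2rad(3.0), np.deg2rad(-25.0)
# fov = fov_up - fov_down
# points = np.random.rand(1000, 4)  # x, y, z, intensity
# depth = np.linalg.norm(points[:, :3], axis=1)
# yaw = -np.arctan2(points[:, 1], points[:, 0])
# pitch = np.arcsin(points[:, 2] / np.clip(depth, 1e-6, None))
# proj_x = 0.5 * (yaw / np.pi + 1.0) * W         # column in [0, W)
# proj_y = (1.0 - (pitch - fov_down) / fov) * H  # row in [0, H)
# proj_x = np.clip(np.floor(proj_x), 0, W - 1).astype(np.int64)
# proj_y = np.clip(np.floor(proj_y), 0, H - 1).astype(np.int64)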
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_seg_3d=True,
seg_3d_dtype='np.int32',
seg_offset=2**16,
dataset_type='semantickitti',
backend_args=backend_args),
dict(type='PointSegClassMapping'),
dict(
type='SemkittiRangeView',
H=64,
W=512,
fov_up=3.0,
fov_down=-25.0,
means=(11.71279, -0.1023471, 0.4952, -1.0545, 0.2877),
stds=(10.24, 12.295865, 9.4287, 0.8643, 0.1450),
ignore_index=19),
dict(
type='Pack3DDetInputs',
keys=['img'],
meta_keys=('proj_x', 'proj_y', 'proj_range', 'unproj_range'))
]
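# Illustrative only (not part of the config): the test pipeline keeps `proj_x`,
# `proj_y`, `proj_range` and `unproj_range` so that per-pixel predictions on
# the 64 x 512 range image can be gathered back to the original points by
# indexing with each point's projected pixel. The names `seg_logits_2d` and
# `point_pred` are hypothetical.
# import numpy as np
# seg_logits_2d = np.random.rand(20, 64, 512)    # C x H x W network output
# pred_2d = seg_logits_2d.argmax(axis=0)         # H x W class map
# proj_x = np.random.randint(0, 512, size=1000)  # per-point column index
# proj_y = np.random.randint(0, 64, size=1000)   # per-point row index
# point_pred = pred_2d[proj_y, proj_x]           # per-point class labels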
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='semantickitti_infos_train.pkl',
pipeline=train_pipeline,
metainfo=metainfo,
modality=input_modality,
ignore_index=19,
backend_args=backend_args))
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='semantickitti_infos_val.pkl',
pipeline=test_pipeline,
metainfo=metainfo,
modality=input_modality,
ignore_index=19,
test_mode=True,
backend_args=backend_args))
test_dataloader = val_dataloader
val_evaluator = dict(type='SegMetric')
test_evaluator = val_evaluator
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
model = dict(
type='RangeImageSegmentor',
data_preprocessor=dict(type='Det3DDataPreprocessor'),
backbone=dict(
type='CENet',
in_channels=5,
stem_channels=128,
num_stages=4,
stage_blocks=(3, 4, 6, 3),
out_channels=(128, 128, 128, 128),
fuse_channels=(256, 128),
strides=(1, 2, 2, 2),
dilations=(1, 1, 1, 1),
act_cfg=dict(type='HSwish', inplace=True)),
decode_head=dict(
type='RangeImageHead',
channels=128,
num_classes=20,
dropout_ratio=0,
loss_ce=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=False,
class_weight=None,
loss_weight=1.0),
loss_lovasz=dict(type='LovaszLoss', loss_weight=1.5, reduction='none'),
loss_boundary=dict(type='BoundaryLoss', loss_weight=1.0),
conv_seg_kernel_size=1,
ignore_index=19),
auxiliary_head=[
dict(
type='RangeImageHead',
channels=128,
num_classes=20,
dropout_ratio=0,
loss_ce=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=False,
class_weight=None,
loss_weight=1.0),
loss_lovasz=dict(
type='LovaszLoss', loss_weight=1.5, reduction='none'),
loss_boundary=dict(type='BoundaryLoss', loss_weight=1.0),
conv_seg_kernel_size=1,
ignore_index=19,
indices=2),
dict(
type='RangeImageHead',
channels=128,
num_classes=20,
dropout_ratio=0,
loss_ce=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=False,
class_weight=None,
loss_weight=1.0),
loss_lovasz=dict(
type='LovaszLoss', loss_weight=1.5, reduction='none'),
loss_boundary=dict(type='BoundaryLoss', loss_weight=1.0),
conv_seg_kernel_size=1,
ignore_index=19,
indices=3),
dict(
type='RangeImageHead',
channels=128,
num_classes=20,
dropout_ratio=0,
loss_ce=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=False,
class_weight=None,
loss_weight=1.0),
loss_lovasz=dict(
type='LovaszLoss', loss_weight=1.5, reduction='none'),
loss_boundary=dict(type='BoundaryLoss', loss_weight=1.0),
conv_seg_kernel_size=1,
ignore_index=19,
indices=4)
],
train_cfg=None,
test_cfg=dict(use_knn=True, knn=7, search=7, sigma=1.0, cutoff=2.0))
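# Illustrative only (not part of the config): `test_cfg` above enables
# RangeNet++-style KNN post-processing. For each point, labels are gathered
# from a `search` x `search` window around its pixel, the `knn` neighbors
# closest in range are kept (range differences above `cutoff` are discarded;
# votes may additionally be weighted by a Gaussian with std `sigma`, omitted
# here), and the point takes the majority label. A per-point sketch with
# hypothetical names (`pred_2d`, `proj_range_img`, `point_range`):
# import numpy as np
# def knn_vote_single(pred_2d, proj_range_img, py, px, point_range,
#                     knn=7, search=7, cutoff=2.0):
#     h, w = pred_2d.shape
#     half = search // 2
#     ys = np.clip(np.arange(py - half, py + half + 1), 0, h - 1)
#     xs = np.clip(np.arange(px - half, px + half + 1), 0, w - 1)
#     window_pred = pred_2d[np.ix_(ys, xs)].ravel()
#     window_rng = proj_range_img[np.ix_(ys, xs)].ravel()
#     diff = np.abs(window_rng - point_range)
#     order = np.argsort(diff)[:knn]          # knn nearest neighbors in range
#     keep = order[diff[order] <= cutoff]     # drop neighbors beyond cutoff
#     votes = window_pred[keep] if keep.size else window_pred[order]
#     return np.bincount(votes).argmax()      # majority vote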
# optimizer
# This schedule is mainly used for the segmentation task on SemanticKITTI.
optim_wrapper = dict(
type='AmpOptimWrapper',
loss_scale='dynamic',
optimizer=dict(
type='AdamW',
lr=0.04,
betas=(0.9, 0.999),
        weight_decay=0.01,
        eps=5e-6))
param_scheduler = [
dict(
type='OneCycleLR',
total_steps=50,
by_epoch=True,
eta_max=0.0025,
pct_start=0.2,
div_factor=25.0,
final_div_factor=100.0,
convert_to_iter_based=True)
]
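# For reference (derived from the scheduler values above, assuming the usual
# PyTorch-style OneCycleLR semantics): the initial lr is
# eta_max / div_factor = 0.0025 / 25 = 1e-4, the peak lr of 0.0025 is reached
# after pct_start * total_steps = 20% of the 50 epochs, and the final lr is
# initial_lr / final_div_factor = 1e-4 / 100 = 1e-6.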
# runtime settings
train_cfg = dict(by_epoch=True, max_epochs=50, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# Default setting for scaling LR automatically
# - `enable` means whether to enable automatic LR scaling by default.
# - `base_batch_size` = (4 GPUs) x (4 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
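# For reference (an illustrative calculation, not an extra setting): with
# `enable=True`, the LR is scaled linearly by the actual total batch size over
# `base_batch_size`, e.g. 8 GPUs x 4 samples per GPU gives a factor of
# 32 / 16 = 2.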
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=1))