# dataset settings
# D3 in the config name means the whole dataset is divided into 3 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
input_modality = dict(use_lidar=False, use_camera=True)

# Example to use a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer it from the prefix (LMDB and Memcached are not
# supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'

# Method 2: Use backend_args (named file_client_args in versions
# before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    # base shape (1248, 832), scale (0.95, 1.05)
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(0.95, 1.05),
        interpolation='nearest',
        keep_ratio=True,
    ),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]

test_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='Resize3D',
        scale_factor=0.65,
        interpolation='nearest',
        keep_ratio=True),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
            'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
        ]),
]
# Construct a pipeline for data and gt loading in the show function.
# Please keep its loading consistent with test_pipeline (e.g. the same
# file client).
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='Resize3D',
        scale_factor=0.65,
        interpolation='nearest',
        keep_ratio=True),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
            'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
        ]),
]

train_dataloader = dict(
    batch_size=3,
    num_workers=3,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        cam_sync_instances=True,
        # we use box_type_3d='LiDAR' in the KITTI and nuScenes datasets
        # and box_type_3d='Depth' in the SUN RGB-D and ScanNet datasets.
        box_type_3d='Camera',
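        # load_type controls how each info record is expanded into samples;
        # in recent mmdetection3d releases WaymoDataset accepts
        # 'frame_based', 'mv_image_based' (one sample per camera image, as
        # used here) and 'fov_image_based' (front-camera FOV only). Check
        # the WaymoDataset docstring of your installed version if unsure.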
        load_type='mv_image_based',
        # load one frame out of every three, matching the D3 (3-fold)
        # split in the config name
        load_interval=3,
        backend_args=backend_args))

val_dataloader = dict(
    batch_size=1,
    num_workers=0,
    persistent_workers=False,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        cam_sync_instances=True,
        # we use box_type_3d='LiDAR' in the KITTI and nuScenes datasets
        # and box_type_3d='Depth' in the SUN RGB-D and ScanNet datasets.
        box_type_3d='Camera',
        load_type='mv_image_based',
        # load_eval_anns=False,
        backend_args=backend_args))

test_dataloader = dict(
    batch_size=1,
    num_workers=0,
    persistent_workers=False,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        cam_sync_instances=True,
        # we use box_type_3d='LiDAR' in the KITTI and nuScenes datasets
        # and box_type_3d='Depth' in the SUN RGB-D and ScanNet datasets.
        box_type_3d='Camera',
        load_type='mv_image_based',
        load_eval_anns=False,
        backend_args=backend_args))

val_evaluator = dict(
    type='WaymoMetric',
    waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
    metric='LET_mAP',
    load_type='mv_image_based',
    result_prefix='./pgd_mv_pred',
    nms_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=500,
        nms_thr=0.05,
        score_thr=0.001,
        min_bbox_size=0,
        max_per_frame=100))
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
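
# A minimal usage sketch, kept commented out so this file stays a pure
# config. The config path below is hypothetical; point it at wherever
# this file lives in your configs directory.
# from mmengine.config import Config
# cfg = Config.fromfile('configs/_base_/datasets/waymoD3-mv-mono3d-3class.py')
# print(cfg.train_dataloader.dataset.ann_file)  # -> 'waymo_infos_train.pkl'
# print(cfg.val_evaluator.waymo_bin_file)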