giantmonkeyTC committed
Commit 34d1f8b · 1 Parent(s): 4c86b49
This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .circleci/config.yml +35 -0
  2. .circleci/docker/Dockerfile +13 -0
  3. .circleci/test.yml +199 -0
  4. .dev_scripts/benchmark_full_models.txt +26 -0
  5. .dev_scripts/benchmark_options.py +11 -0
  6. .dev_scripts/benchmark_train_models.txt +13 -0
  7. .dev_scripts/covignore.cfg +6 -0
  8. .dev_scripts/diff_coverage_test.sh +42 -0
  9. .dev_scripts/gather_models.py +229 -0
  10. .dev_scripts/gen_benchmark_script.py +193 -0
  11. .dev_scripts/linter.sh +3 -0
  12. .dev_scripts/test_benchmark.sh +128 -0
  13. .dev_scripts/train_benchmark.sh +128 -0
  14. configs/.DS_Store +0 -0
  15. configs/3dssd/3dssd_4xb4_kitti-3d-car.py +119 -0
  16. configs/3dssd/README.md +45 -0
  17. configs/3dssd/metafile.yml +29 -0
  18. configs/_base_/datasets/kitti-3d-3class.py +167 -0
  19. configs/_base_/datasets/kitti-3d-car.py +165 -0
  20. configs/_base_/datasets/kitti-mono3d.py +100 -0
  21. configs/_base_/datasets/lyft-3d-range100.py +150 -0
  22. configs/_base_/datasets/lyft-3d.py +160 -0
  23. configs/_base_/datasets/nuim-instance.py +70 -0
  24. configs/_base_/datasets/nus-3d.py +169 -0
  25. configs/_base_/datasets/nus-mono3d.py +119 -0
  26. configs/_base_/datasets/s3dis-3d.py +134 -0
  27. configs/_base_/datasets/s3dis-seg.py +169 -0
  28. configs/_base_/datasets/scannet-3d.py +141 -0
  29. configs/_base_/datasets/scannet-seg.py +164 -0
  30. configs/_base_/datasets/semantickitti.py +224 -0
  31. configs/_base_/datasets/sunrgbd-3d.py +126 -0
  32. configs/_base_/datasets/waymoD3-fov-mono3d-3class.py +184 -0
  33. configs/_base_/datasets/waymoD3-mv-mono3d-3class.py +191 -0
  34. configs/_base_/datasets/waymoD5-3d-3class.py +178 -0
  35. configs/_base_/datasets/waymoD5-3d-car.py +173 -0
  36. configs/_base_/datasets/waymoD5-fov-mono3d-3class.py +163 -0
  37. configs/_base_/datasets/waymoD5-mv-mono3d-3class.py +163 -0
  38. configs/_base_/datasets/waymoD5-mv3d-3class.py +178 -0
  39. configs/_base_/default_runtime.py +23 -0
  40. configs/_base_/models/3dssd.py +76 -0
  41. configs/_base_/models/cascade-mask-rcnn_r50_fpn.py +199 -0
  42. configs/_base_/models/centerpoint_pillar02_second_secfpn_nus.py +89 -0
  43. configs/_base_/models/centerpoint_voxel01_second_secfpn_nus.py +89 -0
  44. configs/_base_/models/cylinder3d.py +41 -0
  45. configs/_base_/models/dgcnn.py +29 -0
  46. configs/_base_/models/fcaf3d.py +20 -0
  47. configs/_base_/models/fcos3d.py +86 -0
  48. configs/_base_/models/groupfree3d.py +75 -0
  49. configs/_base_/models/h3dnet.py +351 -0
  50. configs/_base_/models/imvotenet.py +118 -0
.circleci/config.yml ADDED
@@ -0,0 +1,35 @@
+ version: 2.1
+
+ # this allows you to use CircleCI's dynamic configuration feature
+ setup: true
+
+ # the path-filtering orb is required to continue a pipeline based on
+ # the path of an updated fileset
+ orbs:
+   path-filtering: circleci/path-filtering@0.1.2
+
+ workflows:
+   # the always-run workflow is always triggered, regardless of the pipeline parameters.
+   always-run:
+     jobs:
+       # the path-filtering/filter job determines which pipeline
+       # parameters to update.
+       - path-filtering/filter:
+           name: check-updated-files
+           # 3-column, whitespace-delimited mapping. One mapping per
+           # line:
+           # <regex path-to-test> <parameter-to-set> <value-of-pipeline-parameter>
+           mapping: |
+             mmdet3d/.* lint_only false
+             requirements/.* lint_only false
+             tests/.* lint_only false
+             tools/.* lint_only false
+             configs/.* lint_only false
+             .circleci/.* lint_only false
+             projects/.* lint_only false
+           base-revision: dev-1.x
+           # this is the path of the configuration we should trigger once
+           # path filtering and pipeline parameter value updates are
+           # complete. In this case, we are using the parent dynamic
+           # configuration itself.
+           config-path: .circleci/test.yml
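The mapping above drives the dynamic setup: when any changed path matches one of the regexes, the `lint_only` pipeline parameter is forced to `false`, and `.circleci/test.yml` then runs the full build workflows instead of lint alone. A minimal sketch of that evaluation, assuming the orb conceptually full-matches each regex against the changed file list (an illustration, not CircleCI's actual implementation):

```python
# Hypothetical sketch: apply a 3-column path-filtering mapping to the files
# changed since base-revision; a regex hit sets the named pipeline parameter.
import re

MAPPING = """\
mmdet3d/.* lint_only false
configs/.* lint_only false
"""


def evaluate_mapping(mapping_text, changed_files, defaults):
    params = dict(defaults)
    for line in mapping_text.strip().splitlines():
        pattern, name, value = line.split()
        if any(re.fullmatch(pattern, path) for path in changed_files):
            # the orb passes string values; coerce booleans for clarity here
            params[name] = {'true': True, 'false': False}.get(value, value)
    return params


# A docs-only change keeps the lint-only pipeline ...
print(evaluate_mapping(MAPPING, ['docs/index.rst'], {'lint_only': True}))
# ... while a source change unlocks the full test workflows.
print(evaluate_mapping(MAPPING, ['mmdet3d/version.py'], {'lint_only': True}))
```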
.circleci/docker/Dockerfile ADDED
@@ -0,0 +1,13 @@
+ ARG PYTORCH="1.8.1"
+ ARG CUDA="10.2"
+ ARG CUDNN="7"
+
+ ARG DEBIAN_FRONTEND=noninteractive
+
+ FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
+
+ # To fix GPG key error when running apt-get update
+ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
+ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
+
+ RUN apt-get update && apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx
.circleci/test.yml ADDED
@@ -0,0 +1,199 @@
+ version: 2.1
+
+ # the default pipeline parameters, which will be updated according to
+ # the results of the path-filtering orb
+ parameters:
+   lint_only:
+     type: boolean
+     default: true
+
+ jobs:
+   lint:
+     docker:
+       - image: cimg/python:3.7.4
+     steps:
+       - checkout
+       - run:
+           name: Install pre-commit hook
+           command: |
+             pip install pre-commit
+             pre-commit install
+       - run:
+           name: Linting
+           command: pre-commit run --all-files
+       - run:
+           name: Check docstring coverage
+           command: |
+             pip install interrogate
+             interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-magic --ignore-regex "__repr__" --fail-under 90 mmdet3d
+
+   build_cpu:
+     parameters:
+       # The python version must match available image tags in
+       # https://circleci.com/developer/images/image/cimg/python
+       python:
+         type: string
+       torch:
+         type: string
+       torchvision:
+         type: string
+     docker:
+       - image: cimg/python:<< parameters.python >>
+     resource_class: large
+     steps:
+       - checkout
+       - run:
+           name: Install Libraries
+           command: |
+             sudo apt-get update
+             sudo apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx libjpeg-dev zlib1g-dev libtinfo-dev libncurses5
+       - run:
+           name: Configure Python & pip
+           command: |
+             pip install --upgrade pip
+             pip install wheel
+       - run:
+           name: Install PyTorch
+           command: pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html
+       - when:
+           condition:
+             equal: ["3.9.0", << parameters.python >>]
+           steps:
+             - run: pip install "protobuf <= 3.20.1" && sudo apt-get update && sudo apt-get -y install libprotobuf-dev protobuf-compiler cmake
+       - run:
+           name: Install mmdet3d dependencies
+           command: |
+             pip install git+ssh://git@github.com/open-mmlab/mmengine.git@main
+             pip install -U openmim
+             mim install 'mmcv >= 2.0.0rc4'
+             pip install git+ssh://git@github.com/open-mmlab/mmdetection.git@dev-3.x
+             pip install -r requirements/tests.txt
+       - run:
+           name: Build and install
+           command: |
+             pip install -e .
+       - run:
+           name: Run unittests
+           command: |
+             coverage run --branch --source mmdet3d -m pytest tests/
+             coverage xml
+             coverage report -m
+
+   build_cuda:
+     parameters:
+       torch:
+         type: string
+       cuda:
+         type: enum
+         enum: ["10.2", "11.7"]
+       cudnn:
+         type: integer
+         default: 8
+     machine:
+       image: linux-cuda-11:default
+       # docker_layer_caching: true
+     resource_class: gpu.nvidia.small.multi
+     steps:
+       - checkout
+       - run:
+           name: Install nvidia-container-toolkit and Restart Docker
+           command: |
+             sudo apt-get update
+             sudo apt-get install -y nvidia-container-toolkit
+             sudo systemctl restart docker
+       - run:
+           # Cloning repos in VM since Docker doesn't have access to the private key
+           name: Clone Repos
+           command: |
+             git clone -b main --depth 1 ssh://git@github.com/open-mmlab/mmengine.git /home/circleci/mmengine
+             git clone -b dev-3.x --depth 1 ssh://git@github.com/open-mmlab/mmdetection.git /home/circleci/mmdetection
+       - run:
+           name: Build Docker image
+           command: |
+             docker build .circleci/docker -t mmdet3d:gpu --build-arg PYTORCH=<< parameters.torch >> --build-arg CUDA=<< parameters.cuda >> --build-arg CUDNN=<< parameters.cudnn >>
+             docker run --gpus all -t -d -v /home/circleci/project:/mmdetection3d -v /home/circleci/mmengine:/mmengine -v /home/circleci/mmdetection:/mmdetection -w /mmdetection3d --name mmdet3d mmdet3d:gpu
+             docker exec mmdet3d apt-get install -y git
+       - run:
+           name: Install mmdet3d dependencies
+           command: |
+             docker exec mmdet3d pip install -e /mmengine
+             docker exec mmdet3d pip install -U openmim
+             docker exec mmdet3d mim install 'mmcv >= 2.0.0rc4'
+             docker exec mmdet3d pip install -e /mmdetection
+             docker exec mmdet3d pip install -r requirements/tests.txt
+       - run:
+           name: Build and install
+           command: |
+             docker exec mmdet3d pip install -e .
+       - run:
+           name: Run unittests
+           command: |
+             docker exec mmdet3d pytest tests/
+
+ workflows:
+   pr_stage_lint:
+     when: << pipeline.parameters.lint_only >>
+     jobs:
+       - lint:
+           name: lint
+           filters:
+             branches:
+               ignore:
+                 - dev-1.x
+   pr_stage_test:
+     when:
+       not: << pipeline.parameters.lint_only >>
+     jobs:
+       - lint:
+           name: lint
+           filters:
+             branches:
+               ignore:
+                 - dev-1.x
+       - build_cpu:
+           name: minimum_version_cpu
+           torch: 1.8.1
+           torchvision: 0.9.1
+           python: 3.7.4 # The lowest python 3.7.x version available on CircleCI images
+           requires:
+             - lint
+       - build_cpu:
+           name: maximum_version_cpu
+           torch: 2.0.0
+           torchvision: 0.15.1
+           python: 3.9.0
+           requires:
+             - minimum_version_cpu
+       - hold:
+           type: approval
+           requires:
+             - maximum_version_cpu
+       - build_cuda:
+           name: mainstream_version_gpu
+           torch: 1.8.1
+           # Use double quotation mark to explicitly specify its type
+           # as string instead of number
+           cuda: "10.2"
+           cudnn: 7
+           requires:
+             - hold
+       - build_cuda:
+           name: maximum_version_gpu
+           torch: 2.0.0
+           cuda: "11.7"
+           cudnn: 8
+           requires:
+             - hold
+   merge_stage_test:
+     when:
+       not: << pipeline.parameters.lint_only >>
+     jobs:
+       - build_cuda:
+           name: minimum_version_gpu
+           torch: 1.8.1
+           cuda: "10.2"
+           cudnn: 7
+           filters:
+             branches:
+               only:
+                 - dev-1.x
.dev_scripts/benchmark_full_models.txt ADDED
@@ -0,0 +1,26 @@
+ configs/3dssd/3dssd_4xb4_kitti-3d-car.py
+ configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn_8xb4-cyclic-20e_nus-3d.py
+ configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py
+ configs/fcaf3d/fcaf3d_2xb8_s3dis-3d-5class.py
+ configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py
+ configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py
+ configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py
+ configs/h3dnet/h3dnet_8xb3_scannet-seg.py
+ configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py
+ configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py
+ configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py
+ configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py
+ configs/paconv/paconv_ssg_8xb8-cosine-150e_s3dis-seg.py
+ configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py
+ configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py
+ configs/point_rcnn/point-rcnn_8xb2_kitti-3d-3class.py
+ configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py
+ configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py
+ configs/pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py
+ configs/pv_rcnn/pv_rcnn_8xb2-80e_kitti-3d-3class.py
+ configs/regnet/pointpillars_hv_regnet-1.6gf_fpn_sbn-all_8xb4-2x_nus-3d.py
+ configs/second/second_hv_secfpn_8xb6-80e_kitti-3d-3class.py
+ configs/second/second_hv_secfpn_8xb6-amp-80e_kitti-3d-3class.py
+ configs/smoke/smoke_dla34_dlaneck_gn-all_4xb8-6x_kitti-mono3d.py
+ configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_nus-3d.py
+ configs/votenet/votenet_8xb8_scannet-3d.py
.dev_scripts/benchmark_options.py ADDED
@@ -0,0 +1,11 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+
+ third_part_libs = [
+     'conda install openblas-devel -c anaconda',
+     "pip install -U git+https://github.com/NVIDIA/MinkowskiEngine -v --no-deps --install-option='--blas_include_dirs=/opt/conda/include' --install-option='--blas=openblas'"  # noqa
+ ]
+ default_floating_range = 0.5
+ model_floating_ranges = {
+     'configs/pointpillars/pointpillars_hv_secfpn_sbn-all_16xb2-2x_waymoD5-3d-3class.py':  # noqa
+     0.7
+ }
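`default_floating_range` and `model_floating_ranges` read as per-model tolerances for comparing a fresh benchmark run against recorded reference metrics, with the Waymo PointPillars config allowed a wider 0.7 drift. A hedged sketch of how a regression check might consume them (the `within_tolerance` helper and the numbers are illustrative, not part of this commit):

```python
default_floating_range = 0.5
model_floating_ranges = {
    'configs/pointpillars/pointpillars_hv_secfpn_sbn-all_16xb2-2x_waymoD5-3d-3class.py':
    0.7
}


def within_tolerance(config, observed, reference):
    """Pass if the observed metric drifts no more than the allowed range."""
    allowed = model_floating_ranges.get(config, default_floating_range)
    return abs(observed - reference) <= allowed


# 64.8 vs. a 65.4 reference is inside the 0.7 Waymo tolerance but would fail
# the default 0.5 used for every other config.
waymo_cfg = ('configs/pointpillars/'
             'pointpillars_hv_secfpn_sbn-all_16xb2-2x_waymoD5-3d-3class.py')
assert within_tolerance(waymo_cfg, 64.8, 65.4)
assert not within_tolerance('configs/3dssd/3dssd_4xb4_kitti-3d-car.py', 64.8, 65.4)
```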
.dev_scripts/benchmark_train_models.txt ADDED
@@ -0,0 +1,13 @@
+ configs/3dssd/3dssd_4xb4_kitti-3d-car.py
+ configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn_8xb4-cyclic-20e_nus-3d.py
+ configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py
+ configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py
+ configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py
+ configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py
+ configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py
+ configs/pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py
+ configs/pv_rcnn/pv_rcnn_8xb2-80e_kitti-3d-3class.py
+ configs/second/second_hv_secfpn_8xb6-80e_kitti-3d-3class.py
+ configs/second/second_hv_secfpn_8xb6-amp-80e_kitti-3d-3class.py
+ configs/smoke/smoke_dla34_dlaneck_gn-all_4xb8-6x_kitti-mono3d.py
+ configs/votenet/votenet_8xb8_scannet-3d.py
.dev_scripts/covignore.cfg ADDED
@@ -0,0 +1,6 @@
+ # Each line should be the relative path to the root directory
+ # of this repo. Support regular expression as well.
+ # For example:
+ # .*/utils.py
+
+ .*/__init__.py
.dev_scripts/diff_coverage_test.sh ADDED
@@ -0,0 +1,42 @@
+ #!/bin/bash
+
+ readarray -t IGNORED_FILES < $( dirname "$0" )/covignore.cfg
+
+
+ REUSE_COVERAGE_REPORT=${REUSE_COVERAGE_REPORT:-0}
+ REPO=${1:-"origin"}
+ BRANCH=${2:-"refactor_dev"}
+
+ git fetch $REPO $BRANCH
+
+ PY_FILES=""
+ for FILE_NAME in $(git diff --name-only ${REPO}/${BRANCH}); do
+     # Only test python files in mmdet3d/ existing in current branch, and not ignored in covignore.cfg
+     if [ ${FILE_NAME: -3} == ".py" ] && [ ${FILE_NAME:0:8} == "mmdet3d/" ] && [ -f "$FILE_NAME" ]; then
+         IGNORED=false
+         for IGNORED_FILE_NAME in "${IGNORED_FILES[@]}"; do
+             # Skip blank lines
+             if [ -z "$IGNORED_FILE_NAME" ]; then
+                 continue
+             fi
+             if [ "${IGNORED_FILE_NAME::1}" != "#" ] && [[ "$FILE_NAME" =~ $IGNORED_FILE_NAME ]]; then
+                 echo "Ignoring $FILE_NAME"
+                 IGNORED=true
+                 break
+             fi
+         done
+         if [ "$IGNORED" = false ]; then
+             PY_FILES="$PY_FILES $FILE_NAME"
+         fi
+     fi
+ done
+
+ # Only test the coverage when PY_FILES are not empty, otherwise they will test the entire project
+ if [ ! -z "${PY_FILES}" ]
+ then
+     if [ "$REUSE_COVERAGE_REPORT" == "0" ]; then
+         coverage run --branch --source mmdet3d -m pytest tests/
+     fi
+     coverage report --fail-under 80 -m $PY_FILES
+     interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-magic --ignore-regex "__repr__" --fail-under 95 $PY_FILES
+ fi
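For readers less fluent in bash, the filtering loop above treats every non-comment line of covignore.cfg as a regular expression and tests it against each changed file with `[[ ... =~ ... ]]`, which is an unanchored search. A Python rendering of the same logic (an illustrative equivalent, not part of the commit; the on-disk existence check is omitted):

```python
import re


def filter_ignored(changed_files, ignore_patterns):
    """Keep changed mmdet3d/ .py files that match no covignore pattern."""
    kept = []
    for name in changed_files:
        if not (name.startswith('mmdet3d/') and name.endswith('.py')):
            continue
        for pattern in ignore_patterns:
            pattern = pattern.strip()
            if not pattern or pattern.startswith('#'):
                continue  # blank lines and comments, as in the shell loop
            if re.search(pattern, name):  # unanchored, like bash's =~
                break
        else:
            kept.append(name)
    return kept


# With the shipped covignore.cfg, package __init__.py files are skipped:
print(filter_ignored(
    ['mmdet3d/models/__init__.py', 'mmdet3d/models/detectors/votenet.py'],
    ['.*/__init__.py']))  # -> ['mmdet3d/models/detectors/votenet.py']
```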
.dev_scripts/gather_models.py ADDED
@@ -0,0 +1,229 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ """Script to gather benchmarked models and prepare them for upload.
+
+ Usage:
+     python gather_models.py ${root_path} ${out_dir}
+
+ Example:
+     python gather_models.py \
+         work_dirs/pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d \
+         work_dirs/pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d
+
+ Note that before running the above command, rename the directory with the
+ config name if you did not use the default directory name, create
+ a corresponding directory 'pgd' under the above path and put the used config
+ into it.
+ """
+
+ import argparse
+ import glob
+ import json
+ import shutil
+ import subprocess
+ from os import path as osp
+
+ import mmengine
+ import torch
+
+ # build schedule look-up table to automatically find the final model
+ SCHEDULES_LUT = {
+     '_1x_': 12,
+     '_2x_': 24,
+     '_20e_': 20,
+     '_3x_': 36,
+     '_4x_': 48,
+     '_24e_': 24,
+     '_6x_': 73,
+     '_50e_': 50,
+     '_80e_': 80,
+     '_100e_': 100,
+     '_150e_': 150,
+     '_200e_': 200,
+     '_250e_': 250,
+     '_400e_': 400
+ }
+
+ # TODO: add support for lyft dataset
+ RESULTS_LUT = {
+     'coco': ['bbox_mAP', 'segm_mAP'],
+     'nus': ['pts_bbox_NuScenes/NDS', 'NDS'],
+     'kitti-3d-3class': ['KITTI/Overall_3D_moderate', 'Overall_3D_moderate'],
+     'kitti-3d-car': ['KITTI/Car_3D_moderate_strict', 'Car_3D_moderate_strict'],
+     'lyft': ['score'],
+     'scannet_seg': ['miou'],
+     's3dis_seg': ['miou'],
+     'scannet': ['mAP_0.50'],
+     'sunrgbd': ['mAP_0.50'],
+     'kitti-mono3d': [
+         'img_bbox/KITTI/Car_3D_AP40_moderate_strict',
+         'Car_3D_AP40_moderate_strict'
+     ],
+     'nus-mono3d': ['img_bbox_NuScenes/NDS', 'NDS']
+ }
+
+
+ def get_model_dataset(log_json_path):
+     for key in RESULTS_LUT:
+         if log_json_path.find(key) != -1:
+             return key
+
+
+ def process_checkpoint(in_file, out_file):
+     checkpoint = torch.load(in_file, map_location='cpu')
+     # remove optimizer for smaller file size
+     if 'optimizer' in checkpoint:
+         del checkpoint['optimizer']
+     # if it is necessary to remove some sensitive data in checkpoint['meta'],
+     # add the code here.
+     torch.save(checkpoint, out_file)
+     sha = subprocess.check_output(['sha256sum', out_file]).decode()
+     final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8])
+     subprocess.Popen(['mv', out_file, final_file])
+     return final_file
+
+
+ def get_final_epoch(config):
+     if config.find('grid_rcnn') != -1 and config.find('2x') != -1:
+         # grid_rcnn 2x trains 25 epochs
+         return 25
+
+     for schedule_name, epoch_num in SCHEDULES_LUT.items():
+         if config.find(schedule_name) != -1:
+             return epoch_num
+
+
+ def get_best_results(log_json_path):
+     dataset = get_model_dataset(log_json_path)
+     max_dict = dict()
+     max_memory = 0
+     with open(log_json_path, 'r') as f:
+         for line in f.readlines():
+             log_line = json.loads(line)
+             if 'mode' not in log_line.keys():
+                 continue
+
+             # record memory and find best results & epochs
+             if log_line['mode'] == 'train' \
+                     and max_memory <= log_line['memory']:
+                 max_memory = log_line['memory']
+
+             elif log_line['mode'] == 'val':
+                 result_dict = {
+                     key: log_line[key]
+                     for key in RESULTS_LUT[dataset] if key in log_line
+                 }
+                 if len(max_dict) == 0:
+                     max_dict = result_dict
+                     max_dict['epoch'] = log_line['epoch']
+                 elif all(
+                         [max_dict[key] <= result_dict[key]
+                          for key in result_dict]):
+                     max_dict.update(result_dict)
+                     max_dict['epoch'] = log_line['epoch']
+
+     max_dict['memory'] = max_memory
+     return max_dict
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(description='Gather benchmarked models')
+     parser.add_argument(
+         'root',
+         type=str,
+         help='root path of benchmarked models to be gathered')
+     parser.add_argument(
+         'out', type=str, help='output path of gathered models to be stored')
+
+     args = parser.parse_args()
+     return args
+
+
+ def main():
+     args = parse_args()
+     models_root = args.root
+     models_out = args.out
+     mmengine.mkdir_or_exist(models_out)
+
+     # find all models in the root directory to be gathered
+     raw_configs = list(mmengine.scandir('./configs', '.py', recursive=True))
+
+     # filter configs that is not trained in the experiments dir
+     used_configs = []
+     for raw_config in raw_configs:
+         if osp.exists(osp.join(models_root, raw_config)):
+             used_configs.append(raw_config)
+     print(f'Find {len(used_configs)} models to be gathered')
+
+     # find final_ckpt and log file for trained each config
+     # and parse the best performance
+     model_infos = []
+     for used_config in used_configs:
+         # get logs
+         log_json_path = glob.glob(osp.join(models_root, '*.log.json'))[0]
+         log_txt_path = glob.glob(osp.join(models_root, '*.log'))[0]
+         model_performance = get_best_results(log_json_path)
+         final_epoch = model_performance['epoch']
+         final_model = 'epoch_{}.pth'.format(final_epoch)
+         model_path = osp.join(models_root, final_model)
+
+         # skip if the model is still training
+         if not osp.exists(model_path):
+             print(f'Expected {model_path} does not exist!')
+             continue
+
+         if model_performance is None:
+             print(f'Obtained no performance for model {used_config}')
+             continue
+
+         model_time = osp.split(log_txt_path)[-1].split('.')[0]
+         model_infos.append(
+             dict(
+                 config=used_config,
+                 results=model_performance,
+                 epochs=final_epoch,
+                 model_time=model_time,
+                 log_json_path=osp.split(log_json_path)[-1]))
+
+     # publish model for each checkpoint
+     publish_model_infos = []
+     for model in model_infos:
+         model_publish_dir = osp.join(models_out, model['config'].rstrip('.py'))
+         mmengine.mkdir_or_exist(model_publish_dir)
+
+         model_name = model['config'].split('/')[-1].rstrip(
+             '.py') + '_' + model['model_time']
+         publish_model_path = osp.join(model_publish_dir, model_name)
+         trained_model_path = osp.join(models_root,
+                                       'epoch_{}.pth'.format(model['epochs']))
+
+         # convert model
+         final_model_path = process_checkpoint(trained_model_path,
+                                               publish_model_path)
+
+         # copy log
+         shutil.copy(
+             osp.join(models_root, model['log_json_path']),
+             osp.join(model_publish_dir, f'{model_name}.log.json'))
+         shutil.copy(
+             osp.join(models_root, model['log_json_path'].rstrip('.json')),
+             osp.join(model_publish_dir, f'{model_name}.log'))
+
+         # copy config to guarantee reproducibility
+         config_path = model['config']
+         config_path = osp.join(
+             'configs',
+             config_path) if 'configs' not in config_path else config_path
+         target_cconfig_path = osp.split(config_path)[-1]
+         shutil.copy(config_path,
+                     osp.join(model_publish_dir, target_cconfig_path))
+
+         model['model_path'] = final_model_path
+         publish_model_infos.append(model)
+
+     models = dict(models=publish_model_infos)
+     print(f'Totally gathered {len(publish_model_infos)} models')
+     mmengine.dump(models, osp.join(models_out, 'model_info.json'))
+
+
+ if __name__ == '__main__':
+     main()
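One caveat worth flagging in `process_checkpoint` and `main` above: `str.rstrip('.pth')` strips any trailing run of the characters `.`, `p`, `t`, `h` rather than the literal extension, so some names lose extra characters. A safer suffix-stripping sketch (an assumption about the intended behavior, not what the commit ships):

```python
# str.rstrip treats its argument as a character set, not a suffix:
assert 'checkpoint.pth'.rstrip('.pth') == 'checkpoin'  # the trailing 't' also goes


# Slice the suffix off explicitly (str.removesuffix does this on Python >= 3.9):
def strip_suffix(name, suffix):
    return name[:-len(suffix)] if name.endswith(suffix) else name


assert strip_suffix('checkpoint.pth', '.pth') == 'checkpoint'
assert strip_suffix('epoch_80.pth', '.pth') == 'epoch_80'
```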
.dev_scripts/gen_benchmark_script.py ADDED
@@ -0,0 +1,193 @@
+ import argparse
+ import re
+ from os import path as osp
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(
+         description='Generate benchmark training/testing scripts')
+     parser.add_argument(
+         '--input_file',
+         required=False,
+         type=str,
+         help='Input file containing the paths '
+         'of configs to be trained/tested.')
+     parser.add_argument(
+         '--output_file',
+         required=True,
+         type=str,
+         help='Output file containing the '
+         'commands to train/test selected models.')
+     parser.add_argument(
+         '--gpus_per_node',
+         type=int,
+         default=8,
+         help='GPUs per node config for slurm, '
+         'should be set according to your slurm environment')
+     parser.add_argument(
+         '--cpus_per_task',
+         type=int,
+         default=5,
+         help='CPUs per task config for slurm, '
+         'should be set according to your slurm environment')
+     parser.add_argument(
+         '--gpus',
+         type=int,
+         default=8,
+         help='Totally used num of GPUs config for slurm (in testing), '
+         'should be set according to your slurm environment')
+     parser.add_argument(
+         '--mode', type=str, default='train', help='Train or test')
+     parser.add_argument(
+         '--long_work_dir',
+         action='store_true',
+         help='Whether use full relative path of config as work dir')
+     parser.add_argument(
+         '--max_keep_ckpts',
+         type=int,
+         default=1,
+         help='The max number of checkpoints saved in training')
+     parser.add_argument(
+         '--full_log',
+         action='store_true',
+         help='Whether save full log in a file')
+
+     args = parser.parse_args()
+     return args
+
+
+ args = parse_args()
+ assert args.mode in ['train', 'test'], 'Currently we only support ' \
+     'automatically generating training or testing scripts.'
+
+ config_paths = []
+
+ if args.input_file is not None:
+     with open(args.input_file, 'r') as fi:
+         config_paths = fi.read().strip().split('\n')
+ else:
+     while True:
+         print('Please type a config path and '
+               'press enter (press enter directly to exit):')
+         config_path = input()
+         if config_path != '':
+             config_paths.append(config_path)
+         else:
+             break
+
+ script = '''PARTITION=$1
+ CHECKPOINT_DIR=$2
+
+ '''
+
+ if args.mode == 'train':
+     for i, config_path in enumerate(config_paths):
+         root_dir = osp.dirname(osp.dirname(osp.abspath(__file__)))
+         if not osp.exists(osp.join(root_dir, config_path)):
+             print(f'Invalid config path (does not exist):\n{config_path}')
+             continue
+
+         config_name = config_path.split('/')[-1][:-3]
+         match_obj = re.match(r'^.*_[0-9]+x([0-9]+)_.*$', config_name)
+         if match_obj is None:
+             print(f'Invalid config path (no GPU num in '
+                   f'config name):\n{config_path}')
+             continue
+
+         gpu_num = int(match_obj.group(1))
+         work_dir_name = config_path if args.long_work_dir else config_name
+
+         script += f"echo '{config_path}' &\n"
+         if args.full_log:
+             script += f'mkdir -p $CHECKPOINT_DIR/{work_dir_name}\n'
+
+         # training commands
+         script += f'GPUS={gpu_num} GPUS_PER_NODE={args.gpus_per_node} ' \
+                   f'CPUS_PER_TASK={args.cpus_per_task} ' \
+                   f'./tools/slurm_train.sh $PARTITION {config_name} ' \
+                   f'{config_path} \\\n'
+         script += f'$CHECKPOINT_DIR/{work_dir_name} --cfg-options ' \
+                   f'checkpoint_config.max_keep_ckpts=' \
+                   f'{args.max_keep_ckpts} \\\n' \
+
+         # if output full log, redirect stdout and stderr to
+         # another log file in work dir
+         if args.full_log:
+             script += f'2>&1|tee $CHECKPOINT_DIR/{work_dir_name}' \
+                       f'/FULL_LOG.txt &\n'
+         else:
+             script += '>/dev/null &\n'
+
+         if i != len(config_paths) - 1:
+             script += '\n'
+
+         print(f'Successfully generated script for {config_name}')
+
+     with open(args.output_file, 'w') as fo:
+         fo.write(script)
+
+ elif args.mode == 'test':
+     for i, config_path in enumerate(config_paths):
+         root_dir = osp.dirname(osp.dirname(osp.abspath(__file__)))
+         if not osp.exists(osp.join(root_dir, config_path)):
+             print(f'Invalid config path (does not exist):\n{config_path}')
+             continue
+
+         config_name = config_path.split('/')[-1][:-3]
+
+         tasks = {
+             'scannet_seg', 'scannet', 's3dis_seg', 'sunrgbd', 'kitti', 'nus',
+             'lyft', 'waymo'
+         }
+         eval_option = None
+         for task in tasks:
+             if task in config_name:
+                 eval_option = task
+                 break
+         if eval_option is None:
+             print(f'Invalid config path (invalid task):\n{config_path}')
+             continue
+
+         work_dir_name = config_path if args.long_work_dir else config_name
+
+         script += f"echo '{config_path}' &\n"
+         if args.full_log:
+             script += f'mkdir -p $CHECKPOINT_DIR/{work_dir_name}\n'
+
+         # training commands
+         script += f'GPUS={args.gpus} GPUS_PER_NODE={args.gpus_per_node} ' \
+                   f'CPUS_PER_TASK={args.cpus_per_task} ' \
+                   f'./tools/slurm_test.sh $PARTITION {config_name} ' \
+                   f'{config_path} \\\n'
+         script += f'$CHECKPOINT_DIR/{work_dir_name}/latest.pth ' \
+
+         if eval_option in ['scannet_seg', 's3dis_seg']:
+             script += '--eval mIoU \\\n'
+         elif eval_option in ['scannet', 'sunrgbd', 'kitti', 'nus']:
+             script += '--eval map \\\n'
+         elif eval_option in ['lyft']:
+             script += f'--format-only --eval-options jsonfile_prefix=' \
+                       f'$CHECKPOINT_DIR/{work_dir_name}/results_challenge ' \
+                       f'csv_savepath=$CHECKPOINT_DIR/{work_dir_name}/' \
+                       f'results_challenge.csv \\\n'
+         elif eval_option in ['waymo']:
+             script += f'--eval waymo --eval-options pklfile_prefix=' \
+                       f'$CHECKPOINT_DIR/{work_dir_name}/kitti_results ' \
+                       f'submission_prefix=$CHECKPOINT_DIR/{work_dir_name}/' \
+                       f'kitti_results \\\n'
+
+         # if output full log, redirect stdout and stderr to
+         # another log file in work dir
+         if args.full_log:
+             script += f'2>&1|tee $CHECKPOINT_DIR/{work_dir_name}' \
+                       f'/FULL_LOG.txt &\n'
+         else:
+             script += '>/dev/null &\n'
+
+         if i != len(config_paths) - 1:
+             script += '\n'
+
+         print(f'Successfully generated script for {config_name}')
+
+     with open(args.output_file, 'w') as fo:
+         fo.write(script)
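The checked-in train_benchmark.sh and test_benchmark.sh below look like this generator's output with `--long_work_dir` and `--full_log` (full config paths as work dirs, `FULL_LOG.txt` redirection). Note also that the GPU-count regex `_[0-9]+x([0-9]+)_` expects old-style names such as `hv_second_secfpn_6x8_80e_...`; the newer `<gpus>xb<batch>` names used in the benchmark lists (e.g. `3dssd_4xb4_...`) do not match it. A sketch handling both forms (an illustration, not part of the commit):

```python
import re


def parse_gpu_num(config_name):
    """Extract the GPU count from either config naming convention."""
    # new style '<gpus>xb<batch>', e.g. '3dssd_4xb4_kitti-3d-car' -> 4 GPUs
    match = re.match(r'^.*_([0-9]+)xb[0-9]+.*$', config_name)
    if match:
        return int(match.group(1))
    # old style '<samples>x<gpus>', e.g. 'hv_second_secfpn_6x8_80e_...' -> 8 GPUs
    match = re.match(r'^.*_[0-9]+x([0-9]+)_.*$', config_name)
    return int(match.group(1)) if match else None


assert parse_gpu_num('3dssd_4xb4_kitti-3d-car') == 4
assert parse_gpu_num('hv_second_secfpn_6x8_80e_kitti-3d-3class') == 8
```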
.dev_scripts/linter.sh ADDED
@@ -0,0 +1,3 @@
+ yapf -r -i mmdet3d/ configs/ tests/ tools/
+ isort mmdet3d/ configs/ tests/ tools/
+ flake8 .
.dev_scripts/test_benchmark.sh ADDED
@@ -0,0 +1,128 @@
+ PARTITION=$1
+ CHECKPOINT_DIR=$2
+
+ echo 'configs/3dssd/3dssd_4xb4_kitti-3d-car.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION 3dssd_4x4_kitti-3d-car configs/3dssd/3dssd_4xb4_kitti-3d-car.py \
+ $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py/FULL_LOG.txt &
+
+ echo 'configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION centerpoint_02pillar_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py \
+ $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py/FULL_LOG.txt &
+
+ echo 'configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py \
+ $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py/FULL_LOG.txt &
+
+ echo 'configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py \
+ $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py/FULL_LOG.txt &
+
+ echo 'configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION groupfree3d_8x4_scannet-3d-18class-L6-O256 configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py \
+ $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py/FULL_LOG.txt &
+
+ echo 'configs/h3dnet/h3dnet_8xb3_scannet-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION h3dnet_3x8_scannet-3d-18class configs/h3dnet/h3dnet_8xb3_scannet-seg.py \
+ $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py/FULL_LOG.txt &
+
+ echo 'configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py \
+ $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py/FULL_LOG.txt &
+
+ echo 'configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION imvotenet_stage2_16x8_sunrgbd-3d-10class configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py \
+ $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py/FULL_LOG.txt &
+
+ echo 'configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION imvoxelnet_4x8_kitti-3d-car configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py \
+ $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py/FULL_LOG.txt &
+
+ echo 'configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION pointnet2_msg_16x2_cosine_80e_s3dis_seg-3d-13class configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py \
+ $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py/latest.pth --eval mIoU \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py/FULL_LOG.txt &
+
+ echo 'configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION pointnet2_msg_16x2_cosine_250e_scannet_seg-3d-20class configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py \
+ $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py/FULL_LOG.txt &
+
+ echo 'configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_pointpillars_fpn_sbn-all_2x8_2x_lyft-3d configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py \
+ $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py/latest.pth --format-only --eval-options jsonfile_prefix=$CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py/results_challenge csv_savepath=$CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py/results_challenge.csv \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py/FULL_LOG.txt &
+
+ echo 'configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py \
+ $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py/latest.pth --eval waymo --eval-options pklfile_prefix=$CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py/kitti_results submission_prefix=$CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py/kitti_results \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py \
+ $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py/FULL_LOG.txt &
+
+ echo 'configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_second_secfpn_6x8_80e_kitti-3d-3class configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py \
+ $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py/latest.pth --format-only --eval-options jsonfile_prefix=$CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py/results_challenge csv_savepath=$CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py/results_challenge.csv \
+ 2>&1|tee $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py/FULL_LOG.txt &
+
+ echo 'configs/votenet/votenet_8xb8_scannet-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION votenet_8x8_scannet-3d-18class configs/votenet/votenet_8xb8_scannet-3d.py \
+ $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py/FULL_LOG.txt &
.dev_scripts/train_benchmark.sh ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PARTITION=$1
2
+ CHECKPOINT_DIR=$2
3
+
4
+ echo 'configs/3dssd/3dssd_4xb4_kitti-3d-car.py' &
5
+ mkdir -p $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py
6
+ GPUS=4 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION 3dssd_4x4_kitti-3d-car configs/3dssd/3dssd_4xb4_kitti-3d-car.py \
7
+ $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
8
+ 2>&1|tee $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py/FULL_LOG.txt &
9
+
10
+ echo 'configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py' &
11
+ mkdir -p $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py
12
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION centerpoint_02pillar_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py \
13
+ $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
14
+ 2>&1|tee $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py/FULL_LOG.txt &
15
+
16
+ echo 'configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py' &
17
+ mkdir -p $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py
18
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py \
19
+ $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
20
+ 2>&1|tee $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py/FULL_LOG.txt &
21
+
22
+ echo 'configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py' &
23
+ mkdir -p $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py
24
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py \
25
+ $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
26
+ 2>&1|tee $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py/FULL_LOG.txt &
27
+
28
+ echo 'configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py' &
29
+ mkdir -p $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py
30
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py \
31
+ $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
32
+ 2>&1|tee $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py/FULL_LOG.txt &
33
+
34
+ echo 'configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py' &
35
+ mkdir -p $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py
36
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py \
37
+ $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
38
+ 2>&1|tee $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py/FULL_LOG.txt &
39
+
40
+ echo 'configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py' &
41
+ mkdir -p $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py
42
+ GPUS=4 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION groupfree3d_8x4_scannet-3d-18class-L6-O256 configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py \
43
+ $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
44
+ 2>&1|tee $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py/FULL_LOG.txt &
45
+
46
+ echo 'configs/h3dnet/h3dnet_8xb3_scannet-seg.py' &
47
+ mkdir -p $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py
48
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION h3dnet_3x8_scannet-3d-18class configs/h3dnet/h3dnet_8xb3_scannet-seg.py \
49
+ $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
50
+ 2>&1|tee $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py/FULL_LOG.txt &
51
+
52
+ echo 'configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py' &
53
+ mkdir -p $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py
54
+ GPUS=4 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py \
55
+ $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
56
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py/FULL_LOG.txt &
57
+
58
+ echo 'configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py' &
59
+ mkdir -p $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py
60
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION imvotenet_stage2_16x8_sunrgbd-3d-10class configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py \
61
+ $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
62
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py/FULL_LOG.txt &
63
+
64
+ echo 'configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py' &
65
+ mkdir -p $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py
66
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION imvoxelnet_4x8_kitti-3d-car configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py \
67
+ $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
68
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py/FULL_LOG.txt &
69
+
70
+ echo 'configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py' &
71
+ mkdir -p $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py
72
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py \
73
+ $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
74
+ 2>&1|tee $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py/FULL_LOG.txt &
75
+
76
+ echo 'configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py' &
77
+ mkdir -p $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py
78
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py \
79
+ $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
80
+ 2>&1|tee $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py/FULL_LOG.txt &
81
+
82
echo 'configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py' &
mkdir -p $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py
GPUS=2 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION pointnet2_msg_16x2_cosine_80e_s3dis_seg-3d-13class configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py \
$CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py/FULL_LOG.txt &

echo 'configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py' &
mkdir -p $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py
GPUS=2 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION pointnet2_msg_16x2_cosine_250e_scannet_seg-3d-20class configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py \
$CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py/FULL_LOG.txt &

echo 'configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py' &
mkdir -p $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py
GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_pointpillars_fpn_sbn-all_2x8_2x_lyft-3d configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py \
$CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py/FULL_LOG.txt &

echo 'configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py' &
mkdir -p $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py
GPUS=16 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py \
$CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py/FULL_LOG.txt &

echo 'configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py' &
mkdir -p $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py
GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py \
$CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py/FULL_LOG.txt &

echo 'configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py' &
mkdir -p $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py
GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_second_secfpn_6x8_80e_kitti-3d-3class configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py \
$CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py/FULL_LOG.txt &

echo 'configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py' &
mkdir -p $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py
GPUS=16 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py \
$CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py/FULL_LOG.txt &

echo 'configs/votenet/votenet_8xb8_scannet-3d.py' &
mkdir -p $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py
GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION votenet_8x8_scannet-3d-18class configs/votenet/votenet_8xb8_scannet-3d.py \
$CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py/FULL_LOG.txt &
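Each job above caps the number of saved checkpoints through `--cfg-options checkpoint_config.max_keep_ckpts=1`. A minimal sketch of what such a flag does, assuming mmengine is installed and the repository root is the working directory (the dotted key is split and merged into the nested config dict):

```python
# Sketch only, not part of the benchmark script: apply the same override
# programmatically that the jobs above pass on the command line.
from mmengine.config import Config

cfg = Config.fromfile('configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py')
cfg.merge_from_dict({'checkpoint_config.max_keep_ckpts': 1})
```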
configs/.DS_Store ADDED
Binary file (12.3 kB)
configs/3dssd/3dssd_4xb4_kitti-3d-car.py ADDED
@@ -0,0 +1,119 @@
_base_ = [
    '../_base_/models/3dssd.py', '../_base_/datasets/kitti-3d-car.py',
    '../_base_/default_runtime.py'
]

# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
point_cloud_range = [0, -40, -5, 70, 40, 3]
input_modality = dict(use_lidar=True, use_camera=False)
backend_args = None

db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
    classes=class_names,
    sample_groups=dict(Car=15),
    points_loader=dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    backend_args=backend_args)

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='ObjectNoise',
        num_try=100,
        translation_std=[1.0, 1.0, 0],
        global_rot_range=[0.0, 0.0],
        rot_range=[-1.0471975511965976, 1.0471975511965976]),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.9, 1.1]),
    # 3DSSD can get a higher performance without this transform
    # dict(type='BackgroundPointsFilter', bbox_enlarge_range=(0.5, 2.0, 0.5)),
    dict(type='PointSample', num_points=16384),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]

test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
            dict(type='PointSample', num_points=16384),
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]

train_dataloader = dict(
    batch_size=4, dataset=dict(dataset=dict(pipeline=train_pipeline, )))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))

# model settings
model = dict(
    bbox_head=dict(
        num_classes=1,
        bbox_coder=dict(
            type='AnchorFreeBBoxCoder', num_dir_bins=12, with_rot=True)))

# optimizer
lr = 0.002  # max learning rate
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=lr, weight_decay=0.),
    clip_grad=dict(max_norm=35, norm_type=2),
)

# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=2)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# learning rate
param_scheduler = [
    dict(
        type='MultiStepLR',
        begin=0,
        end=80,
        by_epoch=True,
        milestones=[45, 60],
        gamma=0.1)
]
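The `_base_` list at the top of this config is merged recursively, so loading the file yields the fully composed model, dataset, and runtime settings, with the overrides defined above taking precedence. A minimal sketch, assuming mmengine is installed and the repository root is the working directory:

```python
from mmengine.config import Config

cfg = Config.fromfile('configs/3dssd/3dssd_4xb4_kitti-3d-car.py')
print(cfg.train_cfg.max_epochs)         # 80, from the schedule above
print(cfg.model.bbox_head.num_classes)  # 1, the single-class (Car) override
```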
configs/3dssd/README.md ADDED
@@ -0,0 +1,45 @@
# 3DSSD: Point-based 3D Single Stage Object Detector

> [3DSSD: Point-based 3D Single Stage Object Detector](https://arxiv.org/abs/2002.10187)

<!-- [ALGORITHM] -->

## Abstract

Currently, there have been many kinds of voxel-based 3D single stage detectors, while point-based single stage methods are still underexplored. In this paper, we first present a lightweight and effective point-based 3D single stage object detector, named 3DSSD, achieving a good balance between accuracy and efficiency. In this paradigm, all upsampling layers and refinement stage, which are indispensable in all existing point-based methods, are abandoned to reduce the large computation cost. We novelly propose a fusion sampling strategy in downsampling process to make detection on less representative points feasible. A delicate box prediction network including a candidate generation layer, an anchor-free regression head with a 3D center-ness assignment strategy is designed to meet with our demand of accuracy and speed. Our paradigm is an elegant single stage anchor-free framework, showing great superiority to other existing methods. We evaluate 3DSSD on widely used KITTI dataset and more challenging nuScenes dataset. Our method outperforms all state-of-the-art voxel-based single stage methods by a large margin, and has comparable performance to two stage point-based methods as well, with inference speed more than 25 FPS, 2x faster than former state-of-the-art point-based methods.

<div align=center>
<img src="https://user-images.githubusercontent.com/30491025/143854187-54ed1257-a046-4764-81cd-d2c8404137d3.png" width="800"/>
</div>

## Introduction

We implement 3DSSD and provide the results and checkpoints on the KITTI dataset.

Some settings in our implementation differ from the [official implementation](https://github.com/Jia-Research-Lab/3DSSD); in our experiments they bring only marginal differences in performance on KITTI. To simplify and unify our models, we skip these settings in our implementation. The differences are listed below:

1. We keep the scenes without any object, while the official code skips them during training. Consequently, the official implementation uses only 3229 training and 3394 validation samples, whereas we keep the standard 3712 training and 3769 validation samples, the same splits used for all other models of our implementation on KITTI.
2. We do not modify the decay of `batch normalization` during training.
3. When using [`DataBaseSampler`](https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/datasets/pipelines/dbsampler.py#L80) for data augmentation, the official code places the sampled objects with reference to road planes, while we do not.
4. We perform detection in LiDAR coordinates, while the official code uses camera coordinates.

## Results and models

### KITTI

|                    Backbone                    | Class | Lr schd | Mem (GB) | Inf time (fps) |           mAP            | Download |
| :--------------------------------------------: | :---: | :-----: | :------: | :------------: | :----------------------: | :------: |
| [PointNet2SAMSG](./3dssd_4xb4_kitti-3d-car.py) |  Car  |   72e   |   4.7    |                | 78.58(81.27)<sup>1</sup> | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/3dssd/3dssd_4x4_kitti-3d-car/3dssd_4x4_kitti-3d-car_20210818_203828-b89c8fc4.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/3dssd/3dssd_4x4_kitti-3d-car/3dssd_4x4_kitti-3d-car_20210818_203828.log.json) |

\[1\]: We report two different 3D object detection results here. 78.58 mAP is evaluated by our evaluation code, and 81.27 mAP is evaluated by the official development kit (the one used in the paper and in the official code of 3DSSD). We found that the commonly used Python implementation of [`rotate_iou`](https://github.com/traveller59/second.pytorch/blob/e42e4a0e17262ab7d180ee96a0a36427f2c20a44/second/core/non_max_suppression/nms_gpu.py#L605), which is used in our KITTI dataset evaluation, differs from the official implementation in the [KITTI benchmark](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d).

## Citation

```latex
@inproceedings{yang20203dssd,
    author = {Zetong Yang and Yanan Sun and Shu Liu and Jiaya Jia},
    title = {3DSSD: Point-based 3D Single Stage Object Detector},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
    year = {2020}
}
```
configs/3dssd/metafile.yml ADDED
@@ -0,0 +1,29 @@
Collections:
  - Name: 3DSSD
    Metadata:
      Training Data: KITTI
      Training Techniques:
        - AdamW
      Training Resources: 4x TITAN X
      Architecture:
        - PointNet++
    Paper:
      URL: https://arxiv.org/abs/2002.10187
      Title: '3DSSD: Point-based 3D Single Stage Object Detector'
    README: configs/3dssd/README.md
    Code:
      URL: https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/models/detectors/ssd3dnet.py#L7
      Version: v0.6.0

Models:
  - Name: 3dssd_4x4_kitti-3d-car
    In Collection: 3DSSD
    Config: configs/3dssd/3dssd_4xb4_kitti-3d-car.py
    Metadata:
      Training Memory (GB): 4.7
    Results:
      - Task: 3D Object Detection
        Dataset: KITTI
        Metrics:
          mAP: 78.58
    Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/3dssd/3dssd_4x4_kitti-3d-car/3dssd_4x4_kitti-3d-car_20210818_203828-b89c8fc4.pth
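The metafile is plain YAML, so its fields (config path, metrics, checkpoint URL) can be read programmatically. A hedged sketch, assuming PyYAML is available:

```python
import yaml

with open('configs/3dssd/metafile.yml') as f:
    meta = yaml.safe_load(f)
model = meta['Models'][0]
print(model['Config'])                 # configs/3dssd/3dssd_4xb4_kitti-3d-car.py
print(model['Results'][0]['Metrics'])  # {'mAP': 78.58}
print(model['Weights'])                # checkpoint download URL
```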
configs/_base_/datasets/kitti-3d-3class.py ADDED
@@ -0,0 +1,167 @@
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
input_modality = dict(use_lidar=True, use_camera=False)
metainfo = dict(classes=class_names)

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/kitti/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
    classes=class_names,
    sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6),
    points_loader=dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    backend_args=backend_args)

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,  # x, y, z, intensity
        use_dim=4,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='ObjectNoise',
        num_try=100,
        translation_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
        rot_range=[-0.78539816, 0.78539816]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range)
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]
# construct a pipeline for data and gt loading in the show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
    batch_size=6,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='RepeatDataset',
        times=2,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='kitti_infos_train.pkl',
            data_prefix=dict(pts='training/velodyne_reduced'),
            pipeline=train_pipeline,
            modality=input_modality,
            test_mode=False,
            metainfo=metainfo,
            # we use box_type_3d='LiDAR' in the KITTI and nuScenes datasets
            # and box_type_3d='Depth' in the SUN RGB-D and ScanNet datasets.
            box_type_3d='LiDAR',
            backend_args=backend_args)))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(pts='training/velodyne_reduced'),
        ann_file='kitti_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='LiDAR',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(pts='training/velodyne_reduced'),
        ann_file='kitti_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='LiDAR',
        backend_args=backend_args))
val_evaluator = dict(
    type='KittiMetric',
    ann_file=data_root + 'kitti_infos_val.pkl',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
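Each `dict(type=...)` entry in the pipelines above is a lazy specification that the runner builds through the transform registry at runtime. A minimal sketch of that mechanism, assuming mmdet3d (v1.1+) is installed:

```python
# Build one transform from its config dict, the same way the dataloader does.
from mmdet3d.registry import TRANSFORMS

shuffle = TRANSFORMS.build(dict(type='PointShuffle'))
# `shuffle` is now a callable applied to each sample's results dict.
```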
configs/_base_/datasets/kitti-3d-car.py ADDED
@@ -0,0 +1,165 @@
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
input_modality = dict(use_lidar=True, use_camera=False)
metainfo = dict(classes=class_names)

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/kitti/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
    classes=class_names,
    sample_groups=dict(Car=15),
    points_loader=dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    backend_args=backend_args)

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,  # x, y, z, intensity
        use_dim=4,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='ObjectNoise',
        num_try=100,
        translation_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
        rot_range=[-0.78539816, 0.78539816]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range)
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]
# construct a pipeline for data and gt loading in the show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
    batch_size=6,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='RepeatDataset',
        times=2,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='kitti_infos_train.pkl',
            data_prefix=dict(pts='training/velodyne_reduced'),
            pipeline=train_pipeline,
            modality=input_modality,
            test_mode=False,
            metainfo=metainfo,
            # we use box_type_3d='LiDAR' in the KITTI and nuScenes datasets
            # and box_type_3d='Depth' in the SUN RGB-D and ScanNet datasets.
            box_type_3d='LiDAR',
            backend_args=backend_args)))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(pts='training/velodyne_reduced'),
        ann_file='kitti_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='LiDAR',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(pts='training/velodyne_reduced'),
        ann_file='kitti_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='LiDAR',
        backend_args=backend_args))
val_evaluator = dict(
    type='KittiMetric',
    ann_file=data_root + 'kitti_infos_val.pkl',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/kitti-mono3d.py ADDED
@@ -0,0 +1,100 @@
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
input_modality = dict(use_lidar=False, use_camera=True)
metainfo = dict(classes=class_names)

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/kitti/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    dict(type='Resize', scale=(1242, 375), keep_ratio=True),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]
test_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(type='Resize', scale=(1242, 375), keep_ratio=True),
    dict(type='Pack3DDetInputs', keys=['img'])
]
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(type='Pack3DDetInputs', keys=['img'])
]

train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='kitti_infos_train.pkl',
        data_prefix=dict(img='training/image_2'),
        pipeline=train_pipeline,
        modality=input_modality,
        load_type='fov_image_based',
        test_mode=False,
        metainfo=metainfo,
        # we use box_type_3d='Camera' in the monocular 3d
        # detection task
        box_type_3d='Camera',
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img='training/image_2'),
        ann_file='kitti_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        load_type='fov_image_based',
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Camera',
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='KittiMetric',
    ann_file=data_root + 'kitti_infos_val.pkl',
    metric='bbox',
    backend_args=backend_args)

test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/lyft-3d-range100.py ADDED
@@ -0,0 +1,150 @@
# If the point cloud range is changed, the models should also change their
# point cloud range accordingly
point_cloud_range = [-100, -100, -5, 100, 100, 3]
# For Lyft we usually do 9-class detection
class_names = [
    'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',
    'bicycle', 'pedestrian', 'animal'
]
dataset_type = 'LyftDataset'
data_root = 'data/lyft/'
data_prefix = dict(pts='v1.01-train/lidar', img='', sweeps='v1.01-train/lidar')
# Input modality for the Lyft dataset; this is consistent with the submission
# format, which requires the information in input_modality.
input_modality = dict(
    use_lidar=True,
    use_camera=False,
    use_radar=False,
    use_map=False,
    use_external=False)

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/lyft/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]
# construct a pipeline for data and gt loading in the show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(type='Pack3DDetInputs', keys=['points'])
]

train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='lyft_infos_train.pkl',
        pipeline=train_pipeline,
        metainfo=dict(classes=class_names),
        modality=input_modality,
        data_prefix=data_prefix,
        test_mode=False,
        box_type_3d='LiDAR',
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='lyft_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=dict(classes=class_names),
        modality=input_modality,
        test_mode=True,
        data_prefix=data_prefix,
        box_type_3d='LiDAR',
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='LyftMetric',
    data_root=data_root,
    ann_file='lyft_infos_val.pkl',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
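`PointsRangeFilter` above keeps only the points inside the 100 m square range. A conceptual sketch of that filtering (not the library code), assuming NumPy:

```python
import numpy as np

point_cloud_range = [-100, -100, -5, 100, 100, 3]

def filter_points(points: np.ndarray) -> np.ndarray:
    """Keep points whose (x, y, z) fall inside the configured range."""
    low = np.array(point_cloud_range[:3])
    high = np.array(point_cloud_range[3:])
    mask = np.all((points[:, :3] >= low) & (points[:, :3] < high), axis=1)
    return points[mask]
```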
configs/_base_/datasets/lyft-3d.py ADDED
@@ -0,0 +1,160 @@
# If the point cloud range is changed, the models should also change their
# point cloud range accordingly
point_cloud_range = [-80, -80, -5, 80, 80, 3]
# For Lyft we usually do 9-class detection
class_names = [
    'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',
    'bicycle', 'pedestrian', 'animal'
]
dataset_type = 'LyftDataset'
data_root = 'data/lyft/'
# Input modality for the Lyft dataset; this is consistent with the submission
# format, which requires the information in input_modality.
input_modality = dict(use_lidar=True, use_camera=False)
data_prefix = dict(pts='v1.01-train/lidar', img='', sweeps='v1.01-train/lidar')

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/lyft/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range)
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]
# construct a pipeline for data and gt loading in the show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='lyft_infos_train.pkl',
        pipeline=train_pipeline,
        metainfo=dict(classes=class_names),
        modality=input_modality,
        data_prefix=data_prefix,
        test_mode=False,
        box_type_3d='LiDAR',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='lyft_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=dict(classes=class_names),
        modality=input_modality,
        data_prefix=data_prefix,
        test_mode=True,
        box_type_3d='LiDAR',
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='lyft_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=dict(classes=class_names),
        modality=input_modality,
        test_mode=True,
        data_prefix=data_prefix,
        box_type_3d='LiDAR',
        backend_args=backend_args))

val_evaluator = dict(
    type='LyftMetric',
    data_root=data_root,
    ann_file='lyft_infos_val.pkl',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/nuim-instance.py ADDED
@@ -0,0 +1,70 @@
dataset_type = 'CocoDataset'
data_root = 'data/nuimages/'
class_names = [
    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/nuimages/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='Resize',
        img_scale=[(1280, 720), (1920, 1080)],
        multiscale_mode='range',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='PackDetInputs'),
]
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1600, 900),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
        ]),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor')),
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/nuimages_v1.0-train.json',
        img_prefix=data_root,
        classes=class_names,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/nuimages_v1.0-val.json',
        img_prefix=data_root,
        classes=class_names,
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/nuimages_v1.0-val.json',
        img_prefix=data_root,
        classes=class_names,
        pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'segm'])
configs/_base_/datasets/nus-3d.py ADDED
@@ -0,0 +1,169 @@
# If the point cloud range is changed, the models should also change their
# point cloud range accordingly
point_cloud_range = [-50, -50, -5, 50, 50, 3]
# Converting the LiDAR-coordinate point cloud range to the ego-coordinate
# range using the calibration info can bring a small improvement on nuScenes.
# point_cloud_range = [-50, -50.8, -5, 50, 49.2, 3]
# For nuScenes we usually do 10-class detection
class_names = [
    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
metainfo = dict(classes=class_names)
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
# Input modality for the nuScenes dataset; this is consistent with the
# submission format, which requires the information in input_modality.
input_modality = dict(use_lidar=True, use_camera=False)
data_prefix = dict(pts='samples/LIDAR_TOP', img='', sweeps='sweeps/LIDAR_TOP')

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/nuscenes/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectNameFilter', classes=class_names),
    dict(type='PointShuffle'),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        test_mode=True,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range)
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]
# construct a pipeline for data and gt loading in the show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        test_mode=True,
        backend_args=backend_args),
    dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        metainfo=metainfo,
        modality=input_modality,
        test_mode=False,
        data_prefix=data_prefix,
        # we use box_type_3d='LiDAR' in the KITTI and nuScenes datasets
        # and box_type_3d='Depth' in the SUN RGB-D and ScanNet datasets.
        box_type_3d='LiDAR',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        modality=input_modality,
        data_prefix=data_prefix,
        test_mode=True,
        box_type_3d='LiDAR',
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        modality=input_modality,
        test_mode=True,
        data_prefix=data_prefix,
        box_type_3d='LiDAR',
        backend_args=backend_args))

val_evaluator = dict(
    type='NuScenesMetric',
    data_root=data_root,
    ann_file=data_root + 'nuscenes_infos_val.pkl',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
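`LoadPointsFromMultiSweeps` with `sweeps_num=10` densifies each key frame by concatenating past LiDAR sweeps, tagged with their time lag relative to the key frame. A conceptual sketch, not the library implementation (the sweep fields below are hypothetical):

```python
import numpy as np

def load_multi_sweeps(key_points, sweeps, sweeps_num=10):
    # Key frame gets time lag 0; each past sweep carries its own lag.
    clouds = [np.hstack([key_points, np.zeros((len(key_points), 1))])]
    for sweep in sweeps[:sweeps_num]:
        pts, dt = sweep['points'], sweep['time_lag']  # hypothetical fields
        clouds.append(np.hstack([pts, np.full((len(pts), 1), dt)]))
    return np.concatenate(clouds, axis=0)
```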
configs/_base_/datasets/nus-mono3d.py ADDED
@@ -0,0 +1,119 @@
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
class_names = [
    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
metainfo = dict(classes=class_names)
# Input modality for the nuScenes dataset; this is consistent with the
# submission format, which requires the information in input_modality.
input_modality = dict(use_lidar=False, use_camera=True)

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/nuscenes/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=True,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    dict(type='Resize', scale=(1600, 900), keep_ratio=True),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'attr_labels',
            'gt_bboxes_3d', 'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]

test_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(type='mmdet.Resize', scale=(1600, 900), keep_ratio=True),
    dict(type='Pack3DDetInputs', keys=['img'])
]

train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='',
            CAM_FRONT='samples/CAM_FRONT',
            CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT',
            CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT',
            CAM_BACK='samples/CAM_BACK',
            CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
            CAM_BACK_LEFT='samples/CAM_BACK_LEFT'),
        ann_file='nuscenes_infos_train.pkl',
        load_type='mv_image_based',
        pipeline=train_pipeline,
        metainfo=metainfo,
        modality=input_modality,
        test_mode=False,
        # we use box_type_3d='Camera' in the monocular 3d
        # detection task
        box_type_3d='Camera',
        use_valid_flag=True,
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='',
            CAM_FRONT='samples/CAM_FRONT',
            CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT',
            CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT',
            CAM_BACK='samples/CAM_BACK',
            CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
            CAM_BACK_LEFT='samples/CAM_BACK_LEFT'),
        ann_file='nuscenes_infos_val.pkl',
        load_type='mv_image_based',
        pipeline=test_pipeline,
        modality=input_modality,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Camera',
        use_valid_flag=True,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='NuScenesMetric',
    data_root=data_root,
    ann_file=data_root + 'nuscenes_infos_val.pkl',
    metric='bbox',
    backend_args=backend_args)

test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/s3dis-3d.py ADDED
@@ -0,0 +1,134 @@
# dataset settings
dataset_type = 'S3DISDataset'
data_root = 'data/s3dis/'

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/s3dis/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

metainfo = dict(classes=('table', 'chair', 'sofa', 'bookcase', 'board'))
train_area = [1, 2, 3, 4, 6]
test_area = 5

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5],
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='PointSample', num_points=100000),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.087266, 0.087266],
        scale_ratio_range=[0.9, 1.1],
        translation_std=[.1, .1, .1],
        shift_height=False),
    dict(type='NormalizePointsColor', color_mean=None),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5],
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(
                type='RandomFlip3D',
                sync_2d=False,
                flip_ratio_bev_horizontal=0.5,
                flip_ratio_bev_vertical=0.5),
            dict(type='PointSample', num_points=100000),
            dict(type='NormalizePointsColor', color_mean=None),
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]

train_dataloader = dict(
    batch_size=8,
    num_workers=4,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='RepeatDataset',
        times=13,
        dataset=dict(
            type='ConcatDataset',
            datasets=[
                dict(
                    type=dataset_type,
                    data_root=data_root,
                    ann_file=f's3dis_infos_Area_{i}.pkl',
                    pipeline=train_pipeline,
                    filter_empty_gt=True,
                    metainfo=metainfo,
                    box_type_3d='Depth',
                    backend_args=backend_args) for i in train_area
            ])))

val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=f's3dis_infos_Area_{test_area}.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=f's3dis_infos_Area_{test_area}.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth',
        backend_args=backend_args))
val_evaluator = dict(type='IndoorMetric')
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
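The training set above concatenates Areas 1, 2, 3, 4 and 6 by expanding one per-area info file each. A trivial check of that expansion:

```python
train_area = [1, 2, 3, 4, 6]
print([f's3dis_infos_Area_{i}.pkl' for i in train_area])
# ['s3dis_infos_Area_1.pkl', 's3dis_infos_Area_2.pkl', 's3dis_infos_Area_3.pkl',
#  's3dis_infos_Area_4.pkl', 's3dis_infos_Area_6.pkl']
```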
configs/_base_/datasets/s3dis-seg.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # For S3DIS seg we usually do 13-class segmentation
2
+ class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',
3
+ 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter')
4
+ metainfo = dict(classes=class_names)
5
+ dataset_type = 'S3DISSegDataset'
6
+ data_root = 'data/s3dis/'
7
+ input_modality = dict(use_lidar=True, use_camera=False)
8
+ data_prefix = dict(
9
+ pts='points',
10
+ pts_instance_mask='instance_mask',
11
+ pts_semantic_mask='semantic_mask')
12
+
13
+ # Example to use different file client
14
+ # Method 1: simply set the data root and let the file I/O module
15
+ # automatically infer from prefix (not support LMDB and Memcache yet)
16
+
17
+ # data_root = 's3://openmmlab/datasets/detection3d/s3dis/'
18
+
19
+ # Method 2: Use backend_args, file_client_args in versions before 1.1.0
20
+ # backend_args = dict(
21
+ # backend='petrel',
22
+ # path_mapping=dict({
23
+ # './data/': 's3://openmmlab/datasets/detection3d/',
24
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
25
+ # }))
26
+ backend_args = None
27
+
28
+ num_points = 4096
29
+ train_area = [1, 2, 3, 4, 6]
30
+ test_area = 5
31
+ train_pipeline = [
32
+ dict(
33
+ type='LoadPointsFromFile',
34
+ coord_type='DEPTH',
35
+ shift_height=False,
36
+ use_color=True,
37
+ load_dim=6,
38
+ use_dim=[0, 1, 2, 3, 4, 5],
39
+ backend_args=backend_args),
40
+ dict(
41
+ type='LoadAnnotations3D',
42
+ with_bbox_3d=False,
43
+ with_label_3d=False,
44
+ with_mask_3d=False,
45
+ with_seg_3d=True,
46
+ backend_args=backend_args),
47
+ dict(type='PointSegClassMapping'),
48
+ dict(
49
+ type='IndoorPatchPointSample',
50
+ num_points=num_points,
51
+ block_size=1.0,
52
+ ignore_index=len(class_names),
53
+ use_normalized_coord=True,
54
+ enlarge_size=0.2,
55
+ min_unique_num=None),
56
+ dict(type='NormalizePointsColor', color_mean=None),
57
+ dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
58
+ ]
59
+ test_pipeline = [
60
+ dict(
61
+ type='LoadPointsFromFile',
62
+ coord_type='DEPTH',
63
+ shift_height=False,
64
+ use_color=True,
65
+ load_dim=6,
66
+ use_dim=[0, 1, 2, 3, 4, 5],
67
+ backend_args=backend_args),
68
+ dict(
69
+ type='LoadAnnotations3D',
70
+ with_bbox_3d=False,
71
+ with_label_3d=False,
72
+ with_mask_3d=False,
73
+ with_seg_3d=True,
74
+ backend_args=backend_args),
75
+ dict(type='NormalizePointsColor', color_mean=None),
76
+ dict(type='Pack3DDetInputs', keys=['points'])
77
+ ]
78
+ # construct a pipeline for data and gt loading in show function
79
+ # please keep its loading function consistent with test_pipeline (e.g. client)
80
+ # we need to load gt seg_mask!
81
+ eval_pipeline = [
82
+ dict(
83
+ type='LoadPointsFromFile',
84
+ coord_type='DEPTH',
85
+ shift_height=False,
86
+ use_color=True,
87
+ load_dim=6,
88
+ use_dim=[0, 1, 2, 3, 4, 5],
89
+ backend_args=backend_args),
90
+ dict(type='NormalizePointsColor', color_mean=None),
91
+ dict(type='Pack3DDetInputs', keys=['points'])
92
+ ]
93
+ tta_pipeline = [
94
+ dict(
95
+ type='LoadPointsFromFile',
96
+ coord_type='DEPTH',
97
+ shift_height=False,
98
+ use_color=True,
99
+ load_dim=6,
100
+ use_dim=[0, 1, 2, 3, 4, 5],
101
+ backend_args=backend_args),
102
+ dict(
103
+ type='LoadAnnotations3D',
104
+ with_bbox_3d=False,
105
+ with_label_3d=False,
106
+ with_mask_3d=False,
107
+ with_seg_3d=True,
108
+ backend_args=backend_args),
109
+ dict(type='NormalizePointsColor', color_mean=None),
110
+ dict(
111
+ type='TestTimeAug',
112
+ transforms=[[
113
+ dict(
114
+ type='RandomFlip3D',
115
+ sync_2d=False,
116
+ flip_ratio_bev_horizontal=0.,
117
+ flip_ratio_bev_vertical=0.)
118
+ ], [dict(type='Pack3DDetInputs', keys=['points'])]])
119
+ ]
120
+
121
+ # train on area 1, 2, 3, 4, 6
122
+ # test on area 5
123
+ train_dataloader = dict(
124
+ batch_size=8,
125
+ num_workers=4,
126
+ persistent_workers=True,
127
+ sampler=dict(type='DefaultSampler', shuffle=True),
128
+ dataset=dict(
129
+ type=dataset_type,
130
+ data_root=data_root,
131
+ ann_files=[f's3dis_infos_Area_{i}.pkl' for i in train_area],
132
+ metainfo=metainfo,
133
+ data_prefix=data_prefix,
134
+ pipeline=train_pipeline,
135
+ modality=input_modality,
136
+ ignore_index=len(class_names),
137
+ scene_idxs=[
138
+ f'seg_info/Area_{i}_resampled_scene_idxs.npy' for i in train_area
139
+ ],
140
+ test_mode=False,
141
+ backend_args=backend_args))
142
+ test_dataloader = dict(
143
+ batch_size=1,
144
+ num_workers=1,
145
+ persistent_workers=True,
146
+ drop_last=False,
147
+ sampler=dict(type='DefaultSampler', shuffle=False),
148
+ dataset=dict(
149
+ type=dataset_type,
150
+ data_root=data_root,
151
+ ann_files=f's3dis_infos_Area_{test_area}.pkl',
152
+ metainfo=metainfo,
153
+ data_prefix=data_prefix,
154
+ pipeline=test_pipeline,
155
+ modality=input_modality,
156
+ ignore_index=len(class_names),
157
+ scene_idxs=f'seg_info/Area_{test_area}_resampled_scene_idxs.npy',
158
+ test_mode=True,
159
+ backend_args=backend_args))
160
+ val_dataloader = test_dataloader
161
+
162
+ val_evaluator = dict(type='SegMetric')
163
+ test_evaluator = val_evaluator
164
+
165
+ vis_backends = [dict(type='LocalVisBackend')]
166
+ visualizer = dict(
167
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
168
+
169
+ tta_model = dict(type='Seg3DTTAModel')
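Because the Area-5 split is baked into the annotation file names above, evaluating on a different fold means overriding those dataset entries in a child config. A minimal sketch (the fold choice is illustrative, and `train_area` would need the matching change):

```python
# Hypothetical override: hold out Area 4 instead of Area 5.
_base_ = ['../_base_/datasets/s3dis-seg.py']

test_area = 4
test_dataloader = dict(
    dataset=dict(
        ann_files=f's3dis_infos_Area_{test_area}.pkl',
        scene_idxs=f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'))
val_dataloader = test_dataloader
```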
configs/_base_/datasets/scannet-3d.py ADDED
@@ -0,0 +1,141 @@
1
+ # dataset settings
2
+ dataset_type = 'ScanNetDataset'
3
+ data_root = 'data/scannet/'
4
+
5
+ metainfo = dict(
6
+ classes=('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
7
+ 'bookshelf', 'picture', 'counter', 'desk', 'curtain',
8
+ 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
9
+ 'garbagebin'))
10
+
11
+ # Example to use different file client
12
+ # Method 1: simply set the data root and let the file I/O module
13
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
14
+
15
+ # data_root = 's3://openmmlab/datasets/detection3d/scannet/'
16
+
17
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
18
+ # backend_args = dict(
19
+ # backend='petrel',
20
+ # path_mapping=dict({
21
+ # './data/': 's3://openmmlab/datasets/detection3d/',
22
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
23
+ # }))
24
+ backend_args = None
25
+
26
+ train_pipeline = [
27
+ dict(
28
+ type='LoadPointsFromFile',
29
+ coord_type='DEPTH',
30
+ shift_height=True,
31
+ load_dim=6,
32
+ use_dim=[0, 1, 2],
33
+ backend_args=backend_args),
34
+ dict(
35
+ type='LoadAnnotations3D',
36
+ with_bbox_3d=True,
37
+ with_label_3d=True,
38
+ with_mask_3d=True,
39
+ with_seg_3d=True,
40
+ backend_args=backend_args),
41
+ dict(type='GlobalAlignment', rotation_axis=2),
42
+ dict(type='PointSegClassMapping'),
43
+ dict(type='PointSample', num_points=40000),
44
+ dict(
45
+ type='RandomFlip3D',
46
+ sync_2d=False,
47
+ flip_ratio_bev_horizontal=0.5,
48
+ flip_ratio_bev_vertical=0.5),
49
+ dict(
50
+ type='GlobalRotScaleTrans',
51
+ rot_range=[-0.087266, 0.087266],
52
+ scale_ratio_range=[1.0, 1.0],
53
+ shift_height=True),
54
+ dict(
55
+ type='Pack3DDetInputs',
56
+ keys=[
57
+ 'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
58
+ 'pts_instance_mask'
59
+ ])
60
+ ]
61
+ test_pipeline = [
62
+ dict(
63
+ type='LoadPointsFromFile',
64
+ coord_type='DEPTH',
65
+ shift_height=True,
66
+ load_dim=6,
67
+ use_dim=[0, 1, 2],
68
+ backend_args=backend_args),
69
+ dict(type='GlobalAlignment', rotation_axis=2),
70
+ dict(
71
+ type='MultiScaleFlipAug3D',
72
+ img_scale=(1333, 800),
73
+ pts_scale_ratio=1,
74
+ flip=False,
75
+ transforms=[
76
+ dict(
77
+ type='GlobalRotScaleTrans',
78
+ rot_range=[0, 0],
79
+ scale_ratio_range=[1., 1.],
80
+ translation_std=[0, 0, 0]),
81
+ dict(
82
+ type='RandomFlip3D',
83
+ sync_2d=False,
84
+ flip_ratio_bev_horizontal=0.5,
85
+ flip_ratio_bev_vertical=0.5),
86
+ dict(type='PointSample', num_points=40000),
87
+ ]),
88
+ dict(type='Pack3DDetInputs', keys=['points'])
89
+ ]
90
+
91
+ train_dataloader = dict(
92
+ batch_size=8,
93
+ num_workers=4,
94
+ sampler=dict(type='DefaultSampler', shuffle=True),
95
+ dataset=dict(
96
+ type='RepeatDataset',
97
+ times=5,
98
+ dataset=dict(
99
+ type=dataset_type,
100
+ data_root=data_root,
101
+ ann_file='scannet_infos_train.pkl',
102
+ pipeline=train_pipeline,
103
+ filter_empty_gt=False,
104
+ metainfo=metainfo,
105
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
106
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
107
+ box_type_3d='Depth',
108
+ backend_args=backend_args)))
109
+
110
+ val_dataloader = dict(
111
+ batch_size=1,
112
+ num_workers=1,
113
+ sampler=dict(type='DefaultSampler', shuffle=False),
114
+ dataset=dict(
115
+ type=dataset_type,
116
+ data_root=data_root,
117
+ ann_file='scannet_infos_val.pkl',
118
+ pipeline=test_pipeline,
119
+ metainfo=metainfo,
120
+ test_mode=True,
121
+ box_type_3d='Depth',
122
+ backend_args=backend_args))
123
+ test_dataloader = dict(
124
+ batch_size=1,
125
+ num_workers=1,
126
+ sampler=dict(type='DefaultSampler', shuffle=False),
127
+ dataset=dict(
128
+ type=dataset_type,
129
+ data_root=data_root,
130
+ ann_file='scannet_infos_val.pkl',
131
+ pipeline=test_pipeline,
132
+ metainfo=metainfo,
133
+ test_mode=True,
134
+ box_type_3d='Depth',
135
+ backend_args=backend_args))
136
+ val_evaluator = dict(type='IndoorMetric')
137
+ test_evaluator = val_evaluator
138
+
139
+ vis_backends = [dict(type='LocalVisBackend')]
140
+ visualizer = dict(
141
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/scannet-seg.py ADDED
@@ -0,0 +1,164 @@
1
+ # For ScanNet seg we usually do 20-class segmentation
2
+ class_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table',
3
+ 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk',
4
+ 'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink',
5
+ 'bathtub', 'otherfurniture')
6
+ metainfo = dict(classes=class_names)
7
+ dataset_type = 'ScanNetSegDataset'
8
+ data_root = 'data/scannet/'
9
+ input_modality = dict(use_lidar=True, use_camera=False)
10
+ data_prefix = dict(
11
+ pts='points',
12
+ pts_instance_mask='instance_mask',
13
+ pts_semantic_mask='semantic_mask')
14
+
15
+ # Example to use different file client
16
+ # Method 1: simply set the data root and let the file I/O module
17
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
18
+
19
+ # data_root = 's3://openmmlab/datasets/detection3d/scannet/'
20
+
21
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
22
+ # backend_args = dict(
23
+ # backend='petrel',
24
+ # path_mapping=dict({
25
+ # './data/': 's3://openmmlab/datasets/detection3d/',
26
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
27
+ # }))
28
+ backend_args = None
29
+
30
+ num_points = 8192
31
+ train_pipeline = [
32
+ dict(
33
+ type='LoadPointsFromFile',
34
+ coord_type='DEPTH',
35
+ shift_height=False,
36
+ use_color=True,
37
+ load_dim=6,
38
+ use_dim=[0, 1, 2, 3, 4, 5],
39
+ backend_args=backend_args),
40
+ dict(
41
+ type='LoadAnnotations3D',
42
+ with_bbox_3d=False,
43
+ with_label_3d=False,
44
+ with_mask_3d=False,
45
+ with_seg_3d=True,
46
+ backend_args=backend_args),
47
+ dict(type='PointSegClassMapping'),
48
+ dict(
49
+ type='IndoorPatchPointSample',
50
+ num_points=num_points,
51
+ block_size=1.5,
52
+ ignore_index=len(class_names),
53
+ use_normalized_coord=False,
54
+ enlarge_size=0.2,
55
+ min_unique_num=None),
56
+ dict(type='NormalizePointsColor', color_mean=None),
57
+ dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
58
+ ]
59
+ test_pipeline = [
60
+ dict(
61
+ type='LoadPointsFromFile',
62
+ coord_type='DEPTH',
63
+ shift_height=False,
64
+ use_color=True,
65
+ load_dim=6,
66
+ use_dim=[0, 1, 2, 3, 4, 5],
67
+ backend_args=backend_args),
68
+ dict(
69
+ type='LoadAnnotations3D',
70
+ with_bbox_3d=False,
71
+ with_label_3d=False,
72
+ with_mask_3d=False,
73
+ with_seg_3d=True,
74
+ backend_args=backend_args),
75
+ dict(type='NormalizePointsColor', color_mean=None),
76
+ dict(type='Pack3DDetInputs', keys=['points'])
77
+ ]
78
+ # construct a pipeline for data and gt loading in show function
79
+ # please keep its loading function consistent with test_pipeline (e.g. client)
80
+ # we need to load gt seg_mask!
81
+ eval_pipeline = [
82
+ dict(
83
+ type='LoadPointsFromFile',
84
+ coord_type='DEPTH',
85
+ shift_height=False,
86
+ use_color=True,
87
+ load_dim=6,
88
+ use_dim=[0, 1, 2, 3, 4, 5],
89
+ backend_args=backend_args),
90
+ dict(type='NormalizePointsColor', color_mean=None),
91
+ dict(type='Pack3DDetInputs', keys=['points'])
92
+ ]
93
+ tta_pipeline = [
94
+ dict(
95
+ type='LoadPointsFromFile',
96
+ coord_type='DEPTH',
97
+ shift_height=False,
98
+ use_color=True,
99
+ load_dim=6,
100
+ use_dim=[0, 1, 2, 3, 4, 5],
101
+ backend_args=backend_args),
102
+ dict(
103
+ type='LoadAnnotations3D',
104
+ with_bbox_3d=False,
105
+ with_label_3d=False,
106
+ with_mask_3d=False,
107
+ with_seg_3d=True,
108
+ backend_args=backend_args),
109
+ dict(type='NormalizePointsColor', color_mean=None),
110
+ dict(
111
+ type='TestTimeAug',
112
+ transforms=[[
113
+ dict(
114
+ type='RandomFlip3D',
115
+ sync_2d=False,
116
+ flip_ratio_bev_horizontal=0.,
117
+ flip_ratio_bev_vertical=0.)
118
+ ], [dict(type='Pack3DDetInputs', keys=['points'])]])
119
+ ]
120
+
121
+ train_dataloader = dict(
122
+ batch_size=8,
123
+ num_workers=4,
124
+ persistent_workers=True,
125
+ sampler=dict(type='DefaultSampler', shuffle=True),
126
+ dataset=dict(
127
+ type=dataset_type,
128
+ data_root=data_root,
129
+ ann_file='scannet_infos_train.pkl',
130
+ metainfo=metainfo,
131
+ data_prefix=data_prefix,
132
+ pipeline=train_pipeline,
133
+ modality=input_modality,
134
+ ignore_index=len(class_names),
135
+ scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy',
136
+ test_mode=False,
137
+ backend_args=backend_args))
138
+ test_dataloader = dict(
139
+ batch_size=1,
140
+ num_workers=1,
141
+ persistent_workers=True,
142
+ drop_last=False,
143
+ sampler=dict(type='DefaultSampler', shuffle=False),
144
+ dataset=dict(
145
+ type=dataset_type,
146
+ data_root=data_root,
147
+ ann_file='scannet_infos_val.pkl',
148
+ metainfo=metainfo,
149
+ data_prefix=data_prefix,
150
+ pipeline=test_pipeline,
151
+ modality=input_modality,
152
+ ignore_index=len(class_names),
153
+ test_mode=True,
154
+ backend_args=backend_args))
155
+ val_dataloader = test_dataloader
156
+
157
+ val_evaluator = dict(type='SegMetric')
158
+ test_evaluator = val_evaluator
159
+
160
+ vis_backends = [dict(type='LocalVisBackend')]
161
+ visualizer = dict(
162
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
163
+
164
+ tta_model = dict(type='Seg3DTTAModel')
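`ignore_index=len(class_names)` encodes the convention that unannotated points carry label 20 (one past the last valid ScanNet class) so that losses and metrics can drop them. A self-contained sketch of that masking, using toy labels rather than real ScanNet data:

```python
import numpy as np

num_classes = 20            # the 20 ScanNet segmentation classes above
ignore_index = num_classes  # the value assigned to unannotated points

pred = np.array([0, 3, 5, 5, 2, 1, 7, 4])
gt = np.array([0, 3, 20, 5, 20, 1, 7, 2])  # 20 == unannotated
valid = gt != ignore_index                 # mask out ignored points
acc = (pred[valid] == gt[valid]).mean()
print(f'accuracy over {valid.sum()} labelled points: {acc:.2f}')  # -> 0.83
```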
configs/_base_/datasets/semantickitti.py ADDED
@@ -0,0 +1,224 @@
1
+ # For SemanticKITTI we usually do 19-class segmentation.
2
+ # For labels_map we follow the uniform format of MMDetection & MMSegmentation
3
+ # i.e. we consider the unlabeled class as the last one, which is different
4
+ # from the original implementation of some methods e.g. Cylinder3D.
5
+ dataset_type = 'SemanticKittiDataset'
6
+ data_root = 'data/semantickitti/'
7
+ class_names = [
8
+ 'car', 'bicycle', 'motorcycle', 'truck', 'bus', 'person', 'bicyclist',
9
+ 'motorcyclist', 'road', 'parking', 'sidewalk', 'other-ground', 'building',
10
+ 'fence', 'vegetation', 'trunk', 'terrain', 'pole', 'traffic-sign'
11
+ ]
12
+ labels_map = {
13
+ 0: 19, # "unlabeled"
14
+ 1: 19, # "outlier" mapped to "unlabeled" --------------mapped
15
+ 10: 0, # "car"
16
+ 11: 1, # "bicycle"
17
+ 13: 4, # "bus" mapped to "other-vehicle" --------------mapped
18
+ 15: 2, # "motorcycle"
19
+ 16: 4, # "on-rails" mapped to "other-vehicle" ---------mapped
20
+ 18: 3, # "truck"
21
+ 20: 4, # "other-vehicle"
22
+ 30: 5, # "person"
23
+ 31: 6, # "bicyclist"
24
+ 32: 7, # "motorcyclist"
25
+ 40: 8, # "road"
26
+ 44: 9, # "parking"
27
+ 48: 10, # "sidewalk"
28
+ 49: 11, # "other-ground"
29
+ 50: 12, # "building"
30
+ 51: 13, # "fence"
31
+ 52: 19, # "other-structure" mapped to "unlabeled" ------mapped
32
+ 60: 8, # "lane-marking" to "road" ---------------------mapped
33
+ 70: 14, # "vegetation"
34
+ 71: 15, # "trunk"
35
+ 72: 16, # "terrain"
36
+ 80: 17, # "pole"
37
+ 81: 18, # "traffic-sign"
38
+ 99: 19, # "other-object" to "unlabeled" ----------------mapped
39
+ 252: 0, # "moving-car" to "car" ------------------------mapped
40
+ 253: 6, # "moving-bicyclist" to "bicyclist" ------------mapped
41
+ 254: 5, # "moving-person" to "person" ------------------mapped
42
+ 255: 7, # "moving-motorcyclist" to "motorcyclist" ------mapped
43
+ 256: 4, # "moving-on-rails" mapped to "other-vehicle" --mapped
44
+ 257: 4, # "moving-bus" mapped to "other-vehicle" -------mapped
45
+ 258: 3, # "moving-truck" to "truck" --------------------mapped
46
+ 259: 4 # "moving-other-vehicle" to "other-vehicle" ----mapped
47
+ }
48
+
49
+ metainfo = dict(
50
+ classes=class_names, seg_label_mapping=labels_map, max_label=259)
51
+
52
+ input_modality = dict(use_lidar=True, use_camera=False)
53
+
54
+ # Example to use different file client
55
+ # Method 1: simply set the data root and let the file I/O module
56
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
57
+
58
+ # data_root = 's3://openmmlab/datasets/detection3d/semantickitti/'
59
+
60
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
61
+ # backend_args = dict(
62
+ # backend='petrel',
63
+ # path_mapping=dict({
64
+ # './data/': 's3://openmmlab/datasets/detection3d/',
65
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
66
+ # }))
67
+ backend_args = None
68
+
69
+ train_pipeline = [
70
+ dict(
71
+ type='LoadPointsFromFile',
72
+ coord_type='LIDAR',
73
+ load_dim=4,
74
+ use_dim=4,
75
+ backend_args=backend_args),
76
+ dict(
77
+ type='LoadAnnotations3D',
78
+ with_bbox_3d=False,
79
+ with_label_3d=False,
80
+ with_seg_3d=True,
81
+ seg_3d_dtype='np.int32',
82
+ seg_offset=2**16,
83
+ dataset_type='semantickitti',
84
+ backend_args=backend_args),
85
+ dict(type='PointSegClassMapping'),
86
+ dict(
87
+ type='RandomFlip3D',
88
+ sync_2d=False,
89
+ flip_ratio_bev_horizontal=0.5,
90
+ flip_ratio_bev_vertical=0.5),
91
+ dict(
92
+ type='GlobalRotScaleTrans',
93
+ rot_range=[-0.78539816, 0.78539816],
94
+ scale_ratio_range=[0.95, 1.05],
95
+ translation_std=[0.1, 0.1, 0.1],
96
+ ),
97
+ dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
98
+ ]
99
+ test_pipeline = [
100
+ dict(
101
+ type='LoadPointsFromFile',
102
+ coord_type='LIDAR',
103
+ load_dim=4,
104
+ use_dim=4,
105
+ backend_args=backend_args),
106
+ dict(
107
+ type='LoadAnnotations3D',
108
+ with_bbox_3d=False,
109
+ with_label_3d=False,
110
+ with_seg_3d=True,
111
+ seg_3d_dtype='np.int32',
112
+ seg_offset=2**16,
113
+ dataset_type='semantickitti',
114
+ backend_args=backend_args),
115
+ dict(type='PointSegClassMapping'),
116
+ dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
117
+ ]
118
+ # construct a pipeline for data and gt loading in show function
119
+ # please keep its loading function consistent with test_pipeline (e.g. client)
120
+ eval_pipeline = [
121
+ dict(
122
+ type='LoadPointsFromFile',
123
+ coord_type='LIDAR',
124
+ load_dim=4,
125
+ use_dim=4,
126
+ backend_args=backend_args),
127
+ dict(type='Pack3DDetInputs', keys=['points'])
128
+ ]
129
+ tta_pipeline = [
130
+ dict(
131
+ type='LoadPointsFromFile',
132
+ coord_type='LIDAR',
133
+ load_dim=4,
134
+ use_dim=4,
135
+ backend_args=backend_args),
136
+ dict(
137
+ type='LoadAnnotations3D',
138
+ with_bbox_3d=False,
139
+ with_label_3d=False,
140
+ with_seg_3d=True,
141
+ seg_3d_dtype='np.int32',
142
+ seg_offset=2**16,
143
+ dataset_type='semantickitti',
144
+ backend_args=backend_args),
145
+ dict(type='PointSegClassMapping'),
146
+ dict(
147
+ type='TestTimeAug',
148
+ transforms=[[
149
+ dict(
150
+ type='RandomFlip3D',
151
+ sync_2d=False,
152
+ flip_ratio_bev_horizontal=0.,
153
+ flip_ratio_bev_vertical=0.),
154
+ dict(
155
+ type='RandomFlip3D',
156
+ sync_2d=False,
157
+ flip_ratio_bev_horizontal=0.,
158
+ flip_ratio_bev_vertical=1.),
159
+ dict(
160
+ type='RandomFlip3D',
161
+ sync_2d=False,
162
+ flip_ratio_bev_horizontal=1.,
163
+ flip_ratio_bev_vertical=0.),
164
+ dict(
165
+ type='RandomFlip3D',
166
+ sync_2d=False,
167
+ flip_ratio_bev_horizontal=1.,
168
+ flip_ratio_bev_vertical=1.)
169
+ ],
170
+ [
171
+ dict(
172
+ type='GlobalRotScaleTrans',
173
+ rot_range=[pcd_rotate_range, pcd_rotate_range],
174
+ scale_ratio_range=[
175
+ pcd_scale_factor, pcd_scale_factor
176
+ ],
177
+ translation_std=[0, 0, 0])
178
+ for pcd_rotate_range in [-0.78539816, 0.0, 0.78539816]
179
+ for pcd_scale_factor in [0.95, 1.0, 1.05]
180
+ ], [dict(type='Pack3DDetInputs', keys=['points'])]])
181
+ ]
182
+
183
+ train_dataloader = dict(
184
+ batch_size=2,
185
+ num_workers=4,
186
+ persistent_workers=True,
187
+ sampler=dict(type='DefaultSampler', shuffle=True),
188
+ dataset=dict(
189
+ type=dataset_type,
190
+ data_root=data_root,
191
+ ann_file='semantickitti_infos_train.pkl',
192
+ pipeline=train_pipeline,
193
+ metainfo=metainfo,
194
+ modality=input_modality,
195
+ ignore_index=19,
196
+ backend_args=backend_args))
197
+
198
+ test_dataloader = dict(
199
+ batch_size=1,
200
+ num_workers=1,
201
+ persistent_workers=True,
202
+ drop_last=False,
203
+ sampler=dict(type='DefaultSampler', shuffle=False),
204
+ dataset=dict(
205
+ type=dataset_type,
206
+ data_root=data_root,
207
+ ann_file='semantickitti_infos_val.pkl',
208
+ pipeline=test_pipeline,
209
+ metainfo=metainfo,
210
+ modality=input_modality,
211
+ ignore_index=19,
212
+ test_mode=True,
213
+ backend_args=backend_args))
214
+
215
+ val_dataloader = test_dataloader
216
+
217
+ val_evaluator = dict(type='SegMetric')
218
+ test_evaluator = val_evaluator
219
+
220
+ vis_backends = [dict(type='LocalVisBackend')]
221
+ visualizer = dict(
222
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
223
+
224
+ tta_model = dict(type='Seg3DTTAModel')
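The `seg_3d_dtype='np.int32'` / `seg_offset=2**16` pair reflects the SemanticKITTI label format: each entry of a `.label` file packs an instance id in the upper 16 bits and the semantic id in the lower 16 bits, and the raw semantic ids are then remapped through `labels_map` into the contiguous 0-19 training ids. A sketch of that decoding under those assumptions, with an abbreviated mapping table:

```python
import numpy as np

labels_map = {0: 19, 10: 0, 40: 8, 252: 0}  # abbreviated version of the table
max_label = 259

raw = np.array([(7 << 16) | 10, 40, (3 << 16) | 252, 0], dtype=np.uint32)
semantic = raw % (2**16)          # strip instance ids (the seg_offset)
lut = np.full(max_label + 1, 19)  # default everything to "unlabeled" (19)
for src, dst in labels_map.items():
    lut[src] = dst
print(lut[semantic])              # -> [ 0  8  0 19]
```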
configs/_base_/datasets/sunrgbd-3d.py ADDED
@@ -0,0 +1,126 @@
1
+ dataset_type = 'SUNRGBDDataset'
2
+ data_root = 'data/sunrgbd/'
3
+ class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
4
+ 'night_stand', 'bookshelf', 'bathtub')
5
+
6
+ metainfo = dict(classes=class_names)
7
+
8
+ # Example to use different file client
9
+ # Method 1: simply set the data root and let the file I/O module
10
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
11
+
12
+ # data_root = 's3://openmmlab/datasets/detection3d/sunrgbd/'
13
+
14
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
15
+ # backend_args = dict(
16
+ # backend='petrel',
17
+ # path_mapping=dict({
18
+ # './data/': 's3://openmmlab/datasets/detection3d/',
19
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
20
+ # }))
21
+ backend_args = None
22
+
23
+ train_pipeline = [
24
+ dict(
25
+ type='LoadPointsFromFile',
26
+ coord_type='DEPTH',
27
+ shift_height=True,
28
+ load_dim=6,
29
+ use_dim=[0, 1, 2],
30
+ backend_args=backend_args),
31
+ dict(type='LoadAnnotations3D'),
32
+ dict(
33
+ type='RandomFlip3D',
34
+ sync_2d=False,
35
+ flip_ratio_bev_horizontal=0.5,
36
+ ),
37
+ dict(
38
+ type='GlobalRotScaleTrans',
39
+ rot_range=[-0.523599, 0.523599],
40
+ scale_ratio_range=[0.85, 1.15],
41
+ shift_height=True),
42
+ dict(type='PointSample', num_points=20000),
43
+ dict(
44
+ type='Pack3DDetInputs',
45
+ keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
46
+ ]
47
+ test_pipeline = [
48
+ dict(
49
+ type='LoadPointsFromFile',
50
+ coord_type='DEPTH',
51
+ shift_height=True,
52
+ load_dim=6,
53
+ use_dim=[0, 1, 2],
54
+ backend_args=backend_args),
55
+ dict(
56
+ type='MultiScaleFlipAug3D',
57
+ img_scale=(1333, 800),
58
+ pts_scale_ratio=1,
59
+ flip=False,
60
+ transforms=[
61
+ dict(
62
+ type='GlobalRotScaleTrans',
63
+ rot_range=[0, 0],
64
+ scale_ratio_range=[1., 1.],
65
+ translation_std=[0, 0, 0]),
66
+ dict(
67
+ type='RandomFlip3D',
68
+ sync_2d=False,
69
+ flip_ratio_bev_horizontal=0.5,
70
+ ),
71
+ dict(type='PointSample', num_points=20000)
72
+ ]),
73
+ dict(type='Pack3DDetInputs', keys=['points'])
74
+ ]
75
+
76
+ train_dataloader = dict(
77
+ batch_size=16,
78
+ num_workers=4,
79
+ sampler=dict(type='DefaultSampler', shuffle=True),
80
+ dataset=dict(
81
+ type='RepeatDataset',
82
+ times=5,
83
+ dataset=dict(
84
+ type=dataset_type,
85
+ data_root=data_root,
86
+ ann_file='sunrgbd_infos_train.pkl',
87
+ pipeline=train_pipeline,
88
+ filter_empty_gt=False,
89
+ metainfo=metainfo,
90
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
91
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
92
+ box_type_3d='Depth',
93
+ backend_args=backend_args)))
94
+
95
+ val_dataloader = dict(
96
+ batch_size=1,
97
+ num_workers=1,
98
+ sampler=dict(type='DefaultSampler', shuffle=False),
99
+ dataset=dict(
100
+ type=dataset_type,
101
+ data_root=data_root,
102
+ ann_file='sunrgbd_infos_val.pkl',
103
+ pipeline=test_pipeline,
104
+ metainfo=metainfo,
105
+ test_mode=True,
106
+ box_type_3d='Depth',
107
+ backend_args=backend_args))
108
+ test_dataloader = dict(
109
+ batch_size=1,
110
+ num_workers=1,
111
+ sampler=dict(type='DefaultSampler', shuffle=False),
112
+ dataset=dict(
113
+ type=dataset_type,
114
+ data_root=data_root,
115
+ ann_file='sunrgbd_infos_val.pkl',
116
+ pipeline=test_pipeline,
117
+ metainfo=metainfo,
118
+ test_mode=True,
119
+ box_type_3d='Depth',
120
+ backend_args=backend_args))
121
+ val_evaluator = dict(type='IndoorMetric')
122
+ test_evaluator = val_evaluator
123
+
124
+ vis_backends = [dict(type='LocalVisBackend')]
125
+ visualizer = dict(
126
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
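`RepeatDataset` with `times=5` simply re-indexes the wrapped dataset so that one training epoch iterates the SUNRGBD samples five times without reloading annotations. A toy sketch of the idea (a stand-in class, not the mmengine implementation):

```python
class RepeatDataset:
    """Wrap a dataset and present it `times` times per epoch."""

    def __init__(self, dataset, times):
        self.dataset, self.times = dataset, times

    def __len__(self):
        return self.times * len(self.dataset)

    def __getitem__(self, idx):
        # indices wrap around into the underlying dataset
        return self.dataset[idx % len(self.dataset)]

print(len(RepeatDataset(list(range(10)), times=5)))  # -> 50
```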
configs/_base_/datasets/waymoD3-fov-mono3d-3class.py ADDED
@@ -0,0 +1,184 @@
1
+ # dataset settings
2
+ # D3 in the config name means the whole dataset is divided into 3 folds
3
+ # We only use one fold for efficient experiments
4
+ dataset_type = 'WaymoDataset'
5
+ data_root = 'data/waymo/kitti_format/'
6
+ class_names = ['Pedestrian', 'Cyclist', 'Car']
7
+ metainfo = dict(classes=class_names)
8
+ input_modality = dict(use_lidar=False, use_camera=True)
9
+
10
+ # Example to use different file client
11
+ # Method 1: simply set the data root and let the file I/O module
12
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
13
+
14
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
15
+
16
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
17
+ # backend_args = dict(
18
+ # backend='petrel',
19
+ # path_mapping=dict({
20
+ # './data/': 's3://openmmlab/datasets/detection3d/',
21
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
22
+ # }))
23
+ backend_args = None
24
+
25
+ train_pipeline = [
26
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
27
+ dict(
28
+ type='LoadAnnotations3D',
29
+ with_bbox=True,
30
+ with_label=True,
31
+ with_attr_label=False,
32
+ with_bbox_3d=True,
33
+ with_label_3d=True,
34
+ with_bbox_depth=True),
35
+ # base shape (1248, 832), scale (0.95, 1.05)
36
+ dict(
37
+ type='RandomResize3D',
38
+ scale=(1248, 832),
39
+ ratio_range=(0.95, 1.05),
40
+ # ratio_range=(1., 1.),
41
+ interpolation='nearest',
42
+ keep_ratio=True,
43
+ ),
44
+ dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
45
+ dict(
46
+ type='Pack3DDetInputs',
47
+ keys=[
48
+ 'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
49
+ 'gt_labels_3d', 'centers_2d', 'depths'
50
+ ]),
51
+ ]
52
+
53
+ test_pipeline = [
54
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
55
+ dict(
56
+ type='RandomResize3D',
57
+ scale=(1248, 832),
58
+ ratio_range=(1., 1.),
59
+ interpolation='nearest',
60
+ keep_ratio=True),
61
+ dict(
62
+ type='Pack3DDetInputs',
63
+ keys=['img'],
64
+ meta_keys=[
65
+ 'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
66
+ 'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
67
+ ]),
68
+ ]
69
+ # construct a pipeline for data and gt loading in show function
70
+ # please keep its loading function consistent with test_pipeline (e.g. client)
71
+ eval_pipeline = [
72
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
73
+ dict(
74
+ type='RandomResize3D',
75
+ scale=(1248, 832),
76
+ ratio_range=(1., 1.),
77
+ interpolation='nearest',
78
+ keep_ratio=True),
79
+ dict(
80
+ type='Pack3DDetInputs',
81
+ keys=['img'],
82
+ meta_keys=[
83
+ 'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
84
+ 'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
85
+ ]),
86
+ ]
87
+
88
+ train_dataloader = dict(
89
+ batch_size=3,
90
+ num_workers=3,
91
+ persistent_workers=True,
92
+ sampler=dict(type='DefaultSampler', shuffle=True),
93
+ dataset=dict(
94
+ type=dataset_type,
95
+ data_root=data_root,
96
+ ann_file='waymo_infos_train.pkl',
97
+ data_prefix=dict(
98
+ pts='training/velodyne',
99
+ CAM_FRONT='training/image_0',
100
+ CAM_FRONT_LEFT='training/image_1',
101
+ CAM_FRONT_RIGHT='training/image_2',
102
+ CAM_SIDE_LEFT='training/image_3',
103
+ CAM_SIDE_RIGHT='training/image_4'),
104
+ pipeline=train_pipeline,
105
+ modality=input_modality,
106
+ test_mode=False,
107
+ metainfo=metainfo,
108
+ cam_sync_instances=True,
109
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
110
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
111
+ box_type_3d='Camera',
112
+ load_type='fov_image_based',
113
+ # load one frame every three frames
114
+ load_interval=3,
115
+ backend_args=backend_args))
116
+
117
+ val_dataloader = dict(
118
+ batch_size=1,
119
+ num_workers=1,
120
+ persistent_workers=True,
121
+ drop_last=False,
122
+ sampler=dict(type='DefaultSampler', shuffle=False),
123
+ dataset=dict(
124
+ type=dataset_type,
125
+ data_root=data_root,
126
+ data_prefix=dict(
127
+ pts='training/velodyne',
128
+ CAM_FRONT='training/image_0',
129
+ CAM_FRONT_LEFT='training/image_1',
130
+ CAM_FRONT_RIGHT='training/image_2',
131
+ CAM_SIDE_LEFT='training/image_3',
132
+ CAM_SIDE_RIGHT='training/image_4'),
133
+ ann_file='waymo_infos_val.pkl',
134
+ pipeline=eval_pipeline,
135
+ modality=input_modality,
136
+ test_mode=True,
137
+ metainfo=metainfo,
138
+ cam_sync_instances=True,
139
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
140
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
141
+ box_type_3d='Camera',
142
+ load_type='fov_image_based',
143
+ load_eval_anns=False,
144
+ backend_args=backend_args))
145
+
146
+ test_dataloader = dict(
147
+ batch_size=1,
148
+ num_workers=1,
149
+ persistent_workers=True,
150
+ drop_last=False,
151
+ sampler=dict(type='DefaultSampler', shuffle=False),
152
+ dataset=dict(
153
+ type=dataset_type,
154
+ data_root=data_root,
155
+ data_prefix=dict(
156
+ pts='training/velodyne',
157
+ CAM_FRONT='training/image_0',
158
+ CAM_FRONT_LEFT='training/image_1',
159
+ CAM_FRONT_RIGHT='training/image_2',
160
+ CAM_SIDE_LEFT='training/image_3',
161
+ CAM_SIDE_RIGHT='training/image_4'),
162
+ ann_file='waymo_infos_val.pkl',
163
+ pipeline=eval_pipeline,
164
+ modality=input_modality,
165
+ test_mode=True,
166
+ metainfo=metainfo,
167
+ cam_sync_instances=True,
168
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
169
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
170
+ box_type_3d='Camera',
171
+ load_type='fov_image_based',
172
+ backend_args=backend_args))
173
+
174
+ val_evaluator = dict(
175
+ type='WaymoMetric',
176
+ waymo_bin_file='./data/waymo/waymo_format/fov_gt.bin',
177
+ metric='LET_mAP',
178
+ load_type='fov_image_based',
179
+ result_prefix='./pgd_fov_pred')
180
+ test_evaluator = val_evaluator
181
+
182
+ vis_backends = [dict(type='LocalVisBackend')]
183
+ visualizer = dict(
184
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/waymoD3-mv-mono3d-3class.py ADDED
@@ -0,0 +1,191 @@
1
+ # dataset settings
2
+ # D3 in the config name means the whole dataset is divided into 3 folds
3
+ # We only use one fold for efficient experiments
4
+ dataset_type = 'WaymoDataset'
5
+ data_root = 'data/waymo/kitti_format/'
6
+ class_names = ['Pedestrian', 'Cyclist', 'Car']
7
+ metainfo = dict(classes=class_names)
8
+ input_modality = dict(use_lidar=False, use_camera=True)
9
+
10
+ # Example to use different file client
11
+ # Method 1: simply set the data root and let the file I/O module
12
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
13
+
14
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
15
+
16
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
17
+ # backend_args = dict(
18
+ # backend='petrel',
19
+ # path_mapping=dict({
20
+ # './data/': 's3://openmmlab/datasets/detection3d/',
21
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
22
+ # }))
23
+ backend_args = None
24
+
25
+ train_pipeline = [
26
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
27
+ dict(
28
+ type='LoadAnnotations3D',
29
+ with_bbox=True,
30
+ with_label=True,
31
+ with_attr_label=False,
32
+ with_bbox_3d=True,
33
+ with_label_3d=True,
34
+ with_bbox_depth=True),
35
+ # base shape (1248, 832), scale (0.95, 1.05)
36
+ dict(
37
+ type='RandomResize3D',
38
+ scale=(1248, 832),
39
+ # ratio_range=(1., 1.),
40
+ ratio_range=(0.95, 1.05),
41
+ interpolation='nearest',
42
+ keep_ratio=True,
43
+ ),
44
+ dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
45
+ dict(
46
+ type='Pack3DDetInputs',
47
+ keys=[
48
+ 'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
49
+ 'gt_labels_3d', 'centers_2d', 'depths'
50
+ ]),
51
+ ]
52
+
53
+ test_pipeline = [
54
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
55
+ dict(
56
+ type='Resize3D',
57
+ scale_factor=0.65,
58
+ interpolation='nearest',
59
+ keep_ratio=True),
60
+ dict(
61
+ type='Pack3DDetInputs',
62
+ keys=['img'],
63
+ meta_keys=[
64
+ 'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
65
+ 'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
66
+ ]),
67
+ ]
68
+ # construct a pipeline for data and gt loading in show function
69
+ # please keep its loading function consistent with test_pipeline (e.g. client)
70
+ eval_pipeline = [
71
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
72
+ dict(
73
+ type='Resize3D',
74
+ scale_factor=0.65,
75
+ interpolation='nearest',
76
+ keep_ratio=True),
77
+ dict(
78
+ type='Pack3DDetInputs',
79
+ keys=['img'],
80
+ meta_keys=[
81
+ 'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
82
+ 'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
83
+ ]),
84
+ ]
85
+
86
+ train_dataloader = dict(
87
+ batch_size=3,
88
+ num_workers=3,
89
+ persistent_workers=True,
90
+ sampler=dict(type='DefaultSampler', shuffle=True),
91
+ dataset=dict(
92
+ type=dataset_type,
93
+ data_root=data_root,
94
+ ann_file='waymo_infos_train.pkl',
95
+ data_prefix=dict(
96
+ pts='training/velodyne',
97
+ CAM_FRONT='training/image_0',
98
+ CAM_FRONT_LEFT='training/image_1',
99
+ CAM_FRONT_RIGHT='training/image_2',
100
+ CAM_SIDE_LEFT='training/image_3',
101
+ CAM_SIDE_RIGHT='training/image_4'),
102
+ pipeline=train_pipeline,
103
+ modality=input_modality,
104
+ test_mode=False,
105
+ metainfo=metainfo,
106
+ cam_sync_instances=True,
107
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
108
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
109
+ box_type_3d='Camera',
110
+ load_type='mv_image_based',
111
+ # load one frame every three frames
112
+ load_interval=3,
113
+ backend_args=backend_args))
114
+
115
+ val_dataloader = dict(
116
+ batch_size=1,
117
+ num_workers=0,
118
+ persistent_workers=False,
119
+ drop_last=False,
120
+ sampler=dict(type='DefaultSampler', shuffle=False),
121
+ dataset=dict(
122
+ type=dataset_type,
123
+ data_root=data_root,
124
+ data_prefix=dict(
125
+ pts='training/velodyne',
126
+ CAM_FRONT='training/image_0',
127
+ CAM_FRONT_LEFT='training/image_1',
128
+ CAM_FRONT_RIGHT='training/image_2',
129
+ CAM_SIDE_LEFT='training/image_3',
130
+ CAM_SIDE_RIGHT='training/image_4'),
131
+ ann_file='waymo_infos_val.pkl',
132
+ pipeline=eval_pipeline,
133
+ modality=input_modality,
134
+ test_mode=True,
135
+ metainfo=metainfo,
136
+ cam_sync_instances=True,
137
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
138
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
139
+ box_type_3d='Camera',
140
+ load_type='mv_image_based',
141
+ # load_eval_anns=False,
142
+ backend_args=backend_args))
143
+
144
+ test_dataloader = dict(
145
+ batch_size=1,
146
+ num_workers=0,
147
+ persistent_workers=False,
148
+ drop_last=False,
149
+ sampler=dict(type='DefaultSampler', shuffle=False),
150
+ dataset=dict(
151
+ type=dataset_type,
152
+ data_root=data_root,
153
+ data_prefix=dict(
154
+ pts='training/velodyne',
155
+ CAM_FRONT='training/image_0',
156
+ CAM_FRONT_LEFT='training/image_1',
157
+ CAM_FRONT_RIGHT='training/image_2',
158
+ CAM_SIDE_LEFT='training/image_3',
159
+ CAM_SIDE_RIGHT='training/image_4'),
160
+ ann_file='waymo_infos_val.pkl',
161
+ pipeline=eval_pipeline,
162
+ modality=input_modality,
163
+ test_mode=True,
164
+ metainfo=metainfo,
165
+ cam_sync_instances=True,
166
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
167
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
168
+ box_type_3d='Camera',
169
+ load_type='mv_image_based',
170
+ load_eval_anns=False,
171
+ backend_args=backend_args))
172
+
173
+ val_evaluator = dict(
174
+ type='WaymoMetric',
175
+ waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
176
+ metric='LET_mAP',
177
+ load_type='mv_image_based',
178
+ result_prefix='./pgd_mv_pred',
179
+ nms_cfg=dict(
180
+ use_rotate_nms=True,
181
+ nms_across_levels=False,
182
+ nms_pre=500,
183
+ nms_thr=0.05,
184
+ score_thr=0.001,
185
+ min_bbox_size=0,
186
+ max_per_frame=100))
187
+ test_evaluator = val_evaluator
188
+
189
+ vis_backends = [dict(type='LocalVisBackend')]
190
+ visualizer = dict(
191
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/waymoD5-3d-3class.py ADDED
@@ -0,0 +1,178 @@
1
+ # dataset settings
2
+ # D5 in the config name means the whole dataset is divided into 5 folds
3
+ # We only use one fold for efficient experiments
4
+ dataset_type = 'WaymoDataset'
5
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
6
+ data_root = 'data/waymo/kitti_format/'
7
+
8
+ # Example to use different file client
9
+ # Method 1: simply set the data root and let the file I/O module
10
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
11
+
12
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
13
+
14
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
15
+ # backend_args = dict(
16
+ # backend='petrel',
17
+ # path_mapping=dict({
18
+ # './data/': 's3://openmmlab/datasets/detection3d/',
19
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
20
+ # }))
21
+ backend_args = None
22
+
23
+ class_names = ['Car', 'Pedestrian', 'Cyclist']
24
+ metainfo = dict(classes=class_names)
25
+
26
+ point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
27
+ input_modality = dict(use_lidar=True, use_camera=False)
28
+ db_sampler = dict(
29
+ data_root=data_root,
30
+ info_path=data_root + 'waymo_dbinfos_train.pkl',
31
+ rate=1.0,
32
+ prepare=dict(
33
+ filter_by_difficulty=[-1],
34
+ filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
35
+ classes=class_names,
36
+ sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),
37
+ points_loader=dict(
38
+ type='LoadPointsFromFile',
39
+ coord_type='LIDAR',
40
+ load_dim=6,
41
+ use_dim=[0, 1, 2, 3, 4],
42
+ backend_args=backend_args),
43
+ backend_args=backend_args)
44
+
45
+ train_pipeline = [
46
+ dict(
47
+ type='LoadPointsFromFile',
48
+ coord_type='LIDAR',
49
+ load_dim=6,
50
+ use_dim=5,
51
+ backend_args=backend_args),
52
+ dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
53
+ # dict(type='ObjectSample', db_sampler=db_sampler),
54
+ dict(
55
+ type='RandomFlip3D',
56
+ sync_2d=False,
57
+ flip_ratio_bev_horizontal=0.5,
58
+ flip_ratio_bev_vertical=0.5),
59
+ dict(
60
+ type='GlobalRotScaleTrans',
61
+ rot_range=[-0.78539816, 0.78539816],
62
+ scale_ratio_range=[0.95, 1.05]),
63
+ dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
64
+ dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
65
+ dict(type='PointShuffle'),
66
+ dict(
67
+ type='Pack3DDetInputs',
68
+ keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
69
+ ]
70
+ test_pipeline = [
71
+ dict(
72
+ type='LoadPointsFromFile',
73
+ coord_type='LIDAR',
74
+ load_dim=6,
75
+ use_dim=5,
76
+ backend_args=backend_args),
77
+ dict(
78
+ type='MultiScaleFlipAug3D',
79
+ img_scale=(1333, 800),
80
+ pts_scale_ratio=1,
81
+ flip=False,
82
+ transforms=[
83
+ dict(
84
+ type='GlobalRotScaleTrans',
85
+ rot_range=[0, 0],
86
+ scale_ratio_range=[1., 1.],
87
+ translation_std=[0, 0, 0]),
88
+ dict(type='RandomFlip3D'),
89
+ dict(
90
+ type='PointsRangeFilter', point_cloud_range=point_cloud_range)
91
+ ]),
92
+ dict(
93
+ type='Pack3DDetInputs',
94
+ keys=['points'],
95
+ meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
96
+ ]
97
+ # construct a pipeline for data and gt loading in show function
98
+ # please keep its loading function consistent with test_pipeline (e.g. client)
99
+ eval_pipeline = [
100
+ dict(
101
+ type='LoadPointsFromFile',
102
+ coord_type='LIDAR',
103
+ load_dim=6,
104
+ use_dim=5,
105
+ backend_args=backend_args),
106
+ dict(
107
+ type='Pack3DDetInputs',
108
+ keys=['points'],
109
+ meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
110
+ ]
111
+
112
+ train_dataloader = dict(
113
+ batch_size=2,
114
+ num_workers=2,
115
+ persistent_workers=True,
116
+ sampler=dict(type='DefaultSampler', shuffle=True),
117
+ dataset=dict(
118
+ type='RepeatDataset',
119
+ times=2,
120
+ dataset=dict(
121
+ type=dataset_type,
122
+ data_root=data_root,
123
+ ann_file='waymo_infos_train.pkl',
124
+ data_prefix=dict(
125
+ pts='training/velodyne', sweeps='training/velodyne'),
126
+ pipeline=train_pipeline,
127
+ modality=input_modality,
128
+ test_mode=False,
129
+ metainfo=metainfo,
130
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
131
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
132
+ box_type_3d='LiDAR',
133
+ # load one frame every five frames
134
+ load_interval=5,
135
+ backend_args=backend_args)))
136
+ val_dataloader = dict(
137
+ batch_size=1,
138
+ num_workers=1,
139
+ persistent_workers=True,
140
+ drop_last=False,
141
+ sampler=dict(type='DefaultSampler', shuffle=False),
142
+ dataset=dict(
143
+ type=dataset_type,
144
+ data_root=data_root,
145
+ data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
146
+ ann_file='waymo_infos_val.pkl',
147
+ pipeline=eval_pipeline,
148
+ modality=input_modality,
149
+ test_mode=True,
150
+ metainfo=metainfo,
151
+ box_type_3d='LiDAR',
152
+ backend_args=backend_args))
153
+
154
+ test_dataloader = dict(
155
+ batch_size=1,
156
+ num_workers=1,
157
+ persistent_workers=True,
158
+ drop_last=False,
159
+ sampler=dict(type='DefaultSampler', shuffle=False),
160
+ dataset=dict(
161
+ type=dataset_type,
162
+ data_root=data_root,
163
+ data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
164
+ ann_file='waymo_infos_val.pkl',
165
+ pipeline=eval_pipeline,
166
+ modality=input_modality,
167
+ test_mode=True,
168
+ metainfo=metainfo,
169
+ box_type_3d='LiDAR',
170
+ backend_args=backend_args))
171
+
172
+ val_evaluator = dict(
173
+ type='WaymoMetric', waymo_bin_file='./data/waymo/waymo_format/gt.bin')
174
+ test_evaluator = val_evaluator
175
+
176
+ vis_backends = [dict(type='LocalVisBackend')]
177
+ visualizer = dict(
178
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
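The `load_interval=5` used above is where the `D5` in the config name comes from: only every fifth frame of the info file is kept. A toy sketch of the subsampling (an illustrative info list, not the real `waymo_infos_train.pkl`):

```python
# Keep one frame out of every five, as the dataset does with its info list.
infos = [dict(sample_idx=i) for i in range(20)]
load_interval = 5
subset = infos[::load_interval]
print(len(subset))  # -> 4 frames instead of 20
```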
configs/_base_/datasets/waymoD5-3d-car.py ADDED
@@ -0,0 +1,173 @@
1
+ # dataset settings
2
+ # D5 in the config name means the whole dataset is divided into 5 folds
3
+ # We only use one fold for efficient experiments
4
+ dataset_type = 'WaymoDataset'
5
+ data_root = 'data/waymo/kitti_format/'
6
+
7
+ # Example to use different file client
8
+ # Method 1: simply set the data root and let the file I/O module
9
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
10
+
11
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
12
+
13
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
14
+ # backend_args = dict(
15
+ # backend='petrel',
16
+ # path_mapping=dict({
17
+ # './data/': 's3://openmmlab/datasets/detection3d/',
18
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
19
+ # }))
20
+ backend_args = None
21
+
22
+ class_names = ['Car']
23
+ metainfo = dict(classes=class_names)
24
+
25
+ point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
26
+ input_modality = dict(use_lidar=True, use_camera=False)
27
+ db_sampler = dict(
28
+ data_root=data_root,
29
+ info_path=data_root + 'waymo_dbinfos_train.pkl',
30
+ rate=1.0,
31
+ prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
32
+ classes=class_names,
33
+ sample_groups=dict(Car=15),
34
+ points_loader=dict(
35
+ type='LoadPointsFromFile',
36
+ coord_type='LIDAR',
37
+ load_dim=6,
38
+ use_dim=[0, 1, 2, 3, 4],
39
+ backend_args=backend_args),
40
+ backend_args=backend_args)
41
+
42
+ train_pipeline = [
43
+ dict(
44
+ type='LoadPointsFromFile',
45
+ coord_type='LIDAR',
46
+ load_dim=6,
47
+ use_dim=5,
48
+ backend_args=backend_args),
49
+ dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
50
+ dict(type='ObjectSample', db_sampler=db_sampler),
51
+ dict(
52
+ type='RandomFlip3D',
53
+ sync_2d=False,
54
+ flip_ratio_bev_horizontal=0.5,
55
+ flip_ratio_bev_vertical=0.5),
56
+ dict(
57
+ type='GlobalRotScaleTrans',
58
+ rot_range=[-0.78539816, 0.78539816],
59
+ scale_ratio_range=[0.95, 1.05]),
60
+ dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
61
+ dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
62
+ dict(type='PointShuffle'),
63
+ dict(
64
+ type='Pack3DDetInputs',
65
+ keys=['points'],
66
+ meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
67
+ ]
68
+ test_pipeline = [
69
+ dict(
70
+ type='LoadPointsFromFile',
71
+ coord_type='LIDAR',
72
+ load_dim=6,
73
+ use_dim=5,
74
+ backend_args=backend_args),
75
+ dict(
76
+ type='MultiScaleFlipAug3D',
77
+ img_scale=(1333, 800),
78
+ pts_scale_ratio=1,
79
+ flip=False,
80
+ transforms=[
81
+ dict(
82
+ type='GlobalRotScaleTrans',
83
+ rot_range=[0, 0],
84
+ scale_ratio_range=[1., 1.],
85
+ translation_std=[0, 0, 0]),
86
+ dict(type='RandomFlip3D'),
87
+ dict(
88
+ type='PointsRangeFilter', point_cloud_range=point_cloud_range)
89
+ ]),
90
+ dict(
91
+ type='Pack3DDetInputs',
92
+ keys=['points'],
93
+ meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
94
+ ]
95
+ # construct a pipeline for data and gt loading in show function
96
+ # please keep its loading function consistent with test_pipeline (e.g. client)
97
+ eval_pipeline = [
98
+ dict(
99
+ type='LoadPointsFromFile',
100
+ coord_type='LIDAR',
101
+ load_dim=6,
102
+ use_dim=5,
103
+ backend_args=backend_args),
104
+ dict(type='Pack3DDetInputs', keys=['points']),
105
+ ]
106
+
107
+ train_dataloader = dict(
108
+ batch_size=2,
109
+ num_workers=2,
110
+ persistent_workers=True,
111
+ sampler=dict(type='DefaultSampler', shuffle=True),
112
+ dataset=dict(
113
+ type='RepeatDataset',
114
+ times=2,
115
+ dataset=dict(
116
+ type=dataset_type,
117
+ data_root=data_root,
118
+ ann_file='waymo_infos_train.pkl',
119
+ data_prefix=dict(
120
+ pts='training/velodyne', sweeps='training/velodyne'),
121
+ pipeline=train_pipeline,
122
+ modality=input_modality,
123
+ test_mode=False,
124
+ metainfo=metainfo,
125
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
126
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
127
+ box_type_3d='LiDAR',
128
+ # load one frame every five frames
129
+ load_interval=5,
130
+ backend_args=backend_args)))
131
+ val_dataloader = dict(
132
+ batch_size=1,
133
+ num_workers=1,
134
+ persistent_workers=True,
135
+ drop_last=False,
136
+ sampler=dict(type='DefaultSampler', shuffle=False),
137
+ dataset=dict(
138
+ type=dataset_type,
139
+ data_root=data_root,
140
+ data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
141
+ ann_file='waymo_infos_val.pkl',
142
+ pipeline=eval_pipeline,
143
+ modality=input_modality,
144
+ test_mode=True,
145
+ metainfo=metainfo,
146
+ box_type_3d='LiDAR',
147
+ backend_args=backend_args))
148
+
149
+ test_dataloader = dict(
150
+ batch_size=1,
151
+ num_workers=1,
152
+ persistent_workers=True,
153
+ drop_last=False,
154
+ sampler=dict(type='DefaultSampler', shuffle=False),
155
+ dataset=dict(
156
+ type=dataset_type,
157
+ data_root=data_root,
158
+ data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
159
+ ann_file='waymo_infos_val.pkl',
160
+ pipeline=eval_pipeline,
161
+ modality=input_modality,
162
+ test_mode=True,
163
+ metainfo=metainfo,
164
+ box_type_3d='LiDAR',
165
+ backend_args=backend_args))
166
+
167
+ val_evaluator = dict(
168
+ type='WaymoMetric', waymo_bin_file='./data/waymo/waymo_format/gt.bin')
169
+ test_evaluator = val_evaluator
170
+
171
+ vis_backends = [dict(type='LocalVisBackend')]
172
+ visualizer = dict(
173
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/waymoD5-fov-mono3d-3class.py ADDED
@@ -0,0 +1,163 @@
1
+ # dataset settings
2
+ # D5 in the config name means the whole dataset is divided into 5 folds
3
+ # We only use one fold for efficient experiments
4
+ dataset_type = 'WaymoDataset'
5
+ data_root = 'data/waymo/kitti_format/'
6
+ class_names = ['Car', 'Pedestrian', 'Cyclist']
7
+ input_modality = dict(use_lidar=False, use_camera=True)
8
+
9
+ # Example to use different file client
10
+ # Method 1: simply set the data root and let the file I/O module
11
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
12
+
13
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
14
+
15
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
16
+ # backend_args = dict(
17
+ # backend='petrel',
18
+ # path_mapping=dict({
19
+ # './data/': 's3://openmmlab/datasets/detection3d/',
20
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
21
+ # }))
22
+ backend_args = None
23
+
24
+ train_pipeline = [
25
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
26
+ dict(
27
+ type='LoadAnnotations3D',
28
+ with_bbox=True,
29
+ with_label=True,
30
+ with_attr_label=False,
31
+ with_bbox_3d=True,
32
+ with_label_3d=True,
33
+ with_bbox_depth=True),
34
+ # base shape (1248, 832), scale (0.95, 1.05)
35
+ dict(
36
+ type='RandomResize3D',
37
+ scale=(1248, 832),
38
+ ratio_range=(0.95, 1.05),
39
+ keep_ratio=True,
40
+ ),
41
+ dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
42
+ dict(
43
+ type='Pack3DDetInputs',
44
+ keys=[
45
+ 'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
46
+ 'gt_labels_3d', 'centers_2d', 'depths'
47
+ ]),
48
+ ]
49
+
50
+ test_pipeline = [
51
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
52
+ dict(
53
+ type='RandomResize3D',
54
+ scale=(1248, 832),
55
+ ratio_range=(1., 1.),
56
+ keep_ratio=True),
57
+ dict(type='Pack3DDetInputs', keys=['img']),
58
+ ]
59
+ # construct a pipeline for data and gt loading in show function
60
+ # please keep its loading function consistent with test_pipeline (e.g. client)
61
+ eval_pipeline = [
62
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
63
+ dict(
64
+ type='RandomResize3D',
65
+ scale=(1248, 832),
66
+ ratio_range=(1., 1.),
67
+ keep_ratio=True),
68
+ dict(type='Pack3DDetInputs', keys=['img']),
69
+ ]
70
+
71
+ metainfo = dict(classes=class_names)
72
+
73
+ train_dataloader = dict(
74
+ batch_size=3,
75
+ num_workers=3,
76
+ persistent_workers=True,
77
+ sampler=dict(type='DefaultSampler', shuffle=True),
78
+ dataset=dict(
79
+ type=dataset_type,
80
+ data_root=data_root,
81
+ ann_file='waymo_infos_train.pkl',
82
+ data_prefix=dict(
83
+ pts='training/velodyne',
84
+ CAM_FRONT='training/image_0',
85
+ CAM_FRONT_LEFT='training/image_1',
86
+ CAM_FRONT_RIGHT='training/image_2',
87
+ CAM_SIDE_LEFT='training/image_3',
88
+ CAM_SIDE_RIGHT='training/image_4'),
89
+ pipeline=train_pipeline,
90
+ modality=input_modality,
91
+ test_mode=False,
92
+ metainfo=metainfo,
93
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
94
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
95
+ box_type_3d='Camera',
96
+ load_type='fov_image_based',
97
+ # load one frame every five frames
98
+ load_interval=5,
99
+ backend_args=backend_args))
100
+
101
+ val_dataloader = dict(
102
+ batch_size=1,
103
+ num_workers=1,
104
+ persistent_workers=True,
105
+ drop_last=False,
106
+ sampler=dict(type='DefaultSampler', shuffle=False),
107
+ dataset=dict(
108
+ type=dataset_type,
109
+ data_root=data_root,
110
+ data_prefix=dict(
111
+ pts='training/velodyne',
112
+ CAM_FRONT='training/image_0',
113
+ CAM_FRONT_LEFT='training/image_1',
114
+ CAM_FRONT_RIGHT='training/image_2',
115
+ CAM_SIDE_LEFT='training/image_3',
116
+ CAM_SIDE_RIGHT='training/image_4'),
117
+ ann_file='waymo_infos_val.pkl',
118
+ pipeline=eval_pipeline,
119
+ modality=input_modality,
120
+ test_mode=True,
121
+ metainfo=metainfo,
122
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
123
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
124
+ box_type_3d='Camera',
125
+ load_type='fov_image_based',
126
+ backend_args=backend_args))
127
+
128
+ test_dataloader = dict(
129
+ batch_size=1,
130
+ num_workers=1,
131
+ persistent_workers=True,
132
+ drop_last=False,
133
+ sampler=dict(type='DefaultSampler', shuffle=False),
134
+ dataset=dict(
135
+ type=dataset_type,
136
+ data_root=data_root,
137
+ data_prefix=dict(
138
+ pts='training/velodyne',
139
+ CAM_FRONT='training/image_0',
140
+ CAM_FRONT_LEFT='training/image_1',
141
+ CAM_FRONT_RIGHT='training/image_2',
142
+ CAM_SIDE_LEFT='training/image_3',
143
+ CAM_SIDE_RIGHT='training/image_4'),
144
+ ann_file='waymo_infos_val.pkl',
145
+ pipeline=eval_pipeline,
146
+ modality=input_modality,
147
+ test_mode=True,
148
+ metainfo=metainfo,
149
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
150
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
151
+ box_type_3d='Camera',
152
+ load_type='fov_image_based',
153
+ backend_args=backend_args))
154
+
155
+ val_evaluator = dict(
156
+ type='WaymoMetric',
157
+ ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
158
+ waymo_bin_file='./data/waymo/waymo_format/fov_gt.bin',
159
+ data_root='./data/waymo/waymo_format',
160
+ metric='LET_mAP',
161
+ load_type='fov_image_based',
162
+ backend_args=backend_args)
163
+ test_evaluator = val_evaluator
configs/_base_/datasets/waymoD5-mv-mono3d-3class.py ADDED
@@ -0,0 +1,163 @@
1
+ # dataset settings
2
+ # D5 in the config name means the whole dataset is divided into 5 folds
3
+ # We only use one fold for efficient experiments
4
+ dataset_type = 'WaymoDataset'
5
+ data_root = 'data/waymo/kitti_format/'
6
+ class_names = ['Car', 'Pedestrian', 'Cyclist']
7
+ input_modality = dict(use_lidar=False, use_camera=True)
8
+
9
+ # Example to use different file client
10
+ # Method 1: simply set the data root and let the file I/O module
11
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
12
+
13
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
14
+
15
+ # Method 2: Use backend_args, file_client_args in versions before 1.1.0
16
+ # backend_args = dict(
17
+ # backend='petrel',
18
+ # path_mapping=dict({
19
+ # './data/': 's3://openmmlab/datasets/detection3d/',
20
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
21
+ # }))
22
+ backend_args = None
23
+
24
+ train_pipeline = [
25
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
26
+ dict(
27
+ type='LoadAnnotations3D',
28
+ with_bbox=True,
29
+ with_label=True,
30
+ with_attr_label=False,
31
+ with_bbox_3d=True,
32
+ with_label_3d=True,
33
+ with_bbox_depth=True),
34
+ # base shape (1248, 832), scale (0.95, 1.05)
35
+ dict(
36
+ type='RandomResize3D',
37
+ scale=(1284, 832),
38
+ ratio_range=(0.95, 1.05),
39
+ keep_ratio=True,
40
+ ),
41
+ dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
42
+ dict(
43
+ type='Pack3DDetInputs',
44
+ keys=[
45
+ 'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
46
+ 'gt_labels_3d', 'centers_2d', 'depths'
47
+ ]),
48
+ ]
49
+
50
+ test_pipeline = [
51
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
52
+ dict(
53
+ type='RandomResize3D',
54
+ scale=(1248, 832),
55
+ ratio_range=(1., 1.),
56
+ keep_ratio=True),
57
+ dict(type='Pack3DDetInputs', keys=['img']),
58
+ ]
59
+ # construct a pipeline for data and gt loading in show function
60
+ # please keep its loading function consistent with test_pipeline (e.g. client)
61
+ eval_pipeline = [
62
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
63
+ dict(
64
+ type='RandomResize3D',
65
+ scale=(1248, 832),
66
+ ratio_range=(1., 1.),
67
+ keep_ratio=True),
68
+ dict(type='Pack3DDetInputs', keys=['img']),
69
+ ]
70
+
71
+ metainfo = dict(classes=class_names)
72
+
73
+ train_dataloader = dict(
74
+ batch_size=3,
75
+ num_workers=3,
76
+ persistent_workers=True,
77
+ sampler=dict(type='DefaultSampler', shuffle=True),
78
+ dataset=dict(
79
+ type=dataset_type,
80
+ data_root=data_root,
81
+ ann_file='waymo_infos_train.pkl',
82
+ data_prefix=dict(
83
+ pts='training/velodyne',
84
+ CAM_FRONT='training/image_0',
85
+ CAM_FRONT_LEFT='training/image_1',
86
+ CAM_FRONT_RIGHT='training/image_2',
87
+ CAM_SIDE_LEFT='training/image_3',
88
+ CAM_SIDE_RIGHT='training/image_4'),
89
+ pipeline=train_pipeline,
90
+ modality=input_modality,
91
+ test_mode=False,
92
+ metainfo=metainfo,
93
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
94
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
95
+ box_type_3d='Camera',
96
+ load_type='mv_image_based',
97
+ # load one frame every three frames
98
+ load_interval=5,
99
+ backend_args=backend_args))
100
+
101
+ val_dataloader = dict(
102
+ batch_size=1,
103
+ num_workers=1,
104
+ persistent_workers=True,
105
+ drop_last=False,
106
+ sampler=dict(type='DefaultSampler', shuffle=False),
107
+ dataset=dict(
108
+ type=dataset_type,
109
+ data_root=data_root,
110
+ data_prefix=dict(
111
+ pts='training/velodyne',
112
+ CAM_FRONT='training/image_0',
113
+ CAM_FRONT_LEFT='training/image_1',
114
+ CAM_FRONT_RIGHT='training/image_2',
115
+ CAM_SIDE_LEFT='training/image_3',
116
+ CAM_SIDE_RIGHT='training/image_4'),
117
+ ann_file='waymo_infos_val.pkl',
118
+ pipeline=eval_pipeline,
119
+ modality=input_modality,
120
+ test_mode=True,
121
+ metainfo=metainfo,
122
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
123
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
124
+ box_type_3d='Camera',
125
+ load_type='mv_image_based',
126
+ backend_args=backend_args))
127
+
128
+ test_dataloader = dict(
129
+ batch_size=1,
130
+ num_workers=1,
131
+ persistent_workers=True,
132
+ drop_last=False,
133
+ sampler=dict(type='DefaultSampler', shuffle=False),
134
+ dataset=dict(
135
+ type=dataset_type,
136
+ data_root=data_root,
137
+ data_prefix=dict(
138
+ pts='training/velodyne',
139
+ CAM_FRONT='training/image_0',
140
+ CAM_FRONT_LEFT='training/image_1',
141
+ CAM_FRONT_RIGHT='training/image_2',
142
+ CAM_SIDE_LEFT='training/image_3',
143
+ CAM_SIDE_RIGHT='training/image_4'),
144
+ ann_file='waymo_infos_val.pkl',
145
+ pipeline=eval_pipeline,
146
+ modality=input_modality,
147
+ test_mode=True,
148
+ metainfo=metainfo,
149
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
150
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
151
+ box_type_3d='Camera',
152
+ load_type='mv_image_based',
153
+ backend_args=backend_args))
154
+
155
+ val_evaluator = dict(
156
+ type='WaymoMetric',
157
+ ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
158
+ waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
159
+ data_root='./data/waymo/waymo_format',
160
+ metric='LET_mAP',
161
+ load_type='mv_image_based',
162
+ backend_args=backend_args)
163
+ test_evaluator = val_evaluator
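For a quick smoke test of a dataset file like the one above, the train dataset can be built directly from the parsed config. A rough sketch, assuming mmdet3d 1.1+ is installed and the Waymo data has already been converted to kitti_format; the file path is illustrative.

from mmengine.config import Config
from mmdet3d.registry import DATASETS

# Parse the _base_ dataset file and instantiate its registered dataset type
# ('WaymoDataset'); building requires the converted annotation files on disk.
cfg = Config.fromfile('configs/_base_/datasets/waymoD5-mv-mono3d-3class.py')
dataset = DATASETS.build(cfg.train_dataloader.dataset)
print(len(dataset))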
configs/_base_/datasets/waymoD5-mv3d-3class.py ADDED
@@ -0,0 +1,178 @@
+ # dataset settings
+ # D5 in the config name means the whole dataset is divided into 5 folds
+ # We only use one fold for efficient experiments
+ dataset_type = 'WaymoDataset'
+ data_root = 'data/waymo/kitti_format/'
+
+ # Example to use different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer from prefix (not support LMDB and Memcache yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
+
+ # Method 2: Use backend_args, file_client_args in versions before 1.1.0
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ class_names = ['Pedestrian', 'Cyclist', 'Car']
+ input_modality = dict(use_lidar=False, use_camera=True)
+ point_cloud_range = [-35.0, -75.0, -2, 75.0, 75.0, 4]
+
+ train_transforms = [
+     dict(type='PhotoMetricDistortion3D'),
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         ratio_range=(0.95, 1.05),
+         keep_ratio=True),
+     dict(type='RandomCrop3D', crop_size=(1080, 720)),
+     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5, flip_box3d=False),
+ ]
+
+ train_pipeline = [
+     dict(
+         type='LoadMultiViewImageFromFiles',
+         to_float32=True,
+         backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox=True,
+         with_label=True,
+         with_attr_label=False,
+         with_bbox_3d=True,
+         with_label_3d=True,
+         with_bbox_depth=True),
+     dict(type='MultiViewWrapper', transforms=train_transforms),
+     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
+     dict(type='ObjectNameFilter', classes=class_names),
+     dict(
+         type='Pack3DDetInputs', keys=[
+             'img',
+             'gt_bboxes_3d',
+             'gt_labels_3d',
+         ]),
+ ]
+ test_transforms = [
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         ratio_range=(1., 1.),
+         keep_ratio=True)
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadMultiViewImageFromFiles',
+         to_float32=True,
+         backend_args=backend_args),
+     dict(type='MultiViewWrapper', transforms=test_transforms),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['img'],
+         meta_keys=[
+             'box_type_3d', 'img_shape', 'ori_cam2img', 'scale_factor',
+             'sample_idx', 'context_name', 'timestamp', 'lidar2cam',
+             'num_ref_frames', 'num_views'
+         ])
+ ]
+ # construct a pipeline for data and gt loading in show function
+ # please keep its loading function consistent with test_pipeline (e.g. client)
+ eval_pipeline = [
+     dict(
+         type='LoadMultiViewImageFromFiles',
+         to_float32=True,
+         backend_args=backend_args),
+     dict(type='MultiViewWrapper', transforms=test_transforms),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['img'],
+         meta_keys=[
+             'box_type_3d', 'img_shape', 'ori_cam2img', 'scale_factor',
+             'sample_idx', 'context_name', 'timestamp', 'lidar2cam',
+             'num_ref_frames', 'num_views'
+         ])
+ ]
+ metainfo = dict(classes=class_names)
+
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='waymo_infos_train.pkl',
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         pipeline=train_pipeline,
+         modality=input_modality,
+         test_mode=False,
+         cam_sync_instances=True,
+         metainfo=metainfo,
+         box_type_3d='Lidar',
+         load_interval=5,
+         backend_args=backend_args))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='waymo_infos_val.pkl',
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         pipeline=eval_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         box_type_3d='Lidar',
+         backend_args=backend_args))
+
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='waymo_infos_val.pkl',
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         pipeline=test_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         box_type_3d='Lidar',
+         backend_args=backend_args))
+ val_evaluator = dict(
+     type='WaymoMetric',
+     waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
+     metric='LET_mAP')
+
+ test_evaluator = val_evaluator
configs/_base_/default_runtime.py ADDED
@@ -0,0 +1,23 @@
+ default_scope = 'mmdet3d'
+
+ default_hooks = dict(
+     timer=dict(type='IterTimerHook'),
+     logger=dict(type='LoggerHook', interval=50),
+     param_scheduler=dict(type='ParamSchedulerHook'),
+     checkpoint=dict(type='CheckpointHook', interval=-1),
+     sampler_seed=dict(type='DistSamplerSeedHook'),
+     visualization=dict(type='Det3DVisualizationHook'))
+
+ env_cfg = dict(
+     cudnn_benchmark=False,
+     mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+     dist_cfg=dict(backend='nccl'),
+ )
+
+ log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
+
+ log_level = 'INFO'
+ load_from = None
+ resume = False
+
+ # TODO: support auto scaling lr
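With `interval=-1`, the CheckpointHook above never saves periodic checkpoints, so downstream configs are expected to turn it on. A small sketch of such an override; the interval and retention values are illustrative, not part of this commit.

# Hypothetical override on top of default_runtime.py (values are examples).
_base_ = ['../_base_/default_runtime.py']
default_hooks = dict(
    # save a checkpoint every epoch, keeping only the three most recent
    checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3),
    # log less frequently
    logger=dict(type='LoggerHook', interval=100))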
configs/_base_/models/3dssd.py ADDED
@@ -0,0 +1,76 @@
+ model = dict(
+     type='SSD3DNet',
+     data_preprocessor=dict(type='Det3DDataPreprocessor'),
+     backbone=dict(
+         type='PointNet2SAMSG',
+         in_channels=4,
+         num_points=(4096, 512, (256, 256)),
+         radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)),
+         num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)),
+         sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)),
+                      ((64, 64, 128), (64, 64, 128), (64, 96, 128)),
+                      ((128, 128, 256), (128, 192, 256), (128, 256, 256))),
+         aggregation_channels=(64, 128, 256),
+         fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')),
+         fps_sample_range_lists=((-1), (-1), (512, -1)),
+         norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
+         sa_cfg=dict(
+             type='PointSAModuleMSG',
+             pool_mod='max',
+             use_xyz=True,
+             normalize_xyz=False)),
+     bbox_head=dict(
+         type='SSD3DHead',
+         vote_module_cfg=dict(
+             in_channels=256,
+             num_points=256,
+             gt_per_seed=1,
+             conv_channels=(128, ),
+             conv_cfg=dict(type='Conv1d'),
+             norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
+             with_res_feat=False,
+             vote_xyz_range=(3.0, 3.0, 2.0)),
+         vote_aggregation_cfg=dict(
+             type='PointSAModuleMSG',
+             num_point=256,
+             radii=(4.8, 6.4),
+             sample_nums=(16, 32),
+             mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)),
+             norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
+             use_xyz=True,
+             normalize_xyz=False,
+             bias=True),
+         pred_layer_cfg=dict(
+             in_channels=1536,
+             shared_conv_channels=(512, 128),
+             cls_conv_channels=(128, ),
+             reg_conv_channels=(128, ),
+             conv_cfg=dict(type='Conv1d'),
+             norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
+             bias=True),
+         objectness_loss=dict(
+             type='mmdet.CrossEntropyLoss',
+             use_sigmoid=True,
+             reduction='sum',
+             loss_weight=1.0),
+         center_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
+         dir_class_loss=dict(
+             type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+         dir_res_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
+         size_res_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
+         corner_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
+         vote_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0)),
+     # model training and testing settings
+     train_cfg=dict(
+         sample_mode='spec', pos_distance_thr=10.0, expand_dims_length=0.05),
+     test_cfg=dict(
+         nms_cfg=dict(type='nms', iou_thr=0.1),
+         sample_mode='spec',
+         score_thr=0.0,
+         per_class_proposal=True,
+         max_output_num=100))
configs/_base_/models/cascade-mask-rcnn_r50_fpn.py ADDED
@@ -0,0 +1,199 @@
+ # model settings
+ model = dict(
+     type='CascadeRCNN',
+     pretrained='torchvision://resnet50',
+     _scope_='mmdet',
+     backbone=dict(
+         type='ResNet',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         frozen_stages=1,
+         norm_cfg=dict(type='BN', requires_grad=True),
+         norm_eval=True,
+         style='pytorch'),
+     neck=dict(
+         type='FPN',
+         in_channels=[256, 512, 1024, 2048],
+         out_channels=256,
+         num_outs=5),
+     rpn_head=dict(
+         type='RPNHead',
+         in_channels=256,
+         feat_channels=256,
+         anchor_generator=dict(
+             type='AnchorGenerator',
+             scales=[8],
+             ratios=[0.5, 1.0, 2.0],
+             strides=[4, 8, 16, 32, 64]),
+         bbox_coder=dict(
+             type='DeltaXYWHBBoxCoder',
+             target_means=[.0, .0, .0, .0],
+             target_stds=[1.0, 1.0, 1.0, 1.0]),
+         loss_cls=dict(
+             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+         loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
+     roi_head=dict(
+         type='CascadeRoIHead',
+         num_stages=3,
+         stage_loss_weights=[1, 0.5, 0.25],
+         bbox_roi_extractor=dict(
+             type='SingleRoIExtractor',
+             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+             out_channels=256,
+             featmap_strides=[4, 8, 16, 32]),
+         bbox_head=[
+             dict(
+                 type='Shared2FCBBoxHead',
+                 in_channels=256,
+                 fc_out_channels=1024,
+                 roi_feat_size=7,
+                 num_classes=80,
+                 bbox_coder=dict(
+                     type='DeltaXYWHBBoxCoder',
+                     target_means=[0., 0., 0., 0.],
+                     target_stds=[0.1, 0.1, 0.2, 0.2]),
+                 reg_class_agnostic=True,
+                 loss_cls=dict(
+                     type='CrossEntropyLoss',
+                     use_sigmoid=False,
+                     loss_weight=1.0),
+                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+                                loss_weight=1.0)),
+             dict(
+                 type='Shared2FCBBoxHead',
+                 in_channels=256,
+                 fc_out_channels=1024,
+                 roi_feat_size=7,
+                 num_classes=80,
+                 bbox_coder=dict(
+                     type='DeltaXYWHBBoxCoder',
+                     target_means=[0., 0., 0., 0.],
+                     target_stds=[0.05, 0.05, 0.1, 0.1]),
+                 reg_class_agnostic=True,
+                 loss_cls=dict(
+                     type='CrossEntropyLoss',
+                     use_sigmoid=False,
+                     loss_weight=1.0),
+                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+                                loss_weight=1.0)),
+             dict(
+                 type='Shared2FCBBoxHead',
+                 in_channels=256,
+                 fc_out_channels=1024,
+                 roi_feat_size=7,
+                 num_classes=80,
+                 bbox_coder=dict(
+                     type='DeltaXYWHBBoxCoder',
+                     target_means=[0., 0., 0., 0.],
+                     target_stds=[0.033, 0.033, 0.067, 0.067]),
+                 reg_class_agnostic=True,
+                 loss_cls=dict(
+                     type='CrossEntropyLoss',
+                     use_sigmoid=False,
+                     loss_weight=1.0),
+                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
+         ],
+         mask_roi_extractor=dict(
+             type='SingleRoIExtractor',
+             roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
+             out_channels=256,
+             featmap_strides=[4, 8, 16, 32]),
+         mask_head=dict(
+             type='FCNMaskHead',
+             num_convs=4,
+             in_channels=256,
+             conv_out_channels=256,
+             num_classes=80,
+             loss_mask=dict(
+                 type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
+     # model training and testing settings
+     train_cfg=dict(
+         rpn=dict(
+             assigner=dict(
+                 type='MaxIoUAssigner',
+                 pos_iou_thr=0.7,
+                 neg_iou_thr=0.3,
+                 min_pos_iou=0.3,
+                 match_low_quality=True,
+                 ignore_iof_thr=-1),
+             sampler=dict(
+                 type='RandomSampler',
+                 num=256,
+                 pos_fraction=0.5,
+                 neg_pos_ub=-1,
+                 add_gt_as_proposals=False),
+             allowed_border=0,
+             pos_weight=-1,
+             debug=False),
+         rpn_proposal=dict(
+             nms_pre=2000,
+             nms_post=2000,
+             max_per_img=2000,
+             nms=dict(type='nms', iou_threshold=0.7),
+             min_bbox_size=0),
+         rcnn=[
+             dict(
+                 assigner=dict(
+                     type='MaxIoUAssigner',
+                     pos_iou_thr=0.5,
+                     neg_iou_thr=0.5,
+                     min_pos_iou=0.5,
+                     match_low_quality=False,
+                     ignore_iof_thr=-1),
+                 sampler=dict(
+                     type='RandomSampler',
+                     num=512,
+                     pos_fraction=0.25,
+                     neg_pos_ub=-1,
+                     add_gt_as_proposals=True),
+                 mask_size=28,
+                 pos_weight=-1,
+                 debug=False),
+             dict(
+                 assigner=dict(
+                     type='MaxIoUAssigner',
+                     pos_iou_thr=0.6,
+                     neg_iou_thr=0.6,
+                     min_pos_iou=0.6,
+                     match_low_quality=False,
+                     ignore_iof_thr=-1),
+                 sampler=dict(
+                     type='RandomSampler',
+                     num=512,
+                     pos_fraction=0.25,
+                     neg_pos_ub=-1,
+                     add_gt_as_proposals=True),
+                 mask_size=28,
+                 pos_weight=-1,
+                 debug=False),
+             dict(
+                 assigner=dict(
+                     type='MaxIoUAssigner',
+                     pos_iou_thr=0.7,
+                     neg_iou_thr=0.7,
+                     min_pos_iou=0.7,
+                     match_low_quality=False,
+                     ignore_iof_thr=-1),
+                 sampler=dict(
+                     type='RandomSampler',
+                     num=512,
+                     pos_fraction=0.25,
+                     neg_pos_ub=-1,
+                     add_gt_as_proposals=True),
+                 mask_size=28,
+                 pos_weight=-1,
+                 debug=False)
+         ]),
+     test_cfg=dict(
+         rpn=dict(
+             nms_pre=1000,
+             nms_post=1000,
+             max_per_img=1000,
+             nms=dict(type='nms', iou_threshold=0.7),
+             min_bbox_size=0),
+         rcnn=dict(
+             score_thr=0.05,
+             nms=dict(type='nms', iou_threshold=0.5),
+             max_per_img=100,
+             mask_thr_binary=0.5)))
configs/_base_/models/centerpoint_pillar02_second_secfpn_nus.py ADDED
@@ -0,0 +1,89 @@
+ voxel_size = [0.2, 0.2, 8]
+ model = dict(
+     type='CenterPoint',
+     data_preprocessor=dict(
+         type='Det3DDataPreprocessor',
+         voxel=True,
+         voxel_layer=dict(
+             max_num_points=20,
+             voxel_size=voxel_size,
+             max_voxels=(30000, 40000))),
+     pts_voxel_encoder=dict(
+         type='PillarFeatureNet',
+         in_channels=5,
+         feat_channels=[64],
+         with_distance=False,
+         voxel_size=(0.2, 0.2, 8),
+         norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
+         legacy=False),
+     pts_middle_encoder=dict(
+         type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)),
+     pts_backbone=dict(
+         type='SECOND',
+         in_channels=64,
+         out_channels=[64, 128, 256],
+         layer_nums=[3, 5, 5],
+         layer_strides=[2, 2, 2],
+         norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
+         conv_cfg=dict(type='Conv2d', bias=False)),
+     pts_neck=dict(
+         type='SECONDFPN',
+         in_channels=[64, 128, 256],
+         out_channels=[128, 128, 128],
+         upsample_strides=[0.5, 1, 2],
+         norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
+         upsample_cfg=dict(type='deconv', bias=False),
+         use_conv_for_no_stride=True),
+     pts_bbox_head=dict(
+         type='CenterHead',
+         in_channels=sum([128, 128, 128]),
+         tasks=[
+             dict(num_class=1, class_names=['car']),
+             dict(num_class=2, class_names=['truck', 'construction_vehicle']),
+             dict(num_class=2, class_names=['bus', 'trailer']),
+             dict(num_class=1, class_names=['barrier']),
+             dict(num_class=2, class_names=['motorcycle', 'bicycle']),
+             dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
+         ],
+         common_heads=dict(
+             reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
+         share_conv_channel=64,
+         bbox_coder=dict(
+             type='CenterPointBBoxCoder',
+             post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
+             max_num=500,
+             score_threshold=0.1,
+             out_size_factor=4,
+             voxel_size=voxel_size[:2],
+             code_size=9),
+         separate_head=dict(
+             type='SeparateHead', init_bias=-2.19, final_kernel=3),
+         loss_cls=dict(type='mmdet.GaussianFocalLoss', reduction='mean'),
+         loss_bbox=dict(
+             type='mmdet.L1Loss', reduction='mean', loss_weight=0.25),
+         norm_bbox=True),
+     # model training and testing settings
+     train_cfg=dict(
+         pts=dict(
+             grid_size=[512, 512, 1],
+             voxel_size=voxel_size,
+             out_size_factor=4,
+             dense_reg=1,
+             gaussian_overlap=0.1,
+             max_objs=500,
+             min_radius=2,
+             code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
+     test_cfg=dict(
+         pts=dict(
+             post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
+             max_per_img=500,
+             max_pool_nms=False,
+             min_radius=[4, 12, 10, 1, 0.85, 0.175],
+             score_threshold=0.1,
+             pc_range=[-51.2, -51.2],
+             out_size_factor=4,
+             voxel_size=voxel_size[:2],
+             nms_type='rotate',
+             pre_max_size=1000,
+             post_max_size=83,
+             nms_thr=0.2)))
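A quick sanity check on the geometry this config implies: with 0.2 m pillars over the usual nuScenes x/y extent of [-51.2 m, 51.2 m] (the pc_range used in test_cfg), the scatter grid comes out at exactly the configured output_shape. An illustrative check, assuming that range:

# Sanity check (illustrative, not part of this commit): the BEV grid implied
# by voxel_size and the nuScenes x/y range matches output_shape=(512, 512).
voxel_size = [0.2, 0.2, 8]
x_min, x_max = -51.2, 51.2
bev_cells = round((x_max - x_min) / voxel_size[0])  # 102.4 / 0.2 = 512
assert bev_cells == 512
# out_size_factor=4 then yields 512 / 4 = 128 cells in each head's heatmap.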
configs/_base_/models/centerpoint_voxel01_second_secfpn_nus.py ADDED
@@ -0,0 +1,89 @@
+ voxel_size = [0.1, 0.1, 0.2]
+ model = dict(
+     type='CenterPoint',
+     data_preprocessor=dict(
+         type='Det3DDataPreprocessor',
+         voxel=True,
+         voxel_layer=dict(
+             max_num_points=10,
+             voxel_size=voxel_size,
+             max_voxels=(90000, 120000))),
+     pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
+     pts_middle_encoder=dict(
+         type='SparseEncoder',
+         in_channels=5,
+         sparse_shape=[41, 1024, 1024],
+         output_channels=128,
+         order=('conv', 'norm', 'act'),
+         encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128),
+                           (128, 128)),
+         encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),
+         block_type='basicblock'),
+     pts_backbone=dict(
+         type='SECOND',
+         in_channels=256,
+         out_channels=[128, 256],
+         layer_nums=[5, 5],
+         layer_strides=[1, 2],
+         norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
+         conv_cfg=dict(type='Conv2d', bias=False)),
+     pts_neck=dict(
+         type='SECONDFPN',
+         in_channels=[128, 256],
+         out_channels=[256, 256],
+         upsample_strides=[1, 2],
+         norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
+         upsample_cfg=dict(type='deconv', bias=False),
+         use_conv_for_no_stride=True),
+     pts_bbox_head=dict(
+         type='CenterHead',
+         in_channels=sum([256, 256]),
+         tasks=[
+             dict(num_class=1, class_names=['car']),
+             dict(num_class=2, class_names=['truck', 'construction_vehicle']),
+             dict(num_class=2, class_names=['bus', 'trailer']),
+             dict(num_class=1, class_names=['barrier']),
+             dict(num_class=2, class_names=['motorcycle', 'bicycle']),
+             dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
+         ],
+         common_heads=dict(
+             reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
+         share_conv_channel=64,
+         bbox_coder=dict(
+             type='CenterPointBBoxCoder',
+             post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
+             max_num=500,
+             score_threshold=0.1,
+             out_size_factor=8,
+             voxel_size=voxel_size[:2],
+             code_size=9),
+         separate_head=dict(
+             type='SeparateHead', init_bias=-2.19, final_kernel=3),
+         loss_cls=dict(type='mmdet.GaussianFocalLoss', reduction='mean'),
+         loss_bbox=dict(
+             type='mmdet.L1Loss', reduction='mean', loss_weight=0.25),
+         norm_bbox=True),
+     # model training and testing settings
+     train_cfg=dict(
+         pts=dict(
+             grid_size=[1024, 1024, 40],
+             voxel_size=voxel_size,
+             out_size_factor=8,
+             dense_reg=1,
+             gaussian_overlap=0.1,
+             max_objs=500,
+             min_radius=2,
+             code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
+     test_cfg=dict(
+         pts=dict(
+             post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
+             max_per_img=500,
+             max_pool_nms=False,
+             min_radius=[4, 12, 10, 1, 0.85, 0.175],
+             score_threshold=0.1,
+             out_size_factor=8,
+             voxel_size=voxel_size[:2],
+             nms_type='rotate',
+             pre_max_size=1000,
+             post_max_size=83,
+             nms_thr=0.2)))
configs/_base_/models/cylinder3d.py ADDED
@@ -0,0 +1,41 @@
+ grid_shape = [480, 360, 32]
+ model = dict(
+     type='Cylinder3D',
+     data_preprocessor=dict(
+         type='Det3DDataPreprocessor',
+         voxel=True,
+         voxel_type='cylindrical',
+         voxel_layer=dict(
+             grid_shape=grid_shape,
+             point_cloud_range=[0, -3.14159265359, -4, 50, 3.14159265359, 2],
+             max_num_points=-1,
+             max_voxels=-1,
+         ),
+     ),
+     voxel_encoder=dict(
+         type='SegVFE',
+         feat_channels=[64, 128, 256, 256],
+         in_channels=6,
+         with_voxel_center=True,
+         feat_compression=16,
+         return_point_feats=False),
+     backbone=dict(
+         type='Asymm3DSpconv',
+         grid_size=grid_shape,
+         input_channels=16,
+         base_channels=32,
+         norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.1)),
+     decode_head=dict(
+         type='Cylinder3DHead',
+         channels=128,
+         num_classes=20,
+         loss_ce=dict(
+             type='mmdet.CrossEntropyLoss',
+             use_sigmoid=False,
+             class_weight=None,
+             loss_weight=1.0),
+         loss_lovasz=dict(type='LovaszLoss', loss_weight=1.0, reduction='none'),
+     ),
+     train_cfg=None,
+     test_cfg=dict(mode='whole'),
+ )
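The cylindrical voxel layer above spans rho in [0, 50] m, phi over the full circle, and z in [-4, 2] m; dividing each extent by grid_shape gives the per-bin resolution. A small illustrative check derived purely from the values in this file:

import math

# Per-bin resolution of the cylindrical grid (illustrative, not part of
# this commit); ranges follow the (rho, phi, z) point_cloud_range above.
grid_shape = [480, 360, 32]
rho_res = (50 - 0) / grid_shape[0]                   # ~0.104 m per bin
phi_res = math.degrees(2 * math.pi / grid_shape[1])  # 1.0 degree per bin
z_res = (2 - (-4)) / grid_shape[2]                   # 0.1875 m per bin
print(rho_res, phi_res, z_res)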
configs/_base_/models/dgcnn.py ADDED
@@ -0,0 +1,29 @@
+ # model settings
+ model = dict(
+     type='EncoderDecoder3D',
+     data_preprocessor=dict(type='Det3DDataPreprocessor'),
+     backbone=dict(
+         type='DGCNNBackbone',
+         in_channels=9,  # [xyz, rgb, normal_xyz], modified with dataset
+         num_samples=(20, 20, 20),
+         knn_modes=('D-KNN', 'F-KNN', 'F-KNN'),
+         radius=(None, None, None),
+         gf_channels=((64, 64), (64, 64), (64, )),
+         fa_channels=(1024, ),
+         act_cfg=dict(type='LeakyReLU', negative_slope=0.2)),
+     decode_head=dict(
+         type='DGCNNHead',
+         fp_channels=(1216, 512),
+         channels=256,
+         dropout_ratio=0.5,
+         conv_cfg=dict(type='Conv1d'),
+         norm_cfg=dict(type='BN1d'),
+         act_cfg=dict(type='LeakyReLU', negative_slope=0.2),
+         loss_decode=dict(
+             type='mmdet.CrossEntropyLoss',
+             use_sigmoid=False,
+             class_weight=None,  # modified with dataset
+             loss_weight=1.0)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='slide'))
configs/_base_/models/fcaf3d.py ADDED
@@ -0,0 +1,20 @@
+ model = dict(
+     type='MinkSingleStage3DDetector',
+     data_preprocessor=dict(type='Det3DDataPreprocessor'),
+     backbone=dict(type='MinkResNet', in_channels=3, depth=34),
+     bbox_head=dict(
+         type='FCAF3DHead',
+         in_channels=(64, 128, 256, 512),
+         out_channels=128,
+         voxel_size=.01,
+         pts_prune_threshold=100000,
+         pts_assign_threshold=27,
+         pts_center_threshold=18,
+         num_classes=18,
+         num_reg_outs=6,
+         center_loss=dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True),
+         bbox_loss=dict(type='AxisAlignedIoULoss'),
+         cls_loss=dict(type='mmdet.FocalLoss'),
+     ),
+     train_cfg=dict(),
+     test_cfg=dict(nms_pre=1000, iou_thr=.5, score_thr=.01))
configs/_base_/models/fcos3d.py ADDED
@@ -0,0 +1,86 @@
+ # model settings
+ model = dict(
+     type='FCOSMono3D',
+     data_preprocessor=dict(
+         type='Det3DDataPreprocessor',
+         mean=[123.675, 116.28, 103.53],
+         std=[58.395, 57.12, 57.375],
+         bgr_to_rgb=True,
+         pad_size_divisor=32),
+     backbone=dict(
+         type='mmdet.ResNet',
+         depth=101,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         frozen_stages=1,
+         norm_cfg=dict(type='BN', requires_grad=False),
+         norm_eval=True,
+         style='caffe',
+         init_cfg=dict(
+             type='Pretrained',
+             checkpoint='open-mmlab://detectron2/resnet101_caffe')),
+     neck=dict(
+         type='mmdet.FPN',
+         in_channels=[256, 512, 1024, 2048],
+         out_channels=256,
+         start_level=1,
+         add_extra_convs='on_output',
+         num_outs=5,
+         relu_before_extra_convs=True),
+     bbox_head=dict(
+         type='FCOSMono3DHead',
+         num_classes=10,
+         in_channels=256,
+         stacked_convs=2,
+         feat_channels=256,
+         use_direction_classifier=True,
+         diff_rad_by_sin=True,
+         pred_attrs=True,
+         pred_velo=True,
+         dir_offset=0.7854,  # pi/4
+         dir_limit_offset=0,
+         strides=[8, 16, 32, 64, 128],
+         group_reg_dims=(2, 1, 3, 1, 2),  # offset, depth, size, rot, velo
+         cls_branch=(256, ),
+         reg_branch=(
+             (256, ),  # offset
+             (256, ),  # depth
+             (256, ),  # size
+             (256, ),  # rot
+             ()  # velo
+         ),
+         dir_branch=(256, ),
+         attr_branch=(256, ),
+         loss_cls=dict(
+             type='mmdet.FocalLoss',
+             use_sigmoid=True,
+             gamma=2.0,
+             alpha=0.25,
+             loss_weight=1.0),
+         loss_bbox=dict(
+             type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+         loss_dir=dict(
+             type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+         loss_attr=dict(
+             type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+         loss_centerness=dict(
+             type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+         bbox_coder=dict(type='FCOS3DBBoxCoder', code_size=9),
+         norm_on_bbox=True,
+         centerness_on_reg=True,
+         center_sampling=True,
+         conv_bias=True,
+         dcn_on_last_conv=True),
+     train_cfg=dict(
+         allowed_border=0,
+         code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],
+         pos_weight=-1,
+         debug=False),
+     test_cfg=dict(
+         use_rotate_nms=True,
+         nms_across_levels=False,
+         nms_pre=1000,
+         nms_thr=0.8,
+         score_thr=0.05,
+         min_bbox_size=0,
+         max_per_img=200))
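The comment on group_reg_dims above spells out how the nine regression channels are split; as a cross-check, the group sizes must sum to the bbox_coder's code_size. A trivial illustrative check, using only values from this file:

# Illustrative consistency check (not part of this commit): offset (2) +
# depth (1) + size (3) + rot (1) + velo (2) must equal the coder's code_size.
group_reg_dims = (2, 1, 3, 1, 2)
assert sum(group_reg_dims) == 9  # FCOS3DBBoxCoder code_size above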
configs/_base_/models/groupfree3d.py ADDED
@@ -0,0 +1,75 @@
+ model = dict(
+     type='GroupFree3DNet',
+     data_preprocessor=dict(type='Det3DDataPreprocessor'),
+     backbone=dict(
+         type='PointNet2SASSG',
+         in_channels=3,
+         num_points=(2048, 1024, 512, 256),
+         radius=(0.2, 0.4, 0.8, 1.2),
+         num_samples=(64, 32, 16, 16),
+         sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
+                      (128, 128, 256)),
+         fp_channels=((256, 256), (256, 288)),
+         norm_cfg=dict(type='BN2d'),
+         sa_cfg=dict(
+             type='PointSAModule',
+             pool_mod='max',
+             use_xyz=True,
+             normalize_xyz=True)),
+     bbox_head=dict(
+         type='GroupFree3DHead',
+         in_channels=288,
+         num_decoder_layers=6,
+         num_proposal=256,
+         transformerlayers=dict(
+             type='BaseTransformerLayer',
+             attn_cfgs=dict(
+                 type='GroupFree3DMHA',
+                 embed_dims=288,
+                 num_heads=8,
+                 attn_drop=0.1,
+                 dropout_layer=dict(type='Dropout', drop_prob=0.1)),
+             ffn_cfgs=dict(
+                 embed_dims=288,
+                 feedforward_channels=2048,
+                 ffn_drop=0.1,
+                 act_cfg=dict(type='ReLU', inplace=True)),
+             operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn',
+                              'norm')),
+         pred_layer_cfg=dict(
+             in_channels=288, shared_conv_channels=(288, 288), bias=True),
+         sampling_objectness_loss=dict(
+             type='mmdet.FocalLoss',
+             use_sigmoid=True,
+             gamma=2.0,
+             alpha=0.25,
+             loss_weight=8.0),
+         objectness_loss=dict(
+             type='mmdet.FocalLoss',
+             use_sigmoid=True,
+             gamma=2.0,
+             alpha=0.25,
+             loss_weight=1.0),
+         center_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
+         dir_class_loss=dict(
+             type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+         dir_res_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
+         size_class_loss=dict(
+             type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+         size_res_loss=dict(
+             type='mmdet.SmoothL1Loss',
+             beta=1.0,
+             reduction='sum',
+             loss_weight=10.0),
+         semantic_loss=dict(
+             type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
+     # model training and testing settings
+     train_cfg=dict(sample_mode='kps'),
+     test_cfg=dict(
+         sample_mode='kps',
+         nms_thr=0.25,
+         score_thr=0.0,
+         per_class_proposal=True,
+         prediction_stages='last'))
configs/_base_/models/h3dnet.py ADDED
@@ -0,0 +1,351 @@
+ primitive_z_cfg = dict(
+     type='PrimitiveHead',
+     num_dims=2,
+     num_classes=18,
+     primitive_mode='z',
+     upper_thresh=100.0,
+     surface_thresh=0.5,
+     vote_module_cfg=dict(
+         in_channels=256,
+         vote_per_seed=1,
+         gt_per_seed=1,
+         conv_channels=(256, 256),
+         conv_cfg=dict(type='Conv1d'),
+         norm_cfg=dict(type='BN1d'),
+         norm_feats=True,
+         vote_loss=dict(
+             type='ChamferDistance',
+             mode='l1',
+             reduction='none',
+             loss_dst_weight=10.0)),
+     vote_aggregation_cfg=dict(
+         type='PointSAModule',
+         num_point=1024,
+         radius=0.3,
+         num_sample=16,
+         mlp_channels=[256, 128, 128, 128],
+         use_xyz=True,
+         normalize_xyz=True),
+     feat_channels=(128, 128),
+     conv_cfg=dict(type='Conv1d'),
+     norm_cfg=dict(type='BN1d'),
+     objectness_loss=dict(
+         type='mmdet.CrossEntropyLoss',
+         class_weight=[0.4, 0.6],
+         reduction='mean',
+         loss_weight=30.0),
+     center_loss=dict(
+         type='ChamferDistance',
+         mode='l1',
+         reduction='sum',
+         loss_src_weight=0.5,
+         loss_dst_weight=0.5),
+     semantic_reg_loss=dict(
+         type='ChamferDistance',
+         mode='l1',
+         reduction='sum',
+         loss_src_weight=0.5,
+         loss_dst_weight=0.5),
+     semantic_cls_loss=dict(
+         type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+     train_cfg=dict(
+         sample_mode='vote',
+         dist_thresh=0.2,
+         var_thresh=1e-2,
+         lower_thresh=1e-6,
+         num_point=100,
+         num_point_line=10,
+         line_thresh=0.2),
+     test_cfg=dict(sample_mode='seed'))
+
+ primitive_xy_cfg = dict(
+     type='PrimitiveHead',
+     num_dims=1,
+     num_classes=18,
+     primitive_mode='xy',
+     upper_thresh=100.0,
+     surface_thresh=0.5,
+     vote_module_cfg=dict(
+         in_channels=256,
+         vote_per_seed=1,
+         gt_per_seed=1,
+         conv_channels=(256, 256),
+         conv_cfg=dict(type='Conv1d'),
+         norm_cfg=dict(type='BN1d'),
+         norm_feats=True,
+         vote_loss=dict(
+             type='ChamferDistance',
+             mode='l1',
+             reduction='none',
+             loss_dst_weight=10.0)),
+     vote_aggregation_cfg=dict(
+         type='PointSAModule',
+         num_point=1024,
+         radius=0.3,
+         num_sample=16,
+         mlp_channels=[256, 128, 128, 128],
+         use_xyz=True,
+         normalize_xyz=True),
+     feat_channels=(128, 128),
+     conv_cfg=dict(type='Conv1d'),
+     norm_cfg=dict(type='BN1d'),
+     objectness_loss=dict(
+         type='mmdet.CrossEntropyLoss',
+         class_weight=[0.4, 0.6],
+         reduction='mean',
+         loss_weight=30.0),
+     center_loss=dict(
+         type='ChamferDistance',
+         mode='l1',
+         reduction='sum',
+         loss_src_weight=0.5,
+         loss_dst_weight=0.5),
+     semantic_reg_loss=dict(
+         type='ChamferDistance',
+         mode='l1',
+         reduction='sum',
+         loss_src_weight=0.5,
+         loss_dst_weight=0.5),
+     semantic_cls_loss=dict(
+         type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+     train_cfg=dict(
+         sample_mode='vote',
+         dist_thresh=0.2,
+         var_thresh=1e-2,
+         lower_thresh=1e-6,
+         num_point=100,
+         num_point_line=10,
+         line_thresh=0.2),
+     test_cfg=dict(sample_mode='seed'))
+
+ primitive_line_cfg = dict(
+     type='PrimitiveHead',
+     num_dims=0,
+     num_classes=18,
+     primitive_mode='line',
+     upper_thresh=100.0,
+     surface_thresh=0.5,
+     vote_module_cfg=dict(
+         in_channels=256,
+         vote_per_seed=1,
+         gt_per_seed=1,
+         conv_channels=(256, 256),
+         conv_cfg=dict(type='Conv1d'),
+         norm_cfg=dict(type='BN1d'),
+         norm_feats=True,
+         vote_loss=dict(
+             type='ChamferDistance',
+             mode='l1',
+             reduction='none',
+             loss_dst_weight=10.0)),
+     vote_aggregation_cfg=dict(
+         type='PointSAModule',
+         num_point=1024,
+         radius=0.3,
+         num_sample=16,
+         mlp_channels=[256, 128, 128, 128],
+         use_xyz=True,
+         normalize_xyz=True),
+     feat_channels=(128, 128),
+     conv_cfg=dict(type='Conv1d'),
+     norm_cfg=dict(type='BN1d'),
+     objectness_loss=dict(
+         type='mmdet.CrossEntropyLoss',
+         class_weight=[0.4, 0.6],
+         reduction='mean',
+         loss_weight=30.0),
+     center_loss=dict(
+         type='ChamferDistance',
+         mode='l1',
+         reduction='sum',
+         loss_src_weight=1.0,
+         loss_dst_weight=1.0),
+     semantic_reg_loss=dict(
+         type='ChamferDistance',
+         mode='l1',
+         reduction='sum',
+         loss_src_weight=1.0,
+         loss_dst_weight=1.0),
+     semantic_cls_loss=dict(
+         type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=2.0),
+     train_cfg=dict(
+         sample_mode='vote',
+         dist_thresh=0.2,
+         var_thresh=1e-2,
+         lower_thresh=1e-6,
+         num_point=100,
+         num_point_line=10,
+         line_thresh=0.2),
+     test_cfg=dict(sample_mode='seed'))
+
+ model = dict(
+     type='H3DNet',
+     data_preprocessor=dict(type='Det3DDataPreprocessor'),
+     backbone=dict(
+         type='MultiBackbone',
+         num_streams=4,
+         suffixes=['net0', 'net1', 'net2', 'net3'],
+         conv_cfg=dict(type='Conv1d'),
+         norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.01),
+         act_cfg=dict(type='ReLU'),
+         backbones=dict(
+             type='PointNet2SASSG',
+             in_channels=4,
+             num_points=(2048, 1024, 512, 256),
+             radius=(0.2, 0.4, 0.8, 1.2),
+             num_samples=(64, 32, 16, 16),
+             sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
+                          (128, 128, 256)),
+             fp_channels=((256, 256), (256, 256)),
+             norm_cfg=dict(type='BN2d'),
+             sa_cfg=dict(
+                 type='PointSAModule',
+                 pool_mod='max',
+                 use_xyz=True,
+                 normalize_xyz=True))),
+     rpn_head=dict(
+         type='VoteHead',
+         vote_module_cfg=dict(
+             in_channels=256,
+             vote_per_seed=1,
+             gt_per_seed=3,
+             conv_channels=(256, 256),
+             conv_cfg=dict(type='Conv1d'),
+             norm_cfg=dict(type='BN1d'),
+             norm_feats=True,
+             vote_loss=dict(
+                 type='ChamferDistance',
+                 mode='l1',
+                 reduction='none',
+                 loss_dst_weight=10.0)),
+         vote_aggregation_cfg=dict(
+             type='PointSAModule',
+             num_point=256,
+             radius=0.3,
+             num_sample=16,
+             mlp_channels=[256, 128, 128, 128],
+             use_xyz=True,
+             normalize_xyz=True),
+         pred_layer_cfg=dict(
+             in_channels=128, shared_conv_channels=(128, 128), bias=True),
+         objectness_loss=dict(
+             type='mmdet.CrossEntropyLoss',
+             class_weight=[0.2, 0.8],
+             reduction='sum',
+             loss_weight=5.0),
+         center_loss=dict(
+             type='ChamferDistance',
+             mode='l2',
+             reduction='sum',
+             loss_src_weight=10.0,
+             loss_dst_weight=10.0),
+         dir_class_loss=dict(
+             type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+         dir_res_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
+         size_class_loss=dict(
+             type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+         size_res_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
+         semantic_loss=dict(
+             type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
+     roi_head=dict(
+         type='H3DRoIHead',
+         primitive_list=[primitive_z_cfg, primitive_xy_cfg, primitive_line_cfg],
+         bbox_head=dict(
+             type='H3DBboxHead',
+             gt_per_seed=3,
+             num_proposal=256,
+             suface_matching_cfg=dict(
+                 type='PointSAModule',
+                 num_point=256 * 6,
+                 radius=0.5,
+                 num_sample=32,
+                 mlp_channels=[128 + 6, 128, 64, 32],
+                 use_xyz=True,
+                 normalize_xyz=True),
+             line_matching_cfg=dict(
+                 type='PointSAModule',
+                 num_point=256 * 12,
+                 radius=0.5,
+                 num_sample=32,
+                 mlp_channels=[128 + 12, 128, 64, 32],
+                 use_xyz=True,
+                 normalize_xyz=True),
+             primitive_refine_channels=[128, 128, 128],
+             upper_thresh=100.0,
+             surface_thresh=0.5,
+             line_thresh=0.5,
+             conv_cfg=dict(type='Conv1d'),
+             norm_cfg=dict(type='BN1d'),
+             objectness_loss=dict(
+                 type='mmdet.CrossEntropyLoss',
+                 class_weight=[0.2, 0.8],
+                 reduction='sum',
+                 loss_weight=5.0),
+             center_loss=dict(
+                 type='ChamferDistance',
+                 mode='l2',
+                 reduction='sum',
+                 loss_src_weight=10.0,
+                 loss_dst_weight=10.0),
+             dir_class_loss=dict(
+                 type='mmdet.CrossEntropyLoss',
+                 reduction='sum',
+                 loss_weight=0.1),
+             dir_res_loss=dict(
+                 type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
+             size_class_loss=dict(
+                 type='mmdet.CrossEntropyLoss',
+                 reduction='sum',
+                 loss_weight=0.1),
+             size_res_loss=dict(
+                 type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
+             semantic_loss=dict(
+                 type='mmdet.CrossEntropyLoss',
+                 reduction='sum',
+                 loss_weight=0.1),
+             cues_objectness_loss=dict(
+                 type='mmdet.CrossEntropyLoss',
+                 class_weight=[0.3, 0.7],
+                 reduction='mean',
+                 loss_weight=5.0),
+             cues_semantic_loss=dict(
+                 type='mmdet.CrossEntropyLoss',
+                 class_weight=[0.3, 0.7],
+                 reduction='mean',
+                 loss_weight=5.0),
+             proposal_objectness_loss=dict(
+                 type='mmdet.CrossEntropyLoss',
+                 class_weight=[0.2, 0.8],
+                 reduction='none',
+                 loss_weight=5.0),
+             primitive_center_loss=dict(
+                 type='mmdet.MSELoss', reduction='none', loss_weight=1.0))),
+     # model training and testing settings
+     train_cfg=dict(
+         rpn=dict(
+             pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mode='vote'),
+         rpn_proposal=dict(use_nms=False),
+         rcnn=dict(
+             pos_distance_thr=0.3,
+             neg_distance_thr=0.6,
+             sample_mode='vote',
+             far_threshold=0.6,
+             near_threshold=0.3,
+             mask_surface_threshold=0.3,
+             label_surface_threshold=0.3,
+             mask_line_threshold=0.3,
+             label_line_threshold=0.3)),
+     test_cfg=dict(
+         rpn=dict(
+             sample_mode='seed',
+             nms_thr=0.25,
+             score_thr=0.05,
+             per_class_proposal=True,
+             use_nms=False),
+         rcnn=dict(
+             sample_mode='seed',
+             nms_thr=0.25,
+             score_thr=0.05,
+             per_class_proposal=True)))
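The three primitive head configs above differ only in num_dims, primitive_mode, and a few loss weights. A hypothetical refactor sketch (not part of this commit) showing how the 'xy' and 'line' variants could be derived from the 'z' config instead of repeating the full dict:

import copy

# Refactor sketch only: derive the variants by overriding the differing
# fields of primitive_z_cfg (names refer to the dicts defined above).
primitive_xy_cfg = copy.deepcopy(primitive_z_cfg)
primitive_xy_cfg.update(num_dims=1, primitive_mode='xy')
primitive_line_cfg = copy.deepcopy(primitive_z_cfg)
primitive_line_cfg.update(num_dims=0, primitive_mode='line')
primitive_line_cfg['center_loss'].update(
    loss_src_weight=1.0, loss_dst_weight=1.0)
primitive_line_cfg['semantic_reg_loss'].update(
    loss_src_weight=1.0, loss_dst_weight=1.0)
primitive_line_cfg['semantic_cls_loss']['loss_weight'] = 2.0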
configs/_base_/models/imvotenet.py ADDED
@@ -0,0 +1,118 @@
+ model = dict(
+     type='ImVoteNet',
+     data_preprocessor=dict(
+         type='Det3DDataPreprocessor',
+         # use caffe img_norm
+         mean=[103.530, 116.280, 123.675],
+         std=[1.0, 1.0, 1.0],
+         bgr_to_rgb=False,
+         pad_size_divisor=32),
+     img_backbone=dict(
+         type='mmdet.ResNet',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         frozen_stages=1,
+         norm_cfg=dict(type='BN', requires_grad=False),
+         norm_eval=True,
+         style='caffe'),
+     img_neck=dict(
+         type='mmdet.FPN',
+         in_channels=[256, 512, 1024, 2048],
+         out_channels=256,
+         num_outs=5),
+     img_rpn_head=dict(
+         _scope_='mmdet',
+         type='RPNHead',
+         in_channels=256,
+         feat_channels=256,
+         anchor_generator=dict(
+             type='AnchorGenerator',
+             scales=[8],
+             ratios=[0.5, 1.0, 2.0],
+             strides=[4, 8, 16, 32, 64]),
+         bbox_coder=dict(
+             type='DeltaXYWHBBoxCoder',
+             target_means=[.0, .0, .0, .0],
+             target_stds=[1.0, 1.0, 1.0, 1.0]),
+         loss_cls=dict(
+             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+     img_roi_head=dict(
+         _scope_='mmdet',
+         type='StandardRoIHead',
+         bbox_roi_extractor=dict(
+             type='SingleRoIExtractor',
+             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+             out_channels=256,
+             featmap_strides=[4, 8, 16, 32]),
+         bbox_head=dict(
+             type='Shared2FCBBoxHead',
+             in_channels=256,
+             fc_out_channels=1024,
+             roi_feat_size=7,
+             num_classes=10,
+             bbox_coder=dict(
+                 type='DeltaXYWHBBoxCoder',
+                 target_means=[0., 0., 0., 0.],
+                 target_stds=[0.1, 0.1, 0.2, 0.2]),
+             reg_class_agnostic=False,
+             loss_cls=dict(
+                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+             loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+
+     # model training and testing settings
+     train_cfg=dict(
+         _scope_='mmdet',
+         img_rpn=dict(
+             assigner=dict(
+                 type='MaxIoUAssigner',
+                 pos_iou_thr=0.7,
+                 neg_iou_thr=0.3,
+                 min_pos_iou=0.3,
+                 match_low_quality=True,
+                 ignore_iof_thr=-1),
+             sampler=dict(
+                 type='RandomSampler',
+                 num=256,
+                 pos_fraction=0.5,
+                 neg_pos_ub=-1,
+                 add_gt_as_proposals=False),
+             allowed_border=-1,
+             pos_weight=-1,
+             debug=False),
+         img_rpn_proposal=dict(
+             nms_across_levels=False,
+             nms_pre=2000,
+             nms_post=1000,
+             max_per_img=1000,
+             nms=dict(type='nms', iou_threshold=0.7),
+             min_bbox_size=0),
+         img_rcnn=dict(
+             assigner=dict(
+                 type='MaxIoUAssigner',
+                 pos_iou_thr=0.5,
+                 neg_iou_thr=0.5,
+                 min_pos_iou=0.5,
+                 match_low_quality=False,
+                 ignore_iof_thr=-1),
+             sampler=dict(
+                 type='RandomSampler',
+                 num=512,
+                 pos_fraction=0.25,
+                 neg_pos_ub=-1,
+                 add_gt_as_proposals=True),
+             pos_weight=-1,
+             debug=False)),
+     test_cfg=dict(
+         img_rpn=dict(
+             nms_across_levels=False,
+             nms_pre=1000,
+             nms_post=1000,
+             max_per_img=1000,
+             nms=dict(type='nms', iou_threshold=0.7),
+             min_bbox_size=0),
+         img_rcnn=dict(
+             score_thr=0.05,
+             nms=dict(type='nms', iou_threshold=0.5),
+             max_per_img=100)))