giantmonkeyTC committed
Commit 34d1f8b · 1 Parent(s): 4c86b49
This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .circleci/config.yml +35 -0
  2. .circleci/docker/Dockerfile +13 -0
  3. .circleci/test.yml +199 -0
  4. .dev_scripts/benchmark_full_models.txt +26 -0
  5. .dev_scripts/benchmark_options.py +11 -0
  6. .dev_scripts/benchmark_train_models.txt +13 -0
  7. .dev_scripts/covignore.cfg +6 -0
  8. .dev_scripts/diff_coverage_test.sh +42 -0
  9. .dev_scripts/gather_models.py +229 -0
  10. .dev_scripts/gen_benchmark_script.py +193 -0
  11. .dev_scripts/linter.sh +3 -0
  12. .dev_scripts/test_benchmark.sh +128 -0
  13. .dev_scripts/train_benchmark.sh +128 -0
  14. configs/.DS_Store +0 -0
  15. configs/3dssd/3dssd_4xb4_kitti-3d-car.py +119 -0
  16. configs/3dssd/README.md +45 -0
  17. configs/3dssd/metafile.yml +29 -0
  18. configs/_base_/datasets/kitti-3d-3class.py +167 -0
  19. configs/_base_/datasets/kitti-3d-car.py +165 -0
  20. configs/_base_/datasets/kitti-mono3d.py +100 -0
  21. configs/_base_/datasets/lyft-3d-range100.py +150 -0
  22. configs/_base_/datasets/lyft-3d.py +160 -0
  23. configs/_base_/datasets/nuim-instance.py +70 -0
  24. configs/_base_/datasets/nus-3d.py +169 -0
  25. configs/_base_/datasets/nus-mono3d.py +119 -0
  26. configs/_base_/datasets/s3dis-3d.py +134 -0
  27. configs/_base_/datasets/s3dis-seg.py +169 -0
  28. configs/_base_/datasets/scannet-3d.py +141 -0
  29. configs/_base_/datasets/scannet-seg.py +164 -0
  30. configs/_base_/datasets/semantickitti.py +224 -0
  31. configs/_base_/datasets/sunrgbd-3d.py +126 -0
  32. configs/_base_/datasets/waymoD3-fov-mono3d-3class.py +184 -0
  33. configs/_base_/datasets/waymoD3-mv-mono3d-3class.py +191 -0
  34. configs/_base_/datasets/waymoD5-3d-3class.py +178 -0
  35. configs/_base_/datasets/waymoD5-3d-car.py +173 -0
  36. configs/_base_/datasets/waymoD5-fov-mono3d-3class.py +163 -0
  37. configs/_base_/datasets/waymoD5-mv-mono3d-3class.py +163 -0
  38. configs/_base_/datasets/waymoD5-mv3d-3class.py +178 -0
  39. configs/_base_/default_runtime.py +23 -0
  40. configs/_base_/models/3dssd.py +76 -0
  41. configs/_base_/models/cascade-mask-rcnn_r50_fpn.py +199 -0
  42. configs/_base_/models/centerpoint_pillar02_second_secfpn_nus.py +89 -0
  43. configs/_base_/models/centerpoint_voxel01_second_secfpn_nus.py +89 -0
  44. configs/_base_/models/cylinder3d.py +41 -0
  45. configs/_base_/models/dgcnn.py +29 -0
  46. configs/_base_/models/fcaf3d.py +20 -0
  47. configs/_base_/models/fcos3d.py +86 -0
  48. configs/_base_/models/groupfree3d.py +75 -0
  49. configs/_base_/models/h3dnet.py +351 -0
  50. configs/_base_/models/imvotenet.py +118 -0
.circleci/config.yml ADDED
@@ -0,0 +1,35 @@
+ version: 2.1
+
+ # this allows you to use CircleCI's dynamic configuration feature
+ setup: true
+
+ # the path-filtering orb is required to continue a pipeline based on
+ # the path of an updated fileset
+ orbs:
+   path-filtering: circleci/path-filtering@0.1.2
+
+ workflows:
+   # the always-run workflow is always triggered, regardless of the pipeline parameters.
+   always-run:
+     jobs:
+       # the path-filtering/filter job determines which pipeline
+       # parameters to update.
+       - path-filtering/filter:
+           name: check-updated-files
+           # 3-column, whitespace-delimited mapping. One mapping per
+           # line:
+           # <regex path-to-test> <parameter-to-set> <value-of-pipeline-parameter>
+           mapping: |
+             mmdet3d/.* lint_only false
+             requirements/.* lint_only false
+             tests/.* lint_only false
+             tools/.* lint_only false
+             configs/.* lint_only false
+             .circleci/.* lint_only false
+             projects/.* lint_only false
+           base-revision: dev-1.x
+           # this is the path of the configuration we should trigger once
+           # path filtering and pipeline parameter value updates are
+           # complete. In this case, we are using the parent dynamic
+           # configuration itself.
+           config-path: .circleci/test.yml
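The mapping above drives the dynamic setup: when any changed path matches one of the regexes, the `lint_only` pipeline parameter is forced to `false`, and `.circleci/test.yml` then runs the full build workflows instead of lint alone. A minimal sketch of that evaluation, assuming the orb conceptually full-matches each regex against the changed file list (an illustration, not CircleCI's actual implementation):

```python
# Hypothetical sketch: apply a 3-column path-filtering mapping to the files
# changed since base-revision; a regex hit sets the named pipeline parameter.
import re

MAPPING = """\
mmdet3d/.* lint_only false
configs/.* lint_only false
"""


def evaluate_mapping(mapping_text, changed_files, defaults):
    params = dict(defaults)
    for line in mapping_text.strip().splitlines():
        pattern, name, value = line.split()
        if any(re.fullmatch(pattern, path) for path in changed_files):
            # the orb passes string values; coerce booleans for clarity here
            params[name] = {'true': True, 'false': False}.get(value, value)
    return params


# A docs-only change keeps the lint-only pipeline ...
print(evaluate_mapping(MAPPING, ['docs/index.rst'], {'lint_only': True}))
# ... while a source change unlocks the full test workflows.
print(evaluate_mapping(MAPPING, ['mmdet3d/version.py'], {'lint_only': True}))
```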
.circleci/docker/Dockerfile ADDED
@@ -0,0 +1,13 @@
+ ARG PYTORCH="1.8.1"
+ ARG CUDA="10.2"
+ ARG CUDNN="7"
+
+ ARG DEBIAN_FRONTEND=noninteractive
+
+ FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
+
+ # To fix GPG key error when running apt-get update
+ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
+ RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
+
+ RUN apt-get update && apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx
.circleci/test.yml ADDED
@@ -0,0 +1,199 @@
+ version: 2.1
+
+ # the default pipeline parameters, which will be updated according to
+ # the results of the path-filtering orb
+ parameters:
+   lint_only:
+     type: boolean
+     default: true
+
+ jobs:
+   lint:
+     docker:
+       - image: cimg/python:3.7.4
+     steps:
+       - checkout
+       - run:
+           name: Install pre-commit hook
+           command: |
+             pip install pre-commit
+             pre-commit install
+       - run:
+           name: Linting
+           command: pre-commit run --all-files
+       - run:
+           name: Check docstring coverage
+           command: |
+             pip install interrogate
+             interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-magic --ignore-regex "__repr__" --fail-under 90 mmdet3d
+
+   build_cpu:
+     parameters:
+       # The python version must match available image tags in
+       # https://circleci.com/developer/images/image/cimg/python
+       python:
+         type: string
+       torch:
+         type: string
+       torchvision:
+         type: string
+     docker:
+       - image: cimg/python:<< parameters.python >>
+     resource_class: large
+     steps:
+       - checkout
+       - run:
+           name: Install Libraries
+           command: |
+             sudo apt-get update
+             sudo apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx libjpeg-dev zlib1g-dev libtinfo-dev libncurses5
+       - run:
+           name: Configure Python & pip
+           command: |
+             pip install --upgrade pip
+             pip install wheel
+       - run:
+           name: Install PyTorch
+           command: pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html
+       - when:
+           condition:
+             equal: ["3.9.0", << parameters.python >>]
+           steps:
+             - run: pip install "protobuf <= 3.20.1" && sudo apt-get update && sudo apt-get -y install libprotobuf-dev protobuf-compiler cmake
+       - run:
+           name: Install mmdet3d dependencies
+           command: |
+             pip install git+ssh://git@github.com/open-mmlab/mmengine.git@main
+             pip install -U openmim
+             mim install 'mmcv >= 2.0.0rc4'
+             pip install git+ssh://git@github.com/open-mmlab/mmdetection.git@dev-3.x
+             pip install -r requirements/tests.txt
+       - run:
+           name: Build and install
+           command: |
+             pip install -e .
+       - run:
+           name: Run unittests
+           command: |
+             coverage run --branch --source mmdet3d -m pytest tests/
+             coverage xml
+             coverage report -m
+
+   build_cuda:
+     parameters:
+       torch:
+         type: string
+       cuda:
+         type: enum
+         enum: ["10.2", "11.7"]
+       cudnn:
+         type: integer
+         default: 8
+     machine:
+       image: linux-cuda-11:default
+       # docker_layer_caching: true
+     resource_class: gpu.nvidia.small.multi
+     steps:
+       - checkout
+       - run:
+           name: Install nvidia-container-toolkit and Restart Docker
+           command: |
+             sudo apt-get update
+             sudo apt-get install -y nvidia-container-toolkit
+             sudo systemctl restart docker
+       - run:
+           # Cloning repos in VM since Docker doesn't have access to the private key
+           name: Clone Repos
+           command: |
+             git clone -b main --depth 1 ssh://git@github.com/open-mmlab/mmengine.git /home/circleci/mmengine
+             git clone -b dev-3.x --depth 1 ssh://git@github.com/open-mmlab/mmdetection.git /home/circleci/mmdetection
+       - run:
+           name: Build Docker image
+           command: |
+             docker build .circleci/docker -t mmdet3d:gpu --build-arg PYTORCH=<< parameters.torch >> --build-arg CUDA=<< parameters.cuda >> --build-arg CUDNN=<< parameters.cudnn >>
+             docker run --gpus all -t -d -v /home/circleci/project:/mmdetection3d -v /home/circleci/mmengine:/mmengine -v /home/circleci/mmdetection:/mmdetection -w /mmdetection3d --name mmdet3d mmdet3d:gpu
+             docker exec mmdet3d apt-get install -y git
+       - run:
+           name: Install mmdet3d dependencies
+           command: |
+             docker exec mmdet3d pip install -e /mmengine
+             docker exec mmdet3d pip install -U openmim
+             docker exec mmdet3d mim install 'mmcv >= 2.0.0rc4'
+             docker exec mmdet3d pip install -e /mmdetection
+             docker exec mmdet3d pip install -r requirements/tests.txt
+       - run:
+           name: Build and install
+           command: |
+             docker exec mmdet3d pip install -e .
+       - run:
+           name: Run unittests
+           command: |
+             docker exec mmdet3d pytest tests/
+
+ workflows:
+   pr_stage_lint:
+     when: << pipeline.parameters.lint_only >>
+     jobs:
+       - lint:
+           name: lint
+           filters:
+             branches:
+               ignore:
+                 - dev-1.x
+   pr_stage_test:
+     when:
+       not: << pipeline.parameters.lint_only >>
+     jobs:
+       - lint:
+           name: lint
+           filters:
+             branches:
+               ignore:
+                 - dev-1.x
+       - build_cpu:
+           name: minimum_version_cpu
+           torch: 1.8.1
+           torchvision: 0.9.1
+           python: 3.7.4 # The lowest python 3.7.x version available on CircleCI images
+           requires:
+             - lint
+       - build_cpu:
+           name: maximum_version_cpu
+           torch: 2.0.0
+           torchvision: 0.15.1
+           python: 3.9.0
+           requires:
+             - minimum_version_cpu
+       - hold:
+           type: approval
+           requires:
+             - maximum_version_cpu
+       - build_cuda:
+           name: mainstream_version_gpu
+           torch: 1.8.1
+           # Use double quotation mark to explicitly specify its type
+           # as string instead of number
+           cuda: "10.2"
+           cudnn: 7
+           requires:
+             - hold
+       - build_cuda:
+           name: maximum_version_gpu
+           torch: 2.0.0
+           cuda: "11.7"
+           cudnn: 8
+           requires:
+             - hold
+   merge_stage_test:
+     when:
+       not: << pipeline.parameters.lint_only >>
+     jobs:
+       - build_cuda:
+           name: minimum_version_gpu
+           torch: 1.8.1
+           cuda: "10.2"
+           cudnn: 7
+           filters:
+             branches:
+               only:
+                 - dev-1.x
.dev_scripts/benchmark_full_models.txt ADDED
@@ -0,0 +1,26 @@
+ configs/3dssd/3dssd_4xb4_kitti-3d-car.py
+ configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn_8xb4-cyclic-20e_nus-3d.py
+ configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py
+ configs/fcaf3d/fcaf3d_2xb8_s3dis-3d-5class.py
+ configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py
+ configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py
+ configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py
+ configs/h3dnet/h3dnet_8xb3_scannet-seg.py
+ configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py
+ configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py
+ configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py
+ configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py
+ configs/paconv/paconv_ssg_8xb8-cosine-150e_s3dis-seg.py
+ configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py
+ configs/pgd/pgd_r101-caffe_fpn_head-gn_4xb3-4x_kitti-mono3d.py
+ configs/point_rcnn/point-rcnn_8xb2_kitti-3d-3class.py
+ configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py
+ configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py
+ configs/pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py
+ configs/pv_rcnn/pv_rcnn_8xb2-80e_kitti-3d-3class.py
+ configs/regnet/pointpillars_hv_regnet-1.6gf_fpn_sbn-all_8xb4-2x_nus-3d.py
+ configs/second/second_hv_secfpn_8xb6-80e_kitti-3d-3class.py
+ configs/second/second_hv_secfpn_8xb6-amp-80e_kitti-3d-3class.py
+ configs/smoke/smoke_dla34_dlaneck_gn-all_4xb8-6x_kitti-mono3d.py
+ configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_nus-3d.py
+ configs/votenet/votenet_8xb8_scannet-3d.py
.dev_scripts/benchmark_options.py ADDED
@@ -0,0 +1,11 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+
+ third_part_libs = [
+     'conda install openblas-devel -c anaconda',
+     "pip install -U git+https://github.com/NVIDIA/MinkowskiEngine -v --no-deps --install-option='--blas_include_dirs=/opt/conda/include' --install-option='--blas=openblas'"  # noqa
+ ]
+ default_floating_range = 0.5
+ model_floating_ranges = {
+     'configs/pointpillars/pointpillars_hv_secfpn_sbn-all_16xb2-2x_waymoD5-3d-3class.py':  # noqa
+     0.7
+ }
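`default_floating_range` and `model_floating_ranges` read as per-model tolerances for comparing a fresh benchmark run against recorded reference metrics, with the Waymo PointPillars config allowed a wider 0.7 drift. A hedged sketch of how a regression check might consume them (the `within_tolerance` helper and the numbers are illustrative, not part of this commit):

```python
default_floating_range = 0.5
model_floating_ranges = {
    'configs/pointpillars/pointpillars_hv_secfpn_sbn-all_16xb2-2x_waymoD5-3d-3class.py':
    0.7
}


def within_tolerance(config, observed, reference):
    """Pass if the observed metric drifts no more than the allowed range."""
    allowed = model_floating_ranges.get(config, default_floating_range)
    return abs(observed - reference) <= allowed


# 64.8 vs. a 65.4 reference is inside the 0.7 Waymo tolerance but would fail
# the default 0.5 used for every other config.
waymo_cfg = ('configs/pointpillars/'
             'pointpillars_hv_secfpn_sbn-all_16xb2-2x_waymoD5-3d-3class.py')
assert within_tolerance(waymo_cfg, 64.8, 65.4)
assert not within_tolerance('configs/3dssd/3dssd_4xb4_kitti-3d-car.py', 64.8, 65.4)
```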
.dev_scripts/benchmark_train_models.txt ADDED
@@ -0,0 +1,13 @@
+ configs/3dssd/3dssd_4xb4_kitti-3d-car.py
+ configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn_8xb4-cyclic-20e_nus-3d.py
+ configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py
+ configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py
+ configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py
+ configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py
+ configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py
+ configs/pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py
+ configs/pv_rcnn/pv_rcnn_8xb2-80e_kitti-3d-3class.py
+ configs/second/second_hv_secfpn_8xb6-80e_kitti-3d-3class.py
+ configs/second/second_hv_secfpn_8xb6-amp-80e_kitti-3d-3class.py
+ configs/smoke/smoke_dla34_dlaneck_gn-all_4xb8-6x_kitti-mono3d.py
+ configs/votenet/votenet_8xb8_scannet-3d.py
.dev_scripts/covignore.cfg ADDED
@@ -0,0 +1,6 @@
+ # Each line should be the relative path to the root directory
+ # of this repo. Support regular expression as well.
+ # For example:
+ # .*/utils.py
+
+ .*/__init__.py
.dev_scripts/diff_coverage_test.sh ADDED
@@ -0,0 +1,42 @@
+ #!/bin/bash
+
+ readarray -t IGNORED_FILES < $( dirname "$0" )/covignore.cfg
+
+
+ REUSE_COVERAGE_REPORT=${REUSE_COVERAGE_REPORT:-0}
+ REPO=${1:-"origin"}
+ BRANCH=${2:-"refactor_dev"}
+
+ git fetch $REPO $BRANCH
+
+ PY_FILES=""
+ for FILE_NAME in $(git diff --name-only ${REPO}/${BRANCH}); do
+     # Only test python files in mmdet3d/ existing in current branch, and not ignored in covignore.cfg
+     if [ ${FILE_NAME: -3} == ".py" ] && [ ${FILE_NAME:0:8} == "mmdet3d/" ] && [ -f "$FILE_NAME" ]; then
+         IGNORED=false
+         for IGNORED_FILE_NAME in "${IGNORED_FILES[@]}"; do
+             # Skip blank lines
+             if [ -z "$IGNORED_FILE_NAME" ]; then
+                 continue
+             fi
+             if [ "${IGNORED_FILE_NAME::1}" != "#" ] && [[ "$FILE_NAME" =~ $IGNORED_FILE_NAME ]]; then
+                 echo "Ignoring $FILE_NAME"
+                 IGNORED=true
+                 break
+             fi
+         done
+         if [ "$IGNORED" = false ]; then
+             PY_FILES="$PY_FILES $FILE_NAME"
+         fi
+     fi
+ done
+
+ # Only test the coverage when PY_FILES are not empty, otherwise they will test the entire project
+ if [ ! -z "${PY_FILES}" ]
+ then
+     if [ "$REUSE_COVERAGE_REPORT" == "0" ]; then
+         coverage run --branch --source mmdet3d -m pytest tests/
+     fi
+     coverage report --fail-under 80 -m $PY_FILES
+     interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-magic --ignore-regex "__repr__" --fail-under 95 $PY_FILES
+ fi
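For readers less fluent in bash, the filtering loop above treats every non-comment line of covignore.cfg as a regular expression and tests it against each changed file with `[[ ... =~ ... ]]`, which is an unanchored search. A Python rendering of the same logic (an illustrative equivalent, not part of the commit; the on-disk existence check is omitted):

```python
import re


def filter_ignored(changed_files, ignore_patterns):
    """Keep changed mmdet3d/ .py files that match no covignore pattern."""
    kept = []
    for name in changed_files:
        if not (name.startswith('mmdet3d/') and name.endswith('.py')):
            continue
        for pattern in ignore_patterns:
            pattern = pattern.strip()
            if not pattern or pattern.startswith('#'):
                continue  # blank lines and comments, as in the shell loop
            if re.search(pattern, name):  # unanchored, like bash's =~
                break
        else:
            kept.append(name)
    return kept


# With the shipped covignore.cfg, package __init__.py files are skipped:
print(filter_ignored(
    ['mmdet3d/models/__init__.py', 'mmdet3d/models/detectors/votenet.py'],
    ['.*/__init__.py']))  # -> ['mmdet3d/models/detectors/votenet.py']
```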
.dev_scripts/gather_models.py ADDED
@@ -0,0 +1,229 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ """Script to gather benchmarked models and prepare them for upload.
+
+ Usage:
+     python gather_models.py ${root_path} ${out_dir}
+
+ Example:
+     python gather_models.py \
+         work_dirs/pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d \
+         work_dirs/pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d
+
+ Note that before running the above command, rename the directory with the
+ config name if you did not use the default directory name, create
+ a corresponding directory 'pgd' under the above path and put the used config
+ into it.
+ """
+
+ import argparse
+ import glob
+ import json
+ import shutil
+ import subprocess
+ from os import path as osp
+
+ import mmengine
+ import torch
+
+ # build schedule look-up table to automatically find the final model
+ SCHEDULES_LUT = {
+     '_1x_': 12,
+     '_2x_': 24,
+     '_20e_': 20,
+     '_3x_': 36,
+     '_4x_': 48,
+     '_24e_': 24,
+     '_6x_': 73,
+     '_50e_': 50,
+     '_80e_': 80,
+     '_100e_': 100,
+     '_150e_': 150,
+     '_200e_': 200,
+     '_250e_': 250,
+     '_400e_': 400
+ }
+
+ # TODO: add support for lyft dataset
+ RESULTS_LUT = {
+     'coco': ['bbox_mAP', 'segm_mAP'],
+     'nus': ['pts_bbox_NuScenes/NDS', 'NDS'],
+     'kitti-3d-3class': ['KITTI/Overall_3D_moderate', 'Overall_3D_moderate'],
+     'kitti-3d-car': ['KITTI/Car_3D_moderate_strict', 'Car_3D_moderate_strict'],
+     'lyft': ['score'],
+     'scannet_seg': ['miou'],
+     's3dis_seg': ['miou'],
+     'scannet': ['mAP_0.50'],
+     'sunrgbd': ['mAP_0.50'],
+     'kitti-mono3d': [
+         'img_bbox/KITTI/Car_3D_AP40_moderate_strict',
+         'Car_3D_AP40_moderate_strict'
+     ],
+     'nus-mono3d': ['img_bbox_NuScenes/NDS', 'NDS']
+ }
+
+
+ def get_model_dataset(log_json_path):
+     for key in RESULTS_LUT:
+         if log_json_path.find(key) != -1:
+             return key
+
+
+ def process_checkpoint(in_file, out_file):
+     checkpoint = torch.load(in_file, map_location='cpu')
+     # remove optimizer for smaller file size
+     if 'optimizer' in checkpoint:
+         del checkpoint['optimizer']
+     # if it is necessary to remove some sensitive data in checkpoint['meta'],
+     # add the code here.
+     torch.save(checkpoint, out_file)
+     sha = subprocess.check_output(['sha256sum', out_file]).decode()
+     final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8])
+     subprocess.Popen(['mv', out_file, final_file])
+     return final_file
+
+
+ def get_final_epoch(config):
+     if config.find('grid_rcnn') != -1 and config.find('2x') != -1:
+         # grid_rcnn 2x trains 25 epochs
+         return 25
+
+     for schedule_name, epoch_num in SCHEDULES_LUT.items():
+         if config.find(schedule_name) != -1:
+             return epoch_num
+
+
+ def get_best_results(log_json_path):
+     dataset = get_model_dataset(log_json_path)
+     max_dict = dict()
+     max_memory = 0
+     with open(log_json_path, 'r') as f:
+         for line in f.readlines():
+             log_line = json.loads(line)
+             if 'mode' not in log_line.keys():
+                 continue
+
+             # record memory and find best results & epochs
+             if log_line['mode'] == 'train' \
+                     and max_memory <= log_line['memory']:
+                 max_memory = log_line['memory']
+
+             elif log_line['mode'] == 'val':
+                 result_dict = {
+                     key: log_line[key]
+                     for key in RESULTS_LUT[dataset] if key in log_line
+                 }
+                 if len(max_dict) == 0:
+                     max_dict = result_dict
+                     max_dict['epoch'] = log_line['epoch']
+                 elif all(
+                         [max_dict[key] <= result_dict[key]
+                          for key in result_dict]):
+                     max_dict.update(result_dict)
+                     max_dict['epoch'] = log_line['epoch']
+
+     max_dict['memory'] = max_memory
+     return max_dict
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(description='Gather benchmarked models')
+     parser.add_argument(
+         'root',
+         type=str,
+         help='root path of benchmarked models to be gathered')
+     parser.add_argument(
+         'out', type=str, help='output path of gathered models to be stored')
+
+     args = parser.parse_args()
+     return args
+
+
+ def main():
+     args = parse_args()
+     models_root = args.root
+     models_out = args.out
+     mmengine.mkdir_or_exist(models_out)
+
+     # find all models in the root directory to be gathered
+     raw_configs = list(mmengine.scandir('./configs', '.py', recursive=True))
+
+     # filter configs that is not trained in the experiments dir
+     used_configs = []
+     for raw_config in raw_configs:
+         if osp.exists(osp.join(models_root, raw_config)):
+             used_configs.append(raw_config)
+     print(f'Find {len(used_configs)} models to be gathered')
+
+     # find final_ckpt and log file for trained each config
+     # and parse the best performance
+     model_infos = []
+     for used_config in used_configs:
+         # get logs
+         log_json_path = glob.glob(osp.join(models_root, '*.log.json'))[0]
+         log_txt_path = glob.glob(osp.join(models_root, '*.log'))[0]
+         model_performance = get_best_results(log_json_path)
+         final_epoch = model_performance['epoch']
+         final_model = 'epoch_{}.pth'.format(final_epoch)
+         model_path = osp.join(models_root, final_model)
+
+         # skip if the model is still training
+         if not osp.exists(model_path):
+             print(f'Expected {model_path} does not exist!')
+             continue
+
+         if model_performance is None:
+             print(f'Obtained no performance for model {used_config}')
+             continue
+
+         model_time = osp.split(log_txt_path)[-1].split('.')[0]
+         model_infos.append(
+             dict(
+                 config=used_config,
+                 results=model_performance,
+                 epochs=final_epoch,
+                 model_time=model_time,
+                 log_json_path=osp.split(log_json_path)[-1]))
+
+     # publish model for each checkpoint
+     publish_model_infos = []
+     for model in model_infos:
+         model_publish_dir = osp.join(models_out, model['config'].rstrip('.py'))
+         mmengine.mkdir_or_exist(model_publish_dir)
+
+         model_name = model['config'].split('/')[-1].rstrip(
+             '.py') + '_' + model['model_time']
+         publish_model_path = osp.join(model_publish_dir, model_name)
+         trained_model_path = osp.join(models_root,
+                                       'epoch_{}.pth'.format(model['epochs']))
+
+         # convert model
+         final_model_path = process_checkpoint(trained_model_path,
+                                               publish_model_path)
+
+         # copy log
+         shutil.copy(
+             osp.join(models_root, model['log_json_path']),
+             osp.join(model_publish_dir, f'{model_name}.log.json'))
+         shutil.copy(
+             osp.join(models_root, model['log_json_path'].rstrip('.json')),
+             osp.join(model_publish_dir, f'{model_name}.log'))
+
+         # copy config to guarantee reproducibility
+         config_path = model['config']
+         config_path = osp.join(
+             'configs',
+             config_path) if 'configs' not in config_path else config_path
+         target_cconfig_path = osp.split(config_path)[-1]
+         shutil.copy(config_path,
+                     osp.join(model_publish_dir, target_cconfig_path))
+
+         model['model_path'] = final_model_path
+         publish_model_infos.append(model)
+
+     models = dict(models=publish_model_infos)
+     print(f'Totally gathered {len(publish_model_infos)} models')
+     mmengine.dump(models, osp.join(models_out, 'model_info.json'))
+
+
+ if __name__ == '__main__':
+     main()
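One caveat worth flagging in `process_checkpoint` and `main` above: `str.rstrip('.pth')` strips any trailing run of the characters `.`, `p`, `t`, `h` rather than the literal extension, so some names lose extra characters. A safer suffix-stripping sketch (an assumption about the intended behavior, not what the commit ships):

```python
# str.rstrip treats its argument as a character set, not a suffix:
assert 'checkpoint.pth'.rstrip('.pth') == 'checkpoin'  # the trailing 't' also goes


# Slice the suffix off explicitly (str.removesuffix does this on Python >= 3.9):
def strip_suffix(name, suffix):
    return name[:-len(suffix)] if name.endswith(suffix) else name


assert strip_suffix('checkpoint.pth', '.pth') == 'checkpoint'
assert strip_suffix('epoch_80.pth', '.pth') == 'epoch_80'
```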
.dev_scripts/gen_benchmark_script.py ADDED
@@ -0,0 +1,193 @@
+ import argparse
+ import re
+ from os import path as osp
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(
+         description='Generate benchmark training/testing scripts')
+     parser.add_argument(
+         '--input_file',
+         required=False,
+         type=str,
+         help='Input file containing the paths '
+         'of configs to be trained/tested.')
+     parser.add_argument(
+         '--output_file',
+         required=True,
+         type=str,
+         help='Output file containing the '
+         'commands to train/test selected models.')
+     parser.add_argument(
+         '--gpus_per_node',
+         type=int,
+         default=8,
+         help='GPUs per node config for slurm, '
+         'should be set according to your slurm environment')
+     parser.add_argument(
+         '--cpus_per_task',
+         type=int,
+         default=5,
+         help='CPUs per task config for slurm, '
+         'should be set according to your slurm environment')
+     parser.add_argument(
+         '--gpus',
+         type=int,
+         default=8,
+         help='Totally used num of GPUs config for slurm (in testing), '
+         'should be set according to your slurm environment')
+     parser.add_argument(
+         '--mode', type=str, default='train', help='Train or test')
+     parser.add_argument(
+         '--long_work_dir',
+         action='store_true',
+         help='Whether use full relative path of config as work dir')
+     parser.add_argument(
+         '--max_keep_ckpts',
+         type=int,
+         default=1,
+         help='The max number of checkpoints saved in training')
+     parser.add_argument(
+         '--full_log',
+         action='store_true',
+         help='Whether save full log in a file')
+
+     args = parser.parse_args()
+     return args
+
+
+ args = parse_args()
+ assert args.mode in ['train', 'test'], 'Currently we only support ' \
+     'automatically generating training or testing scripts.'
+
+ config_paths = []
+
+ if args.input_file is not None:
+     with open(args.input_file, 'r') as fi:
+         config_paths = fi.read().strip().split('\n')
+ else:
+     while True:
+         print('Please type a config path and '
+               'press enter (press enter directly to exit):')
+         config_path = input()
+         if config_path != '':
+             config_paths.append(config_path)
+         else:
+             break
+
+ script = '''PARTITION=$1
+ CHECKPOINT_DIR=$2
+
+ '''
+
+ if args.mode == 'train':
+     for i, config_path in enumerate(config_paths):
+         root_dir = osp.dirname(osp.dirname(osp.abspath(__file__)))
+         if not osp.exists(osp.join(root_dir, config_path)):
+             print(f'Invalid config path (does not exist):\n{config_path}')
+             continue
+
+         config_name = config_path.split('/')[-1][:-3]
+         match_obj = re.match(r'^.*_[0-9]+x([0-9]+)_.*$', config_name)
+         if match_obj is None:
+             print(f'Invalid config path (no GPU num in '
+                   f'config name):\n{config_path}')
+             continue
+
+         gpu_num = int(match_obj.group(1))
+         work_dir_name = config_path if args.long_work_dir else config_name
+
+         script += f"echo '{config_path}' &\n"
+         if args.full_log:
+             script += f'mkdir -p $CHECKPOINT_DIR/{work_dir_name}\n'
+
+         # training commands
+         script += f'GPUS={gpu_num} GPUS_PER_NODE={args.gpus_per_node} ' \
+                   f'CPUS_PER_TASK={args.cpus_per_task} ' \
+                   f'./tools/slurm_train.sh $PARTITION {config_name} ' \
+                   f'{config_path} \\\n'
+         script += f'$CHECKPOINT_DIR/{work_dir_name} --cfg-options ' \
+                   f'checkpoint_config.max_keep_ckpts=' \
+                   f'{args.max_keep_ckpts} \\\n' \
+
+         # if output full log, redirect stdout and stderr to
+         # another log file in work dir
+         if args.full_log:
+             script += f'2>&1|tee $CHECKPOINT_DIR/{work_dir_name}' \
+                       f'/FULL_LOG.txt &\n'
+         else:
+             script += '>/dev/null &\n'
+
+         if i != len(config_paths) - 1:
+             script += '\n'
+
+         print(f'Successfully generated script for {config_name}')
+
+     with open(args.output_file, 'w') as fo:
+         fo.write(script)
+
+ elif args.mode == 'test':
+     for i, config_path in enumerate(config_paths):
+         root_dir = osp.dirname(osp.dirname(osp.abspath(__file__)))
+         if not osp.exists(osp.join(root_dir, config_path)):
+             print(f'Invalid config path (does not exist):\n{config_path}')
+             continue
+
+         config_name = config_path.split('/')[-1][:-3]
+
+         tasks = {
+             'scannet_seg', 'scannet', 's3dis_seg', 'sunrgbd', 'kitti', 'nus',
+             'lyft', 'waymo'
+         }
+         eval_option = None
+         for task in tasks:
+             if task in config_name:
+                 eval_option = task
+                 break
+         if eval_option is None:
+             print(f'Invalid config path (invalid task):\n{config_path}')
+             continue
+
+         work_dir_name = config_path if args.long_work_dir else config_name
+
+         script += f"echo '{config_path}' &\n"
+         if args.full_log:
+             script += f'mkdir -p $CHECKPOINT_DIR/{work_dir_name}\n'
+
+         # training commands
+         script += f'GPUS={args.gpus} GPUS_PER_NODE={args.gpus_per_node} ' \
+                   f'CPUS_PER_TASK={args.cpus_per_task} ' \
+                   f'./tools/slurm_test.sh $PARTITION {config_name} ' \
+                   f'{config_path} \\\n'
+         script += f'$CHECKPOINT_DIR/{work_dir_name}/latest.pth ' \
+
+         if eval_option in ['scannet_seg', 's3dis_seg']:
+             script += '--eval mIoU \\\n'
+         elif eval_option in ['scannet', 'sunrgbd', 'kitti', 'nus']:
+             script += '--eval map \\\n'
+         elif eval_option in ['lyft']:
+             script += f'--format-only --eval-options jsonfile_prefix=' \
+                       f'$CHECKPOINT_DIR/{work_dir_name}/results_challenge ' \
+                       f'csv_savepath=$CHECKPOINT_DIR/{work_dir_name}/' \
+                       f'results_challenge.csv \\\n'
+         elif eval_option in ['waymo']:
+             script += f'--eval waymo --eval-options pklfile_prefix=' \
+                       f'$CHECKPOINT_DIR/{work_dir_name}/kitti_results ' \
+                       f'submission_prefix=$CHECKPOINT_DIR/{work_dir_name}/' \
+                       f'kitti_results \\\n'
+
+         # if output full log, redirect stdout and stderr to
+         # another log file in work dir
+         if args.full_log:
+             script += f'2>&1|tee $CHECKPOINT_DIR/{work_dir_name}' \
+                       f'/FULL_LOG.txt &\n'
+         else:
+             script += '>/dev/null &\n'
+
+         if i != len(config_paths) - 1:
+             script += '\n'
+
+         print(f'Successfully generated script for {config_name}')
+
+     with open(args.output_file, 'w') as fo:
+         fo.write(script)
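The checked-in train_benchmark.sh and test_benchmark.sh below look like this generator's output with `--long_work_dir` and `--full_log` (full config paths as work dirs, `FULL_LOG.txt` redirection). Note also that the GPU-count regex `_[0-9]+x([0-9]+)_` expects old-style names such as `hv_second_secfpn_6x8_80e_...`; the newer `<gpus>xb<batch>` names used in the benchmark lists (e.g. `3dssd_4xb4_...`) do not match it. A sketch handling both forms (an illustration, not part of the commit):

```python
import re


def parse_gpu_num(config_name):
    """Extract the GPU count from either config naming convention."""
    # new style '<gpus>xb<batch>', e.g. '3dssd_4xb4_kitti-3d-car' -> 4 GPUs
    match = re.match(r'^.*_([0-9]+)xb[0-9]+.*$', config_name)
    if match:
        return int(match.group(1))
    # old style '<samples>x<gpus>', e.g. 'hv_second_secfpn_6x8_80e_...' -> 8 GPUs
    match = re.match(r'^.*_[0-9]+x([0-9]+)_.*$', config_name)
    return int(match.group(1)) if match else None


assert parse_gpu_num('3dssd_4xb4_kitti-3d-car') == 4
assert parse_gpu_num('hv_second_secfpn_6x8_80e_kitti-3d-3class') == 8
```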
.dev_scripts/linter.sh ADDED
@@ -0,0 +1,3 @@
+ yapf -r -i mmdet3d/ configs/ tests/ tools/
+ isort mmdet3d/ configs/ tests/ tools/
+ flake8 .
.dev_scripts/test_benchmark.sh ADDED
@@ -0,0 +1,128 @@
+ PARTITION=$1
+ CHECKPOINT_DIR=$2
+
+ echo 'configs/3dssd/3dssd_4xb4_kitti-3d-car.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION 3dssd_4x4_kitti-3d-car configs/3dssd/3dssd_4xb4_kitti-3d-car.py \
+ $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py/FULL_LOG.txt &
+
+ echo 'configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION centerpoint_02pillar_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py \
+ $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py/FULL_LOG.txt &
+
+ echo 'configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py \
+ $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py/FULL_LOG.txt &
+
+ echo 'configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py \
+ $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py/FULL_LOG.txt &
+
+ echo 'configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION groupfree3d_8x4_scannet-3d-18class-L6-O256 configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py \
+ $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py/FULL_LOG.txt &
+
+ echo 'configs/h3dnet/h3dnet_8xb3_scannet-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION h3dnet_3x8_scannet-3d-18class configs/h3dnet/h3dnet_8xb3_scannet-seg.py \
+ $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py/FULL_LOG.txt &
+
+ echo 'configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py \
+ $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py/FULL_LOG.txt &
+
+ echo 'configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION imvotenet_stage2_16x8_sunrgbd-3d-10class configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py \
+ $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py/FULL_LOG.txt &
+
+ echo 'configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION imvoxelnet_4x8_kitti-3d-car configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py \
+ $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py/FULL_LOG.txt &
+
+ echo 'configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION pointnet2_msg_16x2_cosine_80e_s3dis_seg-3d-13class configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py \
+ $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py/latest.pth --eval mIoU \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py/FULL_LOG.txt &
+
+ echo 'configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION pointnet2_msg_16x2_cosine_250e_scannet_seg-3d-20class configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py \
+ $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py/FULL_LOG.txt &
+
+ echo 'configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_pointpillars_fpn_sbn-all_2x8_2x_lyft-3d configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py \
+ $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py/latest.pth --format-only --eval-options jsonfile_prefix=$CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py/results_challenge csv_savepath=$CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py/results_challenge.csv \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py/FULL_LOG.txt &
+
+ echo 'configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py \
+ $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py/latest.pth --eval waymo --eval-options pklfile_prefix=$CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py/kitti_results submission_prefix=$CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py/kitti_results \
+ 2>&1|tee $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py \
+ $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py/FULL_LOG.txt &
+
+ echo 'configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_second_secfpn_6x8_80e_kitti-3d-3class configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py \
+ $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py/FULL_LOG.txt &
+
+ echo 'configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py \
+ $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py/latest.pth --format-only --eval-options jsonfile_prefix=$CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py/results_challenge csv_savepath=$CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py/results_challenge.csv \
+ 2>&1|tee $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py/FULL_LOG.txt &
+
+ echo 'configs/votenet/votenet_8xb8_scannet-3d.py' &
+ mkdir -p $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_test.sh $PARTITION votenet_8x8_scannet-3d-18class configs/votenet/votenet_8xb8_scannet-3d.py \
+ $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py/latest.pth --eval map \
+ 2>&1|tee $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py/FULL_LOG.txt &
.dev_scripts/train_benchmark.sh ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PARTITION=$1
2
+ CHECKPOINT_DIR=$2
3
+
4
+ echo 'configs/3dssd/3dssd_4xb4_kitti-3d-car.py' &
5
+ mkdir -p $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py
6
+ GPUS=4 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION 3dssd_4x4_kitti-3d-car configs/3dssd/3dssd_4xb4_kitti-3d-car.py \
7
+ $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
8
+ 2>&1|tee $CHECKPOINT_DIR/configs/3dssd/3dssd_4xb4_kitti-3d-car.py/FULL_LOG.txt &
9
+
10
+ echo 'configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py' &
11
+ mkdir -p $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py
12
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION centerpoint_02pillar_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py \
13
+ $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
14
+ 2>&1|tee $CHECKPOINT_DIR/configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py/FULL_LOG.txt &
15
+
16
+ echo 'configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py' &
17
+ mkdir -p $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py
18
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py \
19
+ $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
20
+ 2>&1|tee $CHECKPOINT_DIR/configs/dynamic_voxelization/second_dv_secfpn_8xb2-cosine-80e_kitti-3d-3class.py/FULL_LOG.txt &
21
+
22
+ echo 'configs/fcos3d/fcos3d_r101-caffe-dcn_fpn_head-gn_8xb2-1x_nus-mono3d.py' &
23
+ mkdir -p $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py
24
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py \
25
+ $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
26
+ 2>&1|tee $CHECKPOINT_DIR/configs/fcos3d/fcos3d_r101-caffe-fpn-head-gn-dcn_8xb2-1x_nus-mono3d.py/FULL_LOG.txt &
27
+
28
+ echo 'configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py' &
29
+ mkdir -p $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py
30
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py \
31
+ $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
32
+ 2>&1|tee $CHECKPOINT_DIR/configs/second/hv_second_secfpn_fp16_6x8_80e_kitti-3d-3class.py/FULL_LOG.txt &
33
+
34
+ echo 'configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py' &
35
+ mkdir -p $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py
36
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_pointpillars_regnet-1.6gf_fpn_sbn-all_free-anchor_strong-aug_4x8_3x_nus-3d configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py \
37
+ $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
38
+ 2>&1|tee $CHECKPOINT_DIR/configs/free_anchor/pointpillars_hv_regnet-1.6gf_fpn_head-free-anchor_sbn-all_8xb4-strong-aug-3x_nus-3d.py/FULL_LOG.txt &
39
+
40
+ echo 'configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py' &
41
+ mkdir -p $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py
42
+ GPUS=4 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION groupfree3d_8x4_scannet-3d-18class-L6-O256 configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py \
43
+ $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
44
+ 2>&1|tee $CHECKPOINT_DIR/configs/groupfree3d/groupfree3d_head-L6-O256_4xb8_scannet-seg.py/FULL_LOG.txt &
45
+
46
+ echo 'configs/h3dnet/h3dnet_8xb3_scannet-seg.py' &
47
+ mkdir -p $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py
48
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION h3dnet_3x8_scannet-3d-18class configs/h3dnet/h3dnet_8xb3_scannet-seg.py \
49
+ $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
50
+ 2>&1|tee $CHECKPOINT_DIR/configs/h3dnet/h3dnet_8xb3_scannet-seg.py/FULL_LOG.txt &
51
+
52
+ echo 'configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py' &
53
+ mkdir -p $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py
54
+ GPUS=4 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py \
55
+ $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
56
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvotenet/imvotenet_faster-rcnn-r50_fpn_4xb2_sunrgbd-3d.py/FULL_LOG.txt &
57
+
58
+ echo 'configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py' &
59
+ mkdir -p $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py
60
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION imvotenet_stage2_16x8_sunrgbd-3d-10class configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py \
61
+ $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
62
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvotenet/imvotenet_stage2_8xb16_sunrgbd-3d.py/FULL_LOG.txt &
63
+
64
+ echo 'configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py' &
65
+ mkdir -p $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py
66
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION imvoxelnet_4x8_kitti-3d-car configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py \
67
+ $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
68
+ 2>&1|tee $CHECKPOINT_DIR/configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py/FULL_LOG.txt &
69
+
70
+ echo 'configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py' &
71
+ mkdir -p $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py
72
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py \
73
+ $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
74
+ 2>&1|tee $CHECKPOINT_DIR/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py/FULL_LOG.txt &
75
+
76
+ echo 'configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py' &
77
+ mkdir -p $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py
78
+ GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py \
79
+ $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
80
+ 2>&1|tee $CHECKPOINT_DIR/configs/parta2/parta2_hv_secfpn_8xb2-cyclic-80e_kitti-3d-3class.py/FULL_LOG.txt &
81
+
82
echo 'configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py' &
mkdir -p $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py
GPUS=2 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION pointnet2_msg_16x2_cosine_80e_s3dis_seg-3d-13class configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py \
$CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-80e_s3dis-seg.py/FULL_LOG.txt &

echo 'configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py' &
mkdir -p $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py
GPUS=2 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION pointnet2_msg_16x2_cosine_250e_scannet_seg-3d-20class configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py \
$CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/pointnet2/pointnet2_msg_2xb16-cosine-250e_scannet-seg.py/FULL_LOG.txt &

echo 'configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py' &
mkdir -p $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py
GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_pointpillars_fpn_sbn-all_2x8_2x_lyft-3d configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py \
$CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py/FULL_LOG.txt &

echo 'configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py' &
mkdir -p $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py
GPUS=16 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py \
$CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/pointpillars/hv_pointpillars_secfpn_sbn_2x16_2x_waymoD5-3d-3class.py/FULL_LOG.txt &

echo 'configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py' &
mkdir -p $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py
GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py \
$CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d.py/FULL_LOG.txt &

echo 'configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py' &
mkdir -p $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py
GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_second_secfpn_6x8_80e_kitti-3d-3class configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py \
$CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py/FULL_LOG.txt &

echo 'configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py' &
mkdir -p $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py
GPUS=16 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION hv_ssn_secfpn_sbn-all_2x16_2x_lyft-3d configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py \
$CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/ssn/ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py/FULL_LOG.txt &

echo 'configs/votenet/votenet_8xb8_scannet-3d.py' &
mkdir -p $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py
GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=5 ./tools/slurm_train.sh $PARTITION votenet_8x8_scannet-3d-18class configs/votenet/votenet_8xb8_scannet-3d.py \
$CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py --cfg-options checkpoint_config.max_keep_ckpts=1 \
2>&1|tee $CHECKPOINT_DIR/configs/votenet/votenet_8xb8_scannet-3d.py/FULL_LOG.txt &
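Each job above caps the number of saved checkpoints through `--cfg-options checkpoint_config.max_keep_ckpts=1`. A minimal sketch of what such a flag does, assuming mmengine is installed and the repository root is the working directory (the dotted key is split and merged into the nested config dict):

```python
# Sketch only, not part of the benchmark script: apply the same override
# programmatically that the jobs above pass on the command line.
from mmengine.config import Config

cfg = Config.fromfile('configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py')
cfg.merge_from_dict({'checkpoint_config.max_keep_ckpts': 1})
```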
configs/.DS_Store ADDED
Binary file (12.3 kB)
configs/3dssd/3dssd_4xb4_kitti-3d-car.py ADDED
@@ -0,0 +1,119 @@
_base_ = [
    '../_base_/models/3dssd.py', '../_base_/datasets/kitti-3d-car.py',
    '../_base_/default_runtime.py'
]

# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
point_cloud_range = [0, -40, -5, 70, 40, 3]
input_modality = dict(use_lidar=True, use_camera=False)
backend_args = None

db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
    classes=class_names,
    sample_groups=dict(Car=15),
    points_loader=dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    backend_args=backend_args)

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='ObjectNoise',
        num_try=100,
        translation_std=[1.0, 1.0, 0],
        global_rot_range=[0.0, 0.0],
        rot_range=[-1.0471975511965976, 1.0471975511965976]),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.9, 1.1]),
    # 3DSSD can get a higher performance without this transform
    # dict(type='BackgroundPointsFilter', bbox_enlarge_range=(0.5, 2.0, 0.5)),
    dict(type='PointSample', num_points=16384),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]

test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
            dict(type='PointSample', num_points=16384),
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]

train_dataloader = dict(
    batch_size=4, dataset=dict(dataset=dict(pipeline=train_pipeline, )))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))

# model settings
model = dict(
    bbox_head=dict(
        num_classes=1,
        bbox_coder=dict(
            type='AnchorFreeBBoxCoder', num_dir_bins=12, with_rot=True)))

# optimizer
lr = 0.002  # max learning rate
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=lr, weight_decay=0.),
    clip_grad=dict(max_norm=35, norm_type=2),
)

# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=2)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# learning rate
param_scheduler = [
    dict(
        type='MultiStepLR',
        begin=0,
        end=80,
        by_epoch=True,
        milestones=[45, 60],
        gamma=0.1)
]
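The `_base_` list at the top of this config is merged recursively, so loading the file yields the fully composed model, dataset, and runtime settings, with the overrides defined above taking precedence. A minimal sketch, assuming mmengine is installed and the repository root is the working directory:

```python
from mmengine.config import Config

cfg = Config.fromfile('configs/3dssd/3dssd_4xb4_kitti-3d-car.py')
print(cfg.train_cfg.max_epochs)         # 80, from the schedule above
print(cfg.model.bbox_head.num_classes)  # 1, the single-class (Car) override
```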
configs/3dssd/README.md ADDED
@@ -0,0 +1,45 @@
# 3DSSD: Point-based 3D Single Stage Object Detector

> [3DSSD: Point-based 3D Single Stage Object Detector](https://arxiv.org/abs/2002.10187)

<!-- [ALGORITHM] -->

## Abstract

Currently, there have been many kinds of voxel-based 3D single stage detectors, while point-based single stage methods are still underexplored. In this paper, we first present a lightweight and effective point-based 3D single stage object detector, named 3DSSD, achieving a good balance between accuracy and efficiency. In this paradigm, all upsampling layers and refinement stage, which are indispensable in all existing point-based methods, are abandoned to reduce the large computation cost. We novelly propose a fusion sampling strategy in downsampling process to make detection on less representative points feasible. A delicate box prediction network including a candidate generation layer, an anchor-free regression head with a 3D center-ness assignment strategy is designed to meet with our demand of accuracy and speed. Our paradigm is an elegant single stage anchor-free framework, showing great superiority to other existing methods. We evaluate 3DSSD on widely used KITTI dataset and more challenging nuScenes dataset. Our method outperforms all state-of-the-art voxel-based single stage methods by a large margin, and has comparable performance to two stage point-based methods as well, with inference speed more than 25 FPS, 2x faster than former state-of-the-art point-based methods.

<div align=center>
<img src="https://user-images.githubusercontent.com/30491025/143854187-54ed1257-a046-4764-81cd-d2c8404137d3.png" width="800"/>
</div>

## Introduction

We implement 3DSSD and provide the results and checkpoints on the KITTI dataset.

Some settings in our implementation differ from the [official implementation](https://github.com/Jia-Research-Lab/3DSSD); in our experiments they bring only marginal differences in performance on KITTI. To simplify and unify our models, we skip these settings in our implementation. The differences are listed below:

1. We keep the scenes without any object, while the official code skips them during training. Consequently, the official implementation uses only 3229 training and 3394 validation samples, whereas we keep the standard 3712 training and 3769 validation samples, the same splits used for all other models of our implementation on KITTI.
2. We do not modify the decay of `batch normalization` during training.
3. When using [`DataBaseSampler`](https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/datasets/pipelines/dbsampler.py#L80) for data augmentation, the official code places the sampled objects with reference to road planes, while we do not.
4. We perform detection in LiDAR coordinates, while the official code uses camera coordinates.

## Results and models

### KITTI

|                    Backbone                    | Class | Lr schd | Mem (GB) | Inf time (fps) |           mAP            | Download |
| :--------------------------------------------: | :---: | :-----: | :------: | :------------: | :----------------------: | :------: |
| [PointNet2SAMSG](./3dssd_4xb4_kitti-3d-car.py) |  Car  |   72e   |   4.7    |                | 78.58(81.27)<sup>1</sup> | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/3dssd/3dssd_4x4_kitti-3d-car/3dssd_4x4_kitti-3d-car_20210818_203828-b89c8fc4.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/3dssd/3dssd_4x4_kitti-3d-car/3dssd_4x4_kitti-3d-car_20210818_203828.log.json) |

\[1\]: We report two different 3D object detection results here. 78.58 mAP is evaluated by our evaluation code, and 81.27 mAP is evaluated by the official development kit (the one used in the paper and in the official code of 3DSSD). We found that the commonly used Python implementation of [`rotate_iou`](https://github.com/traveller59/second.pytorch/blob/e42e4a0e17262ab7d180ee96a0a36427f2c20a44/second/core/non_max_suppression/nms_gpu.py#L605), which is used in our KITTI dataset evaluation, differs from the official implementation in the [KITTI benchmark](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d).

## Citation

```latex
@inproceedings{yang20203dssd,
    author = {Zetong Yang and Yanan Sun and Shu Liu and Jiaya Jia},
    title = {3DSSD: Point-based 3D Single Stage Object Detector},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
    year = {2020}
}
```
configs/3dssd/metafile.yml ADDED
@@ -0,0 +1,29 @@
Collections:
  - Name: 3DSSD
    Metadata:
      Training Data: KITTI
      Training Techniques:
        - AdamW
      Training Resources: 4x TITAN X
      Architecture:
        - PointNet++
    Paper:
      URL: https://arxiv.org/abs/2002.10187
      Title: '3DSSD: Point-based 3D Single Stage Object Detector'
    README: configs/3dssd/README.md
    Code:
      URL: https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/models/detectors/ssd3dnet.py#L7
      Version: v0.6.0

Models:
  - Name: 3dssd_4x4_kitti-3d-car
    In Collection: 3DSSD
    Config: configs/3dssd/3dssd_4xb4_kitti-3d-car.py
    Metadata:
      Training Memory (GB): 4.7
    Results:
      - Task: 3D Object Detection
        Dataset: KITTI
        Metrics:
          mAP: 78.58
    Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/3dssd/3dssd_4x4_kitti-3d-car/3dssd_4x4_kitti-3d-car_20210818_203828-b89c8fc4.pth
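The metafile is plain YAML, so its fields (config path, metrics, checkpoint URL) can be read programmatically. A hedged sketch, assuming PyYAML is available:

```python
import yaml

with open('configs/3dssd/metafile.yml') as f:
    meta = yaml.safe_load(f)
model = meta['Models'][0]
print(model['Config'])                 # configs/3dssd/3dssd_4xb4_kitti-3d-car.py
print(model['Results'][0]['Metrics'])  # {'mAP': 78.58}
print(model['Weights'])                # checkpoint download URL
```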
configs/_base_/datasets/kitti-3d-3class.py ADDED
@@ -0,0 +1,167 @@
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
input_modality = dict(use_lidar=True, use_camera=False)
metainfo = dict(classes=class_names)

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/kitti/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
    classes=class_names,
    sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6),
    points_loader=dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    backend_args=backend_args)

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,  # x, y, z, intensity
        use_dim=4,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='ObjectNoise',
        num_try=100,
        translation_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
        rot_range=[-0.78539816, 0.78539816]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range)
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]
# construct a pipeline for data and gt loading in the show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
    batch_size=6,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='RepeatDataset',
        times=2,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='kitti_infos_train.pkl',
            data_prefix=dict(pts='training/velodyne_reduced'),
            pipeline=train_pipeline,
            modality=input_modality,
            test_mode=False,
            metainfo=metainfo,
            # we use box_type_3d='LiDAR' in the KITTI and nuScenes datasets
            # and box_type_3d='Depth' in the SUN RGB-D and ScanNet datasets.
            box_type_3d='LiDAR',
            backend_args=backend_args)))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(pts='training/velodyne_reduced'),
        ann_file='kitti_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='LiDAR',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(pts='training/velodyne_reduced'),
        ann_file='kitti_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='LiDAR',
        backend_args=backend_args))
val_evaluator = dict(
    type='KittiMetric',
    ann_file=data_root + 'kitti_infos_val.pkl',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
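Each `dict(type=...)` entry in the pipelines above is a lazy specification that the runner builds through the transform registry at runtime. A minimal sketch of that mechanism, assuming mmdet3d (v1.1+) is installed:

```python
# Build one transform from its config dict, the same way the dataloader does.
from mmdet3d.registry import TRANSFORMS

shuffle = TRANSFORMS.build(dict(type='PointShuffle'))
# `shuffle` is now a callable applied to each sample's results dict.
```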
configs/_base_/datasets/kitti-3d-car.py ADDED
@@ -0,0 +1,165 @@
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
input_modality = dict(use_lidar=True, use_camera=False)
metainfo = dict(classes=class_names)

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/kitti/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
    classes=class_names,
    sample_groups=dict(Car=15),
    points_loader=dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    backend_args=backend_args)

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,  # x, y, z, intensity
        use_dim=4,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='ObjectNoise',
        num_try=100,
        translation_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
        rot_range=[-0.78539816, 0.78539816]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range)
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]
# construct a pipeline for data and gt loading in the show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
    batch_size=6,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='RepeatDataset',
        times=2,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='kitti_infos_train.pkl',
            data_prefix=dict(pts='training/velodyne_reduced'),
            pipeline=train_pipeline,
            modality=input_modality,
            test_mode=False,
            metainfo=metainfo,
            # we use box_type_3d='LiDAR' in the KITTI and nuScenes datasets
            # and box_type_3d='Depth' in the SUN RGB-D and ScanNet datasets.
            box_type_3d='LiDAR',
            backend_args=backend_args)))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(pts='training/velodyne_reduced'),
        ann_file='kitti_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='LiDAR',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(pts='training/velodyne_reduced'),
        ann_file='kitti_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='LiDAR',
        backend_args=backend_args))
val_evaluator = dict(
    type='KittiMetric',
    ann_file=data_root + 'kitti_infos_val.pkl',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/kitti-mono3d.py ADDED
@@ -0,0 +1,100 @@
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
input_modality = dict(use_lidar=False, use_camera=True)
metainfo = dict(classes=class_names)

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/kitti/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    dict(type='Resize', scale=(1242, 375), keep_ratio=True),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]
test_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(type='Resize', scale=(1242, 375), keep_ratio=True),
    dict(type='Pack3DDetInputs', keys=['img'])
]
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(type='Pack3DDetInputs', keys=['img'])
]

train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='kitti_infos_train.pkl',
        data_prefix=dict(img='training/image_2'),
        pipeline=train_pipeline,
        modality=input_modality,
        load_type='fov_image_based',
        test_mode=False,
        metainfo=metainfo,
        # we use box_type_3d='Camera' in the monocular 3d
        # detection task
        box_type_3d='Camera',
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(img='training/image_2'),
        ann_file='kitti_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        load_type='fov_image_based',
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Camera',
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='KittiMetric',
    ann_file=data_root + 'kitti_infos_val.pkl',
    metric='bbox',
    backend_args=backend_args)

test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/lyft-3d-range100.py ADDED
@@ -0,0 +1,150 @@
# If the point cloud range is changed, the models should also change their
# point cloud range accordingly
point_cloud_range = [-100, -100, -5, 100, 100, 3]
# For Lyft we usually do 9-class detection
class_names = [
    'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',
    'bicycle', 'pedestrian', 'animal'
]
dataset_type = 'LyftDataset'
data_root = 'data/lyft/'
data_prefix = dict(pts='v1.01-train/lidar', img='', sweeps='v1.01-train/lidar')
# Input modality for the Lyft dataset; this is consistent with the submission
# format, which requires the information in input_modality.
input_modality = dict(
    use_lidar=True,
    use_camera=False,
    use_radar=False,
    use_map=False,
    use_external=False)

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/lyft/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range),
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]
# construct a pipeline for data and gt loading in the show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(type='Pack3DDetInputs', keys=['points'])
]

train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='lyft_infos_train.pkl',
        pipeline=train_pipeline,
        metainfo=dict(classes=class_names),
        modality=input_modality,
        data_prefix=data_prefix,
        test_mode=False,
        box_type_3d='LiDAR',
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='lyft_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=dict(classes=class_names),
        modality=input_modality,
        test_mode=True,
        data_prefix=data_prefix,
        box_type_3d='LiDAR',
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='LyftMetric',
    data_root=data_root,
    ann_file='lyft_infos_val.pkl',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
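`PointsRangeFilter` above keeps only the points inside the 100 m square range. A conceptual sketch of that filtering (not the library code), assuming NumPy:

```python
import numpy as np

point_cloud_range = [-100, -100, -5, 100, 100, 3]

def filter_points(points: np.ndarray) -> np.ndarray:
    """Keep points whose (x, y, z) fall inside the configured range."""
    low = np.array(point_cloud_range[:3])
    high = np.array(point_cloud_range[3:])
    mask = np.all((points[:, :3] >= low) & (points[:, :3] < high), axis=1)
    return points[mask]
```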
configs/_base_/datasets/lyft-3d.py ADDED
@@ -0,0 +1,160 @@
# If the point cloud range is changed, the models should also change their
# point cloud range accordingly
point_cloud_range = [-80, -80, -5, 80, 80, 3]
# For Lyft we usually do 9-class detection
class_names = [
    'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',
    'bicycle', 'pedestrian', 'animal'
]
dataset_type = 'LyftDataset'
data_root = 'data/lyft/'
# Input modality for the Lyft dataset; this is consistent with the submission
# format, which requires the information in input_modality.
input_modality = dict(use_lidar=True, use_camera=False)
data_prefix = dict(pts='v1.01-train/lidar', img='', sweeps='v1.01-train/lidar')

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/lyft/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range)
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]
# construct a pipeline for data and gt loading in the show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='lyft_infos_train.pkl',
        pipeline=train_pipeline,
        metainfo=dict(classes=class_names),
        modality=input_modality,
        data_prefix=data_prefix,
        test_mode=False,
        box_type_3d='LiDAR',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='lyft_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=dict(classes=class_names),
        modality=input_modality,
        data_prefix=data_prefix,
        test_mode=True,
        box_type_3d='LiDAR',
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='lyft_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=dict(classes=class_names),
        modality=input_modality,
        test_mode=True,
        data_prefix=data_prefix,
        box_type_3d='LiDAR',
        backend_args=backend_args))

val_evaluator = dict(
    type='LyftMetric',
    data_root=data_root,
    ann_file='lyft_infos_val.pkl',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/nuim-instance.py ADDED
@@ -0,0 +1,70 @@
dataset_type = 'CocoDataset'
data_root = 'data/nuimages/'
class_names = [
    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/nuimages/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='Resize',
        img_scale=[(1280, 720), (1920, 1080)],
        multiscale_mode='range',
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='PackDetInputs'),
]
test_pipeline = [
    dict(type='LoadImageFromFile', backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1600, 900),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
        ]),
    dict(
        type='PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor')),
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/nuimages_v1.0-train.json',
        img_prefix=data_root,
        classes=class_names,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/nuimages_v1.0-val.json',
        img_prefix=data_root,
        classes=class_names,
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/nuimages_v1.0-val.json',
        img_prefix=data_root,
        classes=class_names,
        pipeline=test_pipeline))
evaluation = dict(metric=['bbox', 'segm'])
configs/_base_/datasets/nus-3d.py ADDED
@@ -0,0 +1,169 @@
# If the point cloud range is changed, the models should also change their
# point cloud range accordingly
point_cloud_range = [-50, -50, -5, 50, 50, 3]
# Converting the LiDAR-coordinate point cloud range to the ego-coordinate
# range using the calibration info can bring a small improvement on nuScenes.
# point_cloud_range = [-50, -50.8, -5, 50, 49.2, 3]
# For nuScenes we usually do 10-class detection
class_names = [
    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
metainfo = dict(classes=class_names)
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
# Input modality for the nuScenes dataset; this is consistent with the
# submission format, which requires the information in input_modality.
input_modality = dict(use_lidar=True, use_camera=False)
data_prefix = dict(pts='samples/LIDAR_TOP', img='', sweeps='sweeps/LIDAR_TOP')

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/nuscenes/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectNameFilter', classes=class_names),
    dict(type='PointShuffle'),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        test_mode=True,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range)
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]
# construct a pipeline for data and gt loading in the show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        test_mode=True,
        backend_args=backend_args),
    dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        metainfo=metainfo,
        modality=input_modality,
        test_mode=False,
        data_prefix=data_prefix,
        # we use box_type_3d='LiDAR' in the KITTI and nuScenes datasets
        # and box_type_3d='Depth' in the SUN RGB-D and ScanNet datasets.
        box_type_3d='LiDAR',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        modality=input_modality,
        data_prefix=data_prefix,
        test_mode=True,
        box_type_3d='LiDAR',
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        modality=input_modality,
        test_mode=True,
        data_prefix=data_prefix,
        box_type_3d='LiDAR',
        backend_args=backend_args))

val_evaluator = dict(
    type='NuScenesMetric',
    data_root=data_root,
    ann_file=data_root + 'nuscenes_infos_val.pkl',
    metric='bbox',
    backend_args=backend_args)
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
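`LoadPointsFromMultiSweeps` with `sweeps_num=10` densifies each key frame by concatenating past LiDAR sweeps, tagged with their time lag relative to the key frame. A conceptual sketch, not the library implementation (the sweep fields below are hypothetical):

```python
import numpy as np

def load_multi_sweeps(key_points, sweeps, sweeps_num=10):
    # Key frame gets time lag 0; each past sweep carries its own lag.
    clouds = [np.hstack([key_points, np.zeros((len(key_points), 1))])]
    for sweep in sweeps[:sweeps_num]:
        pts, dt = sweep['points'], sweep['time_lag']  # hypothetical fields
        clouds.append(np.hstack([pts, np.full((len(pts), 1), dt)]))
    return np.concatenate(clouds, axis=0)
```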
configs/_base_/datasets/nus-mono3d.py ADDED
@@ -0,0 +1,119 @@
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
class_names = [
    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
metainfo = dict(classes=class_names)
# Input modality for the nuScenes dataset; this is consistent with the
# submission format, which requires the information in input_modality.
input_modality = dict(use_lidar=False, use_camera=True)

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/nuscenes/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=True,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    dict(type='Resize', scale=(1600, 900), keep_ratio=True),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'attr_labels',
            'gt_bboxes_3d', 'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]

test_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(type='mmdet.Resize', scale=(1600, 900), keep_ratio=True),
    dict(type='Pack3DDetInputs', keys=['img'])
]

train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='',
            CAM_FRONT='samples/CAM_FRONT',
            CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT',
            CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT',
            CAM_BACK='samples/CAM_BACK',
            CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
            CAM_BACK_LEFT='samples/CAM_BACK_LEFT'),
        ann_file='nuscenes_infos_train.pkl',
        load_type='mv_image_based',
        pipeline=train_pipeline,
        metainfo=metainfo,
        modality=input_modality,
        test_mode=False,
        # we use box_type_3d='Camera' in the monocular 3d
        # detection task
        box_type_3d='Camera',
        use_valid_flag=True,
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='',
            CAM_FRONT='samples/CAM_FRONT',
            CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT',
            CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT',
            CAM_BACK='samples/CAM_BACK',
            CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
            CAM_BACK_LEFT='samples/CAM_BACK_LEFT'),
        ann_file='nuscenes_infos_val.pkl',
        load_type='mv_image_based',
        pipeline=test_pipeline,
        modality=input_modality,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Camera',
        use_valid_flag=True,
        backend_args=backend_args))
test_dataloader = val_dataloader

val_evaluator = dict(
    type='NuScenesMetric',
    data_root=data_root,
    ann_file=data_root + 'nuscenes_infos_val.pkl',
    metric='bbox',
    backend_args=backend_args)

test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/s3dis-3d.py ADDED
@@ -0,0 +1,134 @@
# dataset settings
dataset_type = 'S3DISDataset'
data_root = 'data/s3dis/'

# Example of using a different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are not supported yet)

# data_root = 's3://openmmlab/datasets/detection3d/s3dis/'

# Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

metainfo = dict(classes=('table', 'chair', 'sofa', 'bookcase', 'board'))
train_area = [1, 2, 3, 4, 6]
test_area = 5

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5],
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='PointSample', num_points=100000),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.087266, 0.087266],
        scale_ratio_range=[0.9, 1.1],
        translation_std=[.1, .1, .1],
        shift_height=False),
    dict(type='NormalizePointsColor', color_mean=None),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5],
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(
                type='RandomFlip3D',
                sync_2d=False,
                flip_ratio_bev_horizontal=0.5,
                flip_ratio_bev_vertical=0.5),
            dict(type='PointSample', num_points=100000),
            dict(type='NormalizePointsColor', color_mean=None),
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]

train_dataloader = dict(
    batch_size=8,
    num_workers=4,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='RepeatDataset',
        times=13,
        dataset=dict(
            type='ConcatDataset',
            datasets=[
                dict(
                    type=dataset_type,
                    data_root=data_root,
                    ann_file=f's3dis_infos_Area_{i}.pkl',
                    pipeline=train_pipeline,
                    filter_empty_gt=True,
                    metainfo=metainfo,
                    box_type_3d='Depth',
                    backend_args=backend_args) for i in train_area
            ])))

val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=f's3dis_infos_Area_{test_area}.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=f's3dis_infos_Area_{test_area}.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth',
        backend_args=backend_args))
val_evaluator = dict(type='IndoorMetric')
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
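The training set above concatenates Areas 1, 2, 3, 4 and 6 by expanding one per-area info file each. A trivial check of that expansion:

```python
train_area = [1, 2, 3, 4, 6]
print([f's3dis_infos_Area_{i}.pkl' for i in train_area])
# ['s3dis_infos_Area_1.pkl', 's3dis_infos_Area_2.pkl', 's3dis_infos_Area_3.pkl',
#  's3dis_infos_Area_4.pkl', 's3dis_infos_Area_6.pkl']
```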
configs/_base_/datasets/s3dis-seg.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # For S3DIS seg we usually do 13-class segmentation
2
+ class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',
3
+ 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter')
4
+ metainfo = dict(classes=class_names)
5
+ dataset_type = 'S3DISSegDataset'
6
+ data_root = 'data/s3dis/'
7
+ input_modality = dict(use_lidar=True, use_camera=False)
8
+ data_prefix = dict(
9
+ pts='points',
10
+ pts_instance_mask='instance_mask',
11
+ pts_semantic_mask='semantic_mask')
12
+
13
+ # Example to use different file client
14
+ # Method 1: simply set the data root and let the file I/O module
15
+ # automatically infer from prefix (not support LMDB and Memcache yet)
16
+
17
+ # data_root = 's3://openmmlab/datasets/detection3d/s3dis/'
18
+
19
+ # Method 2: Use backend_args, file_client_args in versions before 1.1.0
20
+ # backend_args = dict(
21
+ # backend='petrel',
22
+ # path_mapping=dict({
23
+ # './data/': 's3://openmmlab/datasets/detection3d/',
24
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
25
+ # }))
26
+ backend_args = None
27
+
28
+ num_points = 4096
29
+ train_area = [1, 2, 3, 4, 6]
30
+ test_area = 5
31
+ train_pipeline = [
32
+ dict(
33
+ type='LoadPointsFromFile',
34
+ coord_type='DEPTH',
35
+ shift_height=False,
36
+ use_color=True,
37
+ load_dim=6,
38
+ use_dim=[0, 1, 2, 3, 4, 5],
39
+ backend_args=backend_args),
40
+ dict(
41
+ type='LoadAnnotations3D',
42
+ with_bbox_3d=False,
43
+ with_label_3d=False,
44
+ with_mask_3d=False,
45
+ with_seg_3d=True,
46
+ backend_args=backend_args),
47
+ dict(type='PointSegClassMapping'),
48
+ dict(
49
+ type='IndoorPatchPointSample',
50
+ num_points=num_points,
51
+ block_size=1.0,
52
+ ignore_index=len(class_names),
53
+ use_normalized_coord=True,
54
+ enlarge_size=0.2,
55
+ min_unique_num=None),
56
+ dict(type='NormalizePointsColor', color_mean=None),
57
+ dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
58
+ ]
59
+ test_pipeline = [
60
+ dict(
61
+ type='LoadPointsFromFile',
62
+ coord_type='DEPTH',
63
+ shift_height=False,
64
+ use_color=True,
65
+ load_dim=6,
66
+ use_dim=[0, 1, 2, 3, 4, 5],
67
+ backend_args=backend_args),
68
+ dict(
69
+ type='LoadAnnotations3D',
70
+ with_bbox_3d=False,
71
+ with_label_3d=False,
72
+ with_mask_3d=False,
73
+ with_seg_3d=True,
74
+ backend_args=backend_args),
75
+ dict(type='NormalizePointsColor', color_mean=None),
76
+ dict(type='Pack3DDetInputs', keys=['points'])
77
+ ]
78
+ # construct a pipeline for data and gt loading in show function
79
+ # please keep its loading function consistent with test_pipeline (e.g. client)
80
+ # we need to load gt seg_mask!
81
+ eval_pipeline = [
82
+ dict(
83
+ type='LoadPointsFromFile',
84
+ coord_type='DEPTH',
85
+ shift_height=False,
86
+ use_color=True,
87
+ load_dim=6,
88
+ use_dim=[0, 1, 2, 3, 4, 5],
89
+ backend_args=backend_args),
90
+ dict(type='NormalizePointsColor', color_mean=None),
91
+ dict(type='Pack3DDetInputs', keys=['points'])
92
+ ]
93
+ tta_pipeline = [
94
+ dict(
95
+ type='LoadPointsFromFile',
96
+ coord_type='DEPTH',
97
+ shift_height=False,
98
+ use_color=True,
99
+ load_dim=6,
100
+ use_dim=[0, 1, 2, 3, 4, 5],
101
+ backend_args=backend_args),
102
+ dict(
103
+ type='LoadAnnotations3D',
104
+ with_bbox_3d=False,
105
+ with_label_3d=False,
106
+ with_mask_3d=False,
107
+ with_seg_3d=True,
108
+ backend_args=backend_args),
109
+ dict(type='NormalizePointsColor', color_mean=None),
110
+ dict(
111
+ type='TestTimeAug',
112
+ transforms=[[
113
+ dict(
114
+ type='RandomFlip3D',
115
+ sync_2d=False,
116
+ flip_ratio_bev_horizontal=0.,
117
+ flip_ratio_bev_vertical=0.)
118
+ ], [dict(type='Pack3DDetInputs', keys=['points'])]])
119
+ ]
120
+
121
+ # train on area 1, 2, 3, 4, 6
122
+ # test on area 5
123
+ train_dataloader = dict(
124
+ batch_size=8,
125
+ num_workers=4,
126
+ persistent_workers=True,
127
+ sampler=dict(type='DefaultSampler', shuffle=True),
128
+ dataset=dict(
129
+ type=dataset_type,
130
+ data_root=data_root,
131
+ ann_files=[f's3dis_infos_Area_{i}.pkl' for i in train_area],
132
+ metainfo=metainfo,
133
+ data_prefix=data_prefix,
134
+ pipeline=train_pipeline,
135
+ modality=input_modality,
136
+ ignore_index=len(class_names),
137
+ scene_idxs=[
138
+ f'seg_info/Area_{i}_resampled_scene_idxs.npy' for i in train_area
139
+ ],
140
+ test_mode=False,
141
+ backend_args=backend_args))
142
+ test_dataloader = dict(
143
+ batch_size=1,
144
+ num_workers=1,
145
+ persistent_workers=True,
146
+ drop_last=False,
147
+ sampler=dict(type='DefaultSampler', shuffle=False),
148
+ dataset=dict(
149
+ type=dataset_type,
150
+ data_root=data_root,
151
+ ann_files=f's3dis_infos_Area_{test_area}.pkl',
152
+ metainfo=metainfo,
153
+ data_prefix=data_prefix,
154
+ pipeline=test_pipeline,
155
+ modality=input_modality,
156
+ ignore_index=len(class_names),
157
+ scene_idxs=f'seg_info/Area_{test_area}_resampled_scene_idxs.npy',
158
+ test_mode=True,
159
+ backend_args=backend_args))
160
+ val_dataloader = test_dataloader
161
+
162
+ val_evaluator = dict(type='SegMetric')
163
+ test_evaluator = val_evaluator
164
+
165
+ vis_backends = [dict(type='LocalVisBackend')]
166
+ visualizer = dict(
167
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
168
+
169
+ tta_model = dict(type='Seg3DTTAModel')
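Because the Area-5 split is baked into the annotation file names above, evaluating on a different fold means overriding those dataset entries in a child config. A minimal sketch (the fold choice is illustrative, and `train_area` would need the matching change):

```python
# Hypothetical override: hold out Area 4 instead of Area 5.
_base_ = ['../_base_/datasets/s3dis-seg.py']

test_area = 4
test_dataloader = dict(
    dataset=dict(
        ann_files=f's3dis_infos_Area_{test_area}.pkl',
        scene_idxs=f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'))
val_dataloader = test_dataloader
```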
configs/_base_/datasets/scannet-3d.py ADDED
@@ -0,0 +1,141 @@
1
+ # dataset settings
2
+ dataset_type = 'ScanNetDataset'
3
+ data_root = 'data/scannet/'
4
+
5
+ metainfo = dict(
6
+ classes=('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
7
+ 'bookshelf', 'picture', 'counter', 'desk', 'curtain',
8
+ 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
9
+ 'garbagebin'))
10
+
11
+ # Example to use different file client
12
+ # Method 1: simply set the data root and let the file I/O module
13
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
14
+
15
+ # data_root = 's3://openmmlab/datasets/detection3d/scannet/'
16
+
17
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
18
+ # backend_args = dict(
19
+ # backend='petrel',
20
+ # path_mapping=dict({
21
+ # './data/': 's3://openmmlab/datasets/detection3d/',
22
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
23
+ # }))
24
+ backend_args = None
25
+
26
+ train_pipeline = [
27
+ dict(
28
+ type='LoadPointsFromFile',
29
+ coord_type='DEPTH',
30
+ shift_height=True,
31
+ load_dim=6,
32
+ use_dim=[0, 1, 2],
33
+ backend_args=backend_args),
34
+ dict(
35
+ type='LoadAnnotations3D',
36
+ with_bbox_3d=True,
37
+ with_label_3d=True,
38
+ with_mask_3d=True,
39
+ with_seg_3d=True,
40
+ backend_args=backend_args),
41
+ dict(type='GlobalAlignment', rotation_axis=2),
42
+ dict(type='PointSegClassMapping'),
43
+ dict(type='PointSample', num_points=40000),
44
+ dict(
45
+ type='RandomFlip3D',
46
+ sync_2d=False,
47
+ flip_ratio_bev_horizontal=0.5,
48
+ flip_ratio_bev_vertical=0.5),
49
+ dict(
50
+ type='GlobalRotScaleTrans',
51
+ rot_range=[-0.087266, 0.087266],
52
+ scale_ratio_range=[1.0, 1.0],
53
+ shift_height=True),
54
+ dict(
55
+ type='Pack3DDetInputs',
56
+ keys=[
57
+ 'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
58
+ 'pts_instance_mask'
59
+ ])
60
+ ]
61
+ test_pipeline = [
62
+ dict(
63
+ type='LoadPointsFromFile',
64
+ coord_type='DEPTH',
65
+ shift_height=True,
66
+ load_dim=6,
67
+ use_dim=[0, 1, 2],
68
+ backend_args=backend_args),
69
+ dict(type='GlobalAlignment', rotation_axis=2),
70
+ dict(
71
+ type='MultiScaleFlipAug3D',
72
+ img_scale=(1333, 800),
73
+ pts_scale_ratio=1,
74
+ flip=False,
75
+ transforms=[
76
+ dict(
77
+ type='GlobalRotScaleTrans',
78
+ rot_range=[0, 0],
79
+ scale_ratio_range=[1., 1.],
80
+ translation_std=[0, 0, 0]),
81
+ dict(
82
+ type='RandomFlip3D',
83
+ sync_2d=False,
84
+ flip_ratio_bev_horizontal=0.5,
85
+ flip_ratio_bev_vertical=0.5),
86
+ dict(type='PointSample', num_points=40000),
87
+ ]),
88
+ dict(type='Pack3DDetInputs', keys=['points'])
89
+ ]
90
+
91
+ train_dataloader = dict(
92
+ batch_size=8,
93
+ num_workers=4,
94
+ sampler=dict(type='DefaultSampler', shuffle=True),
95
+ dataset=dict(
96
+ type='RepeatDataset',
97
+ times=5,
98
+ dataset=dict(
99
+ type=dataset_type,
100
+ data_root=data_root,
101
+ ann_file='scannet_infos_train.pkl',
102
+ pipeline=train_pipeline,
103
+ filter_empty_gt=False,
104
+ metainfo=metainfo,
105
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
106
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
107
+ box_type_3d='Depth',
108
+ backend_args=backend_args)))
109
+
110
+ val_dataloader = dict(
111
+ batch_size=1,
112
+ num_workers=1,
113
+ sampler=dict(type='DefaultSampler', shuffle=False),
114
+ dataset=dict(
115
+ type=dataset_type,
116
+ data_root=data_root,
117
+ ann_file='scannet_infos_val.pkl',
118
+ pipeline=test_pipeline,
119
+ metainfo=metainfo,
120
+ test_mode=True,
121
+ box_type_3d='Depth',
122
+ backend_args=backend_args))
123
+ test_dataloader = dict(
124
+ batch_size=1,
125
+ num_workers=1,
126
+ sampler=dict(type='DefaultSampler', shuffle=False),
127
+ dataset=dict(
128
+ type=dataset_type,
129
+ data_root=data_root,
130
+ ann_file='scannet_infos_val.pkl',
131
+ pipeline=test_pipeline,
132
+ metainfo=metainfo,
133
+ test_mode=True,
134
+ box_type_3d='Depth',
135
+ backend_args=backend_args))
136
+ val_evaluator = dict(type='IndoorMetric')
137
+ test_evaluator = val_evaluator
138
+
139
+ vis_backends = [dict(type='LocalVisBackend')]
140
+ visualizer = dict(
141
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/scannet-seg.py ADDED
@@ -0,0 +1,164 @@
1
+ # For ScanNet seg we usually do 20-class segmentation
2
+ class_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table',
3
+ 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk',
4
+ 'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink',
5
+ 'bathtub', 'otherfurniture')
6
+ metainfo = dict(classes=class_names)
7
+ dataset_type = 'ScanNetSegDataset'
8
+ data_root = 'data/scannet/'
9
+ input_modality = dict(use_lidar=True, use_camera=False)
10
+ data_prefix = dict(
11
+ pts='points',
12
+ pts_instance_mask='instance_mask',
13
+ pts_semantic_mask='semantic_mask')
14
+
15
+ # Example to use different file client
16
+ # Method 1: simply set the data root and let the file I/O module
17
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
18
+
19
+ # data_root = 's3://openmmlab/datasets/detection3d/scannet/'
20
+
21
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
22
+ # backend_args = dict(
23
+ # backend='petrel',
24
+ # path_mapping=dict({
25
+ # './data/': 's3://openmmlab/datasets/detection3d/',
26
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
27
+ # }))
28
+ backend_args = None
29
+
30
+ num_points = 8192
31
+ train_pipeline = [
32
+ dict(
33
+ type='LoadPointsFromFile',
34
+ coord_type='DEPTH',
35
+ shift_height=False,
36
+ use_color=True,
37
+ load_dim=6,
38
+ use_dim=[0, 1, 2, 3, 4, 5],
39
+ backend_args=backend_args),
40
+ dict(
41
+ type='LoadAnnotations3D',
42
+ with_bbox_3d=False,
43
+ with_label_3d=False,
44
+ with_mask_3d=False,
45
+ with_seg_3d=True,
46
+ backend_args=backend_args),
47
+ dict(type='PointSegClassMapping'),
48
+ dict(
49
+ type='IndoorPatchPointSample',
50
+ num_points=num_points,
51
+ block_size=1.5,
52
+ ignore_index=len(class_names),
53
+ use_normalized_coord=False,
54
+ enlarge_size=0.2,
55
+ min_unique_num=None),
56
+ dict(type='NormalizePointsColor', color_mean=None),
57
+ dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
58
+ ]
59
+ test_pipeline = [
60
+ dict(
61
+ type='LoadPointsFromFile',
62
+ coord_type='DEPTH',
63
+ shift_height=False,
64
+ use_color=True,
65
+ load_dim=6,
66
+ use_dim=[0, 1, 2, 3, 4, 5],
67
+ backend_args=backend_args),
68
+ dict(
69
+ type='LoadAnnotations3D',
70
+ with_bbox_3d=False,
71
+ with_label_3d=False,
72
+ with_mask_3d=False,
73
+ with_seg_3d=True,
74
+ backend_args=backend_args),
75
+ dict(type='NormalizePointsColor', color_mean=None),
76
+ dict(type='Pack3DDetInputs', keys=['points'])
77
+ ]
78
+ # construct a pipeline for data and gt loading in show function
79
+ # please keep its loading function consistent with test_pipeline (e.g. client)
80
+ # we need to load gt seg_mask!
81
+ eval_pipeline = [
82
+ dict(
83
+ type='LoadPointsFromFile',
84
+ coord_type='DEPTH',
85
+ shift_height=False,
86
+ use_color=True,
87
+ load_dim=6,
88
+ use_dim=[0, 1, 2, 3, 4, 5],
89
+ backend_args=backend_args),
90
+ dict(type='NormalizePointsColor', color_mean=None),
91
+ dict(type='Pack3DDetInputs', keys=['points'])
92
+ ]
93
+ tta_pipeline = [
94
+ dict(
95
+ type='LoadPointsFromFile',
96
+ coord_type='DEPTH',
97
+ shift_height=False,
98
+ use_color=True,
99
+ load_dim=6,
100
+ use_dim=[0, 1, 2, 3, 4, 5],
101
+ backend_args=backend_args),
102
+ dict(
103
+ type='LoadAnnotations3D',
104
+ with_bbox_3d=False,
105
+ with_label_3d=False,
106
+ with_mask_3d=False,
107
+ with_seg_3d=True,
108
+ backend_args=backend_args),
109
+ dict(type='NormalizePointsColor', color_mean=None),
110
+ dict(
111
+ type='TestTimeAug',
112
+ transforms=[[
113
+ dict(
114
+ type='RandomFlip3D',
115
+ sync_2d=False,
116
+ flip_ratio_bev_horizontal=0.,
117
+ flip_ratio_bev_vertical=0.)
118
+ ], [dict(type='Pack3DDetInputs', keys=['points'])]])
119
+ ]
120
+
121
+ train_dataloader = dict(
122
+ batch_size=8,
123
+ num_workers=4,
124
+ persistent_workers=True,
125
+ sampler=dict(type='DefaultSampler', shuffle=True),
126
+ dataset=dict(
127
+ type=dataset_type,
128
+ data_root=data_root,
129
+ ann_file='scannet_infos_train.pkl',
130
+ metainfo=metainfo,
131
+ data_prefix=data_prefix,
132
+ pipeline=train_pipeline,
133
+ modality=input_modality,
134
+ ignore_index=len(class_names),
135
+ scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy',
136
+ test_mode=False,
137
+ backend_args=backend_args))
138
+ test_dataloader = dict(
139
+ batch_size=1,
140
+ num_workers=1,
141
+ persistent_workers=True,
142
+ drop_last=False,
143
+ sampler=dict(type='DefaultSampler', shuffle=False),
144
+ dataset=dict(
145
+ type=dataset_type,
146
+ data_root=data_root,
147
+ ann_file='scannet_infos_val.pkl',
148
+ metainfo=metainfo,
149
+ data_prefix=data_prefix,
150
+ pipeline=test_pipeline,
151
+ modality=input_modality,
152
+ ignore_index=len(class_names),
153
+ test_mode=True,
154
+ backend_args=backend_args))
155
+ val_dataloader = test_dataloader
156
+
157
+ val_evaluator = dict(type='SegMetric')
158
+ test_evaluator = val_evaluator
159
+
160
+ vis_backends = [dict(type='LocalVisBackend')]
161
+ visualizer = dict(
162
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
163
+
164
+ tta_model = dict(type='Seg3DTTAModel')
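`ignore_index=len(class_names)` encodes the convention that unannotated points carry label 20 (one past the last valid ScanNet class) so that losses and metrics can drop them. A self-contained sketch of that masking, using toy labels rather than real ScanNet data:

```python
import numpy as np

num_classes = 20            # the 20 ScanNet segmentation classes above
ignore_index = num_classes  # the value assigned to unannotated points

pred = np.array([0, 3, 5, 5, 2, 1, 7, 4])
gt = np.array([0, 3, 20, 5, 20, 1, 7, 2])  # 20 == unannotated
valid = gt != ignore_index                 # mask out ignored points
acc = (pred[valid] == gt[valid]).mean()
print(f'accuracy over {valid.sum()} labelled points: {acc:.2f}')  # -> 0.83
```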
configs/_base_/datasets/semantickitti.py ADDED
@@ -0,0 +1,224 @@
1
+ # For SemanticKITTI we usually do 19-class segmentation.
2
+ # For labels_map we follow the uniform format of MMDetection & MMSegmentation
3
+ # i.e. we consider the unlabeled class as the last one, which is different
4
+ # from the original implementation of some methods e.g. Cylinder3D.
5
+ dataset_type = 'SemanticKittiDataset'
6
+ data_root = 'data/semantickitti/'
7
+ class_names = [
8
+ 'car', 'bicycle', 'motorcycle', 'truck', 'bus', 'person', 'bicyclist',
9
+ 'motorcyclist', 'road', 'parking', 'sidewalk', 'other-ground', 'building',
10
+ 'fence', 'vegetation', 'trunk', 'terrain', 'pole', 'traffic-sign'
11
+ ]
12
+ labels_map = {
13
+ 0: 19, # "unlabeled"
14
+ 1: 19, # "outlier" mapped to "unlabeled" --------------mapped
15
+ 10: 0, # "car"
16
+ 11: 1, # "bicycle"
17
+ 13: 4, # "bus" mapped to "other-vehicle" --------------mapped
18
+ 15: 2, # "motorcycle"
19
+ 16: 4, # "on-rails" mapped to "other-vehicle" ---------mapped
20
+ 18: 3, # "truck"
21
+ 20: 4, # "other-vehicle"
22
+ 30: 5, # "person"
23
+ 31: 6, # "bicyclist"
24
+ 32: 7, # "motorcyclist"
25
+ 40: 8, # "road"
26
+ 44: 9, # "parking"
27
+ 48: 10, # "sidewalk"
28
+ 49: 11, # "other-ground"
29
+ 50: 12, # "building"
30
+ 51: 13, # "fence"
31
+ 52: 19, # "other-structure" mapped to "unlabeled" ------mapped
32
+ 60: 8, # "lane-marking" to "road" ---------------------mapped
33
+ 70: 14, # "vegetation"
34
+ 71: 15, # "trunk"
35
+ 72: 16, # "terrain"
36
+ 80: 17, # "pole"
37
+ 81: 18, # "traffic-sign"
38
+ 99: 19, # "other-object" to "unlabeled" ----------------mapped
39
+ 252: 0, # "moving-car" to "car" ------------------------mapped
40
+ 253: 6, # "moving-bicyclist" to "bicyclist" ------------mapped
41
+ 254: 5, # "moving-person" to "person" ------------------mapped
42
+ 255: 7, # "moving-motorcyclist" to "motorcyclist" ------mapped
43
+ 256: 4, # "moving-on-rails" mapped to "other-vehicle" --mapped
44
+ 257: 4, # "moving-bus" mapped to "other-vehicle" -------mapped
45
+ 258: 3, # "moving-truck" to "truck" --------------------mapped
46
+ 259: 4 # "moving-other-vehicle" to "other-vehicle" ----mapped
47
+ }
48
+
49
+ metainfo = dict(
50
+ classes=class_names, seg_label_mapping=labels_map, max_label=259)
51
+
52
+ input_modality = dict(use_lidar=True, use_camera=False)
53
+
54
+ # Example to use different file client
55
+ # Method 1: simply set the data root and let the file I/O module
56
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
57
+
58
+ # data_root = 's3://openmmlab/datasets/detection3d/semantickitti/'
59
+
60
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
61
+ # backend_args = dict(
62
+ # backend='petrel',
63
+ # path_mapping=dict({
64
+ # './data/': 's3://openmmlab/datasets/detection3d/',
65
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
66
+ # }))
67
+ backend_args = None
68
+
69
+ train_pipeline = [
70
+ dict(
71
+ type='LoadPointsFromFile',
72
+ coord_type='LIDAR',
73
+ load_dim=4,
74
+ use_dim=4,
75
+ backend_args=backend_args),
76
+ dict(
77
+ type='LoadAnnotations3D',
78
+ with_bbox_3d=False,
79
+ with_label_3d=False,
80
+ with_seg_3d=True,
81
+ seg_3d_dtype='np.int32',
82
+ seg_offset=2**16,
83
+ dataset_type='semantickitti',
84
+ backend_args=backend_args),
85
+ dict(type='PointSegClassMapping'),
86
+ dict(
87
+ type='RandomFlip3D',
88
+ sync_2d=False,
89
+ flip_ratio_bev_horizontal=0.5,
90
+ flip_ratio_bev_vertical=0.5),
91
+ dict(
92
+ type='GlobalRotScaleTrans',
93
+ rot_range=[-0.78539816, 0.78539816],
94
+ scale_ratio_range=[0.95, 1.05],
95
+ translation_std=[0.1, 0.1, 0.1],
96
+ ),
97
+ dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
98
+ ]
99
+ test_pipeline = [
100
+ dict(
101
+ type='LoadPointsFromFile',
102
+ coord_type='LIDAR',
103
+ load_dim=4,
104
+ use_dim=4,
105
+ backend_args=backend_args),
106
+ dict(
107
+ type='LoadAnnotations3D',
108
+ with_bbox_3d=False,
109
+ with_label_3d=False,
110
+ with_seg_3d=True,
111
+ seg_3d_dtype='np.int32',
112
+ seg_offset=2**16,
113
+ dataset_type='semantickitti',
114
+ backend_args=backend_args),
115
+ dict(type='PointSegClassMapping'),
116
+ dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
117
+ ]
118
+ # construct a pipeline for data and gt loading in show function
119
+ # please keep its loading function consistent with test_pipeline (e.g. client)
120
+ eval_pipeline = [
121
+ dict(
122
+ type='LoadPointsFromFile',
123
+ coord_type='LIDAR',
124
+ load_dim=4,
125
+ use_dim=4,
126
+ backend_args=backend_args),
127
+ dict(type='Pack3DDetInputs', keys=['points'])
128
+ ]
129
+ tta_pipeline = [
130
+ dict(
131
+ type='LoadPointsFromFile',
132
+ coord_type='LIDAR',
133
+ load_dim=4,
134
+ use_dim=4,
135
+ backend_args=backend_args),
136
+ dict(
137
+ type='LoadAnnotations3D',
138
+ with_bbox_3d=False,
139
+ with_label_3d=False,
140
+ with_seg_3d=True,
141
+ seg_3d_dtype='np.int32',
142
+ seg_offset=2**16,
143
+ dataset_type='semantickitti',
144
+ backend_args=backend_args),
145
+ dict(type='PointSegClassMapping'),
146
+ dict(
147
+ type='TestTimeAug',
148
+ transforms=[[
149
+ dict(
150
+ type='RandomFlip3D',
151
+ sync_2d=False,
152
+ flip_ratio_bev_horizontal=0.,
153
+ flip_ratio_bev_vertical=0.),
154
+ dict(
155
+ type='RandomFlip3D',
156
+ sync_2d=False,
157
+ flip_ratio_bev_horizontal=0.,
158
+ flip_ratio_bev_vertical=1.),
159
+ dict(
160
+ type='RandomFlip3D',
161
+ sync_2d=False,
162
+ flip_ratio_bev_horizontal=1.,
163
+ flip_ratio_bev_vertical=0.),
164
+ dict(
165
+ type='RandomFlip3D',
166
+ sync_2d=False,
167
+ flip_ratio_bev_horizontal=1.,
168
+ flip_ratio_bev_vertical=1.)
169
+ ],
170
+ [
171
+ dict(
172
+ type='GlobalRotScaleTrans',
173
+ rot_range=[pcd_rotate_range, pcd_rotate_range],
174
+ scale_ratio_range=[
175
+ pcd_scale_factor, pcd_scale_factor
176
+ ],
177
+ translation_std=[0, 0, 0])
178
+ for pcd_rotate_range in [-0.78539816, 0.0, 0.78539816]
179
+ for pcd_scale_factor in [0.95, 1.0, 1.05]
180
+ ], [dict(type='Pack3DDetInputs', keys=['points'])]])
181
+ ]
182
+
183
+ train_dataloader = dict(
184
+ batch_size=2,
185
+ num_workers=4,
186
+ persistent_workers=True,
187
+ sampler=dict(type='DefaultSampler', shuffle=True),
188
+ dataset=dict(
189
+ type=dataset_type,
190
+ data_root=data_root,
191
+ ann_file='semantickitti_infos_train.pkl',
192
+ pipeline=train_pipeline,
193
+ metainfo=metainfo,
194
+ modality=input_modality,
195
+ ignore_index=19,
196
+ backend_args=backend_args))
197
+
198
+ test_dataloader = dict(
199
+ batch_size=1,
200
+ num_workers=1,
201
+ persistent_workers=True,
202
+ drop_last=False,
203
+ sampler=dict(type='DefaultSampler', shuffle=False),
204
+ dataset=dict(
205
+ type=dataset_type,
206
+ data_root=data_root,
207
+ ann_file='semantickitti_infos_val.pkl',
208
+ pipeline=test_pipeline,
209
+ metainfo=metainfo,
210
+ modality=input_modality,
211
+ ignore_index=19,
212
+ test_mode=True,
213
+ backend_args=backend_args))
214
+
215
+ val_dataloader = test_dataloader
216
+
217
+ val_evaluator = dict(type='SegMetric')
218
+ test_evaluator = val_evaluator
219
+
220
+ vis_backends = [dict(type='LocalVisBackend')]
221
+ visualizer = dict(
222
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
223
+
224
+ tta_model = dict(type='Seg3DTTAModel')
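The `seg_3d_dtype='np.int32'` / `seg_offset=2**16` pair reflects the SemanticKITTI label format: each entry of a `.label` file packs an instance id in the upper 16 bits and the semantic id in the lower 16 bits, and the raw semantic ids are then remapped through `labels_map` into the contiguous 0-19 training ids. A sketch of that decoding under those assumptions, with an abbreviated mapping table:

```python
import numpy as np

labels_map = {0: 19, 10: 0, 40: 8, 252: 0}  # abbreviated version of the table
max_label = 259

raw = np.array([(7 << 16) | 10, 40, (3 << 16) | 252, 0], dtype=np.uint32)
semantic = raw % (2**16)          # strip instance ids (the seg_offset)
lut = np.full(max_label + 1, 19)  # default everything to "unlabeled" (19)
for src, dst in labels_map.items():
    lut[src] = dst
print(lut[semantic])              # -> [ 0  8  0 19]
```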
configs/_base_/datasets/sunrgbd-3d.py ADDED
@@ -0,0 +1,126 @@
1
+ dataset_type = 'SUNRGBDDataset'
2
+ data_root = 'data/sunrgbd/'
3
+ class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
4
+ 'night_stand', 'bookshelf', 'bathtub')
5
+
6
+ metainfo = dict(classes=class_names)
7
+
8
+ # Example to use different file client
9
+ # Method 1: simply set the data root and let the file I/O module
10
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
11
+
12
+ # data_root = 's3://openmmlab/datasets/detection3d/sunrgbd/'
13
+
14
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
15
+ # backend_args = dict(
16
+ # backend='petrel',
17
+ # path_mapping=dict({
18
+ # './data/': 's3://openmmlab/datasets/detection3d/',
19
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
20
+ # }))
21
+ backend_args = None
22
+
23
+ train_pipeline = [
24
+ dict(
25
+ type='LoadPointsFromFile',
26
+ coord_type='DEPTH',
27
+ shift_height=True,
28
+ load_dim=6,
29
+ use_dim=[0, 1, 2],
30
+ backend_args=backend_args),
31
+ dict(type='LoadAnnotations3D'),
32
+ dict(
33
+ type='RandomFlip3D',
34
+ sync_2d=False,
35
+ flip_ratio_bev_horizontal=0.5,
36
+ ),
37
+ dict(
38
+ type='GlobalRotScaleTrans',
39
+ rot_range=[-0.523599, 0.523599],
40
+ scale_ratio_range=[0.85, 1.15],
41
+ shift_height=True),
42
+ dict(type='PointSample', num_points=20000),
43
+ dict(
44
+ type='Pack3DDetInputs',
45
+ keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
46
+ ]
47
+ test_pipeline = [
48
+ dict(
49
+ type='LoadPointsFromFile',
50
+ coord_type='DEPTH',
51
+ shift_height=True,
52
+ load_dim=6,
53
+ use_dim=[0, 1, 2],
54
+ backend_args=backend_args),
55
+ dict(
56
+ type='MultiScaleFlipAug3D',
57
+ img_scale=(1333, 800),
58
+ pts_scale_ratio=1,
59
+ flip=False,
60
+ transforms=[
61
+ dict(
62
+ type='GlobalRotScaleTrans',
63
+ rot_range=[0, 0],
64
+ scale_ratio_range=[1., 1.],
65
+ translation_std=[0, 0, 0]),
66
+ dict(
67
+ type='RandomFlip3D',
68
+ sync_2d=False,
69
+ flip_ratio_bev_horizontal=0.5,
70
+ ),
71
+ dict(type='PointSample', num_points=20000)
72
+ ]),
73
+ dict(type='Pack3DDetInputs', keys=['points'])
74
+ ]
75
+
76
+ train_dataloader = dict(
77
+ batch_size=16,
78
+ num_workers=4,
79
+ sampler=dict(type='DefaultSampler', shuffle=True),
80
+ dataset=dict(
81
+ type='RepeatDataset',
82
+ times=5,
83
+ dataset=dict(
84
+ type=dataset_type,
85
+ data_root=data_root,
86
+ ann_file='sunrgbd_infos_train.pkl',
87
+ pipeline=train_pipeline,
88
+ filter_empty_gt=False,
89
+ metainfo=metainfo,
90
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
91
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
92
+ box_type_3d='Depth',
93
+ backend_args=backend_args)))
94
+
95
+ val_dataloader = dict(
96
+ batch_size=1,
97
+ num_workers=1,
98
+ sampler=dict(type='DefaultSampler', shuffle=False),
99
+ dataset=dict(
100
+ type=dataset_type,
101
+ data_root=data_root,
102
+ ann_file='sunrgbd_infos_val.pkl',
103
+ pipeline=test_pipeline,
104
+ metainfo=metainfo,
105
+ test_mode=True,
106
+ box_type_3d='Depth',
107
+ backend_args=backend_args))
108
+ test_dataloader = dict(
109
+ batch_size=1,
110
+ num_workers=1,
111
+ sampler=dict(type='DefaultSampler', shuffle=False),
112
+ dataset=dict(
113
+ type=dataset_type,
114
+ data_root=data_root,
115
+ ann_file='sunrgbd_infos_val.pkl',
116
+ pipeline=test_pipeline,
117
+ metainfo=metainfo,
118
+ test_mode=True,
119
+ box_type_3d='Depth',
120
+ backend_args=backend_args))
121
+ val_evaluator = dict(type='IndoorMetric')
122
+ test_evaluator = val_evaluator
123
+
124
+ vis_backends = [dict(type='LocalVisBackend')]
125
+ visualizer = dict(
126
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
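`RepeatDataset` with `times=5` simply re-indexes the wrapped dataset so that one training epoch iterates the SUNRGBD samples five times without reloading annotations. A toy sketch of the idea (a stand-in class, not the mmengine implementation):

```python
class RepeatDataset:
    """Wrap a dataset and present it `times` times per epoch."""

    def __init__(self, dataset, times):
        self.dataset, self.times = dataset, times

    def __len__(self):
        return self.times * len(self.dataset)

    def __getitem__(self, idx):
        # indices wrap around into the underlying dataset
        return self.dataset[idx % len(self.dataset)]

print(len(RepeatDataset(list(range(10)), times=5)))  # -> 50
```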
configs/_base_/datasets/waymoD3-fov-mono3d-3class.py ADDED
@@ -0,0 +1,184 @@
1
+ # dataset settings
2
+ # D3 in the config name means the whole dataset is divided into 3 folds
3
+ # We only use one fold for efficient experiments
4
+ dataset_type = 'WaymoDataset'
5
+ data_root = 'data/waymo/kitti_format/'
6
+ class_names = ['Pedestrian', 'Cyclist', 'Car']
7
+ metainfo = dict(classes=class_names)
8
+ input_modality = dict(use_lidar=False, use_camera=True)
9
+
10
+ # Example to use different file client
11
+ # Method 1: simply set the data root and let the file I/O module
12
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
13
+
14
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
15
+
16
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
17
+ # backend_args = dict(
18
+ # backend='petrel',
19
+ # path_mapping=dict({
20
+ # './data/': 's3://openmmlab/datasets/detection3d/',
21
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
22
+ # }))
23
+ backend_args = None
24
+
25
+ train_pipeline = [
26
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
27
+ dict(
28
+ type='LoadAnnotations3D',
29
+ with_bbox=True,
30
+ with_label=True,
31
+ with_attr_label=False,
32
+ with_bbox_3d=True,
33
+ with_label_3d=True,
34
+ with_bbox_depth=True),
35
+ # base shape (1248, 832), scale (0.95, 1.05)
36
+ dict(
37
+ type='RandomResize3D',
38
+ scale=(1248, 832),
39
+ ratio_range=(0.95, 1.05),
40
+ # ratio_range=(1., 1.),
41
+ interpolation='nearest',
42
+ keep_ratio=True,
43
+ ),
44
+ dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
45
+ dict(
46
+ type='Pack3DDetInputs',
47
+ keys=[
48
+ 'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
49
+ 'gt_labels_3d', 'centers_2d', 'depths'
50
+ ]),
51
+ ]
52
+
53
+ test_pipeline = [
54
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
55
+ dict(
56
+ type='RandomResize3D',
57
+ scale=(1248, 832),
58
+ ratio_range=(1., 1.),
59
+ interpolation='nearest',
60
+ keep_ratio=True),
61
+ dict(
62
+ type='Pack3DDetInputs',
63
+ keys=['img'],
64
+ meta_keys=[
65
+ 'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
66
+ 'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
67
+ ]),
68
+ ]
69
+ # construct a pipeline for data and gt loading in show function
70
+ # please keep its loading function consistent with test_pipeline (e.g. client)
71
+ eval_pipeline = [
72
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
73
+ dict(
74
+ type='RandomResize3D',
75
+ scale=(1248, 832),
76
+ ratio_range=(1., 1.),
77
+ interpolation='nearest',
78
+ keep_ratio=True),
79
+ dict(
80
+ type='Pack3DDetInputs',
81
+ keys=['img'],
82
+ meta_keys=[
83
+ 'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
84
+ 'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
85
+ ]),
86
+ ]
87
+
88
+ train_dataloader = dict(
89
+ batch_size=3,
90
+ num_workers=3,
91
+ persistent_workers=True,
92
+ sampler=dict(type='DefaultSampler', shuffle=True),
93
+ dataset=dict(
94
+ type=dataset_type,
95
+ data_root=data_root,
96
+ ann_file='waymo_infos_train.pkl',
97
+ data_prefix=dict(
98
+ pts='training/velodyne',
99
+ CAM_FRONT='training/image_0',
100
+ CAM_FRONT_LEFT='training/image_1',
101
+ CAM_FRONT_RIGHT='training/image_2',
102
+ CAM_SIDE_LEFT='training/image_3',
103
+ CAM_SIDE_RIGHT='training/image_4'),
104
+ pipeline=train_pipeline,
105
+ modality=input_modality,
106
+ test_mode=False,
107
+ metainfo=metainfo,
108
+ cam_sync_instances=True,
109
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
110
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
111
+ box_type_3d='Camera',
112
+ load_type='fov_image_based',
113
+ # load one frame every three frames
114
+ load_interval=3,
115
+ backend_args=backend_args))
116
+
117
+ val_dataloader = dict(
118
+ batch_size=1,
119
+ num_workers=1,
120
+ persistent_workers=True,
121
+ drop_last=False,
122
+ sampler=dict(type='DefaultSampler', shuffle=False),
123
+ dataset=dict(
124
+ type=dataset_type,
125
+ data_root=data_root,
126
+ data_prefix=dict(
127
+ pts='training/velodyne',
128
+ CAM_FRONT='training/image_0',
129
+ CAM_FRONT_LEFT='training/image_1',
130
+ CAM_FRONT_RIGHT='training/image_2',
131
+ CAM_SIDE_LEFT='training/image_3',
132
+ CAM_SIDE_RIGHT='training/image_4'),
133
+ ann_file='waymo_infos_val.pkl',
134
+ pipeline=eval_pipeline,
135
+ modality=input_modality,
136
+ test_mode=True,
137
+ metainfo=metainfo,
138
+ cam_sync_instances=True,
139
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
140
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
141
+ box_type_3d='Camera',
142
+ load_type='fov_image_based',
143
+ load_eval_anns=False,
144
+ backend_args=backend_args))
145
+
146
+ test_dataloader = dict(
147
+ batch_size=1,
148
+ num_workers=1,
149
+ persistent_workers=True,
150
+ drop_last=False,
151
+ sampler=dict(type='DefaultSampler', shuffle=False),
152
+ dataset=dict(
153
+ type=dataset_type,
154
+ data_root=data_root,
155
+ data_prefix=dict(
156
+ pts='training/velodyne',
157
+ CAM_FRONT='training/image_0',
158
+ CAM_FRONT_LEFT='training/image_1',
159
+ CAM_FRONT_RIGHT='training/image_2',
160
+ CAM_SIDE_LEFT='training/image_3',
161
+ CAM_SIDE_RIGHT='training/image_4'),
162
+ ann_file='waymo_infos_val.pkl',
163
+ pipeline=eval_pipeline,
164
+ modality=input_modality,
165
+ test_mode=True,
166
+ metainfo=metainfo,
167
+ cam_sync_instances=True,
168
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
169
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
170
+ box_type_3d='Camera',
171
+ load_type='fov_image_based',
172
+ backend_args=backend_args))
173
+
174
+ val_evaluator = dict(
175
+ type='WaymoMetric',
176
+ waymo_bin_file='./data/waymo/waymo_format/fov_gt.bin',
177
+ metric='LET_mAP',
178
+ load_type='fov_image_based',
179
+ result_prefix='./pgd_fov_pred')
180
+ test_evaluator = val_evaluator
181
+
182
+ vis_backends = [dict(type='LocalVisBackend')]
183
+ visualizer = dict(
184
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/waymoD3-mv-mono3d-3class.py ADDED
@@ -0,0 +1,191 @@
1
+ # dataset settings
2
+ # D3 in the config name means the whole dataset is divided into 3 folds
3
+ # We only use one fold for efficient experiments
4
+ dataset_type = 'WaymoDataset'
5
+ data_root = 'data/waymo/kitti_format/'
6
+ class_names = ['Pedestrian', 'Cyclist', 'Car']
7
+ metainfo = dict(classes=class_names)
8
+ input_modality = dict(use_lidar=False, use_camera=True)
9
+
10
+ # Example to use different file client
11
+ # Method 1: simply set the data root and let the file I/O module
12
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
13
+
14
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
15
+
16
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
17
+ # backend_args = dict(
18
+ # backend='petrel',
19
+ # path_mapping=dict({
20
+ # './data/': 's3://openmmlab/datasets/detection3d/',
21
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
22
+ # }))
23
+ backend_args = None
24
+
25
+ train_pipeline = [
26
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
27
+ dict(
28
+ type='LoadAnnotations3D',
29
+ with_bbox=True,
30
+ with_label=True,
31
+ with_attr_label=False,
32
+ with_bbox_3d=True,
33
+ with_label_3d=True,
34
+ with_bbox_depth=True),
35
+ # base shape (1248, 832), scale (0.95, 1.05)
36
+ dict(
37
+ type='RandomResize3D',
38
+ scale=(1248, 832),
39
+ # ratio_range=(1., 1.),
40
+ ratio_range=(0.95, 1.05),
41
+ interpolation='nearest',
42
+ keep_ratio=True,
43
+ ),
44
+ dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
45
+ dict(
46
+ type='Pack3DDetInputs',
47
+ keys=[
48
+ 'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
49
+ 'gt_labels_3d', 'centers_2d', 'depths'
50
+ ]),
51
+ ]
52
+
53
+ test_pipeline = [
54
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
55
+ dict(
56
+ type='Resize3D',
57
+ scale_factor=0.65,
58
+ interpolation='nearest',
59
+ keep_ratio=True),
60
+ dict(
61
+ type='Pack3DDetInputs',
62
+ keys=['img'],
63
+ meta_keys=[
64
+ 'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
65
+ 'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
66
+ ]),
67
+ ]
68
+ # construct a pipeline for data and gt loading in show function
69
+ # please keep its loading function consistent with test_pipeline (e.g. client)
70
+ eval_pipeline = [
71
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
72
+ dict(
73
+ type='Resize3D',
74
+ scale_factor=0.65,
75
+ interpolation='nearest',
76
+ keep_ratio=True),
77
+ dict(
78
+ type='Pack3DDetInputs',
79
+ keys=['img'],
80
+ meta_keys=[
81
+ 'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
82
+ 'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
83
+ ]),
84
+ ]
85
+
86
+ train_dataloader = dict(
87
+ batch_size=3,
88
+ num_workers=3,
89
+ persistent_workers=True,
90
+ sampler=dict(type='DefaultSampler', shuffle=True),
91
+ dataset=dict(
92
+ type=dataset_type,
93
+ data_root=data_root,
94
+ ann_file='waymo_infos_train.pkl',
95
+ data_prefix=dict(
96
+ pts='training/velodyne',
97
+ CAM_FRONT='training/image_0',
98
+ CAM_FRONT_LEFT='training/image_1',
99
+ CAM_FRONT_RIGHT='training/image_2',
100
+ CAM_SIDE_LEFT='training/image_3',
101
+ CAM_SIDE_RIGHT='training/image_4'),
102
+ pipeline=train_pipeline,
103
+ modality=input_modality,
104
+ test_mode=False,
105
+ metainfo=metainfo,
106
+ cam_sync_instances=True,
107
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
108
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
109
+ box_type_3d='Camera',
110
+ load_type='mv_image_based',
111
+ # load one frame every three frames
112
+ load_interval=3,
113
+ backend_args=backend_args))
114
+
115
+ val_dataloader = dict(
116
+ batch_size=1,
117
+ num_workers=0,
118
+ persistent_workers=False,
119
+ drop_last=False,
120
+ sampler=dict(type='DefaultSampler', shuffle=False),
121
+ dataset=dict(
122
+ type=dataset_type,
123
+ data_root=data_root,
124
+ data_prefix=dict(
125
+ pts='training/velodyne',
126
+ CAM_FRONT='training/image_0',
127
+ CAM_FRONT_LEFT='training/image_1',
128
+ CAM_FRONT_RIGHT='training/image_2',
129
+ CAM_SIDE_LEFT='training/image_3',
130
+ CAM_SIDE_RIGHT='training/image_4'),
131
+ ann_file='waymo_infos_val.pkl',
132
+ pipeline=eval_pipeline,
133
+ modality=input_modality,
134
+ test_mode=True,
135
+ metainfo=metainfo,
136
+ cam_sync_instances=True,
137
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
138
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
139
+ box_type_3d='Camera',
140
+ load_type='mv_image_based',
141
+ # load_eval_anns=False,
142
+ backend_args=backend_args))
143
+
144
+ test_dataloader = dict(
145
+ batch_size=1,
146
+ num_workers=0,
147
+ persistent_workers=False,
148
+ drop_last=False,
149
+ sampler=dict(type='DefaultSampler', shuffle=False),
150
+ dataset=dict(
151
+ type=dataset_type,
152
+ data_root=data_root,
153
+ data_prefix=dict(
154
+ pts='training/velodyne',
155
+ CAM_FRONT='training/image_0',
156
+ CAM_FRONT_LEFT='training/image_1',
157
+ CAM_FRONT_RIGHT='training/image_2',
158
+ CAM_SIDE_LEFT='training/image_3',
159
+ CAM_SIDE_RIGHT='training/image_4'),
160
+ ann_file='waymo_infos_val.pkl',
161
+ pipeline=eval_pipeline,
162
+ modality=input_modality,
163
+ test_mode=True,
164
+ metainfo=metainfo,
165
+ cam_sync_instances=True,
166
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
167
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
168
+ box_type_3d='Camera',
169
+ load_type='mv_image_based',
170
+ load_eval_anns=False,
171
+ backend_args=backend_args))
172
+
173
+ val_evaluator = dict(
174
+ type='WaymoMetric',
175
+ waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
176
+ metric='LET_mAP',
177
+ load_type='mv_image_based',
178
+ result_prefix='./pgd_mv_pred',
179
+ nms_cfg=dict(
180
+ use_rotate_nms=True,
181
+ nms_across_levels=False,
182
+ nms_pre=500,
183
+ nms_thr=0.05,
184
+ score_thr=0.001,
185
+ min_bbox_size=0,
186
+ max_per_frame=100))
187
+ test_evaluator = val_evaluator
188
+
189
+ vis_backends = [dict(type='LocalVisBackend')]
190
+ visualizer = dict(
191
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/waymoD5-3d-3class.py ADDED
@@ -0,0 +1,178 @@
1
+ # dataset settings
2
+ # D5 in the config name means the whole dataset is divided into 5 folds
3
+ # We only use one fold for efficient experiments
4
+ dataset_type = 'WaymoDataset'
5
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
6
+ data_root = 'data/waymo/kitti_format/'
7
+
8
+ # Example to use different file client
9
+ # Method 1: simply set the data root and let the file I/O module
10
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
11
+
12
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
13
+
14
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
15
+ # backend_args = dict(
16
+ # backend='petrel',
17
+ # path_mapping=dict({
18
+ # './data/': 's3://openmmlab/datasets/detection3d/',
19
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
20
+ # }))
21
+ backend_args = None
22
+
23
+ class_names = ['Car', 'Pedestrian', 'Cyclist']
24
+ metainfo = dict(classes=class_names)
25
+
26
+ point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
27
+ input_modality = dict(use_lidar=True, use_camera=False)
28
+ db_sampler = dict(
29
+ data_root=data_root,
30
+ info_path=data_root + 'waymo_dbinfos_train.pkl',
31
+ rate=1.0,
32
+ prepare=dict(
33
+ filter_by_difficulty=[-1],
34
+ filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
35
+ classes=class_names,
36
+ sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),
37
+ points_loader=dict(
38
+ type='LoadPointsFromFile',
39
+ coord_type='LIDAR',
40
+ load_dim=6,
41
+ use_dim=[0, 1, 2, 3, 4],
42
+ backend_args=backend_args),
43
+ backend_args=backend_args)
44
+
45
+ train_pipeline = [
46
+ dict(
47
+ type='LoadPointsFromFile',
48
+ coord_type='LIDAR',
49
+ load_dim=6,
50
+ use_dim=5,
51
+ backend_args=backend_args),
52
+ dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
53
+ # dict(type='ObjectSample', db_sampler=db_sampler),
54
+ dict(
55
+ type='RandomFlip3D',
56
+ sync_2d=False,
57
+ flip_ratio_bev_horizontal=0.5,
58
+ flip_ratio_bev_vertical=0.5),
59
+ dict(
60
+ type='GlobalRotScaleTrans',
61
+ rot_range=[-0.78539816, 0.78539816],
62
+ scale_ratio_range=[0.95, 1.05]),
63
+ dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
64
+ dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
65
+ dict(type='PointShuffle'),
66
+ dict(
67
+ type='Pack3DDetInputs',
68
+ keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
69
+ ]
70
+ test_pipeline = [
71
+ dict(
72
+ type='LoadPointsFromFile',
73
+ coord_type='LIDAR',
74
+ load_dim=6,
75
+ use_dim=5,
76
+ backend_args=backend_args),
77
+ dict(
78
+ type='MultiScaleFlipAug3D',
79
+ img_scale=(1333, 800),
80
+ pts_scale_ratio=1,
81
+ flip=False,
82
+ transforms=[
83
+ dict(
84
+ type='GlobalRotScaleTrans',
85
+ rot_range=[0, 0],
86
+ scale_ratio_range=[1., 1.],
87
+ translation_std=[0, 0, 0]),
88
+ dict(type='RandomFlip3D'),
89
+ dict(
90
+ type='PointsRangeFilter', point_cloud_range=point_cloud_range)
91
+ ]),
92
+ dict(
93
+ type='Pack3DDetInputs',
94
+ keys=['points'],
95
+ meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
96
+ ]
97
+ # construct a pipeline for data and gt loading in show function
98
+ # please keep its loading function consistent with test_pipeline (e.g. client)
99
+ eval_pipeline = [
100
+ dict(
101
+ type='LoadPointsFromFile',
102
+ coord_type='LIDAR',
103
+ load_dim=6,
104
+ use_dim=5,
105
+ backend_args=backend_args),
106
+ dict(
107
+ type='Pack3DDetInputs',
108
+ keys=['points'],
109
+ meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
110
+ ]
111
+
112
+ train_dataloader = dict(
113
+ batch_size=2,
114
+ num_workers=2,
115
+ persistent_workers=True,
116
+ sampler=dict(type='DefaultSampler', shuffle=True),
117
+ dataset=dict(
118
+ type='RepeatDataset',
119
+ times=2,
120
+ dataset=dict(
121
+ type=dataset_type,
122
+ data_root=data_root,
123
+ ann_file='waymo_infos_train.pkl',
124
+ data_prefix=dict(
125
+ pts='training/velodyne', sweeps='training/velodyne'),
126
+ pipeline=train_pipeline,
127
+ modality=input_modality,
128
+ test_mode=False,
129
+ metainfo=metainfo,
130
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
131
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
132
+ box_type_3d='LiDAR',
133
+ # load one frame every five frames
134
+ load_interval=5,
135
+ backend_args=backend_args)))
136
+ val_dataloader = dict(
137
+ batch_size=1,
138
+ num_workers=1,
139
+ persistent_workers=True,
140
+ drop_last=False,
141
+ sampler=dict(type='DefaultSampler', shuffle=False),
142
+ dataset=dict(
143
+ type=dataset_type,
144
+ data_root=data_root,
145
+ data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
146
+ ann_file='waymo_infos_val.pkl',
147
+ pipeline=eval_pipeline,
148
+ modality=input_modality,
149
+ test_mode=True,
150
+ metainfo=metainfo,
151
+ box_type_3d='LiDAR',
152
+ backend_args=backend_args))
153
+
154
+ test_dataloader = dict(
155
+ batch_size=1,
156
+ num_workers=1,
157
+ persistent_workers=True,
158
+ drop_last=False,
159
+ sampler=dict(type='DefaultSampler', shuffle=False),
160
+ dataset=dict(
161
+ type=dataset_type,
162
+ data_root=data_root,
163
+ data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
164
+ ann_file='waymo_infos_val.pkl',
165
+ pipeline=eval_pipeline,
166
+ modality=input_modality,
167
+ test_mode=True,
168
+ metainfo=metainfo,
169
+ box_type_3d='LiDAR',
170
+ backend_args=backend_args))
171
+
172
+ val_evaluator = dict(
173
+ type='WaymoMetric', waymo_bin_file='./data/waymo/waymo_format/gt.bin')
174
+ test_evaluator = val_evaluator
175
+
176
+ vis_backends = [dict(type='LocalVisBackend')]
177
+ visualizer = dict(
178
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
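The `load_interval=5` used above is where the `D5` in the config name comes from: only every fifth frame of the info file is kept. A toy sketch of the subsampling (an illustrative info list, not the real `waymo_infos_train.pkl`):

```python
# Keep one frame out of every five, as the dataset does with its info list.
infos = [dict(sample_idx=i) for i in range(20)]
load_interval = 5
subset = infos[::load_interval]
print(len(subset))  # -> 4 frames instead of 20
```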
configs/_base_/datasets/waymoD5-3d-car.py ADDED
@@ -0,0 +1,173 @@
1
+ # dataset settings
2
+ # D5 in the config name means the whole dataset is divided into 5 folds
3
+ # We only use one fold for efficient experiments
4
+ dataset_type = 'WaymoDataset'
5
+ data_root = 'data/waymo/kitti_format/'
6
+
7
+ # Example to use different file client
8
+ # Method 1: simply set the data root and let the file I/O module
9
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
10
+
11
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
12
+
13
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
14
+ # backend_args = dict(
15
+ # backend='petrel',
16
+ # path_mapping=dict({
17
+ # './data/': 's3://openmmlab/datasets/detection3d/',
18
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
19
+ # }))
20
+ backend_args = None
21
+
22
+ class_names = ['Car']
23
+ metainfo = dict(classes=class_names)
24
+
25
+ point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
26
+ input_modality = dict(use_lidar=True, use_camera=False)
27
+ db_sampler = dict(
28
+ data_root=data_root,
29
+ info_path=data_root + 'waymo_dbinfos_train.pkl',
30
+ rate=1.0,
31
+ prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
32
+ classes=class_names,
33
+ sample_groups=dict(Car=15),
34
+ points_loader=dict(
35
+ type='LoadPointsFromFile',
36
+ coord_type='LIDAR',
37
+ load_dim=6,
38
+ use_dim=[0, 1, 2, 3, 4],
39
+ backend_args=backend_args),
40
+ backend_args=backend_args)
41
+
42
+ train_pipeline = [
43
+ dict(
44
+ type='LoadPointsFromFile',
45
+ coord_type='LIDAR',
46
+ load_dim=6,
47
+ use_dim=5,
48
+ backend_args=backend_args),
49
+ dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
50
+ dict(type='ObjectSample', db_sampler=db_sampler),
51
+ dict(
52
+ type='RandomFlip3D',
53
+ sync_2d=False,
54
+ flip_ratio_bev_horizontal=0.5,
55
+ flip_ratio_bev_vertical=0.5),
56
+ dict(
57
+ type='GlobalRotScaleTrans',
58
+ rot_range=[-0.78539816, 0.78539816],
59
+ scale_ratio_range=[0.95, 1.05]),
60
+ dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
61
+ dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
62
+ dict(type='PointShuffle'),
63
+ dict(
64
+ type='Pack3DDetInputs',
65
+ keys=['points'],
66
+ meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
67
+ ]
68
+ test_pipeline = [
69
+ dict(
70
+ type='LoadPointsFromFile',
71
+ coord_type='LIDAR',
72
+ load_dim=6,
73
+ use_dim=5,
74
+ backend_args=backend_args),
75
+ dict(
76
+ type='MultiScaleFlipAug3D',
77
+ img_scale=(1333, 800),
78
+ pts_scale_ratio=1,
79
+ flip=False,
80
+ transforms=[
81
+ dict(
82
+ type='GlobalRotScaleTrans',
83
+ rot_range=[0, 0],
84
+ scale_ratio_range=[1., 1.],
85
+ translation_std=[0, 0, 0]),
86
+ dict(type='RandomFlip3D'),
87
+ dict(
88
+ type='PointsRangeFilter', point_cloud_range=point_cloud_range)
89
+ ]),
90
+ dict(
91
+ type='Pack3DDetInputs',
92
+ keys=['points'],
93
+ meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
94
+ ]
95
+ # construct a pipeline for data and gt loading in show function
96
+ # please keep its loading function consistent with test_pipeline (e.g. client)
97
+ eval_pipeline = [
98
+ dict(
99
+ type='LoadPointsFromFile',
100
+ coord_type='LIDAR',
101
+ load_dim=6,
102
+ use_dim=5,
103
+ backend_args=backend_args),
104
+ dict(type='Pack3DDetInputs', keys=['points']),
105
+ ]
106
+
107
+ train_dataloader = dict(
108
+ batch_size=2,
109
+ num_workers=2,
110
+ persistent_workers=True,
111
+ sampler=dict(type='DefaultSampler', shuffle=True),
112
+ dataset=dict(
113
+ type='RepeatDataset',
114
+ times=2,
115
+ dataset=dict(
116
+ type=dataset_type,
117
+ data_root=data_root,
118
+ ann_file='waymo_infos_train.pkl',
119
+ data_prefix=dict(
120
+ pts='training/velodyne', sweeps='training/velodyne'),
121
+ pipeline=train_pipeline,
122
+ modality=input_modality,
123
+ test_mode=False,
124
+ metainfo=metainfo,
125
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
126
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
127
+ box_type_3d='LiDAR',
128
+ # load one frame every five frames
129
+ load_interval=5,
130
+ backend_args=backend_args)))
131
+ val_dataloader = dict(
132
+ batch_size=1,
133
+ num_workers=1,
134
+ persistent_workers=True,
135
+ drop_last=False,
136
+ sampler=dict(type='DefaultSampler', shuffle=False),
137
+ dataset=dict(
138
+ type=dataset_type,
139
+ data_root=data_root,
140
+ data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
141
+ ann_file='waymo_infos_val.pkl',
142
+ pipeline=eval_pipeline,
143
+ modality=input_modality,
144
+ test_mode=True,
145
+ metainfo=metainfo,
146
+ box_type_3d='LiDAR',
147
+ backend_args=backend_args))
148
+
149
+ test_dataloader = dict(
150
+ batch_size=1,
151
+ num_workers=1,
152
+ persistent_workers=True,
153
+ drop_last=False,
154
+ sampler=dict(type='DefaultSampler', shuffle=False),
155
+ dataset=dict(
156
+ type=dataset_type,
157
+ data_root=data_root,
158
+ data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
159
+ ann_file='waymo_infos_val.pkl',
160
+ pipeline=eval_pipeline,
161
+ modality=input_modality,
162
+ test_mode=True,
163
+ metainfo=metainfo,
164
+ box_type_3d='LiDAR',
165
+ backend_args=backend_args))
166
+
167
+ val_evaluator = dict(
168
+ type='WaymoMetric', waymo_bin_file='./data/waymo/waymo_format/gt.bin')
169
+ test_evaluator = val_evaluator
170
+
171
+ vis_backends = [dict(type='LocalVisBackend')]
172
+ visualizer = dict(
173
+ type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
configs/_base_/datasets/waymoD5-fov-mono3d-3class.py ADDED
@@ -0,0 +1,163 @@
1
+ # dataset settings
2
+ # D5 in the config name means the whole dataset is divided into 5 folds
3
+ # We only use one fold for efficient experiments
4
+ dataset_type = 'WaymoDataset'
5
+ data_root = 'data/waymo/kitti_format/'
6
+ class_names = ['Car', 'Pedestrian', 'Cyclist']
7
+ input_modality = dict(use_lidar=False, use_camera=True)
8
+
9
+ # Example to use different file client
10
+ # Method 1: simply set the data root and let the file I/O module
11
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
12
+
13
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
14
+
15
+ # Method 2: Use backend_args (named file_client_args in versions before 1.1.0)
16
+ # backend_args = dict(
17
+ # backend='petrel',
18
+ # path_mapping=dict({
19
+ # './data/': 's3://openmmlab/datasets/detection3d/',
20
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
21
+ # }))
22
+ backend_args = None
23
+
24
+ train_pipeline = [
25
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
26
+ dict(
27
+ type='LoadAnnotations3D',
28
+ with_bbox=True,
29
+ with_label=True,
30
+ with_attr_label=False,
31
+ with_bbox_3d=True,
32
+ with_label_3d=True,
33
+ with_bbox_depth=True),
34
+ # base shape (1248, 832), scale (0.95, 1.05)
35
+ dict(
36
+ type='RandomResize3D',
37
+ scale=(1248, 832),
38
+ ratio_range=(0.95, 1.05),
39
+ keep_ratio=True,
40
+ ),
41
+ dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
42
+ dict(
43
+ type='Pack3DDetInputs',
44
+ keys=[
45
+ 'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
46
+ 'gt_labels_3d', 'centers_2d', 'depths'
47
+ ]),
48
+ ]
49
+
50
+ test_pipeline = [
51
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
52
+ dict(
53
+ type='RandomResize3D',
54
+ scale=(1248, 832),
55
+ ratio_range=(1., 1.),
56
+ keep_ratio=True),
57
+ dict(type='Pack3DDetInputs', keys=['img']),
58
+ ]
59
+ # construct a pipeline for data and gt loading in show function
60
+ # please keep its loading function consistent with test_pipeline (e.g. client)
61
+ eval_pipeline = [
62
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
63
+ dict(
64
+ type='RandomResize3D',
65
+ scale=(1248, 832),
66
+ ratio_range=(1., 1.),
67
+ keep_ratio=True),
68
+ dict(type='Pack3DDetInputs', keys=['img']),
69
+ ]
70
+
71
+ metainfo = dict(classes=class_names)
72
+
73
+ train_dataloader = dict(
74
+ batch_size=3,
75
+ num_workers=3,
76
+ persistent_workers=True,
77
+ sampler=dict(type='DefaultSampler', shuffle=True),
78
+ dataset=dict(
79
+ type=dataset_type,
80
+ data_root=data_root,
81
+ ann_file='waymo_infos_train.pkl',
82
+ data_prefix=dict(
83
+ pts='training/velodyne',
84
+ CAM_FRONT='training/image_0',
85
+ CAM_FRONT_LEFT='training/image_1',
86
+ CAM_FRONT_RIGHT='training/image_2',
87
+ CAM_SIDE_LEFT='training/image_3',
88
+ CAM_SIDE_RIGHT='training/image_4'),
89
+ pipeline=train_pipeline,
90
+ modality=input_modality,
91
+ test_mode=False,
92
+ metainfo=metainfo,
93
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
94
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
95
+ box_type_3d='Camera',
96
+ load_type='fov_image_based',
97
+ # load one frame every five frames
98
+ load_interval=5,
99
+ backend_args=backend_args))
100
+
101
+ val_dataloader = dict(
102
+ batch_size=1,
103
+ num_workers=1,
104
+ persistent_workers=True,
105
+ drop_last=False,
106
+ sampler=dict(type='DefaultSampler', shuffle=False),
107
+ dataset=dict(
108
+ type=dataset_type,
109
+ data_root=data_root,
110
+ data_prefix=dict(
111
+ pts='training/velodyne',
112
+ CAM_FRONT='training/image_0',
113
+ CAM_FRONT_LEFT='training/image_1',
114
+ CAM_FRONT_RIGHT='training/image_2',
115
+ CAM_SIDE_LEFT='training/image_3',
116
+ CAM_SIDE_RIGHT='training/image_4'),
117
+ ann_file='waymo_infos_val.pkl',
118
+ pipeline=eval_pipeline,
119
+ modality=input_modality,
120
+ test_mode=True,
121
+ metainfo=metainfo,
122
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
123
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
124
+ box_type_3d='Camera',
125
+ load_type='fov_image_based',
126
+ backend_args=backend_args))
127
+
128
+ test_dataloader = dict(
129
+ batch_size=1,
130
+ num_workers=1,
131
+ persistent_workers=True,
132
+ drop_last=False,
133
+ sampler=dict(type='DefaultSampler', shuffle=False),
134
+ dataset=dict(
135
+ type=dataset_type,
136
+ data_root=data_root,
137
+ data_prefix=dict(
138
+ pts='training/velodyne',
139
+ CAM_FRONT='training/image_0',
140
+ CAM_FRONT_LEFT='training/image_1',
141
+ CAM_FRONT_RIGHT='training/image_2',
142
+ CAM_SIDE_LEFT='training/image_3',
143
+ CAM_SIDE_RIGHT='training/image_4'),
144
+ ann_file='waymo_infos_val.pkl',
145
+ pipeline=eval_pipeline,
146
+ modality=input_modality,
147
+ test_mode=True,
148
+ metainfo=metainfo,
149
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
150
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
151
+ box_type_3d='Camera',
152
+ load_type='fov_image_based',
153
+ backend_args=backend_args))
154
+
155
+ val_evaluator = dict(
156
+ type='WaymoMetric',
157
+ ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
158
+ waymo_bin_file='./data/waymo/waymo_format/fov_gt.bin',
159
+ data_root='./data/waymo/waymo_format',
160
+ metric='LET_mAP',
161
+ load_type='fov_image_based',
162
+ backend_args=backend_args)
163
+ test_evaluator = val_evaluator
configs/_base_/datasets/waymoD5-mv-mono3d-3class.py ADDED
@@ -0,0 +1,163 @@
1
+ # dataset settings
2
+ # D5 in the config name means the whole dataset is divided into 5 folds
3
+ # We only use one fold for efficient experiments
4
+ dataset_type = 'WaymoDataset'
5
+ data_root = 'data/waymo/kitti_format/'
6
+ class_names = ['Car', 'Pedestrian', 'Cyclist']
7
+ input_modality = dict(use_lidar=False, use_camera=True)
8
+
9
+ # Example to use different file client
10
+ # Method 1: simply set the data root and let the file I/O module
11
+ # automatically infer it from the prefix (LMDB and Memcached are not supported yet)
12
+
13
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
14
+
15
+ # Method 2: Use backend_args, file_client_args in versions before 1.1.0
16
+ # backend_args = dict(
17
+ # backend='petrel',
18
+ # path_mapping=dict({
19
+ # './data/': 's3://openmmlab/datasets/detection3d/',
20
+ # 'data/': 's3://openmmlab/datasets/detection3d/'
21
+ # }))
22
+ backend_args = None
23
+
24
+ train_pipeline = [
25
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
26
+ dict(
27
+ type='LoadAnnotations3D',
28
+ with_bbox=True,
29
+ with_label=True,
30
+ with_attr_label=False,
31
+ with_bbox_3d=True,
32
+ with_label_3d=True,
33
+ with_bbox_depth=True),
34
+ # base shape (1248, 832), scale (0.95, 1.05)
35
+ dict(
36
+ type='RandomResize3D',
37
+ scale=(1284, 832),
38
+ ratio_range=(0.95, 1.05),
39
+ keep_ratio=True,
40
+ ),
41
+ dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
42
+ dict(
43
+ type='Pack3DDetInputs',
44
+ keys=[
45
+ 'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
46
+ 'gt_labels_3d', 'centers_2d', 'depths'
47
+ ]),
48
+ ]
49
+
50
+ test_pipeline = [
51
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
52
+ dict(
53
+ type='RandomResize3D',
54
+ scale=(1248, 832),
55
+ ratio_range=(1., 1.),
56
+ keep_ratio=True),
57
+ dict(type='Pack3DDetInputs', keys=['img']),
58
+ ]
59
+ # construct a pipeline for data and gt loading in show function
60
+ # please keep its loading function consistent with test_pipeline (e.g. client)
61
+ eval_pipeline = [
62
+ dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
63
+ dict(
64
+ type='RandomResize3D',
65
+ scale=(1248, 832),
66
+ ratio_range=(1., 1.),
67
+ keep_ratio=True),
68
+ dict(type='Pack3DDetInputs', keys=['img']),
69
+ ]
70
+
71
+ metainfo = dict(classes=class_names)
72
+
73
+ train_dataloader = dict(
74
+ batch_size=3,
75
+ num_workers=3,
76
+ persistent_workers=True,
77
+ sampler=dict(type='DefaultSampler', shuffle=True),
78
+ dataset=dict(
79
+ type=dataset_type,
80
+ data_root=data_root,
81
+ ann_file='waymo_infos_train.pkl',
82
+ data_prefix=dict(
83
+ pts='training/velodyne',
84
+ CAM_FRONT='training/image_0',
85
+ CAM_FRONT_LEFT='training/image_1',
86
+ CAM_FRONT_RIGHT='training/image_2',
87
+ CAM_SIDE_LEFT='training/image_3',
88
+ CAM_SIDE_RIGHT='training/image_4'),
89
+ pipeline=train_pipeline,
90
+ modality=input_modality,
91
+ test_mode=False,
92
+ metainfo=metainfo,
93
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
94
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
95
+ box_type_3d='Camera',
96
+ load_type='mv_image_based',
97
+ # load one frame every three frames
98
+ load_interval=5,
99
+ backend_args=backend_args))
100
+
101
+ val_dataloader = dict(
102
+ batch_size=1,
103
+ num_workers=1,
104
+ persistent_workers=True,
105
+ drop_last=False,
106
+ sampler=dict(type='DefaultSampler', shuffle=False),
107
+ dataset=dict(
108
+ type=dataset_type,
109
+ data_root=data_root,
110
+ data_prefix=dict(
111
+ pts='training/velodyne',
112
+ CAM_FRONT='training/image_0',
113
+ CAM_FRONT_LEFT='training/image_1',
114
+ CAM_FRONT_RIGHT='training/image_2',
115
+ CAM_SIDE_LEFT='training/image_3',
116
+ CAM_SIDE_RIGHT='training/image_4'),
117
+ ann_file='waymo_infos_val.pkl',
118
+ pipeline=eval_pipeline,
119
+ modality=input_modality,
120
+ test_mode=True,
121
+ metainfo=metainfo,
122
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
123
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
124
+ box_type_3d='Camera',
125
+ load_type='mv_image_based',
126
+ backend_args=backend_args))
127
+
128
+ test_dataloader = dict(
129
+ batch_size=1,
130
+ num_workers=1,
131
+ persistent_workers=True,
132
+ drop_last=False,
133
+ sampler=dict(type='DefaultSampler', shuffle=False),
134
+ dataset=dict(
135
+ type=dataset_type,
136
+ data_root=data_root,
137
+ data_prefix=dict(
138
+ pts='training/velodyne',
139
+ CAM_FRONT='training/image_0',
140
+ CAM_FRONT_LEFT='training/image_1',
141
+ CAM_FRONT_RIGHT='training/image_2',
142
+ CAM_SIDE_LEFT='training/image_3',
143
+ CAM_SIDE_RIGHT='training/image_4'),
144
+ ann_file='waymo_infos_val.pkl',
145
+ pipeline=eval_pipeline,
146
+ modality=input_modality,
147
+ test_mode=True,
148
+ metainfo=metainfo,
149
+ # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
150
+ # and box_type_3d='Depth' in sunrgbd and scannet dataset.
151
+ box_type_3d='Camera',
152
+ load_type='mv_image_based',
153
+ backend_args=backend_args))
154
+
155
+ val_evaluator = dict(
156
+ type='WaymoMetric',
157
+ ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
158
+ waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
159
+ data_root='./data/waymo/waymo_format',
160
+ metric='LET_mAP',
161
+ load_type='mv_image_based',
162
+ backend_args=backend_args)
163
+ test_evaluator = val_evaluator
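For a quick smoke test of a dataset file like the one above, the train dataset can be built directly from the parsed config. A rough sketch, assuming mmdet3d 1.1+ is installed and the Waymo data has already been converted to kitti_format; the file path is illustrative.

from mmengine.config import Config
from mmdet3d.registry import DATASETS

# Parse the _base_ dataset file and instantiate its registered dataset type
# ('WaymoDataset'); building requires the converted annotation files on disk.
cfg = Config.fromfile('configs/_base_/datasets/waymoD5-mv-mono3d-3class.py')
dataset = DATASETS.build(cfg.train_dataloader.dataset)
print(len(dataset))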
configs/_base_/datasets/waymoD5-mv3d-3class.py ADDED
@@ -0,0 +1,178 @@
+ # dataset settings
+ # D5 in the config name means the whole dataset is divided into 5 folds
+ # We only use one fold for efficient experiments
+ dataset_type = 'WaymoDataset'
+ data_root = 'data/waymo/kitti_format/'
+
+ # Example to use different file client
+ # Method 1: simply set the data root and let the file I/O module
+ # automatically infer from prefix (not support LMDB and Memcache yet)
+
+ # data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
+
+ # Method 2: Use backend_args, file_client_args in versions before 1.1.0
+ # backend_args = dict(
+ #     backend='petrel',
+ #     path_mapping=dict({
+ #         './data/': 's3://openmmlab/datasets/detection3d/',
+ #         'data/': 's3://openmmlab/datasets/detection3d/'
+ #     }))
+ backend_args = None
+
+ class_names = ['Pedestrian', 'Cyclist', 'Car']
+ input_modality = dict(use_lidar=False, use_camera=True)
+ point_cloud_range = [-35.0, -75.0, -2, 75.0, 75.0, 4]
+
+ train_transforms = [
+     dict(type='PhotoMetricDistortion3D'),
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         ratio_range=(0.95, 1.05),
+         keep_ratio=True),
+     dict(type='RandomCrop3D', crop_size=(1080, 720)),
+     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5, flip_box3d=False),
+ ]
+
+ train_pipeline = [
+     dict(
+         type='LoadMultiViewImageFromFiles',
+         to_float32=True,
+         backend_args=backend_args),
+     dict(
+         type='LoadAnnotations3D',
+         with_bbox=True,
+         with_label=True,
+         with_attr_label=False,
+         with_bbox_3d=True,
+         with_label_3d=True,
+         with_bbox_depth=True),
+     dict(type='MultiViewWrapper', transforms=train_transforms),
+     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
+     dict(type='ObjectNameFilter', classes=class_names),
+     dict(
+         type='Pack3DDetInputs', keys=[
+             'img',
+             'gt_bboxes_3d',
+             'gt_labels_3d',
+         ]),
+ ]
+ test_transforms = [
+     dict(
+         type='RandomResize3D',
+         scale=(1248, 832),
+         ratio_range=(1., 1.),
+         keep_ratio=True)
+ ]
+ test_pipeline = [
+     dict(
+         type='LoadMultiViewImageFromFiles',
+         to_float32=True,
+         backend_args=backend_args),
+     dict(type='MultiViewWrapper', transforms=test_transforms),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['img'],
+         meta_keys=[
+             'box_type_3d', 'img_shape', 'ori_cam2img', 'scale_factor',
+             'sample_idx', 'context_name', 'timestamp', 'lidar2cam',
+             'num_ref_frames', 'num_views'
+         ])
+ ]
+ # construct a pipeline for data and gt loading in show function
+ # please keep its loading function consistent with test_pipeline (e.g. client)
+ eval_pipeline = [
+     dict(
+         type='LoadMultiViewImageFromFiles',
+         to_float32=True,
+         backend_args=backend_args),
+     dict(type='MultiViewWrapper', transforms=test_transforms),
+     dict(
+         type='Pack3DDetInputs',
+         keys=['img'],
+         meta_keys=[
+             'box_type_3d', 'img_shape', 'ori_cam2img', 'scale_factor',
+             'sample_idx', 'context_name', 'timestamp', 'lidar2cam',
+             'num_ref_frames', 'num_views'
+         ])
+ ]
+ metainfo = dict(classes=class_names)
+
+ train_dataloader = dict(
+     batch_size=2,
+     num_workers=2,
+     persistent_workers=True,
+     sampler=dict(type='DefaultSampler', shuffle=True),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='waymo_infos_train.pkl',
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         pipeline=train_pipeline,
+         modality=input_modality,
+         test_mode=False,
+         cam_sync_instances=True,
+         metainfo=metainfo,
+         box_type_3d='Lidar',
+         load_interval=5,
+         backend_args=backend_args))
+
+ val_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='waymo_infos_val.pkl',
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         pipeline=eval_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         box_type_3d='Lidar',
+         backend_args=backend_args))
+
+ test_dataloader = dict(
+     batch_size=1,
+     num_workers=1,
+     persistent_workers=True,
+     drop_last=False,
+     sampler=dict(type='DefaultSampler', shuffle=False),
+     dataset=dict(
+         type=dataset_type,
+         data_root=data_root,
+         ann_file='waymo_infos_val.pkl',
+         data_prefix=dict(
+             pts='training/velodyne',
+             CAM_FRONT='training/image_0',
+             CAM_FRONT_LEFT='training/image_1',
+             CAM_FRONT_RIGHT='training/image_2',
+             CAM_SIDE_LEFT='training/image_3',
+             CAM_SIDE_RIGHT='training/image_4'),
+         pipeline=test_pipeline,
+         modality=input_modality,
+         test_mode=True,
+         metainfo=metainfo,
+         box_type_3d='Lidar',
+         backend_args=backend_args))
+ val_evaluator = dict(
+     type='WaymoMetric',
+     waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
+     metric='LET_mAP')
+
+ test_evaluator = val_evaluator
configs/_base_/default_runtime.py ADDED
@@ -0,0 +1,23 @@
+ default_scope = 'mmdet3d'
+
+ default_hooks = dict(
+     timer=dict(type='IterTimerHook'),
+     logger=dict(type='LoggerHook', interval=50),
+     param_scheduler=dict(type='ParamSchedulerHook'),
+     checkpoint=dict(type='CheckpointHook', interval=-1),
+     sampler_seed=dict(type='DistSamplerSeedHook'),
+     visualization=dict(type='Det3DVisualizationHook'))
+
+ env_cfg = dict(
+     cudnn_benchmark=False,
+     mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+     dist_cfg=dict(backend='nccl'),
+ )
+
+ log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
+
+ log_level = 'INFO'
+ load_from = None
+ resume = False
+
+ # TODO: support auto scaling lr
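With `interval=-1`, the CheckpointHook above never saves periodic checkpoints, so downstream configs are expected to turn it on. A small sketch of such an override; the interval and retention values are illustrative, not part of this commit.

# Hypothetical override on top of default_runtime.py (values are examples).
_base_ = ['../_base_/default_runtime.py']
default_hooks = dict(
    # save a checkpoint every epoch, keeping only the three most recent
    checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3),
    # log less frequently
    logger=dict(type='LoggerHook', interval=100))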
configs/_base_/models/3dssd.py ADDED
@@ -0,0 +1,76 @@
+ model = dict(
+     type='SSD3DNet',
+     data_preprocessor=dict(type='Det3DDataPreprocessor'),
+     backbone=dict(
+         type='PointNet2SAMSG',
+         in_channels=4,
+         num_points=(4096, 512, (256, 256)),
+         radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)),
+         num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)),
+         sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)),
+                      ((64, 64, 128), (64, 64, 128), (64, 96, 128)),
+                      ((128, 128, 256), (128, 192, 256), (128, 256, 256))),
+         aggregation_channels=(64, 128, 256),
+         fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')),
+         fps_sample_range_lists=((-1), (-1), (512, -1)),
+         norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
+         sa_cfg=dict(
+             type='PointSAModuleMSG',
+             pool_mod='max',
+             use_xyz=True,
+             normalize_xyz=False)),
+     bbox_head=dict(
+         type='SSD3DHead',
+         vote_module_cfg=dict(
+             in_channels=256,
+             num_points=256,
+             gt_per_seed=1,
+             conv_channels=(128, ),
+             conv_cfg=dict(type='Conv1d'),
+             norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
+             with_res_feat=False,
+             vote_xyz_range=(3.0, 3.0, 2.0)),
+         vote_aggregation_cfg=dict(
+             type='PointSAModuleMSG',
+             num_point=256,
+             radii=(4.8, 6.4),
+             sample_nums=(16, 32),
+             mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)),
+             norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
+             use_xyz=True,
+             normalize_xyz=False,
+             bias=True),
+         pred_layer_cfg=dict(
+             in_channels=1536,
+             shared_conv_channels=(512, 128),
+             cls_conv_channels=(128, ),
+             reg_conv_channels=(128, ),
+             conv_cfg=dict(type='Conv1d'),
+             norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
+             bias=True),
+         objectness_loss=dict(
+             type='mmdet.CrossEntropyLoss',
+             use_sigmoid=True,
+             reduction='sum',
+             loss_weight=1.0),
+         center_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
+         dir_class_loss=dict(
+             type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+         dir_res_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
+         size_res_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
+         corner_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
+         vote_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0)),
+     # model training and testing settings
+     train_cfg=dict(
+         sample_mode='spec', pos_distance_thr=10.0, expand_dims_length=0.05),
+     test_cfg=dict(
+         nms_cfg=dict(type='nms', iou_thr=0.1),
+         sample_mode='spec',
+         score_thr=0.0,
+         per_class_proposal=True,
+         max_output_num=100))
configs/_base_/models/cascade-mask-rcnn_r50_fpn.py ADDED
@@ -0,0 +1,199 @@
+ # model settings
+ model = dict(
+     type='CascadeRCNN',
+     pretrained='torchvision://resnet50',
+     _scope_='mmdet',
+     backbone=dict(
+         type='ResNet',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         frozen_stages=1,
+         norm_cfg=dict(type='BN', requires_grad=True),
+         norm_eval=True,
+         style='pytorch'),
+     neck=dict(
+         type='FPN',
+         in_channels=[256, 512, 1024, 2048],
+         out_channels=256,
+         num_outs=5),
+     rpn_head=dict(
+         type='RPNHead',
+         in_channels=256,
+         feat_channels=256,
+         anchor_generator=dict(
+             type='AnchorGenerator',
+             scales=[8],
+             ratios=[0.5, 1.0, 2.0],
+             strides=[4, 8, 16, 32, 64]),
+         bbox_coder=dict(
+             type='DeltaXYWHBBoxCoder',
+             target_means=[.0, .0, .0, .0],
+             target_stds=[1.0, 1.0, 1.0, 1.0]),
+         loss_cls=dict(
+             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+         loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
+     roi_head=dict(
+         type='CascadeRoIHead',
+         num_stages=3,
+         stage_loss_weights=[1, 0.5, 0.25],
+         bbox_roi_extractor=dict(
+             type='SingleRoIExtractor',
+             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+             out_channels=256,
+             featmap_strides=[4, 8, 16, 32]),
+         bbox_head=[
+             dict(
+                 type='Shared2FCBBoxHead',
+                 in_channels=256,
+                 fc_out_channels=1024,
+                 roi_feat_size=7,
+                 num_classes=80,
+                 bbox_coder=dict(
+                     type='DeltaXYWHBBoxCoder',
+                     target_means=[0., 0., 0., 0.],
+                     target_stds=[0.1, 0.1, 0.2, 0.2]),
+                 reg_class_agnostic=True,
+                 loss_cls=dict(
+                     type='CrossEntropyLoss',
+                     use_sigmoid=False,
+                     loss_weight=1.0),
+                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+                                loss_weight=1.0)),
+             dict(
+                 type='Shared2FCBBoxHead',
+                 in_channels=256,
+                 fc_out_channels=1024,
+                 roi_feat_size=7,
+                 num_classes=80,
+                 bbox_coder=dict(
+                     type='DeltaXYWHBBoxCoder',
+                     target_means=[0., 0., 0., 0.],
+                     target_stds=[0.05, 0.05, 0.1, 0.1]),
+                 reg_class_agnostic=True,
+                 loss_cls=dict(
+                     type='CrossEntropyLoss',
+                     use_sigmoid=False,
+                     loss_weight=1.0),
+                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
+                                loss_weight=1.0)),
+             dict(
+                 type='Shared2FCBBoxHead',
+                 in_channels=256,
+                 fc_out_channels=1024,
+                 roi_feat_size=7,
+                 num_classes=80,
+                 bbox_coder=dict(
+                     type='DeltaXYWHBBoxCoder',
+                     target_means=[0., 0., 0., 0.],
+                     target_stds=[0.033, 0.033, 0.067, 0.067]),
+                 reg_class_agnostic=True,
+                 loss_cls=dict(
+                     type='CrossEntropyLoss',
+                     use_sigmoid=False,
+                     loss_weight=1.0),
+                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
+         ],
+         mask_roi_extractor=dict(
+             type='SingleRoIExtractor',
+             roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
+             out_channels=256,
+             featmap_strides=[4, 8, 16, 32]),
+         mask_head=dict(
+             type='FCNMaskHead',
+             num_convs=4,
+             in_channels=256,
+             conv_out_channels=256,
+             num_classes=80,
+             loss_mask=dict(
+                 type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
+     # model training and testing settings
+     train_cfg=dict(
+         rpn=dict(
+             assigner=dict(
+                 type='MaxIoUAssigner',
+                 pos_iou_thr=0.7,
+                 neg_iou_thr=0.3,
+                 min_pos_iou=0.3,
+                 match_low_quality=True,
+                 ignore_iof_thr=-1),
+             sampler=dict(
+                 type='RandomSampler',
+                 num=256,
+                 pos_fraction=0.5,
+                 neg_pos_ub=-1,
+                 add_gt_as_proposals=False),
+             allowed_border=0,
+             pos_weight=-1,
+             debug=False),
+         rpn_proposal=dict(
+             nms_pre=2000,
+             nms_post=2000,
+             max_per_img=2000,
+             nms=dict(type='nms', iou_threshold=0.7),
+             min_bbox_size=0),
+         rcnn=[
+             dict(
+                 assigner=dict(
+                     type='MaxIoUAssigner',
+                     pos_iou_thr=0.5,
+                     neg_iou_thr=0.5,
+                     min_pos_iou=0.5,
+                     match_low_quality=False,
+                     ignore_iof_thr=-1),
+                 sampler=dict(
+                     type='RandomSampler',
+                     num=512,
+                     pos_fraction=0.25,
+                     neg_pos_ub=-1,
+                     add_gt_as_proposals=True),
+                 mask_size=28,
+                 pos_weight=-1,
+                 debug=False),
+             dict(
+                 assigner=dict(
+                     type='MaxIoUAssigner',
+                     pos_iou_thr=0.6,
+                     neg_iou_thr=0.6,
+                     min_pos_iou=0.6,
+                     match_low_quality=False,
+                     ignore_iof_thr=-1),
+                 sampler=dict(
+                     type='RandomSampler',
+                     num=512,
+                     pos_fraction=0.25,
+                     neg_pos_ub=-1,
+                     add_gt_as_proposals=True),
+                 mask_size=28,
+                 pos_weight=-1,
+                 debug=False),
+             dict(
+                 assigner=dict(
+                     type='MaxIoUAssigner',
+                     pos_iou_thr=0.7,
+                     neg_iou_thr=0.7,
+                     min_pos_iou=0.7,
+                     match_low_quality=False,
+                     ignore_iof_thr=-1),
+                 sampler=dict(
+                     type='RandomSampler',
+                     num=512,
+                     pos_fraction=0.25,
+                     neg_pos_ub=-1,
+                     add_gt_as_proposals=True),
+                 mask_size=28,
+                 pos_weight=-1,
+                 debug=False)
+         ]),
+     test_cfg=dict(
+         rpn=dict(
+             nms_pre=1000,
+             nms_post=1000,
+             max_per_img=1000,
+             nms=dict(type='nms', iou_threshold=0.7),
+             min_bbox_size=0),
+         rcnn=dict(
+             score_thr=0.05,
+             nms=dict(type='nms', iou_threshold=0.5),
+             max_per_img=100,
+             mask_thr_binary=0.5)))
configs/_base_/models/centerpoint_pillar02_second_secfpn_nus.py ADDED
@@ -0,0 +1,89 @@
+ voxel_size = [0.2, 0.2, 8]
+ model = dict(
+     type='CenterPoint',
+     data_preprocessor=dict(
+         type='Det3DDataPreprocessor',
+         voxel=True,
+         voxel_layer=dict(
+             max_num_points=20,
+             voxel_size=voxel_size,
+             max_voxels=(30000, 40000))),
+     pts_voxel_encoder=dict(
+         type='PillarFeatureNet',
+         in_channels=5,
+         feat_channels=[64],
+         with_distance=False,
+         voxel_size=(0.2, 0.2, 8),
+         norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
+         legacy=False),
+     pts_middle_encoder=dict(
+         type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)),
+     pts_backbone=dict(
+         type='SECOND',
+         in_channels=64,
+         out_channels=[64, 128, 256],
+         layer_nums=[3, 5, 5],
+         layer_strides=[2, 2, 2],
+         norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
+         conv_cfg=dict(type='Conv2d', bias=False)),
+     pts_neck=dict(
+         type='SECONDFPN',
+         in_channels=[64, 128, 256],
+         out_channels=[128, 128, 128],
+         upsample_strides=[0.5, 1, 2],
+         norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
+         upsample_cfg=dict(type='deconv', bias=False),
+         use_conv_for_no_stride=True),
+     pts_bbox_head=dict(
+         type='CenterHead',
+         in_channels=sum([128, 128, 128]),
+         tasks=[
+             dict(num_class=1, class_names=['car']),
+             dict(num_class=2, class_names=['truck', 'construction_vehicle']),
+             dict(num_class=2, class_names=['bus', 'trailer']),
+             dict(num_class=1, class_names=['barrier']),
+             dict(num_class=2, class_names=['motorcycle', 'bicycle']),
+             dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
+         ],
+         common_heads=dict(
+             reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
+         share_conv_channel=64,
+         bbox_coder=dict(
+             type='CenterPointBBoxCoder',
+             post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
+             max_num=500,
+             score_threshold=0.1,
+             out_size_factor=4,
+             voxel_size=voxel_size[:2],
+             code_size=9),
+         separate_head=dict(
+             type='SeparateHead', init_bias=-2.19, final_kernel=3),
+         loss_cls=dict(type='mmdet.GaussianFocalLoss', reduction='mean'),
+         loss_bbox=dict(
+             type='mmdet.L1Loss', reduction='mean', loss_weight=0.25),
+         norm_bbox=True),
+     # model training and testing settings
+     train_cfg=dict(
+         pts=dict(
+             grid_size=[512, 512, 1],
+             voxel_size=voxel_size,
+             out_size_factor=4,
+             dense_reg=1,
+             gaussian_overlap=0.1,
+             max_objs=500,
+             min_radius=2,
+             code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
+     test_cfg=dict(
+         pts=dict(
+             post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
+             max_per_img=500,
+             max_pool_nms=False,
+             min_radius=[4, 12, 10, 1, 0.85, 0.175],
+             score_threshold=0.1,
+             pc_range=[-51.2, -51.2],
+             out_size_factor=4,
+             voxel_size=voxel_size[:2],
+             nms_type='rotate',
+             pre_max_size=1000,
+             post_max_size=83,
+             nms_thr=0.2)))
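A quick sanity check on the geometry this config implies: with 0.2 m pillars over the usual nuScenes x/y extent of [-51.2 m, 51.2 m] (the pc_range used in test_cfg), the scatter grid comes out at exactly the configured output_shape. An illustrative check, assuming that range:

# Sanity check (illustrative, not part of this commit): the BEV grid implied
# by voxel_size and the nuScenes x/y range matches output_shape=(512, 512).
voxel_size = [0.2, 0.2, 8]
x_min, x_max = -51.2, 51.2
bev_cells = round((x_max - x_min) / voxel_size[0])  # 102.4 / 0.2 = 512
assert bev_cells == 512
# out_size_factor=4 then yields 512 / 4 = 128 cells in each head's heatmap.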
configs/_base_/models/centerpoint_voxel01_second_secfpn_nus.py ADDED
@@ -0,0 +1,89 @@
+ voxel_size = [0.1, 0.1, 0.2]
+ model = dict(
+     type='CenterPoint',
+     data_preprocessor=dict(
+         type='Det3DDataPreprocessor',
+         voxel=True,
+         voxel_layer=dict(
+             max_num_points=10,
+             voxel_size=voxel_size,
+             max_voxels=(90000, 120000))),
+     pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
+     pts_middle_encoder=dict(
+         type='SparseEncoder',
+         in_channels=5,
+         sparse_shape=[41, 1024, 1024],
+         output_channels=128,
+         order=('conv', 'norm', 'act'),
+         encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128),
+                           (128, 128)),
+         encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),
+         block_type='basicblock'),
+     pts_backbone=dict(
+         type='SECOND',
+         in_channels=256,
+         out_channels=[128, 256],
+         layer_nums=[5, 5],
+         layer_strides=[1, 2],
+         norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
+         conv_cfg=dict(type='Conv2d', bias=False)),
+     pts_neck=dict(
+         type='SECONDFPN',
+         in_channels=[128, 256],
+         out_channels=[256, 256],
+         upsample_strides=[1, 2],
+         norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
+         upsample_cfg=dict(type='deconv', bias=False),
+         use_conv_for_no_stride=True),
+     pts_bbox_head=dict(
+         type='CenterHead',
+         in_channels=sum([256, 256]),
+         tasks=[
+             dict(num_class=1, class_names=['car']),
+             dict(num_class=2, class_names=['truck', 'construction_vehicle']),
+             dict(num_class=2, class_names=['bus', 'trailer']),
+             dict(num_class=1, class_names=['barrier']),
+             dict(num_class=2, class_names=['motorcycle', 'bicycle']),
+             dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
+         ],
+         common_heads=dict(
+             reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
+         share_conv_channel=64,
+         bbox_coder=dict(
+             type='CenterPointBBoxCoder',
+             post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
+             max_num=500,
+             score_threshold=0.1,
+             out_size_factor=8,
+             voxel_size=voxel_size[:2],
+             code_size=9),
+         separate_head=dict(
+             type='SeparateHead', init_bias=-2.19, final_kernel=3),
+         loss_cls=dict(type='mmdet.GaussianFocalLoss', reduction='mean'),
+         loss_bbox=dict(
+             type='mmdet.L1Loss', reduction='mean', loss_weight=0.25),
+         norm_bbox=True),
+     # model training and testing settings
+     train_cfg=dict(
+         pts=dict(
+             grid_size=[1024, 1024, 40],
+             voxel_size=voxel_size,
+             out_size_factor=8,
+             dense_reg=1,
+             gaussian_overlap=0.1,
+             max_objs=500,
+             min_radius=2,
+             code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
+     test_cfg=dict(
+         pts=dict(
+             post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
+             max_per_img=500,
+             max_pool_nms=False,
+             min_radius=[4, 12, 10, 1, 0.85, 0.175],
+             score_threshold=0.1,
+             out_size_factor=8,
+             voxel_size=voxel_size[:2],
+             nms_type='rotate',
+             pre_max_size=1000,
+             post_max_size=83,
+             nms_thr=0.2)))
configs/_base_/models/cylinder3d.py ADDED
@@ -0,0 +1,41 @@
+ grid_shape = [480, 360, 32]
+ model = dict(
+     type='Cylinder3D',
+     data_preprocessor=dict(
+         type='Det3DDataPreprocessor',
+         voxel=True,
+         voxel_type='cylindrical',
+         voxel_layer=dict(
+             grid_shape=grid_shape,
+             point_cloud_range=[0, -3.14159265359, -4, 50, 3.14159265359, 2],
+             max_num_points=-1,
+             max_voxels=-1,
+         ),
+     ),
+     voxel_encoder=dict(
+         type='SegVFE',
+         feat_channels=[64, 128, 256, 256],
+         in_channels=6,
+         with_voxel_center=True,
+         feat_compression=16,
+         return_point_feats=False),
+     backbone=dict(
+         type='Asymm3DSpconv',
+         grid_size=grid_shape,
+         input_channels=16,
+         base_channels=32,
+         norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.1)),
+     decode_head=dict(
+         type='Cylinder3DHead',
+         channels=128,
+         num_classes=20,
+         loss_ce=dict(
+             type='mmdet.CrossEntropyLoss',
+             use_sigmoid=False,
+             class_weight=None,
+             loss_weight=1.0),
+         loss_lovasz=dict(type='LovaszLoss', loss_weight=1.0, reduction='none'),
+     ),
+     train_cfg=None,
+     test_cfg=dict(mode='whole'),
+ )
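The cylindrical voxel layer above spans rho in [0, 50] m, phi over the full circle, and z in [-4, 2] m; dividing each extent by grid_shape gives the per-bin resolution. A small illustrative check derived purely from the values in this file:

import math

# Per-bin resolution of the cylindrical grid (illustrative, not part of
# this commit); ranges follow the (rho, phi, z) point_cloud_range above.
grid_shape = [480, 360, 32]
rho_res = (50 - 0) / grid_shape[0]                   # ~0.104 m per bin
phi_res = math.degrees(2 * math.pi / grid_shape[1])  # 1.0 degree per bin
z_res = (2 - (-4)) / grid_shape[2]                   # 0.1875 m per bin
print(rho_res, phi_res, z_res)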
configs/_base_/models/dgcnn.py ADDED
@@ -0,0 +1,29 @@
+ # model settings
+ model = dict(
+     type='EncoderDecoder3D',
+     data_preprocessor=dict(type='Det3DDataPreprocessor'),
+     backbone=dict(
+         type='DGCNNBackbone',
+         in_channels=9,  # [xyz, rgb, normal_xyz], modified with dataset
+         num_samples=(20, 20, 20),
+         knn_modes=('D-KNN', 'F-KNN', 'F-KNN'),
+         radius=(None, None, None),
+         gf_channels=((64, 64), (64, 64), (64, )),
+         fa_channels=(1024, ),
+         act_cfg=dict(type='LeakyReLU', negative_slope=0.2)),
+     decode_head=dict(
+         type='DGCNNHead',
+         fp_channels=(1216, 512),
+         channels=256,
+         dropout_ratio=0.5,
+         conv_cfg=dict(type='Conv1d'),
+         norm_cfg=dict(type='BN1d'),
+         act_cfg=dict(type='LeakyReLU', negative_slope=0.2),
+         loss_decode=dict(
+             type='mmdet.CrossEntropyLoss',
+             use_sigmoid=False,
+             class_weight=None,  # modified with dataset
+             loss_weight=1.0)),
+     # model training and testing settings
+     train_cfg=dict(),
+     test_cfg=dict(mode='slide'))
configs/_base_/models/fcaf3d.py ADDED
@@ -0,0 +1,20 @@
+ model = dict(
+     type='MinkSingleStage3DDetector',
+     data_preprocessor=dict(type='Det3DDataPreprocessor'),
+     backbone=dict(type='MinkResNet', in_channels=3, depth=34),
+     bbox_head=dict(
+         type='FCAF3DHead',
+         in_channels=(64, 128, 256, 512),
+         out_channels=128,
+         voxel_size=.01,
+         pts_prune_threshold=100000,
+         pts_assign_threshold=27,
+         pts_center_threshold=18,
+         num_classes=18,
+         num_reg_outs=6,
+         center_loss=dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True),
+         bbox_loss=dict(type='AxisAlignedIoULoss'),
+         cls_loss=dict(type='mmdet.FocalLoss'),
+     ),
+     train_cfg=dict(),
+     test_cfg=dict(nms_pre=1000, iou_thr=.5, score_thr=.01))
configs/_base_/models/fcos3d.py ADDED
@@ -0,0 +1,86 @@
+ # model settings
+ model = dict(
+     type='FCOSMono3D',
+     data_preprocessor=dict(
+         type='Det3DDataPreprocessor',
+         mean=[123.675, 116.28, 103.53],
+         std=[58.395, 57.12, 57.375],
+         bgr_to_rgb=True,
+         pad_size_divisor=32),
+     backbone=dict(
+         type='mmdet.ResNet',
+         depth=101,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         frozen_stages=1,
+         norm_cfg=dict(type='BN', requires_grad=False),
+         norm_eval=True,
+         style='caffe',
+         init_cfg=dict(
+             type='Pretrained',
+             checkpoint='open-mmlab://detectron2/resnet101_caffe')),
+     neck=dict(
+         type='mmdet.FPN',
+         in_channels=[256, 512, 1024, 2048],
+         out_channels=256,
+         start_level=1,
+         add_extra_convs='on_output',
+         num_outs=5,
+         relu_before_extra_convs=True),
+     bbox_head=dict(
+         type='FCOSMono3DHead',
+         num_classes=10,
+         in_channels=256,
+         stacked_convs=2,
+         feat_channels=256,
+         use_direction_classifier=True,
+         diff_rad_by_sin=True,
+         pred_attrs=True,
+         pred_velo=True,
+         dir_offset=0.7854,  # pi/4
+         dir_limit_offset=0,
+         strides=[8, 16, 32, 64, 128],
+         group_reg_dims=(2, 1, 3, 1, 2),  # offset, depth, size, rot, velo
+         cls_branch=(256, ),
+         reg_branch=(
+             (256, ),  # offset
+             (256, ),  # depth
+             (256, ),  # size
+             (256, ),  # rot
+             ()  # velo
+         ),
+         dir_branch=(256, ),
+         attr_branch=(256, ),
+         loss_cls=dict(
+             type='mmdet.FocalLoss',
+             use_sigmoid=True,
+             gamma=2.0,
+             alpha=0.25,
+             loss_weight=1.0),
+         loss_bbox=dict(
+             type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+         loss_dir=dict(
+             type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+         loss_attr=dict(
+             type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+         loss_centerness=dict(
+             type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+         bbox_coder=dict(type='FCOS3DBBoxCoder', code_size=9),
+         norm_on_bbox=True,
+         centerness_on_reg=True,
+         center_sampling=True,
+         conv_bias=True,
+         dcn_on_last_conv=True),
+     train_cfg=dict(
+         allowed_border=0,
+         code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],
+         pos_weight=-1,
+         debug=False),
+     test_cfg=dict(
+         use_rotate_nms=True,
+         nms_across_levels=False,
+         nms_pre=1000,
+         nms_thr=0.8,
+         score_thr=0.05,
+         min_bbox_size=0,
+         max_per_img=200))
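The comment on group_reg_dims above spells out how the nine regression channels are split; as a cross-check, the group sizes must sum to the bbox_coder's code_size. A trivial illustrative check, using only values from this file:

# Illustrative consistency check (not part of this commit): offset (2) +
# depth (1) + size (3) + rot (1) + velo (2) must equal the coder's code_size.
group_reg_dims = (2, 1, 3, 1, 2)
assert sum(group_reg_dims) == 9  # FCOS3DBBoxCoder code_size above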
configs/_base_/models/groupfree3d.py ADDED
@@ -0,0 +1,75 @@
+ model = dict(
+     type='GroupFree3DNet',
+     data_preprocessor=dict(type='Det3DDataPreprocessor'),
+     backbone=dict(
+         type='PointNet2SASSG',
+         in_channels=3,
+         num_points=(2048, 1024, 512, 256),
+         radius=(0.2, 0.4, 0.8, 1.2),
+         num_samples=(64, 32, 16, 16),
+         sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
+                      (128, 128, 256)),
+         fp_channels=((256, 256), (256, 288)),
+         norm_cfg=dict(type='BN2d'),
+         sa_cfg=dict(
+             type='PointSAModule',
+             pool_mod='max',
+             use_xyz=True,
+             normalize_xyz=True)),
+     bbox_head=dict(
+         type='GroupFree3DHead',
+         in_channels=288,
+         num_decoder_layers=6,
+         num_proposal=256,
+         transformerlayers=dict(
+             type='BaseTransformerLayer',
+             attn_cfgs=dict(
+                 type='GroupFree3DMHA',
+                 embed_dims=288,
+                 num_heads=8,
+                 attn_drop=0.1,
+                 dropout_layer=dict(type='Dropout', drop_prob=0.1)),
+             ffn_cfgs=dict(
+                 embed_dims=288,
+                 feedforward_channels=2048,
+                 ffn_drop=0.1,
+                 act_cfg=dict(type='ReLU', inplace=True)),
+             operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn',
+                              'norm')),
+         pred_layer_cfg=dict(
+             in_channels=288, shared_conv_channels=(288, 288), bias=True),
+         sampling_objectness_loss=dict(
+             type='mmdet.FocalLoss',
+             use_sigmoid=True,
+             gamma=2.0,
+             alpha=0.25,
+             loss_weight=8.0),
+         objectness_loss=dict(
+             type='mmdet.FocalLoss',
+             use_sigmoid=True,
+             gamma=2.0,
+             alpha=0.25,
+             loss_weight=1.0),
+         center_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
+         dir_class_loss=dict(
+             type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+         dir_res_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
+         size_class_loss=dict(
+             type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+         size_res_loss=dict(
+             type='mmdet.SmoothL1Loss',
+             beta=1.0,
+             reduction='sum',
+             loss_weight=10.0),
+         semantic_loss=dict(
+             type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
+     # model training and testing settings
+     train_cfg=dict(sample_mode='kps'),
+     test_cfg=dict(
+         sample_mode='kps',
+         nms_thr=0.25,
+         score_thr=0.0,
+         per_class_proposal=True,
+         prediction_stages='last'))
configs/_base_/models/h3dnet.py ADDED
@@ -0,0 +1,351 @@
+ primitive_z_cfg = dict(
+     type='PrimitiveHead',
+     num_dims=2,
+     num_classes=18,
+     primitive_mode='z',
+     upper_thresh=100.0,
+     surface_thresh=0.5,
+     vote_module_cfg=dict(
+         in_channels=256,
+         vote_per_seed=1,
+         gt_per_seed=1,
+         conv_channels=(256, 256),
+         conv_cfg=dict(type='Conv1d'),
+         norm_cfg=dict(type='BN1d'),
+         norm_feats=True,
+         vote_loss=dict(
+             type='ChamferDistance',
+             mode='l1',
+             reduction='none',
+             loss_dst_weight=10.0)),
+     vote_aggregation_cfg=dict(
+         type='PointSAModule',
+         num_point=1024,
+         radius=0.3,
+         num_sample=16,
+         mlp_channels=[256, 128, 128, 128],
+         use_xyz=True,
+         normalize_xyz=True),
+     feat_channels=(128, 128),
+     conv_cfg=dict(type='Conv1d'),
+     norm_cfg=dict(type='BN1d'),
+     objectness_loss=dict(
+         type='mmdet.CrossEntropyLoss',
+         class_weight=[0.4, 0.6],
+         reduction='mean',
+         loss_weight=30.0),
+     center_loss=dict(
+         type='ChamferDistance',
+         mode='l1',
+         reduction='sum',
+         loss_src_weight=0.5,
+         loss_dst_weight=0.5),
+     semantic_reg_loss=dict(
+         type='ChamferDistance',
+         mode='l1',
+         reduction='sum',
+         loss_src_weight=0.5,
+         loss_dst_weight=0.5),
+     semantic_cls_loss=dict(
+         type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+     train_cfg=dict(
+         sample_mode='vote',
+         dist_thresh=0.2,
+         var_thresh=1e-2,
+         lower_thresh=1e-6,
+         num_point=100,
+         num_point_line=10,
+         line_thresh=0.2),
+     test_cfg=dict(sample_mode='seed'))
+
+ primitive_xy_cfg = dict(
+     type='PrimitiveHead',
+     num_dims=1,
+     num_classes=18,
+     primitive_mode='xy',
+     upper_thresh=100.0,
+     surface_thresh=0.5,
+     vote_module_cfg=dict(
+         in_channels=256,
+         vote_per_seed=1,
+         gt_per_seed=1,
+         conv_channels=(256, 256),
+         conv_cfg=dict(type='Conv1d'),
+         norm_cfg=dict(type='BN1d'),
+         norm_feats=True,
+         vote_loss=dict(
+             type='ChamferDistance',
+             mode='l1',
+             reduction='none',
+             loss_dst_weight=10.0)),
+     vote_aggregation_cfg=dict(
+         type='PointSAModule',
+         num_point=1024,
+         radius=0.3,
+         num_sample=16,
+         mlp_channels=[256, 128, 128, 128],
+         use_xyz=True,
+         normalize_xyz=True),
+     feat_channels=(128, 128),
+     conv_cfg=dict(type='Conv1d'),
+     norm_cfg=dict(type='BN1d'),
+     objectness_loss=dict(
+         type='mmdet.CrossEntropyLoss',
+         class_weight=[0.4, 0.6],
+         reduction='mean',
+         loss_weight=30.0),
+     center_loss=dict(
+         type='ChamferDistance',
+         mode='l1',
+         reduction='sum',
+         loss_src_weight=0.5,
+         loss_dst_weight=0.5),
+     semantic_reg_loss=dict(
+         type='ChamferDistance',
+         mode='l1',
+         reduction='sum',
+         loss_src_weight=0.5,
+         loss_dst_weight=0.5),
+     semantic_cls_loss=dict(
+         type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+     train_cfg=dict(
+         sample_mode='vote',
+         dist_thresh=0.2,
+         var_thresh=1e-2,
+         lower_thresh=1e-6,
+         num_point=100,
+         num_point_line=10,
+         line_thresh=0.2),
+     test_cfg=dict(sample_mode='seed'))
+
+ primitive_line_cfg = dict(
+     type='PrimitiveHead',
+     num_dims=0,
+     num_classes=18,
+     primitive_mode='line',
+     upper_thresh=100.0,
+     surface_thresh=0.5,
+     vote_module_cfg=dict(
+         in_channels=256,
+         vote_per_seed=1,
+         gt_per_seed=1,
+         conv_channels=(256, 256),
+         conv_cfg=dict(type='Conv1d'),
+         norm_cfg=dict(type='BN1d'),
+         norm_feats=True,
+         vote_loss=dict(
+             type='ChamferDistance',
+             mode='l1',
+             reduction='none',
+             loss_dst_weight=10.0)),
+     vote_aggregation_cfg=dict(
+         type='PointSAModule',
+         num_point=1024,
+         radius=0.3,
+         num_sample=16,
+         mlp_channels=[256, 128, 128, 128],
+         use_xyz=True,
+         normalize_xyz=True),
+     feat_channels=(128, 128),
+     conv_cfg=dict(type='Conv1d'),
+     norm_cfg=dict(type='BN1d'),
+     objectness_loss=dict(
+         type='mmdet.CrossEntropyLoss',
+         class_weight=[0.4, 0.6],
+         reduction='mean',
+         loss_weight=30.0),
+     center_loss=dict(
+         type='ChamferDistance',
+         mode='l1',
+         reduction='sum',
+         loss_src_weight=1.0,
+         loss_dst_weight=1.0),
+     semantic_reg_loss=dict(
+         type='ChamferDistance',
+         mode='l1',
+         reduction='sum',
+         loss_src_weight=1.0,
+         loss_dst_weight=1.0),
+     semantic_cls_loss=dict(
+         type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=2.0),
+     train_cfg=dict(
+         sample_mode='vote',
+         dist_thresh=0.2,
+         var_thresh=1e-2,
+         lower_thresh=1e-6,
+         num_point=100,
+         num_point_line=10,
+         line_thresh=0.2),
+     test_cfg=dict(sample_mode='seed'))
+
+ model = dict(
+     type='H3DNet',
+     data_preprocessor=dict(type='Det3DDataPreprocessor'),
+     backbone=dict(
+         type='MultiBackbone',
+         num_streams=4,
+         suffixes=['net0', 'net1', 'net2', 'net3'],
+         conv_cfg=dict(type='Conv1d'),
+         norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.01),
+         act_cfg=dict(type='ReLU'),
+         backbones=dict(
+             type='PointNet2SASSG',
+             in_channels=4,
+             num_points=(2048, 1024, 512, 256),
+             radius=(0.2, 0.4, 0.8, 1.2),
+             num_samples=(64, 32, 16, 16),
+             sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
+                          (128, 128, 256)),
+             fp_channels=((256, 256), (256, 256)),
+             norm_cfg=dict(type='BN2d'),
+             sa_cfg=dict(
+                 type='PointSAModule',
+                 pool_mod='max',
+                 use_xyz=True,
+                 normalize_xyz=True))),
+     rpn_head=dict(
+         type='VoteHead',
+         vote_module_cfg=dict(
+             in_channels=256,
+             vote_per_seed=1,
+             gt_per_seed=3,
+             conv_channels=(256, 256),
+             conv_cfg=dict(type='Conv1d'),
+             norm_cfg=dict(type='BN1d'),
+             norm_feats=True,
+             vote_loss=dict(
+                 type='ChamferDistance',
+                 mode='l1',
+                 reduction='none',
+                 loss_dst_weight=10.0)),
+         vote_aggregation_cfg=dict(
+             type='PointSAModule',
+             num_point=256,
+             radius=0.3,
+             num_sample=16,
+             mlp_channels=[256, 128, 128, 128],
+             use_xyz=True,
+             normalize_xyz=True),
+         pred_layer_cfg=dict(
+             in_channels=128, shared_conv_channels=(128, 128), bias=True),
+         objectness_loss=dict(
+             type='mmdet.CrossEntropyLoss',
+             class_weight=[0.2, 0.8],
+             reduction='sum',
+             loss_weight=5.0),
+         center_loss=dict(
+             type='ChamferDistance',
+             mode='l2',
+             reduction='sum',
+             loss_src_weight=10.0,
+             loss_dst_weight=10.0),
+         dir_class_loss=dict(
+             type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+         dir_res_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
+         size_class_loss=dict(
+             type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
+         size_res_loss=dict(
+             type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
+         semantic_loss=dict(
+             type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
+     roi_head=dict(
+         type='H3DRoIHead',
+         primitive_list=[primitive_z_cfg, primitive_xy_cfg, primitive_line_cfg],
+         bbox_head=dict(
+             type='H3DBboxHead',
+             gt_per_seed=3,
+             num_proposal=256,
+             suface_matching_cfg=dict(
+                 type='PointSAModule',
+                 num_point=256 * 6,
+                 radius=0.5,
+                 num_sample=32,
+                 mlp_channels=[128 + 6, 128, 64, 32],
+                 use_xyz=True,
+                 normalize_xyz=True),
+             line_matching_cfg=dict(
+                 type='PointSAModule',
+                 num_point=256 * 12,
+                 radius=0.5,
+                 num_sample=32,
+                 mlp_channels=[128 + 12, 128, 64, 32],
+                 use_xyz=True,
+                 normalize_xyz=True),
+             primitive_refine_channels=[128, 128, 128],
+             upper_thresh=100.0,
+             surface_thresh=0.5,
+             line_thresh=0.5,
+             conv_cfg=dict(type='Conv1d'),
+             norm_cfg=dict(type='BN1d'),
+             objectness_loss=dict(
+                 type='mmdet.CrossEntropyLoss',
+                 class_weight=[0.2, 0.8],
+                 reduction='sum',
+                 loss_weight=5.0),
+             center_loss=dict(
+                 type='ChamferDistance',
+                 mode='l2',
+                 reduction='sum',
+                 loss_src_weight=10.0,
+                 loss_dst_weight=10.0),
+             dir_class_loss=dict(
+                 type='mmdet.CrossEntropyLoss',
+                 reduction='sum',
+                 loss_weight=0.1),
+             dir_res_loss=dict(
+                 type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
+             size_class_loss=dict(
+                 type='mmdet.CrossEntropyLoss',
+                 reduction='sum',
+                 loss_weight=0.1),
+             size_res_loss=dict(
+                 type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
+             semantic_loss=dict(
+                 type='mmdet.CrossEntropyLoss',
+                 reduction='sum',
+                 loss_weight=0.1),
+             cues_objectness_loss=dict(
+                 type='mmdet.CrossEntropyLoss',
+                 class_weight=[0.3, 0.7],
+                 reduction='mean',
+                 loss_weight=5.0),
+             cues_semantic_loss=dict(
+                 type='mmdet.CrossEntropyLoss',
+                 class_weight=[0.3, 0.7],
+                 reduction='mean',
+                 loss_weight=5.0),
+             proposal_objectness_loss=dict(
+                 type='mmdet.CrossEntropyLoss',
+                 class_weight=[0.2, 0.8],
+                 reduction='none',
+                 loss_weight=5.0),
+             primitive_center_loss=dict(
+                 type='mmdet.MSELoss', reduction='none', loss_weight=1.0))),
+     # model training and testing settings
+     train_cfg=dict(
+         rpn=dict(
+             pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mode='vote'),
+         rpn_proposal=dict(use_nms=False),
+         rcnn=dict(
+             pos_distance_thr=0.3,
+             neg_distance_thr=0.6,
+             sample_mode='vote',
+             far_threshold=0.6,
+             near_threshold=0.3,
+             mask_surface_threshold=0.3,
+             label_surface_threshold=0.3,
+             mask_line_threshold=0.3,
+             label_line_threshold=0.3)),
+     test_cfg=dict(
+         rpn=dict(
+             sample_mode='seed',
+             nms_thr=0.25,
+             score_thr=0.05,
+             per_class_proposal=True,
+             use_nms=False),
+         rcnn=dict(
+             sample_mode='seed',
+             nms_thr=0.25,
+             score_thr=0.05,
+             per_class_proposal=True)))
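The three primitive head configs above differ only in num_dims, primitive_mode, and a few loss weights. A hypothetical refactor sketch (not part of this commit) showing how the 'xy' and 'line' variants could be derived from the 'z' config instead of repeating the full dict:

import copy

# Refactor sketch only: derive the variants by overriding the differing
# fields of primitive_z_cfg (names refer to the dicts defined above).
primitive_xy_cfg = copy.deepcopy(primitive_z_cfg)
primitive_xy_cfg.update(num_dims=1, primitive_mode='xy')
primitive_line_cfg = copy.deepcopy(primitive_z_cfg)
primitive_line_cfg.update(num_dims=0, primitive_mode='line')
primitive_line_cfg['center_loss'].update(
    loss_src_weight=1.0, loss_dst_weight=1.0)
primitive_line_cfg['semantic_reg_loss'].update(
    loss_src_weight=1.0, loss_dst_weight=1.0)
primitive_line_cfg['semantic_cls_loss']['loss_weight'] = 2.0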
configs/_base_/models/imvotenet.py ADDED
@@ -0,0 +1,118 @@
+ model = dict(
+     type='ImVoteNet',
+     data_preprocessor=dict(
+         type='Det3DDataPreprocessor',
+         # use caffe img_norm
+         mean=[103.530, 116.280, 123.675],
+         std=[1.0, 1.0, 1.0],
+         bgr_to_rgb=False,
+         pad_size_divisor=32),
+     img_backbone=dict(
+         type='mmdet.ResNet',
+         depth=50,
+         num_stages=4,
+         out_indices=(0, 1, 2, 3),
+         frozen_stages=1,
+         norm_cfg=dict(type='BN', requires_grad=False),
+         norm_eval=True,
+         style='caffe'),
+     img_neck=dict(
+         type='mmdet.FPN',
+         in_channels=[256, 512, 1024, 2048],
+         out_channels=256,
+         num_outs=5),
+     img_rpn_head=dict(
+         _scope_='mmdet',
+         type='RPNHead',
+         in_channels=256,
+         feat_channels=256,
+         anchor_generator=dict(
+             type='AnchorGenerator',
+             scales=[8],
+             ratios=[0.5, 1.0, 2.0],
+             strides=[4, 8, 16, 32, 64]),
+         bbox_coder=dict(
+             type='DeltaXYWHBBoxCoder',
+             target_means=[.0, .0, .0, .0],
+             target_stds=[1.0, 1.0, 1.0, 1.0]),
+         loss_cls=dict(
+             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+         loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
+     img_roi_head=dict(
+         _scope_='mmdet',
+         type='StandardRoIHead',
+         bbox_roi_extractor=dict(
+             type='SingleRoIExtractor',
+             roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
+             out_channels=256,
+             featmap_strides=[4, 8, 16, 32]),
+         bbox_head=dict(
+             type='Shared2FCBBoxHead',
+             in_channels=256,
+             fc_out_channels=1024,
+             roi_feat_size=7,
+             num_classes=10,
+             bbox_coder=dict(
+                 type='DeltaXYWHBBoxCoder',
+                 target_means=[0., 0., 0., 0.],
+                 target_stds=[0.1, 0.1, 0.2, 0.2]),
+             reg_class_agnostic=False,
+             loss_cls=dict(
+                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+             loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
+
+     # model training and testing settings
+     train_cfg=dict(
+         _scope_='mmdet',
+         img_rpn=dict(
+             assigner=dict(
+                 type='MaxIoUAssigner',
+                 pos_iou_thr=0.7,
+                 neg_iou_thr=0.3,
+                 min_pos_iou=0.3,
+                 match_low_quality=True,
+                 ignore_iof_thr=-1),
+             sampler=dict(
+                 type='RandomSampler',
+                 num=256,
+                 pos_fraction=0.5,
+                 neg_pos_ub=-1,
+                 add_gt_as_proposals=False),
+             allowed_border=-1,
+             pos_weight=-1,
+             debug=False),
+         img_rpn_proposal=dict(
+             nms_across_levels=False,
+             nms_pre=2000,
+             nms_post=1000,
+             max_per_img=1000,
+             nms=dict(type='nms', iou_threshold=0.7),
+             min_bbox_size=0),
+         img_rcnn=dict(
+             assigner=dict(
+                 type='MaxIoUAssigner',
+                 pos_iou_thr=0.5,
+                 neg_iou_thr=0.5,
+                 min_pos_iou=0.5,
+                 match_low_quality=False,
+                 ignore_iof_thr=-1),
+             sampler=dict(
+                 type='RandomSampler',
+                 num=512,
+                 pos_fraction=0.25,
+                 neg_pos_ub=-1,
+                 add_gt_as_proposals=True),
+             pos_weight=-1,
+             debug=False)),
+     test_cfg=dict(
+         img_rpn=dict(
+             nms_across_levels=False,
+             nms_pre=1000,
+             nms_post=1000,
+             max_per_img=1000,
+             nms=dict(type='nms', iou_threshold=0.7),
+             min_bbox_size=0),
+         img_rcnn=dict(
+             score_thr=0.05,
+             nms=dict(type='nms', iou_threshold=0.5),
+             max_per_img=100)))