diff --git a/Pointcept/.gitignore b/Pointcept/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..7288b0a938b724007d2705abdd611bc5967b8e3e
--- /dev/null
+++ b/Pointcept/.gitignore
@@ -0,0 +1,16 @@
+image/
+__pycache__
+**/build/
+**/*.egg-info/
+**/dist/
+*.so
+exp
+weights
+data
+log
+outputs/
+.vscode
+.idea
+*/.DS_Store
+**/*.out
+Dockerfile
diff --git a/Pointcept/LICENSE b/Pointcept/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..ee1fac1b22ae96f38e681900a3181d3e70ac6e4f
--- /dev/null
+++ b/Pointcept/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2023 Pointcept
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Pointcept/README.md b/Pointcept/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..cf84efb36681d48d7f6d7500ff3723d853d3f709
--- /dev/null
+++ b/Pointcept/README.md
@@ -0,0 +1,896 @@
+
+
+
+
+
+
+
+
+
+
+
+
+[![Formatter](https://github.com/pointcept/pointcept/actions/workflows/formatter.yml/badge.svg)](https://github.com/pointcept/pointcept/actions/workflows/formatter.yml)
+
+**Pointcept** is a powerful and flexible codebase for point cloud perception research. It is also the official implementation of the following papers:
+- **Point Transformer V3: Simpler, Faster, Stronger**
+*Xiaoyang Wu, Li Jiang, Peng-Shuai Wang, Zhijian Liu, Xihui Liu, Yu Qiao, Wanli Ouyang, Tong He, Hengshuang Zhao*
+IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024 - Oral
+[ Backbone ] [PTv3] - [ [arXiv](https://arxiv.org/abs/2312.10035) ] [ [Bib](https://xywu.me/research/ptv3/bib.txt) ] [ [Project](https://github.com/Pointcept/PointTransformerV3) ] → [here](https://github.com/Pointcept/PointTransformerV3)
+
+- **OA-CNNs: Omni-Adaptive Sparse CNNs for 3D Semantic Segmentation**
+*Bohao Peng, Xiaoyang Wu, Li Jiang, Yukang Chen, Hengshuang Zhao, Zhuotao Tian, Jiaya Jia*
+IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024
+[ Backbone ] [ OA-CNNs ] - [ [arXiv](https://arxiv.org/abs/2403.14418) ] [ [Bib](https://xywu.me/research/oacnns/bib.txt) ] → [here](#oa-cnns)
+
+- **PonderV2: Pave the Way for 3D Foundation Model with A Universal Pre-training Paradigm**
+*Haoyi Zhu\*, Honghui Yang\*, Xiaoyang Wu\*, Di Huang\*, Sha Zhang, Xianglong He, Tong He, Hengshuang Zhao, Chunhua Shen, Yu Qiao, Wanli Ouyang*
+arXiv Preprint 2023
+[ Pretrain ] [PonderV2] - [ [arXiv](https://arxiv.org/abs/2310.08586) ] [ [Bib](https://xywu.me/research/ponderv2/bib.txt) ] [ [Project](https://github.com/OpenGVLab/PonderV2) ] → [here](https://github.com/OpenGVLab/PonderV2)
+
+
+- **Towards Large-scale 3D Representation Learning with Multi-dataset Point Prompt Training**
+*Xiaoyang Wu, Zhuotao Tian, Xin Wen, Bohao Peng, Xihui Liu, Kaicheng Yu, Hengshuang Zhao*
+IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024
+[ Pretrain ] [PPT] - [ [arXiv](https://arxiv.org/abs/2308.09718) ] [ [Bib](https://xywu.me/research/ppt/bib.txt) ] → [here](#point-prompt-training-ppt)
+
+- **Masked Scene Contrast: A Scalable Framework for Unsupervised 3D Representation Learning**
+*Xiaoyang Wu, Xin Wen, Xihui Liu, Hengshuang Zhao*
+IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2023
+[ Pretrain ] [ MSC ] - [ [arXiv](https://arxiv.org/abs/2303.14191) ] [ [Bib](https://xywu.me/research/msc/bib.txt) ] → [here](#masked-scene-contrast-msc)
+
+
+- **Learning Context-aware Classifier for Semantic Segmentation** (3D Part)
+*Zhuotao Tian, Jiequan Cui, Li Jiang, Xiaojuan Qi, Xin Lai, Yixin Chen, Shu Liu, Jiaya Jia*
+AAAI Conference on Artificial Intelligence (**AAAI**) 2023 - Oral
+[ SemSeg ] [ CAC ] - [ [arXiv](https://arxiv.org/abs/2303.11633) ] [ [Bib](https://xywu.me/research/cac/bib.txt) ] [ [2D Part](https://github.com/tianzhuotao/CAC) ] → [here](#context-aware-classifier)
+
+
+- **Point Transformer V2: Grouped Vector Attention and Partition-based Pooling**
+*Xiaoyang Wu, Yixing Lao, Li Jiang, Xihui Liu, Hengshuang Zhao*
+Conference on Neural Information Processing Systems (**NeurIPS**) 2022
+[ Backbone ] [ PTv2 ] - [ [arXiv](https://arxiv.org/abs/2210.05666) ] [ [Bib](https://xywu.me/research/ptv2/bib.txt) ] → [here](#point-transformers)
+
+
+- **Point Transformer**
+*Hengshuang Zhao, Li Jiang, Jiaya Jia, Philip Torr, Vladlen Koltun*
+IEEE International Conference on Computer Vision (**ICCV**) 2021 - Oral
+[ Backbone ] [ PTv1 ] - [ [arXiv](https://arxiv.org/abs/2012.09164) ] [ [Bib](https://hszhao.github.io/papers/iccv21_pointtransformer_bib.txt) ] → [here](#point-transformers)
+
+Additionally, **Pointcept** integrates the following excellent works (including those above):
+Backbone:
+[MinkUNet](https://github.com/NVIDIA/MinkowskiEngine) ([here](#sparseunet)),
+[SpUNet](https://github.com/traveller59/spconv) ([here](#sparseunet)),
+[SPVCNN](https://github.com/mit-han-lab/spvnas) ([here](#spvcnn)),
+[OACNNs](https://arxiv.org/abs/2403.14418) ([here](#oa-cnns)),
+[PTv1](https://arxiv.org/abs/2012.09164) ([here](#point-transformers)),
+[PTv2](https://arxiv.org/abs/2210.05666) ([here](#point-transformers)),
+[PTv3](https://arxiv.org/abs/2312.10035) ([here](#point-transformers)),
+[StratifiedFormer](https://github.com/dvlab-research/Stratified-Transformer) ([here](#stratified-transformer)),
+[OctFormer](https://github.com/octree-nn/octformer) ([here](#octformer)),
+[Swin3D](https://github.com/microsoft/Swin3D) ([here](#swin3d));
+Semantic Segmentation:
+[Mix3d](https://github.com/kumuji/mix3d) ([here](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-spunet-v1m1-0-base.py#L5)),
+[CAC](https://arxiv.org/abs/2303.11633) ([here](#context-aware-classifier));
+Instance Segmentation:
+[PointGroup](https://github.com/dvlab-research/PointGroup) ([here](#pointgroup));
+Pre-training:
+[PointContrast](https://github.com/facebookresearch/PointContrast) ([here](#pointcontrast)),
+[Contrastive Scene Contexts](https://github.com/facebookresearch/ContrastiveSceneContexts) ([here](#contrastive-scene-contexts)),
+[Masked Scene Contrast](https://arxiv.org/abs/2303.14191) ([here](#masked-scene-contrast-msc)),
+[Point Prompt Training](https://arxiv.org/abs/2308.09718) ([here](#point-prompt-training-ppt));
+Datasets:
+[ScanNet](http://www.scan-net.org/) ([here](#scannet-v2)),
+[ScanNet200](http://www.scan-net.org/) ([here](#scannet-v2)),
+[ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) ([here](#scannet)),
+[S3DIS](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1) ([here](#s3dis)),
+[Matterport3D](https://niessner.github.io/Matterport/) ([here](#matterport3d)),
+[ARKitScenes](https://github.com/apple/ARKitScenes),
+[Structured3D](https://structured3d-dataset.org/) ([here](#structured3d)),
+[SemanticKITTI](http://www.semantic-kitti.org/) ([here](#semantickitti)),
+[nuScenes](https://www.nuscenes.org/nuscenes) ([here](#nuscenes)),
+[ModelNet40](https://modelnet.cs.princeton.edu/) ([here](#modelnet)),
+[Waymo](https://waymo.com/open/) ([here](#waymo)).
+
+
+## Highlights
+- *May, 2024*: In v1.5.2, we redesigned the default structure for each dataset for better performance. Please **re-preprocess** datasets or **download** our preprocessed datasets from **[here](https://huggingface.co/Pointcept)**.
+- *Apr, 2024*: **PTv3** is selected as one of the 90 **Oral** papers (3.3% accepted papers, 0.78% submissions) by CVPR'24!
+- *Mar, 2024*: We release code for **OA-CNNs**, accepted by CVPR'24. Issues related to **OA-CNNs** can be directed to @Pbihao.
+- *Feb, 2024*: **PTv3** and **PPT** are accepted by CVPR'24, and another **two** papers by our Pointcept team have also been accepted by CVPR'24 🎉🎉🎉. We will make them publicly available soon!
+- *Dec, 2023*: **PTv3** is released on arXiv, and the code is available in Pointcept. PTv3 is an efficient backbone model that achieves SOTA performances across indoor and outdoor scenarios.
+- *Aug, 2023*: **PPT** is released on arXiv. PPT presents a multi-dataset pre-training framework that achieves SOTA performance in both **indoor** and **outdoor** scenarios. It is compatible with various existing pre-training frameworks and backbones. A **pre-release** version of the code is accessible; for those interested, please feel free to contact me directly for access.
+- *Mar, 2023*: We released our codebase, **Pointcept**, a highly potent tool for point cloud representation learning and perception. We welcome new work to join the _Pointcept_ family and highly recommend reading [Quick Start](#quick-start) before starting your trial.
+- *Feb, 2023*: **MSC** and **CeCo** were accepted by CVPR 2023. _MSC_ is a highly efficient and effective pretraining framework that facilitates cross-dataset large-scale pretraining, while _CeCo_ is a segmentation method specifically designed for long-tail datasets. Both approaches are compatible with all existing backbone models in our codebase, and we will soon make the code available for public use.
+- *Jan, 2023*: **CAC**, oral work of AAAI 2023, has expanded its 3D result with the incorporation of Pointcept. This addition will allow CAC to serve as a pluggable segmentor within our codebase.
+- *Sep, 2022*: **PTv2** was accepted by NeurIPS 2022. It is a continuation of Point Transformer. The proposed GVA theory applies to most existing attention mechanisms, while Grid Pooling is also a practical addition to existing pooling methods.
+
+## Citation
+If you find _Pointcept_ useful to your research, please cite our work as encouragement.
+```
+@misc{pointcept2023,
+ title={Pointcept: A Codebase for Point Cloud Perception Research},
+ author={Pointcept Contributors},
+ howpublished = {\url{https://github.com/Pointcept/Pointcept}},
+ year={2023}
+}
+```
+
+## Overview
+
+- [Installation](#installation)
+- [Data Preparation](#data-preparation)
+- [Quick Start](#quick-start)
+- [Model Zoo](#model-zoo)
+- [Citation](#citation)
+- [Acknowledgement](#acknowledgement)
+
+## Installation
+
+### Requirements
+- Ubuntu: 18.04 and above.
+- CUDA: 11.3 and above.
+- PyTorch: 1.10.0 and above.
+
+### Conda Environment
+
+```bash
+conda create -n pointcept python=3.8 -y
+conda activate pointcept
+conda install ninja -y
+# Choose version you want here: https://pytorch.org/get-started/previous-versions/
+conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch -y
+conda install h5py pyyaml -c anaconda -y
+conda install sharedarray tensorboard tensorboardx yapf addict einops scipy plyfile termcolor timm -c conda-forge -y
+conda install pytorch-cluster pytorch-scatter pytorch-sparse -c pyg -y
+pip install torch-geometric
+
+# spconv (SparseUNet)
+# refer https://github.com/traveller59/spconv
+pip install spconv-cu113
+
+# PPT (clip)
+pip install ftfy regex tqdm
+pip install git+https://github.com/openai/CLIP.git
+
+# PTv1 & PTv2 or precise eval
+cd libs/pointops
+# usual
+python setup.py install
+# docker & multi GPU arch
+TORCH_CUDA_ARCH_LIST="ARCH LIST" python setup.py install
+# e.g. 7.5: RTX 20xx (Turing); 8.0: A100; 8.6: RTX 30xx (Ampere). More available at: https://developer.nvidia.com/cuda-gpus
+TORCH_CUDA_ARCH_LIST="7.5 8.0" python setup.py install
+cd ../..
+
+# Open3D (visualization, optional)
+pip install open3d
+```
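+
+As a quick sanity check of the environment above, the following minimal sketch (assuming the packages installed as shown, including the `pointops` extension compiled from `libs/pointops`) verifies that the key imports work:
+
+```python
+import torch
+import spconv.pytorch as spconv  # provided by the spconv-cu113 wheel
+import pointops  # compiled from libs/pointops; an ImportError means the extension was not built
+
+print("torch:", torch.__version__)
+print("CUDA available:", torch.cuda.is_available())
+print("spconv and pointops imported successfully")
+```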
+
+## Data Preparation
+
+### ScanNet v2
+
+The preprocessing supports semantic and instance segmentation for `ScanNet20`, `ScanNet200`, and `ScanNet Data Efficient`.
+- Download the [ScanNet](http://www.scan-net.org/) v2 dataset.
+- Run preprocessing code for raw ScanNet as follows:
+
+ ```bash
+ # RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset.
+ # PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset (output dir).
+ python pointcept/datasets/preprocessing/scannet/preprocess_scannet.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_DIR}
+ ```
+- (Optional) Download ScanNet Data Efficient files:
+ ```bash
+ # download-scannet.py is the official download script
+ # or follow instructions here: https://kaldir.vc.in.tum.de/scannet_benchmark/data_efficient/documentation#download
+ python download-scannet.py --data_efficient -o ${RAW_SCANNET_DIR}
+ # unzip downloads
+ cd ${RAW_SCANNET_DIR}/tasks
+ unzip limited-annotation-points.zip
+ unzip limited-reconstruction-scenes.zip
+ # copy files to processed dataset folder
+ mkdir ${PROCESSED_SCANNET_DIR}/tasks
+ cp -r ${RAW_SCANNET_DIR}/tasks/points ${PROCESSED_SCANNET_DIR}/tasks
+ cp -r ${RAW_SCANNET_DIR}/tasks/scenes ${PROCESSED_SCANNET_DIR}/tasks
+ ```
+- (Alternative) Our preprocessed data can be downloaded directly [[here](https://huggingface.co/datasets/Pointcept/scannet-compressed)]; please agree to the official license before downloading it.
+
+- Link processed dataset to codebase:
+ ```bash
+ # PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset.
+ mkdir data
+ ln -s ${PROCESSED_SCANNET_DIR} ${CODEBASE_DIR}/data/scannet
+ ```
+
+### ScanNet++
+- Download the [ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) dataset.
+- Run preprocessing code for raw ScanNet++ as follows:
+ ```bash
+ # RAW_SCANNETPP_DIR: the directory of downloaded ScanNet++ raw dataset.
+ # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir).
+ # NUM_WORKERS: the number of workers for parallel preprocessing.
+ python pointcept/datasets/preprocessing/scannetpp/preprocess_scannetpp.py --dataset_root ${RAW_SCANNETPP_DIR} --output_root ${PROCESSED_SCANNETPP_DIR} --num_workers ${NUM_WORKERS}
+ ```
+- Sample and chunk the large point cloud data in the train/val splits as follows (only used for training):
+ ```bash
+ # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir).
+ # NUM_WORKERS: the number of workers for parallel preprocessing.
+ python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split train --num_workers ${NUM_WORKERS}
+ python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split val --num_workers ${NUM_WORKERS}
+ ```
+- (Alternative) Our preprocessed data can be downloaded directly [[here](https://huggingface.co/datasets/Pointcept/scannetpp-compressed)]; please agree to the official license before downloading it.
+- Link processed dataset to codebase:
+ ```bash
+  # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset.
+ mkdir data
+ ln -s ${PROCESSED_SCANNETPP_DIR} ${CODEBASE_DIR}/data/scannetpp
+ ```
+
+### S3DIS
+
+- Download S3DIS data by filling this [Google form](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1). Download the `Stanford3dDataset_v1.2.zip` file and unzip it.
+- Fix error in `Area_5/office_19/Annotations/ceiling` Line 323474 (103.0�0000 => 103.000000).
+- (Optional) Download Full 2D-3D S3DIS dataset (no XYZ) from [here](https://github.com/alexsax/2D-3D-Semantics) for parsing normal.
+- Run preprocessing code for S3DIS as follows:
+
+ ```bash
+ # S3DIS_DIR: the directory of downloaded Stanford3dDataset_v1.2 dataset.
+ # RAW_S3DIS_DIR: the directory of Stanford2d3dDataset_noXYZ dataset. (optional, for parsing normal)
+ # PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset (output dir).
+
+ # S3DIS without aligned angle
+ python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR}
+ # S3DIS with aligned angle
+ python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --align_angle
+ # S3DIS with normal vector (recommended, normal is helpful)
+ python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --parse_normal
+ python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --align_angle --parse_normal
+ ```
+
+- (Alternative) Our preprocessed data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/s3dis-compressed)] (with normal vector and aligned angle); please agree to the official license before downloading it.
+
+- Link processed dataset to codebase.
+ ```bash
+ # PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset.
+ mkdir data
+ ln -s ${PROCESSED_S3DIS_DIR} ${CODEBASE_DIR}/data/s3dis
+ ```
+### Structured3D
+
+- Download the Structured3D panorama-related and perspective (full)-related zip files by filling in this [Google form](https://docs.google.com/forms/d/e/1FAIpQLSc0qtvh4vHSoZaW6UvlXYy79MbcGdZfICjh4_t4bYofQIVIdw/viewform?pli=1) (no need to unzip them).
+- Organize all downloaded zip files in one folder (`${STRUCT3D_DIR}`).
+- Run preprocessing code for Structured3D as follows:
+ ```bash
+ # STRUCT3D_DIR: the directory of downloaded Structured3D dataset.
+ # PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir).
+  # NUM_WORKERS: number of workers for preprocessing; defaults to the CPU count (might OOM).
+ export PYTHONPATH=./
+ python pointcept/datasets/preprocessing/structured3d/preprocess_structured3d.py --dataset_root ${STRUCT3D_DIR} --output_root ${PROCESSED_STRUCT3D_DIR} --num_workers ${NUM_WORKERS} --grid_size 0.01 --fuse_prsp --fuse_pano
+ ```
+Following the instructions of [Swin3D](https://arxiv.org/abs/2304.06906), we keep the 25 categories (out of the original 40) whose frequencies are greater than 0.001.
+
+
+- (Alternative) Our preprocessed data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/structured3d-compressed)] (with perspective views and panorama views, 471.7G after unzipping); please agree to the official license before downloading it.
+
+- Link processed dataset to codebase.
+ ```bash
+ # PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir).
+ mkdir data
+ ln -s ${PROCESSED_STRUCT3D_DIR} ${CODEBASE_DIR}/data/structured3d
+ ```
+### Matterport3D
+- Follow [this page](https://niessner.github.io/Matterport/#download) to request access to the dataset.
+- Download the "region_segmentation" type, which represents the division of a scene into individual rooms.
+ ```bash
+ # download-mp.py is the official download script
+ # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset.
+  python download-mp.py -o ${MATTERPORT3D_DIR} --type region_segmentations
+ ```
+- Unzip the region_segmentations data
+ ```bash
+ # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset.
+  python pointcept/datasets/preprocessing/matterport3d/unzip_matterport3d_region_segmentation.py --dataset_root ${MATTERPORT3D_DIR}
+ ```
+- Run preprocessing code for Matterport3D as follows:
+ ```bash
+ # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset.
+ # PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir).
+ # NUM_WORKERS: the number of workers for this preprocessing.
+ python pointcept/datasets/preprocessing/matterport3d/preprocess_matterport3d_mesh.py --dataset_root ${MATTERPORT3D_DIR} --output_root ${PROCESSED_MATTERPORT3D_DIR} --num_workers ${NUM_WORKERS}
+ ```
+- Link processed dataset to codebase.
+ ```bash
+ # PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir).
+ mkdir data
+ ln -s ${PROCESSED_MATTERPORT3D_DIR} ${CODEBASE_DIR}/data/matterport3d
+ ```
+
+Following the instruction of [OpenRooms](https://github.com/ViLab-UCSD/OpenRooms), we remapped Matterport3D's categories to ScanNet 20 semantic categories with the addition of a ceiling category.
+- (Alternative) Our preprocessed data can also be downloaded [here](https://huggingface.co/datasets/Pointcept/matterport3d-compressed); please agree to the official license before downloading it.
+
+### SemanticKITTI
+- Download [SemanticKITTI](http://www.semantic-kitti.org/dataset.html#download) dataset.
+- Link dataset to codebase.
+ ```bash
+ # SEMANTIC_KITTI_DIR: the directory of SemanticKITTI dataset.
+ # |- SEMANTIC_KITTI_DIR
+ # |- dataset
+ # |- sequences
+ # |- 00
+ # |- 01
+ # |- ...
+
+ mkdir -p data
+ ln -s ${SEMANTIC_KITTI_DIR} ${CODEBASE_DIR}/data/semantic_kitti
+ ```
+
+### nuScenes
+- Download the official [nuScenes](https://www.nuscenes.org/nuscenes#download) dataset (with lidar segmentation) and organize the downloaded files as follows:
+ ```bash
+ NUSCENES_DIR
+  ├── samples
+  ├── sweeps
+  ├── lidarseg
+  ...
+  ├── v1.0-trainval
+  └── v1.0-test
+ ```
+- Run information preprocessing code (modified from OpenPCDet) for nuScenes as follows:
+ ```bash
+ # NUSCENES_DIR: the directory of downloaded nuScenes dataset.
+ # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir).
+ # MAX_SWEEPS: Max number of sweeps. Default: 10.
+ pip install nuscenes-devkit pyquaternion
+ python pointcept/datasets/preprocessing/nuscenes/preprocess_nuscenes_info.py --dataset_root ${NUSCENES_DIR} --output_root ${PROCESSED_NUSCENES_DIR} --max_sweeps ${MAX_SWEEPS} --with_camera
+ ```
+- (Alternative) Our preprocessed nuScenes information data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/nuscenes-compressed)] (only the processed information; you still need to download the raw dataset and link it to the folder); please agree to the official license before downloading it.
+
+- Link the raw dataset to the processed nuScenes dataset folder:
+ ```bash
+ # NUSCENES_DIR: the directory of downloaded nuScenes dataset.
+ # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir).
+  ln -s ${NUSCENES_DIR} ${PROCESSED_NUSCENES_DIR}/raw
+ ```
+  The processed nuScenes folder is then organized as follows:
+ ```bash
+ nuscene
+  ├── raw
+  │   ├── samples
+  │   ├── sweeps
+  │   ├── lidarseg
+  │   ...
+  │   ├── v1.0-trainval
+  │   └── v1.0-test
+  └── info
+ ```
+
+- Link processed dataset to codebase.
+ ```bash
+ # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir).
+ mkdir data
+ ln -s ${PROCESSED_NUSCENES_DIR} ${CODEBASE_DIR}/data/nuscenes
+ ```
+
+### Waymo
+- Download the official [Waymo](https://waymo.com/open/download/) dataset (v1.4.3) and organize the downloaded files as follows:
+ ```bash
+ WAYMO_RAW_DIR
+  ├── training
+  ├── validation
+  └── testing
+ ```
+- Install the following dependencies:
+ ```bash
+  # If pip shows "No matching distribution found", download the wheel directly from PyPI and install it.
+ conda create -n waymo python=3.10 -y
+ conda activate waymo
+ pip install waymo-open-dataset-tf-2-12-0
+ ```
+- Run the preprocessing code as follows:
+ ```bash
+ # WAYMO_DIR: the directory of the downloaded Waymo dataset.
+ # PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir).
+ # NUM_WORKERS: num workers for preprocessing
+ python pointcept/datasets/preprocessing/waymo/preprocess_waymo.py --dataset_root ${WAYMO_DIR} --output_root ${PROCESSED_WAYMO_DIR} --splits training validation --num_workers ${NUM_WORKERS}
+ ```
+
+- Link processed dataset to the codebase.
+ ```bash
+ # PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir).
+ mkdir data
+ ln -s ${PROCESSED_WAYMO_DIR} ${CODEBASE_DIR}/data/waymo
+ ```
+
+### ModelNet
+- Download [modelnet40_normal_resampled.zip](https://shapenet.cs.stanford.edu/media/modelnet40_normal_resampled.zip) and unzip it.
+- Link dataset to the codebase.
+ ```bash
+ mkdir -p data
+ ln -s ${MODELNET_DIR} ${CODEBASE_DIR}/data/modelnet40_normal_resampled
+ ```
+
+## Quick Start
+
+### Training
+**Train from scratch.** The training process is based on configs in the `configs` folder.
+The training script will generate an experiment folder under the `exp` folder and back up essential code into it.
+The training config, logs, tensorboard files, and checkpoints will also be saved into the experiment folder during training.
+```bash
+export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
+# Script (Recommended)
+sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME}
+# Direct
+export PYTHONPATH=./
+python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH}
+```
+
+For example:
+```bash
+# By script (Recommended)
+# -p defaults to python and can be omitted
+sh scripts/train.sh -p python -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+# Direct
+export PYTHONPATH=./
+python tools/train.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base
+```
+**Resume training from checkpoint.** If the training process is interrupted by accident, the following script can resume training from a given checkpoint.
+```bash
+export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
+# Script (Recommended)
+# simply add "-r true"
+sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME} -r true
+# Direct
+export PYTHONPATH=./
+python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} resume=True weight=${CHECKPOINT_PATH}
+```
+
+### Testing
+During training, model evaluation is performed on point clouds after grid sampling (voxelization), providing an initial assessment of model performance. However, to obtain precise evaluation results, testing is **essential**. The testing process involves subsampling a dense point cloud into a sequence of voxelized point clouds, ensuring comprehensive coverage of all points. These sub-results are then predicted and collected to form a complete prediction of the entire point cloud. This approach yields higher evaluation results compared to simply mapping/interpolating the prediction. In addition, our testing code supports TTA (test time augmentation) testing, which further enhances the stability of evaluation performance.
+
+```bash
+# By script (Based on experiment folder created by training script)
+sh scripts/test.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -n ${EXP_NAME} -w ${CHECKPOINT_NAME}
+# Direct
+export PYTHONPATH=./
+python tools/test.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} weight=${CHECKPOINT_PATH}
+```
+For example:
+```bash
+# By script (Based on experiment folder created by training script)
+# -p defaults to python and can be omitted
+# -w defaults to model_best and can be omitted
+sh scripts/test.sh -p python -d scannet -n semseg-pt-v2m2-0-base -w model_best
+# Direct
+export PYTHONPATH=./
+python tools/test.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base weight=exp/scannet/semseg-pt-v2m2-0-base/model/model_best.pth
+```
+
+TTA can be disabled by replacing `data.test.test_cfg.aug_transform = [...]` with:
+
+```python
+data = dict(
+ train = dict(...),
+ val = dict(...),
+ test = dict(
+ ...,
+ test_cfg = dict(
+ ...,
+ aug_transform = [
+ [dict(type="RandomRotateTargetAngle", angle=[0], axis="z", center=[0, 0, 0], p=1)]
+ ]
+ )
+ )
+)
+```
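+
+Conceptually, the precise evaluation described above aggregates predictions from a sequence of voxelized fragments back onto the original dense point cloud. A minimal sketch of this idea (illustrative only, not the actual `tools/test.py` implementation; the fragment generation and the model forward call are placeholders):
+
+```python
+import torch
+
+def precise_eval_sketch(model, num_points, num_classes, fragments):
+    """Accumulate per-fragment logits onto the full point cloud.
+
+    `fragments` is assumed to be an iterable of (input_dict, index) pairs, where
+    `index` maps each voxelized point back to its original point index.
+    """
+    logits = torch.zeros(num_points, num_classes)
+    with torch.no_grad():
+        for input_dict, index in fragments:
+            frag_logits = model(input_dict)      # placeholder forward call
+            logits[index] += frag_logits.cpu()   # scatter predictions back to the dense cloud
+    return logits.argmax(dim=1)                  # final per-point labels
+```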
+
+### Offset
+`Offset` is the separator of point clouds in batched data; it is similar to the concept of `Batch` in PyG.
+A visual illustration of batch and offset is shown below, followed by a minimal conversion sketch:
+
+
+
+
+
+
+
+
+
+
+
+
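+A minimal sketch of the relation between `batch` indices and `offset` (illustrative only; here `offset` stores the cumulative point counts of the clouds in a batch):
+
+```python
+import torch
+
+# Two point clouds with 3 and 5 points batched together.
+offset = torch.tensor([3, 8])  # cloud 0 -> points [0:3], cloud 1 -> points [3:8]
+
+# offset -> batch: repeat each cloud index by its point count.
+counts = torch.diff(offset, prepend=torch.zeros(1, dtype=offset.dtype))
+batch = torch.repeat_interleave(torch.arange(len(offset)), counts)
+print(batch)  # tensor([0, 0, 0, 1, 1, 1, 1, 1])
+
+# batch -> offset: cumulative sum of per-cloud point counts.
+print(torch.cumsum(torch.bincount(batch), dim=0))  # tensor([3, 8])
+```
+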
+## Model Zoo
+### 1. Backbones and Semantic Segmentation
+#### SparseUNet
+
+_Pointcept_ provides `SparseUNet` implemented by `SpConv` and `MinkowskiEngine`. The SpConv version is recommended since SpConv is easy to install and faster than MinkowskiEngine. Meanwhile, SpConv is also widely applied in outdoor perception.
+
+- **SpConv (recommend)**
+
+The SpConv version of `SparseUNet` in the codebase was fully rewritten from the `MinkowskiEngine` version; example running scripts are as follows:
+
+```bash
+# ScanNet val
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
+# ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
+# S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
+# S3DIS (with normal)
+sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-cn-base -n semseg-spunet-v1m1-0-cn-base
+# SemanticKITTI
+sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
+# nuScenes
+sh scripts/train.sh -g 4 -d nuscenes -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
+# ModelNet40
+sh scripts/train.sh -g 2 -d modelnet40 -c cls-spunet-v1m1-0-base -n cls-spunet-v1m1-0-base
+
+# ScanNet Data Efficient
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la20 -n semseg-spunet-v1m1-2-efficient-la20
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la50 -n semseg-spunet-v1m1-2-efficient-la50
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la100 -n semseg-spunet-v1m1-2-efficient-la100
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la200 -n semseg-spunet-v1m1-2-efficient-la200
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr1 -n semseg-spunet-v1m1-2-efficient-lr1
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr5 -n semseg-spunet-v1m1-2-efficient-lr5
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr10 -n semseg-spunet-v1m1-2-efficient-lr10
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr20 -n semseg-spunet-v1m1-2-efficient-lr20
+
+# Profile model run time
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-enable-profiler -n semseg-spunet-v1m1-0-enable-profiler
+```
+
+- **MinkowskiEngine**
+
+The MinkowskiEngine version of `SparseUNet` in the codebase was modified from the original MinkowskiEngine repo; example running scripts are as follows:
+1. Install MinkowskiEngine; refer to https://github.com/NVIDIA/MinkowskiEngine
+2. Training with the following example scripts:
+```bash
+# Uncomment "# from .sparse_unet import *" in "pointcept/models/__init__.py"
+# Uncomment "# from .mink_unet import *" in "pointcept/models/sparse_unet/__init__.py"
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
+# ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
+# S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
+# SemanticKITTI
+sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
+```
+
+#### OA-CNNs
+Introducing Omni-Adaptive 3D CNNs (**OA-CNNs**), a family of networks that integrates a lightweight module to greatly enhance the adaptivity of sparse CNNs at minimal computational cost. Without any self-attention modules, **OA-CNNs** favorably surpass point transformers in terms of accuracy in both indoor and outdoor scenes, with much less latency and memory cost. Issues related to **OA-CNNs** can be directed to @Pbihao.
+```bash
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-oacnns-v1m1-0-base -n semseg-oacnns-v1m1-0-base
+```
+
+#### Point Transformers
+- **PTv3**
+
+[PTv3](https://arxiv.org/abs/2312.10035) is an efficient backbone model that achieves SOTA performance across indoor and outdoor scenarios. The full PTv3 relies on FlashAttention, which requires CUDA 11.6 and above; make sure your local Pointcept environment satisfies this requirement.
+
+If you cannot upgrade your local environment to satisfy the requirement (CUDA >= 11.6), you can disable FlashAttention by setting the model parameter `enable_flash` to `false` and reducing `enc_patch_size` and `dec_patch_size` to a lower value (e.g. 128).
+
+FlashAttention forcibly disables RPE and reduces the precision to fp16. If you require these features, please disable `enable_flash` and adjust `enable_rpe`, `upcast_attention`, and `upcast_softmax`.
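+
+For example, FlashAttention can be turned off with a config override along these lines (the keys mirror the PT-v3m1 backbone settings used in the configs of this repo; the exact values are illustrative):
+
+```python
+model = dict(
+    backbone=dict(
+        type="PT-v3m1",
+        enable_flash=False,   # disable FlashAttention (drops the CUDA 11.6 requirement)
+        enable_rpe=True,      # RPE becomes usable again once FlashAttention is off
+        upcast_attention=True,   # keep attention in higher precision
+        upcast_softmax=True,
+        enc_patch_size=(128, 128, 128, 128, 128),  # reduced from 1024
+        dec_patch_size=(128, 128, 128, 128),       # reduced from 1024
+    ),
+)
+```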
+
+Detailed instructions and experiment records (containing weights) are available on the [project repository](https://github.com/Pointcept/PointTransformerV3). Example running scripts are as follows:
+```bash
+# Scratched ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
+# PPT joint training (ScanNet + Structured3D) and evaluate in ScanNet
+sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme
+
+# Scratched ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
+# Fine-tuning from PPT joint training (ScanNet + Structured3D) with ScanNet200
+# PTV3_PPT_WEIGHT_PATH: Path to model weight trained by PPT multi-dataset joint training
+# e.g. exp/scannet/semseg-pt-v3m1-1-ppt-extreme/model/model_best.pth
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-1-ppt-ft -n semseg-pt-v3m1-1-ppt-ft -w ${PTV3_PPT_WEIGHT_PATH}
+
+# Scratched ScanNet++
+sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
+# Scratched ScanNet++ test
+sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-1-submit -n semseg-pt-v3m1-1-submit
+
+
+# Scratched S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
+# An example of disabling flash attention and enabling RPE.
+sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-1-rpe -n semseg-pt-v3m1-0-rpe
+# PPT joint training (ScanNet + S3DIS + Structured3D) and evaluate in ScanNet
+sh scripts/train.sh -g 8 -d s3dis -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme
+# S3DIS 6-fold cross validation
+# 1. The default configs are evaluated on Area_5; modify "data.train.split", "data.val.split", and "data.test.split" to evaluate on Area_1 ~ Area_6 respectively.
+# 2. Train and evaluate the model on each area split, then gather the result files located at "exp/s3dis/EXP_NAME/result/Area_x.pth" into a single folder, denoted RECORD_FOLDER.
+# 3. Run the following script to get S3DIS 6-fold cross validation performance:
+export PYTHONPATH=./
+python tools/test_s3dis_6fold.py --record_root ${RECORD_FOLDER}
+
+# Scratched nuScenes
+sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
+# Scratched Waymo
+sh scripts/train.sh -g 4 -d waymo -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
+
+# More configs and exp records for PTv3 will be available soon.
+```
+
+Indoor semantic segmentation
+| Model | Benchmark | Additional Data | Num GPUs | Val mIoU | Config | Tensorboard | Exp Record |
+| :---: | :---: |:---------------:| :---: | :---: | :---: | :---: | :---: |
+| PTv3 | ScanNet | ✗ | 4 | 77.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-0-base) |
+| PTv3 + PPT | ScanNet | ✓ | 8 | 78.5% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-1-ppt-extreme) |
+| PTv3 | ScanNet200 | ✗ | 4 | 35.3% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet200/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) |[link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet200-semseg-pt-v3m1-0-base)|
+| PTv3 + PPT | ScanNet200 | ✓ (f.t.) | 4 | | | | |
+| PTv3 | S3DIS (Area5) | ✗ | 4 | 73.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-0-rpe.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-0-rpe) |
+| PTv3 + PPT | S3DIS (Area5) | ✓ | 8 | 75.4% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-1-ppt-extreme) |
+
+Outdoor semantic segmentation
+| Model | Benchmark | Additional Data | Num GPUs | Val mIoU | Config | Tensorboard | Exp Record |
+| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
+| PTv3 | nuScenes | ✗ | 4 | 80.3 | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/nuscenes/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard)|[link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/nuscenes-semseg-pt-v3m1-0-base) |
+| PTv3 + PPT | nuScenes | ✓ | 8 | | | | |
+| PTv3 | SemanticKITTI | ✗ | 4 | | | | |
+| PTv3 + PPT | SemanticKITTI | ✓ | 8 | | | | |
+| PTv3 | Waymo | ✗ | 4 | 71.2 | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/waymo/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/waymo-semseg-pt-v3m1-0-base) (log only) |
+| PTv3 + PPT | Waymo | ✓ | 8 | | | | |
+
+_**\*Released model weights are trained for v1.5.1; weights for v1.5.2 and later are still in progress.**_
+
+- **PTv2 mode2**
+
+The original PTv2 was trained on 4 * RTX A6000 (48G memory). Even with AMP enabled, the memory cost of the original PTv2 is slightly larger than 24G. Considering that GPUs with 24G memory are much more accessible, I tuned PTv2 on the latest Pointcept and made it runnable on 4 * RTX 3090 machines.
+
+`PTv2 Mode2` enables AMP and disables _Position Encoding Multiplier_ & _Grouped Linear_. During our further research, we found that precise coordinates are not necessary for point cloud understanding (replacing precise coordinates with grid coordinates does not influence performance; SparseUNet is an example). As for Grouped Linear, my implementation seems to cost more memory than the Linear layer provided by PyTorch. Benefiting from the codebase and better parameter tuning, we also alleviate the overfitting problem. The reproduced performance is even better than the results reported in our paper.
+
+Example running scripts are as follows:
+
+```bash
+# ptv2m2: PTv2 mode2, disable PEM & Grouped Linear, GPU memory cost < 24G (recommend)
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-3-lovasz -n semseg-pt-v2m2-3-lovasz
+
+# ScanNet test
+sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit
+# ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+# ScanNet++
+sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+# ScanNet++ test
+sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit
+# S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+# SemanticKITTI
+sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+# nuScenes
+sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+```
+
+- **PTv2 mode1**
+
+`PTv2 mode1` is the original PTv2 reported in our paper; example running scripts are as follows:
+
+```bash
+# ptv2m1: PTv2 mode1, Original PTv2, GPU memory cost > 24G
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base
+# ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base
+# S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base
+```
+
+- **PTv1**
+
+The original PTv1 is also available in our Pointcept codebase. I haven't run PTv1 for a long time, but I have ensured that the example running script works well.
+
+```bash
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base
+# ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base
+# S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base
+```
+
+
+#### Stratified Transformer
+1. Additional requirements:
+```bash
+pip install torch-points3d
+# Fix dependency issue caused by installing torch-points3d
+pip uninstall SharedArray
+pip install SharedArray==3.2.1
+
+cd libs/pointops2
+python setup.py install
+cd ../..
+```
+2. Uncomment `# from .stratified_transformer import *` in `pointcept/models/__init__.py`.
+3. Refer to [Optional Installation](#installation) to install dependencies.
+4. Training with the following example scripts:
+```bash
+# stv1m1: Stratified Transformer mode1, modified from the original Stratified Transformer code.
+# stv1m2: Stratified Transformer mode2, my rewritten version (recommended).
+
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined
+sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m1-0-origin -n semseg-st-v1m1-0-origin
+# ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined
+# S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined
+```
+
+#### SPVCNN
+`SPVCNN` is the baseline model of [SPVNAS](https://github.com/mit-han-lab/spvnas); it is also a practical baseline for outdoor datasets.
+1. Install torchsparse:
+```bash
+# refer https://github.com/mit-han-lab/torchsparse
+# install method without sudo apt install
+conda install google-sparsehash -c bioconda
+export C_INCLUDE_PATH=${CONDA_PREFIX}/include:$C_INCLUDE_PATH
+export CPLUS_INCLUDE_PATH=${CONDA_PREFIX}/include:$CPLUS_INCLUDE_PATH
+pip install --upgrade git+https://github.com/mit-han-lab/torchsparse.git
+```
+2. Training with the following example scripts:
+```bash
+# SemanticKITTI
+sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-spvcnn-v1m1-0-base -n semseg-spvcnn-v1m1-0-base
+```
+
+#### OctFormer
+OctFormer from _OctFormer: Octree-based Transformers for 3D Point Clouds_.
+1. Additional requirements:
+```bash
+cd libs
+git clone https://github.com/octree-nn/dwconv.git
+pip install ./dwconv
+pip install ocnn
+```
+2. Uncomment `# from .octformer import *` in `pointcept/models/__init__.py`.
+3. Training with the following example scripts:
+```bash
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-octformer-v1m1-0-base -n semseg-octformer-v1m1-0-base
+```
+
+#### Swin3D
+Swin3D from _Swin3D: A Pretrained Transformer Backbone for 3D Indoor Scene Understanding_.
+1. Additional requirements:
+```bash
+# 1. Install MinkowskiEngine v0.5.4, following the readme at https://github.com/NVIDIA/MinkowskiEngine;
+# 2. Install Swin3D, mainly for cuda operation:
+cd libs
+git clone https://github.com/microsoft/Swin3D.git
+cd Swin3D
+pip install ./
+```
+2. Uncomment `# from .swin3d import *` in `pointcept/models/__init__.py`.
+3. Pre-training with the following example scripts (for Structured3D preprocessing, refer [here](#structured3d)):
+```bash
+# Structured3D + Swin-S
+sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small
+# Structured3D + Swin-L
+sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large
+
+# Addition
+# Structured3D + SpUNet
+sh scripts/train.sh -g 4 -d structured3d -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
+# Structured3D + PTv2
+sh scripts/train.sh -g 4 -d structured3d -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+```
+4. Fine-tuning with the following example scripts:
+```bash
+# ScanNet + Swin-S
+sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small
+# ScanNet + Swin-L
+sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large
+
+# S3DIS + Swin-S (this config supports the S3DIS normal vector)
+sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small
+# S3DIS + Swin-L (this config supports the S3DIS normal vector)
+sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large
+```
+
+#### Context-Aware Classifier
+`Context-Aware Classifier` is a segmentor that can further boost the performance of each backbone, as a replacement for `Default Segmentor`. Training with the following example scripts:
+```bash
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base
+sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz
+sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz
+
+# ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz
+```
+
+
+### 2. Instance Segmentation
+#### PointGroup
+[PointGroup](https://github.com/dvlab-research/PointGroup) is a baseline framework for point cloud instance segmentation.
+1. Additional requirements:
+```bash
+conda install -c bioconda google-sparsehash
+cd libs/pointgroup_ops
+python setup.py install --include_dirs=${CONDA_PREFIX}/include
+cd ../..
+```
+2. Uncomment `# from .point_group import *` in `pointcept/models/__init__.py`.
+3. Training with the following example scripts:
+```bash
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base
+# S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base
+```
+
+### 3. Pre-training
+#### Masked Scene Contrast (MSC)
+1. Pre-training with the following example scripts:
+```bash
+# ScanNet
+sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-0-spunet-base -n pretrain-msc-v1m1-0-spunet-base
+```
+
+2. Fine-tuning with the following example scripts (enable PointGroup ([here](#pointgroup)) before fine-tuning on the instance segmentation task):
+```bash
+# ScanNet20 Semantic Segmentation
+sh scripts/train.sh -g 8 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c semseg-spunet-v1m1-4-ft -n semseg-msc-v1m1-0f-spunet-base
+# ScanNet20 Instance Segmentation (enable PointGroup before running the script)
+sh scripts/train.sh -g 4 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-msc-v1m1-0f-pointgroup-spunet-base
+```
+3. Example log and weight: [[Pretrain](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EYvNV4XUJ_5Mlk-g15RelN4BW_P8lVBfC_zhjC_BlBDARg?e=UoGFWH)] [[Semseg](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EQkDiv5xkOFKgCpGiGtAlLwBon7i8W6my3TIbGVxuiTttQ?e=tQFnbr)]
+
+#### Point Prompt Training (PPT)
+PPT presents a multi-dataset pre-training framework, and it is compatible with various existing pre-training frameworks and backbones.
+1. PPT supervised joint training with the following example scripts:
+```bash
+# ScanNet + Structured3D, validate on ScanNet (S3DIS might cause long data loading time; omit S3DIS for a quick validation); >= 3090 * 8
+sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-0-sc-st-spunet -n semseg-ppt-v1m1-0-sc-st-spunet
+sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-1-sc-st-spunet-submit -n semseg-ppt-v1m1-1-sc-st-spunet-submit
+# ScanNet + S3DIS + Structured3D, validate on S3DIS (>= a100 * 8)
+sh scripts/train.sh -g 8 -d s3dis -c semseg-ppt-v1m1-0-s3-sc-st-spunet -n semseg-ppt-v1m1-0-s3-sc-st-spunet
+# SemanticKITTI + nuScenes + Waymo, validate on SemanticKITTI (bs12 >= 3090 * 4; bs24 >= 3090 * 8; v1m1-0 is still being tuned)
+sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet
+sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-0-sk-nu-wa-spunet -n semseg-ppt-v1m2-0-sk-nu-wa-spunet
+sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit -n semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit
+# SemanticKITTI + nuScenes + Waymo, validate on nuScenes (bs12 >= 3090 * 4; bs24 >= 3090 * 8; v1m1-0 is still being tuned)
+sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet
+sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-0-nu-sk-wa-spunet -n semseg-ppt-v1m2-0-nu-sk-wa-spunet
+sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit -n semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit
+```
+
+#### PointContrast
+1. Preprocess and link the ScanNet-Pair dataset (pair-wise matching of raw ScanNet RGB-D frames, ~1.5T):
+```bash
+# RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset.
+# PROCESSED_SCANNET_PAIR_DIR: the directory of processed ScanNet pair dataset (output dir).
+python pointcept/datasets/preprocessing/scannet/scannet_pair/preprocess.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_PAIR_DIR}
+ln -s ${PROCESSED_SCANNET_PAIR_DIR} ${CODEBASE_DIR}/data/scannet
+```
+2. Pre-training with the following example scripts:
+```bash
+# ScanNet
+sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-1-spunet-pointcontrast -n pretrain-msc-v1m1-1-spunet-pointcontrast
+```
+3. For fine-tuning, refer to [MSC](#masked-scene-contrast-msc).
+
+#### Contrastive Scene Contexts
+1. Preprocess and link the ScanNet-Pair dataset (refer to [PointContrast](#pointcontrast)).
+2. Pre-training with the following example scripts:
+```bash
+# ScanNet
+sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m2-0-spunet-csc -n pretrain-msc-v1m2-0-spunet-csc
+```
+3. For fine-tuning, refer to [MSC](#masked-scene-contrast-msc).
+
+## Acknowledgement
+_Pointcept_ is designed by [Xiaoyang](https://xywu.me/), named by [Yixing](https://github.com/yxlao), and its logo is created by [Yuechen](https://julianjuaner.github.io/). It is derived from [Hengshuang](https://hszhao.github.io/)'s [Semseg](https://github.com/hszhao/semseg) and inspired by several repos, e.g., [MinkowskiEngine](https://github.com/NVIDIA/MinkowskiEngine), [pointnet2](https://github.com/charlesq34/pointnet2), [mmcv](https://github.com/open-mmlab/mmcv/tree/master/mmcv), and [Detectron2](https://github.com/facebookresearch/detectron2).
diff --git a/Pointcept/configs/_base_/dataset/scannetpp.py b/Pointcept/configs/_base_/dataset/scannetpp.py
new file mode 100644
index 0000000000000000000000000000000000000000..926850c22981b88f2b56f26507a7a1693e00800b
--- /dev/null
+++ b/Pointcept/configs/_base_/dataset/scannetpp.py
@@ -0,0 +1,104 @@
+data = dict(
+ names=[
+ "wall",
+ "ceiling",
+ "floor",
+ "table",
+ "door",
+ "ceiling lamp",
+ "cabinet",
+ "blinds",
+ "curtain",
+ "chair",
+ "storage cabinet",
+ "office chair",
+ "bookshelf",
+ "whiteboard",
+ "window",
+ "box",
+ "window frame",
+ "monitor",
+ "shelf",
+ "doorframe",
+ "pipe",
+ "heater",
+ "kitchen cabinet",
+ "sofa",
+ "windowsill",
+ "bed",
+ "shower wall",
+ "trash can",
+ "book",
+ "plant",
+ "blanket",
+ "tv",
+ "computer tower",
+ "kitchen counter",
+ "refrigerator",
+ "jacket",
+ "electrical duct",
+ "sink",
+ "bag",
+ "picture",
+ "pillow",
+ "towel",
+ "suitcase",
+ "backpack",
+ "crate",
+ "keyboard",
+ "rack",
+ "toilet",
+ "paper",
+ "printer",
+ "poster",
+ "painting",
+ "microwave",
+ "board",
+ "shoes",
+ "socket",
+ "bottle",
+ "bucket",
+ "cushion",
+ "basket",
+ "shoe rack",
+ "telephone",
+ "file folder",
+ "cloth",
+ "blind rail",
+ "laptop",
+ "plant pot",
+ "exhaust fan",
+ "cup",
+ "coat hanger",
+ "light switch",
+ "speaker",
+ "table lamp",
+ "air vent",
+ "clothes hanger",
+ "kettle",
+ "smoke detector",
+ "container",
+ "power strip",
+ "slippers",
+ "paper bag",
+ "mouse",
+ "cutting board",
+ "toilet paper",
+ "paper towel",
+ "pot",
+ "clock",
+ "pan",
+ "tap",
+ "jar",
+ "soap dispenser",
+ "binder",
+ "bowl",
+ "tissue box",
+ "whiteboard eraser",
+ "toilet brush",
+ "spray bottle",
+ "headphones",
+ "stapler",
+ "marker",
+ ]
+)
diff --git a/Pointcept/configs/_base_/default_runtime.py b/Pointcept/configs/_base_/default_runtime.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ec8bf179f3c462dd80e58dcd70debcbd775f5d2
--- /dev/null
+++ b/Pointcept/configs/_base_/default_runtime.py
@@ -0,0 +1,39 @@
+weight = None # path to model weight
+resume = False # whether to resume training process
+evaluate = True  # evaluate after each training epoch
+test_only = False # test process
+
+seed = None # train process will init a random seed and record
+save_path = "exp/default"
+num_worker = 16  # total number of workers across all GPUs
+batch_size = 16  # total batch size across all GPUs
+batch_size_val = None  # auto adapt to batch size 1 per GPU
+batch_size_test = None  # auto adapt to batch size 1 per GPU
+epoch = 100  # total epochs; data loop = epoch // eval_epoch
+eval_epoch = 100  # scheduled total number of evaluation & checkpoint epochs
+clip_grad = None # disable with None, enable with a float
+
+sync_bn = False
+enable_amp = False
+empty_cache = False
+empty_cache_per_epoch = False
+find_unused_parameters = False
+
+mix_prob = 0
+param_dicts = None # example: param_dicts = [dict(keyword="block", lr_scale=0.1)]
+
+# hook
+hooks = [
+ dict(type="CheckpointLoader"),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(type="SemSegEvaluator"),
+ dict(type="CheckpointSaver", save_freq=None),
+ dict(type="PreciseEvaluator", test_last=False),
+]
+
+# Trainer
+train = dict(type="DefaultTrainer")
+
+# Tester
+test = dict(type="SemSegTester", verbose=True)
diff --git a/Pointcept/configs/matterport3d/semseg-pt-v3m1-0-base.py b/Pointcept/configs/matterport3d/semseg-pt-v3m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..1559d97a2696fb7c9a5f6e2ec75238445ed13eb2
--- /dev/null
+++ b/Pointcept/configs/matterport3d/semseg-pt-v3m1-0-base.py
@@ -0,0 +1,313 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentorV2",
+ num_classes=21,
+ backbone_out_channels=64,
+ backbone=dict(
+ type="PT-v3m1",
+ in_channels=6,
+ order=("z", "z-trans", "hilbert", "hilbert-trans"),
+ stride=(2, 2, 2, 2),
+ enc_depths=(2, 2, 2, 6, 2),
+ enc_channels=(32, 64, 128, 256, 512),
+ enc_num_head=(2, 4, 8, 16, 32),
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+ dec_depths=(2, 2, 2, 2),
+ dec_channels=(64, 64, 128, 256),
+ dec_num_head=(4, 4, 8, 16),
+ dec_patch_size=(1024, 1024, 1024, 1024),
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop=0.0,
+ proj_drop=0.0,
+ drop_path=0.3,
+ shuffle_orders=True,
+ pre_norm=True,
+ enable_rpe=False,
+ enable_flash=True,
+ upcast_attention=False,
+ upcast_softmax=False,
+ cls_mode=False,
+ pdnorm_bn=False,
+ pdnorm_ln=False,
+ pdnorm_decouple=True,
+ pdnorm_adaptive=False,
+ pdnorm_affine=True,
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.006, 0.0006],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0006)]
+
+# dataset settings
+dataset_type = "DefaultDataset"
+data_root = "data/matterport3d"
+
+data = dict(
+ num_classes=21,
+ ignore_index=-1,
+ names=(
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refrigerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "other",
+ "ceiling",
+ ),
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=102400, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
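
In the config above, `param_dicts = [dict(keyword="block", lr=0.0006)]` appears to ask the trainer to place parameters whose names contain "block" into a separate optimizer group with a 10x lower learning rate, and the two-element `max_lr` list gives OneCycleLR one peak per group. A plain-PyTorch sketch of that intent (the model, parameter names, and step count below are placeholders, not Pointcept's internals):

import torch
from torch import nn
from torch.optim.lr_scheduler import OneCycleLR

# Tiny stand-in for the PT-v3 segmentor; only the "block*" naming matters here.
model = nn.ModuleDict({
    "embedding": nn.Linear(6, 64),
    "block0": nn.Linear(64, 64),
    "head": nn.Linear(64, 21),
})
base_params = [p for n, p in model.named_parameters() if "block" not in n]
block_params = [p for n, p in model.named_parameters() if "block" in n]

optimizer = torch.optim.AdamW(
    [
        {"params": base_params, "lr": 0.006},
        {"params": block_params, "lr": 0.0006},  # matches param_dicts keyword="block"
    ],
    weight_decay=0.05,
)

steps_per_epoch = 1000  # placeholder; depends on dataset size and batch size
scheduler = OneCycleLR(
    optimizer,
    max_lr=[0.006, 0.0006],  # one peak learning rate per parameter group, as in the config
    total_steps=800 * steps_per_epoch,
    pct_start=0.05,
    anneal_strategy="cos",
    div_factor=10.0,
    final_div_factor=1000.0,
)
# scheduler.step() is then called once per optimizer step, not once per epoch.
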
diff --git a/Pointcept/configs/matterport3d/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/matterport3d/semseg-spunet-v1m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef0305cd78a7fb58c029b4b69f2cfb48cc0d6648
--- /dev/null
+++ b/Pointcept/configs/matterport3d/semseg-spunet-v1m1-0-base.py
@@ -0,0 +1,282 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=21,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "DefaultDataset"
+data_root = "data/matterport3d"
+
+data = dict(
+ num_classes=21,
+ ignore_index=-1,
+ names=(
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refrigerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "other",
+ "ceiling",
+ ),
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
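
The `GridSample` transform with `grid_size=0.02` keeps roughly one point per 2 cm voxel before cropping and collation. A small NumPy sketch of that idea (illustrative only: `np.unique` stands in for the FNV hash named by `hash_type`, and the real transform also subsamples every per-point attribute with the same indices):

import numpy as np


def grid_sample(coord: np.ndarray, grid_size: float = 0.02, seed: int = 0):
    # Quantize coordinates to voxel indices and keep one random point per occupied voxel,
    # roughly what the "train"-mode GridSample does.
    rng = np.random.default_rng(seed)
    grid_coord = np.floor((coord - coord.min(0)) / grid_size).astype(np.int64)
    _, inverse = np.unique(grid_coord, axis=0, return_inverse=True)
    inverse = inverse.ravel()
    order = rng.permutation(len(coord))
    # First occurrence of each voxel id in a shuffled order = one random representative per voxel.
    _, first = np.unique(inverse[order], return_index=True)
    keep = order[first]
    return keep, grid_coord[keep]


coord = np.random.rand(10000, 3).astype(np.float32)
keep, grid_coord = grid_sample(coord)
print(f"{len(keep)} of {len(coord)} points kept at 2 cm resolution")
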
diff --git a/Pointcept/configs/modelnet40/cls-ptv3-v1m1-0-base.py b/Pointcept/configs/modelnet40/cls-ptv3-v1m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..235a5567e5382e297cb285af9d1ceb2c82a20e9b
--- /dev/null
+++ b/Pointcept/configs/modelnet40/cls-ptv3-v1m1-0-base.py
@@ -0,0 +1,232 @@
+_base_ = ["../_base_/default_runtime.py"]
+# misc custom setting
+batch_size = 32 # bs: total bs in all gpus
+num_worker = 16
+batch_size_val = 8
+empty_cache = False
+enable_amp = False
+
+# model settings
+model = dict(
+ type="DefaultClassifier",
+ num_classes=40,
+ backbone_embed_dim=512,
+ backbone=dict(
+ type="PT-v3m1",
+ in_channels=6,
+ order=("z", "z-trans", "hilbert", "hilbert-trans"),
+ stride=(2, 2, 2, 2),
+ enc_depths=(2, 2, 2, 6, 2),
+ enc_channels=(32, 64, 128, 256, 512),
+ enc_num_head=(2, 4, 8, 16, 32),
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+ dec_depths=(2, 2, 2, 2),
+ dec_channels=(64, 64, 128, 256),
+ dec_num_head=(4, 4, 8, 16),
+ dec_patch_size=(1024, 1024, 1024, 1024),
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop=0.0,
+ proj_drop=0.0,
+ drop_path=0.3,
+ shuffle_orders=True,
+ pre_norm=True,
+ enable_rpe=False,
+ enable_flash=True,
+ upcast_attention=False,
+ upcast_softmax=False,
+ cls_mode=True,
+ pdnorm_bn=False,
+ pdnorm_ln=False,
+ pdnorm_decouple=True,
+ pdnorm_adaptive=False,
+ pdnorm_affine=True,
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 300
+# optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
+# scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
+optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.01)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.001, 0.0001],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0001)]
+
+# dataset settings
+dataset_type = "ModelNetDataset"
+data_root = "data/modelnet40_normal_resampled"
+cache_data = False
+class_names = [
+ "airplane",
+ "bathtub",
+ "bed",
+ "bench",
+ "bookshelf",
+ "bottle",
+ "bowl",
+ "car",
+ "chair",
+ "cone",
+ "cup",
+ "curtain",
+ "desk",
+ "door",
+ "dresser",
+ "flower_pot",
+ "glass_box",
+ "guitar",
+ "keyboard",
+ "lamp",
+ "laptop",
+ "mantel",
+ "monitor",
+ "night_stand",
+ "person",
+ "piano",
+ "plant",
+ "radio",
+ "range_hood",
+ "sink",
+ "sofa",
+ "stairs",
+ "stool",
+ "table",
+ "tent",
+ "toilet",
+ "tv_stand",
+ "vase",
+ "wardrobe",
+ "xbox",
+]
+
+data = dict(
+ num_classes=40,
+ ignore_index=-1,
+ names=class_names,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ class_names=class_names,
+ transform=[
+ dict(type="NormalizeCoord"),
+ # dict(type="CenterShift", apply_z=True),
+ # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1/24, 1/24], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/24, 1/24], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.7, 1.5], anisotropic=True),
+ dict(type="RandomShift", shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
+ # dict(type="RandomFlip", p=0.5),
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "normal"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=10000, mode="random"),
+ # dict(type="CenterShift", apply_z=True),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "category"),
+ feat_keys=["coord", "normal"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="test",
+ data_root=data_root,
+ class_names=class_names,
+ transform=[
+ dict(type="NormalizeCoord"),
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "normal"),
+ return_grid_coord=True,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "category"),
+ feat_keys=["coord", "normal"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="test",
+ data_root=data_root,
+ class_names=class_names,
+ transform=[
+ dict(type="NormalizeCoord"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ post_transform=[
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "normal"),
+ return_grid_coord=True,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord"),
+ feat_keys=["coord", "normal"],
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[1, 1], anisotropic=True)], # 1
+ [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 2
+ [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 3
+ [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 4
+ [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 5
+            [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)],  # 6
+            [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)],  # 7
+            [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)],  # 8
+            [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)],  # 9
+            [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)],  # 10
+ ],
+ ),
+ ),
+)
+
+# hooks
+hooks = [
+ dict(type="CheckpointLoader"),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(type="ClsEvaluator"),
+ dict(type="CheckpointSaver", save_freq=None),
+ dict(type="PreciseEvaluator", test_last=False),
+]
+
+# tester
+test = dict(type="ClsVotingTester", num_repeat=100)
diff --git a/Pointcept/configs/modelnet40/cls-spunet-v1m1-0-base.py b/Pointcept/configs/modelnet40/cls-spunet-v1m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e6585af547d3658093f5203468c2b3c12108f67
--- /dev/null
+++ b/Pointcept/configs/modelnet40/cls-spunet-v1m1-0-base.py
@@ -0,0 +1,176 @@
+_base_ = ["../_base_/default_runtime.py"]
+# misc custom setting
+batch_size = 16 # bs: total bs in all gpus
+# batch_size_val = 8
+empty_cache = False
+enable_amp = False
+
+# model settings
+model = dict(
+ type="DefaultClassifier",
+ num_classes=40,
+ backbone_embed_dim=256,
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=0,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ cls_mode=True,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 200
+optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
+
+# dataset settings
+dataset_type = "ModelNetDataset"
+data_root = "data/modelnet40_normal_resampled"
+cache_data = False
+class_names = [
+ "airplane",
+ "bathtub",
+ "bed",
+ "bench",
+ "bookshelf",
+ "bottle",
+ "bowl",
+ "car",
+ "chair",
+ "cone",
+ "cup",
+ "curtain",
+ "desk",
+ "door",
+ "dresser",
+ "flower_pot",
+ "glass_box",
+ "guitar",
+ "keyboard",
+ "lamp",
+ "laptop",
+ "mantel",
+ "monitor",
+ "night_stand",
+ "person",
+ "piano",
+ "plant",
+ "radio",
+ "range_hood",
+ "sink",
+ "sofa",
+ "stairs",
+ "stool",
+ "table",
+ "tent",
+ "toilet",
+ "tv_stand",
+ "vase",
+ "wardrobe",
+ "xbox",
+]
+
+data = dict(
+ num_classes=40,
+ ignore_index=-1,
+ names=class_names,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ class_names=class_names,
+ transform=[
+ dict(type="NormalizeCoord"),
+ # dict(type="CenterShift", apply_z=True),
+ # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1/24, 1/24], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/24, 1/24], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ dict(type="RandomShift", shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
+ # dict(type="RandomFlip", p=0.5),
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "normal"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=10000, mode="random"),
+ # dict(type="CenterShift", apply_z=True),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "category"),
+ feat_keys=["coord", "normal"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="test",
+ data_root=data_root,
+ class_names=class_names,
+ transform=[
+ dict(type="NormalizeCoord"),
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "normal"),
+ return_grid_coord=True,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "category"),
+ feat_keys=["coord", "normal"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="test",
+ data_root=data_root,
+ class_names=class_names,
+ transform=[
+ dict(type="NormalizeCoord"),
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "normal"),
+ return_grid_coord=True,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "category"),
+ feat_keys=["coord", "normal"],
+ ),
+ ],
+ test_mode=True,
+ ),
+)
+
+# hooks
+hooks = [
+ dict(type="CheckpointLoader"),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(type="ClsEvaluator"),
+ dict(type="CheckpointSaver", save_freq=None),
+]
+
+# tester
+test = dict(type="ClsTester")
diff --git a/Pointcept/configs/nuscenes/semseg-ppt-v1m1-0-nu-sk-wa-spunet.py b/Pointcept/configs/nuscenes/semseg-ppt-v1m1-0-nu-sk-wa-spunet.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed82be25301d2ac9650147cc68ebd7a2aa9534be
--- /dev/null
+++ b/Pointcept/configs/nuscenes/semseg-ppt-v1m1-0-nu-sk-wa-spunet.py
@@ -0,0 +1,342 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+
+# trainer
+train = dict(
+ type="MultiDatasetTrainer",
+)
+
+# model settings
+model = dict(
+ type="PPT-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m3",
+ in_channels=4,
+ num_classes=0,
+ base_channels=32,
+ context_channels=256,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ cls_mode=False,
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
+ zero_init=False,
+ norm_decouple=True,
+ norm_adaptive=False,
+ norm_affine=True,
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ backbone_out_channels=96,
+ context_channels=256,
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
+ template="[x]",
+ clip_model="ViT-B/16",
+ # fmt: off
+ class_name=(
+ # SemanticKITTI
+ "car", "bicycle", "motorcycle", "truck", "other vehicle",
+ "person", "person who rides a bicycle", "person who rides a motorcycle", "road", "parking",
+ "path for pedestrians at the side of a road", "other ground", "building", "fence", "vegetation",
+ "trunk", "terrain", "pole", "traffic sign",
+ # nuScenes
+ "barrier", "bicycle", "bus", "car", "construction vehicle",
+ "motorcycle", "pedestrian", "traffic cone", "trailer", "truck",
+ "path suitable or safe for driving", "other flat", "sidewalk", "terrain", "man made", "vegetation",
+ # waymo
+ "car", "truck", "bus", "other vehicle", "person who rides a motorcycle",
+ "person who rides a bicycle", "pedestrian", "sign", "traffic light", "pole",
+ "construction cone", "bicycle", "motorcycle", "building", "vegetation",
+ "tree trunk", "curb", "road", "lane marker", "other ground", "horizontal surface that can not drive",
+ "surface when pedestrians most likely to walk on",
+ ),
+ valid_index=(
+ [i for i in range(19)],
+ [i for i in range(19, 19 + 16)],
+ [i for i in range(19 + 16, 19 + 16 + 22)],
+ ),
+ # fmt: on
+ backbone_mode=False,
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+# param_dicts = [dict(keyword="modulation", lr=0.0002)]
+
+# dataset settings
+data = dict(
+ num_classes=16,
+ ignore_index=-1,
+ names=[
+ "barrier",
+ "bicycle",
+ "bus",
+ "car",
+ "construction_vehicle",
+ "motorcycle",
+ "pedestrian",
+ "traffic_cone",
+ "trailer",
+ "truck",
+ "driveable_surface",
+ "other_flat",
+ "sidewalk",
+ "terrain",
+ "manmade",
+ "vegetation",
+ ],
+ train=dict(
+ type="ConcatDataset",
+ datasets=[
+ # nuScenes
+ dict(
+ type="NuScenesDataset",
+ split="train",
+ data_root="data/nuscenes",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ # SemanticKITTI
+ dict(
+ type="SemanticKITTIDataset",
+ split="train",
+ data_root="data/semantic_kitti",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ # Waymo
+ dict(
+ type="WaymoDataset",
+ split="training",
+ data_root="data/waymo",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "Waymo"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ ],
+ ),
+ val=dict(
+ type="NuScenesDataset",
+ split="val",
+ data_root="data/nuscenes",
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ ),
+ test=dict(
+ type="NuScenesDataset",
+ split="val",
+ data_root="data/nuscenes",
+ transform=[
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
+ dict(
+ type="GridSample",
+ grid_size=0.025,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_inverse=True,
+ ),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "strength"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ignore_index=-1,
+ ),
+)
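
In the PPT config above, `valid_index` carves the concatenated 57-entry `class_name` prompt list into per-dataset slices (19 SemanticKITTI, 16 nuScenes, 22 Waymo), so only the classes valid for a sample's `condition` are scored. A minimal sketch of that slicing (illustrative only, not PPT's exact code; shapes are placeholders):

import numpy as np

conditions = ("SemanticKITTI", "nuScenes", "Waymo")
valid_index = (
    list(range(19)),                     # SemanticKITTI slice of the shared class list
    list(range(19, 19 + 16)),            # nuScenes slice
    list(range(19 + 16, 19 + 16 + 22)),  # Waymo slice
)


def select_logits(all_class_logits, condition):
    # Keep only the columns belonging to the sample's dataset, so the loss / argmax
    # is computed over that dataset's own label space.
    return all_class_logits[:, valid_index[conditions.index(condition)]]


logits = np.random.randn(4096, 19 + 16 + 22)          # per-point scores against all 57 prompts
nuscenes_logits = select_logits(logits, "nuScenes")   # shape (4096, 16)
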
diff --git a/Pointcept/configs/nuscenes/semseg-ppt-v1m2-0-nu-sk-wa-spunet.py b/Pointcept/configs/nuscenes/semseg-ppt-v1m2-0-nu-sk-wa-spunet.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf0aba527a41fc745f24dbd5913a14a7698834f5
--- /dev/null
+++ b/Pointcept/configs/nuscenes/semseg-ppt-v1m2-0-nu-sk-wa-spunet.py
@@ -0,0 +1,316 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+
+# trainer
+train = dict(
+ type="MultiDatasetTrainer",
+)
+
+# model settings
+model = dict(
+ type="PPT-v1m2",
+ backbone=dict(
+ type="SpUNet-v1m3",
+ in_channels=4,
+ num_classes=0,
+ base_channels=32,
+ context_channels=256,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ cls_mode=False,
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
+ zero_init=False,
+ norm_decouple=True,
+ norm_adaptive=False,
+ norm_affine=True,
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ backbone_out_channels=96,
+ context_channels=256,
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
+ num_classes=(19, 16, 22),
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+# param_dicts = [dict(keyword="modulation", lr=0.0002)]
+
+# dataset settings
+data = dict(
+ num_classes=16,
+ ignore_index=-1,
+ names=[
+ "barrier",
+ "bicycle",
+ "bus",
+ "car",
+ "construction_vehicle",
+ "motorcycle",
+ "pedestrian",
+ "traffic_cone",
+ "trailer",
+ "truck",
+ "driveable_surface",
+ "other_flat",
+ "sidewalk",
+ "terrain",
+ "manmade",
+ "vegetation",
+ ],
+ train=dict(
+ type="ConcatDataset",
+ datasets=[
+ # nuScenes
+ dict(
+ type="NuScenesDataset",
+ split="train",
+ data_root="data/nuscenes",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ # SemanticKITTI
+ dict(
+ type="SemanticKITTIDataset",
+ split="train",
+ data_root="data/semantic_kitti",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ # Waymo
+ dict(
+ type="WaymoDataset",
+ split="training",
+ data_root="data/waymo",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "Waymo"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ ],
+ ),
+ val=dict(
+ type="NuScenesDataset",
+ split="val",
+ data_root="data/nuscenes",
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ ),
+ test=dict(
+ type="NuScenesDataset",
+ split="val",
+ data_root="data/nuscenes",
+ transform=[
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
+ dict(
+ type="GridSample",
+ grid_size=0.025,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_inverse=True,
+ ),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "strength"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ignore_index=-1,
+ ),
+)
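
In the test pipeline above, `Copy` stashes the full-resolution labels as `origin_segment`, and `GridSample(..., return_inverse=True)` returns, for every original point, the index of the sub-sampled point it collapsed into; predictions made on the sub-sampled cloud are then scattered back through that inverse index so metrics are computed on all points. A small NumPy sketch of the mapping (all arrays below are random placeholders):

import numpy as np

N, M, num_classes = 100_000, 40_000, 16
inverse = np.random.randint(0, M, size=N)                   # stand-in for the transform's inverse index
pred_sub = np.random.randint(0, num_classes, size=M)        # predictions on the sub-sampled cloud
origin_segment = np.random.randint(0, num_classes, size=N)  # labels preserved by the Copy transform

pred_full = pred_sub[inverse]                               # scatter predictions back to all N points
accuracy = (pred_full == origin_segment).mean()             # metrics at full resolution
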
diff --git a/Pointcept/configs/nuscenes/semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit.py b/Pointcept/configs/nuscenes/semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8f254757995c51401643a7db1e9c48455b4fefb
--- /dev/null
+++ b/Pointcept/configs/nuscenes/semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit.py
@@ -0,0 +1,292 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+evaluate = False
+
+# trainer
+train = dict(
+ type="MultiDatasetTrainer",
+)
+
+# model settings
+model = dict(
+ type="PPT-v1m2",
+ backbone=dict(
+ type="SpUNet-v1m3",
+ in_channels=4,
+ num_classes=0,
+ base_channels=32,
+ context_channels=256,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ cls_mode=False,
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
+ zero_init=False,
+ norm_decouple=True,
+ norm_adaptive=False,
+ norm_affine=True,
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ backbone_out_channels=96,
+ context_channels=256,
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
+ num_classes=(19, 16, 22),
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+# param_dicts = [dict(keyword="modulation", lr=0.0002)]
+
+# dataset settings
+data = dict(
+ num_classes=16,
+ ignore_index=-1,
+ names=[
+ "barrier",
+ "bicycle",
+ "bus",
+ "car",
+ "construction_vehicle",
+ "motorcycle",
+ "pedestrian",
+ "traffic_cone",
+ "trailer",
+ "truck",
+ "driveable_surface",
+ "other_flat",
+ "sidewalk",
+ "terrain",
+ "manmade",
+ "vegetation",
+ ],
+ train=dict(
+ type="ConcatDataset",
+ datasets=[
+ # nuScenes
+ dict(
+ type="NuScenesDataset",
+ split=["train", "val"],
+ data_root="data/nuscenes",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ # SemanticKITTI
+ dict(
+ type="SemanticKITTIDataset",
+ split=["train", "val"],
+ data_root="data/semantic_kitti",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ # Waymo
+ dict(
+ type="WaymoDataset",
+ split=["training", "validation"],
+ data_root="data/waymo",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "Waymo"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ ],
+ ),
+ test=dict(
+ type="NuScenesDataset",
+ split="test",
+ data_root="data/nuscenes",
+ transform=[
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
+ dict(
+ type="GridSample",
+ grid_size=0.025,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_inverse=True,
+ ),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "strength"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ignore_index=-1,
+ ),
+)
diff --git a/Pointcept/configs/nuscenes/semseg-pt-v2m2-0-base.py b/Pointcept/configs/nuscenes/semseg-pt-v2m2-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ce53d7d872e69bd2e66614124f6d4e19a6fdc02
--- /dev/null
+++ b/Pointcept/configs/nuscenes/semseg-pt-v2m2-0-base.py
@@ -0,0 +1,174 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=4,
+ num_classes=16,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+ grid_sizes=(0.15, 0.375, 0.9375, 2.34375), # x3, x2.5, x2.5, x2.5
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+
+# dataset settings
+dataset_type = "NuScenesDataset"
+data_root = "data/nuscenes"
+ignore_index = -1
+names = [
+ "barrier",
+ "bicycle",
+ "bus",
+ "car",
+ "construction_vehicle",
+ "motorcycle",
+ "pedestrian",
+ "traffic_cone",
+ "trailer",
+ "truck",
+ "driveable_surface",
+ "other_flat",
+ "sidewalk",
+ "terrain",
+ "manmade",
+ "vegetation",
+]
+
+data = dict(
+ num_classes=16,
+ ignore_index=ignore_index,
+ names=names,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ # dict(type="GridSample", grid_size=0.05, hash_type="fnv", mode="train",
+ # keys=("coord", "strength", "segment"), return_grid_coord=True),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ # dict(type="PointClip", point_cloud_range=(-51.2, -51.2, -4, 51.2, 51.2, 2.4)),
+ # dict(type="GridSample", grid_size=0.05, hash_type="fnv", mode="train",
+ # keys=("coord", "strength", "segment"), return_grid_coord=True),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=None,
+ crop=None,
+ post_transform=[
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ignore_index=ignore_index,
+ ),
+)
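
The `test_cfg` above runs each scan through every entry of `aug_transform` (five scales, each with and without a flip), and the tester sums the class scores that every augmented fragment assigns to each original point (tracked via the collected `index`) before taking the argmax. A hedged sketch of that accumulation, assuming hypothetical `fragments` and `model` inputs rather than the actual SemSegTester interface:

import numpy as np


def ensemble_segmentation(fragments, model, num_points: int, num_classes: int = 16):
    # Sum the class scores every augmented fragment assigns to each original point,
    # then take the per-point argmax.
    logit_sum = np.zeros((num_points, num_classes), dtype=np.float32)
    for fragment in fragments:                        # each fragment: one augmented view of the scan
        scores = model(fragment)                      # shape (len(fragment["index"]), num_classes)
        np.add.at(logit_sum, fragment["index"], scores)
    return logit_sum.argmax(axis=1)
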
diff --git a/Pointcept/configs/nuscenes/semseg-pt-v2m2-1-benchmark-submit.py b/Pointcept/configs/nuscenes/semseg-pt-v2m2-1-benchmark-submit.py
new file mode 100644
index 0000000000000000000000000000000000000000..19f7e7512d4f809704be97ee64653c1d852aafff
--- /dev/null
+++ b/Pointcept/configs/nuscenes/semseg-pt-v2m2-1-benchmark-submit.py
@@ -0,0 +1,157 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+evaluate = False
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=4,
+ num_classes=16,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+ grid_sizes=(0.15, 0.375, 0.9375, 2.34375), # x3, x2.5, x2.5, x2.5
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+
+# dataset settings
+dataset_type = "NuScenesDataset"
+data_root = "data/nuscenes"
+ignore_index = -1
+names = [
+ "barrier",
+ "bicycle",
+ "bus",
+ "car",
+ "construction_vehicle",
+ "motorcycle",
+ "pedestrian",
+ "traffic_cone",
+ "trailer",
+ "truck",
+ "driveable_surface",
+ "other_flat",
+ "sidewalk",
+ "terrain",
+ "manmade",
+ "vegetation",
+]
+
+data = dict(
+ num_classes=16,
+ ignore_index=ignore_index,
+ names=names,
+ train=dict(
+ type=dataset_type,
+ split=["train", "val"],
+ data_root=data_root,
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ # dict(type="GridSample", grid_size=0.05, hash_type="fnv", mode="train",
+ # keys=("coord", "strength", "segment"), return_grid_coord=True),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="test",
+ data_root=data_root,
+ transform=[],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=None,
+ crop=None,
+ post_transform=[
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ignore_index=ignore_index,
+ ),
+)
diff --git a/Pointcept/configs/nuscenes/semseg-pt-v3m1-0-base.py b/Pointcept/configs/nuscenes/semseg-pt-v3m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f64b9e67dedcf0cd1a7f950d2d6677dce0aa088
--- /dev/null
+++ b/Pointcept/configs/nuscenes/semseg-pt-v3m1-0-base.py
@@ -0,0 +1,215 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentorV2",
+ num_classes=16,
+ backbone_out_channels=64,
+ backbone=dict(
+ type="PT-v3m1",
+ in_channels=4,
+ order=["z", "z-trans", "hilbert", "hilbert-trans"],
+ stride=(2, 2, 2, 2),
+ enc_depths=(2, 2, 2, 6, 2),
+ enc_channels=(32, 64, 128, 256, 512),
+ enc_num_head=(2, 4, 8, 16, 32),
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+ dec_depths=(2, 2, 2, 2),
+ dec_channels=(64, 64, 128, 256),
+ dec_num_head=(4, 4, 8, 16),
+ dec_patch_size=(1024, 1024, 1024, 1024),
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop=0.0,
+ proj_drop=0.0,
+ drop_path=0.3,
+ shuffle_orders=True,
+ pre_norm=True,
+ enable_rpe=False,
+ enable_flash=True,
+ upcast_attention=False,
+ upcast_softmax=False,
+ cls_mode=False,
+ pdnorm_bn=False,
+ pdnorm_ln=False,
+ pdnorm_decouple=True,
+ pdnorm_adaptive=False,
+ pdnorm_affine=True,
+ pdnorm_conditions=("nuScenes", "SemanticKITTI", "Waymo"),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.002, 0.0002],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0002)]
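+# Note: OneCycleLR is given one max_lr per parameter group; the param_dicts entry
+# above appears to route parameters whose names contain "block" into a second group
+# with the lower peak learning rate (0.0002), while all remaining parameters use
+# 0.002.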
+
+# dataset settings
+dataset_type = "NuScenesDataset"
+data_root = "data/nuscenes"
+ignore_index = -1
+names = [
+ "barrier",
+ "bicycle",
+ "bus",
+ "car",
+ "construction_vehicle",
+ "motorcycle",
+ "pedestrian",
+ "traffic_cone",
+ "trailer",
+ "truck",
+ "driveable_surface",
+ "other_flat",
+ "sidewalk",
+ "terrain",
+ "manmade",
+ "vegetation",
+]
+
+data = dict(
+ num_classes=16,
+ ignore_index=ignore_index,
+ names=names,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ # dict(type="PointClip", point_cloud_range=(-51.2, -51.2, -4, 51.2, 51.2, 2.4)),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
+ dict(
+ type="GridSample",
+ grid_size=0.025,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_inverse=True,
+ ),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "strength"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ignore_index=ignore_index,
+ ),
+)
diff --git a/Pointcept/configs/nuscenes/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/nuscenes/semseg-spunet-v1m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6b6a126086a210335b27953a2e620bc74e56503
--- /dev/null
+++ b/Pointcept/configs/nuscenes/semseg-spunet-v1m1-0-base.py
@@ -0,0 +1,183 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom settings
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=4,
+ num_classes=16,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+
+# dataset settings
+dataset_type = "NuScenesDataset"
+data_root = "data/nuscenes"
+ignore_index = -1
+names = [
+ "barrier",
+ "bicycle",
+ "bus",
+ "car",
+ "construction_vehicle",
+ "motorcycle",
+ "pedestrian",
+ "traffic_cone",
+ "trailer",
+ "truck",
+ "driveable_surface",
+ "other_flat",
+ "sidewalk",
+ "terrain",
+ "manmade",
+ "vegetation",
+]
+
+data = dict(
+ num_classes=16,
+ ignore_index=ignore_index,
+ names=names,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ # dict(type="PointClip", point_cloud_range=(-51.2, -51.2, -4, 51.2, 51.2, 2.4)),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
+ dict(
+ type="GridSample",
+ grid_size=0.025,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_inverse=True,
+ ),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "strength"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ignore_index=ignore_index,
+ ),
+)
diff --git a/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02-sc-aug.py b/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02-sc-aug.py
new file mode 100644
index 0000000000000000000000000000000000000000..2cb44ce8269da20036a4b2b7e61109bb97529709
--- /dev/null
+++ b/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02-sc-aug.py
@@ -0,0 +1,180 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom settings
+batch_size = 12  # total batch size across all GPUs
+num_worker = 12
+mix_prob = 0.0
+empty_cache = False
+enable_amp = True
+evaluate = True
+
+class_names = [
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+]
+num_classes = 13
+segment_ignore_index = (-1,)
+
+# model settings
+model = dict(
+ type="PG-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=0,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ backbone_out_channels=96,
+ semantic_num_classes=num_classes,
+ semantic_ignore_index=-1,
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ cluster_thresh=1.5,
+ cluster_closed_points=300,
+ cluster_propose_points=100,
+ cluster_min_points=50,
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(type="PolyLR")
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=num_classes,
+ ignore_index=-1,
+ names=class_names,
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "instance_centroid",
+ "bbox",
+ ),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={
+ "coord": "origin_coord",
+ "segment": "origin_segment",
+ "instance": "origin_instance",
+ },
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "origin_coord",
+ "origin_segment",
+ "origin_instance",
+ "instance_centroid",
+ "bbox",
+ ),
+ feat_keys=("color", "normal"),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(), # currently not available
+)
+
+hooks = [
+ dict(type="CheckpointLoader", keywords="module.", replacement="module."),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(
+ type="InsSegEvaluator",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="CheckpointSaver", save_freq=None),
+]
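+# Note: defining `hooks` here overrides the default runtime hook list, so instance
+# segmentation metrics are reported by InsSegEvaluator instead of the default
+# semantic-segmentation evaluator; CheckpointSaver with save_freq=None presumably
+# keeps only the latest/best checkpoints rather than saving one per epoch.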
diff --git a/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02.py b/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02.py
new file mode 100644
index 0000000000000000000000000000000000000000..826d3731ac6662e03a956daa1f213f03b5c1984a
--- /dev/null
+++ b/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02.py
@@ -0,0 +1,180 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom settings
+batch_size = 12  # total batch size across all GPUs
+num_worker = 12
+mix_prob = 0.0
+empty_cache = False
+enable_amp = True
+evaluate = True
+
+class_names = [
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+]
+num_classes = 13
+segment_ignore_index = (-1,)
+
+# model settings
+model = dict(
+ type="PG-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=0,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ backbone_out_channels=96,
+ semantic_num_classes=num_classes,
+ semantic_ignore_index=-1,
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ cluster_thresh=1.5,
+ cluster_closed_points=300,
+ cluster_propose_points=100,
+ cluster_min_points=50,
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(type="PolyLR")
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=num_classes,
+ ignore_index=-1,
+ names=class_names,
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.005),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "instance_centroid",
+ "bbox",
+ ),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={
+ "coord": "origin_coord",
+ "segment": "origin_segment",
+ "instance": "origin_instance",
+ },
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "origin_coord",
+ "origin_segment",
+ "origin_instance",
+ "instance_centroid",
+ "bbox",
+ ),
+ feat_keys=("color", "normal"),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(), # currently not available
+)
+
+hooks = [
+ dict(type="CheckpointLoader", keywords="module.", replacement="module."),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(
+ type="InsSegEvaluator",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="CheckpointSaver", save_freq=None),
+]
diff --git a/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base.py b/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ce06b51d1a566c19305e486a614b73d4594bc58
--- /dev/null
+++ b/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base.py
@@ -0,0 +1,181 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom settings
+batch_size = 12  # total batch size across all GPUs
+num_worker = 12
+mix_prob = 0.0
+empty_cache = False
+enable_amp = True
+evaluate = True
+
+class_names = [
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+]
+num_classes = 13
+segment_ignore_index = (-1,)
+
+# model settings
+model = dict(
+ type="PG-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=0,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ backbone_out_channels=96,
+ semantic_num_classes=num_classes,
+ semantic_ignore_index=-1,
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ cluster_thresh=1.5,
+ cluster_closed_points=300,
+ cluster_propose_points=100,
+ cluster_min_points=50,
+ voxel_size=0.05,
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(type="PolyLR")
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=num_classes,
+ ignore_index=-1,
+ names=class_names,
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.005),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "instance_centroid",
+ "bbox",
+ ),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={
+ "coord": "origin_coord",
+ "segment": "origin_segment",
+ "instance": "origin_instance",
+ },
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "origin_coord",
+ "origin_segment",
+ "origin_instance",
+ "instance_centroid",
+ "bbox",
+ ),
+ feat_keys=("color", "normal"),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(), # currently not available
+)
+
+hooks = [
+ dict(type="CheckpointLoader", keywords="module.", replacement="module."),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(
+ type="InsSegEvaluator",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="CheckpointSaver", save_freq=None),
+]
diff --git a/Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft-vs0p05.py b/Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft-vs0p05.py
new file mode 100644
index 0000000000000000000000000000000000000000..b1f5d0dbf5c4363eabb7f017422d005dc6ff1b57
--- /dev/null
+++ b/Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft-vs0p05.py
@@ -0,0 +1,273 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom settings
+batch_size = 12  # total batch size across all GPUs
+num_worker = 24
+mix_prob = 0
+empty_cache = False
+enable_amp = True
+evaluate = True
+find_unused_parameters = True
+
+class_names = [
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+]
+num_classes = 13
+segment_ignore_index = (-1,)
+
+# model settings
+model = dict(
+ type="PG-v1m1",
+ backbone=dict(
+ type="PPT-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m3",
+ in_channels=6,
+ num_classes=0,
+ base_channels=32,
+ context_channels=256,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ cls_mode=False,
+ conditions=("ScanNet", "S3DIS", "Structured3D"),
+ zero_init=False,
+ norm_decouple=True,
+ norm_adaptive=True,
+ norm_affine=True,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+ backbone_out_channels=96,
+ context_channels=256,
+ conditions=("Structured3D", "ScanNet", "S3DIS"),
+ template="[x]",
+ clip_model="ViT-B/16",
+ class_name=(
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "bookcase",
+ "picture",
+ "counter",
+ "desk",
+ "shelves",
+ "curtain",
+ "dresser",
+ "pillow",
+ "mirror",
+ "ceiling",
+ "refrigerator",
+ "television",
+ "shower curtain",
+ "nightstand",
+ "toilet",
+ "sink",
+ "lamp",
+ "bathtub",
+ "garbagebin",
+ "board",
+ "beam",
+ "column",
+ "clutter",
+ "otherstructure",
+ "otherfurniture",
+ "otherprop",
+ ),
+ valid_index=(
+ (
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 11,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 23,
+ 25,
+ 26,
+ 33,
+ 34,
+ 35,
+ ),
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
+ (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
+ ),
+ backbone_mode=True,
+ ),
+ backbone_out_channels=96,
+ semantic_num_classes=num_classes,
+ semantic_ignore_index=-1,
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ cluster_thresh=1.5,
+ cluster_closed_points=300,
+ cluster_propose_points=100,
+ cluster_min_points=50,
+ voxel_size=0.05,
+)
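+# Note: backbone_mode=True appears to make the PPT model act as a pure feature
+# extractor (its own segmentation head and criteria are bypassed), so PG-v1m1
+# consumes the 96-channel per-point features directly for instance grouping.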
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(type="PolyLR")
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=num_classes,
+ ignore_index=-1,
+ names=class_names,
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.005),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "instance_centroid",
+ "bbox",
+ "condition",
+ ),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={
+ "coord": "origin_coord",
+ "segment": "origin_segment",
+ "instance": "origin_instance",
+ },
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "origin_coord",
+ "origin_segment",
+ "origin_instance",
+ "instance_centroid",
+ "bbox",
+ "condition",
+ ),
+ feat_keys=("color", "normal"),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(), # currently not available
+)
+
+hooks = [
+ dict(type="CheckpointLoader", keywords="module.", replacement="module.backbone."),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(
+ type="InsSegEvaluator",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="CheckpointSaver", save_freq=None),
+]
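+# Note: CheckpointLoader rewrites parameter names from the "module." prefix to
+# "module.backbone.", which lets weights from a checkpoint trained with PPT-v1m1 as
+# the top-level model be loaded into the nested backbone of this PointGroup
+# fine-tuning setup.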
diff --git a/Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py b/Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca4aa554cedd24771d12b645241773757a0ef253
--- /dev/null
+++ b/Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py
@@ -0,0 +1,273 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom settings
+batch_size = 12  # total batch size across all GPUs
+num_worker = 24
+mix_prob = 0
+empty_cache = False
+enable_amp = True
+evaluate = True
+find_unused_parameters = True
+
+class_names = [
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+]
+num_classes = 13
+segment_ignore_index = (-1,)
+
+# model settings
+model = dict(
+ type="PG-v1m1",
+ backbone=dict(
+ type="PPT-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m3",
+ in_channels=6,
+ num_classes=0,
+ base_channels=32,
+ context_channels=256,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ cls_mode=False,
+ conditions=("ScanNet", "S3DIS", "Structured3D"),
+ zero_init=False,
+ norm_decouple=True,
+ norm_adaptive=True,
+ norm_affine=True,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+ backbone_out_channels=96,
+ context_channels=256,
+ conditions=("Structured3D", "ScanNet", "S3DIS"),
+ template="[x]",
+ clip_model="ViT-B/16",
+ class_name=(
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "bookcase",
+ "picture",
+ "counter",
+ "desk",
+ "shelves",
+ "curtain",
+ "dresser",
+ "pillow",
+ "mirror",
+ "ceiling",
+ "refrigerator",
+ "television",
+ "shower curtain",
+ "nightstand",
+ "toilet",
+ "sink",
+ "lamp",
+ "bathtub",
+ "garbagebin",
+ "board",
+ "beam",
+ "column",
+ "clutter",
+ "otherstructure",
+ "otherfurniture",
+ "otherprop",
+ ),
+ valid_index=(
+ (
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 11,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 23,
+ 25,
+ 26,
+ 33,
+ 34,
+ 35,
+ ),
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
+ (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
+ ),
+ backbone_mode=True,
+ ),
+ backbone_out_channels=96,
+ semantic_num_classes=num_classes,
+ semantic_ignore_index=-1,
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ cluster_thresh=1.5,
+ cluster_closed_points=300,
+ cluster_propose_points=100,
+ cluster_min_points=50,
+ voxel_size=0.02,
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(type="PolyLR")
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=num_classes,
+ ignore_index=-1,
+ names=class_names,
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.005),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "instance_centroid",
+ "bbox",
+ "condition",
+ ),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={
+ "coord": "origin_coord",
+ "segment": "origin_segment",
+ "instance": "origin_instance",
+ },
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "origin_coord",
+ "origin_segment",
+ "origin_instance",
+ "instance_centroid",
+ "bbox",
+ "condition",
+ ),
+ feat_keys=("color", "normal"),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(), # currently not available
+)
+
+hooks = [
+ dict(type="CheckpointLoader", keywords="module.", replacement="module.backbone."),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(
+ type="InsSegEvaluator",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="CheckpointSaver", save_freq=None),
+]
diff --git a/Pointcept/configs/s3dis/semseg-minkunet34c-0-base.py b/Pointcept/configs/s3dis/semseg-minkunet34c-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..8234bb4b52de86edb05b540ee7250fdf53a7d02e
--- /dev/null
+++ b/Pointcept/configs/s3dis/semseg-minkunet34c-0-base.py
@@ -0,0 +1,174 @@
+_base_ = ["../_base_/default_runtime.py"]
+# misc custom settings
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(type="MinkUNet34C", in_channels=6, out_channels=13),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(type="PolyLR")
+
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=13,
+ ignore_index=-1,
+ names=[
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+ ],
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=["coord", "color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "origin_coord",
+ "segment",
+ "origin_segment",
+ ),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ feat_keys=["coord", "color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/s3dis/semseg-ppt-v1m1-0-s3-sc-st-spunet.py b/Pointcept/configs/s3dis/semseg-ppt-v1m1-0-s3-sc-st-spunet.py
new file mode 100644
index 0000000000000000000000000000000000000000..e50ebbf10a1989342f39a476c1c3348671a78e95
--- /dev/null
+++ b/Pointcept/configs/s3dis/semseg-ppt-v1m1-0-s3-sc-st-spunet.py
@@ -0,0 +1,496 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom settings
+batch_size = 24  # total batch size across all GPUs
+num_worker = 48
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+
+# trainer
+train = dict(
+ type="MultiDatasetTrainer",
+)
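+# Note: MultiDatasetTrainer pairs with the ConcatDataset defined below; each
+# sub-dataset's `loop` value acts as a sampling weight (4 for Structured3D,
+# 2 for ScanNet, 1 for S3DIS) when the three datasets are mixed during joint
+# training.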
+
+# model settings
+model = dict(
+ type="PPT-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m3",
+ in_channels=6,
+ num_classes=0,
+ base_channels=32,
+ context_channels=256,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ cls_mode=False,
+ conditions=("ScanNet", "S3DIS", "Structured3D"),
+ zero_init=False,
+ norm_decouple=True,
+ norm_adaptive=True,
+ norm_affine=True,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+ backbone_out_channels=96,
+ context_channels=256,
+ conditions=("Structured3D", "ScanNet", "S3DIS"),
+ template="[x]",
+ clip_model="ViT-B/16",
+ class_name=(
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "bookcase",
+ "picture",
+ "counter",
+ "desk",
+ "shelves",
+ "curtain",
+ "dresser",
+ "pillow",
+ "mirror",
+ "ceiling",
+ "refrigerator",
+ "television",
+ "shower curtain",
+ "nightstand",
+ "toilet",
+ "sink",
+ "lamp",
+ "bathtub",
+ "garbagebin",
+ "board",
+ "beam",
+ "column",
+ "clutter",
+ "otherstructure",
+ "otherfurniture",
+ "otherprop",
+ ),
+ valid_index=(
+ (
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 11,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 23,
+ 25,
+ 26,
+ 33,
+ 34,
+ 35,
+ ),
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
+ (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
+ ),
+ backbone_mode=False,
+)
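+# Note: class_name lists the union of category names across the three conditions;
+# PPT appears to embed them with the CLIP text encoder ("ViT-B/16") and use
+# valid_index to select, per condition, which embeddings form that dataset's label
+# space (25 classes for Structured3D, 20 for ScanNet, 13 for S3DIS), following the
+# order of `conditions`.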
+
+# scheduler settings
+epoch = 100
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+# param_dicts = [dict(keyword="modulation", lr=0.005)]
+
+# dataset settings
+data = dict(
+ num_classes=13,
+ ignore_index=-1,
+ names=[
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+ ],
+ train=dict(
+ type="ConcatDataset",
+ datasets=[
+ # Structured3D
+ dict(
+ type="Structured3DDataset",
+ split="train",
+ data_root="data/structured3d",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "Structured3D"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=4, # sampling weight
+ ),
+ # ScanNet
+ dict(
+ type="ScanNetDataset",
+ split="train",
+ data_root="data/scannet",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=2, # sampling weight
+ ),
+ # S3DIS
+ dict(
+ type="S3DISDataset",
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root="data/s3dis",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.6, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=1, # sampling weight
+ ),
+ ],
+ ),
+ val=dict(
+ type="S3DISDataset",
+ split="Area_5",
+ data_root="data/s3dis",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type="S3DISDataset",
+ split="Area_5",
+ data_root="data/s3dis",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/s3dis/semseg-pt-v1-0-base.py b/Pointcept/configs/s3dis/semseg-pt-v1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..a925757abee6d3000ee4c3613e5b3bc49dbe7dc6
--- /dev/null
+++ b/Pointcept/configs/s3dis/semseg-pt-v1-0-base.py
@@ -0,0 +1,170 @@
+_base_ = ["../_base_/default_runtime.py"]
+# misc custom settings
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PointTransformer-Seg50",
+ in_channels=6,
+ num_classes=13,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
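+# Note: the MultiStepLR milestones appear to be fractions of the total schedule
+# (learning-rate drops at 60% and 80% of training, each multiplying the rate by
+# gamma=0.1) rather than absolute epoch indices.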
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=13,
+ ignore_index=-1,
+ names=[
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+ ],
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=["coord", "color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ offset_keys_dict=dict(offset="coord"),
+ feat_keys=["coord", "color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/s3dis/semseg-pt-v2m1-0-base.py b/Pointcept/configs/s3dis/semseg-pt-v2m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..3aca26a731096c1a1af9a34085c11184e6061d63
--- /dev/null
+++ b/Pointcept/configs/s3dis/semseg-pt-v2m1-0-base.py
@@ -0,0 +1,189 @@
+_base_ = ["../_base_/default_runtime.py"]
+# misc custom settings
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = False
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m1",
+ in_channels=6,
+ num_classes=13,
+ patch_embed_depth=2,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=16,
+ enc_depths=(2, 6, 2),
+ enc_channels=(96, 192, 384),
+ enc_groups=(12, 24, 48),
+ enc_neighbours=(16, 16, 16),
+ dec_depths=(1, 1, 1),
+ dec_channels=(48, 96, 192),
+ dec_groups=(6, 12, 24),
+ dec_neighbours=(16, 16, 16),
+ grid_sizes=(0.1, 0.2, 0.4),
+ attn_qkv_bias=True,
+ pe_multiplier=True,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="interp", # map / interp
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=13,
+ ignore_index=-1,
+ names=[
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+ ],
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=80000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=["coord", "color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ offset_keys_dict=dict(offset="coord"),
+ feat_keys=["coord", "color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/s3dis/semseg-pt-v2m2-0-base.py b/Pointcept/configs/s3dis/semseg-pt-v2m2-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..a99cb34fea520e3cb28fbb2e9bfcfd59dea1cdda
--- /dev/null
+++ b/Pointcept/configs/s3dis/semseg-pt-v2m2-0-base.py
@@ -0,0 +1,189 @@
+_base_ = ["../_base_/default_runtime.py"]
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=6,
+ num_classes=13,
+ patch_embed_depth=2,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=16,
+ enc_depths=(2, 6, 2),
+ enc_channels=(96, 192, 384),
+ enc_groups=(12, 24, 48),
+ enc_neighbours=(16, 16, 16),
+ dec_depths=(1, 1, 1),
+ dec_channels=(48, 96, 192),
+ dec_groups=(6, 12, 24),
+ dec_neighbours=(16, 16, 16),
+ grid_sizes=(0.1, 0.2, 0.4),
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="interp", # map / interp
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=13,
+ ignore_index=-1,
+ names=[
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+ ],
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=80000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=["coord", "color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ offset_keys_dict=dict(offset="coord"),
+ feat_keys=["coord", "color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/s3dis/semseg-pt-v2m2-0-lovasz.py b/Pointcept/configs/s3dis/semseg-pt-v2m2-0-lovasz.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a8b71a9ae8c676bd2d4f86dfbfa8adc79cf06d3
--- /dev/null
+++ b/Pointcept/configs/s3dis/semseg-pt-v2m2-0-lovasz.py
@@ -0,0 +1,192 @@
+_base_ = ["../_base_/default_runtime.py"]
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=6,
+ num_classes=13,
+ patch_embed_depth=2,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=16,
+ enc_depths=(2, 6, 2),
+ enc_channels=(96, 192, 384),
+ enc_groups=(12, 24, 48),
+ enc_neighbours=(16, 16, 16),
+ dec_depths=(1, 1, 1),
+ dec_channels=(48, 96, 192),
+ dec_groups=(6, 12, 24),
+ dec_neighbours=(16, 16, 16),
+ grid_sizes=(0.1, 0.2, 0.4),
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="interp", # map / interp
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=13,
+ ignore_index=-1,
+ names=[
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+ ],
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=80000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=["coord", "color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ offset_keys_dict=dict(offset="coord"),
+ feat_keys=["coord", "color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/s3dis/semseg-pt-v2m2-1-one-cycle.py b/Pointcept/configs/s3dis/semseg-pt-v2m2-1-one-cycle.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7d2493b92fcc21a00f2fecb270fb289dea58c0d
--- /dev/null
+++ b/Pointcept/configs/s3dis/semseg-pt-v2m2-1-one-cycle.py
@@ -0,0 +1,196 @@
+_base_ = ["../_base_/default_runtime.py"]
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=6,
+ num_classes=13,
+ patch_embed_depth=2,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=16,
+ enc_depths=(2, 6, 2),
+ enc_channels=(96, 192, 384),
+ enc_groups=(12, 24, 48),
+ enc_neighbours=(16, 16, 16),
+ dec_depths=(1, 1, 1),
+ dec_channels=(48, 96, 192),
+ dec_groups=(6, 12, 24),
+ dec_neighbours=(16, 16, 16),
+ grid_sizes=(0.1, 0.2, 0.4),
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="interp", # map / interp
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
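+# Note: with PyTorch's OneCycleLR, the learning rate warms up over the first
+# pct_start (5%) of steps from max_lr / div_factor to max_lr, then cosine-anneals
+# down to about max_lr / (div_factor * final_div_factor).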
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=13,
+ ignore_index=-1,
+ names=[
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+ ],
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=80000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=["coord", "color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ offset_keys_dict=dict(offset="coord"),
+ feat_keys=["coord", "color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/s3dis/semseg-pt-v3m1-0-base.py b/Pointcept/configs/s3dis/semseg-pt-v3m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..89d34ba0ff00d52373f9cf791afb554e99ef7a57
--- /dev/null
+++ b/Pointcept/configs/s3dis/semseg-pt-v3m1-0-base.py
@@ -0,0 +1,225 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentorV2",
+ num_classes=13,
+ backbone_out_channels=64,
+ backbone=dict(
+ type="PT-v3m1",
+ in_channels=6,
+ order=("z", "z-trans", "hilbert", "hilbert-trans"),
+ stride=(2, 2, 2, 2),
+ enc_depths=(2, 2, 2, 6, 2),
+ enc_channels=(32, 64, 128, 256, 512),
+ enc_num_head=(2, 4, 8, 16, 32),
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+ dec_depths=(2, 2, 2, 2),
+ dec_channels=(64, 64, 128, 256),
+ dec_num_head=(4, 4, 8, 16),
+ dec_patch_size=(1024, 1024, 1024, 1024),
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop=0.0,
+ proj_drop=0.0,
+ drop_path=0.3,
+ shuffle_orders=True,
+ pre_norm=True,
+ enable_rpe=False,
+ enable_flash=True,
+ upcast_attention=False,
+ upcast_softmax=False,
+ cls_mode=False,
+ pdnorm_bn=False,
+ pdnorm_ln=False,
+ pdnorm_decouple=True,
+ pdnorm_adaptive=False,
+ pdnorm_affine=True,
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.006, 0.0006],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0006)]
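+# Assumption about Pointcept's optimizer builder: parameters whose names contain the
+# keyword "block" form a separate param group trained with the lower lr (0.0006),
+# matching the second entry of the scheduler's max_lr list above.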
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=13,
+ ignore_index=-1,
+ names=[
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+ ],
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.6, mode="random"),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "origin_coord",
+ "segment",
+ "origin_segment",
+ ),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1, 1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/s3dis/semseg-pt-v3m1-1-rpe.py b/Pointcept/configs/s3dis/semseg-pt-v3m1-1-rpe.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab612fc5449920103df6ea037588c87833ba73d5
--- /dev/null
+++ b/Pointcept/configs/s3dis/semseg-pt-v3m1-1-rpe.py
@@ -0,0 +1,225 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentorV2",
+ num_classes=13,
+ backbone_out_channels=64,
+ backbone=dict(
+ type="PT-v3m1",
+ in_channels=6,
+ order=["z", "z-trans", "hilbert", "hilbert-trans"],
+ stride=(2, 2, 2, 2),
+ enc_depths=(2, 2, 2, 6, 2),
+ enc_channels=(32, 64, 128, 256, 512),
+ enc_num_head=(2, 4, 8, 16, 32),
+ enc_patch_size=(128, 128, 128, 128, 128),
+ dec_depths=(2, 2, 2, 2),
+ dec_channels=(64, 64, 128, 256),
+ dec_num_head=(4, 4, 8, 16),
+ dec_patch_size=(128, 128, 128, 128),
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop=0.0,
+ proj_drop=0.0,
+ drop_path=0.3,
+ shuffle_orders=True,
+ pre_norm=True,
+ enable_rpe=True,
+ enable_flash=False,
+ upcast_attention=True,
+ upcast_softmax=True,
+ cls_mode=False,
+ pdnorm_bn=False,
+ pdnorm_ln=False,
+ pdnorm_decouple=True,
+ pdnorm_adaptive=False,
+ pdnorm_affine=True,
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.006, 0.0006],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0006)]
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=13,
+ ignore_index=-1,
+ names=[
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+ ],
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.6, mode="random"),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "origin_coord",
+ "segment",
+ "origin_segment",
+ ),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1, 1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/s3dis/semseg-pt-v3m1-2-ppt-extreme.py b/Pointcept/configs/s3dis/semseg-pt-v3m1-2-ppt-extreme.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2d892af5b5317eef3b3554c3a49c0c1861a2645
--- /dev/null
+++ b/Pointcept/configs/s3dis/semseg-pt-v3m1-2-ppt-extreme.py
@@ -0,0 +1,487 @@
+"""
+PTv3 + PPT
+Pre-trained on ScanNet + Structured3D
+(S3DIS is commented by default as a long data time issue of S3DIS: https://github.com/Pointcept/Pointcept/issues/103)
+In the original PPT paper, 3 datasets are jointly trained and validated on the three datasets jointly with
+one shared weight model. In PTv3, we trained on multi-dataset but only validated on one single dataset to
+achieve extreme performance on one single dataset.
+
+To enable joint training on three datasets, uncomment config for the S3DIS dataset and change the "loop" of
+ Structured3D and ScanNet to 4 and 2 respectively.
+"""
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 24 # bs: total bs in all gpus
+num_worker = 48
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+
+# trainer
+train = dict(
+ type="MultiDatasetTrainer",
+)
+
+# model settings
+model = dict(
+ type="PPT-v1m1",
+ backbone=dict(
+ type="PT-v3m1",
+ in_channels=6,
+ order=("z", "z-trans", "hilbert", "hilbert-trans"),
+ stride=(2, 2, 2, 2),
+ enc_depths=(2, 2, 2, 6, 2),
+ enc_channels=(32, 64, 128, 256, 512),
+ enc_num_head=(2, 4, 8, 16, 32),
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+ dec_depths=(2, 2, 2, 2),
+ dec_channels=(64, 64, 128, 256),
+ dec_num_head=(4, 4, 8, 16),
+ dec_patch_size=(1024, 1024, 1024, 1024),
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop=0.0,
+ proj_drop=0.0,
+ drop_path=0.3,
+ shuffle_orders=True,
+ pre_norm=True,
+ enable_rpe=False,
+ enable_flash=True,
+ upcast_attention=False,
+ upcast_softmax=False,
+ cls_mode=False,
+ pdnorm_bn=True,
+ pdnorm_ln=True,
+ pdnorm_decouple=True,
+ pdnorm_adaptive=False,
+ pdnorm_affine=True,
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ backbone_out_channels=64,
+ context_channels=256,
+ conditions=("Structured3D", "ScanNet", "S3DIS"),
+ template="[x]",
+ clip_model="ViT-B/16",
+ # fmt: off
+ class_name=(
+ "wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door",
+ "window", "bookshelf", "bookcase", "picture", "counter", "desk", "shelves", "curtain",
+ "dresser", "pillow", "mirror", "ceiling", "refrigerator", "television", "shower curtain", "nightstand",
+ "toilet", "sink", "lamp", "bathtub", "garbagebin", "board", "beam", "column",
+ "clutter", "otherstructure", "otherfurniture", "otherprop",
+ ),
+ valid_index=(
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 33, 34, 35),
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
+ (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
+ ),
+ # fmt: on
+ backbone_mode=False,
+)
+
+# scheduler settings
+epoch = 100
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.005, 0.0005],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0005)]
+
+# dataset settings
+data = dict(
+ num_classes=13,
+ ignore_index=-1,
+ names=[
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+ ],
+ train=dict(
+ type="ConcatDataset",
+ datasets=[
+ # Structured3D
+ dict(
+ type="Structured3DDataset",
+ split=["train", "val", "test"],
+ data_root="data/structured3d",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(
+ # type="ElasticDistortion",
+ # distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ # ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "Structured3D"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=4, # sampling weight
+ ),
+ # ScanNet
+ dict(
+ type="ScanNetDataset",
+ split="train",
+ data_root="data/scannet",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(
+ # type="ElasticDistortion",
+ # distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ # ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=102400, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=2, # sampling weight
+ ),
+ # S3DIS
+ dict(
+ type="S3DISDataset",
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root="data/s3dis",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(
+ # type="ElasticDistortion",
+ # distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ # ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.6, mode="random"),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=1, # sampling weight
+ ),
+ ],
+ ),
+ val=dict(
+ type="S3DISDataset",
+ split="Area_5",
+ data_root="data/s3dis",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "origin_coord",
+ "segment",
+ "origin_segment",
+ "condition",
+ ),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type="S3DISDataset",
+ split="Area_5",
+ data_root="data/s3dis",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/s3dis/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/s3dis/semseg-spunet-v1m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..6545ca151ef59f6ea6e151a90de5e188db7e8ea7
--- /dev/null
+++ b/Pointcept/configs/s3dis/semseg-spunet-v1m1-0-base.py
@@ -0,0 +1,168 @@
+_base_ = ["../_base_/default_runtime.py"]
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=13,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(type="PolyLR")
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=13,
+ ignore_index=-1,
+ names=[
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+ ],
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=["coord", "color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=["coord", "color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/s3dis/semseg-spunet-v1m1-0-cn-base.py b/Pointcept/configs/s3dis/semseg-spunet-v1m1-0-cn-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee037115e69b6d7086109a31f7046520b42243f8
--- /dev/null
+++ b/Pointcept/configs/s3dis/semseg-spunet-v1m1-0-cn-base.py
@@ -0,0 +1,181 @@
+# spconv is very fast, so data loading speed becomes the bottleneck; caching the data is a better choice.
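+# (If the dataset implementation supports in-memory or shared-memory caching, enabling
+# it here is one way to remove the loading bottleneck.)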
+
+
+_base_ = ["../_base_/default_runtime.py"]
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=13,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(type="PolyLR")
+
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=13,
+ ignore_index=-1,
+ names=[
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+ ],
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=["color", "normal"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "origin_coord",
+ "segment",
+ "origin_segment",
+ ),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ feat_keys=["color", "normal"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/s3dis/semseg-spunet-v1m2-0-base.py b/Pointcept/configs/s3dis/semseg-spunet-v1m2-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..98511c5919b5f40271e4d500a30ed0bd42a6d2c9
--- /dev/null
+++ b/Pointcept/configs/s3dis/semseg-spunet-v1m2-0-base.py
@@ -0,0 +1,184 @@
+# spconv is very fast, so data loading speed becomes the bottleneck; caching the data is a better choice.
+
+
+_base_ = ["../_base_/default_runtime.py"]
+# misc custom setting
+batch_size = 48 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m2",
+ in_channels=3,
+ num_classes=13,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ bn_momentum=0.1,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(type="PolyLR")
+
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=13,
+ ignore_index=-1,
+ names=[
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+ ],
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=["color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "origin_coord",
+ "segment",
+ "origin_segment",
+ ),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ feat_keys=["color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color"),
+ ),
+ ],
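+            # Test-time augmentation: each inner list below is applied as one augmented pass,
+            # and per-point predictions from all passes are accumulated before the final result.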
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/s3dis/semseg-swin3d-v1m1-0-small.py b/Pointcept/configs/s3dis/semseg-swin3d-v1m1-0-small.py
new file mode 100644
index 0000000000000000000000000000000000000000..119775214ad1e5e0f508d166e9aa9edfc987b76b
--- /dev/null
+++ b/Pointcept/configs/s3dis/semseg-swin3d-v1m1-0-small.py
@@ -0,0 +1,184 @@
+_base_ = ["../_base_/default_runtime.py"]
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="Swin3D-v1m1",
+ in_channels=9,
+ num_classes=13,
+ base_grid_size=0.02,
+ depths=[2, 4, 9, 4, 4],
+ channels=[48, 96, 192, 384, 384],
+ num_heads=[6, 6, 12, 24, 24],
+ window_sizes=[5, 7, 7, 7, 7],
+ quant_size=4,
+ drop_path_rate=0.3,
+ up_k=3,
+ num_layers=5,
+ stem_transformer=True,
+ down_stride=3,
+ upsample="linear_attn",
+ knn_down=True,
+ cRSE="XYZ_RGB_NORM",
+ fp16_mode=1,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.05)
+scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
+param_dicts = [dict(keyword="blocks", lr=0.0001)]
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=13,
+ ignore_index=-1,
+ names=[
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+ ],
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.8, 1.2]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ return_displacement=True,
+ ),
+ dict(type="SphereCrop", point_max=80000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ return_displacement=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ return_displacement=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/s3dis/semseg-swin3d-v1m1-1-large.py b/Pointcept/configs/s3dis/semseg-swin3d-v1m1-1-large.py
new file mode 100644
index 0000000000000000000000000000000000000000..02c43d2debf08f26305d5d5636b857b5f34f17c4
--- /dev/null
+++ b/Pointcept/configs/s3dis/semseg-swin3d-v1m1-1-large.py
@@ -0,0 +1,191 @@
+_base_ = ["../_base_/default_runtime.py"]
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="Swin3D-v1m1",
+ in_channels=9,
+ num_classes=13,
+ base_grid_size=0.02,
+ depths=[2, 4, 9, 4, 4],
+ channels=[80, 160, 320, 640, 640],
+ num_heads=[10, 10, 20, 40, 40],
+ window_sizes=[5, 7, 7, 7, 7],
+ quant_size=4,
+ drop_path_rate=0.3,
+ up_k=3,
+ num_layers=5,
+ stem_transformer=True,
+ down_stride=3,
+ upsample="linear_attn",
+ knn_down=True,
+ cRSE="XYZ_RGB_NORM",
+ fp16_mode=1,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 3000
+optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.001, 0.0001],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="blocks", lr=0.0001)]
+
+# dataset settings
+dataset_type = "S3DISDataset"
+data_root = "data/s3dis"
+
+data = dict(
+ num_classes=13,
+ ignore_index=-1,
+ names=[
+ "ceiling",
+ "floor",
+ "wall",
+ "beam",
+ "column",
+ "window",
+ "door",
+ "table",
+ "chair",
+ "sofa",
+ "bookcase",
+ "board",
+ "clutter",
+ ],
+ train=dict(
+ type=dataset_type,
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.8, 1.2]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ return_displacement=True,
+ ),
+ dict(type="SphereCrop", point_max=80000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ return_displacement=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="Area_5",
+ data_root=data_root,
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.04,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ return_displacement=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
+ [dict(type="RandomScale", scale=[1, 1])],
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
+ [
+ dict(type="RandomScale", scale=[0.9, 0.9]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
+ [
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ dict(type="RandomFlip", p=1),
+ ],
+ [
+ dict(type="RandomScale", scale=[1.1, 1.1]),
+ dict(type="RandomFlip", p=1),
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/insseg-pointgroup-v1m1-0-spunet-base.py b/Pointcept/configs/scannet/insseg-pointgroup-v1m1-0-spunet-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..3dec6d47c51545b55b543f80ac0c13556bacc84f
--- /dev/null
+++ b/Pointcept/configs/scannet/insseg-pointgroup-v1m1-0-spunet-base.py
@@ -0,0 +1,187 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+num_worker = 12
+mix_prob = 0
+empty_cache = False
+enable_amp = True
+evaluate = True
+
+class_names = [
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+]
+num_classes = 20
+segment_ignore_index = (-1, 0, 1)
+
+# model settings
+model = dict(
+ type="PG-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=0,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ backbone_out_channels=96,
+ semantic_num_classes=num_classes,
+ semantic_ignore_index=-1,
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ cluster_thresh=1.5,
+ cluster_closed_points=300,
+ cluster_propose_points=100,
+ cluster_min_points=50,
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(type="PolyLR")
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=num_classes,
+ ignore_index=-1,
+ names=class_names,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "instance_centroid",
+ "bbox",
+ ),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={
+ "coord": "origin_coord",
+ "segment": "origin_segment",
+ "instance": "origin_instance",
+ },
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "origin_coord",
+ "origin_segment",
+ "origin_instance",
+ "instance_centroid",
+ "bbox",
+ ),
+ feat_keys=("color", "normal"),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(), # currently not available
+)
+
+hooks = [
+ dict(type="CheckpointLoader", keywords="module.", replacement="module."),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(
+ type="InsSegEvaluator",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="CheckpointSaver", save_freq=None),
+]
diff --git a/Pointcept/configs/scannet/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py b/Pointcept/configs/scannet/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py
new file mode 100644
index 0000000000000000000000000000000000000000..09789883e5ae3c79f1cd3f3282380fdb30c21782
--- /dev/null
+++ b/Pointcept/configs/scannet/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py
@@ -0,0 +1,279 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+num_worker = 24
+mix_prob = 0
+empty_cache = False
+enable_amp = True
+evaluate = True
+find_unused_parameters = True
+
+class_names = [
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+]
+num_classes = 20
+segment_ignore_index = (-1, 0, 1)
+
+# model settings
+model = dict(
+ type="PG-v1m1",
+ backbone=dict(
+ type="PPT-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m3",
+ in_channels=6,
+ num_classes=0,
+ base_channels=32,
+ context_channels=256,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ cls_mode=False,
+ conditions=("ScanNet", "S3DIS", "Structured3D"),
+ zero_init=False,
+ norm_decouple=True,
+ norm_adaptive=True,
+ norm_affine=True,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+ backbone_out_channels=96,
+ context_channels=256,
+ conditions=("Structured3D", "ScanNet", "S3DIS"),
+ template="[x]",
+ clip_model="ViT-B/16",
+ class_name=(
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "bookcase",
+ "picture",
+ "counter",
+ "desk",
+ "shelves",
+ "curtain",
+ "dresser",
+ "pillow",
+ "mirror",
+ "ceiling",
+ "refrigerator",
+ "television",
+ "shower curtain",
+ "nightstand",
+ "toilet",
+ "sink",
+ "lamp",
+ "bathtub",
+ "garbagebin",
+ "board",
+ "beam",
+ "column",
+ "clutter",
+ "otherstructure",
+ "otherfurniture",
+ "otherprop",
+ ),
+ valid_index=(
+ (
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ 11,
+ 13,
+ 14,
+ 15,
+ 16,
+ 17,
+ 18,
+ 19,
+ 20,
+ 21,
+ 23,
+ 25,
+ 26,
+ 33,
+ 34,
+ 35,
+ ),
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
+ (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
+ ),
+ backbone_mode=True,
+ ),
+ backbone_out_channels=96,
+ semantic_num_classes=num_classes,
+ semantic_ignore_index=-1,
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ cluster_thresh=1.5,
+ cluster_closed_points=300,
+ cluster_propose_points=100,
+ cluster_min_points=50,
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(type="PolyLR")
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=num_classes,
+ ignore_index=-1,
+ names=class_names,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "instance_centroid",
+ "bbox",
+ "condition",
+ ),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={
+ "coord": "origin_coord",
+ "segment": "origin_segment",
+ "instance": "origin_instance",
+ },
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "origin_coord",
+ "origin_segment",
+ "origin_instance",
+ "instance_centroid",
+ "bbox",
+ "condition",
+ ),
+ feat_keys=("color", "normal"),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(), # currently not available
+)
+
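+# CheckpointLoader remaps the pretrained PPT weights: keys prefixed "module." are
+# loaded under "module.backbone.", so the pretrained segmentor becomes the
+# PointGroup backbone for fine-tuning.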
+hooks = [
+ dict(type="CheckpointLoader", keywords="module.", replacement="module.backbone."),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(
+ type="InsSegEvaluator",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="CheckpointSaver", save_freq=None),
+]
diff --git a/Pointcept/configs/scannet/objdet-cagroup3d-v1m1-0-base.py b/Pointcept/configs/scannet/objdet-cagroup3d-v1m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e31e32bf369b651f361135614953ae4fcaf1047
--- /dev/null
+++ b/Pointcept/configs/scannet/objdet-cagroup3d-v1m1-0-base.py
@@ -0,0 +1,183 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 16  # total batch size across all GPUs
+num_worker = 32
+mix_prob = 0
+empty_cache = False
+enable_amp = False
+evaluate = True
+
+class_names = [
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+]
+num_classes = 20
+segment_ignore_index = (-1, 0, 1)
+
+# model settings
+model = dict(
+ type="PG-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=0,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ backbone_out_channels=96,
+ semantic_num_classes=num_classes,
+ semantic_ignore_index=-1,
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ cluster_thresh=1.5,
+ cluster_closed_points=300,
+ cluster_propose_points=100,
+ cluster_min_points=50,
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(type="PolyLR")
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=num_classes,
+ ignore_index=-1,
+ names=class_names,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ # dict(type="CenterShift", apply_z=True),
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5),
+ # # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ # dict(type="RandomRotate", angle=[-1, 1], axis='z', center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis='x', p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis='y', p=0.5),
+ # dict(type="RandomScale", scale=[0.9, 1.1]),
+ # # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ # dict(type="RandomFlip", p=0.5),
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ # dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ # dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
+ # dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ # dict(type="GridSample",
+ # grid_size=0.02,
+ # hash_type='fnv',
+ # mode='train',
+ # return_grid_coord=True,
+ # keys=("coord", "color", "normal", "segment", "instance")),
+ # dict(type="SphereCrop", sample_rate=0.8, mode='random'),
+ # dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "instance_centroid",
+ "bbox",
+ ),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={
+ "coord": "origin_coord",
+ "segment": "origin_segment",
+ "instance": "origin_instance",
+ },
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "origin_coord",
+ "origin_segment",
+ "origin_instance",
+ "instance_centroid",
+ "bbox",
+ ),
+ feat_keys=("color", "normal"),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(), # currently not available
+)
+
+hooks = [
+ dict(type="CheckpointLoader", keywords="module.", replacement="module."),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(
+ type="InsSegEvaluator",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="CheckpointSaver", save_freq=None),
+]
diff --git a/Pointcept/configs/scannet/pretrain-msc-v1m1-0-spunet-base.py b/Pointcept/configs/scannet/pretrain-msc-v1m1-0-spunet-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f56a96af8c57a8ebe05380c53c03245420d0d93
--- /dev/null
+++ b/Pointcept/configs/scannet/pretrain-msc-v1m1-0-spunet-base.py
@@ -0,0 +1,155 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 32  # total batch size across all GPUs
+num_worker = 32
+mix_prob = 0
+empty_cache = False
+enable_amp = False
+evaluate = False
+find_unused_parameters = False
+
+# model settings
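+# Masked Scene Contrast (MSC) pretraining: two augmented views of each scene are
+# generated, points matched within matching_max_radius are contrasted (InfoNCE,
+# temperature nce_t), and the color of masked grid patches (mask_grid_size,
+# mask_rate) is reconstructed.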
+model = dict(
+ type="MSC-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=0,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ backbone_in_channels=6,
+ backbone_out_channels=96,
+ mask_grid_size=0.1,
+ mask_rate=0.4,
+ view1_mix_prob=0.8,
+ view2_mix_prob=0,
+ matching_max_k=8,
+ matching_max_radius=0.03,
+ matching_max_pair=8192,
+ nce_t=0.4,
+ contrast_weight=1,
+ reconstruct_weight=1,
+ reconstruct_color=True,
+ reconstruct_normal=False,
+)
+
+# scheduler settings
+epoch = 600
+optimizer = dict(type="SGD", lr=0.1, momentum=0.8, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.01,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split=["train", "val", "test"],
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ dict(type="Copy", keys_dict={"coord": "origin_coord"}),
+ dict(
+ type="ContrastiveViewsGenerator",
+ view_keys=("coord", "color", "normal", "origin_coord"),
+ view_trans_cfg=[
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="RandomColorJitter",
+ brightness=0.4,
+ contrast=0.4,
+ saturation=0.2,
+ hue=0.02,
+ p=0.8,
+ ),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ keys=("origin_coord", "coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.6, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ ],
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "view1_origin_coord",
+ "view1_grid_coord",
+ "view1_coord",
+ "view1_color",
+ "view1_normal",
+ "view2_origin_coord",
+ "view2_grid_coord",
+ "view2_coord",
+ "view2_color",
+ "view2_normal",
+ ),
+ offset_keys_dict=dict(
+ view1_offset="view1_coord", view2_offset="view2_coord"
+ ),
+ view1_feat_keys=("view1_color", "view1_normal"),
+ view2_feat_keys=("view2_color", "view2_normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+)
+
+hooks = [
+ dict(type="CheckpointLoader"),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(type="CheckpointSaver", save_freq=None),
+]
diff --git a/Pointcept/configs/scannet/pretrain-msc-v1m1-1-spunet-pointcontrast.py b/Pointcept/configs/scannet/pretrain-msc-v1m1-1-spunet-pointcontrast.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ff9061f2fb93a2e72343e8f066893fbae4a897a
--- /dev/null
+++ b/Pointcept/configs/scannet/pretrain-msc-v1m1-1-spunet-pointcontrast.py
@@ -0,0 +1,162 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 32  # total batch size across all GPUs
+num_worker = 32
+mix_prob = 0
+empty_cache = False
+enable_amp = False
+evaluate = False
+find_unused_parameters = False
+
+# model settings
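+# PointContrast-style baseline run through the MSC framework: masking and
+# reconstruction are disabled (mask_rate=0, reconstruct_*=False) and partial RGB-D
+# frame pairs are contrasted with temperature nce_t=0.07.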
+model = dict(
+ type="MSC-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=3,
+ num_classes=0,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ backbone_in_channels=3,
+ backbone_out_channels=96,
+ mask_grid_size=0.1,
+ mask_rate=0,
+ view1_mix_prob=0,
+ view2_mix_prob=0,
+ matching_max_k=8,
+ matching_max_radius=0.03,
+ matching_max_pair=4096,
+ nce_t=0.07,
+ contrast_weight=1,
+ reconstruct_weight=1,
+ reconstruct_color=False,
+ reconstruct_normal=False,
+)
+
+# scheduler settings
+epoch = 10
+eval_epoch = 10
+optimizer = dict(type="SGD", lr=0.1, momentum=0.8, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.01,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetPairDataset"
+data_root = "data/scannet_pair"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ data_root=data_root,
+ view1_transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="Copy", keys_dict={"coord": "origin_coord"}),
+ # dict(type="RandomScale", scale=[0.9, 1.1]),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="RandomColorJitter",
+ brightness=0.4,
+ contrast=0.4,
+ saturation=0.2,
+ hue=0.02,
+ p=0.8,
+ ),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ dict(
+ type="GridSample",
+ grid_size=0.025,
+ hash_type="fnv",
+ mode="train",
+ keys=("origin_coord", "coord", "color"),
+ return_grid_coord=True,
+ ),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("origin_coord", "grid_coord", "coord", "color"),
+ offset_keys_dict=dict(offset="coord"),
+ feat_keys=["color"],
+ ),
+ ],
+ view2_transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="Copy", keys_dict={"coord": "origin_coord"}),
+ # dict(type="RandomScale", scale=[0.9, 1.1]),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="RandomColorJitter",
+ brightness=0.4,
+ contrast=0.4,
+ saturation=0.2,
+ hue=0.02,
+ p=0.8,
+ ),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ dict(
+ type="GridSample",
+ grid_size=0.025,
+ hash_type="fnv",
+ mode="train",
+ keys=("origin_coord", "coord", "color"),
+ return_grid_coord=True,
+ ),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("origin_coord", "grid_coord", "coord", "color"),
+ offset_keys_dict=dict(offset="coord"),
+ feat_keys=["color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+)
+
+hooks = [
+ dict(type="CheckpointLoader"),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(type="CheckpointSaver", save_freq=None),
+]
diff --git a/Pointcept/configs/scannet/pretrain-msc-v1m2-0-spunet-csc.py b/Pointcept/configs/scannet/pretrain-msc-v1m2-0-spunet-csc.py
new file mode 100644
index 0000000000000000000000000000000000000000..def70881496c3c04d1bf6a32df260fe9cbdac612
--- /dev/null
+++ b/Pointcept/configs/scannet/pretrain-msc-v1m2-0-spunet-csc.py
@@ -0,0 +1,165 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 32  # total batch size across all GPUs
+num_worker = 32
+mix_prob = 0
+empty_cache = False
+enable_amp = False
+evaluate = False
+find_unused_parameters = False
+
+# model settings
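+# MSC-v1m2 follows Contrastive Scene Contexts (CSC): positive pairs are additionally
+# split into spatial partitions before the contrastive loss (partitions=4; r1/r2
+# presumably bound the inner and outer partition radii).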
+model = dict(
+ type="MSC-v1m2",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=3,
+ num_classes=0,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ backbone_in_channels=3,
+ backbone_out_channels=96,
+ mask_grid_size=0.1,
+ mask_rate=0,
+ view1_mix_prob=0,
+ view2_mix_prob=0,
+ matching_max_k=8,
+ matching_max_radius=0.03,
+ matching_max_pair=8192,
+ nce_t=0.4,
+ contrast_weight=1,
+ reconstruct_weight=1,
+ reconstruct_color=False,
+ reconstruct_normal=False,
+ partitions=4,
+ r1=2,
+ r2=20,
+)
+
+# scheduler settings
+epoch = 10
+eval_epoch = 10
+optimizer = dict(type="SGD", lr=0.1, momentum=0.8, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.01,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetPairDataset"
+data_root = "data/scannet_pair"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ data_root=data_root,
+ view1_transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="Copy", keys_dict={"coord": "origin_coord"}),
+ # dict(type="RandomScale", scale=[0.9, 1.1]),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="RandomColorJitter",
+ brightness=0.4,
+ contrast=0.4,
+ saturation=0.2,
+ hue=0.02,
+ p=0.8,
+ ),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ dict(
+ type="GridSample",
+ grid_size=0.025,
+ hash_type="fnv",
+ mode="train",
+ keys=("origin_coord", "coord", "color"),
+ return_grid_coord=True,
+ ),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("origin_coord", "grid_coord", "coord", "color"),
+ offset_keys_dict=dict(offset="coord"),
+ feat_keys=["color"],
+ ),
+ ],
+ view2_transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="Copy", keys_dict={"coord": "origin_coord"}),
+ # dict(type="RandomScale", scale=[0.9, 1.1]),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="RandomColorJitter",
+ brightness=0.4,
+ contrast=0.4,
+ saturation=0.2,
+ hue=0.02,
+ p=0.8,
+ ),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ dict(
+ type="GridSample",
+ grid_size=0.025,
+ hash_type="fnv",
+ mode="train",
+ keys=("origin_coord", "coord", "color"),
+ return_grid_coord=True,
+ ),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("origin_coord", "grid_coord", "coord", "color"),
+ offset_keys_dict=dict(offset="coord"),
+ feat_keys=["color"],
+ ),
+ ],
+ test_mode=False,
+ ),
+)
+
+hooks = [
+ dict(type="CheckpointLoader"),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(type="CheckpointSaver", save_freq=None),
+]
diff --git a/Pointcept/configs/scannet/semseg-cac-v1m1-0-spunet-base.py b/Pointcept/configs/scannet/semseg-cac-v1m1-0-spunet-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..3968225e55db565a1e675605aa91d8a7a0353010
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-cac-v1m1-0-spunet-base.py
@@ -0,0 +1,292 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
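+# CAC-v1m1 wraps the backbone with a Context-Aware Classifier head: cos_temp scales
+# the cosine-similarity logits and the *_weight terms balance its main and auxiliary
+# losses (conf_thresh is assumed to filter low-confidence context predictions).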
+model = dict(
+ type="CAC-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=0,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ num_classes=20,
+ backbone_out_channels=96,
+ cos_temp=15,
+ main_weight=1,
+ pre_weight=1,
+ pre_self_weight=1,
+ kl_weight=1,
+ conf_thresh=0.75,
+ detach_pre_logits=True,
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-cac-v1m1-1-spunet-lovasz.py b/Pointcept/configs/scannet/semseg-cac-v1m1-1-spunet-lovasz.py
new file mode 100644
index 0000000000000000000000000000000000000000..3968225e55db565a1e675605aa91d8a7a0353010
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-cac-v1m1-1-spunet-lovasz.py
@@ -0,0 +1,292 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="CAC-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=0,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ num_classes=20,
+ backbone_out_channels=96,
+ cos_temp=15,
+ main_weight=1,
+ pre_weight=1,
+ pre_self_weight=1,
+ kl_weight=1,
+ conf_thresh=0.75,
+ detach_pre_logits=True,
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-cac-v1m1-2-ptv2-lovasz.py b/Pointcept/configs/scannet/semseg-cac-v1m1-2-ptv2-lovasz.py
new file mode 100644
index 0000000000000000000000000000000000000000..f36a0c3a9426a13217cbefcd21b20f798b02b6a7
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-cac-v1m1-2-ptv2-lovasz.py
@@ -0,0 +1,309 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="CAC-v1m1",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=9,
+ num_classes=0,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+ grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ num_classes=20,
+ backbone_out_channels=48,
+ cos_temp=15,
+ main_weight=1,
+ pre_weight=1,
+ pre_self_weight=1,
+ kl_weight=1,
+ conf_thresh=0.75,
+ detach_pre_logits=True,
+)
+
+# scheduler settings
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
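+# The test_cfg.aug_transform above enumerates the test-time augmentation views:
+# four z-axis rotations (angles are fractions of pi) at scales 1.0, 0.95 and
+# 1.05, plus one flipped view, i.e. 13 views whose per-point predictions are
+# aggregated during testing.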
diff --git a/Pointcept/configs/scannet/semseg-minkunet34c-0-base.py b/Pointcept/configs/scannet/semseg-minkunet34c-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..795998f368308c830fcdac760df67252b87ebab5
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-minkunet34c-0-base.py
@@ -0,0 +1,193 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(type="MinkUNet34C", in_channels=9, out_channels=20),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
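+# in_channels=9 matches the concatenated per-point features collected below
+# (feat_keys = coord + color + normal = 3 + 3 + 3 channels); out_channels=20
+# equals the number of ScanNet semantic classes.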
+
+# scheduler settings
+epoch = 600
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-oacnns-v1m1-0-base.py b/Pointcept/configs/scannet/semseg-oacnns-v1m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..57ef37e6aa35f153288571bceb76448463772365
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-oacnns-v1m1-0-base.py
@@ -0,0 +1,290 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+sync_bn = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="OACNNs",
+ in_channels=9,
+ num_classes=20,
+ embed_channels=64,
+ enc_channels=[64, 64, 128, 256],
+ groups=[4, 4, 8, 16],
+ enc_depth=[3, 3, 9, 8],
+ dec_channels=[256, 256, 256, 256],
+ point_grid_size=[[8, 12, 16, 16], [6, 9, 12, 12], [4, 6, 8, 8], [3, 4, 6, 6]],
+ dec_depth=[2, 2, 2, 2],
+ enc_num_ref=[16, 16, 16, 16],
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "normal", "color"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ return_min_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "normal", "color"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "normal", "color"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "normal", "color"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-octformer-v1m1-0-base.py b/Pointcept/configs/scannet/semseg-octformer-v1m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc0bafadc332cfa374e522699d314f721a9b57e8
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-octformer-v1m1-0-base.py
@@ -0,0 +1,296 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = False
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="OctFormer-v1m1",
+ in_channels=10,
+ num_classes=20,
+ fpn_channels=168,
+ channels=(96, 192, 384, 384),
+ num_blocks=(2, 2, 18, 2),
+ num_heads=(6, 12, 24, 24),
+ patch_size=26,
+ stem_down=2,
+ head_up=2,
+ dilation=4,
+ drop_path=0.5,
+ nempty=True,
+ octree_depth=11,
+ octree_full_depth=2,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
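+# in_channels=10 assumes the concatenated features coord + color + normal +
+# displacement (3 + 3 + 3 + 1): with project_displacement=True the GridSample
+# displacement is projected onto the point normal, leaving a single channel.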
+
+# scheduler settings
+epoch = 600
+optimizer = dict(type="AdamW", lr=0.0015, weight_decay=0.05)
+scheduler = dict(
+ type="MultiStepWithWarmupLR",
+ milestones=[0.6, 0.9],
+ gamma=0.1,
+ warmup_rate=0.05,
+ warmup_scale=1e-5,
+)
+param_dicts = [dict(keyword="blocks", lr=0.00015)]
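+# MultiStepWithWarmupLR milestones are given as fractions of the schedule: the
+# learning rate decays by gamma at 60% and 90% of training after a brief
+# warmup. param_dicts additionally gives parameters whose names contain
+# "blocks" a 10x lower learning rate (1.5e-4 vs. the base 1.5e-3).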
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ return_displacement=True,
+ project_displacement=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="SphereCrop", point_max=120000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "normal", "segment"),
+ feat_keys=("coord", "color", "normal", "displacement"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ return_displacement=True,
+ project_displacement=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "normal", "segment"),
+ feat_keys=("coord", "color", "normal", "displacement"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_displacement=True,
+ project_displacement=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "normal", "index"),
+ feat_keys=("coord", "color", "normal", "displacement"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-ppt-v1m1-0-sc-st-spunet.py b/Pointcept/configs/scannet/semseg-ppt-v1m1-0-sc-st-spunet.py
new file mode 100644
index 0000000000000000000000000000000000000000..7fe0c7512b04aa874e32be04dc77ab35e706b67a
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-ppt-v1m1-0-sc-st-spunet.py
@@ -0,0 +1,391 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 24  # total batch size across all GPUs
+num_worker = 48
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+
+# trainer
+train = dict(
+ type="MultiDatasetTrainer",
+)
+
+# model settings
+model = dict(
+ type="PPT-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m3",
+ in_channels=6,
+ num_classes=0,
+ base_channels=32,
+ context_channels=256,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ cls_mode=False,
+ conditions=("ScanNet", "S3DIS", "Structured3D"),
+ zero_init=False,
+ norm_decouple=True,
+ norm_adaptive=True,
+ norm_affine=True,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+ backbone_out_channels=96,
+ context_channels=256,
+ conditions=("Structured3D", "ScanNet", "S3DIS"),
+ template="[x]",
+ clip_model="ViT-B/16",
+ # fmt: off
+ class_name=(
+ "wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door",
+ "window", "bookshelf", "bookcase", "picture", "counter", "desk", "shelves", "curtain",
+ "dresser", "pillow", "mirror", "ceiling", "refrigerator", "television", "shower curtain", "nightstand",
+ "toilet", "sink", "lamp", "bathtub", "garbagebin", "board", "beam", "column",
+ "clutter", "otherstructure", "otherfurniture", "otherprop",
+ ),
+ valid_index=(
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 33, 34, 35),
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
+ (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
+ ),
+ # fmt: on
+ backbone_mode=False,
+)
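+# class_name is the union of labels across Structured3D, ScanNet and S3DIS;
+# valid_index selects each dataset's subset in the order of `conditions`, e.g.
+# [class_name[i] for i in valid_index[1]] recovers the 20 ScanNet classes.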
+
+# scheduler settings
+epoch = 100
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+# param_dicts = [dict(keyword="modulation", lr=0.005)]
+
+# dataset settings
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type="ConcatDataset",
+ datasets=[
+ # Structured3D
+ dict(
+ type="Structured3DDataset",
+ split="train",
+ data_root="data/structured3d",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "Structured3D"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=2, # sampling weight
+ ),
+ # ScanNet
+ dict(
+ type="ScanNetDataset",
+ split="train",
+ data_root="data/scannet",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=1, # sampling weight
+ ),
+ ],
+ ),
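+    # `loop` acts as a sampling weight: Structured3D is iterated twice per
+    # epoch relative to ScanNet. The Add transform tags every sample with a
+    # `condition` key so the condition-aware PPT backbone can select the
+    # matching dataset-specific normalization and context at run time.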
+ val=dict(
+ type="ScanNetDataset",
+ split="val",
+ data_root="data/scannet",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type="ScanNetDataset",
+ split="val",
+ data_root="data/scannet",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-ppt-v1m1-1-sc-st-spunet-submit.py b/Pointcept/configs/scannet/semseg-ppt-v1m1-1-sc-st-spunet-submit.py
new file mode 100644
index 0000000000000000000000000000000000000000..d503080e21ff7d921dd29ca5dd77e95ccfd51c72
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-ppt-v1m1-1-sc-st-spunet-submit.py
@@ -0,0 +1,366 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 24  # total batch size across all GPUs
+num_worker = 48
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+evaluate = False
+
+# trainer
+train = dict(
+ type="MultiDatasetTrainer",
+)
+
+# model settings
+model = dict(
+ type="PPT-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m3",
+ in_channels=6,
+ num_classes=0,
+ base_channels=32,
+ context_channels=256,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ cls_mode=False,
+ conditions=("ScanNet", "S3DIS", "Structured3D"),
+ zero_init=False,
+ norm_decouple=True,
+ norm_adaptive=True,
+ norm_affine=True,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+ backbone_out_channels=96,
+ context_channels=256,
+ conditions=("Structured3D", "ScanNet", "S3DIS"),
+ template="[x]",
+ clip_model="ViT-B/16",
+ # fmt: off
+ class_name=(
+ "wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door",
+ "window", "bookshelf", "bookcase", "picture", "counter", "desk", "shelves", "curtain",
+ "dresser", "pillow", "mirror", "ceiling", "refrigerator", "television", "shower curtain", "nightstand",
+ "toilet", "sink", "lamp", "bathtub", "garbagebin", "board", "beam", "column",
+ "clutter", "otherstructure", "otherfurniture", "otherprop",
+ ),
+ valid_index=(
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 33, 34, 35),
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
+ (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
+ ),
+ # fmt: on
+ backbone_mode=False,
+)
+
+# scheduler settings
+epoch = 100
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+# param_dicts = [dict(keyword="modulation", lr=0.005)]
+
+# dataset settings
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type="ConcatDataset",
+ datasets=[
+ # Structured3D
+ dict(
+ type="Structured3DDataset",
+ split=["train", "val"],
+ data_root="data/structured3d",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "Structured3D"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=2, # sampling weight
+ ),
+ # ScanNet
+ dict(
+ type="ScanNetDataset",
+ split=["train", "val"],
+ data_root="data/scannet",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=1, # sampling weight
+ ),
+ ],
+ ),
+ test=dict(
+ type="ScanNetDataset",
+ split="test",
+ data_root="data/scannet",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-pt-v1-0-base.py b/Pointcept/configs/scannet/semseg-pt-v1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7b56590ad284eaf9cc2c3b616316627120bdb7b
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-pt-v1-0-base.py
@@ -0,0 +1,277 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PointTransformer-Seg50",
+ in_channels=9,
+ num_classes=20,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-pt-v2m1-0-origin.py b/Pointcept/configs/scannet/semseg-pt-v2m1-0-origin.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd1f61afea1657cbd3d8db536bdbf78865fce368
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-pt-v2m1-0-origin.py
@@ -0,0 +1,297 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = False
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m1",
+ in_channels=9,
+ num_classes=20,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+ grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5
+ attn_qkv_bias=True,
+ pe_multiplier=True,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-pt-v2m2-0-base.py b/Pointcept/configs/scannet/semseg-pt-v2m2-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ec72b0177556edfb6bc2f93d286cf04b0a2b31e
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-pt-v2m2-0-base.py
@@ -0,0 +1,297 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=9,
+ num_classes=20,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+ grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-pt-v2m2-1-submit.py b/Pointcept/configs/scannet/semseg-pt-v2m2-1-submit.py
new file mode 100644
index 0000000000000000000000000000000000000000..daf9c9d6218d5f54f4a528d70c5a05c031c41910
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-pt-v2m2-1-submit.py
@@ -0,0 +1,273 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+evaluate = False
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=9,
+ num_classes=20,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+ grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split=["train", "val"],
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="test",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-pt-v2m2-2-precise-evaluate.py b/Pointcept/configs/scannet/semseg-pt-v2m2-2-precise-evaluate.py
new file mode 100644
index 0000000000000000000000000000000000000000..c01cf0e9f21ed7aefa05a39c75a5b261e800faf4
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-pt-v2m2-2-precise-evaluate.py
@@ -0,0 +1,307 @@
+"""
+An example of enabling precise evaluation on the validation dataset during training.
+Please compare with semseg-pt-v2m2-0-base.py to learn the mechanism; the key additions are summarized just below.
+"""
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=9,
+ num_classes=20,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+ grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "origin_coord", "segment", "origin_segment"),
+ feat_keys=("coord", "color", "normal"),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-pt-v2m2-3-lovasz.py b/Pointcept/configs/scannet/semseg-pt-v2m2-3-lovasz.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed46ff221baac62afcc03628f2d7aac399fa6e24
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-pt-v2m2-3-lovasz.py
@@ -0,0 +1,300 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=9,
+ num_classes=20,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+ grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-pt-v3m1-0-base.py b/Pointcept/configs/scannet/semseg-pt-v3m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..47cc7e010a9ab3d9e7b4307d1a0d24da4d13f0f3
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-pt-v3m1-0-base.py
@@ -0,0 +1,312 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentorV2",
+ num_classes=20,
+ backbone_out_channels=64,
+ backbone=dict(
+ type="PT-v3m1",
+ in_channels=6,
+ order=("z", "z-trans", "hilbert", "hilbert-trans"),
+ stride=(2, 2, 2, 2),
+ enc_depths=(2, 2, 2, 6, 2),
+ enc_channels=(32, 64, 128, 256, 512),
+ enc_num_head=(2, 4, 8, 16, 32),
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+ dec_depths=(2, 2, 2, 2),
+ dec_channels=(64, 64, 128, 256),
+ dec_num_head=(4, 4, 8, 16),
+ dec_patch_size=(1024, 1024, 1024, 1024),
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop=0.0,
+ proj_drop=0.0,
+ drop_path=0.3,
+ shuffle_orders=True,
+ pre_norm=True,
+ enable_rpe=False,
+ enable_flash=True,
+ upcast_attention=False,
+ upcast_softmax=False,
+ cls_mode=False,
+ pdnorm_bn=False,
+ pdnorm_ln=False,
+ pdnorm_decouple=True,
+ pdnorm_adaptive=False,
+ pdnorm_affine=True,
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.006, 0.0006],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0006)]
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=102400, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-pt-v3m1-1-ppt-extreme.py b/Pointcept/configs/scannet/semseg-pt-v3m1-1-ppt-extreme.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e4804eaa9510bde3235c72cb2a34d2a1907ac14
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-pt-v3m1-1-ppt-extreme.py
@@ -0,0 +1,483 @@
+"""
+PTv3 + PPT
+Pre-trained on ScanNet + Structured3D
+(S3DIS is commented out by default because of its slow data loading; see https://github.com/Pointcept/Pointcept/issues/103.)
+In the original PPT paper, the three datasets are trained and validated jointly with one shared-weight model.
+In PTv3, we train on multiple datasets but validate on a single dataset to achieve extreme performance on that
+single dataset.
+
+To enable joint training on all three datasets, uncomment the S3DIS dataset config below and change the "loop"
+of Structured3D and ScanNet to 4 and 2, respectively (see the sketch right after this docstring).
+"""
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 24 # bs: total bs in all gpus
+num_worker = 48
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+clip_grad = 3.0
+
+# trainer
+train = dict(
+ type="MultiDatasetTrainer",
+)
+
+# model settings
+model = dict(
+ type="PPT-v1m1",
+ backbone=dict(
+ type="PT-v3m1",
+ in_channels=6,
+ order=("z", "z-trans", "hilbert", "hilbert-trans"),
+ stride=(2, 2, 2, 2),
+ enc_depths=(3, 3, 3, 6, 3),
+ enc_channels=(48, 96, 192, 384, 512),
+ enc_num_head=(3, 6, 12, 24, 32),
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+ dec_depths=(3, 3, 3, 3),
+ dec_channels=(64, 96, 192, 384),
+ dec_num_head=(4, 6, 12, 24),
+ dec_patch_size=(1024, 1024, 1024, 1024),
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop=0.0,
+ proj_drop=0.0,
+ drop_path=0.3,
+ shuffle_orders=True,
+ pre_norm=True,
+ enable_rpe=False,
+ enable_flash=True,
+ upcast_attention=False,
+ upcast_softmax=False,
+ cls_mode=False,
+ pdnorm_bn=True,
+ pdnorm_ln=True,
+ pdnorm_decouple=True,
+ pdnorm_adaptive=False,
+ pdnorm_affine=True,
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ backbone_out_channels=64,
+ context_channels=256,
+ conditions=("Structured3D", "ScanNet", "S3DIS"),
+ template="[x]",
+ clip_model="ViT-B/16",
+ # fmt: off
+ class_name=(
+ "wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door",
+ "window", "bookshelf", "bookcase", "picture", "counter", "desk", "shelves", "curtain",
+ "dresser", "pillow", "mirror", "ceiling", "refrigerator", "television", "shower curtain", "nightstand",
+ "toilet", "sink", "lamp", "bathtub", "garbagebin", "board", "beam", "column",
+ "clutter", "otherstructure", "otherfurniture", "otherprop",
+ ),
+ valid_index=(
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 33, 34, 35),
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
+ (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
+ ),
+ # fmt: on
+ backbone_mode=False,
+)
+
+# scheduler settings
+epoch = 100
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.005, 0.0005],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0005)]
+
+# dataset settings
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type="ConcatDataset",
+ datasets=[
+ # Structured3D
+ dict(
+ type="Structured3DDataset",
+ split=["train", "val", "test"],
+ data_root="data/structured3d",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "Structured3D"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=2, # sampling weight
+ ),
+ # ScanNet
+ dict(
+ type="ScanNetDataset",
+ split="train",
+ data_root="data/scannet",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=1, # sampling weight
+ ),
+ # S3DIS
+ # dict(
+ # type="S3DISDataset",
+ # split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ # data_root="data/s3dis",
+ # transform=[
+ # dict(type="CenterShift", apply_z=True),
+ # dict(
+ # type="RandomDropout",
+ # dropout_ratio=0.2,
+ # dropout_application_ratio=0.2,
+ # ),
+ # # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ # dict(
+ # type="RandomRotate",
+ # angle=[-1, 1],
+ # axis="z",
+ # center=[0, 0, 0],
+ # p=0.5,
+ # ),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ # dict(type="RandomScale", scale=[0.9, 1.1]),
+ # # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ # dict(type="RandomFlip", p=0.5),
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(
+ # type="ElasticDistortion",
+ # distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ # ),
+ # dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ # dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ # dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ # dict(
+ # type="GridSample",
+ # grid_size=0.02,
+ # hash_type="fnv",
+ # mode="train",
+ # return_grid_coord=True,
+ # ),
+ # dict(type="SphereCrop", sample_rate=0.6, mode="random"),
+ # dict(type="SphereCrop", point_max=204800, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ # dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ # dict(type="Add", keys_dict={"condition": "S3DIS"}),
+ # dict(type="ToTensor"),
+ # dict(
+ # type="Collect",
+ # keys=("coord", "grid_coord", "segment", "condition"),
+ # feat_keys=("color", "normal"),
+ # ),
+ # ],
+ # test_mode=False,
+ # loop=1, # sampling weight
+ # ),
+ ],
+ ),
+ val=dict(
+ type="ScanNetDataset",
+ split="val",
+ data_root="data/scannet",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type="ScanNetDataset",
+ split="val",
+ data_root="data/scannet",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..0e2e41aa5d949cad7428f722f017db73a565571a
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-0-base.py
@@ -0,0 +1,281 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=20,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-1-interp-eval.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-1-interp-eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..9adfb382ad61433c7c73c3e502adbeaf411ae946
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-1-interp-eval.py
@@ -0,0 +1,285 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=20,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-1-precise-eval.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-1-precise-eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..7afb3aef572bc5e22b13f95fe70dbe43bdbb8d5f
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-1-precise-eval.py
@@ -0,0 +1,289 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+hooks = [
+ dict(type="CheckpointLoader"),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(type="SemSegEvaluator"),
+ dict(type="CheckpointSaver", save_freq=None),
+ dict(type="PreciseEvaluator", test_last=False),
+]
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=20,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la100.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la100.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8774321618c7d84e8a26911cfa418e777798c9c
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la100.py
@@ -0,0 +1,282 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=20,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ la_file="data/scannet/tasks/points/points100",
+ ignore_index=-1,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
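
Every config in this family shares the same one-cycle schedule: with `lr=0.05`, `div_factor=10.0`, and `final_div_factor=10000.0`, the learning rate starts at 0.005, peaks at 0.05 after the first 5% of steps (`pct_start=0.05`), and cosine-anneals down to 5e-7. A minimal sketch of the same settings on PyTorch's `torch.optim.lr_scheduler.OneCycleLR`, with a placeholder step count (the trainer would derive it from the epoch count and loader length), looks like this:

```python
# Minimal sketch of how the scheduler dict maps onto PyTorch's OneCycleLR.
import torch

model = torch.nn.Linear(6, 20)  # stand-in for the SpUNet backbone
optimizer = torch.optim.SGD(
    model.parameters(), lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True
)
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=0.05,
    total_steps=1000,          # placeholder for epoch * len(train_loader)
    pct_start=0.05,            # 5% of steps spent warming up
    anneal_strategy="cos",
    div_factor=10.0,           # initial lr = max_lr / 10 = 0.005
    final_div_factor=10000.0,  # final lr  = initial lr / 10000 = 5e-7
)
for _ in range(3):
    optimizer.step()
    scheduler.step()
print(scheduler.get_last_lr())
```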
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la20.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la20.py
new file mode 100644
index 0000000000000000000000000000000000000000..1171c51184cab3372798d6bf94036719571880f0
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la20.py
@@ -0,0 +1,282 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=20,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ la_file="data/scannet/tasks/points/points20",
+ ignore_index=-1,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
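
This config and the other `la*` variants in this series differ only in the `la_file` path; everything else is pulled in from `../_base_/default_runtime.py` via `_base_` or repeated verbatim. The snippet below is only a conceptual stand-in for that inheritance style, written as a recursive dict merge; the project's own config loader handles the real thing and may differ in details.

```python
# Conceptual stand-in for `_base_`-style config inheritance: child keys
# recursively override the base dict. Not Pointcept's actual loader.
def merge_cfg(base: dict, child: dict) -> dict:
    out = dict(base)
    for key, value in child.items():
        if isinstance(value, dict) and isinstance(out.get(key), dict):
            out[key] = merge_cfg(out[key], value)
        else:
            out[key] = value
    return out

base = dict(batch_size=12, data=dict(num_classes=20, ignore_index=-1))
child = dict(data=dict(train=dict(la_file="data/scannet/tasks/points/points20")))
print(merge_cfg(base, child))
# {'batch_size': 12, 'data': {'num_classes': 20, 'ignore_index': -1,
#  'train': {'la_file': 'data/scannet/tasks/points/points20'}}}
```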
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la200.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la200.py
new file mode 100644
index 0000000000000000000000000000000000000000..158b0873af0db67a2877d966fb9dafdf417872ad
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la200.py
@@ -0,0 +1,282 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=20,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ la_file="data/scannet/tasks/points/points200",
+ ignore_index=-1,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la50.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la50.py
new file mode 100644
index 0000000000000000000000000000000000000000..6eb906429e1209881b75f223ff85ff67d4d55594
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la50.py
@@ -0,0 +1,282 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=20,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ la_file="data/scannet/tasks/points/points50",
+ ignore_index=-1,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
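
Taken together, the four `la*` configs target ScanNet's data-efficient "limited annotations" setting: `la_file` points to the official lists of 20/50/100/200 annotated points per scene, and points outside those lists are expected to be relabeled to `ignore_index=-1` so the cross-entropy loss skips them. The exact file format and masking live in the dataset class; the sketch below only illustrates the masking idea under the assumption that each scene resolves to an array of annotated point indices.

```python
# Hedged sketch of limited-annotation masking: keep labels only at the
# annotated indices and set everything else to ignore_index (-1).
# The mapping from la_file to `annotated` is an assumption for illustration.
import numpy as np

def mask_labels(segment: np.ndarray, annotated: np.ndarray, ignore_index: int = -1):
    masked = np.full_like(segment, ignore_index)
    masked[annotated] = segment[annotated]
    return masked

segment = np.array([3, 3, 7, 7, 7, 12, 12, 0])  # dense per-point labels
annotated = np.array([0, 4, 6])                 # e.g. three annotated points
print(mask_labels(segment, annotated))
# [ 3 -1 -1 -1  7 -1 12 -1]
```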
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr1.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr1.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f5b2267f31b2e68306e414d46486a27081a7f1a
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr1.py
@@ -0,0 +1,281 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=20,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ lr_file="data/scannet/tasks/scenes/1.txt",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr10.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr10.py
new file mode 100644
index 0000000000000000000000000000000000000000..cff1df75e21a22e49486beb712e1443959f84447
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr10.py
@@ -0,0 +1,281 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=20,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ lr_file="data/scannet/tasks/scenes/10.txt",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr20.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr20.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d1891fe6e0a984089d5b7ff465bd16e0a6c3ae8
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr20.py
@@ -0,0 +1,281 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=20,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ lr_file="data/scannet/tasks/scenes/20.txt",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr5.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr5.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5a420a349a1ef0e12a58b3b4ea969092081a274
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr5.py
@@ -0,0 +1,281 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=20,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ lr_file="data/scannet/tasks/scenes/5.txt",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
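
The `lr*` configs cover the complementary "limited reconstructions" setting: labels stay dense, but `lr_file` restricts training to the subset of scenes named in the corresponding `.txt` list (1, 5, 10, or 20 percent of reconstructions). Assuming one scene name per line, which the `.txt` paths suggest, a filter over the training split might look like:

```python
# Hedged sketch of limited-reconstruction filtering: keep only the training
# scenes listed in lr_file. One-scene-per-line format is an assumption.
from pathlib import Path

def filter_scenes(all_scenes, lr_file):
    keep = {
        line.strip()
        for line in Path(lr_file).read_text().splitlines()
        if line.strip()
    }
    return [scene for scene in all_scenes if scene in keep]

# Example (path and names are illustrative only):
# train_scenes = filter_scenes(train_scenes, "data/scannet/tasks/scenes/5.txt")
```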
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-3-enable-profiler.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-3-enable-profiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..32acb2be05092162561c66822e8cfe0758871147
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-3-enable-profiler.py
@@ -0,0 +1,296 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = False
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=20,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
+
+hooks = [
+ dict(type="CheckpointLoader"),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(type="SemSegEvaluator"),
+ dict(type="CheckpointSaver", save_freq=None),
+ dict(
+ type="RuntimeProfiler",
+ forward=True,
+ backward=True,
+ interrupt=True,
+ warm_up=2,
+ row_limit=30,
+ ),
+]
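
This variant disables AMP and appends a `RuntimeProfiler` hook which, judging by its options, times the forward and backward passes after a two-iteration warm-up, prints a table capped at 30 rows, and interrupts the run afterwards. A standalone analogue with `torch.profiler` (not the hook itself, just the same idea in isolation) would be:

```python
# Standalone analogue of a forward/backward runtime profile using torch.profiler.
import torch
from torch.profiler import ProfilerActivity, profile

model = torch.nn.Sequential(
    torch.nn.Linear(6, 96), torch.nn.ReLU(), torch.nn.Linear(96, 20)
)
x = torch.randn(4096, 6)

# Warm up a couple of iterations before measuring, as warm_up=2 suggests.
for _ in range(2):
    model(x).sum().backward()

with profile(activities=[ProfilerActivity.CPU], record_shapes=True) as prof:
    loss = model(x).sum()  # forward
    loss.backward()        # backward
print(prof.key_averages().table(sort_by="cpu_time_total", row_limit=30))
```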
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-4-ft.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-4-ft.py
new file mode 100644
index 0000000000000000000000000000000000000000..f90564e23b3422550c7fe209b977aa428c779b0e
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-4-ft.py
@@ -0,0 +1,280 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 48 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=20,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-5-lovasz.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-5-lovasz.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb976abbd6570196e589ece51566a12e679c0364
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-5-lovasz.py
@@ -0,0 +1,283 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size summed across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=20,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m3-0-pdnorm-base.py b/Pointcept/configs/scannet/semseg-spunet-v1m3-0-pdnorm-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6aed1fb9f5f4d7d7dd3b8608ac8be972931fbc3
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-spunet-v1m3-0-pdnorm-base.py
@@ -0,0 +1,291 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size summed across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m3",
+ in_channels=6,
+ num_classes=20,
+ base_channels=32,
+ context_channels=256,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ cls_mode=False,
+ conditions=("ScanNet", "S3DIS", "Structured3D"),
+ zero_init=False,
+ norm_decouple=True,
+ norm_adaptive=False,
+ norm_affine=True,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict=dict(condition="ScanNet")),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="Add", keys_dict=dict(condition="ScanNet")),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict=dict(condition="ScanNet")),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-st-v1m1-0-origin.py b/Pointcept/configs/scannet/semseg-st-v1m1-0-origin.py
new file mode 100644
index 0000000000000000000000000000000000000000..4c05848c5212616f68db4ec70f7dfdebd1ef7d35
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-st-v1m1-0-origin.py
@@ -0,0 +1,286 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 8  # total batch size summed across all GPUs
+mix_prob = 0
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="ST-v1m1",
+ downsample_scale=4,
+ depths=[3, 3, 9, 3, 3],
+ channels=[48, 96, 192, 384, 384],
+ num_heads=[3, 6, 12, 24, 24],
+ window_size=[0.1, 0.2, 0.4, 0.8, 1.6],
+ up_k=3,
+ grid_sizes=[0.02, 0.04, 0.08, 0.16, 0.32],
+ quant_sizes=[0.005, 0.01, 0.02, 0.04, 0.08],
+ rel_query=True,
+ rel_key=True,
+ rel_value=True,
+ drop_path_rate=0.3,
+ num_layers=5,
+ concat_xyz=True,
+ num_classes=20,
+ ratio=0.25,
+ k=16,
+ prev_grid_size=0.02,
+ sigma=1.0,
+ stem_transformer=False,
+ kp_ball_radius=0.02 * 2.5,
+ kp_max_neighbor=34,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 600
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect", keys=("coord", "segment"), feat_keys=("coord", "color")
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect", keys=("coord", "segment"), feat_keys=("coord", "color")
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-st-v1m2-0-refined.py b/Pointcept/configs/scannet/semseg-st-v1m2-0-refined.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b9963eedc36be41897e041bc67c245cab0c1e5b
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-st-v1m2-0-refined.py
@@ -0,0 +1,287 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 8  # total batch size summed across all GPUs
+mix_prob = 0
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="ST-v1m2",
+ in_channels=9,
+ num_classes=20,
+ channels=(48, 96, 192, 384, 384),
+ num_heads=(6, 12, 24, 24),
+ depths=(3, 9, 3, 3),
+ window_size=(0.2, 0.4, 0.8, 1.6),
+ quant_size=(0.01, 0.02, 0.04, 0.08),
+ mlp_expend_ratio=4.0,
+ down_ratio=0.25,
+ down_num_sample=16,
+ kp_ball_radius=2.5 * 0.02,
+ kp_max_neighbor=34,
+ kp_grid_size=0.02,
+ kp_sigma=1.0,
+ drop_path_rate=0.2,
+ rel_query=True,
+ rel_key=True,
+ rel_value=True,
+ qkv_bias=True,
+ stem=True,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+# scheduler settings
+epoch = 600
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-swin3d-v1m1-0-small.py b/Pointcept/configs/scannet/semseg-swin3d-v1m1-0-small.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8d8308de55cd42ba4ea0bea8a3b951bfda8d6df
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-swin3d-v1m1-0-small.py
@@ -0,0 +1,219 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size summed across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="Swin3D-v1m1",
+ in_channels=9,
+ num_classes=20,
+ base_grid_size=0.02,
+ depths=[2, 4, 9, 4, 4],
+ channels=[48, 96, 192, 384, 384],
+ num_heads=[6, 6, 12, 24, 24],
+ window_sizes=[5, 7, 7, 7, 7],
+ quant_size=4,
+ drop_path_rate=0.3,
+ up_k=3,
+ num_layers=5,
+ stem_transformer=True,
+ down_stride=3,
+ upsample="linear_attn",
+ knn_down=True,
+ cRSE="XYZ_RGB_NORM",
+ fp16_mode=1,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 600
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.006, 0.0006],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="blocks", lr=0.0006)]
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.8, 1.2]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ return_displacement=True,
+ ),
+ dict(type="SphereCrop", point_max=120000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ return_displacement=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ return_displacement=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet/semseg-swin3d-v1m1-1-large.py b/Pointcept/configs/scannet/semseg-swin3d-v1m1-1-large.py
new file mode 100644
index 0000000000000000000000000000000000000000..0957ff85e1902220e0d13676b6814e5420a1776d
--- /dev/null
+++ b/Pointcept/configs/scannet/semseg-swin3d-v1m1-1-large.py
@@ -0,0 +1,219 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size summed across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="Swin3D-v1m1",
+ in_channels=9,
+ num_classes=20,
+ base_grid_size=0.02,
+ depths=[2, 4, 9, 4, 4],
+ channels=[80, 160, 320, 640, 640],
+ num_heads=[10, 10, 20, 40, 40],
+ window_sizes=[5, 7, 7, 7, 7],
+ quant_size=4,
+ drop_path_rate=0.3,
+ up_k=3,
+ num_layers=5,
+ stem_transformer=True,
+ down_stride=3,
+ upsample="linear_attn",
+ knn_down=True,
+ cRSE="XYZ_RGB_NORM",
+ fp16_mode=1,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 600
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.006, 0.0006],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="blocks", lr=0.0006)]
+
+# dataset settings
+dataset_type = "ScanNetDataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=20,
+ ignore_index=-1,
+ names=[
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "bookshelf",
+ "picture",
+ "counter",
+ "desk",
+ "curtain",
+ "refridgerator",
+ "shower curtain",
+ "toilet",
+ "sink",
+ "bathtub",
+ "otherfurniture",
+ ],
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.8, 1.2]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ return_displacement=True,
+ ),
+ dict(type="SphereCrop", point_max=120000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ return_displacement=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ return_displacement=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet200/insseg-pointgroup-spunet-0-base.py b/Pointcept/configs/scannet200/insseg-pointgroup-spunet-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..90a8d5e384b59fd5e6a004d57006861de9ca921a
--- /dev/null
+++ b/Pointcept/configs/scannet200/insseg-pointgroup-spunet-0-base.py
@@ -0,0 +1,170 @@
+from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import (
+ CLASS_LABELS_200,
+)
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size summed across all GPUs
+num_worker = 12
+mix_prob = 0
+empty_cache = False
+enable_amp = True
+evaluate = True
+
+class_names = CLASS_LABELS_200
+num_classes = 200
+segment_ignore_index = (-1, 0, 2)
+
+# model settings
+model = dict(
+ type="PG-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=0,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ backbone_out_channels=96,
+ semantic_num_classes=num_classes,
+ semantic_ignore_index=-1,
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ cluster_thresh=1.5,
+ cluster_closed_points=300,
+ cluster_propose_points=100,
+ cluster_min_points=50,
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(type="PolyLR")
+
+# dataset settings
+dataset_type = "ScanNet200Dataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=num_classes,
+ ignore_index=-1,
+ names=class_names,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "instance_centroid",
+ "bbox",
+ ),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="Copy",
+ keys_dict={
+ "coord": "origin_coord",
+ "segment": "origin_segment",
+ "instance": "origin_instance",
+ },
+ ),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal", "segment", "instance"),
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(
+ type="InstanceParser",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=(
+ "coord",
+ "grid_coord",
+ "segment",
+ "instance",
+ "origin_coord",
+ "origin_segment",
+ "origin_instance",
+ "instance_centroid",
+ "bbox",
+ ),
+ feat_keys=("color", "normal"),
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(), # currently not available
+)
+
+hooks = [
+ dict(type="CheckpointLoader", keywords="module.", replacement="module."),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(
+ type="InsSegEvaluator",
+ segment_ignore_index=segment_ignore_index,
+ instance_ignore_index=-1,
+ ),
+ dict(type="CheckpointSaver", save_freq=None),
+]
diff --git a/Pointcept/configs/scannet200/semseg-cac-v1m1-0-spunet-base.py b/Pointcept/configs/scannet200/semseg-cac-v1m1-0-spunet-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..736bc767abdcba23de5676c5321cef01e19707c4
--- /dev/null
+++ b/Pointcept/configs/scannet200/semseg-cac-v1m1-0-spunet-base.py
@@ -0,0 +1,192 @@
+from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import (
+ CLASS_LABELS_200,
+)
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size summed across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="CAC-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=9,
+ num_classes=0,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+ num_classes=200,
+ backbone_out_channels=96,
+ cos_temp=15,
+ main_weight=1,
+ pre_weight=1,
+ pre_self_weight=1,
+ kl_weight=1,
+ conf_thresh=0,
+ detach_pre_logits=True,
+)
+
+
+# scheduler settings
+epoch = 600
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNet200Dataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=200,
+ ignore_index=-1,
+ names=CLASS_LABELS_200,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet200/semseg-cac-v1m1-1-spunet-lovasz.py b/Pointcept/configs/scannet200/semseg-cac-v1m1-1-spunet-lovasz.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5584a8b763e911a896d4c948cc42f4d0da880e1
--- /dev/null
+++ b/Pointcept/configs/scannet200/semseg-cac-v1m1-1-spunet-lovasz.py
@@ -0,0 +1,195 @@
+from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import (
+ CLASS_LABELS_200,
+)
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size summed across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="CAC-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=9,
+ num_classes=0,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ num_classes=200,
+ backbone_out_channels=96,
+ cos_temp=15,
+ main_weight=1,
+ pre_weight=1,
+ pre_self_weight=1,
+ kl_weight=1,
+ conf_thresh=0,
+ detach_pre_logits=True,
+)
+
+
+# scheduler settings
+epoch = 600
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNet200Dataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=200,
+ ignore_index=-1,
+ names=CLASS_LABELS_200,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet200/semseg-cac-v1m1-2-ptv2-lovasz.py b/Pointcept/configs/scannet200/semseg-cac-v1m1-2-ptv2-lovasz.py
new file mode 100644
index 0000000000000000000000000000000000000000..bbd49a65d32cac158ece4cf06c641d08b452207c
--- /dev/null
+++ b/Pointcept/configs/scannet200/semseg-cac-v1m1-2-ptv2-lovasz.py
@@ -0,0 +1,292 @@
+from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import (
+ CLASS_LABELS_200,
+)
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size summed across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="CAC-v1m1",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=9,
+ num_classes=0,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+ grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ num_classes=200,
+ backbone_out_channels=48,
+ cos_temp=15,
+ main_weight=1,
+ pre_weight=1,
+ pre_self_weight=1,
+ kl_weight=1,
+ conf_thresh=0,
+ detach_pre_logits=True,
+)
+
+# scheduler settings
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNet200Dataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=200,
+ ignore_index=-1,
+ names=CLASS_LABELS_200,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet200/semseg-minkunet34c-0-base.py b/Pointcept/configs/scannet200/semseg-minkunet34c-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd8479414cc4a8306c1914490c6621cb6debaeab
--- /dev/null
+++ b/Pointcept/configs/scannet200/semseg-minkunet34c-0-base.py
@@ -0,0 +1,176 @@
+from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import (
+ CLASS_LABELS_200,
+)
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
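+    # in_channels=9 matches the concatenated features below: coord (3) + color (3) + normal (3)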
+ backbone=dict(type="MinkUNet34C", in_channels=9, out_channels=200),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 600
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNet200Dataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=200,
+ ignore_index=-1,
+ names=CLASS_LABELS_200,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
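+            # rotation angles are in units of pi: [-1, 1] about z covers a full +/-180 degree rotation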
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 6, 1 / 6], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 6, 1 / 6], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet200/semseg-pt-v1-0-base.py b/Pointcept/configs/scannet200/semseg-pt-v1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..60c1e2fbd99f8cfac633545d265bfc6cbc7219f8
--- /dev/null
+++ b/Pointcept/configs/scannet200/semseg-pt-v1-0-base.py
@@ -0,0 +1,260 @@
+from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import (
+ CLASS_LABELS_200,
+)
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
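+# mix_prob: probability of mixing two samples into one point cloud (Mix3D-style) in the collate function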
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PointTransformer-Seg50",
+ in_channels=9,
+ num_classes=200,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNet200Dataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=200,
+ ignore_index=-1,
+ names=CLASS_LABELS_200,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
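+            # cap each training sample at 100k points via a random-centered sphere crop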
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet200/semseg-pt-v2m1-0-base.py b/Pointcept/configs/scannet200/semseg-pt-v2m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..b454f1de6563aec984578c8b7bff52bab7010218
--- /dev/null
+++ b/Pointcept/configs/scannet200/semseg-pt-v2m1-0-base.py
@@ -0,0 +1,280 @@
+from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import (
+ CLASS_LABELS_200,
+)
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = False
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m1",
+ in_channels=9,
+ num_classes=200,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+        grid_sizes=(0.06, 0.15, 0.375, 0.9375),  # 0.02 base grid x3, then x2.5 per stage
+ attn_qkv_bias=True,
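+        # pe_multiplier / pe_bias: enable the multiplier and bias terms of the position encoding in grouped vector attention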
+ pe_multiplier=True,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNet200Dataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=200,
+ ignore_index=-1,
+ names=CLASS_LABELS_200,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet200/semseg-pt-v2m2-0-base.py b/Pointcept/configs/scannet200/semseg-pt-v2m2-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..51287dd96eb4d2af1cbe01cbf29e029918dc21b8
--- /dev/null
+++ b/Pointcept/configs/scannet200/semseg-pt-v2m2-0-base.py
@@ -0,0 +1,280 @@
+from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import (
+ CLASS_LABELS_200,
+)
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=9,
+ num_classes=200,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+        grid_sizes=(0.06, 0.15, 0.375, 0.9375),  # 0.02 base grid x3, then x2.5 per stage
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNet200Dataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=200,
+ ignore_index=-1,
+ names=CLASS_LABELS_200,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
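+            # "test"-mode GridSample produces voxel-grid fragments that together cover every point;
+            # fragment predictions are mapped back to the original points through the collected "index"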
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet200/semseg-pt-v2m2-1-benchmark-submit.py b/Pointcept/configs/scannet200/semseg-pt-v2m2-1-benchmark-submit.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8d3d1e449b688dc04d225d22626b2009285e52d
--- /dev/null
+++ b/Pointcept/configs/scannet200/semseg-pt-v2m2-1-benchmark-submit.py
@@ -0,0 +1,256 @@
+from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import (
+ CLASS_LABELS_200,
+)
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
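+# benchmark submission: train on train+val and predict on the hidden test split, so online evaluation is disabled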
+evaluate = False
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=9,
+ num_classes=200,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+        grid_sizes=(0.06, 0.15, 0.375, 0.9375),  # 0.02 base grid x3, then x2.5 per stage
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNet200Dataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=200,
+ ignore_index=-1,
+ names=CLASS_LABELS_200,
+ train=dict(
+ type=dataset_type,
+ split=["train", "val"],
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="test",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet200/semseg-pt-v2m2-2-lovasz.py b/Pointcept/configs/scannet200/semseg-pt-v2m2-2-lovasz.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3ab0f65f6e56a5faa876ab4ec52f531cd9e2453
--- /dev/null
+++ b/Pointcept/configs/scannet200/semseg-pt-v2m2-2-lovasz.py
@@ -0,0 +1,283 @@
+from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import (
+ CLASS_LABELS_200,
+)
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=9,
+ num_classes=200,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+        grid_sizes=(0.06, 0.15, 0.375, 0.9375),  # 0.02 base grid x3, then x2.5 per stage
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
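+    # adds a Lovasz loss term alongside cross entropy (cf. the base config, which uses cross entropy only)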
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNet200Dataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=200,
+ ignore_index=-1,
+ names=CLASS_LABELS_200,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet200/semseg-pt-v3m1-0-base.py b/Pointcept/configs/scannet200/semseg-pt-v3m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed73ca90ec80dec027906fb69b1678a006840f36
--- /dev/null
+++ b/Pointcept/configs/scannet200/semseg-pt-v3m1-0-base.py
@@ -0,0 +1,295 @@
+from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import (
+ CLASS_LABELS_200,
+)
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentorV2",
+ num_classes=200,
+ backbone_out_channels=64,
+ backbone=dict(
+ type="PT-v3m1",
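+        # in_channels=6: features are color (3) + normal (3); coordinates are used as positions only, not as features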
+ in_channels=6,
+ order=["z", "z-trans", "hilbert", "hilbert-trans"],
+ stride=(2, 2, 2, 2),
+ enc_depths=(2, 2, 2, 6, 2),
+ enc_channels=(32, 64, 128, 256, 512),
+ enc_num_head=(2, 4, 8, 16, 32),
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+ dec_depths=(2, 2, 2, 2),
+ dec_channels=(64, 64, 128, 256),
+ dec_num_head=(4, 4, 8, 16),
+ dec_patch_size=(1024, 1024, 1024, 1024),
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop=0.0,
+ proj_drop=0.0,
+ drop_path=0.3,
+ shuffle_orders=True,
+ pre_norm=True,
+ enable_rpe=False,
+ enable_flash=True,
+ upcast_attention=False,
+ upcast_softmax=False,
+ cls_mode=False,
+ pdnorm_bn=False,
+ pdnorm_ln=False,
+ pdnorm_decouple=True,
+ pdnorm_adaptive=False,
+ pdnorm_affine=True,
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.006, 0.0006],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
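+# parameters whose names contain "block" are placed in a separate group with lr 0.0006, matching the second max_lr entry above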
+param_dicts = [dict(keyword="block", lr=0.0006)]
+
+# dataset settings
+dataset_type = "ScanNet200Dataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=200,
+ ignore_index=-1,
+ names=CLASS_LABELS_200,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=102400, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet200/semseg-pt-v3m1-1-ppt-ft.py b/Pointcept/configs/scannet200/semseg-pt-v3m1-1-ppt-ft.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa92b10fe935193e4333b19e78ad8997aab1b102
--- /dev/null
+++ b/Pointcept/configs/scannet200/semseg-pt-v3m1-1-ppt-ft.py
@@ -0,0 +1,299 @@
+from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import (
+ CLASS_LABELS_200,
+)
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentorV2",
+ num_classes=200,
+ backbone_out_channels=64,
+ backbone=dict(
+ type="PT-v3m1",
+ in_channels=6,
+ order=("z", "z-trans", "hilbert", "hilbert-trans"),
+ stride=(2, 2, 2, 2),
+ enc_depths=(3, 3, 3, 6, 3),
+ enc_channels=(48, 96, 192, 384, 512),
+ enc_num_head=(3, 6, 12, 24, 32),
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+ dec_depths=(3, 3, 3, 3),
+ dec_channels=(64, 96, 192, 384),
+ dec_num_head=(4, 6, 12, 24),
+ dec_patch_size=(1024, 1024, 1024, 1024),
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop=0.0,
+ proj_drop=0.0,
+ drop_path=0.3,
+ shuffle_orders=True,
+ pre_norm=True,
+ enable_rpe=False,
+ enable_flash=True,
+ upcast_attention=False,
+ upcast_softmax=False,
+ cls_mode=False,
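+        # PPT-style fine-tuning: decoupled per-dataset norm layers (pdnorm_*) selected by the "condition" key added in the data pipeline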
+ pdnorm_bn=True,
+ pdnorm_ln=True,
+ pdnorm_decouple=True,
+ pdnorm_adaptive=False,
+ pdnorm_affine=True,
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.006, 0.0006],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0006)]
+
+# dataset settings
+dataset_type = "ScanNet200Dataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=200,
+ ignore_index=-1,
+ names=CLASS_LABELS_200,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=102400, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet200/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/scannet200/semseg-spunet-v1m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..1fcd0fecf93c058b0f68c251ebb0210a64da976b
--- /dev/null
+++ b/Pointcept/configs/scannet200/semseg-spunet-v1m1-0-base.py
@@ -0,0 +1,182 @@
+from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import (
+ CLASS_LABELS_200,
+)
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=9,
+ num_classes=200,
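+        # channels/layers: first four entries are the encoder stages, last four the decoder stages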
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 600
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNet200Dataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=200,
+ ignore_index=-1,
+ names=CLASS_LABELS_200,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet200/semseg-spunet-v1m1-1-lovasz.py b/Pointcept/configs/scannet200/semseg-spunet-v1m1-1-lovasz.py
new file mode 100644
index 0000000000000000000000000000000000000000..12c17df2be7bf2895c4e2018c124c196e8297f80
--- /dev/null
+++ b/Pointcept/configs/scannet200/semseg-spunet-v1m1-1-lovasz.py
@@ -0,0 +1,185 @@
+from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import (
+ CLASS_LABELS_200,
+)
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=9,
+ num_classes=200,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 600
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNet200Dataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=200,
+ ignore_index=-1,
+ names=CLASS_LABELS_200,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannet200/semseg-st-v1m2-0-refined.py b/Pointcept/configs/scannet200/semseg-st-v1m2-0-refined.py
new file mode 100644
index 0000000000000000000000000000000000000000..98363e34c91f344faf410493066f6291d8b75eaf
--- /dev/null
+++ b/Pointcept/configs/scannet200/semseg-st-v1m2-0-refined.py
@@ -0,0 +1,270 @@
+from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import (
+ CLASS_LABELS_200,
+)
+
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 8 # bs: total bs in all gpus
+mix_prob = 0
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="ST-v1m2",
+ in_channels=9,
+ num_classes=200,
+ channels=(48, 96, 192, 384, 384),
+ num_heads=(6, 12, 24, 24),
+ depths=(3, 9, 3, 3),
+ window_size=(0.2, 0.4, 0.8, 1.6),
+ quant_size=(0.01, 0.02, 0.04, 0.08),
+ mlp_expend_ratio=4.0,
+ down_ratio=0.25,
+ down_num_sample=16,
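+ # KPConv-style local aggregation inside ST-v1m2; the ball radius is set to
+ # 2.5x the 0.02 m voxel size, presumably to match the GridSample grid below.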
+ kp_ball_radius=2.5 * 0.02,
+ kp_max_neighbor=34,
+ kp_grid_size=0.02,
+ kp_sigma=1.0,
+ drop_path_rate=0.2,
+ rel_query=True,
+ rel_key=True,
+ rel_value=True,
+ qkv_bias=True,
+ stem=True,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+# scheduler settings
+epoch = 600
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
+
+# dataset settings
+dataset_type = "ScanNet200Dataset"
+data_root = "data/scannet"
+
+data = dict(
+ num_classes=200,
+ ignore_index=-1,
+ names=CLASS_LABELS_200,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=100000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_min_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannetpp/semseg-pt-v2m2-0-base.py b/Pointcept/configs/scannetpp/semseg-pt-v2m2-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..81baa7c600fbcfce1121899897879fd13f9ed0eb
--- /dev/null
+++ b/Pointcept/configs/scannetpp/semseg-pt-v2m2-0-base.py
@@ -0,0 +1,291 @@
+_base_ = [
+ "../_base_/default_runtime.py",
+ "../_base_/dataset/scannetpp.py",
+]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=9,
+ num_classes=100,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+ grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
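+# OneCycleLR warms up from max_lr / div_factor over the first 5% of steps
+# (pct_start), then cosine-anneals towards max_lr / (div_factor * final_div_factor).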
+
+# dataset settings
+dataset_type = "ScanNetPPDataset"
+data_root = "data/scannetpp"
+
+data = dict(
+ num_classes=100,
+ ignore_index=-1,
+ train=dict(
+ type=dataset_type,
+ split="train_grid1mm_chunk6x6_stride3x3",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "normal", "segment"),
+ return_inverse=True,
+ ),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannetpp/semseg-pt-v2m2-1-submit.py b/Pointcept/configs/scannetpp/semseg-pt-v2m2-1-submit.py
new file mode 100644
index 0000000000000000000000000000000000000000..e068997c5158ab4d1e84f3d2e75454198ee6897f
--- /dev/null
+++ b/Pointcept/configs/scannetpp/semseg-pt-v2m2-1-submit.py
@@ -0,0 +1,278 @@
+_base_ = [
+ "../_base_/default_runtime.py",
+ "../_base_/dataset/scannetpp.py",
+]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+evaluate = False
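+# Benchmark-submission variant: training below also consumes the val split and
+# inference runs on the unlabeled test split, so online evaluation is disabled.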
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=9,
+ num_classes=100,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+ grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 900
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+
+# dataset settings
+dataset_type = "ScanNetPPDataset"
+data_root = "data/scannetpp"
+
+data = dict(
+ num_classes=100,
+ ignore_index=-1,
+ train=dict(
+ type=dataset_type,
+ split=["train_grid1mm_chunk6x6_stride3x3", "val_grid1mm_chunk6x6_stride3x3"],
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="test",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "normal", "segment"),
+ return_inverse=True,
+ ),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
+
+# hooks
+hooks = [
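+ # PreciseEvaluator(test_last=True) reruns the full TTA test pipeline on the
+ # final model after training finishes; save_freq=None skips periodic epoch
+ # snapshots (assumed Pointcept hook behavior).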
+ dict(type="CheckpointLoader"),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(type="SemSegEvaluator"),
+ dict(type="CheckpointSaver", save_freq=None),
+ dict(type="PreciseEvaluator", test_last=True),
+]
diff --git a/Pointcept/configs/scannetpp/semseg-pt-v3m1-0-base.py b/Pointcept/configs/scannetpp/semseg-pt-v3m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..efd95ad6190fb3a14234a9da293338ec67140658
--- /dev/null
+++ b/Pointcept/configs/scannetpp/semseg-pt-v3m1-0-base.py
@@ -0,0 +1,302 @@
+_base_ = [
+ "../_base_/default_runtime.py",
+ "../_base_/dataset/scannetpp.py",
+]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentorV2",
+ num_classes=100,
+ backbone_out_channels=64,
+ backbone=dict(
+ type="PT-v3m1",
+ in_channels=6,
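+ # Serialization orders for PT-v3's serialized attention (z-order and Hilbert
+ # curves plus their transposed variants); shuffle_orders=True randomly
+ # permutes them across blocks during training.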
+ order=("z", "z-trans", "hilbert", "hilbert-trans"),
+ stride=(2, 2, 2, 2),
+ enc_depths=(2, 2, 2, 6, 2),
+ enc_channels=(32, 64, 128, 256, 512),
+ enc_num_head=(2, 4, 8, 16, 32),
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+ dec_depths=(2, 2, 2, 2),
+ dec_channels=(64, 64, 128, 256),
+ dec_num_head=(4, 4, 8, 16),
+ dec_patch_size=(1024, 1024, 1024, 1024),
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop=0.0,
+ proj_drop=0.0,
+ drop_path=0.3,
+ shuffle_orders=True,
+ pre_norm=True,
+ enable_rpe=False,
+ enable_flash=True,
+ upcast_attention=False,
+ upcast_softmax=False,
+ cls_mode=False,
+ pdnorm_bn=False,
+ pdnorm_ln=False,
+ pdnorm_decouple=True,
+ pdnorm_adaptive=False,
+ pdnorm_affine=True,
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.006, 0.0006],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0006)]
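+# Parameters whose names contain "block" form a second group trained with the
+# lower 0.0006 LR (second entry of max_lr); all remaining parameters use 0.006.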
+
+# dataset settings
+dataset_type = "ScanNetPPDataset"
+data_root = "data/scannetpp"
+
+data = dict(
+ num_classes=100,
+ ignore_index=-1,
+ train=dict(
+ type=dataset_type,
+ split="train_grid1mm_chunk6x6_stride3x3",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "normal", "segment"),
+ return_inverse=True,
+ ),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannetpp/semseg-pt-v3m1-1-submit.py b/Pointcept/configs/scannetpp/semseg-pt-v3m1-1-submit.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b91ca6d93037bf502566afd3a704b2ef7aa010b
--- /dev/null
+++ b/Pointcept/configs/scannetpp/semseg-pt-v3m1-1-submit.py
@@ -0,0 +1,289 @@
+_base_ = [
+ "../_base_/default_runtime.py",
+ "../_base_/dataset/scannetpp.py",
+]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+evaluate = False
+
+# model settings
+model = dict(
+ type="DefaultSegmentorV2",
+ num_classes=100,
+ backbone_out_channels=64,
+ backbone=dict(
+ type="PT-v3m1",
+ in_channels=6,
+ order=("z", "z-trans", "hilbert", "hilbert-trans"),
+ stride=(2, 2, 2, 2),
+ enc_depths=(2, 2, 2, 6, 2),
+ enc_channels=(32, 64, 128, 256, 512),
+ enc_num_head=(2, 4, 8, 16, 32),
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+ dec_depths=(2, 2, 2, 2),
+ dec_channels=(64, 64, 128, 256),
+ dec_num_head=(4, 4, 8, 16),
+ dec_patch_size=(1024, 1024, 1024, 1024),
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop=0.0,
+ proj_drop=0.0,
+ drop_path=0.3,
+ shuffle_orders=True,
+ pre_norm=True,
+ enable_rpe=False,
+ enable_flash=True,
+ upcast_attention=False,
+ upcast_softmax=False,
+ cls_mode=False,
+ pdnorm_bn=False,
+ pdnorm_ln=False,
+ pdnorm_decouple=True,
+ pdnorm_adaptive=False,
+ pdnorm_affine=True,
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.006, 0.0006],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0006)]
+
+# dataset settings
+dataset_type = "ScanNetPPDataset"
+data_root = "data/scannetpp"
+
+data = dict(
+ num_classes=100,
+ ignore_index=-1,
+ train=dict(
+ type=dataset_type,
+ split=["train_grid1mm_chunk6x6_stride3x3", "val_grid1mm_chunk6x6_stride3x3"],
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="test",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "normal", "segment"),
+ return_inverse=True,
+ ),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
+
+# hooks
+hooks = [
+ dict(type="CheckpointLoader"),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(type="SemSegEvaluator"),
+ dict(type="CheckpointSaver", save_freq=None),
+ dict(type="PreciseEvaluator", test_last=True),
+]
diff --git a/Pointcept/configs/scannetpp/semseg-pt-v3m1-2-ppt-extreme.py b/Pointcept/configs/scannetpp/semseg-pt-v3m1-2-ppt-extreme.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b0c756223bd65cafca57ba077ff2d6830887403
--- /dev/null
+++ b/Pointcept/configs/scannetpp/semseg-pt-v3m1-2-ppt-extreme.py
@@ -0,0 +1,499 @@
+_base_ = [
+ "../_base_/default_runtime.py",
+ "../_base_/dataset/scannetpp.py",
+]
+
+# misc custom setting
+batch_size = 24 # bs: total bs in all gpus
+num_worker = 48
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+
+# trainer
+train = dict(
+ type="MultiDatasetTrainer",
+)
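+# MultiDatasetTrainer optimizes a single model jointly over the ConcatDataset
+# defined in data.train below; each sample carries a dataset "condition" tag
+# consumed by the PPT model.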
+
+# model settings
+model = dict(
+ type="PPT-v1m2",
+ backbone=dict(
+ type="PT-v3m1",
+ in_channels=6,
+ order=("z", "z-trans", "hilbert", "hilbert-trans"),
+ stride=(2, 2, 2, 2),
+ enc_depths=(3, 3, 3, 6, 3),
+ enc_channels=(48, 96, 192, 384, 512),
+ enc_num_head=(3, 6, 12, 24, 32),
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+ dec_depths=(3, 3, 3, 3),
+ dec_channels=(64, 96, 192, 384),
+ dec_num_head=(4, 6, 12, 24),
+ dec_patch_size=(1024, 1024, 1024, 1024),
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop=0.0,
+ proj_drop=0.0,
+ drop_path=0.3,
+ shuffle_orders=True,
+ pre_norm=True,
+ enable_rpe=False,
+ enable_flash=True,
+ upcast_attention=False,
+ upcast_softmax=False,
+ cls_mode=False,
+ pdnorm_bn=True,
+ pdnorm_ln=True,
+ pdnorm_decouple=True,
+ pdnorm_adaptive=False,
+ pdnorm_affine=True,
+ pdnorm_conditions=("ScanNet", "ScanNet++", "S3DIS", "Structured3D"),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ backbone_out_channels=64,
+ context_channels=256,
+ conditions=("ScanNet", "ScanNet++", "S3DIS", "Structured3D"),
+ num_classes=(200, 100, 13, 25),
+)
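+# PPT-v1m2 shares one PT-v3 backbone across the listed datasets: the pdnorm
+# "conditions" select dataset-specific normalization layers, and each entry of
+# num_classes is assumed to correspond to that dataset's segmentation head
+# (ScanNet 200, ScanNet++ 100, S3DIS 13, Structured3D 25 classes).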
+
+# scheduler settings
+epoch = 100
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.005, 0.0005],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0005)]
+
+# dataset settings
+data = dict(
+ num_classes=100,
+ ignore_index=-1,
+ train=dict(
+ type="ConcatDataset",
+ datasets=[
+ # Structured3D
+ dict(
+ type="Structured3DDataset",
+ split=["train", "val", "test"],
+ data_root="data/structured3d",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "Structured3D"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
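+ # loop repeats this dataset within each epoch, acting as a relative
+ # sampling weight against the other concatenated datasets.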
+ loop=2, # sampling weight
+ ),
+ # ScanNet
+ dict(
+ type="ScanNet200Dataset",
+ split=["train", "val"],
+ data_root="data/scannet",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=1, # sampling weight
+ ),
+ # S3DIS
+ # dict(
+ # type="S3DISDataset",
+ # split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ # data_root="data/s3dis",
+ # transform=[
+ # dict(type="CenterShift", apply_z=True),
+ # dict(
+ # type="RandomDropout",
+ # dropout_ratio=0.2,
+ # dropout_application_ratio=0.2,
+ # ),
+ # # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ # dict(
+ # type="RandomRotate",
+ # angle=[-1, 1],
+ # axis="z",
+ # center=[0, 0, 0],
+ # p=0.5,
+ # ),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ # dict(type="RandomScale", scale=[0.9, 1.1]),
+ # # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ # dict(type="RandomFlip", p=0.5),
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(
+ # type="ElasticDistortion",
+ # distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ # ),
+ # dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ # dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ # dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ # dict(
+ # type="GridSample",
+ # grid_size=0.02,
+ # hash_type="fnv",
+ # mode="train",
+ # return_grid_coord=True,
+ # ),
+ # dict(type="SphereCrop", sample_rate=0.6, mode="random"),
+ # dict(type="SphereCrop", point_max=204800, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ # dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ # dict(type="Add", keys_dict={"condition": "S3DIS"}),
+ # dict(type="ToTensor"),
+ # dict(
+ # type="Collect",
+ # keys=("coord", "grid_coord", "segment", "condition"),
+ # feat_keys=("color", "normal"),
+ # ),
+ # ],
+ # test_mode=False,
+ # loop=1, # sampling weight
+ # ),
+ dict(
+ type="ScanNetPPDataset",
+ split="train_grid1mm_chunk6x6_stride3x3",
+ data_root="data/scannetpp",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "ScanNet++"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ ],
+ ),
+ val=dict(
+ type="ScanNetPPDataset",
+ split="val",
+ data_root="data/scannetpp",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(type="Add", keys_dict={"condition": "ScanNet++"}),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type="ScanNetPPDataset",
+ split="val",
+ data_root="data/scannetpp",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "normal", "segment"),
+ return_inverse=True,
+ ),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "ScanNet++"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannetpp/semseg-pt-v3m1-3-ppt-extreme-submit.py b/Pointcept/configs/scannetpp/semseg-pt-v3m1-3-ppt-extreme-submit.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb48cf2db8af6d55dd85945e92be33df1e49b5fc
--- /dev/null
+++ b/Pointcept/configs/scannetpp/semseg-pt-v3m1-3-ppt-extreme-submit.py
@@ -0,0 +1,488 @@
+_base_ = [
+ "../_base_/default_runtime.py",
+ "../_base_/dataset/scannetpp.py",
+]
+
+# misc custom setting
+batch_size = 24 # bs: total bs in all gpus
+num_worker = 48
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+evaluate = False
+find_unused_parameters = True
+
+# trainer
+train = dict(
+ type="MultiDatasetTrainer",
+)
+
+# model settings
+model = dict(
+ type="PPT-v1m2",
+ backbone=dict(
+ type="PT-v3m1",
+ in_channels=6,
+ order=("z", "z-trans", "hilbert", "hilbert-trans"),
+ stride=(2, 2, 2, 2),
+ enc_depths=(3, 3, 3, 6, 3),
+ enc_channels=(48, 96, 192, 384, 512),
+ enc_num_head=(3, 6, 12, 24, 32),
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+ dec_depths=(3, 3, 3, 3),
+ dec_channels=(64, 96, 192, 384),
+ dec_num_head=(4, 6, 12, 24),
+ dec_patch_size=(1024, 1024, 1024, 1024),
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop=0.0,
+ proj_drop=0.0,
+ drop_path=0.3,
+ shuffle_orders=True,
+ pre_norm=True,
+ enable_rpe=False,
+ enable_flash=True,
+ upcast_attention=False,
+ upcast_softmax=False,
+ cls_mode=False,
+ pdnorm_bn=True,
+ pdnorm_ln=True,
+ pdnorm_decouple=True,
+ pdnorm_adaptive=False,
+ pdnorm_affine=True,
+ pdnorm_conditions=("ScanNet", "ScanNet++", "S3DIS", "Structured3D"),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ backbone_out_channels=64,
+ context_channels=256,
+ conditions=("ScanNet", "ScanNet++", "S3DIS", "Structured3D"),
+ num_classes=(200, 100, 13, 25),
+)
+
+# scheduler settings
+epoch = 100
+optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.005, 0.0005],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0005)]
+
+# dataset settings
+data = dict(
+ num_classes=100,
+ ignore_index=-1,
+ train=dict(
+ type="ConcatDataset",
+ datasets=[
+ # Structured3D
+ dict(
+ type="Structured3DDataset",
+ split=["train", "val", "test"],
+ data_root="data/structured3d",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "Structured3D"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=2, # sampling weight
+ ),
+ # ScanNet
+ dict(
+ type="ScanNet200Dataset",
+ split=["train", "val"],
+ data_root="data/scannet",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=1, # sampling weight
+ ),
+ # S3DIS
+ # dict(
+ # type="S3DISDataset",
+ # split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ # data_root="data/s3dis",
+ # transform=[
+ # dict(type="CenterShift", apply_z=True),
+ # dict(
+ # type="RandomDropout",
+ # dropout_ratio=0.2,
+ # dropout_application_ratio=0.2,
+ # ),
+ # # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ # dict(
+ # type="RandomRotate",
+ # angle=[-1, 1],
+ # axis="z",
+ # center=[0, 0, 0],
+ # p=0.5,
+ # ),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ # dict(type="RandomScale", scale=[0.9, 1.1]),
+ # # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ # dict(type="RandomFlip", p=0.5),
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(
+ # type="ElasticDistortion",
+ # distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ # ),
+ # dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ # dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ # dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ # dict(
+ # type="GridSample",
+ # grid_size=0.02,
+ # hash_type="fnv",
+ # mode="train",
+ # return_grid_coord=True,
+ # ),
+ # dict(type="SphereCrop", sample_rate=0.6, mode="random"),
+ # dict(type="SphereCrop", point_max=204800, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ # dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ # dict(type="Add", keys_dict={"condition": "S3DIS"}),
+ # dict(type="ToTensor"),
+ # dict(
+ # type="Collect",
+ # keys=("coord", "grid_coord", "segment", "condition"),
+ # feat_keys=("color", "normal"),
+ # ),
+ # ],
+ # test_mode=False,
+ # loop=1, # sampling weight
+ # ),
+ dict(
+ type="ScanNetPPDataset",
+ split=[
+ "train_grid1mm_chunk6x6_stride3x3",
+ "val_grid1mm_chunk6x6_stride3x3",
+ ],
+ data_root="data/scannetpp",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "ScanNet++"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ ],
+ ),
+ test=dict(
+ type="ScanNetPPDataset",
+ split="test",
+ data_root="data/scannetpp",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "normal", "segment"),
+ return_inverse=True,
+ ),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "ScanNet++"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
+
+# hooks
+hooks = [
+ dict(type="CheckpointLoader"),
+ dict(type="IterationTimer", warmup_iter=2),
+ dict(type="InformationWriter"),
+ dict(type="SemSegEvaluator"),
+ dict(type="CheckpointSaver", save_freq=None),
+ dict(type="PreciseEvaluator", test_last=True),
+]
diff --git a/Pointcept/configs/scannetpp/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/scannetpp/semseg-spunet-v1m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..871aa42954765ea6091c328910e40ee6c31e6173
--- /dev/null
+++ b/Pointcept/configs/scannetpp/semseg-spunet-v1m1-0-base.py
@@ -0,0 +1,271 @@
+_base_ = [
+ "../_base_/default_runtime.py",
+ "../_base_/dataset/scannetpp.py",
+]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=100,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
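+# The backbone above takes in_channels=6, matching the (color, normal) features collected
+# below; num_classes=100 follows the ScanNet++ top-100 semantic benchmark.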
+
+
+# scheduler settings
+epoch = 800
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
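+# OneCycleLR starts at max_lr / div_factor (0.005), ramps to max_lr over the first 5% of
+# steps (pct_start), then cosine-anneals down to max_lr / (div_factor * final_div_factor).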
+
+# dataset settings
+dataset_type = "ScanNetPPDataset"
+data_root = "data/scannetpp"
+
+data = dict(
+ num_classes=100,
+ ignore_index=-1,
+ train=dict(
+ type=dataset_type,
+ split="train_grid1mm_chunk6x6_stride3x3",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "normal", "segment"),
+ return_inverse=True,
+ ),
+ ],
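+        # The 1 cm GridSample above only thins the cloud and records an inverse map back to
+        # origin_segment; the 2 cm voxelize in test_cfg below is what the network consumes,
+        # and fragment predictions are restored to full resolution through that inverse.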
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/scannetpp/semseg-spunet-v1m1-1-ppt-extreme.py b/Pointcept/configs/scannetpp/semseg-spunet-v1m1-1-ppt-extreme.py
new file mode 100644
index 0000000000000000000000000000000000000000..55ed0fe2be2b23404980d7c19633458f843506bd
--- /dev/null
+++ b/Pointcept/configs/scannetpp/semseg-spunet-v1m1-1-ppt-extreme.py
@@ -0,0 +1,480 @@
+_base_ = [
+ "../_base_/default_runtime.py",
+ "../_base_/dataset/scannetpp.py",
+]
+
+# misc custom setting
+batch_size = 24 # bs: total bs in all gpus
+num_worker = 48
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+
+# trainer
+train = dict(
+ type="MultiDatasetTrainer",
+)
+
+# model settings
+model = dict(
+ type="PPT-v1m2",
+ backbone=dict(
+ type="SpUNet-v1m3",
+ in_channels=6,
+ num_classes=0,
+ base_channels=32,
+ context_channels=256,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ cls_mode=False,
+ conditions=("ScanNet", "ScanNet++", "S3DIS", "Structured3D"),
+ zero_init=False,
+ norm_decouple=True,
+ norm_adaptive=True,
+ norm_affine=True,
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ backbone_out_channels=96,
+ context_channels=256,
+ conditions=("ScanNet", "ScanNet++", "S3DIS", "Structured3D"),
+ num_classes=(200, 100, 13, 25),
+)
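+# PPT-v1m2 pairs each entry in `conditions` with its own segmentation head; num_classes gives
+# the head sizes (ScanNet-200, ScanNet++ 100, S3DIS 13, Structured3D 25). Only the ScanNet++
+# head is evaluated here (data.num_classes=100).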
+
+# scheduler settings
+epoch = 100
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+data = dict(
+ num_classes=100,
+ ignore_index=-1,
+ train=dict(
+ type="ConcatDataset",
+ datasets=[
+ # Structured3D
+ dict(
+ type="Structured3DDataset",
+ split=["train", "val", "test"],
+ data_root="data/structured3d",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "Structured3D"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=2, # sampling weight
+ ),
+ # ScanNet
+ dict(
+ type="ScanNet200Dataset",
+ split=["train", "val"],
+ data_root="data/scannet",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ loop=1, # sampling weight
+ ),
+ # S3DIS
+ # dict(
+ # type="S3DISDataset",
+ # split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
+ # data_root="data/s3dis",
+ # transform=[
+ # dict(type="CenterShift", apply_z=True),
+ # dict(
+ # type="RandomDropout",
+ # dropout_ratio=0.2,
+ # dropout_application_ratio=0.2,
+ # ),
+ # # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ # dict(
+ # type="RandomRotate",
+ # angle=[-1, 1],
+ # axis="z",
+ # center=[0, 0, 0],
+ # p=0.5,
+ # ),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ # dict(type="RandomScale", scale=[0.9, 1.1]),
+ # # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ # dict(type="RandomFlip", p=0.5),
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(
+ # type="ElasticDistortion",
+ # distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ # ),
+ # dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ # dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ # dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ # dict(
+ # type="GridSample",
+ # grid_size=0.02,
+ # hash_type="fnv",
+ # mode="train",
+ # return_grid_coord=True,
+ # ),
+ # dict(type="SphereCrop", sample_rate=0.6, mode="random"),
+ # dict(type="SphereCrop", point_max=204800, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ # dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ # dict(type="Add", keys_dict={"condition": "S3DIS"}),
+ # dict(type="ToTensor"),
+ # dict(
+ # type="Collect",
+ # keys=("coord", "grid_coord", "segment", "condition"),
+ # feat_keys=("color", "normal"),
+ # ),
+ # ],
+ # test_mode=False,
+ # loop=1, # sampling weight
+ # ),
+ dict(
+ type="ScanNetPPDataset",
+ split="train_grid1mm_chunk6x6_stride3x3",
+ data_root="data/scannetpp",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout",
+ dropout_ratio=0.2,
+ dropout_application_ratio=0.2,
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(
+ type="ElasticDistortion",
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
+ ),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", point_max=204800, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ # dict(type="ShufflePoint"),
+ dict(type="Add", keys_dict={"condition": "ScanNet++"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ ],
+ ),
+ val=dict(
+ type="ScanNetPPDataset",
+ split="val",
+ data_root="data/scannetpp",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(type="Add", keys_dict={"condition": "ScanNet++"}),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type="ScanNetPPDataset",
+ split="val",
+ data_root="data/scannetpp",
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
+ dict(
+ type="GridSample",
+ grid_size=0.01,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "color", "normal", "segment"),
+ return_inverse=True,
+ ),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ keys=("coord", "color", "normal"),
+ return_grid_coord=True,
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "ScanNet++"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/semantic_kitti/semseg-minkunet34c-0-base.py b/Pointcept/configs/semantic_kitti/semseg-minkunet34c-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b22906b26f70255b437c1f8e50e71e24dde3f6d
--- /dev/null
+++ b/Pointcept/configs/semantic_kitti/semseg-minkunet34c-0-base.py
@@ -0,0 +1,213 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 8 # bs: total bs in all gpus
+mix_prob = 0
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(type="MinkUNet34C", in_channels=4, out_channels=19),
+ criteria=[
+ dict(
+ type="CrossEntropyLoss",
+ weight=[
+ 3.1557,
+ 8.7029,
+ 7.8281,
+ 6.1354,
+ 6.3161,
+ 7.9937,
+ 8.9704,
+ 10.1922,
+ 1.6155,
+ 4.2187,
+ 1.9385,
+ 5.5455,
+ 2.0198,
+ 2.6261,
+ 1.3212,
+ 5.1102,
+ 2.5492,
+ 5.8585,
+ 7.3929,
+ ],
+ loss_weight=1.0,
+ ignore_index=-1,
+ )
+ ],
+)
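+# The 19 cross-entropy class weights are inverse-frequency style and follow the SemanticKITTI
+# label order listed in `names` below (car ... traffic-sign).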
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+
+# dataset settings
+dataset_type = "SemanticKITTIDataset"
+data_root = "data/semantic_kitti"
+ignore_index = -1
+names = [
+ "car",
+ "bicycle",
+ "motorcycle",
+ "truck",
+ "other-vehicle",
+ "person",
+ "bicyclist",
+ "motorcyclist",
+ "road",
+ "parking",
+ "sidewalk",
+ "other-ground",
+ "building",
+ "fence",
+ "vegetation",
+ "trunk",
+ "terrain",
+ "pole",
+ "traffic-sign",
+]
+
+data = dict(
+ num_classes=19,
+ ignore_index=ignore_index,
+ names=names,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "strength"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ignore_index=ignore_index,
+ ),
+)
diff --git a/Pointcept/configs/semantic_kitti/semseg-ppt-v1m1-0-sk-nu-wa-spunet.py b/Pointcept/configs/semantic_kitti/semseg-ppt-v1m1-0-sk-nu-wa-spunet.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ea85111fdc0eb44096367ce5c50df3c2683da68
--- /dev/null
+++ b/Pointcept/configs/semantic_kitti/semseg-ppt-v1m1-0-sk-nu-wa-spunet.py
@@ -0,0 +1,351 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+
+# trainer
+train = dict(
+ type="MultiDatasetTrainer",
+)
+
+# model settings
+model = dict(
+ type="PPT-v1m1",
+ backbone=dict(
+ type="SpUNet-v1m3",
+ in_channels=4,
+ num_classes=0,
+ base_channels=32,
+ context_channels=256,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ cls_mode=False,
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
+ zero_init=False,
+ norm_decouple=True,
+ norm_adaptive=False,
+ norm_affine=True,
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ backbone_out_channels=96,
+ context_channels=256,
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
+ template="[x]",
+ clip_model="ViT-B/16",
+ # fmt: off
+ class_name=(
+ # SemanticKITTI
+ "car", "bicycle", "motorcycle", "truck", "other vehicle",
+ "person", "person who rides a bicycle", "person who rides a motorcycle", "road", "parking",
+ "path for pedestrians at the side of a road", "other ground", "building", "fence", "vegetation",
+ "trunk", "terrain", "pole", "traffic sign",
+ # nuScenes
+ "barrier", "bicycle", "bus", "car", "construction vehicle",
+ "motorcycle", "pedestrian", "traffic cone", "trailer", "truck",
+ "path suitable or safe for driving", "other flat", "sidewalk", "terrain", "man made", "vegetation",
+        # Waymo
+ "car", "truck", "bus", "other vehicle", "person who rides a motorcycle",
+ "person who rides a bicycle", "pedestrian", "sign", "traffic light", "pole",
+ "construction cone", "bicycle", "motorcycle", "building", "vegetation",
+ "tree trunk", "curb", "road", "lane marker", "other ground", "horizontal surface that can not drive",
+ "surface when pedestrians most likely to walk on",
+ ),
+ valid_index=(
+ [i for i in range(19)],
+ [i for i in range(19, 19 + 16)],
+ [i for i in range(19 + 16, 19 + 16 + 22)],
+ ),
+ # fmt: on
+ backbone_mode=False,
+)
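+# PPT-v1m1 embeds the class_name prompts above with the CLIP text encoder (ViT-B/16,
+# template "[x]"); valid_index slices the concatenated list back into per-dataset label
+# spaces: 0-18 SemanticKITTI, 19-34 nuScenes, 35-56 Waymo.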
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+# param_dicts = [dict(keyword="modulation", lr=0.0002)]
+
+# dataset settings
+data = dict(
+ num_classes=19,
+ ignore_index=-1,
+ names=[
+ "car",
+ "bicycle",
+ "motorcycle",
+ "truck",
+ "other-vehicle",
+ "person",
+ "bicyclist",
+ "motorcyclist",
+ "road",
+ "parking",
+ "sidewalk",
+ "other-ground",
+ "building",
+ "fence",
+ "vegetation",
+ "trunk",
+ "terrain",
+ "pole",
+ "traffic-sign",
+ ],
+ train=dict(
+ type="ConcatDataset",
+ datasets=[
+ # nuScenes
+ dict(
+ type="NuScenesDataset",
+ split="train",
+ data_root="data/nuscenes",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ # SemanticKITTI
+ dict(
+ type="SemanticKITTIDataset",
+ split="train",
+ data_root="data/semantic_kitti",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ # Waymo
+ dict(
+ type="WaymoDataset",
+ split="training",
+ data_root="data/waymo",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "Waymo"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ ],
+ ),
+ val=dict(
+ type="SemanticKITTIDataset",
+ split="val",
+ data_root="data/semantic_kitti",
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ ),
+ test=dict(
+ type="SemanticKITTIDataset",
+ split="val",
+ data_root="data/semantic_kitti",
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "strength"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ignore_index=-1,
+ ),
+)
diff --git a/Pointcept/configs/semantic_kitti/semseg-ppt-v1m2-0-sk-nu-wa-spunet-submit.py b/Pointcept/configs/semantic_kitti/semseg-ppt-v1m2-0-sk-nu-wa-spunet-submit.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f1c21cb9e0a05067de2862d918ce39490cfe4c2
--- /dev/null
+++ b/Pointcept/configs/semantic_kitti/semseg-ppt-v1m2-0-sk-nu-wa-spunet-submit.py
@@ -0,0 +1,301 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+evaluate = False
+
+# trainer
+train = dict(
+ type="MultiDatasetTrainer",
+)
+
+# model settings
+model = dict(
+ type="PPT-v1m2",
+ backbone=dict(
+ type="SpUNet-v1m3",
+ in_channels=4,
+ num_classes=0,
+ base_channels=32,
+ context_channels=256,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ cls_mode=False,
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
+ zero_init=False,
+ norm_decouple=True,
+ norm_adaptive=False,
+ norm_affine=True,
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ backbone_out_channels=96,
+ context_channels=256,
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
+ num_classes=(19, 16, 22),
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+# param_dicts = [dict(keyword="modulation", lr=0.0002)]
+
+# dataset settings
+data = dict(
+ num_classes=19,
+ ignore_index=-1,
+ names=[
+ "car",
+ "bicycle",
+ "motorcycle",
+ "truck",
+ "other-vehicle",
+ "person",
+ "bicyclist",
+ "motorcyclist",
+ "road",
+ "parking",
+ "sidewalk",
+ "other-ground",
+ "building",
+ "fence",
+ "vegetation",
+ "trunk",
+ "terrain",
+ "pole",
+ "traffic-sign",
+ ],
+ train=dict(
+ type="ConcatDataset",
+ datasets=[
+ # nuScenes
+ dict(
+ type="NuScenesDataset",
+ split=["train", "val"],
+ data_root="data/nuscenes",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ # SemanticKITTI
+ dict(
+ type="SemanticKITTIDataset",
+ split=["train", "val"],
+ data_root="data/semantic_kitti",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ # Waymo
+ dict(
+ type="WaymoDataset",
+ split=["training", "validation"],
+ data_root="data/waymo",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "Waymo"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ ],
+ ),
+ test=dict(
+ type="SemanticKITTIDataset",
+ split="test",
+ data_root="data/semantic_kitti",
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "strength"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ignore_index=-1,
+ ),
+)
diff --git a/Pointcept/configs/semantic_kitti/semseg-ppt-v1m2-0-sk-nu-wa-spunet.py b/Pointcept/configs/semantic_kitti/semseg-ppt-v1m2-0-sk-nu-wa-spunet.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb5cd428b7e6856a130e7bc30c1c5e9ea7c58428
--- /dev/null
+++ b/Pointcept/configs/semantic_kitti/semseg-ppt-v1m2-0-sk-nu-wa-spunet.py
@@ -0,0 +1,325 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+num_worker = 24
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+find_unused_parameters = True
+
+# trainer
+train = dict(
+ type="MultiDatasetTrainer",
+)
+
+# model settings
+model = dict(
+ type="PPT-v1m2",
+ backbone=dict(
+ type="SpUNet-v1m3",
+ in_channels=4,
+ num_classes=0,
+ base_channels=32,
+ context_channels=256,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ cls_mode=False,
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
+ zero_init=False,
+ norm_decouple=True,
+ norm_adaptive=False,
+ norm_affine=True,
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ backbone_out_channels=96,
+ context_channels=256,
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
+ num_classes=(19, 16, 22),
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+# param_dicts = [dict(keyword="modulation", lr=0.0002)]
+
+# dataset settings
+data = dict(
+ num_classes=19,
+ ignore_index=-1,
+ names=[
+ "car",
+ "bicycle",
+ "motorcycle",
+ "truck",
+ "other-vehicle",
+ "person",
+ "bicyclist",
+ "motorcyclist",
+ "road",
+ "parking",
+ "sidewalk",
+ "other-ground",
+ "building",
+ "fence",
+ "vegetation",
+ "trunk",
+ "terrain",
+ "pole",
+ "traffic-sign",
+ ],
+ train=dict(
+ type="ConcatDataset",
+ datasets=[
+ # nuScenes
+ dict(
+ type="NuScenesDataset",
+ split="train",
+ data_root="data/nuscenes",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ # SemanticKITTI
+ dict(
+ type="SemanticKITTIDataset",
+ split="train",
+ data_root="data/semantic_kitti",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ # Waymo
+ dict(
+ type="WaymoDataset",
+ split="training",
+ data_root="data/waymo",
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(
+ type="RandomRotate",
+ angle=[-1, 1],
+ axis="z",
+ center=[0, 0, 0],
+ p=0.5,
+ ),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(
+ type="PointClip",
+ point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2),
+ ),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="Add", keys_dict={"condition": "Waymo"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ loop=1,
+ ),
+ ],
+ ),
+ val=dict(
+ type="SemanticKITTIDataset",
+ split="val",
+ data_root="data/semantic_kitti",
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=-1,
+ ),
+ test=dict(
+ type="SemanticKITTIDataset",
+ split="val",
+ data_root="data/semantic_kitti",
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "strength"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index", "condition"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ignore_index=-1,
+ ),
+)
diff --git a/Pointcept/configs/semantic_kitti/semseg-pt-v2m2-0-base.py b/Pointcept/configs/semantic_kitti/semseg-pt-v2m2-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d1670a65c4e06b06caeef456bd0626310acbc40
--- /dev/null
+++ b/Pointcept/configs/semantic_kitti/semseg-pt-v2m2-0-base.py
@@ -0,0 +1,222 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 8 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=4,
+ num_classes=19,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+ grid_sizes=(0.15, 0.375, 0.9375, 2.34375), # x3, x2.5, x2.5, x2.5
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ # fmt: off
+ criteria=[
+ dict(type="CrossEntropyLoss",
+ weight=[3.1557, 8.7029, 7.8281, 6.1354, 6.3161, 7.9937, 8.9704, 10.1922, 1.6155, 4.2187,
+ 1.9385, 5.5455, 2.0198, 2.6261, 1.3212, 5.1102, 2.5492, 5.8585, 7.3929],
+ loss_weight=1.0,
+ ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+ # fmt: on
+)
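+# grid_sizes above are the per-stage grid-pooling sizes in metres, stacked multiplicatively
+# on the 0.05 m GridSample voxel: x3 to 0.15, then x2.5 at each deeper stage.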
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+
+# dataset settings
+dataset_type = "SemanticKITTIDataset"
+data_root = "data/semantic_kitti"
+ignore_index = -1
+names = [
+ "car",
+ "bicycle",
+ "motorcycle",
+ "truck",
+ "other-vehicle",
+ "person",
+ "bicyclist",
+ "motorcyclist",
+ "road",
+ "parking",
+ "sidewalk",
+ "other-ground",
+ "building",
+ "fence",
+ "vegetation",
+ "trunk",
+ "terrain",
+ "pole",
+ "traffic-sign",
+]
+
+data = dict(
+ num_classes=19,
+ ignore_index=ignore_index,
+ names=names,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="SphereCrop", point_max=120000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "strength"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(
+ type="PointClip",
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ignore_index=ignore_index,
+ ),
+)
diff --git a/Pointcept/configs/semantic_kitti/semseg-pt-v2m2-1-benchmark-submit.py b/Pointcept/configs/semantic_kitti/semseg-pt-v2m2-1-benchmark-submit.py
new file mode 100644
index 0000000000000000000000000000000000000000..d65cda64e4ee3f485c11ada2c1eeccb30ca9e8ef
--- /dev/null
+++ b/Pointcept/configs/semantic_kitti/semseg-pt-v2m2-1-benchmark-submit.py
@@ -0,0 +1,218 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 8 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+evaluate = False
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=4,
+ num_classes=19,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+ grid_sizes=(0.15, 0.375, 0.9375, 2.34375), # x3, x2.5, x2.5, x2.5
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[
+ dict(
+ type="CrossEntropyLoss",
+ weight=[
+ 3.1557,
+ 8.7029,
+ 7.8281,
+ 6.1354,
+ 6.3161,
+ 7.9937,
+ 8.9704,
+ 10.1922,
+ 1.6155,
+ 4.2187,
+ 1.9385,
+ 5.5455,
+ 2.0198,
+ 2.6261,
+ 1.3212,
+ 5.1102,
+ 2.5492,
+ 5.8585,
+ 7.3929,
+ ],
+ loss_weight=1.0,
+ ignore_index=-1,
+ ),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+
+# dataset settings
+dataset_type = "SemanticKITTIDataset"
+data_root = "data/semantic_kitti"
+ignore_index = -1
+names = [
+ "car",
+ "bicycle",
+ "motorcycle",
+ "truck",
+ "other-vehicle",
+ "person",
+ "bicyclist",
+ "motorcyclist",
+ "road",
+ "parking",
+ "sidewalk",
+ "other-ground",
+ "building",
+ "fence",
+ "vegetation",
+ "trunk",
+ "terrain",
+ "pole",
+ "traffic-sign",
+]
+
+data = dict(
+ num_classes=19,
+ ignore_index=ignore_index,
+ names=names,
+ train=dict(
+ type=dataset_type,
+ split=["train", "val"],
+ data_root=data_root,
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="SphereCrop", point_max=120000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="test",
+ data_root=data_root,
+ transform=[],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "strength"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(
+ type="PointClip",
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ignore_index=ignore_index,
+ ),
+)
diff --git a/Pointcept/configs/semantic_kitti/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/semantic_kitti/semseg-spunet-v1m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..7be9cb0979f431a137768849c0328ada0601fbd7
--- /dev/null
+++ b/Pointcept/configs/semantic_kitti/semseg-spunet-v1m1-0-base.py
@@ -0,0 +1,219 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=4,
+ num_classes=19,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[
+ dict(
+ type="CrossEntropyLoss",
+ weight=[
+ 3.1557,
+ 8.7029,
+ 7.8281,
+ 6.1354,
+ 6.3161,
+ 7.9937,
+ 8.9704,
+ 10.1922,
+ 1.6155,
+ 4.2187,
+ 1.9385,
+ 5.5455,
+ 2.0198,
+ 2.6261,
+ 1.3212,
+ 5.1102,
+ 2.5492,
+ 5.8585,
+ 7.3929,
+ ],
+ loss_weight=1.0,
+ ignore_index=-1,
+ )
+ ],
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+
+# dataset settings
+dataset_type = "SemanticKITTIDataset"
+data_root = "data/semantic_kitti"
+ignore_index = -1
+names = [
+ "car",
+ "bicycle",
+ "motorcycle",
+ "truck",
+ "other-vehicle",
+ "person",
+ "bicyclist",
+ "motorcyclist",
+ "road",
+ "parking",
+ "sidewalk",
+ "other-ground",
+ "building",
+ "fence",
+ "vegetation",
+ "trunk",
+ "terrain",
+ "pole",
+ "traffic-sign",
+]
+
+data = dict(
+ num_classes=19,
+ ignore_index=ignore_index,
+ names=names,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "strength"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ignore_index=ignore_index,
+ ),
+)
diff --git a/Pointcept/configs/semantic_kitti/semseg-spvcnn-v1m1-0-base.py b/Pointcept/configs/semantic_kitti/semseg-spvcnn-v1m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..599ff4d538e1e2bc706abc9c00a41dc71cd5e007
--- /dev/null
+++ b/Pointcept/configs/semantic_kitti/semseg-spvcnn-v1m1-0-base.py
@@ -0,0 +1,219 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 8 # bs: total bs in all gpus
+mix_prob = 0
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SPVCNN",
+ in_channels=4,
+ out_channels=19,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 2, 2, 2, 2, 2, 2, 2),
+ ),
+ criteria=[
+ dict(
+ type="CrossEntropyLoss",
+ weight=[
+ 3.1557,
+ 8.7029,
+ 7.8281,
+ 6.1354,
+ 6.3161,
+ 7.9937,
+ 8.9704,
+ 10.1922,
+ 1.6155,
+ 4.2187,
+ 1.9385,
+ 5.5455,
+ 2.0198,
+ 2.6261,
+ 1.3212,
+ 5.1102,
+ 2.5492,
+ 5.8585,
+ 7.3929,
+ ],
+ loss_weight=1.0,
+ ignore_index=-1,
+ )
+ ],
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+
+# dataset settings
+dataset_type = "SemanticKITTIDataset"
+data_root = "data/semantic_kitti"
+ignore_index = -1
+names = [
+ "car",
+ "bicycle",
+ "motorcycle",
+ "truck",
+ "other-vehicle",
+ "person",
+ "bicyclist",
+ "motorcyclist",
+ "road",
+ "parking",
+ "sidewalk",
+ "other-ground",
+ "building",
+ "fence",
+ "vegetation",
+ "trunk",
+ "terrain",
+ "pole",
+ "traffic-sign",
+]
+
+data = dict(
+ num_classes=19,
+ ignore_index=ignore_index,
+ names=names,
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "strength"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ignore_index=ignore_index,
+ ),
+)
diff --git a/Pointcept/configs/structured3d/semseg-pt-v2m2-0-base.py b/Pointcept/configs/structured3d/semseg-pt-v2m2-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c67cd6a103d6b3176482c6710938d8e23f2feea
--- /dev/null
+++ b/Pointcept/configs/structured3d/semseg-pt-v2m2-0-base.py
@@ -0,0 +1,304 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="PT-v2m2",
+ in_channels=9,
+ num_classes=25,
+ patch_embed_depth=1,
+ patch_embed_channels=48,
+ patch_embed_groups=6,
+ patch_embed_neighbours=8,
+ enc_depths=(2, 2, 6, 2),
+ enc_channels=(96, 192, 384, 512),
+ enc_groups=(12, 24, 48, 64),
+ enc_neighbours=(16, 16, 16, 16),
+ dec_depths=(1, 1, 1, 1),
+ dec_channels=(48, 96, 192, 384),
+ dec_groups=(6, 12, 24, 48),
+ dec_neighbours=(16, 16, 16, 16),
+ grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5
+ attn_qkv_bias=True,
+ pe_multiplier=False,
+ pe_bias=True,
+ attn_drop_rate=0.0,
+ drop_path_rate=0.3,
+ enable_checkpoint=False,
+ unpool_backend="map", # map / interp
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 100
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "Structured3DDataset"
+data_root = "data/structured3d"
+
+data = dict(
+ num_classes=25,
+ ignore_index=-1,
+ names=(
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "picture",
+ "desk",
+ "shelves",
+ "curtain",
+ "dresser",
+ "pillow",
+ "mirror",
+ "ceiling",
+ "refrigerator",
+ "television",
+ "nightstand",
+ "sink",
+ "lamp",
+ "otherstructure",
+ "otherfurniture",
+ "otherprop",
+ ),
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="SphereCrop", point_max=120000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/structured3d/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/structured3d/semseg-spunet-v1m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a2fa48320601baed972856a20afcba21991df0c
--- /dev/null
+++ b/Pointcept/configs/structured3d/semseg-spunet-v1m1-0-base.py
@@ -0,0 +1,285 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=6,
+ num_classes=25,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 100
+optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=10000.0,
+)
+
+# dataset settings
+dataset_type = "Structured3DDataset"
+data_root = "data/structured3d"
+
+data = dict(
+ num_classes=25,
+ ignore_index=-1,
+ names=(
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "picture",
+ "desk",
+ "shelves",
+ "curtain",
+ "dresser",
+ "pillow",
+ "mirror",
+ "ceiling",
+ "refrigerator",
+ "television",
+ "nightstand",
+ "sink",
+ "lamp",
+ "otherstructure",
+ "otherfurniture",
+ "otherprop",
+ ),
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.6, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/structured3d/semseg-swin3d-v1m1-0-small.py b/Pointcept/configs/structured3d/semseg-swin3d-v1m1-0-small.py
new file mode 100644
index 0000000000000000000000000000000000000000..e52bb1ee7a4da43ca523c1979129ec2fa4c5ecbe
--- /dev/null
+++ b/Pointcept/configs/structured3d/semseg-swin3d-v1m1-0-small.py
@@ -0,0 +1,306 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="Swin3D-v1m1",
+ in_channels=9,
+ num_classes=25,
+ base_grid_size=0.02,
+ depths=[2, 4, 9, 4, 4],
+ channels=[48, 96, 192, 384, 384],
+ num_heads=[6, 6, 12, 24, 24],
+ window_sizes=[5, 7, 7, 7, 7],
+ quant_size=4,
+ drop_path_rate=0.3,
+ up_k=3,
+ num_layers=5,
+ stem_transformer=True,
+ down_stride=3,
+ upsample="linear_attn",
+ knn_down=True,
+ cRSE="XYZ_RGB_NORM",
+ fp16_mode=1,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 100
+optimizer = dict(type="AdamW", lr=0.008, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.008, 0.0008],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="blocks", lr=0.0008)]
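+# Note: the two max_lr values above are assumed to pair with the two optimizer parameter
+# groups built from param_dicts: the default group (peak lr 0.008) and parameters whose
+# names contain "blocks" (peak lr 0.0008).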
+
+# dataset settings
+dataset_type = "Structured3DDataset"
+data_root = "data/structured3d"
+
+data = dict(
+ num_classes=25,
+ ignore_index=-1,
+ names=(
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "picture",
+ "desk",
+ "shelves",
+ "curtain",
+ "dresser",
+ "pillow",
+ "mirror",
+ "ceiling",
+ "refrigerator",
+ "television",
+ "nightstand",
+ "sink",
+ "lamp",
+ "otherstructure",
+ "otherfurniture",
+ "otherprop",
+ ),
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ return_displacement=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="SphereCrop", point_max=120000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ return_displacement=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ return_displacement=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/structured3d/semseg-swin3d-v1m1-1-large.py b/Pointcept/configs/structured3d/semseg-swin3d-v1m1-1-large.py
new file mode 100644
index 0000000000000000000000000000000000000000..de62b4234473f34648201507a6b7d37d11674df6
--- /dev/null
+++ b/Pointcept/configs/structured3d/semseg-swin3d-v1m1-1-large.py
@@ -0,0 +1,306 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="Swin3D-v1m1",
+ in_channels=9,
+ num_classes=25,
+ base_grid_size=0.02,
+ depths=[2, 4, 9, 4, 4],
+ channels=[80, 160, 320, 640, 640],
+ num_heads=[10, 10, 20, 40, 40],
+ window_sizes=[5, 7, 7, 7, 7],
+ quant_size=4,
+ drop_path_rate=0.3,
+ up_k=3,
+ num_layers=5,
+ stem_transformer=True,
+ down_stride=3,
+ upsample="linear_attn",
+ knn_down=True,
+ cRSE="XYZ_RGB_NORM",
+ fp16_mode=1,
+ ),
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
+)
+
+# scheduler settings
+epoch = 100
+optimizer = dict(type="AdamW", lr=0.008, weight_decay=0.05)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.008, 0.0008],
+ pct_start=0.05,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=1000.0,
+)
+param_dicts = [dict(keyword="blocks", lr=0.0008)]
+
+# dataset settings
+dataset_type = "Structured3DDataset"
+data_root = "data/structured3d"
+
+data = dict(
+ num_classes=25,
+ ignore_index=-1,
+ names=(
+ "wall",
+ "floor",
+ "cabinet",
+ "bed",
+ "chair",
+ "sofa",
+ "table",
+ "door",
+ "window",
+ "picture",
+ "desk",
+ "shelves",
+ "curtain",
+ "dresser",
+ "pillow",
+ "mirror",
+ "ceiling",
+ "refrigerator",
+ "television",
+ "nightstand",
+ "sink",
+ "lamp",
+ "otherstructure",
+ "otherfurniture",
+ "otherprop",
+ ),
+ train=dict(
+ type=dataset_type,
+ split="train",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
+ ),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ return_displacement=True,
+ ),
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
+ dict(type="SphereCrop", point_max=120000, mode="random"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ShufflePoint"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="train",
+ return_grid_coord=True,
+ return_displacement=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
+ dict(type="CenterShift", apply_z=False),
+ dict(type="NormalizeColor"),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ test_mode=False,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="val",
+ data_root=data_root,
+ transform=[
+ dict(type="CenterShift", apply_z=True),
+ dict(type="NormalizeColor"),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.02,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ return_displacement=True,
+ keys=("coord", "color", "normal"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("color", "normal", "displacement"),
+ coord_feat_keys=("color", "normal"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[0.95, 0.95]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ ),
+ dict(type="RandomScale", scale=[1.05, 1.05]),
+ ],
+ [dict(type="RandomFlip", p=1)],
+ ],
+ ),
+ ),
+)
diff --git a/Pointcept/configs/waymo/semseg-pt-v3m1-0-base.py b/Pointcept/configs/waymo/semseg-pt-v3m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..89cb3bc588ce2ec662d76c81dad3fbcea8373f8a
--- /dev/null
+++ b/Pointcept/configs/waymo/semseg-pt-v3m1-0-base.py
@@ -0,0 +1,248 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentorV2",
+ num_classes=22,
+ backbone_out_channels=64,
+ backbone=dict(
+ type="PT-v3m1",
+ in_channels=4,
+ order=["z", "z-trans", "hilbert", "hilbert-trans"],
+ stride=(2, 2, 2, 2),
+ enc_depths=(2, 2, 2, 6, 2),
+ enc_channels=(32, 64, 128, 256, 512),
+ enc_num_head=(2, 4, 8, 16, 32),
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+ dec_depths=(2, 2, 2, 2),
+ dec_channels=(64, 64, 128, 256),
+ dec_num_head=(4, 4, 8, 16),
+ dec_patch_size=(1024, 1024, 1024, 1024),
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ attn_drop=0.0,
+ proj_drop=0.0,
+ drop_path=0.3,
+ shuffle_orders=True,
+ pre_norm=True,
+ enable_rpe=False,
+ enable_flash=True,
+ upcast_attention=False,
+ upcast_softmax=False,
+ cls_mode=False,
+ pdnorm_bn=False,
+ pdnorm_ln=False,
+ pdnorm_decouple=True,
+ pdnorm_adaptive=False,
+ pdnorm_affine=True,
+ pdnorm_conditions=("nuScenes", "SemanticKITTI", "Waymo"),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=[0.002, 0.0002],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0002)]
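+# max_lr is a list because param_dicts is assumed to split parameters into two groups:
+# the default group (peak lr 0.002) and parameters matched by the "block" keyword (peak lr 0.0002).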
+
+# dataset settings
+dataset_type = "WaymoDataset"
+data_root = "data/waymo"
+ignore_index = -1
+names = [
+ "Car",
+ "Truck",
+ "Bus",
+ # Other small vehicles (e.g. pedicab) and large vehicles (e.g. construction vehicles, RV, limo, tram).
+ "Other Vehicle",
+ "Motorcyclist",
+ "Bicyclist",
+ "Pedestrian",
+ "Sign",
+ "Traffic Light",
+ # Lamp post, traffic sign pole etc.
+ "Pole",
+ # Construction cone/pole.
+ "Construction Cone",
+ "Bicycle",
+ "Motorcycle",
+ "Building",
+ # Bushes, tree branches, tall grasses, flowers etc.
+ "Vegetation",
+ "Tree Trunk",
+    # Curb on the edge of roads. This does not include road boundaries if there's no curb.
+ "Curb",
+ # Surface a vehicle could drive on. This includes the driveway connecting
+ # parking lot and road over a section of sidewalk.
+ "Road",
+    # Marking on the road that's specifically for defining lanes such as
+ # single/double white/yellow lines.
+ "Lane Marker",
+ # Marking on the road other than lane markers, bumps, cateyes, railtracks etc.
+ "Other Ground",
+    # Most horizontal surface that's not drivable, e.g. grassy hill, pedestrian walkway stairs etc.
+    "Walkable",
+    # Nicely paved walkable surface where pedestrians are most likely to walk.
+ "Sidewalk",
+]
+
+data = dict(
+ num_classes=22,
+ ignore_index=ignore_index,
+ names=names,
+ train=dict(
+ type=dataset_type,
+ split="training",
+ data_root=data_root,
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(type="PointClip", point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2)),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="validation",
+ data_root=data_root,
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2)),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="validation",
+ data_root=data_root,
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2)),
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
+ dict(
+ type="GridSample",
+ grid_size=0.025,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_inverse=True,
+ ),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "strength"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ignore_index=ignore_index,
+ ),
+)
diff --git a/Pointcept/configs/waymo/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/waymo/semseg-spunet-v1m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..67d8011d2ff4df45757749facdb44d2f1c175b10
--- /dev/null
+++ b/Pointcept/configs/waymo/semseg-spunet-v1m1-0-base.py
@@ -0,0 +1,210 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12 # bs: total bs in all gpus
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+ type="DefaultSegmentor",
+ backbone=dict(
+ type="SpUNet-v1m1",
+ in_channels=4,
+ num_classes=22,
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
+ ),
+ criteria=[
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+ ],
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+ type="OneCycleLR",
+ max_lr=optimizer["lr"],
+ pct_start=0.04,
+ anneal_strategy="cos",
+ div_factor=10.0,
+ final_div_factor=100.0,
+)
+
+# dataset settings
+dataset_type = "WaymoDataset"
+data_root = "data/waymo"
+ignore_index = -1
+names = [
+ "Car",
+ "Truck",
+ "Bus",
+ # Other small vehicles (e.g. pedicab) and large vehicles (e.g. construction vehicles, RV, limo, tram).
+ "Other Vehicle",
+ "Motorcyclist",
+ "Bicyclist",
+ "Pedestrian",
+ "Sign",
+ "Traffic Light",
+ # Lamp post, traffic sign pole etc.
+ "Pole",
+ # Construction cone/pole.
+ "Construction Cone",
+ "Bicycle",
+ "Motorcycle",
+ "Building",
+ # Bushes, tree branches, tall grasses, flowers etc.
+ "Vegetation",
+ "Tree Trunk",
+    # Curb on the edge of roads. This does not include road boundaries if there's no curb.
+ "Curb",
+ # Surface a vehicle could drive on. This includes the driveway connecting
+ # parking lot and road over a section of sidewalk.
+ "Road",
+    # Marking on the road that's specifically for defining lanes such as
+ # single/double white/yellow lines.
+ "Lane Marker",
+ # Marking on the road other than lane markers, bumps, cateyes, railtracks etc.
+ "Other Ground",
+    # Most horizontal surface that's not drivable, e.g. grassy hill, pedestrian walkway stairs etc.
+    "Walkable",
+    # Nicely paved walkable surface where pedestrians are most likely to walk.
+ "Sidewalk",
+]
+
+data = dict(
+ num_classes=22,
+ ignore_index=ignore_index,
+ names=names,
+ train=dict(
+ type=dataset_type,
+ split="training",
+ data_root=data_root,
+ transform=[
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
+ dict(type="PointClip", point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2)),
+ dict(type="RandomScale", scale=[0.9, 1.1]),
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
+ dict(type="RandomFlip", p=0.5),
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
+ # dict(type="CenterShift", apply_z=False),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ val=dict(
+ type=dataset_type,
+ split="validation",
+ data_root=data_root,
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2)),
+ dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="train",
+ keys=("coord", "strength", "segment"),
+ return_grid_coord=True,
+ ),
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "segment"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ test_mode=False,
+ ignore_index=ignore_index,
+ ),
+ test=dict(
+ type=dataset_type,
+ split="validation",
+ data_root=data_root,
+ transform=[
+ dict(type="PointClip", point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2)),
+ ],
+ test_mode=True,
+ test_cfg=dict(
+ voxelize=dict(
+ type="GridSample",
+ grid_size=0.05,
+ hash_type="fnv",
+ mode="test",
+ return_grid_coord=True,
+ keys=("coord", "strength"),
+ ),
+ crop=None,
+ post_transform=[
+ dict(type="ToTensor"),
+ dict(
+ type="Collect",
+ keys=("coord", "grid_coord", "index"),
+ feat_keys=("coord", "strength"),
+ ),
+ ],
+ aug_transform=[
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[0],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[1],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ [
+ dict(
+ type="RandomRotateTargetAngle",
+ angle=[3 / 2],
+ axis="z",
+ center=[0, 0, 0],
+ p=1,
+ )
+ ],
+ ],
+ ),
+ ignore_index=ignore_index,
+ ),
+)
diff --git a/Pointcept/docs/logo.png b/Pointcept/docs/logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..cdd04d6aa6cc7a31e29e3970977427e7edde2c17
Binary files /dev/null and b/Pointcept/docs/logo.png differ
diff --git a/Pointcept/docs/logo_dark.png b/Pointcept/docs/logo_dark.png
new file mode 100644
index 0000000000000000000000000000000000000000..32af83bd9e42d65251637fb4a90a9e3a4d2e0f83
Binary files /dev/null and b/Pointcept/docs/logo_dark.png differ
diff --git a/Pointcept/docs/offset.png b/Pointcept/docs/offset.png
new file mode 100644
index 0000000000000000000000000000000000000000..e66df4f29321312d700f2bb7960e502fe233c5d6
Binary files /dev/null and b/Pointcept/docs/offset.png differ
diff --git a/Pointcept/docs/offset_dark.png b/Pointcept/docs/offset_dark.png
new file mode 100755
index 0000000000000000000000000000000000000000..4db79e0d94e7669b6653c0e9abad96ec1cc86364
Binary files /dev/null and b/Pointcept/docs/offset_dark.png differ
diff --git a/Pointcept/libs/pointgroup_ops/functions/__init__.py b/Pointcept/libs/pointgroup_ops/functions/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d8120292a327a400e15fa9a72cd08b16a68794f
--- /dev/null
+++ b/Pointcept/libs/pointgroup_ops/functions/__init__.py
@@ -0,0 +1 @@
+from .functions import bfs_cluster, ballquery_batch_p, Clustering
diff --git a/Pointcept/libs/pointgroup_ops/functions/functions.py b/Pointcept/libs/pointgroup_ops/functions/functions.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8ed62b10c2e88236d814ab34cece5a335e16930
--- /dev/null
+++ b/Pointcept/libs/pointgroup_ops/functions/functions.py
@@ -0,0 +1,176 @@
+import torch
+from torch.autograd import Function
+import pointgroup_ops_cuda
+
+
+class BallQueryBatchP(Function):
+ @staticmethod
+ def forward(ctx, coords, batch_idxs, batch_offsets, radius, meanActive):
+ """
+ :param ctx:
+ :param coords: (n, 3) float
+ :param batch_idxs: (n) int
+ :param batch_offsets: (B+1) int
+ :param radius: float
+ :param meanActive: int
+ :return: idx (nActive), int
+ :return: start_len (n, 2), int
+ """
+
+ n = coords.size(0)
+
+ assert coords.is_contiguous() and coords.is_cuda
+ assert batch_idxs.is_contiguous() and batch_idxs.is_cuda
+ assert batch_offsets.is_contiguous() and batch_offsets.is_cuda
+
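+        # The output buffer holds at most n * meanActive neighbor indices; if the CUDA ball
+        # query reports more active pairs than that, enlarge meanActive and run it again.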
+ while True:
+ idx = torch.cuda.IntTensor(n * meanActive).zero_()
+ start_len = torch.cuda.IntTensor(n, 2).zero_()
+ nActive = pointgroup_ops_cuda.ballquery_batch_p(
+ coords, batch_idxs, batch_offsets, idx, start_len, n, meanActive, radius
+ )
+ if nActive <= n * meanActive:
+ break
+ meanActive = int(nActive // n + 1)
+ idx = idx[:nActive]
+
+ return idx, start_len
+
+ @staticmethod
+ def backward(ctx, a=None, b=None):
+ return None, None, None
+
+
+ballquery_batch_p = BallQueryBatchP.apply
+
+
+class Clustering:
+ def __init__(
+ self,
+ ignored_labels,
+ class_mapping,
+ thresh=0.03,
+ closed_points=300,
+ min_points=50,
+ propose_points=100,
+ score_func=torch.max,
+ ) -> None:
+ self.ignored_labels = ignored_labels
+ self.thresh = thresh
+ self.closed_points = closed_points
+ self.min_points = min_points
+ self.class_mapping = class_mapping
+ self.propose_points = propose_points
+ self.score_func = score_func
+
+ def cluster(self, vertices, scores):
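+        # Convert per-point scores to hard labels, group same-label points into spatially
+        # connected clusters, and keep clusters with more than `propose_points` points.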
+ labels = torch.max(scores, 1)[1] # (N) long, cuda
+ proposals_idx, proposals_offset = self.cluster_(vertices, labels)
+
+ ## debug
+ # import ipdb; ipdb.set_trace()
+ # colors = np.array(create_color_palette())[labels.cpu()]
+ # write_triangle_mesh(vertices, colors, None, 'semantics.ply')
+
+ # scatter
+ proposals_pred = torch.zeros(
+ (proposals_offset.shape[0] - 1, vertices.shape[0]), dtype=torch.int
+ ) # (nProposal, N), int, cuda
+ proposals_pred[proposals_idx[:, 0].long(), proposals_idx[:, 1].long()] = 1
+ labels = labels[proposals_idx[:, 1][proposals_offset[:-1].long()].long()]
+
+ proposals_pointnum = proposals_pred.sum(1)
+ npoint_mask = proposals_pointnum > self.propose_points
+
+ proposals_pred = proposals_pred[npoint_mask]
+ labels = labels[npoint_mask]
+ return proposals_pred, labels
+
+ def cluster_(self, vertices, labels):
+ """
+        :param vertices: (N, 3) float, cuda
+        :param labels: (N) long, cuda, semantic label per point (e.g. 0-19)
+ """
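+        # All points are treated as a single batch (batch index 0) for the ball query below.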
+ batch_idxs = torch.zeros_like(labels)
+
+ mask_non_ignored = torch.ones_like(labels).bool()
+ for ignored_label in self.ignored_labels:
+ mask_non_ignored = mask_non_ignored & (
+ self.class_mapping[labels] != ignored_label
+ )
+ object_idxs = mask_non_ignored.nonzero().view(-1)
+
+ vertices_ = vertices[object_idxs].float()
+ labels_ = labels[object_idxs].int()
+
+ if vertices_.numel() == 0:
+ return torch.zeros((0, 2)).int(), torch.zeros(1).int()
+
+ batch_idxs_ = batch_idxs[object_idxs].int()
+ batch_offsets_ = torch.FloatTensor([0, object_idxs.shape[0]]).int().cuda()
+
+ idx, start_len = ballquery_batch_p(
+ vertices_, batch_idxs_, batch_offsets_, self.thresh, self.closed_points
+ )
+ proposals_idx, proposals_offset = bfs_cluster(
+ labels_.cpu(), idx.cpu(), start_len.cpu(), self.min_points
+ )
+ proposals_idx[:, 1] = object_idxs[proposals_idx[:, 1].long()].int()
+
+ return proposals_idx, proposals_offset
+
+ def get_instances(self, vertices, scores):
+ proposals_pred, labels = self.cluster(vertices, scores)
+ instances = {}
+ for proposal_id in range(len(proposals_pred)):
+ clusters_i = proposals_pred[proposal_id]
+ score = scores[clusters_i.bool(), labels[proposal_id]]
+ score = self.score_func(score)
+ instances[proposal_id] = {}
+ instances[proposal_id]["conf"] = score.cpu().numpy()
+ instances[proposal_id]["label_id"] = self.class_mapping.cpu()[
+ labels[proposal_id]
+ ]
+ instances[proposal_id]["pred_mask"] = clusters_i.cpu().numpy()
+ return instances
+
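+# Example usage (a sketch; the names below are illustrative): given CUDA tensors `coord` (N, 3)
+# and per-point class scores `scores` (N, C), plus a LongTensor `class_mapping` from prediction
+# indices to dataset label ids:
+#   clustering = Clustering(ignored_labels=[ignore_id], class_mapping=class_mapping)
+#   instances = clustering.get_instances(coord, scores)
+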
+
+class BFSCluster(Function):
+ @staticmethod
+ def forward(ctx, semantic_label, ball_query_idxs, start_len, threshold):
+ """
+ :param ctx:
+ :param semantic_label: (N), int
+ :param ball_query_idxs: (nActive), int
+        :param start_len: (N, 2), int
+        :param threshold: int, minimum number of points for a cluster to be kept
+ :return: cluster_idxs: int (sumNPoint, 2), dim 0 for cluster_id, dim 1 for corresponding point idxs in N
+ :return: cluster_offsets: int (nCluster + 1)
+ """
+
+ N = start_len.size(0)
+
+ assert semantic_label.is_contiguous()
+ assert ball_query_idxs.is_contiguous()
+ assert start_len.is_contiguous()
+
+ cluster_idxs = semantic_label.new()
+ cluster_offsets = semantic_label.new()
+
+ pointgroup_ops_cuda.bfs_cluster(
+ semantic_label,
+ ball_query_idxs,
+ start_len,
+ cluster_idxs,
+ cluster_offsets,
+ N,
+ threshold,
+ )
+
+ return cluster_idxs, cluster_offsets
+
+ @staticmethod
+ def backward(ctx, a=None):
+ return None
+
+
+bfs_cluster = BFSCluster.apply
diff --git a/Pointcept/libs/pointgroup_ops/setup.py b/Pointcept/libs/pointgroup_ops/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..162b68258920afcd46c3b49764236beecfbf35cb
--- /dev/null
+++ b/Pointcept/libs/pointgroup_ops/setup.py
@@ -0,0 +1,59 @@
+import os
+from sys import argv
+from setuptools import setup
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+from distutils.sysconfig import get_config_vars
+
+(opt,) = get_config_vars("OPT")
+os.environ["OPT"] = " ".join(
+ flag for flag in opt.split() if flag != "-Wstrict-prototypes"
+)
+
+
+def _argparse(pattern, argv, is_flag=True, is_list=False):
+ if is_flag:
+ found = pattern in argv
+ if found:
+ argv.remove(pattern)
+ return found, argv
+ else:
+ arr = [arg for arg in argv if pattern == arg.split("=")[0]]
+ if is_list:
+ if len(arr) == 0: # not found
+ return False, argv
+ else:
+ assert "=" in arr[0], f"{arr[0]} requires a value."
+ argv.remove(arr[0])
+ val = arr[0].split("=")[1]
+ if "," in val:
+ return val.split(","), argv
+ else:
+ return [val], argv
+ else:
+ if len(arr) == 0: # not found
+ return False, argv
+ else:
+ assert "=" in arr[0], f"{arr[0]} requires a value."
+ argv.remove(arr[0])
+ return arr[0].split("=")[1], argv
+
+
+INCLUDE_DIRS, argv = _argparse("--include_dirs", argv, False, is_list=True)
+include_dirs = []
+if not (INCLUDE_DIRS is False):
+ include_dirs += INCLUDE_DIRS
+
+setup(
+ name="pointgroup_ops",
+ packages=["pointgroup_ops"],
+ package_dir={"pointgroup_ops": "functions"},
+ ext_modules=[
+ CUDAExtension(
+ name="pointgroup_ops_cuda",
+ sources=["src/bfs_cluster.cpp", "src/bfs_cluster_kernel.cu"],
+ extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]},
+ )
+ ],
+ include_dirs=[*include_dirs],
+ cmdclass={"build_ext": BuildExtension},
+)
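+
+# Typical build, run from this directory (sketch):
+#   python setup.py install
+# Additional include directories can be passed as a comma-separated list, e.g.
+#   python setup.py install --include_dirs=${CUDA_HOME}/include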
diff --git a/Pointcept/libs/pointgroup_ops/src/bfs_cluster.cpp b/Pointcept/libs/pointgroup_ops/src/bfs_cluster.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d0298aae5bf1f184eb4e923d8f9f8893168c8e19
--- /dev/null
+++ b/Pointcept/libs/pointgroup_ops/src/bfs_cluster.cpp
@@ -0,0 +1,145 @@
+/*
+Ball Query with BatchIdx & Clustering Algorithm
+Written by Li Jiang
+All Rights Reserved 2020.
+*/
+
+#include <torch/extension.h>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <ATen/cuda/CUDAEvent.h>
+#include <c10/cuda/CUDAGuard.h>
+#include <queue>
+#include <vector>
+#include <cstdint>
+#include <cassert>
+
+int ballquery_batch_p_cuda(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, cudaStream_t stream);
+
+
+using Int = int32_t;
+class ConnectedComponent{
+public:
+  std::vector<Int> pt_idxs {};
+
+ ConnectedComponent(){};
+ void addPoint(Int pt_idx)
+ {
+ pt_idxs.push_back(pt_idx);
+
+ }
+};
+using ConnectedComponents = std::vector<ConnectedComponent>;
+
+/* ================================== ballquery_batch_p ================================== */
+// input xyz: (n, 3) float
+// input batch_idxs: (n) int
+// input batch_offsets: (B+1) int, batch_offsets[-1]
+// output idx: (n * meanActive) dim 0 for number of points in the ball, idx in n
+// output start_len: (n, 2), int
+int ballquery_batch_p(at::Tensor xyz_tensor, at::Tensor batch_idxs_tensor, at::Tensor batch_offsets_tensor, at::Tensor idx_tensor, at::Tensor start_len_tensor, int n, int meanActive, float radius){
+  const float *xyz = xyz_tensor.data_ptr<float>();
+  const int *batch_idxs = batch_idxs_tensor.data_ptr<int>();
+  const int *batch_offsets = batch_offsets_tensor.data_ptr<int>();
+  int *idx = idx_tensor.data_ptr<int>();
+  int *start_len = start_len_tensor.data_ptr<int>();
+
+ cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+ int cumsum = ballquery_batch_p_cuda(n, meanActive, radius, xyz, batch_idxs, batch_offsets, idx, start_len, stream);
+ return cumsum;
+}
+
+/* ================================== bfs_cluster ================================== */
+ConnectedComponent find_cc(Int idx, int *semantic_label, Int *ball_query_idxs, int *start_len, int *visited){
+ ConnectedComponent cc;
+ cc.addPoint(idx);
+ visited[idx] = 1;
+
+    std::queue<Int> Q;
+ assert(Q.empty());
+ Q.push(idx);
+
+ while(!Q.empty()){
+ Int cur = Q.front(); Q.pop();
+ int start = start_len[cur * 2];
+ int len = start_len[cur * 2 + 1];
+ int label_cur = semantic_label[cur];
+ for(Int i = start; i < start + len; i++){
+ Int idx_i = ball_query_idxs[i];
+ if(semantic_label[idx_i] != label_cur) continue;
+ if(visited[idx_i] == 1) continue;
+
+ cc.addPoint(idx_i);
+ visited[idx_i] = 1;
+
+ Q.push(idx_i);
+ }
+ }
+ return cc;
+}
+
+//input: semantic_label, int, N
+//input: ball_query_idxs, Int, (nActive)
+//input: start_len, int, (N, 2)
+//output: clusters, CCs
+int get_clusters(int *semantic_label, Int *ball_query_idxs, int *start_len, const Int nPoint, int threshold, ConnectedComponents &clusters){
+ int visited[nPoint] = {0};
+
+ int sumNPoint = 0;
+ for(Int i = 0; i < nPoint; i++){
+ if(visited[i] == 0){
+ ConnectedComponent CC = find_cc(i, semantic_label, ball_query_idxs, start_len, visited);
+ if((int)CC.pt_idxs.size() >= threshold){
+ clusters.push_back(CC);
+ sumNPoint += (int)CC.pt_idxs.size();
+ }
+ }
+ }
+
+ return sumNPoint;
+}
+
+void fill_cluster_idxs_(ConnectedComponents &CCs, int *cluster_idxs, int *cluster_offsets){
+ for(int i = 0; i < (int)CCs.size(); i++){
+ cluster_offsets[i + 1] = cluster_offsets[i] + (int)CCs[i].pt_idxs.size();
+ for(int j = 0; j < (int)CCs[i].pt_idxs.size(); j++){
+ int idx = CCs[i].pt_idxs[j];
+ cluster_idxs[(cluster_offsets[i] + j) * 2 + 0] = i;
+ cluster_idxs[(cluster_offsets[i] + j) * 2 + 1] = idx;
+ }
+ }
+}
+
+//input: semantic_label, int, N
+//input: ball_query_idxs, int, (nActive)
+//input: start_len, int, (N, 2)
+//output: cluster_idxs, int (sumNPoint, 2), dim 0 for cluster_id, dim 1 for corresponding point idxs in N
+//output: cluster_offsets, int (nCluster + 1)
+void bfs_cluster(at::Tensor semantic_label_tensor, at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor,
+at::Tensor cluster_idxs_tensor, at::Tensor cluster_offsets_tensor, const int N, int threshold){
+    int *semantic_label = semantic_label_tensor.data<int>();
+    Int *ball_query_idxs = ball_query_idxs_tensor.data<Int>();
+    int *start_len = start_len_tensor.data<int>();
+
+ ConnectedComponents CCs;
+ int sumNPoint = get_clusters(semantic_label, ball_query_idxs, start_len, N, threshold, CCs);
+
+ int nCluster = (int)CCs.size();
+ cluster_idxs_tensor.resize_({sumNPoint, 2});
+ cluster_offsets_tensor.resize_({nCluster + 1});
+ cluster_idxs_tensor.zero_();
+ cluster_offsets_tensor.zero_();
+
+    int *cluster_idxs = cluster_idxs_tensor.data<int>();
+    int *cluster_offsets = cluster_offsets_tensor.data<int>();
+
+ fill_cluster_idxs_(CCs, cluster_idxs, cluster_offsets);
+}
+
+//------------------------------------API------------------------------------------
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){
+
+ m.def("ballquery_batch_p", &ballquery_batch_p, "ballquery_batch_p");
+ m.def("bfs_cluster", &bfs_cluster, "bfs_cluster");
+
+}
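+
+// Python-side sketch (module and argument names assumed from the bindings above and
+// from the setup.py of this library):
+//   import pointgroup_ops_cuda
+//   active = pointgroup_ops_cuda.ballquery_batch_p(xyz, batch_idxs, batch_offsets,
+//                                                  idx, start_len, n, meanActive, radius)
+//   pointgroup_ops_cuda.bfs_cluster(semantic_label, ball_query_idxs, start_len,
+//                                   cluster_idxs, cluster_offsets, N, threshold)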
diff --git a/Pointcept/libs/pointgroup_ops/src/bfs_cluster_kernel.cu b/Pointcept/libs/pointgroup_ops/src/bfs_cluster_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..99a31842d605588b214826223143f2669475f402
--- /dev/null
+++ b/Pointcept/libs/pointgroup_ops/src/bfs_cluster_kernel.cu
@@ -0,0 +1,91 @@
+/*
+Ball Query with BatchIdx
+Written by Li Jiang
+All Rights Reserved 2020.
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda_runtime_api.h>
+
+#define TOTAL_THREADS 1024
+#define THREADS_PER_BLOCK 512
+#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
+
+
+/* ================================== ballquery_batch_p ================================== */
+__global__ void ballquery_batch_p_cuda_(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, int *cumsum) {
+ int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
+ if (pt_idx >= n) return;
+
+ start_len += (pt_idx * 2);
+ int idx_temp[1000];
+
+ float radius2 = radius * radius;
+ float o_x = xyz[pt_idx * 3 + 0];
+ float o_y = xyz[pt_idx * 3 + 1];
+ float o_z = xyz[pt_idx * 3 + 2];
+
+ int batch_idx = batch_idxs[pt_idx];
+ int start = batch_offsets[batch_idx];
+ int end = batch_offsets[batch_idx + 1];
+
+ int cnt = 0;
+ for(int k = start; k < end; k++){
+ float x = xyz[k * 3 + 0];
+ float y = xyz[k * 3 + 1];
+ float z = xyz[k * 3 + 2];
+ float d2 = (o_x - x) * (o_x - x) + (o_y - y) * (o_y - y) + (o_z - z) * (o_z - z);
+ if(d2 < radius2){
+ if(cnt < 1000){
+ idx_temp[cnt] = k;
+ }
+ else{
+ break;
+ }
+ ++cnt;
+ }
+ }
+
+ start_len[0] = atomicAdd(cumsum, cnt);
+ start_len[1] = cnt;
+
+ int thre = n * meanActive;
+ if(start_len[0] >= thre) return;
+
+ idx += start_len[0];
+ if(start_len[0] + cnt >= thre) cnt = thre - start_len[0];
+
+ for(int k = 0; k < cnt; k++){
+ idx[k] = idx_temp[k];
+ }
+}
+
+
+int ballquery_batch_p_cuda(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, cudaStream_t stream) {
+ // param xyz: (n, 3)
+ // param batch_idxs: (n)
+ // param batch_offsets: (B + 1)
+ // output idx: (n * meanActive) dim 0 for number of points in the ball, idx in n
+ // output start_len: (n, 2), int
+
+ cudaError_t err;
+
+ dim3 blocks(DIVUP(n, THREADS_PER_BLOCK));
+ dim3 threads(THREADS_PER_BLOCK);
+
+ int cumsum = 0;
+ int* p_cumsum;
+ cudaMalloc((void**)&p_cumsum, sizeof(int));
+ cudaMemcpy(p_cumsum, &cumsum, sizeof(int), cudaMemcpyHostToDevice);
+
+    ballquery_batch_p_cuda_<<<blocks, threads, 0, stream>>>(n, meanActive, radius, xyz, batch_idxs, batch_offsets, idx, start_len, p_cumsum);
+
+ err = cudaGetLastError();
+ if (cudaSuccess != err) {
+ fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
+ exit(-1);
+ }
+
+    cudaMemcpy(&cumsum, p_cumsum, sizeof(int), cudaMemcpyDeviceToHost);
+    cudaFree(p_cumsum);  // release the temporary device counter
+    return cumsum;
+}
diff --git a/Pointcept/libs/pointops/__init__.py b/Pointcept/libs/pointops/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8f75488366c12e144febe3adccd63b40820cdfa
--- /dev/null
+++ b/Pointcept/libs/pointops/__init__.py
@@ -0,0 +1 @@
+from .functions import *
diff --git a/Pointcept/libs/pointops/functions/__init__.py b/Pointcept/libs/pointops/functions/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c05f2f4b74f1ae4343daf9b38b4576d75f13e81
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/__init__.py
@@ -0,0 +1,14 @@
+from .query import knn_query, ball_query, random_ball_query
+from .sampling import farthest_point_sampling
+from .grouping import grouping, grouping2
+from .interpolation import interpolation, interpolation2
+from .subtraction import subtraction
+from .aggregation import aggregation
+from .attention import attention_relation_step, attention_fusion_step
+from .utils import (
+ query_and_group,
+ knn_query_and_group,
+ ball_query_and_group,
+ batch2offset,
+ offset2batch,
+)
diff --git a/Pointcept/libs/pointops/functions/aggregation.py b/Pointcept/libs/pointops/functions/aggregation.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0f62444a70d317dfb8df4adc1167bba5dd19ef1
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/aggregation.py
@@ -0,0 +1,57 @@
+import torch
+from torch.autograd import Function
+
+from pointops._C import aggregation_forward_cuda, aggregation_backward_cuda
+
+
+class Aggregation(Function):
+ @staticmethod
+ def forward(ctx, input, position, weight, idx):
+ """
+ input: input: (n, c), position: (n, nsample, c), weight : (n, nsample, c'), idx: (n, nsample)
+ output: (n, c)
+ """
+ assert (
+ input.is_contiguous()
+ and position.is_contiguous()
+ and weight.is_contiguous()
+ )
+ n, nsample, c = position.shape
+ w_c = weight.shape[-1]
+ output = torch.cuda.FloatTensor(n, c).zero_()
+ aggregation_forward_cuda(
+ n, nsample, c, w_c, input, position, weight, idx, output
+ )
+ ctx.save_for_backward(input, position, weight, idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_out: (n, c)
+ output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight : (n, nsample, c')
+ """
+ input, position, weight, idx = ctx.saved_tensors
+ n, nsample, c = position.shape
+ w_c = weight.shape[-1]
+ grad_input = torch.cuda.FloatTensor(n, c).zero_()
+ grad_position = torch.cuda.FloatTensor(n, nsample, c).zero_()
+ grad_weight = torch.cuda.FloatTensor(n, nsample, w_c).zero_()
+ aggregation_backward_cuda(
+ n,
+ nsample,
+ c,
+ w_c,
+ input,
+ position,
+ weight,
+ idx,
+ grad_output,
+ grad_input,
+ grad_position,
+ grad_weight,
+ )
+ return grad_input, grad_position, grad_weight, None
+
+
+aggregation = Aggregation.apply
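+
+# Minimal usage sketch (hypothetical shapes; contiguous CUDA tensors are required and
+# the feature width c is assumed to be a multiple of the weight width c'):
+#   feat = torch.rand(n, c, device="cuda")
+#   pos = torch.rand(n, nsample, c, device="cuda")
+#   w = torch.rand(n, nsample, c_w, device="cuda")
+#   idx = torch.randint(0, n, (n, nsample), device="cuda", dtype=torch.int32)
+#   out = aggregation(feat, pos, w, idx)  # (n, c)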
diff --git a/Pointcept/libs/pointops/functions/attention.py b/Pointcept/libs/pointops/functions/attention.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e774ff67051d6272f7de3fd751bf3b712431249
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/attention.py
@@ -0,0 +1,120 @@
+import torch
+from torch.autograd import Function
+
+from pointops._C import (
+ attention_relation_step_forward_cuda,
+ attention_relation_step_backward_cuda,
+ attention_fusion_step_forward_cuda,
+ attention_fusion_step_backward_cuda,
+)
+
+
+class AttentionRelationStep(Function):
+ @staticmethod
+ def forward(ctx, query, key, weight, index_target, index_refer):
+ """
+ input - query: (n, g, c), key: (n, g, c), weight: (c) 1_c for scatter attention,
+ index_target: (m), index_refer: (m)
+ output - relation: (M, g)
+ """
+
+ assert (
+ query.is_contiguous()
+ and key.is_contiguous()
+ and index_target.is_contiguous()
+ and index_refer.is_contiguous()
+ and weight.is_contiguous()
+ )
+
+ assert index_target.shape[0] == index_refer.shape[0]
+
+ _, g, c = query.shape
+ m = index_target.shape[0]
+ output = torch.cuda.FloatTensor(m, g).zero_()
+ attention_relation_step_forward_cuda(
+ m, g, c, query, key, weight, index_target.int(), index_refer.int(), output
+ )
+ ctx.save_for_backward(query, key, weight, index_target, index_refer)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ query, key, weight, index_target, index_refer = ctx.saved_tensors
+ n, g, c = query.shape
+ m = index_target.shape[0]
+ grad_query = torch.cuda.FloatTensor(n, g, c).zero_()
+ grad_key = torch.cuda.FloatTensor(n, g, c).zero_()
+ grad_weight = torch.cuda.FloatTensor(c).zero_()
+ attention_relation_step_backward_cuda(
+ m,
+ g,
+ c,
+ query,
+ grad_query,
+ key,
+ grad_key,
+ weight,
+ grad_weight,
+ index_target.int(),
+ index_refer.int(),
+ grad_output,
+ )
+ return grad_query, grad_key, None, None, None
+
+
+class AttentionFusionStep(Function):
+ @staticmethod
+ def forward(ctx, weight, value, index_target, index_refer):
+ """
+ input - weight: (m, g), value: (n, g, c)
+        index_target: (m), index_refer: (m)
+ output - output: (n, g, c)
+ """
+
+ assert (
+ weight.is_contiguous()
+ and value.is_contiguous()
+ and index_target.is_contiguous()
+ and index_refer.is_contiguous()
+ and weight.is_contiguous()
+ )
+
+ assert index_target.shape[0] == index_refer.shape[0]
+
+ n, g, c = value.shape
+ m = index_refer.shape[0]
+ output = torch.cuda.FloatTensor(n, g, c).zero_()
+ attention_fusion_step_forward_cuda(
+ m, g, c, weight, value, index_target.int(), index_refer.int(), output
+ )
+ ctx.save_for_backward(weight, value, index_target, index_refer)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_output: (n, g, c)
+ output: grad_weight: (m, g), grad_value: (n, g, c), none, none
+ """
+ weight, value, index_target, index_refer = ctx.saved_tensors
+ n, g, c = value.shape
+ m = index_target.shape[0]
+ grad_weight = torch.cuda.FloatTensor(m, g).zero_()
+ grad_value = torch.cuda.FloatTensor(n, g, c).zero_()
+ attention_fusion_step_backward_cuda(
+ m,
+ g,
+ c,
+ weight,
+ grad_weight,
+ value,
+ grad_value,
+ index_target.int(),
+ index_refer.int(),
+ grad_output,
+ )
+ return grad_weight, grad_value, None, None
+
+
+attention_relation_step = AttentionRelationStep.apply
+attention_fusion_step = AttentionFusionStep.apply
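+
+# Usage sketch (hypothetical shapes, following the docstrings above): relation scores
+# are computed per (target, refer) index pair and then fused back onto target points.
+#   rel = attention_relation_step(query, key, weight, index_target, index_refer)  # (m, g)
+#   out = attention_fusion_step(attn, value, index_target, index_refer)           # (n, g, c)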
diff --git a/Pointcept/libs/pointops/functions/grouping.py b/Pointcept/libs/pointops/functions/grouping.py
new file mode 100644
index 0000000000000000000000000000000000000000..c22d1e827f82331a4287362a368ccf93927493e6
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/grouping.py
@@ -0,0 +1,63 @@
+import torch
+from torch.autograd import Function
+
+from pointops._C import grouping_forward_cuda, grouping_backward_cuda
+
+
+class Grouping(Function):
+ @staticmethod
+ def forward(ctx, input, idx):
+ """
+ input: input: (n, c), idx : (m, nsample)
+ output: (m, nsample, c)
+ """
+ assert input.is_contiguous() and idx.is_contiguous()
+ m, nsample, n, c = idx.shape[0], idx.shape[1], input.shape[0], input.shape[1]
+ output = torch.cuda.FloatTensor(m, nsample, c)
+ grouping_forward_cuda(m, nsample, c, input, idx, output)
+ ctx.n = n
+ ctx.save_for_backward(idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+        input: grad_out: (m, nsample, c)
+ output: (n, c), None
+ """
+ n = ctx.n
+ (idx,) = ctx.saved_tensors
+ m, nsample, c = grad_output.shape
+ grad_input = torch.cuda.FloatTensor(n, c).zero_()
+ grouping_backward_cuda(m, nsample, c, grad_output, idx, grad_input)
+ return grad_input, None
+
+
+def grouping(idx, feat, xyz, new_xyz=None, with_xyz=False):
+ if new_xyz is None:
+ new_xyz = xyz
+ assert xyz.is_contiguous() and feat.is_contiguous()
+ m, nsample, c = idx.shape[0], idx.shape[1], feat.shape[1]
+ xyz = torch.cat([xyz, torch.zeros([1, 3]).to(xyz.device)], dim=0)
+ feat = torch.cat([feat, torch.zeros([1, c]).to(feat.device)], dim=0)
+ grouped_feat = feat[idx.view(-1).long(), :].view(
+ m, nsample, c
+ ) # (m, num_sample, c)
+
+ if with_xyz:
+ assert new_xyz.is_contiguous()
+ mask = torch.sign(idx + 1)
+ grouped_xyz = xyz[idx.view(-1).long(), :].view(
+ m, nsample, 3
+ ) - new_xyz.unsqueeze(
+ 1
+ ) # (m, num_sample, 3)
+ grouped_xyz = torch.einsum(
+ "n s c, n s -> n s c", grouped_xyz, mask
+ ) # (m, num_sample, 3)
+ return torch.cat((grouped_xyz, grouped_feat), -1)
+ else:
+ return grouped_feat
+
+
+grouping2 = Grouping.apply
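+
+# Usage sketch (hypothetical shapes): gather neighbor features by index; entries with
+# idx == -1 pick up the zero row appended to xyz/feat above and are masked out of the
+# relative coordinates.
+#   grouped = grouping(idx, feat, xyz, new_xyz, with_xyz=True)  # (m, nsample, 3 + c)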
diff --git a/Pointcept/libs/pointops/functions/interpolation.py b/Pointcept/libs/pointops/functions/interpolation.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a5c861f272f89421fa097505d9882b2c473a060
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/interpolation.py
@@ -0,0 +1,59 @@
+import torch
+from torch.autograd import Function
+
+from pointops._C import interpolation_forward_cuda, interpolation_backward_cuda
+from .query import knn_query
+
+
+def interpolation(xyz, new_xyz, feat, offset, new_offset, k=3):
+ """
+    input: xyz: (m, 3), new_xyz: (n, 3), feat: (m, c), offset: (b), new_offset: (b)
+ output: (n, c)
+ """
+ assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous()
+ idx, dist = knn_query(k, xyz, offset, new_xyz, new_offset) # (n, 3), (n, 3)
+ dist_recip = 1.0 / (dist + 1e-8) # (n, 3)
+ norm = torch.sum(dist_recip, dim=1, keepdim=True)
+ weight = dist_recip / norm # (n, 3)
+
+ new_feat = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_()
+ for i in range(k):
+ new_feat += feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1)
+ return new_feat
+
+
+class Interpolation(Function):
+ @staticmethod
+ def forward(ctx, xyz, new_xyz, input, offset, new_offset, k=3):
+ """
+ input: coords: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b)
+ output: (n, c)
+ """
+ assert xyz.is_contiguous() and new_xyz.is_contiguous() and input.is_contiguous()
+ idx, dist = knn_query(k, xyz, offset, new_xyz, new_offset) # (n, k), (n, k)
+ dist_recip = 1.0 / (dist + 1e-8) # (n, k)
+ norm = torch.sum(dist_recip, dim=1, keepdim=True)
+ weight = dist_recip / norm # (n, k)
+
+ n, c, m = new_xyz.shape[0], input.shape[1], input.shape[0]
+ output = torch.cuda.FloatTensor(n, c).zero_()
+ interpolation_forward_cuda(n, c, k, input, idx, weight, output)
+ ctx.m, ctx.k = m, k
+ ctx.save_for_backward(idx, weight)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+        input: grad_output: (n, c)
+        output: grad_input: (m, c)
+ """
+ m, k = ctx.m, ctx.k
+ idx, weight = ctx.saved_tensors
+ n, c = grad_output.shape
+ grad_input = torch.cuda.FloatTensor(m, c).zero_()
+ interpolation_backward_cuda(n, c, k, grad_output, idx, weight, grad_input)
+ return None, None, grad_input, None, None, None
+
+
+interpolation2 = Interpolation.apply
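+
+# Usage sketch (hypothetical shapes): propagate features from the m points in xyz to the
+# n points in new_xyz by inverse-distance weighting over the k nearest neighbors.
+#   up_feat = interpolation(xyz, new_xyz, feat, offset, new_offset, k=3)  # (n, c)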
diff --git a/Pointcept/libs/pointops/functions/query.py b/Pointcept/libs/pointops/functions/query.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1294b6125e00ae1d1dec21ed52a803c164c4810
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/query.py
@@ -0,0 +1,113 @@
+import torch
+from torch.autograd import Function
+
+from pointops._C import knn_query_cuda, random_ball_query_cuda, ball_query_cuda
+
+
+class KNNQuery(Function):
+ @staticmethod
+ def forward(ctx, nsample, xyz, offset, new_xyz=None, new_offset=None):
+ """
+ input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b)
+ output: idx: (m, nsample) -1 is placeholder, dist2: (m, nsample)
+ """
+ if new_xyz is None or new_offset is None:
+ new_xyz = xyz
+ new_offset = offset
+ assert xyz.is_contiguous() and new_xyz.is_contiguous()
+ m = new_xyz.shape[0]
+ idx = torch.cuda.IntTensor(m, nsample).zero_()
+ dist2 = torch.cuda.FloatTensor(m, nsample).zero_()
+ knn_query_cuda(
+ m, nsample, xyz, new_xyz, offset.int(), new_offset.int(), idx, dist2
+ )
+ return idx, torch.sqrt(dist2)
+
+
+class RandomBallQuery(Function):
+ """Random Ball Query.
+
+ Find nearby points in spherical space.
+ """
+
+ @staticmethod
+ def forward(
+ ctx, nsample, max_radius, min_radius, xyz, offset, new_xyz=None, new_offset=None
+ ):
+ """
+ input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b)
+ output: idx: (m, nsample), dist2: (m, nsample)
+ """
+ if new_xyz is None or new_offset is None:
+ new_xyz = xyz
+ new_offset = offset
+ assert xyz.is_contiguous() and new_xyz.is_contiguous()
+ assert min_radius < max_radius
+
+ m = new_xyz.shape[0]
+ order = []
+ for k in range(offset.shape[0]):
+ s_k, e_k = (0, offset[0]) if k == 0 else (offset[k - 1], offset[k])
+ order.append(
+ torch.randperm(e_k - s_k, dtype=torch.int32, device=offset.device) + s_k
+ )
+ order = torch.cat(order, dim=0)
+ idx = torch.cuda.IntTensor(m, nsample).zero_()
+ dist2 = torch.cuda.FloatTensor(m, nsample).zero_()
+ random_ball_query_cuda(
+ m,
+ nsample,
+ min_radius,
+ max_radius,
+ order,
+ xyz,
+ new_xyz,
+ offset.int(),
+ new_offset.int(),
+ idx,
+ dist2,
+ )
+ return idx, torch.sqrt(dist2)
+
+
+class BallQuery(Function):
+ """Ball Query.
+
+ Find nearby points in spherical space.
+ """
+
+ @staticmethod
+ def forward(
+ ctx, nsample, max_radius, min_radius, xyz, offset, new_xyz=None, new_offset=None
+ ):
+ """
+ input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b)
+ output: idx: (m, nsample), dist2: (m, nsample)
+ """
+ if new_xyz is None or new_offset is None:
+ new_xyz = xyz
+ new_offset = offset
+ assert xyz.is_contiguous() and new_xyz.is_contiguous()
+ assert min_radius < max_radius
+
+ m = new_xyz.shape[0]
+ idx = torch.cuda.IntTensor(m, nsample).zero_()
+ dist2 = torch.cuda.FloatTensor(m, nsample).zero_()
+ ball_query_cuda(
+ m,
+ nsample,
+ min_radius,
+ max_radius,
+ xyz,
+ new_xyz,
+ offset.int(),
+ new_offset.int(),
+ idx,
+ dist2,
+ )
+ return idx, torch.sqrt(dist2)
+
+
+knn_query = KNNQuery.apply
+ball_query = BallQuery.apply
+random_ball_query = RandomBallQuery.apply
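+
+# Usage sketch (hypothetical sizes, a single batch of 1024 points):
+#   xyz = torch.rand(1024, 3, device="cuda")
+#   offset = torch.tensor([1024], device="cuda", dtype=torch.int32)
+#   idx, dist = knn_query(16, xyz, offset)             # (1024, 16), (1024, 16)
+#   idx, dist = ball_query(16, 0.2, 0.0, xyz, offset)  # neighbors within radius [0.0, 0.2)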
diff --git a/Pointcept/libs/pointops/functions/sampling.py b/Pointcept/libs/pointops/functions/sampling.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f233d4afe02e43a6a390ca465f7108a01b98541
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/sampling.py
@@ -0,0 +1,27 @@
+import torch
+from torch.autograd import Function
+
+from pointops._C import farthest_point_sampling_cuda
+
+
+class FarthestPointSampling(Function):
+ @staticmethod
+ def forward(ctx, xyz, offset, new_offset):
+ """
+ input: coords: (n, 3), offset: (b), new_offset: (b)
+ output: idx: (m)
+ """
+ assert xyz.is_contiguous()
+ n, b, n_max = xyz.shape[0], offset.shape[0], offset[0]
+ for i in range(1, b):
+ n_max = max(offset[i] - offset[i - 1], n_max)
+ idx = torch.cuda.IntTensor(new_offset[b - 1].item()).zero_()
+ tmp = torch.cuda.FloatTensor(n).fill_(1e10)
+ farthest_point_sampling_cuda(
+ b, n_max, xyz, offset.int(), new_offset.int(), tmp, idx
+ )
+ del tmp
+ return idx
+
+
+farthest_point_sampling = FarthestPointSampling.apply
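+
+# Usage sketch (hypothetical sizes): downsample each batch segment to the count given by
+# new_offset, here 1024 -> 256 points in a single batch.
+#   offset = torch.tensor([1024], device="cuda", dtype=torch.int32)
+#   new_offset = torch.tensor([256], device="cuda", dtype=torch.int32)
+#   sample_idx = farthest_point_sampling(xyz, offset, new_offset)  # (256,)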
diff --git a/Pointcept/libs/pointops/functions/subtraction.py b/Pointcept/libs/pointops/functions/subtraction.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc683ce3d75901777e57886adc077d570230e027
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/subtraction.py
@@ -0,0 +1,38 @@
+import torch
+from torch.autograd import Function
+
+from pointops._C import subtraction_forward_cuda, subtraction_backward_cuda
+
+
+class Subtraction(Function):
+ @staticmethod
+ def forward(ctx, input1, input2, idx):
+ """
+ input: input1: (n, c), input2: (n, c), idx: (n, nsample)
+ output: (n, nsample, c)
+ """
+ assert input1.is_contiguous() and input2.is_contiguous()
+ n, c = input1.shape
+ nsample = idx.shape[-1]
+ output = torch.cuda.FloatTensor(n, nsample, c).zero_()
+ subtraction_forward_cuda(n, nsample, c, input1, input2, idx, output)
+ ctx.save_for_backward(idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_out: (n, nsample, c)
+ output: grad_input1: (n, c), grad_input2: (n, c)
+ """
+ (idx,) = ctx.saved_tensors
+ n, nsample, c = grad_output.shape
+ grad_input1 = torch.cuda.FloatTensor(n, c).zero_()
+ grad_input2 = torch.cuda.FloatTensor(n, c).zero_()
+ subtraction_backward_cuda(
+ n, nsample, c, idx, grad_output, grad_input1, grad_input2
+ )
+ return grad_input1, grad_input2, None
+
+
+subtraction = Subtraction.apply
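+
+# Usage sketch (hypothetical shapes; semantics per the docstring above): for each point
+# and each of its nsample indexed neighbors, a (n, nsample, c) difference tensor is produced.
+#   diff = subtraction(feat1, feat2, idx)  # (n, nsample, c)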
diff --git a/Pointcept/libs/pointops/functions/utils.py b/Pointcept/libs/pointops/functions/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..15e3e328bb012bb684787466f3ec2e97d1317b2b
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/utils.py
@@ -0,0 +1,121 @@
+import torch
+from pointops import knn_query, ball_query, grouping
+
+
+def knn_query_and_group(
+ feat,
+ xyz,
+ offset=None,
+ new_xyz=None,
+ new_offset=None,
+ idx=None,
+ nsample=None,
+ with_xyz=False,
+):
+ if idx is None:
+ assert nsample is not None
+ idx, _ = knn_query(nsample, xyz, offset, new_xyz, new_offset)
+ return grouping(idx, feat, xyz, new_xyz, with_xyz), idx
+
+
+def ball_query_and_group(
+ feat,
+ xyz,
+ offset=None,
+ new_xyz=None,
+ new_offset=None,
+ idx=None,
+ max_radio=None,
+ min_radio=0,
+ nsample=None,
+ with_xyz=False,
+):
+ if idx is None:
+ assert nsample is not None and offset is not None
+ assert max_radio is not None and min_radio is not None
+ idx, _ = ball_query(
+ nsample, max_radio, min_radio, xyz, offset, new_xyz, new_offset
+ )
+ return grouping(idx, feat, xyz, new_xyz, with_xyz), idx
+
+
+def query_and_group(
+ nsample,
+ xyz,
+ new_xyz,
+ feat,
+ idx,
+ offset,
+ new_offset,
+ dilation=0,
+ with_feat=True,
+ with_xyz=True,
+):
+ """
+    input: xyz: (n, 3), new_xyz: (m, 3), feat: (n, c), idx: (m, nsample), offset: (b), new_offset: (b)
+ output: new_feat: (m, nsample, c+3), grouped_idx: (m, nsample)
+ """
+    if new_xyz is None:
+        new_xyz = xyz
+    assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous()
+
+ if idx is None:
+ num_samples_total = 1 + (nsample - 1) * (dilation + 1)
+ # num points in a batch might < num_samples_total => [n1, n2, ..., nk, ns, ns, ns, ...]
+ idx_no_dilation, _ = knn_query(
+ num_samples_total, xyz, offset, new_xyz, new_offset
+ ) # (m, nsample * (d + 1))
+ idx = []
+ batch_end = offset.tolist()
+ batch_start = [0] + batch_end[:-1]
+ new_batch_end = new_offset.tolist()
+ new_batch_start = [0] + new_batch_end[:-1]
+ for i in range(offset.shape[0]):
+ if batch_end[i] - batch_start[i] < num_samples_total:
+ soft_dilation = (batch_end[i] - batch_start[i] - 1) / (nsample - 1) - 1
+ else:
+ soft_dilation = dilation
+ idx.append(
+ idx_no_dilation[
+ new_batch_start[i] : new_batch_end[i],
+ [int((soft_dilation + 1) * i) for i in range(nsample)],
+ ]
+ )
+ idx = torch.cat(idx, dim=0)
+
+ if not with_feat:
+ return idx
+
+ n, m, c = xyz.shape[0], new_xyz.shape[0], feat.shape[1]
+ grouped_xyz = xyz[idx.view(-1).long(), :].view(m, nsample, 3) # (m, nsample, 3)
+ # grouped_xyz = grouping(coords, idx) # (m, nsample, 3)
+ grouped_xyz -= new_xyz.unsqueeze(1) # (m, nsample, 3)
+ grouped_feat = feat[idx.view(-1).long(), :].view(m, nsample, c) # (m, nsample, c)
+ # grouped_feat = grouping(color, idx) # (m, nsample, c)
+
+ if with_xyz:
+ return torch.cat((grouped_xyz, grouped_feat), -1), idx # (m, nsample, 3+c)
+ else:
+ return grouped_feat, idx
+
+
+def offset2batch(offset):
+ return (
+ torch.cat(
+ [
+ (
+ torch.tensor([i] * (o - offset[i - 1]))
+ if i > 0
+ else torch.tensor([i] * o)
+ )
+ for i, o in enumerate(offset)
+ ],
+ dim=0,
+ )
+ .long()
+ .to(offset.device)
+ )
+
+
+def batch2offset(batch):
+ return torch.cumsum(batch.bincount(), dim=0).int()
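+
+# offset <-> batch round-trip sketch: offset holds the cumulative point count per batch,
+# batch holds the per-point batch index.
+#   offset2batch(torch.tensor([3, 5]))           # -> tensor([0, 0, 0, 1, 1])
+#   batch2offset(torch.tensor([0, 0, 0, 1, 1]))  # -> tensor([3, 5], dtype=torch.int32)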
diff --git a/Pointcept/libs/pointops/setup.py b/Pointcept/libs/pointops/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..0cdf07b6c12bf702b40accbb51fd1825e4050a8b
--- /dev/null
+++ b/Pointcept/libs/pointops/setup.py
@@ -0,0 +1,33 @@
+import os
+from setuptools import setup
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+from distutils.sysconfig import get_config_vars
+
+(opt,) = get_config_vars("OPT")
+os.environ["OPT"] = " ".join(
+ flag for flag in opt.split() if flag != "-Wstrict-prototypes"
+)
+
+src = "src"
+sources = [
+ os.path.join(root, file)
+ for root, dirs, files in os.walk(src)
+ for file in files
+ if file.endswith(".cpp") or file.endswith(".cu")
+]
+
+setup(
+ name="pointops",
+ version="1.0",
+ install_requires=["torch", "numpy"],
+ packages=["pointops"],
+ package_dir={"pointops": "functions"},
+ ext_modules=[
+ CUDAExtension(
+ name="pointops._C",
+ sources=sources,
+ extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]},
+ )
+ ],
+ cmdclass={"build_ext": BuildExtension},
+)
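+
+# Build sketch (typical invocation, run from libs/pointops; details may vary with your
+# CUDA/PyTorch setup):
+#   python setup.py install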
diff --git a/Pointcept/libs/pointops/src/__init__.py b/Pointcept/libs/pointops/src/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/Pointcept/libs/pointops/src/aggregation/aggregation_cuda.cpp b/Pointcept/libs/pointops/src/aggregation/aggregation_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..491b6f41660edf9b5ea5656cc88edba8ed807d71
--- /dev/null
+++ b/Pointcept/libs/pointops/src/aggregation/aggregation_cuda.cpp
@@ -0,0 +1,28 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "aggregation_cuda_kernel.h"
+
+
+void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
+{
+    const float *input = input_tensor.data_ptr<float>();
+    const float *position = position_tensor.data_ptr<float>();
+    const float *weight = weight_tensor.data_ptr<float>();
+    const int *idx = idx_tensor.data_ptr<int>();
+    float *output = output_tensor.data_ptr<float>();
+ aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output);
+}
+
+void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor)
+{
+    const float *input = input_tensor.data_ptr<float>();
+    const float *position = position_tensor.data_ptr<float>();
+    const float *weight = weight_tensor.data_ptr<float>();
+    const int *idx = idx_tensor.data_ptr<int>();
+    const float *grad_output = grad_output_tensor.data_ptr<float>();
+    float *grad_input = grad_input_tensor.data_ptr<float>();
+    float *grad_position = grad_position_tensor.data_ptr<float>();
+    float *grad_weight = grad_weight_tensor.data_ptr<float>();
+ aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
+}
diff --git a/Pointcept/libs/pointops/src/aggregation/aggregation_cuda_kernel.cu b/Pointcept/libs/pointops/src/aggregation/aggregation_cuda_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..8339bb7e2088abffefba02c26b248edafed6cf47
--- /dev/null
+++ b/Pointcept/libs/pointops/src/aggregation/aggregation_cuda_kernel.cu
@@ -0,0 +1,53 @@
+#include "../cuda_utils.h"
+#include "aggregation_cuda_kernel.h"
+
+
+__global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
+ // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
+ int index = blockIdx.x * blockDim.x + threadIdx.x;
+ if (index >= n * c) return;
+ const int c_idx = index % c;
+ const int n_idx = index / c;
+ const int w_c_idx = c_idx % w_c;
+ for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
+ {
+ int idx_idx = n_idx * nsample + nsample_idx;
+ int input_idx = idx[idx_idx] * c + c_idx;
+ int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
+ int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
+ output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx];
+ }
+}
+
+__global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
+ // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
+ int index = blockIdx.x * blockDim.x + threadIdx.x;
+ if (index >= n * c) return;
+ const int c_idx = index % c;
+ const int n_idx = index / c;
+ const int w_c_idx = c_idx % w_c;
+ for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
+ {
+ int idx_idx = n_idx * nsample + nsample_idx;
+ int input_idx = idx[idx_idx] * c + c_idx;
+ int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
+ int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
+ atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]);
+ grad_position[position_idx] = grad_output[index] * weight[weight_idx];
+ atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx]));
+ }
+}
+
+void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
+ // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
+ dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
+ dim3 threads(THREADS_PER_BLOCK);
+    aggregation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, output);
+}
+
+void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
+ // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
+ dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
+ dim3 threads(THREADS_PER_BLOCK);
+    aggregation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
+}
diff --git a/Pointcept/libs/pointops/src/aggregation/aggregation_cuda_kernel.h b/Pointcept/libs/pointops/src/aggregation/aggregation_cuda_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..5211a96aa2acbe0d9baf32bddc9ab4be87703072
--- /dev/null
+++ b/Pointcept/libs/pointops/src/aggregation/aggregation_cuda_kernel.h
@@ -0,0 +1,20 @@
+#ifndef _AGGREGATION_CUDA_KERNEL
+#define _AGGREGATION_CUDA_KERNEL
+#include <torch/serialize/tensor.h>
+#include <vector>
+#include <ATen/cuda/CUDAContext.h>
+
+void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
+void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output);
+void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/Pointcept/libs/pointops/src/attention/attention_cuda.cpp b/Pointcept/libs/pointops/src/attention/attention_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..79b90c7ebc3ed85dc389bc4ae3169a086efc5848
--- /dev/null
+++ b/Pointcept/libs/pointops/src/attention/attention_cuda.cpp
@@ -0,0 +1,76 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "attention_cuda_kernel.h"
+
+
+void attention_relation_step_forward_cuda(int m, int g, int c,
+ at::Tensor query_tensor, at::Tensor key_tensor, at::Tensor weight_tensor,
+ at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
+ at::Tensor output_tensor)
+{
+    const float *query = query_tensor.data_ptr<float>();
+    const float *key = key_tensor.data_ptr<float>();
+    const float *weight = weight_tensor.data_ptr<float>();
+    const int *index_target = index_target_tensor.data_ptr<int>();
+    const int *index_refer = index_refer_tensor.data_ptr<int>();
+    float *output = output_tensor.data_ptr<float>();
+ attention_relation_step_forward_cuda_launcher(m, g, c, query, key, weight, index_target, index_refer, output);
+}
+
+void attention_relation_step_backward_cuda(int m, int g, int c,
+ at::Tensor query_tensor, at::Tensor grad_query_tensor,
+ at::Tensor key_tensor, at::Tensor grad_key_tensor,
+ at::Tensor weight_tensor, at::Tensor grad_weight_tensor,
+ at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
+ at::Tensor grad_output_tensor)
+{
+    const float *query = query_tensor.data_ptr<float>();
+    float *grad_query = grad_query_tensor.data_ptr<float>();
+    const float *key = key_tensor.data_ptr<float>();
+    float *grad_key = grad_key_tensor.data_ptr<float>();
+    const float *weight = weight_tensor.data_ptr<float>();
+    float *grad_weight = grad_weight_tensor.data_ptr<float>();
+    const int *index_target = index_target_tensor.data_ptr<int>();
+    const int *index_refer = index_refer_tensor.data_ptr<int>();
+    const float *grad_output = grad_output_tensor.data_ptr<float>();
+ attention_relation_step_backward_cuda_launcher(m, g, c,
+ query, grad_query,
+ key, grad_key,
+ weight, grad_weight,
+ index_target, index_refer, grad_output);
+}
+
+
+void attention_fusion_step_forward_cuda(int m, int g, int c,
+ at::Tensor weight_tensor, at::Tensor value_tensor,
+ at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
+ at::Tensor output_tensor)
+{
+    const float *weight = weight_tensor.data_ptr<float>();
+    const float *value = value_tensor.data_ptr<float>();
+    const int *index_target = index_target_tensor.data_ptr<int>();
+    const int *index_refer = index_refer_tensor.data_ptr<int>();
+    float *output = output_tensor.data_ptr<float>();
+ attention_fusion_step_forward_cuda_launcher(m, g, c, weight, value, index_target, index_refer, output);
+}
+
+
+void attention_fusion_step_backward_cuda(int m, int g, int c,
+ at::Tensor weight_tensor, at::Tensor grad_weight_tensor,
+ at::Tensor value_tensor, at::Tensor grad_value_tensor,
+ at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
+ at::Tensor grad_output_tensor)
+{
+    const float *weight = weight_tensor.data_ptr<float>();
+    float *grad_weight = grad_weight_tensor.data_ptr<float>();
+    const float *value = value_tensor.data_ptr<float>();
+    float *grad_value = grad_value_tensor.data_ptr<float>();
+    const int *index_target = index_target_tensor.data_ptr<int>();
+    const int *index_refer = index_refer_tensor.data_ptr<int>();
+    const float *grad_output = grad_output_tensor.data_ptr<float>();
+ attention_fusion_step_backward_cuda_launcher(m, g, c,
+ weight, grad_weight,
+ value, grad_value,
+ index_target, index_refer, grad_output);
+}
diff --git a/Pointcept/libs/pointops/src/attention/attention_cuda_kernel.cu b/Pointcept/libs/pointops/src/attention/attention_cuda_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..05f4544a4dc4da584ad70eece75265d4845171e7
--- /dev/null
+++ b/Pointcept/libs/pointops/src/attention/attention_cuda_kernel.cu
@@ -0,0 +1,149 @@
+#include "../cuda_utils.h"
+#include "attention_cuda_kernel.h"
+
+
+/*
+Kernels
+*/
+
+__global__ void attention_relation_step_forward_cuda_kernel(int m, int g, int c,
+ const float *query, const float *key, const float *weight,
+ const int *index_target, const int *index_refer,
+ float *output)
+{
+ int r_idx = blockIdx.x * blockDim.x + threadIdx.x;
+ int g_idx = blockIdx.y;
+ int c_idx = blockIdx.z;
+
+ if (r_idx >= m || g_idx >= g || c_idx >= c) return;
+ int q_idx = index_target[r_idx] * g * c + g_idx * c + c_idx;
+ int k_idx = index_refer[r_idx] * g * c + g_idx * c + c_idx;
+
+ float r = query[q_idx] * key[k_idx] * weight[c_idx];
+ atomicAdd(output + r_idx * g + g_idx, r);
+}
+
+__global__ void attention_relation_step_backward_cuda_kernel(int m, int g, int c,
+ const float *query, float *grad_query,
+ const float *key, float *grad_key,
+ const float *weight, float *grad_weight,
+ const int *index_target, const int *index_refer,
+ const float *grad_output)
+{
+ int r_idx = blockIdx.x * blockDim.x + threadIdx.x;
+ int g_idx = blockIdx.y;
+ int c_idx = blockIdx.z;
+
+ if (r_idx >= m || g_idx >= g || c_idx >= c) return;
+
+ int q_idx = index_target[r_idx] * g * c + g_idx * c + c_idx;
+ int k_idx = index_refer[r_idx] * g * c + g_idx * c + c_idx;
+ int o_idx = r_idx * g + g_idx;
+ float grad_r = grad_output[o_idx];
+ atomicAdd(grad_query + q_idx, grad_r * key[k_idx] * weight[c_idx]);
+ atomicAdd(grad_key + k_idx, grad_r * query[q_idx] * weight[c_idx]);
+ atomicAdd(grad_weight + c_idx, grad_r * key[k_idx] * query[q_idx]);
+}
+
+
+__global__ void attention_fusion_step_forward_cuda_kernel(int m, int g, int c,
+ const float *weight, const float *value,
+ const int *index_target, const int *index_refer,
+ float *output)
+{
+ int r_idx = blockIdx.x * blockDim.x + threadIdx.x;
+ int g_idx = blockIdx.y;
+ int c_idx = blockIdx.z;
+
+ if (r_idx >= m || g_idx >= g || c_idx >= c) return;
+
+ int o_idx = index_target[r_idx] * g * c + g_idx * c + c_idx;
+ int v_idx = index_refer[r_idx] * g * c + g_idx * c + c_idx;
+
+ float f = weight[r_idx * g + g_idx] * value[v_idx];
+ atomicAdd(output + o_idx, f);
+}
+
+
+__global__ void attention_fusion_step_backward_cuda_kernel(int m, int g, int c,
+ const float *weight, float *grad_weight,
+ const float *value, float *grad_value,
+ const int *index_target, const int *index_refer,
+ const float *grad_output)
+{
+ int r_idx = blockIdx.x * blockDim.x + threadIdx.x;
+ int g_idx = blockIdx.y;
+ int c_idx = blockIdx.z;
+
+ if (r_idx >= m || g_idx >= g || c_idx >= c) return;
+
+ int o_idx = index_target[r_idx] * g * c + g_idx * c + c_idx;
+ int v_idx = index_refer[r_idx] * g * c + g_idx * c + c_idx;
+ int w_idx = r_idx * g + g_idx;
+ float grad = grad_output[o_idx];
+ atomicAdd(grad_weight + w_idx, grad * value[v_idx]);
+ atomicAdd(grad_value + v_idx, grad * weight[w_idx]);
+}
+
+/*
+Launchers
+*/
+
+
+void attention_relation_step_forward_cuda_launcher(int m, int g, int c,
+ const float *query, const float *key, const float *weight,
+ const int *index_target, const int *index_refer,
+ float *output)
+{
+ dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), g, c);
+ dim3 threads(THREADS_PER_BLOCK);
+    attention_relation_step_forward_cuda_kernel<<<blocks, threads, 0>>>(m, g, c, query, key, weight,
+ index_target, index_refer, output);
+}
+
+void attention_relation_step_backward_cuda_launcher(int m, int g, int c,
+ const float *query, float *grad_query,
+ const float *key, float *grad_key,
+ const float *weight, float *grad_weight,
+ const int *index_target, const int *index_refer,
+ const float *grad_output)
+{
+ dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), g, c);
+ dim3 threads(THREADS_PER_BLOCK);
+    attention_relation_step_backward_cuda_kernel<<<blocks, threads, 0>>>(m, g, c,
+ query, grad_query,
+ key, grad_key,
+ weight, grad_weight,
+ index_target, index_refer,
+ grad_output);
+}
+
+
+void attention_fusion_step_forward_cuda_launcher(int m, int g, int c,
+ const float *weight, const float *value,
+ const int *index_target, const int *index_refer,
+ float *output)
+{
+ dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), g, c);
+ dim3 threads(THREADS_PER_BLOCK);
+    attention_fusion_step_forward_cuda_kernel<<<blocks, threads, 0>>>(m, g, c, weight, value,
+ index_target, index_refer, output);
+}
+
+
+void attention_fusion_step_backward_cuda_launcher(int m, int g, int c,
+ const float *weight, float *grad_weight,
+ const float *value, float *grad_value,
+ const int *index_target, const int *index_refer,
+ const float *grad_output)
+{
+ dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), g, c);
+ dim3 threads(THREADS_PER_BLOCK);
+    attention_fusion_step_backward_cuda_kernel<<<blocks, threads, 0>>>(m, g, c,
+ weight, grad_weight,
+ value, grad_value,
+ index_target, index_refer,
+ grad_output);
+}
+
+
diff --git a/Pointcept/libs/pointops/src/attention/attention_cuda_kernel.h b/Pointcept/libs/pointops/src/attention/attention_cuda_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..fec965c0415c4cb5c64fd10e441b6a4c6a6c9ae9
--- /dev/null
+++ b/Pointcept/libs/pointops/src/attention/attention_cuda_kernel.h
@@ -0,0 +1,54 @@
+#ifndef _ATTENTION_CUDA_KERNEL
+#define _ATTENTION_CUDA_KERNEL
+#include <torch/serialize/tensor.h>
+#include <vector>
+#include <ATen/cuda/CUDAContext.h>
+
+void attention_relation_step_forward_cuda(int m, int g, int c,
+ at::Tensor query_tensor, at::Tensor key_tensor, at::Tensor weight_tensor,
+ at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
+ at::Tensor output_tensor);
+void attention_relation_step_backward_cuda(int m, int g, int c,
+ at::Tensor query_tensor, at::Tensor grad_query_tensor,
+ at::Tensor key_tensor, at::Tensor grad_key_tensor,
+ at::Tensor weight_tensor, at::Tensor grad_weight_tensor,
+ at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
+ at::Tensor grad_output_tensor);
+void attention_fusion_step_forward_cuda(int m, int g, int c,
+ at::Tensor weight_tensor, at::Tensor value_tensor,
+ at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
+ at::Tensor output_tensor);
+void attention_fusion_step_backward_cuda(int m, int g, int c,
+ at::Tensor weight_tensor, at::Tensor grad_weight_tensor,
+ at::Tensor value_tensor, at::Tensor grad_value_tensor,
+ at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
+ at::Tensor grad_output_tensor);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void attention_relation_step_forward_cuda_launcher(int m, int g, int c,
+ const float *query, const float *key, const float *weight,
+ const int *index_target, const int *index_refer,
+ float *output);
+void attention_relation_step_backward_cuda_launcher(int m, int g, int c,
+ const float *query, float *grad_query,
+ const float *key, float *grad_key,
+ const float *weight, float *grad_weight,
+ const int *index_target, const int *index_refer,
+ const float *grad_output);
+void attention_fusion_step_forward_cuda_launcher(int m, int g, int c,
+ const float *weight, const float *value,
+ const int *index_target, const int *index_refer,
+ float *output);
+void attention_fusion_step_backward_cuda_launcher(int m, int g, int c,
+ const float *weight, float *grad_weight,
+ const float *value, float *grad_value,
+ const int *index_target, const int *index_refer,
+ const float *grad_output);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/Pointcept/libs/pointops/src/ball_query/ball_query_cuda.cpp b/Pointcept/libs/pointops/src/ball_query/ball_query_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..04cd5ff9e8e39c006222d5651f3aae70ce2e35c9
--- /dev/null
+++ b/Pointcept/libs/pointops/src/ball_query/ball_query_cuda.cpp
@@ -0,0 +1,20 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "ball_query_cuda_kernel.h"
+
+
+void ball_query_cuda(int m, int nsample,
+ float min_radius, float max_radius,
+ at::Tensor xyz_tensor, at::Tensor new_xyz_tensor,
+ at::Tensor offset_tensor, at::Tensor new_offset_tensor,
+ at::Tensor idx_tensor, at::Tensor dist2_tensor)
+{
+    const float *xyz = xyz_tensor.data_ptr<float>();
+    const float *new_xyz = new_xyz_tensor.data_ptr<float>();
+    const int *offset = offset_tensor.data_ptr<int>();
+    const int *new_offset = new_offset_tensor.data_ptr<int>();
+    int *idx = idx_tensor.data_ptr<int>();
+    float *dist2 = dist2_tensor.data_ptr<float>();
+ ball_query_cuda_launcher(m, nsample, min_radius, max_radius, xyz, new_xyz, offset, new_offset, idx, dist2);
+}
diff --git a/Pointcept/libs/pointops/src/ball_query/ball_query_cuda_kernel.cu b/Pointcept/libs/pointops/src/ball_query/ball_query_cuda_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..7b3d95a9835f607798f0d63e2b66ddb3af9032da
--- /dev/null
+++ b/Pointcept/libs/pointops/src/ball_query/ball_query_cuda_kernel.cu
@@ -0,0 +1,190 @@
+#include "../cuda_utils.h"
+#include "ball_query_cuda_kernel.h"
+
+
+namespace ball_query_utils{
+
+template <typename DType>
+__device__ void swap(DType *x, DType *y)
+{
+ DType tmp = *x;
+ *x = *y;
+ *y = tmp;
+}
+
+__device__ void reheap(float *dist, int *idx, int k)
+{
+ int root = 0;
+ int child = root * 2 + 1;
+ while (child < k)
+ {
+ if(child + 1 < k && dist[child+1] > dist[child])
+ child++;
+ if(dist[root] > dist[child])
+ return;
+ swap(&dist[root], &dist[child]);
+ swap(&idx[root], &idx[child]);
+ root = child;
+ child = root * 2 + 1;
+ }
+}
+
+
+__device__ void heap_sort(float *dist, int *idx, int k)
+{
+ int i;
+ for (i = k - 1; i > 0; i--)
+ {
+ swap(&dist[0], &dist[i]);
+ swap(&idx[0], &idx[i]);
+ reheap(dist, idx, i);
+ }
+}
+
+__device__ int get_bt_idx(int idx, const int *offset)
+{
+ int i = 0;
+ while (1)
+ {
+ if (idx < offset[i])
+ break;
+ else
+ i++;
+ }
+ return i;
+}
+} // namespace ball_query_utils
+
+__global__ void ball_query_cuda_kernel(int m, int nsample,
+ float min_radius, float max_radius,
+ const float *__restrict__ xyz, const float *__restrict__ new_xyz,
+ const int *__restrict__ offset, const int *__restrict__ new_offset,
+ int *__restrict__ idx, float *__restrict__ dist2) {
+ // input: xyz (n, 3) new_xyz (m, 3)
+ // output: idx (m, nsample) dist (m, nsample)
+ int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
+ if (pt_idx >= m) return;
+
+ new_xyz += pt_idx * 3;
+ idx += pt_idx * nsample;
+ dist2 += pt_idx * nsample;
+
+ int bt_idx = ball_query_utils::get_bt_idx(pt_idx, new_offset);
+ int start;
+ if (bt_idx == 0)
+ start = 0;
+ else
+ start = offset[bt_idx - 1];
+ int end = offset[bt_idx];
+
+ float max_radius2 = max_radius * max_radius;
+ float min_radius2 = min_radius * min_radius;
+ float new_x = new_xyz[0];
+ float new_y = new_xyz[1];
+ float new_z = new_xyz[2];
+
+ float candi_dist[2048];
+ int candi_idx[2048];
+ int candi_num = 0;
+
+ for(int i = start; i < end; i++){
+ float x = xyz[i * 3 + 0];
+ float y = xyz[i * 3 + 1];
+ float z = xyz[i * 3 + 2];
+ float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
+
+ if (d2 <= 1e-5 || (d2 >= min_radius2 && d2 < max_radius2)){
+ // TODO: Check d2 <= 1e-5
+ candi_dist[candi_num] = d2;
+ candi_idx[candi_num] = i;
+ candi_num += 1;
+ }
+ }
+ ball_query_utils::heap_sort(candi_dist, candi_idx, candi_num);
+ if(candi_num <= nsample){
+ for(int i = 0; i < candi_num; i++){
+ idx[i] = candi_idx[i];
+ dist2[i] = candi_dist[i];
+ }
+ for(int i = candi_num; i < nsample; i++){
+ idx[i] = -1;
+ dist2[i] = 1e10;
+ }
+ }
+ else{
+        float sep = static_cast<float>(candi_num) / nsample;
+ for(int i = 0; i < nsample; i++)
+ {
+            int index = static_cast<int>(sep * i);
+ idx[i] = candi_idx[index];
+            dist2[i] = candi_dist[index];  // store the squared distance, not the index
+ }
+ }
+}
+
+/* Random Sample Mode Ball Query */
+
+// __global__ void ball_query_cuda_kernel(int m, int nsample,
+// float min_radius, float max_radius,
+// const float *__restrict__ xyz, const float *__restrict__ new_xyz,
+// const int *__restrict__ offset, const int *__restrict__ new_offset,
+// int *__restrict__ idx, float *__restrict__ dist2) {
+// // input: xyz (n, 3) new_xyz (m, 3)
+// // output: idx (m, nsample) dist (m, nsample)
+// int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
+// if (pt_idx >= m) return;
+//
+// new_xyz += pt_idx * 3;
+// idx += pt_idx * nsample;
+// dist2 += pt_idx * nsample;
+//
+// int bt_idx = ball_get_bt_idx(pt_idx, new_offset);
+// int start;
+// if (bt_idx == 0)
+// start = 0;
+// else
+// start = offset[bt_idx - 1];
+// int end = offset[bt_idx];
+//
+// float max_radius2 = max_radius * max_radius;
+// float min_radius2 = min_radius * min_radius;
+// float new_x = new_xyz[0];
+// float new_y = new_xyz[1];
+// float new_z = new_xyz[2];
+//
+// int cnt = 0;
+// for(int i = start; i < end; i++){
+// float x = xyz[i * 3 + 0];
+// float y = xyz[i * 3 + 1];
+// float z = xyz[i * 3 + 2];
+// float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
+//
+// if (d2 == 0 || (d2 >= min_radius2 && d2 < max_radius2)) {
+// if (cnt == 0) {
+// for (int l = 0; l < nsample; ++l) {
+// idx[l] = i;
+// dist2[l] = d2;
+// }
+// }
+// idx[cnt] = i;
+// ++cnt;
+// if (cnt >= nsample) break;
+// }
+// }
+// }
+
+
+void ball_query_cuda_launcher(int m, int nsample,
+ float min_radius, float max_radius,
+ const float *xyz, const float *new_xyz,
+ const int *offset, const int *new_offset,
+ int *idx, float *dist2) {
+ // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample)
+ dim3 blocks(DIVUP(m, THREADS_PER_BLOCK));
+ dim3 threads(THREADS_PER_BLOCK);
+    ball_query_cuda_kernel<<<blocks, threads, 0>>>(m, nsample,
+ min_radius, max_radius,
+ xyz, new_xyz,
+ offset, new_offset,
+ idx, dist2);
+}
diff --git a/Pointcept/libs/pointops/src/ball_query/ball_query_cuda_kernel.h b/Pointcept/libs/pointops/src/ball_query/ball_query_cuda_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..03007a285a3559da85d099681f1316915e1d31b1
--- /dev/null
+++ b/Pointcept/libs/pointops/src/ball_query/ball_query_cuda_kernel.h
@@ -0,0 +1,26 @@
+#ifndef _BALL_QUERY_CUDA_KERNEL
+#define _BALL_QUERY_CUDA_KERNEL
+#include <torch/serialize/tensor.h>
+#include <vector>
+#include <ATen/cuda/CUDAContext.h>
+
+void ball_query_cuda(int m, int nsample,
+ float min_radius, float max_radius,
+ at::Tensor xyz_tensor, at::Tensor new_xyz_tensor,
+ at::Tensor offset_tensor, at::Tensor new_offset_tensor,
+ at::Tensor idx_tensor, at::Tensor dist2_tensor);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void ball_query_cuda_launcher(int m, int nsample,
+ float min_radius, float max_radius,
+ const float *xyz, const float *new_xyz,
+ const int *offset, const int *new_offset,
+ int *idx, float *dist2);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/Pointcept/libs/pointops/src/cuda_utils.h b/Pointcept/libs/pointops/src/cuda_utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..bbfe7a06bf989056c0bd99e3e64fdbe7d15bb093
--- /dev/null
+++ b/Pointcept/libs/pointops/src/cuda_utils.h
@@ -0,0 +1,23 @@
+#ifndef _CUDA_UTILS_H
+#define _CUDA_UTILS_H
+
+#include <cmath>
+#include <algorithm>
+
+#define TOTAL_THREADS 1024
+#define THREADS_PER_BLOCK 512
+#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
+
+inline int opt_n_threads(int work_size) {
+    const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
+ return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1);
+}
+
+inline dim3 opt_block_config(int x, int y) {
+ const int x_threads = opt_n_threads(x);
+ const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1);
+ dim3 block_config(x_threads, y_threads, 1);
+ return block_config;
+}
+
+#endif
diff --git a/Pointcept/libs/pointops/src/grouping/grouping_cuda.cpp b/Pointcept/libs/pointops/src/grouping/grouping_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6f7990adaf43f0a77050eed0d55adad19f256e10
--- /dev/null
+++ b/Pointcept/libs/pointops/src/grouping/grouping_cuda.cpp
@@ -0,0 +1,21 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "grouping_cuda_kernel.h"
+
+
+void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
+{
+    const float *input = input_tensor.data_ptr<float>();
+    const int *idx = idx_tensor.data_ptr<int>();
+    float *output = output_tensor.data_ptr<float>();
+ grouping_forward_cuda_launcher(m, nsample, c, input, idx, output);
+}
+
+void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor)
+{
+    const float *grad_output = grad_output_tensor.data_ptr<float>();
+    const int *idx = idx_tensor.data_ptr<int>();
+    float *grad_input = grad_input_tensor.data_ptr<float>();
+ grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input);
+}
diff --git a/Pointcept/libs/pointops/src/grouping/grouping_cuda_kernel.cu b/Pointcept/libs/pointops/src/grouping/grouping_cuda_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..58ec0a21a2949f9f82504ccd24597c544c50af40
--- /dev/null
+++ b/Pointcept/libs/pointops/src/grouping/grouping_cuda_kernel.cu
@@ -0,0 +1,40 @@
+#include "../cuda_utils.h"
+#include "grouping_cuda_kernel.h"
+
+
+__global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) {
+ // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
+ int index = blockIdx.x * blockDim.x + threadIdx.x;
+ if (index >= m * nsample * c) return;
+ const int c_idx = index % c;
+ const int nsample_idx = (index / c) % nsample;
+ const int m_idx = index / nsample / c;
+ const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
+ output[index] = input[input_idx];
+}
+
+__global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) {
+ // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
+ int index = blockIdx.x * blockDim.x + threadIdx.x;
+ if (index >= m * nsample * c) return;
+ const int c_idx = index % c;
+ const int nsample_idx = (index / c) % nsample;
+ const int m_idx = index / nsample / c;
+ const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx;
+ atomicAdd(grad_input + input_idx, grad_output[index]);
+}
+
+void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) {
+ // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c)
+ dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
+ dim3 threads(THREADS_PER_BLOCK);
+    grouping_forward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, input, idx, output);
+}
+
+void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input)
+{
+ // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c)
+ dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK));
+ dim3 threads(THREADS_PER_BLOCK);
+    grouping_backward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, grad_output, idx, grad_input);
+}
diff --git a/Pointcept/libs/pointops/src/grouping/grouping_cuda_kernel.h b/Pointcept/libs/pointops/src/grouping/grouping_cuda_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..3db4aaa9fad5811d559d47c500e4b00f0165d9b4
--- /dev/null
+++ b/Pointcept/libs/pointops/src/grouping/grouping_cuda_kernel.h
@@ -0,0 +1,20 @@
+#ifndef _GROUPING_CUDA_KERNEL
+#define _GROUPING_CUDA_KERNEL
+#include <torch/serialize/tensor.h>
+#include <vector>
+#include <ATen/cuda/CUDAContext.h>
+
+void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
+void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output);
+void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/Pointcept/libs/pointops/src/interpolation/interpolation_cuda.cpp b/Pointcept/libs/pointops/src/interpolation/interpolation_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f2c1b0078f4b70626705d7b3f5d1d65d37ee6de7
--- /dev/null
+++ b/Pointcept/libs/pointops/src/interpolation/interpolation_cuda.cpp
@@ -0,0 +1,23 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "interpolation_cuda_kernel.h"
+
+
+void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor)
+{
+ const float *input = input_tensor.data_ptr<float>();
+ const int *idx = idx_tensor.data_ptr<int>();
+ const float *weight = weight_tensor.data_ptr<float>();
+ float *output = output_tensor.data_ptr<float>();
+ interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output);
+}
+
+void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor)
+{
+ const float *grad_output = grad_output_tensor.data_ptr<float>();
+ const int *idx = idx_tensor.data_ptr<int>();
+ const float *weight = weight_tensor.data_ptr<float>();
+ float *grad_input = grad_input_tensor.data_ptr<float>();
+ interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input);
+}
diff --git a/Pointcept/libs/pointops/src/interpolation/interpolation_cuda_kernel.cu b/Pointcept/libs/pointops/src/interpolation/interpolation_cuda_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..f560d8c92c6eac865b8c1e1dc27140fe3fcc2250
--- /dev/null
+++ b/Pointcept/libs/pointops/src/interpolation/interpolation_cuda_kernel.cu
@@ -0,0 +1,47 @@
+#include "../cuda_utils.h"
+#include "interpolation_cuda_kernel.h"
+
+
+__global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output)
+{
+ // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
+ int index = blockIdx.x * blockDim.x + threadIdx.x;
+ if (index >= n * c) return;
+ int c_idx = index % c;
+ int n_idx = index / c;
+ for (int i = 0; i < k; i++)
+ {
+ int idx_idx = n_idx * k + i;
+ int input_idx = idx[idx_idx] * c + c_idx;
+ output[index] += input[input_idx] * weight[idx_idx];
+ }
+}
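+
+// Indexing note: one thread per (point, channel) output element; each thread sums
+// the k neighbor features weighted by `weight`, e.g. for k = 3 the output channel is
+// w0 * input[i0] + w1 * input[i1] + w2 * input[i2]. The backward kernel below
+// scatters grad_output * weight back to grad_input with atomicAdd, because several
+// interpolated points can share the same source neighbor.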
+
+__global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input)
+{
+ // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c)
+ int index = blockIdx.x * blockDim.x + threadIdx.x;
+ if (index >= n * c) return;
+ int c_idx = index % c;
+ int n_idx = index / c;
+ for (int i = 0; i < k; i++)
+ {
+ int idx_idx = n_idx * k + i;
+ int input_idx = idx[idx_idx] * c + c_idx;
+ atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]);
+ }
+}
+
+void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) {
+ // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c)
+ dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
+ dim3 threads(THREADS_PER_BLOCK);
+ interpolation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, input, idx, weight, output);
+}
+
+void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) {
+ // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c)
+ dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
+ dim3 threads(THREADS_PER_BLOCK);
+ interpolation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, grad_output, idx, weight, grad_input);
+}
diff --git a/Pointcept/libs/pointops/src/interpolation/interpolation_cuda_kernel.h b/Pointcept/libs/pointops/src/interpolation/interpolation_cuda_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..309e5dd0a34ccb58807bbf32389ba65e7ee6961b
--- /dev/null
+++ b/Pointcept/libs/pointops/src/interpolation/interpolation_cuda_kernel.h
@@ -0,0 +1,20 @@
+#ifndef _INTERPOLATION_CUDA_KERNEL
+#define _INTERPOLATION_CUDA_KERNEL
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+
+void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor);
+void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output);
+void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/Pointcept/libs/pointops/src/knn_query/knn_query_cuda.cpp b/Pointcept/libs/pointops/src/knn_query/knn_query_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bbe841ce0352fd234143b3b4978ec001522b31dd
--- /dev/null
+++ b/Pointcept/libs/pointops/src/knn_query/knn_query_cuda.cpp
@@ -0,0 +1,16 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "knn_query_cuda_kernel.h"
+
+
+void knn_query_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor)
+{
+ const float *xyz = xyz_tensor.data_ptr<float>();
+ const float *new_xyz = new_xyz_tensor.data_ptr<float>();
+ const int *offset = offset_tensor.data_ptr<int>();
+ const int *new_offset = new_offset_tensor.data_ptr<int>();
+ int *idx = idx_tensor.data_ptr<int>();
+ float *dist2 = dist2_tensor.data_ptr<float>();
+ knn_query_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
+}
diff --git a/Pointcept/libs/pointops/src/knn_query/knn_query_cuda_kernel.cu b/Pointcept/libs/pointops/src/knn_query/knn_query_cuda_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..297740237eae98cc4e61421bc261755d79b83142
--- /dev/null
+++ b/Pointcept/libs/pointops/src/knn_query/knn_query_cuda_kernel.cu
@@ -0,0 +1,112 @@
+#include "../cuda_utils.h"
+#include "knn_query_cuda_kernel.h"
+
+
+namespace knn_query_utils{
+
+template <typename DType>
+__device__ void swap(DType *x, DType *y)
+{
+ DType tmp = *x;
+ *x = *y;
+ *y = tmp;
+}
+
+__device__ void reheap(float *dist, int *idx, int k)
+{
+ int root = 0;
+ int child = root * 2 + 1;
+ while (child < k)
+ {
+ if(child + 1 < k && dist[child+1] > dist[child])
+ child++;
+ if(dist[root] > dist[child])
+ return;
+ swap(&dist[root], &dist[child]);
+ swap(&idx[root], &idx[child]);
+ root = child;
+ child = root * 2 + 1;
+ }
+}
+
+
+__device__ void heap_sort(float *dist, int *idx, int k)
+{
+ int i;
+ for (i = k - 1; i > 0; i--)
+ {
+ swap(&dist[0], &dist[i]);
+ swap(&idx[0], &idx[i]);
+ reheap(dist, idx, i);
+ }
+}
+
+
+__device__ int get_bt_idx(int idx, const int *offset)
+{
+ int i = 0;
+ while (1)
+ {
+ if (idx < offset[i])
+ break;
+ else
+ i++;
+ }
+ return i;
+}
+} // namespace knn_query_utils
+
+
+__global__ void knn_query_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) {
+ // input: xyz (n, 3) new_xyz (m, 3)
+ // output: idx (m, nsample) dist2 (m, nsample)
+ int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
+ if (pt_idx >= m) return;
+
+ new_xyz += pt_idx * 3;
+ idx += pt_idx * nsample;
+ dist2 += pt_idx * nsample;
+
+ int bt_idx = knn_query_utils::get_bt_idx(pt_idx, new_offset);
+ int start;
+ if (bt_idx == 0)
+ start = 0;
+ else
+ start = offset[bt_idx - 1];
+ int end = offset[bt_idx];
+
+ float new_x = new_xyz[0];
+ float new_y = new_xyz[1];
+ float new_z = new_xyz[2];
+
+ float best_dist[128];
+ int best_idx[128];
+ for(int i = 0; i < nsample; i++){
+ best_dist[i] = 1e10;
+ best_idx[i] = -1;
+ }
+ for(int i = start; i < end; i++){
+ float x = xyz[i * 3 + 0];
+ float y = xyz[i * 3 + 1];
+ float z = xyz[i * 3 + 2];
+ float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
+ if (d2 < best_dist[0]){
+ best_dist[0] = d2;
+ best_idx[0] = i;
+ knn_query_utils::reheap(best_dist, best_idx, nsample);
+ }
+ }
+ knn_query_utils::heap_sort(best_dist, best_idx, nsample);
+ for(int i = 0; i < nsample; i++){
+ idx[i] = best_idx[i];
+ dist2[i] = best_dist[i];
+ }
+}
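+
+// Note: best_dist/best_idx form a max-heap of size nsample (largest distance at the
+// root), so each candidate point is compared against the current worst neighbor in
+// O(1) and pushed in O(log nsample); heap_sort then leaves the neighbors in ascending
+// distance order. The fixed-size local arrays cap nsample at 128 for this kernel.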
+
+
+void knn_query_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) {
+ // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample)
+ dim3 blocks(DIVUP(m, THREADS_PER_BLOCK));
+ dim3 threads(THREADS_PER_BLOCK);
+ knn_query_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2);
+}
diff --git a/Pointcept/libs/pointops/src/knn_query/knn_query_cuda_kernel.h b/Pointcept/libs/pointops/src/knn_query/knn_query_cuda_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..c07c1cb46a56b7a37d55e25fb78816e034a8387e
--- /dev/null
+++ b/Pointcept/libs/pointops/src/knn_query/knn_query_cuda_kernel.h
@@ -0,0 +1,18 @@
+#ifndef _KNN_QUERY_CUDA_KERNEL
+#define _KNN_QUERY_CUDA_KERNEL
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+
+void knn_query_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void knn_query_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/Pointcept/libs/pointops/src/pointops_api.cpp b/Pointcept/libs/pointops/src/pointops_api.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5ca4377607eb181d48d458d700f1df876294a848
--- /dev/null
+++ b/Pointcept/libs/pointops/src/pointops_api.cpp
@@ -0,0 +1,32 @@
+#include <torch/serialize/tensor.h>
+#include <torch/extension.h>
+
+#include "knn_query/knn_query_cuda_kernel.h"
+#include "ball_query/ball_query_cuda_kernel.h"
+#include "random_ball_query/random_ball_query_cuda_kernel.h"
+#include "sampling/sampling_cuda_kernel.h"
+#include "grouping/grouping_cuda_kernel.h"
+#include "interpolation/interpolation_cuda_kernel.h"
+#include "aggregation/aggregation_cuda_kernel.h"
+#include "subtraction/subtraction_cuda_kernel.h"
+#include "attention/attention_cuda_kernel.h"
+
+
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
+ m.def("knn_query_cuda", &knn_query_cuda, "knn_query_cuda");
+ m.def("ball_query_cuda", &ball_query_cuda, "ball_query_cuda");
+ m.def("random_ball_query_cuda", &random_ball_query_cuda, "random_ball_query_cuda");
+ m.def("farthest_point_sampling_cuda", &farthest_point_sampling_cuda, "farthest_point_sampling_cuda");
+ m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda");
+ m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda");
+ m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda");
+ m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda");
+ m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda");
+ m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda");
+ m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda");
+ m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda");
+ m.def("attention_relation_step_forward_cuda", &attention_relation_step_forward_cuda, "attention_relation_step_forward_cuda");
+ m.def("attention_relation_step_backward_cuda", &attention_relation_step_backward_cuda, "attention_relation_step_backward_cuda");
+ m.def("attention_fusion_step_forward_cuda", &attention_fusion_step_forward_cuda, "attention_fusion_step_forward_cuda");
+ m.def("attention_fusion_step_backward_cuda", &attention_fusion_step_backward_cuda, "attention_fusion_step_backward_cuda");
+}
diff --git a/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda.cpp b/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c2618c94b6b19175f044131cebeefe8a23152c47
--- /dev/null
+++ b/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda.cpp
@@ -0,0 +1,21 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "random_ball_query_cuda_kernel.h"
+
+
+void random_ball_query_cuda(int m, int nsample,
+ float min_radius, float max_radius, at::Tensor order_tensor,
+ at::Tensor xyz_tensor, at::Tensor new_xyz_tensor,
+ at::Tensor offset_tensor, at::Tensor new_offset_tensor,
+ at::Tensor idx_tensor, at::Tensor dist2_tensor)
+{
+ const int *order = order_tensor.data_ptr<int>();
+ const float *xyz = xyz_tensor.data_ptr<float>();
+ const float *new_xyz = new_xyz_tensor.data_ptr<float>();
+ const int *offset = offset_tensor.data_ptr<int>();
+ const int *new_offset = new_offset_tensor.data_ptr<int>();
+ int *idx = idx_tensor.data_ptr<int>();
+ float *dist2 = dist2_tensor.data_ptr<float>();
+ random_ball_query_cuda_launcher(m, nsample, min_radius, max_radius, order, xyz, new_xyz, offset, new_offset, idx, dist2);
+}
diff --git a/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.cu b/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..bfafb0f8b731e201783c94144cad9de3e11228ad
--- /dev/null
+++ b/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.cu
@@ -0,0 +1,123 @@
+#include "../cuda_utils.h"
+#include "random_ball_query_cuda_kernel.h"
+
+
+namespace random_ball_query_utils{
+
+template <typename DType>
+__device__ void swap(DType *x, DType *y)
+{
+ DType tmp = *x;
+ *x = *y;
+ *y = tmp;
+}
+
+__device__ void reheap(float *dist, int *idx, int k)
+{
+ int root = 0;
+ int child = root * 2 + 1;
+ while (child < k)
+ {
+ if(child + 1 < k && dist[child+1] > dist[child])
+ child++;
+ if(dist[root] > dist[child])
+ return;
+ swap(&dist[root], &dist[child]);
+ swap(&idx[root], &idx[child]);
+ root = child;
+ child = root * 2 + 1;
+ }
+}
+
+
+__device__ void heap_sort(float *dist, int *idx, int k)
+{
+ int i;
+ for (i = k - 1; i > 0; i--)
+ {
+ swap(&dist[0], &dist[i]);
+ swap(&idx[0], &idx[i]);
+ reheap(dist, idx, i);
+ }
+}
+
+__device__ int get_bt_idx(int idx, const int *offset)
+{
+ int i = 0;
+ while (1)
+ {
+ if (idx < offset[i])
+ break;
+ else
+ i++;
+ }
+ return i;
+}
+} // namespace random_ball_query_utils
+
+__global__ void random_ball_query_cuda_kernel(int m, int nsample,
+ float min_radius, float max_radius, const int *__restrict__ order,
+ const float *__restrict__ xyz, const float *__restrict__ new_xyz,
+ const int *__restrict__ offset, const int *__restrict__ new_offset,
+ int *__restrict__ idx, float *__restrict__ dist2) {
+ // input: xyz (n, 3) new_xyz (m, 3)
+ // output: idx (m, nsample) dist (m, nsample)
+ int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
+ if (pt_idx >= m) return;
+
+ new_xyz += pt_idx * 3;
+ idx += pt_idx * nsample;
+ dist2 += pt_idx * nsample;
+
+ int bt_idx = random_ball_query_utils::get_bt_idx(pt_idx, new_offset);
+ int start;
+ if (bt_idx == 0)
+ start = 0;
+ else
+ start = offset[bt_idx - 1];
+ int end = offset[bt_idx];
+
+ float max_radius2 = max_radius * max_radius;
+ float min_radius2 = min_radius * min_radius;
+ float new_x = new_xyz[0];
+ float new_y = new_xyz[1];
+ float new_z = new_xyz[2];
+
+ int cnt = 0;
+
+ for(int i = start; i < end; i++){
+ float x = xyz[order[i] * 3 + 0];
+ float y = xyz[order[i] * 3 + 1];
+ float z = xyz[order[i] * 3 + 2];
+ float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
+
+ if (d2 <= 1e-5 || (d2 >= min_radius2 && d2 < max_radius2)){
+ dist2[cnt] = d2;
+ idx[cnt] = order[i];
+ cnt += 1;
+ if (cnt >= nsample) break;
+ }
+ }
+
+ if (cnt < nsample) {
+ for (int i = cnt; i < nsample; i++){
+ idx[i] = -1;
+ dist2[i] = 1e10;
+ }
+ }
+}
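+
+// Note: unlike an exhaustive ball query, candidates are visited in the caller-supplied
+// `order` permutation, so the kernel keeps the first nsample in-radius points under
+// that (typically shuffled) order rather than the nsample closest ones. Unfilled
+// slots are padded with idx = -1 and dist2 = 1e10.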
+
+void random_ball_query_cuda_launcher(int m, int nsample,
+ float min_radius, float max_radius, const int *order,
+ const float *xyz, const float *new_xyz,
+ const int *offset, const int *new_offset,
+ int *idx, float *dist2) {
+ // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample)
+ dim3 blocks(DIVUP(m, THREADS_PER_BLOCK));
+ dim3 threads(THREADS_PER_BLOCK);
+ random_ball_query_cuda_kernel<<<blocks, threads, 0>>>(m, nsample,
+ min_radius, max_radius, order,
+ xyz, new_xyz,
+ offset, new_offset,
+ idx, dist2);
+}
diff --git a/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.h b/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..d3e35be21933d95b50e9c42150067071502bbc1e
--- /dev/null
+++ b/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.h
@@ -0,0 +1,26 @@
+#ifndef _RANDOM_BALL_QUERY_CUDA_KERNEL
+#define _RANDOM_BALL_QUERY_CUDA_KERNEL
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+
+void random_ball_query_cuda(int m, int nsample,
+ float min_radius, float max_radius, at::Tensor order_tensor,
+ at::Tensor xyz_tensor, at::Tensor new_xyz_tensor,
+ at::Tensor offset_tensor, at::Tensor new_offset_tensor,
+ at::Tensor idx_tensor, at::Tensor dist2_tensor);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void random_ball_query_cuda_launcher(int m, int nsample,
+ float min_radius, float max_radius, const int *order,
+ const float *xyz, const float *new_xyz,
+ const int *offset, const int *new_offset,
+ int *idx, float *dist2);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/Pointcept/libs/pointops/src/sampling/sampling_cuda.cpp b/Pointcept/libs/pointops/src/sampling/sampling_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7dc8094c3343f874457fd23d1506b25fd006fd0b
--- /dev/null
+++ b/Pointcept/libs/pointops/src/sampling/sampling_cuda.cpp
@@ -0,0 +1,15 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "sampling_cuda_kernel.h"
+
+
+void farthest_point_sampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor)
+{
+ const float *xyz = xyz_tensor.data_ptr<float>();
+ const int *offset = offset_tensor.data_ptr<int>();
+ const int *new_offset = new_offset_tensor.data_ptr<int>();
+ float *tmp = tmp_tensor.data_ptr<float>();
+ int *idx = idx_tensor.data_ptr<int>();
+ farthest_point_sampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx);
+}
diff --git a/Pointcept/libs/pointops/src/sampling/sampling_cuda_kernel.cu b/Pointcept/libs/pointops/src/sampling/sampling_cuda_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..9a8676876672f68cd94913a0500d64813133b387
--- /dev/null
+++ b/Pointcept/libs/pointops/src/sampling/sampling_cuda_kernel.cu
@@ -0,0 +1,171 @@
+#include "../cuda_utils.h"
+#include "sampling_cuda_kernel.h"
+
+
+__device__ void __update(float *dists, int *dists_i, int idx1, int idx2) {
+ const float v1 = dists[idx1], v2 = dists[idx2];
+ const int i1 = dists_i[idx1], i2 = dists_i[idx2];
+ dists[idx1] = max(v1, v2);
+ dists_i[idx1] = v2 > v1 ? i2 : i1;
+}
+
+// input xyz: (n, 3), tmp: (b, n_max)
+// output idx (m)
+template <unsigned int block_size>
+__global__ void farthest_point_sampling_cuda_kernel(const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx)
+{
+ __shared__ float dists[block_size];
+ __shared__ int dists_i[block_size];
+
+ int bid = blockIdx.x;
+ int start_n, end_n, start_m, end_m, old;
+ if (bid == 0) {
+ start_n = 0;
+ end_n = offset[0];
+ start_m = 0;
+ end_m = new_offset[0];
+ old = 0;
+ }
+ else {
+ start_n = offset[bid - 1];
+ end_n = offset[bid];
+ start_m = new_offset[bid - 1];
+ end_m = new_offset[bid];
+ old = offset[bid - 1];
+ }
+
+ const int stride = block_size;
+ int tid = threadIdx.x;
+ if (tid == 0) idx[start_m] = start_n;
+
+ __syncthreads();
+ for (int j = start_m + 1; j < end_m; j++)
+ {
+ int besti = start_n;
+ float best = -1;
+ float x1 = xyz[old * 3 + 0];
+ float y1 = xyz[old * 3 + 1];
+ float z1 = xyz[old * 3 + 2];
+ for (int k = start_n + tid; k < end_n; k += stride)
+ {
+ float x2 = xyz[k * 3 + 0];
+ float y2 = xyz[k * 3 + 1];
+ float z2 = xyz[k * 3 + 2];
+ float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
+ float d2 = min(d, tmp[k]);
+ tmp[k] = d2;
+ besti = d2 > best ? k : besti;
+ best = d2 > best ? d2 : best;
+ }
+ dists[tid] = best;
+ dists_i[tid] = besti;
+ __syncthreads();
+
+ if (block_size >= 1024) {
+ if (tid < 512) {
+ __update(dists, dists_i, tid, tid + 512);
+ }
+ __syncthreads();
+ }
+ if (block_size >= 512) {
+ if (tid < 256) {
+ __update(dists, dists_i, tid, tid + 256);
+ }
+ __syncthreads();
+ }
+ if (block_size >= 256) {
+ if (tid < 128) {
+ __update(dists, dists_i, tid, tid + 128);
+ }
+ __syncthreads();
+ }
+ if (block_size >= 128) {
+ if (tid < 64) {
+ __update(dists, dists_i, tid, tid + 64);
+ }
+ __syncthreads();
+ }
+ if (block_size >= 64) {
+ if (tid < 32) {
+ __update(dists, dists_i, tid, tid + 32);
+ }
+ __syncthreads();
+ }
+ if (block_size >= 32) {
+ if (tid < 16) {
+ __update(dists, dists_i, tid, tid + 16);
+ }
+ __syncthreads();
+ }
+ if (block_size >= 16) {
+ if (tid < 8) {
+ __update(dists, dists_i, tid, tid + 8);
+ }
+ __syncthreads();
+ }
+ if (block_size >= 8) {
+ if (tid < 4) {
+ __update(dists, dists_i, tid, tid + 4);
+ }
+ __syncthreads();
+ }
+ if (block_size >= 4) {
+ if (tid < 2) {
+ __update(dists, dists_i, tid, tid + 2);
+ }
+ __syncthreads();
+ }
+ if (block_size >= 2) {
+ if (tid < 1) {
+ __update(dists, dists_i, tid, tid + 1);
+ }
+ __syncthreads();
+ }
+
+ old = dists_i[0];
+ if (tid == 0)
+ idx[j] = old;
+ }
+}
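+
+// Note: one block handles one batch item (bid indexes offset/new_offset). Each
+// iteration selects the point farthest from the already-chosen set: threads scan a
+// strided slice, tmp[k] keeps the running minimum distance from point k to the
+// selected set, and the unrolled shared-memory reduction above picks the block-wide
+// argmax, which thread 0 writes to idx[j].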
+
+void farthest_point_sampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx)
+{
+ unsigned int n_threads = opt_n_threads(n);
+ switch (n_threads) {
+ case 1024:
+ farthest_point_sampling_cuda_kernel<1024><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+ break;
+ case 512:
+ farthest_point_sampling_cuda_kernel<512><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+ break;
+ case 256:
+ farthest_point_sampling_cuda_kernel<256><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+ break;
+ case 128:
+ farthest_point_sampling_cuda_kernel<128><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+ break;
+ case 64:
+ farthest_point_sampling_cuda_kernel<64><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+ break;
+ case 32:
+ farthest_point_sampling_cuda_kernel<32><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+ break;
+ case 16:
+ farthest_point_sampling_cuda_kernel<16><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+ break;
+ case 8:
+ farthest_point_sampling_cuda_kernel<8><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+ break;
+ case 4:
+ farthest_point_sampling_cuda_kernel<4><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+ break;
+ case 2:
+ farthest_point_sampling_cuda_kernel<2><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+ break;
+ case 1:
+ farthest_point_sampling_cuda_kernel<1><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+ break;
+ default:
+ farthest_point_sampling_cuda_kernel<512><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+ }
+}
diff --git a/Pointcept/libs/pointops/src/sampling/sampling_cuda_kernel.h b/Pointcept/libs/pointops/src/sampling/sampling_cuda_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..f0e07607394a10b2b70c29f7497589d5edb8aab3
--- /dev/null
+++ b/Pointcept/libs/pointops/src/sampling/sampling_cuda_kernel.h
@@ -0,0 +1,18 @@
+#ifndef _SAMPLING_CUDA_KERNEL
+#define _SAMPLING_CUDA_KERNEL
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+
+void farthest_point_sampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void farthest_point_sampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/Pointcept/libs/pointops/src/subtraction/subtraction_cuda.cpp b/Pointcept/libs/pointops/src/subtraction/subtraction_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b343857a1671eafe5199089973e863e2ac5b618c
--- /dev/null
+++ b/Pointcept/libs/pointops/src/subtraction/subtraction_cuda.cpp
@@ -0,0 +1,23 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "subtraction_cuda_kernel.h"
+
+
+void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
+{
+ const float *input1 = input1_tensor.data_ptr<float>();
+ const float *input2 = input2_tensor.data_ptr<float>();
+ const int *idx = idx_tensor.data_ptr<int>();
+ float *output = output_tensor.data_ptr<float>();
+ subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output);
+}
+
+void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor)
+{
+ const int *idx = idx_tensor.data_ptr<int>();
+ const float *grad_output = grad_output_tensor.data_ptr<float>();
+ float *grad_input1 = grad_input1_tensor.data_ptr<float>();
+ float *grad_input2 = grad_input2_tensor.data_ptr<float>();
+ subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
+}
diff --git a/Pointcept/libs/pointops/src/subtraction/subtraction_cuda_kernel.cu b/Pointcept/libs/pointops/src/subtraction/subtraction_cuda_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..9b8d4f752940d580ee2b49f1b2946a8d6386d11a
--- /dev/null
+++ b/Pointcept/libs/pointops/src/subtraction/subtraction_cuda_kernel.cu
@@ -0,0 +1,44 @@
+#include "../cuda_utils.h"
+#include "subtraction_cuda_kernel.h"
+
+
+__global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
+ // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
+ int index = blockIdx.x * blockDim.x + threadIdx.x;
+ if (index >= n * nsample * c) return;
+ const int c_idx = index % c;
+ const int nsample_idx = (index / c) % nsample;
+ const int n_idx = index / nsample / c;
+ const int idx_idx = n_idx * nsample + nsample_idx;
+ const int input1_idx = n_idx * c + c_idx;
+ const int input2_idx = idx[idx_idx] * c + c_idx;
+ output[index] = input1[input1_idx] - input2[input2_idx];
+}
+
+__global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
+ // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
+ int index = blockIdx.x * blockDim.x + threadIdx.x;
+ if (index >= n * nsample * c) return;
+ const int c_idx = index % c;
+ const int nsample_idx = (index / c) % nsample;
+ const int n_idx = index / nsample / c;
+ const int idx_idx = n_idx * nsample + nsample_idx;
+ const int input1_idx = n_idx * c + c_idx;
+ const int input2_idx = idx[idx_idx] * c + c_idx;
+ atomicAdd(grad_input1 + input1_idx, grad_output[index]);
+ atomicAdd(grad_input2 + input2_idx, -grad_output[index]);
+}
+
+void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
+ // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
+ dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
+ dim3 threads(THREADS_PER_BLOCK);
+ subtraction_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, input1, input2, idx, output);
+}
+
+void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
+ // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
+ dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
+ dim3 threads(THREADS_PER_BLOCK);
+ subtraction_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
+}
diff --git a/Pointcept/libs/pointops/src/subtraction/subtraction_cuda_kernel.h b/Pointcept/libs/pointops/src/subtraction/subtraction_cuda_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..856133d97bdd3dc58f29c746ff240fc9d489c22e
--- /dev/null
+++ b/Pointcept/libs/pointops/src/subtraction/subtraction_cuda_kernel.h
@@ -0,0 +1,20 @@
+#ifndef _SUBTRACTION_CUDA_KERNEL
+#define _SUBTRACTION_CUDA_KERNEL
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+
+void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
+void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output);
+void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/Pointcept/libs/pointops2/__init__.py b/Pointcept/libs/pointops2/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/Pointcept/libs/pointops2/functions/__init__.py b/Pointcept/libs/pointops2/functions/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..25a2367da463d2f32b923166b48396d7292ad1f2
--- /dev/null
+++ b/Pointcept/libs/pointops2/functions/__init__.py
@@ -0,0 +1 @@
+from pointops2 import *
diff --git a/Pointcept/libs/pointops2/functions/pointops.py b/Pointcept/libs/pointops2/functions/pointops.py
new file mode 100644
index 0000000000000000000000000000000000000000..efda900b3e702c4e5f8576baad5f4168cb756ee9
--- /dev/null
+++ b/Pointcept/libs/pointops2/functions/pointops.py
@@ -0,0 +1,1193 @@
+"""
+The part of attention operations is written by Xin Lai.
+Email: xinlai@cse.cuhk.edu.hk
+"""
+
+from typing import Tuple
+
+import torch
+from torch.autograd import Function
+import torch.nn as nn
+
+import pointops2_cuda as pointops_cuda
+import time
+
+
+class FurthestSampling(Function):
+ @staticmethod
+ def forward(ctx, xyz, offset, new_offset):
+ """
+ input: xyz: (n, 3), offset: (b), new_offset: (b)
+ output: idx: (m)
+ """
+ assert xyz.is_contiguous()
+ n, b, n_max = xyz.shape[0], offset.shape[0], offset[0]
+ for i in range(1, b):
+ n_max = max(offset[i] - offset[i - 1], n_max)
+ idx = torch.cuda.IntTensor(new_offset[b - 1].item()).zero_()
+ tmp = torch.cuda.FloatTensor(n).fill_(1e10)
+ pointops_cuda.furthestsampling_cuda(b, n_max, xyz, offset, new_offset, tmp, idx)
+ del tmp
+ return idx
+
+
+furthestsampling = FurthestSampling.apply
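+# Hedged usage sketch (not from the original source; shapes and values are
+# illustrative only). Offsets are cumulative point counts per batch item, so two
+# clouds with 1000 and 800 points share one flat tensor with offset = [1000, 1800],
+# and new_offset gives the cumulative number of points to keep per item:
+#
+#   xyz = torch.rand(1800, 3).cuda()
+#   offset = torch.cuda.IntTensor([1000, 1800])
+#   new_offset = torch.cuda.IntTensor([250, 450])
+#   sample_idx = furthestsampling(xyz, offset, new_offset)  # (450,) indices into xyz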
+
+
+class KNNQuery(Function):
+ @staticmethod
+ def forward(ctx, nsample, xyz, new_xyz, offset, new_offset):
+ """
+ input: xyz: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b)
+ output: idx: (m, nsample), dist2: (m, nsample)
+ """
+ if new_xyz is None:
+ new_xyz = xyz
+ assert xyz.is_contiguous() and new_xyz.is_contiguous()
+ m = new_xyz.shape[0]
+ idx = torch.cuda.IntTensor(m, nsample).zero_()
+ dist2 = torch.cuda.FloatTensor(m, nsample).zero_()
+ pointops_cuda.knnquery_cuda(
+ m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2
+ )
+ return idx, torch.sqrt(dist2)
+
+
+knnquery = KNNQuery.apply
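+# Hedged usage sketch (illustrative only), continuing the tensors above: gather the
+# 16 nearest neighbors of each sampled point, restricted to its own batch item by
+# the offset tensors:
+#
+#   new_xyz = xyz[sample_idx.long()]                       # (450, 3)
+#   nbr_idx, nbr_dist = knnquery(16, xyz, new_xyz, offset, new_offset)
+#   # nbr_idx: (450, 16) int32 indices into xyz, nbr_dist: (450, 16) distances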
+
+
+class Grouping(Function):
+ @staticmethod
+ def forward(ctx, input, idx):
+ """
+ input: input: (n, c), idx : (m, nsample)
+ output: (m, nsample, c)
+ """
+ assert input.is_contiguous() and idx.is_contiguous()
+ m, nsample, n, c = idx.shape[0], idx.shape[1], input.shape[0], input.shape[1]
+ output = torch.cuda.FloatTensor(m, nsample, c)
+ pointops_cuda.grouping_forward_cuda(m, nsample, c, input, idx, output)
+ ctx.n = n
+ ctx.save_for_backward(idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_out: (m, nsample, c)
+ output: (n, c), None
+ """
+ n = ctx.n
+ (idx,) = ctx.saved_tensors
+ m, nsample, c = grad_output.shape
+ grad_input = torch.cuda.FloatTensor(n, c).zero_()
+ pointops_cuda.grouping_backward_cuda(
+ m, nsample, c, grad_output, idx, grad_input
+ )
+ return grad_input, None
+
+
+grouping = Grouping.apply
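+# Hedged usage sketch (illustrative only): gather per-neighbor features with the
+# autograd-aware grouping op; assuming feat is (1800, c) and nbr_idx is (450, 16)
+# from the sketches above:
+#
+#   grouped = grouping(feat, nbr_idx)  # (450, 16, c), differentiable w.r.t. feat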
+
+
+class AttentionStep1(Function):
+ @staticmethod
+ def forward(ctx, q, k, index0, index1):
+ """
+ input: q: (N, h, C//h), k: (N, h, C//h), index0: (M), index1: (M)
+ output: output: [M, h]
+ """
+ assert (
+ q.is_contiguous()
+ and k.is_contiguous()
+ and index0.is_contiguous()
+ and index1.is_contiguous()
+ )
+
+ N_q, h, C_div_h = q.shape
+ N_k = k.shape[0]
+ M = index0.shape[0]
+ C = int(C_div_h * h)
+
+ output = torch.cuda.FloatTensor(M, h).zero_()
+ pointops_cuda.attention_step1_forward_cuda(
+ N_k, M, h, C, q, k, index0, index1, output
+ )
+ ctx.N_q = N_q
+ ctx.N_k = N_k
+ ctx.C = C
+ ctx.save_for_backward(q, k, index0, index1)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_output: (M, h)
+ output: grad_q: (N, h, C//h), grad_k: (N, h, C//h), None, None
+ """
+
+ N_q = ctx.N_q
+ N_k = ctx.N_k
+ C = ctx.C
+ q, k, index0, index1 = ctx.saved_tensors
+ M, h = grad_output.shape
+
+ grad_output = grad_output.contiguous()
+ # print("grad_output.is_contiguous(): ", grad_output.is_contiguous())
+ assert (
+ q.is_contiguous()
+ and k.is_contiguous()
+ and index0.is_contiguous()
+ and index1.is_contiguous()
+ and grad_output.is_contiguous()
+ )
+
+ # print("back: attn[:5,:5]: ", attn[:5, :5])
+
+ # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape))
+
+ grad_q = torch.cuda.FloatTensor(N_q, h, C // h).zero_()
+ grad_k = torch.cuda.FloatTensor(N_k, h, C // h).zero_()
+
+ # torch.cuda.synchronize()
+ # start = time.time()
+
+ pointops_cuda.attention_step1_backward_cuda(
+ N_q, M, h, C, grad_output, index0, index1, q, k, grad_q, grad_k
+ )
+
+ # torch.cuda.synchronize()
+ # end = time.time()
+ # print("time v7: {}".format(end - start))
+ # # input()
+
+ return grad_q, grad_k, None, None
+
+
+attention_step1 = AttentionStep1.apply
+
+
+class AttentionStep1_v2(Function):
+ @staticmethod
+ def forward(ctx, q, k, index1, index0_offsets, n_max):
+ """
+ input: q: (N, h, C//h), k: (N, h, C//h), index1: (M), index0_offsets, n_max
+ output: output: [M, h]
+ """
+ assert (
+ q.is_contiguous()
+ and k.is_contiguous()
+ and index0_offsets.is_contiguous()
+ and index1.is_contiguous()
+ )
+ assert n_max <= 1024
+
+ N_q, h, C_div_h = q.shape
+ N_k = k.shape[0]
+ M = index1.shape[0]
+ C = int(C_div_h * h)
+
+ output = torch.cuda.FloatTensor(M, h).zero_()
+ pointops_cuda.attention_step1_forward_cuda_v2(
+ N_k, M, h, C, n_max, q, k, index0_offsets, index1, output
+ )
+ ctx.N_q = N_q
+ ctx.N_k = N_k
+ ctx.C = C
+ ctx.n_max = n_max
+ ctx.save_for_backward(q, k, index0_offsets, index1)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_output: (M, h)
+ output: grad_q: (N, h, C//h), grad_k: (N, h, C//h), None, None, None
+ """
+
+ N_q = ctx.N_q
+ N_k = ctx.N_k
+ C = ctx.C
+ n_max = ctx.n_max
+ q, k, index0_offsets, index1 = ctx.saved_tensors
+ M, h = grad_output.shape
+
+ grad_output = grad_output.contiguous()
+ # print("grad_output.is_contiguous(): ", grad_output.is_contiguous())
+ assert (
+ q.is_contiguous()
+ and k.is_contiguous()
+ and index0_offsets.is_contiguous()
+ and index1.is_contiguous()
+ and grad_output.is_contiguous()
+ )
+
+ # print("back: attn[:5,:5]: ", attn[:5, :5])
+
+ # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape))
+
+ grad_q = torch.cuda.FloatTensor(N_q, h, C // h).zero_()
+ grad_k = torch.cuda.FloatTensor(N_k, h, C // h).zero_()
+
+ # torch.cuda.synchronize()
+ # start = time.time()
+
+ pointops_cuda.attention_step1_backward_cuda_v2(
+ N_q,
+ M,
+ h,
+ C,
+ n_max,
+ grad_output,
+ index0_offsets,
+ index1,
+ q,
+ k,
+ grad_q,
+ grad_k,
+ )
+
+ # torch.cuda.synchronize()
+ # end = time.time()
+ # print("time v7: {}".format(end - start))
+ # # input()
+
+ return grad_q, grad_k, None, None, None
+
+
+attention_step1_v2 = AttentionStep1_v2.apply
+
+
+class AttentionStep2(Function):
+ @staticmethod
+ def forward(ctx, attn, v, index0, index1):
+ """
+ input: attn: (M, h), v: (N, h, C//h), index0: (M), index1: (M)
+ output: output: [N, h, C//h]
+ """
+ assert (
+ attn.is_contiguous()
+ and v.is_contiguous()
+ and index0.is_contiguous()
+ and index1.is_contiguous()
+ )
+
+ M, h = attn.shape
+ N_q = index0.max().item() + 1
+ N_v, h, C_div_h = v.shape
+ C = int(C_div_h * h)
+
+ output = torch.cuda.FloatTensor(N_q, h, C // h).zero_()
+ pointops_cuda.attention_step2_forward_cuda(
+ N_q, M, h, C, attn, v, index0, index1, output
+ )
+ ctx.M = M
+
+ # print("attn[:5,:5]: ", attn[:5, :5])
+
+ ctx.save_for_backward(attn, v, index0, index1)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_output: (N, h, C//h)
+ output: (M, h), (N, h, C//h), None, None
+ """
+ M = ctx.M
+ attn, v, index0, index1 = ctx.saved_tensors
+ N_v = v.shape[0]
+ N_q, h, C_div_h = grad_output.shape
+ C = h * C_div_h
+
+ grad_output = grad_output.contiguous()
+ # print("grad_output.is_contiguous(): ", grad_output.is_contiguous())
+ assert (
+ attn.is_contiguous()
+ and v.is_contiguous()
+ and index0.is_contiguous()
+ and index1.is_contiguous()
+ and grad_output.is_contiguous()
+ )
+
+ # print("back: attn[:5,:5]: ", attn[:5, :5])
+
+ # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape))
+
+ grad_attn = torch.cuda.FloatTensor(M, h).zero_()
+ grad_v = torch.cuda.FloatTensor(N_v, h, C // h).zero_()
+
+ # torch.cuda.synchronize()
+ # start = time.time()
+
+ pointops_cuda.attention_step2_backward_cuda(
+ N_q, M, h, C, grad_output, index0, index1, attn, v, grad_attn, grad_v
+ )
+
+ # torch.cuda.synchronize()
+ # end = time.time()
+ # print("time v8: {}".format(end - start))
+ # # input()
+
+ return grad_attn, grad_v, None, None
+
+
+attention_step2 = AttentionStep2.apply
+
+
+class AttentionStep2_v2(Function):
+ @staticmethod
+ def forward(ctx, attn, v, index0, index1):
+ """
+ input: attn: (M, h), v: (N, h, C//h), index0: (M), index1: (M)
+ output: output: [L, h, C//h]
+ """
+ assert (
+ attn.is_contiguous()
+ and v.is_contiguous()
+ and index0.is_contiguous()
+ and index1.is_contiguous()
+ )
+
+ L = int(index0.max().item()) + 1
+
+ M, h = attn.shape
+ N, h, C_div_h = v.shape
+ C = int(C_div_h * h)
+
+ output = torch.cuda.FloatTensor(L, h, C // h).zero_()
+ pointops_cuda.attention_step2_forward_cuda(
+ N, M, h, C, attn, v, index0, index1, output
+ )
+ ctx.M = M
+
+ # print("attn[:5,:5]: ", attn[:5, :5])
+
+ ctx.save_for_backward(attn, v, index0, index1)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_output: (L, h, C//h)
+ output: (M, h), (N, h, C//h), None, None
+ """
+ M = ctx.M
+ attn, v, index0, index1 = ctx.saved_tensors
+ L, h, C_div_h = grad_output.shape
+ N = v.shape[0]
+ C = h * C_div_h
+
+ grad_output = grad_output.contiguous()
+ # print("grad_output.is_contiguous(): ", grad_output.is_contiguous())
+ assert (
+ attn.is_contiguous()
+ and v.is_contiguous()
+ and index0.is_contiguous()
+ and index1.is_contiguous()
+ and grad_output.is_contiguous()
+ )
+
+ # print("back: attn[:5,:5]: ", attn[:5, :5])
+
+ # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape))
+
+ grad_attn = torch.cuda.FloatTensor(M, h).zero_()
+ grad_v = torch.cuda.FloatTensor(N, h, C // h).zero_()
+
+ pointops_cuda.attention_step2_backward_cuda(
+ N, M, h, C, grad_output, index0, index1, attn, v, grad_attn, grad_v
+ )
+ return grad_attn, grad_v, None, None
+
+
+attention_step2_v2 = AttentionStep2_v2.apply
+
+
+class DotProdWithIdx(Function):
+ @staticmethod
+ def forward(ctx, q, index, table, rel_idx):
+ """
+ input: q: (N, h, hdim), index: (M), table: (L, h, hdim, 3), rel_idx: (M, 3)
+ output: output: [M, h]
+ """
+ assert (
+ q.is_contiguous()
+ and index.is_contiguous()
+ and table.is_contiguous()
+ and rel_idx.is_contiguous()
+ )
+
+ N, h, hdim = q.shape
+ M = index.shape[0]
+
+ output = torch.cuda.FloatTensor(M, h).zero_()
+ pointops_cuda.dot_prod_with_idx_forward_cuda(
+ N, M, h, hdim, q, index, table, rel_idx, output
+ )
+ ctx.save_for_backward(q, index, table, rel_idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_output: [M, h]
+ output: (N, h, hdim), None, (L, h, hdim, 3), None
+ """
+ q, index, table, rel_idx = ctx.saved_tensors
+ M, h = grad_output.shape
+ N, _, hdim = q.shape
+ L = table.shape[0]
+
+ grad_output = grad_output.contiguous()
+ assert (
+ q.is_contiguous()
+ and index.is_contiguous()
+ and table.is_contiguous()
+ and rel_idx.is_contiguous()
+ and grad_output.is_contiguous()
+ )
+
+ # print("back: attn[:5,:5]: ", attn[:5, :5])
+
+ # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape))
+
+ grad_q = torch.cuda.FloatTensor(N, h, hdim).zero_()
+ grad_table = torch.cuda.FloatTensor(L, h, hdim, 3).zero_()
+
+ # torch.cuda.synchronize()
+ # start = time.time()
+
+ pointops_cuda.dot_prod_with_idx_backward_cuda(
+ N, M, h, hdim, grad_output, q, index, table, rel_idx, grad_q, grad_table
+ )
+
+ # torch.cuda.synchronize()
+ # end = time.time()
+ # print("time v9: {}".format(end - start))
+ # # input()
+
+ return grad_q, None, grad_table, None
+
+
+dot_prod_with_idx = DotProdWithIdx.apply
+
+
+class DotProdWithIdx_v2(Function):
+ @staticmethod
+ def forward(ctx, q, index_q, k, index_k, table_q, table_k, rel_idx):
+ """
+ input: q: (N, h, hdim), index_q: (M), k: (N, h, hdim), index_k: (M), table_q: (L, h, hdim, 3), table_k: (L, h, hdim, 3), rel_idx: (M, 3)
+ output: output: [M, h]
+ """
+ assert (
+ q.is_contiguous()
+ and index_q.is_contiguous()
+ and k.is_contiguous()
+ and index_k.is_contiguous()
+ and table_q.is_contiguous()
+ and table_k.is_contiguous()
+ and rel_idx.is_contiguous()
+ )
+
+ N, h, hdim = q.shape
+ M = index_q.shape[0]
+ L = table_q.shape[0]
+ assert table_k.shape[0] == L and index_k.shape[0] == M
+
+ # obtain the mapping from block_idx to m_idx
+ rel_idx_merge = (
+ rel_idx[:, 0] + rel_idx[:, 1] * L + rel_idx[:, 2] * (L**2)
+ ) # [M, ]
+ sorted_values, sort_indices = torch.sort(rel_idx_merge)
+ _, counts = torch.unique_consecutive(sorted_values, return_counts=True)
+ rel_idx_offsets = torch.cumsum(counts, dim=-1) # [T,]
+ rel_idx_offsets = torch.cat(
+ [torch.zeros(1, dtype=torch.long).cuda(), rel_idx_offsets], 0
+ ) # [T+1,]
+ n_max = counts.max()
+ T = counts.shape[0]
+
+ # print("M: {}, L: {}, n_max: {}, T: {}".format(M, L, n_max, T))
+ # print("rel_idx_merge.shape: {}, sorted_values.shape: {}".format(rel_idx_merge.shape, sorted_values.shape))
+ # print("counts.shape: {}".format(counts.shape))
+
+ output = torch.cuda.FloatTensor(M, h).zero_()
+ # pointops_cuda.dot_prod_with_idx_forward_cuda(N, M, h, hdim, q, index, table, rel_idx, output)
+ pointops_cuda.dot_prod_with_idx_forward_cuda_v2(
+ N,
+ M,
+ h,
+ hdim,
+ n_max,
+ T,
+ q,
+ index_q,
+ k,
+ index_k,
+ table_q,
+ table_k,
+ rel_idx,
+ rel_idx_offsets.int(),
+ sort_indices.int(),
+ output,
+ )
+
+ ctx.n_max = n_max
+ ctx.T = T
+ ctx.save_for_backward(
+ q,
+ index_q,
+ k,
+ index_k,
+ table_q,
+ table_k,
+ rel_idx,
+ rel_idx_offsets,
+ sort_indices,
+ )
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_output: [M, h]
+ output: grad_q: (N, h, hdim), None, grad_k: (N, h, hdim), None, grad_table_q: (L, h, hdim, 3), grad_table_k: (L, h, hdim, 3), None
+ """
+ (
+ q,
+ index_q,
+ k,
+ index_k,
+ table_q,
+ table_k,
+ rel_idx,
+ rel_idx_offsets,
+ sort_indices,
+ ) = ctx.saved_tensors
+ M, h = grad_output.shape
+ N, _, hdim = q.shape
+ L = table_q.shape[0]
+ T, n_max = ctx.T, ctx.n_max
+
+ grad_output = grad_output.contiguous()
+ assert (
+ q.is_contiguous()
+ and index_q.is_contiguous()
+ and k.is_contiguous()
+ and index_k.is_contiguous()
+ and table_q.is_contiguous()
+ and table_k.is_contiguous()
+ and rel_idx.is_contiguous()
+ and rel_idx_offsets.is_contiguous()
+ and sort_indices.is_contiguous()
+ and grad_output.is_contiguous()
+ )
+
+ # print("back: attn[:5,:5]: ", attn[:5, :5])
+
+ # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape))
+
+ grad_q = torch.cuda.FloatTensor(N, h, hdim).zero_()
+ grad_table_q = torch.cuda.FloatTensor(L, h, hdim, 3).zero_()
+ grad_k = torch.cuda.FloatTensor(N, h, hdim).zero_()
+ grad_table_k = torch.cuda.FloatTensor(L, h, hdim, 3).zero_()
+
+ # torch.cuda.synchronize()
+ # start = time.time()
+
+ pointops_cuda.dot_prod_with_idx_backward_cuda_v2(
+ N,
+ M,
+ h,
+ hdim,
+ n_max,
+ T,
+ grad_output,
+ q,
+ index_q,
+ k,
+ index_k,
+ table_q,
+ table_k,
+ rel_idx,
+ rel_idx_offsets.int(),
+ sort_indices.int(),
+ grad_q,
+ grad_k,
+ grad_table_q,
+ grad_table_k,
+ )
+
+ # torch.cuda.synchronize()
+ # end = time.time()
+ # print("time v9: {}".format(end - start))
+ # # input()
+ return grad_q, None, grad_k, None, grad_table_q, grad_table_k, None
+
+
+dot_prod_with_idx_v2 = DotProdWithIdx_v2.apply
+
+
+class DotProdWithIdx_v3(Function):
+ @staticmethod
+ def forward(ctx, q, index_q_offsets, n_max, k, index_k, table_q, table_k, rel_idx):
+ """
+ input: q: (N, h, hdim), index_q: (M), k: (N, h, hdim), index_k: (M), table_q: (L, h, hdim, 3), table_k: (L, h, hdim, 3), rel_idx: (M, 3)
+ output: output: [M, h]
+ """
+ assert (
+ q.is_contiguous()
+ and index_q_offsets.is_contiguous()
+ and k.is_contiguous()
+ and index_k.is_contiguous()
+ and table_q.is_contiguous()
+ and table_k.is_contiguous()
+ and rel_idx.is_contiguous()
+ )
+
+ N, h, hdim = q.shape
+ M = index_k.shape[0]
+ L = table_q.shape[0]
+ assert table_k.shape[0] == L
+
+ # # obtain the mapping from block_idx to m_idx
+ # rel_idx_merge = rel_idx[:, 0] + rel_idx[:, 1] * L + rel_idx[:, 2] * (L ** 2) #[M, ]
+ # sorted_values, sort_indices = torch.sort(rel_idx_merge)
+ # _, counts = torch.unique_consecutive(sorted_values, return_counts=True)
+ # rel_idx_offsets = torch.cumsum(counts, dim=-1) #[T,]
+ # rel_idx_offsets = torch.cat([torch.zeros(1, dtype=torch.long).cuda(), rel_idx_offsets], 0) #[T+1,]
+ # n_max = counts.max()
+ # T = counts.shape[0]
+
+ # print("M: {}, L: {}, n_max: {}, T: {}".format(M, L, n_max, T))
+ # print("rel_idx_merge.shape: {}, sorted_values.shape: {}".format(rel_idx_merge.shape, sorted_values.shape))
+ # print("counts.shape: {}".format(counts.shape))
+
+ # print("M: {}, L: {}, n_max: {}".format(M, L, n_max))
+
+ output = torch.cuda.FloatTensor(M, h).zero_()
+ # pointops_cuda.dot_prod_with_idx_forward_cuda(N, M, h, hdim, q, index, table, rel_idx, output)
+ pointops_cuda.dot_prod_with_idx_forward_cuda_v3(
+ N,
+ M,
+ h,
+ hdim,
+ n_max,
+ q,
+ index_q_offsets,
+ k,
+ index_k,
+ table_q,
+ table_k,
+ rel_idx,
+ output,
+ )
+
+ ctx.n_max = n_max
+ # ctx.T = T
+ ctx.save_for_backward(q, index_q_offsets, k, index_k, table_q, table_k, rel_idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_output: [M, h]
+ output: grad_q: (N, h, hdim), None, None, grad_k: (N, h, hdim), None, grad_table_q: (L, h, hdim, 3), grad_table_k: (L, h, hdim, 3), None
+ """
+ q, index_q_offsets, k, index_k, table_q, table_k, rel_idx = ctx.saved_tensors
+ M, h = grad_output.shape
+ N, _, hdim = q.shape
+ L = table_q.shape[0]
+ n_max = ctx.n_max
+
+ grad_output = grad_output.contiguous()
+ assert (
+ q.is_contiguous()
+ and index_q_offsets.is_contiguous()
+ and k.is_contiguous()
+ and index_k.is_contiguous()
+ and table_q.is_contiguous()
+ and table_k.is_contiguous()
+ and rel_idx.is_contiguous()
+ and grad_output.is_contiguous()
+ )
+
+ # print("back: attn[:5,:5]: ", attn[:5, :5])
+
+ # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape))
+
+ grad_q = torch.cuda.FloatTensor(N, h, hdim).zero_()
+ grad_table_q = torch.cuda.FloatTensor(L, h, hdim, 3).zero_()
+ grad_k = torch.cuda.FloatTensor(N, h, hdim).zero_()
+ grad_table_k = torch.cuda.FloatTensor(L, h, hdim, 3).zero_()
+
+ # torch.cuda.synchronize()
+ # start = time.time()
+
+ pointops_cuda.dot_prod_with_idx_backward_cuda_v3(
+ N,
+ M,
+ h,
+ hdim,
+ n_max,
+ grad_output,
+ q,
+ index_q_offsets,
+ k,
+ index_k,
+ table_q,
+ table_k,
+ rel_idx,
+ grad_q,
+ grad_k,
+ grad_table_q,
+ grad_table_k,
+ )
+
+ # torch.cuda.synchronize()
+ # end = time.time()
+ # print("time v9: {}".format(end - start))
+ # # input()
+ return grad_q, None, None, grad_k, None, grad_table_q, grad_table_k, None
+
+
+dot_prod_with_idx_v3 = DotProdWithIdx_v3.apply
+
+
+class AttentionStep2WithRelPosValue(Function):
+ @staticmethod
+ def forward(ctx, attn, v, index0, index1, table, rel_idx):
+ """
+ input: attn: (M, h), v: (N, h, hdim), index0: (M), index1: (M), table: (L, h, hdim, 3), rel_idx: (M, 3)
+ output: output: [N, h, hdim]
+ """
+ assert (
+ attn.is_contiguous()
+ and v.is_contiguous()
+ and index0.is_contiguous()
+ and index1.is_contiguous()
+ and table.is_contiguous()
+ and rel_idx.is_contiguous()
+ )
+
+ M, h = attn.shape
+ N_v, h, hdim = v.shape
+ N_q = index0.max().item() + 1
+
+ output = torch.cuda.FloatTensor(N_q, h, hdim).zero_()
+ pointops_cuda.attention_step2_with_rel_pos_value_forward_cuda(
+ N_q, M, h, hdim, attn, v, index0, index1, table, rel_idx, output
+ )
+
+ # print("attn[:5,:5]: ", attn[:5, :5])
+
+ ctx.save_for_backward(attn, v, index0, index1, table, rel_idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_output: (N, h, C//h)
+ output: (M, h), (N, h, C//h), None, None, (L, h, hdim, 3), None
+ """
+ attn, v, index0, index1, table, rel_idx = ctx.saved_tensors
+ N_q, h, hdim = grad_output.shape
+ N_v = v.shape[0]
+ M = attn.shape[0]
+ L = table.shape[0]
+
+ grad_output = grad_output.contiguous()
+ # print("grad_output.is_contiguous(): ", grad_output.is_contiguous())
+ assert (
+ attn.is_contiguous()
+ and v.is_contiguous()
+ and index0.is_contiguous()
+ and index1.is_contiguous()
+ and grad_output.is_contiguous()
+ and table.is_contiguous()
+ and rel_idx.is_contiguous()
+ )
+
+ # print("back: attn[:5,:5]: ", attn[:5, :5])
+
+ # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape))
+
+ grad_attn = torch.cuda.FloatTensor(M, h).zero_()
+ grad_v = torch.cuda.FloatTensor(N_v, h, hdim).zero_()
+ grad_table = torch.cuda.FloatTensor(L, h, hdim, 3).zero_()
+
+ # print("attn.shape: {}, grad_attn.shape: {}".format(attn.shape, grad_attn.shape))
+ # print("v.shape: {}, grad_v.shape: {}".format(v.shape, grad_v.shape))
+ # print("table.shape: {}, grad_table.shape: {}".format(table.shape, grad_table.shape))
+
+ # torch.cuda.synchronize()
+ # start = time.time()
+
+ pointops_cuda.attention_step2_with_rel_pos_value_backward_cuda(
+ N_q,
+ M,
+ h,
+ hdim,
+ grad_output,
+ index0,
+ index1,
+ attn,
+ v,
+ table,
+ rel_idx,
+ grad_attn,
+ grad_v,
+ grad_table,
+ )
+
+ # torch.cuda.synchronize()
+ # end = time.time()
+ # print("time v10: {}".format(end - start))
+ # # input()
+ return grad_attn, grad_v, None, None, grad_table, None
+
+
+attention_step2_with_rel_pos_value = AttentionStep2WithRelPosValue.apply
+
+
+class AttentionStep2WithRelPosValue_v2(Function):
+ @staticmethod
+ def forward(ctx, attn, v, index0_offsets, n_max, index1, table, rel_idx):
+ """
+ input: attn: (M, h), v: (N, h, hdim), index0_offsets: (M), index1: (M), table: (L, h, hdim, 3), rel_idx: (M, 3)
+ output: output: [N, h, hdim]
+ """
+ assert (
+ attn.is_contiguous()
+ and v.is_contiguous()
+ and index0_offsets.is_contiguous()
+ and index1.is_contiguous()
+ and table.is_contiguous()
+ and rel_idx.is_contiguous()
+ )
+
+ M, h = attn.shape
+ N, h, hdim = v.shape
+ # N_q = int(index0_offsets.max().item()) + 1
+
+ output = torch.cuda.FloatTensor(N, h, hdim).zero_()
+ pointops_cuda.attention_step2_with_rel_pos_value_forward_cuda_v2(
+ N,
+ M,
+ h,
+ hdim,
+ n_max,
+ attn,
+ v,
+ index0_offsets,
+ index1,
+ table,
+ rel_idx,
+ output,
+ )
+
+ # print("attn[:5,:5]: ", attn[:5, :5])
+
+ ctx.n_max = n_max
+ ctx.save_for_backward(attn, v, index0_offsets, index1, table, rel_idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_output: (N, h, C//h)
+ output: (M, h), (N, h, C//h), None, None, (L, h, hdim, 3), None
+ """
+ n_max = ctx.n_max
+ attn, v, index0_offsets, index1, table, rel_idx = ctx.saved_tensors
+ N, h, hdim = grad_output.shape
+ N = v.shape[0]
+ M = attn.shape[0]
+ L = table.shape[0]
+
+ # grad_output = grad_output.contiguous()
+ # print("grad_output.is_contiguous(): ", grad_output.is_contiguous())
+ assert (
+ attn.is_contiguous()
+ and v.is_contiguous()
+ and index0_offsets.is_contiguous()
+ and index1.is_contiguous()
+ and grad_output.is_contiguous()
+ and table.is_contiguous()
+ and rel_idx.is_contiguous()
+ )
+
+ # print("back: attn[:5,:5]: ", attn[:5, :5])
+
+ # print("attn.shape: {} v.shape: {}, index0_offsets.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0_offsets.shape, index1.shape))
+
+ grad_attn = torch.cuda.FloatTensor(M, h).zero_()
+ grad_v = torch.cuda.FloatTensor(N, h, hdim).zero_()
+ grad_table = torch.cuda.FloatTensor(L, h, hdim, 3).zero_()
+
+ # print("attn.shape: {}, grad_attn.shape: {}".format(attn.shape, grad_attn.shape))
+ # print("v.shape: {}, grad_v.shape: {}".format(v.shape, grad_v.shape))
+ # print("table.shape: {}, grad_table.shape: {}".format(table.shape, grad_table.shape))
+
+ # torch.cuda.synchronize()
+ # start = time.time()
+
+ pointops_cuda.attention_step2_with_rel_pos_value_backward_cuda_v2(
+ N,
+ M,
+ h,
+ hdim,
+ n_max,
+ grad_output,
+ index0_offsets,
+ index1,
+ attn,
+ v,
+ table,
+ rel_idx,
+ grad_attn,
+ grad_v,
+ grad_table,
+ )
+
+ # torch.cuda.synchronize()
+ # end = time.time()
+ # print("time v10: {}".format(end - start))
+
+ return grad_attn, grad_v, None, None, None, grad_table, None
+
+
+attention_step2_with_rel_pos_value_v2 = AttentionStep2WithRelPosValue_v2.apply
+
+
+def queryandgroup(
+ nsample,
+ xyz,
+ new_xyz,
+ feat,
+ idx,
+ offset,
+ new_offset,
+ use_xyz=True,
+ return_indx=False,
+):
+ """
+ input: xyz: (n, 3), new_xyz: (m, 3), feat: (n, c), idx: (m, nsample), offset: (b), new_offset: (b)
+ output: new_feat: (m, c+3, nsample), grouped_idx: (m, nsample)
+ """
+ assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous()
+ if new_xyz is None:
+ new_xyz = xyz
+ if idx is None:
+ idx, _ = knnquery(nsample, xyz, new_xyz, offset, new_offset) # (m, nsample)
+
+ n, m, c = xyz.shape[0], new_xyz.shape[0], feat.shape[1]
+ grouped_xyz = xyz[idx.view(-1).long(), :].view(m, nsample, 3) # (m, nsample, 3)
+ # grouped_xyz = grouping(xyz, idx) # (m, nsample, 3)
+ # relative position
+ grouped_xyz -= new_xyz.unsqueeze(1) # (m, nsample, 3)
+ grouped_feat = feat[idx.view(-1).long(), :].view(m, nsample, c) # (m, nsample, c)
+ # grouped_feat = grouping(feat, idx) # (m, nsample, c)
+ if use_xyz:
+ if return_indx:
+ return torch.cat((grouped_xyz, grouped_feat), -1), idx # (m, nsample, 3+c)
+ else:
+ return torch.cat((grouped_xyz, grouped_feat), -1)
+ else:
+ if return_indx:
+ return grouped_feat, idx
+ else:
+ return grouped_feat
+
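+# Hedged usage sketch for queryandgroup (illustrative shapes only): with use_xyz=True
+# the relative coordinates are prepended to the grouped features, so for feat of
+# shape (n, 32) and nsample=16 the result is (m, 16, 3 + 32); passing idx=None lets
+# the helper run knnquery internally:
+#
+#   new_feat = queryandgroup(16, xyz, new_xyz, feat, None, offset, new_offset)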
+
+def Divide2Patch(nsample, xyz, offset, return_offset=False, anchor_scale=None):
+ # nsample: 16 xyz: (n, 3) offset: (b)
+ downsample_scale = anchor_scale or nsample
+ new_offset, count = [offset[0].item() // downsample_scale], offset[
+ 0
+ ].item() // downsample_scale
+ for i in range(1, offset.shape[0]):
+ count += (offset[i].item() - offset[i - 1].item()) // downsample_scale
+ new_offset.append(count)
+ # print("donw sample scale:", downsample_scale,"offset:", offset, "newoffset:", new_offset)
+ new_offset = torch.cuda.IntTensor(new_offset)
+ idx = furthestsampling(xyz, offset, new_offset) # (m)
+ new_xyz = xyz[idx.long()]
+ p_idx, _ = knnquery(nsample, xyz, new_xyz, offset, new_offset) # (m, nsample)
+ if return_offset:
+ return p_idx, new_offset
+ else:
+ return p_idx
+
+
+class Subtraction(Function):
+ @staticmethod
+ def forward(ctx, input1, input2, idx):
+ """
+ input: input1: (n, c), input2: (n, c), idx: (n, nsample)
+ output: (n, nsample, c)
+ """
+ assert input1.is_contiguous() and input2.is_contiguous()
+ n, c = input1.shape
+ nsample = idx.shape[-1]
+ output = torch.cuda.FloatTensor(n, nsample, c).zero_()
+ pointops_cuda.subtraction_forward_cuda(
+ n, nsample, c, input1, input2, idx, output
+ )
+ ctx.save_for_backward(idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_out: (n, nsample, c)
+ output: grad_input1: (n, c), grad_input2: (n, c)
+ """
+ (idx,) = ctx.saved_tensors
+ n, nsample, c = grad_output.shape
+ grad_input1 = torch.cuda.FloatTensor(n, c).zero_()
+ grad_input2 = torch.cuda.FloatTensor(n, c).zero_()
+ pointops_cuda.subtraction_backward_cuda(
+ n, nsample, c, idx, grad_output, grad_input1, grad_input2
+ )
+ return grad_input1, grad_input2, None
+
+
+subtraction = Subtraction.apply
+
+
+class Aggregation(Function):
+ @staticmethod
+ def forward(ctx, input, position, weight, idx):
+ """
+ input: input: (n, c), position: (n, nsample, c), weight : (n, nsample, c'), idx: (n, nsample)
+ output: (n, c)
+ """
+ assert (
+ input.is_contiguous()
+ and position.is_contiguous()
+ and weight.is_contiguous()
+ )
+ n, nsample, c = position.shape
+ w_c = weight.shape[-1]
+ output = torch.cuda.FloatTensor(n, c).zero_()
+ pointops_cuda.aggregation_forward_cuda(
+ n, nsample, c, w_c, input, position, weight, idx, output
+ )
+ ctx.save_for_backward(input, position, weight, idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_out: (n, c)
+ output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight : (n, nsample, c')
+ """
+ input, position, weight, idx = ctx.saved_tensors
+ n, nsample, c = position.shape
+ w_c = weight.shape[-1]
+ grad_input = torch.cuda.FloatTensor(n, c).zero_()
+ grad_position = torch.cuda.FloatTensor(n, nsample, c).zero_()
+ grad_weight = torch.cuda.FloatTensor(n, nsample, w_c).zero_()
+ pointops_cuda.aggregation_backward_cuda(
+ n,
+ nsample,
+ c,
+ w_c,
+ input,
+ position,
+ weight,
+ idx,
+ grad_output,
+ grad_input,
+ grad_position,
+ grad_weight,
+ )
+ return grad_input, grad_position, grad_weight, None
+
+
+aggregation = Aggregation.apply
+
+
+def interpolation(xyz, new_xyz, feat, offset, new_offset, k=3):
+ """
+ input: xyz: (m, 3), new_xyz: (n, 3), feat: (m, c), offset: (b), new_offset: (b)
+ output: (n, c)
+ """
+ assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous()
+ idx, dist = knnquery(k, xyz, new_xyz, offset, new_offset) # (n, 3), (n, 3)
+ dist_recip = 1.0 / (dist + 1e-8) # (n, 3)
+ norm = torch.sum(dist_recip, dim=1, keepdim=True)
+ weight = dist_recip / norm # (n, 3)
+
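+ # inverse-distance weighting over the k nearest source points: closer neighbors
+ # contribute more, and the weights are normalized to sum to 1 per output point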
+ new_feat = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_()
+ for i in range(k):
+ new_feat += feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1)
+ return new_feat
+
+
+def interpolation_v2(xyz, new_xyz, feat, offset, new_offset, k=3):
+ """
+ input: xyz: (m, 3), new_xyz: (n, 3), feat: (m, c), offset: (b), new_offset: (b)
+ output: (n, c)
+ """
+ assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous()
+
+ idx, _ = knnquery(k, xyz, new_xyz, offset, new_offset) # (n, 3), (n, 3)
+
+ # print("e3: idx.shape: {}, idx[:5]: {}".format(idx.shape, idx[:5]))
+
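+ # recompute neighbor distances directly from the coordinates instead of using the
+ # distances returned by knnquery (the commented-out checks below compare the two)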
+ dist = torch.sqrt(((new_xyz.unsqueeze(1) - xyz[idx.long()]) ** 2).sum(-1) + 1e-8)
+
+ # print("e4: dist.shape: {}, dist[:5]: {}".format(dist.shape, dist[:5]))
+ # print("((_-dist)**2).max(): ", ((_-dist)**2).max())
+ # input()
+
+ dist_recip = 1.0 / (dist + 1e-8) # (n, 3)
+ norm = torch.sum(dist_recip, dim=1, keepdim=True)
+ weight = dist_recip / norm # (n, 3)
+
+ new_feat = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_()
+ for i in range(k):
+ new_feat += feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1)
+ return new_feat
+
+
+class Interpolation(Function):
+ @staticmethod
+ def forward(ctx, xyz, new_xyz, input, offset, new_offset, k=3):
+ """
+ input: xyz: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b)
+ output: (n, c)
+ """
+ assert xyz.is_contiguous() and new_xyz.is_contiguous() and input.is_contiguous()
+ idx, dist = knnquery(k, xyz, new_xyz, offset, new_offset) # (n, k), (n, k)
+ dist_recip = 1.0 / (dist + 1e-8) # (n, k)
+ norm = torch.sum(dist_recip, dim=1, keepdim=True)
+ weight = dist_recip / norm # (n, k)
+
+ n, c, m = new_xyz.shape[0], input.shape[1], input.shape[0]
+ output = torch.cuda.FloatTensor(n, c).zero_()
+ pointops_cuda.interpolation_forward_cuda(n, c, k, input, idx, weight, output)
+ ctx.m, ctx.k = m, k
+ ctx.save_for_backward(idx, weight)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: xyz: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b)
+ output: (n, c)
+ """
+ m, k = ctx.m, ctx.k
+ idx, weight = ctx.saved_tensors
+ n, c = grad_output.shape
+ grad_input = torch.cuda.FloatTensor(m, c).zero_()
+ pointops_cuda.interpolation_backward_cuda(
+ n, c, k, grad_output, idx, weight, grad_input
+ )
+ return None, None, grad_input, None, None, None
+
+
+interpolation2 = Interpolation.apply
diff --git a/Pointcept/libs/pointops2/functions/pointops2.py b/Pointcept/libs/pointops2/functions/pointops2.py
new file mode 100644
index 0000000000000000000000000000000000000000..e019eca4235e014421f0df3097c93bcec2d3a3d2
--- /dev/null
+++ b/Pointcept/libs/pointops2/functions/pointops2.py
@@ -0,0 +1,253 @@
+from typing import Tuple
+
+import torch
+from torch.autograd import Function
+import torch.nn as nn
+
+import pointops2_cuda as pointops_cuda
+
+
+class FurthestSampling(Function):
+ @staticmethod
+ def forward(ctx, xyz, offset, new_offset):
+ """
+ input: xyz: (n, 3), offset: (b), new_offset: (b)
+ output: idx: (m)
+ """
+ assert xyz.is_contiguous()
+ n, b, n_max = xyz.shape[0], offset.shape[0], offset[0]
+ for i in range(1, b):
+ n_max = max(offset[i] - offset[i - 1], n_max)
+ idx = torch.cuda.IntTensor(new_offset[b - 1].item()).zero_()
+ tmp = torch.cuda.FloatTensor(n).fill_(1e10)
+ pointops_cuda.furthestsampling_cuda(b, n_max, xyz, offset, new_offset, tmp, idx)
+ del tmp
+ return idx
+
+
+furthestsampling = FurthestSampling.apply
+
+
+class KNNQuery(Function):
+ @staticmethod
+ def forward(ctx, nsample, xyz, new_xyz, offset, new_offset):
+ """
+ input: xyz: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b)
+ output: idx: (m, nsample), dist2: (m, nsample)
+ """
+ if new_xyz is None:
+ new_xyz = xyz
+ assert xyz.is_contiguous() and new_xyz.is_contiguous()
+ m = new_xyz.shape[0]
+ idx = torch.cuda.IntTensor(m, nsample).zero_()
+ dist2 = torch.cuda.FloatTensor(m, nsample).zero_()
+ pointops_cuda.knnquery_cuda(
+ m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2
+ )
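+ # knnquery_cuda fills dist2 with squared distances; return true distances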
+ return idx, torch.sqrt(dist2)
+
+
+knnquery = KNNQuery.apply
+
+
+class Grouping(Function):
+ @staticmethod
+ def forward(ctx, input, idx):
+ """
+ input: input: (n, c), idx : (m, nsample)
+ output: (m, nsample, c)
+ """
+ assert input.is_contiguous() and idx.is_contiguous()
+ m, nsample, n, c = idx.shape[0], idx.shape[1], input.shape[0], input.shape[1]
+ output = torch.cuda.FloatTensor(m, nsample, c)
+ pointops_cuda.grouping_forward_cuda(m, nsample, c, input, idx, output)
+ ctx.n = n
+ ctx.save_for_backward(idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_out: (m, c, nsample)
+ output: (n, c), None
+ """
+ n = ctx.n
+ (idx,) = ctx.saved_tensors
+ m, nsample, c = grad_output.shape
+ grad_input = torch.cuda.FloatTensor(n, c).zero_()
+ pointops_cuda.grouping_backward_cuda(
+ m, nsample, c, grad_output, idx, grad_input
+ )
+ return grad_input, None
+
+
+grouping = Grouping.apply
+
+
+def queryandgroup(nsample, xyz, new_xyz, feat, idx, offset, new_offset, use_xyz=True):
+ """
+ input: xyz: (n, 3), new_xyz: (m, 3), feat: (n, c), idx: (m, nsample), offset: (b), new_offset: (b)
+ output: new_feat: (m, nsample, 3+c)
+ """
+ assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous()
+ if new_xyz is None:
+ new_xyz = xyz
+ if idx is None:
+ idx, _ = knnquery(nsample, xyz, new_xyz, offset, new_offset) # (m, nsample)
+
+ n, m, c = xyz.shape[0], new_xyz.shape[0], feat.shape[1]
+ grouped_xyz = xyz[idx.view(-1).long(), :].view(m, nsample, 3) # (m, nsample, 3)
+ # grouped_xyz = grouping(xyz, idx) # (m, nsample, 3)
+ grouped_xyz -= new_xyz.unsqueeze(1) # (m, nsample, 3)
+ grouped_feat = feat[idx.view(-1).long(), :].view(m, nsample, c) # (m, nsample, c)
+ # grouped_feat = grouping(feat, idx) # (m, nsample, c)
+
+ if use_xyz:
+ return torch.cat((grouped_xyz, grouped_feat), -1) # (m, nsample, 3+c)
+ else:
+ return grouped_feat
+
+
+class Subtraction(Function):
+ @staticmethod
+ def forward(ctx, input1, input2, idx):
+ """
+ input: input1: (n, c), input2: (n, c), idx: (n, nsample)
+ output: (n, nsample, c)
+ """
+ assert input1.is_contiguous() and input2.is_contiguous()
+ n, c = input1.shape
+ nsample = idx.shape[-1]
+ output = torch.cuda.FloatTensor(n, nsample, c).zero_()
+ pointops_cuda.subtraction_forward_cuda(
+ n, nsample, c, input1, input2, idx, output
+ )
+ ctx.save_for_backward(idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_out: (n, nsample, c)
+ output: grad_input1: (n, c), grad_input2: (n, c)
+ """
+ (idx,) = ctx.saved_tensors
+ n, nsample, c = grad_output.shape
+ grad_input1 = torch.cuda.FloatTensor(n, c).zero_()
+ grad_input2 = torch.cuda.FloatTensor(n, c).zero_()
+ pointops_cuda.subtraction_backward_cuda(
+ n, nsample, c, idx, grad_output, grad_input1, grad_input2
+ )
+ return grad_input1, grad_input2, None
+
+
+subtraction = Subtraction.apply
+
+
+class Aggregation(Function):
+ @staticmethod
+ def forward(ctx, input, position, weight, idx):
+ """
+ input: input: (n, c), position: (n, nsample, c), weight : (n, nsample, c'), idx: (n, nsample)
+ output: (n, c)
+ """
+ assert (
+ input.is_contiguous()
+ and position.is_contiguous()
+ and weight.is_contiguous()
+ )
+ n, nsample, c = position.shape
+ w_c = weight.shape[-1]
+ output = torch.cuda.FloatTensor(n, c).zero_()
+ pointops_cuda.aggregation_forward_cuda(
+ n, nsample, c, w_c, input, position, weight, idx, output
+ )
+ ctx.save_for_backward(input, position, weight, idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_out: (n, c)
+ output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight : (n, nsample, c')
+ """
+ input, position, weight, idx = ctx.saved_tensors
+ n, nsample, c = position.shape
+ w_c = weight.shape[-1]
+ grad_input = torch.cuda.FloatTensor(n, c).zero_()
+ grad_position = torch.cuda.FloatTensor(n, nsample, c).zero_()
+ grad_weight = torch.cuda.FloatTensor(n, nsample, w_c).zero_()
+ pointops_cuda.aggregation_backward_cuda(
+ n,
+ nsample,
+ c,
+ w_c,
+ input,
+ position,
+ weight,
+ idx,
+ grad_output,
+ grad_input,
+ grad_position,
+ grad_weight,
+ )
+ return grad_input, grad_position, grad_weight, None
+
+
+aggregation = Aggregation.apply
+
+
+def interpolation(xyz, new_xyz, feat, offset, new_offset, k=3):
+ """
+ input: xyz: (m, 3), new_xyz: (n, 3), feat: (m, c), offset: (b), new_offset: (b)
+ output: (n, c)
+ """
+ assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous()
+ idx, dist = knnquery(k, xyz, new_xyz, offset, new_offset) # (n, 3), (n, 3)
+ dist_recip = 1.0 / (dist + 1e-8) # (n, 3)
+ norm = torch.sum(dist_recip, dim=1, keepdim=True)
+ weight = dist_recip / norm # (n, 3)
+
+ new_feat = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_()
+ for i in range(k):
+ new_feat += feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1)
+ return new_feat
+
+
+class Interpolation(Function):
+ @staticmethod
+ def forward(ctx, xyz, new_xyz, input, offset, new_offset, k=3):
+ """
+ input: xyz: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b)
+ output: (n, c)
+ """
+ assert xyz.is_contiguous() and new_xyz.is_contiguous() and input.is_contiguous()
+ idx, dist = knnquery(k, xyz, new_xyz, offset, new_offset) # (n, k), (n, k)
+ dist_recip = 1.0 / (dist + 1e-8) # (n, k)
+ norm = torch.sum(dist_recip, dim=1, keepdim=True)
+ weight = dist_recip / norm # (n, k)
+
+ n, c, m = new_xyz.shape[0], input.shape[1], input.shape[0]
+ output = torch.cuda.FloatTensor(n, c).zero_()
+ pointops_cuda.interpolation_forward_cuda(n, c, k, input, idx, weight, output)
+ ctx.m, ctx.k = m, k
+ ctx.save_for_backward(idx, weight)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: xyz: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b)
+ output: (n, c)
+ """
+ m, k = ctx.m, ctx.k
+ idx, weight = ctx.saved_tensors
+ n, c = grad_output.shape
+ grad_input = torch.cuda.FloatTensor(m, c).zero_()
+ pointops_cuda.interpolation_backward_cuda(
+ n, c, k, grad_output, idx, weight, grad_input
+ )
+ return None, None, grad_input, None, None, None
+
+
+interpolation2 = Interpolation.apply
diff --git a/Pointcept/libs/pointops2/functions/pointops_ablation.py b/Pointcept/libs/pointops2/functions/pointops_ablation.py
new file mode 100644
index 0000000000000000000000000000000000000000..abfcc8bc1fb99379ff6d0fd97e19b7ca7fb0e723
--- /dev/null
+++ b/Pointcept/libs/pointops2/functions/pointops_ablation.py
@@ -0,0 +1,256 @@
+from typing import Tuple
+
+import torch
+from torch.autograd import Function
+import torch.nn as nn
+
+import pointops2_cuda as pointops_cuda
+
+
+class FurthestSampling(Function):
+ @staticmethod
+ def forward(ctx, xyz, offset, new_offset):
+ """
+ input: xyz: (n, 3), offset: (b), new_offset: (b)
+ output: idx: (m)
+ """
+ assert xyz.is_contiguous()
+ n, b, n_max = xyz.shape[0], offset.shape[0], offset[0]
+ for i in range(1, b):
+ n_max = max(offset[i] - offset[i - 1], n_max)
+ idx = torch.cuda.IntTensor(new_offset[b - 1].item()).zero_()
+ tmp = torch.cuda.FloatTensor(n).fill_(1e10)
+ pointops_cuda.furthestsampling_cuda(b, n_max, xyz, offset, new_offset, tmp, idx)
+ del tmp
+ return idx
+
+
+furthestsampling = FurthestSampling.apply
+
+
+class KNNQuery(Function):
+ @staticmethod
+ def forward(ctx, nsample, xyz, new_xyz, offset, new_offset):
+ """
+ input: xyz: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b)
+ output: idx: (m, nsample), dist2: (m, nsample)
+ """
+ if new_xyz is None:
+ new_xyz = xyz
+ assert xyz.is_contiguous() and new_xyz.is_contiguous()
+ m = new_xyz.shape[0]
+ idx = torch.cuda.IntTensor(m, nsample).zero_()
+ dist2 = torch.cuda.FloatTensor(m, nsample).zero_()
+ pointops_cuda.knnquery_cuda(
+ m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2
+ )
+ return idx, torch.sqrt(dist2)
+
+
+knnquery = KNNQuery.apply
+
+
+class Grouping(Function):
+ @staticmethod
+ def forward(ctx, input, idx):
+ """
+ input: input: (n, c), idx : (m, nsample)
+ output: (m, nsample, c)
+ """
+ assert input.is_contiguous() and idx.is_contiguous()
+ m, nsample, n, c = idx.shape[0], idx.shape[1], input.shape[0], input.shape[1]
+ output = torch.cuda.FloatTensor(m, nsample, c)
+ pointops_cuda.grouping_forward_cuda(m, nsample, c, input, idx, output)
+ ctx.n = n
+ ctx.save_for_backward(idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_out: (m, c, nsample)
+ output: (n, c), None
+ """
+ n = ctx.n
+ (idx,) = ctx.saved_tensors
+ m, nsample, c = grad_output.shape
+ grad_input = torch.cuda.FloatTensor(n, c).zero_()
+ pointops_cuda.grouping_backward_cuda(
+ m, nsample, c, grad_output, idx, grad_input
+ )
+ return grad_input, None
+
+
+grouping = Grouping.apply
+
+
+def queryandgroup(
+ nsample, xyz, new_xyz, feat, idx, offset, new_offset, use_xyz=True, relative=True
+):
+ """
+ input: xyz: (n, 3), new_xyz: (m, 3), feat: (n, c), idx: (m, nsample), offset: (b), new_offset: (b)
+ output: new_feat: (m, nsample, 3+c)
+ """
+ assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous()
+ if new_xyz is None:
+ new_xyz = xyz
+ if idx is None:
+ idx, _ = knnquery(nsample, xyz, new_xyz, offset, new_offset) # (m, nsample)
+
+ n, m, c = xyz.shape[0], new_xyz.shape[0], feat.shape[1]
+ grouped_xyz = xyz[idx.view(-1).long(), :].view(m, nsample, 3) # (m, nsample, 3)
+ # grouped_xyz = grouping(xyz, idx) # (m, nsample, 3)
+ if relative:
+ grouped_xyz -= new_xyz.unsqueeze(1) # (m, nsample, 3)
+ grouped_feat = feat[idx.view(-1).long(), :].view(m, nsample, c) # (m, nsample, c)
+ # grouped_feat = grouping(feat, idx) # (m, nsample, c)
+
+ if use_xyz:
+ return torch.cat((grouped_xyz, grouped_feat), -1) # (m, nsample, 3+c)
+ else:
+ return grouped_feat
+
+
+class Subtraction(Function):
+ @staticmethod
+ def forward(ctx, input1, input2, idx):
+ """
+ input: input1: (n, c), input2: (n, c), idx: (n, nsample)
+ output: (n, nsample, c)
+ """
+ assert input1.is_contiguous() and input2.is_contiguous()
+ n, c = input1.shape
+ nsample = idx.shape[-1]
+ output = torch.cuda.FloatTensor(n, nsample, c).zero_()
+ pointops_cuda.subtraction_forward_cuda(
+ n, nsample, c, input1, input2, idx, output
+ )
+ ctx.save_for_backward(idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_out: (n, nsample, c)
+ output: grad_input1: (n, c), grad_input2: (n, c)
+ """
+ (idx,) = ctx.saved_tensors
+ n, nsample, c = grad_output.shape
+ grad_input1 = torch.cuda.FloatTensor(n, c).zero_()
+ grad_input2 = torch.cuda.FloatTensor(n, c).zero_()
+ pointops_cuda.subtraction_backward_cuda(
+ n, nsample, c, idx, grad_output, grad_input1, grad_input2
+ )
+ return grad_input1, grad_input2, None
+
+
+subtraction = Subtraction.apply
+
+
+class Aggregation(Function):
+ @staticmethod
+ def forward(ctx, input, position, weight, idx):
+ """
+ input: input: (n, c), position: (n, nsample, c), weight : (n, nsample, c'), idx: (n, nsample)
+ output: (n, c)
+ """
+ assert (
+ input.is_contiguous()
+ and position.is_contiguous()
+ and weight.is_contiguous()
+ )
+ n, nsample, c = position.shape
+ w_c = weight.shape[-1]
+ output = torch.cuda.FloatTensor(n, c).zero_()
+ pointops_cuda.aggregation_forward_cuda(
+ n, nsample, c, w_c, input, position, weight, idx, output
+ )
+ ctx.save_for_backward(input, position, weight, idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_out: (n, c)
+ output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight : (n, nsample, c')
+ """
+ input, position, weight, idx = ctx.saved_tensors
+ n, nsample, c = position.shape
+ w_c = weight.shape[-1]
+ grad_input = torch.cuda.FloatTensor(n, c).zero_()
+ grad_position = torch.cuda.FloatTensor(n, nsample, c).zero_()
+ grad_weight = torch.cuda.FloatTensor(n, nsample, w_c).zero_()
+ pointops_cuda.aggregation_backward_cuda(
+ n,
+ nsample,
+ c,
+ w_c,
+ input,
+ position,
+ weight,
+ idx,
+ grad_output,
+ grad_input,
+ grad_position,
+ grad_weight,
+ )
+ return grad_input, grad_position, grad_weight, None
+
+
+aggregation = Aggregation.apply
+
+
+def interpolation(xyz, new_xyz, feat, offset, new_offset, k=3):
+ """
+ input: xyz: (m, 3), new_xyz: (n, 3), feat: (m, c), offset: (b), new_offset: (b)
+ output: (n, c)
+ """
+ assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous()
+ idx, dist = knnquery(k, xyz, new_xyz, offset, new_offset) # (n, 3), (n, 3)
+ dist_recip = 1.0 / (dist + 1e-8) # (n, 3)
+ norm = torch.sum(dist_recip, dim=1, keepdim=True)
+ weight = dist_recip / norm # (n, 3)
+
+ new_feat = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_()
+ for i in range(k):
+ new_feat += feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1)
+ return new_feat
+
+
+class Interpolation(Function):
+ @staticmethod
+ def forward(ctx, xyz, new_xyz, input, offset, new_offset, k=3):
+ """
+ input: xyz: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b)
+ output: (n, c)
+ """
+ assert xyz.is_contiguous() and new_xyz.is_contiguous() and input.is_contiguous()
+ idx, dist = knnquery(k, xyz, new_xyz, offset, new_offset) # (n, k), (n, k)
+ dist_recip = 1.0 / (dist + 1e-8) # (n, k)
+ norm = torch.sum(dist_recip, dim=1, keepdim=True)
+ weight = dist_recip / norm # (n, k)
+
+ n, c, m = new_xyz.shape[0], input.shape[1], input.shape[0]
+ output = torch.cuda.FloatTensor(n, c).zero_()
+ pointops_cuda.interpolation_forward_cuda(n, c, k, input, idx, weight, output)
+ ctx.m, ctx.k = m, k
+ ctx.save_for_backward(idx, weight)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: xyz: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b)
+ output: (n, c)
+ """
+ m, k = ctx.m, ctx.k
+ idx, weight = ctx.saved_tensors
+ n, c = grad_output.shape
+ grad_input = torch.cuda.FloatTensor(m, c).zero_()
+ pointops_cuda.interpolation_backward_cuda(
+ n, c, k, grad_output, idx, weight, grad_input
+ )
+ return None, None, grad_input, None, None, None
+
+
+interpolation2 = Interpolation.apply
diff --git a/Pointcept/libs/pointops2/functions/test_attention_op_step1.py b/Pointcept/libs/pointops2/functions/test_attention_op_step1.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2d8428c8e283811db156acc0e6ba563f92e72ce
--- /dev/null
+++ b/Pointcept/libs/pointops2/functions/test_attention_op_step1.py
@@ -0,0 +1,106 @@
+import torch
+import pointops
+from torch_scatter import (
+ scatter_max,
+ scatter_mean,
+ scatter_add,
+ scatter_min,
+ scatter_sum,
+)
+
+torch.manual_seed(1)
+
+M = 800000
+N = 35000
+C = 96
+h = 6
+query = torch.rand(N, h, C // h).cuda()
+key = torch.rand(N, h, C // h).cuda()
+
+index_0 = torch.rand(M)
+index_0[index_0 < 0] = 0
+index_0 = (index_0 * N).long().cuda()
+
+index_1 = torch.rand(M)
+index_1[index_1 < 0] = 0
+index_1 = (index_1 * N).long().cuda()
+
+query.requires_grad = True
+key.requires_grad = True
+
+# rearrange index for acceleration
+index_0, indices = torch.sort(index_0) # [M,]
+index_1 = index_1[indices] # [M,]
+index_0_counts = index_0.bincount()
+
+print("index_0_counts.shape: ", index_0_counts.shape)
+
+n_max = index_0_counts.max()
+index_0_offsets = index_0_counts.cumsum(dim=-1) # [N]
+
+print("v1 index_0_offsets.shape: ", index_0_offsets.shape)
+
+index_0_offsets = torch.cat(
+ [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0
+) # [N+1]
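+# index_0_offsets is a CSR-style row-pointer array: the neighbors of query i are
+# index_1[index_0_offsets[i]:index_0_offsets[i+1]]; n_max is the largest per-query
+# neighbor count and is passed to the fused v2 op below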
+
+# print("index_0[:100]: ", index_0[:100])
+print("n_max: ", n_max)
+print("index_0_offsets.shape: ", index_0_offsets.shape)
+# input()
+
+print("index_0_offsets[:100]: ", index_0_offsets[:100])
+print("index_1[300:320]: ", index_1[300:320])
+
+
+attn_flat = pointops.attention_step1(
+ query.float(), key.float(), index_0.int(), index_1.int()
+)
+# loss = attn_flat.sum()
+# loss.backward()
+print(
+ "attn_flat.shape: {}, attn_flat[300:320,:10]: {}".format(
+ attn_flat.shape, attn_flat[300:320, :10]
+ )
+)
+# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
+# print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
+# input()
+
+print("query.is_contiguous(): ", query.is_contiguous())
+print("key.is_contiguous(): ", key.is_contiguous())
+print("index_0.is_contiguous(): ", index_0.is_contiguous())
+print("index_1.is_contiguous(): ", index_1.is_contiguous())
+
+attn_flat_v2 = pointops.attention_step1_v2(
+ query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max
+)
+# loss = attn_flat_v2.sum()
+# loss.backward()
+print(
+ "attn_flat_v2.shape: {}, attn_flat_v2[300:320,:10]: {}".format(
+ attn_flat_v2.shape, attn_flat_v2[300:320, :10]
+ )
+)
+# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
+# print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
+# input()
+
+mask = attn_flat_v2.sum(-1) != 0
+print("mask.sum(): ", mask.sum())
+print(
+ "attn_flat_v2[mask] - attn_flat[mask]: ",
+ ((attn_flat_v2[mask] - attn_flat[mask]) ** 2).max(),
+)
+
+
+print(
+ "((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ",
+ ((attn_flat - attn_flat_v2) ** 2 < 1e-8).all(),
+)
+
+selected = 10000
+print(
+ "torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ",
+ torch.max((attn_flat[:selected] - attn_flat_v2[:selected]) ** 2, 0),
+)
diff --git a/Pointcept/libs/pointops2/functions/test_attention_op_step1_v2.py b/Pointcept/libs/pointops2/functions/test_attention_op_step1_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..941ea13da10fd4567aeb16b30740899535d6a0a6
--- /dev/null
+++ b/Pointcept/libs/pointops2/functions/test_attention_op_step1_v2.py
@@ -0,0 +1,123 @@
+import torch
+import pointops
+from torch_scatter import (
+ scatter_max,
+ scatter_mean,
+ scatter_add,
+ scatter_min,
+ scatter_sum,
+)
+
+torch.manual_seed(1)
+
+M = 800000
+N = 35000
+C = 96
+h = 6
+query = torch.rand(N, h, C // h).cuda()
+key = torch.rand(N, h, C // h).cuda()
+
+index_0 = torch.rand(M)
+index_0[index_0 < 0] = 0
+index_0 = (index_0 * N).long().cuda()
+
+index_1 = torch.rand(M)
+index_1[index_1 < 0] = 0
+index_1 = (index_1 * N).long().cuda()
+
+query.requires_grad = True
+key.requires_grad = True
+
+
+attn_flat = pointops.attention_step1(
+ query.float(), key.float(), index_0.int(), index_1.int()
+)
+loss = attn_flat.sum()
+loss.backward()
+print(
+ "attn_flat.shape: {}, attn_flat[:20,:10]: {}".format(
+ attn_flat.shape, attn_flat[:20, :10]
+ )
+)
+print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
+print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
+input()
+
+
+# rearrange index for acceleration
+index_0, indices = torch.sort(index_0) # [M,]
+index_1 = index_1[indices] # [M,]
+index_0_counts = index_0.bincount()
+
+print("index_0_counts.shape: ", index_0_counts.shape)
+
+n_max = index_0_counts.max()
+index_0_offsets = index_0_counts.cumsum(dim=-1) # [N]
+
+print("v1 index_0_offsets.shape: ", index_0_offsets.shape)
+
+index_0_offsets = torch.cat(
+ [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0
+) # [N+1]
+
+# print("index_0[:100]: ", index_0[:100])
+print("n_max: ", n_max)
+print("index_0_offsets.shape: ", index_0_offsets.shape)
+# input()
+
+print("index_0_offsets[:100]: ", index_0_offsets[:100])
+print("index_1[:20]: ", index_1[:20])
+
+
+attn_flat = pointops.attention_step1(
+ query.float(), key.float(), index_0.int(), index_1.int()
+)
+# loss = attn_flat.sum()
+# loss.backward()
+# # attn_flat = pointops.attention_step1(query.float(), key.float(), index_0.int(), index_1.int())
+# # loss = attn_flat.sum()
+# # loss.backward()
+# print("attn_flat.shape: {}, attn_flat[:20,:10]: {}".format(attn_flat.shape, attn_flat[:20,:10]))
+# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
+# print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
+# input()
+
+print("query.is_contiguous(): ", query.is_contiguous())
+print("key.is_contiguous(): ", key.is_contiguous())
+print("index_0.is_contiguous(): ", index_0.is_contiguous())
+print("index_1.is_contiguous(): ", index_1.is_contiguous())
+
+attn_flat_v2 = pointops.attention_step1_v2(
+ query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max
+)
+loss = attn_flat_v2.sum()
+loss.backward()
+
+# attn_flat_v2 = pointops.attention_step1_v2(query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max)
+# loss = attn_flat_v2.sum()
+# loss.backward()
+
+print(
+ "attn_flat_v2.shape: {}, attn_flat_v2[:20,:10]: {}".format(
+ attn_flat_v2.shape, attn_flat_v2[:20, :10]
+ )
+)
+print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
+print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
+# input()
+
+# mask = attn_flat_v2.sum(-1) != 0
+# print("mask.sum(): ", mask.sum())
+# print("attn_flat_v2[mask] - attn_flat[mask]: ", ((attn_flat_v2[mask] - attn_flat[mask])**2).max())
+
+
+print(
+ "((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ",
+ ((attn_flat - attn_flat_v2) ** 2 < 1e-8).all(),
+)
+
+selected = 10000
+print(
+ "torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ",
+ torch.max((attn_flat[:selected] - attn_flat_v2[:selected]) ** 2, 0),
+)
diff --git a/Pointcept/libs/pointops2/functions/test_attention_op_step2.py b/Pointcept/libs/pointops2/functions/test_attention_op_step2.py
new file mode 100644
index 0000000000000000000000000000000000000000..036340377abedda932c74bb014ec82052ef2c884
--- /dev/null
+++ b/Pointcept/libs/pointops2/functions/test_attention_op_step2.py
@@ -0,0 +1,62 @@
+import torch
+import pointops
+from torch_scatter import (
+ scatter_max,
+ scatter_mean,
+ scatter_add,
+ scatter_min,
+ scatter_sum,
+)
+
+torch.manual_seed(1)
+
+M = 800000
+N = 35000
+C = 96
+h = 6
+softmax_attn_flat = torch.rand(M, h).cuda()
+value = torch.rand(N, h, C // h).cuda()
+
+index_0 = torch.rand(M)
+index_0[index_0 < 0] = 0
+index_0 = (index_0 * N).long().cuda()
+
+index_1 = torch.rand(M)
+index_1[index_1 < 0] = 0
+index_1 = (index_1 * N).long().cuda()
+
+softmax_attn_flat.requires_grad = True
+value.requires_grad = True
+
+# reference implementation with torch_scatter (used for the comparison at the end)
+value_flat = value[index_1]  # [M, num_heads, C // num_heads]
+x = (softmax_attn_flat.unsqueeze(-1) * value_flat).reshape(M, C)
+x = scatter_sum(src=x, index=index_0, dim=0, dim_size=N)  # [N, C]
+# loss = x.sum()
+# loss.backward()
+
+# print("x.shape: {}, x[:5,:10]: {}".format(x.shape, x[:5,:10]))
+# print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10])
+# print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5])
+# input()
+
+print("softmax_attn_flat.is_contiguous(): ", softmax_attn_flat.is_contiguous())
+print("value.is_contiguous(): ", value.is_contiguous())
+print("index_0.is_contiguous(): ", index_0.is_contiguous())
+print("index_1.is_contiguous(): ", index_1.is_contiguous())
+
+x_v2 = pointops.attention_step2(
+ softmax_attn_flat.float(), value.float(), index_0.int(), index_1.int()
+)
+x_v2 = x_v2.view(N, C)
+loss = x_v2.sum()
+loss.backward()
+
+print("x_v2.shape: {}, x_v2[:5,:10]: {}".format(x_v2.shape, x_v2[:5, :10]))
+
+print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10])
+print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5])
+input()
+
+print("((x-x_v2)**2 < 1e-8).all(): ", ((x - x_v2) ** 2 < 1e-8).all())
+
+print("torch.max((x-x_v2)**2): ", torch.max((x - x_v2) ** 2))
diff --git a/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1.py b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1.py
new file mode 100644
index 0000000000000000000000000000000000000000..145c0fcbf765e65b52afc0c9fcc49041fdfd7d0d
--- /dev/null
+++ b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1.py
@@ -0,0 +1,65 @@
+import torch
+import pointops
+from torch_scatter import (
+ scatter_max,
+ scatter_mean,
+ scatter_add,
+ scatter_min,
+ scatter_sum,
+)
+
+torch.manual_seed(1)
+
+M = 80000
+N = 3500
+hdim = 16
+h = 6
+L = 31
+query = torch.rand(N, h, hdim).cuda()
+table = torch.rand(L, h, hdim, 3).cuda()
+
+index = torch.rand(M)
+index[index < 0] = 0
+index = (index * N).long().cuda()
+
+rel_index = torch.rand(M, 3)
+rel_index[rel_index < 0] = 0
+rel_index = (rel_index * L).long().cuda()
+
+query.requires_grad = True
+table.requires_grad = True
+
+# query_flat = query[index] #[M, h, hdim]
+# table_x, table_y, table_z = table[:,:,:,0], table[:,:,:,1], table[:,:,:,2] #[L, h, hdim]
+# rel_index_x, rel_index_y, rel_index_z = rel_index[:,0], rel_index[:,1], rel_index[:,2] #[M]
+# rel_pos_encoding = table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z] #[M, h, hdim]
+# output = (query_flat * rel_pos_encoding).sum(-1) #[M, h]
+# loss = output.mean()
+# loss.backward()
+
+# print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10]))
+# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
+# print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
+# input()
+
+# print("query.is_contiguous(): ", query.is_contiguous())
+# print("key.is_contiguous(): ", key.is_contiguous())
+# print("index_0.is_contiguous(): ", index_0.is_contiguous())
+# print("index_1.is_contiguous(): ", index_1.is_contiguous())
+
+output_v2 = pointops.dot_prod_with_idx(query, index.int(), table, rel_index.int())
+loss = output_v2.mean()
+loss.backward()
+
+print(
+ "output_v2.shape: {}, output_v2[:5,:10]: {}".format(
+ output_v2.shape, output_v2[:5, :10]
+ )
+)
+print("v2: query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
+print("v2: table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
+input()
+
+# print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max())
+
+# print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2))
diff --git a/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1_v2.py b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..9bf9975a725bfb30e8c73449668e41e0f53917d5
--- /dev/null
+++ b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1_v2.py
@@ -0,0 +1,75 @@
+import torch
+import pointops
+from torch_scatter import (
+ scatter_max,
+ scatter_mean,
+ scatter_add,
+ scatter_min,
+ scatter_sum,
+)
+
+torch.manual_seed(1)
+
+M = 80000
+N = 3500
+hdim = 16
+h = 6
+L = 31
+query = torch.rand(N, h, hdim).cuda()
+table_q = torch.rand(L, h, hdim, 3).cuda()
+key = torch.rand(N, h, hdim).cuda()
+table_k = torch.rand(L, h, hdim, 3).cuda()
+
+index_q = torch.rand(M)
+index_q[index_q < 0] = 0
+index_q = (index_q * N).long().cuda()
+
+index_k = torch.rand(M)
+index_k[index_k < 0] = 0
+index_k = (index_k * N).long().cuda()
+
+rel_index = torch.rand(M, 3)
+rel_index[rel_index < 0] = 0
+rel_index = (rel_index * L).long().cuda()
+
+query.requires_grad = True
+table_q.requires_grad = True
+key.requires_grad = True
+table_k.requires_grad = True
+
+output1 = pointops.dot_prod_with_idx(query, index_q.int(), table_q, rel_index.int())
+output2 = pointops.dot_prod_with_idx(key, index_k.int(), table_k, rel_index.int())
+output = output1 + output2
+# loss = output.mean()
+# loss.backward()
+
+# print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10]))
+# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
+# print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2])
+# print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
+# print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2])
+# input()
+
+# print("query.is_contiguous(): ", query.is_contiguous())
+# print("key.is_contiguous(): ", key.is_contiguous())
+# print("index_0.is_contiguous(): ", index_0.is_contiguous())
+# print("index_1.is_contiguous(): ", index_1.is_contiguous())
+
+output_v2 = pointops.dot_prod_with_idx_v2(
+ query, index_q.int(), key, index_k.int(), table_q, table_k, rel_index.int()
+)
+loss = output_v2.mean()
+loss.backward()
+
+print(
+ "output_v2.shape: {}, output_v2[:5,:10]: {}".format(
+ output_v2.shape, output_v2[:5, :10]
+ )
+)
+print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
+print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2])
+print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
+print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2])
+# input()
+
+print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max())
diff --git a/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1_v3.py b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1_v3.py
new file mode 100644
index 0000000000000000000000000000000000000000..3738ba69b0d9be7aa9e890b0357f8de6cc42708b
--- /dev/null
+++ b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1_v3.py
@@ -0,0 +1,106 @@
+import torch
+import pointops
+from torch_scatter import (
+ scatter_max,
+ scatter_mean,
+ scatter_add,
+ scatter_min,
+ scatter_sum,
+)
+
+torch.manual_seed(1)
+
+M = 80000
+N = 3500
+# M = 80
+# N = 5
+hdim = 16
+h = 6
+L = 31
+query = torch.rand(N, h, hdim).cuda()
+table_q = torch.rand(L, h, hdim, 3).cuda()
+key = torch.rand(N, h, hdim).cuda()
+table_k = torch.rand(L, h, hdim, 3).cuda()
+
+index_q = torch.rand(M)
+index_q[index_q < 0] = 0
+index_q = (index_q * N).long().cuda()
+
+index_k = torch.rand(M)
+index_k[index_k < 0] = 0
+index_k = (index_k * N).long().cuda()
+
+rel_index = torch.rand(M, 3)
+rel_index[rel_index < 0] = 0
+rel_index = (rel_index * L).long().cuda()
+
+
+# rearrange index for acceleration
+index_q, indices = torch.sort(index_q) # [M,]
+index_k = index_k[indices] # [M,]
+rel_index = rel_index[indices]
+index_q_counts = index_q.bincount()
+
+print("index_q_counts.shape: ", index_q_counts.shape)
+
+n_max = index_q_counts.max()
+index_q_offsets = index_q_counts.cumsum(dim=-1) # [N]
+
+print("v1 index_q_offsets.shape: ", index_q_offsets.shape)
+
+index_q_offsets = torch.cat(
+ [torch.zeros(1, dtype=torch.long).cuda(), index_q_offsets], 0
+) # [N+1]
+
+# print("index_q[:100]: ", index_q[:100])
+print("n_max: ", n_max)
+print("index_q_offsets.shape: ", index_q_offsets.shape)
+# input()
+
+print("index_q_offsets[:100]: ", index_q_offsets[:100])
+print("index_k[:20]: ", index_k[:20])
+
+query.requires_grad = True
+table_q.requires_grad = True
+key.requires_grad = True
+table_k.requires_grad = True
+
+output1 = pointops.dot_prod_with_idx(query, index_q.int(), table_q, rel_index.int())
+output2 = pointops.dot_prod_with_idx(key, index_k.int(), table_k, rel_index.int())
+output = output1 + output2
+loss = output.mean()
+loss.backward()
+
+# print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10]))
+# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
+# print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2])
+# print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
+# print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2])
+# input()
+
+# print("query.is_contiguous(): ", query.is_contiguous())
+# print("key.is_contiguous(): ", key.is_contiguous())
+# print("index_q.is_contiguous(): ", index_q.is_contiguous())
+# print("index_k.is_contiguous(): ", index_k.is_contiguous())
+
+output_v2 = pointops.dot_prod_with_idx_v3(
+ query,
+ index_q_offsets.int(),
+ n_max,
+ key,
+ index_k.int(),
+ table_q,
+ table_k,
+ rel_index.int(),
+)
+# loss = output_v2.mean()
+# loss.backward()
+
+# print("output_v2.shape: {}, output_v2[:5,:10]: {}".format(output_v2.shape, output_v2[:5,:10]))
+# print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5])
+# print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2])
+# print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5])
+# print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2])
+# input()
+
+print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max())
diff --git a/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step2.py b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step2.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1cb9ef37b4350f50f23857b365729b1a85b24f9
--- /dev/null
+++ b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step2.py
@@ -0,0 +1,83 @@
+import torch
+import pointops
+from torch_scatter import (
+ scatter_max,
+ scatter_mean,
+ scatter_add,
+ scatter_min,
+ scatter_sum,
+)
+
+torch.manual_seed(1)
+
+M = 80000
+N = 3500
+hdim = 16
+h = 6
+L = 31
+attn = torch.rand(M, h).cuda()
+v = torch.rand(N, h, hdim).cuda()
+table = torch.rand(L, h, hdim, 3).cuda()
+
+index_0 = torch.rand(M)
+index_0[index_0 < 0] = 0
+index_0 = (index_0 * N).long().cuda()
+
+index_1 = torch.rand(M)
+index_1[index_1 < 0] = 0
+index_1 = (index_1 * N).long().cuda()
+
+rel_index = torch.rand(M, 3)
+rel_index[rel_index < 0] = 0
+rel_index = (rel_index * L).long().cuda()
+
+attn.requires_grad = True
+v.requires_grad = True
+table.requires_grad = True
+
+v_flat = v[index_1] # [M, h, hdim]
+table_x, table_y, table_z = (
+ table[:, :, :, 0],
+ table[:, :, :, 1],
+ table[:, :, :, 2],
+) # [L, h, hdim]
+rel_index_x, rel_index_y, rel_index_z = (
+ rel_index[:, 0],
+ rel_index[:, 1],
+ rel_index[:, 2],
+) # [M]
+rel_pos_encoding = (
+ table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z]
+) # [M, h, hdim]
+v_flat_new = v_flat + rel_pos_encoding # [M, h, hdim]
+output = attn.unsqueeze(-1) * v_flat_new # [M, h, hdim]
+output = scatter_sum(src=output, index=index_0, dim=0, dim_size=N) # [N, h, hdim]
+loss = output.mean()
+loss.backward()
+
+print(
+ "output.shape: {}, output[:5,:10,:5]: {}".format(output.shape, output[:5, :10, :5])
+)
+print("attn.grad[:5, :3]: ", attn.grad[:5, :3])
+print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5])
+print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
+input()
+
+# print("query.is_contiguous(): ", query.is_contiguous())
+# print("key.is_contiguous(): ", key.is_contiguous())
+# print("index_0.is_contiguous(): ", index_0.is_contiguous())
+# print("index_1.is_contiguous(): ", index_1.is_contiguous())
+
+# output_v2 = pointops.attention_step2_with_rel_pos_value(attn, v, index_0.int(), index_1.int(), table, rel_index.int())
+# loss = output_v2.mean()
+# loss.backward()
+
+# print("output_v2.shape: {}, output_v2[:5,:10,:5]: {}".format(output_v2.shape, output_v2[:5,:10,:5]))
+# print("v2 attn.grad[:5, :3]: ", attn.grad[:5, :3])
+# print("v2 v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5])
+# print("v2 table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
+# input()
+
+# print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max())
+
+# print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2))
diff --git a/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step2_v2.py b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step2_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..3090b980cf2ddf9803db40d161a21ce09edc7392
--- /dev/null
+++ b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step2_v2.py
@@ -0,0 +1,109 @@
+import torch
+import pointops
+from torch_scatter import (
+ scatter_max,
+ scatter_mean,
+ scatter_add,
+ scatter_min,
+ scatter_sum,
+)
+
+torch.manual_seed(1)
+
+M = 80000
+N = 3500
+hdim = 16
+h = 6
+L = 31
+attn = torch.rand(M, h).cuda()
+v = torch.rand(N, h, hdim).cuda()
+table = torch.rand(L, h, hdim, 3).cuda()
+
+index_0 = torch.rand(M)
+index_0[index_0 < 0] = 0
+index_0 = (index_0 * N).long().cuda()
+
+index_1 = torch.rand(M)
+index_1[index_1 < 0] = 0
+index_1 = (index_1 * N).long().cuda()
+
+rel_index = torch.rand(M, 3)
+rel_index[rel_index < 0] = 0
+rel_index = (rel_index * L).long().cuda()
+
+
+# rearrange index for acceleration
+index_0, indices = torch.sort(index_0) # [M,]
+index_1 = index_1[indices] # [M,]
+rel_index = rel_index[indices]
+index_0_counts = index_0.bincount()
+
+print("index_0_counts.shape: ", index_0_counts.shape)
+
+n_max = index_0_counts.max()
+index_0_offsets = index_0_counts.cumsum(dim=-1) # [N]
+
+print("v1 index_0_offsets.shape: ", index_0_offsets.shape)
+
+index_0_offsets = torch.cat(
+ [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0
+) # [N+1]
+
+
+attn.requires_grad = True
+v.requires_grad = True
+table.requires_grad = True
+
+
+output = pointops.attention_step2_with_rel_pos_value(
+ attn, v, index_0.int(), index_1.int(), table, rel_index.int()
+)
+loss = output.mean()
+loss.backward()
+
+print(
+ "output.shape: {}, output[:5,:10,:5]: {}".format(output.shape, output[:5, :10, :5])
+)
+print("attn.grad[:5, :3]: ", attn.grad[:5, :3])
+print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5])
+print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
+# input()
+
+attn_grad = attn.grad.clone()
+v_grad = v.grad.clone()
+table_grad = table.grad.clone()
+
+attn.grad.zero_()
+v.grad.zero_()
+table.grad.zero_()
+
+# print("query.is_contiguous(): ", query.is_contiguous())
+# print("key.is_contiguous(): ", key.is_contiguous())
+# print("index_0.is_contiguous(): ", index_0.is_contiguous())
+# print("index_1.is_contiguous(): ", index_1.is_contiguous())
+
+output_v2 = pointops.attention_step2_with_rel_pos_value_v2(
+ attn, v, index_0_offsets.int(), n_max, index_1.int(), table, rel_index.int()
+)
+loss = output_v2.mean()
+loss.backward()
+
+print(
+ "output_v2.shape: {}, output_v2[:5,:10,:5]: {}".format(
+ output_v2.shape, output_v2[:5, :10, :5]
+ )
+)
+print("v2 attn.grad[:5, :3]: ", attn.grad[:5, :3])
+print("v2 v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5])
+print("v2 table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2])
+# input()
+
+print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max())
+
+print("((attn_grad-attn.grad)**2).max(): ", ((attn_grad - attn.grad) ** 2).max())
+
+print("((v_grad-v.grad)**2).max(): ", ((v_grad - v.grad) ** 2).max())
+
+print("((table_grad-table.grad)**2).max(): ", ((table_grad - table.grad) ** 2).max())
+
+# print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2))
diff --git a/Pointcept/libs/pointops2/setup.py b/Pointcept/libs/pointops2/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..b33cb3b83c39302500efb464667d346d5699f0aa
--- /dev/null
+++ b/Pointcept/libs/pointops2/setup.py
@@ -0,0 +1,33 @@
+import os
+from setuptools import setup
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+from distutils.sysconfig import get_config_vars
+
+(opt,) = get_config_vars("OPT")
+os.environ["OPT"] = " ".join(
+ flag for flag in opt.split() if flag != "-Wstrict-prototypes"
+)
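+# drop the C-only -Wstrict-prototypes flag inherited from Python's build config so
+# the C++/CUDA sources compile without spurious warnings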
+
+src = "src"
+sources = [
+ os.path.join(root, file)
+ for root, dirs, files in os.walk(src)
+ for file in files
+ if file.endswith(".cpp") or file.endswith(".cu")
+]
+
+setup(
+ name="pointops2",
+ version="1.0",
+ install_requires=["torch", "numpy"],
+ packages=["pointops2"],
+ package_dir={"pointops2": "functions"},
+ ext_modules=[
+ CUDAExtension(
+ name="pointops2_cuda",
+ sources=sources,
+ extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]},
+ )
+ ],
+ cmdclass={"build_ext": BuildExtension},
+)
diff --git a/Pointcept/libs/pointops2/src/__init__.py b/Pointcept/libs/pointops2/src/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda.cpp b/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..491b6f41660edf9b5ea5656cc88edba8ed807d71
--- /dev/null
+++ b/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda.cpp
@@ -0,0 +1,28 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "aggregation_cuda_kernel.h"
+
+
+void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
+{
+ const float *input = input_tensor.data_ptr<float>();
+ const float *position = position_tensor.data_ptr<float>();
+ const float *weight = weight_tensor.data_ptr<float>();
+ const int *idx = idx_tensor.data_ptr<int>();
+ float *output = output_tensor.data_ptr<float>();
+ aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output);
+}
+
+void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor)
+{
+ const float *input = input_tensor.data_ptr<float>();
+ const float *position = position_tensor.data_ptr<float>();
+ const float *weight = weight_tensor.data_ptr<float>();
+ const int *idx = idx_tensor.data_ptr<int>();
+ const float *grad_output = grad_output_tensor.data_ptr<float>();
+ float *grad_input = grad_input_tensor.data_ptr<float>();
+ float *grad_position = grad_position_tensor.data_ptr<float>();
+ float *grad_weight = grad_weight_tensor.data_ptr<float>();
+ aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
+}
diff --git a/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda_kernel.cu b/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..8339bb7e2088abffefba02c26b248edafed6cf47
--- /dev/null
+++ b/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda_kernel.cu
@@ -0,0 +1,53 @@
+#include "../cuda_utils.h"
+#include "aggregation_cuda_kernel.h"
+
+
+__global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
+ // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
+ int index = blockIdx.x * blockDim.x + threadIdx.x;
+ if (index >= n * c) return;
+ const int c_idx = index % c;
+ const int n_idx = index / c;
+ const int w_c_idx = c_idx % w_c;
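+ // one thread accumulates one output element output[n_idx][c_idx]; weight has only
+ // w_c channels, so feature channels sharing the same (c_idx % w_c) reuse a weight channel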
+ for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
+ {
+ int idx_idx = n_idx * nsample + nsample_idx;
+ int input_idx = idx[idx_idx] * c + c_idx;
+ int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
+ int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
+ output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx];
+ }
+}
+
+__global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
+ // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
+ int index = blockIdx.x * blockDim.x + threadIdx.x;
+ if (index >= n * c) return;
+ const int c_idx = index % c;
+ const int n_idx = index / c;
+ const int w_c_idx = c_idx % w_c;
+ for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
+ {
+ int idx_idx = n_idx * nsample + nsample_idx;
+ int input_idx = idx[idx_idx] * c + c_idx;
+ int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
+ int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
+ atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]);
+ grad_position[position_idx] = grad_output[index] * weight[weight_idx];
+ atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx]));
+ }
+}
+
+void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
+ // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
+ dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
+ dim3 threads(THREADS_PER_BLOCK);
+ aggregation_forward_cuda_kernel<<<blocks, threads, 0, 0>>>(n, nsample, c, w_c, input, position, weight, idx, output);
+}
+
+void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
+ // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
+ dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
+ dim3 threads(THREADS_PER_BLOCK);
+ aggregation_backward_cuda_kernel<<<blocks, threads, 0, 0>>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
+}
diff --git a/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda_kernel.h b/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..5211a96aa2acbe0d9baf32bddc9ab4be87703072
--- /dev/null
+++ b/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda_kernel.h
@@ -0,0 +1,20 @@
+#ifndef _AGGREGATION_CUDA_KERNEL
+#define _AGGREGATION_CUDA_KERNEL
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+
+void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
+void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output);
+void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/Pointcept/libs/pointops2/src/attention/attention_cuda.cpp b/Pointcept/libs/pointops2/src/attention/attention_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..27493d19ebfd11b083b8f31455ac12c4416208a9
--- /dev/null
+++ b/Pointcept/libs/pointops2/src/attention/attention_cuda.cpp
@@ -0,0 +1,55 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "attention_cuda_kernel.h"
+
+void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor,
+ at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor)
+{
+ const float *q = q_tensor.data_ptr<float>();
+ const float *k = k_tensor.data_ptr<float>();
+ const int *index0 = index0_tensor.data_ptr<int>();
+ const int *index1 = index1_tensor.data_ptr<int>();
+ float *attn = attn_tensor.data_ptr<float>();
+ attention_step1_forward_cuda_launcher(N, M, h, C, q, k, index0, index1, attn);
+}
+
+void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor,
+ at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor,
+ at::Tensor grad_q_tensor, at::Tensor grad_k_tensor)
+{
+ const float *grad_out = grad_out_tensor.data_ptr<float>();
+ const int *index0 = index0_tensor.data_ptr<int>();
+ const int *index1 = index1_tensor.data_ptr<int>();
+ const float *q = q_tensor.data_ptr<float>();
+ const float *k = k_tensor.data_ptr<float>();
+ float *grad_q = grad_q_tensor.data_ptr<float>();
+ float *grad_k = grad_k_tensor.data_ptr<float>();
+ attention_step1_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, q, k, grad_q, grad_k);
+}
+
+void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor,
+ at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor)
+{
+ const float *attn = attn_tensor.data_ptr<float>();
+ const float *v = v_tensor.data_ptr