ziqima committed on
Commit
4893ce0
1 Parent(s): 18c987f

initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Pointcept/.gitignore +16 -0
  2. Pointcept/LICENSE +21 -0
  3. Pointcept/README.md +896 -0
  4. Pointcept/configs/_base_/dataset/scannetpp.py +104 -0
  5. Pointcept/configs/_base_/default_runtime.py +39 -0
  6. Pointcept/configs/matterport3d/semseg-pt-v3m1-0-base.py +313 -0
  7. Pointcept/configs/matterport3d/semseg-spunet-v1m1-0-base.py +282 -0
  8. Pointcept/configs/modelnet40/cls-ptv3-v1m1-0-base.py +232 -0
  9. Pointcept/configs/modelnet40/cls-spunet-v1m1-0-base.py +176 -0
  10. Pointcept/configs/nuscenes/semseg-ppt-v1m1-0-nu-sk-wa-spunet.py +342 -0
  11. Pointcept/configs/nuscenes/semseg-ppt-v1m2-0-nu-sk-wa-spunet.py +316 -0
  12. Pointcept/configs/nuscenes/semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit.py +292 -0
  13. Pointcept/configs/nuscenes/semseg-pt-v2m2-0-base.py +174 -0
  14. Pointcept/configs/nuscenes/semseg-pt-v2m2-1-benchmark-submit.py +157 -0
  15. Pointcept/configs/nuscenes/semseg-pt-v3m1-0-base.py +215 -0
  16. Pointcept/configs/nuscenes/semseg-spunet-v1m1-0-base.py +183 -0
  17. Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02-sc-aug.py +180 -0
  18. Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02.py +180 -0
  19. Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base.py +181 -0
  20. Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft-vs0p05.py +273 -0
  21. Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py +273 -0
  22. Pointcept/configs/s3dis/semseg-minkunet34c-0-base.py +174 -0
  23. Pointcept/configs/s3dis/semseg-ppt-v1m1-0-s3-sc-st-spunet.py +496 -0
  24. Pointcept/configs/s3dis/semseg-pt-v1-0-base.py +170 -0
  25. Pointcept/configs/s3dis/semseg-pt-v2m1-0-base.py +189 -0
  26. Pointcept/configs/s3dis/semseg-pt-v2m2-0-base.py +189 -0
  27. Pointcept/configs/s3dis/semseg-pt-v2m2-0-lovasz.py +192 -0
  28. Pointcept/configs/s3dis/semseg-pt-v2m2-1-one-cycle.py +196 -0
  29. Pointcept/configs/s3dis/semseg-pt-v3m1-0-base.py +225 -0
  30. Pointcept/configs/s3dis/semseg-pt-v3m1-1-rpe.py +225 -0
  31. Pointcept/configs/s3dis/semseg-pt-v3m1-2-ppt-extreme.py +487 -0
  32. Pointcept/configs/s3dis/semseg-spunet-v1m1-0-base.py +168 -0
  33. Pointcept/configs/s3dis/semseg-spunet-v1m1-0-cn-base.py +181 -0
  34. Pointcept/configs/s3dis/semseg-spunet-v1m2-0-base.py +184 -0
  35. Pointcept/configs/s3dis/semseg-swin3d-v1m1-0-small.py +184 -0
  36. Pointcept/configs/s3dis/semseg-swin3d-v1m1-1-large.py +191 -0
  37. Pointcept/configs/scannet/insseg-pointgroup-v1m1-0-spunet-base.py +187 -0
  38. Pointcept/configs/scannet/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py +279 -0
  39. Pointcept/configs/scannet/objdet-cagroup3d-v1m1-0-base.py +183 -0
  40. Pointcept/configs/scannet/pretrain-msc-v1m1-0-spunet-base.py +155 -0
  41. Pointcept/configs/scannet/pretrain-msc-v1m1-1-spunet-pointcontrast.py +162 -0
  42. Pointcept/configs/scannet/pretrain-msc-v1m2-0-spunet-csc.py +165 -0
  43. Pointcept/configs/scannet/semseg-cac-v1m1-0-spunet-base.py +292 -0
  44. Pointcept/configs/scannet/semseg-cac-v1m1-1-spunet-lovasz.py +292 -0
  45. Pointcept/configs/scannet/semseg-cac-v1m1-2-ptv2-lovasz.py +309 -0
  46. Pointcept/configs/scannet/semseg-minkunet34c-0-base.py +193 -0
  47. Pointcept/configs/scannet/semseg-oacnns-v1m1-0-base.py +290 -0
  48. Pointcept/configs/scannet/semseg-octformer-v1m1-0-base.py +296 -0
  49. Pointcept/configs/scannet/semseg-ppt-v1m1-0-sc-st-spunet.py +391 -0
  50. Pointcept/configs/scannet/semseg-ppt-v1m1-1-sc-st-spunet-submit.py +366 -0
Pointcept/.gitignore ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ image/
2
+ __pycache__
3
+ **/build/
4
+ **/*.egg-info/
5
+ **/dist/
6
+ *.so
7
+ exp
8
+ weights
9
+ data
10
+ log
11
+ outputs/
12
+ .vscode
13
+ .idea
14
+ */.DS_Store
15
+ **/*.out
16
+ Dockerfile
Pointcept/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Pointcept
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
Pointcept/README.md ADDED
@@ -0,0 +1,896 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <p align="center">
2
+ <!-- pypi-strip -->
3
+ <picture>
4
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/logo_dark.png">
5
+ <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/logo.png">
6
+ <!-- /pypi-strip -->
7
+ <img alt="pointcept" src="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/logo.png" width="400">
8
+ <!-- pypi-strip -->
9
+ </picture><br>
10
+ <!-- /pypi-strip -->
11
+ </p>
12
+
13
+ [![Formatter](https://github.com/pointcept/pointcept/actions/workflows/formatter.yml/badge.svg)](https://github.com/pointcept/pointcept/actions/workflows/formatter.yml)
14
+
15
+ **Pointcept** is a powerful and flexible codebase for point cloud perception research. It is also an official implementation of the following paper:
16
+ - **Point Transformer V3: Simpler, Faster, Stronger**
17
+ *Xiaoyang Wu, Li Jiang, Peng-Shuai Wang, Zhijian Liu, Xihui Liu, Yu Qiao, Wanli Ouyang, Tong He, Hengshuang Zhao*
18
+ IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024 - Oral
19
+ [ Backbone ] [PTv3] - [ [arXiv](https://arxiv.org/abs/2312.10035) ] [ [Bib](https://xywu.me/research/ptv3/bib.txt) ] [ [Project](https://github.com/Pointcept/PointTransformerV3) ] &rarr; [here](https://github.com/Pointcept/PointTransformerV3)
20
+
21
+ - **OA-CNNs: Omni-Adaptive Sparse CNNs for 3D Semantic Segmentation**
22
+ *Bohao Peng, Xiaoyang Wu, Li Jiang, Yukang Chen, Hengshuang Zhao, Zhuotao Tian, Jiaya Jia*
23
+ IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024
24
+ [ Backbone ] [ OA-CNNs ] - [ [arXiv](https://arxiv.org/abs/2403.14418) ] [ [Bib](https://xywu.me/research/oacnns/bib.txt) ] &rarr; [here](#oa-cnns)
25
+
26
+ - **PonderV2: Pave the Way for 3D Foundation Model with A Universal Pre-training Paradigm**
27
+ *Haoyi Zhu\*, Honghui Yang\*, Xiaoyang Wu\*, Di Huang\*, Sha Zhang, Xianglong He, Tong He, Hengshuang Zhao, Chunhua Shen, Yu Qiao, Wanli Ouyang*
28
+ arXiv Preprint 2023
29
+ [ Pretrain ] [PonderV2] - [ [arXiv](https://arxiv.org/abs/2310.08586) ] [ [Bib](https://xywu.me/research/ponderv2/bib.txt) ] [ [Project](https://github.com/OpenGVLab/PonderV2) ] &rarr; [here](https://github.com/OpenGVLab/PonderV2)
30
+
31
+
32
+ - **Towards Large-scale 3D Representation Learning with Multi-dataset Point Prompt Training**
33
+ *Xiaoyang Wu, Zhuotao Tian, Xin Wen, Bohao Peng, Xihui Liu, Kaicheng Yu, Hengshuang Zhao*
34
+ IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024
35
+ [ Pretrain ] [PPT] - [ [arXiv](https://arxiv.org/abs/2308.09718) ] [ [Bib](https://xywu.me/research/ppt/bib.txt) ] &rarr; [here](#point-prompt-training-ppt)
36
+
37
+ - **Masked Scene Contrast: A Scalable Framework for Unsupervised 3D Representation Learning**
38
+ *Xiaoyang Wu, Xin Wen, Xihui Liu, Hengshuang Zhao*
39
+ IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2023
40
+ [ Pretrain ] [ MSC ] - [ [arXiv](https://arxiv.org/abs/2303.14191) ] [ [Bib](https://xywu.me/research/msc/bib.txt) ] &rarr; [here](#masked-scene-contrast-msc)
41
+
42
+
43
+ - **Learning Context-aware Classifier for Semantic Segmentation** (3D Part)
44
+ *Zhuotao Tian, Jiequan Cui, Li Jiang, Xiaojuan Qi, Xin Lai, Yixin Chen, Shu Liu, Jiaya Jia*
45
+ AAAI Conference on Artificial Intelligence (**AAAI**) 2023 - Oral
46
+ [ SemSeg ] [ CAC ] - [ [arXiv](https://arxiv.org/abs/2303.11633) ] [ [Bib](https://xywu.me/research/cac/bib.txt) ] [ [2D Part](https://github.com/tianzhuotao/CAC) ] &rarr; [here](#context-aware-classifier)
47
+
48
+
49
+ - **Point Transformer V2: Grouped Vector Attention and Partition-based Pooling**
50
+ *Xiaoyang Wu, Yixing Lao, Li Jiang, Xihui Liu, Hengshuang Zhao*
51
+ Conference on Neural Information Processing Systems (**NeurIPS**) 2022
52
+ [ Backbone ] [ PTv2 ] - [ [arXiv](https://arxiv.org/abs/2210.05666) ] [ [Bib](https://xywu.me/research/ptv2/bib.txt) ] &rarr; [here](#point-transformers)
53
+
54
+
55
+ - **Point Transformer**
56
+ *Hengshuang Zhao, Li Jiang, Jiaya Jia, Philip Torr, Vladlen Koltun*
57
+ IEEE International Conference on Computer Vision (**ICCV**) 2021 - Oral
58
+ [ Backbone ] [ PTv1 ] - [ [arXiv](https://arxiv.org/abs/2012.09164) ] [ [Bib](https://hszhao.github.io/papers/iccv21_pointtransformer_bib.txt) ] &rarr; [here](#point-transformers)
59
+
60
+ Additionally, **Pointcept** integrates the following excellent work (including the papers above):
61
+ Backbone:
62
+ [MinkUNet](https://github.com/NVIDIA/MinkowskiEngine) ([here](#sparseunet)),
63
+ [SpUNet](https://github.com/traveller59/spconv) ([here](#sparseunet)),
64
+ [SPVCNN](https://github.com/mit-han-lab/spvnas) ([here](#spvcnn)),
65
+ [OACNNs](https://arxiv.org/abs/2403.14418) ([here](#oa-cnns)),
66
+ [PTv1](https://arxiv.org/abs/2012.09164) ([here](#point-transformers)),
67
+ [PTv2](https://arxiv.org/abs/2210.05666) ([here](#point-transformers)),
68
+ [PTv3](https://arxiv.org/abs/2312.10035) ([here](#point-transformers)),
69
+ [StratifiedFormer](https://github.com/dvlab-research/Stratified-Transformer) ([here](#stratified-transformer)),
70
+ [OctFormer](https://github.com/octree-nn/octformer) ([here](#octformer)),
71
+ [Swin3D](https://github.com/microsoft/Swin3D) ([here](#swin3d));
72
+ Semantic Segmentation:
73
+ [Mix3d](https://github.com/kumuji/mix3d) ([here](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-spunet-v1m1-0-base.py#L5)),
74
+ [CAC](https://arxiv.org/abs/2303.11633) ([here](#context-aware-classifier));
75
+ Instance Segmentation:
76
+ [PointGroup](https://github.com/dvlab-research/PointGroup) ([here](#pointgroup));
77
+ Pre-training:
78
+ [PointContrast](https://github.com/facebookresearch/PointContrast) ([here](#pointcontrast)),
79
+ [Contrastive Scene Contexts](https://github.com/facebookresearch/ContrastiveSceneContexts) ([here](#contrastive-scene-contexts)),
80
+ [Masked Scene Contrast](https://arxiv.org/abs/2303.14191) ([here](#masked-scene-contrast-msc)),
81
+ [Point Prompt Training](https://arxiv.org/abs/2308.09718) ([here](#point-prompt-training-ppt));
82
+ Datasets:
83
+ [ScanNet](http://www.scan-net.org/) ([here](#scannet-v2)),
84
+ [ScanNet200](http://www.scan-net.org/) ([here](#scannet-v2)),
85
+ [ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) ([here](#scannet)),
86
+ [S3DIS](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1) ([here](#s3dis)),
87
+ [Matterport3D](https://niessner.github.io/Matterport/) ([here](#matterport3d)),
88
+ [ArkitScene](https://github.com/apple/ARKitScenes),
89
+ [Structured3D](https://structured3d-dataset.org/) ([here](#structured3d)),
90
+ [SemanticKITTI](http://www.semantic-kitti.org/) ([here](#semantickitti)),
91
+ [nuScenes](https://www.nuscenes.org/nuscenes) ([here](#nuscenes)),
92
+ [ModelNet40](https://modelnet.cs.princeton.edu/) ([here](#modelnet)),
93
+ [Waymo](https://waymo.com/open/) ([here](#waymo)).
94
+
95
+
96
+ ## Highlights
97
+ - *May, 2024*: In v1.5.2, we redesigned the default structure for each dataset for better performance. Please **re-preprocess** datasets or **download** our preprocessed datasets from **[here](https://huggingface.co/Pointcept)**.
98
+ - *Apr, 2024*: **PTv3** is selected as one of the 90 **Oral** papers (3.3% accepted papers, 0.78% submissions) by CVPR'24!
99
+ - *Mar, 2024*: We released the code for **OA-CNNs**, accepted by CVPR'24. For issues related to **OA-CNNs**, please @Pbihao.
100
+ - *Feb, 2024*: **PTv3** and **PPT** are accepted by CVPR'24, another **two** papers by our Pointcept team have also been accepted by CVPR'24 🎉🎉🎉. We will make them publicly available soon!
101
+ - *Dec, 2023*: **PTv3** is released on arXiv, and the code is available in Pointcept. PTv3 is an efficient backbone model that achieves SOTA performances across indoor and outdoor scenarios.
102
+ - *Aug, 2023*: **PPT** is released on arXiv. PPT presents a multi-dataset pre-training framework that achieves SOTA performance in both **indoor** and **outdoor** scenarios. It is compatible with various existing pre-training frameworks and backbones. A **pre-release** version of the code is accessible; for those interested, please feel free to contact me directly for access.
103
+ - *Mar, 2023*: We released our codebase, **Pointcept**, a highly potent tool for point cloud representation learning and perception. We welcome new work to join the _Pointcept_ family and highly recommend reading [Quick Start](#quick-start) before starting your trial.
104
+ - *Feb, 2023*: **MSC** and **CeCo** accepted by CVPR 2023. _MSC_ is a highly efficient and effective pretraining framework that facilitates cross-dataset large-scale pretraining, while _CeCo_ is a segmentation method specifically designed for long-tail datasets. Both approaches are compatible with all existing backbone models in our codebase, and we will soon make the code available for public use.
105
+ - *Jan, 2023*: **CAC**, oral work of AAAI 2023, has expanded its 3D result with the incorporation of Pointcept. This addition will allow CAC to serve as a pluggable segmentor within our codebase.
106
+ - *Sep, 2022*: **PTv2** accepted by NeurIPS 2022. It is a continuation of the Point Transformer. The proposed GVA theory can apply to most existing attention mechanisms, while Grid Pooling is also a practical addition to existing pooling methods.
107
+
108
+ ## Citation
109
+ If you find _Pointcept_ useful to your research, please cite our work as encouragement. (੭ˊ꒳​ˋ)੭✧
110
+ ```
111
+ @misc{pointcept2023,
112
+ title={Pointcept: A Codebase for Point Cloud Perception Research},
113
+ author={Pointcept Contributors},
114
+ howpublished = {\url{https://github.com/Pointcept/Pointcept}},
115
+ year={2023}
116
+ }
117
+ ```
118
+
119
+ ## Overview
120
+
121
+ - [Installation](#installation)
122
+ - [Data Preparation](#data-preparation)
123
+ - [Quick Start](#quick-start)
124
+ - [Model Zoo](#model-zoo)
125
+ - [Citation](#citation)
126
+ - [Acknowledgement](#acknowledgement)
127
+
128
+ ## Installation
129
+
130
+ ### Requirements
131
+ - Ubuntu: 18.04 and above.
132
+ - CUDA: 11.3 and above.
133
+ - PyTorch: 1.10.0 and above.
134
+
135
+ ### Conda Environment
136
+
137
+ ```bash
138
+ conda create -n pointcept python=3.8 -y
139
+ conda activate pointcept
140
+ conda install ninja -y
141
+ # Choose version you want here: https://pytorch.org/get-started/previous-versions/
142
+ conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch -y
143
+ conda install h5py pyyaml -c anaconda -y
144
+ conda install sharedarray tensorboard tensorboardx yapf addict einops scipy plyfile termcolor timm -c conda-forge -y
145
+ conda install pytorch-cluster pytorch-scatter pytorch-sparse -c pyg -y
146
+ pip install torch-geometric
147
+
148
+ # spconv (SparseUNet)
149
+ # refer https://github.com/traveller59/spconv
150
+ pip install spconv-cu113
151
+
152
+ # PPT (clip)
153
+ pip install ftfy regex tqdm
154
+ pip install git+https://github.com/openai/CLIP.git
155
+
156
+ # PTv1 & PTv2 or precise eval
157
+ cd libs/pointops
158
+ # usual
159
+ python setup.py install
160
+ # docker & multi GPU arch
161
+ TORCH_CUDA_ARCH_LIST="ARCH LIST" python setup.py install
162
+ # e.g. 7.5: RTX 3000; 8.0: a100 More available in: https://developer.nvidia.com/cuda-gpus
163
+ TORCH_CUDA_ARCH_LIST="7.5 8.0" python setup.py install
164
+ cd ../..
165
+
166
+ # Open3D (visualization, optional)
167
+ pip install open3d
168
+ ```
169
+
170
+ ## Data Preparation
171
+
172
+ ### ScanNet v2
173
+
174
+ The preprocessing supports semantic and instance segmentation for `ScanNet20`, `ScanNet200`, and `ScanNet Data Efficient`.
175
+ - Download the [ScanNet](http://www.scan-net.org/) v2 dataset.
176
+ - Run preprocessing code for raw ScanNet as follows:
177
+
178
+ ```bash
179
+ # RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset.
180
+ # PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset (output dir).
181
+ python pointcept/datasets/preprocessing/scannet/preprocess_scannet.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_DIR}
182
+ ```
183
+ - (Optional) Download ScanNet Data Efficient files:
184
+ ```bash
185
+ # download-scannet.py is the official download script
186
+ # or follow instructions here: https://kaldir.vc.in.tum.de/scannet_benchmark/data_efficient/documentation#download
187
+ python download-scannet.py --data_efficient -o ${RAW_SCANNET_DIR}
188
+ # unzip downloads
189
+ cd ${RAW_SCANNET_DIR}/tasks
190
+ unzip limited-annotation-points.zip
191
+ unzip limited-reconstruction-scenes.zip
192
+ # copy files to processed dataset folder
193
+ mkdir ${PROCESSED_SCANNET_DIR}/tasks
194
+ cp -r ${RAW_SCANNET_DIR}/tasks/points ${PROCESSED_SCANNET_DIR}/tasks
195
+ cp -r ${RAW_SCANNET_DIR}/tasks/scenes ${PROCESSED_SCANNET_DIR}/tasks
196
+ ```
197
+ - (Alternative) Our preprocessed data can be downloaded directly [[here](https://huggingface.co/datasets/Pointcept/scannet-compressed)]; please agree to the official license before downloading it.
198
+
199
+ - Link processed dataset to codebase:
200
+ ```bash
201
+ # PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset.
202
+ mkdir data
203
+ ln -s ${PROCESSED_SCANNET_DIR} ${CODEBASE_DIR}/data/scannet
204
+ ```
205
+
206
+ ### ScanNet++
207
+ - Download the [ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) dataset.
208
+ - Run preprocessing code for raw ScanNet++ as follows:
209
+ ```bash
210
+ # RAW_SCANNETPP_DIR: the directory of downloaded ScanNet++ raw dataset.
211
+ # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir).
212
+ # NUM_WORKERS: the number of workers for parallel preprocessing.
213
+ python pointcept/datasets/preprocessing/scannetpp/preprocess_scannetpp.py --dataset_root ${RAW_SCANNETPP_DIR} --output_root ${PROCESSED_SCANNETPP_DIR} --num_workers ${NUM_WORKERS}
214
+ ```
215
+ - Sampling and chunking large point cloud data in train/val split as follows (only used for training):
216
+ ```bash
217
+ # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir).
218
+ # NUM_WORKERS: the number of workers for parallel preprocessing.
219
+ python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split train --num_workers ${NUM_WORKERS}
220
+ python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split val --num_workers ${NUM_WORKERS}
221
+ ```
222
+ - (Alternative) Our preprocessed data can be downloaded directly [[here](https://huggingface.co/datasets/Pointcept/scannetpp-compressed)]; please agree to the official license before downloading it.
223
+ - Link processed dataset to codebase:
224
+ ```bash
225
+ # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset.
226
+ mkdir data
227
+ ln -s ${PROCESSED_SCANNETPP_DIR} ${CODEBASE_DIR}/data/scannetpp
228
+ ```
229
+
230
+ ### S3DIS
231
+
232
+ - Download S3DIS data by filling this [Google form](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1). Download the `Stanford3dDataset_v1.2.zip` file and unzip it.
233
+ - Fix the error in `Area_5/office_19/Annotations/ceiling` at line 323474: the value `103.0�0000` contains an invalid character and must be replaced with `103.000000`.
234
+ - (Optional) Download Full 2D-3D S3DIS dataset (no XYZ) from [here](https://github.com/alexsax/2D-3D-Semantics) for parsing normal.
235
+ - Run preprocessing code for S3DIS as follows:
236
+
237
+ ```bash
238
+ # S3DIS_DIR: the directory of downloaded Stanford3dDataset_v1.2 dataset.
239
+ # RAW_S3DIS_DIR: the directory of Stanford2d3dDataset_noXYZ dataset. (optional, for parsing normal)
240
+ # PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset (output dir).
241
+
242
+ # S3DIS without aligned angle
243
+ python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR}
244
+ # S3DIS with aligned angle
245
+ python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --align_angle
246
+ # S3DIS with normal vector (recommended, normal is helpful)
247
+ python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --parse_normal
248
+ python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --align_angle --parse_normal
249
+ ```
250
+
251
+ - (Alternative) Our preprocessed data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/s3dis-compressed
252
+ )] (with normal vector and aligned angle), please agree with the official license before downloading it.
253
+
254
+ - Link processed dataset to codebase.
255
+ ```bash
256
+ # PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset.
257
+ mkdir data
258
+ ln -s ${PROCESSED_S3DIS_DIR} ${CODEBASE_DIR}/data/s3dis
259
+ ```
260
+ ### Structured3D
261
+
262
+ - Download Structured3D panorama related and perspective (full) related zip files by filling this [Google form](https://docs.google.com/forms/d/e/1FAIpQLSc0qtvh4vHSoZaW6UvlXYy79MbcGdZfICjh4_t4bYofQIVIdw/viewform?pli=1) (no need to unzip them).
263
+ - Organize all downloaded zip file in one folder (`${STRUCT3D_DIR}`).
264
+ - Run preprocessing code for Structured3D as follows:
265
+ ```bash
266
+ # STRUCT3D_DIR: the directory of downloaded Structured3D dataset.
267
+ # PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir).
268
+ # NUM_WORKERS: Number of workers for preprocessing, default same as cpu count (might OOM).
269
+ export PYTHONPATH=./
270
+ python pointcept/datasets/preprocessing/structured3d/preprocess_structured3d.py --dataset_root ${STRUCT3D_DIR} --output_root ${PROCESSED_STRUCT3D_DIR} --num_workers ${NUM_WORKERS} --grid_size 0.01 --fuse_prsp --fuse_pano
271
+ ```
272
+ Following the instruction of [Swin3D](https://arxiv.org/abs/2304.06906), we keep 25 categories with frequencies of more than 0.001, out of the original 40 categories.
273
+
274
+ [//]: # (- &#40;Alternative&#41; Our preprocess data can also be downloaded [[here]&#40;&#41;], please agree the official license before download it.)
275
+
276
+ - (Alternative) Our preprocessed data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/structured3d-compressed
277
+ )] (with perspective views and panorama view, 471.7G after unzipping); please agree to the official license before downloading it.
278
+
279
+ - Link processed dataset to codebase.
280
+ ```bash
281
+ # PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir).
282
+ mkdir data
283
+ ln -s ${PROCESSED_STRUCT3D_DIR} ${CODEBASE_DIR}/data/structured3d
284
+ ```
285
+ ### Matterport3D
286
+ - Follow [this page](https://niessner.github.io/Matterport/#download) to request access to the dataset.
287
+ - Download the "region_segmentations" type, which represents the division of a scene into individual rooms.
288
+ ```bash
289
+ # download-mp.py is the official download script
290
+ # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset.
291
+ python download-mp.py -o ${MATTERPORT3D_DIR} --type region_segmentations
292
+ ```
293
+ - Unzip the region_segmentations data
294
+ ```bash
295
+ # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset.
296
+ python pointcept/datasets/preprocessing/matterport3d/unzip_matterport3d_region_segmentation.py --dataset_root ${MATTERPORT3D_DIR}
297
+ ```
298
+ - Run preprocessing code for Matterport3D as follows:
299
+ ```bash
300
+ # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset.
301
+ # PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir).
302
+ # NUM_WORKERS: the number of workers for this preprocessing.
303
+ python pointcept/datasets/preprocessing/matterport3d/preprocess_matterport3d_mesh.py --dataset_root ${MATTERPORT3D_DIR} --output_root ${PROCESSED_MATTERPORT3D_DIR} --num_workers ${NUM_WORKERS}
304
+ ```
305
+ - Link processed dataset to codebase.
306
+ ```bash
307
+ # PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir).
308
+ mkdir data
309
+ ln -s ${PROCESSED_MATTERPORT3D_DIR} ${CODEBASE_DIR}/data/matterport3d
310
+ ```
311
+
312
+ Following the instruction of [OpenRooms](https://github.com/ViLab-UCSD/OpenRooms), we remapped Matterport3D's categories to ScanNet 20 semantic categories with the addition of a ceiling category.
313
+ * (Alternative) Our preprocessed data can also be downloaded [here](https://huggingface.co/datasets/Pointcept/matterport3d-compressed); please agree to the official license before downloading it.
314
+
315
+ ### SemanticKITTI
316
+ - Download [SemanticKITTI](http://www.semantic-kitti.org/dataset.html#download) dataset.
317
+ - Link dataset to codebase.
318
+ ```bash
319
+ # SEMANTIC_KITTI_DIR: the directory of SemanticKITTI dataset.
320
+ # |- SEMANTIC_KITTI_DIR
321
+ # |- dataset
322
+ # |- sequences
323
+ # |- 00
324
+ # |- 01
325
+ # |- ...
326
+
327
+ mkdir -p data
328
+ ln -s ${SEMANTIC_KITTI_DIR} ${CODEBASE_DIR}/data/semantic_kitti
329
+ ```
330
+
331
+ ### nuScenes
332
+ - Download the official [nuScenes](https://www.nuscenes.org/nuscenes#download) dataset (with Lidar Segmentation) and organize the downloaded files as follows:
333
+ ```bash
334
+ NUSCENES_DIR
335
+ │── samples
336
+ │── sweeps
337
+ │── lidarseg
338
+ ...
339
+ │── v1.0-trainval
340
+ │── v1.0-test
341
+ ```
342
+ - Run information preprocessing code (modified from OpenPCDet) for nuScenes as follows:
343
+ ```bash
344
+ # NUSCENES_DIR: the directory of downloaded nuScenes dataset.
345
+ # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir).
346
+ # MAX_SWEEPS: Max number of sweeps. Default: 10.
347
+ pip install nuscenes-devkit pyquaternion
348
+ python pointcept/datasets/preprocessing/nuscenes/preprocess_nuscenes_info.py --dataset_root ${NUSCENES_DIR} --output_root ${PROCESSED_NUSCENES_DIR} --max_sweeps ${MAX_SWEEPS} --with_camera
349
+ ```
350
+ - (Alternative) Our preprocessed nuScenes information data can also be downloaded [[here](
351
+ https://huggingface.co/datasets/Pointcept/nuscenes-compressed)] (processed information only; the raw dataset must still be downloaded and linked to the folder). Please agree to the official license before downloading it.
352
+
353
+ - Link raw dataset to processed nuScenes dataset folder:
354
+ ```bash
355
+ # NUSCENES_DIR: the directory of downloaded nuScenes dataset.
356
+ # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir).
357
+ ln -s ${NUSCENES_DIR} ${PROCESSED_NUSCENES_DIR}/raw
358
+ ```
359
+ then the processed nuscenes folder is organized as follows:
360
+ ```bash
361
+ nuscene
362
+ |── raw
363
+ │── samples
364
+ │── sweeps
365
+ │── lidarseg
366
+ ...
367
+ │── v1.0-trainval
368
+ │── v1.0-test
369
+ |── info
370
+ ```
371
+
372
+ - Link processed dataset to codebase.
373
+ ```bash
374
+ # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir).
375
+ mkdir data
376
+ ln -s ${PROCESSED_NUSCENES_DIR} ${CODEBASE_DIR}/data/nuscenes
377
+ ```
378
+
379
+ ### Waymo
380
+ - Download the official [Waymo](https://waymo.com/open/download/) dataset (v1.4.3) and organize the downloaded files as follows:
381
+ ```bash
382
+ WAYMO_DIR
383
+ │── training
384
+ │── validation
385
+ │── testing
386
+ ```
387
+ - Install the following dependency:
388
+ ```bash
389
+ # If shows "No matching distribution found", download whl directly from Pypi and install the package.
390
+ conda create -n waymo python=3.10 -y
391
+ conda activate waymo
392
+ pip install waymo-open-dataset-tf-2-12-0
393
+ ```
394
+ - Run the preprocessing code as follows:
395
+ ```bash
396
+ # WAYMO_DIR: the directory of the downloaded Waymo dataset.
397
+ # PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir).
398
+ # NUM_WORKERS: num workers for preprocessing
399
+ python pointcept/datasets/preprocessing/waymo/preprocess_waymo.py --dataset_root ${WAYMO_DIR} --output_root ${PROCESSED_WAYMO_DIR} --splits training validation --num_workers ${NUM_WORKERS}
400
+ ```
401
+
402
+ - Link processed dataset to the codebase.
403
+ ```bash
404
+ # PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir).
405
+ mkdir data
406
+ ln -s ${PROCESSED_WAYMO_DIR} ${CODEBASE_DIR}/data/waymo
407
+ ```
408
+
409
+ ### ModelNet
410
+ - Download [modelnet40_normal_resampled.zip](https://shapenet.cs.stanford.edu/media/modelnet40_normal_resampled.zip) and unzip
411
+ - Link dataset to the codebase.
412
+ ```bash
413
+ mkdir -p data
414
+ ln -s ${MODELNET_DIR} ${CODEBASE_DIR}/data/modelnet40_normal_resampled
415
+ ```
416
+
417
+ ## Quick Start
418
+
419
+ ### Training
420
+ **Train from scratch.** The training process is based on configs in the `configs` folder.
421
+ The training script will generate an experiment folder in the `exp` folder and back up essential code in the experiment folder.
422
+ Training config, log, tensorboard, and checkpoints will also be saved into the experiment folder during the training process.
423
+ ```bash
424
+ export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
425
+ # Script (Recommended)
426
+ sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME}
427
+ # Direct
428
+ export PYTHONPATH=./
429
+ python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH}
430
+ ```
431
+
432
+ For example:
433
+ ```bash
434
+ # By script (Recommended)
435
+ # -p defaults to python and can be omitted
436
+ sh scripts/train.sh -p python -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
437
+ # Direct
438
+ export PYTHONPATH=./
439
+ python tools/train.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base
440
+ ```
441
+ **Resume training from checkpoint.** If the training process is interrupted by accident, the following script can resume training from a given checkpoint.
442
+ ```bash
443
+ export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
444
+ # Script (Recommended)
445
+ # simply add "-r true"
446
+ sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME} -r true
447
+ # Direct
448
+ export PYTHONPATH=./
449
+ python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} resume=True weight=${CHECKPOINT_PATH}
450
+ ```
451
+
452
+ ### Testing
453
+ During training, model evaluation is performed on point clouds after grid sampling (voxelization), providing an initial assessment of model performance. However, to obtain precise evaluation results, testing is **essential**. The testing process involves subsampling a dense point cloud into a sequence of voxelized point clouds, ensuring comprehensive coverage of all points. These sub-results are then predicted and collected to form a complete prediction of the entire point cloud. This approach yields higher evaluation results compared to simply mapping/interpolating the prediction. In addition, our testing code supports TTA (test time augmentation) testing, which further enhances the stability of evaluation performance.
454
+
455
+ ```bash
456
+ # By script (Based on experiment folder created by training script)
457
+ sh scripts/test.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -n ${EXP_NAME} -w ${CHECKPOINT_NAME}
458
+ # Direct
459
+ export PYTHONPATH=./
460
+ python tools/test.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} weight=${CHECKPOINT_PATH}
461
+ ```
462
+ For example:
463
+ ```bash
464
+ # By script (Based on experiment folder created by training script)
465
+ # -p defaults to python and can be omitted
466
+ # -w defaults to model_best and can be omitted
467
+ sh scripts/test.sh -p python -d scannet -n semseg-pt-v2m2-0-base -w model_best
468
+ # Direct
469
+ export PYTHONPATH=./
470
+ python tools/test.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base weight=exp/scannet/semseg-pt-v2m2-0-base/model/model_best.pth
471
+ ```
472
+
473
+ The TTA can be disabled by replacing `data.test.test_cfg.aug_transform = [...]` with:
474
+
475
+ ```python
476
+ data = dict(
477
+ train = dict(...),
478
+ val = dict(...),
479
+ test = dict(
480
+ ...,
481
+ test_cfg = dict(
482
+ ...,
483
+ aug_transform = [
484
+ [dict(type="RandomRotateTargetAngle", angle=[0], axis="z", center=[0, 0, 0], p=1)]
485
+ ]
486
+ )
487
+ )
488
+ )
489
+ ```
490
+
491
+ ### Offset
492
+ `Offset` is the separator of point clouds in batch data, and it is similar to the concept of `Batch` in PyG.
493
+ A visual illustration of batch and offset is as follows:
494
+ <p align="center">
495
+ <!-- pypi-strip -->
496
+ <picture>
497
+ <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/offset_dark.png">
498
+ <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/offset.png">
499
+ <!-- /pypi-strip -->
500
+ <img alt="pointcept" src="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/offset.png" width="480">
501
+ <!-- pypi-strip -->
502
+ </picture><br>
503
+ <!-- /pypi-strip -->
504
+ </p>
505
+
506
+ ## Model Zoo
507
+ ### 1. Backbones and Semantic Segmentation
508
+ #### SparseUNet
509
+
510
+ _Pointcept_ provides `SparseUNet` implemented by `SpConv` and `MinkowskiEngine`. The SpConv version is recommended since SpConv is easy to install and faster than MinkowskiEngine. Meanwhile, SpConv is also widely applied in outdoor perception.
511
+
512
+ - **SpConv (recommended)**
513
+
514
+ The SpConv version of `SparseUNet` in the codebase was fully rewritten from the `MinkowskiEngine` version; example running scripts are as follows:
515
+
516
+ ```bash
517
+ # ScanNet val
518
+ sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
519
+ # ScanNet200
520
+ sh scripts/train.sh -g 4 -d scannet200 -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
521
+ # S3DIS
522
+ sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
523
+ # S3DIS (with normal)
524
+ sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-cn-base -n semseg-spunet-v1m1-0-cn-base
525
+ # SemanticKITTI
526
+ sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
527
+ # nuScenes
528
+ sh scripts/train.sh -g 4 -d nuscenes -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
529
+ # ModelNet40
530
+ sh scripts/train.sh -g 2 -d modelnet40 -c cls-spunet-v1m1-0-base -n cls-spunet-v1m1-0-base
531
+
532
+ # ScanNet Data Efficient
533
+ sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la20 -n semseg-spunet-v1m1-2-efficient-la20
534
+ sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la50 -n semseg-spunet-v1m1-2-efficient-la50
535
+ sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la100 -n semseg-spunet-v1m1-2-efficient-la100
536
+ sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la200 -n semseg-spunet-v1m1-2-efficient-la200
537
+ sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr1 -n semseg-spunet-v1m1-2-efficient-lr1
538
+ sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr5 -n semseg-spunet-v1m1-2-efficient-lr5
539
+ sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr10 -n semseg-spunet-v1m1-2-efficient-lr10
540
+ sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr20 -n semseg-spunet-v1m1-2-efficient-lr20
541
+
542
+ # Profile model run time
543
+ sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-enable-profiler -n semseg-spunet-v1m1-0-enable-profiler
544
+ ```
545
+
546
+ - **MinkowskiEngine**
547
+
548
+ The MinkowskiEngine version `SparseUNet` in the codebase was modified from the original MinkowskiEngine repo, and example running scripts are as follows:
549
+ 1. Install MinkowskiEngine, refer to https://github.com/NVIDIA/MinkowskiEngine
550
+ 2. Training with the following example scripts:
551
+ ```bash
552
+ # Uncomment "# from .sparse_unet import *" in "pointcept/models/__init__.py"
553
+ # Uncomment "# from .mink_unet import *" in "pointcept/models/sparse_unet/__init__.py"
554
+ # ScanNet
555
+ sh scripts/train.sh -g 4 -d scannet -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
556
+ # ScanNet200
557
+ sh scripts/train.sh -g 4 -d scannet200 -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
558
+ # S3DIS
559
+ sh scripts/train.sh -g 4 -d s3dis -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
560
+ # SemanticKITTI
561
+ sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
562
+ ```
563
+
564
+ #### OA-CNNs
565
+ Introducing Omni-Adaptive 3D CNNs (**OA-CNNs**), a family of networks that integrates a lightweight module to greatly enhance the adaptivity of sparse CNNs at minimal computational cost. Without any self-attention modules, **OA-CNNs** favorably surpass point transformers in terms of accuracy in both indoor and outdoor scenes, with much less latency and memory cost. Issues related to **OA-CNNs** can be directed to @Pbihao.
566
+ ```bash
567
+ # ScanNet
568
+ sh scripts/train.sh -g 4 -d scannet -c semseg-oacnns-v1m1-0-base -n semseg-oacnns-v1m1-0-base
569
+ ```
570
+
571
+ #### Point Transformers
572
+ - **PTv3**
573
+
574
+ [PTv3](https://arxiv.org/abs/2312.10035) is an efficient backbone model that achieves SOTA performances across indoor and outdoor scenarios. The full PTv3 relies on FlashAttention, which in turn requires CUDA 11.6 or above, so make sure your local Pointcept environment satisfies this requirement.
575
+
576
+ If you cannot upgrade your local environment to satisfy the requirements (CUDA >= 11.6), you can disable FlashAttention by setting the model parameter `enable_flash` to `false` and reducing `enc_patch_size` and `dec_patch_size` to a lower level (e.g. 128).
577
+
578
+ FlashAttention forcibly disables RPE and reduces the precision to fp16. If you require these features, please disable `enable_flash` and adjust `enable_rpe`, `upcast_attention` and `upcast_softmax`.
579
+
580
+ Detailed instructions and experiment records (containing weights) are available on the [project repository](https://github.com/Pointcept/PointTransformerV3). Example running scripts are as follows:
581
+ ```bash
582
+ # Scratched ScanNet
583
+ sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
584
+ # PPT joint training (ScanNet + Structured3D) and evaluate in ScanNet
585
+ sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme
586
+
587
+ # Scratched ScanNet200
588
+ sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
589
+ # Fine-tuning from PPT joint training (ScanNet + Structured3D) with ScanNet200
590
+ # PTV3_PPT_WEIGHT_PATH: Path to model weight trained by PPT multi-dataset joint training
591
+ # e.g. exp/scannet/semseg-pt-v3m1-1-ppt-extreme/model/model_best.pth
592
+ sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-1-ppt-ft -n semseg-pt-v3m1-1-ppt-ft -w ${PTV3_PPT_WEIGHT_PATH}
593
+
594
+ # Scratched ScanNet++
595
+ sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
596
+ # Scratched ScanNet++ test
597
+ sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-1-submit -n semseg-pt-v3m1-1-submit
598
+
599
+
600
+ # Scratched S3DIS
601
+ sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
602
+ # an example of disabling flash_attention and enabling rpe.
603
+ sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-0-rpe -n semseg-pt-v3m1-0-rpe
604
+ # PPT joint training (ScanNet + S3DIS + Structured3D) and evaluate in S3DIS
605
+ sh scripts/train.sh -g 8 -d s3dis -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme
606
+ # S3DIS 6-fold cross validation
607
+ # 1. The default configs are evaluated on Area_5, modify the "data.train.split", "data.val.split", and "data.test.split" to make the config evaluated on Area_1 ~ Area_6 respectively.
608
+ # 2. Train and evaluate the model on each split of areas and gather result files located in "exp/s3dis/EXP_NAME/result/Area_x.pth" in one single folder, noted as RECORD_FOLDER.
609
+ # 3. Run the following script to get S3DIS 6-fold cross validation performance:
610
+ export PYTHONPATH=./
611
+ python tools/test_s3dis_6fold.py --record_root ${RECORD_FOLDER}
612
+
613
+ # Scratched nuScenes
614
+ sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
615
+ # Scratched Waymo
616
+ sh scripts/train.sh -g 4 -d waymo -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
617
+
618
+ # More configs and exp records for PTv3 will be available soon.
619
+ ```
620
+
621
+ Indoor semantic segmentation
622
+ | Model | Benchmark | Additional Data | Num GPUs | Val mIoU | Config | Tensorboard | Exp Record |
623
+ | :---: | :---: |:---------------:| :---: | :---: | :---: | :---: | :---: |
624
+ | PTv3 | ScanNet | &cross; | 4 | 77.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-0-base) |
625
+ | PTv3 + PPT | ScanNet | &check; | 8 | 78.5% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-1-ppt-extreme) |
626
+ | PTv3 | ScanNet200 | &cross; | 4 | 35.3% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet200/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) |[link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet200-semseg-pt-v3m1-0-base)|
627
+ | PTv3 + PPT | ScanNet200 | &check; (f.t.) | 4 | | | | |
628
+ | PTv3 | S3DIS (Area5) | &cross; | 4 | 73.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-0-rpe.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-0-rpe) |
629
+ | PTv3 + PPT | S3DIS (Area5) | &check; | 8 | 75.4% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-1-ppt-extreme) |
630
+
631
+ Outdoor semantic segmentation
632
+ | Model | Benchmark | Additional Data | Num GPUs | Val mIoU | Config | Tensorboard | Exp Record |
633
+ | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
634
+ | PTv3 | nuScenes | &cross; | 4 | 80.3 | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/nuscenes/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard)|[link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/nuscenes-semseg-pt-v3m1-0-base) |
635
+ | PTv3 + PPT | nuScenes | &check; | 8 | | | | |
636
+ | PTv3 | SemanticKITTI | &cross; | 4 | | | | |
637
+ | PTv3 + PPT | SemanticKITTI | &check; | 8 | | | | |
638
+ | PTv3 | Waymo | &cross; | 4 | 71.2 | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/waymo/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/waymo-semseg-pt-v3m1-0-base) (log only) |
639
+ | PTv3 + PPT | Waymo | &check; | 8 | | | | |
640
+
641
+ _**\*Released model weights are trained for v1.5.1; weights for v1.5.2 and later are still in progress.**_
642
+
643
+ - **PTv2 mode2**
644
+
645
+ The original PTv2 was trained on 4 * RTX a6000 (48G memory). Even enabling AMP, the memory cost of the original PTv2 is slightly larger than 24G. Considering GPUs with 24G memory are much more accessible, I tuned the PTv2 on the latest Pointcept and made it runnable on 4 * RTX 3090 machines.
646
+
647
+ `PTv2 Mode2` enables AMP and disables _Position Encoding Multiplier_ & _Grouped Linear_. During our further research, we found that precise coordinates are not necessary for point cloud understanding (Replacing precise coordinates with grid coordinates doesn't influence the performance. Also, SparseUNet is an example). As for Grouped Linear, my implementation of Grouped Linear seems to cost more memory than the Linear layer provided by PyTorch. Benefiting from the codebase and better parameter tuning, we also relieve the overfitting problem. The reproducing performance is even better than the results reported in our paper.
648
+
649
+ Example running scripts are as follows:
650
+
651
+ ```bash
652
+ # ptv2m2: PTv2 mode2, disable PEM & Grouped Linear, GPU memory cost < 24G (recommended)
653
+ # ScanNet
654
+ sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
655
+ sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-3-lovasz -n semseg-pt-v2m2-3-lovasz
656
+
657
+ # ScanNet test
658
+ sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit
659
+ # ScanNet200
660
+ sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
661
+ # ScanNet++
662
+ sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
663
+ # ScanNet++ test
664
+ sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit
665
+ # S3DIS
666
+ sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
667
+ # SemanticKITTI
668
+ sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
669
+ # nuScenes
670
+ sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
671
+ ```
672
+
673
+ - **PTv2 mode1**
674
+
675
+ `PTv2 mode1` is the original PTv2 we reported in our paper, example running scripts are as follows:
676
+
677
+ ```bash
678
+ # ptv2m1: PTv2 mode1, Original PTv2, GPU memory cost > 24G
679
+ # ScanNet
680
+ sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base
681
+ # ScanNet200
682
+ sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base
683
+ # S3DIS
684
+ sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base
685
+ ```
686
+
687
+ - **PTv1**
688
+
689
+ The original PTv1 is also available in our Pointcept codebase. I haven't run PTv1 for a long time, but I have ensured that the example running script works well.
690
+
691
+ ```bash
692
+ # ScanNet
693
+ sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base
694
+ # ScanNet200
695
+ sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base
696
+ # S3DIS
697
+ sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base
698
+ ```
699
+
700
+
701
+ #### Stratified Transformer
702
+ 1. Additional requirements:
703
+ ```bash
704
+ pip install torch-points3d
705
+ # Fix dependence, caused by installing torch-points3d
706
+ pip uninstall SharedArray
707
+ pip install SharedArray==3.2.1
708
+
709
+ cd libs/pointops2
710
+ python setup.py install
711
+ cd ../..
712
+ ```
713
+ 2. Uncomment `# from .stratified_transformer import *` in `pointcept/models/__init__.py`.
714
+ 3. Refer to [Optional Installation](#installation) to install dependencies.
715
+ 4. Training with the following example scripts:
716
+ ```bash
717
+ # stv1m1: Stratified Transformer mode1, Modified from the original Stratified Transformer code.
718
+ # stv1m2: Stratified Transformer mode2, my rewritten version (recommended).
719
+
720
+ # ScanNet
721
+ sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined
722
+ sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m1-0-origin -n semseg-st-v1m1-0-origin
723
+ # ScanNet200
724
+ sh scripts/train.sh -g 4 -d scannet200 -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined
725
+ # S3DIS
726
+ sh scripts/train.sh -g 4 -d s3dis -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined
727
+ ```
728
+
729
+ #### SPVCNN
730
+ `SPVCNN` is a baseline model of [SPVNAS](https://github.com/mit-han-lab/spvnas), it is also a practical baseline for outdoor datasets.
731
+ 1. Install torchsparse:
732
+ ```bash
733
+ # refer https://github.com/mit-han-lab/torchsparse
734
+ # install method without sudo apt install
735
+ conda install google-sparsehash -c bioconda
736
+ export C_INCLUDE_PATH=${CONDA_PREFIX}/include:$C_INCLUDE_PATH
737
+ export CPLUS_INCLUDE_PATH=${CONDA_PREFIX}/include:$CPLUS_INCLUDE_PATH
738
+ pip install --upgrade git+https://github.com/mit-han-lab/torchsparse.git
739
+ ```
740
+ 2. Training with the following example scripts:
741
+ ```bash
742
+ # SemanticKITTI
743
+ sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-spvcnn-v1m1-0-base -n semseg-spvcnn-v1m1-0-base
744
+ ```
745
+
746
+ #### OctFormer
747
+ OctFormer from _OctFormer: Octree-based Transformers for 3D Point Clouds_.
748
+ 1. Additional requirements:
749
+ ```bash
750
+ cd libs
751
+ git clone https://github.com/octree-nn/dwconv.git
752
+ pip install ./dwconv
753
+ pip install ocnn
754
+ ```
755
+ 2. Uncomment `# from .octformer import *` in `pointcept/models/__init__.py`.
756
+ 3. Training with the following example scripts:
757
+ ```bash
758
+ # ScanNet
759
+ sh scripts/train.sh -g 4 -d scannet -c semseg-octformer-v1m1-0-base -n semseg-octformer-v1m1-0-base
760
+ ```
761
+
762
+ #### Swin3D
763
+ Swin3D from _Swin3D: A Pretrained Transformer Backbone for 3D Indoor Scene Understanding_.
764
+ 1. Additional requirements:
765
+ ```bash
766
+ # 1. Install MinkEngine v0.5.4, follow readme in https://github.com/NVIDIA/MinkowskiEngine;
767
+ # 2. Install Swin3D, mainly for cuda operation:
768
+ cd libs
769
+ git clone https://github.com/microsoft/Swin3D.git
770
+ cd Swin3D
771
+ pip install ./
772
+ ```
773
+ 2. Uncomment `# from .swin3d import *` in `pointcept/models/__init__.py`.
774
+ 3. Pre-training with the following example scripts (for Structured3D preprocessing, refer [here](#structured3d)):
775
+ ```bash
776
+ # Structured3D + Swin-S
777
+ sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small
778
+ # Structured3D + Swin-L
779
+ sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large
780
+
781
+ # Addition
782
+ # Structured3D + SpUNet
783
+ sh scripts/train.sh -g 4 -d structured3d -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
784
+ # Structured3D + PTv2
785
+ sh scripts/train.sh -g 4 -d structured3d -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
786
+ ```
787
+ 4. Fine-tuning with the following example scripts:
788
+ ```bash
789
+ # ScanNet + Swin-S
790
+ sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-0-small/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small
791
+ # ScanNet + Swin-L
792
+ sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large
793
+
794
+ # S3DIS + Swin-S (here we provide config support S3DIS normal vector)
795
+ sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-0-small/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small
796
+ # S3DIS + Swin-L (here we provide config support S3DIS normal vector)
797
+ sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large
798
+ ```
799
+
800
+ #### Context-Aware Classifier
801
+ `Context-Aware Classifier` is a segmentor that can further boost the performance of each backbone, as a replacement for `Default Segmentor`. Training with the following example scripts:
802
+ ```bash
803
+ # ScanNet
804
+ sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base
805
+ sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz
806
+ sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz
807
+
808
+ # ScanNet200
809
+ sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base
810
+ sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz
811
+ sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz
812
+ ```
813
+
814
+
815
+ ### 2. Instance Segmentation
816
+ #### PointGroup
817
+ [PointGroup](https://github.com/dvlab-research/PointGroup) is a baseline framework for point cloud instance segmentation.
818
+ 1. Additional requirements:
819
+ ```bash
820
+ conda install -c bioconda google-sparsehash
821
+ cd libs/pointgroup_ops
822
+ python setup.py install --include_dirs=${CONDA_PREFIX}/include
823
+ cd ../..
824
+ ```
825
+ 2. Uncomment `# from .point_group import *` in `pointcept/models/__init__.py`.
826
+ 3. Training with the following example scripts:
827
+ ```bash
828
+ # ScanNet
829
+ sh scripts/train.sh -g 4 -d scannet -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base
830
+ # S3DIS
831
+ sh scripts/train.sh -g 4 -d s3dis -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base
832
+ ```
833
+
834
+ ### 3. Pre-training
835
+ #### Masked Scene Contrast (MSC)
836
+ 1. Pre-training with the following example scripts:
837
+ ```bash
838
+ # ScanNet
839
+ sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-0-spunet-base -n pretrain-msc-v1m1-0-spunet-base
840
+ ```
841
+
842
+ 2. Fine-tuning with the following example scripts:
843
+ enable PointGroup ([here](#pointgroup)) before fine-tuning on instance segmentation task.
844
+ ```bash
845
+ # ScanNet20 Semantic Segmentation
846
+ sh scripts/train.sh -g 8 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c semseg-spunet-v1m1-4-ft -n semseg-msc-v1m1-0f-spunet-base
847
+ # ScanNet20 Instance Segmentation (enable PointGroup before running the script)
848
+ sh scripts/train.sh -g 4 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-msc-v1m1-0f-pointgroup-spunet-base
849
+ ```
850
+ 3. Example log and weight: [[Pretrain](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EYvNV4XUJ_5Mlk-g15RelN4BW_P8lVBfC_zhjC_BlBDARg?e=UoGFWH)] [[Semseg](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EQkDiv5xkOFKgCpGiGtAlLwBon7i8W6my3TIbGVxuiTttQ?e=tQFnbr)]
851
+
852
+ #### Point Prompt Training (PPT)
853
+ PPT presents a multi-dataset pre-training framework, and it is compatible with various existing pre-training frameworks and backbones.
854
+ 1. PPT supervised joint training with the following example scripts:
855
+ ```bash
856
+ # ScanNet + Structured3d, validate on ScanNet (S3DIS might cause long data time, w/o S3DIS for a quick validation) >= 3090 * 8
857
+ sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-0-sc-st-spunet -n semseg-ppt-v1m1-0-sc-st-spunet
858
+ sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-1-sc-st-spunet-submit -n semseg-ppt-v1m1-1-sc-st-spunet-submit
859
+ # ScanNet + S3DIS + Structured3d, validate on S3DIS (>= a100 * 8)
860
+ sh scripts/train.sh -g 8 -d s3dis -c semseg-ppt-v1m1-0-s3-sc-st-spunet -n semseg-ppt-v1m1-0-s3-sc-st-spunet
861
+ # SemanticKITTI + nuScenes + Waymo, validate on SemanticKITTI (bs12 >= 3090 * 4; bs24 >= 3090 * 8, v1m1-0 is still being tuned)
862
+ sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet
863
+ sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-0-sk-nu-wa-spunet -n semseg-ppt-v1m2-0-sk-nu-wa-spunet
864
+ sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit -n semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit
865
+ # SemanticKITTI + nuScenes + Waymo, validate on nuScenes (bs12 >= 3090 * 4; bs24 >= 3090 * 8, v1m1-0 is still being tuned)
866
+ sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet
867
+ sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-0-nu-sk-wa-spunet -n semseg-ppt-v1m2-0-nu-sk-wa-spunet
868
+ sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit -n semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit
869
+ ```
870
+
871
+ #### PointContrast
872
+ 1. Preprocess and link ScanNet-Pair dataset (pair-wise matching with ScanNet raw RGB-D frame, ~1.5T):
873
+ ```bash
874
+ # RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset.
875
+ # PROCESSED_SCANNET_PAIR_DIR: the directory of processed ScanNet pair dataset (output dir).
876
+ python pointcept/datasets/preprocessing/scannet/scannet_pair/preprocess.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_PAIR_DIR}
877
+ ln -s ${PROCESSED_SCANNET_PAIR_DIR} ${CODEBASE_DIR}/data/scannet
878
+ ```
879
+ 2. Pre-training with the following example scripts:
880
+ ```bash
881
+ # ScanNet
882
+ sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-1-spunet-pointcontrast -n pretrain-msc-v1m1-1-spunet-pointcontrast
883
+ ```
884
+ 3. For fine-tuning, refer to [MSC](#masked-scene-contrast-msc).
885
+
886
+ #### Contrastive Scene Contexts
887
+ 1. Preprocess and link ScanNet-Pair dataset (refer to [PointContrast](#pointcontrast)).
888
+ 2. Pre-training with the following example scripts:
889
+ ```bash
890
+ # ScanNet
891
+ sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m2-0-spunet-csc -n pretrain-msc-v1m2-0-spunet-csc
892
+ ```
893
+ 3. For fine-tuning, refer to [MSC](#masked-scene-contrast-msc).
894
+
895
+ ## Acknowledgement
896
+ _Pointcept_ is designed by [Xiaoyang](https://xywu.me/), named by [Yixing](https://github.com/yxlao) and the logo is created by [Yuechen](https://julianjuaner.github.io/). It is derived from [Hengshuang](https://hszhao.github.io/)'s [Semseg](https://github.com/hszhao/semseg) and inspired by several repos, e.g., [MinkowskiEngine](https://github.com/NVIDIA/MinkowskiEngine), [pointnet2](https://github.com/charlesq34/pointnet2), [mmcv](https://github.com/open-mmlab/mmcv/tree/master/mmcv), and [Detectron2](https://github.com/facebookresearch/detectron2).
Pointcept/configs/_base_/dataset/scannetpp.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data = dict(
2
+ names=[
3
+ "wall",
4
+ "ceiling",
5
+ "floor",
6
+ "table",
7
+ "door",
8
+ "ceiling lamp",
9
+ "cabinet",
10
+ "blinds",
11
+ "curtain",
12
+ "chair",
13
+ "storage cabinet",
14
+ "office chair",
15
+ "bookshelf",
16
+ "whiteboard",
17
+ "window",
18
+ "box",
19
+ "window frame",
20
+ "monitor",
21
+ "shelf",
22
+ "doorframe",
23
+ "pipe",
24
+ "heater",
25
+ "kitchen cabinet",
26
+ "sofa",
27
+ "windowsill",
28
+ "bed",
29
+ "shower wall",
30
+ "trash can",
31
+ "book",
32
+ "plant",
33
+ "blanket",
34
+ "tv",
35
+ "computer tower",
36
+ "kitchen counter",
37
+ "refrigerator",
38
+ "jacket",
39
+ "electrical duct",
40
+ "sink",
41
+ "bag",
42
+ "picture",
43
+ "pillow",
44
+ "towel",
45
+ "suitcase",
46
+ "backpack",
47
+ "crate",
48
+ "keyboard",
49
+ "rack",
50
+ "toilet",
51
+ "paper",
52
+ "printer",
53
+ "poster",
54
+ "painting",
55
+ "microwave",
56
+ "board",
57
+ "shoes",
58
+ "socket",
59
+ "bottle",
60
+ "bucket",
61
+ "cushion",
62
+ "basket",
63
+ "shoe rack",
64
+ "telephone",
65
+ "file folder",
66
+ "cloth",
67
+ "blind rail",
68
+ "laptop",
69
+ "plant pot",
70
+ "exhaust fan",
71
+ "cup",
72
+ "coat hanger",
73
+ "light switch",
74
+ "speaker",
75
+ "table lamp",
76
+ "air vent",
77
+ "clothes hanger",
78
+ "kettle",
79
+ "smoke detector",
80
+ "container",
81
+ "power strip",
82
+ "slippers",
83
+ "paper bag",
84
+ "mouse",
85
+ "cutting board",
86
+ "toilet paper",
87
+ "paper towel",
88
+ "pot",
89
+ "clock",
90
+ "pan",
91
+ "tap",
92
+ "jar",
93
+ "soap dispenser",
94
+ "binder",
95
+ "bowl",
96
+ "tissue box",
97
+ "whiteboard eraser",
98
+ "toilet brush",
99
+ "spray bottle",
100
+ "headphones",
101
+ "stapler",
102
+ "marker",
103
+ ]
104
+ )
Pointcept/configs/_base_/default_runtime.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ weight = None # path to model weight
2
+ resume = False # whether to resume training process
3
+ evaluate = True # run evaluation after each training epoch
4
+ test_only = False # test process
5
+
6
+ seed = None # train process will init a random seed and record
7
+ save_path = "exp/default"
8
+ num_worker = 16 # total worker in all gpu
9
+ batch_size = 16 # total batch size in all gpu
10
+ batch_size_val = None # auto adapt to bs 1 for each gpu
11
+ batch_size_test = None # auto adapt to bs 1 for each gpu
12
+ epoch = 100 # total epoch, data loop = epoch // eval_epoch
13
+ eval_epoch = 100 # scheduled number of eval & checkpoint epochs
14
+ clip_grad = None # disable with None, enable with a float
15
+
16
+ sync_bn = False
17
+ enable_amp = False
18
+ empty_cache = False
19
+ empty_cache_per_epoch = False
20
+ find_unused_parameters = False
21
+
22
+ mix_prob = 0
23
+ param_dicts = None # example: param_dicts = [dict(keyword="block", lr_scale=0.1)]
24
+
25
+ # hook
26
+ hooks = [
27
+ dict(type="CheckpointLoader"),
28
+ dict(type="IterationTimer", warmup_iter=2),
29
+ dict(type="InformationWriter"),
30
+ dict(type="SemSegEvaluator"),
31
+ dict(type="CheckpointSaver", save_freq=None),
32
+ dict(type="PreciseEvaluator", test_last=False),
33
+ ]
34
+
35
+ # Trainer
36
+ train = dict(type="DefaultTrainer")
37
+
38
+ # Tester
39
+ test = dict(type="SemSegTester", verbose=True)
Pointcept/configs/matterport3d/semseg-pt-v3m1-0-base.py ADDED
@@ -0,0 +1,313 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total bs in all gpus
5
+ num_worker = 24
6
+ mix_prob = 0.8
7
+ empty_cache = False
8
+ enable_amp = True
9
+
10
+ # model settings
11
+ model = dict(
12
+ type="DefaultSegmentorV2",
13
+ num_classes=21,
14
+ backbone_out_channels=64,
15
+ backbone=dict(
16
+ type="PT-v3m1",
17
+ in_channels=6,
18
+ order=("z", "z-trans", "hilbert", "hilbert-trans"),
19
+ stride=(2, 2, 2, 2),
20
+ enc_depths=(2, 2, 2, 6, 2),
21
+ enc_channels=(32, 64, 128, 256, 512),
22
+ enc_num_head=(2, 4, 8, 16, 32),
23
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
24
+ dec_depths=(2, 2, 2, 2),
25
+ dec_channels=(64, 64, 128, 256),
26
+ dec_num_head=(4, 4, 8, 16),
27
+ dec_patch_size=(1024, 1024, 1024, 1024),
28
+ mlp_ratio=4,
29
+ qkv_bias=True,
30
+ qk_scale=None,
31
+ attn_drop=0.0,
32
+ proj_drop=0.0,
33
+ drop_path=0.3,
34
+ shuffle_orders=True,
35
+ pre_norm=True,
36
+ enable_rpe=False,
37
+ enable_flash=True,
38
+ upcast_attention=False,
39
+ upcast_softmax=False,
40
+ cls_mode=False,
41
+ pdnorm_bn=False,
42
+ pdnorm_ln=False,
43
+ pdnorm_decouple=True,
44
+ pdnorm_adaptive=False,
45
+ pdnorm_affine=True,
46
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
47
+ ),
48
+ criteria=[
49
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
50
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
51
+ ],
52
+ )
53
+
54
+ # scheduler settings
55
+ epoch = 800
56
+ optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
57
+ scheduler = dict(
58
+ type="OneCycleLR",
59
+ max_lr=[0.006, 0.0006],
60
+ pct_start=0.05,
61
+ anneal_strategy="cos",
62
+ div_factor=10.0,
63
+ final_div_factor=1000.0,
64
+ )
65
+ param_dicts = [dict(keyword="block", lr=0.0006)]
66
+
67
+ # dataset settings
68
+ dataset_type = "DefaultDataset"
69
+ data_root = "data/matterport3d"
70
+
71
+ data = dict(
72
+ num_classes=21,
73
+ ignore_index=-1,
74
+ names=(
75
+ "wall",
76
+ "floor",
77
+ "cabinet",
78
+ "bed",
79
+ "chair",
80
+ "sofa",
81
+ "table",
82
+ "door",
83
+ "window",
84
+ "bookshelf",
85
+ "picture",
86
+ "counter",
87
+ "desk",
88
+ "curtain",
89
+ "refrigerator",
90
+ "shower curtain",
91
+ "toilet",
92
+ "sink",
93
+ "bathtub",
94
+ "other",
95
+ "ceiling",
96
+ ),
97
+ train=dict(
98
+ type=dataset_type,
99
+ split="train",
100
+ data_root=data_root,
101
+ transform=[
102
+ dict(type="CenterShift", apply_z=True),
103
+ dict(
104
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
105
+ ),
106
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
107
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
108
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
109
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
110
+ dict(type="RandomScale", scale=[0.9, 1.1]),
111
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
112
+ dict(type="RandomFlip", p=0.5),
113
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
114
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
115
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
116
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
117
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
118
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
119
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
120
+ dict(
121
+ type="GridSample",
122
+ grid_size=0.02,
123
+ hash_type="fnv",
124
+ mode="train",
125
+ return_grid_coord=True,
126
+ ),
127
+ dict(type="SphereCrop", point_max=102400, mode="random"),
128
+ dict(type="CenterShift", apply_z=False),
129
+ dict(type="NormalizeColor"),
130
+ # dict(type="ShufflePoint"),
131
+ dict(type="ToTensor"),
132
+ dict(
133
+ type="Collect",
134
+ keys=("coord", "grid_coord", "segment"),
135
+ feat_keys=("color", "normal"),
136
+ ),
137
+ ],
138
+ test_mode=False,
139
+ ),
140
+ val=dict(
141
+ type=dataset_type,
142
+ split="val",
143
+ data_root=data_root,
144
+ transform=[
145
+ dict(type="CenterShift", apply_z=True),
146
+ dict(
147
+ type="GridSample",
148
+ grid_size=0.02,
149
+ hash_type="fnv",
150
+ mode="train",
151
+ return_grid_coord=True,
152
+ ),
153
+ dict(type="CenterShift", apply_z=False),
154
+ dict(type="NormalizeColor"),
155
+ dict(type="ToTensor"),
156
+ dict(
157
+ type="Collect",
158
+ keys=("coord", "grid_coord", "segment"),
159
+ feat_keys=("color", "normal"),
160
+ ),
161
+ ],
162
+ test_mode=False,
163
+ ),
164
+ test=dict(
165
+ type=dataset_type,
166
+ split="val",
167
+ data_root=data_root,
168
+ transform=[
169
+ dict(type="CenterShift", apply_z=True),
170
+ dict(type="NormalizeColor"),
171
+ ],
172
+ test_mode=True,
173
+ test_cfg=dict(
174
+ voxelize=dict(
175
+ type="GridSample",
176
+ grid_size=0.02,
177
+ hash_type="fnv",
178
+ mode="test",
179
+ keys=("coord", "color", "normal"),
180
+ return_grid_coord=True,
181
+ ),
182
+ crop=None,
183
+ post_transform=[
184
+ dict(type="CenterShift", apply_z=False),
185
+ dict(type="ToTensor"),
186
+ dict(
187
+ type="Collect",
188
+ keys=("coord", "grid_coord", "index"),
189
+ feat_keys=("color", "normal"),
190
+ ),
191
+ ],
192
+ aug_transform=[
193
+ [
194
+ dict(
195
+ type="RandomRotateTargetAngle",
196
+ angle=[0],
197
+ axis="z",
198
+ center=[0, 0, 0],
199
+ p=1,
200
+ )
201
+ ],
202
+ [
203
+ dict(
204
+ type="RandomRotateTargetAngle",
205
+ angle=[1 / 2],
206
+ axis="z",
207
+ center=[0, 0, 0],
208
+ p=1,
209
+ )
210
+ ],
211
+ [
212
+ dict(
213
+ type="RandomRotateTargetAngle",
214
+ angle=[1],
215
+ axis="z",
216
+ center=[0, 0, 0],
217
+ p=1,
218
+ )
219
+ ],
220
+ [
221
+ dict(
222
+ type="RandomRotateTargetAngle",
223
+ angle=[3 / 2],
224
+ axis="z",
225
+ center=[0, 0, 0],
226
+ p=1,
227
+ )
228
+ ],
229
+ [
230
+ dict(
231
+ type="RandomRotateTargetAngle",
232
+ angle=[0],
233
+ axis="z",
234
+ center=[0, 0, 0],
235
+ p=1,
236
+ ),
237
+ dict(type="RandomScale", scale=[0.95, 0.95]),
238
+ ],
239
+ [
240
+ dict(
241
+ type="RandomRotateTargetAngle",
242
+ angle=[1 / 2],
243
+ axis="z",
244
+ center=[0, 0, 0],
245
+ p=1,
246
+ ),
247
+ dict(type="RandomScale", scale=[0.95, 0.95]),
248
+ ],
249
+ [
250
+ dict(
251
+ type="RandomRotateTargetAngle",
252
+ angle=[1],
253
+ axis="z",
254
+ center=[0, 0, 0],
255
+ p=1,
256
+ ),
257
+ dict(type="RandomScale", scale=[0.95, 0.95]),
258
+ ],
259
+ [
260
+ dict(
261
+ type="RandomRotateTargetAngle",
262
+ angle=[3 / 2],
263
+ axis="z",
264
+ center=[0, 0, 0],
265
+ p=1,
266
+ ),
267
+ dict(type="RandomScale", scale=[0.95, 0.95]),
268
+ ],
269
+ [
270
+ dict(
271
+ type="RandomRotateTargetAngle",
272
+ angle=[0],
273
+ axis="z",
274
+ center=[0, 0, 0],
275
+ p=1,
276
+ ),
277
+ dict(type="RandomScale", scale=[1.05, 1.05]),
278
+ ],
279
+ [
280
+ dict(
281
+ type="RandomRotateTargetAngle",
282
+ angle=[1 / 2],
283
+ axis="z",
284
+ center=[0, 0, 0],
285
+ p=1,
286
+ ),
287
+ dict(type="RandomScale", scale=[1.05, 1.05]),
288
+ ],
289
+ [
290
+ dict(
291
+ type="RandomRotateTargetAngle",
292
+ angle=[1],
293
+ axis="z",
294
+ center=[0, 0, 0],
295
+ p=1,
296
+ ),
297
+ dict(type="RandomScale", scale=[1.05, 1.05]),
298
+ ],
299
+ [
300
+ dict(
301
+ type="RandomRotateTargetAngle",
302
+ angle=[3 / 2],
303
+ axis="z",
304
+ center=[0, 0, 0],
305
+ p=1,
306
+ ),
307
+ dict(type="RandomScale", scale=[1.05, 1.05]),
308
+ ],
309
+ [dict(type="RandomFlip", p=1)],
310
+ ],
311
+ ),
312
+ ),
313
+ )
Pointcept/configs/matterport3d/semseg-spunet-v1m1-0-base.py ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total bs in all gpus
5
+ mix_prob = 0.8
6
+ empty_cache = False
7
+ enable_amp = True
8
+
9
+ # model settings
10
+ model = dict(
11
+ type="DefaultSegmentor",
12
+ backbone=dict(
13
+ type="SpUNet-v1m1",
14
+ in_channels=6,
15
+ num_classes=21,
16
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
17
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
18
+ ),
19
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
20
+ )
21
+
22
+
23
+ # scheduler settings
24
+ epoch = 800
25
+ optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
26
+ scheduler = dict(
27
+ type="OneCycleLR",
28
+ max_lr=optimizer["lr"],
29
+ pct_start=0.05,
30
+ anneal_strategy="cos",
31
+ div_factor=10.0,
32
+ final_div_factor=10000.0,
33
+ )
34
+
35
+ # dataset settings
36
+ dataset_type = "DefaultDataset"
37
+ data_root = "data/matterport3d"
38
+
39
+ data = dict(
40
+ num_classes=21,
41
+ ignore_index=-1,
42
+ names=(
43
+ "wall",
44
+ "floor",
45
+ "cabinet",
46
+ "bed",
47
+ "chair",
48
+ "sofa",
49
+ "table",
50
+ "door",
51
+ "window",
52
+ "bookshelf",
53
+ "picture",
54
+ "counter",
55
+ "desk",
56
+ "curtain",
57
+ "refrigerator",
58
+ "shower curtain",
59
+ "toilet",
60
+ "sink",
61
+ "bathtub",
62
+ "other",
63
+ "ceiling",
64
+ ),
65
+ train=dict(
66
+ type=dataset_type,
67
+ split="train",
68
+ data_root=data_root,
69
+ transform=[
70
+ dict(type="CenterShift", apply_z=True),
71
+ dict(
72
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
73
+ ),
74
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
75
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
76
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
77
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
78
+ dict(type="RandomScale", scale=[0.9, 1.1]),
79
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
80
+ dict(type="RandomFlip", p=0.5),
81
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
82
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
83
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
84
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
85
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
86
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
87
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
88
+ dict(
89
+ type="GridSample",
90
+ grid_size=0.02,
91
+ hash_type="fnv",
92
+ mode="train",
93
+ return_grid_coord=True,
94
+ ),
95
+ dict(type="SphereCrop", point_max=100000, mode="random"),
96
+ dict(type="CenterShift", apply_z=False),
97
+ dict(type="NormalizeColor"),
98
+ dict(type="ShufflePoint"),
99
+ dict(type="ToTensor"),
100
+ dict(
101
+ type="Collect",
102
+ keys=("coord", "grid_coord", "segment"),
103
+ feat_keys=("color", "normal"),
104
+ ),
105
+ ],
106
+ test_mode=False,
107
+ ),
108
+ val=dict(
109
+ type=dataset_type,
110
+ split="val",
111
+ data_root=data_root,
112
+ transform=[
113
+ dict(type="CenterShift", apply_z=True),
114
+ dict(
115
+ type="GridSample",
116
+ grid_size=0.02,
117
+ hash_type="fnv",
118
+ mode="train",
119
+ return_grid_coord=True,
120
+ ),
121
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
122
+ dict(type="CenterShift", apply_z=False),
123
+ dict(type="NormalizeColor"),
124
+ dict(type="ToTensor"),
125
+ dict(
126
+ type="Collect",
127
+ keys=("coord", "grid_coord", "segment"),
128
+ feat_keys=("color", "normal"),
129
+ ),
130
+ ],
131
+ test_mode=False,
132
+ ),
133
+ test=dict(
134
+ type=dataset_type,
135
+ split="val",
136
+ data_root=data_root,
137
+ transform=[
138
+ dict(type="CenterShift", apply_z=True),
139
+ dict(type="NormalizeColor"),
140
+ ],
141
+ test_mode=True,
142
+ test_cfg=dict(
143
+ voxelize=dict(
144
+ type="GridSample",
145
+ grid_size=0.02,
146
+ hash_type="fnv",
147
+ mode="test",
148
+ return_grid_coord=True,
149
+ keys=("coord", "color", "normal"),
150
+ ),
151
+ crop=None,
152
+ post_transform=[
153
+ dict(type="CenterShift", apply_z=False),
154
+ dict(type="ToTensor"),
155
+ dict(
156
+ type="Collect",
157
+ keys=("coord", "grid_coord", "index"),
158
+ feat_keys=("color", "normal"),
159
+ ),
160
+ ],
161
+ aug_transform=[
162
+ [
163
+ dict(
164
+ type="RandomRotateTargetAngle",
165
+ angle=[0],
166
+ axis="z",
167
+ center=[0, 0, 0],
168
+ p=1,
169
+ )
170
+ ],
171
+ [
172
+ dict(
173
+ type="RandomRotateTargetAngle",
174
+ angle=[1 / 2],
175
+ axis="z",
176
+ center=[0, 0, 0],
177
+ p=1,
178
+ )
179
+ ],
180
+ [
181
+ dict(
182
+ type="RandomRotateTargetAngle",
183
+ angle=[1],
184
+ axis="z",
185
+ center=[0, 0, 0],
186
+ p=1,
187
+ )
188
+ ],
189
+ [
190
+ dict(
191
+ type="RandomRotateTargetAngle",
192
+ angle=[3 / 2],
193
+ axis="z",
194
+ center=[0, 0, 0],
195
+ p=1,
196
+ )
197
+ ],
198
+ [
199
+ dict(
200
+ type="RandomRotateTargetAngle",
201
+ angle=[0],
202
+ axis="z",
203
+ center=[0, 0, 0],
204
+ p=1,
205
+ ),
206
+ dict(type="RandomScale", scale=[0.95, 0.95]),
207
+ ],
208
+ [
209
+ dict(
210
+ type="RandomRotateTargetAngle",
211
+ angle=[1 / 2],
212
+ axis="z",
213
+ center=[0, 0, 0],
214
+ p=1,
215
+ ),
216
+ dict(type="RandomScale", scale=[0.95, 0.95]),
217
+ ],
218
+ [
219
+ dict(
220
+ type="RandomRotateTargetAngle",
221
+ angle=[1],
222
+ axis="z",
223
+ center=[0, 0, 0],
224
+ p=1,
225
+ ),
226
+ dict(type="RandomScale", scale=[0.95, 0.95]),
227
+ ],
228
+ [
229
+ dict(
230
+ type="RandomRotateTargetAngle",
231
+ angle=[3 / 2],
232
+ axis="z",
233
+ center=[0, 0, 0],
234
+ p=1,
235
+ ),
236
+ dict(type="RandomScale", scale=[0.95, 0.95]),
237
+ ],
238
+ [
239
+ dict(
240
+ type="RandomRotateTargetAngle",
241
+ angle=[0],
242
+ axis="z",
243
+ center=[0, 0, 0],
244
+ p=1,
245
+ ),
246
+ dict(type="RandomScale", scale=[1.05, 1.05]),
247
+ ],
248
+ [
249
+ dict(
250
+ type="RandomRotateTargetAngle",
251
+ angle=[1 / 2],
252
+ axis="z",
253
+ center=[0, 0, 0],
254
+ p=1,
255
+ ),
256
+ dict(type="RandomScale", scale=[1.05, 1.05]),
257
+ ],
258
+ [
259
+ dict(
260
+ type="RandomRotateTargetAngle",
261
+ angle=[1],
262
+ axis="z",
263
+ center=[0, 0, 0],
264
+ p=1,
265
+ ),
266
+ dict(type="RandomScale", scale=[1.05, 1.05]),
267
+ ],
268
+ [
269
+ dict(
270
+ type="RandomRotateTargetAngle",
271
+ angle=[3 / 2],
272
+ axis="z",
273
+ center=[0, 0, 0],
274
+ p=1,
275
+ ),
276
+ dict(type="RandomScale", scale=[1.05, 1.05]),
277
+ ],
278
+ [dict(type="RandomFlip", p=1)],
279
+ ],
280
+ ),
281
+ ),
282
+ )
Pointcept/configs/modelnet40/cls-ptv3-v1m1-0-base.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+ # misc custom setting
3
+ batch_size = 32 # bs: total bs in all gpus
4
+ num_worker = 16
5
+ batch_size_val = 8
6
+ empty_cache = False
7
+ enable_amp = False
8
+
9
+ # model settings
10
+ model = dict(
11
+ type="DefaultClassifier",
12
+ num_classes=40,
13
+ backbone_embed_dim=512,
14
+ backbone=dict(
15
+ type="PT-v3m1",
16
+ in_channels=6,
17
+ order=("z", "z-trans", "hilbert", "hilbert-trans"),
18
+ stride=(2, 2, 2, 2),
19
+ enc_depths=(2, 2, 2, 6, 2),
20
+ enc_channels=(32, 64, 128, 256, 512),
21
+ enc_num_head=(2, 4, 8, 16, 32),
22
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
23
+ dec_depths=(2, 2, 2, 2),
24
+ dec_channels=(64, 64, 128, 256),
25
+ dec_num_head=(4, 4, 8, 16),
26
+ dec_patch_size=(1024, 1024, 1024, 1024),
27
+ mlp_ratio=4,
28
+ qkv_bias=True,
29
+ qk_scale=None,
30
+ attn_drop=0.0,
31
+ proj_drop=0.0,
32
+ drop_path=0.3,
33
+ shuffle_orders=True,
34
+ pre_norm=True,
35
+ enable_rpe=False,
36
+ enable_flash=True,
37
+ upcast_attention=False,
38
+ upcast_softmax=False,
39
+ cls_mode=True,
40
+ pdnorm_bn=False,
41
+ pdnorm_ln=False,
42
+ pdnorm_decouple=True,
43
+ pdnorm_adaptive=False,
44
+ pdnorm_affine=True,
45
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
46
+ ),
47
+ criteria=[
48
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
49
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
50
+ ],
51
+ )
52
+
53
+ # scheduler settings
54
+ epoch = 300
55
+ # optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
56
+ # scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
57
+ optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.01)
58
+ scheduler = dict(
59
+ type="OneCycleLR",
60
+ max_lr=[0.001, 0.0001],
61
+ pct_start=0.05,
62
+ anneal_strategy="cos",
63
+ div_factor=10.0,
64
+ final_div_factor=1000.0,
65
+ )
66
+ param_dicts = [dict(keyword="block", lr=0.0001)]
67
+
68
+ # dataset settings
69
+ dataset_type = "ModelNetDataset"
70
+ data_root = "data/modelnet40_normal_resampled"
71
+ cache_data = False
72
+ class_names = [
73
+ "airplane",
74
+ "bathtub",
75
+ "bed",
76
+ "bench",
77
+ "bookshelf",
78
+ "bottle",
79
+ "bowl",
80
+ "car",
81
+ "chair",
82
+ "cone",
83
+ "cup",
84
+ "curtain",
85
+ "desk",
86
+ "door",
87
+ "dresser",
88
+ "flower_pot",
89
+ "glass_box",
90
+ "guitar",
91
+ "keyboard",
92
+ "lamp",
93
+ "laptop",
94
+ "mantel",
95
+ "monitor",
96
+ "night_stand",
97
+ "person",
98
+ "piano",
99
+ "plant",
100
+ "radio",
101
+ "range_hood",
102
+ "sink",
103
+ "sofa",
104
+ "stairs",
105
+ "stool",
106
+ "table",
107
+ "tent",
108
+ "toilet",
109
+ "tv_stand",
110
+ "vase",
111
+ "wardrobe",
112
+ "xbox",
113
+ ]
114
+
115
+ data = dict(
116
+ num_classes=40,
117
+ ignore_index=-1,
118
+ names=class_names,
119
+ train=dict(
120
+ type=dataset_type,
121
+ split="train",
122
+ data_root=data_root,
123
+ class_names=class_names,
124
+ transform=[
125
+ dict(type="NormalizeCoord"),
126
+ # dict(type="CenterShift", apply_z=True),
127
+ # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
128
+ # dict(type="RandomRotate", angle=[-1/24, 1/24], axis="x", p=0.5),
129
+ # dict(type="RandomRotate", angle=[-1/24, 1/24], axis="y", p=0.5),
130
+ dict(type="RandomScale", scale=[0.7, 1.5], anisotropic=True),
131
+ dict(type="RandomShift", shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
132
+ # dict(type="RandomFlip", p=0.5),
133
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
134
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
135
+ dict(
136
+ type="GridSample",
137
+ grid_size=0.01,
138
+ hash_type="fnv",
139
+ mode="train",
140
+ keys=("coord", "normal"),
141
+ return_grid_coord=True,
142
+ ),
143
+ # dict(type="SphereCrop", point_max=10000, mode="random"),
144
+ # dict(type="CenterShift", apply_z=True),
145
+ dict(type="ShufflePoint"),
146
+ dict(type="ToTensor"),
147
+ dict(
148
+ type="Collect",
149
+ keys=("coord", "grid_coord", "category"),
150
+ feat_keys=["coord", "normal"],
151
+ ),
152
+ ],
153
+ test_mode=False,
154
+ ),
155
+ val=dict(
156
+ type=dataset_type,
157
+ split="test",
158
+ data_root=data_root,
159
+ class_names=class_names,
160
+ transform=[
161
+ dict(type="NormalizeCoord"),
162
+ dict(
163
+ type="GridSample",
164
+ grid_size=0.01,
165
+ hash_type="fnv",
166
+ mode="train",
167
+ keys=("coord", "normal"),
168
+ return_grid_coord=True,
169
+ ),
170
+ dict(type="ToTensor"),
171
+ dict(
172
+ type="Collect",
173
+ keys=("coord", "grid_coord", "category"),
174
+ feat_keys=["coord", "normal"],
175
+ ),
176
+ ],
177
+ test_mode=False,
178
+ ),
179
+ test=dict(
180
+ type=dataset_type,
181
+ split="test",
182
+ data_root=data_root,
183
+ class_names=class_names,
184
+ transform=[
185
+ dict(type="NormalizeCoord"),
186
+ ],
187
+ test_mode=True,
188
+ test_cfg=dict(
189
+ post_transform=[
190
+ dict(
191
+ type="GridSample",
192
+ grid_size=0.01,
193
+ hash_type="fnv",
194
+ mode="train",
195
+ keys=("coord", "normal"),
196
+ return_grid_coord=True,
197
+ ),
198
+ dict(type="ToTensor"),
199
+ dict(
200
+ type="Collect",
201
+ keys=("coord", "grid_coord"),
202
+ feat_keys=["coord", "normal"],
203
+ ),
204
+ ],
205
+ aug_transform=[
206
+ [dict(type="RandomScale", scale=[1, 1], anisotropic=True)], # 1
207
+ [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 2
208
+ [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 3
209
+ [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 4
210
+ [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 5
211
+ [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 6
212
+ [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 7
213
+ [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 8
214
+ [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 9
215
+ [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 10
216
+ ],
217
+ ),
218
+ ),
219
+ )
220
+
221
+ # hooks
222
+ hooks = [
223
+ dict(type="CheckpointLoader"),
224
+ dict(type="IterationTimer", warmup_iter=2),
225
+ dict(type="InformationWriter"),
226
+ dict(type="ClsEvaluator"),
227
+ dict(type="CheckpointSaver", save_freq=None),
228
+ dict(type="PreciseEvaluator", test_last=False),
229
+ ]
230
+
231
+ # tester
232
+ test = dict(type="ClsVotingTester", num_repeat=100)
Pointcept/configs/modelnet40/cls-spunet-v1m1-0-base.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+ # misc custom setting
3
+ batch_size = 16 # bs: total bs in all gpus
4
+ # batch_size_val = 8
5
+ empty_cache = False
6
+ enable_amp = False
7
+
8
+ # model settings
9
+ model = dict(
10
+ type="DefaultClassifier",
11
+ num_classes=40,
12
+ backbone_embed_dim=256,
13
+ backbone=dict(
14
+ type="SpUNet-v1m1",
15
+ in_channels=6,
16
+ num_classes=0,
17
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
18
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
19
+ cls_mode=True,
20
+ ),
21
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
22
+ )
23
+
24
+ # scheduler settings
25
+ epoch = 200
26
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
27
+ scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
28
+
29
+ # dataset settings
30
+ dataset_type = "ModelNetDataset"
31
+ data_root = "data/modelnet40_normal_resampled"
32
+ cache_data = False
33
+ class_names = [
34
+ "airplane",
35
+ "bathtub",
36
+ "bed",
37
+ "bench",
38
+ "bookshelf",
39
+ "bottle",
40
+ "bowl",
41
+ "car",
42
+ "chair",
43
+ "cone",
44
+ "cup",
45
+ "curtain",
46
+ "desk",
47
+ "door",
48
+ "dresser",
49
+ "flower_pot",
50
+ "glass_box",
51
+ "guitar",
52
+ "keyboard",
53
+ "lamp",
54
+ "laptop",
55
+ "mantel",
56
+ "monitor",
57
+ "night_stand",
58
+ "person",
59
+ "piano",
60
+ "plant",
61
+ "radio",
62
+ "range_hood",
63
+ "sink",
64
+ "sofa",
65
+ "stairs",
66
+ "stool",
67
+ "table",
68
+ "tent",
69
+ "toilet",
70
+ "tv_stand",
71
+ "vase",
72
+ "wardrobe",
73
+ "xbox",
74
+ ]
75
+
76
+ data = dict(
77
+ num_classes=40,
78
+ ignore_index=-1,
79
+ names=class_names,
80
+ train=dict(
81
+ type=dataset_type,
82
+ split="train",
83
+ data_root=data_root,
84
+ class_names=class_names,
85
+ transform=[
86
+ dict(type="NormalizeCoord"),
87
+ # dict(type="CenterShift", apply_z=True),
88
+ # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
89
+ # dict(type="RandomRotate", angle=[-1/24, 1/24], axis="x", p=0.5),
90
+ # dict(type="RandomRotate", angle=[-1/24, 1/24], axis="y", p=0.5),
91
+ dict(type="RandomScale", scale=[0.9, 1.1]),
92
+ dict(type="RandomShift", shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
93
+ # dict(type="RandomFlip", p=0.5),
94
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
95
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
96
+ dict(
97
+ type="GridSample",
98
+ grid_size=0.01,
99
+ hash_type="fnv",
100
+ mode="train",
101
+ keys=("coord", "normal"),
102
+ return_grid_coord=True,
103
+ ),
104
+ # dict(type="SphereCrop", point_max=10000, mode="random"),
105
+ # dict(type="CenterShift", apply_z=True),
106
+ dict(type="ShufflePoint"),
107
+ dict(type="ToTensor"),
108
+ dict(
109
+ type="Collect",
110
+ keys=("coord", "grid_coord", "category"),
111
+ feat_keys=["coord", "normal"],
112
+ ),
113
+ ],
114
+ test_mode=False,
115
+ ),
116
+ val=dict(
117
+ type=dataset_type,
118
+ split="test",
119
+ data_root=data_root,
120
+ class_names=class_names,
121
+ transform=[
122
+ dict(type="NormalizeCoord"),
123
+ dict(
124
+ type="GridSample",
125
+ grid_size=0.01,
126
+ hash_type="fnv",
127
+ mode="train",
128
+ keys=("coord", "normal"),
129
+ return_grid_coord=True,
130
+ ),
131
+ dict(type="ToTensor"),
132
+ dict(
133
+ type="Collect",
134
+ keys=("coord", "grid_coord", "category"),
135
+ feat_keys=["coord", "normal"],
136
+ ),
137
+ ],
138
+ test_mode=False,
139
+ ),
140
+ test=dict(
141
+ type=dataset_type,
142
+ split="test",
143
+ data_root=data_root,
144
+ class_names=class_names,
145
+ transform=[
146
+ dict(type="NormalizeCoord"),
147
+ dict(
148
+ type="GridSample",
149
+ grid_size=0.01,
150
+ hash_type="fnv",
151
+ mode="train",
152
+ keys=("coord", "normal"),
153
+ return_grid_coord=True,
154
+ ),
155
+ dict(type="ToTensor"),
156
+ dict(
157
+ type="Collect",
158
+ keys=("coord", "grid_coord", "category"),
159
+ feat_keys=["coord", "normal"],
160
+ ),
161
+ ],
162
+ test_mode=True,
163
+ ),
164
+ )
165
+
166
+ # hooks
167
+ hooks = [
168
+ dict(type="CheckpointLoader"),
169
+ dict(type="IterationTimer", warmup_iter=2),
170
+ dict(type="InformationWriter"),
171
+ dict(type="ClsEvaluator"),
172
+ dict(type="CheckpointSaver", save_freq=None),
173
+ ]
174
+
175
+ # tester
176
+ test = dict(type="ClsTester")
Pointcept/configs/nuscenes/semseg-ppt-v1m1-0-nu-sk-wa-spunet.py ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # total batch size summed over all GPUs
5
+ num_worker = 24
6
+ mix_prob = 0.8
7
+ empty_cache = False
8
+ enable_amp = True
9
+ find_unused_parameters = True
10
+
11
+ # trainer
12
+ train = dict(
13
+ type="MultiDatasetTrainer",
14
+ )
15
+
16
+ # model settings
17
+ model = dict(
18
+ type="PPT-v1m1",
19
+ backbone=dict(
20
+ type="SpUNet-v1m3",
21
+ in_channels=4,
22
+ num_classes=0,
23
+ base_channels=32,
24
+ context_channels=256,
25
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
26
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
27
+ cls_mode=False,
28
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
29
+ zero_init=False,
30
+ norm_decouple=True,
31
+ norm_adaptive=False,
32
+ norm_affine=True,
33
+ ),
34
+ criteria=[
35
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
36
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
37
+ ],
38
+ backbone_out_channels=96,
39
+ context_channels=256,
40
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
41
+ template="[x]",
42
+ clip_model="ViT-B/16",
43
+ # fmt: off
44
+ class_name=(
45
+ # SemanticKITTI
46
+ "car", "bicycle", "motorcycle", "truck", "other vehicle",
47
+ "person", "person who rides a bicycle", "person who rides a motorcycle", "road", "parking",
48
+ "path for pedestrians at the side of a road", "other ground", "building", "fence", "vegetation",
49
+ "trunk", "terrain", "pole", "traffic sign",
50
+ # nuScenes
51
+ "barrier", "bicycle", "bus", "car", "construction vehicle",
52
+ "motorcycle", "pedestrian", "traffic cone", "trailer", "truck",
53
+ "path suitable or safe for driving", "other flat", "sidewalk", "terrain", "man made", "vegetation",
54
+ # Waymo
55
+ "car", "truck", "bus", "other vehicle", "person who rides a motorcycle",
56
+ "person who rides a bicycle", "pedestrian", "sign", "traffic light", "pole",
57
+ "construction cone", "bicycle", "motorcycle", "building", "vegetation",
58
+ "tree trunk", "curb", "road", "lane marker", "other ground", "horizontal surface that can not drive",
59
+ "surface when pedestrians most likely to walk on",
60
+ ),
61
+ valid_index=(
62
+ [i for i in range(19)],
63
+ [i for i in range(19, 19 + 16)],
64
+ [i for i in range(19 + 16, 19 + 16 + 22)],
65
+ ),
66
+ # fmt: on
67
+ backbone_mode=False,
68
+ )
69
+
70
+ # scheduler settings
71
+ epoch = 50
72
+ eval_epoch = 50
73
+ optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
74
+ scheduler = dict(
75
+ type="OneCycleLR",
76
+ max_lr=optimizer["lr"],
77
+ pct_start=0.04,
78
+ anneal_strategy="cos",
79
+ div_factor=10.0,
80
+ final_div_factor=100.0,
81
+ )
82
+ # param_dicts = [dict(keyword="modulation", lr=0.0002)]
83
+
84
+ # dataset settings
85
+ data = dict(
86
+ num_classes=16,
87
+ ignore_index=-1,
88
+ names=[
89
+ "barrier",
90
+ "bicycle",
91
+ "bus",
92
+ "car",
93
+ "construction_vehicle",
94
+ "motorcycle",
95
+ "pedestrian",
96
+ "traffic_cone",
97
+ "trailer",
98
+ "truck",
99
+ "driveable_surface",
100
+ "other_flat",
101
+ "sidewalk",
102
+ "terrain",
103
+ "manmade",
104
+ "vegetation",
105
+ ],
106
+ train=dict(
107
+ type="ConcatDataset",
108
+ datasets=[
109
+ # nuScenes
110
+ dict(
111
+ type="NuScenesDataset",
112
+ split="train",
113
+ data_root="data/nuscenes",
114
+ transform=[
115
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
116
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
117
+ dict(
118
+ type="RandomRotate",
119
+ angle=[-1, 1],
120
+ axis="z",
121
+ center=[0, 0, 0],
122
+ p=0.5,
123
+ ),
124
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
125
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
126
+ dict(
127
+ type="PointClip",
128
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
129
+ ),
130
+ dict(type="RandomScale", scale=[0.9, 1.1]),
131
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
132
+ dict(type="RandomFlip", p=0.5),
133
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
134
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
135
+ dict(
136
+ type="GridSample",
137
+ grid_size=0.05,
138
+ hash_type="fnv",
139
+ mode="train",
140
+ keys=("coord", "strength", "segment"),
141
+ return_grid_coord=True,
142
+ ),
143
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
144
+ # dict(type="CenterShift", apply_z=False),
145
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
146
+ dict(type="ToTensor"),
147
+ dict(
148
+ type="Collect",
149
+ keys=("coord", "grid_coord", "segment", "condition"),
150
+ feat_keys=("coord", "strength"),
151
+ ),
152
+ ],
153
+ test_mode=False,
154
+ ignore_index=-1,
155
+ loop=1,
156
+ ),
157
+ # SemanticKITTI
158
+ dict(
159
+ type="SemanticKITTIDataset",
160
+ split="train",
161
+ data_root="data/semantic_kitti",
162
+ transform=[
163
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
164
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
165
+ dict(
166
+ type="RandomRotate",
167
+ angle=[-1, 1],
168
+ axis="z",
169
+ center=[0, 0, 0],
170
+ p=0.5,
171
+ ),
172
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
173
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
174
+ dict(
175
+ type="PointClip",
176
+ point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2),
177
+ ),
178
+ dict(type="RandomScale", scale=[0.9, 1.1]),
179
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
180
+ dict(type="RandomFlip", p=0.5),
181
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
182
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
183
+ dict(
184
+ type="GridSample",
185
+ grid_size=0.05,
186
+ hash_type="fnv",
187
+ mode="train",
188
+ keys=("coord", "strength", "segment"),
189
+ return_grid_coord=True,
190
+ ),
191
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
192
+ # dict(type="CenterShift", apply_z=False),
193
+ dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
194
+ dict(type="ToTensor"),
195
+ dict(
196
+ type="Collect",
197
+ keys=("coord", "grid_coord", "segment", "condition"),
198
+ feat_keys=("coord", "strength"),
199
+ ),
200
+ ],
201
+ test_mode=False,
202
+ ignore_index=-1,
203
+ loop=1,
204
+ ),
205
+ # Waymo
206
+ dict(
207
+ type="WaymoDataset",
208
+ split="training",
209
+ data_root="data/waymo",
210
+ transform=[
211
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
212
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
213
+ dict(
214
+ type="RandomRotate",
215
+ angle=[-1, 1],
216
+ axis="z",
217
+ center=[0, 0, 0],
218
+ p=0.5,
219
+ ),
220
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
221
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
222
+ dict(
223
+ type="PointClip",
224
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
225
+ ),
226
+ dict(type="RandomScale", scale=[0.9, 1.1]),
227
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
228
+ dict(type="RandomFlip", p=0.5),
229
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
230
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
231
+ dict(
232
+ type="GridSample",
233
+ grid_size=0.05,
234
+ hash_type="fnv",
235
+ mode="train",
236
+ keys=("coord", "strength", "segment"),
237
+ return_grid_coord=True,
238
+ ),
239
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
240
+ # dict(type="CenterShift", apply_z=False),
241
+ dict(type="Add", keys_dict={"condition": "Waymo"}),
242
+ dict(type="ToTensor"),
243
+ dict(
244
+ type="Collect",
245
+ keys=("coord", "grid_coord", "segment", "condition"),
246
+ feat_keys=("coord", "strength"),
247
+ ),
248
+ ],
249
+ test_mode=False,
250
+ ignore_index=-1,
251
+ loop=1,
252
+ ),
253
+ ],
254
+ ),
255
+ val=dict(
256
+ type="NuScenesDataset",
257
+ split="val",
258
+ data_root="data/nuscenes",
259
+ transform=[
260
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
261
+ dict(
262
+ type="GridSample",
263
+ grid_size=0.05,
264
+ hash_type="fnv",
265
+ mode="train",
266
+ keys=("coord", "strength", "segment"),
267
+ return_grid_coord=True,
268
+ ),
269
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
270
+ dict(type="ToTensor"),
271
+ dict(
272
+ type="Collect",
273
+ keys=("coord", "grid_coord", "segment", "condition"),
274
+ feat_keys=("coord", "strength"),
275
+ ),
276
+ ],
277
+ test_mode=False,
278
+ ignore_index=-1,
279
+ ),
280
+ test=dict(
281
+ type="NuScenesDataset",
282
+ split="val",
283
+ data_root="data/nuscenes",
284
+ transform=[
285
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
286
+ dict(
287
+ type="GridSample",
288
+ grid_size=0.025,
289
+ hash_type="fnv",
290
+ mode="train",
291
+ keys=("coord", "strength", "segment"),
292
+ return_inverse=True,
293
+ ),
294
+ ],
295
+ test_mode=True,
296
+ test_cfg=dict(
297
+ voxelize=dict(
298
+ type="GridSample",
299
+ grid_size=0.05,
300
+ hash_type="fnv",
301
+ mode="test",
302
+ return_grid_coord=True,
303
+ keys=("coord", "strength"),
304
+ ),
305
+ crop=None,
306
+ post_transform=[
307
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
308
+ dict(type="ToTensor"),
309
+ dict(
310
+ type="Collect",
311
+ keys=("coord", "grid_coord", "index", "condition"),
312
+ feat_keys=("coord", "strength"),
313
+ ),
314
+ ],
315
+ aug_transform=[
316
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
317
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
318
+ [dict(type="RandomScale", scale=[1, 1])],
319
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
320
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
321
+ [
322
+ dict(type="RandomScale", scale=[0.9, 0.9]),
323
+ dict(type="RandomFlip", p=1),
324
+ ],
325
+ [
326
+ dict(type="RandomScale", scale=[0.95, 0.95]),
327
+ dict(type="RandomFlip", p=1),
328
+ ],
329
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
330
+ [
331
+ dict(type="RandomScale", scale=[1.05, 1.05]),
332
+ dict(type="RandomFlip", p=1),
333
+ ],
334
+ [
335
+ dict(type="RandomScale", scale=[1.1, 1.1]),
336
+ dict(type="RandomFlip", p=1),
337
+ ],
338
+ ],
339
+ ),
340
+ ignore_index=-1,
341
+ ),
342
+ )
Pointcept/configs/nuscenes/semseg-ppt-v1m2-0-nu-sk-wa-spunet.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # total batch size summed over all GPUs
5
+ num_worker = 24
6
+ mix_prob = 0.8
7
+ empty_cache = False
8
+ enable_amp = True
9
+ find_unused_parameters = True
10
+
11
+ # trainer
12
+ train = dict(
13
+ type="MultiDatasetTrainer",
14
+ )
15
+
16
+ # model settings
17
+ model = dict(
18
+ type="PPT-v1m2",
19
+ backbone=dict(
20
+ type="SpUNet-v1m3",
21
+ in_channels=4,
22
+ num_classes=0,
23
+ base_channels=32,
24
+ context_channels=256,
25
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
26
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
27
+ cls_mode=False,
28
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
29
+ zero_init=False,
30
+ norm_decouple=True,
31
+ norm_adaptive=False,
32
+ norm_affine=True,
33
+ ),
34
+ criteria=[
35
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
36
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
37
+ ],
38
+ backbone_out_channels=96,
39
+ context_channels=256,
40
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
41
+ num_classes=(19, 16, 22),
42
+ )
43
+
44
+ # scheduler settings
45
+ epoch = 50
46
+ eval_epoch = 50
47
+ optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
48
+ scheduler = dict(
49
+ type="OneCycleLR",
50
+ max_lr=optimizer["lr"],
51
+ pct_start=0.04,
52
+ anneal_strategy="cos",
53
+ div_factor=10.0,
54
+ final_div_factor=100.0,
55
+ )
56
+ # param_dicts = [dict(keyword="modulation", lr=0.0002)]
57
+
58
+ # dataset settings
59
+ data = dict(
60
+ num_classes=16,
61
+ ignore_index=-1,
62
+ names=[
63
+ "barrier",
64
+ "bicycle",
65
+ "bus",
66
+ "car",
67
+ "construction_vehicle",
68
+ "motorcycle",
69
+ "pedestrian",
70
+ "traffic_cone",
71
+ "trailer",
72
+ "truck",
73
+ "driveable_surface",
74
+ "other_flat",
75
+ "sidewalk",
76
+ "terrain",
77
+ "manmade",
78
+ "vegetation",
79
+ ],
80
+ train=dict(
81
+ type="ConcatDataset",
82
+ datasets=[
83
+ # nuScenes
84
+ dict(
85
+ type="NuScenesDataset",
86
+ split="train",
87
+ data_root="data/nuscenes",
88
+ transform=[
89
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
90
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
91
+ dict(
92
+ type="RandomRotate",
93
+ angle=[-1, 1],
94
+ axis="z",
95
+ center=[0, 0, 0],
96
+ p=0.5,
97
+ ),
98
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
99
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
100
+ dict(
101
+ type="PointClip",
102
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
103
+ ),
104
+ dict(type="RandomScale", scale=[0.9, 1.1]),
105
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
106
+ dict(type="RandomFlip", p=0.5),
107
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
108
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
109
+ dict(
110
+ type="GridSample",
111
+ grid_size=0.05,
112
+ hash_type="fnv",
113
+ mode="train",
114
+ keys=("coord", "strength", "segment"),
115
+ return_grid_coord=True,
116
+ ),
117
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
118
+ # dict(type="CenterShift", apply_z=False),
119
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
120
+ dict(type="ToTensor"),
121
+ dict(
122
+ type="Collect",
123
+ keys=("coord", "grid_coord", "segment", "condition"),
124
+ feat_keys=("coord", "strength"),
125
+ ),
126
+ ],
127
+ test_mode=False,
128
+ ignore_index=-1,
129
+ loop=1,
130
+ ),
131
+ # SemanticKITTI
132
+ dict(
133
+ type="SemanticKITTIDataset",
134
+ split="train",
135
+ data_root="data/semantic_kitti",
136
+ transform=[
137
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
138
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
139
+ dict(
140
+ type="RandomRotate",
141
+ angle=[-1, 1],
142
+ axis="z",
143
+ center=[0, 0, 0],
144
+ p=0.5,
145
+ ),
146
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
147
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
148
+ dict(
149
+ type="PointClip",
150
+ point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2),
151
+ ),
152
+ dict(type="RandomScale", scale=[0.9, 1.1]),
153
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
154
+ dict(type="RandomFlip", p=0.5),
155
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
156
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
157
+ dict(
158
+ type="GridSample",
159
+ grid_size=0.05,
160
+ hash_type="fnv",
161
+ mode="train",
162
+ keys=("coord", "strength", "segment"),
163
+ return_grid_coord=True,
164
+ ),
165
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
166
+ # dict(type="CenterShift", apply_z=False),
167
+ dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
168
+ dict(type="ToTensor"),
169
+ dict(
170
+ type="Collect",
171
+ keys=("coord", "grid_coord", "segment", "condition"),
172
+ feat_keys=("coord", "strength"),
173
+ ),
174
+ ],
175
+ test_mode=False,
176
+ ignore_index=-1,
177
+ loop=1,
178
+ ),
179
+ # Waymo
180
+ dict(
181
+ type="WaymoDataset",
182
+ split="training",
183
+ data_root="data/waymo",
184
+ transform=[
185
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
186
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
187
+ dict(
188
+ type="RandomRotate",
189
+ angle=[-1, 1],
190
+ axis="z",
191
+ center=[0, 0, 0],
192
+ p=0.5,
193
+ ),
194
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
195
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
196
+ dict(
197
+ type="PointClip",
198
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
199
+ ),
200
+ dict(type="RandomScale", scale=[0.9, 1.1]),
201
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
202
+ dict(type="RandomFlip", p=0.5),
203
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
204
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
205
+ dict(
206
+ type="GridSample",
207
+ grid_size=0.05,
208
+ hash_type="fnv",
209
+ mode="train",
210
+ keys=("coord", "strength", "segment"),
211
+ return_grid_coord=True,
212
+ ),
213
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
214
+ # dict(type="CenterShift", apply_z=False),
215
+ dict(type="Add", keys_dict={"condition": "Waymo"}),
216
+ dict(type="ToTensor"),
217
+ dict(
218
+ type="Collect",
219
+ keys=("coord", "grid_coord", "segment", "condition"),
220
+ feat_keys=("coord", "strength"),
221
+ ),
222
+ ],
223
+ test_mode=False,
224
+ ignore_index=-1,
225
+ loop=1,
226
+ ),
227
+ ],
228
+ ),
229
+ val=dict(
230
+ type="NuScenesDataset",
231
+ split="val",
232
+ data_root="data/nuscenes",
233
+ transform=[
234
+ dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
235
+ dict(
236
+ type="GridSample",
237
+ grid_size=0.05,
238
+ hash_type="fnv",
239
+ mode="train",
240
+ keys=("coord", "strength", "segment"),
241
+ return_grid_coord=True,
242
+ ),
243
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
244
+ dict(type="ToTensor"),
245
+ dict(
246
+ type="Collect",
247
+ keys=("coord", "grid_coord", "segment", "condition"),
248
+ feat_keys=("coord", "strength"),
249
+ ),
250
+ ],
251
+ test_mode=False,
252
+ ignore_index=-1,
253
+ ),
254
+ test=dict(
255
+ type="NuScenesDataset",
256
+ split="val",
257
+ data_root="data/nuscenes",
258
+ transform=[
259
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
260
+ dict(
261
+ type="GridSample",
262
+ grid_size=0.025,
263
+ hash_type="fnv",
264
+ mode="train",
265
+ keys=("coord", "strength", "segment"),
266
+ return_inverse=True,
267
+ ),
268
+ ],
269
+ test_mode=True,
270
+ test_cfg=dict(
271
+ voxelize=dict(
272
+ type="GridSample",
273
+ grid_size=0.05,
274
+ hash_type="fnv",
275
+ mode="test",
276
+ return_grid_coord=True,
277
+ keys=("coord", "strength"),
278
+ ),
279
+ crop=None,
280
+ post_transform=[
281
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
282
+ dict(type="ToTensor"),
283
+ dict(
284
+ type="Collect",
285
+ keys=("coord", "grid_coord", "index", "condition"),
286
+ feat_keys=("coord", "strength"),
287
+ ),
288
+ ],
289
+ aug_transform=[
290
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
291
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
292
+ [dict(type="RandomScale", scale=[1, 1])],
293
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
294
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
295
+ [
296
+ dict(type="RandomScale", scale=[0.9, 0.9]),
297
+ dict(type="RandomFlip", p=1),
298
+ ],
299
+ [
300
+ dict(type="RandomScale", scale=[0.95, 0.95]),
301
+ dict(type="RandomFlip", p=1),
302
+ ],
303
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
304
+ [
305
+ dict(type="RandomScale", scale=[1.05, 1.05]),
306
+ dict(type="RandomFlip", p=1),
307
+ ],
308
+ [
309
+ dict(type="RandomScale", scale=[1.1, 1.1]),
310
+ dict(type="RandomFlip", p=1),
311
+ ],
312
+ ],
313
+ ),
314
+ ignore_index=-1,
315
+ ),
316
+ )
Pointcept/configs/nuscenes/semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit.py ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # total batch size summed over all GPUs
5
+ num_worker = 24
6
+ mix_prob = 0.8
7
+ empty_cache = False
8
+ enable_amp = True
9
+ find_unused_parameters = True
10
+ evaluate = False
11
+
12
+ # trainer
13
+ train = dict(
14
+ type="MultiDatasetTrainer",
15
+ )
16
+
17
+ # model settings
18
+ model = dict(
19
+ type="PPT-v1m2",
20
+ backbone=dict(
21
+ type="SpUNet-v1m3",
22
+ in_channels=4,
23
+ num_classes=0,
24
+ base_channels=32,
25
+ context_channels=256,
26
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
27
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
28
+ cls_mode=False,
29
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
30
+ zero_init=False,
31
+ norm_decouple=True,
32
+ norm_adaptive=False,
33
+ norm_affine=True,
34
+ ),
35
+ criteria=[
36
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
37
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
38
+ ],
39
+ backbone_out_channels=96,
40
+ context_channels=256,
41
+ conditions=("SemanticKITTI", "nuScenes", "Waymo"),
42
+ num_classes=(19, 16, 22),
43
+ )
44
+
45
+ # scheduler settings
46
+ epoch = 50
47
+ eval_epoch = 50
48
+ optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
49
+ scheduler = dict(
50
+ type="OneCycleLR",
51
+ max_lr=optimizer["lr"],
52
+ pct_start=0.04,
53
+ anneal_strategy="cos",
54
+ div_factor=10.0,
55
+ final_div_factor=100.0,
56
+ )
57
+ # param_dicts = [dict(keyword="modulation", lr=0.0002)]
58
+
59
+ # dataset settings
60
+ data = dict(
61
+ num_classes=16,
62
+ ignore_index=-1,
63
+ names=[
64
+ "barrier",
65
+ "bicycle",
66
+ "bus",
67
+ "car",
68
+ "construction_vehicle",
69
+ "motorcycle",
70
+ "pedestrian",
71
+ "traffic_cone",
72
+ "trailer",
73
+ "truck",
74
+ "driveable_surface",
75
+ "other_flat",
76
+ "sidewalk",
77
+ "terrain",
78
+ "manmade",
79
+ "vegetation",
80
+ ],
81
+ train=dict(
82
+ type="ConcatDataset",
83
+ datasets=[
84
+ # nuScenes
85
+ dict(
86
+ type="NuScenesDataset",
87
+ split=["train", "val"],
88
+ data_root="data/nuscenes",
89
+ transform=[
90
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
91
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
92
+ dict(
93
+ type="RandomRotate",
94
+ angle=[-1, 1],
95
+ axis="z",
96
+ center=[0, 0, 0],
97
+ p=0.5,
98
+ ),
99
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
100
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
101
+ dict(
102
+ type="PointClip",
103
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
104
+ ),
105
+ dict(type="RandomScale", scale=[0.9, 1.1]),
106
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
107
+ dict(type="RandomFlip", p=0.5),
108
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
109
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
110
+ dict(
111
+ type="GridSample",
112
+ grid_size=0.05,
113
+ hash_type="fnv",
114
+ mode="train",
115
+ keys=("coord", "strength", "segment"),
116
+ return_grid_coord=True,
117
+ ),
118
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
119
+ # dict(type="CenterShift", apply_z=False),
120
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
121
+ dict(type="ToTensor"),
122
+ dict(
123
+ type="Collect",
124
+ keys=("coord", "grid_coord", "segment", "condition"),
125
+ feat_keys=("coord", "strength"),
126
+ ),
127
+ ],
128
+ test_mode=False,
129
+ ignore_index=-1,
130
+ loop=1,
131
+ ),
132
+ # SemanticKITTI
133
+ dict(
134
+ type="SemanticKITTIDataset",
135
+ split=["train", "val"],
136
+ data_root="data/semantic_kitti",
137
+ transform=[
138
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
139
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
140
+ dict(
141
+ type="RandomRotate",
142
+ angle=[-1, 1],
143
+ axis="z",
144
+ center=[0, 0, 0],
145
+ p=0.5,
146
+ ),
147
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
148
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
149
+ dict(
150
+ type="PointClip",
151
+ point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2),
152
+ ),
153
+ dict(type="RandomScale", scale=[0.9, 1.1]),
154
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
155
+ dict(type="RandomFlip", p=0.5),
156
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
157
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
158
+ dict(
159
+ type="GridSample",
160
+ grid_size=0.05,
161
+ hash_type="fnv",
162
+ mode="train",
163
+ keys=("coord", "strength", "segment"),
164
+ return_grid_coord=True,
165
+ ),
166
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
167
+ # dict(type="CenterShift", apply_z=False),
168
+ dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
169
+ dict(type="ToTensor"),
170
+ dict(
171
+ type="Collect",
172
+ keys=("coord", "grid_coord", "segment", "condition"),
173
+ feat_keys=("coord", "strength"),
174
+ ),
175
+ ],
176
+ test_mode=False,
177
+ ignore_index=-1,
178
+ loop=1,
179
+ ),
180
+ # Waymo
181
+ dict(
182
+ type="WaymoDataset",
183
+ split=["training", "validation"],
184
+ data_root="data/waymo",
185
+ transform=[
186
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
187
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
188
+ dict(
189
+ type="RandomRotate",
190
+ angle=[-1, 1],
191
+ axis="z",
192
+ center=[0, 0, 0],
193
+ p=0.5,
194
+ ),
195
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
196
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
197
+ dict(
198
+ type="PointClip",
199
+ point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
200
+ ),
201
+ dict(type="RandomScale", scale=[0.9, 1.1]),
202
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
203
+ dict(type="RandomFlip", p=0.5),
204
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
205
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
206
+ dict(
207
+ type="GridSample",
208
+ grid_size=0.05,
209
+ hash_type="fnv",
210
+ mode="train",
211
+ keys=("coord", "strength", "segment"),
212
+ return_grid_coord=True,
213
+ ),
214
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
215
+ # dict(type="CenterShift", apply_z=False),
216
+ dict(type="Add", keys_dict={"condition": "Waymo"}),
217
+ dict(type="ToTensor"),
218
+ dict(
219
+ type="Collect",
220
+ keys=("coord", "grid_coord", "segment", "condition"),
221
+ feat_keys=("coord", "strength"),
222
+ ),
223
+ ],
224
+ test_mode=False,
225
+ ignore_index=-1,
226
+ loop=1,
227
+ ),
228
+ ],
229
+ ),
230
+ test=dict(
231
+ type="NuScenesDataset",
232
+ split="test",
233
+ data_root="data/nuscenes",
234
+ transform=[
235
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
236
+ dict(
237
+ type="GridSample",
238
+ grid_size=0.025,
239
+ hash_type="fnv",
240
+ mode="train",
241
+ keys=("coord", "strength", "segment"),
242
+ return_inverse=True,
243
+ ),
244
+ ],
245
+ test_mode=True,
246
+ test_cfg=dict(
247
+ voxelize=dict(
248
+ type="GridSample",
249
+ grid_size=0.05,
250
+ hash_type="fnv",
251
+ mode="test",
252
+ return_grid_coord=True,
253
+ keys=("coord", "strength"),
254
+ ),
255
+ crop=None,
256
+ post_transform=[
257
+ dict(type="Add", keys_dict={"condition": "nuScenes"}),
258
+ dict(type="ToTensor"),
259
+ dict(
260
+ type="Collect",
261
+ keys=("coord", "grid_coord", "index", "condition"),
262
+ feat_keys=("coord", "strength"),
263
+ ),
264
+ ],
265
+ aug_transform=[
266
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
267
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
268
+ [dict(type="RandomScale", scale=[1, 1])],
269
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
270
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
271
+ [
272
+ dict(type="RandomScale", scale=[0.9, 0.9]),
273
+ dict(type="RandomFlip", p=1),
274
+ ],
275
+ [
276
+ dict(type="RandomScale", scale=[0.95, 0.95]),
277
+ dict(type="RandomFlip", p=1),
278
+ ],
279
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
280
+ [
281
+ dict(type="RandomScale", scale=[1.05, 1.05]),
282
+ dict(type="RandomFlip", p=1),
283
+ ],
284
+ [
285
+ dict(type="RandomScale", scale=[1.1, 1.1]),
286
+ dict(type="RandomFlip", p=1),
287
+ ],
288
+ ],
289
+ ),
290
+ ignore_index=-1,
291
+ ),
292
+ )
Pointcept/configs/nuscenes/semseg-pt-v2m2-0-base.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # total batch size summed over all GPUs
5
+ mix_prob = 0.8
6
+ empty_cache = False
7
+ enable_amp = True
8
+
9
+ # model settings
10
+ model = dict(
11
+ type="DefaultSegmentor",
12
+ backbone=dict(
13
+ type="PT-v2m2",
14
+ in_channels=4,
15
+ num_classes=16,
16
+ patch_embed_depth=1,
17
+ patch_embed_channels=48,
18
+ patch_embed_groups=6,
19
+ patch_embed_neighbours=8,
20
+ enc_depths=(2, 2, 6, 2),
21
+ enc_channels=(96, 192, 384, 512),
22
+ enc_groups=(12, 24, 48, 64),
23
+ enc_neighbours=(16, 16, 16, 16),
24
+ dec_depths=(1, 1, 1, 1),
25
+ dec_channels=(48, 96, 192, 384),
26
+ dec_groups=(6, 12, 24, 48),
27
+ dec_neighbours=(16, 16, 16, 16),
28
+ grid_sizes=(0.15, 0.375, 0.9375, 2.34375), # growth per stage: x3 (presumably relative to a 0.05 base voxel - TODO confirm), then x2.5, x2.5, x2.5
29
+ attn_qkv_bias=True,
30
+ pe_multiplier=False,
31
+ pe_bias=True,
32
+ attn_drop_rate=0.0,
33
+ drop_path_rate=0.3,
34
+ enable_checkpoint=False,
35
+ unpool_backend="map", # available options: map / interp
36
+ ),
37
+ criteria=[
38
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
39
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
40
+ ],
41
+ )
42
+
43
+ # scheduler settings
44
+ epoch = 50
45
+ eval_epoch = 50
46
+ optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
47
+ scheduler = dict(
48
+ type="OneCycleLR",
49
+ max_lr=optimizer["lr"],
50
+ pct_start=0.04,
51
+ anneal_strategy="cos",
52
+ div_factor=10.0,
53
+ final_div_factor=100.0,
54
+ )
55
+
56
+ # dataset settings
57
+ dataset_type = "NuScenesDataset"
58
+ data_root = "data/nuscenes"
59
+ ignore_index = -1
60
+ names = [
61
+ "barrier",
62
+ "bicycle",
63
+ "bus",
64
+ "car",
65
+ "construction_vehicle",
66
+ "motorcycle",
67
+ "pedestrian",
68
+ "traffic_cone",
69
+ "trailer",
70
+ "truck",
71
+ "driveable_surface",
72
+ "other_flat",
73
+ "sidewalk",
74
+ "terrain",
75
+ "manmade",
76
+ "vegetation",
77
+ ]
78
+
79
+ data = dict(
80
+ num_classes=16,
81
+ ignore_index=ignore_index,
82
+ names=names,
83
+ train=dict(
84
+ type=dataset_type,
85
+ split="train",
86
+ data_root=data_root,
87
+ transform=[
88
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
89
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
90
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
91
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
92
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
93
+ dict(type="RandomScale", scale=[0.9, 1.1]),
94
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
95
+ dict(type="RandomFlip", p=0.5),
96
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
97
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
98
+ # dict(type="GridSample", grid_size=0.05, hash_type="fnv", mode="train",
99
+ # keys=("coord", "strength", "segment"), return_grid_coord=True),
100
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
101
+ # dict(type="CenterShift", apply_z=False),
102
+ dict(type="ToTensor"),
103
+ dict(
104
+ type="Collect",
105
+ keys=("coord", "segment"),
106
+ feat_keys=("coord", "strength"),
107
+ ),
108
+ ],
109
+ test_mode=False,
110
+ ignore_index=ignore_index,
111
+ ),
112
+ val=dict(
113
+ type=dataset_type,
114
+ split="val",
115
+ data_root=data_root,
116
+ transform=[
117
+ # dict(type="PointClip", point_cloud_range=(-51.2, -51.2, -4, 51.2, 51.2, 2.4)),
118
+ # dict(type="GridSample", grid_size=0.05, hash_type="fnv", mode="train",
119
+ # keys=("coord", "strength", "segment"), return_grid_coord=True),
120
+ dict(type="ToTensor"),
121
+ dict(
122
+ type="Collect",
123
+ keys=("coord", "segment"),
124
+ feat_keys=("coord", "strength"),
125
+ ),
126
+ ],
127
+ test_mode=False,
128
+ ignore_index=ignore_index,
129
+ ),
130
+ test=dict(
131
+ type=dataset_type,
132
+ split="val",
133
+ data_root=data_root,
134
+ transform=[],
135
+ test_mode=True,
136
+ test_cfg=dict(
137
+ voxelize=None,
138
+ crop=None,
139
+ post_transform=[
140
+ dict(type="ToTensor"),
141
+ dict(
142
+ type="Collect",
143
+ keys=("coord", "index"),
144
+ feat_keys=("coord", "strength"),
145
+ ),
146
+ ],
147
+ aug_transform=[
148
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
149
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
150
+ [dict(type="RandomScale", scale=[1, 1])],
151
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
152
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
153
+ [
154
+ dict(type="RandomScale", scale=[0.9, 0.9]),
155
+ dict(type="RandomFlip", p=1),
156
+ ],
157
+ [
158
+ dict(type="RandomScale", scale=[0.95, 0.95]),
159
+ dict(type="RandomFlip", p=1),
160
+ ],
161
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
162
+ [
163
+ dict(type="RandomScale", scale=[1.05, 1.05]),
164
+ dict(type="RandomFlip", p=1),
165
+ ],
166
+ [
167
+ dict(type="RandomScale", scale=[1.1, 1.1]),
168
+ dict(type="RandomFlip", p=1),
169
+ ],
170
+ ],
171
+ ),
172
+ ignore_index=ignore_index,
173
+ ),
174
+ )
Pointcept/configs/nuscenes/semseg-pt-v2m2-1-benchmark-submit.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total batch size summed across all GPUs
5
+ mix_prob = 0.8
6
+ empty_cache = False
7
+ enable_amp = True
8
+ evaluate = False
9
+
10
+ # model settings
11
+ model = dict(
12
+ type="DefaultSegmentor",
13
+ backbone=dict(
14
+ type="PT-v2m2",
15
+ in_channels=4,
16
+ num_classes=16,
17
+ patch_embed_depth=1,
18
+ patch_embed_channels=48,
19
+ patch_embed_groups=6,
20
+ patch_embed_neighbours=8,
21
+ enc_depths=(2, 2, 6, 2),
22
+ enc_channels=(96, 192, 384, 512),
23
+ enc_groups=(12, 24, 48, 64),
24
+ enc_neighbours=(16, 16, 16, 16),
25
+ dec_depths=(1, 1, 1, 1),
26
+ dec_channels=(48, 96, 192, 384),
27
+ dec_groups=(6, 12, 24, 48),
28
+ dec_neighbours=(16, 16, 16, 16),
29
+ grid_sizes=(0.15, 0.375, 0.9375, 2.34375), # first is x3 (presumably of a 0.05 base grid - confirm), then each size is x2.5 the previous
30
+ attn_qkv_bias=True,
31
+ pe_multiplier=False,
32
+ pe_bias=True,
33
+ attn_drop_rate=0.0,
34
+ drop_path_rate=0.3,
35
+ enable_checkpoint=False,
36
+ unpool_backend="map", # map / interp
37
+ ),
38
+ criteria=[
39
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
40
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
41
+ ],
42
+ )
43
+
44
+ # scheduler settings
45
+ epoch = 50
46
+ eval_epoch = 50
47
+ optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
48
+ scheduler = dict(
49
+ type="OneCycleLR",
50
+ max_lr=optimizer["lr"],
51
+ pct_start=0.04,
52
+ anneal_strategy="cos",
53
+ div_factor=10.0,
54
+ final_div_factor=100.0,
55
+ )
56
+
57
+ # dataset settings
58
+ dataset_type = "NuScenesDataset"
59
+ data_root = "data/nuscenes"
60
+ ignore_index = -1
61
+ names = [
62
+ "barrier",
63
+ "bicycle",
64
+ "bus",
65
+ "car",
66
+ "construction_vehicle",
67
+ "motorcycle",
68
+ "pedestrian",
69
+ "traffic_cone",
70
+ "trailer",
71
+ "truck",
72
+ "driveable_surface",
73
+ "other_flat",
74
+ "sidewalk",
75
+ "terrain",
76
+ "manmade",
77
+ "vegetation",
78
+ ]
79
+
80
+ data = dict(
81
+ num_classes=16,
82
+ ignore_index=ignore_index,
83
+ names=names,
84
+ train=dict(
85
+ type=dataset_type,
86
+ split=["train", "val"],
87
+ data_root=data_root,
88
+ transform=[
89
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
90
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
91
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
92
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
93
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
94
+ dict(type="RandomScale", scale=[0.9, 1.1]),
95
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
96
+ dict(type="RandomFlip", p=0.5),
97
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
98
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
99
+ # dict(type="GridSample", grid_size=0.05, hash_type="fnv", mode="train",
100
+ # keys=("coord", "strength", "segment"), return_grid_coord=True),
101
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
102
+ # dict(type="CenterShift", apply_z=False),
103
+ dict(type="ToTensor"),
104
+ dict(
105
+ type="Collect",
106
+ keys=("coord", "segment"),
107
+ feat_keys=("coord", "strength"),
108
+ ),
109
+ ],
110
+ test_mode=False,
111
+ ignore_index=ignore_index,
112
+ ),
113
+ test=dict(
114
+ type=dataset_type,
115
+ split="test",
116
+ data_root=data_root,
117
+ transform=[],
118
+ test_mode=True,
119
+ test_cfg=dict(
120
+ voxelize=None,
121
+ crop=None,
122
+ post_transform=[
123
+ dict(type="ToTensor"),
124
+ dict(
125
+ type="Collect",
126
+ keys=("coord", "index"),
127
+ feat_keys=("coord", "strength"),
128
+ ),
129
+ ],
130
+ aug_transform=[
131
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
132
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
133
+ [dict(type="RandomScale", scale=[1, 1])],
134
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
135
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
136
+ [
137
+ dict(type="RandomScale", scale=[0.9, 0.9]),
138
+ dict(type="RandomFlip", p=1),
139
+ ],
140
+ [
141
+ dict(type="RandomScale", scale=[0.95, 0.95]),
142
+ dict(type="RandomFlip", p=1),
143
+ ],
144
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
145
+ [
146
+ dict(type="RandomScale", scale=[1.05, 1.05]),
147
+ dict(type="RandomFlip", p=1),
148
+ ],
149
+ [
150
+ dict(type="RandomScale", scale=[1.1, 1.1]),
151
+ dict(type="RandomFlip", p=1),
152
+ ],
153
+ ],
154
+ ),
155
+ ignore_index=ignore_index,
156
+ ),
157
+ )
Pointcept/configs/nuscenes/semseg-pt-v3m1-0-base.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total batch size summed across all GPUs
5
+ mix_prob = 0.8
6
+ empty_cache = False
7
+ enable_amp = True
8
+
9
+ # model settings
10
+ model = dict(
11
+ type="DefaultSegmentorV2",
12
+ num_classes=16,
13
+ backbone_out_channels=64,
14
+ backbone=dict(
15
+ type="PT-v3m1",
16
+ in_channels=4,
17
+ order=["z", "z-trans", "hilbert", "hilbert-trans"],
18
+ stride=(2, 2, 2, 2),
19
+ enc_depths=(2, 2, 2, 6, 2),
20
+ enc_channels=(32, 64, 128, 256, 512),
21
+ enc_num_head=(2, 4, 8, 16, 32),
22
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
23
+ dec_depths=(2, 2, 2, 2),
24
+ dec_channels=(64, 64, 128, 256),
25
+ dec_num_head=(4, 4, 8, 16),
26
+ dec_patch_size=(1024, 1024, 1024, 1024),
27
+ mlp_ratio=4,
28
+ qkv_bias=True,
29
+ qk_scale=None,
30
+ attn_drop=0.0,
31
+ proj_drop=0.0,
32
+ drop_path=0.3,
33
+ shuffle_orders=True,
34
+ pre_norm=True,
35
+ enable_rpe=False,
36
+ enable_flash=True,
37
+ upcast_attention=False,
38
+ upcast_softmax=False,
39
+ cls_mode=False,
40
+ pdnorm_bn=False,
41
+ pdnorm_ln=False,
42
+ pdnorm_decouple=True,
43
+ pdnorm_adaptive=False,
44
+ pdnorm_affine=True,
45
+ pdnorm_conditions=("nuScenes", "SemanticKITTI", "Waymo"),
46
+ ),
47
+ criteria=[
48
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
49
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
50
+ ],
51
+ )
52
+
53
+ # scheduler settings
54
+ epoch = 50
55
+ eval_epoch = 50
56
+ optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
57
+ scheduler = dict(
58
+ type="OneCycleLR",
59
+ max_lr=[0.002, 0.0002],
60
+ pct_start=0.04,
61
+ anneal_strategy="cos",
62
+ div_factor=10.0,
63
+ final_div_factor=100.0,
64
+ )
65
+ param_dicts = [dict(keyword="block", lr=0.0002)]
66
+
67
+ # dataset settings
68
+ dataset_type = "NuScenesDataset"
69
+ data_root = "data/nuscenes"
70
+ ignore_index = -1
71
+ names = [
72
+ "barrier",
73
+ "bicycle",
74
+ "bus",
75
+ "car",
76
+ "construction_vehicle",
77
+ "motorcycle",
78
+ "pedestrian",
79
+ "traffic_cone",
80
+ "trailer",
81
+ "truck",
82
+ "driveable_surface",
83
+ "other_flat",
84
+ "sidewalk",
85
+ "terrain",
86
+ "manmade",
87
+ "vegetation",
88
+ ]
89
+
90
+ data = dict(
91
+ num_classes=16,
92
+ ignore_index=ignore_index,
93
+ names=names,
94
+ train=dict(
95
+ type=dataset_type,
96
+ split="train",
97
+ data_root=data_root,
98
+ transform=[
99
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
100
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
101
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
102
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
103
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
104
+ dict(type="RandomScale", scale=[0.9, 1.1]),
105
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
106
+ dict(type="RandomFlip", p=0.5),
107
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
108
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
109
+ dict(
110
+ type="GridSample",
111
+ grid_size=0.05,
112
+ hash_type="fnv",
113
+ mode="train",
114
+ keys=("coord", "strength", "segment"),
115
+ return_grid_coord=True,
116
+ ),
117
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
118
+ # dict(type="CenterShift", apply_z=False),
119
+ dict(type="ToTensor"),
120
+ dict(
121
+ type="Collect",
122
+ keys=("coord", "grid_coord", "segment"),
123
+ feat_keys=("coord", "strength"),
124
+ ),
125
+ ],
126
+ test_mode=False,
127
+ ignore_index=ignore_index,
128
+ ),
129
+ val=dict(
130
+ type=dataset_type,
131
+ split="val",
132
+ data_root=data_root,
133
+ transform=[
134
+ # dict(type="PointClip", point_cloud_range=(-51.2, -51.2, -4, 51.2, 51.2, 2.4)),
135
+ dict(
136
+ type="GridSample",
137
+ grid_size=0.05,
138
+ hash_type="fnv",
139
+ mode="train",
140
+ keys=("coord", "strength", "segment"),
141
+ return_grid_coord=True,
142
+ ),
143
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
144
+ dict(type="ToTensor"),
145
+ dict(
146
+ type="Collect",
147
+ keys=("coord", "grid_coord", "segment"),
148
+ feat_keys=("coord", "strength"),
149
+ ),
150
+ ],
151
+ test_mode=False,
152
+ ignore_index=ignore_index,
153
+ ),
154
+ test=dict(
155
+ type=dataset_type,
156
+ split="val",
157
+ data_root=data_root,
158
+ transform=[
159
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
160
+ dict(
161
+ type="GridSample",
162
+ grid_size=0.025,
163
+ hash_type="fnv",
164
+ mode="train",
165
+ keys=("coord", "strength", "segment"),
166
+ return_inverse=True,
167
+ ),
168
+ ],
169
+ test_mode=True,
170
+ test_cfg=dict(
171
+ voxelize=dict(
172
+ type="GridSample",
173
+ grid_size=0.05,
174
+ hash_type="fnv",
175
+ mode="test",
176
+ return_grid_coord=True,
177
+ keys=("coord", "strength"),
178
+ ),
179
+ crop=None,
180
+ post_transform=[
181
+ dict(type="ToTensor"),
182
+ dict(
183
+ type="Collect",
184
+ keys=("coord", "grid_coord", "index"),
185
+ feat_keys=("coord", "strength"),
186
+ ),
187
+ ],
188
+ aug_transform=[
189
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
190
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
191
+ [dict(type="RandomScale", scale=[1, 1])],
192
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
193
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
194
+ [
195
+ dict(type="RandomScale", scale=[0.9, 0.9]),
196
+ dict(type="RandomFlip", p=1),
197
+ ],
198
+ [
199
+ dict(type="RandomScale", scale=[0.95, 0.95]),
200
+ dict(type="RandomFlip", p=1),
201
+ ],
202
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
203
+ [
204
+ dict(type="RandomScale", scale=[1.05, 1.05]),
205
+ dict(type="RandomFlip", p=1),
206
+ ],
207
+ [
208
+ dict(type="RandomScale", scale=[1.1, 1.1]),
209
+ dict(type="RandomFlip", p=1),
210
+ ],
211
+ ],
212
+ ),
213
+ ignore_index=ignore_index,
214
+ ),
215
+ )
Pointcept/configs/nuscenes/semseg-spunet-v1m1-0-base.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total batch size summed across all GPUs
5
+ mix_prob = 0
6
+ empty_cache = False
7
+ enable_amp = True
8
+
9
+ # model settings
10
+ model = dict(
11
+ type="DefaultSegmentor",
12
+ backbone=dict(
13
+ type="SpUNet-v1m1",
14
+ in_channels=4,
15
+ num_classes=16,
16
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
17
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
18
+ ),
19
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
20
+ )
21
+
22
+ # scheduler settings
23
+ epoch = 50
24
+ eval_epoch = 50
25
+ optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
26
+ scheduler = dict(
27
+ type="OneCycleLR",
28
+ max_lr=optimizer["lr"],
29
+ pct_start=0.04,
30
+ anneal_strategy="cos",
31
+ div_factor=10.0,
32
+ final_div_factor=100.0,
33
+ )
34
+
35
+ # dataset settings
36
+ dataset_type = "NuScenesDataset"
37
+ data_root = "data/nuscenes"
38
+ ignore_index = -1
39
+ names = [
40
+ "barrier",
41
+ "bicycle",
42
+ "bus",
43
+ "car",
44
+ "construction_vehicle",
45
+ "motorcycle",
46
+ "pedestrian",
47
+ "traffic_cone",
48
+ "trailer",
49
+ "truck",
50
+ "driveable_surface",
51
+ "other_flat",
52
+ "sidewalk",
53
+ "terrain",
54
+ "manmade",
55
+ "vegetation",
56
+ ]
57
+
58
+ data = dict(
59
+ num_classes=16,
60
+ ignore_index=ignore_index,
61
+ names=names,
62
+ train=dict(
63
+ type=dataset_type,
64
+ split="train",
65
+ data_root=data_root,
66
+ transform=[
67
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
68
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
69
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
70
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
71
+ # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
72
+ dict(type="RandomScale", scale=[0.9, 1.1]),
73
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
74
+ dict(type="RandomFlip", p=0.5),
75
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
76
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
77
+ dict(
78
+ type="GridSample",
79
+ grid_size=0.05,
80
+ hash_type="fnv",
81
+ mode="train",
82
+ keys=("coord", "strength", "segment"),
83
+ return_grid_coord=True,
84
+ ),
85
+ # dict(type="SphereCrop", point_max=1000000, mode="random"),
86
+ # dict(type="CenterShift", apply_z=False),
87
+ dict(type="ToTensor"),
88
+ dict(
89
+ type="Collect",
90
+ keys=("coord", "grid_coord", "segment"),
91
+ feat_keys=("coord", "strength"),
92
+ ),
93
+ ],
94
+ test_mode=False,
95
+ ignore_index=ignore_index,
96
+ ),
97
+ val=dict(
98
+ type=dataset_type,
99
+ split="val",
100
+ data_root=data_root,
101
+ transform=[
102
+ # dict(type="PointClip", point_cloud_range=(-51.2, -51.2, -4, 51.2, 51.2, 2.4)),
103
+ dict(
104
+ type="GridSample",
105
+ grid_size=0.05,
106
+ hash_type="fnv",
107
+ mode="train",
108
+ keys=("coord", "strength", "segment"),
109
+ return_grid_coord=True,
110
+ ),
111
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
112
+ dict(type="ToTensor"),
113
+ dict(
114
+ type="Collect",
115
+ keys=("coord", "grid_coord", "segment"),
116
+ feat_keys=("coord", "strength"),
117
+ ),
118
+ ],
119
+ test_mode=False,
120
+ ignore_index=ignore_index,
121
+ ),
122
+ test=dict(
123
+ type=dataset_type,
124
+ split="val",
125
+ data_root=data_root,
126
+ transform=[
127
+ dict(type="Copy", keys_dict={"segment": "origin_segment"}),
128
+ dict(
129
+ type="GridSample",
130
+ grid_size=0.025,
131
+ hash_type="fnv",
132
+ mode="train",
133
+ keys=("coord", "strength", "segment"),
134
+ return_inverse=True,
135
+ ),
136
+ ],
137
+ test_mode=True,
138
+ test_cfg=dict(
139
+ voxelize=dict(
140
+ type="GridSample",
141
+ grid_size=0.05,
142
+ hash_type="fnv",
143
+ mode="test",
144
+ return_grid_coord=True,
145
+ keys=("coord", "strength"),
146
+ ),
147
+ crop=None,
148
+ post_transform=[
149
+ dict(type="ToTensor"),
150
+ dict(
151
+ type="Collect",
152
+ keys=("coord", "grid_coord", "index"),
153
+ feat_keys=("coord", "strength"),
154
+ ),
155
+ ],
156
+ aug_transform=[
157
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
158
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
159
+ [dict(type="RandomScale", scale=[1, 1])],
160
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
161
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
162
+ [
163
+ dict(type="RandomScale", scale=[0.9, 0.9]),
164
+ dict(type="RandomFlip", p=1),
165
+ ],
166
+ [
167
+ dict(type="RandomScale", scale=[0.95, 0.95]),
168
+ dict(type="RandomFlip", p=1),
169
+ ],
170
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
171
+ [
172
+ dict(type="RandomScale", scale=[1.05, 1.05]),
173
+ dict(type="RandomFlip", p=1),
174
+ ],
175
+ [
176
+ dict(type="RandomScale", scale=[1.1, 1.1]),
177
+ dict(type="RandomFlip", p=1),
178
+ ],
179
+ ],
180
+ ),
181
+ ignore_index=ignore_index,
182
+ ),
183
+ )
Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02-sc-aug.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total batch size summed across all GPUs
5
+ num_worker = 12
6
+ mix_prob = 0.0
7
+ empty_cache = False
8
+ enable_amp = True
9
+ evaluate = True
10
+
11
+ class_names = [
12
+ "ceiling",
13
+ "floor",
14
+ "wall",
15
+ "beam",
16
+ "column",
17
+ "window",
18
+ "door",
19
+ "table",
20
+ "chair",
21
+ "sofa",
22
+ "bookcase",
23
+ "board",
24
+ "clutter",
25
+ ]
26
+ num_classes = 13
27
+ segment_ignore_index = (-1,)
28
+
29
+ # model settings
30
+ model = dict(
31
+ type="PG-v1m1",
32
+ backbone=dict(
33
+ type="SpUNet-v1m1",
34
+ in_channels=6,
35
+ num_classes=0,
36
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
37
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
38
+ ),
39
+ backbone_out_channels=96,
40
+ semantic_num_classes=num_classes,
41
+ semantic_ignore_index=-1,
42
+ segment_ignore_index=segment_ignore_index,
43
+ instance_ignore_index=-1,
44
+ cluster_thresh=1.5,
45
+ cluster_closed_points=300,
46
+ cluster_propose_points=100,
47
+ cluster_min_points=50,
48
+ )
49
+
50
+ # scheduler settings
51
+ epoch = 3000
52
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
53
+ scheduler = dict(type="PolyLR")
54
+
55
+ # dataset settings
56
+ dataset_type = "S3DISDataset"
57
+ data_root = "data/s3dis"
58
+
59
+ data = dict(
60
+ num_classes=num_classes,
61
+ ignore_index=-1,
62
+ names=class_names,
63
+ train=dict(
64
+ type=dataset_type,
65
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
66
+ data_root=data_root,
67
+ transform=[
68
+ dict(type="CenterShift", apply_z=True),
69
+ dict(
70
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
71
+ ),
72
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
73
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
74
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
75
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
76
+ dict(type="RandomScale", scale=[0.9, 1.1]),
77
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
78
+ dict(type="RandomFlip", p=0.5),
79
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
80
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
81
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
82
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
83
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
84
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
85
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
86
+ dict(
87
+ type="GridSample",
88
+ grid_size=0.02,
89
+ hash_type="fnv",
90
+ mode="train",
91
+ return_grid_coord=True,
92
+ keys=("coord", "color", "normal", "segment", "instance"),
93
+ ),
94
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
95
+ dict(type="NormalizeColor"),
96
+ dict(
97
+ type="InstanceParser",
98
+ segment_ignore_index=segment_ignore_index,
99
+ instance_ignore_index=-1,
100
+ ),
101
+ dict(type="ToTensor"),
102
+ dict(
103
+ type="Collect",
104
+ keys=(
105
+ "coord",
106
+ "grid_coord",
107
+ "segment",
108
+ "instance",
109
+ "instance_centroid",
110
+ "bbox",
111
+ ),
112
+ feat_keys=("color", "normal"),
113
+ ),
114
+ ],
115
+ test_mode=False,
116
+ ),
117
+ val=dict(
118
+ type=dataset_type,
119
+ split="Area_5",
120
+ data_root=data_root,
121
+ transform=[
122
+ dict(type="CenterShift", apply_z=True),
123
+ dict(
124
+ type="Copy",
125
+ keys_dict={
126
+ "coord": "origin_coord",
127
+ "segment": "origin_segment",
128
+ "instance": "origin_instance",
129
+ },
130
+ ),
131
+ dict(
132
+ type="GridSample",
133
+ grid_size=0.02,
134
+ hash_type="fnv",
135
+ mode="train",
136
+ return_grid_coord=True,
137
+ keys=("coord", "color", "normal", "segment", "instance"),
138
+ ),
139
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
140
+ dict(type="CenterShift", apply_z=False),
141
+ dict(type="NormalizeColor"),
142
+ dict(
143
+ type="InstanceParser",
144
+ segment_ignore_index=segment_ignore_index,
145
+ instance_ignore_index=-1,
146
+ ),
147
+ dict(type="ToTensor"),
148
+ dict(
149
+ type="Collect",
150
+ keys=(
151
+ "coord",
152
+ "grid_coord",
153
+ "segment",
154
+ "instance",
155
+ "origin_coord",
156
+ "origin_segment",
157
+ "origin_instance",
158
+ "instance_centroid",
159
+ "bbox",
160
+ ),
161
+ feat_keys=("color", "normal"),
162
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
163
+ ),
164
+ ],
165
+ test_mode=False,
166
+ ),
167
+ test=dict(), # currently not available
168
+ )
169
+
170
+ hooks = [
171
+ dict(type="CheckpointLoader", keywords="module.", replacement="module."),
172
+ dict(type="IterationTimer", warmup_iter=2),
173
+ dict(type="InformationWriter"),
174
+ dict(
175
+ type="InsSegEvaluator",
176
+ segment_ignore_index=segment_ignore_index,
177
+ instance_ignore_index=-1,
178
+ ),
179
+ dict(type="CheckpointSaver", save_freq=None),
180
+ ]
Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total batch size summed across all GPUs
5
+ num_worker = 12
6
+ mix_prob = 0.0
7
+ empty_cache = False
8
+ enable_amp = True
9
+ evaluate = True
10
+
11
+ class_names = [
12
+ "ceiling",
13
+ "floor",
14
+ "wall",
15
+ "beam",
16
+ "column",
17
+ "window",
18
+ "door",
19
+ "table",
20
+ "chair",
21
+ "sofa",
22
+ "bookcase",
23
+ "board",
24
+ "clutter",
25
+ ]
26
+ num_classes = 13
27
+ segment_ignore_index = (-1,)
28
+
29
+ # model settings
30
+ model = dict(
31
+ type="PG-v1m1",
32
+ backbone=dict(
33
+ type="SpUNet-v1m1",
34
+ in_channels=6,
35
+ num_classes=0,
36
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
37
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
38
+ ),
39
+ backbone_out_channels=96,
40
+ semantic_num_classes=num_classes,
41
+ semantic_ignore_index=-1,
42
+ segment_ignore_index=segment_ignore_index,
43
+ instance_ignore_index=-1,
44
+ cluster_thresh=1.5,
45
+ cluster_closed_points=300,
46
+ cluster_propose_points=100,
47
+ cluster_min_points=50,
48
+ )
49
+
50
+ # scheduler settings
51
+ epoch = 3000
52
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
53
+ scheduler = dict(type="PolyLR")
54
+
55
+ # dataset settings
56
+ dataset_type = "S3DISDataset"
57
+ data_root = "data/s3dis"
58
+
59
+ data = dict(
60
+ num_classes=num_classes,
61
+ ignore_index=-1,
62
+ names=class_names,
63
+ train=dict(
64
+ type=dataset_type,
65
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
66
+ data_root=data_root,
67
+ transform=[
68
+ dict(type="CenterShift", apply_z=True),
69
+ dict(
70
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
71
+ ),
72
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
73
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
74
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
75
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
76
+ dict(type="RandomScale", scale=[0.9, 1.1]),
77
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
78
+ dict(type="RandomFlip", p=0.5),
79
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
80
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
81
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
82
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
83
+ dict(type="ChromaticJitter", p=0.95, std=0.005),
84
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
85
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
86
+ dict(
87
+ type="GridSample",
88
+ grid_size=0.02,
89
+ hash_type="fnv",
90
+ mode="train",
91
+ return_grid_coord=True,
92
+ keys=("coord", "color", "normal", "segment", "instance"),
93
+ ),
94
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
95
+ dict(type="NormalizeColor"),
96
+ dict(
97
+ type="InstanceParser",
98
+ segment_ignore_index=segment_ignore_index,
99
+ instance_ignore_index=-1,
100
+ ),
101
+ dict(type="ToTensor"),
102
+ dict(
103
+ type="Collect",
104
+ keys=(
105
+ "coord",
106
+ "grid_coord",
107
+ "segment",
108
+ "instance",
109
+ "instance_centroid",
110
+ "bbox",
111
+ ),
112
+ feat_keys=("color", "normal"),
113
+ ),
114
+ ],
115
+ test_mode=False,
116
+ ),
117
+ val=dict(
118
+ type=dataset_type,
119
+ split="Area_5",
120
+ data_root=data_root,
121
+ transform=[
122
+ dict(type="CenterShift", apply_z=True),
123
+ dict(
124
+ type="Copy",
125
+ keys_dict={
126
+ "coord": "origin_coord",
127
+ "segment": "origin_segment",
128
+ "instance": "origin_instance",
129
+ },
130
+ ),
131
+ dict(
132
+ type="GridSample",
133
+ grid_size=0.02,
134
+ hash_type="fnv",
135
+ mode="train",
136
+ return_grid_coord=True,
137
+ keys=("coord", "color", "normal", "segment", "instance"),
138
+ ),
139
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
140
+ dict(type="CenterShift", apply_z=False),
141
+ dict(type="NormalizeColor"),
142
+ dict(
143
+ type="InstanceParser",
144
+ segment_ignore_index=segment_ignore_index,
145
+ instance_ignore_index=-1,
146
+ ),
147
+ dict(type="ToTensor"),
148
+ dict(
149
+ type="Collect",
150
+ keys=(
151
+ "coord",
152
+ "grid_coord",
153
+ "segment",
154
+ "instance",
155
+ "origin_coord",
156
+ "origin_segment",
157
+ "origin_instance",
158
+ "instance_centroid",
159
+ "bbox",
160
+ ),
161
+ feat_keys=("color", "normal"),
162
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
163
+ ),
164
+ ],
165
+ test_mode=False,
166
+ ),
167
+ test=dict(), # currently not available
168
+ )
169
+
170
+ hooks = [
171
+ dict(type="CheckpointLoader", keywords="module.", replacement="module."),
172
+ dict(type="IterationTimer", warmup_iter=2),
173
+ dict(type="InformationWriter"),
174
+ dict(
175
+ type="InsSegEvaluator",
176
+ segment_ignore_index=segment_ignore_index,
177
+ instance_ignore_index=-1,
178
+ ),
179
+ dict(type="CheckpointSaver", save_freq=None),
180
+ ]
Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total batch size summed across all GPUs
5
+ num_worker = 12
6
+ mix_prob = 0.0
7
+ empty_cache = False
8
+ enable_amp = True
9
+ evaluate = True
10
+
11
+ class_names = [
12
+ "ceiling",
13
+ "floor",
14
+ "wall",
15
+ "beam",
16
+ "column",
17
+ "window",
18
+ "door",
19
+ "table",
20
+ "chair",
21
+ "sofa",
22
+ "bookcase",
23
+ "board",
24
+ "clutter",
25
+ ]
26
+ num_classes = 13
27
+ segment_ignore_index = (-1,)
28
+
29
+ # model settings
30
+ model = dict(
31
+ type="PG-v1m1",
32
+ backbone=dict(
33
+ type="SpUNet-v1m1",
34
+ in_channels=6,
35
+ num_classes=0,
36
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
37
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
38
+ ),
39
+ backbone_out_channels=96,
40
+ semantic_num_classes=num_classes,
41
+ semantic_ignore_index=-1,
42
+ segment_ignore_index=segment_ignore_index,
43
+ instance_ignore_index=-1,
44
+ cluster_thresh=1.5,
45
+ cluster_closed_points=300,
46
+ cluster_propose_points=100,
47
+ cluster_min_points=50,
48
+ voxel_size=0.05,
49
+ )
50
+
51
+ # scheduler settings
52
+ epoch = 3000
53
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
54
+ scheduler = dict(type="PolyLR")
55
+
56
+ # dataset settings
57
+ dataset_type = "S3DISDataset"
58
+ data_root = "data/s3dis"
59
+
60
+ data = dict(
61
+ num_classes=num_classes,
62
+ ignore_index=-1,
63
+ names=class_names,
64
+ train=dict(
65
+ type=dataset_type,
66
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
67
+ data_root=data_root,
68
+ transform=[
69
+ dict(type="CenterShift", apply_z=True),
70
+ dict(
71
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
72
+ ),
73
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
74
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
75
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
76
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
77
+ dict(type="RandomScale", scale=[0.9, 1.1]),
78
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
79
+ dict(type="RandomFlip", p=0.5),
80
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
81
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
82
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
83
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
84
+ dict(type="ChromaticJitter", p=0.95, std=0.005),
85
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
86
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
87
+ dict(
88
+ type="GridSample",
89
+ grid_size=0.05,
90
+ hash_type="fnv",
91
+ mode="train",
92
+ return_grid_coord=True,
93
+ keys=("coord", "color", "normal", "segment", "instance"),
94
+ ),
95
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
96
+ dict(type="NormalizeColor"),
97
+ dict(
98
+ type="InstanceParser",
99
+ segment_ignore_index=segment_ignore_index,
100
+ instance_ignore_index=-1,
101
+ ),
102
+ dict(type="ToTensor"),
103
+ dict(
104
+ type="Collect",
105
+ keys=(
106
+ "coord",
107
+ "grid_coord",
108
+ "segment",
109
+ "instance",
110
+ "instance_centroid",
111
+ "bbox",
112
+ ),
113
+ feat_keys=("color", "normal"),
114
+ ),
115
+ ],
116
+ test_mode=False,
117
+ ),
118
+ val=dict(
119
+ type=dataset_type,
120
+ split="Area_5",
121
+ data_root=data_root,
122
+ transform=[
123
+ dict(type="CenterShift", apply_z=True),
124
+ dict(
125
+ type="Copy",
126
+ keys_dict={
127
+ "coord": "origin_coord",
128
+ "segment": "origin_segment",
129
+ "instance": "origin_instance",
130
+ },
131
+ ),
132
+ dict(
133
+ type="GridSample",
134
+ grid_size=0.05,
135
+ hash_type="fnv",
136
+ mode="train",
137
+ return_grid_coord=True,
138
+ keys=("coord", "color", "normal", "segment", "instance"),
139
+ ),
140
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
141
+ dict(type="CenterShift", apply_z=False),
142
+ dict(type="NormalizeColor"),
143
+ dict(
144
+ type="InstanceParser",
145
+ segment_ignore_index=segment_ignore_index,
146
+ instance_ignore_index=-1,
147
+ ),
148
+ dict(type="ToTensor"),
149
+ dict(
150
+ type="Collect",
151
+ keys=(
152
+ "coord",
153
+ "grid_coord",
154
+ "segment",
155
+ "instance",
156
+ "origin_coord",
157
+ "origin_segment",
158
+ "origin_instance",
159
+ "instance_centroid",
160
+ "bbox",
161
+ ),
162
+ feat_keys=("color", "normal"),
163
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
164
+ ),
165
+ ],
166
+ test_mode=False,
167
+ ),
168
+ test=dict(), # currently not available
169
+ )
170
+
171
+ hooks = [
172
+ dict(type="CheckpointLoader", keywords="module.", replacement="module."),
173
+ dict(type="IterationTimer", warmup_iter=2),
174
+ dict(type="InformationWriter"),
175
+ dict(
176
+ type="InsSegEvaluator",
177
+ segment_ignore_index=segment_ignore_index,
178
+ instance_ignore_index=-1,
179
+ ),
180
+ dict(type="CheckpointSaver", save_freq=None),
181
+ ]
Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft-vs0p05.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total bs in all gpus
5
+ num_worker = 24
6
+ mix_prob = 0
7
+ empty_cache = False
8
+ enable_amp = True
9
+ evaluate = True
10
+ find_unused_parameters = True
11
+
12
+ class_names = [
13
+ "ceiling",
14
+ "floor",
15
+ "wall",
16
+ "beam",
17
+ "column",
18
+ "window",
19
+ "door",
20
+ "table",
21
+ "chair",
22
+ "sofa",
23
+ "bookcase",
24
+ "board",
25
+ "clutter",
26
+ ]
27
+ num_classes = 13
28
+ segment_ignore_index = (-1,)
29
+
30
+ # model settings
31
+ model = dict(
32
+ type="PG-v1m1",
33
+ backbone=dict(
34
+ type="PPT-v1m1",
35
+ backbone=dict(
36
+ type="SpUNet-v1m3",
37
+ in_channels=6,
38
+ num_classes=0,
39
+ base_channels=32,
40
+ context_channels=256,
41
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
42
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
43
+ cls_mode=False,
44
+ conditions=("ScanNet", "S3DIS", "Structured3D"),
45
+ zero_init=False,
46
+ norm_decouple=True,
47
+ norm_adaptive=True,
48
+ norm_affine=True,
49
+ ),
50
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
51
+ backbone_out_channels=96,
52
+ context_channels=256,
53
+ conditions=("Structured3D", "ScanNet", "S3DIS"),
54
+ template="[x]",
55
+ clip_model="ViT-B/16",
56
+ class_name=(
57
+ "wall",
58
+ "floor",
59
+ "cabinet",
60
+ "bed",
61
+ "chair",
62
+ "sofa",
63
+ "table",
64
+ "door",
65
+ "window",
66
+ "bookshelf",
67
+ "bookcase",
68
+ "picture",
69
+ "counter",
70
+ "desk",
71
+ "shelves",
72
+ "curtain",
73
+ "dresser",
74
+ "pillow",
75
+ "mirror",
76
+ "ceiling",
77
+ "refrigerator",
78
+ "television",
79
+ "shower curtain",
80
+ "nightstand",
81
+ "toilet",
82
+ "sink",
83
+ "lamp",
84
+ "bathtub",
85
+ "garbagebin",
86
+ "board",
87
+ "beam",
88
+ "column",
89
+ "clutter",
90
+ "otherstructure",
91
+ "otherfurniture",
92
+ "otherprop",
93
+ ),
94
+ valid_index=(
95
+ (
96
+ 0,
97
+ 1,
98
+ 2,
99
+ 3,
100
+ 4,
101
+ 5,
102
+ 6,
103
+ 7,
104
+ 8,
105
+ 11,
106
+ 13,
107
+ 14,
108
+ 15,
109
+ 16,
110
+ 17,
111
+ 18,
112
+ 19,
113
+ 20,
114
+ 21,
115
+ 23,
116
+ 25,
117
+ 26,
118
+ 33,
119
+ 34,
120
+ 35,
121
+ ),
122
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
123
+ (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
124
+ ),
125
+ backbone_mode=True,
126
+ ),
127
+ backbone_out_channels=96,
128
+ semantic_num_classes=num_classes,
129
+ semantic_ignore_index=-1,
130
+ segment_ignore_index=segment_ignore_index,
131
+ instance_ignore_index=-1,
132
+ cluster_thresh=1.5,
133
+ cluster_closed_points=300,
134
+ cluster_propose_points=100,
135
+ cluster_min_points=50,
136
+ voxel_size=0.05,
137
+ )
138
+
139
+ # scheduler settings
140
+ epoch = 3000
141
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
142
+ scheduler = dict(type="PolyLR")
143
+
144
+ # dataset settings
145
+ dataset_type = "S3DISDataset"
146
+ data_root = "data/s3dis"
147
+
148
+ data = dict(
149
+ num_classes=num_classes,
150
+ ignore_index=-1,
151
+ names=class_names,
152
+ train=dict(
153
+ type=dataset_type,
154
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
155
+ data_root=data_root,
156
+ transform=[
157
+ dict(type="CenterShift", apply_z=True),
158
+ dict(
159
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
160
+ ),
161
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
162
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
163
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
164
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
165
+ dict(type="RandomScale", scale=[0.9, 1.1]),
166
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
167
+ dict(type="RandomFlip", p=0.5),
168
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
169
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
170
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
171
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
172
+ dict(type="ChromaticJitter", p=0.95, std=0.005),
173
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
174
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
175
+ dict(
176
+ type="GridSample",
177
+ grid_size=0.05,
178
+ hash_type="fnv",
179
+ mode="train",
180
+ return_grid_coord=True,
181
+ keys=("coord", "color", "normal", "segment", "instance"),
182
+ ),
183
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
184
+ dict(type="NormalizeColor"),
185
+ dict(
186
+ type="InstanceParser",
187
+ segment_ignore_index=segment_ignore_index,
188
+ instance_ignore_index=-1,
189
+ ),
190
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
191
+ dict(type="ToTensor"),
192
+ dict(
193
+ type="Collect",
194
+ keys=(
195
+ "coord",
196
+ "grid_coord",
197
+ "segment",
198
+ "instance",
199
+ "instance_centroid",
200
+ "bbox",
201
+ "condition",
202
+ ),
203
+ feat_keys=("color", "normal"),
204
+ ),
205
+ ],
206
+ test_mode=False,
207
+ ),
208
+ val=dict(
209
+ type=dataset_type,
210
+ split="Area_5",
211
+ data_root=data_root,
212
+ transform=[
213
+ dict(type="CenterShift", apply_z=True),
214
+ dict(
215
+ type="Copy",
216
+ keys_dict={
217
+ "coord": "origin_coord",
218
+ "segment": "origin_segment",
219
+ "instance": "origin_instance",
220
+ },
221
+ ),
222
+ dict(
223
+ type="GridSample",
224
+ grid_size=0.05,
225
+ hash_type="fnv",
226
+ mode="train",
227
+ return_grid_coord=True,
228
+ keys=("coord", "color", "normal", "segment", "instance"),
229
+ ),
230
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
231
+ dict(type="CenterShift", apply_z=False),
232
+ dict(type="NormalizeColor"),
233
+ dict(
234
+ type="InstanceParser",
235
+ segment_ignore_index=segment_ignore_index,
236
+ instance_ignore_index=-1,
237
+ ),
238
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
239
+ dict(type="ToTensor"),
240
+ dict(
241
+ type="Collect",
242
+ keys=(
243
+ "coord",
244
+ "grid_coord",
245
+ "segment",
246
+ "instance",
247
+ "origin_coord",
248
+ "origin_segment",
249
+ "origin_instance",
250
+ "instance_centroid",
251
+ "bbox",
252
+ "condition",
253
+ ),
254
+ feat_keys=("color", "normal"),
255
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
256
+ ),
257
+ ],
258
+ test_mode=False,
259
+ ),
260
+ test=dict(), # currently not available
261
+ )
262
+
263
+ hooks = [
264
+ dict(type="CheckpointLoader", keywords="module.", replacement="module.backbone."),
265
+ dict(type="IterationTimer", warmup_iter=2),
266
+ dict(type="InformationWriter"),
267
+ dict(
268
+ type="InsSegEvaluator",
269
+ segment_ignore_index=segment_ignore_index,
270
+ instance_ignore_index=-1,
271
+ ),
272
+ dict(type="CheckpointSaver", save_freq=None),
273
+ ]
Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total bs in all gpus
5
+ num_worker = 24
6
+ mix_prob = 0
7
+ empty_cache = False
8
+ enable_amp = True
9
+ evaluate = True
10
+ find_unused_parameters = True
11
+
12
+ class_names = [
13
+ "ceiling",
14
+ "floor",
15
+ "wall",
16
+ "beam",
17
+ "column",
18
+ "window",
19
+ "door",
20
+ "table",
21
+ "chair",
22
+ "sofa",
23
+ "bookcase",
24
+ "board",
25
+ "clutter",
26
+ ]
27
+ num_classes = 13
28
+ segment_ignore_index = (-1,)
29
+
30
+ # model settings
31
+ model = dict(
32
+ type="PG-v1m1",
33
+ backbone=dict(
34
+ type="PPT-v1m1",
35
+ backbone=dict(
36
+ type="SpUNet-v1m3",
37
+ in_channels=6,
38
+ num_classes=0,
39
+ base_channels=32,
40
+ context_channels=256,
41
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
42
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
43
+ cls_mode=False,
44
+ conditions=("ScanNet", "S3DIS", "Structured3D"),
45
+ zero_init=False,
46
+ norm_decouple=True,
47
+ norm_adaptive=True,
48
+ norm_affine=True,
49
+ ),
50
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
51
+ backbone_out_channels=96,
52
+ context_channels=256,
53
+ conditions=("Structured3D", "ScanNet", "S3DIS"),
54
+ template="[x]",
55
+ clip_model="ViT-B/16",
56
+ class_name=(
57
+ "wall",
58
+ "floor",
59
+ "cabinet",
60
+ "bed",
61
+ "chair",
62
+ "sofa",
63
+ "table",
64
+ "door",
65
+ "window",
66
+ "bookshelf",
67
+ "bookcase",
68
+ "picture",
69
+ "counter",
70
+ "desk",
71
+ "shelves",
72
+ "curtain",
73
+ "dresser",
74
+ "pillow",
75
+ "mirror",
76
+ "ceiling",
77
+ "refrigerator",
78
+ "television",
79
+ "shower curtain",
80
+ "nightstand",
81
+ "toilet",
82
+ "sink",
83
+ "lamp",
84
+ "bathtub",
85
+ "garbagebin",
86
+ "board",
87
+ "beam",
88
+ "column",
89
+ "clutter",
90
+ "otherstructure",
91
+ "otherfurniture",
92
+ "otherprop",
93
+ ),
94
+ valid_index=(
95
+ (
96
+ 0,
97
+ 1,
98
+ 2,
99
+ 3,
100
+ 4,
101
+ 5,
102
+ 6,
103
+ 7,
104
+ 8,
105
+ 11,
106
+ 13,
107
+ 14,
108
+ 15,
109
+ 16,
110
+ 17,
111
+ 18,
112
+ 19,
113
+ 20,
114
+ 21,
115
+ 23,
116
+ 25,
117
+ 26,
118
+ 33,
119
+ 34,
120
+ 35,
121
+ ),
122
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
123
+ (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
124
+ ),
125
+ backbone_mode=True,
126
+ ),
127
+ backbone_out_channels=96,
128
+ semantic_num_classes=num_classes,
129
+ semantic_ignore_index=-1,
130
+ segment_ignore_index=segment_ignore_index,
131
+ instance_ignore_index=-1,
132
+ cluster_thresh=1.5,
133
+ cluster_closed_points=300,
134
+ cluster_propose_points=100,
135
+ cluster_min_points=50,
136
+ voxel_size=0.02,
137
+ )
138
+
139
+ # scheduler settings
140
+ epoch = 3000
141
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
142
+ scheduler = dict(type="PolyLR")
143
+
144
+ # dataset settings
145
+ dataset_type = "S3DISDataset"
146
+ data_root = "data/s3dis"
147
+
148
+ data = dict(
149
+ num_classes=num_classes,
150
+ ignore_index=-1,
151
+ names=class_names,
152
+ train=dict(
153
+ type=dataset_type,
154
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
155
+ data_root=data_root,
156
+ transform=[
157
+ dict(type="CenterShift", apply_z=True),
158
+ dict(
159
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
160
+ ),
161
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
162
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
163
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
164
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
165
+ dict(type="RandomScale", scale=[0.9, 1.1]),
166
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
167
+ dict(type="RandomFlip", p=0.5),
168
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
169
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
170
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
171
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
172
+ dict(type="ChromaticJitter", p=0.95, std=0.005),
173
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
174
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
175
+ dict(
176
+ type="GridSample",
177
+ grid_size=0.02,
178
+ hash_type="fnv",
179
+ mode="train",
180
+ return_grid_coord=True,
181
+ keys=("coord", "color", "normal", "segment", "instance"),
182
+ ),
183
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
184
+ dict(type="NormalizeColor"),
185
+ dict(
186
+ type="InstanceParser",
187
+ segment_ignore_index=segment_ignore_index,
188
+ instance_ignore_index=-1,
189
+ ),
190
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
191
+ dict(type="ToTensor"),
192
+ dict(
193
+ type="Collect",
194
+ keys=(
195
+ "coord",
196
+ "grid_coord",
197
+ "segment",
198
+ "instance",
199
+ "instance_centroid",
200
+ "bbox",
201
+ "condition",
202
+ ),
203
+ feat_keys=("color", "normal"),
204
+ ),
205
+ ],
206
+ test_mode=False,
207
+ ),
208
+ val=dict(
209
+ type=dataset_type,
210
+ split="Area_5",
211
+ data_root=data_root,
212
+ transform=[
213
+ dict(type="CenterShift", apply_z=True),
214
+ dict(
215
+ type="Copy",
216
+ keys_dict={
217
+ "coord": "origin_coord",
218
+ "segment": "origin_segment",
219
+ "instance": "origin_instance",
220
+ },
221
+ ),
222
+ dict(
223
+ type="GridSample",
224
+ grid_size=0.02,
225
+ hash_type="fnv",
226
+ mode="train",
227
+ return_grid_coord=True,
228
+ keys=("coord", "color", "normal", "segment", "instance"),
229
+ ),
230
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
231
+ dict(type="CenterShift", apply_z=False),
232
+ dict(type="NormalizeColor"),
233
+ dict(
234
+ type="InstanceParser",
235
+ segment_ignore_index=segment_ignore_index,
236
+ instance_ignore_index=-1,
237
+ ),
238
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
239
+ dict(type="ToTensor"),
240
+ dict(
241
+ type="Collect",
242
+ keys=(
243
+ "coord",
244
+ "grid_coord",
245
+ "segment",
246
+ "instance",
247
+ "origin_coord",
248
+ "origin_segment",
249
+ "origin_instance",
250
+ "instance_centroid",
251
+ "bbox",
252
+ "condition",
253
+ ),
254
+ feat_keys=("color", "normal"),
255
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
256
+ ),
257
+ ],
258
+ test_mode=False,
259
+ ),
260
+ test=dict(), # currently not available
261
+ )
262
+
263
+ hooks = [
264
+ dict(type="CheckpointLoader", keywords="module.", replacement="module.backbone."),
265
+ dict(type="IterationTimer", warmup_iter=2),
266
+ dict(type="InformationWriter"),
267
+ dict(
268
+ type="InsSegEvaluator",
269
+ segment_ignore_index=segment_ignore_index,
270
+ instance_ignore_index=-1,
271
+ ),
272
+ dict(type="CheckpointSaver", save_freq=None),
273
+ ]
Pointcept/configs/s3dis/semseg-minkunet34c-0-base.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+ # misc custom setting
3
+ batch_size = 12 # bs: total bs in all gpus
4
+ mix_prob = 0.8
5
+ empty_cache = False
6
+ enable_amp = True
7
+
8
+ # model settings
9
+ model = dict(
10
+ type="DefaultSegmentor",
11
+ backbone=dict(type="MinkUNet34C", in_channels=6, out_channels=13),
12
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
13
+ )
14
+
15
+ # scheduler settings
16
+ epoch = 3000
17
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
18
+ scheduler = dict(type="PolyLR")
19
+
20
+
21
+ # dataset settings
22
+ dataset_type = "S3DISDataset"
23
+ data_root = "data/s3dis"
24
+
25
+ data = dict(
26
+ num_classes=13,
27
+ ignore_index=-1,
28
+ names=[
29
+ "ceiling",
30
+ "floor",
31
+ "wall",
32
+ "beam",
33
+ "column",
34
+ "window",
35
+ "door",
36
+ "table",
37
+ "chair",
38
+ "sofa",
39
+ "bookcase",
40
+ "board",
41
+ "clutter",
42
+ ],
43
+ train=dict(
44
+ type=dataset_type,
45
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
46
+ data_root=data_root,
47
+ transform=[
48
+ dict(type="CenterShift", apply_z=True),
49
+ dict(
50
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
51
+ ),
52
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
53
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
54
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
55
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
56
+ dict(type="RandomScale", scale=[0.9, 1.1]),
57
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
58
+ dict(type="RandomFlip", p=0.5),
59
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
60
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
61
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
62
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
63
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
64
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
65
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
66
+ dict(
67
+ type="GridSample",
68
+ grid_size=0.05,
69
+ hash_type="fnv",
70
+ mode="train",
71
+ keys=("coord", "color", "segment"),
72
+ return_grid_coord=True,
73
+ ),
74
+ dict(type="SphereCrop", point_max=100000, mode="random"),
75
+ dict(type="CenterShift", apply_z=False),
76
+ dict(type="NormalizeColor"),
77
+ dict(type="ShufflePoint"),
78
+ dict(type="ToTensor"),
79
+ dict(
80
+ type="Collect",
81
+ keys=("coord", "grid_coord", "segment"),
82
+ feat_keys=["coord", "color"],
83
+ ),
84
+ ],
85
+ test_mode=False,
86
+ ),
87
+ val=dict(
88
+ type=dataset_type,
89
+ split="Area_5",
90
+ data_root=data_root,
91
+ transform=[
92
+ dict(type="CenterShift", apply_z=True),
93
+ dict(
94
+ type="Copy",
95
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
96
+ ),
97
+ dict(
98
+ type="GridSample",
99
+ grid_size=0.05,
100
+ hash_type="fnv",
101
+ mode="train",
102
+ keys=("coord", "color", "segment"),
103
+ return_grid_coord=True,
104
+ ),
105
+ dict(type="CenterShift", apply_z=False),
106
+ dict(type="NormalizeColor"),
107
+ dict(type="ToTensor"),
108
+ dict(
109
+ type="Collect",
110
+ keys=(
111
+ "coord",
112
+ "grid_coord",
113
+ "origin_coord",
114
+ "segment",
115
+ "origin_segment",
116
+ ),
117
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
118
+ feat_keys=["coord", "color"],
119
+ ),
120
+ ],
121
+ test_mode=False,
122
+ ),
123
+ test=dict(
124
+ type=dataset_type,
125
+ split="Area_5",
126
+ data_root=data_root,
127
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
128
+ test_mode=True,
129
+ test_cfg=dict(
130
+ voxelize=dict(
131
+ type="GridSample",
132
+ grid_size=0.05,
133
+ hash_type="fnv",
134
+ mode="test",
135
+ keys=("coord", "color"),
136
+ return_grid_coord=True,
137
+ ),
138
+ crop=None,
139
+ post_transform=[
140
+ dict(type="CenterShift", apply_z=False),
141
+ dict(type="ToTensor"),
142
+ dict(
143
+ type="Collect",
144
+ keys=("coord", "grid_coord", "index"),
145
+ feat_keys=("coord", "color"),
146
+ ),
147
+ ],
148
+ aug_transform=[
149
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
150
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
151
+ [dict(type="RandomScale", scale=[1, 1])],
152
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
153
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
154
+ [
155
+ dict(type="RandomScale", scale=[0.9, 0.9]),
156
+ dict(type="RandomFlip", p=1),
157
+ ],
158
+ [
159
+ dict(type="RandomScale", scale=[0.95, 0.95]),
160
+ dict(type="RandomFlip", p=1),
161
+ ],
162
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
163
+ [
164
+ dict(type="RandomScale", scale=[1.05, 1.05]),
165
+ dict(type="RandomFlip", p=1),
166
+ ],
167
+ [
168
+ dict(type="RandomScale", scale=[1.1, 1.1]),
169
+ dict(type="RandomFlip", p=1),
170
+ ],
171
+ ],
172
+ ),
173
+ ),
174
+ )
Pointcept/configs/s3dis/semseg-ppt-v1m1-0-s3-sc-st-spunet.py ADDED
@@ -0,0 +1,496 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 24 # bs: total bs in all gpus
5
+ num_worker = 48
6
+ mix_prob = 0.8
7
+ empty_cache = False
8
+ enable_amp = True
9
+ find_unused_parameters = True
10
+
11
+ # trainer
12
+ train = dict(
13
+ type="MultiDatasetTrainer",
14
+ )
15
+
16
+ # model settings
17
+ model = dict(
18
+ type="PPT-v1m1",
19
+ backbone=dict(
20
+ type="SpUNet-v1m3",
21
+ in_channels=6,
22
+ num_classes=0,
23
+ base_channels=32,
24
+ context_channels=256,
25
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
26
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
27
+ cls_mode=False,
28
+ conditions=("ScanNet", "S3DIS", "Structured3D"),
29
+ zero_init=False,
30
+ norm_decouple=True,
31
+ norm_adaptive=True,
32
+ norm_affine=True,
33
+ ),
34
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
35
+ backbone_out_channels=96,
36
+ context_channels=256,
37
+ conditions=("Structured3D", "ScanNet", "S3DIS"),
38
+ template="[x]",
39
+ clip_model="ViT-B/16",
40
+ class_name=(
41
+ "wall",
42
+ "floor",
43
+ "cabinet",
44
+ "bed",
45
+ "chair",
46
+ "sofa",
47
+ "table",
48
+ "door",
49
+ "window",
50
+ "bookshelf",
51
+ "bookcase",
52
+ "picture",
53
+ "counter",
54
+ "desk",
55
+ "shelves",
56
+ "curtain",
57
+ "dresser",
58
+ "pillow",
59
+ "mirror",
60
+ "ceiling",
61
+ "refrigerator",
62
+ "television",
63
+ "shower curtain",
64
+ "nightstand",
65
+ "toilet",
66
+ "sink",
67
+ "lamp",
68
+ "bathtub",
69
+ "garbagebin",
70
+ "board",
71
+ "beam",
72
+ "column",
73
+ "clutter",
74
+ "otherstructure",
75
+ "otherfurniture",
76
+ "otherprop",
77
+ ),
78
+ valid_index=(
79
+ (
80
+ 0,
81
+ 1,
82
+ 2,
83
+ 3,
84
+ 4,
85
+ 5,
86
+ 6,
87
+ 7,
88
+ 8,
89
+ 11,
90
+ 13,
91
+ 14,
92
+ 15,
93
+ 16,
94
+ 17,
95
+ 18,
96
+ 19,
97
+ 20,
98
+ 21,
99
+ 23,
100
+ 25,
101
+ 26,
102
+ 33,
103
+ 34,
104
+ 35,
105
+ ),
106
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
107
+ (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
108
+ ),
109
+ backbone_mode=False,
110
+ )
111
+
112
+ # scheduler settings
113
+ epoch = 100
114
+ optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
115
+ scheduler = dict(
116
+ type="OneCycleLR",
117
+ max_lr=optimizer["lr"],
118
+ pct_start=0.05,
119
+ anneal_strategy="cos",
120
+ div_factor=10.0,
121
+ final_div_factor=10000.0,
122
+ )
123
+ # param_dicts = [dict(keyword="modulation", lr=0.005)]
124
+
125
+ # dataset settings
126
+ data = dict(
127
+ num_classes=13,
128
+ ignore_index=-1,
129
+ names=[
130
+ "ceiling",
131
+ "floor",
132
+ "wall",
133
+ "beam",
134
+ "column",
135
+ "window",
136
+ "door",
137
+ "table",
138
+ "chair",
139
+ "sofa",
140
+ "bookcase",
141
+ "board",
142
+ "clutter",
143
+ ],
144
+ train=dict(
145
+ type="ConcatDataset",
146
+ datasets=[
147
+ # Structured3D
148
+ dict(
149
+ type="Structured3DDataset",
150
+ split="train",
151
+ data_root="data/structured3d",
152
+ transform=[
153
+ dict(type="CenterShift", apply_z=True),
154
+ dict(
155
+ type="RandomDropout",
156
+ dropout_ratio=0.2,
157
+ dropout_application_ratio=0.2,
158
+ ),
159
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
160
+ dict(
161
+ type="RandomRotate",
162
+ angle=[-1, 1],
163
+ axis="z",
164
+ center=[0, 0, 0],
165
+ p=0.5,
166
+ ),
167
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
168
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
169
+ dict(type="RandomScale", scale=[0.9, 1.1]),
170
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
171
+ dict(type="RandomFlip", p=0.5),
172
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
173
+ dict(
174
+ type="ElasticDistortion",
175
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
176
+ ),
177
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
178
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
179
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
180
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
181
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
182
+ dict(
183
+ type="GridSample",
184
+ grid_size=0.02,
185
+ hash_type="fnv",
186
+ mode="train",
187
+ return_grid_coord=True,
188
+ ),
189
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
190
+ dict(type="CenterShift", apply_z=False),
191
+ dict(type="NormalizeColor"),
192
+ dict(type="ShufflePoint"),
193
+ dict(type="Add", keys_dict={"condition": "Structured3D"}),
194
+ dict(type="ToTensor"),
195
+ dict(
196
+ type="Collect",
197
+ keys=("coord", "grid_coord", "segment", "condition"),
198
+ feat_keys=("color", "normal"),
199
+ ),
200
+ ],
201
+ test_mode=False,
202
+ loop=4, # sampling weight
203
+ ),
204
+ # ScanNet
205
+ dict(
206
+ type="ScanNetDataset",
207
+ split="train",
208
+ data_root="data/scannet",
209
+ transform=[
210
+ dict(type="CenterShift", apply_z=True),
211
+ dict(
212
+ type="RandomDropout",
213
+ dropout_ratio=0.2,
214
+ dropout_application_ratio=0.2,
215
+ ),
216
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
217
+ dict(
218
+ type="RandomRotate",
219
+ angle=[-1, 1],
220
+ axis="z",
221
+ center=[0, 0, 0],
222
+ p=0.5,
223
+ ),
224
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
225
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
226
+ dict(type="RandomScale", scale=[0.9, 1.1]),
227
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
228
+ dict(type="RandomFlip", p=0.5),
229
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
230
+ dict(
231
+ type="ElasticDistortion",
232
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
233
+ ),
234
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
235
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
236
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
237
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
238
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
239
+ dict(
240
+ type="GridSample",
241
+ grid_size=0.02,
242
+ hash_type="fnv",
243
+ mode="train",
244
+ return_grid_coord=True,
245
+ ),
246
+ dict(type="SphereCrop", point_max=100000, mode="random"),
247
+ dict(type="CenterShift", apply_z=False),
248
+ dict(type="NormalizeColor"),
249
+ dict(type="ShufflePoint"),
250
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
251
+ dict(type="ToTensor"),
252
+ dict(
253
+ type="Collect",
254
+ keys=("coord", "grid_coord", "segment", "condition"),
255
+ feat_keys=("color", "normal"),
256
+ ),
257
+ ],
258
+ test_mode=False,
259
+ loop=2, # sampling weight
260
+ ),
261
+ # S3DIS
262
+ dict(
263
+ type="S3DISDataset",
264
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
265
+ data_root="data/s3dis",
266
+ transform=[
267
+ dict(type="CenterShift", apply_z=True),
268
+ dict(
269
+ type="RandomDropout",
270
+ dropout_ratio=0.2,
271
+ dropout_application_ratio=0.2,
272
+ ),
273
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
274
+ dict(
275
+ type="RandomRotate",
276
+ angle=[-1, 1],
277
+ axis="z",
278
+ center=[0, 0, 0],
279
+ p=0.5,
280
+ ),
281
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
282
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
283
+ dict(type="RandomScale", scale=[0.9, 1.1]),
284
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
285
+ dict(type="RandomFlip", p=0.5),
286
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
287
+ dict(
288
+ type="ElasticDistortion",
289
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
290
+ ),
291
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
292
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
293
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
294
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
295
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
296
+ dict(
297
+ type="GridSample",
298
+ grid_size=0.02,
299
+ hash_type="fnv",
300
+ mode="train",
301
+ return_grid_coord=True,
302
+ ),
303
+ dict(type="SphereCrop", sample_rate=0.6, mode="random"),
304
+ dict(type="CenterShift", apply_z=False),
305
+ dict(type="NormalizeColor"),
306
+ dict(type="ShufflePoint"),
307
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
308
+ dict(type="ToTensor"),
309
+ dict(
310
+ type="Collect",
311
+ keys=("coord", "grid_coord", "segment", "condition"),
312
+ feat_keys=("color", "normal"),
313
+ ),
314
+ ],
315
+ test_mode=False,
316
+ loop=1, # sampling weight
317
+ ),
318
+ ],
319
+ ),
320
+ val=dict(
321
+ type="S3DISDataset",
322
+ split="Area_5",
323
+ data_root="data/s3dis",
324
+ transform=[
325
+ dict(type="CenterShift", apply_z=True),
326
+ dict(
327
+ type="GridSample",
328
+ grid_size=0.02,
329
+ hash_type="fnv",
330
+ mode="train",
331
+ return_grid_coord=True,
332
+ ),
333
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
334
+ dict(type="CenterShift", apply_z=False),
335
+ dict(type="NormalizeColor"),
336
+ dict(type="ToTensor"),
337
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
338
+ dict(
339
+ type="Collect",
340
+ keys=("coord", "grid_coord", "segment", "condition"),
341
+ feat_keys=("color", "normal"),
342
+ ),
343
+ ],
344
+ test_mode=False,
345
+ ),
346
+ test=dict(
347
+ type="S3DISDataset",
348
+ split="Area_5",
349
+ data_root="data/s3dis",
350
+ transform=[
351
+ dict(type="CenterShift", apply_z=True),
352
+ dict(type="NormalizeColor"),
353
+ ],
354
+ test_mode=True,
355
+ test_cfg=dict(
356
+ voxelize=dict(
357
+ type="GridSample",
358
+ grid_size=0.02,
359
+ hash_type="fnv",
360
+ mode="test",
361
+ return_grid_coord=True,
362
+ keys=("coord", "color", "normal"),
363
+ ),
364
+ crop=None,
365
+ post_transform=[
366
+ dict(type="CenterShift", apply_z=False),
367
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
368
+ dict(type="ToTensor"),
369
+ dict(
370
+ type="Collect",
371
+ keys=("coord", "grid_coord", "index", "condition"),
372
+ feat_keys=("color", "normal"),
373
+ ),
374
+ ],
375
+ aug_transform=[
376
+ [
377
+ dict(
378
+ type="RandomRotateTargetAngle",
379
+ angle=[0],
380
+ axis="z",
381
+ center=[0, 0, 0],
382
+ p=1,
383
+ )
384
+ ],
385
+ [
386
+ dict(
387
+ type="RandomRotateTargetAngle",
388
+ angle=[1 / 2],
389
+ axis="z",
390
+ center=[0, 0, 0],
391
+ p=1,
392
+ )
393
+ ],
394
+ [
395
+ dict(
396
+ type="RandomRotateTargetAngle",
397
+ angle=[1],
398
+ axis="z",
399
+ center=[0, 0, 0],
400
+ p=1,
401
+ )
402
+ ],
403
+ [
404
+ dict(
405
+ type="RandomRotateTargetAngle",
406
+ angle=[3 / 2],
407
+ axis="z",
408
+ center=[0, 0, 0],
409
+ p=1,
410
+ )
411
+ ],
412
+ [
413
+ dict(
414
+ type="RandomRotateTargetAngle",
415
+ angle=[0],
416
+ axis="z",
417
+ center=[0, 0, 0],
418
+ p=1,
419
+ ),
420
+ dict(type="RandomScale", scale=[0.95, 0.95]),
421
+ ],
422
+ [
423
+ dict(
424
+ type="RandomRotateTargetAngle",
425
+ angle=[1 / 2],
426
+ axis="z",
427
+ center=[0, 0, 0],
428
+ p=1,
429
+ ),
430
+ dict(type="RandomScale", scale=[0.95, 0.95]),
431
+ ],
432
+ [
433
+ dict(
434
+ type="RandomRotateTargetAngle",
435
+ angle=[1],
436
+ axis="z",
437
+ center=[0, 0, 0],
438
+ p=1,
439
+ ),
440
+ dict(type="RandomScale", scale=[0.95, 0.95]),
441
+ ],
442
+ [
443
+ dict(
444
+ type="RandomRotateTargetAngle",
445
+ angle=[3 / 2],
446
+ axis="z",
447
+ center=[0, 0, 0],
448
+ p=1,
449
+ ),
450
+ dict(type="RandomScale", scale=[0.95, 0.95]),
451
+ ],
452
+ [
453
+ dict(
454
+ type="RandomRotateTargetAngle",
455
+ angle=[0],
456
+ axis="z",
457
+ center=[0, 0, 0],
458
+ p=1,
459
+ ),
460
+ dict(type="RandomScale", scale=[1.05, 1.05]),
461
+ ],
462
+ [
463
+ dict(
464
+ type="RandomRotateTargetAngle",
465
+ angle=[1 / 2],
466
+ axis="z",
467
+ center=[0, 0, 0],
468
+ p=1,
469
+ ),
470
+ dict(type="RandomScale", scale=[1.05, 1.05]),
471
+ ],
472
+ [
473
+ dict(
474
+ type="RandomRotateTargetAngle",
475
+ angle=[1],
476
+ axis="z",
477
+ center=[0, 0, 0],
478
+ p=1,
479
+ ),
480
+ dict(type="RandomScale", scale=[1.05, 1.05]),
481
+ ],
482
+ [
483
+ dict(
484
+ type="RandomRotateTargetAngle",
485
+ angle=[3 / 2],
486
+ axis="z",
487
+ center=[0, 0, 0],
488
+ p=1,
489
+ ),
490
+ dict(type="RandomScale", scale=[1.05, 1.05]),
491
+ ],
492
+ [dict(type="RandomFlip", p=1)],
493
+ ],
494
+ ),
495
+ ),
496
+ )
Pointcept/configs/s3dis/semseg-pt-v1-0-base.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+ # misc custom setting
3
+ batch_size = 12 # total batch size summed across all GPUs
4
+ mix_prob = 0.8
5
+ empty_cache = False
6
+ enable_amp = True
7
+
8
+ # model settings
9
+ model = dict(
10
+ type="DefaultSegmentor",
11
+ backbone=dict(
12
+ type="PointTransformer-Seg50",
13
+ in_channels=6,
14
+ num_classes=13,
15
+ ),
16
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
17
+ )
18
+
19
+
20
+ # scheduler settings
21
+ epoch = 3000
22
+ optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
23
+ scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
24
+
25
+ # dataset settings
26
+ dataset_type = "S3DISDataset"
27
+ data_root = "data/s3dis"
28
+
29
+ data = dict(
30
+ num_classes=13,
31
+ ignore_index=-1,
32
+ names=[
33
+ "ceiling",
34
+ "floor",
35
+ "wall",
36
+ "beam",
37
+ "column",
38
+ "window",
39
+ "door",
40
+ "table",
41
+ "chair",
42
+ "sofa",
43
+ "bookcase",
44
+ "board",
45
+ "clutter",
46
+ ],
47
+ train=dict(
48
+ type=dataset_type,
49
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
50
+ data_root=data_root,
51
+ transform=[
52
+ dict(type="CenterShift", apply_z=True),
53
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
54
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
55
+ # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
56
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
57
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
58
+ dict(type="RandomScale", scale=[0.9, 1.1]),
59
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
60
+ dict(type="RandomFlip", p=0.5),
61
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
62
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
63
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
64
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
65
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
66
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
67
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
68
+ dict(
69
+ type="GridSample",
70
+ grid_size=0.04,
71
+ hash_type="fnv",
72
+ mode="train",
73
+ keys=("coord", "color", "segment"),
74
+ return_grid_coord=True,
75
+ ),
76
+ dict(type="SphereCrop", point_max=100000, mode="random"),
77
+ dict(type="CenterShift", apply_z=False),
78
+ dict(type="NormalizeColor"),
79
+ # dict(type="ShufflePoint"),
80
+ dict(type="ToTensor"),
81
+ dict(
82
+ type="Collect",
83
+ keys=("coord", "grid_coord", "segment"),
84
+ feat_keys=["coord", "color"],
85
+ ),
86
+ ],
87
+ test_mode=False,
88
+ ),
89
+ val=dict(
90
+ type=dataset_type,
91
+ split="Area_5",
92
+ data_root=data_root,
93
+ transform=[
94
+ dict(type="CenterShift", apply_z=True),
95
+ dict(
96
+ type="Copy",
97
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
98
+ ),
99
+ dict(
100
+ type="GridSample",
101
+ grid_size=0.04,
102
+ hash_type="fnv",
103
+ mode="train",
104
+ keys=("coord", "color", "segment"),
105
+ return_grid_coord=True,
106
+ ),
107
+ dict(type="CenterShift", apply_z=False),
108
+ dict(type="NormalizeColor"),
109
+ dict(type="ToTensor"),
110
+ dict(
111
+ type="Collect",
112
+ keys=("coord", "grid_coord", "segment"),
113
+ offset_keys_dict=dict(offset="coord"),
114
+ feat_keys=["coord", "color"],
115
+ ),
116
+ ],
117
+ test_mode=False,
118
+ ),
119
+ test=dict(
120
+ type=dataset_type,
121
+ split="Area_5",
122
+ data_root=data_root,
123
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
124
+ test_mode=True,
125
+ test_cfg=dict(
126
+ voxelize=dict(
127
+ type="GridSample",
128
+ grid_size=0.04,
129
+ hash_type="fnv",
130
+ mode="test",
131
+ keys=("coord", "color"),
132
+ return_grid_coord=True,
133
+ ),
134
+ crop=None,
135
+ post_transform=[
136
+ dict(type="CenterShift", apply_z=False),
137
+ dict(type="ToTensor"),
138
+ dict(
139
+ type="Collect",
140
+ keys=("coord", "grid_coord", "index"),
141
+ feat_keys=("coord", "color"),
142
+ ),
143
+ ],
144
+ aug_transform=[
145
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
146
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
147
+ [dict(type="RandomScale", scale=[1, 1])],
148
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
149
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
150
+ [
151
+ dict(type="RandomScale", scale=[0.9, 0.9]),
152
+ dict(type="RandomFlip", p=1),
153
+ ],
154
+ [
155
+ dict(type="RandomScale", scale=[0.95, 0.95]),
156
+ dict(type="RandomFlip", p=1),
157
+ ],
158
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
159
+ [
160
+ dict(type="RandomScale", scale=[1.05, 1.05]),
161
+ dict(type="RandomFlip", p=1),
162
+ ],
163
+ [
164
+ dict(type="RandomScale", scale=[1.1, 1.1]),
165
+ dict(type="RandomFlip", p=1),
166
+ ],
167
+ ],
168
+ ),
169
+ ),
170
+ )
Pointcept/configs/s3dis/semseg-pt-v2m1-0-base.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+ # misc custom setting
3
+ batch_size = 12 # total batch size summed across all GPUs
4
+ mix_prob = 0.8
5
+ empty_cache = False
6
+ enable_amp = False
7
+
8
+ # model settings
9
+ model = dict(
10
+ type="DefaultSegmentor",
11
+ backbone=dict(
12
+ type="PT-v2m1",
13
+ in_channels=6,
14
+ num_classes=13,
15
+ patch_embed_depth=2,
16
+ patch_embed_channels=48,
17
+ patch_embed_groups=6,
18
+ patch_embed_neighbours=16,
19
+ enc_depths=(2, 6, 2),
20
+ enc_channels=(96, 192, 384),
21
+ enc_groups=(12, 24, 48),
22
+ enc_neighbours=(16, 16, 16),
23
+ dec_depths=(1, 1, 1),
24
+ dec_channels=(48, 96, 192),
25
+ dec_groups=(6, 12, 24),
26
+ dec_neighbours=(16, 16, 16),
27
+ grid_sizes=(0.1, 0.2, 0.4),
28
+ attn_qkv_bias=True,
29
+ pe_multiplier=True,
30
+ pe_bias=True,
31
+ attn_drop_rate=0.0,
32
+ drop_path_rate=0.3,
33
+ enable_checkpoint=False,
34
+ unpool_backend="interp", # alternatives: "map" / "interp"
35
+ ),
36
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
37
+ )
38
+
39
+ # scheduler settings
40
+ epoch = 3000
41
+ optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
42
+ scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
43
+
44
+ # dataset settings
45
+ dataset_type = "S3DISDataset"
46
+ data_root = "data/s3dis"
47
+
48
+ data = dict(
49
+ num_classes=13,
50
+ ignore_index=-1,
51
+ names=[
52
+ "ceiling",
53
+ "floor",
54
+ "wall",
55
+ "beam",
56
+ "column",
57
+ "window",
58
+ "door",
59
+ "table",
60
+ "chair",
61
+ "sofa",
62
+ "bookcase",
63
+ "board",
64
+ "clutter",
65
+ ],
66
+ train=dict(
67
+ type=dataset_type,
68
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
69
+ data_root=data_root,
70
+ transform=[
71
+ dict(type="CenterShift", apply_z=True),
72
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
73
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
74
+ # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
75
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
76
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
77
+ dict(type="RandomScale", scale=[0.9, 1.1]),
78
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
79
+ dict(type="RandomFlip", p=0.5),
80
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
81
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
82
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
83
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
84
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
85
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
86
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
87
+ dict(
88
+ type="GridSample",
89
+ grid_size=0.04,
90
+ hash_type="fnv",
91
+ mode="train",
92
+ keys=("coord", "color", "segment"),
93
+ return_grid_coord=True,
94
+ ),
95
+ dict(type="SphereCrop", point_max=80000, mode="random"),
96
+ dict(type="CenterShift", apply_z=False),
97
+ dict(type="NormalizeColor"),
98
+ # dict(type="ShufflePoint"),
99
+ dict(type="ToTensor"),
100
+ dict(
101
+ type="Collect",
102
+ keys=("coord", "grid_coord", "segment"),
103
+ feat_keys=["coord", "color"],
104
+ ),
105
+ ],
106
+ test_mode=False,
107
+ ),
108
+ val=dict(
109
+ type=dataset_type,
110
+ split="Area_5",
111
+ data_root=data_root,
112
+ transform=[
113
+ dict(type="CenterShift", apply_z=True),
114
+ dict(
115
+ type="Copy",
116
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
117
+ ),
118
+ dict(
119
+ type="GridSample",
120
+ grid_size=0.04,
121
+ hash_type="fnv",
122
+ mode="train",
123
+ keys=("coord", "color", "segment"),
124
+ return_grid_coord=True,
125
+ ),
126
+ dict(type="CenterShift", apply_z=False),
127
+ dict(type="NormalizeColor"),
128
+ dict(type="ToTensor"),
129
+ dict(
130
+ type="Collect",
131
+ keys=("coord", "grid_coord", "segment"),
132
+ offset_keys_dict=dict(offset="coord"),
133
+ feat_keys=["coord", "color"],
134
+ ),
135
+ ],
136
+ test_mode=False,
137
+ ),
138
+ test=dict(
139
+ type=dataset_type,
140
+ split="Area_5",
141
+ data_root=data_root,
142
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
143
+ test_mode=True,
144
+ test_cfg=dict(
145
+ voxelize=dict(
146
+ type="GridSample",
147
+ grid_size=0.04,
148
+ hash_type="fnv",
149
+ mode="test",
150
+ keys=("coord", "color"),
151
+ return_grid_coord=True,
152
+ ),
153
+ crop=None,
154
+ post_transform=[
155
+ dict(type="CenterShift", apply_z=False),
156
+ dict(type="ToTensor"),
157
+ dict(
158
+ type="Collect",
159
+ keys=("coord", "grid_coord", "index"),
160
+ feat_keys=("coord", "color"),
161
+ ),
162
+ ],
163
+ aug_transform=[
164
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
165
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
166
+ [dict(type="RandomScale", scale=[1, 1])],
167
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
168
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
169
+ [
170
+ dict(type="RandomScale", scale=[0.9, 0.9]),
171
+ dict(type="RandomFlip", p=1),
172
+ ],
173
+ [
174
+ dict(type="RandomScale", scale=[0.95, 0.95]),
175
+ dict(type="RandomFlip", p=1),
176
+ ],
177
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
178
+ [
179
+ dict(type="RandomScale", scale=[1.05, 1.05]),
180
+ dict(type="RandomFlip", p=1),
181
+ ],
182
+ [
183
+ dict(type="RandomScale", scale=[1.1, 1.1]),
184
+ dict(type="RandomFlip", p=1),
185
+ ],
186
+ ],
187
+ ),
188
+ ),
189
+ )
Pointcept/configs/s3dis/semseg-pt-v2m2-0-base.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+ # misc custom setting
3
+ batch_size = 12 # total batch size summed across all GPUs
4
+ mix_prob = 0.8
5
+ empty_cache = False
6
+ enable_amp = True
7
+
8
+ # model settings
9
+ model = dict(
10
+ type="DefaultSegmentor",
11
+ backbone=dict(
12
+ type="PT-v2m2",
13
+ in_channels=6,
14
+ num_classes=13,
15
+ patch_embed_depth=2,
16
+ patch_embed_channels=48,
17
+ patch_embed_groups=6,
18
+ patch_embed_neighbours=16,
19
+ enc_depths=(2, 6, 2),
20
+ enc_channels=(96, 192, 384),
21
+ enc_groups=(12, 24, 48),
22
+ enc_neighbours=(16, 16, 16),
23
+ dec_depths=(1, 1, 1),
24
+ dec_channels=(48, 96, 192),
25
+ dec_groups=(6, 12, 24),
26
+ dec_neighbours=(16, 16, 16),
27
+ grid_sizes=(0.1, 0.2, 0.4),
28
+ attn_qkv_bias=True,
29
+ pe_multiplier=False,
30
+ pe_bias=True,
31
+ attn_drop_rate=0.0,
32
+ drop_path_rate=0.3,
33
+ enable_checkpoint=False,
34
+ unpool_backend="interp", # alternatives: "map" / "interp"
35
+ ),
36
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
37
+ )
38
+
39
+ # scheduler settings
40
+ epoch = 3000
41
+ optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
42
+ scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
43
+
44
+ # dataset settings
45
+ dataset_type = "S3DISDataset"
46
+ data_root = "data/s3dis"
47
+
48
+ data = dict(
49
+ num_classes=13,
50
+ ignore_index=-1,
51
+ names=[
52
+ "ceiling",
53
+ "floor",
54
+ "wall",
55
+ "beam",
56
+ "column",
57
+ "window",
58
+ "door",
59
+ "table",
60
+ "chair",
61
+ "sofa",
62
+ "bookcase",
63
+ "board",
64
+ "clutter",
65
+ ],
66
+ train=dict(
67
+ type=dataset_type,
68
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
69
+ data_root=data_root,
70
+ transform=[
71
+ dict(type="CenterShift", apply_z=True),
72
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
73
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
74
+ # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
75
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
76
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
77
+ dict(type="RandomScale", scale=[0.9, 1.1]),
78
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
79
+ dict(type="RandomFlip", p=0.5),
80
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
81
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
82
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
83
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
84
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
85
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
86
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
87
+ dict(
88
+ type="GridSample",
89
+ grid_size=0.04,
90
+ hash_type="fnv",
91
+ mode="train",
92
+ keys=("coord", "color", "segment"),
93
+ return_grid_coord=True,
94
+ ),
95
+ dict(type="SphereCrop", point_max=80000, mode="random"),
96
+ dict(type="CenterShift", apply_z=False),
97
+ dict(type="NormalizeColor"),
98
+ # dict(type="ShufflePoint"),
99
+ dict(type="ToTensor"),
100
+ dict(
101
+ type="Collect",
102
+ keys=("coord", "grid_coord", "segment"),
103
+ feat_keys=["coord", "color"],
104
+ ),
105
+ ],
106
+ test_mode=False,
107
+ ),
108
+ val=dict(
109
+ type=dataset_type,
110
+ split="Area_5",
111
+ data_root=data_root,
112
+ transform=[
113
+ dict(type="CenterShift", apply_z=True),
114
+ dict(
115
+ type="Copy",
116
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
117
+ ),
118
+ dict(
119
+ type="GridSample",
120
+ grid_size=0.04,
121
+ hash_type="fnv",
122
+ mode="train",
123
+ keys=("coord", "color", "segment"),
124
+ return_grid_coord=True,
125
+ ),
126
+ dict(type="CenterShift", apply_z=False),
127
+ dict(type="NormalizeColor"),
128
+ dict(type="ToTensor"),
129
+ dict(
130
+ type="Collect",
131
+ keys=("coord", "grid_coord", "segment"),
132
+ offset_keys_dict=dict(offset="coord"),
133
+ feat_keys=["coord", "color"],
134
+ ),
135
+ ],
136
+ test_mode=False,
137
+ ),
138
+ test=dict(
139
+ type=dataset_type,
140
+ split="Area_5",
141
+ data_root=data_root,
142
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
143
+ test_mode=True,
144
+ test_cfg=dict(
145
+ voxelize=dict(
146
+ type="GridSample",
147
+ grid_size=0.04,
148
+ hash_type="fnv",
149
+ mode="test",
150
+ keys=("coord", "color"),
151
+ return_grid_coord=True,
152
+ ),
153
+ crop=None,
154
+ post_transform=[
155
+ dict(type="CenterShift", apply_z=False),
156
+ dict(type="ToTensor"),
157
+ dict(
158
+ type="Collect",
159
+ keys=("coord", "grid_coord", "index"),
160
+ feat_keys=("coord", "color"),
161
+ ),
162
+ ],
163
+ aug_transform=[
164
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
165
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
166
+ [dict(type="RandomScale", scale=[1, 1])],
167
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
168
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
169
+ [
170
+ dict(type="RandomScale", scale=[0.9, 0.9]),
171
+ dict(type="RandomFlip", p=1),
172
+ ],
173
+ [
174
+ dict(type="RandomScale", scale=[0.95, 0.95]),
175
+ dict(type="RandomFlip", p=1),
176
+ ],
177
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
178
+ [
179
+ dict(type="RandomScale", scale=[1.05, 1.05]),
180
+ dict(type="RandomFlip", p=1),
181
+ ],
182
+ [
183
+ dict(type="RandomScale", scale=[1.1, 1.1]),
184
+ dict(type="RandomFlip", p=1),
185
+ ],
186
+ ],
187
+ ),
188
+ ),
189
+ )
Pointcept/configs/s3dis/semseg-pt-v2m2-0-lovasz.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+ # misc custom setting
3
+ batch_size = 12 # total batch size summed across all GPUs
4
+ mix_prob = 0.8
5
+ empty_cache = False
6
+ enable_amp = True
7
+
8
+ # model settings
9
+ model = dict(
10
+ type="DefaultSegmentor",
11
+ backbone=dict(
12
+ type="PT-v2m2",
13
+ in_channels=6,
14
+ num_classes=13,
15
+ patch_embed_depth=2,
16
+ patch_embed_channels=48,
17
+ patch_embed_groups=6,
18
+ patch_embed_neighbours=16,
19
+ enc_depths=(2, 6, 2),
20
+ enc_channels=(96, 192, 384),
21
+ enc_groups=(12, 24, 48),
22
+ enc_neighbours=(16, 16, 16),
23
+ dec_depths=(1, 1, 1),
24
+ dec_channels=(48, 96, 192),
25
+ dec_groups=(6, 12, 24),
26
+ dec_neighbours=(16, 16, 16),
27
+ grid_sizes=(0.1, 0.2, 0.4),
28
+ attn_qkv_bias=True,
29
+ pe_multiplier=False,
30
+ pe_bias=True,
31
+ attn_drop_rate=0.0,
32
+ drop_path_rate=0.3,
33
+ enable_checkpoint=False,
34
+ unpool_backend="interp", # alternatives: "map" / "interp"
35
+ ),
36
+ criteria=[
37
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
38
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
39
+ ],
40
+ )
41
+
42
+ # scheduler settings
43
+ epoch = 3000
44
+ optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
45
+ scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
46
+
47
+ # dataset settings
48
+ dataset_type = "S3DISDataset"
49
+ data_root = "data/s3dis"
50
+
51
+ data = dict(
52
+ num_classes=13,
53
+ ignore_index=-1,
54
+ names=[
55
+ "ceiling",
56
+ "floor",
57
+ "wall",
58
+ "beam",
59
+ "column",
60
+ "window",
61
+ "door",
62
+ "table",
63
+ "chair",
64
+ "sofa",
65
+ "bookcase",
66
+ "board",
67
+ "clutter",
68
+ ],
69
+ train=dict(
70
+ type=dataset_type,
71
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
72
+ data_root=data_root,
73
+ transform=[
74
+ dict(type="CenterShift", apply_z=True),
75
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
76
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
77
+ # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
78
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
79
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
80
+ dict(type="RandomScale", scale=[0.9, 1.1]),
81
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
82
+ dict(type="RandomFlip", p=0.5),
83
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
84
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
85
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
86
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
87
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
88
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
89
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
90
+ dict(
91
+ type="GridSample",
92
+ grid_size=0.04,
93
+ hash_type="fnv",
94
+ mode="train",
95
+ keys=("coord", "color", "segment"),
96
+ return_grid_coord=True,
97
+ ),
98
+ dict(type="SphereCrop", point_max=80000, mode="random"),
99
+ dict(type="CenterShift", apply_z=False),
100
+ dict(type="NormalizeColor"),
101
+ # dict(type="ShufflePoint"),
102
+ dict(type="ToTensor"),
103
+ dict(
104
+ type="Collect",
105
+ keys=("coord", "grid_coord", "segment"),
106
+ feat_keys=["coord", "color"],
107
+ ),
108
+ ],
109
+ test_mode=False,
110
+ ),
111
+ val=dict(
112
+ type=dataset_type,
113
+ split="Area_5",
114
+ data_root=data_root,
115
+ transform=[
116
+ dict(type="CenterShift", apply_z=True),
117
+ dict(
118
+ type="Copy",
119
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
120
+ ),
121
+ dict(
122
+ type="GridSample",
123
+ grid_size=0.04,
124
+ hash_type="fnv",
125
+ mode="train",
126
+ keys=("coord", "color", "segment"),
127
+ return_grid_coord=True,
128
+ ),
129
+ dict(type="CenterShift", apply_z=False),
130
+ dict(type="NormalizeColor"),
131
+ dict(type="ToTensor"),
132
+ dict(
133
+ type="Collect",
134
+ keys=("coord", "grid_coord", "segment"),
135
+ offset_keys_dict=dict(offset="coord"),
136
+ feat_keys=["coord", "color"],
137
+ ),
138
+ ],
139
+ test_mode=False,
140
+ ),
141
+ test=dict(
142
+ type=dataset_type,
143
+ split="Area_5",
144
+ data_root=data_root,
145
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
146
+ test_mode=True,
147
+ test_cfg=dict(
148
+ voxelize=dict(
149
+ type="GridSample",
150
+ grid_size=0.04,
151
+ hash_type="fnv",
152
+ mode="test",
153
+ keys=("coord", "color"),
154
+ return_grid_coord=True,
155
+ ),
156
+ crop=None,
157
+ post_transform=[
158
+ dict(type="CenterShift", apply_z=False),
159
+ dict(type="ToTensor"),
160
+ dict(
161
+ type="Collect",
162
+ keys=("coord", "grid_coord", "index"),
163
+ feat_keys=("coord", "color"),
164
+ ),
165
+ ],
166
+ aug_transform=[
167
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
168
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
169
+ [dict(type="RandomScale", scale=[1, 1])],
170
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
171
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
172
+ [
173
+ dict(type="RandomScale", scale=[0.9, 0.9]),
174
+ dict(type="RandomFlip", p=1),
175
+ ],
176
+ [
177
+ dict(type="RandomScale", scale=[0.95, 0.95]),
178
+ dict(type="RandomFlip", p=1),
179
+ ],
180
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
181
+ [
182
+ dict(type="RandomScale", scale=[1.05, 1.05]),
183
+ dict(type="RandomFlip", p=1),
184
+ ],
185
+ [
186
+ dict(type="RandomScale", scale=[1.1, 1.1]),
187
+ dict(type="RandomFlip", p=1),
188
+ ],
189
+ ],
190
+ ),
191
+ ),
192
+ )
Pointcept/configs/s3dis/semseg-pt-v2m2-1-one-cycle.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+ # misc custom setting
3
+ batch_size = 12 # total batch size summed across all GPUs
4
+ mix_prob = 0.8
5
+ empty_cache = False
6
+ enable_amp = True
7
+
8
+ # model settings
9
+ model = dict(
10
+ type="DefaultSegmentor",
11
+ backbone=dict(
12
+ type="PT-v2m2",
13
+ in_channels=6,
14
+ num_classes=13,
15
+ patch_embed_depth=2,
16
+ patch_embed_channels=48,
17
+ patch_embed_groups=6,
18
+ patch_embed_neighbours=16,
19
+ enc_depths=(2, 6, 2),
20
+ enc_channels=(96, 192, 384),
21
+ enc_groups=(12, 24, 48),
22
+ enc_neighbours=(16, 16, 16),
23
+ dec_depths=(1, 1, 1),
24
+ dec_channels=(48, 96, 192),
25
+ dec_groups=(6, 12, 24),
26
+ dec_neighbours=(16, 16, 16),
27
+ grid_sizes=(0.1, 0.2, 0.4),
28
+ attn_qkv_bias=True,
29
+ pe_multiplier=False,
30
+ pe_bias=True,
31
+ attn_drop_rate=0.0,
32
+ drop_path_rate=0.3,
33
+ enable_checkpoint=False,
34
+ unpool_backend="interp", # alternatives: "map" / "interp"
35
+ ),
36
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
37
+ )
38
+
39
+ # scheduler settings
40
+ epoch = 3000
41
+ optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.05)
42
+ scheduler = dict(
43
+ type="OneCycleLR",
44
+ max_lr=optimizer["lr"],
45
+ pct_start=0.05,
46
+ anneal_strategy="cos",
47
+ div_factor=10.0,
48
+ final_div_factor=1000.0,
49
+ )
50
+
51
+ # dataset settings
52
+ dataset_type = "S3DISDataset"
53
+ data_root = "data/s3dis"
54
+
55
+ data = dict(
56
+ num_classes=13,
57
+ ignore_index=-1,
58
+ names=[
59
+ "ceiling",
60
+ "floor",
61
+ "wall",
62
+ "beam",
63
+ "column",
64
+ "window",
65
+ "door",
66
+ "table",
67
+ "chair",
68
+ "sofa",
69
+ "bookcase",
70
+ "board",
71
+ "clutter",
72
+ ],
73
+ train=dict(
74
+ type=dataset_type,
75
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
76
+ data_root=data_root,
77
+ transform=[
78
+ dict(type="CenterShift", apply_z=True),
79
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
80
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
81
+ # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
82
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
83
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
84
+ dict(type="RandomScale", scale=[0.9, 1.1]),
85
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
86
+ dict(type="RandomFlip", p=0.5),
87
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
88
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
89
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
90
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
91
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
92
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
93
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
94
+ dict(
95
+ type="GridSample",
96
+ grid_size=0.04,
97
+ hash_type="fnv",
98
+ mode="train",
99
+ keys=("coord", "color", "segment"),
100
+ return_grid_coord=True,
101
+ ),
102
+ dict(type="SphereCrop", point_max=80000, mode="random"),
103
+ dict(type="CenterShift", apply_z=False),
104
+ dict(type="NormalizeColor"),
105
+ # dict(type="ShufflePoint"),
106
+ dict(type="ToTensor"),
107
+ dict(
108
+ type="Collect",
109
+ keys=("coord", "grid_coord", "segment"),
110
+ feat_keys=["coord", "color"],
111
+ ),
112
+ ],
113
+ test_mode=False,
114
+ ),
115
+ val=dict(
116
+ type=dataset_type,
117
+ split="Area_5",
118
+ data_root=data_root,
119
+ transform=[
120
+ dict(type="CenterShift", apply_z=True),
121
+ dict(
122
+ type="Copy",
123
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
124
+ ),
125
+ dict(
126
+ type="GridSample",
127
+ grid_size=0.04,
128
+ hash_type="fnv",
129
+ mode="train",
130
+ keys=("coord", "color", "segment"),
131
+ return_grid_coord=True,
132
+ ),
133
+ dict(type="CenterShift", apply_z=False),
134
+ dict(type="NormalizeColor"),
135
+ dict(type="ToTensor"),
136
+ dict(
137
+ type="Collect",
138
+ keys=("coord", "grid_coord", "segment"),
139
+ offset_keys_dict=dict(offset="coord"),
140
+ feat_keys=["coord", "color"],
141
+ ),
142
+ ],
143
+ test_mode=False,
144
+ ),
145
+ test=dict(
146
+ type=dataset_type,
147
+ split="Area_5",
148
+ data_root=data_root,
149
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
150
+ test_mode=True,
151
+ test_cfg=dict(
152
+ voxelize=dict(
153
+ type="GridSample",
154
+ grid_size=0.04,
155
+ hash_type="fnv",
156
+ mode="test",
157
+ keys=("coord", "color"),
158
+ return_grid_coord=True,
159
+ ),
160
+ crop=None,
161
+ post_transform=[
162
+ dict(type="CenterShift", apply_z=False),
163
+ dict(type="ToTensor"),
164
+ dict(
165
+ type="Collect",
166
+ keys=("coord", "grid_coord", "index"),
167
+ feat_keys=("coord", "color"),
168
+ ),
169
+ ],
170
+ aug_transform=[
171
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
172
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
173
+ [dict(type="RandomScale", scale=[1, 1])],
174
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
175
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
176
+ [
177
+ dict(type="RandomScale", scale=[0.9, 0.9]),
178
+ dict(type="RandomFlip", p=1),
179
+ ],
180
+ [
181
+ dict(type="RandomScale", scale=[0.95, 0.95]),
182
+ dict(type="RandomFlip", p=1),
183
+ ],
184
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
185
+ [
186
+ dict(type="RandomScale", scale=[1.05, 1.05]),
187
+ dict(type="RandomFlip", p=1),
188
+ ],
189
+ [
190
+ dict(type="RandomScale", scale=[1.1, 1.1]),
191
+ dict(type="RandomFlip", p=1),
192
+ ],
193
+ ],
194
+ ),
195
+ ),
196
+ )
Pointcept/configs/s3dis/semseg-pt-v3m1-0-base.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # total batch size summed across all GPUs
5
+ num_worker = 24
6
+ mix_prob = 0.8
7
+ empty_cache = False
8
+ enable_amp = True
9
+
10
+ # model settings
11
+ model = dict(
12
+ type="DefaultSegmentorV2",
13
+ num_classes=13,
14
+ backbone_out_channels=64,
15
+ backbone=dict(
16
+ type="PT-v3m1",
17
+ in_channels=6,
18
+ order=("z", "z-trans", "hilbert", "hilbert-trans"),
19
+ stride=(2, 2, 2, 2),
20
+ enc_depths=(2, 2, 2, 6, 2),
21
+ enc_channels=(32, 64, 128, 256, 512),
22
+ enc_num_head=(2, 4, 8, 16, 32),
23
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
24
+ dec_depths=(2, 2, 2, 2),
25
+ dec_channels=(64, 64, 128, 256),
26
+ dec_num_head=(4, 4, 8, 16),
27
+ dec_patch_size=(1024, 1024, 1024, 1024),
28
+ mlp_ratio=4,
29
+ qkv_bias=True,
30
+ qk_scale=None,
31
+ attn_drop=0.0,
32
+ proj_drop=0.0,
33
+ drop_path=0.3,
34
+ shuffle_orders=True,
35
+ pre_norm=True,
36
+ enable_rpe=False,
37
+ enable_flash=True,
38
+ upcast_attention=False,
39
+ upcast_softmax=False,
40
+ cls_mode=False,
41
+ pdnorm_bn=False,
42
+ pdnorm_ln=False,
43
+ pdnorm_decouple=True,
44
+ pdnorm_adaptive=False,
45
+ pdnorm_affine=True,
46
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
47
+ ),
48
+ criteria=[
49
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
50
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
51
+ ],
52
+ )
53
+
54
+ # scheduler settings: training length, optimizer, LR schedule and per-group LR override
55
+ epoch = 3000
56
+ optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)  # base lr, same value as max_lr[0] below
57
+ scheduler = dict(
58
+ type="OneCycleLR",
59
+ max_lr=[0.006, 0.0006],  # the same two values as the optimizer lr and the param_dicts "block" lr, in that order
60
+ pct_start=0.05,
61
+ anneal_strategy="cos",
62
+ div_factor=10.0,  # NOTE(review): presumably forwarded to torch's OneCycleLR (initial lr = max_lr / div_factor) — confirm
63
+ final_div_factor=1000.0,
64
+ )
65
+ param_dicts = [dict(keyword="block", lr=0.0006)]  # lr override (1/10 of the base lr) for the "block" keyword group; presumably matched against parameter names — confirm
66
+
67
+ # dataset settings
68
+ dataset_type = "S3DISDataset"
69
+ data_root = "data/s3dis"
70
+
71
+ data = dict(
72
+ num_classes=13,
73
+ ignore_index=-1,
74
+ names=[
75
+ "ceiling",
76
+ "floor",
77
+ "wall",
78
+ "beam",
79
+ "column",
80
+ "window",
81
+ "door",
82
+ "table",
83
+ "chair",
84
+ "sofa",
85
+ "bookcase",
86
+ "board",
87
+ "clutter",
88
+ ],
89
+ train=dict(
90
+ type=dataset_type,
91
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
92
+ data_root=data_root,
93
+ transform=[
94
+ dict(type="CenterShift", apply_z=True),
95
+ dict(
96
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
97
+ ),
98
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
99
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
100
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
101
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
102
+ dict(type="RandomScale", scale=[0.9, 1.1]),
103
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
104
+ dict(type="RandomFlip", p=0.5),
105
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
106
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
107
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
108
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
109
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
110
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
111
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
112
+ dict(
113
+ type="GridSample",
114
+ grid_size=0.02,
115
+ hash_type="fnv",
116
+ mode="train",
117
+ return_grid_coord=True,
118
+ ),
119
+ dict(type="SphereCrop", sample_rate=0.6, mode="random"),
120
+ dict(type="SphereCrop", point_max=204800, mode="random"),
121
+ dict(type="CenterShift", apply_z=False),
122
+ dict(type="NormalizeColor"),
123
+ # dict(type="ShufflePoint"),
124
+ dict(type="ToTensor"),
125
+ dict(
126
+ type="Collect",
127
+ keys=("coord", "grid_coord", "segment"),
128
+ feat_keys=("color", "normal"),
129
+ ),
130
+ ],
131
+ test_mode=False,
132
+ ),
133
+ val=dict(
134
+ type=dataset_type,
135
+ split="Area_5",
136
+ data_root=data_root,
137
+ transform=[
138
+ dict(type="CenterShift", apply_z=True),
139
+ dict(
140
+ type="Copy",
141
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
142
+ ),
143
+ dict(
144
+ type="GridSample",
145
+ grid_size=0.02,
146
+ hash_type="fnv",
147
+ mode="train",
148
+ return_grid_coord=True,
149
+ ),
150
+ dict(type="CenterShift", apply_z=False),
151
+ dict(type="NormalizeColor"),
152
+ dict(type="ToTensor"),
153
+ dict(
154
+ type="Collect",
155
+ keys=(
156
+ "coord",
157
+ "grid_coord",
158
+ "origin_coord",
159
+ "segment",
160
+ "origin_segment",
161
+ ),
162
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
163
+ feat_keys=("color", "normal"),
164
+ ),
165
+ ],
166
+ test_mode=False,
167
+ ),
168
+ test=dict(
169
+ type=dataset_type,
170
+ split="Area_5",
171
+ data_root=data_root,
172
+ transform=[
173
+ dict(type="CenterShift", apply_z=True),
174
+ dict(type="NormalizeColor"),
175
+ ],
176
+ test_mode=True,
177
+ test_cfg=dict(
178
+ voxelize=dict(
179
+ type="GridSample",
180
+ grid_size=0.02,
181
+ hash_type="fnv",
182
+ mode="test",
183
+ keys=("coord", "color", "normal"),
184
+ return_grid_coord=True,
185
+ ),
186
+ crop=None,
187
+ post_transform=[
188
+ dict(type="CenterShift", apply_z=False),
189
+ dict(type="ToTensor"),
190
+ dict(
191
+ type="Collect",
192
+ keys=("coord", "grid_coord", "index"),
193
+ feat_keys=("color", "normal"),
194
+ ),
195
+ ],
196
+ aug_transform=[
197
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
198
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
199
+ [dict(type="RandomScale", scale=[1, 1])],
200
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
201
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
202
+ [
203
+ dict(type="RandomScale", scale=[0.9, 0.9]),
204
+ dict(type="RandomFlip", p=1),
205
+ ],
206
+ [
207
+ dict(type="RandomScale", scale=[0.95, 0.95]),
208
+ dict(type="RandomFlip", p=1),
209
+ ],
210
+ [
211
+ dict(type="RandomScale", scale=[1, 1]),
212
+ dict(type="RandomFlip", p=1),
213
+ ],
214
+ [
215
+ dict(type="RandomScale", scale=[1.05, 1.05]),
216
+ dict(type="RandomFlip", p=1),
217
+ ],
218
+ [
219
+ dict(type="RandomScale", scale=[1.1, 1.1]),
220
+ dict(type="RandomFlip", p=1),
221
+ ],
222
+ ],
223
+ ),
224
+ ),
225
+ )
Pointcept/configs/s3dis/semseg-pt-v3m1-1-rpe.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total bs in all gpus
5
+ num_worker = 24
6
+ mix_prob = 0.8
7
+ empty_cache = False
8
+ enable_amp = True
9
+
10
+ # model settings: segmentor wrapper, PT-v3m1 backbone (RPE variant) and the training losses
11
+ model = dict(
12
+ type="DefaultSegmentorV2",
13
+ num_classes=13,  # same count as data.num_classes and the 13 entries of data.names
14
+ backbone_out_channels=64,  # equals dec_channels[0] below
15
+ backbone=dict(
16
+ type="PT-v3m1",
17
+ in_channels=6,  # NOTE(review): presumably color (3) + normal (3), the feat_keys collected in the data pipelines — confirm
18
+ order=["z", "z-trans", "hilbert", "hilbert-trans"],
19
+ stride=(2, 2, 2, 2),  # four entries, one fewer than the five-entry enc_* tuples
20
+ enc_depths=(2, 2, 2, 6, 2),  # every enc_* tuple has five entries, presumably one per encoder stage
21
+ enc_channels=(32, 64, 128, 256, 512),
22
+ enc_num_head=(2, 4, 8, 16, 32),
23
+ enc_patch_size=(128, 128, 128, 128, 128),  # patch size is 128 everywhere in this RPE config
24
+ dec_depths=(2, 2, 2, 2),  # every dec_* tuple has four entries, presumably one per decoder stage
25
+ dec_channels=(64, 64, 128, 256),
26
+ dec_num_head=(4, 4, 8, 16),
27
+ dec_patch_size=(128, 128, 128, 128),
28
+ mlp_ratio=4,
29
+ qkv_bias=True,
30
+ qk_scale=None,
31
+ attn_drop=0.0,
32
+ proj_drop=0.0,
33
+ drop_path=0.3,
34
+ shuffle_orders=True,
35
+ pre_norm=True,
36
+ enable_rpe=True,  # RPE is on; flash attention is off and both upcast flags are on in this config
37
+ enable_flash=False,
38
+ upcast_attention=True,
39
+ upcast_softmax=True,
40
+ cls_mode=False,
41
+ pdnorm_bn=False,  # both pdnorm_bn and pdnorm_ln are disabled in this single-dataset config
42
+ pdnorm_ln=False,
43
+ pdnorm_decouple=True,
44
+ pdnorm_adaptive=False,
45
+ pdnorm_affine=True,
46
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
47
+ ),
48
+ criteria=[  # two loss terms with equal loss_weight; both use ignore_index=-1, matching data.ignore_index
49
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
50
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
51
+ ],
52
+ )
53
+
54
+ # scheduler settings: training length, optimizer, LR schedule and per-group LR override
55
+ epoch = 3000
56
+ optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)  # base lr, same value as max_lr[0] below
57
+ scheduler = dict(
58
+ type="OneCycleLR",
59
+ max_lr=[0.006, 0.0006],  # the same two values as the optimizer lr and the param_dicts "block" lr, in that order
60
+ pct_start=0.05,
61
+ anneal_strategy="cos",
62
+ div_factor=10.0,  # NOTE(review): presumably forwarded to torch's OneCycleLR (initial lr = max_lr / div_factor) — confirm
63
+ final_div_factor=1000.0,
64
+ )
65
+ param_dicts = [dict(keyword="block", lr=0.0006)]  # lr override (1/10 of the base lr) for the "block" keyword group; presumably matched against parameter names — confirm
66
+
67
+ # dataset settings
68
+ dataset_type = "S3DISDataset"
69
+ data_root = "data/s3dis"
70
+
71
+ data = dict(
72
+ num_classes=13,
73
+ ignore_index=-1,
74
+ names=[
75
+ "ceiling",
76
+ "floor",
77
+ "wall",
78
+ "beam",
79
+ "column",
80
+ "window",
81
+ "door",
82
+ "table",
83
+ "chair",
84
+ "sofa",
85
+ "bookcase",
86
+ "board",
87
+ "clutter",
88
+ ],
89
+ train=dict(
90
+ type=dataset_type,
91
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
92
+ data_root=data_root,
93
+ transform=[
94
+ dict(type="CenterShift", apply_z=True),
95
+ dict(
96
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
97
+ ),
98
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
99
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
100
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
101
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
102
+ dict(type="RandomScale", scale=[0.9, 1.1]),
103
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
104
+ dict(type="RandomFlip", p=0.5),
105
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
106
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
107
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
108
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
109
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
110
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
111
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
112
+ dict(
113
+ type="GridSample",
114
+ grid_size=0.02,
115
+ hash_type="fnv",
116
+ mode="train",
117
+ return_grid_coord=True,
118
+ ),
119
+ dict(type="SphereCrop", sample_rate=0.6, mode="random"),
120
+ dict(type="SphereCrop", point_max=204800, mode="random"),
121
+ dict(type="CenterShift", apply_z=False),
122
+ dict(type="NormalizeColor"),
123
+ # dict(type="ShufflePoint"),
124
+ dict(type="ToTensor"),
125
+ dict(
126
+ type="Collect",
127
+ keys=("coord", "grid_coord", "segment"),
128
+ feat_keys=("color", "normal"),
129
+ ),
130
+ ],
131
+ test_mode=False,
132
+ ),
133
+ val=dict(
134
+ type=dataset_type,
135
+ split="Area_5",
136
+ data_root=data_root,
137
+ transform=[
138
+ dict(type="CenterShift", apply_z=True),
139
+ dict(
140
+ type="Copy",
141
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
142
+ ),
143
+ dict(
144
+ type="GridSample",
145
+ grid_size=0.02,
146
+ hash_type="fnv",
147
+ mode="train",
148
+ return_grid_coord=True,
149
+ ),
150
+ dict(type="CenterShift", apply_z=False),
151
+ dict(type="NormalizeColor"),
152
+ dict(type="ToTensor"),
153
+ dict(
154
+ type="Collect",
155
+ keys=(
156
+ "coord",
157
+ "grid_coord",
158
+ "origin_coord",
159
+ "segment",
160
+ "origin_segment",
161
+ ),
162
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
163
+ feat_keys=("color", "normal"),
164
+ ),
165
+ ],
166
+ test_mode=False,
167
+ ),
168
+ test=dict(
169
+ type=dataset_type,
170
+ split="Area_5",
171
+ data_root=data_root,
172
+ transform=[
173
+ dict(type="CenterShift", apply_z=True),
174
+ dict(type="NormalizeColor"),
175
+ ],
176
+ test_mode=True,
177
+ test_cfg=dict(
178
+ voxelize=dict(
179
+ type="GridSample",
180
+ grid_size=0.02,
181
+ hash_type="fnv",
182
+ mode="test",
183
+ keys=("coord", "color", "normal"),
184
+ return_grid_coord=True,
185
+ ),
186
+ crop=None,
187
+ post_transform=[
188
+ dict(type="CenterShift", apply_z=False),
189
+ dict(type="ToTensor"),
190
+ dict(
191
+ type="Collect",
192
+ keys=("coord", "grid_coord", "index"),
193
+ feat_keys=("color", "normal"),
194
+ ),
195
+ ],
196
+ aug_transform=[
197
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
198
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
199
+ [dict(type="RandomScale", scale=[1, 1])],
200
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
201
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
202
+ [
203
+ dict(type="RandomScale", scale=[0.9, 0.9]),
204
+ dict(type="RandomFlip", p=1),
205
+ ],
206
+ [
207
+ dict(type="RandomScale", scale=[0.95, 0.95]),
208
+ dict(type="RandomFlip", p=1),
209
+ ],
210
+ [
211
+ dict(type="RandomScale", scale=[1, 1]),
212
+ dict(type="RandomFlip", p=1),
213
+ ],
214
+ [
215
+ dict(type="RandomScale", scale=[1.05, 1.05]),
216
+ dict(type="RandomFlip", p=1),
217
+ ],
218
+ [
219
+ dict(type="RandomScale", scale=[1.1, 1.1]),
220
+ dict(type="RandomFlip", p=1),
221
+ ],
222
+ ],
223
+ ),
224
+ ),
225
+ )
Pointcept/configs/s3dis/semseg-pt-v3m1-2-ppt-extreme.py ADDED
@@ -0,0 +1,487 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PTv3 + PPT
3
+ Pre-trained on ScanNet + Structured3D
4
+ (S3DIS is commented out by default because of its long data time, see: https://github.com/Pointcept/Pointcept/issues/103)
5
+ In the original PPT paper, the three datasets are trained and validated jointly with
6
+ one shared-weight model. In PTv3, we train on multiple datasets but validate on only one dataset to
7
+ achieve extreme performance on that single dataset.
8
+
9
+ To enable joint training on three datasets, uncomment the config for the S3DIS dataset and change the "loop" of
10
+ Structured3D and ScanNet to 4 and 2 respectively (this file already does both: the S3DIS entry is enabled with loop=1).
11
+ """
12
+
13
+ _base_ = ["../_base_/default_runtime.py"]
14
+
15
+ # misc custom setting
16
+ batch_size = 24 # bs: total bs in all gpus
17
+ num_worker = 48
18
+ mix_prob = 0.8
19
+ empty_cache = False
20
+ enable_amp = True
21
+ find_unused_parameters = True
22
+
23
+ # trainer
24
+ train = dict(
25
+ type="MultiDatasetTrainer",
26
+ )
27
+
28
+ # model settings: PPT-v1m1 wrapper around a PT-v3m1 backbone, with the class vocabulary and per-dataset index lists
29
+ model = dict(
30
+ type="PPT-v1m1",
31
+ backbone=dict(
32
+ type="PT-v3m1",
33
+ in_channels=6,  # NOTE(review): presumably color (3) + normal (3), the feat_keys collected in the data pipelines — confirm
34
+ order=("z", "z-trans", "hilbert", "hilbert-trans"),
35
+ stride=(2, 2, 2, 2),  # four entries, one fewer than the five-entry enc_* tuples
36
+ enc_depths=(2, 2, 2, 6, 2),  # every enc_* tuple has five entries, presumably one per encoder stage
37
+ enc_channels=(32, 64, 128, 256, 512),
38
+ enc_num_head=(2, 4, 8, 16, 32),
39
+ enc_patch_size=(1024, 1024, 1024, 1024, 1024),
40
+ dec_depths=(2, 2, 2, 2),  # every dec_* tuple has four entries, presumably one per decoder stage
41
+ dec_channels=(64, 64, 128, 256),
42
+ dec_num_head=(4, 4, 8, 16),
43
+ dec_patch_size=(1024, 1024, 1024, 1024),
44
+ mlp_ratio=4,
45
+ qkv_bias=True,
46
+ qk_scale=None,
47
+ attn_drop=0.0,
48
+ proj_drop=0.0,
49
+ drop_path=0.3,
50
+ shuffle_orders=True,
51
+ pre_norm=True,
52
+ enable_rpe=False,  # this config keeps RPE off and flash attention on (next line)
53
+ enable_flash=True,
54
+ upcast_attention=False,
55
+ upcast_softmax=False,
56
+ cls_mode=False,
57
+ pdnorm_bn=True,  # both pdnorm_bn and pdnorm_ln are enabled in this multi-dataset config
58
+ pdnorm_ln=True,
59
+ pdnorm_decouple=True,
60
+ pdnorm_adaptive=False,
61
+ pdnorm_affine=True,
62
+ pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),  # same three names as `conditions` below, listed in a different order
63
+ ),
64
+ criteria=[  # two loss terms with equal loss_weight; both use ignore_index=-1, matching data.ignore_index
65
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
66
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
67
+ ],
68
+ backbone_out_channels=64,  # equals the backbone's dec_channels[0]
69
+ context_channels=256,
70
+ conditions=("Structured3D", "ScanNet", "S3DIS"),  # the same strings the data pipelines attach via Add(keys_dict={"condition": ...})
71
+ template="[x]",
72
+ clip_model="ViT-B/16",
73
+ # fmt: off
74
+ class_name=(  # 36 names in total; valid_index below holds positions into this tuple
75
+ "wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door",
76
+ "window", "bookshelf", "bookcase", "picture", "counter", "desk", "shelves", "curtain",
77
+ "dresser", "pillow", "mirror", "ceiling", "refrigerator", "television", "shower curtain", "nightstand",
78
+ "toilet", "sink", "lamp", "bathtub", "garbagebin", "board", "beam", "column",
79
+ "clutter", "otherstructure", "otherfurniture", "otherprop",
80
+ ),
81
+ valid_index=(  # three tuples of 25 / 20 / 13 indices; presumably one per entry of `conditions`, in the same order — confirm
82
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 33, 34, 35),
83
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
84
+ (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),  # NOTE(review): selects the 13 data.names classes, but in class_name order (first is "wall") while data.names starts with "ceiling" — confirm label alignment
85
+ ),
86
+ # fmt: on
87
+ backbone_mode=False,
88
+ )
89
+
90
+ # scheduler settings: training length, optimizer, LR schedule and per-group LR override
91
+ epoch = 100
92
+ optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.05)  # base lr, same value as max_lr[0] below
93
+ scheduler = dict(
94
+ type="OneCycleLR",
95
+ max_lr=[0.005, 0.0005],  # the same two values as the optimizer lr and the param_dicts "block" lr, in that order
96
+ pct_start=0.05,
97
+ anneal_strategy="cos",
98
+ div_factor=10.0,  # NOTE(review): presumably forwarded to torch's OneCycleLR (initial lr = max_lr / div_factor) — confirm
99
+ final_div_factor=1000.0,
100
+ )
101
+ param_dicts = [dict(keyword="block", lr=0.0005)]  # lr override (1/10 of the base lr) for the "block" keyword group; presumably matched against parameter names — confirm
102
+
103
+ # dataset settings
104
+ data = dict(
105
+ num_classes=13,
106
+ ignore_index=-1,
107
+ names=[
108
+ "ceiling",
109
+ "floor",
110
+ "wall",
111
+ "beam",
112
+ "column",
113
+ "window",
114
+ "door",
115
+ "table",
116
+ "chair",
117
+ "sofa",
118
+ "bookcase",
119
+ "board",
120
+ "clutter",
121
+ ],
122
+ train=dict(
123
+ type="ConcatDataset",
124
+ datasets=[
125
+ # Structured3D
126
+ dict(
127
+ type="Structured3DDataset",
128
+ split=["train", "val", "test"],
129
+ data_root="data/structured3d",
130
+ transform=[
131
+ dict(type="CenterShift", apply_z=True),
132
+ dict(
133
+ type="RandomDropout",
134
+ dropout_ratio=0.2,
135
+ dropout_application_ratio=0.2,
136
+ ),
137
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
138
+ dict(
139
+ type="RandomRotate",
140
+ angle=[-1, 1],
141
+ axis="z",
142
+ center=[0, 0, 0],
143
+ p=0.5,
144
+ ),
145
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
146
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
147
+ dict(type="RandomScale", scale=[0.9, 1.1]),
148
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
149
+ dict(type="RandomFlip", p=0.5),
150
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
151
+ # dict(
152
+ # type="ElasticDistortion",
153
+ # distortion_params=[[0.2, 0.4], [0.8, 1.6]],
154
+ # ),
155
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
156
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
157
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
158
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
159
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
160
+ dict(
161
+ type="GridSample",
162
+ grid_size=0.02,
163
+ hash_type="fnv",
164
+ mode="train",
165
+ return_grid_coord=True,
166
+ ),
167
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
168
+ dict(type="SphereCrop", point_max=204800, mode="random"),
169
+ dict(type="CenterShift", apply_z=False),
170
+ dict(type="NormalizeColor"),
171
+ # dict(type="ShufflePoint"),
172
+ dict(type="Add", keys_dict={"condition": "Structured3D"}),
173
+ dict(type="ToTensor"),
174
+ dict(
175
+ type="Collect",
176
+ keys=("coord", "grid_coord", "segment", "condition"),
177
+ feat_keys=("color", "normal"),
178
+ ),
179
+ ],
180
+ test_mode=False,
181
+ loop=4, # sampling weight
182
+ ),
183
+ # ScanNet
184
+ dict(
185
+ type="ScanNetDataset",
186
+ split="train",
187
+ data_root="data/scannet",
188
+ transform=[
189
+ dict(type="CenterShift", apply_z=True),
190
+ dict(
191
+ type="RandomDropout",
192
+ dropout_ratio=0.2,
193
+ dropout_application_ratio=0.2,
194
+ ),
195
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
196
+ dict(
197
+ type="RandomRotate",
198
+ angle=[-1, 1],
199
+ axis="z",
200
+ center=[0, 0, 0],
201
+ p=0.5,
202
+ ),
203
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
204
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
205
+ dict(type="RandomScale", scale=[0.9, 1.1]),
206
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
207
+ dict(type="RandomFlip", p=0.5),
208
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
209
+ # dict(
210
+ # type="ElasticDistortion",
211
+ # distortion_params=[[0.2, 0.4], [0.8, 1.6]],
212
+ # ),
213
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
214
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
215
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
216
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
217
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
218
+ dict(
219
+ type="GridSample",
220
+ grid_size=0.02,
221
+ hash_type="fnv",
222
+ mode="train",
223
+ return_grid_coord=True,
224
+ ),
225
+ dict(type="SphereCrop", point_max=102400, mode="random"),
226
+ dict(type="CenterShift", apply_z=False),
227
+ dict(type="NormalizeColor"),
228
+ # dict(type="ShufflePoint"),
229
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
230
+ dict(type="ToTensor"),
231
+ dict(
232
+ type="Collect",
233
+ keys=("coord", "grid_coord", "segment", "condition"),
234
+ feat_keys=("color", "normal"),
235
+ ),
236
+ ],
237
+ test_mode=False,
238
+ loop=2, # sampling weight
239
+ ),
240
+ # S3DIS
241
+ dict(
242
+ type="S3DISDataset",
243
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
244
+ data_root="data/s3dis",
245
+ transform=[
246
+ dict(type="CenterShift", apply_z=True),
247
+ dict(
248
+ type="RandomDropout",
249
+ dropout_ratio=0.2,
250
+ dropout_application_ratio=0.2,
251
+ ),
252
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
253
+ dict(
254
+ type="RandomRotate",
255
+ angle=[-1, 1],
256
+ axis="z",
257
+ center=[0, 0, 0],
258
+ p=0.5,
259
+ ),
260
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
261
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
262
+ dict(type="RandomScale", scale=[0.9, 1.1]),
263
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
264
+ dict(type="RandomFlip", p=0.5),
265
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
266
+ # dict(
267
+ # type="ElasticDistortion",
268
+ # distortion_params=[[0.2, 0.4], [0.8, 1.6]],
269
+ # ),
270
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
271
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
272
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
273
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
274
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
275
+ dict(
276
+ type="GridSample",
277
+ grid_size=0.02,
278
+ hash_type="fnv",
279
+ mode="train",
280
+ return_grid_coord=True,
281
+ ),
282
+ dict(type="SphereCrop", sample_rate=0.6, mode="random"),
283
+ dict(type="SphereCrop", point_max=204800, mode="random"),
284
+ dict(type="CenterShift", apply_z=False),
285
+ dict(type="NormalizeColor"),
286
+ # dict(type="ShufflePoint"),
287
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
288
+ dict(type="ToTensor"),
289
+ dict(
290
+ type="Collect",
291
+ keys=("coord", "grid_coord", "segment", "condition"),
292
+ feat_keys=("color", "normal"),
293
+ ),
294
+ ],
295
+ test_mode=False,
296
+ loop=1, # sampling weight
297
+ ),
298
+ ],
299
+ ),
300
+ val=dict(
301
+ type="S3DISDataset",
302
+ split="Area_5",
303
+ data_root="data/s3dis",
304
+ transform=[
305
+ dict(type="CenterShift", apply_z=True),
306
+ dict(
307
+ type="Copy",
308
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
309
+ ),
310
+ dict(
311
+ type="GridSample",
312
+ grid_size=0.02,
313
+ hash_type="fnv",
314
+ mode="train",
315
+ return_grid_coord=True,
316
+ ),
317
+ dict(type="CenterShift", apply_z=False),
318
+ dict(type="NormalizeColor"),
319
+ dict(type="ToTensor"),
320
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
321
+ dict(
322
+ type="Collect",
323
+ keys=(
324
+ "coord",
325
+ "grid_coord",
326
+ "origin_coord",
327
+ "segment",
328
+ "origin_segment",
329
+ "condition",
330
+ ),
331
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
332
+ feat_keys=("color", "normal"),
333
+ ),
334
+ ],
335
+ test_mode=False,
336
+ ),
337
+ test=dict(
338
+ type="S3DISDataset",
339
+ split="Area_5",
340
+ data_root="data/s3dis",
341
+ transform=[
342
+ dict(type="CenterShift", apply_z=True),
343
+ dict(type="NormalizeColor"),
344
+ ],
345
+ test_mode=True,
346
+ test_cfg=dict(
347
+ voxelize=dict(
348
+ type="GridSample",
349
+ grid_size=0.02,
350
+ hash_type="fnv",
351
+ mode="test",
352
+ keys=("coord", "color", "normal"),
353
+ return_grid_coord=True,
354
+ ),
355
+ crop=None,
356
+ post_transform=[
357
+ dict(type="CenterShift", apply_z=False),
358
+ dict(type="Add", keys_dict={"condition": "S3DIS"}),
359
+ dict(type="ToTensor"),
360
+ dict(
361
+ type="Collect",
362
+ keys=("coord", "grid_coord", "index", "condition"),
363
+ feat_keys=("color", "normal"),
364
+ ),
365
+ ],
366
+ aug_transform=[
367
+ [
368
+ dict(
369
+ type="RandomRotateTargetAngle",
370
+ angle=[0],
371
+ axis="z",
372
+ center=[0, 0, 0],
373
+ p=1,
374
+ )
375
+ ],
376
+ [
377
+ dict(
378
+ type="RandomRotateTargetAngle",
379
+ angle=[1 / 2],
380
+ axis="z",
381
+ center=[0, 0, 0],
382
+ p=1,
383
+ )
384
+ ],
385
+ [
386
+ dict(
387
+ type="RandomRotateTargetAngle",
388
+ angle=[1],
389
+ axis="z",
390
+ center=[0, 0, 0],
391
+ p=1,
392
+ )
393
+ ],
394
+ [
395
+ dict(
396
+ type="RandomRotateTargetAngle",
397
+ angle=[3 / 2],
398
+ axis="z",
399
+ center=[0, 0, 0],
400
+ p=1,
401
+ )
402
+ ],
403
+ [
404
+ dict(
405
+ type="RandomRotateTargetAngle",
406
+ angle=[0],
407
+ axis="z",
408
+ center=[0, 0, 0],
409
+ p=1,
410
+ ),
411
+ dict(type="RandomScale", scale=[0.95, 0.95]),
412
+ ],
413
+ [
414
+ dict(
415
+ type="RandomRotateTargetAngle",
416
+ angle=[1 / 2],
417
+ axis="z",
418
+ center=[0, 0, 0],
419
+ p=1,
420
+ ),
421
+ dict(type="RandomScale", scale=[0.95, 0.95]),
422
+ ],
423
+ [
424
+ dict(
425
+ type="RandomRotateTargetAngle",
426
+ angle=[1],
427
+ axis="z",
428
+ center=[0, 0, 0],
429
+ p=1,
430
+ ),
431
+ dict(type="RandomScale", scale=[0.95, 0.95]),
432
+ ],
433
+ [
434
+ dict(
435
+ type="RandomRotateTargetAngle",
436
+ angle=[3 / 2],
437
+ axis="z",
438
+ center=[0, 0, 0],
439
+ p=1,
440
+ ),
441
+ dict(type="RandomScale", scale=[0.95, 0.95]),
442
+ ],
443
+ [
444
+ dict(
445
+ type="RandomRotateTargetAngle",
446
+ angle=[0],
447
+ axis="z",
448
+ center=[0, 0, 0],
449
+ p=1,
450
+ ),
451
+ dict(type="RandomScale", scale=[1.05, 1.05]),
452
+ ],
453
+ [
454
+ dict(
455
+ type="RandomRotateTargetAngle",
456
+ angle=[1 / 2],
457
+ axis="z",
458
+ center=[0, 0, 0],
459
+ p=1,
460
+ ),
461
+ dict(type="RandomScale", scale=[1.05, 1.05]),
462
+ ],
463
+ [
464
+ dict(
465
+ type="RandomRotateTargetAngle",
466
+ angle=[1],
467
+ axis="z",
468
+ center=[0, 0, 0],
469
+ p=1,
470
+ ),
471
+ dict(type="RandomScale", scale=[1.05, 1.05]),
472
+ ],
473
+ [
474
+ dict(
475
+ type="RandomRotateTargetAngle",
476
+ angle=[3 / 2],
477
+ axis="z",
478
+ center=[0, 0, 0],
479
+ p=1,
480
+ ),
481
+ dict(type="RandomScale", scale=[1.05, 1.05]),
482
+ ],
483
+ [dict(type="RandomFlip", p=1)],
484
+ ],
485
+ ),
486
+ ),
487
+ )
Pointcept/configs/s3dis/semseg-spunet-v1m1-0-base.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+ # misc custom setting
3
+ batch_size = 12 # bs: total bs in all gpus
4
+ mix_prob = 0.8
5
+ empty_cache = False
6
+ enable_amp = True
7
+
8
+ # model settings: DefaultSegmentor wrapper around a SpUNet-v1m1 backbone, trained with cross-entropy only
9
+ model = dict(
10
+ type="DefaultSegmentor",
11
+ backbone=dict(
12
+ type="SpUNet-v1m1",
13
+ in_channels=6,  # NOTE(review): presumably coord (3) + color (3), the feat_keys collected in the data pipelines — confirm
14
+ num_classes=13,  # same count as data.num_classes and the 13 entries of data.names
15
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),  # eight entries, paired one-to-one with `layers`
16
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
17
+ ),
18
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],  # ignore_index matches data.ignore_index
19
+ )
20
+
21
+ # scheduler settings: training length, optimizer and LR schedule
22
+ epoch = 3000
23
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
24
+ scheduler = dict(type="PolyLR")  # only the type is given, so the scheduler's own defaults apply
25
+
26
+ # dataset settings
27
+ dataset_type = "S3DISDataset"
28
+ data_root = "data/s3dis"
29
+
30
+ data = dict(
31
+ num_classes=13,
32
+ ignore_index=-1,
33
+ names=[
34
+ "ceiling",
35
+ "floor",
36
+ "wall",
37
+ "beam",
38
+ "column",
39
+ "window",
40
+ "door",
41
+ "table",
42
+ "chair",
43
+ "sofa",
44
+ "bookcase",
45
+ "board",
46
+ "clutter",
47
+ ],
48
+ train=dict(
49
+ type=dataset_type,
50
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
51
+ data_root=data_root,
52
+ transform=[
53
+ dict(type="CenterShift", apply_z=True),
54
+ dict(
55
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
56
+ ),
57
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
58
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
59
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
60
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
61
+ dict(type="RandomScale", scale=[0.9, 1.1]),
62
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
63
+ dict(type="RandomFlip", p=0.5),
64
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
65
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
66
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
67
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
68
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
69
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
70
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
71
+ dict(
72
+ type="GridSample",
73
+ grid_size=0.05,
74
+ hash_type="fnv",
75
+ mode="train",
76
+ keys=("coord", "color", "segment"),
77
+ return_grid_coord=True,
78
+ ),
79
+ dict(type="SphereCrop", point_max=100000, mode="random"),
80
+ dict(type="CenterShift", apply_z=False),
81
+ dict(type="NormalizeColor"),
82
+ dict(type="ShufflePoint"),
83
+ dict(type="ToTensor"),
84
+ dict(
85
+ type="Collect",
86
+ keys=("coord", "grid_coord", "segment"),
87
+ feat_keys=["coord", "color"],
88
+ ),
89
+ ],
90
+ test_mode=False,
91
+ ),
92
+ val=dict(
93
+ type=dataset_type,
94
+ split="Area_5",
95
+ data_root=data_root,
96
+ transform=[
97
+ dict(type="CenterShift", apply_z=True),
98
+ dict(
99
+ type="GridSample",
100
+ grid_size=0.05,
101
+ hash_type="fnv",
102
+ mode="train",
103
+ keys=("coord", "color", "segment"),
104
+ return_grid_coord=True,
105
+ ),
106
+ dict(type="CenterShift", apply_z=False),
107
+ dict(type="NormalizeColor"),
108
+ dict(type="ToTensor"),
109
+ dict(
110
+ type="Collect",
111
+ keys=("coord", "grid_coord", "segment"),
112
+ feat_keys=["coord", "color"],
113
+ ),
114
+ ],
115
+ test_mode=False,
116
+ ),
117
+ test=dict(
118
+ type=dataset_type,
119
+ split="Area_5",
120
+ data_root=data_root,
121
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
122
+ test_mode=True,
123
+ test_cfg=dict(
124
+ voxelize=dict(
125
+ type="GridSample",
126
+ grid_size=0.05,
127
+ hash_type="fnv",
128
+ mode="test",
129
+ keys=("coord", "color"),
130
+ return_grid_coord=True,
131
+ ),
132
+ crop=None,
133
+ post_transform=[
134
+ dict(type="CenterShift", apply_z=False),
135
+ dict(type="ToTensor"),
136
+ dict(
137
+ type="Collect",
138
+ keys=("coord", "grid_coord", "index"),
139
+ feat_keys=("coord", "color"),
140
+ ),
141
+ ],
142
+ aug_transform=[
143
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
144
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
145
+ [dict(type="RandomScale", scale=[1, 1])],
146
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
147
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
148
+ [
149
+ dict(type="RandomScale", scale=[0.9, 0.9]),
150
+ dict(type="RandomFlip", p=1),
151
+ ],
152
+ [
153
+ dict(type="RandomScale", scale=[0.95, 0.95]),
154
+ dict(type="RandomFlip", p=1),
155
+ ],
156
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
157
+ [
158
+ dict(type="RandomScale", scale=[1.05, 1.05]),
159
+ dict(type="RandomFlip", p=1),
160
+ ],
161
+ [
162
+ dict(type="RandomScale", scale=[1.1, 1.1]),
163
+ dict(type="RandomFlip", p=1),
164
+ ],
165
+ ],
166
+ ),
167
+ ),
168
+ )
Pointcept/configs/s3dis/semseg-spunet-v1m1-0-cn-base.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # spconv is fast enough that data loading becomes the bottleneck; caching the data is the better choice.
2
+
3
+
4
+ _base_ = ["../_base_/default_runtime.py"]
5
+ # misc custom setting
6
+ batch_size = 12 # bs: total bs in all gpus
7
+ mix_prob = 0.8
8
+ empty_cache = False
9
+ enable_amp = True
10
+
11
+ # model settings
12
+ model = dict(
13
+ type="DefaultSegmentor",
14
+ backbone=dict(
15
+ type="SpUNet-v1m1",
16
+ in_channels=6,
17
+ num_classes=13,
18
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
19
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
20
+ ),
21
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
22
+ )
23
+
24
+ # scheduler settings
25
+ epoch = 3000
26
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
27
+ scheduler = dict(type="PolyLR")
28
+
29
+
30
+ # dataset settings
31
+ dataset_type = "S3DISDataset"
32
+ data_root = "data/s3dis"
33
+
34
+ data = dict(
35
+ num_classes=13,
36
+ ignore_index=-1,
37
+ names=[
38
+ "ceiling",
39
+ "floor",
40
+ "wall",
41
+ "beam",
42
+ "column",
43
+ "window",
44
+ "door",
45
+ "table",
46
+ "chair",
47
+ "sofa",
48
+ "bookcase",
49
+ "board",
50
+ "clutter",
51
+ ],
52
+ train=dict(
53
+ type=dataset_type,
54
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
55
+ data_root=data_root,
56
+ transform=[
57
+ dict(type="CenterShift", apply_z=True),
58
+ dict(
59
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
60
+ ),
61
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
62
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
63
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
64
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
65
+ dict(type="RandomScale", scale=[0.9, 1.1]),
66
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
67
+ dict(type="RandomFlip", p=0.5),
68
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
69
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
70
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
71
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
72
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
73
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
74
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
75
+ dict(
76
+ type="GridSample",
77
+ grid_size=0.05,
78
+ hash_type="fnv",
79
+ mode="train",
80
+ return_grid_coord=True,
81
+ ),
82
+ dict(type="SphereCrop", point_max=100000, mode="random"),
83
+ dict(type="CenterShift", apply_z=False),
84
+ dict(type="NormalizeColor"),
85
+ dict(type="ShufflePoint"),
86
+ dict(type="ToTensor"),
87
+ dict(
88
+ type="Collect",
89
+ keys=("coord", "grid_coord", "segment"),
90
+ feat_keys=["color", "normal"],
91
+ ),
92
+ ],
93
+ test_mode=False,
94
+ ),
95
+ val=dict(
96
+ type=dataset_type,
97
+ split="Area_5",
98
+ data_root=data_root,
99
+ transform=[
100
+ dict(type="CenterShift", apply_z=True),
101
+ dict(
102
+ type="Copy",
103
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
104
+ ),
105
+ dict(
106
+ type="GridSample",
107
+ grid_size=0.05,
108
+ hash_type="fnv",
109
+ mode="train",
110
+ return_grid_coord=True,
111
+ ),
112
+ dict(type="CenterShift", apply_z=False),
113
+ dict(type="NormalizeColor"),
114
+ dict(type="ToTensor"),
115
+ dict(
116
+ type="Collect",
117
+ keys=(
118
+ "coord",
119
+ "grid_coord",
120
+ "origin_coord",
121
+ "segment",
122
+ "origin_segment",
123
+ ),
124
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
125
+ feat_keys=["color", "normal"],
126
+ ),
127
+ ],
128
+ test_mode=False,
129
+ ),
130
+ test=dict(
131
+ type=dataset_type,
132
+ split="Area_5",
133
+ data_root=data_root,
134
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
135
+ test_mode=True,
136
+ test_cfg=dict(
137
+ voxelize=dict(
138
+ type="GridSample",
139
+ grid_size=0.05,
140
+ hash_type="fnv",
141
+ mode="test",
142
+ keys=("coord", "color", "normal"),
143
+ return_grid_coord=True,
144
+ ),
145
+ crop=None,
146
+ post_transform=[
147
+ dict(type="CenterShift", apply_z=False),
148
+ dict(type="ToTensor"),
149
+ dict(
150
+ type="Collect",
151
+ keys=("coord", "grid_coord", "index"),
152
+ feat_keys=("color", "normal"),
153
+ ),
154
+ ],
155
+ aug_transform=[
156
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
157
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
158
+ [dict(type="RandomScale", scale=[1, 1])],
159
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
160
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
161
+ [
162
+ dict(type="RandomScale", scale=[0.9, 0.9]),
163
+ dict(type="RandomFlip", p=1),
164
+ ],
165
+ [
166
+ dict(type="RandomScale", scale=[0.95, 0.95]),
167
+ dict(type="RandomFlip", p=1),
168
+ ],
169
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
170
+ [
171
+ dict(type="RandomScale", scale=[1.05, 1.05]),
172
+ dict(type="RandomFlip", p=1),
173
+ ],
174
+ [
175
+ dict(type="RandomScale", scale=[1.1, 1.1]),
176
+ dict(type="RandomFlip", p=1),
177
+ ],
178
+ ],
179
+ ),
180
+ ),
181
+ )
Pointcept/configs/s3dis/semseg-spunet-v1m2-0-base.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # spconv is fast enough that data loading becomes the bottleneck; caching the data is the better choice.
2
+
3
+
4
+ _base_ = ["../_base_/default_runtime.py"]
5
+ # misc custom setting
6
+ batch_size = 48 # bs: total bs in all gpus
7
+ mix_prob = 0.8
8
+ empty_cache = False
9
+ enable_amp = True
10
+
11
+ # model settings
12
+ model = dict(
13
+ type="DefaultSegmentor",
14
+ backbone=dict(
15
+ type="SpUNet-v1m2",
16
+ in_channels=3,
17
+ num_classes=13,
18
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
19
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
20
+ bn_momentum=0.1,
21
+ ),
22
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
23
+ )
24
+
25
+ # scheduler settings
26
+ epoch = 3000
27
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
28
+ scheduler = dict(type="PolyLR")
29
+
30
+
31
+ # dataset settings
32
+ dataset_type = "S3DISDataset"
33
+ data_root = "data/s3dis"
34
+
35
+ data = dict(
36
+ num_classes=13,
37
+ ignore_index=-1,
38
+ names=[
39
+ "ceiling",
40
+ "floor",
41
+ "wall",
42
+ "beam",
43
+ "column",
44
+ "window",
45
+ "door",
46
+ "table",
47
+ "chair",
48
+ "sofa",
49
+ "bookcase",
50
+ "board",
51
+ "clutter",
52
+ ],
53
+ train=dict(
54
+ type=dataset_type,
55
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
56
+ data_root=data_root,
57
+ transform=[
58
+ dict(type="CenterShift", apply_z=True),
59
+ dict(
60
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
61
+ ),
62
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
63
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
64
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
65
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
66
+ dict(type="RandomScale", scale=[0.9, 1.1]),
67
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
68
+ dict(type="RandomFlip", p=0.5),
69
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
70
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
71
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
72
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
73
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
74
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
75
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
76
+ dict(
77
+ type="GridSample",
78
+ grid_size=0.05,
79
+ hash_type="fnv",
80
+ mode="train",
81
+ keys=("coord", "color", "segment"),
82
+ return_grid_coord=True,
83
+ ),
84
+ dict(type="SphereCrop", point_max=100000, mode="random"),
85
+ dict(type="CenterShift", apply_z=False),
86
+ dict(type="NormalizeColor"),
87
+ dict(type="ShufflePoint"),
88
+ dict(type="ToTensor"),
89
+ dict(
90
+ type="Collect",
91
+ keys=("coord", "grid_coord", "segment"),
92
+ feat_keys=["color"],
93
+ ),
94
+ ],
95
+ test_mode=False,
96
+ ),
97
+ val=dict(
98
+ type=dataset_type,
99
+ split="Area_5",
100
+ data_root=data_root,
101
+ transform=[
102
+ dict(type="CenterShift", apply_z=True),
103
+ dict(
104
+ type="Copy",
105
+ keys_dict={"coord": "origin_coord", "segment": "origin_segment"},
106
+ ),
107
+ dict(
108
+ type="GridSample",
109
+ grid_size=0.05,
110
+ hash_type="fnv",
111
+ mode="train",
112
+ keys=("coord", "color", "segment"),
113
+ return_grid_coord=True,
114
+ ),
115
+ dict(type="CenterShift", apply_z=False),
116
+ dict(type="NormalizeColor"),
117
+ dict(type="ToTensor"),
118
+ dict(
119
+ type="Collect",
120
+ keys=(
121
+ "coord",
122
+ "grid_coord",
123
+ "origin_coord",
124
+ "segment",
125
+ "origin_segment",
126
+ ),
127
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
128
+ feat_keys=["color"],
129
+ ),
130
+ ],
131
+ test_mode=False,
132
+ ),
133
+ test=dict(
134
+ type=dataset_type,
135
+ split="Area_5",
136
+ data_root=data_root,
137
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
138
+ test_mode=True,
139
+ test_cfg=dict(
140
+ voxelize=dict(
141
+ type="GridSample",
142
+ grid_size=0.05,
143
+ hash_type="fnv",
144
+ mode="test",
145
+ keys=("coord", "color"),
146
+ return_grid_coord=True,
147
+ ),
148
+ crop=None,
149
+ post_transform=[
150
+ dict(type="CenterShift", apply_z=False),
151
+ dict(type="ToTensor"),
152
+ dict(
153
+ type="Collect",
154
+ keys=("coord", "grid_coord", "index"),
155
+ feat_keys=("coord", "color"),
156
+ ),
157
+ ],
158
+ aug_transform=[
159
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
160
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
161
+ [dict(type="RandomScale", scale=[1, 1])],
162
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
163
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
164
+ [
165
+ dict(type="RandomScale", scale=[0.9, 0.9]),
166
+ dict(type="RandomFlip", p=1),
167
+ ],
168
+ [
169
+ dict(type="RandomScale", scale=[0.95, 0.95]),
170
+ dict(type="RandomFlip", p=1),
171
+ ],
172
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
173
+ [
174
+ dict(type="RandomScale", scale=[1.05, 1.05]),
175
+ dict(type="RandomFlip", p=1),
176
+ ],
177
+ [
178
+ dict(type="RandomScale", scale=[1.1, 1.1]),
179
+ dict(type="RandomFlip", p=1),
180
+ ],
181
+ ],
182
+ ),
183
+ ),
184
+ )
Pointcept/configs/s3dis/semseg-swin3d-v1m1-0-small.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+ # misc custom setting
3
+ batch_size = 12 # bs: total bs in all gpus
4
+ mix_prob = 0.8
5
+ empty_cache = False
6
+ enable_amp = True
7
+
8
+ # model settings
9
+ model = dict(
10
+ type="DefaultSegmentor",
11
+ backbone=dict(
12
+ type="Swin3D-v1m1",
13
+ in_channels=9,
14
+ num_classes=13,
15
+ base_grid_size=0.02,
16
+ depths=[2, 4, 9, 4, 4],
17
+ channels=[48, 96, 192, 384, 384],
18
+ num_heads=[6, 6, 12, 24, 24],
19
+ window_sizes=[5, 7, 7, 7, 7],
20
+ quant_size=4,
21
+ drop_path_rate=0.3,
22
+ up_k=3,
23
+ num_layers=5,
24
+ stem_transformer=True,
25
+ down_stride=3,
26
+ upsample="linear_attn",
27
+ knn_down=True,
28
+ cRSE="XYZ_RGB_NORM",
29
+ fp16_mode=1,
30
+ ),
31
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
32
+ )
33
+
34
+ # scheduler settings
35
+ epoch = 3000
36
+ optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.05)
37
+ scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
38
+ param_dicts = [dict(keyword="blocks", lr=0.0001)]
39
+
40
+ # dataset settings
41
+ dataset_type = "S3DISDataset"
42
+ data_root = "data/s3dis"
43
+
44
+ data = dict(
45
+ num_classes=13,
46
+ ignore_index=-1,
47
+ names=[
48
+ "ceiling",
49
+ "floor",
50
+ "wall",
51
+ "beam",
52
+ "column",
53
+ "window",
54
+ "door",
55
+ "table",
56
+ "chair",
57
+ "sofa",
58
+ "bookcase",
59
+ "board",
60
+ "clutter",
61
+ ],
62
+ train=dict(
63
+ type=dataset_type,
64
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
65
+ data_root=data_root,
66
+ transform=[
67
+ dict(type="CenterShift", apply_z=True),
68
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
69
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
70
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
71
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
72
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
73
+ dict(type="RandomScale", scale=[0.8, 1.2]),
74
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
75
+ dict(type="RandomFlip", p=0.5),
76
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
77
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
78
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
79
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
80
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
81
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
82
+ dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
83
+ dict(
84
+ type="GridSample",
85
+ grid_size=0.04,
86
+ hash_type="fnv",
87
+ mode="train",
88
+ return_grid_coord=True,
89
+ return_displacement=True,
90
+ ),
91
+ dict(type="SphereCrop", point_max=80000, mode="random"),
92
+ dict(type="CenterShift", apply_z=False),
93
+ dict(type="NormalizeColor"),
94
+ # dict(type="ShufflePoint"),
95
+ dict(type="ToTensor"),
96
+ dict(
97
+ type="Collect",
98
+ keys=("coord", "grid_coord", "segment"),
99
+ feat_keys=("color", "normal", "displacement"),
100
+ coord_feat_keys=("color", "normal"),
101
+ ),
102
+ ],
103
+ test_mode=False,
104
+ ),
105
+ val=dict(
106
+ type=dataset_type,
107
+ split="Area_5",
108
+ data_root=data_root,
109
+ transform=[
110
+ dict(type="CenterShift", apply_z=True),
111
+ dict(
112
+ type="GridSample",
113
+ grid_size=0.04,
114
+ hash_type="fnv",
115
+ mode="train",
116
+ return_grid_coord=True,
117
+ return_displacement=True,
118
+ ),
119
+ dict(type="CenterShift", apply_z=False),
120
+ dict(type="NormalizeColor"),
121
+ dict(type="ToTensor"),
122
+ dict(
123
+ type="Collect",
124
+ keys=("coord", "grid_coord", "segment"),
125
+ feat_keys=("color", "normal", "displacement"),
126
+ coord_feat_keys=("color", "normal"),
127
+ ),
128
+ ],
129
+ test_mode=False,
130
+ ),
131
+ test=dict(
132
+ type=dataset_type,
133
+ split="Area_5",
134
+ data_root=data_root,
135
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
136
+ test_mode=True,
137
+ test_cfg=dict(
138
+ voxelize=dict(
139
+ type="GridSample",
140
+ grid_size=0.04,
141
+ hash_type="fnv",
142
+ mode="test",
143
+ keys=("coord", "color", "normal"),
144
+ return_grid_coord=True,
145
+ return_displacement=True,
146
+ ),
147
+ crop=None,
148
+ post_transform=[
149
+ dict(type="CenterShift", apply_z=False),
150
+ dict(type="ToTensor"),
151
+ dict(
152
+ type="Collect",
153
+ keys=("coord", "grid_coord", "index"),
154
+ feat_keys=("color", "normal", "displacement"),
155
+ coord_feat_keys=("color", "normal"),
156
+ ),
157
+ ],
158
+ aug_transform=[
159
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
160
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
161
+ [dict(type="RandomScale", scale=[1, 1])],
162
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
163
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
164
+ [
165
+ dict(type="RandomScale", scale=[0.9, 0.9]),
166
+ dict(type="RandomFlip", p=1),
167
+ ],
168
+ [
169
+ dict(type="RandomScale", scale=[0.95, 0.95]),
170
+ dict(type="RandomFlip", p=1),
171
+ ],
172
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
173
+ [
174
+ dict(type="RandomScale", scale=[1.05, 1.05]),
175
+ dict(type="RandomFlip", p=1),
176
+ ],
177
+ [
178
+ dict(type="RandomScale", scale=[1.1, 1.1]),
179
+ dict(type="RandomFlip", p=1),
180
+ ],
181
+ ],
182
+ ),
183
+ ),
184
+ )
Pointcept/configs/s3dis/semseg-swin3d-v1m1-1-large.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+ # misc custom setting
3
+ batch_size = 12 # bs: total bs in all gpus
4
+ mix_prob = 0.8
5
+ empty_cache = False
6
+ enable_amp = True
7
+
8
+ # model settings
9
+ model = dict(
10
+ type="DefaultSegmentor",
11
+ backbone=dict(
12
+ type="Swin3D-v1m1",
13
+ in_channels=9,
14
+ num_classes=13,
15
+ base_grid_size=0.02,
16
+ depths=[2, 4, 9, 4, 4],
17
+ channels=[80, 160, 320, 640, 640],
18
+ num_heads=[10, 10, 20, 40, 40],
19
+ window_sizes=[5, 7, 7, 7, 7],
20
+ quant_size=4,
21
+ drop_path_rate=0.3,
22
+ up_k=3,
23
+ num_layers=5,
24
+ stem_transformer=True,
25
+ down_stride=3,
26
+ upsample="linear_attn",
27
+ knn_down=True,
28
+ cRSE="XYZ_RGB_NORM",
29
+ fp16_mode=1,
30
+ ),
31
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
32
+ )
33
+
34
+ # scheduler settings
35
+ epoch = 3000
36
+ optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.05)
37
+ scheduler = dict(
38
+ type="OneCycleLR",
39
+ max_lr=[0.001, 0.0001],
40
+ pct_start=0.05,
41
+ anneal_strategy="cos",
42
+ div_factor=10.0,
43
+ final_div_factor=1000.0,
44
+ )
45
+ param_dicts = [dict(keyword="blocks", lr=0.0001)]
46
+
47
+ # dataset settings
48
+ dataset_type = "S3DISDataset"
49
+ data_root = "data/s3dis"
50
+
51
+ data = dict(
52
+ num_classes=13,
53
+ ignore_index=-1,
54
+ names=[
55
+ "ceiling",
56
+ "floor",
57
+ "wall",
58
+ "beam",
59
+ "column",
60
+ "window",
61
+ "door",
62
+ "table",
63
+ "chair",
64
+ "sofa",
65
+ "bookcase",
66
+ "board",
67
+ "clutter",
68
+ ],
69
+ train=dict(
70
+ type=dataset_type,
71
+ split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"),
72
+ data_root=data_root,
73
+ transform=[
74
+ dict(type="CenterShift", apply_z=True),
75
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
76
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
77
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
78
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
79
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
80
+ dict(type="RandomScale", scale=[0.8, 1.2]),
81
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
82
+ dict(type="RandomFlip", p=0.5),
83
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
84
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
85
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
86
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
87
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
88
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
89
+ dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
90
+ dict(
91
+ type="GridSample",
92
+ grid_size=0.04,
93
+ hash_type="fnv",
94
+ mode="train",
95
+ return_grid_coord=True,
96
+ return_displacement=True,
97
+ ),
98
+ dict(type="SphereCrop", point_max=80000, mode="random"),
99
+ dict(type="CenterShift", apply_z=False),
100
+ dict(type="NormalizeColor"),
101
+ # dict(type="ShufflePoint"),
102
+ dict(type="ToTensor"),
103
+ dict(
104
+ type="Collect",
105
+ keys=("coord", "grid_coord", "segment"),
106
+ feat_keys=("color", "normal", "displacement"),
107
+ coord_feat_keys=("color", "normal"),
108
+ ),
109
+ ],
110
+ test_mode=False,
111
+ ),
112
+ val=dict(
113
+ type=dataset_type,
114
+ split="Area_5",
115
+ data_root=data_root,
116
+ transform=[
117
+ dict(type="CenterShift", apply_z=True),
118
+ dict(
119
+ type="GridSample",
120
+ grid_size=0.04,
121
+ hash_type="fnv",
122
+ mode="train",
123
+ return_grid_coord=True,
124
+ return_displacement=True,
125
+ ),
126
+ dict(type="CenterShift", apply_z=False),
127
+ dict(type="NormalizeColor"),
128
+ dict(type="ToTensor"),
129
+ dict(
130
+ type="Collect",
131
+ keys=("coord", "grid_coord", "segment"),
132
+ feat_keys=("color", "normal", "displacement"),
133
+ coord_feat_keys=("color", "normal"),
134
+ ),
135
+ ],
136
+ test_mode=False,
137
+ ),
138
+ test=dict(
139
+ type=dataset_type,
140
+ split="Area_5",
141
+ data_root=data_root,
142
+ transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")],
143
+ test_mode=True,
144
+ test_cfg=dict(
145
+ voxelize=dict(
146
+ type="GridSample",
147
+ grid_size=0.04,
148
+ hash_type="fnv",
149
+ mode="test",
150
+ keys=("coord", "color", "normal"),
151
+ return_grid_coord=True,
152
+ return_displacement=True,
153
+ ),
154
+ crop=None,
155
+ post_transform=[
156
+ dict(type="CenterShift", apply_z=False),
157
+ dict(type="ToTensor"),
158
+ dict(
159
+ type="Collect",
160
+ keys=("coord", "grid_coord", "index"),
161
+ feat_keys=("color", "normal", "displacement"),
162
+ coord_feat_keys=("color", "normal"),
163
+ ),
164
+ ],
165
+ aug_transform=[
166
+ [dict(type="RandomScale", scale=[0.9, 0.9])],
167
+ [dict(type="RandomScale", scale=[0.95, 0.95])],
168
+ [dict(type="RandomScale", scale=[1, 1])],
169
+ [dict(type="RandomScale", scale=[1.05, 1.05])],
170
+ [dict(type="RandomScale", scale=[1.1, 1.1])],
171
+ [
172
+ dict(type="RandomScale", scale=[0.9, 0.9]),
173
+ dict(type="RandomFlip", p=1),
174
+ ],
175
+ [
176
+ dict(type="RandomScale", scale=[0.95, 0.95]),
177
+ dict(type="RandomFlip", p=1),
178
+ ],
179
+ [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
180
+ [
181
+ dict(type="RandomScale", scale=[1.05, 1.05]),
182
+ dict(type="RandomFlip", p=1),
183
+ ],
184
+ [
185
+ dict(type="RandomScale", scale=[1.1, 1.1]),
186
+ dict(type="RandomFlip", p=1),
187
+ ],
188
+ ],
189
+ ),
190
+ ),
191
+ )
Pointcept/configs/scannet/insseg-pointgroup-v1m1-0-spunet-base.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total bs in all gpus
5
+ num_worker = 12
6
+ mix_prob = 0
7
+ empty_cache = False
8
+ enable_amp = True
9
+ evaluate = True
10
+
11
+ class_names = [
12
+ "wall",
13
+ "floor",
14
+ "cabinet",
15
+ "bed",
16
+ "chair",
17
+ "sofa",
18
+ "table",
19
+ "door",
20
+ "window",
21
+ "bookshelf",
22
+ "picture",
23
+ "counter",
24
+ "desk",
25
+ "curtain",
26
+ "refridgerator",
27
+ "shower curtain",
28
+ "toilet",
29
+ "sink",
30
+ "bathtub",
31
+ "otherfurniture",
32
+ ]
33
+ num_classes = 20
34
+ segment_ignore_index = (-1, 0, 1)
35
+
36
+ # model settings
37
+ model = dict(
38
+ type="PG-v1m1",
39
+ backbone=dict(
40
+ type="SpUNet-v1m1",
41
+ in_channels=6,
42
+ num_classes=0,
43
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
44
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
45
+ ),
46
+ backbone_out_channels=96,
47
+ semantic_num_classes=num_classes,
48
+ semantic_ignore_index=-1,
49
+ segment_ignore_index=segment_ignore_index,
50
+ instance_ignore_index=-1,
51
+ cluster_thresh=1.5,
52
+ cluster_closed_points=300,
53
+ cluster_propose_points=100,
54
+ cluster_min_points=50,
55
+ )
56
+
57
+ # scheduler settings
58
+ epoch = 800
59
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
60
+ scheduler = dict(type="PolyLR")
61
+
62
+ # dataset settings
63
+ dataset_type = "ScanNetDataset"
64
+ data_root = "data/scannet"
65
+
66
+ data = dict(
67
+ num_classes=num_classes,
68
+ ignore_index=-1,
69
+ names=class_names,
70
+ train=dict(
71
+ type=dataset_type,
72
+ split="train",
73
+ data_root=data_root,
74
+ transform=[
75
+ dict(type="CenterShift", apply_z=True),
76
+ dict(
77
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
78
+ ),
79
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
80
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
81
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
82
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
83
+ dict(type="RandomScale", scale=[0.9, 1.1]),
84
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
85
+ dict(type="RandomFlip", p=0.5),
86
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
87
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
88
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
89
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
90
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
91
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
92
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
93
+ dict(
94
+ type="GridSample",
95
+ grid_size=0.02,
96
+ hash_type="fnv",
97
+ mode="train",
98
+ return_grid_coord=True,
99
+ keys=("coord", "color", "normal", "segment", "instance"),
100
+ ),
101
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
102
+ dict(type="NormalizeColor"),
103
+ dict(
104
+ type="InstanceParser",
105
+ segment_ignore_index=segment_ignore_index,
106
+ instance_ignore_index=-1,
107
+ ),
108
+ dict(type="ToTensor"),
109
+ dict(
110
+ type="Collect",
111
+ keys=(
112
+ "coord",
113
+ "grid_coord",
114
+ "segment",
115
+ "instance",
116
+ "instance_centroid",
117
+ "bbox",
118
+ ),
119
+ feat_keys=("color", "normal"),
120
+ ),
121
+ ],
122
+ test_mode=False,
123
+ ),
124
+ val=dict(
125
+ type=dataset_type,
126
+ split="val",
127
+ data_root=data_root,
128
+ transform=[
129
+ dict(type="CenterShift", apply_z=True),
130
+ dict(
131
+ type="Copy",
132
+ keys_dict={
133
+ "coord": "origin_coord",
134
+ "segment": "origin_segment",
135
+ "instance": "origin_instance",
136
+ },
137
+ ),
138
+ dict(
139
+ type="GridSample",
140
+ grid_size=0.02,
141
+ hash_type="fnv",
142
+ mode="train",
143
+ return_grid_coord=True,
144
+ keys=("coord", "color", "normal", "segment", "instance"),
145
+ ),
146
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
147
+ dict(type="CenterShift", apply_z=False),
148
+ dict(type="NormalizeColor"),
149
+ dict(
150
+ type="InstanceParser",
151
+ segment_ignore_index=segment_ignore_index,
152
+ instance_ignore_index=-1,
153
+ ),
154
+ dict(type="ToTensor"),
155
+ dict(
156
+ type="Collect",
157
+ keys=(
158
+ "coord",
159
+ "grid_coord",
160
+ "segment",
161
+ "instance",
162
+ "origin_coord",
163
+ "origin_segment",
164
+ "origin_instance",
165
+ "instance_centroid",
166
+ "bbox",
167
+ ),
168
+ feat_keys=("color", "normal"),
169
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
170
+ ),
171
+ ],
172
+ test_mode=False,
173
+ ),
174
+ test=dict(), # currently not available
175
+ )
176
+
177
+ hooks = [
178
+ dict(type="CheckpointLoader", keywords="module.", replacement="module."),
179
+ dict(type="IterationTimer", warmup_iter=2),
180
+ dict(type="InformationWriter"),
181
+ dict(
182
+ type="InsSegEvaluator",
183
+ segment_ignore_index=segment_ignore_index,
184
+ instance_ignore_index=-1,
185
+ ),
186
+ dict(type="CheckpointSaver", save_freq=None),
187
+ ]
Pointcept/configs/scannet/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total bs in all gpus
5
+ num_worker = 24
6
+ mix_prob = 0
7
+ empty_cache = False
8
+ enable_amp = True
9
+ evaluate = True
10
+ find_unused_parameters = True
11
+
12
+ class_names = [
13
+ "wall",
14
+ "floor",
15
+ "cabinet",
16
+ "bed",
17
+ "chair",
18
+ "sofa",
19
+ "table",
20
+ "door",
21
+ "window",
22
+ "bookshelf",
23
+ "picture",
24
+ "counter",
25
+ "desk",
26
+ "curtain",
27
+ "refridgerator",
28
+ "shower curtain",
29
+ "toilet",
30
+ "sink",
31
+ "bathtub",
32
+ "otherfurniture",
33
+ ]
34
+ num_classes = 20
35
+ segment_ignore_index = (-1, 0, 1)
36
+
37
+ # model settings
38
+ model = dict(
39
+ type="PG-v1m1",
40
+ backbone=dict(
41
+ type="PPT-v1m1",
42
+ backbone=dict(
43
+ type="SpUNet-v1m3",
44
+ in_channels=6,
45
+ num_classes=0,
46
+ base_channels=32,
47
+ context_channels=256,
48
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
49
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
50
+ cls_mode=False,
51
+ conditions=("ScanNet", "S3DIS", "Structured3D"),
52
+ zero_init=False,
53
+ norm_decouple=True,
54
+ norm_adaptive=True,
55
+ norm_affine=True,
56
+ ),
57
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
58
+ backbone_out_channels=96,
59
+ context_channels=256,
60
+ conditions=("Structured3D", "ScanNet", "S3DIS"),
61
+ template="[x]",
62
+ clip_model="ViT-B/16",
63
+ class_name=(
64
+ "wall",
65
+ "floor",
66
+ "cabinet",
67
+ "bed",
68
+ "chair",
69
+ "sofa",
70
+ "table",
71
+ "door",
72
+ "window",
73
+ "bookshelf",
74
+ "bookcase",
75
+ "picture",
76
+ "counter",
77
+ "desk",
78
+ "shelves",
79
+ "curtain",
80
+ "dresser",
81
+ "pillow",
82
+ "mirror",
83
+ "ceiling",
84
+ "refrigerator",
85
+ "television",
86
+ "shower curtain",
87
+ "nightstand",
88
+ "toilet",
89
+ "sink",
90
+ "lamp",
91
+ "bathtub",
92
+ "garbagebin",
93
+ "board",
94
+ "beam",
95
+ "column",
96
+ "clutter",
97
+ "otherstructure",
98
+ "otherfurniture",
99
+ "otherprop",
100
+ ),
101
+ valid_index=(
102
+ (
103
+ 0,
104
+ 1,
105
+ 2,
106
+ 3,
107
+ 4,
108
+ 5,
109
+ 6,
110
+ 7,
111
+ 8,
112
+ 11,
113
+ 13,
114
+ 14,
115
+ 15,
116
+ 16,
117
+ 17,
118
+ 18,
119
+ 19,
120
+ 20,
121
+ 21,
122
+ 23,
123
+ 25,
124
+ 26,
125
+ 33,
126
+ 34,
127
+ 35,
128
+ ),
129
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),
130
+ (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
131
+ ),
132
+ backbone_mode=True,
133
+ ),
134
+ backbone_out_channels=96,
135
+ semantic_num_classes=num_classes,
136
+ semantic_ignore_index=-1,
137
+ segment_ignore_index=segment_ignore_index,
138
+ instance_ignore_index=-1,
139
+ cluster_thresh=1.5,
140
+ cluster_closed_points=300,
141
+ cluster_propose_points=100,
142
+ cluster_min_points=50,
143
+ )
144
+
145
+ # scheduler settings
146
+ epoch = 800
147
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
148
+ scheduler = dict(type="PolyLR")
149
+
150
+ # dataset settings
151
+ dataset_type = "ScanNetDataset"
152
+ data_root = "data/scannet"
153
+
154
+ data = dict(
155
+ num_classes=num_classes,
156
+ ignore_index=-1,
157
+ names=class_names,
158
+ train=dict(
159
+ type=dataset_type,
160
+ split="train",
161
+ data_root=data_root,
162
+ transform=[
163
+ dict(type="CenterShift", apply_z=True),
164
+ dict(
165
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5
166
+ ),
167
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
168
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
169
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
170
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
171
+ dict(type="RandomScale", scale=[0.9, 1.1]),
172
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
173
+ dict(type="RandomFlip", p=0.5),
174
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
175
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
176
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
177
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
178
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
179
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
180
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
181
+ dict(
182
+ type="GridSample",
183
+ grid_size=0.02,
184
+ hash_type="fnv",
185
+ mode="train",
186
+ return_grid_coord=True,
187
+ keys=("coord", "color", "normal", "segment", "instance"),
188
+ ),
189
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
190
+ dict(type="NormalizeColor"),
191
+ dict(
192
+ type="InstanceParser",
193
+ segment_ignore_index=segment_ignore_index,
194
+ instance_ignore_index=-1,
195
+ ),
196
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
197
+ dict(type="ToTensor"),
198
+ dict(
199
+ type="Collect",
200
+ keys=(
201
+ "coord",
202
+ "grid_coord",
203
+ "segment",
204
+ "instance",
205
+ "instance_centroid",
206
+ "bbox",
207
+ "condition",
208
+ ),
209
+ feat_keys=("color", "normal"),
210
+ ),
211
+ ],
212
+ test_mode=False,
213
+ ),
214
+ val=dict(
215
+ type=dataset_type,
216
+ split="val",
217
+ data_root=data_root,
218
+ transform=[
219
+ dict(type="CenterShift", apply_z=True),
220
+ dict(
221
+ type="Copy",
222
+ keys_dict={
223
+ "coord": "origin_coord",
224
+ "segment": "origin_segment",
225
+ "instance": "origin_instance",
226
+ },
227
+ ),
228
+ dict(
229
+ type="GridSample",
230
+ grid_size=0.02,
231
+ hash_type="fnv",
232
+ mode="train",
233
+ return_grid_coord=True,
234
+ keys=("coord", "color", "normal", "segment", "instance"),
235
+ ),
236
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
237
+ dict(type="CenterShift", apply_z=False),
238
+ dict(type="NormalizeColor"),
239
+ dict(
240
+ type="InstanceParser",
241
+ segment_ignore_index=segment_ignore_index,
242
+ instance_ignore_index=-1,
243
+ ),
244
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
245
+ dict(type="ToTensor"),
246
+ dict(
247
+ type="Collect",
248
+ keys=(
249
+ "coord",
250
+ "grid_coord",
251
+ "segment",
252
+ "instance",
253
+ "origin_coord",
254
+ "origin_segment",
255
+ "origin_instance",
256
+ "instance_centroid",
257
+ "bbox",
258
+ "condition",
259
+ ),
260
+ feat_keys=("color", "normal"),
261
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
262
+ ),
263
+ ],
264
+ test_mode=False,
265
+ ),
266
+ test=dict(), # currently not available
267
+ )
268
+
269
+ hooks = [
270
+ dict(type="CheckpointLoader", keywords="module.", replacement="module.backbone."),
271
+ dict(type="IterationTimer", warmup_iter=2),
272
+ dict(type="InformationWriter"),
273
+ dict(
274
+ type="InsSegEvaluator",
275
+ segment_ignore_index=segment_ignore_index,
276
+ instance_ignore_index=-1,
277
+ ),
278
+ dict(type="CheckpointSaver", save_freq=None),
279
+ ]
Pointcept/configs/scannet/objdet-cagroup3d-v1m1-0-base.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 16 # bs: total bs in all gpus
5
+ num_worker = 32
6
+ mix_prob = 0
7
+ empty_cache = False
8
+ enable_amp = False
9
+ evaluate = True
10
+
11
+ class_names = [
12
+ "wall",
13
+ "floor",
14
+ "cabinet",
15
+ "bed",
16
+ "chair",
17
+ "sofa",
18
+ "table",
19
+ "door",
20
+ "window",
21
+ "bookshelf",
22
+ "picture",
23
+ "counter",
24
+ "desk",
25
+ "curtain",
26
+ "refridgerator",
27
+ "shower curtain",
28
+ "toilet",
29
+ "sink",
30
+ "bathtub",
31
+ "otherfurniture",
32
+ ]
33
+ num_classes = 20
34
+ segment_ignore_index = (-1, 0, 1)
35
+
36
+ # model settings
37
+ model = dict(
38
+ type="PG-v1m1",
39
+ backbone=dict(
40
+ type="SpUNet-v1m1",
41
+ in_channels=6,
42
+ num_classes=0,
43
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
44
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
45
+ ),
46
+ backbone_out_channels=96,
47
+ semantic_num_classes=num_classes,
48
+ semantic_ignore_index=-1,
49
+ segment_ignore_index=segment_ignore_index,
50
+ instance_ignore_index=-1,
51
+ cluster_thresh=1.5,
52
+ cluster_closed_points=300,
53
+ cluster_propose_points=100,
54
+ cluster_min_points=50,
55
+ )
56
+
57
+ # scheduler settings
58
+ epoch = 800
59
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
60
+ scheduler = dict(type="PolyLR")
61
+
62
+ # dataset settings
63
+ dataset_type = "ScanNetDataset"
64
+ data_root = "data/scannet"
65
+
66
+ data = dict(
67
+ num_classes=num_classes,
68
+ ignore_index=-1,
69
+ names=class_names,
70
+ train=dict(
71
+ type=dataset_type,
72
+ split="train",
73
+ data_root=data_root,
74
+ transform=[
75
+ # dict(type="CenterShift", apply_z=True),
76
+ # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5),
77
+ # # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
78
+ # dict(type="RandomRotate", angle=[-1, 1], axis='z', center=[0, 0, 0], p=0.5),
79
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis='x', p=0.5),
80
+ # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis='y', p=0.5),
81
+ # dict(type="RandomScale", scale=[0.9, 1.1]),
82
+ # # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
83
+ # dict(type="RandomFlip", p=0.5),
84
+ # dict(type="RandomJitter", sigma=0.005, clip=0.02),
85
+ # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
86
+ # dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
87
+ # dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
88
+ # dict(type="ChromaticJitter", p=0.95, std=0.05),
89
+ # # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
90
+ # # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
91
+ # dict(type="GridSample",
92
+ # grid_size=0.02,
93
+ # hash_type='fnv',
94
+ # mode='train',
95
+ # return_grid_coord=True,
96
+ # keys=("coord", "color", "normal", "segment", "instance")),
97
+ # dict(type="SphereCrop", sample_rate=0.8, mode='random'),
98
+ # dict(type="NormalizeColor"),
99
+ dict(
100
+ type="InstanceParser",
101
+ segment_ignore_index=segment_ignore_index,
102
+ instance_ignore_index=-1,
103
+ ),
104
+ dict(type="ToTensor"),
105
+ dict(
106
+ type="Collect",
107
+ keys=(
108
+ "coord",
109
+ "grid_coord",
110
+ "segment",
111
+ "instance",
112
+ "instance_centroid",
113
+ "bbox",
114
+ ),
115
+ feat_keys=("color", "normal"),
116
+ ),
117
+ ],
118
+ test_mode=False,
119
+ ),
120
+ val=dict(
121
+ type=dataset_type,
122
+ split="val",
123
+ data_root=data_root,
124
+ transform=[
125
+ dict(type="CenterShift", apply_z=True),
126
+ dict(
127
+ type="Copy",
128
+ keys_dict={
129
+ "coord": "origin_coord",
130
+ "segment": "origin_segment",
131
+ "instance": "origin_instance",
132
+ },
133
+ ),
134
+ dict(
135
+ type="GridSample",
136
+ grid_size=0.02,
137
+ hash_type="fnv",
138
+ mode="train",
139
+ return_grid_coord=True,
140
+ keys=("coord", "color", "normal", "segment", "instance"),
141
+ ),
142
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
143
+ dict(type="CenterShift", apply_z=False),
144
+ dict(type="NormalizeColor"),
145
+ dict(
146
+ type="InstanceParser",
147
+ segment_ignore_index=segment_ignore_index,
148
+ instance_ignore_index=-1,
149
+ ),
150
+ dict(type="ToTensor"),
151
+ dict(
152
+ type="Collect",
153
+ keys=(
154
+ "coord",
155
+ "grid_coord",
156
+ "segment",
157
+ "instance",
158
+ "origin_coord",
159
+ "origin_segment",
160
+ "origin_instance",
161
+ "instance_centroid",
162
+ "bbox",
163
+ ),
164
+ feat_keys=("color", "normal"),
165
+ offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"),
166
+ ),
167
+ ],
168
+ test_mode=False,
169
+ ),
170
+ test=dict(), # currently not available
171
+ )
172
+
173
+ hooks = [
174
+ dict(type="CheckpointLoader", keywords="module.", replacement="module."),
175
+ dict(type="IterationTimer", warmup_iter=2),
176
+ dict(type="InformationWriter"),
177
+ dict(
178
+ type="InsSegEvaluator",
179
+ segment_ignore_index=segment_ignore_index,
180
+ instance_ignore_index=-1,
181
+ ),
182
+ dict(type="CheckpointSaver", save_freq=None),
183
+ ]
Pointcept/configs/scannet/pretrain-msc-v1m1-0-spunet-base.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 32 # bs: total bs in all gpus
5
+ num_worker = 32
6
+ mix_prob = 0
7
+ empty_cache = False
8
+ enable_amp = False
9
+ evaluate = False
10
+ find_unused_parameters = False
11
+
12
+ # model settings
13
+ model = dict(
14
+ type="MSC-v1m1",
15
+ backbone=dict(
16
+ type="SpUNet-v1m1",
17
+ in_channels=6,
18
+ num_classes=0,
19
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
20
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
21
+ ),
22
+ backbone_in_channels=6,
23
+ backbone_out_channels=96,
24
+ mask_grid_size=0.1,
25
+ mask_rate=0.4,
26
+ view1_mix_prob=0.8,
27
+ view2_mix_prob=0,
28
+ matching_max_k=8,
29
+ matching_max_radius=0.03,
30
+ matching_max_pair=8192,
31
+ nce_t=0.4,
32
+ contrast_weight=1,
33
+ reconstruct_weight=1,
34
+ reconstruct_color=True,
35
+ reconstruct_normal=False,
36
+ )
37
+
38
+ # scheduler settings
39
+ epoch = 600
40
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.8, weight_decay=0.0001, nesterov=True)
41
+ scheduler = dict(
42
+ type="OneCycleLR",
43
+ max_lr=optimizer["lr"],
44
+ pct_start=0.01,
45
+ anneal_strategy="cos",
46
+ div_factor=10.0,
47
+ final_div_factor=10000.0,
48
+ )
49
+
50
+ # dataset settings
51
+ dataset_type = "ScanNetDataset"
52
+ data_root = "data/scannet"
53
+
54
+ data = dict(
55
+ num_classes=20,
56
+ ignore_index=-1,
57
+ names=[
58
+ "wall",
59
+ "floor",
60
+ "cabinet",
61
+ "bed",
62
+ "chair",
63
+ "sofa",
64
+ "table",
65
+ "door",
66
+ "window",
67
+ "bookshelf",
68
+ "picture",
69
+ "counter",
70
+ "desk",
71
+ "curtain",
72
+ "refridgerator",
73
+ "shower curtain",
74
+ "toilet",
75
+ "sink",
76
+ "bathtub",
77
+ "otherfurniture",
78
+ ],
79
+ train=dict(
80
+ type=dataset_type,
81
+ split=["train", "val", "test"],
82
+ data_root=data_root,
83
+ transform=[
84
+ dict(type="CenterShift", apply_z=True),
85
+ dict(type="RandomScale", scale=[0.9, 1.1]),
86
+ dict(type="Copy", keys_dict={"coord": "origin_coord"}),
87
+ dict(
88
+ type="ContrastiveViewsGenerator",
89
+ view_keys=("coord", "color", "normal", "origin_coord"),
90
+ view_trans_cfg=[
91
+ dict(
92
+ type="RandomRotate",
93
+ angle=[-1, 1],
94
+ axis="z",
95
+ center=[0, 0, 0],
96
+ p=1,
97
+ ),
98
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1),
99
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1),
100
+ dict(type="RandomFlip", p=0.5),
101
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
102
+ dict(
103
+ type="RandomColorJitter",
104
+ brightness=0.4,
105
+ contrast=0.4,
106
+ saturation=0.2,
107
+ hue=0.02,
108
+ p=0.8,
109
+ ),
110
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
111
+ dict(
112
+ type="GridSample",
113
+ grid_size=0.02,
114
+ hash_type="fnv",
115
+ mode="train",
116
+ keys=("origin_coord", "coord", "color", "normal"),
117
+ return_grid_coord=True,
118
+ ),
119
+ dict(type="SphereCrop", sample_rate=0.6, mode="random"),
120
+ dict(type="CenterShift", apply_z=False),
121
+ dict(type="NormalizeColor"),
122
+ ],
123
+ ),
124
+ dict(type="ToTensor"),
125
+ dict(
126
+ type="Collect",
127
+ keys=(
128
+ "view1_origin_coord",
129
+ "view1_grid_coord",
130
+ "view1_coord",
131
+ "view1_color",
132
+ "view1_normal",
133
+ "view2_origin_coord",
134
+ "view2_grid_coord",
135
+ "view2_coord",
136
+ "view2_color",
137
+ "view2_normal",
138
+ ),
139
+ offset_keys_dict=dict(
140
+ view1_offset="view1_coord", view2_offset="view2_coord"
141
+ ),
142
+ view1_feat_keys=("view1_color", "view1_normal"),
143
+ view2_feat_keys=("view2_color", "view2_normal"),
144
+ ),
145
+ ],
146
+ test_mode=False,
147
+ ),
148
+ )
149
+
150
+ hooks = [
151
+ dict(type="CheckpointLoader"),
152
+ dict(type="IterationTimer", warmup_iter=2),
153
+ dict(type="InformationWriter"),
154
+ dict(type="CheckpointSaver", save_freq=None),
155
+ ]
Pointcept/configs/scannet/pretrain-msc-v1m1-1-spunet-pointcontrast.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 32 # bs: total bs in all gpus
5
+ num_worker = 32
6
+ mix_prob = 0
7
+ empty_cache = False
8
+ enable_amp = False
9
+ evaluate = False
10
+ find_unused_parameters = False
11
+
12
+ # model settings
13
+ model = dict(
14
+ type="MSC-v1m1",
15
+ backbone=dict(
16
+ type="SpUNet-v1m1",
17
+ in_channels=3,
18
+ num_classes=0,
19
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
20
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
21
+ ),
22
+ backbone_in_channels=3,
23
+ backbone_out_channels=96,
24
+ mask_grid_size=0.1,
25
+ mask_rate=0,
26
+ view1_mix_prob=0,
27
+ view2_mix_prob=0,
28
+ matching_max_k=8,
29
+ matching_max_radius=0.03,
30
+ matching_max_pair=4096,
31
+ nce_t=0.07,
32
+ contrast_weight=1,
33
+ reconstruct_weight=1,
34
+ reconstruct_color=False,
35
+ reconstruct_normal=False,
36
+ )
37
+
38
+ # scheduler settings
39
+ epoch = 10
40
+ eval_epoch = 10
41
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.8, weight_decay=0.0001, nesterov=True)
42
+ scheduler = dict(
43
+ type="OneCycleLR",
44
+ max_lr=optimizer["lr"],
45
+ pct_start=0.01,
46
+ anneal_strategy="cos",
47
+ div_factor=10.0,
48
+ final_div_factor=10000.0,
49
+ )
50
+
51
+ # dataset settings
52
+ dataset_type = "ScanNetPairDataset"
53
+ data_root = "data/scannet_pair"
54
+
55
+ data = dict(
56
+ num_classes=20,
57
+ ignore_index=-1,
58
+ names=[
59
+ "wall",
60
+ "floor",
61
+ "cabinet",
62
+ "bed",
63
+ "chair",
64
+ "sofa",
65
+ "table",
66
+ "door",
67
+ "window",
68
+ "bookshelf",
69
+ "picture",
70
+ "counter",
71
+ "desk",
72
+ "curtain",
73
+ "refridgerator",
74
+ "shower curtain",
75
+ "toilet",
76
+ "sink",
77
+ "bathtub",
78
+ "otherfurniture",
79
+ ],
80
+ train=dict(
81
+ type=dataset_type,
82
+ data_root=data_root,
83
+ view1_transform=[
84
+ dict(type="CenterShift", apply_z=True),
85
+ dict(type="Copy", keys_dict={"coord": "origin_coord"}),
86
+ # dict(type="RandomScale", scale=[0.9, 1.1]),
87
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1),
88
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1),
89
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1),
90
+ dict(type="RandomFlip", p=0.5),
91
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
92
+ dict(
93
+ type="RandomColorJitter",
94
+ brightness=0.4,
95
+ contrast=0.4,
96
+ saturation=0.2,
97
+ hue=0.02,
98
+ p=0.8,
99
+ ),
100
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
101
+ dict(
102
+ type="GridSample",
103
+ grid_size=0.025,
104
+ hash_type="fnv",
105
+ mode="train",
106
+ keys=("origin_coord", "coord", "color"),
107
+ return_grid_coord=True,
108
+ ),
109
+ dict(type="NormalizeColor"),
110
+ dict(type="ToTensor"),
111
+ dict(
112
+ type="Collect",
113
+ keys=("origin_coord", "grid_coord", "coord", "color"),
114
+ offset_keys_dict=dict(offset="coord"),
115
+ feat_keys=["color"],
116
+ ),
117
+ ],
118
+ view2_transform=[
119
+ dict(type="CenterShift", apply_z=True),
120
+ dict(type="Copy", keys_dict={"coord": "origin_coord"}),
121
+ # dict(type="RandomScale", scale=[0.9, 1.1]),
122
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1),
123
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1),
124
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1),
125
+ dict(type="RandomFlip", p=0.5),
126
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
127
+ dict(
128
+ type="RandomColorJitter",
129
+ brightness=0.4,
130
+ contrast=0.4,
131
+ saturation=0.2,
132
+ hue=0.02,
133
+ p=0.8,
134
+ ),
135
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
136
+ dict(
137
+ type="GridSample",
138
+ grid_size=0.025,
139
+ hash_type="fnv",
140
+ mode="train",
141
+ keys=("origin_coord", "coord", "color"),
142
+ return_grid_coord=True,
143
+ ),
144
+ dict(type="NormalizeColor"),
145
+ dict(type="ToTensor"),
146
+ dict(
147
+ type="Collect",
148
+ keys=("origin_coord", "grid_coord", "coord", "color"),
149
+ offset_keys_dict=dict(offset="coord"),
150
+ feat_keys=["color"],
151
+ ),
152
+ ],
153
+ test_mode=False,
154
+ ),
155
+ )
156
+
157
+ hooks = [
158
+ dict(type="CheckpointLoader"),
159
+ dict(type="IterationTimer", warmup_iter=2),
160
+ dict(type="InformationWriter"),
161
+ dict(type="CheckpointSaver", save_freq=None),
162
+ ]
Pointcept/configs/scannet/pretrain-msc-v1m2-0-spunet-csc.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 32 # bs: total bs in all gpus
5
+ num_worker = 32
6
+ mix_prob = 0
7
+ empty_cache = False
8
+ enable_amp = False
9
+ evaluate = False
10
+ find_unused_parameters = False
11
+
12
+ # model settings
13
+ model = dict(
14
+ type="MSC-v1m2",
15
+ backbone=dict(
16
+ type="SpUNet-v1m1",
17
+ in_channels=3,
18
+ num_classes=0,
19
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
20
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
21
+ ),
22
+ backbone_in_channels=3,
23
+ backbone_out_channels=96,
24
+ mask_grid_size=0.1,
25
+ mask_rate=0,
26
+ view1_mix_prob=0,
27
+ view2_mix_prob=0,
28
+ matching_max_k=8,
29
+ matching_max_radius=0.03,
30
+ matching_max_pair=8192,
31
+ nce_t=0.4,
32
+ contrast_weight=1,
33
+ reconstruct_weight=1,
34
+ reconstruct_color=False,
35
+ reconstruct_normal=False,
36
+ partitions=4,
37
+ r1=2,
38
+ r2=20,
39
+ )
40
+
41
+ # scheduler settings
42
+ epoch = 10
43
+ eval_epoch = 10
44
+ optimizer = dict(type="SGD", lr=0.1, momentum=0.8, weight_decay=0.0001, nesterov=True)
45
+ scheduler = dict(
46
+ type="OneCycleLR",
47
+ max_lr=optimizer["lr"],
48
+ pct_start=0.01,
49
+ anneal_strategy="cos",
50
+ div_factor=10.0,
51
+ final_div_factor=10000.0,
52
+ )
53
+
54
+ # dataset settings
55
+ dataset_type = "ScanNetPairDataset"
56
+ data_root = "data/scannet_pair"
57
+
58
+ data = dict(
59
+ num_classes=20,
60
+ ignore_index=-1,
61
+ names=[
62
+ "wall",
63
+ "floor",
64
+ "cabinet",
65
+ "bed",
66
+ "chair",
67
+ "sofa",
68
+ "table",
69
+ "door",
70
+ "window",
71
+ "bookshelf",
72
+ "picture",
73
+ "counter",
74
+ "desk",
75
+ "curtain",
76
+ "refridgerator",
77
+ "shower curtain",
78
+ "toilet",
79
+ "sink",
80
+ "bathtub",
81
+ "otherfurniture",
82
+ ],
83
+ train=dict(
84
+ type=dataset_type,
85
+ data_root=data_root,
86
+ view1_transform=[
87
+ dict(type="CenterShift", apply_z=True),
88
+ dict(type="Copy", keys_dict={"coord": "origin_coord"}),
89
+ # dict(type="RandomScale", scale=[0.9, 1.1]),
90
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1),
91
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1),
92
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1),
93
+ dict(type="RandomFlip", p=0.5),
94
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
95
+ dict(
96
+ type="RandomColorJitter",
97
+ brightness=0.4,
98
+ contrast=0.4,
99
+ saturation=0.2,
100
+ hue=0.02,
101
+ p=0.8,
102
+ ),
103
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
104
+ dict(
105
+ type="GridSample",
106
+ grid_size=0.025,
107
+ hash_type="fnv",
108
+ mode="train",
109
+ keys=("origin_coord", "coord", "color"),
110
+ return_grid_coord=True,
111
+ ),
112
+ dict(type="NormalizeColor"),
113
+ dict(type="ToTensor"),
114
+ dict(
115
+ type="Collect",
116
+ keys=("origin_coord", "grid_coord", "coord", "color"),
117
+ offset_keys_dict=dict(offset="coord"),
118
+ feat_keys=["color"],
119
+ ),
120
+ ],
121
+ view2_transform=[
122
+ dict(type="CenterShift", apply_z=True),
123
+ dict(type="Copy", keys_dict={"coord": "origin_coord"}),
124
+ # dict(type="RandomScale", scale=[0.9, 1.1]),
125
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1),
126
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1),
127
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1),
128
+ dict(type="RandomFlip", p=0.5),
129
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
130
+ dict(
131
+ type="RandomColorJitter",
132
+ brightness=0.4,
133
+ contrast=0.4,
134
+ saturation=0.2,
135
+ hue=0.02,
136
+ p=0.8,
137
+ ),
138
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
139
+ dict(
140
+ type="GridSample",
141
+ grid_size=0.025,
142
+ hash_type="fnv",
143
+ mode="train",
144
+ keys=("origin_coord", "coord", "color"),
145
+ return_grid_coord=True,
146
+ ),
147
+ dict(type="NormalizeColor"),
148
+ dict(type="ToTensor"),
149
+ dict(
150
+ type="Collect",
151
+ keys=("origin_coord", "grid_coord", "coord", "color"),
152
+ offset_keys_dict=dict(offset="coord"),
153
+ feat_keys=["color"],
154
+ ),
155
+ ],
156
+ test_mode=False,
157
+ ),
158
+ )
159
+
160
+ hooks = [
161
+ dict(type="CheckpointLoader"),
162
+ dict(type="IterationTimer", warmup_iter=2),
163
+ dict(type="InformationWriter"),
164
+ dict(type="CheckpointSaver", save_freq=None),
165
+ ]
Pointcept/configs/scannet/semseg-cac-v1m1-0-spunet-base.py ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total bs in all gpus
5
+ mix_prob = 0.8
6
+ empty_cache = False
7
+ enable_amp = True
8
+
9
+ # model settings
10
+ model = dict(
11
+ type="CAC-v1m1",
12
+ backbone=dict(
13
+ type="SpUNet-v1m1",
14
+ in_channels=6,
15
+ num_classes=0,
16
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
17
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
18
+ ),
19
+ criteria=[
20
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
21
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
22
+ ],
23
+ num_classes=20,
24
+ backbone_out_channels=96,
25
+ cos_temp=15,
26
+ main_weight=1,
27
+ pre_weight=1,
28
+ pre_self_weight=1,
29
+ kl_weight=1,
30
+ conf_thresh=0.75,
31
+ detach_pre_logits=True,
32
+ )
33
+
34
+ # scheduler settings
35
+ epoch = 800
36
+ optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
37
+ scheduler = dict(
38
+ type="OneCycleLR",
39
+ max_lr=optimizer["lr"],
40
+ pct_start=0.05,
41
+ anneal_strategy="cos",
42
+ div_factor=10.0,
43
+ final_div_factor=10000.0,
44
+ )
45
+
46
+ # dataset settings
47
+ dataset_type = "ScanNetDataset"
48
+ data_root = "data/scannet"
49
+
50
+ data = dict(
51
+ num_classes=20,
52
+ ignore_index=-1,
53
+ names=[
54
+ "wall",
55
+ "floor",
56
+ "cabinet",
57
+ "bed",
58
+ "chair",
59
+ "sofa",
60
+ "table",
61
+ "door",
62
+ "window",
63
+ "bookshelf",
64
+ "picture",
65
+ "counter",
66
+ "desk",
67
+ "curtain",
68
+ "refridgerator",
69
+ "shower curtain",
70
+ "toilet",
71
+ "sink",
72
+ "bathtub",
73
+ "otherfurniture",
74
+ ],
75
+ train=dict(
76
+ type=dataset_type,
77
+ split="train",
78
+ data_root=data_root,
79
+ transform=[
80
+ dict(type="CenterShift", apply_z=True),
81
+ dict(
82
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
83
+ ),
84
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
85
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
86
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
87
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
88
+ dict(type="RandomScale", scale=[0.9, 1.1]),
89
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
90
+ dict(type="RandomFlip", p=0.5),
91
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
92
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
93
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
94
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
95
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
96
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
97
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
98
+ dict(
99
+ type="GridSample",
100
+ grid_size=0.02,
101
+ hash_type="fnv",
102
+ mode="train",
103
+ return_grid_coord=True,
104
+ ),
105
+ dict(type="SphereCrop", point_max=100000, mode="random"),
106
+ dict(type="CenterShift", apply_z=False),
107
+ dict(type="NormalizeColor"),
108
+ dict(type="ShufflePoint"),
109
+ dict(type="ToTensor"),
110
+ dict(
111
+ type="Collect",
112
+ keys=("coord", "grid_coord", "segment"),
113
+ feat_keys=("color", "normal"),
114
+ ),
115
+ ],
116
+ test_mode=False,
117
+ ),
118
+ val=dict(
119
+ type=dataset_type,
120
+ split="val",
121
+ data_root=data_root,
122
+ transform=[
123
+ dict(type="CenterShift", apply_z=True),
124
+ dict(
125
+ type="GridSample",
126
+ grid_size=0.02,
127
+ hash_type="fnv",
128
+ mode="train",
129
+ return_grid_coord=True,
130
+ ),
131
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
132
+ dict(type="CenterShift", apply_z=False),
133
+ dict(type="NormalizeColor"),
134
+ dict(type="ToTensor"),
135
+ dict(
136
+ type="Collect",
137
+ keys=("coord", "grid_coord", "segment"),
138
+ feat_keys=("color", "normal"),
139
+ ),
140
+ ],
141
+ test_mode=False,
142
+ ),
143
+ test=dict(
144
+ type=dataset_type,
145
+ split="val",
146
+ data_root=data_root,
147
+ transform=[
148
+ dict(type="CenterShift", apply_z=True),
149
+ dict(type="NormalizeColor"),
150
+ ],
151
+ test_mode=True,
152
+ test_cfg=dict(
153
+ voxelize=dict(
154
+ type="GridSample",
155
+ grid_size=0.02,
156
+ hash_type="fnv",
157
+ mode="test",
158
+ return_grid_coord=True,
159
+ keys=("coord", "color", "normal"),
160
+ ),
161
+ crop=None,
162
+ post_transform=[
163
+ dict(type="CenterShift", apply_z=False),
164
+ dict(type="ToTensor"),
165
+ dict(
166
+ type="Collect",
167
+ keys=("coord", "grid_coord", "index"),
168
+ feat_keys=("color", "normal"),
169
+ ),
170
+ ],
171
+ aug_transform=[
172
+ [
173
+ dict(
174
+ type="RandomRotateTargetAngle",
175
+ angle=[0],
176
+ axis="z",
177
+ center=[0, 0, 0],
178
+ p=1,
179
+ )
180
+ ],
181
+ [
182
+ dict(
183
+ type="RandomRotateTargetAngle",
184
+ angle=[1 / 2],
185
+ axis="z",
186
+ center=[0, 0, 0],
187
+ p=1,
188
+ )
189
+ ],
190
+ [
191
+ dict(
192
+ type="RandomRotateTargetAngle",
193
+ angle=[1],
194
+ axis="z",
195
+ center=[0, 0, 0],
196
+ p=1,
197
+ )
198
+ ],
199
+ [
200
+ dict(
201
+ type="RandomRotateTargetAngle",
202
+ angle=[3 / 2],
203
+ axis="z",
204
+ center=[0, 0, 0],
205
+ p=1,
206
+ )
207
+ ],
208
+ [
209
+ dict(
210
+ type="RandomRotateTargetAngle",
211
+ angle=[0],
212
+ axis="z",
213
+ center=[0, 0, 0],
214
+ p=1,
215
+ ),
216
+ dict(type="RandomScale", scale=[0.95, 0.95]),
217
+ ],
218
+ [
219
+ dict(
220
+ type="RandomRotateTargetAngle",
221
+ angle=[1 / 2],
222
+ axis="z",
223
+ center=[0, 0, 0],
224
+ p=1,
225
+ ),
226
+ dict(type="RandomScale", scale=[0.95, 0.95]),
227
+ ],
228
+ [
229
+ dict(
230
+ type="RandomRotateTargetAngle",
231
+ angle=[1],
232
+ axis="z",
233
+ center=[0, 0, 0],
234
+ p=1,
235
+ ),
236
+ dict(type="RandomScale", scale=[0.95, 0.95]),
237
+ ],
238
+ [
239
+ dict(
240
+ type="RandomRotateTargetAngle",
241
+ angle=[3 / 2],
242
+ axis="z",
243
+ center=[0, 0, 0],
244
+ p=1,
245
+ ),
246
+ dict(type="RandomScale", scale=[0.95, 0.95]),
247
+ ],
248
+ [
249
+ dict(
250
+ type="RandomRotateTargetAngle",
251
+ angle=[0],
252
+ axis="z",
253
+ center=[0, 0, 0],
254
+ p=1,
255
+ ),
256
+ dict(type="RandomScale", scale=[1.05, 1.05]),
257
+ ],
258
+ [
259
+ dict(
260
+ type="RandomRotateTargetAngle",
261
+ angle=[1 / 2],
262
+ axis="z",
263
+ center=[0, 0, 0],
264
+ p=1,
265
+ ),
266
+ dict(type="RandomScale", scale=[1.05, 1.05]),
267
+ ],
268
+ [
269
+ dict(
270
+ type="RandomRotateTargetAngle",
271
+ angle=[1],
272
+ axis="z",
273
+ center=[0, 0, 0],
274
+ p=1,
275
+ ),
276
+ dict(type="RandomScale", scale=[1.05, 1.05]),
277
+ ],
278
+ [
279
+ dict(
280
+ type="RandomRotateTargetAngle",
281
+ angle=[3 / 2],
282
+ axis="z",
283
+ center=[0, 0, 0],
284
+ p=1,
285
+ ),
286
+ dict(type="RandomScale", scale=[1.05, 1.05]),
287
+ ],
288
+ [dict(type="RandomFlip", p=1)],
289
+ ],
290
+ ),
291
+ ),
292
+ )
Pointcept/configs/scannet/semseg-cac-v1m1-1-spunet-lovasz.py ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total bs in all gpus
5
+ mix_prob = 0.8
6
+ empty_cache = False
7
+ enable_amp = True
8
+
9
+ # model settings
10
+ model = dict(
11
+ type="CAC-v1m1",
12
+ backbone=dict(
13
+ type="SpUNet-v1m1",
14
+ in_channels=6,
15
+ num_classes=0,
16
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
17
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
18
+ ),
19
+ criteria=[
20
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
21
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
22
+ ],
23
+ num_classes=20,
24
+ backbone_out_channels=96,
25
+ cos_temp=15,
26
+ main_weight=1,
27
+ pre_weight=1,
28
+ pre_self_weight=1,
29
+ kl_weight=1,
30
+ conf_thresh=0.75,
31
+ detach_pre_logits=True,
32
+ )
33
+
34
+ # scheduler settings
35
+ epoch = 800
36
+ optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
37
+ scheduler = dict(
38
+ type="OneCycleLR",
39
+ max_lr=optimizer["lr"],
40
+ pct_start=0.05,
41
+ anneal_strategy="cos",
42
+ div_factor=10.0,
43
+ final_div_factor=10000.0,
44
+ )
45
+
46
+ # dataset settings
47
+ dataset_type = "ScanNetDataset"
48
+ data_root = "data/scannet"
49
+
50
+ data = dict(
51
+ num_classes=20,
52
+ ignore_index=-1,
53
+ names=[
54
+ "wall",
55
+ "floor",
56
+ "cabinet",
57
+ "bed",
58
+ "chair",
59
+ "sofa",
60
+ "table",
61
+ "door",
62
+ "window",
63
+ "bookshelf",
64
+ "picture",
65
+ "counter",
66
+ "desk",
67
+ "curtain",
68
+ "refridgerator",
69
+ "shower curtain",
70
+ "toilet",
71
+ "sink",
72
+ "bathtub",
73
+ "otherfurniture",
74
+ ],
75
+ train=dict(
76
+ type=dataset_type,
77
+ split="train",
78
+ data_root=data_root,
79
+ transform=[
80
+ dict(type="CenterShift", apply_z=True),
81
+ dict(
82
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
83
+ ),
84
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
85
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
86
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
87
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
88
+ dict(type="RandomScale", scale=[0.9, 1.1]),
89
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
90
+ dict(type="RandomFlip", p=0.5),
91
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
92
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
93
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
94
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
95
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
96
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
97
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
98
+ dict(
99
+ type="GridSample",
100
+ grid_size=0.02,
101
+ hash_type="fnv",
102
+ mode="train",
103
+ return_grid_coord=True,
104
+ ),
105
+ dict(type="SphereCrop", point_max=100000, mode="random"),
106
+ dict(type="CenterShift", apply_z=False),
107
+ dict(type="NormalizeColor"),
108
+ dict(type="ShufflePoint"),
109
+ dict(type="ToTensor"),
110
+ dict(
111
+ type="Collect",
112
+ keys=("coord", "grid_coord", "segment"),
113
+ feat_keys=("color", "normal"),
114
+ ),
115
+ ],
116
+ test_mode=False,
117
+ ),
118
+ val=dict(
119
+ type=dataset_type,
120
+ split="val",
121
+ data_root=data_root,
122
+ transform=[
123
+ dict(type="CenterShift", apply_z=True),
124
+ dict(
125
+ type="GridSample",
126
+ grid_size=0.02,
127
+ hash_type="fnv",
128
+ mode="train",
129
+ return_grid_coord=True,
130
+ ),
131
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
132
+ dict(type="CenterShift", apply_z=False),
133
+ dict(type="NormalizeColor"),
134
+ dict(type="ToTensor"),
135
+ dict(
136
+ type="Collect",
137
+ keys=("coord", "grid_coord", "segment"),
138
+ feat_keys=("color", "normal"),
139
+ ),
140
+ ],
141
+ test_mode=False,
142
+ ),
143
+ test=dict(
144
+ type=dataset_type,
145
+ split="val",
146
+ data_root=data_root,
147
+ transform=[
148
+ dict(type="CenterShift", apply_z=True),
149
+ dict(type="NormalizeColor"),
150
+ ],
151
+ test_mode=True,
152
+ test_cfg=dict(
153
+ voxelize=dict(
154
+ type="GridSample",
155
+ grid_size=0.02,
156
+ hash_type="fnv",
157
+ mode="test",
158
+ return_grid_coord=True,
159
+ keys=("coord", "color", "normal"),
160
+ ),
161
+ crop=None,
162
+ post_transform=[
163
+ dict(type="CenterShift", apply_z=False),
164
+ dict(type="ToTensor"),
165
+ dict(
166
+ type="Collect",
167
+ keys=("coord", "grid_coord", "index"),
168
+ feat_keys=("color", "normal"),
169
+ ),
170
+ ],
171
+ aug_transform=[
172
+ [
173
+ dict(
174
+ type="RandomRotateTargetAngle",
175
+ angle=[0],
176
+ axis="z",
177
+ center=[0, 0, 0],
178
+ p=1,
179
+ )
180
+ ],
181
+ [
182
+ dict(
183
+ type="RandomRotateTargetAngle",
184
+ angle=[1 / 2],
185
+ axis="z",
186
+ center=[0, 0, 0],
187
+ p=1,
188
+ )
189
+ ],
190
+ [
191
+ dict(
192
+ type="RandomRotateTargetAngle",
193
+ angle=[1],
194
+ axis="z",
195
+ center=[0, 0, 0],
196
+ p=1,
197
+ )
198
+ ],
199
+ [
200
+ dict(
201
+ type="RandomRotateTargetAngle",
202
+ angle=[3 / 2],
203
+ axis="z",
204
+ center=[0, 0, 0],
205
+ p=1,
206
+ )
207
+ ],
208
+ [
209
+ dict(
210
+ type="RandomRotateTargetAngle",
211
+ angle=[0],
212
+ axis="z",
213
+ center=[0, 0, 0],
214
+ p=1,
215
+ ),
216
+ dict(type="RandomScale", scale=[0.95, 0.95]),
217
+ ],
218
+ [
219
+ dict(
220
+ type="RandomRotateTargetAngle",
221
+ angle=[1 / 2],
222
+ axis="z",
223
+ center=[0, 0, 0],
224
+ p=1,
225
+ ),
226
+ dict(type="RandomScale", scale=[0.95, 0.95]),
227
+ ],
228
+ [
229
+ dict(
230
+ type="RandomRotateTargetAngle",
231
+ angle=[1],
232
+ axis="z",
233
+ center=[0, 0, 0],
234
+ p=1,
235
+ ),
236
+ dict(type="RandomScale", scale=[0.95, 0.95]),
237
+ ],
238
+ [
239
+ dict(
240
+ type="RandomRotateTargetAngle",
241
+ angle=[3 / 2],
242
+ axis="z",
243
+ center=[0, 0, 0],
244
+ p=1,
245
+ ),
246
+ dict(type="RandomScale", scale=[0.95, 0.95]),
247
+ ],
248
+ [
249
+ dict(
250
+ type="RandomRotateTargetAngle",
251
+ angle=[0],
252
+ axis="z",
253
+ center=[0, 0, 0],
254
+ p=1,
255
+ ),
256
+ dict(type="RandomScale", scale=[1.05, 1.05]),
257
+ ],
258
+ [
259
+ dict(
260
+ type="RandomRotateTargetAngle",
261
+ angle=[1 / 2],
262
+ axis="z",
263
+ center=[0, 0, 0],
264
+ p=1,
265
+ ),
266
+ dict(type="RandomScale", scale=[1.05, 1.05]),
267
+ ],
268
+ [
269
+ dict(
270
+ type="RandomRotateTargetAngle",
271
+ angle=[1],
272
+ axis="z",
273
+ center=[0, 0, 0],
274
+ p=1,
275
+ ),
276
+ dict(type="RandomScale", scale=[1.05, 1.05]),
277
+ ],
278
+ [
279
+ dict(
280
+ type="RandomRotateTargetAngle",
281
+ angle=[3 / 2],
282
+ axis="z",
283
+ center=[0, 0, 0],
284
+ p=1,
285
+ ),
286
+ dict(type="RandomScale", scale=[1.05, 1.05]),
287
+ ],
288
+ [dict(type="RandomFlip", p=1)],
289
+ ],
290
+ ),
291
+ ),
292
+ )
Pointcept/configs/scannet/semseg-cac-v1m1-2-ptv2-lovasz.py ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total bs in all gpus
5
+ mix_prob = 0.8
6
+ empty_cache = False
7
+ enable_amp = True
8
+
9
+ # model settings
10
+ model = dict(
11
+ type="CAC-v1m1",
12
+ backbone=dict(
13
+ type="PT-v2m2",
14
+ in_channels=9,
15
+ num_classes=0,
16
+ patch_embed_depth=1,
17
+ patch_embed_channels=48,
18
+ patch_embed_groups=6,
19
+ patch_embed_neighbours=8,
20
+ enc_depths=(2, 2, 6, 2),
21
+ enc_channels=(96, 192, 384, 512),
22
+ enc_groups=(12, 24, 48, 64),
23
+ enc_neighbours=(16, 16, 16, 16),
24
+ dec_depths=(1, 1, 1, 1),
25
+ dec_channels=(48, 96, 192, 384),
26
+ dec_groups=(6, 12, 24, 48),
27
+ dec_neighbours=(16, 16, 16, 16),
28
+ grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5
29
+ attn_qkv_bias=True,
30
+ pe_multiplier=False,
31
+ pe_bias=True,
32
+ attn_drop_rate=0.0,
33
+ drop_path_rate=0.3,
34
+ enable_checkpoint=False,
35
+ unpool_backend="map", # map / interp
36
+ ),
37
+ criteria=[
38
+ dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
39
+ dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
40
+ ],
41
+ num_classes=20,
42
+ backbone_out_channels=48,
43
+ cos_temp=15,
44
+ main_weight=1,
45
+ pre_weight=1,
46
+ pre_self_weight=1,
47
+ kl_weight=1,
48
+ conf_thresh=0.75,
49
+ detach_pre_logits=True,
50
+ )
51
+
52
+ # scheduler settings
53
+ epoch = 900
54
+ optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02)
55
+ scheduler = dict(
56
+ type="OneCycleLR",
57
+ max_lr=optimizer["lr"],
58
+ pct_start=0.05,
59
+ anneal_strategy="cos",
60
+ div_factor=10.0,
61
+ final_div_factor=1000.0,
62
+ )
63
+
64
+ # dataset settings
65
+ dataset_type = "ScanNetDataset"
66
+ data_root = "data/scannet"
67
+
68
+ data = dict(
69
+ num_classes=20,
70
+ ignore_index=-1,
71
+ names=[
72
+ "wall",
73
+ "floor",
74
+ "cabinet",
75
+ "bed",
76
+ "chair",
77
+ "sofa",
78
+ "table",
79
+ "door",
80
+ "window",
81
+ "bookshelf",
82
+ "picture",
83
+ "counter",
84
+ "desk",
85
+ "curtain",
86
+ "refridgerator",
87
+ "shower curtain",
88
+ "toilet",
89
+ "sink",
90
+ "bathtub",
91
+ "otherfurniture",
92
+ ],
93
+ train=dict(
94
+ type=dataset_type,
95
+ split="train",
96
+ data_root=data_root,
97
+ transform=[
98
+ dict(type="CenterShift", apply_z=True),
99
+ dict(
100
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
101
+ ),
102
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
103
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
104
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
105
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
106
+ dict(type="RandomScale", scale=[0.9, 1.1]),
107
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
108
+ dict(type="RandomFlip", p=0.5),
109
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
110
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
111
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
112
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
113
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
114
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
115
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
116
+ dict(
117
+ type="GridSample",
118
+ grid_size=0.02,
119
+ hash_type="fnv",
120
+ mode="train",
121
+ return_min_coord=True,
122
+ ),
123
+ dict(type="SphereCrop", point_max=100000, mode="random"),
124
+ dict(type="CenterShift", apply_z=False),
125
+ dict(type="NormalizeColor"),
126
+ dict(type="ShufflePoint"),
127
+ dict(type="ToTensor"),
128
+ dict(
129
+ type="Collect",
130
+ keys=("coord", "segment"),
131
+ feat_keys=("coord", "color", "normal"),
132
+ ),
133
+ ],
134
+ test_mode=False,
135
+ ),
136
+ val=dict(
137
+ type=dataset_type,
138
+ split="val",
139
+ data_root=data_root,
140
+ transform=[
141
+ dict(type="CenterShift", apply_z=True),
142
+ dict(
143
+ type="GridSample",
144
+ grid_size=0.02,
145
+ hash_type="fnv",
146
+ mode="train",
147
+ return_min_coord=True,
148
+ ),
149
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
150
+ dict(type="CenterShift", apply_z=False),
151
+ dict(type="NormalizeColor"),
152
+ dict(type="ToTensor"),
153
+ dict(
154
+ type="Collect",
155
+ keys=("coord", "segment"),
156
+ feat_keys=("coord", "color", "normal"),
157
+ ),
158
+ ],
159
+ test_mode=False,
160
+ ),
161
+ test=dict(
162
+ type=dataset_type,
163
+ split="val",
164
+ data_root=data_root,
165
+ transform=[
166
+ dict(type="CenterShift", apply_z=True),
167
+ dict(type="NormalizeColor"),
168
+ ],
169
+ test_mode=True,
170
+ test_cfg=dict(
171
+ voxelize=dict(
172
+ type="GridSample",
173
+ grid_size=0.02,
174
+ hash_type="fnv",
175
+ mode="test",
176
+ keys=("coord", "color", "normal"),
177
+ ),
178
+ crop=None,
179
+ post_transform=[
180
+ dict(type="CenterShift", apply_z=False),
181
+ dict(type="ToTensor"),
182
+ dict(
183
+ type="Collect",
184
+ keys=("coord", "index"),
185
+ feat_keys=("coord", "color", "normal"),
186
+ ),
187
+ ],
188
+ aug_transform=[
189
+ [
190
+ dict(
191
+ type="RandomRotateTargetAngle",
192
+ angle=[0],
193
+ axis="z",
194
+ center=[0, 0, 0],
195
+ p=1,
196
+ )
197
+ ],
198
+ [
199
+ dict(
200
+ type="RandomRotateTargetAngle",
201
+ angle=[1 / 2],
202
+ axis="z",
203
+ center=[0, 0, 0],
204
+ p=1,
205
+ )
206
+ ],
207
+ [
208
+ dict(
209
+ type="RandomRotateTargetAngle",
210
+ angle=[1],
211
+ axis="z",
212
+ center=[0, 0, 0],
213
+ p=1,
214
+ )
215
+ ],
216
+ [
217
+ dict(
218
+ type="RandomRotateTargetAngle",
219
+ angle=[3 / 2],
220
+ axis="z",
221
+ center=[0, 0, 0],
222
+ p=1,
223
+ )
224
+ ],
225
+ [
226
+ dict(
227
+ type="RandomRotateTargetAngle",
228
+ angle=[0],
229
+ axis="z",
230
+ center=[0, 0, 0],
231
+ p=1,
232
+ ),
233
+ dict(type="RandomScale", scale=[0.95, 0.95]),
234
+ ],
235
+ [
236
+ dict(
237
+ type="RandomRotateTargetAngle",
238
+ angle=[1 / 2],
239
+ axis="z",
240
+ center=[0, 0, 0],
241
+ p=1,
242
+ ),
243
+ dict(type="RandomScale", scale=[0.95, 0.95]),
244
+ ],
245
+ [
246
+ dict(
247
+ type="RandomRotateTargetAngle",
248
+ angle=[1],
249
+ axis="z",
250
+ center=[0, 0, 0],
251
+ p=1,
252
+ ),
253
+ dict(type="RandomScale", scale=[0.95, 0.95]),
254
+ ],
255
+ [
256
+ dict(
257
+ type="RandomRotateTargetAngle",
258
+ angle=[3 / 2],
259
+ axis="z",
260
+ center=[0, 0, 0],
261
+ p=1,
262
+ ),
263
+ dict(type="RandomScale", scale=[0.95, 0.95]),
264
+ ],
265
+ [
266
+ dict(
267
+ type="RandomRotateTargetAngle",
268
+ angle=[0],
269
+ axis="z",
270
+ center=[0, 0, 0],
271
+ p=1,
272
+ ),
273
+ dict(type="RandomScale", scale=[1.05, 1.05]),
274
+ ],
275
+ [
276
+ dict(
277
+ type="RandomRotateTargetAngle",
278
+ angle=[1 / 2],
279
+ axis="z",
280
+ center=[0, 0, 0],
281
+ p=1,
282
+ ),
283
+ dict(type="RandomScale", scale=[1.05, 1.05]),
284
+ ],
285
+ [
286
+ dict(
287
+ type="RandomRotateTargetAngle",
288
+ angle=[1],
289
+ axis="z",
290
+ center=[0, 0, 0],
291
+ p=1,
292
+ ),
293
+ dict(type="RandomScale", scale=[1.05, 1.05]),
294
+ ],
295
+ [
296
+ dict(
297
+ type="RandomRotateTargetAngle",
298
+ angle=[3 / 2],
299
+ axis="z",
300
+ center=[0, 0, 0],
301
+ p=1,
302
+ ),
303
+ dict(type="RandomScale", scale=[1.05, 1.05]),
304
+ ],
305
+ [dict(type="RandomFlip", p=1)],
306
+ ],
307
+ ),
308
+ ),
309
+ )
Pointcept/configs/scannet/semseg-minkunet34c-0-base.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total bs in all gpus
5
+ mix_prob = 0.8
6
+ empty_cache = False
7
+ enable_amp = True
8
+
9
+ # model settings
10
+ model = dict(
11
+ type="DefaultSegmentor",
12
+ backbone=dict(type="MinkUNet34C", in_channels=9, out_channels=20),
13
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
14
+ )
15
+
16
+ # scheduler settings
17
+ epoch = 600
18
+ optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
19
+ scheduler = dict(
20
+ type="OneCycleLR",
21
+ max_lr=optimizer["lr"],
22
+ pct_start=0.05,
23
+ anneal_strategy="cos",
24
+ div_factor=10.0,
25
+ final_div_factor=10000.0,
26
+ )
27
+
28
+ # dataset settings
29
+ dataset_type = "ScanNetDataset"
30
+ data_root = "data/scannet"
31
+
32
+ data = dict(
33
+ num_classes=20,
34
+ ignore_index=-1,
35
+ names=[
36
+ "wall",
37
+ "floor",
38
+ "cabinet",
39
+ "bed",
40
+ "chair",
41
+ "sofa",
42
+ "table",
43
+ "door",
44
+ "window",
45
+ "bookshelf",
46
+ "picture",
47
+ "counter",
48
+ "desk",
49
+ "curtain",
50
+ "refridgerator",
51
+ "shower curtain",
52
+ "toilet",
53
+ "sink",
54
+ "bathtub",
55
+ "otherfurniture",
56
+ ],
57
+ train=dict(
58
+ type=dataset_type,
59
+ split="train",
60
+ data_root=data_root,
61
+ transform=[
62
+ dict(type="CenterShift", apply_z=True),
63
+ dict(
64
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
65
+ ),
66
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
67
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
68
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
69
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
70
+ dict(type="RandomScale", scale=[0.9, 1.1]),
71
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
72
+ dict(type="RandomFlip", p=0.5),
73
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
74
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
75
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
76
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
77
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
78
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
79
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
80
+ dict(
81
+ type="GridSample",
82
+ grid_size=0.02,
83
+ hash_type="fnv",
84
+ mode="train",
85
+ return_grid_coord=True,
86
+ ),
87
+ # dict(type="SphereCrop", point_max=100000, mode="random"),
88
+ dict(type="CenterShift", apply_z=False),
89
+ dict(type="NormalizeColor"),
90
+ dict(type="ShufflePoint"),
91
+ dict(type="ToTensor"),
92
+ dict(
93
+ type="Collect",
94
+ keys=("coord", "grid_coord", "segment"),
95
+ feat_keys=("coord", "color", "normal"),
96
+ ),
97
+ ],
98
+ test_mode=False,
99
+ ),
100
+ val=dict(
101
+ type=dataset_type,
102
+ split="val",
103
+ data_root=data_root,
104
+ transform=[
105
+ dict(type="CenterShift", apply_z=True),
106
+ dict(
107
+ type="GridSample",
108
+ grid_size=0.02,
109
+ hash_type="fnv",
110
+ mode="train",
111
+ return_grid_coord=True,
112
+ ),
113
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
114
+ dict(type="CenterShift", apply_z=False),
115
+ dict(type="NormalizeColor"),
116
+ dict(type="ToTensor"),
117
+ dict(
118
+ type="Collect",
119
+ keys=("coord", "grid_coord", "segment"),
120
+ feat_keys=("coord", "color", "normal"),
121
+ ),
122
+ ],
123
+ test_mode=False,
124
+ ),
125
+ test=dict(
126
+ type=dataset_type,
127
+ split="val",
128
+ data_root=data_root,
129
+ transform=[
130
+ dict(type="CenterShift", apply_z=True),
131
+ dict(type="NormalizeColor"),
132
+ ],
133
+ test_mode=True,
134
+ test_cfg=dict(
135
+ voxelize=dict(
136
+ type="GridSample",
137
+ grid_size=0.02,
138
+ hash_type="fnv",
139
+ mode="test",
140
+ return_grid_coord=True,
141
+ keys=("coord", "color", "normal"),
142
+ ),
143
+ crop=None,
144
+ post_transform=[
145
+ dict(type="CenterShift", apply_z=False),
146
+ dict(type="ToTensor"),
147
+ dict(
148
+ type="Collect",
149
+ keys=("coord", "grid_coord", "index"),
150
+ feat_keys=("coord", "color", "normal"),
151
+ ),
152
+ ],
153
+ aug_transform=[
154
+ [
155
+ dict(
156
+ type="RandomRotateTargetAngle",
157
+ angle=[0],
158
+ axis="z",
159
+ center=[0, 0, 0],
160
+ p=1,
161
+ )
162
+ ],
163
+ [
164
+ dict(
165
+ type="RandomRotateTargetAngle",
166
+ angle=[1 / 2],
167
+ axis="z",
168
+ center=[0, 0, 0],
169
+ p=1,
170
+ )
171
+ ],
172
+ [
173
+ dict(
174
+ type="RandomRotateTargetAngle",
175
+ angle=[1],
176
+ axis="z",
177
+ center=[0, 0, 0],
178
+ p=1,
179
+ )
180
+ ],
181
+ [
182
+ dict(
183
+ type="RandomRotateTargetAngle",
184
+ angle=[3 / 2],
185
+ axis="z",
186
+ center=[0, 0, 0],
187
+ p=1,
188
+ )
189
+ ],
190
+ ],
191
+ ),
192
+ ),
193
+ )
Pointcept/configs/scannet/semseg-oacnns-v1m1-0-base.py ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total bs in all gpus
5
+ mix_prob = 0.8
6
+ empty_cache = False
7
+ enable_amp = True
8
+ sync_bn = True
9
+
10
+ # model settings
11
+ model = dict(
12
+ type="DefaultSegmentor",
13
+ backbone=dict(
14
+ type="OACNNs",
15
+ in_channels=9,
16
+ num_classes=20,
17
+ embed_channels=64,
18
+ enc_channels=[64, 64, 128, 256],
19
+ groups=[4, 4, 8, 16],
20
+ enc_depth=[3, 3, 9, 8],
21
+ dec_channels=[256, 256, 256, 256],
22
+ point_grid_size=[[8, 12, 16, 16], [6, 9, 12, 12], [4, 6, 8, 8], [3, 4, 6, 6]],
23
+ dec_depth=[2, 2, 2, 2],
24
+ enc_num_ref=[16, 16, 16, 16],
25
+ ),
26
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
27
+ )
28
+
29
+
30
+ epoch = 900
31
+ optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.02)
32
+ scheduler = dict(
33
+ type="OneCycleLR",
34
+ max_lr=optimizer["lr"],
35
+ pct_start=0.05,
36
+ anneal_strategy="cos",
37
+ div_factor=10.0,
38
+ final_div_factor=1000.0,
39
+ )
40
+
41
+ # dataset settings
42
+ dataset_type = "ScanNetDataset"
43
+ data_root = "data/scannet"
44
+
45
+ data = dict(
46
+ num_classes=20,
47
+ ignore_index=-1,
48
+ names=[
49
+ "wall",
50
+ "floor",
51
+ "cabinet",
52
+ "bed",
53
+ "chair",
54
+ "sofa",
55
+ "table",
56
+ "door",
57
+ "window",
58
+ "bookshelf",
59
+ "picture",
60
+ "counter",
61
+ "desk",
62
+ "curtain",
63
+ "refridgerator",
64
+ "shower curtain",
65
+ "toilet",
66
+ "sink",
67
+ "bathtub",
68
+ "otherfurniture",
69
+ ],
70
+ train=dict(
71
+ type=dataset_type,
72
+ split="train",
73
+ data_root=data_root,
74
+ transform=[
75
+ dict(type="CenterShift", apply_z=True),
76
+ dict(
77
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
78
+ ),
79
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
80
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
81
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
82
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
83
+ dict(type="RandomScale", scale=[0.9, 1.1]),
84
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
85
+ dict(type="RandomFlip", p=0.5),
86
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
87
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
88
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
89
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
90
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
91
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
92
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
93
+ dict(
94
+ type="GridSample",
95
+ grid_size=0.02,
96
+ hash_type="fnv",
97
+ mode="train",
98
+ return_grid_coord=True,
99
+ return_min_coord=True,
100
+ ),
101
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
102
+ dict(type="SphereCrop", point_max=100000, mode="random"),
103
+ dict(type="CenterShift", apply_z=False),
104
+ dict(type="NormalizeColor"),
105
+ dict(type="ShufflePoint"),
106
+ dict(type="ToTensor"),
107
+ dict(
108
+ type="Collect",
109
+ keys=("coord", "grid_coord", "segment"),
110
+ feat_keys=("coord", "normal", "color"),
111
+ ),
112
+ ],
113
+ test_mode=False,
114
+ ),
115
+ val=dict(
116
+ type=dataset_type,
117
+ split="val",
118
+ data_root=data_root,
119
+ transform=[
120
+ dict(type="CenterShift", apply_z=True),
121
+ dict(
122
+ type="GridSample",
123
+ grid_size=0.02,
124
+ hash_type="fnv",
125
+ mode="train",
126
+ return_grid_coord=True,
127
+ return_min_coord=True,
128
+ ),
129
+ # dict(type="SphereCrop", point_max=1000000, mode='center'),
130
+ dict(type="CenterShift", apply_z=False),
131
+ dict(type="NormalizeColor"),
132
+ dict(type="ToTensor"),
133
+ dict(
134
+ type="Collect",
135
+ keys=("coord", "grid_coord", "segment"),
136
+ feat_keys=("coord", "normal", "color"),
137
+ ),
138
+ ],
139
+ test_mode=False,
140
+ ),
141
+ test=dict(
142
+ type=dataset_type,
143
+ split="val",
144
+ data_root=data_root,
145
+ transform=[
146
+ dict(type="CenterShift", apply_z=True),
147
+ dict(type="NormalizeColor"),
148
+ ],
149
+ test_mode=True,
150
+ test_cfg=dict(
151
+ voxelize=dict(
152
+ type="GridSample",
153
+ grid_size=0.02,
154
+ hash_type="fnv",
155
+ mode="test",
156
+ return_grid_coord=True,
157
+ keys=("coord", "normal", "color"),
158
+ ),
159
+ crop=None,
160
+ post_transform=[
161
+ dict(type="CenterShift", apply_z=False),
162
+ dict(type="ToTensor"),
163
+ dict(
164
+ type="Collect",
165
+ keys=("coord", "grid_coord", "index"),
166
+ feat_keys=("coord", "normal", "color"),
167
+ ),
168
+ ],
169
+ aug_transform=[
170
+ [
171
+ dict(
172
+ type="RandomRotateTargetAngle",
173
+ angle=[0],
174
+ axis="z",
175
+ center=[0, 0, 0],
176
+ p=1,
177
+ )
178
+ ],
179
+ [
180
+ dict(
181
+ type="RandomRotateTargetAngle",
182
+ angle=[1 / 2],
183
+ axis="z",
184
+ center=[0, 0, 0],
185
+ p=1,
186
+ )
187
+ ],
188
+ [
189
+ dict(
190
+ type="RandomRotateTargetAngle",
191
+ angle=[1],
192
+ axis="z",
193
+ center=[0, 0, 0],
194
+ p=1,
195
+ )
196
+ ],
197
+ [
198
+ dict(
199
+ type="RandomRotateTargetAngle",
200
+ angle=[3 / 2],
201
+ axis="z",
202
+ center=[0, 0, 0],
203
+ p=1,
204
+ )
205
+ ],
206
+ [
207
+ dict(
208
+ type="RandomRotateTargetAngle",
209
+ angle=[0],
210
+ axis="z",
211
+ center=[0, 0, 0],
212
+ p=1,
213
+ ),
214
+ dict(type="RandomScale", scale=[0.95, 0.95]),
215
+ ],
216
+ [
217
+ dict(
218
+ type="RandomRotateTargetAngle",
219
+ angle=[1 / 2],
220
+ axis="z",
221
+ center=[0, 0, 0],
222
+ p=1,
223
+ ),
224
+ dict(type="RandomScale", scale=[0.95, 0.95]),
225
+ ],
226
+ [
227
+ dict(
228
+ type="RandomRotateTargetAngle",
229
+ angle=[1],
230
+ axis="z",
231
+ center=[0, 0, 0],
232
+ p=1,
233
+ ),
234
+ dict(type="RandomScale", scale=[0.95, 0.95]),
235
+ ],
236
+ [
237
+ dict(
238
+ type="RandomRotateTargetAngle",
239
+ angle=[3 / 2],
240
+ axis="z",
241
+ center=[0, 0, 0],
242
+ p=1,
243
+ ),
244
+ dict(type="RandomScale", scale=[0.95, 0.95]),
245
+ ],
246
+ [
247
+ dict(
248
+ type="RandomRotateTargetAngle",
249
+ angle=[0],
250
+ axis="z",
251
+ center=[0, 0, 0],
252
+ p=1,
253
+ ),
254
+ dict(type="RandomScale", scale=[1.05, 1.05]),
255
+ ],
256
+ [
257
+ dict(
258
+ type="RandomRotateTargetAngle",
259
+ angle=[1 / 2],
260
+ axis="z",
261
+ center=[0, 0, 0],
262
+ p=1,
263
+ ),
264
+ dict(type="RandomScale", scale=[1.05, 1.05]),
265
+ ],
266
+ [
267
+ dict(
268
+ type="RandomRotateTargetAngle",
269
+ angle=[1],
270
+ axis="z",
271
+ center=[0, 0, 0],
272
+ p=1,
273
+ ),
274
+ dict(type="RandomScale", scale=[1.05, 1.05]),
275
+ ],
276
+ [
277
+ dict(
278
+ type="RandomRotateTargetAngle",
279
+ angle=[3 / 2],
280
+ axis="z",
281
+ center=[0, 0, 0],
282
+ p=1,
283
+ ),
284
+ dict(type="RandomScale", scale=[1.05, 1.05]),
285
+ ],
286
+ [dict(type="RandomFlip", p=1)],
287
+ ],
288
+ ),
289
+ ),
290
+ )
Pointcept/configs/scannet/semseg-octformer-v1m1-0-base.py ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]
2
+
3
+ # misc custom setting
4
+ batch_size = 12 # bs: total bs in all gpus
5
+ mix_prob = 0.8
6
+ empty_cache = False
7
+ enable_amp = False
8
+
9
+ # model settings
10
+ model = dict(
11
+ type="DefaultSegmentor",
12
+ backbone=dict(
13
+ type="OctFormer-v1m1",
14
+ in_channels=10,
15
+ num_classes=20,
16
+ fpn_channels=168,
17
+ channels=(96, 192, 384, 384),
18
+ num_blocks=(2, 2, 18, 2),
19
+ num_heads=(6, 12, 24, 24),
20
+ patch_size=26,
21
+ stem_down=2,
22
+ head_up=2,
23
+ dilation=4,
24
+ drop_path=0.5,
25
+ nempty=True,
26
+ octree_depth=11,
27
+ octree_full_depth=2,
28
+ ),
29
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
30
+ )
31
+
32
+ # scheduler settings
33
+ epoch = 600
34
+ optimizer = dict(type="AdamW", lr=0.0015, weight_decay=0.05)
35
+ scheduler = dict(
36
+ type="MultiStepWithWarmupLR",
37
+ milestones=[0.6, 0.9],
38
+ gamma=0.1,
39
+ warmup_rate=0.05,
40
+ warmup_scale=1e-5,
41
+ )
42
+ param_dicts = [dict(keyword="blocks", lr=0.00015)]
43
+
44
+ # dataset settings
45
+ dataset_type = "ScanNetDataset"
46
+ data_root = "data/scannet"
47
+
48
+ data = dict(
49
+ num_classes=20,
50
+ ignore_index=-1,
51
+ names=[
52
+ "wall",
53
+ "floor",
54
+ "cabinet",
55
+ "bed",
56
+ "chair",
57
+ "sofa",
58
+ "table",
59
+ "door",
60
+ "window",
61
+ "bookshelf",
62
+ "picture",
63
+ "counter",
64
+ "desk",
65
+ "curtain",
66
+ "refridgerator",
67
+ "shower curtain",
68
+ "toilet",
69
+ "sink",
70
+ "bathtub",
71
+ "otherfurniture",
72
+ ],
73
+ train=dict(
74
+ type=dataset_type,
75
+ split="train",
76
+ data_root=data_root,
77
+ transform=[
78
+ dict(type="CenterShift", apply_z=True),
79
+ dict(
80
+ type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
81
+ ),
82
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
83
+ dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
84
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
85
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
86
+ dict(type="RandomScale", scale=[0.9, 1.1]),
87
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
88
+ dict(type="RandomFlip", p=0.5),
89
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
90
+ dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
91
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
92
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.1),
93
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
94
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
95
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
96
+ dict(
97
+ type="GridSample",
98
+ grid_size=0.01,
99
+ hash_type="fnv",
100
+ mode="train",
101
+ return_min_coord=True,
102
+ return_displacement=True,
103
+ project_displacement=True,
104
+ ),
105
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
106
+ dict(type="SphereCrop", point_max=120000, mode="random"),
107
+ dict(type="CenterShift", apply_z=False),
108
+ dict(type="NormalizeColor"),
109
+ dict(type="ShufflePoint"),
110
+ dict(type="ToTensor"),
111
+ dict(
112
+ type="Collect",
113
+ keys=("coord", "normal", "segment"),
114
+ feat_keys=("coord", "color", "normal", "displacement"),
115
+ ),
116
+ ],
117
+ test_mode=False,
118
+ ),
119
+ val=dict(
120
+ type=dataset_type,
121
+ split="val",
122
+ data_root=data_root,
123
+ transform=[
124
+ dict(type="CenterShift", apply_z=True),
125
+ dict(
126
+ type="GridSample",
127
+ grid_size=0.01,
128
+ hash_type="fnv",
129
+ mode="train",
130
+ return_min_coord=True,
131
+ return_displacement=True,
132
+ project_displacement=True,
133
+ ),
134
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
135
+ dict(type="CenterShift", apply_z=False),
136
+ dict(type="NormalizeColor"),
137
+ dict(type="ToTensor"),
138
+ dict(
139
+ type="Collect",
140
+ keys=("coord", "normal", "segment"),
141
+ feat_keys=("coord", "color", "normal", "displacement"),
142
+ ),
143
+ ],
144
+ test_mode=False,
145
+ ),
146
+ test=dict(
147
+ type=dataset_type,
148
+ split="val",
149
+ data_root=data_root,
150
+ transform=[
151
+ dict(type="CenterShift", apply_z=True),
152
+ dict(type="NormalizeColor"),
153
+ ],
154
+ test_mode=True,
155
+ test_cfg=dict(
156
+ voxelize=dict(
157
+ type="GridSample",
158
+ grid_size=0.01,
159
+ hash_type="fnv",
160
+ mode="test",
161
+ keys=("coord", "color", "normal"),
162
+ return_displacement=True,
163
+ project_displacement=True,
164
+ ),
165
+ crop=None,
166
+ post_transform=[
167
+ dict(type="CenterShift", apply_z=False),
168
+ dict(type="ToTensor"),
169
+ dict(
170
+ type="Collect",
171
+ keys=("coord", "normal", "index"),
172
+ feat_keys=("coord", "color", "normal", "displacement"),
173
+ ),
174
+ ],
175
+ aug_transform=[
176
+ [
177
+ dict(
178
+ type="RandomRotateTargetAngle",
179
+ angle=[0],
180
+ axis="z",
181
+ center=[0, 0, 0],
182
+ p=1,
183
+ )
184
+ ],
185
+ [
186
+ dict(
187
+ type="RandomRotateTargetAngle",
188
+ angle=[1 / 2],
189
+ axis="z",
190
+ center=[0, 0, 0],
191
+ p=1,
192
+ )
193
+ ],
194
+ [
195
+ dict(
196
+ type="RandomRotateTargetAngle",
197
+ angle=[1],
198
+ axis="z",
199
+ center=[0, 0, 0],
200
+ p=1,
201
+ )
202
+ ],
203
+ [
204
+ dict(
205
+ type="RandomRotateTargetAngle",
206
+ angle=[3 / 2],
207
+ axis="z",
208
+ center=[0, 0, 0],
209
+ p=1,
210
+ )
211
+ ],
212
+ [
213
+ dict(
214
+ type="RandomRotateTargetAngle",
215
+ angle=[0],
216
+ axis="z",
217
+ center=[0, 0, 0],
218
+ p=1,
219
+ ),
220
+ dict(type="RandomScale", scale=[0.95, 0.95]),
221
+ ],
222
+ [
223
+ dict(
224
+ type="RandomRotateTargetAngle",
225
+ angle=[1 / 2],
226
+ axis="z",
227
+ center=[0, 0, 0],
228
+ p=1,
229
+ ),
230
+ dict(type="RandomScale", scale=[0.95, 0.95]),
231
+ ],
232
+ [
233
+ dict(
234
+ type="RandomRotateTargetAngle",
235
+ angle=[1],
236
+ axis="z",
237
+ center=[0, 0, 0],
238
+ p=1,
239
+ ),
240
+ dict(type="RandomScale", scale=[0.95, 0.95]),
241
+ ],
242
+ [
243
+ dict(
244
+ type="RandomRotateTargetAngle",
245
+ angle=[3 / 2],
246
+ axis="z",
247
+ center=[0, 0, 0],
248
+ p=1,
249
+ ),
250
+ dict(type="RandomScale", scale=[0.95, 0.95]),
251
+ ],
252
+ [
253
+ dict(
254
+ type="RandomRotateTargetAngle",
255
+ angle=[0],
256
+ axis="z",
257
+ center=[0, 0, 0],
258
+ p=1,
259
+ ),
260
+ dict(type="RandomScale", scale=[1.05, 1.05]),
261
+ ],
262
+ [
263
+ dict(
264
+ type="RandomRotateTargetAngle",
265
+ angle=[1 / 2],
266
+ axis="z",
267
+ center=[0, 0, 0],
268
+ p=1,
269
+ ),
270
+ dict(type="RandomScale", scale=[1.05, 1.05]),
271
+ ],
272
+ [
273
+ dict(
274
+ type="RandomRotateTargetAngle",
275
+ angle=[1],
276
+ axis="z",
277
+ center=[0, 0, 0],
278
+ p=1,
279
+ ),
280
+ dict(type="RandomScale", scale=[1.05, 1.05]),
281
+ ],
282
+ [
283
+ dict(
284
+ type="RandomRotateTargetAngle",
285
+ angle=[3 / 2],
286
+ axis="z",
287
+ center=[0, 0, 0],
288
+ p=1,
289
+ ),
290
+ dict(type="RandomScale", scale=[1.05, 1.05]),
291
+ ],
292
+ [dict(type="RandomFlip", p=1)],
293
+ ],
294
+ ),
295
+ ),
296
+ )
Pointcept/configs/scannet/semseg-ppt-v1m1-0-sc-st-spunet.py ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]  # base config path; presumably merged before the overrides below -- TODO confirm
2
+
3
+ # misc custom settings
4
+ batch_size = 24  # total batch size summed over all GPUs (not per GPU)
5
+ num_worker = 48  # presumably the data-loader worker count; total vs. per-GPU is not visible here -- TODO confirm
6
+ mix_prob = 0.8  # NOTE(review): consumer not visible in this file; presumably a batch-mixing probability -- confirm
7
+ empty_cache = False
8
+ enable_amp = True  # presumably enables automatic mixed precision -- TODO confirm
9
+ find_unused_parameters = True  # presumably forwarded to DistributedDataParallel -- TODO confirm
10
+
11
+ # trainer: selected by name; the training set in `data` below is a ConcatDataset of Structured3D and ScanNet
12
+ train = dict(
13
+ type="MultiDatasetTrainer",
14
+ )
15
+
16
+ # model settings: "PPT-v1m1" model wrapping a "SpUNet-v1m3" backbone; class_name/valid_index describe per-dataset label subsets
17
+ model = dict(
18
+ type="PPT-v1m1",
19
+ backbone=dict(
20
+ type="SpUNet-v1m3",
21
+ in_channels=6,  # presumably 3 color + 3 normal channels, matching feat_keys=("color", "normal") below -- TODO confirm
22
+ num_classes=0,
23
+ base_channels=32,
24
+ context_channels=256,
25
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
26
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
27
+ cls_mode=False,
28
+ conditions=("ScanNet", "S3DIS", "Structured3D"),  # NOTE(review): ordered differently from the model-level `conditions` below -- confirm nothing relies on the two orders matching
29
+ zero_init=False,
30
+ norm_decouple=True,
31
+ norm_adaptive=True,
32
+ norm_affine=True,
33
+ ),
34
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],  # ignore_index equals data["ignore_index"] (-1)
35
+ backbone_out_channels=96,  # equals the last entry of backbone `channels`
36
+ context_channels=256,  # same value as the backbone's context_channels
37
+ conditions=("Structured3D", "ScanNet", "S3DIS"),  # the data pipelines below only add the conditions "Structured3D" and "ScanNet"
38
+ template="[x]",
39
+ clip_model="ViT-B/16",
40
+ # fmt: off
41
+ class_name=(  # 36 label names; the tuples in `valid_index` hold positions in this tuple
42
+ "wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door",
43
+ "window", "bookshelf", "bookcase", "picture", "counter", "desk", "shelves", "curtain",
44
+ "dresser", "pillow", "mirror", "ceiling", "refrigerator", "television", "shower curtain", "nightstand",
45
+ "toilet", "sink", "lamp", "bathtub", "garbagebin", "board", "beam", "column",
46
+ "clutter", "otherstructure", "otherfurniture", "otherprop",
47
+ ),
48
+ valid_index=(  # three tuples of 25 / 20 / 13 indices, presumably one per model-level condition in order -- TODO confirm
49
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 33, 34, 35),
50
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),  # selects the same 20 labels, in order, as data["names"] (which spells "refridgerator")
51
+ (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
52
+ ),
53
+ # fmt: on
54
+ backbone_mode=False,
55
+ )
56
+
57
+ # scheduler settings: SGD optimizer; the scheduler's peak LR is tied to the optimizer's lr
58
+ epoch = 100
59
+ optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
60
+ scheduler = dict(
61
+ type="OneCycleLR",
62
+ max_lr=optimizer["lr"],  # evaluates to 0.05, the lr of the optimizer dict above
63
+ pct_start=0.05,
64
+ anneal_strategy="cos",
65
+ div_factor=10.0,  # if this maps to torch's OneCycleLR: initial lr = max_lr / div_factor = 0.005 -- TODO confirm
66
+ final_div_factor=10000.0,  # if this maps to torch's OneCycleLR: final lr = initial lr / final_div_factor = 5e-7 -- TODO confirm
67
+ )
68
+ # param_dicts = [dict(keyword="modulation", lr=0.005)]
69
+
70
+ # dataset settings
71
+ data = dict(
72
+ num_classes=20,
73
+ ignore_index=-1,
74
+ names=[
75
+ "wall",
76
+ "floor",
77
+ "cabinet",
78
+ "bed",
79
+ "chair",
80
+ "sofa",
81
+ "table",
82
+ "door",
83
+ "window",
84
+ "bookshelf",
85
+ "picture",
86
+ "counter",
87
+ "desk",
88
+ "curtain",
89
+ "refridgerator",
90
+ "shower curtain",
91
+ "toilet",
92
+ "sink",
93
+ "bathtub",
94
+ "otherfurniture",
95
+ ],
96
+ train=dict(
97
+ type="ConcatDataset",
98
+ datasets=[
99
+ # Structured3D
100
+ dict(
101
+ type="Structured3DDataset",
102
+ split="train",
103
+ data_root="data/structured3d",
104
+ transform=[
105
+ dict(type="CenterShift", apply_z=True),
106
+ dict(
107
+ type="RandomDropout",
108
+ dropout_ratio=0.2,
109
+ dropout_application_ratio=0.2,
110
+ ),
111
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
112
+ dict(
113
+ type="RandomRotate",
114
+ angle=[-1, 1],
115
+ axis="z",
116
+ center=[0, 0, 0],
117
+ p=0.5,
118
+ ),
119
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
120
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
121
+ dict(type="RandomScale", scale=[0.9, 1.1]),
122
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
123
+ dict(type="RandomFlip", p=0.5),
124
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
125
+ dict(
126
+ type="ElasticDistortion",
127
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
128
+ ),
129
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
130
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
131
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
132
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
133
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
134
+ dict(
135
+ type="GridSample",
136
+ grid_size=0.02,
137
+ hash_type="fnv",
138
+ mode="train",
139
+ return_grid_coord=True,
140
+ ),
141
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
142
+ dict(type="CenterShift", apply_z=False),
143
+ dict(type="NormalizeColor"),
144
+ dict(type="ShufflePoint"),
145
+ dict(type="Add", keys_dict={"condition": "Structured3D"}),
146
+ dict(type="ToTensor"),
147
+ dict(
148
+ type="Collect",
149
+ keys=("coord", "grid_coord", "segment", "condition"),
150
+ feat_keys=("color", "normal"),
151
+ ),
152
+ ],
153
+ test_mode=False,
154
+ loop=2, # sampling weight
155
+ ),
156
+ # ScanNet
157
+ dict(
158
+ type="ScanNetDataset",
159
+ split="train",
160
+ data_root="data/scannet",
161
+ transform=[
162
+ dict(type="CenterShift", apply_z=True),
163
+ dict(
164
+ type="RandomDropout",
165
+ dropout_ratio=0.2,
166
+ dropout_application_ratio=0.2,
167
+ ),
168
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
169
+ dict(
170
+ type="RandomRotate",
171
+ angle=[-1, 1],
172
+ axis="z",
173
+ center=[0, 0, 0],
174
+ p=0.5,
175
+ ),
176
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
177
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
178
+ dict(type="RandomScale", scale=[0.9, 1.1]),
179
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
180
+ dict(type="RandomFlip", p=0.5),
181
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
182
+ dict(
183
+ type="ElasticDistortion",
184
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
185
+ ),
186
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
187
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
188
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
189
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
190
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
191
+ dict(
192
+ type="GridSample",
193
+ grid_size=0.02,
194
+ hash_type="fnv",
195
+ mode="train",
196
+ return_grid_coord=True,
197
+ ),
198
+ dict(type="SphereCrop", point_max=100000, mode="random"),
199
+ dict(type="CenterShift", apply_z=False),
200
+ dict(type="NormalizeColor"),
201
+ dict(type="ShufflePoint"),
202
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
203
+ dict(type="ToTensor"),
204
+ dict(
205
+ type="Collect",
206
+ keys=("coord", "grid_coord", "segment", "condition"),
207
+ feat_keys=("color", "normal"),
208
+ ),
209
+ ],
210
+ test_mode=False,
211
+ loop=1, # sampling weight
212
+ ),
213
+ ],
214
+ ),
215
+ val=dict(  # ScanNet "val" split; the pipeline below has no Random* transforms and no active crop
216
+ type="ScanNetDataset",
217
+ split="val",
218
+ data_root="data/scannet",
219
+ transform=[
220
+ dict(type="CenterShift", apply_z=True),
221
+ dict(
222
+ type="GridSample",
223
+ grid_size=0.02,
224
+ hash_type="fnv",
225
+ mode="train",  # same GridSample mode as the training pipelines; the test voxelizer uses mode="test"
226
+ return_grid_coord=True,
227
+ ),
228
+ # dict(type="SphereCrop", point_max=1000000, mode="center"),
229
+ dict(type="CenterShift", apply_z=False),
230
+ dict(type="NormalizeColor"),
231
+ dict(type="ToTensor"),  # NOTE(review): ToTensor runs before Add here, while the train and test pipelines run Add first -- confirm the order is irrelevant
232
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
233
+ dict(
234
+ type="Collect",
235
+ keys=("coord", "grid_coord", "segment", "condition"),
236
+ feat_keys=("color", "normal"),
237
+ ),
238
+ ],
239
+ test_mode=False,
240
+ ),
241
+ test=dict(
242
+ type="ScanNetDataset",
243
+ split="val",
244
+ data_root="data/scannet",
245
+ transform=[
246
+ dict(type="CenterShift", apply_z=True),
247
+ dict(type="NormalizeColor"),
248
+ ],
249
+ test_mode=True,
250
+ test_cfg=dict(
251
+ voxelize=dict(
252
+ type="GridSample",
253
+ grid_size=0.02,
254
+ hash_type="fnv",
255
+ mode="test",
256
+ return_grid_coord=True,
257
+ keys=("coord", "color", "normal"),
258
+ ),
259
+ crop=None,
260
+ post_transform=[
261
+ dict(type="CenterShift", apply_z=False),
262
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
263
+ dict(type="ToTensor"),
264
+ dict(
265
+ type="Collect",
266
+ keys=("coord", "grid_coord", "index", "condition"),
267
+ feat_keys=("color", "normal"),
268
+ ),
269
+ ],
270
+ aug_transform=[
271
+ [
272
+ dict(
273
+ type="RandomRotateTargetAngle",
274
+ angle=[0],
275
+ axis="z",
276
+ center=[0, 0, 0],
277
+ p=1,
278
+ )
279
+ ],
280
+ [
281
+ dict(
282
+ type="RandomRotateTargetAngle",
283
+ angle=[1 / 2],
284
+ axis="z",
285
+ center=[0, 0, 0],
286
+ p=1,
287
+ )
288
+ ],
289
+ [
290
+ dict(
291
+ type="RandomRotateTargetAngle",
292
+ angle=[1],
293
+ axis="z",
294
+ center=[0, 0, 0],
295
+ p=1,
296
+ )
297
+ ],
298
+ [
299
+ dict(
300
+ type="RandomRotateTargetAngle",
301
+ angle=[3 / 2],
302
+ axis="z",
303
+ center=[0, 0, 0],
304
+ p=1,
305
+ )
306
+ ],
307
+ [
308
+ dict(
309
+ type="RandomRotateTargetAngle",
310
+ angle=[0],
311
+ axis="z",
312
+ center=[0, 0, 0],
313
+ p=1,
314
+ ),
315
+ dict(type="RandomScale", scale=[0.95, 0.95]),
316
+ ],
317
+ [
318
+ dict(
319
+ type="RandomRotateTargetAngle",
320
+ angle=[1 / 2],
321
+ axis="z",
322
+ center=[0, 0, 0],
323
+ p=1,
324
+ ),
325
+ dict(type="RandomScale", scale=[0.95, 0.95]),
326
+ ],
327
+ [
328
+ dict(
329
+ type="RandomRotateTargetAngle",
330
+ angle=[1],
331
+ axis="z",
332
+ center=[0, 0, 0],
333
+ p=1,
334
+ ),
335
+ dict(type="RandomScale", scale=[0.95, 0.95]),
336
+ ],
337
+ [
338
+ dict(
339
+ type="RandomRotateTargetAngle",
340
+ angle=[3 / 2],
341
+ axis="z",
342
+ center=[0, 0, 0],
343
+ p=1,
344
+ ),
345
+ dict(type="RandomScale", scale=[0.95, 0.95]),
346
+ ],
347
+ [
348
+ dict(
349
+ type="RandomRotateTargetAngle",
350
+ angle=[0],
351
+ axis="z",
352
+ center=[0, 0, 0],
353
+ p=1,
354
+ ),
355
+ dict(type="RandomScale", scale=[1.05, 1.05]),
356
+ ],
357
+ [
358
+ dict(
359
+ type="RandomRotateTargetAngle",
360
+ angle=[1 / 2],
361
+ axis="z",
362
+ center=[0, 0, 0],
363
+ p=1,
364
+ ),
365
+ dict(type="RandomScale", scale=[1.05, 1.05]),
366
+ ],
367
+ [
368
+ dict(
369
+ type="RandomRotateTargetAngle",
370
+ angle=[1],
371
+ axis="z",
372
+ center=[0, 0, 0],
373
+ p=1,
374
+ ),
375
+ dict(type="RandomScale", scale=[1.05, 1.05]),
376
+ ],
377
+ [
378
+ dict(
379
+ type="RandomRotateTargetAngle",
380
+ angle=[3 / 2],
381
+ axis="z",
382
+ center=[0, 0, 0],
383
+ p=1,
384
+ ),
385
+ dict(type="RandomScale", scale=[1.05, 1.05]),
386
+ ],
387
+ [dict(type="RandomFlip", p=1)],
388
+ ],
389
+ ),
390
+ ),
391
+ )
Pointcept/configs/scannet/semseg-ppt-v1m1-1-sc-st-spunet-submit.py ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _base_ = ["../_base_/default_runtime.py"]  # base config path; presumably merged before the overrides below -- TODO confirm
2
+
3
+ # misc custom settings
4
+ batch_size = 24  # total batch size summed over all GPUs (not per GPU)
5
+ num_worker = 48  # presumably the data-loader worker count; total vs. per-GPU is not visible here -- TODO confirm
6
+ mix_prob = 0.8  # NOTE(review): consumer not visible in this file; presumably a batch-mixing probability -- confirm
7
+ empty_cache = False
8
+ enable_amp = True  # presumably enables automatic mixed precision -- TODO confirm
9
+ find_unused_parameters = True  # presumably forwarded to DistributedDataParallel -- TODO confirm
10
+ evaluate = False  # `data` below has no `val` entry: both training datasets use split=["train", "val"] and `test` uses split="test"
11
+
12
+ # trainer: selected by name; the training set in `data` below is a ConcatDataset of Structured3D and ScanNet
13
+ train = dict(
14
+ type="MultiDatasetTrainer",
15
+ )
16
+
17
+ # model settings: "PPT-v1m1" model wrapping a "SpUNet-v1m3" backbone; class_name/valid_index describe per-dataset label subsets
18
+ model = dict(
19
+ type="PPT-v1m1",
20
+ backbone=dict(
21
+ type="SpUNet-v1m3",
22
+ in_channels=6,  # presumably 3 color + 3 normal channels, matching feat_keys=("color", "normal") below -- TODO confirm
23
+ num_classes=0,
24
+ base_channels=32,
25
+ context_channels=256,
26
+ channels=(32, 64, 128, 256, 256, 128, 96, 96),
27
+ layers=(2, 3, 4, 6, 2, 2, 2, 2),
28
+ cls_mode=False,
29
+ conditions=("ScanNet", "S3DIS", "Structured3D"),  # NOTE(review): ordered differently from the model-level `conditions` below -- confirm nothing relies on the two orders matching
30
+ zero_init=False,
31
+ norm_decouple=True,
32
+ norm_adaptive=True,
33
+ norm_affine=True,
34
+ ),
35
+ criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],  # ignore_index equals data["ignore_index"] (-1)
36
+ backbone_out_channels=96,  # equals the last entry of backbone `channels`
37
+ context_channels=256,  # same value as the backbone's context_channels
38
+ conditions=("Structured3D", "ScanNet", "S3DIS"),  # the data pipelines below only add the conditions "Structured3D" and "ScanNet"
39
+ template="[x]",
40
+ clip_model="ViT-B/16",
41
+ # fmt: off
42
+ class_name=(  # 36 label names; the tuples in `valid_index` hold positions in this tuple
43
+ "wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door",
44
+ "window", "bookshelf", "bookcase", "picture", "counter", "desk", "shelves", "curtain",
45
+ "dresser", "pillow", "mirror", "ceiling", "refrigerator", "television", "shower curtain", "nightstand",
46
+ "toilet", "sink", "lamp", "bathtub", "garbagebin", "board", "beam", "column",
47
+ "clutter", "otherstructure", "otherfurniture", "otherprop",
48
+ ),
49
+ valid_index=(  # three tuples of 25 / 20 / 13 indices, presumably one per model-level condition in order -- TODO confirm
50
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 33, 34, 35),
51
+ (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34),  # selects the same 20 labels, in order, as data["names"] (which spells "refridgerator")
52
+ (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32),
53
+ ),
54
+ # fmt: on
55
+ backbone_mode=False,
56
+ )
57
+
58
+ # scheduler settings: SGD optimizer; the scheduler's peak LR is tied to the optimizer's lr
59
+ epoch = 100
60
+ optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
61
+ scheduler = dict(
62
+ type="OneCycleLR",
63
+ max_lr=optimizer["lr"],  # evaluates to 0.05, the lr of the optimizer dict above
64
+ pct_start=0.05,
65
+ anneal_strategy="cos",
66
+ div_factor=10.0,  # if this maps to torch's OneCycleLR: initial lr = max_lr / div_factor = 0.005 -- TODO confirm
67
+ final_div_factor=10000.0,  # if this maps to torch's OneCycleLR: final lr = initial lr / final_div_factor = 5e-7 -- TODO confirm
68
+ )
69
+ # param_dicts = [dict(keyword="modulation", lr=0.005)]
70
+
71
+ # dataset settings
72
+ data = dict(
73
+ num_classes=20,
74
+ ignore_index=-1,
75
+ names=[
76
+ "wall",
77
+ "floor",
78
+ "cabinet",
79
+ "bed",
80
+ "chair",
81
+ "sofa",
82
+ "table",
83
+ "door",
84
+ "window",
85
+ "bookshelf",
86
+ "picture",
87
+ "counter",
88
+ "desk",
89
+ "curtain",
90
+ "refridgerator",
91
+ "shower curtain",
92
+ "toilet",
93
+ "sink",
94
+ "bathtub",
95
+ "otherfurniture",
96
+ ],
97
+ train=dict(
98
+ type="ConcatDataset",
99
+ datasets=[
100
+ # Structured3D
101
+ dict(
102
+ type="Structured3DDataset",
103
+ split=["train", "val"],
104
+ data_root="data/structured3d",
105
+ transform=[
106
+ dict(type="CenterShift", apply_z=True),
107
+ dict(
108
+ type="RandomDropout",
109
+ dropout_ratio=0.2,
110
+ dropout_application_ratio=0.2,
111
+ ),
112
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
113
+ dict(
114
+ type="RandomRotate",
115
+ angle=[-1, 1],
116
+ axis="z",
117
+ center=[0, 0, 0],
118
+ p=0.5,
119
+ ),
120
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
121
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
122
+ dict(type="RandomScale", scale=[0.9, 1.1]),
123
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
124
+ dict(type="RandomFlip", p=0.5),
125
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
126
+ dict(
127
+ type="ElasticDistortion",
128
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
129
+ ),
130
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
131
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
132
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
133
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
134
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
135
+ dict(
136
+ type="GridSample",
137
+ grid_size=0.02,
138
+ hash_type="fnv",
139
+ mode="train",
140
+ return_grid_coord=True,
141
+ ),
142
+ dict(type="SphereCrop", sample_rate=0.8, mode="random"),
143
+ dict(type="CenterShift", apply_z=False),
144
+ dict(type="NormalizeColor"),
145
+ dict(type="ShufflePoint"),
146
+ dict(type="Add", keys_dict={"condition": "Structured3D"}),
147
+ dict(type="ToTensor"),
148
+ dict(
149
+ type="Collect",
150
+ keys=("coord", "grid_coord", "segment", "condition"),
151
+ feat_keys=("color", "normal"),
152
+ ),
153
+ ],
154
+ test_mode=False,
155
+ loop=2, # sampling weight
156
+ ),
157
+ # ScanNet
158
+ dict(
159
+ type="ScanNetDataset",
160
+ split=["train", "val"],
161
+ data_root="data/scannet",
162
+ transform=[
163
+ dict(type="CenterShift", apply_z=True),
164
+ dict(
165
+ type="RandomDropout",
166
+ dropout_ratio=0.2,
167
+ dropout_application_ratio=0.2,
168
+ ),
169
+ # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
170
+ dict(
171
+ type="RandomRotate",
172
+ angle=[-1, 1],
173
+ axis="z",
174
+ center=[0, 0, 0],
175
+ p=0.5,
176
+ ),
177
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
178
+ dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
179
+ dict(type="RandomScale", scale=[0.9, 1.1]),
180
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
181
+ dict(type="RandomFlip", p=0.5),
182
+ dict(type="RandomJitter", sigma=0.005, clip=0.02),
183
+ dict(
184
+ type="ElasticDistortion",
185
+ distortion_params=[[0.2, 0.4], [0.8, 1.6]],
186
+ ),
187
+ dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
188
+ dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
189
+ dict(type="ChromaticJitter", p=0.95, std=0.05),
190
+ # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
191
+ # dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
192
+ dict(
193
+ type="GridSample",
194
+ grid_size=0.02,
195
+ hash_type="fnv",
196
+ mode="train",
197
+ return_grid_coord=True,
198
+ ),
199
+ dict(type="SphereCrop", point_max=100000, mode="random"),
200
+ dict(type="CenterShift", apply_z=False),
201
+ dict(type="NormalizeColor"),
202
+ dict(type="ShufflePoint"),
203
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
204
+ dict(type="ToTensor"),
205
+ dict(
206
+ type="Collect",
207
+ keys=("coord", "grid_coord", "segment", "condition"),
208
+ feat_keys=("color", "normal"),
209
+ ),
210
+ ],
211
+ test_mode=False,
212
+ loop=1, # sampling weight
213
+ ),
214
+ ],
215
+ ),
216
+ test=dict(
217
+ type="ScanNetDataset",
218
+ split="test",
219
+ data_root="data/scannet",
220
+ transform=[
221
+ dict(type="CenterShift", apply_z=True),
222
+ dict(type="NormalizeColor"),
223
+ ],
224
+ test_mode=True,
225
+ test_cfg=dict(
226
+ voxelize=dict(
227
+ type="GridSample",
228
+ grid_size=0.02,
229
+ hash_type="fnv",
230
+ mode="test",
231
+ return_grid_coord=True,
232
+ keys=("coord", "color", "normal"),
233
+ ),
234
+ crop=None,
235
+ post_transform=[
236
+ dict(type="CenterShift", apply_z=False),
237
+ dict(type="Add", keys_dict={"condition": "ScanNet"}),
238
+ dict(type="ToTensor"),
239
+ dict(
240
+ type="Collect",
241
+ keys=("coord", "grid_coord", "index", "condition"),
242
+ feat_keys=("color", "normal"),
243
+ ),
244
+ ],
245
+ aug_transform=[
246
+ [
247
+ dict(
248
+ type="RandomRotateTargetAngle",
249
+ angle=[0],
250
+ axis="z",
251
+ center=[0, 0, 0],
252
+ p=1,
253
+ )
254
+ ],
255
+ [
256
+ dict(
257
+ type="RandomRotateTargetAngle",
258
+ angle=[1 / 2],
259
+ axis="z",
260
+ center=[0, 0, 0],
261
+ p=1,
262
+ )
263
+ ],
264
+ [
265
+ dict(
266
+ type="RandomRotateTargetAngle",
267
+ angle=[1],
268
+ axis="z",
269
+ center=[0, 0, 0],
270
+ p=1,
271
+ )
272
+ ],
273
+ [
274
+ dict(
275
+ type="RandomRotateTargetAngle",
276
+ angle=[3 / 2],
277
+ axis="z",
278
+ center=[0, 0, 0],
279
+ p=1,
280
+ )
281
+ ],
282
+ [
283
+ dict(
284
+ type="RandomRotateTargetAngle",
285
+ angle=[0],
286
+ axis="z",
287
+ center=[0, 0, 0],
288
+ p=1,
289
+ ),
290
+ dict(type="RandomScale", scale=[0.95, 0.95]),
291
+ ],
292
+ [
293
+ dict(
294
+ type="RandomRotateTargetAngle",
295
+ angle=[1 / 2],
296
+ axis="z",
297
+ center=[0, 0, 0],
298
+ p=1,
299
+ ),
300
+ dict(type="RandomScale", scale=[0.95, 0.95]),
301
+ ],
302
+ [
303
+ dict(
304
+ type="RandomRotateTargetAngle",
305
+ angle=[1],
306
+ axis="z",
307
+ center=[0, 0, 0],
308
+ p=1,
309
+ ),
310
+ dict(type="RandomScale", scale=[0.95, 0.95]),
311
+ ],
312
+ [
313
+ dict(
314
+ type="RandomRotateTargetAngle",
315
+ angle=[3 / 2],
316
+ axis="z",
317
+ center=[0, 0, 0],
318
+ p=1,
319
+ ),
320
+ dict(type="RandomScale", scale=[0.95, 0.95]),
321
+ ],
322
+ [
323
+ dict(
324
+ type="RandomRotateTargetAngle",
325
+ angle=[0],
326
+ axis="z",
327
+ center=[0, 0, 0],
328
+ p=1,
329
+ ),
330
+ dict(type="RandomScale", scale=[1.05, 1.05]),
331
+ ],
332
+ [
333
+ dict(
334
+ type="RandomRotateTargetAngle",
335
+ angle=[1 / 2],
336
+ axis="z",
337
+ center=[0, 0, 0],
338
+ p=1,
339
+ ),
340
+ dict(type="RandomScale", scale=[1.05, 1.05]),
341
+ ],
342
+ [
343
+ dict(
344
+ type="RandomRotateTargetAngle",
345
+ angle=[1],
346
+ axis="z",
347
+ center=[0, 0, 0],
348
+ p=1,
349
+ ),
350
+ dict(type="RandomScale", scale=[1.05, 1.05]),
351
+ ],
352
+ [
353
+ dict(
354
+ type="RandomRotateTargetAngle",
355
+ angle=[3 / 2],
356
+ axis="z",
357
+ center=[0, 0, 0],
358
+ p=1,
359
+ ),
360
+ dict(type="RandomScale", scale=[1.05, 1.05]),
361
+ ],
362
+ [dict(type="RandomFlip", p=1)],
363
+ ],
364
+ ),
365
+ ),
366
+ )