diff --git a/Pointcept/.gitignore b/Pointcept/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..7288b0a938b724007d2705abdd611bc5967b8e3e --- /dev/null +++ b/Pointcept/.gitignore @@ -0,0 +1,16 @@ +image/ +__pycache__ +**/build/ +**/*.egg-info/ +**/dist/ +*.so +exp +weights +data +log +outputs/ +.vscode +.idea +*/.DS_Store +**/*.out +Dockerfile diff --git a/Pointcept/LICENSE b/Pointcept/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..ee1fac1b22ae96f38e681900a3181d3e70ac6e4f --- /dev/null +++ b/Pointcept/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Pointcept + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Pointcept/README.md b/Pointcept/README.md new file mode 100644 index 0000000000000000000000000000000000000000..cf84efb36681d48d7f6d7500ff3723d853d3f709 --- /dev/null +++ b/Pointcept/README.md @@ -0,0 +1,896 @@ +

+[Pointcept logo]
+

+
+[![Formatter](https://github.com/pointcept/pointcept/actions/workflows/formatter.yml/badge.svg)](https://github.com/pointcept/pointcept/actions/workflows/formatter.yml)
+
+**Pointcept** is a powerful and flexible codebase for point cloud perception research. It is also an official implementation of the following papers:
+- **Point Transformer V3: Simpler, Faster, Stronger**
+*Xiaoyang Wu, Li Jiang, Peng-Shuai Wang, Zhijian Liu, Xihui Liu, Yu Qiao, Wanli Ouyang, Tong He, Hengshuang Zhao*
+IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024 - Oral
+[ Backbone ] [PTv3] - [ [arXiv](https://arxiv.org/abs/2312.10035) ] [ [Bib](https://xywu.me/research/ptv3/bib.txt) ] [ [Project](https://github.com/Pointcept/PointTransformerV3) ] → [here](https://github.com/Pointcept/PointTransformerV3)
+
+- **OA-CNNs: Omni-Adaptive Sparse CNNs for 3D Semantic Segmentation**
+*Bohao Peng, Xiaoyang Wu, Li Jiang, Yukang Chen, Hengshuang Zhao, Zhuotao Tian, Jiaya Jia*
+IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024
+[ Backbone ] [ OA-CNNs ] - [ [arXiv](https://arxiv.org/abs/2403.14418) ] [ [Bib](https://xywu.me/research/oacnns/bib.txt) ] → [here](#oa-cnns)
+
+- **PonderV2: Pave the Way for 3D Foundation Model with A Universal Pre-training Paradigm**
+*Haoyi Zhu\*, Honghui Yang\*, Xiaoyang Wu\*, Di Huang\*, Sha Zhang, Xianglong He, Tong He, Hengshuang Zhao, Chunhua Shen, Yu Qiao, Wanli Ouyang*
+arXiv Preprint 2023
+[ Pretrain ] [PonderV2] - [ [arXiv](https://arxiv.org/abs/2310.08586) ] [ [Bib](https://xywu.me/research/ponderv2/bib.txt) ] [ [Project](https://github.com/OpenGVLab/PonderV2) ] → [here](https://github.com/OpenGVLab/PonderV2)
+
+- **Towards Large-scale 3D Representation Learning with Multi-dataset Point Prompt Training**
+*Xiaoyang Wu, Zhuotao Tian, Xin Wen, Bohao Peng, Xihui Liu, Kaicheng Yu, Hengshuang Zhao*
+IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024
+[ Pretrain ] [PPT] - [ [arXiv](https://arxiv.org/abs/2308.09718) ] [ [Bib](https://xywu.me/research/ppt/bib.txt) ] → [here](#point-prompt-training-ppt)
+
+- **Masked Scene Contrast: A Scalable Framework for Unsupervised 3D Representation Learning**
+*Xiaoyang Wu, Xin Wen, Xihui Liu, Hengshuang Zhao*
+IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2023
+[ Pretrain ] [ MSC ] - [ [arXiv](https://arxiv.org/abs/2303.14191) ] [ [Bib](https://xywu.me/research/msc/bib.txt) ] → [here](#masked-scene-contrast-msc)
+
+- **Learning Context-aware Classifier for Semantic Segmentation** (3D Part)
+*Zhuotao Tian, Jiequan Cui, Li Jiang, Xiaojuan Qi, Xin Lai, Yixin Chen, Shu Liu, Jiaya Jia*
+AAAI Conference on Artificial Intelligence (**AAAI**) 2023 - Oral
+[ SemSeg ] [ CAC ] - [ [arXiv](https://arxiv.org/abs/2303.11633) ] [ [Bib](https://xywu.me/research/cac/bib.txt) ] [ [2D Part](https://github.com/tianzhuotao/CAC) ] → [here](#context-aware-classifier)
+
+- **Point Transformer V2: Grouped Vector Attention and Partition-based Pooling**
+*Xiaoyang Wu, Yixing Lao, Li Jiang, Xihui Liu, Hengshuang Zhao*
+Conference on Neural Information Processing Systems (**NeurIPS**) 2022
+[ Backbone ] [ PTv2 ] - [ [arXiv](https://arxiv.org/abs/2210.05666) ] [ [Bib](https://xywu.me/research/ptv2/bib.txt) ] → [here](#point-transformers)
+
+- **Point Transformer**
+*Hengshuang Zhao, Li Jiang, Jiaya Jia, Philip Torr, Vladlen Koltun*
+IEEE International Conference on Computer Vision (**ICCV**) 2021 - Oral
+[ Backbone ] [ PTv1 ] - [ [arXiv](https://arxiv.org/abs/2012.09164) ] [ [Bib](https://hszhao.github.io/papers/iccv21_pointtransformer_bib.txt) ] → [here](#point-transformers)
+
+Additionally, **Pointcept** integrates the following excellent work (including the above):
+Backbone:
+[MinkUNet](https://github.com/NVIDIA/MinkowskiEngine) ([here](#sparseunet)),
+[SpUNet](https://github.com/traveller59/spconv) ([here](#sparseunet)),
+[SPVCNN](https://github.com/mit-han-lab/spvnas) ([here](#spvcnn)),
+[OACNNs](https://arxiv.org/abs/2403.14418) ([here](#oa-cnns)),
+[PTv1](https://arxiv.org/abs/2012.09164) ([here](#point-transformers)),
+[PTv2](https://arxiv.org/abs/2210.05666) ([here](#point-transformers)),
+[PTv3](https://arxiv.org/abs/2312.10035) ([here](#point-transformers)),
+[StratifiedFormer](https://github.com/dvlab-research/Stratified-Transformer) ([here](#stratified-transformer)),
+[OctFormer](https://github.com/octree-nn/octformer) ([here](#octformer)),
+[Swin3D](https://github.com/microsoft/Swin3D) ([here](#swin3d));
+Semantic Segmentation:
+[Mix3d](https://github.com/kumuji/mix3d) ([here](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-spunet-v1m1-0-base.py#L5)),
+[CAC](https://arxiv.org/abs/2303.11633) ([here](#context-aware-classifier));
+Instance Segmentation:
+[PointGroup](https://github.com/dvlab-research/PointGroup) ([here](#pointgroup));
+Pre-training:
+[PointContrast](https://github.com/facebookresearch/PointContrast) ([here](#pointcontrast)),
+[Contrastive Scene Contexts](https://github.com/facebookresearch/ContrastiveSceneContexts) ([here](#contrastive-scene-contexts)),
+[Masked Scene Contrast](https://arxiv.org/abs/2303.14191) ([here](#masked-scene-contrast-msc)),
+[Point Prompt Training](https://arxiv.org/abs/2308.09718) ([here](#point-prompt-training-ppt));
+Datasets:
+[ScanNet](http://www.scan-net.org/) ([here](#scannet-v2)),
+[ScanNet200](http://www.scan-net.org/) ([here](#scannet-v2)),
+[ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) ([here](#scannet)),
+[S3DIS](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1) ([here](#s3dis)),
+[Matterport3D](https://niessner.github.io/Matterport/) ([here](#matterport3d)),
+[ArkitScene](https://github.com/apple/ARKitScenes),
+[Structured3D](https://structured3d-dataset.org/) ([here](#structured3d)),
+[SemanticKITTI](http://www.semantic-kitti.org/) ([here](#semantickitti)),
+[nuScenes](https://www.nuscenes.org/nuscenes) ([here](#nuscenes)),
+[ModelNet40](https://modelnet.cs.princeton.edu/) ([here](#modelnet)),
+[Waymo](https://waymo.com/open/) ([here](#waymo)).
+
+
+## Highlights
+- *May, 2024*: In v1.5.2, we redesigned the default structure for each dataset for better performance. Please **re-preprocess** datasets or **download** our preprocessed datasets from **[here](https://huggingface.co/Pointcept)**.
+- *Apr, 2024*: **PTv3** is selected as one of the 90 **Oral** papers (3.3% of accepted papers, 0.78% of submissions) by CVPR'24!
+- *Mar, 2024*: We release code for **OA-CNNs**, accepted by CVPR'24. Issues related to **OA-CNNs** can be directed to @Pbihao.
+- *Feb, 2024*: **PTv3** and **PPT** are accepted by CVPR'24, and another **two** papers by our Pointcept team have also been accepted by CVPR'24 🎉🎉🎉. We will make them publicly available soon!
+- *Dec, 2023*: **PTv3** is released on arXiv, and the code is available in Pointcept. PTv3 is an efficient backbone model that achieves SOTA performances across indoor and outdoor scenarios.
+- *Aug, 2023*: **PPT** is released on arXiv.
+PPT presents a multi-dataset pre-training framework that achieves SOTA performance in both **indoor** and **outdoor** scenarios. It is compatible with various existing pre-training frameworks and backbones. A **pre-release** version of the code is accessible; for those interested, please feel free to contact me directly for access.
+- *Mar, 2023*: We released our codebase, **Pointcept**, a highly potent tool for point cloud representation learning and perception. We welcome new work to join the _Pointcept_ family and highly recommend reading [Quick Start](#quick-start) before starting your trial.
+- *Feb, 2023*: **MSC** and **CeCo** were accepted by CVPR 2023. _MSC_ is a highly efficient and effective pretraining framework that facilitates cross-dataset large-scale pretraining, while _CeCo_ is a segmentation method specifically designed for long-tail datasets. Both approaches are compatible with all existing backbone models in our codebase, and we will soon make the code available for public use.
+- *Jan, 2023*: **CAC**, an oral work of AAAI 2023, has expanded its 3D results by incorporating Pointcept. This addition allows CAC to serve as a pluggable segmentor within our codebase.
+- *Sep, 2022*: **PTv2** was accepted by NeurIPS 2022. It is a continuation of the Point Transformer. The proposed GVA theory can apply to most existing attention mechanisms, while Grid Pooling is also a practical addition to existing pooling methods.
+
+## Citation
+If you find _Pointcept_ useful to your research, please cite our work as encouragement. (੭ˊ꒳ˋ)੭✧
+```
+@misc{pointcept2023,
+  title={Pointcept: A Codebase for Point Cloud Perception Research},
+  author={Pointcept Contributors},
+  howpublished = {\url{https://github.com/Pointcept/Pointcept}},
+  year={2023}
+}
+```
+
+## Overview
+
+- [Installation](#installation)
+- [Data Preparation](#data-preparation)
+- [Quick Start](#quick-start)
+- [Model Zoo](#model-zoo)
+- [Citation](#citation)
+- [Acknowledgement](#acknowledgement)
+
+## Installation
+
+### Requirements
+- Ubuntu: 18.04 and above.
+- CUDA: 11.3 and above.
+- PyTorch: 1.10.0 and above.
+
+### Conda Environment
+
+```bash
+conda create -n pointcept python=3.8 -y
+conda activate pointcept
+conda install ninja -y
+# Choose the version you want here: https://pytorch.org/get-started/previous-versions/
+conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch -y
+conda install h5py pyyaml -c anaconda -y
+conda install sharedarray tensorboard tensorboardx yapf addict einops scipy plyfile termcolor timm -c conda-forge -y
+conda install pytorch-cluster pytorch-scatter pytorch-sparse -c pyg -y
+pip install torch-geometric
+
+# spconv (SparseUNet)
+# refer https://github.com/traveller59/spconv
+pip install spconv-cu113
+
+# PPT (clip)
+pip install ftfy regex tqdm
+pip install git+https://github.com/openai/CLIP.git
+
+# PTv1 & PTv2 or precise eval
+cd libs/pointops
+# usual
+python setup.py install
+# docker & multi GPU arch
+TORCH_CUDA_ARCH_LIST="ARCH LIST" python setup.py install
+# e.g. 7.5: RTX 2000 series (Turing); 8.0: A100. More available at: https://developer.nvidia.com/cuda-gpus
+TORCH_CUDA_ARCH_LIST="7.5 8.0" python setup.py install
+cd ../..
+
+# Open3D (visualization, optional)
+pip install open3d
+```
+
+## Data Preparation
+
+### ScanNet v2
+
+The preprocessing supports semantic and instance segmentation for `ScanNet20`, `ScanNet200`, and `ScanNet Data Efficient`.
+- Download the [ScanNet](http://www.scan-net.org/) v2 dataset.
+- Run preprocessing code for raw ScanNet as follows:
+
+  ```bash
+  # RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset.
+  # PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset (output dir).
+  python pointcept/datasets/preprocessing/scannet/preprocess_scannet.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_DIR}
+  ```
+- (Optional) Download ScanNet Data Efficient files:
+  ```bash
+  # download-scannet.py is the official download script
+  # or follow instructions here: https://kaldir.vc.in.tum.de/scannet_benchmark/data_efficient/documentation#download
+  python download-scannet.py --data_efficient -o ${RAW_SCANNET_DIR}
+  # unzip downloads
+  cd ${RAW_SCANNET_DIR}/tasks
+  unzip limited-annotation-points.zip
+  unzip limited-reconstruction-scenes.zip
+  # copy files to processed dataset folder
+  mkdir ${PROCESSED_SCANNET_DIR}/tasks
+  cp -r ${RAW_SCANNET_DIR}/tasks/points ${PROCESSED_SCANNET_DIR}/tasks
+  cp -r ${RAW_SCANNET_DIR}/tasks/scenes ${PROCESSED_SCANNET_DIR}/tasks
+  ```
+- (Alternative) Our preprocessed data can be downloaded directly [[here](https://huggingface.co/datasets/Pointcept/scannet-compressed)]; please agree to the official license before downloading it.
+
+- Link processed dataset to codebase:
+  ```bash
+  # PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset.
+  mkdir data
+  ln -s ${PROCESSED_SCANNET_DIR} ${CODEBASE_DIR}/data/scannet
+  ```
+
+### ScanNet++
+- Download the [ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) dataset.
+- Run preprocessing code for raw ScanNet++ as follows:
+  ```bash
+  # RAW_SCANNETPP_DIR: the directory of downloaded ScanNet++ raw dataset.
+  # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir).
+  # NUM_WORKERS: the number of workers for parallel preprocessing.
+  python pointcept/datasets/preprocessing/scannetpp/preprocess_scannetpp.py --dataset_root ${RAW_SCANNETPP_DIR} --output_root ${PROCESSED_SCANNETPP_DIR} --num_workers ${NUM_WORKERS}
+  ```
+- Sample and chunk the large point cloud data in the train/val splits as follows (only used for training):
+  ```bash
+  # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir).
+  # NUM_WORKERS: the number of workers for parallel preprocessing.
+  python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split train --num_workers ${NUM_WORKERS}
+  python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split val --num_workers ${NUM_WORKERS}
+  ```
+- (Alternative) Our preprocessed data can be downloaded directly [[here](https://huggingface.co/datasets/Pointcept/scannetpp-compressed)]; please agree to the official license before downloading it.
+- Link processed dataset to codebase:
+  ```bash
+  # PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset.
+  mkdir data
+  ln -s ${PROCESSED_SCANNETPP_DIR} ${CODEBASE_DIR}/data/scannetpp
+  ```
+
+### S3DIS
+
+- Download S3DIS data by filling this [Google form](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1). Download the `Stanford3dDataset_v1.2.zip` file and unzip it.
+- Fix the error in `Area_5/office_19/Annotations/ceiling` Line 323474 (103.0�0000 => 103.000000); see the sketch below.
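+  A hedged sketch of this one-line patch (the exact annotation filename, assumed here to be `ceiling_1.txt`, may differ in your copy; reading with `errors="replace"` decodes the corrupted byte to `U+FFFD`):
+  ```python
+  # Patch the corrupted coordinate on (1-indexed) line 323474 in place.
+  path = "Area_5/office_19/Annotations/ceiling_1.txt"  # assumed filename
+  with open(path, encoding="utf-8", errors="replace") as f:
+      lines = f.read().splitlines()
+  lines[323473] = lines[323473].replace("103.0\ufffd0000", "103.000000")
+  with open(path, "w", encoding="utf-8") as f:
+      f.write("\n".join(lines) + "\n")
+  ```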
+- (Optional) Download the Full 2D-3D S3DIS dataset (no XYZ) from [here](https://github.com/alexsax/2D-3D-Semantics) for parsing normals.
+- Run preprocessing code for S3DIS as follows:
+
+  ```bash
+  # S3DIS_DIR: the directory of downloaded Stanford3dDataset_v1.2 dataset.
+  # RAW_S3DIS_DIR: the directory of Stanford2d3dDataset_noXYZ dataset. (optional, for parsing normal)
+  # PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset (output dir).
+
+  # S3DIS without aligned angle
+  python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR}
+  # S3DIS with aligned angle
+  python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --align_angle
+  # S3DIS with normal vector (recommended, normal is helpful)
+  python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --parse_normal
+  python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --align_angle --parse_normal
+  ```
+
+- (Alternative) Our preprocessed data (with normal vector and aligned angle) can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/s3dis-compressed)]; please agree to the official license before downloading it.
+
+- Link processed dataset to codebase.
+  ```bash
+  # PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset.
+  mkdir data
+  ln -s ${PROCESSED_S3DIS_DIR} ${CODEBASE_DIR}/data/s3dis
+  ```
+
+### Structured3D
+
+- Download the Structured3D panorama-related and perspective (full)-related zip files by filling this [Google form](https://docs.google.com/forms/d/e/1FAIpQLSc0qtvh4vHSoZaW6UvlXYy79MbcGdZfICjh4_t4bYofQIVIdw/viewform?pli=1) (no need to unzip them).
+- Organize all downloaded zip files in one folder (`${STRUCT3D_DIR}`).
+- Run preprocessing code for Structured3D as follows:
+  ```bash
+  # STRUCT3D_DIR: the directory of downloaded Structured3D dataset.
+  # PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir).
+  # NUM_WORKERS: number of workers for preprocessing, default same as cpu count (might OOM).
+  export PYTHONPATH=./
+  python pointcept/datasets/preprocessing/structured3d/preprocess_structured3d.py --dataset_root ${STRUCT3D_DIR} --output_root ${PROCESSED_STRUCT3D_DIR} --num_workers ${NUM_WORKERS} --grid_size 0.01 --fuse_prsp --fuse_pano
+  ```
+Following the instructions of [Swin3D](https://arxiv.org/abs/2304.06906), we keep the 25 categories (out of the original 40) whose frequencies exceed 0.001.
+
+- (Alternative) Our preprocessed data (with perspective views and panorama view, 471.7G after unzipping) can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/structured3d-compressed)]; please agree to the official license before downloading it.
+
+- Link processed dataset to codebase.
+  ```bash
+  # PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir).
+  mkdir data
+  ln -s ${PROCESSED_STRUCT3D_DIR} ${CODEBASE_DIR}/data/structured3d
+  ```
+
+### Matterport3D
+- Follow [this page](https://niessner.github.io/Matterport/#download) to request access to the dataset.
+- Download the "region_segmentation" type, which represents the division of a scene into individual rooms.
+  ```bash
+  # download-mp.py is the official download script
+  # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset.
+  python download-mp.py -o ${MATTERPORT3D_DIR} --type region_segmentations
+  ```
+- Unzip the region_segmentations data:
+  ```bash
+  # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset.
+  python pointcept/datasets/preprocessing/matterport3d/unzip_matterport3d_region_segmentation.py --dataset_root ${MATTERPORT3D_DIR}
+  ```
+- Run preprocessing code for Matterport3D as follows:
+  ```bash
+  # MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset.
+  # PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir).
+  # NUM_WORKERS: the number of workers for this preprocessing.
+  python pointcept/datasets/preprocessing/matterport3d/preprocess_matterport3d_mesh.py --dataset_root ${MATTERPORT3D_DIR} --output_root ${PROCESSED_MATTERPORT3D_DIR} --num_workers ${NUM_WORKERS}
+  ```
+- Link processed dataset to codebase.
+  ```bash
+  # PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir).
+  mkdir data
+  ln -s ${PROCESSED_MATTERPORT3D_DIR} ${CODEBASE_DIR}/data/matterport3d
+  ```
+
+Following the instructions of [OpenRooms](https://github.com/ViLab-UCSD/OpenRooms), we remapped Matterport3D's categories to the ScanNet 20 semantic categories with the addition of a ceiling category.
+- (Alternative) Our preprocessed data can also be downloaded [here](https://huggingface.co/datasets/Pointcept/matterport3d-compressed); please agree to the official license before downloading it.
+
+### SemanticKITTI
+- Download the [SemanticKITTI](http://www.semantic-kitti.org/dataset.html#download) dataset.
+- Link dataset to codebase.
+  ```bash
+  # SEMANTIC_KITTI_DIR: the directory of SemanticKITTI dataset.
+  # |- SEMANTIC_KITTI_DIR
+  #   |- dataset
+  #     |- sequences
+  #       |- 00
+  #       |- 01
+  #       |- ...
+
+  mkdir -p data
+  ln -s ${SEMANTIC_KITTI_DIR} ${CODEBASE_DIR}/data/semantic_kitti
+  ```
+
+### nuScenes
+- Download the official [nuScenes](https://www.nuscenes.org/nuscenes#download) dataset (with Lidar Segmentation) and organize the downloaded files as follows:
+  ```bash
+  NUSCENES_DIR
+  │── samples
+  │── sweeps
+  │── lidarseg
+  ...
+  │── v1.0-trainval
+  │── v1.0-test
+  ```
+- Run information preprocessing code (modified from OpenPCDet) for nuScenes as follows:
+  ```bash
+  # NUSCENES_DIR: the directory of downloaded nuScenes dataset.
+  # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir).
+  # MAX_SWEEPS: Max number of sweeps. Default: 10.
+  pip install nuscenes-devkit pyquaternion
+  python pointcept/datasets/preprocessing/nuscenes/preprocess_nuscenes_info.py --dataset_root ${NUSCENES_DIR} --output_root ${PROCESSED_NUSCENES_DIR} --max_sweeps ${MAX_SWEEPS} --with_camera
+  ```
+- (Alternative) Our preprocessed nuScenes information data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/nuscenes-compressed)] (only the processed information; you still need to download the raw dataset and link it to the folder); please agree to the official license before downloading it.
+
+- Link raw dataset to processed nuScenes dataset folder:
+  ```bash
+  # NUSCENES_DIR: the directory of downloaded nuScenes dataset.
+  # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir).
+  ln -s ${NUSCENES_DIR} ${PROCESSED_NUSCENES_DIR}/raw
+  ```
+  Then the processed nuScenes folder is organized as follows:
+  ```bash
+  nuscenes
+  |── raw
+      │── samples
+      │── sweeps
+      │── lidarseg
+      ...
+      │── v1.0-trainval
+      │── v1.0-test
+  |── info
+  ```
+
+- Link processed dataset to codebase.
+  ```bash
+  # PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir).
+  mkdir data
+  ln -s ${PROCESSED_NUSCENES_DIR} ${CODEBASE_DIR}/data/nuscenes
+  ```
+
+### Waymo
+- Download the official [Waymo](https://waymo.com/open/download/) dataset (v1.4.3) and organize the downloaded files as follows:
+  ```bash
+  WAYMO_RAW_DIR
+  │── training
+  │── validation
+  │── testing
+  ```
+- Install the following dependencies:
+  ```bash
+  # If pip reports "No matching distribution found", download the wheel directly from PyPI and install it.
+  conda create -n waymo python=3.10 -y
+  conda activate waymo
+  pip install waymo-open-dataset-tf-2-12-0
+  ```
+- Run the preprocessing code as follows:
+  ```bash
+  # WAYMO_DIR: the directory of the downloaded Waymo dataset.
+  # PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir).
+  # NUM_WORKERS: num workers for preprocessing
+  python pointcept/datasets/preprocessing/waymo/preprocess_waymo.py --dataset_root ${WAYMO_DIR} --output_root ${PROCESSED_WAYMO_DIR} --splits training validation --num_workers ${NUM_WORKERS}
+  ```
+
+- Link processed dataset to the codebase.
+  ```bash
+  # PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir).
+  mkdir data
+  ln -s ${PROCESSED_WAYMO_DIR} ${CODEBASE_DIR}/data/waymo
+  ```
+
+### ModelNet
+- Download [modelnet40_normal_resampled.zip](https://shapenet.cs.stanford.edu/media/modelnet40_normal_resampled.zip) and unzip it.
+- Link dataset to the codebase.
+  ```bash
+  # MODELNET_DIR: the directory of the unzipped dataset.
+  mkdir -p data
+  ln -s ${MODELNET_DIR} ${CODEBASE_DIR}/data/modelnet40_normal_resampled
+  ```
+
+## Quick Start
+
+### Training
+**Train from scratch.** The training process is driven by configs in the `configs` folder.
+The training script will generate an experiment folder in the `exp` folder and back up essential code in the experiment folder.
+The training config, log, tensorboard, and checkpoints will also be saved into the experiment folder during the training process.
+```bash
+export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
+# Script (Recommended)
+sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME}
+# Direct
+export PYTHONPATH=./
+python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH}
+```
+
+For example:
+```bash
+# By script (Recommended)
+# -p defaults to python and can be omitted
+sh scripts/train.sh -p python -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+# Direct
+export PYTHONPATH=./
+python tools/train.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base
+```
+**Resume training from checkpoint.** If the training process is interrupted by accident, the following script can resume training from a given checkpoint.
+```bash
+export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
+# Script (Recommended)
+# simply add "-r true"
+sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME} -r true
+# Direct
+export PYTHONPATH=./
+python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} resume=True weight=${CHECKPOINT_PATH}
+```
+
+### Testing
+During training, model evaluation is performed on point clouds after grid sampling (voxelization), providing an initial assessment of model performance. However, to obtain precise evaluation results, testing is **essential**. The testing process involves subsampling a dense point cloud into a sequence of voxelized point clouds, ensuring comprehensive coverage of all points. These sub-results are then predicted and collected to form a complete prediction of the entire point cloud. This approach yields higher evaluation results compared to simply mapping/interpolating the prediction. In addition, our testing code supports TTA (test time augmentation) testing, which further enhances the stability of evaluation performance.
+
+```bash
+# By script (based on the experiment folder created by the training script)
+sh scripts/test.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -n ${EXP_NAME} -w ${CHECKPOINT_NAME}
+# Direct
+export PYTHONPATH=./
+python tools/test.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} weight=${CHECKPOINT_PATH}
+```
+For example:
+```bash
+# By script (based on the experiment folder created by the training script)
+# -p defaults to python and can be omitted
+# -w defaults to model_best and can be omitted
+sh scripts/test.sh -p python -d scannet -n semseg-pt-v2m2-0-base -w model_best
+# Direct
+export PYTHONPATH=./
+python tools/test.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base weight=exp/scannet/semseg-pt-v2m2-0-base/model/model_best.pth
+```
+
+TTA can be disabled by replacing `data.test.test_cfg.aug_transform = [...]` with:
+
+```python
+data = dict(
+    train = dict(...),
+    val = dict(...),
+    test = dict(
+        ...,
+        test_cfg = dict(
+            ...,
+            aug_transform = [
+                [dict(type="RandomRotateTargetAngle", angle=[0], axis="z", center=[0, 0, 0], p=1)]
+            ]
+        )
+    )
+)
+```
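+
+To make the fragment-collection step described above concrete, here is a minimal, self-contained sketch of the idea (an illustration only, with stubbed fragments and model outputs; it is not Pointcept's actual tester API):
+
+```python
+import torch
+
+# Each voxelized fragment carries an "index" tensor (emitted by GridSample in
+# test mode) that maps fragment points back to original point ids.
+num_points, num_classes = 1000, 20
+logits = torch.zeros(num_points, num_classes)
+
+fragments = [torch.randperm(num_points)[:600] for _ in range(4)]  # index stubs
+for index in fragments:
+    frag_logits = torch.randn(len(index), num_classes)  # stand-in for model output
+    logits[index] += frag_logits  # accumulate evidence per original point
+
+pred = logits.argmax(dim=1)  # final dense per-point prediction
+```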

+### Offset
+`Offset` is the separator of point clouds in batch data, and it is similar to the concept of `Batch` in PyG.
+A visual illustration of batch and offset is as follows:
+
+[Illustration of batch and offset]

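+
+As a concrete sketch of the relationship (the helper names below are illustrative; Pointcept provides similar utilities internally):
+
+```python
+import torch
+
+# Three point clouds with 4, 5, and 3 points form one batch.
+# PyG-style "batch" assigns each point the index of its sample:
+batch = torch.tensor([0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2])
+# Pointcept-style "offset" stores the cumulative end index of each sample:
+offset = torch.tensor([4, 9, 12])
+
+def batch2offset(batch: torch.Tensor) -> torch.Tensor:
+    return torch.cumsum(batch.bincount(), dim=0)
+
+def offset2batch(offset: torch.Tensor) -> torch.Tensor:
+    counts = torch.diff(offset, prepend=torch.zeros(1, dtype=offset.dtype))
+    return torch.repeat_interleave(torch.arange(len(offset)), counts)
+
+assert torch.equal(batch2offset(batch), offset)
+assert torch.equal(offset2batch(offset), batch)
+```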
+
+## Model Zoo
+### 1. Backbones and Semantic Segmentation
+#### SparseUNet
+
+_Pointcept_ provides `SparseUNet` implemented by `SpConv` and `MinkowskiEngine`. The SpConv version is recommended since SpConv is easy to install and faster than MinkowskiEngine. Meanwhile, SpConv is also widely applied in outdoor perception.
+
+- **SpConv (recommended)**
+
+The SpConv version of `SparseUNet` in the codebase was fully rewritten from the `MinkowskiEngine` version; example running scripts are as follows:
+
+```bash
+# ScanNet val
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
+# ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
+# S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
+# S3DIS (with normal)
+sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-cn-base -n semseg-spunet-v1m1-0-cn-base
+# SemanticKITTI
+sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
+# nuScenes
+sh scripts/train.sh -g 4 -d nuscenes -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
+# ModelNet40
+sh scripts/train.sh -g 2 -d modelnet40 -c cls-spunet-v1m1-0-base -n cls-spunet-v1m1-0-base
+
+# ScanNet Data Efficient
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la20 -n semseg-spunet-v1m1-2-efficient-la20
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la50 -n semseg-spunet-v1m1-2-efficient-la50
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la100 -n semseg-spunet-v1m1-2-efficient-la100
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la200 -n semseg-spunet-v1m1-2-efficient-la200
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr1 -n semseg-spunet-v1m1-2-efficient-lr1
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr5 -n semseg-spunet-v1m1-2-efficient-lr5
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr10 -n semseg-spunet-v1m1-2-efficient-lr10
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr20 -n semseg-spunet-v1m1-2-efficient-lr20
+
+# Profile model run time
+sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-enable-profiler -n semseg-spunet-v1m1-0-enable-profiler
+```
+
+- **MinkowskiEngine**
+
+The MinkowskiEngine version of `SparseUNet` in the codebase was modified from the original MinkowskiEngine repo; example running scripts are as follows:
+1. Install MinkowskiEngine; refer to https://github.com/NVIDIA/MinkowskiEngine
+2. Training with the following example scripts:
+```bash
+# Uncomment "# from .sparse_unet import *" in "pointcept/models/__init__.py"
+# Uncomment "# from .mink_unet import *" in "pointcept/models/sparse_unet/__init__.py"
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
+# ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
+# S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
+# SemanticKITTI
+sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
+```
+
+#### OA-CNNs
+Introducing Omni-Adaptive 3D CNNs (**OA-CNNs**), a family of networks that integrates a lightweight module to greatly enhance the adaptivity of sparse CNNs at minimal computational cost.
+Without any self-attention modules, **OA-CNNs** favorably surpass point transformers in terms of accuracy in both indoor and outdoor scenes, with much less latency and memory cost. Issues related to **OA-CNNs** can be directed to @Pbihao.
+```bash
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-oacnns-v1m1-0-base -n semseg-oacnns-v1m1-0-base
+```
+
+#### Point Transformers
+- **PTv3**
+
+[PTv3](https://arxiv.org/abs/2312.10035) is an efficient backbone model that achieves SOTA performances across indoor and outdoor scenarios. The full PTv3 relies on FlashAttention, and FlashAttention requires CUDA 11.6 and above, so make sure your local Pointcept environment satisfies the requirements.
+
+If you cannot upgrade your local environment to satisfy the requirements (CUDA >= 11.6), you can disable FlashAttention by setting the model parameter `enable_flash` to `false` and reducing `enc_patch_size` and `dec_patch_size` to a smaller value (e.g. 128).
+
+FlashAttention forcibly disables RPE and reduces attention precision to fp16. If you require these features, please disable `enable_flash` and adjust `enable_rpe`, `upcast_attention` and `upcast_softmax`.
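+
+For instance, a hedged sketch of the relevant backbone overrides (values are illustrative, mirroring the `semseg-pt-v3m1-*` configs in this repository):
+
+```python
+model = dict(
+    backbone=dict(
+        type="PT-v3m1",
+        enable_flash=False,  # fall back to vanilla attention (CUDA < 11.6)
+        enc_patch_size=(128, 128, 128, 128, 128),  # reduced from 1024
+        dec_patch_size=(128, 128, 128, 128),
+        enable_rpe=True,  # RPE becomes available without FlashAttention
+        upcast_attention=True,  # keep attention computation in fp32
+        upcast_softmax=True,  # keep softmax in fp32
+    ),
+)
+```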
+
+Detailed instructions and experiment records (containing weights) are available on the [project repository](https://github.com/Pointcept/PointTransformerV3). Example running scripts are as follows:
+```bash
+# Scratched ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
+# PPT joint training (ScanNet + Structured3D) and evaluate on ScanNet
+sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme
+
+# Scratched ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
+# Fine-tuning from PPT joint training (ScanNet + Structured3D) with ScanNet200
+# PTV3_PPT_WEIGHT_PATH: path to the model weight trained by PPT multi-dataset joint training
+# e.g. exp/scannet/semseg-pt-v3m1-1-ppt-extreme/model/model_best.pth
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-1-ppt-ft -n semseg-pt-v3m1-1-ppt-ft -w ${PTV3_PPT_WEIGHT_PATH}
+
+# Scratched ScanNet++
+sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
+# Scratched ScanNet++ test
+sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-1-submit -n semseg-pt-v3m1-1-submit
+
+# Scratched S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
+# An example that disables FlashAttention and enables RPE
+sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-1-rpe -n semseg-pt-v3m1-0-rpe
+# PPT joint training (ScanNet + S3DIS + Structured3D) and evaluate on S3DIS
+sh scripts/train.sh -g 8 -d s3dis -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme
+# S3DIS 6-fold cross validation
+# 1. The default configs are evaluated on Area_5; modify "data.train.split", "data.val.split", and "data.test.split" to evaluate on Area_1 ~ Area_6 respectively.
+# 2. Train and evaluate the model on each split of areas, and gather the result files located in "exp/s3dis/EXP_NAME/result/Area_x.pth" into one single folder, noted as RECORD_FOLDER.
+# 3. Run the following script to get the S3DIS 6-fold cross validation performance:
+export PYTHONPATH=./
+python tools/test_s3dis_6fold.py --record_root ${RECORD_FOLDER}
+
+# Scratched nuScenes
+sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
+# Scratched Waymo
+sh scripts/train.sh -g 4 -d waymo -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
+
+# More configs and exp records for PTv3 will be available soon.
+```
+
+Indoor semantic segmentation
+| Model | Benchmark | Additional Data | Num GPUs | Val mIoU | Config | Tensorboard | Exp Record |
+| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
+| PTv3 | ScanNet | ✗ | 4 | 77.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-0-base) |
+| PTv3 + PPT | ScanNet | ✓ | 8 | 78.5% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-1-ppt-extreme) |
+| PTv3 | ScanNet200 | ✗ | 4 | 35.3% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet200/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet200-semseg-pt-v3m1-0-base) |
+| PTv3 + PPT | ScanNet200 | ✓ (f.t.) | 4 | | | | |
+| PTv3 | S3DIS (Area5) | ✗ | 4 | 73.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-0-rpe.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-0-rpe) |
+| PTv3 + PPT | S3DIS (Area5) | ✓ | 8 | 75.4% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-1-ppt-extreme) |
+
+Outdoor semantic segmentation
+| Model | Benchmark | Additional Data | Num GPUs | Val mIoU | Config | Tensorboard | Exp Record |
+| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
+| PTv3 | nuScenes | ✗ | 4 | 80.3% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/nuscenes/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/nuscenes-semseg-pt-v3m1-0-base) |
+| PTv3 + PPT | nuScenes | ✓ | 8 | | | | |
+| PTv3 | SemanticKITTI | ✗ | 4 | | | | |
+| PTv3 + PPT | SemanticKITTI | ✓ | 8 | | | | |
+| PTv3 | Waymo | ✗ | 4 | 71.2% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/waymo/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/waymo-semseg-pt-v3m1-0-base) (log only) |
+| PTv3 + PPT | Waymo | ✓ | 8 | | | | |
+
+_**\*Released model weights are trained with v1.5.1; weights for v1.5.2 and later are still in progress.**_
+
+- **PTv2 mode2**
+
+The original PTv2 was trained on 4 * RTX A6000 (48G memory).
+Even with AMP enabled, the memory cost of the original PTv2 is slightly larger than 24G. Considering GPUs with 24G memory are much more accessible, I tuned PTv2 on the latest Pointcept and made it runnable on 4 * RTX 3090 machines.
+
+`PTv2 Mode2` enables AMP and disables _Position Encoding Multiplier_ & _Grouped Linear_. During our further research, we found that precise coordinates are not necessary for point cloud understanding (replacing precise coordinates with grid coordinates does not influence performance; SparseUNet is an example). As for Grouped Linear, my implementation of it seems to cost more memory than the Linear layer provided by PyTorch. Benefiting from the codebase and better parameter tuning, we also relieve the overfitting problem. The reproduced performance is even better than the results reported in our paper.
+
+Example running scripts are as follows:
+
+```bash
+# ptv2m2: PTv2 mode2, disable PEM & Grouped Linear, GPU memory cost < 24G (recommended)
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-3-lovasz -n semseg-pt-v2m2-3-lovasz
+
+# ScanNet test
+sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit
+# ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+# ScanNet++
+sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+# ScanNet++ test
+sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit
+# S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+# SemanticKITTI
+sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+# nuScenes
+sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+```
+
+- **PTv2 mode1**
+
+`PTv2 mode1` is the original PTv2 we reported in our paper; example running scripts are as follows:
+
+```bash
+# ptv2m1: PTv2 mode1, original PTv2, GPU memory cost > 24G
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base
+# ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base
+# S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base
+```
+
+- **PTv1**
+
+The original PTv1 is also available in our Pointcept codebase. I haven't run PTv1 for a long time, but I have ensured that the example running script works well.
+
+```bash
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base
+# ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base
+# S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base
+```
+
+#### Stratified Transformer
+1. Additional requirements:
+```bash
+pip install torch-points3d
+# Fix a dependency issue caused by installing torch-points3d
+pip uninstall SharedArray
+pip install SharedArray==3.2.1
+
+cd libs/pointops2
+python setup.py install
+cd ../..
+```
+2. Uncomment `# from .stratified_transformer import *` in `pointcept/models/__init__.py`.
+3. Refer to [Installation](#installation) to install optional dependencies.
+4. Training with the following example scripts:
+```bash
+# stv1m1: Stratified Transformer mode1, modified from the original Stratified Transformer code.
+# stv1m2: Stratified Transformer mode2, my rewritten version (recommended).
+
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined
+sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m1-0-origin -n semseg-st-v1m1-0-origin
+# ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined
+# S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined
+```
+
+#### SPVCNN
+`SPVCNN` is the baseline model of [SPVNAS](https://github.com/mit-han-lab/spvnas); it is also a practical baseline for outdoor datasets.
+1. Install torchsparse:
+```bash
+# refer https://github.com/mit-han-lab/torchsparse
+# install method without sudo apt install
+conda install google-sparsehash -c bioconda
+export C_INCLUDE_PATH=${CONDA_PREFIX}/include:$C_INCLUDE_PATH
+export CPLUS_INCLUDE_PATH=${CONDA_PREFIX}/include:$CPLUS_INCLUDE_PATH
+pip install --upgrade git+https://github.com/mit-han-lab/torchsparse.git
+```
+2. Training with the following example scripts:
+```bash
+# SemanticKITTI
+sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-spvcnn-v1m1-0-base -n semseg-spvcnn-v1m1-0-base
+```
+
+#### OctFormer
+OctFormer from _OctFormer: Octree-based Transformers for 3D Point Clouds_.
+1. Additional requirements:
+```bash
+cd libs
+git clone https://github.com/octree-nn/dwconv.git
+pip install ./dwconv
+pip install ocnn
+```
+2. Uncomment `# from .octformer import *` in `pointcept/models/__init__.py`.
+3. Training with the following example scripts:
+```bash
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-octformer-v1m1-0-base -n semseg-octformer-v1m1-0-base
+```
+
+#### Swin3D
+Swin3D from _Swin3D: A Pretrained Transformer Backbone for 3D Indoor Scene Understanding_.
+1. Additional requirements:
+```bash
+# 1. Install MinkowskiEngine v0.5.4; follow the readme at https://github.com/NVIDIA/MinkowskiEngine
+# 2. Install Swin3D, mainly for its CUDA operators:
+cd libs
+git clone https://github.com/microsoft/Swin3D.git
+cd Swin3D
+pip install ./
+```
+2. Uncomment `# from .swin3d import *` in `pointcept/models/__init__.py`.
+3. Pre-training with the following example scripts (for Structured3D preprocessing, refer [here](#structured3d)):
+```bash
+# Structured3D + Swin-S
+sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small
+# Structured3D + Swin-L
+sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large
+
+# Addition
+# Structured3D + SpUNet
+sh scripts/train.sh -g 4 -d structured3d -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
+# Structured3D + PTv2
+sh scripts/train.sh -g 4 -d structured3d -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
+```
+4. Fine-tuning with the following example scripts:
+```bash
+# ScanNet + Swin-S
+sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-0-small/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small
+# ScanNet + Swin-L
+sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large
+
+# S3DIS + Swin-S (the config here supports S3DIS normal vectors)
+sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-0-small/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small
+# S3DIS + Swin-L (the config here supports S3DIS normal vectors)
+sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large
+```
+
+#### Context-Aware Classifier
+`Context-Aware Classifier` is a segmentor that can further boost the performance of each backbone, as a replacement for `Default Segmentor`. Training with the following example scripts:
+```bash
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base
+sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz
+sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz
+
+# ScanNet200
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz
+sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz
+```
+
+### 2. Instance Segmentation
+#### PointGroup
+[PointGroup](https://github.com/dvlab-research/PointGroup) is a baseline framework for point cloud instance segmentation.
+1. Additional requirements:
+```bash
+conda install -c bioconda google-sparsehash
+cd libs/pointgroup_ops
+python setup.py install --include_dirs=${CONDA_PREFIX}/include
+cd ../..
+```
+2. Uncomment `# from .point_group import *` in `pointcept/models/__init__.py`.
+3. Training with the following example scripts:
+```bash
+# ScanNet
+sh scripts/train.sh -g 4 -d scannet -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base
+# S3DIS
+sh scripts/train.sh -g 4 -d s3dis -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base
+```
+
+### 3. Pre-training
+#### Masked Scene Contrast (MSC)
+1. Pre-training with the following example scripts:
+```bash
+# ScanNet
+sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-0-spunet-base -n pretrain-msc-v1m1-0-spunet-base
+```
+
+2. Fine-tuning with the following example scripts (enable PointGroup ([here](#pointgroup)) before fine-tuning on the instance segmentation task):
+```bash
+# ScanNet20 Semantic Segmentation
+sh scripts/train.sh -g 8 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c semseg-spunet-v1m1-4-ft -n semseg-msc-v1m1-0f-spunet-base
+# ScanNet20 Instance Segmentation (enable PointGroup before running the script)
+sh scripts/train.sh -g 4 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-msc-v1m1-0f-pointgroup-spunet-base
+```
+3. Example log and weight: [[Pretrain](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EYvNV4XUJ_5Mlk-g15RelN4BW_P8lVBfC_zhjC_BlBDARg?e=UoGFWH)] [[Semseg](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EQkDiv5xkOFKgCpGiGtAlLwBon7i8W6my3TIbGVxuiTttQ?e=tQFnbr)]
+
+#### Point Prompt Training (PPT)
+PPT presents a multi-dataset pre-training framework, and it is compatible with various existing pre-training frameworks and backbones.
+1. PPT supervised joint training with the following example scripts:
+```bash
+# ScanNet + Structured3D, validate on ScanNet (S3DIS might cause a long data time; leave S3DIS out for a quick validation) >= 3090 * 8
+sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-0-sc-st-spunet -n semseg-ppt-v1m1-0-sc-st-spunet
+sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-1-sc-st-spunet-submit -n semseg-ppt-v1m1-1-sc-st-spunet-submit
+# ScanNet + S3DIS + Structured3D, validate on S3DIS (>= a100 * 8)
+sh scripts/train.sh -g 8 -d s3dis -c semseg-ppt-v1m1-0-s3-sc-st-spunet -n semseg-ppt-v1m1-0-s3-sc-st-spunet
+# SemanticKITTI + nuScenes + Waymo, validate on SemanticKITTI (bs12 >= 3090 * 4; bs24 >= 3090 * 8; v1m1-0 is still being tuned)
+sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet
+sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-0-sk-nu-wa-spunet -n semseg-ppt-v1m2-0-sk-nu-wa-spunet
+sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit -n semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit
+# SemanticKITTI + nuScenes + Waymo, validate on nuScenes (bs12 >= 3090 * 4; bs24 >= 3090 * 8; v1m1-0 is still being tuned)
+sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet
+sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-0-nu-sk-wa-spunet -n semseg-ppt-v1m2-0-nu-sk-wa-spunet
+sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit -n semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit
+```
+
+#### PointContrast
+1. Preprocess and link the ScanNet-Pair dataset (pair-wise matching of raw ScanNet RGB-D frames, ~1.5T):
+```bash
+# RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset.
+# PROCESSED_SCANNET_PAIR_DIR: the directory of processed ScanNet pair dataset (output dir).
+python pointcept/datasets/preprocessing/scannet/scannet_pair/preprocess.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_PAIR_DIR}
+ln -s ${PROCESSED_SCANNET_PAIR_DIR} ${CODEBASE_DIR}/data/scannet_pair
+```
+2. Pre-training with the following example scripts:
+```bash
+# ScanNet
+sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-1-spunet-pointcontrast -n pretrain-msc-v1m1-1-spunet-pointcontrast
+```
+3. For fine-tuning, refer to [MSC](#masked-scene-contrast-msc).
+
+#### Contrastive Scene Contexts
+1. Preprocess and link the ScanNet-Pair dataset (refer to [PointContrast](#pointcontrast)).
+2. Pre-training with the following example scripts:
+```bash
+# ScanNet
+sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m2-0-spunet-csc -n pretrain-msc-v1m2-0-spunet-csc
+```
+3. For fine-tuning, refer to [MSC](#masked-scene-contrast-msc).
+
+## Acknowledgement
+_Pointcept_ is designed by [Xiaoyang](https://xywu.me/), named by [Yixing](https://github.com/yxlao), and the logo is created by [Yuechen](https://julianjuaner.github.io/).
+It is derived from [Hengshuang](https://hszhao.github.io/)'s [Semseg](https://github.com/hszhao/semseg) and inspired by several repos, e.g., [MinkowskiEngine](https://github.com/NVIDIA/MinkowskiEngine), [pointnet2](https://github.com/charlesq34/pointnet2), [mmcv](https://github.com/open-mmlab/mmcv/tree/master/mmcv), and [Detectron2](https://github.com/facebookresearch/detectron2).
diff --git a/Pointcept/configs/_base_/dataset/scannetpp.py b/Pointcept/configs/_base_/dataset/scannetpp.py
new file mode 100644
index 0000000000000000000000000000000000000000..926850c22981b88f2b56f26507a7a1693e00800b
--- /dev/null
+++ b/Pointcept/configs/_base_/dataset/scannetpp.py
@@ -0,0 +1,104 @@
+data = dict(
+    names=[
+        "wall",
+        "ceiling",
+        "floor",
+        "table",
+        "door",
+        "ceiling lamp",
+        "cabinet",
+        "blinds",
+        "curtain",
+        "chair",
+        "storage cabinet",
+        "office chair",
+        "bookshelf",
+        "whiteboard",
+        "window",
+        "box",
+        "window frame",
+        "monitor",
+        "shelf",
+        "doorframe",
+        "pipe",
+        "heater",
+        "kitchen cabinet",
+        "sofa",
+        "windowsill",
+        "bed",
+        "shower wall",
+        "trash can",
+        "book",
+        "plant",
+        "blanket",
+        "tv",
+        "computer tower",
+        "kitchen counter",
+        "refrigerator",
+        "jacket",
+        "electrical duct",
+        "sink",
+        "bag",
+        "picture",
+        "pillow",
+        "towel",
+        "suitcase",
+        "backpack",
+        "crate",
+        "keyboard",
+        "rack",
+        "toilet",
+        "paper",
+        "printer",
+        "poster",
+        "painting",
+        "microwave",
+        "board",
+        "shoes",
+        "socket",
+        "bottle",
+        "bucket",
+        "cushion",
+        "basket",
+        "shoe rack",
+        "telephone",
+        "file folder",
+        "cloth",
+        "blind rail",
+        "laptop",
+        "plant pot",
+        "exhaust fan",
+        "cup",
+        "coat hanger",
+        "light switch",
+        "speaker",
+        "table lamp",
+        "air vent",
+        "clothes hanger",
+        "kettle",
+        "smoke detector",
+        "container",
+        "power strip",
+        "slippers",
+        "paper bag",
+        "mouse",
+        "cutting board",
+        "toilet paper",
+        "paper towel",
+        "pot",
+        "clock",
+        "pan",
+        "tap",
+        "jar",
+        "soap dispenser",
+        "binder",
+        "bowl",
+        "tissue box",
+        "whiteboard eraser",
+        "toilet brush",
+        "spray bottle",
+        "headphones",
+        "stapler",
+        "marker",
+    ]
+)
diff --git a/Pointcept/configs/_base_/default_runtime.py b/Pointcept/configs/_base_/default_runtime.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ec8bf179f3c462dd80e58dcd70debcbd775f5d2
--- /dev/null
+++ b/Pointcept/configs/_base_/default_runtime.py
@@ -0,0 +1,39 @@
+weight = None  # path to model weight
+resume = False  # whether to resume the training process
+evaluate = True  # evaluate after each training epoch
+test_only = False  # test process
+
+seed = None  # the training process will init and record a random seed
+save_path = "exp/default"
+num_worker = 16  # total workers across all GPUs
+batch_size = 16  # total batch size across all GPUs
+batch_size_val = None  # auto adapts to bs 1 per GPU
+batch_size_test = None  # auto adapts to bs 1 per GPU
+epoch = 100  # total epochs, data loop = epoch // eval_epoch
+eval_epoch = 100  # scheduled total eval & checkpoint epochs
+clip_grad = None  # disable with None, enable with a float
+
+sync_bn = False
+enable_amp = False
+empty_cache = False
+empty_cache_per_epoch = False
+find_unused_parameters = False
+
+mix_prob = 0
+param_dicts = None  # example: param_dicts = [dict(keyword="block", lr_scale=0.1)]
+
+# hook
+hooks = [
+    dict(type="CheckpointLoader"),
+    dict(type="IterationTimer", warmup_iter=2),
+    dict(type="InformationWriter"),
+    dict(type="SemSegEvaluator"),
+    dict(type="CheckpointSaver", save_freq=None),
+    dict(type="PreciseEvaluator", test_last=False),
+]
+
+# Trainer
+train = dict(type="DefaultTrainer") + +# Tester +test = dict(type="SemSegTester", verbose=True) diff --git a/Pointcept/configs/matterport3d/semseg-pt-v3m1-0-base.py b/Pointcept/configs/matterport3d/semseg-pt-v3m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..1559d97a2696fb7c9a5f6e2ec75238445ed13eb2 --- /dev/null +++ b/Pointcept/configs/matterport3d/semseg-pt-v3m1-0-base.py @@ -0,0 +1,313 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=21, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + cls_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "DefaultDataset" +data_root = "data/matterport3d" + +data = dict( + num_classes=21, + ignore_index=-1, + names=( + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refrigerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "other", + "ceiling", + ), + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, 
color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/matterport3d/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/matterport3d/semseg-spunet-v1m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..ef0305cd78a7fb58c029b4b69f2cfb48cc0d6648 --- /dev/null +++ 
b/Pointcept/configs/matterport3d/semseg-spunet-v1m1-0-base.py @@ -0,0 +1,282 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=21, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "DefaultDataset" +data_root = "data/matterport3d" + +data = dict( + num_classes=21, + ignore_index=-1, + names=( + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refrigerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "other", + "ceiling", + ), + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( 
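+                # With mode="test", GridSample keeps every point, splitting each
+                # voxel's points across several fragments so the tester can cover
+                # the full-resolution cloud (a hedged reading of the transform).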
+ type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/modelnet40/cls-ptv3-v1m1-0-base.py b/Pointcept/configs/modelnet40/cls-ptv3-v1m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..235a5567e5382e297cb285af9d1ceb2c82a20e9b --- /dev/null +++ b/Pointcept/configs/modelnet40/cls-ptv3-v1m1-0-base.py @@ -0,0 +1,232 @@ +_base_ = ["../_base_/default_runtime.py"] +# misc custom setting +batch_size = 32 # bs: total bs in all gpus +num_worker = 16 +batch_size_val = 8 +empty_cache = False +enable_amp = False + +# model settings +model = dict( + type="DefaultClassifier", + num_classes=40, + backbone_embed_dim=512, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + cls_mode=True, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", 
"Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 300 +# optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True) +# scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1) +optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.01) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.001, 0.0001], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0001)] + +# dataset settings +dataset_type = "ModelNetDataset" +data_root = "data/modelnet40_normal_resampled" +cache_data = False +class_names = [ + "airplane", + "bathtub", + "bed", + "bench", + "bookshelf", + "bottle", + "bowl", + "car", + "chair", + "cone", + "cup", + "curtain", + "desk", + "door", + "dresser", + "flower_pot", + "glass_box", + "guitar", + "keyboard", + "lamp", + "laptop", + "mantel", + "monitor", + "night_stand", + "person", + "piano", + "plant", + "radio", + "range_hood", + "sink", + "sofa", + "stairs", + "stool", + "table", + "tent", + "toilet", + "tv_stand", + "vase", + "wardrobe", + "xbox", +] + +data = dict( + num_classes=40, + ignore_index=-1, + names=class_names, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + class_names=class_names, + transform=[ + dict(type="NormalizeCoord"), + # dict(type="CenterShift", apply_z=True), + # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1/24, 1/24], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/24, 1/24], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.7, 1.5], anisotropic=True), + dict(type="RandomShift", shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))), + # dict(type="RandomFlip", p=0.5), + # dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + keys=("coord", "normal"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=10000, mode="random"), + # dict(type="CenterShift", apply_z=True), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "category"), + feat_keys=["coord", "normal"], + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="test", + data_root=data_root, + class_names=class_names, + transform=[ + dict(type="NormalizeCoord"), + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + keys=("coord", "normal"), + return_grid_coord=True, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "category"), + feat_keys=["coord", "normal"], + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="test", + data_root=data_root, + class_names=class_names, + transform=[ + dict(type="NormalizeCoord"), + ], + test_mode=True, + test_cfg=dict( + post_transform=[ + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + keys=("coord", "normal"), + return_grid_coord=True, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord"), + feat_keys=["coord", "normal"], + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[1, 1], anisotropic=True)], # 1 + [dict(type="RandomScale", scale=[0.8, 
1.2], anisotropic=True)], # 2 + [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 3 + [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 4 + [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 5 + [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 6 + [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 7 + [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 8 + [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 9 + [dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 10 + ], + ), + ), +) + +# hooks +hooks = [ + dict(type="CheckpointLoader"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="ClsEvaluator"), + dict(type="CheckpointSaver", save_freq=None), + dict(type="PreciseEvaluator", test_last=False), +] + +# tester +test = dict(type="ClsVotingTester", num_repeat=100) diff --git a/Pointcept/configs/modelnet40/cls-spunet-v1m1-0-base.py b/Pointcept/configs/modelnet40/cls-spunet-v1m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..8e6585af547d3658093f5203468c2b3c12108f67 --- /dev/null +++ b/Pointcept/configs/modelnet40/cls-spunet-v1m1-0-base.py @@ -0,0 +1,176 @@ +_base_ = ["../_base_/default_runtime.py"] +# misc custom setting +batch_size = 16 # bs: total bs in all gpus +# batch_size_val = 8 +empty_cache = False +enable_amp = False + +# model settings +model = dict( + type="DefaultClassifier", + num_classes=40, + backbone_embed_dim=256, + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=0, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=True, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 200 +optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1) + +# dataset settings +dataset_type = "ModelNetDataset" +data_root = "data/modelnet40_normal_resampled" +cache_data = False +class_names = [ + "airplane", + "bathtub", + "bed", + "bench", + "bookshelf", + "bottle", + "bowl", + "car", + "chair", + "cone", + "cup", + "curtain", + "desk", + "door", + "dresser", + "flower_pot", + "glass_box", + "guitar", + "keyboard", + "lamp", + "laptop", + "mantel", + "monitor", + "night_stand", + "person", + "piano", + "plant", + "radio", + "range_hood", + "sink", + "sofa", + "stairs", + "stool", + "table", + "tent", + "toilet", + "tv_stand", + "vase", + "wardrobe", + "xbox", +] + +data = dict( + num_classes=40, + ignore_index=-1, + names=class_names, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + class_names=class_names, + transform=[ + dict(type="NormalizeCoord"), + # dict(type="CenterShift", apply_z=True), + # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1/24, 1/24], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/24, 1/24], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + dict(type="RandomShift", shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))), + # dict(type="RandomFlip", p=0.5), + # dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + keys=("coord", "normal"), + return_grid_coord=True, + ), + # 
dict(type="SphereCrop", point_max=10000, mode="random"), + # dict(type="CenterShift", apply_z=True), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "category"), + feat_keys=["coord", "normal"], + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="test", + data_root=data_root, + class_names=class_names, + transform=[ + dict(type="NormalizeCoord"), + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + keys=("coord", "normal"), + return_grid_coord=True, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "category"), + feat_keys=["coord", "normal"], + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="test", + data_root=data_root, + class_names=class_names, + transform=[ + dict(type="NormalizeCoord"), + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + keys=("coord", "normal"), + return_grid_coord=True, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "category"), + feat_keys=["coord", "normal"], + ), + ], + test_mode=True, + ), +) + +# hooks +hooks = [ + dict(type="CheckpointLoader"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="ClsEvaluator"), + dict(type="CheckpointSaver", save_freq=None), +] + +# tester +test = dict(type="ClsTester") diff --git a/Pointcept/configs/nuscenes/semseg-ppt-v1m1-0-nu-sk-wa-spunet.py b/Pointcept/configs/nuscenes/semseg-ppt-v1m1-0-nu-sk-wa-spunet.py new file mode 100644 index 0000000000000000000000000000000000000000..ed82be25301d2ac9650147cc68ebd7a2aa9534be --- /dev/null +++ b/Pointcept/configs/nuscenes/semseg-ppt-v1m1-0-nu-sk-wa-spunet.py @@ -0,0 +1,342 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +find_unused_parameters = True + +# trainer +train = dict( + type="MultiDatasetTrainer", +) + +# model settings +model = dict( + type="PPT-v1m1", + backbone=dict( + type="SpUNet-v1m3", + in_channels=4, + num_classes=0, + base_channels=32, + context_channels=256, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + conditions=("SemanticKITTI", "nuScenes", "Waymo"), + zero_init=False, + norm_decouple=True, + norm_adaptive=False, + norm_affine=True, + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + backbone_out_channels=96, + context_channels=256, + conditions=("SemanticKITTI", "nuScenes", "Waymo"), + template="[x]", + clip_model="ViT-B/16", + # fmt: off + class_name=( + # SemanticKITTI + "car", "bicycle", "motorcycle", "truck", "other vehicle", + "person", "person who rides a bicycle", "person who rides a motorcycle", "road", "parking", + "path for pedestrians at the side of a road", "other ground", "building", "fence", "vegetation", + "trunk", "terrain", "pole", "traffic sign", + # nuScenes + "barrier", "bicycle", "bus", "car", "construction vehicle", + "motorcycle", "pedestrian", "traffic cone", "trailer", "truck", + "path suitable or safe for driving", "other flat", "sidewalk", "terrain", "man made", "vegetation", + # waymo + "car", "truck", "bus", "other vehicle", "person who rides a motorcycle", + "person who rides a bicycle", "pedestrian", "sign", "traffic light", 
"pole", + "construction cone", "bicycle", "motorcycle", "building", "vegetation", + "tree trunk", "curb", "road", "lane marker", "other ground", "horizontal surface that can not drive", + "surface when pedestrians most likely to walk on", + ), + valid_index=( + [i for i in range(19)], + [i for i in range(19, 19 + 16)], + [i for i in range(19 + 16, 19 + 16 + 22)], + ), + # fmt: on + backbone_mode=False, +) + +# scheduler settings +epoch = 50 +eval_epoch = 50 +optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.04, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=100.0, +) +# param_dicts = [dict(keyword="modulation", lr=0.0002)] + +# dataset settings +data = dict( + num_classes=16, + ignore_index=-1, + names=[ + "barrier", + "bicycle", + "bus", + "car", + "construction_vehicle", + "motorcycle", + "pedestrian", + "traffic_cone", + "trailer", + "truck", + "driveable_surface", + "other_flat", + "sidewalk", + "terrain", + "manmade", + "vegetation", + ], + train=dict( + type="ConcatDataset", + datasets=[ + # nuScenes + dict( + type="NuScenesDataset", + split="train", + data_root="data/nuscenes", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5), + dict( + type="PointClip", + point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "nuScenes"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + # SemanticKITTI + dict( + type="SemanticKITTIDataset", + split="train", + data_root="data/semantic_kitti", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict( + type="PointClip", + point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # 
dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "SemanticKITTI"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + # Waymo + dict( + type="WaymoDataset", + split="training", + data_root="data/waymo", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict( + type="PointClip", + point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "Waymo"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + ], + ), + val=dict( + type="NuScenesDataset", + split="val", + data_root="data/nuscenes", + transform=[ + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + dict(type="Add", keys_dict={"condition": "nuScenes"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + ), + test=dict( + type="NuScenesDataset", + split="val", + data_root="data/nuscenes", + transform=[ + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.025, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_inverse=True, + ), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "strength"), + ), + crop=None, + post_transform=[ + dict(type="Add", keys_dict={"condition": "nuScenes"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", 
scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ignore_index=-1, + ), +) diff --git a/Pointcept/configs/nuscenes/semseg-ppt-v1m2-0-nu-sk-wa-spunet.py b/Pointcept/configs/nuscenes/semseg-ppt-v1m2-0-nu-sk-wa-spunet.py new file mode 100644 index 0000000000000000000000000000000000000000..bf0aba527a41fc745f24dbd5913a14a7698834f5 --- /dev/null +++ b/Pointcept/configs/nuscenes/semseg-ppt-v1m2-0-nu-sk-wa-spunet.py @@ -0,0 +1,316 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +find_unused_parameters = True + +# trainer +train = dict( + type="MultiDatasetTrainer", +) + +# model settings +model = dict( + type="PPT-v1m2", + backbone=dict( + type="SpUNet-v1m3", + in_channels=4, + num_classes=0, + base_channels=32, + context_channels=256, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + conditions=("SemanticKITTI", "nuScenes", "Waymo"), + zero_init=False, + norm_decouple=True, + norm_adaptive=False, + norm_affine=True, + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + backbone_out_channels=96, + context_channels=256, + conditions=("SemanticKITTI", "nuScenes", "Waymo"), + num_classes=(19, 16, 22), +) + +# scheduler settings +epoch = 50 +eval_epoch = 50 +optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.04, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=100.0, +) +# param_dicts = [dict(keyword="modulation", lr=0.0002)] + +# dataset settings +data = dict( + num_classes=16, + ignore_index=-1, + names=[ + "barrier", + "bicycle", + "bus", + "car", + "construction_vehicle", + "motorcycle", + "pedestrian", + "traffic_cone", + "trailer", + "truck", + "driveable_surface", + "other_flat", + "sidewalk", + "terrain", + "manmade", + "vegetation", + ], + train=dict( + type="ConcatDataset", + datasets=[ + # nuScenes + dict( + type="NuScenesDataset", + split="train", + data_root="data/nuscenes", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5), + dict( + type="PointClip", + point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "nuScenes"}), + dict(type="ToTensor"), + dict( + type="Collect", 
+ keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + # SemanticKITTI + dict( + type="SemanticKITTIDataset", + split="train", + data_root="data/semantic_kitti", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict( + type="PointClip", + point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "SemanticKITTI"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + # Waymo + dict( + type="WaymoDataset", + split="training", + data_root="data/waymo", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict( + type="PointClip", + point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "Waymo"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + ], + ), + val=dict( + type="NuScenesDataset", + split="val", + data_root="data/nuscenes", + transform=[ + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + dict(type="Add", keys_dict={"condition": "nuScenes"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + ), + 
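+    # Test pipeline (below): `Copy` stashes full-resolution labels as
+    # `origin_segment`, a finer 0.025 GridSample with return_inverse=True records
+    # the mapping back to the original points, and the added `condition`
+    # ("nuScenes") keeps the decoupled norm layers on the nuScenes branch.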
test=dict( + type="NuScenesDataset", + split="val", + data_root="data/nuscenes", + transform=[ + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.025, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_inverse=True, + ), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "strength"), + ), + crop=None, + post_transform=[ + dict(type="Add", keys_dict={"condition": "nuScenes"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ignore_index=-1, + ), +) diff --git a/Pointcept/configs/nuscenes/semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit.py b/Pointcept/configs/nuscenes/semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit.py new file mode 100644 index 0000000000000000000000000000000000000000..d8f254757995c51401643a7db1e9c48455b4fefb --- /dev/null +++ b/Pointcept/configs/nuscenes/semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit.py @@ -0,0 +1,292 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +find_unused_parameters = True +evaluate = False + +# trainer +train = dict( + type="MultiDatasetTrainer", +) + +# model settings +model = dict( + type="PPT-v1m2", + backbone=dict( + type="SpUNet-v1m3", + in_channels=4, + num_classes=0, + base_channels=32, + context_channels=256, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + conditions=("SemanticKITTI", "nuScenes", "Waymo"), + zero_init=False, + norm_decouple=True, + norm_adaptive=False, + norm_affine=True, + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + backbone_out_channels=96, + context_channels=256, + conditions=("SemanticKITTI", "nuScenes", "Waymo"), + num_classes=(19, 16, 22), +) + +# scheduler settings +epoch = 50 +eval_epoch = 50 +optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.04, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=100.0, +) +# param_dicts = [dict(keyword="modulation", lr=0.0002)] + +# dataset settings +data = dict( + num_classes=16, + ignore_index=-1, + names=[ + "barrier", + "bicycle", + "bus", + "car", + "construction_vehicle", + "motorcycle", + "pedestrian", + "traffic_cone", + "trailer", + "truck", + "driveable_surface", + "other_flat", + "sidewalk", + "terrain", + "manmade", + "vegetation", + ], + train=dict( + type="ConcatDataset", + datasets=[ 
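+            # Benchmark-submission recipe: each source dataset trains on
+            # train+val (see the split lists below) with `evaluate = False`
+            # above, since the held-out test split has no public labels.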
+ # nuScenes + dict( + type="NuScenesDataset", + split=["train", "val"], + data_root="data/nuscenes", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5), + dict( + type="PointClip", + point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "nuScenes"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + # SemanticKITTI + dict( + type="SemanticKITTIDataset", + split=["train", "val"], + data_root="data/semantic_kitti", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict( + type="PointClip", + point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "SemanticKITTI"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + # Waymo + dict( + type="WaymoDataset", + split=["training", "validation"], + data_root="data/waymo", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict( + type="PointClip", + point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + 
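+                # Same outdoor recipe as the nuScenes/SemanticKITTI entries above.
+                # Note: RandomRotate angles are expressed in units of pi in
+                # Pointcept's transforms (so angle=[-1, 1] spans a full turn
+                # about z), as far as we can tell from the implementation.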
dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "Waymo"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + ], + ), + test=dict( + type="NuScenesDataset", + split="test", + data_root="data/nuscenes", + transform=[ + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.025, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_inverse=True, + ), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "strength"), + ), + crop=None, + post_transform=[ + dict(type="Add", keys_dict={"condition": "nuScenes"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ignore_index=-1, + ), +) diff --git a/Pointcept/configs/nuscenes/semseg-pt-v2m2-0-base.py b/Pointcept/configs/nuscenes/semseg-pt-v2m2-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..0ce53d7d872e69bd2e66614124f6d4e19a6fdc02 --- /dev/null +++ b/Pointcept/configs/nuscenes/semseg-pt-v2m2-0-base.py @@ -0,0 +1,174 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=4, + num_classes=16, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.15, 0.375, 0.9375, 2.34375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# 
scheduler settings +epoch = 50 +eval_epoch = 50 +optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.04, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=100.0, +) + +# dataset settings +dataset_type = "NuScenesDataset" +data_root = "data/nuscenes" +ignore_index = -1 +names = [ + "barrier", + "bicycle", + "bus", + "car", + "construction_vehicle", + "motorcycle", + "pedestrian", + "traffic_cone", + "trailer", + "truck", + "driveable_surface", + "other_flat", + "sidewalk", + "terrain", + "manmade", + "vegetation", +] + +data = dict( + num_classes=16, + ignore_index=ignore_index, + names=names, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + # dict(type="GridSample", grid_size=0.05, hash_type="fnv", mode="train", + # keys=("coord", "strength", "segment"), return_grid_coord=True), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + # dict(type="PointClip", point_cloud_range=(-51.2, -51.2, -4, 51.2, 51.2, 2.4)), + # dict(type="GridSample", grid_size=0.05, hash_type="fnv", mode="train", + # keys=("coord", "strength", "segment"), return_grid_coord=True), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[], + test_mode=True, + test_cfg=dict( + voxelize=None, + crop=None, + post_transform=[ + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ignore_index=ignore_index, + ), +) diff --git a/Pointcept/configs/nuscenes/semseg-pt-v2m2-1-benchmark-submit.py b/Pointcept/configs/nuscenes/semseg-pt-v2m2-1-benchmark-submit.py new file mode 
100644 index 0000000000000000000000000000000000000000..19f7e7512d4f809704be97ee64653c1d852aafff --- /dev/null +++ b/Pointcept/configs/nuscenes/semseg-pt-v2m2-1-benchmark-submit.py @@ -0,0 +1,157 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True +evaluate = False + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=4, + num_classes=16, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.15, 0.375, 0.9375, 2.34375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 50 +eval_epoch = 50 +optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.04, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=100.0, +) + +# dataset settings +dataset_type = "NuScenesDataset" +data_root = "data/nuscenes" +ignore_index = -1 +names = [ + "barrier", + "bicycle", + "bus", + "car", + "construction_vehicle", + "motorcycle", + "pedestrian", + "traffic_cone", + "trailer", + "truck", + "driveable_surface", + "other_flat", + "sidewalk", + "terrain", + "manmade", + "vegetation", +] + +data = dict( + num_classes=16, + ignore_index=ignore_index, + names=names, + train=dict( + type=dataset_type, + split=["train", "val"], + data_root=data_root, + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + # dict(type="GridSample", grid_size=0.05, hash_type="fnv", mode="train", + # keys=("coord", "strength", "segment"), return_grid_coord=True), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + test=dict( + type=dataset_type, + split="test", + data_root=data_root, + transform=[], + test_mode=True, + test_cfg=dict( + voxelize=None, + crop=None, + post_transform=[ + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + 
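+                # Ten test-time augmentation variants: five global scales, each
+                # with and without a flip; the tester aggregates predictions over
+                # all variants before the final argmax.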
[dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ignore_index=ignore_index, + ), +) diff --git a/Pointcept/configs/nuscenes/semseg-pt-v3m1-0-base.py b/Pointcept/configs/nuscenes/semseg-pt-v3m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..4f64b9e67dedcf0cd1a7f950d2d6677dce0aa088 --- /dev/null +++ b/Pointcept/configs/nuscenes/semseg-pt-v3m1-0-base.py @@ -0,0 +1,215 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=16, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=4, + order=["z", "z-trans", "hilbert", "hilbert-trans"], + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + cls_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("nuScenes", "SemanticKITTI", "Waymo"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 50 +eval_epoch = 50 +optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.002, 0.0002], + pct_start=0.04, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=100.0, +) +param_dicts = [dict(keyword="block", lr=0.0002)] + +# dataset settings +dataset_type = "NuScenesDataset" +data_root = "data/nuscenes" +ignore_index = -1 +names = [ + "barrier", + "bicycle", + "bus", + "car", + "construction_vehicle", + "motorcycle", + "pedestrian", + "traffic_cone", + "trailer", + "truck", + "driveable_surface", + "other_flat", + "sidewalk", + "terrain", + "manmade", + "vegetation", +] + +data = dict( + num_classes=16, + ignore_index=ignore_index, + names=names, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), 
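+            # Outdoor LiDAR is voxelized at a coarser 0.05 m grid (GridSample
+            # below); the indoor configs in this codebase use 0.02 m.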
+ # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + # dict(type="PointClip", point_cloud_range=(-51.2, -51.2, -4, 51.2, 51.2, 2.4)), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode='center'), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.025, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_inverse=True, + ), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "strength"), + ), + crop=None, + post_transform=[ + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ignore_index=ignore_index, + ), +) diff --git a/Pointcept/configs/nuscenes/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/nuscenes/semseg-spunet-v1m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..d6b6a126086a210335b27953a2e620bc74e56503 --- /dev/null +++ b/Pointcept/configs/nuscenes/semseg-spunet-v1m1-0-base.py @@ -0,0 +1,183 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=4, + num_classes=16, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 50 +eval_epoch = 50 +optimizer = dict(type="AdamW", lr=0.002, 
weight_decay=0.005) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.04, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=100.0, +) + +# dataset settings +dataset_type = "NuScenesDataset" +data_root = "data/nuscenes" +ignore_index = -1 +names = [ + "barrier", + "bicycle", + "bus", + "car", + "construction_vehicle", + "motorcycle", + "pedestrian", + "traffic_cone", + "trailer", + "truck", + "driveable_surface", + "other_flat", + "sidewalk", + "terrain", + "manmade", + "vegetation", +] + +data = dict( + num_classes=16, + ignore_index=ignore_index, + names=names, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + # dict(type="PointClip", point_cloud_range=(-51.2, -51.2, -4, 51.2, 51.2, 2.4)), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode='center'), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.025, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_inverse=True, + ), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "strength"), + ), + crop=None, + post_transform=[ + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", 
p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ignore_index=ignore_index, + ), +) diff --git a/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02-sc-aug.py b/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02-sc-aug.py new file mode 100644 index 0000000000000000000000000000000000000000..2cb44ce8269da20036a4b2b7e61109bb97529709 --- /dev/null +++ b/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02-sc-aug.py @@ -0,0 +1,180 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 12 +mix_prob = 0.0 +empty_cache = False +enable_amp = True +evaluate = True + +class_names = [ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", +] +num_classes = 13 +segment_ignore_index = (-1,) + +# model settings +model = dict( + type="PG-v1m1", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=0, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + backbone_out_channels=96, + semantic_num_classes=num_classes, + semantic_ignore_index=-1, + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + cluster_thresh=1.5, + cluster_closed_points=300, + cluster_propose_points=100, + cluster_min_points=50, +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict(type="PolyLR") + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=num_classes, + ignore_index=-1, + names=class_names, + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + # dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.1), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "instance_centroid", + "bbox", + ), + 
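+                # Per Pointcept's Collect convention (assumed here), feat_keys
+                # are concatenated channel-wise into a single "feat" tensor,
+                # while the keys above are collected as separate entries.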
feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={ + "coord": "origin_coord", + "segment": "origin_segment", + "instance": "origin_instance", + }, + ), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + # dict(type="SphereCrop", point_max=1000000, mode='center'), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "origin_coord", + "origin_segment", + "origin_instance", + "instance_centroid", + "bbox", + ), + feat_keys=("color", "normal"), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + ), + ], + test_mode=False, + ), + test=dict(), # currently not available +) + +hooks = [ + dict(type="CheckpointLoader", keywords="module.", replacement="module."), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict( + type="InsSegEvaluator", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="CheckpointSaver", save_freq=None), +] diff --git a/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02.py b/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02.py new file mode 100644 index 0000000000000000000000000000000000000000..826d3731ac6662e03a956daa1f213f03b5c1984a --- /dev/null +++ b/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base-vs0p02.py @@ -0,0 +1,180 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 12 +mix_prob = 0.0 +empty_cache = False +enable_amp = True +evaluate = True + +class_names = [ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", +] +num_classes = 13 +segment_ignore_index = (-1,) + +# model settings +model = dict( + type="PG-v1m1", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=0, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + backbone_out_channels=96, + semantic_num_classes=num_classes, + semantic_ignore_index=-1, + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + cluster_thresh=1.5, + cluster_closed_points=300, + cluster_propose_points=100, + cluster_min_points=50, +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict(type="PolyLR") + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=num_classes, + ignore_index=-1, + names=class_names, + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", 
angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + # dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.005), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "instance_centroid", + "bbox", + ), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={ + "coord": "origin_coord", + "segment": "origin_segment", + "instance": "origin_instance", + }, + ), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + # dict(type="SphereCrop", point_max=1000000, mode='center'), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "origin_coord", + "origin_segment", + "origin_instance", + "instance_centroid", + "bbox", + ), + feat_keys=("color", "normal"), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + ), + ], + test_mode=False, + ), + test=dict(), # currently not available +) + +hooks = [ + dict(type="CheckpointLoader", keywords="module.", replacement="module."), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict( + type="InsSegEvaluator", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="CheckpointSaver", save_freq=None), +] diff --git a/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base.py b/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base.py new file mode 100644 index 0000000000000000000000000000000000000000..3ce06b51d1a566c19305e486a614b73d4594bc58 --- /dev/null +++ b/Pointcept/configs/s3dis/insseg-pointgroup-v1m1-0-spunet-base.py @@ -0,0 +1,181 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 12 +mix_prob = 0.0 +empty_cache = False +enable_amp = True +evaluate = True + +class_names = [ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", +] +num_classes = 13 +segment_ignore_index = (-1,) + +# model settings +model = dict( + type="PG-v1m1", + backbone=dict( + 
type="SpUNet-v1m1", + in_channels=6, + num_classes=0, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + backbone_out_channels=96, + semantic_num_classes=num_classes, + semantic_ignore_index=-1, + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + cluster_thresh=1.5, + cluster_closed_points=300, + cluster_propose_points=100, + cluster_min_points=50, + voxel_size=0.05, +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict(type="PolyLR") + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=num_classes, + ignore_index=-1, + names=class_names, + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + # dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.005), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "instance_centroid", + "bbox", + ), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={ + "coord": "origin_coord", + "segment": "origin_segment", + "instance": "origin_instance", + }, + ), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + # dict(type="SphereCrop", point_max=1000000, mode='center'), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "origin_coord", + "origin_segment", + "origin_instance", + "instance_centroid", + "bbox", + ), + feat_keys=("color", "normal"), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + ), + ], + test_mode=False, + ), + 
test=dict(), # currently not available +) + +hooks = [ + dict(type="CheckpointLoader", keywords="module.", replacement="module."), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict( + type="InsSegEvaluator", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="CheckpointSaver", save_freq=None), +] diff --git a/Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft-vs0p05.py b/Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft-vs0p05.py new file mode 100644 index 0000000000000000000000000000000000000000..b1f5d0dbf5c4363eabb7f017422d005dc6ff1b57 --- /dev/null +++ b/Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft-vs0p05.py @@ -0,0 +1,273 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = True +evaluate = True +find_unused_parameters = True + +class_names = [ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", +] +num_classes = 13 +segment_ignore_index = (-1,) + +# model settings +model = dict( + type="PG-v1m1", + backbone=dict( + type="PPT-v1m1", + backbone=dict( + type="SpUNet-v1m3", + in_channels=6, + num_classes=0, + base_channels=32, + context_channels=256, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + conditions=("ScanNet", "S3DIS", "Structured3D"), + zero_init=False, + norm_decouple=True, + norm_adaptive=True, + norm_affine=True, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], + backbone_out_channels=96, + context_channels=256, + conditions=("Structured3D", "ScanNet", "S3DIS"), + template="[x]", + clip_model="ViT-B/16", + class_name=( + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "bookcase", + "picture", + "counter", + "desk", + "shelves", + "curtain", + "dresser", + "pillow", + "mirror", + "ceiling", + "refrigerator", + "television", + "shower curtain", + "nightstand", + "toilet", + "sink", + "lamp", + "bathtub", + "garbagebin", + "board", + "beam", + "column", + "clutter", + "otherstructure", + "otherfurniture", + "otherprop", + ), + valid_index=( + ( + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 11, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 23, + 25, + 26, + 33, + 34, + 35, + ), + (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34), + (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32), + ), + backbone_mode=True, + ), + backbone_out_channels=96, + semantic_num_classes=num_classes, + semantic_ignore_index=-1, + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + cluster_thresh=1.5, + cluster_closed_points=300, + cluster_propose_points=100, + cluster_min_points=50, + voxel_size=0.05, +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict(type="PolyLR") + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=num_classes, + ignore_index=-1, + names=class_names, + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, 
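+                # assumed semantics: drop ~20% of points, applied to ~50% of
+                # training samples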
dropout_application_ratio=0.5 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + # dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.005), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="Add", keys_dict={"condition": "S3DIS"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "instance_centroid", + "bbox", + "condition", + ), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={ + "coord": "origin_coord", + "segment": "origin_segment", + "instance": "origin_instance", + }, + ), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + # dict(type="SphereCrop", point_max=1000000, mode='center'), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="Add", keys_dict={"condition": "S3DIS"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "origin_coord", + "origin_segment", + "origin_instance", + "instance_centroid", + "bbox", + "condition", + ), + feat_keys=("color", "normal"), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + ), + ], + test_mode=False, + ), + test=dict(), # currently not available +) + +hooks = [ + dict(type="CheckpointLoader", keywords="module.", replacement="module.backbone."), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict( + type="InsSegEvaluator", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="CheckpointSaver", save_freq=None), +] diff --git a/Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py b/Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py new file mode 100644 index 0000000000000000000000000000000000000000..ca4aa554cedd24771d12b645241773757a0ef253 --- /dev/null +++ b/Pointcept/configs/s3dis/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py @@ -0,0 +1,273 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all 
gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = True +evaluate = True +find_unused_parameters = True + +class_names = [ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", +] +num_classes = 13 +segment_ignore_index = (-1,) + +# model settings +model = dict( + type="PG-v1m1", + backbone=dict( + type="PPT-v1m1", + backbone=dict( + type="SpUNet-v1m3", + in_channels=6, + num_classes=0, + base_channels=32, + context_channels=256, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + conditions=("ScanNet", "S3DIS", "Structured3D"), + zero_init=False, + norm_decouple=True, + norm_adaptive=True, + norm_affine=True, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], + backbone_out_channels=96, + context_channels=256, + conditions=("Structured3D", "ScanNet", "S3DIS"), + template="[x]", + clip_model="ViT-B/16", + class_name=( + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "bookcase", + "picture", + "counter", + "desk", + "shelves", + "curtain", + "dresser", + "pillow", + "mirror", + "ceiling", + "refrigerator", + "television", + "shower curtain", + "nightstand", + "toilet", + "sink", + "lamp", + "bathtub", + "garbagebin", + "board", + "beam", + "column", + "clutter", + "otherstructure", + "otherfurniture", + "otherprop", + ), + valid_index=( + ( + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 11, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 23, + 25, + 26, + 33, + 34, + 35, + ), + (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34), + (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32), + ), + backbone_mode=True, + ), + backbone_out_channels=96, + semantic_num_classes=num_classes, + semantic_ignore_index=-1, + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + cluster_thresh=1.5, + cluster_closed_points=300, + cluster_propose_points=100, + cluster_min_points=50, + voxel_size=0.02, +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict(type="PolyLR") + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=num_classes, + ignore_index=-1, + names=class_names, + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + # dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.005), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # 
dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="Add", keys_dict={"condition": "S3DIS"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "instance_centroid", + "bbox", + "condition", + ), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={ + "coord": "origin_coord", + "segment": "origin_segment", + "instance": "origin_instance", + }, + ), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + # dict(type="SphereCrop", point_max=1000000, mode='center'), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="Add", keys_dict={"condition": "S3DIS"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "origin_coord", + "origin_segment", + "origin_instance", + "instance_centroid", + "bbox", + "condition", + ), + feat_keys=("color", "normal"), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + ), + ], + test_mode=False, + ), + test=dict(), # currently not available +) + +hooks = [ + dict(type="CheckpointLoader", keywords="module.", replacement="module.backbone."), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict( + type="InsSegEvaluator", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="CheckpointSaver", save_freq=None), +] diff --git a/Pointcept/configs/s3dis/semseg-minkunet34c-0-base.py b/Pointcept/configs/s3dis/semseg-minkunet34c-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..8234bb4b52de86edb05b540ee7250fdf53a7d02e --- /dev/null +++ b/Pointcept/configs/s3dis/semseg-minkunet34c-0-base.py @@ -0,0 +1,174 @@ +_base_ = ["../_base_/default_runtime.py"] +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict(type="MinkUNet34C", in_channels=6, out_channels=13), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict(type="PolyLR") + + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=13, + ignore_index=-1, + names=[ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ], + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + 
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "color", "segment"), + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=["coord", "color"], + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={"coord": "origin_coord", "segment": "origin_segment"}, + ), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "color", "segment"), + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "origin_coord", + "segment", + "origin_segment", + ), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + feat_keys=["coord", "color"], + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + keys=("coord", "color"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ), +) diff --git a/Pointcept/configs/s3dis/semseg-ppt-v1m1-0-s3-sc-st-spunet.py 
b/Pointcept/configs/s3dis/semseg-ppt-v1m1-0-s3-sc-st-spunet.py new file mode 100644 index 0000000000000000000000000000000000000000..e50ebbf10a1989342f39a476c1c3348671a78e95 --- /dev/null +++ b/Pointcept/configs/s3dis/semseg-ppt-v1m1-0-s3-sc-st-spunet.py @@ -0,0 +1,496 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 24 # bs: total bs in all gpus +num_worker = 48 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +find_unused_parameters = True + +# trainer +train = dict( + type="MultiDatasetTrainer", +) + +# model settings +model = dict( + type="PPT-v1m1", + backbone=dict( + type="SpUNet-v1m3", + in_channels=6, + num_classes=0, + base_channels=32, + context_channels=256, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + conditions=("ScanNet", "S3DIS", "Structured3D"), + zero_init=False, + norm_decouple=True, + norm_adaptive=True, + norm_affine=True, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], + backbone_out_channels=96, + context_channels=256, + conditions=("Structured3D", "ScanNet", "S3DIS"), + template="[x]", + clip_model="ViT-B/16", + class_name=( + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "bookcase", + "picture", + "counter", + "desk", + "shelves", + "curtain", + "dresser", + "pillow", + "mirror", + "ceiling", + "refrigerator", + "television", + "shower curtain", + "nightstand", + "toilet", + "sink", + "lamp", + "bathtub", + "garbagebin", + "board", + "beam", + "column", + "clutter", + "otherstructure", + "otherfurniture", + "otherprop", + ), + valid_index=( + ( + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 11, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 23, + 25, + 26, + 33, + 34, + 35, + ), + (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34), + (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32), + ), + backbone_mode=False, +) + +# scheduler settings +epoch = 100 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) +# param_dicts = [dict(keyword="modulation", lr=0.005)] + +# dataset settings +data = dict( + num_classes=13, + ignore_index=-1, + names=[ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ], + train=dict( + type="ConcatDataset", + datasets=[ + # Structured3D + dict( + type="Structured3DDataset", + split="train", + data_root="data/structured3d", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + 
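+                    # Chromatic* transforms perturb color features only; the
+                    # point geometry is left untouched.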
dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "Structured3D"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=4, # sampling weight + ), + # ScanNet + dict( + type="ScanNetDataset", + split="train", + data_root="data/scannet", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=2, # sampling weight + ), + # S3DIS + dict( + type="S3DISDataset", + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root="data/s3dis", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # 
dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.6, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "S3DIS"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=1, # sampling weight + ), + ], + ), + val=dict( + type="S3DISDataset", + split="Area_5", + data_root="data/s3dis", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict(type="Add", keys_dict={"condition": "S3DIS"}), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type="S3DISDataset", + split="Area_5", + data_root="data/s3dis", + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "S3DIS"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + 
axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/s3dis/semseg-pt-v1-0-base.py b/Pointcept/configs/s3dis/semseg-pt-v1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..a925757abee6d3000ee4c3613e5b3bc49dbe7dc6 --- /dev/null +++ b/Pointcept/configs/s3dis/semseg-pt-v1-0-base.py @@ -0,0 +1,170 @@ +_base_ = ["../_base_/default_runtime.py"] +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PointTransformer-Seg50", + in_channels=6, + num_classes=13, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + + +# scheduler settings +epoch = 3000 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1) + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=13, + ignore_index=-1, + names=[ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ], + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="train", + keys=("coord", "color", "segment"), + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=["coord", "color"], + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={"coord": "origin_coord", "segment": "origin_segment"}, + ), + dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="train", + keys=("coord", "color", "segment"), + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + offset_keys_dict=dict(offset="coord"), + 
feat_keys=["coord", "color"], + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="test", + keys=("coord", "color"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ), +) diff --git a/Pointcept/configs/s3dis/semseg-pt-v2m1-0-base.py b/Pointcept/configs/s3dis/semseg-pt-v2m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..3aca26a731096c1a1af9a34085c11184e6061d63 --- /dev/null +++ b/Pointcept/configs/s3dis/semseg-pt-v2m1-0-base.py @@ -0,0 +1,189 @@ +_base_ = ["../_base_/default_runtime.py"] +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = False + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m1", + in_channels=6, + num_classes=13, + patch_embed_depth=2, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=16, + enc_depths=(2, 6, 2), + enc_channels=(96, 192, 384), + enc_groups=(12, 24, 48), + enc_neighbours=(16, 16, 16), + dec_depths=(1, 1, 1), + dec_channels=(48, 96, 192), + dec_groups=(6, 12, 24), + dec_neighbours=(16, 16, 16), + grid_sizes=(0.1, 0.2, 0.4), + attn_qkv_bias=True, + pe_multiplier=True, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="interp", # map / interp + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1) + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=13, + ignore_index=-1, + names=[ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ], + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + # dict(type="RandomRotate", 
angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="train", + keys=("coord", "color", "segment"), + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=80000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=["coord", "color"], + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={"coord": "origin_coord", "segment": "origin_segment"}, + ), + dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="train", + keys=("coord", "color", "segment"), + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + offset_keys_dict=dict(offset="coord"), + feat_keys=["coord", "color"], + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="test", + keys=("coord", "color"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ), +) diff --git a/Pointcept/configs/s3dis/semseg-pt-v2m2-0-base.py b/Pointcept/configs/s3dis/semseg-pt-v2m2-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..a99cb34fea520e3cb28fbb2e9bfcfd59dea1cdda --- /dev/null +++ b/Pointcept/configs/s3dis/semseg-pt-v2m2-0-base.py @@ -0,0 +1,189 @@ +_base_ = ["../_base_/default_runtime.py"] +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + 
type="PT-v2m2", + in_channels=6, + num_classes=13, + patch_embed_depth=2, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=16, + enc_depths=(2, 6, 2), + enc_channels=(96, 192, 384), + enc_groups=(12, 24, 48), + enc_neighbours=(16, 16, 16), + dec_depths=(1, 1, 1), + dec_channels=(48, 96, 192), + dec_groups=(6, 12, 24), + dec_neighbours=(16, 16, 16), + grid_sizes=(0.1, 0.2, 0.4), + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="interp", # map / interp + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1) + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=13, + ignore_index=-1, + names=[ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ], + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="train", + keys=("coord", "color", "segment"), + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=80000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=["coord", "color"], + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={"coord": "origin_coord", "segment": "origin_segment"}, + ), + dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="train", + keys=("coord", "color", "segment"), + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + offset_keys_dict=dict(offset="coord"), + feat_keys=["coord", "color"], + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[dict(type="CenterShift", apply_z=True), 
dict(type="NormalizeColor")], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="test", + keys=("coord", "color"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ), +) diff --git a/Pointcept/configs/s3dis/semseg-pt-v2m2-0-lovasz.py b/Pointcept/configs/s3dis/semseg-pt-v2m2-0-lovasz.py new file mode 100644 index 0000000000000000000000000000000000000000..0a8b71a9ae8c676bd2d4f86dfbfa8adc79cf06d3 --- /dev/null +++ b/Pointcept/configs/s3dis/semseg-pt-v2m2-0-lovasz.py @@ -0,0 +1,192 @@ +_base_ = ["../_base_/default_runtime.py"] +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=6, + num_classes=13, + patch_embed_depth=2, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=16, + enc_depths=(2, 6, 2), + enc_channels=(96, 192, 384), + enc_groups=(12, 24, 48), + enc_neighbours=(16, 16, 16), + dec_depths=(1, 1, 1), + dec_channels=(48, 96, 192), + dec_groups=(6, 12, 24), + dec_neighbours=(16, 16, 16), + grid_sizes=(0.1, 0.2, 0.4), + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="interp", # map / interp + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1) + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=13, + ignore_index=-1, + names=[ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ], + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # 
dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="train", + keys=("coord", "color", "segment"), + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=80000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=["coord", "color"], + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={"coord": "origin_coord", "segment": "origin_segment"}, + ), + dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="train", + keys=("coord", "color", "segment"), + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + offset_keys_dict=dict(offset="coord"), + feat_keys=["coord", "color"], + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="test", + keys=("coord", "color"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ), +) diff --git a/Pointcept/configs/s3dis/semseg-pt-v2m2-1-one-cycle.py b/Pointcept/configs/s3dis/semseg-pt-v2m2-1-one-cycle.py new file mode 100644 index 0000000000000000000000000000000000000000..e7d2493b92fcc21a00f2fecb270fb289dea58c0d --- /dev/null +++ b/Pointcept/configs/s3dis/semseg-pt-v2m2-1-one-cycle.py @@ -0,0 +1,196 @@ +_base_ = ["../_base_/default_runtime.py"] +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=6, + num_classes=13, + patch_embed_depth=2, + 
patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=16, + enc_depths=(2, 6, 2), + enc_channels=(96, 192, 384), + enc_groups=(12, 24, 48), + enc_neighbours=(16, 16, 16), + dec_depths=(1, 1, 1), + dec_channels=(48, 96, 192), + dec_groups=(6, 12, 24), + dec_neighbours=(16, 16, 16), + grid_sizes=(0.1, 0.2, 0.4), + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="interp", # map / interp + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=13, + ignore_index=-1, + names=[ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ], + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + # dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="train", + keys=("coord", "color", "segment"), + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=80000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=["coord", "color"], + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={"coord": "origin_coord", "segment": "origin_segment"}, + ), + dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="train", + keys=("coord", "color", "segment"), + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + offset_keys_dict=dict(offset="coord"), + feat_keys=["coord", "color"], + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[dict(type="CenterShift", apply_z=True), 
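            # Test pipeline matches semseg-pt-v2m2-0-base.py; this config differs
            # only in the optimization recipe above (AdamW, lr=0.005, OneCycleLR
            # with cosine annealing and a 5% warmup phase via pct_start).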
dict(type="NormalizeColor")], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="test", + keys=("coord", "color"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ), +) diff --git a/Pointcept/configs/s3dis/semseg-pt-v3m1-0-base.py b/Pointcept/configs/s3dis/semseg-pt-v3m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..89d34ba0ff00d52373f9cf791afb554e99ef7a57 --- /dev/null +++ b/Pointcept/configs/s3dis/semseg-pt-v3m1-0-base.py @@ -0,0 +1,225 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=13, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + cls_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=13, + ignore_index=-1, + names=[ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ], + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", 
dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.6, mode="random"), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={"coord": "origin_coord", "segment": "origin_segment"}, + ), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "origin_coord", + "segment", + "origin_segment", + ), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1, 1]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ), +) diff --git a/Pointcept/configs/s3dis/semseg-pt-v3m1-1-rpe.py b/Pointcept/configs/s3dis/semseg-pt-v3m1-1-rpe.py new file mode 100644 
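The file added next, `semseg-pt-v3m1-1-rpe.py`, is the PTv3 variant that trades the flash-attention path for relative position encoding (RPE). A sketch of the settings that differ from the base PTv3 config above, inferred by comparing the two configs:

```python
# Settings that change in the RPE variant (sketch, not the full config):
enable_rpe = True             # relative position encoding in attention
enable_flash = False          # the flash-attention path is turned off with RPE
enc_patch_size = (128,) * 5   # far smaller serialized patches than 1024
dec_patch_size = (128,) * 4
upcast_attention = True       # fp32 upcasting for stability without flash
upcast_softmax = True
```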
index 0000000000000000000000000000000000000000..ab612fc5449920103df6ea037588c87833ba73d5 --- /dev/null +++ b/Pointcept/configs/s3dis/semseg-pt-v3m1-1-rpe.py @@ -0,0 +1,225 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=13, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=["z", "z-trans", "hilbert", "hilbert-trans"], + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(128, 128, 128, 128, 128), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(128, 128, 128, 128), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=True, + enable_flash=False, + upcast_attention=True, + upcast_softmax=True, + cls_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=13, + ignore_index=-1, + names=[ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ], + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.6, mode="random"), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # 
dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={"coord": "origin_coord", "segment": "origin_segment"}, + ), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "origin_coord", + "segment", + "origin_segment", + ), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1, 1]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ), +) diff --git a/Pointcept/configs/s3dis/semseg-pt-v3m1-2-ppt-extreme.py b/Pointcept/configs/s3dis/semseg-pt-v3m1-2-ppt-extreme.py new file mode 100644 index 0000000000000000000000000000000000000000..e2d892af5b5317eef3b3554c3a49c0c1861a2645 --- /dev/null +++ b/Pointcept/configs/s3dis/semseg-pt-v3m1-2-ppt-extreme.py @@ -0,0 +1,487 @@ +""" +PTv3 + PPT +Pre-trained on ScanNet + Structured3D +(S3DIS is commented by default as a long data time issue of S3DIS: https://github.com/Pointcept/Pointcept/issues/103) +In the original PPT paper, 3 datasets are jointly trained and validated on the three datasets jointly with +one shared weight model. In PTv3, we trained on multi-dataset but only validated on one single dataset to +achieve extreme performance on one single dataset. + +To enable joint training on three datasets, uncomment config for the S3DIS dataset and change the "loop" of + Structured3D and ScanNet to 4 and 2 respectively. 
+""" + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 24 # bs: total bs in all gpus +num_worker = 48 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +find_unused_parameters = True + +# trainer +train = dict( + type="MultiDatasetTrainer", +) + +# model settings +model = dict( + type="PPT-v1m1", + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + cls_mode=False, + pdnorm_bn=True, + pdnorm_ln=True, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + backbone_out_channels=64, + context_channels=256, + conditions=("Structured3D", "ScanNet", "S3DIS"), + template="[x]", + clip_model="ViT-B/16", + # fmt: off + class_name=( + "wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door", + "window", "bookshelf", "bookcase", "picture", "counter", "desk", "shelves", "curtain", + "dresser", "pillow", "mirror", "ceiling", "refrigerator", "television", "shower curtain", "nightstand", + "toilet", "sink", "lamp", "bathtub", "garbagebin", "board", "beam", "column", + "clutter", "otherstructure", "otherfurniture", "otherprop", + ), + valid_index=( + (0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 33, 34, 35), + (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34), + (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32), + ), + # fmt: on + backbone_mode=False, +) + +# scheduler settings +epoch = 100 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.005, 0.0005], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0005)] + +# dataset settings +data = dict( + num_classes=13, + ignore_index=-1, + names=[ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ], + train=dict( + type="ConcatDataset", + datasets=[ + # Structured3D + dict( + type="Structured3DDataset", + split=["train", "val", "test"], + data_root="data/structured3d", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + 
dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict( + # type="ElasticDistortion", + # distortion_params=[[0.2, 0.4], [0.8, 1.6]], + # ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "Structured3D"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=4, # sampling weight + ), + # ScanNet + dict( + type="ScanNetDataset", + split="train", + data_root="data/scannet", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict( + # type="ElasticDistortion", + # distortion_params=[[0.2, 0.4], [0.8, 1.6]], + # ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=2, # sampling weight + ), + # S3DIS + dict( + type="S3DISDataset", + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root="data/s3dis", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict( 
+ # type="ElasticDistortion", + # distortion_params=[[0.2, 0.4], [0.8, 1.6]], + # ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.6, mode="random"), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "S3DIS"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=1, # sampling weight + ), + ], + ), + val=dict( + type="S3DISDataset", + split="Area_5", + data_root="data/s3dis", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={"coord": "origin_coord", "segment": "origin_segment"}, + ), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict(type="Add", keys_dict={"condition": "S3DIS"}), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "origin_coord", + "segment", + "origin_segment", + "condition", + ), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type="S3DISDataset", + split="Area_5", + data_root="data/s3dis", + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "S3DIS"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + 
axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/s3dis/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/s3dis/semseg-spunet-v1m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..6545ca151ef59f6ea6e151a90de5e188db7e8ea7 --- /dev/null +++ b/Pointcept/configs/s3dis/semseg-spunet-v1m1-0-base.py @@ -0,0 +1,168 @@ +_base_ = ["../_base_/default_runtime.py"] +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=13, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict(type="PolyLR") + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=13, + ignore_index=-1, + names=[ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ], + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "color", "segment"), + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=["coord", "color"], + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + 
data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "color", "segment"), + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=["coord", "color"], + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + keys=("coord", "color"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ), +) diff --git a/Pointcept/configs/s3dis/semseg-spunet-v1m1-0-cn-base.py b/Pointcept/configs/s3dis/semseg-spunet-v1m1-0-cn-base.py new file mode 100644 index 0000000000000000000000000000000000000000..ee037115e69b6d7086109a31f7046520b42243f8 --- /dev/null +++ b/Pointcept/configs/s3dis/semseg-spunet-v1m1-0-cn-base.py @@ -0,0 +1,181 @@ +# spconv is too fast, data loading speed is bottleneck. Cache data is a better choice. 
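# Compared with semseg-spunet-v1m1-0-base.py, this "cn" variant feeds
# (color, normal) features to the backbone instead of (coord, color), relies on
# GridSample's default keys, and keeps origin_coord/origin_segment copies in
# the val pipeline so evaluation maps back to the full-resolution points
# (a reading of the config below, not added behavior).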
+ + +_base_ = ["../_base_/default_runtime.py"] +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=13, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict(type="PolyLR") + + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=13, + ignore_index=-1, + names=[ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ], + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=["color", "normal"], + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={"coord": "origin_coord", "segment": "origin_segment"}, + ), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "origin_coord", + "segment", + "origin_segment", + ), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + feat_keys=["color", "normal"], + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + 
return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ), +) diff --git a/Pointcept/configs/s3dis/semseg-spunet-v1m2-0-base.py b/Pointcept/configs/s3dis/semseg-spunet-v1m2-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..98511c5919b5f40271e4d500a30ed0bd42a6d2c9 --- /dev/null +++ b/Pointcept/configs/s3dis/semseg-spunet-v1m2-0-base.py @@ -0,0 +1,184 @@ +# spconv is too fast, data loading speed is bottleneck. Cache data is a better choice. + + +_base_ = ["../_base_/default_runtime.py"] +# misc custom setting +batch_size = 48 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m2", + in_channels=3, + num_classes=13, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + bn_momentum=0.1, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict(type="PolyLR") + + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=13, + ignore_index=-1, + names=[ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ], + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", 
"color", "segment"), + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=["color"], + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={"coord": "origin_coord", "segment": "origin_segment"}, + ), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "color", "segment"), + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "origin_coord", + "segment", + "origin_segment", + ), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + feat_keys=["color"], + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + keys=("coord", "color"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ), +) diff --git a/Pointcept/configs/s3dis/semseg-swin3d-v1m1-0-small.py b/Pointcept/configs/s3dis/semseg-swin3d-v1m1-0-small.py new file mode 100644 index 0000000000000000000000000000000000000000..119775214ad1e5e0f508d166e9aa9edfc987b76b --- /dev/null +++ b/Pointcept/configs/s3dis/semseg-swin3d-v1m1-0-small.py @@ -0,0 +1,184 @@ +_base_ = ["../_base_/default_runtime.py"] +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="Swin3D-v1m1", + in_channels=9, + num_classes=13, + base_grid_size=0.02, + depths=[2, 4, 9, 4, 4], + channels=[48, 96, 192, 384, 384], + num_heads=[6, 6, 12, 24, 24], + window_sizes=[5, 7, 7, 7, 7], + quant_size=4, + drop_path_rate=0.3, + up_k=3, + num_layers=5, + stem_transformer=True, + down_stride=3, + upsample="linear_attn", + knn_down=True, + cRSE="XYZ_RGB_NORM", + fp16_mode=1, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.05) +scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], 
gamma=0.1) +param_dicts = [dict(keyword="blocks", lr=0.0001)] + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=13, + ignore_index=-1, + names=[ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ], + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.8, 1.2]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_displacement=True, + ), + dict(type="SphereCrop", point_max=80000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_displacement=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + return_displacement=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + 
dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ), +) diff --git a/Pointcept/configs/s3dis/semseg-swin3d-v1m1-1-large.py b/Pointcept/configs/s3dis/semseg-swin3d-v1m1-1-large.py new file mode 100644 index 0000000000000000000000000000000000000000..02c43d2debf08f26305d5d5636b857b5f34f17c4 --- /dev/null +++ b/Pointcept/configs/s3dis/semseg-swin3d-v1m1-1-large.py @@ -0,0 +1,191 @@ +_base_ = ["../_base_/default_runtime.py"] +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="Swin3D-v1m1", + in_channels=9, + num_classes=13, + base_grid_size=0.02, + depths=[2, 4, 9, 4, 4], + channels=[80, 160, 320, 640, 640], + num_heads=[10, 10, 20, 40, 40], + window_sizes=[5, 7, 7, 7, 7], + quant_size=4, + drop_path_rate=0.3, + up_k=3, + num_layers=5, + stem_transformer=True, + down_stride=3, + upsample="linear_attn", + knn_down=True, + cRSE="XYZ_RGB_NORM", + fp16_mode=1, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 3000 +optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.001, 0.0001], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="blocks", lr=0.0001)] + +# dataset settings +dataset_type = "S3DISDataset" +data_root = "data/s3dis" + +data = dict( + num_classes=13, + ignore_index=-1, + names=[ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ], + train=dict( + type=dataset_type, + split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.8, 1.2]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_displacement=True, + ), + dict(type="SphereCrop", point_max=80000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + 
keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_displacement=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="Area_5", + data_root=data_root, + transform=[dict(type="CenterShift", apply_z=True), dict(type="NormalizeColor")], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.04, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + return_displacement=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [dict(type="RandomScale", scale=[0.9, 0.9])], + [dict(type="RandomScale", scale=[0.95, 0.95])], + [dict(type="RandomScale", scale=[1, 1])], + [dict(type="RandomScale", scale=[1.05, 1.05])], + [dict(type="RandomScale", scale=[1.1, 1.1])], + [ + dict(type="RandomScale", scale=[0.9, 0.9]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[0.95, 0.95]), + dict(type="RandomFlip", p=1), + ], + [dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)], + [ + dict(type="RandomScale", scale=[1.05, 1.05]), + dict(type="RandomFlip", p=1), + ], + [ + dict(type="RandomScale", scale=[1.1, 1.1]), + dict(type="RandomFlip", p=1), + ], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/insseg-pointgroup-v1m1-0-spunet-base.py b/Pointcept/configs/scannet/insseg-pointgroup-v1m1-0-spunet-base.py new file mode 100644 index 0000000000000000000000000000000000000000..3dec6d47c51545b55b543f80ac0c13556bacc84f --- /dev/null +++ b/Pointcept/configs/scannet/insseg-pointgroup-v1m1-0-spunet-base.py @@ -0,0 +1,187 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 12 +mix_prob = 0 +empty_cache = False +enable_amp = True +evaluate = True + +class_names = [ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", +] +num_classes = 20 +segment_ignore_index = (-1, 0, 1) + +# model settings +model = dict( + type="PG-v1m1", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=0, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + backbone_out_channels=96, + semantic_num_classes=num_classes, + semantic_ignore_index=-1, + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + cluster_thresh=1.5, + cluster_closed_points=300, + cluster_propose_points=100, + cluster_min_points=50, +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.1, momentum=0.9, 
weight_decay=0.0001, nesterov=True) +scheduler = dict(type="PolyLR") + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=num_classes, + ignore_index=-1, + names=class_names, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.1), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "instance_centroid", + "bbox", + ), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={ + "coord": "origin_coord", + "segment": "origin_segment", + "instance": "origin_instance", + }, + ), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + # dict(type="SphereCrop", point_max=1000000, mode='center'), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "origin_coord", + "origin_segment", + "origin_instance", + "instance_centroid", + "bbox", + ), + feat_keys=("color", "normal"), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + ), + ], + test_mode=False, + ), + test=dict(), # currently not available +) + +hooks = [ + dict(type="CheckpointLoader", keywords="module.", replacement="module."), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict( + type="InsSegEvaluator", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="CheckpointSaver", save_freq=None), +] diff --git a/Pointcept/configs/scannet/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py b/Pointcept/configs/scannet/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py new file mode 100644 index 
0000000000000000000000000000000000000000..09789883e5ae3c79f1cd3f3282380fdb30c21782 --- /dev/null +++ b/Pointcept/configs/scannet/insseg-ppt-v1m1-0-pointgroup-spunet-ft.py @@ -0,0 +1,279 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0 +empty_cache = False +enable_amp = True +evaluate = True +find_unused_parameters = True + +class_names = [ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", +] +num_classes = 20 +segment_ignore_index = (-1, 0, 1) + +# model settings +model = dict( + type="PG-v1m1", + backbone=dict( + type="PPT-v1m1", + backbone=dict( + type="SpUNet-v1m3", + in_channels=6, + num_classes=0, + base_channels=32, + context_channels=256, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + conditions=("ScanNet", "S3DIS", "Structured3D"), + zero_init=False, + norm_decouple=True, + norm_adaptive=True, + norm_affine=True, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], + backbone_out_channels=96, + context_channels=256, + conditions=("Structured3D", "ScanNet", "S3DIS"), + template="[x]", + clip_model="ViT-B/16", + class_name=( + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "bookcase", + "picture", + "counter", + "desk", + "shelves", + "curtain", + "dresser", + "pillow", + "mirror", + "ceiling", + "refrigerator", + "television", + "shower curtain", + "nightstand", + "toilet", + "sink", + "lamp", + "bathtub", + "garbagebin", + "board", + "beam", + "column", + "clutter", + "otherstructure", + "otherfurniture", + "otherprop", + ), + valid_index=( + ( + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 11, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 23, + 25, + 26, + 33, + 34, + 35, + ), + (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34), + (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32), + ), + backbone_mode=True, + ), + backbone_out_channels=96, + semantic_num_classes=num_classes, + semantic_ignore_index=-1, + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + cluster_thresh=1.5, + cluster_closed_points=300, + cluster_propose_points=100, + cluster_min_points=50, +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict(type="PolyLR") + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=num_classes, + ignore_index=-1, + names=class_names, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + 
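+            # RandomRotate angles are in units of pi: [-1, 1] spans a full
+            # turn about z, while +/-1/64 gives roughly 2.8 degree tilts
+            # about x/y (assumption based on Pointcept's transform
+            # convention of scaling angle by pi).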
dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.1), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "instance_centroid", + "bbox", + "condition", + ), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={ + "coord": "origin_coord", + "segment": "origin_segment", + "instance": "origin_instance", + }, + ), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + # dict(type="SphereCrop", point_max=1000000, mode='center'), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "origin_coord", + "origin_segment", + "origin_instance", + "instance_centroid", + "bbox", + "condition", + ), + feat_keys=("color", "normal"), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + ), + ], + test_mode=False, + ), + test=dict(), # currently not available +) + +hooks = [ + dict(type="CheckpointLoader", keywords="module.", replacement="module.backbone."), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict( + type="InsSegEvaluator", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="CheckpointSaver", save_freq=None), +] diff --git a/Pointcept/configs/scannet/objdet-cagroup3d-v1m1-0-base.py b/Pointcept/configs/scannet/objdet-cagroup3d-v1m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..8e31e32bf369b651f361135614953ae4fcaf1047 --- /dev/null +++ b/Pointcept/configs/scannet/objdet-cagroup3d-v1m1-0-base.py @@ -0,0 +1,183 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 16 # bs: total bs in all gpus +num_worker = 32 +mix_prob = 0 +empty_cache = False +enable_amp = False +evaluate = True + +class_names = [ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", +] +num_classes = 20 +segment_ignore_index = (-1, 0, 1) + +# model settings +model = dict( + type="PG-v1m1", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=0, + channels=(32, 64, 
128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + backbone_out_channels=96, + semantic_num_classes=num_classes, + semantic_ignore_index=-1, + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + cluster_thresh=1.5, + cluster_closed_points=300, + cluster_propose_points=100, + cluster_min_points=50, +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict(type="PolyLR") + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=num_classes, + ignore_index=-1, + names=class_names, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + # dict(type="CenterShift", apply_z=True), + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5), + # # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + # dict(type="RandomRotate", angle=[-1, 1], axis='z', center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis='x', p=0.5), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis='y', p=0.5), + # dict(type="RandomScale", scale=[0.9, 1.1]), + # # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + # dict(type="RandomFlip", p=0.5), + # dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + # dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + # dict(type="ChromaticTranslation", p=0.95, ratio=0.1), + # dict(type="ChromaticJitter", p=0.95, std=0.05), + # # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + # dict(type="GridSample", + # grid_size=0.02, + # hash_type='fnv', + # mode='train', + # return_grid_coord=True, + # keys=("coord", "color", "normal", "segment", "instance")), + # dict(type="SphereCrop", sample_rate=0.8, mode='random'), + # dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "instance_centroid", + "bbox", + ), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={ + "coord": "origin_coord", + "segment": "origin_segment", + "instance": "origin_instance", + }, + ), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + # dict(type="SphereCrop", point_max=1000000, mode='center'), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "origin_coord", + "origin_segment", + "origin_instance", + "instance_centroid", + "bbox", + ), + feat_keys=("color", "normal"), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + ), + ], + test_mode=False, + ), + test=dict(), # currently not available +) + +hooks = [ + dict(type="CheckpointLoader", keywords="module.", 
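+    # CheckpointLoader rewrites state-dict keys by substituting `keywords`
+    # with `replacement`; here the mapping is an identity ("module." ->
+    # "module."), whereas the PPT fine-tuning config above remaps "module."
+    # to "module.backbone." so a pretrained PPT checkpoint loads into the
+    # PointGroup wrapper. Note: despite the objdet-cagroup3d filename, this
+    # file mirrors the PointGroup instance-seg setup (PG-v1m1 +
+    # InsSegEvaluator) with most train transforms commented out.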
replacement="module."), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict( + type="InsSegEvaluator", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="CheckpointSaver", save_freq=None), +] diff --git a/Pointcept/configs/scannet/pretrain-msc-v1m1-0-spunet-base.py b/Pointcept/configs/scannet/pretrain-msc-v1m1-0-spunet-base.py new file mode 100644 index 0000000000000000000000000000000000000000..3f56a96af8c57a8ebe05380c53c03245420d0d93 --- /dev/null +++ b/Pointcept/configs/scannet/pretrain-msc-v1m1-0-spunet-base.py @@ -0,0 +1,155 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 32 # bs: total bs in all gpus +num_worker = 32 +mix_prob = 0 +empty_cache = False +enable_amp = False +evaluate = False +find_unused_parameters = False + +# model settings +model = dict( + type="MSC-v1m1", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=0, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + backbone_in_channels=6, + backbone_out_channels=96, + mask_grid_size=0.1, + mask_rate=0.4, + view1_mix_prob=0.8, + view2_mix_prob=0, + matching_max_k=8, + matching_max_radius=0.03, + matching_max_pair=8192, + nce_t=0.4, + contrast_weight=1, + reconstruct_weight=1, + reconstruct_color=True, + reconstruct_normal=False, +) + +# scheduler settings +epoch = 600 +optimizer = dict(type="SGD", lr=0.1, momentum=0.8, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.01, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split=["train", "val", "test"], + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="RandomScale", scale=[0.9, 1.1]), + dict(type="Copy", keys_dict={"coord": "origin_coord"}), + dict( + type="ContrastiveViewsGenerator", + view_keys=("coord", "color", "normal", "origin_coord"), + view_trans_cfg=[ + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="RandomColorJitter", + brightness=0.4, + contrast=0.4, + saturation=0.2, + hue=0.02, + p=0.8, + ), + dict(type="ChromaticJitter", p=0.95, std=0.05), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + keys=("origin_coord", "coord", "color", "normal"), + return_grid_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.6, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + ], + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "view1_origin_coord", + "view1_grid_coord", + "view1_coord", + "view1_color", + "view1_normal", + "view2_origin_coord", + "view2_grid_coord", + "view2_coord", + "view2_color", + "view2_normal", + ), + offset_keys_dict=dict( + 
view1_offset="view1_coord", view2_offset="view2_coord" + ), + view1_feat_keys=("view1_color", "view1_normal"), + view2_feat_keys=("view2_color", "view2_normal"), + ), + ], + test_mode=False, + ), +) + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="CheckpointSaver", save_freq=None), +] diff --git a/Pointcept/configs/scannet/pretrain-msc-v1m1-1-spunet-pointcontrast.py b/Pointcept/configs/scannet/pretrain-msc-v1m1-1-spunet-pointcontrast.py new file mode 100644 index 0000000000000000000000000000000000000000..9ff9061f2fb93a2e72343e8f066893fbae4a897a --- /dev/null +++ b/Pointcept/configs/scannet/pretrain-msc-v1m1-1-spunet-pointcontrast.py @@ -0,0 +1,162 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 32 # bs: total bs in all gpus +num_worker = 32 +mix_prob = 0 +empty_cache = False +enable_amp = False +evaluate = False +find_unused_parameters = False + +# model settings +model = dict( + type="MSC-v1m1", + backbone=dict( + type="SpUNet-v1m1", + in_channels=3, + num_classes=0, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + backbone_in_channels=3, + backbone_out_channels=96, + mask_grid_size=0.1, + mask_rate=0, + view1_mix_prob=0, + view2_mix_prob=0, + matching_max_k=8, + matching_max_radius=0.03, + matching_max_pair=4096, + nce_t=0.07, + contrast_weight=1, + reconstruct_weight=1, + reconstruct_color=False, + reconstruct_normal=False, +) + +# scheduler settings +epoch = 10 +eval_epoch = 10 +optimizer = dict(type="SGD", lr=0.1, momentum=0.8, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.01, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetPairDataset" +data_root = "data/scannet_pair" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + data_root=data_root, + view1_transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"coord": "origin_coord"}), + # dict(type="RandomScale", scale=[0.9, 1.1]), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="RandomColorJitter", + brightness=0.4, + contrast=0.4, + saturation=0.2, + hue=0.02, + p=0.8, + ), + dict(type="ChromaticJitter", p=0.95, std=0.05), + dict( + type="GridSample", + grid_size=0.025, + hash_type="fnv", + mode="train", + keys=("origin_coord", "coord", "color"), + return_grid_coord=True, + ), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("origin_coord", "grid_coord", "coord", "color"), + offset_keys_dict=dict(offset="coord"), + feat_keys=["color"], + ), + ], + view2_transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"coord": "origin_coord"}), + # dict(type="RandomScale", scale=[0.9, 1.1]), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1), + 
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="RandomColorJitter", + brightness=0.4, + contrast=0.4, + saturation=0.2, + hue=0.02, + p=0.8, + ), + dict(type="ChromaticJitter", p=0.95, std=0.05), + dict( + type="GridSample", + grid_size=0.025, + hash_type="fnv", + mode="train", + keys=("origin_coord", "coord", "color"), + return_grid_coord=True, + ), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("origin_coord", "grid_coord", "coord", "color"), + offset_keys_dict=dict(offset="coord"), + feat_keys=["color"], + ), + ], + test_mode=False, + ), +) + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="CheckpointSaver", save_freq=None), +] diff --git a/Pointcept/configs/scannet/pretrain-msc-v1m2-0-spunet-csc.py b/Pointcept/configs/scannet/pretrain-msc-v1m2-0-spunet-csc.py new file mode 100644 index 0000000000000000000000000000000000000000..def70881496c3c04d1bf6a32df260fe9cbdac612 --- /dev/null +++ b/Pointcept/configs/scannet/pretrain-msc-v1m2-0-spunet-csc.py @@ -0,0 +1,165 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 32 # bs: total bs in all gpus +num_worker = 32 +mix_prob = 0 +empty_cache = False +enable_amp = False +evaluate = False +find_unused_parameters = False + +# model settings +model = dict( + type="MSC-v1m2", + backbone=dict( + type="SpUNet-v1m1", + in_channels=3, + num_classes=0, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + backbone_in_channels=3, + backbone_out_channels=96, + mask_grid_size=0.1, + mask_rate=0, + view1_mix_prob=0, + view2_mix_prob=0, + matching_max_k=8, + matching_max_radius=0.03, + matching_max_pair=8192, + nce_t=0.4, + contrast_weight=1, + reconstruct_weight=1, + reconstruct_color=False, + reconstruct_normal=False, + partitions=4, + r1=2, + r2=20, +) + +# scheduler settings +epoch = 10 +eval_epoch = 10 +optimizer = dict(type="SGD", lr=0.1, momentum=0.8, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.01, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetPairDataset" +data_root = "data/scannet_pair" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + data_root=data_root, + view1_transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"coord": "origin_coord"}), + # dict(type="RandomScale", scale=[0.9, 1.1]), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="RandomColorJitter", + brightness=0.4, + contrast=0.4, + saturation=0.2, + hue=0.02, + p=0.8, + ), + dict(type="ChromaticJitter", p=0.95, std=0.05), + dict( + type="GridSample", + 
grid_size=0.025, + hash_type="fnv", + mode="train", + keys=("origin_coord", "coord", "color"), + return_grid_coord=True, + ), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("origin_coord", "grid_coord", "coord", "color"), + offset_keys_dict=dict(offset="coord"), + feat_keys=["color"], + ), + ], + view2_transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="Copy", keys_dict={"coord": "origin_coord"}), + # dict(type="RandomScale", scale=[0.9, 1.1]), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=1), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=1), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=1), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="RandomColorJitter", + brightness=0.4, + contrast=0.4, + saturation=0.2, + hue=0.02, + p=0.8, + ), + dict(type="ChromaticJitter", p=0.95, std=0.05), + dict( + type="GridSample", + grid_size=0.025, + hash_type="fnv", + mode="train", + keys=("origin_coord", "coord", "color"), + return_grid_coord=True, + ), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("origin_coord", "grid_coord", "coord", "color"), + offset_keys_dict=dict(offset="coord"), + feat_keys=["color"], + ), + ], + test_mode=False, + ), +) + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="CheckpointSaver", save_freq=None), +] diff --git a/Pointcept/configs/scannet/semseg-cac-v1m1-0-spunet-base.py b/Pointcept/configs/scannet/semseg-cac-v1m1-0-spunet-base.py new file mode 100644 index 0000000000000000000000000000000000000000..3968225e55db565a1e675605aa91d8a7a0353010 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-cac-v1m1-0-spunet-base.py @@ -0,0 +1,292 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="CAC-v1m1", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=0, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + num_classes=20, + backbone_out_channels=96, + cos_temp=15, + main_weight=1, + pre_weight=1, + pre_self_weight=1, + kl_weight=1, + conf_thresh=0.75, + detach_pre_logits=True, +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # 
dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + 
type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-cac-v1m1-1-spunet-lovasz.py b/Pointcept/configs/scannet/semseg-cac-v1m1-1-spunet-lovasz.py new file mode 100644 index 0000000000000000000000000000000000000000..3968225e55db565a1e675605aa91d8a7a0353010 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-cac-v1m1-1-spunet-lovasz.py @@ -0,0 +1,292 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="CAC-v1m1", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=0, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + num_classes=20, + backbone_out_channels=96, + cos_temp=15, + main_weight=1, + pre_weight=1, + pre_self_weight=1, + kl_weight=1, + conf_thresh=0.75, + detach_pre_logits=True, +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + 
type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-cac-v1m1-2-ptv2-lovasz.py b/Pointcept/configs/scannet/semseg-cac-v1m1-2-ptv2-lovasz.py new file mode 100644 index 0000000000000000000000000000000000000000..f36a0c3a9426a13217cbefcd21b20f798b02b6a7 --- 
/dev/null +++ b/Pointcept/configs/scannet/semseg-cac-v1m1-2-ptv2-lovasz.py @@ -0,0 +1,309 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="CAC-v1m1", + backbone=dict( + type="PT-v2m2", + in_channels=9, + num_classes=0, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + num_classes=20, + backbone_out_channels=48, + cos_temp=15, + main_weight=1, + pre_weight=1, + pre_self_weight=1, + kl_weight=1, + conf_thresh=0.75, + detach_pre_logits=True, +) + +# scheduler settings +epoch = 900 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + 
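+        # unlike the sparse-conv variants, PT-v2m2 consumes points directly:
+        # GridSample only deduplicates (return_min_coord=True, no grid_coord
+        # collected) and feat_keys coord + color + normal = 3 + 3 + 3 = 9
+        # channels, matching in_channels=9 above.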
type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-minkunet34c-0-base.py b/Pointcept/configs/scannet/semseg-minkunet34c-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..795998f368308c830fcdac760df67252b87ebab5 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-minkunet34c-0-base.py @@ -0,0 +1,193 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict(type="MinkUNet34C", in_channels=9, out_channels=20), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler 
settings +epoch = 600 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", 
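+                        # MinkUNet34C keeps test-time voting light: only the
+                        # four axis-aligned z-rotations, with no scale or
+                        # flip variants.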
+ center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-oacnns-v1m1-0-base.py b/Pointcept/configs/scannet/semseg-oacnns-v1m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..57ef37e6aa35f153288571bceb76448463772365 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-oacnns-v1m1-0-base.py @@ -0,0 +1,290 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True +sync_bn = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="OACNNs", + in_channels=9, + num_classes=20, + embed_channels=64, + enc_channels=[64, 64, 128, 256], + groups=[4, 4, 8, 16], + enc_depth=[3, 3, 9, 8], + dec_channels=[256, 256, 256, 256], + point_grid_size=[[8, 12, 16, 16], [6, 9, 12, 12], [4, 6, 8, 8], [3, 4, 6, 6]], + dec_depth=[2, 2, 2, 2], + enc_num_ref=[16, 16, 16, 16], + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + + +epoch = 900 +optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_min_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "normal", "color"), + ), + ], + 
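+        # OA-CNNs stacks two SphereCrops (keep ~80% of points, then cap at
+        # 100k) and uses a per-stage point_grid_size pyramid for its
+        # adaptive receptive fields; sync_bn=True keeps BatchNorm statistics
+        # shared across GPUs.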
test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_min_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode='center'), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "normal", "color"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "normal", "color"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "normal", "color"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-octformer-v1m1-0-base.py b/Pointcept/configs/scannet/semseg-octformer-v1m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..cc0bafadc332cfa374e522699d314f721a9b57e8 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-octformer-v1m1-0-base.py @@ -0,0 +1,296 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = False + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="OctFormer-v1m1", + 
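+        # in_channels=10 = coord (3) + color (3) + normal (3) +
+        # displacement (1): with project_displacement=True, GridSample
+        # projects the in-voxel offset onto the point normal, leaving a
+        # single channel (assumption based on the GridSample options used
+        # below).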
in_channels=10, + num_classes=20, + fpn_channels=168, + channels=(96, 192, 384, 384), + num_blocks=(2, 2, 18, 2), + num_heads=(6, 12, 24, 24), + patch_size=26, + stem_down=2, + head_up=2, + dilation=4, + drop_path=0.5, + nempty=True, + octree_depth=11, + octree_full_depth=2, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 600 +optimizer = dict(type="AdamW", lr=0.0015, weight_decay=0.05) +scheduler = dict( + type="MultiStepWithWarmupLR", + milestones=[0.6, 0.9], + gamma=0.1, + warmup_rate=0.05, + warmup_scale=1e-5, +) +param_dicts = [dict(keyword="blocks", lr=0.00015)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.1), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + return_min_coord=True, + return_displacement=True, + project_displacement=True, + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="SphereCrop", point_max=120000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "normal", "segment"), + feat_keys=("coord", "color", "normal", "displacement"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + return_min_coord=True, + return_displacement=True, + project_displacement=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "normal", "segment"), + feat_keys=("coord", "color", "normal", "displacement"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + 
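+ # test-time protocol: voxelize once, run every aug_transform variant, and fuse the per-variant predictions back onto full-resolution points via the collected "index"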
voxelize=dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_displacement=True, + project_displacement=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "normal", "index"), + feat_keys=("coord", "color", "normal", "displacement"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-ppt-v1m1-0-sc-st-spunet.py b/Pointcept/configs/scannet/semseg-ppt-v1m1-0-sc-st-spunet.py new file mode 100644 index 0000000000000000000000000000000000000000..7fe0c7512b04aa874e32be04dc77ab35e706b67a --- /dev/null +++ b/Pointcept/configs/scannet/semseg-ppt-v1m1-0-sc-st-spunet.py @@ -0,0 +1,391 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 24 # bs: total bs in all gpus +num_worker = 48 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +find_unused_parameters = True + +# trainer +train = dict( + type="MultiDatasetTrainer", +) + +# model settings +model = dict( + type="PPT-v1m1", + backbone=dict( + type="SpUNet-v1m3", + in_channels=6, + num_classes=0, + base_channels=32, + context_channels=256, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + conditions=("ScanNet", "S3DIS", "Structured3D"), + zero_init=False, + norm_decouple=True, + norm_adaptive=True, + norm_affine=True, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], + backbone_out_channels=96, + context_channels=256, + conditions=("Structured3D", "ScanNet", "S3DIS"), + template="[x]", + clip_model="ViT-B/16", + # fmt: off + class_name=( + "wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door", + "window", 
"bookshelf", "bookcase", "picture", "counter", "desk", "shelves", "curtain", + "dresser", "pillow", "mirror", "ceiling", "refrigerator", "television", "shower curtain", "nightstand", + "toilet", "sink", "lamp", "bathtub", "garbagebin", "board", "beam", "column", + "clutter", "otherstructure", "otherfurniture", "otherprop", + ), + valid_index=( + (0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 33, 34, 35), + (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34), + (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32), + ), + # fmt: on + backbone_mode=False, +) + +# scheduler settings +epoch = 100 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) +# param_dicts = [dict(keyword="modulation", lr=0.005)] + +# dataset settings +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type="ConcatDataset", + datasets=[ + # Structured3D + dict( + type="Structured3DDataset", + split="train", + data_root="data/structured3d", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "Structured3D"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=2, # sampling weight + ), + # ScanNet + dict( + type="ScanNetDataset", + split="train", + data_root="data/scannet", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + 
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=1, # sampling weight + ), + ], + ), + val=dict( + type="ScanNetDataset", + split="val", + data_root="data/scannet", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type="ScanNetDataset", + split="val", + data_root="data/scannet", + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + 
type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-ppt-v1m1-1-sc-st-spunet-submit.py b/Pointcept/configs/scannet/semseg-ppt-v1m1-1-sc-st-spunet-submit.py new file mode 100644 index 0000000000000000000000000000000000000000..d503080e21ff7d921dd29ca5dd77e95ccfd51c72 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-ppt-v1m1-1-sc-st-spunet-submit.py @@ -0,0 +1,366 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 24 # bs: total bs in all gpus +num_worker = 48 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +find_unused_parameters = True +evaluate = False + +# trainer +train = dict( + type="MultiDatasetTrainer", +) + +# model settings +model = dict( + type="PPT-v1m1", + backbone=dict( + type="SpUNet-v1m3", + in_channels=6, + num_classes=0, + base_channels=32, + context_channels=256, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + conditions=("ScanNet", "S3DIS", "Structured3D"), + zero_init=False, + norm_decouple=True, + norm_adaptive=True, + norm_affine=True, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], + backbone_out_channels=96, + context_channels=256, + conditions=("Structured3D", "ScanNet", "S3DIS"), + template="[x]", + clip_model="ViT-B/16", + # fmt: off + class_name=( + "wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door", + "window", "bookshelf", "bookcase", "picture", "counter", "desk", "shelves", "curtain", + "dresser", "pillow", "mirror", "ceiling", "refrigerator", "television", "shower curtain", "nightstand", + "toilet", "sink", "lamp", "bathtub", "garbagebin", "board", "beam", "column", + "clutter", "otherstructure", "otherfurniture", "otherprop", + ), + valid_index=( + (0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 33, 34, 35), + (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34), + (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32), + ), + # fmt: on + backbone_mode=False, +) + +# scheduler settings +epoch = 100 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) +# param_dicts = [dict(keyword="modulation", lr=0.005)] + +# dataset settings +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type="ConcatDataset", + datasets=[ + # Structured3D + dict( + type="Structured3DDataset", + split=["train", "val"], + data_root="data/structured3d", + transform=[ + 
dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "Structured3D"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=2, # sampling weight + ), + # ScanNet + dict( + type="ScanNetDataset", + split=["train", "val"], + data_root="data/scannet", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=1, # sampling weight + ), + ], + ), + test=dict( + type="ScanNetDataset", + split="test", + data_root="data/scannet", + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, 
+ hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-pt-v1-0-base.py b/Pointcept/configs/scannet/semseg-pt-v1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..f7b56590ad284eaf9cc2c3b616316627120bdb7b --- /dev/null +++ b/Pointcept/configs/scannet/semseg-pt-v1-0-base.py @@ -0,0 +1,277 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PointTransformer-Seg50", + in_channels=9, + num_classes=20, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 900 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + 
type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + 
type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-pt-v2m1-0-origin.py b/Pointcept/configs/scannet/semseg-pt-v2m1-0-origin.py new file mode 100644 index 0000000000000000000000000000000000000000..fd1f61afea1657cbd3d8db536bdbf78865fce368 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-pt-v2m1-0-origin.py @@ -0,0 +1,297 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = False + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m1", + in_channels=9, + num_classes=20, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=True, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 900 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", 
distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", 
scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-pt-v2m2-0-base.py b/Pointcept/configs/scannet/semseg-pt-v2m2-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..3ec72b0177556edfb6bc2f93d286cf04b0a2b31e --- /dev/null +++ b/Pointcept/configs/scannet/semseg-pt-v2m2-0-base.py @@ -0,0 +1,297 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=9, + num_classes=20, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 900 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + 
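+ # augmentations above match semseg-pt-v2m1-0-origin.py; this m2 variant differs mainly in the backbone (pe_multiplier=False) and enables AMP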
test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-pt-v2m2-1-submit.py b/Pointcept/configs/scannet/semseg-pt-v2m2-1-submit.py new file mode 100644 index 0000000000000000000000000000000000000000..daf9c9d6218d5f54f4a528d70c5a05c031c41910 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-pt-v2m2-1-submit.py @@ -0,0 +1,273 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True +evaluate = False + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=9, + num_classes=20, + patch_embed_depth=1, + patch_embed_channels=48, + 
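+ # grouped vector attention keeps every stage's channel width divisible by its group count (96/12, 192/24, 384/48, 512/64, i.e. 8 channels per group)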
patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 900 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split=["train", "val"], + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="test", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + 
angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-pt-v2m2-2-precise-evaluate.py b/Pointcept/configs/scannet/semseg-pt-v2m2-2-precise-evaluate.py new file mode 100644 index 0000000000000000000000000000000000000000..c01cf0e9f21ed7aefa05a39c75a5b261e800faf4 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-pt-v2m2-2-precise-evaluate.py @@ -0,0 +1,307 @@ +""" +An example of enabling precise evaluation on the validation dataset during training. +Please compare with semseg-pt-v2m2-0-base.py to learn the mechanism.
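+The val pipeline below copies "coord" and "segment" into "origin_coord" and "origin_segment" before GridSample, so validation metrics are computed on full-resolution points rather than on the voxelized subset.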
+""" + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=9, + num_classes=20, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 900 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={"coord": "origin_coord", "segment": "origin_segment"}, + ), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + # dict(type="SphereCrop", 
point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "origin_coord", "segment", "origin_segment"), + feat_keys=("coord", "color", "normal"), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-pt-v2m2-3-lovasz.py b/Pointcept/configs/scannet/semseg-pt-v2m2-3-lovasz.py new file mode 100644 index 0000000000000000000000000000000000000000..ed46ff221baac62afcc03628f2d7aac399fa6e24 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-pt-v2m2-3-lovasz.py @@ -0,0 +1,300 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=9, + num_classes=20, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 
1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 900 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", 
"color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-pt-v3m1-0-base.py b/Pointcept/configs/scannet/semseg-pt-v3m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..47cc7e010a9ab3d9e7b4307d1a0d24da4d13f0f3 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-pt-v3m1-0-base.py @@ -0,0 +1,312 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=20, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + cls_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", 
mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + 
axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-pt-v3m1-1-ppt-extreme.py b/Pointcept/configs/scannet/semseg-pt-v3m1-1-ppt-extreme.py new file mode 100644 index 0000000000000000000000000000000000000000..4e4804eaa9510bde3235c72cb2a34d2a1907ac14 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-pt-v3m1-1-ppt-extreme.py @@ -0,0 +1,483 @@ +""" +PTv3 + PPT +Pre-trained on ScanNet + Structured3D +(S3DIS is commented by default as a long data time issue of S3DIS: https://github.com/Pointcept/Pointcept/issues/103) +In the original PPT paper, 3 datasets are jointly trained and validated on the three datasets jointly with +one shared weight model. In PTv3, we trained on multi-dataset but only validated on one single dataset to +achieve extreme performance on one single dataset. + +To enable joint training on three datasets, uncomment config for the S3DIS dataset and change the "loop" of + Structured3D and ScanNet to 4 and 2 respectively. 
+""" + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 24 # bs: total bs in all gpus +num_worker = 48 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +find_unused_parameters = True +clip_grad = 3.0 + +# trainer +train = dict( + type="MultiDatasetTrainer", +) + +# model settings +model = dict( + type="PPT-v1m1", + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(3, 3, 3, 6, 3), + enc_channels=(48, 96, 192, 384, 512), + enc_num_head=(3, 6, 12, 24, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(3, 3, 3, 3), + dec_channels=(64, 96, 192, 384), + dec_num_head=(4, 6, 12, 24), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + cls_mode=False, + pdnorm_bn=True, + pdnorm_ln=True, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + backbone_out_channels=64, + context_channels=256, + conditions=("Structured3D", "ScanNet", "S3DIS"), + template="[x]", + clip_model="ViT-B/16", + # fmt: off + class_name=( + "wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door", + "window", "bookshelf", "bookcase", "picture", "counter", "desk", "shelves", "curtain", + "dresser", "pillow", "mirror", "ceiling", "refrigerator", "television", "shower curtain", "nightstand", + "toilet", "sink", "lamp", "bathtub", "garbagebin", "board", "beam", "column", + "clutter", "otherstructure", "otherfurniture", "otherprop", + ), + valid_index=( + (0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 33, 34, 35), + (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34), + (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32), + ), + # fmt: on + backbone_mode=False, +) + +# scheduler settings +epoch = 100 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.005, 0.0005], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0005)] + +# dataset settings +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type="ConcatDataset", + datasets=[ + # Structured3D + dict( + type="Structured3DDataset", + split=["train", "val", "test"], + data_root="data/structured3d", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", 
scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "Structured3D"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=2, # sampling weight + ), + # ScanNet + dict( + type="ScanNetDataset", + split="train", + data_root="data/scannet", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=1, # sampling weight + ), + # S3DIS + # dict( + # type="S3DISDataset", + # split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + # data_root="data/s3dis", + # transform=[ + # dict(type="CenterShift", apply_z=True), + # dict( + # type="RandomDropout", + # dropout_ratio=0.2, + # dropout_application_ratio=0.2, + # ), + # # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + # dict( + # type="RandomRotate", + # angle=[-1, 1], + # axis="z", + # center=[0, 0, 0], + # p=0.5, + # ), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + # dict(type="RandomScale", scale=[0.9, 1.1]), + # # 
dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + # dict(type="RandomFlip", p=0.5), + # dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict( + # type="ElasticDistortion", + # distortion_params=[[0.2, 0.4], [0.8, 1.6]], + # ), + # dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + # dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + # dict(type="ChromaticJitter", p=0.95, std=0.05), + # # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + # dict( + # type="GridSample", + # grid_size=0.02, + # hash_type="fnv", + # mode="train", + # return_grid_coord=True, + # ), + # dict(type="SphereCrop", sample_rate=0.6, mode="random"), + # dict(type="SphereCrop", point_max=204800, mode="random"), + # dict(type="CenterShift", apply_z=False), + # dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + # dict(type="Add", keys_dict={"condition": "S3DIS"}), + # dict(type="ToTensor"), + # dict( + # type="Collect", + # keys=("coord", "grid_coord", "segment", "condition"), + # feat_keys=("color", "normal"), + # ), + # ], + # test_mode=False, + # loop=1, # sampling weight + # ), + ], + ), + val=dict( + type="ScanNetDataset", + split="val", + data_root="data/scannet", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type="ScanNetDataset", + split="val", + data_root="data/scannet", + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", 
+ center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..0e2e41aa5d949cad7428f722f017db73a565571a --- /dev/null +++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-0-base.py @@ -0,0 +1,281 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=20, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + 
keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-1-interp-eval.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-1-interp-eval.py new file mode 100644 index 0000000000000000000000000000000000000000..9adfb382ad61433c7c73c3e502adbeaf411ae946 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-1-interp-eval.py @@ -0,0 +1,285 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + 
type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=20, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="Copy", + keys_dict={"coord": "origin_coord", "segment": "origin_segment"}, + ), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", 
"normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-1-precise-eval.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-1-precise-eval.py new file mode 100644 index 0000000000000000000000000000000000000000..7afb3aef572bc5e22b13f95fe70dbe43bdbb8d5f --- /dev/null +++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-1-precise-eval.py @@ -0,0 +1,289 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + dict(type="CheckpointSaver", save_freq=None), + dict(type="PreciseEvaluator", test_last=False), +] + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=20, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", 
+ "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + 
center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la100.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la100.py new file mode 100644 index 0000000000000000000000000000000000000000..d8774321618c7d84e8a26911cfa418e777798c9c --- /dev/null +++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la100.py @@ -0,0 +1,282 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=20, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + la_file="data/scannet/tasks/points/points100", + ignore_index=-1, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + 
dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) 
diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la20.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la20.py new file mode 100644 index 0000000000000000000000000000000000000000..1171c51184cab3372798d6bf94036719571880f0 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la20.py @@ -0,0 +1,282 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=20, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + la_file="data/scannet/tasks/points/points20", + ignore_index=-1, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + 
type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la200.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la200.py new file mode 100644 index 0000000000000000000000000000000000000000..158b0873af0db67a2877d966fb9dafdf417872ad --- /dev/null +++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la200.py @@ -0,0 +1,282 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=20, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + 
final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + la_file="data/scannet/tasks/points/points200", + ignore_index=-1, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 
2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la50.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la50.py new file mode 100644 index 0000000000000000000000000000000000000000..6eb906429e1209881b75f223ff85ff67d4d55594 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-la50.py @@ -0,0 +1,282 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=20, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + la_file="data/scannet/tasks/points/points50", + ignore_index=-1, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # 
dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 
0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr1.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr1.py new file mode 100644 index 0000000000000000000000000000000000000000..6f5b2267f31b2e68306e414d46486a27081a7f1a --- /dev/null +++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr1.py @@ -0,0 +1,281 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=20, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + lr_file="data/scannet/tasks/scenes/1.txt", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + 
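# voxelize on a 2 cm grid; "train" mode keeps a single point per voxel +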
grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr10.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr10.py new file mode 100644 index 0000000000000000000000000000000000000000..cff1df75e21a22e49486beb712e1443959f84447 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr10.py @@ -0,0 +1,281 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=20, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# 
scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + lr_file="data/scannet/tasks/scenes/10.txt", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + 
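# rotation angles are multiples of pi, so 1 / 2 is a 90-degree turn about z +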
angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr20.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr20.py new file mode 100644 index 0000000000000000000000000000000000000000..0d1891fe6e0a984089d5b7ff465bd16e0a6c3ae8 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr20.py @@ -0,0 +1,281 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=20, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + lr_file="data/scannet/tasks/scenes/20.txt", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], 
p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + 
angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr5.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr5.py new file mode 100644 index 0000000000000000000000000000000000000000..e5a420a349a1ef0e12a58b3b4ea969092081a274 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-2-efficient-lr5.py @@ -0,0 +1,281 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=20, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + lr_file="data/scannet/tasks/scenes/5.txt", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", 
"normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-3-enable-profiler.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-3-enable-profiler.py new file mode 100644 index 0000000000000000000000000000000000000000..32acb2be05092162561c66822e8cfe0758871147 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-3-enable-profiler.py @@ -0,0 +1,296 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = False + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + 
in_channels=6, + num_classes=20, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + 
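# test-time augmentation: four z-rotations, each at scale 1.0 / 0.95 / 1.05, plus a flip; per-variant predictions are fused +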
aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +hooks = [ + dict(type="CheckpointLoader"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + dict(type="CheckpointSaver", save_freq=None), + dict( + type="RuntimeProfiler", + forward=True, + backward=True, + interrupt=True, + warm_up=2, + row_limit=30, + ), +] diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-4-ft.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-4-ft.py new file mode 100644 index 0000000000000000000000000000000000000000..f90564e23b3422550c7fe209b977aa428c779b0e --- /dev/null +++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-4-ft.py @@ -0,0 +1,280 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 48 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=20, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + 
"bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, 
+ ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m1-5-lovasz.py b/Pointcept/configs/scannet/semseg-spunet-v1m1-5-lovasz.py new file mode 100644 index 0000000000000000000000000000000000000000..fb976abbd6570196e589ece51566a12e679c0364 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-spunet-v1m1-5-lovasz.py @@ -0,0 +1,283 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=20, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # 
dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-spunet-v1m3-0-pdnorm-base.py b/Pointcept/configs/scannet/semseg-spunet-v1m3-0-pdnorm-base.py new file mode 100644 index 
0000000000000000000000000000000000000000..c6aed1fb9f5f4d7d7dd3b8608ac8be972931fbc3 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-spunet-v1m3-0-pdnorm-base.py @@ -0,0 +1,291 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m3", + in_channels=6, + num_classes=20, + base_channels=32, + context_channels=256, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + conditions=("ScanNet", "S3DIS", "Structured3D"), + zero_init=False, + norm_decouple=True, + norm_adaptive=False, + norm_affine=True, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="Add", keys_dict=dict(condition="ScanNet")), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + 
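# tag each sample with its source dataset so the PDNorm backbone selects the matching decoupled norm branch +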
dict(type="Add", keys_dict=dict(condition="ScanNet")), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict=dict(condition="ScanNet")), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-st-v1m1-0-origin.py b/Pointcept/configs/scannet/semseg-st-v1m1-0-origin.py new file mode 100644 index 0000000000000000000000000000000000000000..4c05848c5212616f68db4ec70f7dfdebd1ef7d35 --- /dev/null +++ b/Pointcept/configs/scannet/semseg-st-v1m1-0-origin.py @@ -0,0 +1,286 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 8 # bs: total bs in all gpus +mix_prob = 0 +empty_cache = False +enable_amp = True +find_unused_parameters = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="ST-v1m1", + downsample_scale=4, + depths=[3, 3, 9, 3, 3], + channels=[48, 96, 192, 384, 384], + num_heads=[3, 6, 12, 24, 24], + window_size=[0.1, 0.2, 0.4, 0.8, 1.6], + up_k=3, + grid_sizes=[0.02, 0.04, 0.08, 0.16, 0.32], + quant_sizes=[0.005, 0.01, 0.02, 0.04, 0.08], + rel_query=True, + rel_key=True, + 
rel_value=True, + drop_path_rate=0.3, + num_layers=5, + concat_xyz=True, + num_classes=20, + ratio=0.25, + k=16, + prev_grid_size=0.02, + sigma=1.0, + stem_transformer=False, + kp_ball_radius=0.02 * 2.5, + kp_max_neighbor=34, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 600 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + # dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", keys=("coord", "segment"), feat_keys=("coord", "color") + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", keys=("coord", "segment"), feat_keys=("coord", "color") + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", 
+ angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-st-v1m2-0-refined.py b/Pointcept/configs/scannet/semseg-st-v1m2-0-refined.py new file mode 100644 index 0000000000000000000000000000000000000000..7b9963eedc36be41897e041bc67c245cab0c1e5b --- /dev/null +++ b/Pointcept/configs/scannet/semseg-st-v1m2-0-refined.py @@ -0,0 +1,287 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 8 # bs: total bs in all gpus +mix_prob = 0 +empty_cache = False +enable_amp = True +find_unused_parameters = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="ST-v1m2", + in_channels=9, + num_classes=20, + channels=(48, 96, 192, 384, 384), + num_heads=(6, 12, 24, 24), + depths=(3, 9, 3, 3), + window_size=(0.2, 0.4, 0.8, 1.6), + quant_size=(0.01, 0.02, 0.04, 0.08), + mlp_expend_ratio=4.0, + down_ratio=0.25, + down_num_sample=16, + kp_ball_radius=2.5 * 0.02, + kp_max_neighbor=34, + kp_grid_size=0.02, + kp_sigma=1.0, + drop_path_rate=0.2, + rel_query=True, + rel_key=True, + rel_value=True, + qkv_bias=True, + stem=True, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) +# scheduler settings +epoch = 600 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1) + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # 
dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + # dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 
1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-swin3d-v1m1-0-small.py b/Pointcept/configs/scannet/semseg-swin3d-v1m1-0-small.py new file mode 100644 index 0000000000000000000000000000000000000000..e8d8308de55cd42ba4ea0bea8a3b951bfda8d6df --- /dev/null +++ b/Pointcept/configs/scannet/semseg-swin3d-v1m1-0-small.py @@ -0,0 +1,219 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="Swin3D-v1m1", + in_channels=9, + num_classes=20, + base_grid_size=0.02, + depths=[2, 4, 9, 4, 4], + channels=[48, 96, 192, 384, 384], + num_heads=[6, 6, 12, 24, 24], + window_sizes=[5, 7, 7, 7, 7], + quant_size=4, + drop_path_rate=0.3, + up_k=3, + num_layers=5, + stem_transformer=True, + down_stride=3, + upsample="linear_attn", + knn_down=True, + cRSE="XYZ_RGB_NORM", + fp16_mode=1, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 600 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="blocks", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" +data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.8, 1.2]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_displacement=True, + 
), + dict(type="SphereCrop", point_max=120000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_displacement=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + return_displacement=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet/semseg-swin3d-v1m1-1-large.py b/Pointcept/configs/scannet/semseg-swin3d-v1m1-1-large.py new file mode 100644 index 0000000000000000000000000000000000000000..0957ff85e1902220e0d13676b6814e5420a1776d --- /dev/null +++ b/Pointcept/configs/scannet/semseg-swin3d-v1m1-1-large.py @@ -0,0 +1,219 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="Swin3D-v1m1", + in_channels=9, + num_classes=20, + base_grid_size=0.02, + depths=[2, 4, 9, 4, 4], + channels=[80, 160, 320, 640, 640], + num_heads=[10, 10, 20, 40, 40], + window_sizes=[5, 7, 7, 7, 7], + quant_size=4, + drop_path_rate=0.3, + up_k=3, + num_layers=5, + stem_transformer=True, + down_stride=3, + upsample="linear_attn", + knn_down=True, + cRSE="XYZ_RGB_NORM", + fp16_mode=1, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 600 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="blocks", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetDataset" 
+data_root = "data/scannet" + +data = dict( + num_classes=20, + ignore_index=-1, + names=[ + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refridgerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", + ], + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.8, 1.2]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_displacement=True, + ), + dict(type="SphereCrop", point_max=120000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_displacement=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + return_displacement=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", 
+ center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet200/insseg-pointgroup-spunet-0-base.py b/Pointcept/configs/scannet200/insseg-pointgroup-spunet-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..90a8d5e384b59fd5e6a004d57006861de9ca921a --- /dev/null +++ b/Pointcept/configs/scannet200/insseg-pointgroup-spunet-0-base.py @@ -0,0 +1,170 @@ +from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import ( + CLASS_LABELS_200, +) + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 12 +mix_prob = 0 +empty_cache = False +enable_amp = True +evaluate = True + +class_names = CLASS_LABELS_200 +num_classes = 200 +segment_ignore_index = (-1, 0, 2) + +# model settings +model = dict( + type="PG-v1m1", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=0, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + backbone_out_channels=96, + semantic_num_classes=num_classes, + semantic_ignore_index=-1, + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + cluster_thresh=1.5, + cluster_closed_points=300, + cluster_propose_points=100, + cluster_min_points=50, +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict(type="PolyLR") + +# dataset settings +dataset_type = "ScanNet200Dataset" +data_root = "data/scannet" + +data = dict( + num_classes=num_classes, + ignore_index=-1, + names=class_names, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.5 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.1), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "instance_centroid", + "bbox", + ), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", 
apply_z=True), + dict( + type="Copy", + keys_dict={ + "coord": "origin_coord", + "segment": "origin_segment", + "instance": "origin_instance", + }, + ), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + keys=("coord", "color", "normal", "segment", "instance"), + ), + # dict(type="SphereCrop", point_max=1000000, mode='center'), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict( + type="InstanceParser", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=( + "coord", + "grid_coord", + "segment", + "instance", + "origin_coord", + "origin_segment", + "origin_instance", + "instance_centroid", + "bbox", + ), + feat_keys=("color", "normal"), + offset_keys_dict=dict(offset="coord", origin_offset="origin_coord"), + ), + ], + test_mode=False, + ), + test=dict(), # currently not available +) + +hooks = [ + dict(type="CheckpointLoader", keywords="module.", replacement="module."), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict( + type="InsSegEvaluator", + segment_ignore_index=segment_ignore_index, + instance_ignore_index=-1, + ), + dict(type="CheckpointSaver", save_freq=None), +] diff --git a/Pointcept/configs/scannet200/semseg-cac-v1m1-0-spunet-base.py b/Pointcept/configs/scannet200/semseg-cac-v1m1-0-spunet-base.py new file mode 100644 index 0000000000000000000000000000000000000000..736bc767abdcba23de5676c5321cef01e19707c4 --- /dev/null +++ b/Pointcept/configs/scannet200/semseg-cac-v1m1-0-spunet-base.py @@ -0,0 +1,192 @@ +from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import ( + CLASS_LABELS_200, +) + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="CAC-v1m1", + backbone=dict( + type="SpUNet-v1m1", + in_channels=9, + num_classes=0, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], + num_classes=200, + backbone_out_channels=96, + cos_temp=15, + main_weight=1, + pre_weight=1, + pre_self_weight=1, + kl_weight=1, + conf_thresh=0, + detach_pre_logits=True, +) + + +# scheduler settings +epoch = 600 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNet200Dataset" +data_root = "data/scannet" + +data = dict( + num_classes=200, + ignore_index=-1, + names=CLASS_LABELS_200, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", 
sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet200/semseg-cac-v1m1-1-spunet-lovasz.py b/Pointcept/configs/scannet200/semseg-cac-v1m1-1-spunet-lovasz.py new file mode 100644 index 0000000000000000000000000000000000000000..e5584a8b763e911a896d4c948cc42f4d0da880e1 --- /dev/null +++ b/Pointcept/configs/scannet200/semseg-cac-v1m1-1-spunet-lovasz.py @@ -0,0 +1,195 @@ +from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import ( + CLASS_LABELS_200, +) + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="CAC-v1m1", + backbone=dict( + type="SpUNet-v1m1", + in_channels=9, + num_classes=0, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + num_classes=200, + backbone_out_channels=96, + cos_temp=15, + main_weight=1, + 
pre_weight=1, + pre_self_weight=1, + kl_weight=1, + conf_thresh=0, + detach_pre_logits=True, +) + + +# scheduler settings +epoch = 600 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNet200Dataset" +data_root = "data/scannet" + +data = dict( + num_classes=200, + ignore_index=-1, + names=CLASS_LABELS_200, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], 
+ p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet200/semseg-cac-v1m1-2-ptv2-lovasz.py b/Pointcept/configs/scannet200/semseg-cac-v1m1-2-ptv2-lovasz.py new file mode 100644 index 0000000000000000000000000000000000000000..bbd49a65d32cac158ece4cf06c641d08b452207c --- /dev/null +++ b/Pointcept/configs/scannet200/semseg-cac-v1m1-2-ptv2-lovasz.py @@ -0,0 +1,292 @@ +from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import ( + CLASS_LABELS_200, +) + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="CAC-v1m1", + backbone=dict( + type="PT-v2m2", + in_channels=9, + num_classes=0, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + num_classes=200, + backbone_out_channels=48, + cos_temp=15, + main_weight=1, + pre_weight=1, + pre_self_weight=1, + kl_weight=1, + conf_thresh=0, + detach_pre_logits=True, +) + +# scheduler settings +epoch = 900 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNet200Dataset" +data_root = "data/scannet" + +data = dict( + num_classes=200, + ignore_index=-1, + names=CLASS_LABELS_200, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + 
dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet200/semseg-minkunet34c-0-base.py b/Pointcept/configs/scannet200/semseg-minkunet34c-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..dd8479414cc4a8306c1914490c6621cb6debaeab --- /dev/null +++ b/Pointcept/configs/scannet200/semseg-minkunet34c-0-base.py @@ -0,0 +1,176 @@ +from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import ( + CLASS_LABELS_200, +) + +_base_ = 
["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict(type="MinkUNet34C", in_channels=9, out_channels=200), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 600 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNet200Dataset" +data_root = "data/scannet" + +data = dict( + num_classes=200, + ignore_index=-1, + names=CLASS_LABELS_200, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 6, 1 / 6], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 6, 1 / 6], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + 
type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet200/semseg-pt-v1-0-base.py b/Pointcept/configs/scannet200/semseg-pt-v1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..60c1e2fbd99f8cfac633545d265bfc6cbc7219f8 --- /dev/null +++ b/Pointcept/configs/scannet200/semseg-pt-v1-0-base.py @@ -0,0 +1,260 @@ +from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import ( + CLASS_LABELS_200, +) + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PointTransformer-Seg50", + in_channels=9, + num_classes=200, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 900 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNet200Dataset" +data_root = "data/scannet" + +data = dict( + num_classes=200, + ignore_index=-1, + names=CLASS_LABELS_200, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + 
dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet200/semseg-pt-v2m1-0-base.py b/Pointcept/configs/scannet200/semseg-pt-v2m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..b454f1de6563aec984578c8b7bff52bab7010218 --- /dev/null +++ b/Pointcept/configs/scannet200/semseg-pt-v2m1-0-base.py @@ -0,0 +1,280 @@ +from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import ( + CLASS_LABELS_200, +) + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = False + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m1", + in_channels=9, + num_classes=200, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 
192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=True, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 900 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNet200Dataset" +data_root = "data/scannet" + +data = dict( + num_classes=200, + ignore_index=-1, + names=CLASS_LABELS_200, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ 
+ dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet200/semseg-pt-v2m2-0-base.py b/Pointcept/configs/scannet200/semseg-pt-v2m2-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..51287dd96eb4d2af1cbe01cbf29e029918dc21b8 --- /dev/null +++ b/Pointcept/configs/scannet200/semseg-pt-v2m2-0-base.py @@ -0,0 +1,280 @@ +from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import ( + CLASS_LABELS_200, +) + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=9, + num_classes=200, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 900 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNet200Dataset" +data_root = "data/scannet" + +data = dict( + num_classes=200, + ignore_index=-1, + names=CLASS_LABELS_200, + train=dict( + type=dataset_type, + split="train", + 
data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", 
+ center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet200/semseg-pt-v2m2-1-benchmark-submit.py b/Pointcept/configs/scannet200/semseg-pt-v2m2-1-benchmark-submit.py new file mode 100644 index 0000000000000000000000000000000000000000..e8d3d1e449b688dc04d225d22626b2009285e52d --- /dev/null +++ b/Pointcept/configs/scannet200/semseg-pt-v2m2-1-benchmark-submit.py @@ -0,0 +1,256 @@ +from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import ( + CLASS_LABELS_200, +) + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True +evaluate = False + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=9, + num_classes=200, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 900 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNet200Dataset" +data_root = "data/scannet" + +data = dict( + num_classes=200, + ignore_index=-1, + names=CLASS_LABELS_200, + train=dict( + type=dataset_type, + split=["train", "val"], + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + 
dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="test", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet200/semseg-pt-v2m2-2-lovasz.py b/Pointcept/configs/scannet200/semseg-pt-v2m2-2-lovasz.py new file mode 100644 index 0000000000000000000000000000000000000000..c3ab0f65f6e56a5faa876ab4ec52f531cd9e2453 --- /dev/null +++ b/Pointcept/configs/scannet200/semseg-pt-v2m2-2-lovasz.py @@ -0,0 +1,283 @@ +from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import ( + CLASS_LABELS_200, +) + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 
+empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=9, + num_classes=200, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 900 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNet200Dataset" +data_root = "data/scannet" + +data = dict( + num_classes=200, + ignore_index=-1, + names=CLASS_LABELS_200, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", 
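The criteria list earlier in this file pairs CrossEntropyLoss with LovaszLoss; the segmentor evaluates every configured loss and sums the weighted results. A minimal sketch of that reduction (a hypothetical wrapper, not the exact Pointcept build logic):

import torch.nn as nn

class SummedCriteria(nn.Module):
    """Apply every configured loss and add them up with their weights."""
    def __init__(self, criteria, weights):
        super().__init__()
        self.criteria = nn.ModuleList(criteria)
        self.weights = weights

    def forward(self, pred, target):
        return sum(w * crit(pred, target)
                   for w, crit in zip(self.weights, self.criteria))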
apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet200/semseg-pt-v3m1-0-base.py b/Pointcept/configs/scannet200/semseg-pt-v3m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..ed73ca90ec80dec027906fb69b1678a006840f36 --- /dev/null +++ b/Pointcept/configs/scannet200/semseg-pt-v3m1-0-base.py @@ -0,0 +1,295 @@ +from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import ( + CLASS_LABELS_200, +) + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=200, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=["z", "z-trans", "hilbert", "hilbert-trans"], + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + 
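PT-v3's order=["z", "z-trans", "hilbert", "hilbert-trans"] names the space-filling curves used to serialize voxels before patch attention (enc_patch_size=1024 points per patch), shuffled per block when shuffle_orders=True. A toy Morton (z-order) key shows the principle — sorting by interleaved coordinate bits keeps spatial neighbours close in rank. This is a simplified scalar version; the real implementation is vectorized:

def morton_key(x, y, z, bits=10):
    """Interleave the bits of integer grid coords (x, y, z) into one z-order key."""
    key = 0
    for i in range(bits):
        key |= ((x >> i) & 1) << (3 * i)
        key |= ((y >> i) & 1) << (3 * i + 1)
        key |= ((z >> i) & 1) << (3 * i + 2)
    return key

# Sorting voxels by this key makes fixed-size patches cover compact regions.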
upcast_softmax=False, + cls_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNet200Dataset" +data_root = "data/scannet" + +data = dict( + num_classes=200, + ignore_index=-1, + names=CLASS_LABELS_200, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + 
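param_dicts = [dict(keyword="block", lr=0.0006)] above splits the parameters into two groups: names containing "block" (the transformer blocks) train at the lower rate, everything else at the base 0.006, and the two entries of max_lr in OneCycleLR line up with these two groups. A sketch of that grouping with a hypothetical helper:

def build_param_groups(model, base_lr=0.006, block_lr=0.0006, keyword="block"):
    """Split parameters by keyword; group order matches max_lr=[0.006, 0.0006]."""
    base, block = [], []
    for name, param in model.named_parameters():
        (block if keyword in name else base).append(param)
    return [{"params": base, "lr": base_lr},
            {"params": block, "lr": block_lr}]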
p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet200/semseg-pt-v3m1-1-ppt-ft.py b/Pointcept/configs/scannet200/semseg-pt-v3m1-1-ppt-ft.py new file mode 100644 index 0000000000000000000000000000000000000000..fa92b10fe935193e4333b19e78ad8997aab1b102 --- /dev/null +++ b/Pointcept/configs/scannet200/semseg-pt-v3m1-1-ppt-ft.py @@ -0,0 +1,299 @@ +from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import ( + CLASS_LABELS_200, +) + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +find_unused_parameters = True + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=200, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(3, 3, 3, 6, 3), + enc_channels=(48, 96, 192, 384, 512), + enc_num_head=(3, 6, 12, 24, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(3, 3, 3, 3), + dec_channels=(64, 96, 192, 384), + dec_num_head=(4, 6, 12, 24), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + cls_mode=False, + pdnorm_bn=True, + pdnorm_ln=True, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + 
type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNet200Dataset" +data_root = "data/scannet" + +data = dict( + num_classes=200, + ignore_index=-1, + names=CLASS_LABELS_200, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=102400, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], 
+ axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet200/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/scannet200/semseg-spunet-v1m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..1fcd0fecf93c058b0f68c251ebb0210a64da976b --- /dev/null +++ b/Pointcept/configs/scannet200/semseg-spunet-v1m1-0-base.py @@ -0,0 +1,182 @@ +from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import ( + CLASS_LABELS_200, +) + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=9, + num_classes=200, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 600 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNet200Dataset" +data_root = "data/scannet" + +data = dict( + num_classes=200, + ignore_index=-1, + names=CLASS_LABELS_200, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + 
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet200/semseg-spunet-v1m1-1-lovasz.py b/Pointcept/configs/scannet200/semseg-spunet-v1m1-1-lovasz.py new file mode 100644 index 0000000000000000000000000000000000000000..12c17df2be7bf2895c4e2018c124c196e8297f80 --- /dev/null +++ b/Pointcept/configs/scannet200/semseg-spunet-v1m1-1-lovasz.py @@ -0,0 +1,185 @@ +from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import ( + CLASS_LABELS_200, +) + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=9, + num_classes=200, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 600 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + 
max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNet200Dataset" +data_root = "data/scannet" + +data = dict( + num_classes=200, + ignore_index=-1, + names=CLASS_LABELS_200, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ), +) diff --git a/Pointcept/configs/scannet200/semseg-st-v1m2-0-refined.py 
b/Pointcept/configs/scannet200/semseg-st-v1m2-0-refined.py new file mode 100644 index 0000000000000000000000000000000000000000..98363e34c91f344faf410493066f6291d8b75eaf --- /dev/null +++ b/Pointcept/configs/scannet200/semseg-st-v1m2-0-refined.py @@ -0,0 +1,270 @@ +from pointcept.datasets.preprocessing.scannet.meta_data.scannet200_constants import ( + CLASS_LABELS_200, +) + +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 8 # bs: total bs in all gpus +mix_prob = 0 +empty_cache = False +enable_amp = True +find_unused_parameters = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="ST-v1m2", + in_channels=9, + num_classes=200, + channels=(48, 96, 192, 384, 384), + num_heads=(6, 12, 24, 24), + depths=(3, 9, 3, 3), + window_size=(0.2, 0.4, 0.8, 1.6), + quant_size=(0.01, 0.02, 0.04, 0.08), + mlp_expend_ratio=4.0, + down_ratio=0.25, + down_num_sample=16, + kp_ball_radius=2.5 * 0.02, + kp_max_neighbor=34, + kp_grid_size=0.02, + kp_sigma=1.0, + drop_path_rate=0.2, + rel_query=True, + rel_key=True, + rel_value=True, + qkv_bias=True, + stem=True, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) +# scheduler settings +epoch = 600 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1) + +# dataset settings +dataset_type = "ScanNet200Dataset" +data_root = "data/scannet" + +data = dict( + num_classes=200, + ignore_index=-1, + names=CLASS_LABELS_200, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + # dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="SphereCrop", point_max=100000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_min_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), 
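The Stratified Transformer stages above follow a simple geometric progression: window_size doubles each stage, quant_size stays at window_size/20 for relative-position quantization, and the KPConv stem radius is 2.5x the 2 cm voxel grid. Worked out (our reading of the hyperparameters, matching the values in this config):

grid_size = 0.02
window_size = [0.2 * 2 ** i for i in range(4)]   # [0.2, 0.4, 0.8, 1.6]
quant_size = [w / 20 for w in window_size]       # [0.01, 0.02, 0.04, 0.08]
kp_ball_radius = 2.5 * grid_size                 # 0.05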
+ test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannetpp/semseg-pt-v2m2-0-base.py b/Pointcept/configs/scannetpp/semseg-pt-v2m2-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..81baa7c600fbcfce1121899897879fd13f9ed0eb --- /dev/null +++ b/Pointcept/configs/scannetpp/semseg-pt-v2m2-0-base.py @@ -0,0 +1,291 @@ +_base_ = [ + "../_base_/default_runtime.py", + "../_base_/dataset/scannetpp.py", +] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=9, + num_classes=100, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + 
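The grid_sizes comment in the PT-v2 backbone (# x3, x2.5, x2.5, x2.5) records how the partition-based pooling grows the grid from the 0.02 m base voxel size at each encoder stage:

base_grid = 0.02
grid_sizes, g = [], base_grid
for ratio in (3, 2.5, 2.5, 2.5):
    g *= ratio
    grid_sizes.append(g)
# -> 0.06, 0.15, 0.375, 0.9375 (up to float rounding), matching the config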
unpool_backend="map", # map / interp + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 900 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNetPPDataset" +data_root = "data/scannetpp" + +data = dict( + num_classes=100, + ignore_index=-1, + train=dict( + type=dataset_type, + split="train_grid1mm_chunk6x6_stride3x3", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + keys=("coord", "color", "normal", "segment"), + return_inverse=True, + ), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + 
axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannetpp/semseg-pt-v2m2-1-submit.py b/Pointcept/configs/scannetpp/semseg-pt-v2m2-1-submit.py new file mode 100644 index 0000000000000000000000000000000000000000..e068997c5158ab4d1e84f3d2e75454198ee6897f --- /dev/null +++ b/Pointcept/configs/scannetpp/semseg-pt-v2m2-1-submit.py @@ -0,0 +1,278 @@ +_base_ = [ + "../_base_/default_runtime.py", + "../_base_/dataset/scannetpp.py", +] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +evaluate = False + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=9, + num_classes=100, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 900 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.02) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) + +# dataset settings +dataset_type = "ScanNetPPDataset" +data_root = "data/scannetpp" + +data = dict( + num_classes=100, + 
ignore_index=-1, + train=dict( + type=dataset_type, + split=["train_grid1mm_chunk6x6_stride3x3", "val_grid1mm_chunk6x6_stride3x3"], + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="test", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + keys=("coord", "color", "normal", "segment"), + return_inverse=True, + ), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + 
type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# hook +hooks = [ + dict(type="CheckpointLoader"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + dict(type="CheckpointSaver", save_freq=None), + dict(type="PreciseEvaluator", test_last=True), +] diff --git a/Pointcept/configs/scannetpp/semseg-pt-v3m1-0-base.py b/Pointcept/configs/scannetpp/semseg-pt-v3m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..efd95ad6190fb3a14234a9da293338ec67140658 --- /dev/null +++ b/Pointcept/configs/scannetpp/semseg-pt-v3m1-0-base.py @@ -0,0 +1,302 @@ +_base_ = [ + "../_base_/default_runtime.py", + "../_base_/dataset/scannetpp.py", +] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=100, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + cls_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetPPDataset" +data_root = "data/scannetpp" + +data = dict( + num_classes=100, + ignore_index=-1, + train=dict( + type=dataset_type, + split="train_grid1mm_chunk6x6_stride3x3", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + 
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + keys=("coord", "color", "normal", "segment"), + return_inverse=True, + ), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + 
dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannetpp/semseg-pt-v3m1-1-submit.py b/Pointcept/configs/scannetpp/semseg-pt-v3m1-1-submit.py new file mode 100644 index 0000000000000000000000000000000000000000..9b91ca6d93037bf502566afd3a704b2ef7aa010b --- /dev/null +++ b/Pointcept/configs/scannetpp/semseg-pt-v3m1-1-submit.py @@ -0,0 +1,289 @@ +_base_ = [ + "../_base_/default_runtime.py", + "../_base_/dataset/scannetpp.py", +] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +evaluate = False + +# model settings +model = dict( + type="DefaultSegmentorV2", + num_classes=100, + backbone_out_channels=64, + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + cls_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 800 +optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.006, 0.0006], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0006)] + +# dataset settings +dataset_type = "ScanNetPPDataset" +data_root = "data/scannetpp" + +data = dict( + num_classes=100, + ignore_index=-1, + train=dict( + type=dataset_type, + split=["train_grid1mm_chunk6x6_stride3x3", "val_grid1mm_chunk6x6_stride3x3"], + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", 
distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="test", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + keys=("coord", "color", "normal", "segment"), + return_inverse=True, + ), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# hook +hooks = [ + dict(type="CheckpointLoader"), + dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + dict(type="CheckpointSaver", 
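+ # save_freq=None appears to keep only the rolling "last"/"best" checkpoints rather
+ # than additionally snapshotting every N epochs (assumption about the hook's default)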
save_freq=None), + dict(type="PreciseEvaluator", test_last=True), +] diff --git a/Pointcept/configs/scannetpp/semseg-pt-v3m1-2-ppt-extreme.py b/Pointcept/configs/scannetpp/semseg-pt-v3m1-2-ppt-extreme.py new file mode 100644 index 0000000000000000000000000000000000000000..5b0c756223bd65cafca57ba077ff2d6830887403 --- /dev/null +++ b/Pointcept/configs/scannetpp/semseg-pt-v3m1-2-ppt-extreme.py @@ -0,0 +1,499 @@ +_base_ = [ + "../_base_/default_runtime.py", + "../_base_/dataset/scannetpp.py", +] + +# misc custom setting +batch_size = 24 # bs: total bs in all gpus +num_worker = 48 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +find_unused_parameters = True + +# trainer +train = dict( + type="MultiDatasetTrainer", +) + +# model settings +model = dict( + type="PPT-v1m2", + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(3, 3, 3, 6, 3), + enc_channels=(48, 96, 192, 384, 512), + enc_num_head=(3, 6, 12, 24, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(3, 3, 3, 3), + dec_channels=(64, 96, 192, 384), + dec_num_head=(4, 6, 12, 24), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + cls_mode=False, + pdnorm_bn=True, + pdnorm_ln=True, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "ScanNet++", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + backbone_out_channels=64, + context_channels=256, + conditions=("ScanNet", "ScanNet++", "S3DIS", "Structured3D"), + num_classes=(200, 100, 13, 25), +) + +# scheduler settings +epoch = 100 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.005, 0.0005], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0005)] + +# dataset settings +data = dict( + num_classes=100, + ignore_index=-1, + train=dict( + type="ConcatDataset", + datasets=[ + # Structured3D + dict( + type="Structured3DDataset", + split=["train", "val", "test"], + data_root="data/structured3d", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, 
color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "Structured3D"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=2, # sampling weight + ), + # ScanNet + dict( + type="ScanNet200Dataset", + split=["train", "val"], + data_root="data/scannet", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=1, # sampling weight + ), + # S3DIS + # dict( + # type="S3DISDataset", + # split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + # data_root="data/s3dis", + # transform=[ + # dict(type="CenterShift", apply_z=True), + # dict( + # type="RandomDropout", + # dropout_ratio=0.2, + # dropout_application_ratio=0.2, + # ), + # # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + # dict( + # type="RandomRotate", + # angle=[-1, 1], + # axis="z", + # center=[0, 0, 0], + # p=0.5, + # ), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + # dict(type="RandomScale", scale=[0.9, 1.1]), + # # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + # dict(type="RandomFlip", p=0.5), + # dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict( + # type="ElasticDistortion", + # distortion_params=[[0.2, 0.4], [0.8, 1.6]], + # ), + # dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + # dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + # dict(type="ChromaticJitter", p=0.95, std=0.05), + # # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # # dict(type="RandomColorDrop", p=0.2, 
color_augment=0.0), + # dict( + # type="GridSample", + # grid_size=0.02, + # hash_type="fnv", + # mode="train", + # return_grid_coord=True, + # ), + # dict(type="SphereCrop", sample_rate=0.6, mode="random"), + # dict(type="SphereCrop", point_max=204800, mode="random"), + # dict(type="CenterShift", apply_z=False), + # dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + # dict(type="Add", keys_dict={"condition": "S3DIS"}), + # dict(type="ToTensor"), + # dict( + # type="Collect", + # keys=("coord", "grid_coord", "segment", "condition"), + # feat_keys=("color", "normal"), + # ), + # ], + # test_mode=False, + # loop=1, # sampling weight + # ), + dict( + type="ScanNetPPDataset", + split="train_grid1mm_chunk6x6_stride3x3", + data_root="data/scannetpp", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "ScanNet++"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + ], + ), + val=dict( + type="ScanNetPPDataset", + split="val", + data_root="data/scannetpp", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict(type="Add", keys_dict={"condition": "ScanNet++"}), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type="ScanNetPPDataset", + split="val", + data_root="data/scannetpp", + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + keys=("coord", "color", "normal", "segment"), + return_inverse=True, + ), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + 
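+ # applied to every voxelized fragment just before batching; the collected "index"
+ # key records where each fragment point sits in the grid-sampled cloud, so fragment
+ # predictions can be scattered back to full resolution via the return_inverse mapping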
dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "ScanNet++"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannetpp/semseg-pt-v3m1-3-ppt-extreme-submit.py b/Pointcept/configs/scannetpp/semseg-pt-v3m1-3-ppt-extreme-submit.py new file mode 100644 index 0000000000000000000000000000000000000000..eb48cf2db8af6d55dd85945e92be33df1e49b5fc --- /dev/null +++ b/Pointcept/configs/scannetpp/semseg-pt-v3m1-3-ppt-extreme-submit.py @@ -0,0 +1,488 @@ +_base_ = [ + "../_base_/default_runtime.py", + "../_base_/dataset/scannetpp.py", +] + +# misc custom setting +batch_size = 24 # bs: total bs in all gpus +num_worker = 48 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +evaluate = False +find_unused_parameters = True + +# trainer +train = dict( + type="MultiDatasetTrainer", +) + +# model settings +model = dict( + type="PPT-v1m2", + backbone=dict( + type="PT-v3m1", + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(3, 3, 3, 6, 3), + enc_channels=(48, 96, 192, 384, 512), + enc_num_head=(3, 6, 12, 24, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(3, 3, 3, 3), + dec_channels=(64, 96, 192, 384), + dec_num_head=(4, 6, 12, 24), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + shuffle_orders=True, + pre_norm=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + cls_mode=False, + pdnorm_bn=True, + pdnorm_ln=True, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + 
pdnorm_conditions=("ScanNet", "ScanNet++", "S3DIS", "Structured3D"), + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + backbone_out_channels=64, + context_channels=256, + conditions=("ScanNet", "ScanNet++", "S3DIS", "Structured3D"), + num_classes=(200, 100, 13, 25), +) + +# scheduler settings +epoch = 100 +optimizer = dict(type="AdamW", lr=0.005, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.005, 0.0005], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="block", lr=0.0005)] + +# dataset settings +data = dict( + num_classes=100, + ignore_index=-1, + train=dict( + type="ConcatDataset", + datasets=[ + # Structured3D + dict( + type="Structured3DDataset", + split=["train", "val", "test"], + data_root="data/structured3d", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "Structured3D"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=2, # sampling weight + ), + # ScanNet + dict( + type="ScanNet200Dataset", + split=["train", "val"], + data_root="data/scannet", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + 
dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=1, # sampling weight + ), + # S3DIS + # dict( + # type="S3DISDataset", + # split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + # data_root="data/s3dis", + # transform=[ + # dict(type="CenterShift", apply_z=True), + # dict( + # type="RandomDropout", + # dropout_ratio=0.2, + # dropout_application_ratio=0.2, + # ), + # # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + # dict( + # type="RandomRotate", + # angle=[-1, 1], + # axis="z", + # center=[0, 0, 0], + # p=0.5, + # ), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + # dict(type="RandomScale", scale=[0.9, 1.1]), + # # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + # dict(type="RandomFlip", p=0.5), + # dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict( + # type="ElasticDistortion", + # distortion_params=[[0.2, 0.4], [0.8, 1.6]], + # ), + # dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + # dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + # dict(type="ChromaticJitter", p=0.95, std=0.05), + # # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + # dict( + # type="GridSample", + # grid_size=0.02, + # hash_type="fnv", + # mode="train", + # return_grid_coord=True, + # ), + # dict(type="SphereCrop", sample_rate=0.6, mode="random"), + # dict(type="SphereCrop", point_max=204800, mode="random"), + # dict(type="CenterShift", apply_z=False), + # dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + # dict(type="Add", keys_dict={"condition": "S3DIS"}), + # dict(type="ToTensor"), + # dict( + # type="Collect", + # keys=("coord", "grid_coord", "segment", "condition"), + # feat_keys=("color", "normal"), + # ), + # ], + # test_mode=False, + # loop=1, # sampling weight + # ), + dict( + type="ScanNetPPDataset", + split=[ + "train_grid1mm_chunk6x6_stride3x3", + "val_grid1mm_chunk6x6_stride3x3", + ], + data_root="data/scannetpp", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 
0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "ScanNet++"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + ], + ), + test=dict( + type="ScanNetPPDataset", + split="test", + data_root="data/scannetpp", + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + keys=("coord", "color", "normal", "segment"), + return_inverse=True, + ), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "ScanNet++"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) + +# hook +hooks = [ + dict(type="CheckpointLoader"), + 
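+ # hooks run in list order; PreciseEvaluator(test_last=True) is expected to rerun
+ # the full TTA test pipeline on the final checkpoint once training ends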
dict(type="IterationTimer", warmup_iter=2), + dict(type="InformationWriter"), + dict(type="SemSegEvaluator"), + dict(type="CheckpointSaver", save_freq=None), + dict(type="PreciseEvaluator", test_last=True), +] diff --git a/Pointcept/configs/scannetpp/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/scannetpp/semseg-spunet-v1m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..871aa42954765ea6091c328910e40ee6c31e6173 --- /dev/null +++ b/Pointcept/configs/scannetpp/semseg-spunet-v1m1-0-base.py @@ -0,0 +1,271 @@ +_base_ = [ + "../_base_/default_runtime.py", + "../_base_/dataset/scannetpp.py", +] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=100, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + + +# scheduler settings +epoch = 800 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "ScanNetPPDataset" +data_root = "data/scannetpp" + +data = dict( + num_classes=100, + ignore_index=-1, + train=dict( + type=dataset_type, + split="train_grid1mm_chunk6x6_stride3x3", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), 
+ test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + keys=("coord", "color", "normal", "segment"), + return_inverse=True, + ), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/scannetpp/semseg-spunet-v1m1-1-ppt-extreme.py b/Pointcept/configs/scannetpp/semseg-spunet-v1m1-1-ppt-extreme.py new file mode 100644 index 0000000000000000000000000000000000000000..55ed0fe2be2b23404980d7c19633458f843506bd --- /dev/null +++ b/Pointcept/configs/scannetpp/semseg-spunet-v1m1-1-ppt-extreme.py @@ -0,0 +1,480 @@ +_base_ = [ + "../_base_/default_runtime.py", + "../_base_/dataset/scannetpp.py", +] + +# misc custom setting +batch_size = 24 # bs: total bs in all gpus +num_worker = 48 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +find_unused_parameters = True + +# trainer +train = dict( + type="MultiDatasetTrainer", +) + +# model settings +model = dict( + type="PPT-v1m2", + backbone=dict( + type="SpUNet-v1m3", + in_channels=6, + num_classes=0, + base_channels=32, + context_channels=256, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + conditions=("ScanNet", "ScanNet++", "S3DIS", 
"Structured3D"), + zero_init=False, + norm_decouple=True, + norm_adaptive=True, + norm_affine=True, + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + backbone_out_channels=96, + context_channels=256, + conditions=("ScanNet", "ScanNet++", "S3DIS", "Structured3D"), + num_classes=(200, 100, 13, 25), +) + +# scheduler settings +epoch = 100 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +data = dict( + num_classes=100, + ignore_index=-1, + train=dict( + type="ConcatDataset", + datasets=[ + # Structured3D + dict( + type="Structured3DDataset", + split=["train", "val", "test"], + data_root="data/structured3d", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "Structured3D"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=2, # sampling weight + ), + # ScanNet + dict( + type="ScanNet200Dataset", + split=["train", "val"], + data_root="data/scannet", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + 
dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "ScanNet"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + loop=1, # sampling weight + ), + # S3DIS + # dict( + # type="S3DISDataset", + # split=("Area_1", "Area_2", "Area_3", "Area_4", "Area_6"), + # data_root="data/s3dis", + # transform=[ + # dict(type="CenterShift", apply_z=True), + # dict( + # type="RandomDropout", + # dropout_ratio=0.2, + # dropout_application_ratio=0.2, + # ), + # # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + # dict( + # type="RandomRotate", + # angle=[-1, 1], + # axis="z", + # center=[0, 0, 0], + # p=0.5, + # ), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + # dict(type="RandomScale", scale=[0.9, 1.1]), + # # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + # dict(type="RandomFlip", p=0.5), + # dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict( + # type="ElasticDistortion", + # distortion_params=[[0.2, 0.4], [0.8, 1.6]], + # ), + # dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + # dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + # dict(type="ChromaticJitter", p=0.95, std=0.05), + # # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + # dict( + # type="GridSample", + # grid_size=0.02, + # hash_type="fnv", + # mode="train", + # return_grid_coord=True, + # ), + # dict(type="SphereCrop", sample_rate=0.6, mode="random"), + # dict(type="SphereCrop", point_max=204800, mode="random"), + # dict(type="CenterShift", apply_z=False), + # dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + # dict(type="Add", keys_dict={"condition": "S3DIS"}), + # dict(type="ToTensor"), + # dict( + # type="Collect", + # keys=("coord", "grid_coord", "segment", "condition"), + # feat_keys=("color", "normal"), + # ), + # ], + # test_mode=False, + # loop=1, # sampling weight + # ), + dict( + type="ScanNetPPDataset", + split="train_grid1mm_chunk6x6_stride3x3", + data_root="data/scannetpp", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", + dropout_ratio=0.2, + dropout_application_ratio=0.2, + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict( + type="ElasticDistortion", + distortion_params=[[0.2, 0.4], [0.8, 1.6]], + ), + 
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", point_max=204800, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + # dict(type="ShufflePoint"), + dict(type="Add", keys_dict={"condition": "ScanNet++"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + ], + ), + val=dict( + type="ScanNetPPDataset", + split="val", + data_root="data/scannetpp", + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict(type="Add", keys_dict={"condition": "ScanNet++"}), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type="ScanNetPPDataset", + split="val", + data_root="data/scannetpp", + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.01, + hash_type="fnv", + mode="train", + keys=("coord", "color", "normal", "segment"), + return_inverse=True, + ), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + keys=("coord", "color", "normal"), + return_grid_coord=True, + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "ScanNet++"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 
/ 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/semantic_kitti/semseg-minkunet34c-0-base.py b/Pointcept/configs/semantic_kitti/semseg-minkunet34c-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..6b22906b26f70255b437c1f8e50e71e24dde3f6d --- /dev/null +++ b/Pointcept/configs/semantic_kitti/semseg-minkunet34c-0-base.py @@ -0,0 +1,213 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 8 # bs: total bs in all gpus +mix_prob = 0 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict(type="MinkUNet34C", in_channels=4, out_channels=19), + criteria=[ + dict( + type="CrossEntropyLoss", + weight=[ + 3.1557, + 8.7029, + 7.8281, + 6.1354, + 6.3161, + 7.9937, + 8.9704, + 10.1922, + 1.6155, + 4.2187, + 1.9385, + 5.5455, + 2.0198, + 2.6261, + 1.3212, + 5.1102, + 2.5492, + 5.8585, + 7.3929, + ], + loss_weight=1.0, + ignore_index=-1, + ) + ], +) + +# scheduler settings +epoch = 50 +eval_epoch = 50 +optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.04, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=100.0, +) + +# dataset settings +dataset_type = "SemanticKITTIDataset" +data_root = "data/semantic_kitti" +ignore_index = -1 +names = [ + "car", + "bicycle", + "motorcycle", + "truck", + "other-vehicle", + "person", + "bicyclist", + "motorcyclist", + "road", + "parking", + "sidewalk", + "other-ground", + "building", + "fence", + "vegetation", + "trunk", + "terrain", + "pole", + "traffic-sign", +] + +data = dict( + num_classes=19, + ignore_index=ignore_index, + names=names, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="PointClip", 
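+ # PointClip drops points outside the range below (x/y within +-35.2 m, z in [-4, 2] m),
+ # mirroring the training-time clip so train and val voxel grids line up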
point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "strength"), + ), + crop=None, + post_transform=[ + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ignore_index=ignore_index, + ), +) diff --git a/Pointcept/configs/semantic_kitti/semseg-ppt-v1m1-0-sk-nu-wa-spunet.py b/Pointcept/configs/semantic_kitti/semseg-ppt-v1m1-0-sk-nu-wa-spunet.py new file mode 100644 index 0000000000000000000000000000000000000000..2ea85111fdc0eb44096367ce5c50df3c2683da68 --- /dev/null +++ b/Pointcept/configs/semantic_kitti/semseg-ppt-v1m1-0-sk-nu-wa-spunet.py @@ -0,0 +1,351 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +find_unused_parameters = True + +# trainer +train = dict( + type="MultiDatasetTrainer", +) + +# model settings +model = dict( + type="PPT-v1m1", + backbone=dict( + type="SpUNet-v1m3", + in_channels=4, + num_classes=0, + base_channels=32, + context_channels=256, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + conditions=("SemanticKITTI", "nuScenes", "Waymo"), + zero_init=False, + norm_decouple=True, + norm_adaptive=False, + norm_affine=True, + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + backbone_out_channels=96, + context_channels=256, + conditions=("SemanticKITTI", "nuScenes", "Waymo"), + template="[x]", + clip_model="ViT-B/16", + # fmt: off + class_name=( + # SemanticKITTI + "car", "bicycle", "motorcycle", "truck", "other vehicle", + "person", "person who rides a bicycle", "person who rides a motorcycle", "road", "parking", + "path for pedestrians at the side of a road", "other ground", "building", "fence", "vegetation", + "trunk", "terrain", "pole", "traffic sign", + # nuScenes + "barrier", "bicycle", "bus", "car", "construction vehicle", + "motorcycle", "pedestrian", "traffic cone", "trailer", "truck", + "path suitable or safe for driving", "other flat", "sidewalk", "terrain", "man made", "vegetation", + # waymo + "car", "truck", "bus", "other vehicle", "person who rides a motorcycle", + "person who rides a bicycle", "pedestrian", "sign", "traffic light", 
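+ # (these class names are natural-language prompts: PPT-v1m1 embeds each with the
+ # CLIP ViT-B/16 text encoder under template "[x]" to build the classifier weights,
+ # and valid_index below slices out each dataset's block of prompts)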
"pole", + "construction cone", "bicycle", "motorcycle", "building", "vegetation", + "tree trunk", "curb", "road", "lane marker", "other ground", "horizontal surface that can not drive", + "surface when pedestrians most likely to walk on", + ), + valid_index=( + [i for i in range(19)], + [i for i in range(19, 19 + 16)], + [i for i in range(19 + 16, 19 + 16 + 22)], + ), + # fmt: on + backbone_mode=False, +) + +# scheduler settings +epoch = 50 +eval_epoch = 50 +optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.04, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=100.0, +) +# param_dicts = [dict(keyword="modulation", lr=0.0002)] + +# dataset settings +data = dict( + num_classes=19, + ignore_index=-1, + names=[ + "car", + "bicycle", + "motorcycle", + "truck", + "other-vehicle", + "person", + "bicyclist", + "motorcyclist", + "road", + "parking", + "sidewalk", + "other-ground", + "building", + "fence", + "vegetation", + "trunk", + "terrain", + "pole", + "traffic-sign", + ], + train=dict( + type="ConcatDataset", + datasets=[ + # nuScenes + dict( + type="NuScenesDataset", + split="train", + data_root="data/nuscenes", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5), + dict( + type="PointClip", + point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "nuScenes"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + # SemanticKITTI + dict( + type="SemanticKITTIDataset", + split="train", + data_root="data/semantic_kitti", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict( + type="PointClip", + point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + 
return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "SemanticKITTI"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + # Waymo + dict( + type="WaymoDataset", + split="training", + data_root="data/waymo", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict( + type="PointClip", + point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "Waymo"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + ], + ), + val=dict( + type="SemanticKITTIDataset", + split="val", + data_root="data/semantic_kitti", + transform=[ + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + dict(type="Add", keys_dict={"condition": "SemanticKITTI"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + ), + test=dict( + type="SemanticKITTIDataset", + split="val", + data_root="data/semantic_kitti", + transform=[ + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "strength"), + ), + crop=None, + post_transform=[ + dict(type="Add", keys_dict={"condition": "SemanticKITTI"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ignore_index=-1, + ), +) diff --git 
a/Pointcept/configs/semantic_kitti/semseg-ppt-v1m2-0-sk-nu-wa-spunet-submit.py b/Pointcept/configs/semantic_kitti/semseg-ppt-v1m2-0-sk-nu-wa-spunet-submit.py new file mode 100644 index 0000000000000000000000000000000000000000..1f1c21cb9e0a05067de2862d918ce39490cfe4c2 --- /dev/null +++ b/Pointcept/configs/semantic_kitti/semseg-ppt-v1m2-0-sk-nu-wa-spunet-submit.py @@ -0,0 +1,301 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +find_unused_parameters = True +evaluate = False + +# trainer +train = dict( + type="MultiDatasetTrainer", +) + +# model settings +model = dict( + type="PPT-v1m2", + backbone=dict( + type="SpUNet-v1m3", + in_channels=4, + num_classes=0, + base_channels=32, + context_channels=256, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + conditions=("SemanticKITTI", "nuScenes", "Waymo"), + zero_init=False, + norm_decouple=True, + norm_adaptive=False, + norm_affine=True, + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + backbone_out_channels=96, + context_channels=256, + conditions=("SemanticKITTI", "nuScenes", "Waymo"), + num_classes=(19, 16, 22), +) + +# scheduler settings +epoch = 50 +eval_epoch = 50 +optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.04, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=100.0, +) +# param_dicts = [dict(keyword="modulation", lr=0.0002)] + +# dataset settings +data = dict( + num_classes=19, + ignore_index=-1, + names=[ + "car", + "bicycle", + "motorcycle", + "truck", + "other-vehicle", + "person", + "bicyclist", + "motorcyclist", + "road", + "parking", + "sidewalk", + "other-ground", + "building", + "fence", + "vegetation", + "trunk", + "terrain", + "pole", + "traffic-sign", + ], + train=dict( + type="ConcatDataset", + datasets=[ + # nuScenes + dict( + type="NuScenesDataset", + split=["train", "val"], + data_root="data/nuscenes", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5), + dict( + type="PointClip", + point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "nuScenes"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + # SemanticKITTI + dict( + 
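# as with the nuScenes entry above, this submission config folds val into training (split below); evaluate=False at the top skips local validation because the test split is scored by the benchmark server +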
type="SemanticKITTIDataset", + split=["train", "val"], + data_root="data/semantic_kitti", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict( + type="PointClip", + point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "SemanticKITTI"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + # Waymo + dict( + type="WaymoDataset", + split=["training", "validation"], + data_root="data/waymo", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict( + type="PointClip", + point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "Waymo"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + ], + ), + test=dict( + type="SemanticKITTIDataset", + split="test", + data_root="data/semantic_kitti", + transform=[ + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "strength"), + ), + crop=None, + post_transform=[ + dict(type="Add", keys_dict={"condition": "SemanticKITTI"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], 
+ [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ignore_index=-1, + ), +) diff --git a/Pointcept/configs/semantic_kitti/semseg-ppt-v1m2-0-sk-nu-wa-spunet.py b/Pointcept/configs/semantic_kitti/semseg-ppt-v1m2-0-sk-nu-wa-spunet.py new file mode 100644 index 0000000000000000000000000000000000000000..eb5cd428b7e6856a130e7bc30c1c5e9ea7c58428 --- /dev/null +++ b/Pointcept/configs/semantic_kitti/semseg-ppt-v1m2-0-sk-nu-wa-spunet.py @@ -0,0 +1,325 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +num_worker = 24 +mix_prob = 0.8 +empty_cache = False +enable_amp = True +find_unused_parameters = True + +# trainer +train = dict( + type="MultiDatasetTrainer", +) + +# model settings +model = dict( + type="PPT-v1m2", + backbone=dict( + type="SpUNet-v1m3", + in_channels=4, + num_classes=0, + base_channels=32, + context_channels=256, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + conditions=("SemanticKITTI", "nuScenes", "Waymo"), + zero_init=False, + norm_decouple=True, + norm_adaptive=False, + norm_affine=True, + ), + criteria=[ + dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + backbone_out_channels=96, + context_channels=256, + conditions=("SemanticKITTI", "nuScenes", "Waymo"), + num_classes=(19, 16, 22), +) + +# scheduler settings +epoch = 50 +eval_epoch = 50 +optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.04, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=100.0, +) +# param_dicts = [dict(keyword="modulation", lr=0.0002)] + +# dataset settings +data = dict( + num_classes=19, + ignore_index=-1, + names=[ + "car", + "bicycle", + "motorcycle", + "truck", + "other-vehicle", + "person", + "bicyclist", + "motorcyclist", + "road", + "parking", + "sidewalk", + "other-ground", + "building", + "fence", + "vegetation", + "trunk", + "terrain", + "pole", + "traffic-sign", + ], + train=dict( + type="ConcatDataset", + datasets=[ + # nuScenes + dict( + type="NuScenesDataset", + split="train", + data_root="data/nuscenes", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5), + dict( + type="PointClip", + point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # 
dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "nuScenes"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + # SemanticKITTI + dict( + type="SemanticKITTIDataset", + split="train", + data_root="data/semantic_kitti", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict( + type="PointClip", + point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "SemanticKITTI"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + # Waymo + dict( + type="WaymoDataset", + split="training", + data_root="data/waymo", + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict( + type="RandomRotate", + angle=[-1, 1], + axis="z", + center=[0, 0, 0], + p=0.5, + ), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict( + type="PointClip", + point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2), + ), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="Add", keys_dict={"condition": "Waymo"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + loop=1, + ), + ], + ), + val=dict( + type="SemanticKITTIDataset", + split="val", + data_root="data/semantic_kitti", + transform=[ + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + dict(type="Add", keys_dict={"condition": "SemanticKITTI"}), + dict(type="ToTensor"), + dict( + 
type="Collect", + keys=("coord", "grid_coord", "segment", "condition"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=-1, + ), + test=dict( + type="SemanticKITTIDataset", + split="val", + data_root="data/semantic_kitti", + transform=[ + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "strength"), + ), + crop=None, + post_transform=[ + dict(type="Add", keys_dict={"condition": "SemanticKITTI"}), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index", "condition"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ignore_index=-1, + ), +) diff --git a/Pointcept/configs/semantic_kitti/semseg-pt-v2m2-0-base.py b/Pointcept/configs/semantic_kitti/semseg-pt-v2m2-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..7d1670a65c4e06b06caeef456bd0626310acbc40 --- /dev/null +++ b/Pointcept/configs/semantic_kitti/semseg-pt-v2m2-0-base.py @@ -0,0 +1,222 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 8 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=4, + num_classes=19, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.15, 0.375, 0.9375, 2.34375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + # fmt: off + criteria=[ + dict(type="CrossEntropyLoss", + weight=[3.1557, 8.7029, 7.8281, 6.1354, 6.3161, 7.9937, 8.9704, 10.1922, 1.6155, 4.2187, + 1.9385, 5.5455, 2.0198, 2.6261, 1.3212, 5.1102, 2.5492, 5.8585, 7.3929], + loss_weight=1.0, + ignore_index=-1), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], + # fmt: on +) + +# scheduler settings +epoch = 50 +eval_epoch = 50 +optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.04, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=100.0, +) + +# dataset settings +dataset_type = "SemanticKITTIDataset" +data_root = "data/semantic_kitti" +ignore_index = -1 +names = [ + "car", + "bicycle", + "motorcycle", + "truck", + "other-vehicle", + "person", + "bicyclist", + "motorcyclist", + "road", + "parking", + "sidewalk", + "other-ground", + "building", + "fence", + "vegetation", + "trunk", + "terrain", + "pole", + "traffic-sign", +] + +data = dict( + 
num_classes=19, + ignore_index=ignore_index, + names=names, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="SphereCrop", point_max=120000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "strength"), + ), + crop=None, + post_transform=[ + dict( + type="PointClip", + point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2), + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ignore_index=ignore_index, + ), +) diff --git a/Pointcept/configs/semantic_kitti/semseg-pt-v2m2-1-benchmark-submit.py b/Pointcept/configs/semantic_kitti/semseg-pt-v2m2-1-benchmark-submit.py new file mode 100644 index 0000000000000000000000000000000000000000..d65cda64e4ee3f485c11ada2c1eeccb30ca9e8ef --- /dev/null +++ b/Pointcept/configs/semantic_kitti/semseg-pt-v2m2-1-benchmark-submit.py @@ -0,0 +1,218 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 8 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True +evaluate = False + +# model settings +model = dict( + 
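# this benchmark-submit variant reuses the base PT-v2 model and schedule from semseg-pt-v2m2-0-base.py; the differences are evaluate=False and the data section (train+val training, test-split inference) +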
type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=4, + num_classes=19, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.15, 0.375, 0.9375, 2.34375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[ + dict( + type="CrossEntropyLoss", + weight=[ + 3.1557, + 8.7029, + 7.8281, + 6.1354, + 6.3161, + 7.9937, + 8.9704, + 10.1922, + 1.6155, + 4.2187, + 1.9385, + 5.5455, + 2.0198, + 2.6261, + 1.3212, + 5.1102, + 2.5492, + 5.8585, + 7.3929, + ], + loss_weight=1.0, + ignore_index=-1, + ), + dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1), + ], +) + +# scheduler settings +epoch = 50 +eval_epoch = 50 +optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.04, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=100.0, +) + +# dataset settings +dataset_type = "SemanticKITTIDataset" +data_root = "data/semantic_kitti" +ignore_index = -1 +names = [ + "car", + "bicycle", + "motorcycle", + "truck", + "other-vehicle", + "person", + "bicyclist", + "motorcyclist", + "road", + "parking", + "sidewalk", + "other-ground", + "building", + "fence", + "vegetation", + "trunk", + "terrain", + "pole", + "traffic-sign", +] + +data = dict( + num_classes=19, + ignore_index=ignore_index, + names=names, + train=dict( + type=dataset_type, + split=["train", "val"], + data_root=data_root, + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="SphereCrop", point_max=120000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + test=dict( + type=dataset_type, + split="test", + data_root=data_root, + transform=[], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "strength"), + ), + crop=None, + post_transform=[ + dict( + type="PointClip", + point_cloud_range=(-35.2, -35.2, -4, 35.2, 
35.2, 2), + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ignore_index=ignore_index, + ), +) diff --git a/Pointcept/configs/semantic_kitti/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/semantic_kitti/semseg-spunet-v1m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..7be9cb0979f431a137768849c0328ada0601fbd7 --- /dev/null +++ b/Pointcept/configs/semantic_kitti/semseg-spunet-v1m1-0-base.py @@ -0,0 +1,219 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=4, + num_classes=19, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[ + dict( + type="CrossEntropyLoss", + weight=[ + 3.1557, + 8.7029, + 7.8281, + 6.1354, + 6.3161, + 7.9937, + 8.9704, + 10.1922, + 1.6155, + 4.2187, + 1.9385, + 5.5455, + 2.0198, + 2.6261, + 1.3212, + 5.1102, + 2.5492, + 5.8585, + 7.3929, + ], + loss_weight=1.0, + ignore_index=-1, + ) + ], +) + +# scheduler settings +epoch = 50 +eval_epoch = 50 +optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.04, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=100.0, +) + +# dataset settings +dataset_type = "SemanticKITTIDataset" +data_root = "data/semantic_kitti" +ignore_index = -1 +names = [ + "car", + "bicycle", + "motorcycle", + "truck", + "other-vehicle", + "person", + "bicyclist", + "motorcyclist", + "road", + "parking", + "sidewalk", + "other-ground", + "building", + "fence", + "vegetation", + "trunk", + "terrain", + "pole", + "traffic-sign", +] + +data = dict( + num_classes=19, + ignore_index=ignore_index, + names=names, + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), 
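+ # Collect concatenates feat_keys into the model input, so 3 coord channels + 1 lidar strength match the backbone's in_channels=4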
+ dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "strength"), + ), + crop=None, + post_transform=[ + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ignore_index=ignore_index, + ), +) diff --git a/Pointcept/configs/semantic_kitti/semseg-spvcnn-v1m1-0-base.py b/Pointcept/configs/semantic_kitti/semseg-spvcnn-v1m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..599ff4d538e1e2bc706abc9c00a41dc71cd5e007 --- /dev/null +++ b/Pointcept/configs/semantic_kitti/semseg-spvcnn-v1m1-0-base.py @@ -0,0 +1,219 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 8 # bs: total bs in all gpus +mix_prob = 0 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SPVCNN", + in_channels=4, + out_channels=19, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 2, 2, 2, 2, 2, 2, 2), + ), + criteria=[ + dict( + type="CrossEntropyLoss", + weight=[ + 3.1557, + 8.7029, + 7.8281, + 6.1354, + 6.3161, + 7.9937, + 8.9704, + 10.1922, + 1.6155, + 4.2187, + 1.9385, + 5.5455, + 2.0198, + 2.6261, + 1.3212, + 5.1102, + 2.5492, + 5.8585, + 7.3929, + ], + loss_weight=1.0, + ignore_index=-1, + ) + ], +) + +# scheduler settings +epoch = 50 +eval_epoch = 50 +optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.04, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=100.0, +) + +# dataset settings +dataset_type = "SemanticKITTIDataset" +data_root = "data/semantic_kitti" +ignore_index = -1 +names = [ + "car", + "bicycle", + "motorcycle", + "truck", + "other-vehicle", + "person", + "bicyclist", + "motorcyclist", + "road", + "parking", + "sidewalk", + "other-ground", + "building", + "fence", + "vegetation", + "trunk", + "terrain", + "pole", + "traffic-sign", +] + +data = dict( + num_classes=19, + ignore_index=ignore_index, + names=names, + train=dict( + type=dataset_type, + split="train", + 
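# note mix_prob=0 in the header: unlike the SpUNet/PT-v2 recipes (mix_prob=0.8), this SPVCNN recipe trains without batch mixing +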
data_root=data_root, + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "strength"), + ), + crop=None, + post_transform=[ + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ignore_index=ignore_index, + ), +) diff --git a/Pointcept/configs/structured3d/semseg-pt-v2m2-0-base.py b/Pointcept/configs/structured3d/semseg-pt-v2m2-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..5c67cd6a103d6b3176482c6710938d8e23f2feea --- /dev/null +++ b/Pointcept/configs/structured3d/semseg-pt-v2m2-0-base.py @@ -0,0 +1,304 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="PT-v2m2", + in_channels=9, + num_classes=25, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 
512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.15, 0.375, 0.9375), # x3, x2.5, x2.5, x2.5 + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.3, + enable_checkpoint=False, + unpool_backend="map", # map / interp + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 100 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "Structured3DDataset" +data_root = "data/structured3d" + +data = dict( + num_classes=25, + ignore_index=-1, + names=( + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "picture", + "desk", + "shelves", + "curtain", + "dresser", + "pillow", + "mirror", + "ceiling", + "refrigerator", + "television", + "nightstand", + "sink", + "lamp", + "otherstructure", + "otherfurniture", + "otherprop", + ), + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="SphereCrop", point_max=120000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + 
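# indoor (Structured3D) pipeline: photometric augmentations below plus coord+color+normal features (3+3+3, matching in_channels=9) in place of lidar strength +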
dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/structured3d/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/structured3d/semseg-spunet-v1m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..1a2fa48320601baed972856a20afcba21991df0c --- /dev/null +++ b/Pointcept/configs/structured3d/semseg-spunet-v1m1-0-base.py @@ -0,0 +1,285 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="SpUNet-v1m1", + in_channels=6, + num_classes=25, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 100 +optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True) +scheduler = dict( + type="OneCycleLR", + max_lr=optimizer["lr"], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=10000.0, +) + +# dataset settings +dataset_type = "Structured3DDataset" +data_root = "data/structured3d" + +data = dict( + num_classes=25, + ignore_index=-1, + names=( + "wall", + "floor", + "cabinet", + 
"bed", + "chair", + "sofa", + "table", + "door", + "window", + "picture", + "desk", + "shelves", + "curtain", + "dresser", + "pillow", + "mirror", + "ceiling", + "refrigerator", + "television", + "nightstand", + "sink", + "lamp", + "otherstructure", + "otherfurniture", + "otherprop", + ), + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + dict(type="SphereCrop", sample_rate=0.6, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + 
dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/structured3d/semseg-swin3d-v1m1-0-small.py b/Pointcept/configs/structured3d/semseg-swin3d-v1m1-0-small.py new file mode 100644 index 0000000000000000000000000000000000000000..e52bb1ee7a4da43ca523c1979129ec2fa4c5ecbe --- /dev/null +++ b/Pointcept/configs/structured3d/semseg-swin3d-v1m1-0-small.py @@ -0,0 +1,306 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="Swin3D-v1m1", + in_channels=9, + num_classes=25, + base_grid_size=0.02, + depths=[2, 4, 9, 4, 4], + channels=[48, 96, 192, 384, 384], + num_heads=[6, 6, 12, 24, 24], + window_sizes=[5, 7, 7, 7, 7], + quant_size=4, + drop_path_rate=0.3, + up_k=3, + num_layers=5, + stem_transformer=True, + down_stride=3, + upsample="linear_attn", + knn_down=True, + cRSE="XYZ_RGB_NORM", + fp16_mode=1, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 100 +optimizer = dict(type="AdamW", lr=0.008, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.008, 0.0008], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="blocks", lr=0.0008)] + +# dataset settings +dataset_type = "Structured3DDataset" +data_root = "data/structured3d" + +data = dict( + num_classes=25, + ignore_index=-1, + names=( + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "picture", + "desk", + "shelves", + "curtain", + "dresser", + "pillow", + "mirror", + "ceiling", + "refrigerator", + "television", + "nightstand", + "sink", + "lamp", + "otherstructure", + "otherfurniture", + "otherprop", + ), + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + 
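# the ±1/64 x/y rotations above are in pi units, roughly ±2.8 degrees, keeping indoor scenes near upright +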
dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_displacement=True, + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="SphereCrop", point_max=120000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_displacement=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + return_displacement=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + 
angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [dict(type="RandomFlip", p=1)], + ], + ), + ), +) diff --git a/Pointcept/configs/structured3d/semseg-swin3d-v1m1-1-large.py b/Pointcept/configs/structured3d/semseg-swin3d-v1m1-1-large.py new file mode 100644 index 0000000000000000000000000000000000000000..de62b4234473f34648201507a6b7d37d11674df6 --- /dev/null +++ b/Pointcept/configs/structured3d/semseg-swin3d-v1m1-1-large.py @@ -0,0 +1,306 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # bs: total bs in all gpus +mix_prob = 0.8 +empty_cache = False +enable_amp = True + +# model settings +model = dict( + type="DefaultSegmentor", + backbone=dict( + type="Swin3D-v1m1", + in_channels=9, + num_classes=25, + base_grid_size=0.02, + depths=[2, 4, 9, 4, 4], + channels=[80, 160, 320, 640, 640], + num_heads=[10, 10, 20, 40, 40], + window_sizes=[5, 7, 7, 7, 7], + quant_size=4, + drop_path_rate=0.3, + up_k=3, + num_layers=5, + stem_transformer=True, + down_stride=3, + upsample="linear_attn", + knn_down=True, + cRSE="XYZ_RGB_NORM", + fp16_mode=1, + ), + criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)], +) + +# scheduler settings +epoch = 100 +optimizer = dict(type="AdamW", lr=0.008, weight_decay=0.05) +scheduler = dict( + type="OneCycleLR", + max_lr=[0.008, 0.0008], + pct_start=0.05, + anneal_strategy="cos", + div_factor=10.0, + final_div_factor=1000.0, +) +param_dicts = [dict(keyword="blocks", lr=0.0008)] + +# dataset settings +dataset_type = "Structured3DDataset" +data_root = "data/structured3d" + +data = dict( + num_classes=25, + ignore_index=-1, + names=( + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "picture", + "desk", + "shelves", + "curtain", + "dresser", + "pillow", + "mirror", + "ceiling", + "refrigerator", + "television", + "nightstand", + "sink", + "lamp", + "otherstructure", + "otherfurniture", + "otherprop", + ), + train=dict( + type=dataset_type, + split="train", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2 + ), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5), + dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None), + dict(type="ChromaticTranslation", p=0.95, ratio=0.05), + dict(type="ChromaticJitter", p=0.95, std=0.05), + # dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2), + # 
dict(type="RandomColorDrop", p=0.2, color_augment=0.0), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_displacement=True, + ), + dict(type="SphereCrop", sample_rate=0.8, mode="random"), + dict(type="SphereCrop", point_max=120000, mode="random"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ShufflePoint"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + val=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="train", + return_grid_coord=True, + return_displacement=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="center"), + dict(type="CenterShift", apply_z=False), + dict(type="NormalizeColor"), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + test_mode=False, + ), + test=dict( + type=dataset_type, + split="val", + data_root=data_root, + transform=[ + dict(type="CenterShift", apply_z=True), + dict(type="NormalizeColor"), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.02, + hash_type="fnv", + mode="test", + return_grid_coord=True, + return_displacement=True, + keys=("coord", "color", "normal"), + ), + crop=None, + post_transform=[ + dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("color", "normal", "displacement"), + coord_feat_keys=("color", "normal"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[0.95, 0.95]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ), + dict(type="RandomScale", scale=[1.05, 1.05]), + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 
+                        p=1,
+                    ),
+                    dict(type="RandomScale", scale=[1.05, 1.05]),
+                ],
+                [dict(type="RandomFlip", p=1)],
+            ],
+        ),
+    ),
+)
diff --git a/Pointcept/configs/waymo/semseg-pt-v3m1-0-base.py b/Pointcept/configs/waymo/semseg-pt-v3m1-0-base.py
new file mode 100644
index 0000000000000000000000000000000000000000..89cb3bc588ce2ec662d76c81dad3fbcea8373f8a
--- /dev/null
+++ b/Pointcept/configs/waymo/semseg-pt-v3m1-0-base.py
@@ -0,0 +1,248 @@
+_base_ = ["../_base_/default_runtime.py"]
+
+# misc custom setting
+batch_size = 12  # bs: total batch size across all GPUs
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+    type="DefaultSegmentorV2",
+    num_classes=22,
+    backbone_out_channels=64,
+    backbone=dict(
+        type="PT-v3m1",
+        in_channels=4,
+        order=["z", "z-trans", "hilbert", "hilbert-trans"],
+        stride=(2, 2, 2, 2),
+        enc_depths=(2, 2, 2, 6, 2),
+        enc_channels=(32, 64, 128, 256, 512),
+        enc_num_head=(2, 4, 8, 16, 32),
+        enc_patch_size=(1024, 1024, 1024, 1024, 1024),
+        dec_depths=(2, 2, 2, 2),
+        dec_channels=(64, 64, 128, 256),
+        dec_num_head=(4, 4, 8, 16),
+        dec_patch_size=(1024, 1024, 1024, 1024),
+        mlp_ratio=4,
+        qkv_bias=True,
+        qk_scale=None,
+        attn_drop=0.0,
+        proj_drop=0.0,
+        drop_path=0.3,
+        shuffle_orders=True,
+        pre_norm=True,
+        enable_rpe=False,
+        enable_flash=True,
+        upcast_attention=False,
+        upcast_softmax=False,
+        cls_mode=False,
+        pdnorm_bn=False,
+        pdnorm_ln=False,
+        pdnorm_decouple=True,
+        pdnorm_adaptive=False,
+        pdnorm_affine=True,
+        pdnorm_conditions=("nuScenes", "SemanticKITTI", "Waymo"),
+    ),
+    criteria=[
+        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+        dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+    ],
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+    type="OneCycleLR",
+    max_lr=[0.002, 0.0002],
+    pct_start=0.04,
+    anneal_strategy="cos",
+    div_factor=10.0,
+    final_div_factor=100.0,
+)
+param_dicts = [dict(keyword="block", lr=0.0002)]
+
+# dataset settings
+dataset_type = "WaymoDataset"
+data_root = "data/waymo"
+ignore_index = -1
+names = [
+    "Car",
+    "Truck",
+    "Bus",
+    # Other small vehicles (e.g. pedicab) and large vehicles (e.g. construction vehicles, RV, limo, tram).
+    "Other Vehicle",
+    "Motorcyclist",
+    "Bicyclist",
+    "Pedestrian",
+    "Sign",
+    "Traffic Light",
+    # Lamp post, traffic sign pole etc.
+    "Pole",
+    # Construction cone/pole.
+    "Construction Cone",
+    "Bicycle",
+    "Motorcycle",
+    "Building",
+    # Bushes, tree branches, tall grasses, flowers etc.
+    "Vegetation",
+    "Tree Trunk",
+    # Curb on the edge of roads. This does not include road boundaries if there's no curb.
+    "Curb",
+    # Surface a vehicle could drive on. This includes the driveway connecting
+    # parking lot and road over a section of sidewalk.
+    "Road",
+    # Marking on the road that's specifically for defining lanes such as
+    # single/double white/yellow lines.
+    "Lane Marker",
+    # Marking on the road other than lane markers, bumps, cat's eyes, rail tracks etc.
+    "Other Ground",
+    # Most horizontal surface that's not drivable, e.g. grassy hill, pedestrian walkway stairs etc.
+    "Walkable",
+    # Nicely paved walkable surface where pedestrians are most likely to walk.
+ "Sidewalk", +] + +data = dict( + num_classes=22, + ignore_index=ignore_index, + names=names, + train=dict( + type=dataset_type, + split="training", + data_root=data_root, + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict(type="PointClip", point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2)), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + val=dict( + type=dataset_type, + split="validation", + data_root=data_root, + transform=[ + dict(type="PointClip", point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2)), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + test=dict( + type=dataset_type, + split="validation", + data_root=data_root, + transform=[ + dict(type="PointClip", point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2)), + dict(type="Copy", keys_dict={"segment": "origin_segment"}), + dict( + type="GridSample", + grid_size=0.025, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_inverse=True, + ), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "strength"), + ), + crop=None, + post_transform=[ + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ignore_index=ignore_index, + ), +) diff --git a/Pointcept/configs/waymo/semseg-spunet-v1m1-0-base.py b/Pointcept/configs/waymo/semseg-spunet-v1m1-0-base.py new file mode 100644 index 0000000000000000000000000000000000000000..67d8011d2ff4df45757749facdb44d2f1c175b10 --- /dev/null +++ b/Pointcept/configs/waymo/semseg-spunet-v1m1-0-base.py @@ -0,0 +1,210 @@ +_base_ = ["../_base_/default_runtime.py"] + +# misc custom setting +batch_size = 12 # 
+mix_prob = 0.8
+empty_cache = False
+enable_amp = True
+
+# model settings
+model = dict(
+    type="DefaultSegmentor",
+    backbone=dict(
+        type="SpUNet-v1m1",
+        in_channels=4,
+        num_classes=22,
+        channels=(32, 64, 128, 256, 256, 128, 96, 96),
+        layers=(2, 3, 4, 6, 2, 2, 2, 2),
+    ),
+    criteria=[
+        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
+        dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
+    ],
+)
+
+# scheduler settings
+epoch = 50
+eval_epoch = 50
+optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
+scheduler = dict(
+    type="OneCycleLR",
+    max_lr=optimizer["lr"],
+    pct_start=0.04,
+    anneal_strategy="cos",
+    div_factor=10.0,
+    final_div_factor=100.0,
+)
+
+# dataset settings
+dataset_type = "WaymoDataset"
+data_root = "data/waymo"
+ignore_index = -1
+names = [
+    "Car",
+    "Truck",
+    "Bus",
+    # Other small vehicles (e.g. pedicab) and large vehicles (e.g. construction vehicles, RV, limo, tram).
+    "Other Vehicle",
+    "Motorcyclist",
+    "Bicyclist",
+    "Pedestrian",
+    "Sign",
+    "Traffic Light",
+    # Lamp post, traffic sign pole etc.
+    "Pole",
+    # Construction cone/pole.
+    "Construction Cone",
+    "Bicycle",
+    "Motorcycle",
+    "Building",
+    # Bushes, tree branches, tall grasses, flowers etc.
+    "Vegetation",
+    "Tree Trunk",
+    # Curb on the edge of roads. This does not include road boundaries if there's no curb.
+    "Curb",
+    # Surface a vehicle could drive on. This includes the driveway connecting
+    # parking lot and road over a section of sidewalk.
+    "Road",
+    # Marking on the road that's specifically for defining lanes such as
+    # single/double white/yellow lines.
+    "Lane Marker",
+    # Marking on the road other than lane markers, bumps, cat's eyes, rail tracks etc.
+    "Other Ground",
+    # Most horizontal surface that's not drivable, e.g. grassy hill, pedestrian walkway stairs etc.
+    "Walkable",
+    # Nicely paved walkable surface where pedestrians are most likely to walk.
+ "Sidewalk", +] + +data = dict( + num_classes=22, + ignore_index=ignore_index, + names=names, + train=dict( + type=dataset_type, + split="training", + data_root=data_root, + transform=[ + # dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2), + # dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75), + dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5), + # dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5), + dict(type="PointClip", point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2)), + dict(type="RandomScale", scale=[0.9, 1.1]), + # dict(type="RandomShift", shift=[0.2, 0.2, 0.2]), + dict(type="RandomFlip", p=0.5), + dict(type="RandomJitter", sigma=0.005, clip=0.02), + # dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + # dict(type="SphereCrop", point_max=1000000, mode="random"), + # dict(type="CenterShift", apply_z=False), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + val=dict( + type=dataset_type, + split="validation", + data_root=data_root, + transform=[ + dict(type="PointClip", point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2)), + dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "strength", "segment"), + return_grid_coord=True, + ), + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "segment"), + feat_keys=("coord", "strength"), + ), + ], + test_mode=False, + ignore_index=ignore_index, + ), + test=dict( + type=dataset_type, + split="validation", + data_root=data_root, + transform=[ + dict(type="PointClip", point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2)), + ], + test_mode=True, + test_cfg=dict( + voxelize=dict( + type="GridSample", + grid_size=0.05, + hash_type="fnv", + mode="test", + return_grid_coord=True, + keys=("coord", "strength"), + ), + crop=None, + post_transform=[ + dict(type="ToTensor"), + dict( + type="Collect", + keys=("coord", "grid_coord", "index"), + feat_keys=("coord", "strength"), + ), + ], + aug_transform=[ + [ + dict( + type="RandomRotateTargetAngle", + angle=[0], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[1], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + [ + dict( + type="RandomRotateTargetAngle", + angle=[3 / 2], + axis="z", + center=[0, 0, 0], + p=1, + ) + ], + ], + ), + ignore_index=ignore_index, + ), +) diff --git a/Pointcept/docs/logo.png b/Pointcept/docs/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..cdd04d6aa6cc7a31e29e3970977427e7edde2c17 Binary files /dev/null and b/Pointcept/docs/logo.png differ diff --git a/Pointcept/docs/logo_dark.png b/Pointcept/docs/logo_dark.png new file mode 100644 index 0000000000000000000000000000000000000000..32af83bd9e42d65251637fb4a90a9e3a4d2e0f83 Binary files /dev/null and b/Pointcept/docs/logo_dark.png differ diff --git a/Pointcept/docs/offset.png b/Pointcept/docs/offset.png new file mode 100644 index 
index 0000000000000000000000000000000000000000..e66df4f29321312d700f2bb7960e502fe233c5d6
Binary files /dev/null and b/Pointcept/docs/offset.png differ
diff --git a/Pointcept/docs/offset_dark.png b/Pointcept/docs/offset_dark.png
new file mode 100755
index 0000000000000000000000000000000000000000..4db79e0d94e7669b6653c0e9abad96ec1cc86364
Binary files /dev/null and b/Pointcept/docs/offset_dark.png differ
diff --git a/Pointcept/libs/pointgroup_ops/functions/__init__.py b/Pointcept/libs/pointgroup_ops/functions/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d8120292a327a400e15fa9a72cd08b16a68794f
--- /dev/null
+++ b/Pointcept/libs/pointgroup_ops/functions/__init__.py
@@ -0,0 +1 @@
+from .functions import bfs_cluster, ballquery_batch_p, Clustering
diff --git a/Pointcept/libs/pointgroup_ops/functions/functions.py b/Pointcept/libs/pointgroup_ops/functions/functions.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8ed62b10c2e88236d814ab34cece5a335e16930
--- /dev/null
+++ b/Pointcept/libs/pointgroup_ops/functions/functions.py
@@ -0,0 +1,176 @@
+import torch
+from torch.autograd import Function
+import pointgroup_ops_cuda
+
+
+class BallQueryBatchP(Function):
+    @staticmethod
+    def forward(ctx, coords, batch_idxs, batch_offsets, radius, meanActive):
+        """
+        :param ctx:
+        :param coords: (n, 3) float
+        :param batch_idxs: (n) int
+        :param batch_offsets: (B+1) int
+        :param radius: float
+        :param meanActive: int
+        :return: idx (nActive), int
+        :return: start_len (n, 2), int
+        """
+
+        n = coords.size(0)
+
+        assert coords.is_contiguous() and coords.is_cuda
+        assert batch_idxs.is_contiguous() and batch_idxs.is_cuda
+        assert batch_offsets.is_contiguous() and batch_offsets.is_cuda
+
+        while True:
+            idx = torch.cuda.IntTensor(n * meanActive).zero_()
+            start_len = torch.cuda.IntTensor(n, 2).zero_()
+            nActive = pointgroup_ops_cuda.ballquery_batch_p(
+                coords, batch_idxs, batch_offsets, idx, start_len, n, meanActive, radius
+            )
+            if nActive <= n * meanActive:
+                break
+            meanActive = int(nActive // n + 1)
+        idx = idx[:nActive]
+
+        return idx, start_len
+
+    @staticmethod
+    def backward(ctx, a=None, b=None):
+        return None, None, None
+
+
+ballquery_batch_p = BallQueryBatchP.apply
+
+
+class Clustering:
+    def __init__(
+        self,
+        ignored_labels,
+        class_mapping,
+        thresh=0.03,
+        closed_points=300,
+        min_points=50,
+        propose_points=100,
+        score_func=torch.max,
+    ) -> None:
+        self.ignored_labels = ignored_labels
+        self.thresh = thresh
+        self.closed_points = closed_points
+        self.min_points = min_points
+        self.class_mapping = class_mapping
+        self.propose_points = propose_points
+        self.score_func = score_func
+
+    def cluster(self, vertices, scores):
+        labels = torch.max(scores, 1)[1]  # (N) long, cuda
+        proposals_idx, proposals_offset = self.cluster_(vertices, labels)
+
+        ## debug
+        # import ipdb; ipdb.set_trace()
+        # colors = np.array(create_color_palette())[labels.cpu()]
+        # write_triangle_mesh(vertices, colors, None, 'semantics.ply')
+
+        # scatter
+        proposals_pred = torch.zeros(
+            (proposals_offset.shape[0] - 1, vertices.shape[0]), dtype=torch.int
+        )  # (nProposal, N), int, cuda
+        proposals_pred[proposals_idx[:, 0].long(), proposals_idx[:, 1].long()] = 1
+        labels = labels[proposals_idx[:, 1][proposals_offset[:-1].long()].long()]
+
+        proposals_pointnum = proposals_pred.sum(1)
+        npoint_mask = proposals_pointnum > self.propose_points
+
+        proposals_pred = proposals_pred[npoint_mask]
+        labels = labels[npoint_mask]
+        return proposals_pred, labels
+
+    def cluster_(self, vertices, labels):
+        """
+        :param vertices: (N, 3), float, cuda
+        :param labels: (N), long, cuda, semantic predictions in [0, 19]
+        """
+        batch_idxs = torch.zeros_like(labels)
+
+        mask_non_ignored = torch.ones_like(labels).bool()
+        for ignored_label in self.ignored_labels:
+            mask_non_ignored = mask_non_ignored & (
+                self.class_mapping[labels] != ignored_label
+            )
+        object_idxs = mask_non_ignored.nonzero().view(-1)
+
+        vertices_ = vertices[object_idxs].float()
+        labels_ = labels[object_idxs].int()
+
+        if vertices_.numel() == 0:
+            return torch.zeros((0, 2)).int(), torch.zeros(1).int()
+
+        batch_idxs_ = batch_idxs[object_idxs].int()
+        batch_offsets_ = torch.FloatTensor([0, object_idxs.shape[0]]).int().cuda()
+
+        idx, start_len = ballquery_batch_p(
+            vertices_, batch_idxs_, batch_offsets_, self.thresh, self.closed_points
+        )
+        proposals_idx, proposals_offset = bfs_cluster(
+            labels_.cpu(), idx.cpu(), start_len.cpu(), self.min_points
+        )
+        proposals_idx[:, 1] = object_idxs[proposals_idx[:, 1].long()].int()
+
+        return proposals_idx, proposals_offset
+
+    def get_instances(self, vertices, scores):
+        proposals_pred, labels = self.cluster(vertices, scores)
+        instances = {}
+        for proposal_id in range(len(proposals_pred)):
+            clusters_i = proposals_pred[proposal_id]
+            score = scores[clusters_i.bool(), labels[proposal_id]]
+            score = self.score_func(score)
+            instances[proposal_id] = {}
+            instances[proposal_id]["conf"] = score.cpu().numpy()
+            instances[proposal_id]["label_id"] = self.class_mapping.cpu()[
+                labels[proposal_id]
+            ]
+            instances[proposal_id]["pred_mask"] = clusters_i.cpu().numpy()
+        return instances
+
+
+class BFSCluster(Function):
+    @staticmethod
+    def forward(ctx, semantic_label, ball_query_idxs, start_len, threshold):
+        """
+        :param ctx:
+        :param semantic_label: (N), int
+        :param ball_query_idxs: (nActive), int
+        :param start_len: (N, 2), int
+        :return: cluster_idxs: int (sumNPoint, 2), dim 0 for cluster_id, dim 1 for corresponding point idxs in N
+        :return: cluster_offsets: int (nCluster + 1)
+        """
+
+        N = start_len.size(0)
+
+        assert semantic_label.is_contiguous()
+        assert ball_query_idxs.is_contiguous()
+        assert start_len.is_contiguous()
+
+        cluster_idxs = semantic_label.new()
+        cluster_offsets = semantic_label.new()
+
+        pointgroup_ops_cuda.bfs_cluster(
+            semantic_label,
+            ball_query_idxs,
+            start_len,
+            cluster_idxs,
+            cluster_offsets,
+            N,
+            threshold,
+        )
+
+        return cluster_idxs, cluster_offsets
+
+    @staticmethod
+    def backward(ctx, a=None):
+        return None
+
+
+bfs_cluster = BFSCluster.apply
diff --git a/Pointcept/libs/pointgroup_ops/setup.py b/Pointcept/libs/pointgroup_ops/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..162b68258920afcd46c3b49764236beecfbf35cb
--- /dev/null
+++ b/Pointcept/libs/pointgroup_ops/setup.py
@@ -0,0 +1,59 @@
+import os
+from sys import argv
+from setuptools import setup
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+from distutils.sysconfig import get_config_vars
+
+(opt,) = get_config_vars("OPT")
+os.environ["OPT"] = " ".join(
+    flag for flag in opt.split() if flag != "-Wstrict-prototypes"
+)
+
+
+def _argparse(pattern, argv, is_flag=True, is_list=False):
+    if is_flag:
+        found = pattern in argv
+        if found:
+            argv.remove(pattern)
+        return found, argv
+    else:
+        arr = [arg for arg in argv if pattern == arg.split("=")[0]]
+        if is_list:
+            if len(arr) == 0:  # not found
+                return False, argv
+            else:
+                assert "=" in arr[0], f"{arr[0]} requires a value."
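+                # e.g. argv = ["--include_dirs=/usr/local/cuda/include"]
+                # (hypothetical path) yields (["/usr/local/cuda/include"], argv)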
+                argv.remove(arr[0])
+                val = arr[0].split("=")[1]
+                if "," in val:
+                    return val.split(","), argv
+                else:
+                    return [val], argv
+        else:
+            if len(arr) == 0:  # not found
+                return False, argv
+            else:
+                assert "=" in arr[0], f"{arr[0]} requires a value."
+                argv.remove(arr[0])
+                return arr[0].split("=")[1], argv
+
+
+INCLUDE_DIRS, argv = _argparse("--include_dirs", argv, False, is_list=True)
+include_dirs = []
+if not (INCLUDE_DIRS is False):
+    include_dirs += INCLUDE_DIRS
+
+setup(
+    name="pointgroup_ops",
+    packages=["pointgroup_ops"],
+    package_dir={"pointgroup_ops": "functions"},
+    ext_modules=[
+        CUDAExtension(
+            name="pointgroup_ops_cuda",
+            sources=["src/bfs_cluster.cpp", "src/bfs_cluster_kernel.cu"],
+            extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]},
+        )
+    ],
+    include_dirs=[*include_dirs],
+    cmdclass={"build_ext": BuildExtension},
+)
diff --git a/Pointcept/libs/pointgroup_ops/src/bfs_cluster.cpp b/Pointcept/libs/pointgroup_ops/src/bfs_cluster.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d0298aae5bf1f184eb4e923d8f9f8893168c8e19
--- /dev/null
+++ b/Pointcept/libs/pointgroup_ops/src/bfs_cluster.cpp
@@ -0,0 +1,145 @@
+/*
+Ball Query with BatchIdx & Clustering Algorithm
+Written by Li Jiang
+All Rights Reserved 2020.
+*/
+
+#include <torch/extension.h>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <queue>
+#include <vector>
+#include <cassert>
+#include <cstdint>
+
+int ballquery_batch_p_cuda(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, cudaStream_t stream);
+
+
+using Int = int32_t;
+class ConnectedComponent{
+public:
+    std::vector<Int> pt_idxs {};
+
+    ConnectedComponent(){};
+    void addPoint(Int pt_idx)
+    {
+        pt_idxs.push_back(pt_idx);
+
+    }
+};
+using ConnectedComponents = std::vector<ConnectedComponent>;
+
+/* ================================== ballquery_batch_p ================================== */
+// input xyz: (n, 3) float
+// input batch_idxs: (n) int
+// input batch_offsets: (B+1) int, batch_offsets[-1]
+// output idx: (n * meanActive) dim 0 for number of points in the ball, idx in n
+// output start_len: (n, 2), int
+int ballquery_batch_p(at::Tensor xyz_tensor, at::Tensor batch_idxs_tensor, at::Tensor batch_offsets_tensor, at::Tensor idx_tensor, at::Tensor start_len_tensor, int n, int meanActive, float radius){
+    const float *xyz = xyz_tensor.data<float>();
+    const int *batch_idxs = batch_idxs_tensor.data<int>();
+    const int *batch_offsets = batch_offsets_tensor.data<int>();
+    int *idx = idx_tensor.data<int>();
+    int *start_len = start_len_tensor.data<int>();
+
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+    int cumsum = ballquery_batch_p_cuda(n, meanActive, radius, xyz, batch_idxs, batch_offsets, idx, start_len, stream);
+    return cumsum;
+}
+
+/* ================================== bfs_cluster ================================== */
+ConnectedComponent find_cc(Int idx, int *semantic_label, Int *ball_query_idxs, int *start_len, int *visited){
+    ConnectedComponent cc;
+    cc.addPoint(idx);
+    visited[idx] = 1;
+
+    std::queue<Int> Q;
+    assert(Q.empty());
+    Q.push(idx);
+
+    while(!Q.empty()){
+        Int cur = Q.front(); Q.pop();
+        int start = start_len[cur * 2];
+        int len = start_len[cur * 2 + 1];
+        int label_cur = semantic_label[cur];
+        for(Int i = start; i < start + len; i++){
+            Int idx_i = ball_query_idxs[i];
+            if(semantic_label[idx_i] != label_cur) continue;
+            if(visited[idx_i] == 1) continue;
+
+            cc.addPoint(idx_i);
+            visited[idx_i] = 1;
+
+            Q.push(idx_i);
+        }
+    }
+    return cc;
+}
+
+//input: semantic_label, int, N
+//input: ball_query_idxs, Int, (nActive)
+//input: start_len, int, (N, 2)
+//output: clusters, CCs
+int get_clusters(int *semantic_label, Int *ball_query_idxs, int *start_len, const Int nPoint, int threshold, ConnectedComponents &clusters){
+    int visited[nPoint] = {0};
+
+    int sumNPoint = 0;
+    for(Int i = 0; i < nPoint; i++){
+        if(visited[i] == 0){
+            ConnectedComponent CC = find_cc(i, semantic_label, ball_query_idxs, start_len, visited);
+            if((int)CC.pt_idxs.size() >= threshold){
+                clusters.push_back(CC);
+                sumNPoint += (int)CC.pt_idxs.size();
+            }
+        }
+    }
+
+    return sumNPoint;
+}
+
+void fill_cluster_idxs_(ConnectedComponents &CCs, int *cluster_idxs, int *cluster_offsets){
+    for(int i = 0; i < (int)CCs.size(); i++){
+        cluster_offsets[i + 1] = cluster_offsets[i] + (int)CCs[i].pt_idxs.size();
+        for(int j = 0; j < (int)CCs[i].pt_idxs.size(); j++){
+            int idx = CCs[i].pt_idxs[j];
+            cluster_idxs[(cluster_offsets[i] + j) * 2 + 0] = i;
+            cluster_idxs[(cluster_offsets[i] + j) * 2 + 1] = idx;
+        }
+    }
+}
+
+//input: semantic_label, int, N
+//input: ball_query_idxs, int, (nActive)
+//input: start_len, int, (N, 2)
+//output: cluster_idxs, int (sumNPoint, 2), dim 0 for cluster_id, dim 1 for corresponding point idxs in N
+//output: cluster_offsets, int (nCluster + 1)
+void bfs_cluster(at::Tensor semantic_label_tensor, at::Tensor ball_query_idxs_tensor, at::Tensor start_len_tensor,
+at::Tensor cluster_idxs_tensor, at::Tensor cluster_offsets_tensor, const int N, int threshold){
+    int *semantic_label = semantic_label_tensor.data<int>();
+    Int *ball_query_idxs = ball_query_idxs_tensor.data<Int>();
+    int *start_len = start_len_tensor.data<int>();
+
+    ConnectedComponents CCs;
+    int sumNPoint = get_clusters(semantic_label, ball_query_idxs, start_len, N, threshold, CCs);
+
+    int nCluster = (int)CCs.size();
+    cluster_idxs_tensor.resize_({sumNPoint, 2});
+    cluster_offsets_tensor.resize_({nCluster + 1});
+    cluster_idxs_tensor.zero_();
+    cluster_offsets_tensor.zero_();
+
+    int *cluster_idxs = cluster_idxs_tensor.data<int>();
+    int *cluster_offsets = cluster_offsets_tensor.data<int>();
+
+    fill_cluster_idxs_(CCs, cluster_idxs, cluster_offsets);
+}
+
+//------------------------------------API------------------------------------------
+PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){
+
+    m.def("ballquery_batch_p", &ballquery_batch_p, "ballquery_batch_p");
+    m.def("bfs_cluster", &bfs_cluster, "bfs_cluster");
+
+}
diff --git a/Pointcept/libs/pointgroup_ops/src/bfs_cluster_kernel.cu b/Pointcept/libs/pointgroup_ops/src/bfs_cluster_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..99a31842d605588b214826223143f2669475f402
--- /dev/null
+++ b/Pointcept/libs/pointgroup_ops/src/bfs_cluster_kernel.cu
@@ -0,0 +1,91 @@
+/*
+Ball Query with BatchIdx
+Written by Li Jiang
+All Rights Reserved 2020.
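+Note: each thread of ballquery_batch_p_cuda_ buffers at most 1000 neighbour
+indices in a fixed-size local array (idx_temp) before writing back to global memory.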
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda_runtime.h>
+
+#define TOTAL_THREADS 1024
+#define THREADS_PER_BLOCK 512
+#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
+
+
+/* ================================== ballquery_batch_p ================================== */
+__global__ void ballquery_batch_p_cuda_(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, int *cumsum) {
+    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
+    if (pt_idx >= n) return;
+
+    start_len += (pt_idx * 2);
+    int idx_temp[1000];
+
+    float radius2 = radius * radius;
+    float o_x = xyz[pt_idx * 3 + 0];
+    float o_y = xyz[pt_idx * 3 + 1];
+    float o_z = xyz[pt_idx * 3 + 2];
+
+    int batch_idx = batch_idxs[pt_idx];
+    int start = batch_offsets[batch_idx];
+    int end = batch_offsets[batch_idx + 1];
+
+    int cnt = 0;
+    for(int k = start; k < end; k++){
+        float x = xyz[k * 3 + 0];
+        float y = xyz[k * 3 + 1];
+        float z = xyz[k * 3 + 2];
+        float d2 = (o_x - x) * (o_x - x) + (o_y - y) * (o_y - y) + (o_z - z) * (o_z - z);
+        if(d2 < radius2){
+            if(cnt < 1000){
+                idx_temp[cnt] = k;
+            }
+            else{
+                break;
+            }
+            ++cnt;
+        }
+    }
+
+    start_len[0] = atomicAdd(cumsum, cnt);
+    start_len[1] = cnt;
+
+    int thre = n * meanActive;
+    if(start_len[0] >= thre) return;
+
+    idx += start_len[0];
+    if(start_len[0] + cnt >= thre) cnt = thre - start_len[0];
+
+    for(int k = 0; k < cnt; k++){
+        idx[k] = idx_temp[k];
+    }
+}
+
+
+int ballquery_batch_p_cuda(int n, int meanActive, float radius, const float *xyz, const int *batch_idxs, const int *batch_offsets, int *idx, int *start_len, cudaStream_t stream) {
+    // param xyz: (n, 3)
+    // param batch_idxs: (n)
+    // param batch_offsets: (B + 1)
+    // output idx: (n * meanActive) dim 0 for number of points in the ball, idx in n
+    // output start_len: (n, 2), int
+
+    cudaError_t err;
+
+    dim3 blocks(DIVUP(n, THREADS_PER_BLOCK));
+    dim3 threads(THREADS_PER_BLOCK);
+
+    int cumsum = 0;
+    int* p_cumsum;
+    cudaMalloc((void**)&p_cumsum, sizeof(int));
+    cudaMemcpy(p_cumsum, &cumsum, sizeof(int), cudaMemcpyHostToDevice);
+
+    ballquery_batch_p_cuda_<<<blocks, threads, 0, stream>>>(n, meanActive, radius, xyz, batch_idxs, batch_offsets, idx, start_len, p_cumsum);
+
+    err = cudaGetLastError();
+    if (cudaSuccess != err) {
+        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
+        exit(-1);
+    }
+
+    cudaMemcpy(&cumsum, p_cumsum, sizeof(int), cudaMemcpyDeviceToHost);
+    return cumsum;
+}
diff --git a/Pointcept/libs/pointops/__init__.py b/Pointcept/libs/pointops/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8f75488366c12e144febe3adccd63b40820cdfa
--- /dev/null
+++ b/Pointcept/libs/pointops/__init__.py
@@ -0,0 +1 @@
+from .functions import *
diff --git a/Pointcept/libs/pointops/functions/__init__.py b/Pointcept/libs/pointops/functions/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c05f2f4b74f1ae4343daf9b38b4576d75f13e81
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/__init__.py
@@ -0,0 +1,14 @@
+from .query import knn_query, ball_query, random_ball_query
+from .sampling import farthest_point_sampling
+from .grouping import grouping, grouping2
+from .interpolation import interpolation, interpolation2
+from .subtraction import subtraction
+from .aggregation import aggregation
+from .attention import attention_relation_step, attention_fusion_step
+from .utils import (
+    query_and_group,
+    knn_query_and_group,
+    ball_query_and_group,
+    batch2offset,
+    offset2batch,
+)
diff --git a/Pointcept/libs/pointops/functions/aggregation.py b/Pointcept/libs/pointops/functions/aggregation.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0f62444a70d317dfb8df4adc1167bba5dd19ef1
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/aggregation.py
@@ -0,0 +1,57 @@
+import torch
+from torch.autograd import Function
+
+from pointops._C import aggregation_forward_cuda, aggregation_backward_cuda
+
+
+class Aggregation(Function):
+    @staticmethod
+    def forward(ctx, input, position, weight, idx):
+        """
+        input: input: (n, c), position: (n, nsample, c), weight : (n, nsample, c'), idx: (n, nsample)
+        output: (n, c)
+        """
+        assert (
+            input.is_contiguous()
+            and position.is_contiguous()
+            and weight.is_contiguous()
+        )
+        n, nsample, c = position.shape
+        w_c = weight.shape[-1]
+        output = torch.cuda.FloatTensor(n, c).zero_()
+        aggregation_forward_cuda(
+            n, nsample, c, w_c, input, position, weight, idx, output
+        )
+        ctx.save_for_backward(input, position, weight, idx)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        """
+        input: grad_out: (n, c)
+        output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight : (n, nsample, c')
+        """
+        input, position, weight, idx = ctx.saved_tensors
+        n, nsample, c = position.shape
+        w_c = weight.shape[-1]
+        grad_input = torch.cuda.FloatTensor(n, c).zero_()
+        grad_position = torch.cuda.FloatTensor(n, nsample, c).zero_()
+        grad_weight = torch.cuda.FloatTensor(n, nsample, w_c).zero_()
+        aggregation_backward_cuda(
+            n,
+            nsample,
+            c,
+            w_c,
+            input,
+            position,
+            weight,
+            idx,
+            grad_output,
+            grad_input,
+            grad_position,
+            grad_weight,
+        )
+        return grad_input, grad_position, grad_weight, None
+
+
+aggregation = Aggregation.apply
diff --git a/Pointcept/libs/pointops/functions/attention.py b/Pointcept/libs/pointops/functions/attention.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e774ff67051d6272f7de3fd751bf3b712431249
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/attention.py
@@ -0,0 +1,120 @@
+import torch
+from torch.autograd import Function
+
+from pointops._C import (
+    attention_relation_step_forward_cuda,
+    attention_relation_step_backward_cuda,
+    attention_fusion_step_forward_cuda,
+    attention_fusion_step_backward_cuda,
+)
+
+
+class AttentionRelationStep(Function):
+    @staticmethod
+    def forward(ctx, query, key, weight, index_target, index_refer):
+        """
+        input - query: (n, g, c), key: (n, g, c), weight: (c) 1_c for scatter attention,
+                index_target: (m), index_refer: (m)
+        output - relation: (m, g)
+        """
+
+        assert (
+            query.is_contiguous()
+            and key.is_contiguous()
+            and index_target.is_contiguous()
+            and index_refer.is_contiguous()
+            and weight.is_contiguous()
+        )
+
+        assert index_target.shape[0] == index_refer.shape[0]
+
+        _, g, c = query.shape
+        m = index_target.shape[0]
+        output = torch.cuda.FloatTensor(m, g).zero_()
+        attention_relation_step_forward_cuda(
+            m, g, c, query, key, weight, index_target.int(), index_refer.int(), output
+        )
+        ctx.save_for_backward(query, key, weight, index_target, index_refer)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        query, key, weight, index_target, index_refer = ctx.saved_tensors
+        n, g, c = query.shape
+        m = index_target.shape[0]
+        grad_query = torch.cuda.FloatTensor(n, g, c).zero_()
+        grad_key = torch.cuda.FloatTensor(n, g, c).zero_()
+        grad_weight = torch.cuda.FloatTensor(c).zero_()
+        attention_relation_step_backward_cuda(
+            m,
+            g,
+            c,
+            query,
+            grad_query,
+            key,
+            grad_key,
+            weight,
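+            # grad_weight is filled by the CUDA op but discarded: backward
+            # returns None for the weight input below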
+            grad_weight,
+            index_target.int(),
+            index_refer.int(),
+            grad_output,
+        )
+        return grad_query, grad_key, None, None, None
+
+
+class AttentionFusionStep(Function):
+    @staticmethod
+    def forward(ctx, weight, value, index_target, index_refer):
+        """
+        input - weight: (m, g), value: (n, g, c)
+                index_target: (m), index_refer: (m)
+        output - output: (n, g, c)
+        """
+
+        assert (
+            weight.is_contiguous()
+            and value.is_contiguous()
+            and index_target.is_contiguous()
+            and index_refer.is_contiguous()
+            and weight.is_contiguous()
+        )
+
+        assert index_target.shape[0] == index_refer.shape[0]
+
+        n, g, c = value.shape
+        m = index_refer.shape[0]
+        output = torch.cuda.FloatTensor(n, g, c).zero_()
+        attention_fusion_step_forward_cuda(
+            m, g, c, weight, value, index_target.int(), index_refer.int(), output
+        )
+        ctx.save_for_backward(weight, value, index_target, index_refer)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        """
+        input: grad_output: (n, g, c)
+        output: grad_weight: (m, g), grad_value: (n, g, c), none, none
+        """
+        weight, value, index_target, index_refer = ctx.saved_tensors
+        n, g, c = value.shape
+        m = index_target.shape[0]
+        grad_weight = torch.cuda.FloatTensor(m, g).zero_()
+        grad_value = torch.cuda.FloatTensor(n, g, c).zero_()
+        attention_fusion_step_backward_cuda(
+            m,
+            g,
+            c,
+            weight,
+            grad_weight,
+            value,
+            grad_value,
+            index_target.int(),
+            index_refer.int(),
+            grad_output,
+        )
+        return grad_weight, grad_value, None, None
+
+
+attention_relation_step = AttentionRelationStep.apply
+attention_fusion_step = AttentionFusionStep.apply
diff --git a/Pointcept/libs/pointops/functions/grouping.py b/Pointcept/libs/pointops/functions/grouping.py
new file mode 100644
index 0000000000000000000000000000000000000000..c22d1e827f82331a4287362a368ccf93927493e6
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/grouping.py
@@ -0,0 +1,63 @@
+import torch
+from torch.autograd import Function
+
+from pointops._C import grouping_forward_cuda, grouping_backward_cuda
+
+
+class Grouping(Function):
+    @staticmethod
+    def forward(ctx, input, idx):
+        """
+        input: input: (n, c), idx : (m, nsample)
+        output: (m, nsample, c)
+        """
+        assert input.is_contiguous() and idx.is_contiguous()
+        m, nsample, n, c = idx.shape[0], idx.shape[1], input.shape[0], input.shape[1]
+        output = torch.cuda.FloatTensor(m, nsample, c)
+        grouping_forward_cuda(m, nsample, c, input, idx, output)
+        ctx.n = n
+        ctx.save_for_backward(idx)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        """
+        input: grad_out: (m, c, nsample)
+        output: (n, c), None
+        """
+        n = ctx.n
+        (idx,) = ctx.saved_tensors
+        m, nsample, c = grad_output.shape
+        grad_input = torch.cuda.FloatTensor(n, c).zero_()
+        grouping_backward_cuda(m, nsample, c, grad_output, idx, grad_input)
+        return grad_input, None
+
+
+def grouping(idx, feat, xyz, new_xyz=None, with_xyz=False):
+    if new_xyz is None:
+        new_xyz = xyz
+    assert xyz.is_contiguous() and feat.is_contiguous()
+    m, nsample, c = idx.shape[0], idx.shape[1], feat.shape[1]
+    xyz = torch.cat([xyz, torch.zeros([1, 3]).to(xyz.device)], dim=0)
+    feat = torch.cat([feat, torch.zeros([1, c]).to(feat.device)], dim=0)
+    grouped_feat = feat[idx.view(-1).long(), :].view(
+        m, nsample, c
+    )  # (m, num_sample, c)
+
+    if with_xyz:
+        assert new_xyz.is_contiguous()
+        mask = torch.sign(idx + 1)
+        grouped_xyz = xyz[idx.view(-1).long(), :].view(
+            m, nsample, 3
+        ) - new_xyz.unsqueeze(
+            1
+        )  # (m, num_sample, 3)
+        grouped_xyz = torch.einsum(
+            "n s c, n s -> n s c", grouped_xyz, mask
+        )  # (m, num_sample, 3)
+        return torch.cat((grouped_xyz, grouped_feat), -1)
+    else:
+        return grouped_feat
+
+
+grouping2 = Grouping.apply
diff --git a/Pointcept/libs/pointops/functions/interpolation.py b/Pointcept/libs/pointops/functions/interpolation.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a5c861f272f89421fa097505d9882b2c473a060
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/interpolation.py
@@ -0,0 +1,59 @@
+import torch
+from torch.autograd import Function
+
+from pointops._C import interpolation_forward_cuda, interpolation_backward_cuda
+from .query import knn_query
+
+
+def interpolation(xyz, new_xyz, feat, offset, new_offset, k=3):
+    """
+    input: coords: (m, 3), new_xyz: (n, 3), color: (m, c), offset: (b), new_offset: (b)
+    output: (n, c)
+    """
+    assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous()
+    idx, dist = knn_query(k, xyz, offset, new_xyz, new_offset)  # (n, 3), (n, 3)
+    dist_recip = 1.0 / (dist + 1e-8)  # (n, 3)
+    norm = torch.sum(dist_recip, dim=1, keepdim=True)
+    weight = dist_recip / norm  # (n, 3)
+
+    new_feat = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_()
+    for i in range(k):
+        new_feat += feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1)
+    return new_feat
+
+
+class Interpolation(Function):
+    @staticmethod
+    def forward(ctx, xyz, new_xyz, input, offset, new_offset, k=3):
+        """
+        input: coords: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b)
+        output: (n, c)
+        """
+        assert xyz.is_contiguous() and new_xyz.is_contiguous() and input.is_contiguous()
+        idx, dist = knn_query(k, xyz, offset, new_xyz, new_offset)  # (n, k), (n, k)
+        dist_recip = 1.0 / (dist + 1e-8)  # (n, k)
+        norm = torch.sum(dist_recip, dim=1, keepdim=True)
+        weight = dist_recip / norm  # (n, k)
+
+        n, c, m = new_xyz.shape[0], input.shape[1], input.shape[0]
+        output = torch.cuda.FloatTensor(n, c).zero_()
+        interpolation_forward_cuda(n, c, k, input, idx, weight, output)
+        ctx.m, ctx.k = m, k
+        ctx.save_for_backward(idx, weight)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        """
+        input: coords: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b)
+        output: (n, c)
+        """
+        m, k = ctx.m, ctx.k
+        idx, weight = ctx.saved_tensors
+        n, c = grad_output.shape
+        grad_input = torch.cuda.FloatTensor(m, c).zero_()
+        interpolation_backward_cuda(n, c, k, grad_output, idx, weight, grad_input)
+        return None, None, grad_input, None, None, None
+
+
+interpolation2 = Interpolation.apply
diff --git a/Pointcept/libs/pointops/functions/query.py b/Pointcept/libs/pointops/functions/query.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1294b6125e00ae1d1dec21ed52a803c164c4810
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/query.py
@@ -0,0 +1,113 @@
+import torch
+from torch.autograd import Function
+
+from pointops._C import knn_query_cuda, random_ball_query_cuda, ball_query_cuda
+
+
+class KNNQuery(Function):
+    @staticmethod
+    def forward(ctx, nsample, xyz, offset, new_xyz=None, new_offset=None):
+        """
+        input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b)
+        output: idx: (m, nsample) -1 is placeholder, dist2: (m, nsample)
+        """
+        if new_xyz is None or new_offset is None:
+            new_xyz = xyz
+            new_offset = offset
+        assert xyz.is_contiguous() and new_xyz.is_contiguous()
+        m = new_xyz.shape[0]
+        idx = torch.cuda.IntTensor(m, nsample).zero_()
+        dist2 = torch.cuda.FloatTensor(m, nsample).zero_()
+        knn_query_cuda(
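+            # offset / new_offset are cumulative per-batch point counts
+            # (cf. batch2offset in utils.py), not per-point batch indices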
+            m, nsample, xyz, new_xyz, offset.int(), new_offset.int(), idx, dist2
+        )
+        return idx, torch.sqrt(dist2)
+
+
+class RandomBallQuery(Function):
+    """Random Ball Query.
+
+    Find nearby points in spherical space.
+    """
+
+    @staticmethod
+    def forward(
+        ctx, nsample, max_radius, min_radius, xyz, offset, new_xyz=None, new_offset=None
+    ):
+        """
+        input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b)
+        output: idx: (m, nsample), dist2: (m, nsample)
+        """
+        if new_xyz is None or new_offset is None:
+            new_xyz = xyz
+            new_offset = offset
+        assert xyz.is_contiguous() and new_xyz.is_contiguous()
+        assert min_radius < max_radius
+
+        m = new_xyz.shape[0]
+        order = []
+        for k in range(offset.shape[0]):
+            s_k, e_k = (0, offset[0]) if k == 0 else (offset[k - 1], offset[k])
+            order.append(
+                torch.randperm(e_k - s_k, dtype=torch.int32, device=offset.device) + s_k
+            )
+        order = torch.cat(order, dim=0)
+        idx = torch.cuda.IntTensor(m, nsample).zero_()
+        dist2 = torch.cuda.FloatTensor(m, nsample).zero_()
+        random_ball_query_cuda(
+            m,
+            nsample,
+            min_radius,
+            max_radius,
+            order,
+            xyz,
+            new_xyz,
+            offset.int(),
+            new_offset.int(),
+            idx,
+            dist2,
+        )
+        return idx, torch.sqrt(dist2)
+
+
+class BallQuery(Function):
+    """Ball Query.
+
+    Find nearby points in spherical space.
+    """
+
+    @staticmethod
+    def forward(
+        ctx, nsample, max_radius, min_radius, xyz, offset, new_xyz=None, new_offset=None
+    ):
+        """
+        input: coords: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b)
+        output: idx: (m, nsample), dist2: (m, nsample)
+        """
+        if new_xyz is None or new_offset is None:
+            new_xyz = xyz
+            new_offset = offset
+        assert xyz.is_contiguous() and new_xyz.is_contiguous()
+        assert min_radius < max_radius
+
+        m = new_xyz.shape[0]
+        idx = torch.cuda.IntTensor(m, nsample).zero_()
+        dist2 = torch.cuda.FloatTensor(m, nsample).zero_()
+        ball_query_cuda(
+            m,
+            nsample,
+            min_radius,
+            max_radius,
+            xyz,
+            new_xyz,
+            offset.int(),
+            new_offset.int(),
+            idx,
+            dist2,
+        )
+        return idx, torch.sqrt(dist2)
+
+
+knn_query = KNNQuery.apply
+ball_query = BallQuery.apply
+random_ball_query = RandomBallQuery.apply
diff --git a/Pointcept/libs/pointops/functions/sampling.py b/Pointcept/libs/pointops/functions/sampling.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f233d4afe02e43a6a390ca465f7108a01b98541
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/sampling.py
@@ -0,0 +1,27 @@
+import torch
+from torch.autograd import Function
+
+from pointops._C import farthest_point_sampling_cuda
+
+
+class FarthestPointSampling(Function):
+    @staticmethod
+    def forward(ctx, xyz, offset, new_offset):
+        """
+        input: coords: (n, 3), offset: (b), new_offset: (b)
+        output: idx: (m)
+        """
+        assert xyz.is_contiguous()
+        n, b, n_max = xyz.shape[0], offset.shape[0], offset[0]
+        for i in range(1, b):
+            n_max = max(offset[i] - offset[i - 1], n_max)
+        idx = torch.cuda.IntTensor(new_offset[b - 1].item()).zero_()
+        tmp = torch.cuda.FloatTensor(n).fill_(1e10)
+        farthest_point_sampling_cuda(
+            b, n_max, xyz, offset.int(), new_offset.int(), tmp, idx
+        )
+        del tmp
+        return idx
+
+
+farthest_point_sampling = FarthestPointSampling.apply
diff --git a/Pointcept/libs/pointops/functions/subtraction.py b/Pointcept/libs/pointops/functions/subtraction.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc683ce3d75901777e57886adc077d570230e027
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/subtraction.py
@@ -0,0 +1,38 @@
+import torch
+from torch.autograd import Function
+
+from pointops._C import subtraction_forward_cuda, subtraction_backward_cuda
+
+
+class Subtraction(Function):
+    @staticmethod
+    def forward(ctx, input1, input2, idx):
+        """
+        input: input1: (n, c), input2: (n, c), idx: (n, nsample)
+        output: (n, nsample, c)
+        """
+        assert input1.is_contiguous() and input2.is_contiguous()
+        n, c = input1.shape
+        nsample = idx.shape[-1]
+        output = torch.cuda.FloatTensor(n, nsample, c).zero_()
+        subtraction_forward_cuda(n, nsample, c, input1, input2, idx, output)
+        ctx.save_for_backward(idx)
+        return output
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        """
+        input: grad_out: (n, nsample, c)
+        output: grad_input1: (n, c), grad_input2: (n, c)
+        """
+        (idx,) = ctx.saved_tensors
+        n, nsample, c = grad_output.shape
+        grad_input1 = torch.cuda.FloatTensor(n, c).zero_()
+        grad_input2 = torch.cuda.FloatTensor(n, c).zero_()
+        subtraction_backward_cuda(
+            n, nsample, c, idx, grad_output, grad_input1, grad_input2
+        )
+        return grad_input1, grad_input2, None
+
+
+subtraction = Subtraction.apply
diff --git a/Pointcept/libs/pointops/functions/utils.py b/Pointcept/libs/pointops/functions/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..15e3e328bb012bb684787466f3ec2e97d1317b2b
--- /dev/null
+++ b/Pointcept/libs/pointops/functions/utils.py
@@ -0,0 +1,121 @@
+import torch
+from pointops import knn_query, ball_query, grouping
+
+
+def knn_query_and_group(
+    feat,
+    xyz,
+    offset=None,
+    new_xyz=None,
+    new_offset=None,
+    idx=None,
+    nsample=None,
+    with_xyz=False,
+):
+    if idx is None:
+        assert nsample is not None
+        idx, _ = knn_query(nsample, xyz, offset, new_xyz, new_offset)
+    return grouping(idx, feat, xyz, new_xyz, with_xyz), idx
+
+
+def ball_query_and_group(
+    feat,
+    xyz,
+    offset=None,
+    new_xyz=None,
+    new_offset=None,
+    idx=None,
+    max_radio=None,
+    min_radio=0,
+    nsample=None,
+    with_xyz=False,
+):
+    if idx is None:
+        assert nsample is not None and offset is not None
+        assert max_radio is not None and min_radio is not None
+        idx, _ = ball_query(
+            nsample, max_radio, min_radio, xyz, offset, new_xyz, new_offset
+        )
+    return grouping(idx, feat, xyz, new_xyz, with_xyz), idx
+
+
+def query_and_group(
+    nsample,
+    xyz,
+    new_xyz,
+    feat,
+    idx,
+    offset,
+    new_offset,
+    dilation=0,
+    with_feat=True,
+    with_xyz=True,
+):
+    """
+    input: coords: (n, 3), new_xyz: (m, 3), color: (n, c), idx: (m, nsample), offset: (b), new_offset: (b)
+    output: new_feat: (m, nsample, c+3), grouped_idx: (m, nsample)
+    """
+    assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous()
+    if new_xyz is None:
+        new_xyz = xyz
+
+    if idx is None:
+        num_samples_total = 1 + (nsample - 1) * (dilation + 1)
+        # num points in a batch might < num_samples_total => [n1, n2, ..., nk, ns, ns, ns, ...]
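+        # e.g. nsample=8, dilation=1 -> query 15 neighbours, then keep every
+        # (dilation + 1)-th one (column indices 0, 2, ..., 14)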
+        idx_no_dilation, _ = knn_query(
+            num_samples_total, xyz, offset, new_xyz, new_offset
+        )  # (m, nsample * (d + 1))
+        idx = []
+        batch_end = offset.tolist()
+        batch_start = [0] + batch_end[:-1]
+        new_batch_end = new_offset.tolist()
+        new_batch_start = [0] + new_batch_end[:-1]
+        for i in range(offset.shape[0]):
+            if batch_end[i] - batch_start[i] < num_samples_total:
+                soft_dilation = (batch_end[i] - batch_start[i] - 1) / (nsample - 1) - 1
+            else:
+                soft_dilation = dilation
+            idx.append(
+                idx_no_dilation[
+                    new_batch_start[i] : new_batch_end[i],
+                    [int((soft_dilation + 1) * i) for i in range(nsample)],
+                ]
+            )
+        idx = torch.cat(idx, dim=0)
+
+    if not with_feat:
+        return idx
+
+    n, m, c = xyz.shape[0], new_xyz.shape[0], feat.shape[1]
+    grouped_xyz = xyz[idx.view(-1).long(), :].view(m, nsample, 3)  # (m, nsample, 3)
+    # grouped_xyz = grouping(coords, idx)  # (m, nsample, 3)
+    grouped_xyz -= new_xyz.unsqueeze(1)  # (m, nsample, 3)
+    grouped_feat = feat[idx.view(-1).long(), :].view(m, nsample, c)  # (m, nsample, c)
+    # grouped_feat = grouping(color, idx)  # (m, nsample, c)
+
+    if with_xyz:
+        return torch.cat((grouped_xyz, grouped_feat), -1), idx  # (m, nsample, 3+c)
+    else:
+        return grouped_feat, idx
+
+
+def offset2batch(offset):
+    return (
+        torch.cat(
+            [
+                (
+                    torch.tensor([i] * (o - offset[i - 1]))
+                    if i > 0
+                    else torch.tensor([i] * o)
+                )
+                for i, o in enumerate(offset)
+            ],
+            dim=0,
+        )
+        .long()
+        .to(offset.device)
+    )
+
+
+def batch2offset(batch):
+    return torch.cumsum(batch.bincount(), dim=0).int()
diff --git a/Pointcept/libs/pointops/setup.py b/Pointcept/libs/pointops/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..0cdf07b6c12bf702b40accbb51fd1825e4050a8b
--- /dev/null
+++ b/Pointcept/libs/pointops/setup.py
@@ -0,0 +1,33 @@
+import os
+from setuptools import setup
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+from distutils.sysconfig import get_config_vars
+
+(opt,) = get_config_vars("OPT")
+os.environ["OPT"] = " ".join(
+    flag for flag in opt.split() if flag != "-Wstrict-prototypes"
+)
+
+src = "src"
+sources = [
+    os.path.join(root, file)
+    for root, dirs, files in os.walk(src)
+    for file in files
+    if file.endswith(".cpp") or file.endswith(".cu")
+]
+
+setup(
+    name="pointops",
+    version="1.0",
+    install_requires=["torch", "numpy"],
+    packages=["pointops"],
+    package_dir={"pointops": "functions"},
+    ext_modules=[
+        CUDAExtension(
+            name="pointops._C",
+            sources=sources,
+            extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]},
+        )
+    ],
+    cmdclass={"build_ext": BuildExtension},
+)
diff --git a/Pointcept/libs/pointops/src/__init__.py b/Pointcept/libs/pointops/src/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/Pointcept/libs/pointops/src/aggregation/aggregation_cuda.cpp b/Pointcept/libs/pointops/src/aggregation/aggregation_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..491b6f41660edf9b5ea5656cc88edba8ed807d71
--- /dev/null
+++ b/Pointcept/libs/pointops/src/aggregation/aggregation_cuda.cpp
@@ -0,0 +1,28 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "aggregation_cuda_kernel.h"
+
+
+void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
+{
+    const float *input = input_tensor.data_ptr<float>();
+    const float *position = position_tensor.data_ptr<float>();
+    const float *weight = weight_tensor.data_ptr<float>();
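+    // data_ptr<T>() requires contiguous int32/float32 tensors; the Python
+    // wrappers in functions/ allocate and check them accordingly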
+    const int *idx = idx_tensor.data_ptr<int>();
+    float *output = output_tensor.data_ptr<float>();
+    aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output);
+}
+
+void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor)
+{
+    const float *input = input_tensor.data_ptr<float>();
+    const float *position = position_tensor.data_ptr<float>();
+    const float *weight = weight_tensor.data_ptr<float>();
+    const int *idx = idx_tensor.data_ptr<int>();
+    const float *grad_output = grad_output_tensor.data_ptr<float>();
+    float *grad_input = grad_input_tensor.data_ptr<float>();
+    float *grad_position = grad_position_tensor.data_ptr<float>();
+    float *grad_weight = grad_weight_tensor.data_ptr<float>();
+    aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
+}
diff --git a/Pointcept/libs/pointops/src/aggregation/aggregation_cuda_kernel.cu b/Pointcept/libs/pointops/src/aggregation/aggregation_cuda_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..8339bb7e2088abffefba02c26b248edafed6cf47
--- /dev/null
+++ b/Pointcept/libs/pointops/src/aggregation/aggregation_cuda_kernel.cu
@@ -0,0 +1,53 @@
+#include "../cuda_utils.h"
+#include "aggregation_cuda_kernel.h"
+
+
+__global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) {
+    // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+    if (index >= n * c) return;
+    const int c_idx = index % c;
+    const int n_idx = index / c;
+    const int w_c_idx = c_idx % w_c;
+    for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
+    {
+        int idx_idx = n_idx * nsample + nsample_idx;
+        int input_idx = idx[idx_idx] * c + c_idx;
+        int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
+        int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
+        output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx];
+    }
+}
+
+__global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
+    // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+    if (index >= n * c) return;
+    const int c_idx = index % c;
+    const int n_idx = index / c;
+    const int w_c_idx = c_idx % w_c;
+    for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++)
+    {
+        int idx_idx = n_idx * nsample + nsample_idx;
+        int input_idx = idx[idx_idx] * c + c_idx;
+        int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx;
+        int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx;
+        atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]);
+        grad_position[position_idx] = grad_output[index] * weight[weight_idx];
+        atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx]));
+    }
+}
+
+void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c,
+    const float *input, const float *position, const float *weight, const int *idx, float *output) {
+    // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c)
+    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
+    dim3 threads(THREADS_PER_BLOCK);
+    aggregation_forward_cuda_kernel<<<blocks, threads, 0, 0>>>(n, nsample, c, w_c, input, position, weight, idx, output);
+}
+
+void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) {
+    // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c)
+    dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK));
+    dim3 threads(THREADS_PER_BLOCK);
+    aggregation_backward_cuda_kernel<<<blocks, threads, 0, 0>>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight);
+}
diff --git a/Pointcept/libs/pointops/src/aggregation/aggregation_cuda_kernel.h b/Pointcept/libs/pointops/src/aggregation/aggregation_cuda_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..5211a96aa2acbe0d9baf32bddc9ab4be87703072
--- /dev/null
+++ b/Pointcept/libs/pointops/src/aggregation/aggregation_cuda_kernel.h
@@ -0,0 +1,20 @@
+#ifndef _AGGREGATION_CUDA_KERNEL
+#define _AGGREGATION_CUDA_KERNEL
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+
+void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
+void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output);
+void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/Pointcept/libs/pointops/src/attention/attention_cuda.cpp b/Pointcept/libs/pointops/src/attention/attention_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..79b90c7ebc3ed85dc389bc4ae3169a086efc5848
--- /dev/null
+++ b/Pointcept/libs/pointops/src/attention/attention_cuda.cpp
@@ -0,0 +1,76 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "attention_cuda_kernel.h"
+
+
+void attention_relation_step_forward_cuda(int m, int g, int c,
+    at::Tensor query_tensor, at::Tensor key_tensor, at::Tensor weight_tensor,
+    at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
+    at::Tensor output_tensor)
+{
+    const float *query = query_tensor.data_ptr<float>();
+    const float *key = key_tensor.data_ptr<float>();
+    const float *weight = weight_tensor.data_ptr<float>();
+    const int *index_target = index_target_tensor.data_ptr<int>();
+    const int *index_refer = index_refer_tensor.data_ptr<int>();
+    float *output = output_tensor.data_ptr<float>();
+    attention_relation_step_forward_cuda_launcher(m, g, c, query, key, weight, index_target, index_refer, output);
+}
+
+void
attention_relation_step_backward_cuda(int m, int g, int c,
+    at::Tensor query_tensor, at::Tensor grad_query_tensor,
+    at::Tensor key_tensor, at::Tensor grad_key_tensor,
+    at::Tensor weight_tensor, at::Tensor grad_weight_tensor,
+    at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
+    at::Tensor grad_output_tensor)
+{
+    const float *query = query_tensor.data_ptr<float>();
+    float *grad_query = grad_query_tensor.data_ptr<float>();
+    const float *key = key_tensor.data_ptr<float>();
+    float *grad_key = grad_key_tensor.data_ptr<float>();
+    const float *weight = weight_tensor.data_ptr<float>();
+    float *grad_weight = grad_weight_tensor.data_ptr<float>();
+    const int *index_target = index_target_tensor.data_ptr<int>();
+    const int *index_refer = index_refer_tensor.data_ptr<int>();
+    const float *grad_output = grad_output_tensor.data_ptr<float>();
+    attention_relation_step_backward_cuda_launcher(m, g, c,
+        query, grad_query,
+        key, grad_key,
+        weight, grad_weight,
+        index_target, index_refer, grad_output);
+}
+
+
+void attention_fusion_step_forward_cuda(int m, int g, int c,
+    at::Tensor weight_tensor, at::Tensor value_tensor,
+    at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
+    at::Tensor output_tensor)
+{
+    const float *weight = weight_tensor.data_ptr<float>();
+    const float *value = value_tensor.data_ptr<float>();
+    const int *index_target = index_target_tensor.data_ptr<int>();
+    const int *index_refer = index_refer_tensor.data_ptr<int>();
+    float *output = output_tensor.data_ptr<float>();
+    attention_fusion_step_forward_cuda_launcher(m, g, c, weight, value, index_target, index_refer, output);
+}
+
+
+void attention_fusion_step_backward_cuda(int m, int g, int c,
+    at::Tensor weight_tensor, at::Tensor grad_weight_tensor,
+    at::Tensor value_tensor, at::Tensor grad_value_tensor,
+    at::Tensor index_target_tensor, at::Tensor index_refer_tensor,
+    at::Tensor grad_output_tensor)
+{
+    const float *weight = weight_tensor.data_ptr<float>();
+    float *grad_weight = grad_weight_tensor.data_ptr<float>();
+    const float *value = value_tensor.data_ptr<float>();
+    float *grad_value = grad_value_tensor.data_ptr<float>();
+    const int *index_target = index_target_tensor.data_ptr<int>();
+    const int *index_refer = index_refer_tensor.data_ptr<int>();
+    const float *grad_output = grad_output_tensor.data_ptr<float>();
+    attention_fusion_step_backward_cuda_launcher(m, g, c,
+        weight, grad_weight,
+        value, grad_value,
+        index_target, index_refer, grad_output);
+}
diff --git a/Pointcept/libs/pointops/src/attention/attention_cuda_kernel.cu b/Pointcept/libs/pointops/src/attention/attention_cuda_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..05f4544a4dc4da584ad70eece75265d4845171e7
--- /dev/null
+++ b/Pointcept/libs/pointops/src/attention/attention_cuda_kernel.cu
@@ -0,0 +1,149 @@
+#include "../cuda_utils.h"
+#include "attention_cuda_kernel.h"
+
+
+/*
+Kernels
+*/
+
+__global__ void attention_relation_step_forward_cuda_kernel(int m, int g, int c,
+    const float *query, const float *key, const float *weight,
+    const int *index_target, const int *index_refer,
+    float *output)
+{
+    int r_idx = blockIdx.x * blockDim.x + threadIdx.x;
+    int g_idx = blockIdx.y;
+    int c_idx = blockIdx.z;
+
+    if (r_idx >= m || g_idx >= g || c_idx >= c) return;
+    int q_idx = index_target[r_idx] * g * c + g_idx * c + c_idx;
+    int k_idx = index_refer[r_idx] * g * c + g_idx * c + c_idx;
+
+    float r = query[q_idx] * key[k_idx] * weight[c_idx];
+    atomicAdd(output + r_idx * g + g_idx, r);
+}
+
+__global__ void attention_relation_step_backward_cuda_kernel(int m, int g, int c,
+    const float *query, float *grad_query,
+    const float *key,
diff --git a/Pointcept/libs/pointops/src/attention/attention_cuda_kernel.cu b/Pointcept/libs/pointops/src/attention/attention_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..05f4544a4dc4da584ad70eece75265d4845171e7 --- /dev/null +++ b/Pointcept/libs/pointops/src/attention/attention_cuda_kernel.cu @@ -0,0 +1,149 @@ +#include "../cuda_utils.h" +#include "attention_cuda_kernel.h" + + +/* +Kernels +*/ + +__global__ void attention_relation_step_forward_cuda_kernel(int m, int g, int c, + const float *query, const float *key, const float *weight, + const int *index_target, const int *index_refer, + float *output) +{ + int r_idx = blockIdx.x * blockDim.x + threadIdx.x; + int g_idx = blockIdx.y; + int c_idx = blockIdx.z; + + if (r_idx >= m || g_idx >= g || c_idx >= c) return; + int q_idx = index_target[r_idx] * g * c + g_idx * c + c_idx; + int k_idx = index_refer[r_idx] * g * c + g_idx * c + c_idx; + + float r = query[q_idx] * key[k_idx] * weight[c_idx]; + atomicAdd(output + r_idx * g + g_idx, r); +} + +__global__ void attention_relation_step_backward_cuda_kernel(int m, int g, int c, + const float *query, float *grad_query, + const float *key, float *grad_key, + const float *weight, float *grad_weight, + const int *index_target, const int *index_refer, + const float *grad_output) +{ + int r_idx = blockIdx.x * blockDim.x + threadIdx.x; + int g_idx = blockIdx.y; + int c_idx = blockIdx.z; + + if (r_idx >= m || g_idx >= g || c_idx >= c) return; + + int q_idx = index_target[r_idx] * g * c + g_idx * c + c_idx; + int k_idx = index_refer[r_idx] * g * c + g_idx * c + c_idx; + int o_idx = r_idx * g + g_idx; + float grad_r = grad_output[o_idx]; + atomicAdd(grad_query + q_idx, grad_r * key[k_idx] * weight[c_idx]); + atomicAdd(grad_key + k_idx, grad_r * query[q_idx] * weight[c_idx]); + atomicAdd(grad_weight + c_idx, grad_r * key[k_idx] * query[q_idx]); +} + + +__global__ void attention_fusion_step_forward_cuda_kernel(int m, int g, int c, + const float *weight, const float *value, + const int *index_target, const int *index_refer, + float *output) +{ + int r_idx = blockIdx.x * blockDim.x + threadIdx.x; + int g_idx = blockIdx.y; + int c_idx = blockIdx.z; + + if (r_idx >= m || g_idx >= g || c_idx >= c) return; + + int o_idx = index_target[r_idx] * g * c + g_idx * c + c_idx; + int v_idx = index_refer[r_idx] * g * c + g_idx * c + c_idx; + + float f = weight[r_idx * g + g_idx] * value[v_idx]; + atomicAdd(output + o_idx, f); +} + + +__global__ void attention_fusion_step_backward_cuda_kernel(int m, int g, int c, + const float *weight, float *grad_weight, + const float *value, float *grad_value, + const int *index_target, const int *index_refer, + const float *grad_output) +{ + int r_idx = blockIdx.x * blockDim.x + threadIdx.x; + int g_idx = blockIdx.y; + int c_idx = blockIdx.z; + + if (r_idx >= m || g_idx >= g || c_idx >= c) return; + + int o_idx = index_target[r_idx] * g * c + g_idx * c + c_idx; + int v_idx = index_refer[r_idx] * g * c + g_idx * c + c_idx; + int w_idx = r_idx * g + g_idx; + float grad = grad_output[o_idx]; + atomicAdd(grad_weight + w_idx, grad * value[v_idx]); + atomicAdd(grad_value + v_idx, grad * weight[w_idx]); +} + +/* +Launchers +*/ + + +void attention_relation_step_forward_cuda_launcher(int m, int g, int c, + const float *query, const float *key, const float *weight, + const int *index_target, const int *index_refer, + float *output) +{ + dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), g, c); + dim3 threads(THREADS_PER_BLOCK); + attention_relation_step_forward_cuda_kernel<<<blocks, threads, 0>>>(m, g, c, query, key, weight, + index_target, index_refer, output); +} + +void attention_relation_step_backward_cuda_launcher(int m, int g, int c, + const float *query, float *grad_query, + const float *key, float *grad_key, + const float *weight, float *grad_weight, + const int *index_target, const int *index_refer, + const float *grad_output) +{ + dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), g, c); + dim3 threads(THREADS_PER_BLOCK); + attention_relation_step_backward_cuda_kernel<<<blocks, threads, 0>>>(m, g, c, + query, grad_query, + key, grad_key, + weight, grad_weight, + index_target, index_refer, + grad_output); +} + + +void attention_fusion_step_forward_cuda_launcher(int m, int g, int c, + const float *weight, const float *value, + const int *index_target, const int *index_refer, + float *output) +{ + dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), g, c); + dim3 threads(THREADS_PER_BLOCK); + attention_fusion_step_forward_cuda_kernel<<<blocks, threads, 0>>>(m, g, c, weight, value, + index_target, index_refer, output); +} + + +void attention_fusion_step_backward_cuda_launcher(int m, int g, int c, + const float *weight, float *grad_weight, + const float *value, float *grad_value, + const int *index_target, const int *index_refer, + const float *grad_output) +{ + dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), g, c); + dim3 threads(THREADS_PER_BLOCK); + attention_fusion_step_backward_cuda_kernel<<<blocks, threads, 0>>>(m, g, c, + weight, grad_weight, + value, grad_value, + index_target, index_refer, + grad_output); +} + +
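The fusion step is the complement of the relation step: each pair scatter-adds weight[r, g] * value[index_refer[r]] into the target row, exactly what the atomicAdd in the forward kernel above does. A PyTorch reference mirror (illustrative names; n is the number of output rows):

import torch

def fusion_step_ref(weight, value, index_target, index_refer, n):
    # weight: (m, g), value: (N, g, c), index_*: (m,) int64 -> output: (n, g, c)
    out = torch.zeros(n, value.shape[1], value.shape[2], device=value.device)
    # index_add_ is the vectorized analogue of the kernel's atomicAdd scatter
    out.index_add_(0, index_target, weight.unsqueeze(-1) * value[index_refer])
    return out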
diff --git a/Pointcept/libs/pointops/src/attention/attention_cuda_kernel.h b/Pointcept/libs/pointops/src/attention/attention_cuda_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..fec965c0415c4cb5c64fd10e441b6a4c6a6c9ae9 --- /dev/null +++ b/Pointcept/libs/pointops/src/attention/attention_cuda_kernel.h @@ -0,0 +1,54 @@ +#ifndef _ATTENTION_CUDA_KERNEL +#define _ATTENTION_CUDA_KERNEL +#include <torch/serialize/tensor.h> +#include <vector> +#include <ATen/cuda/CUDAContext.h> + +void attention_relation_step_forward_cuda(int m, int g, int c, + at::Tensor query_tensor, at::Tensor key_tensor, at::Tensor weight_tensor, + at::Tensor index_target_tensor, at::Tensor index_refer_tensor, + at::Tensor output_tensor); +void attention_relation_step_backward_cuda(int m, int g, int c, + at::Tensor query_tensor, at::Tensor grad_query_tensor, + at::Tensor key_tensor, at::Tensor grad_key_tensor, + at::Tensor weight_tensor, at::Tensor grad_weight_tensor, + at::Tensor index_target_tensor, at::Tensor index_refer_tensor, + at::Tensor grad_output_tensor); +void attention_fusion_step_forward_cuda(int m, int g, int c, + at::Tensor weight_tensor, at::Tensor value_tensor, + at::Tensor index_target_tensor, at::Tensor index_refer_tensor, + at::Tensor output_tensor); +void attention_fusion_step_backward_cuda(int m, int g, int c, + at::Tensor weight_tensor, at::Tensor grad_weight_tensor, + at::Tensor value_tensor, at::Tensor grad_value_tensor, + at::Tensor index_target_tensor, at::Tensor index_refer_tensor, + at::Tensor grad_output_tensor); + +#ifdef __cplusplus +extern "C" { +#endif + +void attention_relation_step_forward_cuda_launcher(int m, int g, int c, + const float *query, const float *key, const float *weight, + const int *index_target, const int *index_refer, + float *output); +void attention_relation_step_backward_cuda_launcher(int m, int g, int c, + const float *query, float *grad_query, + const float *key, float *grad_key, + const float *weight, float *grad_weight, + const int *index_target, const int *index_refer, + const float *grad_output); +void attention_fusion_step_forward_cuda_launcher(int m, int g, int c, + const float *weight, const float *value, + const int *index_target, const int *index_refer, + float *output); +void attention_fusion_step_backward_cuda_launcher(int m, int g, int c, + const float *weight, float *grad_weight, + const float *value, float *grad_value, + const int *index_target, const int *index_refer, + const float *grad_output); + +#ifdef __cplusplus +} +#endif +#endif
diff --git a/Pointcept/libs/pointops/src/ball_query/ball_query_cuda.cpp b/Pointcept/libs/pointops/src/ball_query/ball_query_cuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..04cd5ff9e8e39c006222d5651f3aae70ce2e35c9 --- /dev/null +++ b/Pointcept/libs/pointops/src/ball_query/ball_query_cuda.cpp @@ -0,0 +1,20 @@ +#include <vector> +#include <torch/serialize/tensor.h> +#include <ATen/cuda/CUDAContext.h> +#include "ball_query_cuda_kernel.h" + + +void ball_query_cuda(int m, int nsample, + float min_radius, float max_radius, + at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, + at::Tensor offset_tensor, at::Tensor new_offset_tensor, + at::Tensor idx_tensor, at::Tensor dist2_tensor) +{ + const float *xyz = xyz_tensor.data_ptr<float>(); + const float *new_xyz = new_xyz_tensor.data_ptr<float>(); + const int *offset = offset_tensor.data_ptr<int>(); + const int *new_offset = new_offset_tensor.data_ptr<int>(); + int *idx = idx_tensor.data_ptr<int>(); + float *dist2 = dist2_tensor.data_ptr<float>(); + ball_query_cuda_launcher(m, nsample, min_radius, max_radius, xyz, new_xyz, offset, new_offset, idx, dist2); +} diff --git a/Pointcept/libs/pointops/src/ball_query/ball_query_cuda_kernel.cu b/Pointcept/libs/pointops/src/ball_query/ball_query_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..7b3d95a9835f607798f0d63e2b66ddb3af9032da --- /dev/null +++ b/Pointcept/libs/pointops/src/ball_query/ball_query_cuda_kernel.cu @@ -0,0 +1,190 @@ +#include "../cuda_utils.h" +#include "ball_query_cuda_kernel.h" + + +namespace ball_query_utils{ + +template <typename DType> +__device__ void swap(DType *x, DType *y) +{ + DType tmp = *x; + *x = *y; + *y = tmp; +} + +__device__ void reheap(float *dist, int *idx, int k) +{ + int root = 0; + int child = root * 2 + 1; + while (child < k) + { + if(child + 1 < k && dist[child+1] > dist[child]) + child++; + if(dist[root] > dist[child]) + return; + swap(&dist[root], &dist[child]); + swap(&idx[root], &idx[child]); + root = child; + child = root * 2 + 1; + } +} + + +__device__ void heap_sort(float *dist, int *idx, int k) +{ + int i; + for (i = k - 1; i > 0; i--) + { + swap(&dist[0], &dist[i]); + swap(&idx[0], &idx[i]); + reheap(dist, idx, i); + } +} + +__device__ int get_bt_idx(int idx, const int *offset) +{ + int i = 0; + while (1) + { + if (idx < offset[i]) + break; + else + i++; + } + return i; +} +} // namespace ball_query_utils + +__global__ void ball_query_cuda_kernel(int m, int nsample, + float min_radius, float max_radius, + const float *__restrict__ xyz, const float *__restrict__ new_xyz, + const int *__restrict__ offset, const int *__restrict__ new_offset, + int *__restrict__ idx, float *__restrict__ dist2) { + // input: xyz (n, 3) new_xyz (m, 3) + // output: idx (m, nsample) dist (m, nsample) + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (pt_idx >= m) return; + + new_xyz += pt_idx * 3; + idx += pt_idx * nsample; + dist2 += pt_idx * nsample; + + int bt_idx = ball_query_utils::get_bt_idx(pt_idx, new_offset); + int start; + if (bt_idx == 0) + start = 0; + else + start = offset[bt_idx - 1]; + int end = offset[bt_idx]; + + float max_radius2 = max_radius * max_radius; + float min_radius2 = min_radius * min_radius; + float new_x = new_xyz[0]; + float new_y = new_xyz[1]; + float new_z = new_xyz[2]; + + float candi_dist[2048]; + int candi_idx[2048]; + int candi_num = 0; + + for(int i = start; i < end; i++){ + float x = xyz[i * 3 + 0]; + float y = xyz[i * 3 + 1]; + float z = xyz[i * 3 + 2]; + float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); + + if (d2 <= 1e-5 || (d2 >= min_radius2 && d2 < max_radius2)){ + // TODO: Check d2 <= 1e-5 + candi_dist[candi_num] = d2; + candi_idx[candi_num] = i; + candi_num += 1; + } + } + ball_query_utils::heap_sort(candi_dist, candi_idx, candi_num); + if(candi_num <= nsample){ + for(int i = 0; i < candi_num; i++){ + idx[i] = candi_idx[i]; + dist2[i] = candi_dist[i]; + } + for(int i = candi_num; i < nsample; i++){ + idx[i] = -1; + dist2[i] = 1e10; + } + } + else{ + float sep = static_cast<float>(candi_num) / nsample; + for(int i = 0; i < nsample; i++) + { + int index = static_cast<int>(sep * i); + idx[i] = candi_idx[index]; + dist2[i] = candi_dist[index]; + } + } +} + +/* Random Sample Mode Ball Query */ + +// __global__ void ball_query_cuda_kernel(int m, int nsample, +// float min_radius, float max_radius, +// const float *__restrict__ xyz, const float *__restrict__ new_xyz, +// const int *__restrict__ offset, const int *__restrict__ new_offset, +// int *__restrict__ idx, float *__restrict__ dist2) { +// // input: xyz (n, 3) new_xyz (m, 3) +// // output: idx (m, nsample) dist (m, nsample) +// int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; +// if (pt_idx >= m) return; +// +// new_xyz += pt_idx * 3; +// idx += pt_idx * nsample; +// dist2 += pt_idx * nsample; +// +// int bt_idx = ball_get_bt_idx(pt_idx, new_offset); +// int start; +// if (bt_idx == 0) +// start = 0; +// else +// start = offset[bt_idx - 1]; +// int end = offset[bt_idx]; +// +// float max_radius2 = max_radius * max_radius; +// float min_radius2 = min_radius * min_radius; +// float new_x = new_xyz[0]; +// float new_y = new_xyz[1]; +// float new_z = new_xyz[2]; +// +// int cnt = 0; +// for(int i = start; i < end; i++){ +// float x = xyz[i * 3 + 0]; +// float y = xyz[i * 3 + 1]; +// float z = xyz[i * 3 + 2]; +// float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); +// +// if (d2 == 0 || (d2 >= min_radius2 && d2 < max_radius2)) { +// if (cnt == 0) { +// for (int l = 0; l < nsample; ++l) { +// idx[l] = i; +// dist2[l] = d2; +// } +// } +// idx[cnt] = i; +// ++cnt; +// if (cnt >= nsample) break; +// } +// } +// } + + +void ball_query_cuda_launcher(int m, int nsample, + float min_radius, float max_radius, + const float *xyz, const float *new_xyz, + const int *offset, const int *new_offset, + int *idx, float *dist2) { + // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample) + dim3 blocks(DIVUP(m, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + ball_query_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, + min_radius, max_radius, + xyz, new_xyz, + offset, new_offset, + idx, dist2); +}
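A minimal driver sketch for the ball query binding, assuming the extension is importable as pointops_cuda (an assumption; the build name is outside this excerpt). Note from the kernel above that slots with no neighbor inside the shell are padded with idx -1 and dist2 1e10, and that the per-thread candidate buffers cap the candidate count at 2048.

import torch
import pointops_cuda  # assumed module name

n, m, nsample = 8192, 2048, 32
xyz = torch.rand(n, 3, device="cuda")
new_xyz = xyz[:m].clone()
offset = torch.tensor([n], dtype=torch.int32, device="cuda")      # single batch
new_offset = torch.tensor([m], dtype=torch.int32, device="cuda")
idx = torch.zeros(m, nsample, dtype=torch.int32, device="cuda")
dist2 = torch.zeros(m, nsample, device="cuda")
pointops_cuda.ball_query_cuda(m, nsample, 0.0, 0.2, xyz, new_xyz, offset, new_offset, idx, dist2)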
diff --git a/Pointcept/libs/pointops/src/ball_query/ball_query_cuda_kernel.h b/Pointcept/libs/pointops/src/ball_query/ball_query_cuda_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..03007a285a3559da85d099681f1316915e1d31b1 --- /dev/null +++ b/Pointcept/libs/pointops/src/ball_query/ball_query_cuda_kernel.h @@ -0,0 +1,26 @@ +#ifndef _BALL_QUERY_CUDA_KERNEL +#define _BALL_QUERY_CUDA_KERNEL +#include <torch/serialize/tensor.h> +#include <vector> +#include <ATen/cuda/CUDAContext.h> + +void ball_query_cuda(int m, int nsample, + float min_radius, float max_radius, + at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, + at::Tensor offset_tensor, at::Tensor new_offset_tensor, + at::Tensor idx_tensor, at::Tensor dist2_tensor); + +#ifdef __cplusplus +extern "C" { +#endif + +void ball_query_cuda_launcher(int m, int nsample, + float min_radius, float max_radius, + const float *xyz, const float *new_xyz, + const int *offset, const int *new_offset, + int *idx, float *dist2); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/Pointcept/libs/pointops/src/cuda_utils.h b/Pointcept/libs/pointops/src/cuda_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..bbfe7a06bf989056c0bd99e3e64fdbe7d15bb093 --- /dev/null +++ b/Pointcept/libs/pointops/src/cuda_utils.h @@ -0,0 +1,23 @@ +#ifndef _CUDA_UTILS_H +#define _CUDA_UTILS_H + +#include <cmath> +#include <algorithm> + +#define TOTAL_THREADS 1024 +#define THREADS_PER_BLOCK 512 +#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) + +inline int opt_n_threads(int work_size) { + const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0); + return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1); +} + +inline dim3 opt_block_config(int x, int y) { + const int x_threads = opt_n_threads(x); + const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); + dim3 block_config(x_threads, y_threads, 1); + return block_config; +} + +#endif
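To make the helper arithmetic concrete, here is a Python mirror of DIVUP and opt_n_threads under the constants defined above (TOTAL_THREADS = 1024): DIVUP is integer ceiling division, and opt_n_threads picks the largest power of two not exceeding the work size, clamped to [1, 1024].

from math import log2

TOTAL_THREADS = 1024

def divup(m, n):                      # DIVUP(m, n): ceil(m / n) in integer math
    return m // n + (1 if m % n else 0)

def opt_n_threads(work_size):         # largest power of two <= work_size, capped
    return max(min(1 << int(log2(work_size)), TOTAL_THREADS), 1)

assert divup(1000, 512) == 2
assert opt_n_threads(300) == 256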
diff --git a/Pointcept/libs/pointops/src/grouping/grouping_cuda.cpp b/Pointcept/libs/pointops/src/grouping/grouping_cuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6f7990adaf43f0a77050eed0d55adad19f256e10 --- /dev/null +++ b/Pointcept/libs/pointops/src/grouping/grouping_cuda.cpp @@ -0,0 +1,21 @@ +#include <vector> +#include <torch/serialize/tensor.h> +#include <ATen/cuda/CUDAContext.h> +#include "grouping_cuda_kernel.h" + + +void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) +{ + const float *input = input_tensor.data_ptr<float>(); + const int *idx = idx_tensor.data_ptr<int>(); + float *output = output_tensor.data_ptr<float>(); + grouping_forward_cuda_launcher(m, nsample, c, input, idx, output); +} + +void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor) +{ + const float *grad_output = grad_output_tensor.data_ptr<float>(); + const int *idx = idx_tensor.data_ptr<int>(); + float *grad_input = grad_input_tensor.data_ptr<float>(); + grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input); +} diff --git a/Pointcept/libs/pointops/src/grouping/grouping_cuda_kernel.cu b/Pointcept/libs/pointops/src/grouping/grouping_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..58ec0a21a2949f9f82504ccd24597c544c50af40 --- /dev/null +++ b/Pointcept/libs/pointops/src/grouping/grouping_cuda_kernel.cu @@ -0,0 +1,40 @@ +#include "../cuda_utils.h" +#include "grouping_cuda_kernel.h" + + +__global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) { + // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) + int index = blockIdx.x * blockDim.x + threadIdx.x; + if (index >= m * nsample * c) return; + const int c_idx = index % c; + const int nsample_idx = (index / c) % nsample; + const int m_idx = index / nsample / c; + const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; + output[index] = input[input_idx]; +} + +__global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) { + // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) + int index = blockIdx.x * blockDim.x + threadIdx.x; + if (index >= m * nsample * c) return; + const int c_idx = index % c; + const int nsample_idx = (index / c) % nsample; + const int m_idx = index / nsample / c; + const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; + atomicAdd(grad_input + input_idx, grad_output[index]); +} + +void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) { + // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) + dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + grouping_forward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, input, idx, output); +} + +void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input) +{ + // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) + dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + grouping_backward_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, c, grad_output, idx, grad_input); +}
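The grouping forward pass is a plain gather; in PyTorch the same semantics is one line of fancy indexing, which makes a handy reference when testing the kernel (the backward kernel is the matching scatter-add of grad_output through the same indices):

import torch

def grouping_ref(input, idx):
    # input: (n, c), idx: (m, nsample) int64 -> output: (m, nsample, c)
    return input[idx]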
diff --git a/Pointcept/libs/pointops/src/grouping/grouping_cuda_kernel.h b/Pointcept/libs/pointops/src/grouping/grouping_cuda_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..3db4aaa9fad5811d559d47c500e4b00f0165d9b4 --- /dev/null +++ b/Pointcept/libs/pointops/src/grouping/grouping_cuda_kernel.h @@ -0,0 +1,20 @@ +#ifndef _GROUPING_CUDA_KERNEL +#define _GROUPING_CUDA_KERNEL +#include <torch/serialize/tensor.h> +#include <vector> +#include <ATen/cuda/CUDAContext.h> + +void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); +void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor); + +#ifdef __cplusplus +extern "C" { +#endif + +void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output); +void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/Pointcept/libs/pointops/src/interpolation/interpolation_cuda.cpp b/Pointcept/libs/pointops/src/interpolation/interpolation_cuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f2c1b0078f4b70626705d7b3f5d1d65d37ee6de7 --- /dev/null +++ b/Pointcept/libs/pointops/src/interpolation/interpolation_cuda.cpp @@ -0,0 +1,23 @@ +#include <vector> +#include <torch/serialize/tensor.h> +#include <ATen/cuda/CUDAContext.h> +#include "interpolation_cuda_kernel.h" + + +void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor) +{ + const float *input = input_tensor.data_ptr<float>(); + const int *idx = idx_tensor.data_ptr<int>(); + const float *weight = weight_tensor.data_ptr<float>(); + float *output = output_tensor.data_ptr<float>(); + interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output); +} + +void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor) +{ + const float *grad_output = grad_output_tensor.data_ptr<float>(); + const int *idx = idx_tensor.data_ptr<int>(); + const float *weight = weight_tensor.data_ptr<float>(); + float *grad_input = grad_input_tensor.data_ptr<float>(); + interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input); +} diff --git a/Pointcept/libs/pointops/src/interpolation/interpolation_cuda_kernel.cu b/Pointcept/libs/pointops/src/interpolation/interpolation_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..f560d8c92c6eac865b8c1e1dc27140fe3fcc2250 --- /dev/null +++ b/Pointcept/libs/pointops/src/interpolation/interpolation_cuda_kernel.cu @@ -0,0 +1,47 @@ +#include "../cuda_utils.h" +#include "interpolation_cuda_kernel.h" + + +__global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) +{ + // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) + int index = blockIdx.x * blockDim.x + threadIdx.x; + if (index >= n * c) return; + int c_idx = index % c; + int n_idx = index / c; + for (int i = 0; i < k; i++) + { + int idx_idx = n_idx * k + i; + int input_idx = idx[idx_idx] * c + c_idx; + output[index] += input[input_idx] * weight[idx_idx]; + } +} + +__global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) +{ + // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) + int index = blockIdx.x * blockDim.x + threadIdx.x; + if (index >= n * c) return; + int c_idx = index % c; + int n_idx = index / c; + for (int i = 0; i < k; i++) + { + int idx_idx = n_idx * k + i; + int input_idx = idx[idx_idx] * c + c_idx; + atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]); + } +} + +void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) { + // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) + dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + interpolation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, input, idx, weight, output); +} + +void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) { + // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) + dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + interpolation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, c, k, grad_output, idx, weight, grad_input); +}
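The interpolation forward pass computes, per output point, a weighted sum over its k source neighbors: output[n] = sum_i input[idx[n, i]] * weight[n, i]. A PyTorch reference mirror of that semantics (illustrative, for testing against the kernel above):

import torch

def interpolation_ref(input, idx, weight):
    # input: (m, c), idx: (n, k) int64, weight: (n, k) -> output: (n, c)
    return (input[idx] * weight.unsqueeze(-1)).sum(dim=1)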
diff --git a/Pointcept/libs/pointops/src/interpolation/interpolation_cuda_kernel.h b/Pointcept/libs/pointops/src/interpolation/interpolation_cuda_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..309e5dd0a34ccb58807bbf32389ba65e7ee6961b --- /dev/null +++ b/Pointcept/libs/pointops/src/interpolation/interpolation_cuda_kernel.h @@ -0,0 +1,20 @@ +#ifndef _INTERPOLATION_CUDA_KERNEL +#define _INTERPOLATION_CUDA_KERNEL +#include <torch/serialize/tensor.h> +#include <vector> +#include <ATen/cuda/CUDAContext.h> + +void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor); +void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor); + +#ifdef __cplusplus +extern "C" { +#endif + +void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output); +void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/Pointcept/libs/pointops/src/knn_query/knn_query_cuda.cpp b/Pointcept/libs/pointops/src/knn_query/knn_query_cuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bbe841ce0352fd234143b3b4978ec001522b31dd --- /dev/null +++ b/Pointcept/libs/pointops/src/knn_query/knn_query_cuda.cpp @@ -0,0 +1,16 @@ +#include <vector> +#include <torch/serialize/tensor.h> +#include <ATen/cuda/CUDAContext.h> +#include "knn_query_cuda_kernel.h" + + +void knn_query_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor) +{ + const float *xyz = xyz_tensor.data_ptr<float>(); + const float *new_xyz = new_xyz_tensor.data_ptr<float>(); + const int *offset = offset_tensor.data_ptr<int>(); + const int *new_offset = new_offset_tensor.data_ptr<int>(); + int *idx = idx_tensor.data_ptr<int>(); + float *dist2 = dist2_tensor.data_ptr<float>(); + knn_query_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); +}
diff --git a/Pointcept/libs/pointops/src/knn_query/knn_query_cuda_kernel.cu b/Pointcept/libs/pointops/src/knn_query/knn_query_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..297740237eae98cc4e61421bc261755d79b83142 --- /dev/null +++ b/Pointcept/libs/pointops/src/knn_query/knn_query_cuda_kernel.cu @@ -0,0 +1,112 @@ +#include "../cuda_utils.h" +#include "knn_query_cuda_kernel.h" + + +namespace knn_query_utils{ + +template <typename DType> +__device__ void swap(DType *x, DType *y) +{ + DType tmp = *x; + *x = *y; + *y = tmp; +} + +__device__ void reheap(float *dist, int *idx, int k) +{ + int root = 0; + int child = root * 2 + 1; + while (child < k) + { + if(child + 1 < k && dist[child+1] > dist[child]) + child++; + if(dist[root] > dist[child]) + return; + swap(&dist[root], &dist[child]); + swap(&idx[root], &idx[child]); + root = child; + child = root * 2 + 1; + } +} + + +__device__ void heap_sort(float *dist, int *idx, int k) +{ + int i; + for (i = k - 1; i > 0; i--) + { + swap(&dist[0], &dist[i]); + swap(&idx[0], &idx[i]); + reheap(dist, idx, i); + } +} + + +__device__ int get_bt_idx(int idx, const int *offset) +{ + int i = 0; + while (1) + { + if (idx < offset[i]) + break; + else + i++; + } + return i; +} +} // namespace knn_query_utils + + +__global__ void knn_query_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) { + // input: xyz (n, 3) new_xyz (m, 3) + // output: idx (m, nsample) dist2 (m, nsample) + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (pt_idx >= m) return; + + new_xyz += pt_idx * 3; + idx += pt_idx * nsample; + dist2 += pt_idx * nsample; + + int bt_idx = knn_query_utils::get_bt_idx(pt_idx, new_offset); + int start; + if (bt_idx == 0) + start = 0; + else + start = offset[bt_idx - 1]; + int end = offset[bt_idx]; + + float new_x = new_xyz[0]; + float new_y = new_xyz[1]; + float new_z = new_xyz[2]; + + float best_dist[128]; + int best_idx[128]; + for(int i = 0; i < nsample; i++){ + best_dist[i] = 1e10; + best_idx[i] = -1; + } + for(int i = start; i < end; i++){ + float x = xyz[i * 3 + 0]; + float y = xyz[i * 3 + 1]; + float z = xyz[i * 3 + 2]; + float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); + if (d2 < best_dist[0]){ + best_dist[0] = d2; + best_idx[0] = i; + knn_query_utils::reheap(best_dist, best_idx, nsample); + } + } + knn_query_utils::heap_sort(best_dist, best_idx, nsample); + for(int i = 0; i < nsample; i++){ + idx[i] = best_idx[i]; + dist2[i] = best_dist[i]; + } +} + + +void knn_query_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) { + // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample) + dim3 blocks(DIVUP(m, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + knn_query_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); +} diff --git a/Pointcept/libs/pointops/src/knn_query/knn_query_cuda_kernel.h b/Pointcept/libs/pointops/src/knn_query/knn_query_cuda_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..c07c1cb46a56b7a37d55e25fb78816e034a8387e --- /dev/null +++ b/Pointcept/libs/pointops/src/knn_query/knn_query_cuda_kernel.h @@ -0,0 +1,18 @@ +#ifndef _KNN_QUERY_CUDA_KERNEL +#define _KNN_QUERY_CUDA_KERNEL +#include <torch/serialize/tensor.h> +#include <vector> +#include <ATen/cuda/CUDAContext.h> + +void knn_query_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor); + +#ifdef __cplusplus +extern "C" { +#endif + +void knn_query_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2); + +#ifdef __cplusplus +} +#endif +#endif
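A driver sketch for the kNN binding, again assuming the extension imports as pointops_cuda (an assumption). The fixed-size per-thread heaps in the kernel above (best_dist[128]) imply nsample is expected to stay at or below 128; the launcher returns squared distances, maintained as a max-heap so the worst candidate is evicted first.

import torch
import pointops_cuda  # assumed module name

n, nsample = 4096, 16
xyz = torch.rand(n, 3, device="cuda")
offset = torch.tensor([n], dtype=torch.int32, device="cuda")
idx = torch.zeros(n, nsample, dtype=torch.int32, device="cuda")
dist2 = torch.zeros(n, nsample, device="cuda")
# self-query: neighbors of every point within the same (single-batch) cloud
pointops_cuda.knn_query_cuda(n, nsample, xyz, xyz, offset, offset, idx, dist2)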
diff --git a/Pointcept/libs/pointops/src/pointops_api.cpp b/Pointcept/libs/pointops/src/pointops_api.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5ca4377607eb181d48d458d700f1df876294a848 --- /dev/null +++ b/Pointcept/libs/pointops/src/pointops_api.cpp @@ -0,0 +1,32 @@ +#include <torch/serialize/tensor.h> +#include <torch/extension.h> + +#include "knn_query/knn_query_cuda_kernel.h" +#include "ball_query/ball_query_cuda_kernel.h" +#include "random_ball_query/random_ball_query_cuda_kernel.h" +#include "sampling/sampling_cuda_kernel.h" +#include "grouping/grouping_cuda_kernel.h" +#include "interpolation/interpolation_cuda_kernel.h" +#include "aggregation/aggregation_cuda_kernel.h" +#include "subtraction/subtraction_cuda_kernel.h" +#include "attention/attention_cuda_kernel.h" + + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("knn_query_cuda", &knn_query_cuda, "knn_query_cuda"); + m.def("ball_query_cuda", &ball_query_cuda, "ball_query_cuda"); + m.def("random_ball_query_cuda", &random_ball_query_cuda, "random_ball_query_cuda"); + m.def("farthest_point_sampling_cuda", &farthest_point_sampling_cuda, "farthest_point_sampling_cuda"); + m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda"); + m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda"); + m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda"); + m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda"); + m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda"); + m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda"); + m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda"); + m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda"); + m.def("attention_relation_step_forward_cuda", &attention_relation_step_forward_cuda, "attention_relation_step_forward_cuda"); + m.def("attention_relation_step_backward_cuda", &attention_relation_step_backward_cuda, "attention_relation_step_backward_cuda"); + m.def("attention_fusion_step_forward_cuda", &attention_fusion_step_forward_cuda, "attention_fusion_step_forward_cuda"); + m.def("attention_fusion_step_backward_cuda", &attention_fusion_step_backward_cuda, "attention_fusion_step_backward_cuda"); +}
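This PYBIND11_MODULE block is the single Python entry point for the library: once the extension is compiled (the build script is outside this excerpt), the sixteen registered ops are plain module-level functions. A quick smoke test, assuming the build name pointops_cuda:

import pointops_cuda  # assumed module name, set via TORCH_EXTENSION_NAME at build time

ops = [name for name in dir(pointops_cuda) if name.endswith("_cuda")]
print(ops)  # expect the sixteen ops registered above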
diff --git a/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda.cpp b/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c2618c94b6b19175f044131cebeefe8a23152c47 --- /dev/null +++ b/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda.cpp @@ -0,0 +1,21 @@ +#include <vector> +#include <torch/serialize/tensor.h> +#include <ATen/cuda/CUDAContext.h> +#include "random_ball_query_cuda_kernel.h" + + +void random_ball_query_cuda(int m, int nsample, + float min_radius, float max_radius, at::Tensor order_tensor, + at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, + at::Tensor offset_tensor, at::Tensor new_offset_tensor, + at::Tensor idx_tensor, at::Tensor dist2_tensor) +{ + const int *order = order_tensor.data_ptr<int>(); + const float *xyz = xyz_tensor.data_ptr<float>(); + const float *new_xyz = new_xyz_tensor.data_ptr<float>(); + const int *offset = offset_tensor.data_ptr<int>(); + const int *new_offset = new_offset_tensor.data_ptr<int>(); + int *idx = idx_tensor.data_ptr<int>(); + float *dist2 = dist2_tensor.data_ptr<float>(); + random_ball_query_cuda_launcher(m, nsample, min_radius, max_radius, order, xyz, new_xyz, offset, new_offset, idx, dist2); +} diff --git a/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.cu b/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..bfafb0f8b731e201783c94144cad9de3e11228ad --- /dev/null +++ b/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.cu @@ -0,0 +1,123 @@ +#include "../cuda_utils.h" +#include "random_ball_query_cuda_kernel.h" + + +namespace random_ball_query_utils{ + +template <typename DType> +__device__ void swap(DType *x, DType *y) +{ + DType tmp = *x; + *x = *y; + *y = tmp; +} + +__device__ void reheap(float *dist, int *idx, int k) +{ + int root = 0; + int child = root * 2 + 1; + while (child < k) + { + if(child + 1 < k && dist[child+1] > dist[child]) + child++; + if(dist[root] > dist[child]) + return; + swap(&dist[root], &dist[child]); + swap(&idx[root], &idx[child]); + root = child; + child = root * 2 + 1; + } +} + + +__device__ void heap_sort(float *dist, int *idx, int k) +{ + int i; + for (i = k - 1; i > 0; i--) + { + swap(&dist[0], &dist[i]); + swap(&idx[0], &idx[i]); + reheap(dist, idx, i); + } +} + +__device__ int get_bt_idx(int idx, const int *offset) +{ + int i = 0; + while (1) + { + if (idx < offset[i]) + break; + else + i++; + } + return i; +} +} // namespace random_ball_query_utils + +__global__ void random_ball_query_cuda_kernel(int m, int nsample, + float min_radius, float max_radius, const int *__restrict__ order, + const float *__restrict__ xyz, const float *__restrict__ new_xyz, + const int *__restrict__ offset, const int *__restrict__ new_offset, + int *__restrict__ idx, float *__restrict__ dist2) { + // input: xyz (n, 3) new_xyz (m, 3) + // output: idx (m, nsample) dist (m, nsample) + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (pt_idx >= m) return; + + new_xyz += pt_idx * 3; + idx += pt_idx * nsample; + dist2 += pt_idx * nsample; + + int bt_idx = random_ball_query_utils::get_bt_idx(pt_idx, new_offset); + int start; + if (bt_idx == 0) + start = 0; + else + start = offset[bt_idx - 1]; + int end = offset[bt_idx]; + + float max_radius2 = max_radius * max_radius; + float min_radius2 = min_radius * min_radius; + float new_x = new_xyz[0]; + float new_y = new_xyz[1]; + float new_z = new_xyz[2]; + + int cnt = 0; + + for(int i = start; i < end; i++){ + float x = xyz[order[i] * 3 + 0]; + float y = xyz[order[i] * 3 + 1]; + float z = xyz[order[i] * 3 + 2]; + float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); + + if (d2 <= 1e-5 || (d2 >= min_radius2 && d2 < max_radius2)){ + dist2[cnt] = d2; + idx[cnt] = order[i]; + cnt += 1; + if (cnt >= nsample) break; + } + } + + if (cnt < nsample) { + for (int i = cnt; i < nsample; i++){ + idx[i] = -1; + dist2[i] = 1e10; + } + } +} + +void random_ball_query_cuda_launcher(int m, int nsample, + float min_radius, float max_radius, const int *order, + const float *xyz, const float *new_xyz, + const int *offset, const int *new_offset, + int *idx, float *dist2) { + // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample) + dim3 blocks(DIVUP(m, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + random_ball_query_cuda_kernel<<<blocks, threads, 0>>>(m, nsample, + min_radius, max_radius, order, + xyz, new_xyz, + offset, new_offset, + idx, dist2); +}
diff --git a/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.h b/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..d3e35be21933d95b50e9c42150067071502bbc1e --- /dev/null +++ b/Pointcept/libs/pointops/src/random_ball_query/random_ball_query_cuda_kernel.h @@ -0,0 +1,26 @@ +#ifndef _RANDOM_BALL_QUERY_CUDA_KERNEL +#define _RANDOM_BALL_QUERY_CUDA_KERNEL +#include <torch/serialize/tensor.h> +#include <vector> +#include <ATen/cuda/CUDAContext.h> + +void random_ball_query_cuda(int m, int nsample, + float min_radius, float max_radius, at::Tensor order_tensor, + at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, + at::Tensor offset_tensor, at::Tensor new_offset_tensor, + at::Tensor idx_tensor, at::Tensor dist2_tensor); + +#ifdef __cplusplus +extern "C" { +#endif + +void random_ball_query_cuda_launcher(int m, int nsample, + float min_radius, float max_radius, const int *order, + const float *xyz, const float *new_xyz, + const int *offset, const int *new_offset, + int *idx, float *dist2); + +#ifdef __cplusplus +} +#endif +#endif
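Unlike the plain ball query above, which collects and heap-sorts all in-shell candidates, this variant scans points in the caller-supplied order permutation and keeps the first nsample hits, which amounts to random sampling when order is a random permutation. A single-query Python mirror (illustrative names):

def random_ball_ref(order, xyz, q, min_r, max_r, nsample):
    # order: iterable of point indices; xyz: (n, 3); q: (3,) query point
    hits_idx, hits_d2 = [], []
    for j in order:
        d2 = float(((xyz[j] - q) ** 2).sum())
        if d2 <= 1e-5 or (min_r * min_r <= d2 < max_r * max_r):
            hits_idx.append(j)
            hits_d2.append(d2)
            if len(hits_idx) == nsample:
                break
    pad = nsample - len(hits_idx)              # pad like the kernel does
    return hits_idx + [-1] * pad, hits_d2 + [1e10] * pad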
diff --git a/Pointcept/libs/pointops/src/sampling/sampling_cuda.cpp b/Pointcept/libs/pointops/src/sampling/sampling_cuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7dc8094c3343f874457fd23d1506b25fd006fd0b --- /dev/null +++ b/Pointcept/libs/pointops/src/sampling/sampling_cuda.cpp @@ -0,0 +1,15 @@ +#include <vector> +#include <torch/serialize/tensor.h> +#include <ATen/cuda/CUDAContext.h> +#include "sampling_cuda_kernel.h" + + +void farthest_point_sampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor) +{ + const float *xyz = xyz_tensor.data_ptr<float>(); + const int *offset = offset_tensor.data_ptr<int>(); + const int *new_offset = new_offset_tensor.data_ptr<int>(); + float *tmp = tmp_tensor.data_ptr<float>(); + int *idx = idx_tensor.data_ptr<int>(); + farthest_point_sampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx); +} diff --git a/Pointcept/libs/pointops/src/sampling/sampling_cuda_kernel.cu b/Pointcept/libs/pointops/src/sampling/sampling_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..9a8676876672f68cd94913a0500d64813133b387 --- /dev/null +++ b/Pointcept/libs/pointops/src/sampling/sampling_cuda_kernel.cu @@ -0,0 +1,171 @@ +#include "../cuda_utils.h" +#include "sampling_cuda_kernel.h" + + +__device__ void __update(float *dists, int *dists_i, int idx1, int idx2) { + const float v1 = dists[idx1], v2 = dists[idx2]; + const int i1 = dists_i[idx1], i2 = dists_i[idx2]; + dists[idx1] = max(v1, v2); + dists_i[idx1] = v2 > v1 ? i2 : i1; +} + +// input xyz: (n, 3), tmp: (b, n_max) +// output idx (m) +template <unsigned int block_size> +__global__ void farthest_point_sampling_cuda_kernel(const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx) +{ + __shared__ float dists[block_size]; + __shared__ int dists_i[block_size]; + + int bid = blockIdx.x; + int start_n, end_n, start_m, end_m, old; + if (bid == 0) { + start_n = 0; + end_n = offset[0]; + start_m = 0; + end_m = new_offset[0]; + old = 0; + } + else { + start_n = offset[bid - 1]; + end_n = offset[bid]; + start_m = new_offset[bid - 1]; + end_m = new_offset[bid]; + old = offset[bid - 1]; + } + + const int stride = block_size; + int tid = threadIdx.x; + if (tid == 0) idx[start_m] = start_n; + + __syncthreads(); + for (int j = start_m + 1; j < end_m; j++) + { + int besti = start_n; + float best = -1; + float x1 = xyz[old * 3 + 0]; + float y1 = xyz[old * 3 + 1]; + float z1 = xyz[old * 3 + 2]; + for (int k = start_n + tid; k < end_n; k += stride) + { + float x2 = xyz[k * 3 + 0]; + float y2 = xyz[k * 3 + 1]; + float z2 = xyz[k * 3 + 2]; + float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1); + float d2 = min(d, tmp[k]); + tmp[k] = d2; + besti = d2 > best ? k : besti; + best = d2 > best ? d2 : best; + } + dists[tid] = best; + dists_i[tid] = besti; + __syncthreads(); + + if (block_size >= 1024) { + if (tid < 512) { + __update(dists, dists_i, tid, tid + 512); + } + __syncthreads(); + } + if (block_size >= 512) { + if (tid < 256) { + __update(dists, dists_i, tid, tid + 256); + } + __syncthreads(); + } + if (block_size >= 256) { + if (tid < 128) { + __update(dists, dists_i, tid, tid + 128); + } + __syncthreads(); + } + if (block_size >= 128) { + if (tid < 64) { + __update(dists, dists_i, tid, tid + 64); + } + __syncthreads(); + } + if (block_size >= 64) { + if (tid < 32) { + __update(dists, dists_i, tid, tid + 32); + } + __syncthreads(); + } + if (block_size >= 32) { + if (tid < 16) { + __update(dists, dists_i, tid, tid + 16); + } + __syncthreads(); + } + if (block_size >= 16) { + if (tid < 8) { + __update(dists, dists_i, tid, tid + 8); + } + __syncthreads(); + } + if (block_size >= 8) { + if (tid < 4) { + __update(dists, dists_i, tid, tid + 4); + } + __syncthreads(); + } + if (block_size >= 4) { + if (tid < 2) { + __update(dists, dists_i, tid, tid + 2); + } + __syncthreads(); + } + if (block_size >= 2) { + if (tid < 1) { + __update(dists, dists_i, tid, tid + 1); + } + __syncthreads(); + } + + old = dists_i[0]; + if (tid == 0) + idx[j] = old; + } +} + +void farthest_point_sampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx) +{ + unsigned int n_threads = opt_n_threads(n); + switch (n_threads) { + case 1024: + farthest_point_sampling_cuda_kernel<1024><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx); + break; + case 512: + farthest_point_sampling_cuda_kernel<512><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx); + break; + case 256: + farthest_point_sampling_cuda_kernel<256><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx); + break; + case 128: + farthest_point_sampling_cuda_kernel<128><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx); + break; + case 64: + farthest_point_sampling_cuda_kernel<64><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx); + break; + case 32: + farthest_point_sampling_cuda_kernel<32><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx); + break; + case 16: + farthest_point_sampling_cuda_kernel<16><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx); + break; + case 8: + farthest_point_sampling_cuda_kernel<8><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx); + break; + case 4: + farthest_point_sampling_cuda_kernel<4><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx); + break; + case 2: + farthest_point_sampling_cuda_kernel<2><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx); + break; + case 1: + farthest_point_sampling_cuda_kernel<1><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx); + break; + default: + farthest_point_sampling_cuda_kernel<512><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx); + } +}
diff --git a/Pointcept/libs/pointops/src/sampling/sampling_cuda_kernel.h b/Pointcept/libs/pointops/src/sampling/sampling_cuda_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..f0e07607394a10b2b70c29f7497589d5edb8aab3 --- /dev/null +++ b/Pointcept/libs/pointops/src/sampling/sampling_cuda_kernel.h @@ -0,0 +1,18 @@ +#ifndef _SAMPLING_CUDA_KERNEL +#define _SAMPLING_CUDA_KERNEL +#include <torch/serialize/tensor.h> +#include <vector> +#include <ATen/cuda/CUDAContext.h> + +void farthest_point_sampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor); + +#ifdef __cplusplus +extern "C" { +#endif + +void farthest_point_sampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx); + +#ifdef __cplusplus +} +#endif +#endif
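A driver sketch for farthest point sampling, assuming the pointops_cuda module name (an assumption). Here b is the number of batches (one CUDA block per batch), n is the largest per-batch point count, tmp is the running min-distance buffer the kernel expects pre-filled with 1e10, and idx receives the new_offset[-1] selected indices.

import torch
import pointops_cuda  # assumed module name

n, m = 10000, 1024
xyz = torch.rand(n, 3, device="cuda")
offset = torch.tensor([n], dtype=torch.int32, device="cuda")
new_offset = torch.tensor([m], dtype=torch.int32, device="cuda")
tmp = torch.full((n,), 1e10, device="cuda")   # running min-distance per point
idx = torch.zeros(m, dtype=torch.int32, device="cuda")
pointops_cuda.farthest_point_sampling_cuda(1, n, xyz, offset, new_offset, tmp, idx)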
diff --git a/Pointcept/libs/pointops/src/subtraction/subtraction_cuda.cpp b/Pointcept/libs/pointops/src/subtraction/subtraction_cuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b343857a1671eafe5199089973e863e2ac5b618c --- /dev/null +++ b/Pointcept/libs/pointops/src/subtraction/subtraction_cuda.cpp @@ -0,0 +1,23 @@ +#include <vector> +#include <torch/serialize/tensor.h> +#include <ATen/cuda/CUDAContext.h> +#include "subtraction_cuda_kernel.h" + + +void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) +{ + const float *input1 = input1_tensor.data_ptr<float>(); + const float *input2 = input2_tensor.data_ptr<float>(); + const int *idx = idx_tensor.data_ptr<int>(); + float *output = output_tensor.data_ptr<float>(); + subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output); +} + +void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor) +{ + const int *idx = idx_tensor.data_ptr<int>(); + const float *grad_output = grad_output_tensor.data_ptr<float>(); + float *grad_input1 = grad_input1_tensor.data_ptr<float>(); + float *grad_input2 = grad_input2_tensor.data_ptr<float>(); + subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2); +} diff --git a/Pointcept/libs/pointops/src/subtraction/subtraction_cuda_kernel.cu b/Pointcept/libs/pointops/src/subtraction/subtraction_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..9b8d4f752940d580ee2b49f1b2946a8d6386d11a --- /dev/null +++ b/Pointcept/libs/pointops/src/subtraction/subtraction_cuda_kernel.cu @@ -0,0 +1,44 @@ +#include "../cuda_utils.h" +#include "subtraction_cuda_kernel.h" + + +__global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { + // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) + int index = blockIdx.x * blockDim.x + threadIdx.x; + if (index >= n * nsample * c) return; + const int c_idx = index % c; + const int nsample_idx = (index / c) % nsample; + const int n_idx = index / nsample / c; + const int idx_idx = n_idx * nsample + nsample_idx; + const int input1_idx = n_idx * c + c_idx; + const int input2_idx = idx[idx_idx] * c + c_idx; + output[index] = input1[input1_idx] - input2[input2_idx]; +} + +__global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) { + // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) + int index = blockIdx.x * blockDim.x + threadIdx.x; + if (index >= n * nsample * c) return; + const int c_idx = index % c; + const int nsample_idx = (index / c) % nsample; + const int n_idx = index / nsample / c; + const int idx_idx = n_idx * nsample + nsample_idx; + const int input1_idx = n_idx * c + c_idx; + const int input2_idx = idx[idx_idx] * c + c_idx; + atomicAdd(grad_input1 + input1_idx, grad_output[index]); + atomicAdd(grad_input2 + input2_idx, -grad_output[index]); +} + +void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) { + // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c) + dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + subtraction_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, input1, input2, idx, output); +} + +void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) { + // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) + dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + subtraction_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2); +} diff --git a/Pointcept/libs/pointops/src/subtraction/subtraction_cuda_kernel.h b/Pointcept/libs/pointops/src/subtraction/subtraction_cuda_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..856133d97bdd3dc58f29c746ff240fc9d489c22e --- /dev/null +++ b/Pointcept/libs/pointops/src/subtraction/subtraction_cuda_kernel.h @@ -0,0 +1,20 @@ +#ifndef _SUBTRACTION_CUDA_KERNEL +#define _SUBTRACTION_CUDA_KERNEL +#include <torch/serialize/tensor.h> +#include <vector> +#include <ATen/cuda/CUDAContext.h> + +void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); +void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor); + +#ifdef __cplusplus +extern "C" { +#endif + +void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output); +void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/Pointcept/libs/pointops2/__init__.py b/Pointcept/libs/pointops2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Pointcept/libs/pointops2/functions/__init__.py b/Pointcept/libs/pointops2/functions/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..25a2367da463d2f32b923166b48396d7292ad1f2 --- /dev/null +++ b/Pointcept/libs/pointops2/functions/__init__.py @@ -0,0 +1 @@ +from pointops2 import * diff --git a/Pointcept/libs/pointops2/functions/pointops.py b/Pointcept/libs/pointops2/functions/pointops.py new file mode 
100644 index 0000000000000000000000000000000000000000..efda900b3e702c4e5f8576baad5f4168cb756ee9 --- /dev/null +++ b/Pointcept/libs/pointops2/functions/pointops.py @@ -0,0 +1,1193 @@ +""" +The part of attention operations is written by Xin Lai. +Email: xinlai@cse.cuhk.edu.hk +""" + +from typing import Tuple + +import torch +from torch.autograd import Function +import torch.nn as nn + +import pointops2_cuda as pointops_cuda +import time + + +class FurthestSampling(Function): + @staticmethod + def forward(ctx, xyz, offset, new_offset): + """ + input: xyz: (n, 3), offset: (b), new_offset: (b) + output: idx: (m) + """ + assert xyz.is_contiguous() + n, b, n_max = xyz.shape[0], offset.shape[0], offset[0] + for i in range(1, b): + n_max = max(offset[i] - offset[i - 1], n_max) + idx = torch.cuda.IntTensor(new_offset[b - 1].item()).zero_() + tmp = torch.cuda.FloatTensor(n).fill_(1e10) + pointops_cuda.furthestsampling_cuda(b, n_max, xyz, offset, new_offset, tmp, idx) + del tmp + return idx + + +furthestsampling = FurthestSampling.apply + + +class KNNQuery(Function): + @staticmethod + def forward(ctx, nsample, xyz, new_xyz, offset, new_offset): + """ + input: xyz: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b) + output: idx: (m, nsample), dist2: (m, nsample) + """ + if new_xyz is None: + new_xyz = xyz + assert xyz.is_contiguous() and new_xyz.is_contiguous() + m = new_xyz.shape[0] + idx = torch.cuda.IntTensor(m, nsample).zero_() + dist2 = torch.cuda.FloatTensor(m, nsample).zero_() + pointops_cuda.knnquery_cuda( + m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2 + ) + return idx, torch.sqrt(dist2) + + +knnquery = KNNQuery.apply + + +class Grouping(Function): + @staticmethod + def forward(ctx, input, idx): + """ + input: input: (n, c), idx : (m, nsample) + output: (m, nsample, c) + """ + assert input.is_contiguous() and idx.is_contiguous() + m, nsample, n, c = idx.shape[0], idx.shape[1], input.shape[0], input.shape[1] + output = torch.cuda.FloatTensor(m, nsample, c) + pointops_cuda.grouping_forward_cuda(m, nsample, c, input, idx, output) + ctx.n = n + ctx.save_for_backward(idx) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_out: (m, c, nsample) + output: (n, c), None + """ + n = ctx.n + (idx,) = ctx.saved_tensors + m, nsample, c = grad_output.shape + grad_input = torch.cuda.FloatTensor(n, c).zero_() + pointops_cuda.grouping_backward_cuda( + m, nsample, c, grad_output, idx, grad_input + ) + return grad_input, None + + +grouping = Grouping.apply + + +class AttentionStep1(Function): + @staticmethod + def forward(ctx, q, k, index0, index1): + """ + input: q: (N, h, C//h), k: (N, h, C//h), index0: (M), index1: (M) + output: output: [N, h, C//h] + """ + assert ( + q.is_contiguous() + and k.is_contiguous() + and index0.is_contiguous() + and index1.is_contiguous() + ) + + N_q, h, C_div_h = q.shape + N_k = k.shape[0] + M = index0.shape[0] + C = int(C_div_h * h) + + output = torch.cuda.FloatTensor(M, h).zero_() + pointops_cuda.attention_step1_forward_cuda( + N_k, M, h, C, q, k, index0, index1, output + ) + ctx.N_q = N_q + ctx.N_k = N_k + ctx.C = C + ctx.save_for_backward(q, k, index0, index1) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_output: (N, h, C//h) + output: (M, h), (N, h, C//h), None, None + """ + + N_q = ctx.N_q + N_k = ctx.N_k + C = ctx.C + q, k, index0, index1 = ctx.saved_tensors + M, h = grad_output.shape + + grad_output = grad_output.contiguous() + # print("grad_output.is_contiguous(): ", 
grad_output.is_contiguous()) + assert ( + q.is_contiguous() + and k.is_contiguous() + and index0.is_contiguous() + and index1.is_contiguous() + and grad_output.is_contiguous() + ) + + # print("back: attn[:5,:5]: ", attn[:5, :5]) + + # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape)) + + grad_q = torch.cuda.FloatTensor(N_q, h, C // h).zero_() + grad_k = torch.cuda.FloatTensor(N_k, h, C // h).zero_() + + # torch.cuda.synchronize() + # start = time.time() + + pointops_cuda.attention_step1_backward_cuda( + N_q, M, h, C, grad_output, index0, index1, q, k, grad_q, grad_k + ) + + # torch.cuda.synchronize() + # end = time.time() + # print("time v7: {}".format(end - start)) + # # input() + + return grad_q, grad_k, None, None + + +attention_step1 = AttentionStep1.apply + + +class AttentionStep1_v2(Function): + @staticmethod + def forward(ctx, q, k, index1, index0_offsets, n_max): + """ + input: q: (N, h, C//h), k: (N, h, C//h), index0: (M), index1: (M) + output: output: [N, h, C//h] + """ + assert ( + q.is_contiguous() + and k.is_contiguous() + and index0_offsets.is_contiguous() + and index1.is_contiguous() + ) + assert n_max <= 1024 + + N_q, h, C_div_h = q.shape + N_k = k.shape[0] + M = index1.shape[0] + C = int(C_div_h * h) + + output = torch.cuda.FloatTensor(M, h).zero_() + pointops_cuda.attention_step1_forward_cuda_v2( + N_k, M, h, C, n_max, q, k, index0_offsets, index1, output + ) + ctx.N_q = N_q + ctx.N_k = N_k + ctx.C = C + ctx.n_max = n_max + ctx.save_for_backward(q, k, index0_offsets, index1) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_output: (N, h, C//h) + output: (M, h), (N, h, C//h), None, None + """ + + N_q = ctx.N_q + N_k = ctx.N_k + C = ctx.C + n_max = ctx.n_max + q, k, index0_offsets, index1 = ctx.saved_tensors + M, h = grad_output.shape + + grad_output = grad_output.contiguous() + # print("grad_output.is_contiguous(): ", grad_output.is_contiguous()) + assert ( + q.is_contiguous() + and k.is_contiguous() + and index0_offsets.is_contiguous() + and index1.is_contiguous() + and grad_output.is_contiguous() + ) + + # print("back: attn[:5,:5]: ", attn[:5, :5]) + + # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape)) + + grad_q = torch.cuda.FloatTensor(N_q, h, C // h).zero_() + grad_k = torch.cuda.FloatTensor(N_k, h, C // h).zero_() + + # torch.cuda.synchronize() + # start = time.time() + + pointops_cuda.attention_step1_backward_cuda_v2( + N_q, + M, + h, + C, + n_max, + grad_output, + index0_offsets, + index1, + q, + k, + grad_q, + grad_k, + ) + + # torch.cuda.synchronize() + # end = time.time() + # print("time v7: {}".format(end - start)) + # # input() + + return grad_q, grad_k, None, None, None + + +attention_step1_v2 = AttentionStep1_v2.apply + + +class AttentionStep2(Function): + @staticmethod + def forward(ctx, attn, v, index0, index1): + """ + input: attn: (M, h), v: (N, h, C//h), index0: (M), index1: (M) + output: output: [N, h, C//h] + """ + assert ( + attn.is_contiguous() + and v.is_contiguous() + and index0.is_contiguous() + and index1.is_contiguous() + ) + + M, h = attn.shape + N_q = index0.max().item() + 1 + N_v, h, C_div_h = v.shape + C = int(C_div_h * h) + + output = torch.cuda.FloatTensor(N_q, h, C // h).zero_() + pointops_cuda.attention_step2_forward_cuda( + N_q, M, h, C, attn, v, index0, index1, output + ) + ctx.M = M + + # print("attn[:5,:5]: ", attn[:5, :5]) + + 
ctx.save_for_backward(attn, v, index0, index1) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_output: (N, h, C//h) + output: (M, h), (N, h, C//h), None, None + """ + M = ctx.M + attn, v, index0, index1 = ctx.saved_tensors + N_v = v.shape[0] + N_q, h, C_div_h = grad_output.shape + C = h * C_div_h + + grad_output = grad_output.contiguous() + # print("grad_output.is_contiguous(): ", grad_output.is_contiguous()) + assert ( + attn.is_contiguous() + and v.is_contiguous() + and index0.is_contiguous() + and index1.is_contiguous() + and grad_output.is_contiguous() + ) + + # print("back: attn[:5,:5]: ", attn[:5, :5]) + + # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape)) + + grad_attn = torch.cuda.FloatTensor(M, h).zero_() + grad_v = torch.cuda.FloatTensor(N_v, h, C // h).zero_() + + # torch.cuda.synchronize() + # start = time.time() + + pointops_cuda.attention_step2_backward_cuda( + N_q, M, h, C, grad_output, index0, index1, attn, v, grad_attn, grad_v + ) + + # torch.cuda.synchronize() + # end = time.time() + # print("time v8: {}".format(end - start)) + # # input() + + return grad_attn, grad_v, None, None + + +attention_step2 = AttentionStep2.apply + + +class AttentionStep2_v2(Function): + @staticmethod + def forward(ctx, attn, v, index0, index1): + """ + input: attn: (M, h), v: (N, h, C//h), index0: (M), index1: (M) + output: output: [L, h, C//h] + """ + assert ( + attn.is_contiguous() + and v.is_contiguous() + and index0.is_contiguous() + and index1.is_contiguous() + ) + + L = int(index0.max().item()) + 1 + + M, h = attn.shape + N, h, C_div_h = v.shape + C = int(C_div_h * h) + + output = torch.cuda.FloatTensor(L, h, C // h).zero_() + pointops_cuda.attention_step2_forward_cuda( + N, M, h, C, attn, v, index0, index1, output + ) + ctx.M = M + + # print("attn[:5,:5]: ", attn[:5, :5]) + + ctx.save_for_backward(attn, v, index0, index1) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_output: (L, h, C//h) + output: (M, h), (N, h, C//h), None, None + """ + M = ctx.M + attn, v, index0, index1 = ctx.saved_tensors + L, h, C_div_h = grad_output.shape + N = v.shape[0] + C = h * C_div_h + + grad_output = grad_output.contiguous() + # print("grad_output.is_contiguous(): ", grad_output.is_contiguous()) + assert ( + attn.is_contiguous() + and v.is_contiguous() + and index0.is_contiguous() + and index1.is_contiguous() + and grad_output.is_contiguous() + ) + + # print("back: attn[:5,:5]: ", attn[:5, :5]) + + # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape)) + + grad_attn = torch.cuda.FloatTensor(M, h).zero_() + grad_v = torch.cuda.FloatTensor(N, h, C // h).zero_() + + pointops_cuda.attention_step2_backward_cuda( + N, M, h, C, grad_output, index0, index1, attn, v, grad_attn, grad_v + ) + return grad_attn, grad_v, None, None + + +attention_step2_v2 = AttentionStep2_v2.apply + + +class DotProdWithIdx(Function): + @staticmethod + def forward(ctx, q, index, table, rel_idx): + """ + input: q: (N, h, hdim), index: (M), table: (L, h, hdim, 3), rel_idx: (M, 3) + output: output: [M, h] + """ + assert ( + q.is_contiguous() + and index.is_contiguous() + and table.is_contiguous() + and rel_idx.is_contiguous() + ) + + N, h, hdim = q.shape + M = index.shape[0] + + output = torch.cuda.FloatTensor(M, h).zero_() + pointops_cuda.dot_prod_with_idx_forward_cuda( + N, M, h, hdim, q, index, table, 
rel_idx, output + ) + ctx.save_for_backward(q, index, table, rel_idx) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_output: [M, h] + output: (N, h, hdim), None, (L, h, hdim, 3), None + """ + q, index, table, rel_idx = ctx.saved_tensors + M, h = grad_output.shape + N, _, hdim = q.shape + L = table.shape[0] + + grad_output = grad_output.contiguous() + assert ( + q.is_contiguous() + and index.is_contiguous() + and table.is_contiguous() + and rel_idx.is_contiguous() + and grad_output.is_contiguous() + ) + + # print("back: attn[:5,:5]: ", attn[:5, :5]) + + # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape)) + + grad_q = torch.cuda.FloatTensor(N, h, hdim).zero_() + grad_table = torch.cuda.FloatTensor(L, h, hdim, 3).zero_() + + # torch.cuda.synchronize() + # start = time.time() + + pointops_cuda.dot_prod_with_idx_backward_cuda( + N, M, h, hdim, grad_output, q, index, table, rel_idx, grad_q, grad_table + ) + + # torch.cuda.synchronize() + # end = time.time() + # print("time v9: {}".format(end - start)) + # # input() + + return grad_q, None, grad_table, None + + +dot_prod_with_idx = DotProdWithIdx.apply + + +class DotProdWithIdx_v2(Function): + @staticmethod + def forward(ctx, q, index_q, k, index_k, table_q, table_k, rel_idx): + """ + input: q: (N, h, hdim), index_q: (M), k: (N, h, hdim), index_k: (M), table_q: (L, h, hdim, 3), table_k: (L, h, hdim, 3), rel_idx: (M, 3) + output: output: [M, h] + """ + assert ( + q.is_contiguous() + and index_q.is_contiguous() + and k.is_contiguous() + and index_k.is_contiguous() + and table_q.is_contiguous() + and table_k.is_contiguous() + and rel_idx.is_contiguous() + ) + + N, h, hdim = q.shape + M = index_q.shape[0] + L = table_q.shape[0] + assert table_k.shape[0] == L and index_k.shape[0] == M + + # obtain the mapping from block_idx to m_idx + rel_idx_merge = ( + rel_idx[:, 0] + rel_idx[:, 1] * L + rel_idx[:, 2] * (L**2) + ) # [M, ] + sorted_values, sort_indices = torch.sort(rel_idx_merge) + _, counts = torch.unique_consecutive(sorted_values, return_counts=True) + rel_idx_offsets = torch.cumsum(counts, dim=-1) # [T,] + rel_idx_offsets = torch.cat( + [torch.zeros(1, dtype=torch.long).cuda(), rel_idx_offsets], 0 + ) # [T+1,] + n_max = counts.max() + T = counts.shape[0] + + # print("M: {}, L: {}, n_max: {}, T: {}".format(M, L, n_max, T)) + # print("rel_idx_merge.shape: {}, sorted_values.shape: {}".format(rel_idx_merge.shape, sorted_values.shape)) + # print("counts.shape: {}".format(counts.shape)) + + output = torch.cuda.FloatTensor(M, h).zero_() + # pointops_cuda.dot_prod_with_idx_forward_cuda(N, M, h, hdim, q, index, table, rel_idx, output) + pointops_cuda.dot_prod_with_idx_forward_cuda_v2( + N, + M, + h, + hdim, + n_max, + T, + q, + index_q, + k, + index_k, + table_q, + table_k, + rel_idx, + rel_idx_offsets.int(), + sort_indices.int(), + output, + ) + + ctx.n_max = n_max + ctx.T = T + ctx.save_for_backward( + q, + index_q, + k, + index_k, + table_q, + table_k, + rel_idx, + rel_idx_offsets, + sort_indices, + ) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_output: [M, h] + output: (N, h, hdim), None, (L, h, hdim, 3), None + """ + ( + q, + index_q, + k, + index_k, + table_q, + table_k, + rel_idx, + rel_idx_offsets, + sort_indices, + ) = ctx.saved_tensors + M, h = grad_output.shape + N, _, hdim = q.shape + L = table_q.shape[0] + T, n_max = ctx.T, ctx.n_max + + grad_output = grad_output.contiguous() + assert 
( + q.is_contiguous() + and index_q.is_contiguous() + and k.is_contiguous() + and index_k.is_contiguous() + and table_q.is_contiguous() + and table_k.is_contiguous() + and rel_idx.is_contiguous() + and rel_idx_offsets.is_contiguous() + and sort_indices.is_contiguous() + and grad_output.is_contiguous() + ) + + # print("back: attn[:5,:5]: ", attn[:5, :5]) + + # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape)) + + grad_q = torch.cuda.FloatTensor(N, h, hdim).zero_() + grad_table_q = torch.cuda.FloatTensor(L, h, hdim, 3).zero_() + grad_k = torch.cuda.FloatTensor(N, h, hdim).zero_() + grad_table_k = torch.cuda.FloatTensor(L, h, hdim, 3).zero_() + + # torch.cuda.synchronize() + # start = time.time() + + pointops_cuda.dot_prod_with_idx_backward_cuda_v2( + N, + M, + h, + hdim, + n_max, + T, + grad_output, + q, + index_q, + k, + index_k, + table_q, + table_k, + rel_idx, + rel_idx_offsets.int(), + sort_indices.int(), + grad_q, + grad_k, + grad_table_q, + grad_table_k, + ) + + # torch.cuda.synchronize() + # end = time.time() + # print("time v9: {}".format(end - start)) + # # input() + return grad_q, None, grad_k, None, grad_table_q, grad_table_k, None + + +dot_prod_with_idx_v2 = DotProdWithIdx_v2.apply + + +class DotProdWithIdx_v3(Function): + @staticmethod + def forward(ctx, q, index_q_offsets, n_max, k, index_k, table_q, table_k, rel_idx): + """ + input: q: (N, h, hdim), index_q: (M), k: (N, h, hdim), index_k: (M), table_q: (L, h, hdim, 3), table_k: (L, h, hdim, 3), rel_idx: (M, 3) + output: output: [M, h] + """ + assert ( + q.is_contiguous() + and index_q_offsets.is_contiguous() + and k.is_contiguous() + and index_k.is_contiguous() + and table_q.is_contiguous() + and table_k.is_contiguous() + and rel_idx.is_contiguous() + ) + + N, h, hdim = q.shape + M = index_k.shape[0] + L = table_q.shape[0] + assert table_k.shape[0] == L + + # # obtain the mapping from block_idx to m_idx + # rel_idx_merge = rel_idx[:, 0] + rel_idx[:, 1] * L + rel_idx[:, 2] * (L ** 2) #[M, ] + # sorted_values, sort_indices = torch.sort(rel_idx_merge) + # _, counts = torch.unique_consecutive(sorted_values, return_counts=True) + # rel_idx_offsets = torch.cumsum(counts, dim=-1) #[T,] + # rel_idx_offsets = torch.cat([torch.zeros(1, dtype=torch.long).cuda(), rel_idx_offsets], 0) #[T+1,] + # n_max = counts.max() + # T = counts.shape[0] + + # print("M: {}, L: {}, n_max: {}, T: {}".format(M, L, n_max, T)) + # print("rel_idx_merge.shape: {}, sorted_values.shape: {}".format(rel_idx_merge.shape, sorted_values.shape)) + # print("counts.shape: {}".format(counts.shape)) + + # print("M: {}, L: {}, n_max: {}".format(M, L, n_max)) + + output = torch.cuda.FloatTensor(M, h).zero_() + # pointops_cuda.dot_prod_with_idx_forward_cuda(N, M, h, hdim, q, index, table, rel_idx, output) + pointops_cuda.dot_prod_with_idx_forward_cuda_v3( + N, + M, + h, + hdim, + n_max, + q, + index_q_offsets, + k, + index_k, + table_q, + table_k, + rel_idx, + output, + ) + + ctx.n_max = n_max + # ctx.T = T + ctx.save_for_backward(q, index_q_offsets, k, index_k, table_q, table_k, rel_idx) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_output: [M, h] + output: (N, h, hdim), None, (L, h, hdim, 3), None + """ + q, index_q_offsets, k, index_k, table_q, table_k, rel_idx = ctx.saved_tensors + M, h = grad_output.shape + N, _, hdim = q.shape + L = table_q.shape[0] + n_max = ctx.n_max + + grad_output = grad_output.contiguous() + assert ( + q.is_contiguous() + and 
index_q_offsets.is_contiguous() + and k.is_contiguous() + and index_k.is_contiguous() + and table_q.is_contiguous() + and table_k.is_contiguous() + and rel_idx.is_contiguous() + and grad_output.is_contiguous() + ) + + # print("back: attn[:5,:5]: ", attn[:5, :5]) + + # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape)) + + grad_q = torch.cuda.FloatTensor(N, h, hdim).zero_() + grad_table_q = torch.cuda.FloatTensor(L, h, hdim, 3).zero_() + grad_k = torch.cuda.FloatTensor(N, h, hdim).zero_() + grad_table_k = torch.cuda.FloatTensor(L, h, hdim, 3).zero_() + + # torch.cuda.synchronize() + # start = time.time() + + pointops_cuda.dot_prod_with_idx_backward_cuda_v3( + N, + M, + h, + hdim, + n_max, + grad_output, + q, + index_q_offsets, + k, + index_k, + table_q, + table_k, + rel_idx, + grad_q, + grad_k, + grad_table_q, + grad_table_k, + ) + + # torch.cuda.synchronize() + # end = time.time() + # print("time v9: {}".format(end - start)) + # # input() + return grad_q, None, None, grad_k, None, grad_table_q, grad_table_k, None + + +dot_prod_with_idx_v3 = DotProdWithIdx_v3.apply + + +class AttentionStep2WithRelPosValue(Function): + @staticmethod + def forward(ctx, attn, v, index0, index1, table, rel_idx): + """ + input: attn: (M, h), v: (N, h, hdim), index0: (M), index1: (M), table: (L, h, hdim, 3), rel_idx: (M, 3) + output: output: [N, h, hdim] + """ + assert ( + attn.is_contiguous() + and v.is_contiguous() + and index0.is_contiguous() + and index1.is_contiguous() + and table.is_contiguous() + and rel_idx.is_contiguous() + ) + + M, h = attn.shape + N_v, h, hdim = v.shape + N_q = index0.max().item() + 1 + + output = torch.cuda.FloatTensor(N_q, h, hdim).zero_() + pointops_cuda.attention_step2_with_rel_pos_value_forward_cuda( + N_q, M, h, hdim, attn, v, index0, index1, table, rel_idx, output + ) + + # print("attn[:5,:5]: ", attn[:5, :5]) + + ctx.save_for_backward(attn, v, index0, index1, table, rel_idx) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_output: (N, h, C//h) + output: (M, h), (N, h, C//h), None, None, (L, h, hdim, 3), None + """ + attn, v, index0, index1, table, rel_idx = ctx.saved_tensors + N_q, h, hdim = grad_output.shape + N_v = v.shape[0] + M = attn.shape[0] + L = table.shape[0] + + grad_output = grad_output.contiguous() + # print("grad_output.is_contiguous(): ", grad_output.is_contiguous()) + assert ( + attn.is_contiguous() + and v.is_contiguous() + and index0.is_contiguous() + and index1.is_contiguous() + and grad_output.is_contiguous() + and table.is_contiguous() + and rel_idx.is_contiguous() + ) + + # print("back: attn[:5,:5]: ", attn[:5, :5]) + + # print("attn.shape: {} v.shape: {}, index0.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0.shape, index1.shape)) + + grad_attn = torch.cuda.FloatTensor(M, h).zero_() + grad_v = torch.cuda.FloatTensor(N_v, h, hdim).zero_() + grad_table = torch.cuda.FloatTensor(L, h, hdim, 3).zero_() + + # print("attn.shape: {}, grad_attn.shape: {}".format(attn.shape, grad_attn.shape)) + # print("v.shape: {}, grad_v.shape: {}".format(v.shape, grad_v.shape)) + # print("table.shape: {}, grad_table.shape: {}".format(table.shape, grad_table.shape)) + + # torch.cuda.synchronize() + # start = time.time() + + pointops_cuda.attention_step2_with_rel_pos_value_backward_cuda( + N_q, + M, + h, + hdim, + grad_output, + index0, + index1, + attn, + v, + table, + rel_idx, + grad_attn, + grad_v, + grad_table, + ) + + # torch.cuda.synchronize() + 
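+ # Hedged summary of what the backward kernel above accumulates (atomic adds):
+ #   grad_attn[m, head]  = sum_c grad_output[index0[m]] * (v[index1[m]] + rpe_m)
+ #   grad_v[index1[m]]  += attn[m] * grad_output[index0[m]]
+ #   grad_table accumulates the same product, scattered per axis by rel_idx[m]
+ # where rpe_m = table[rel_idx[m, 0], ..., 0] + table[rel_idx[m, 1], ..., 1] + table[rel_idx[m, 2], ..., 2].
+ 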
# end = time.time() + # print("time v10: {}".format(end - start)) + # # input() + return grad_attn, grad_v, None, None, grad_table, None + + +attention_step2_with_rel_pos_value = AttentionStep2WithRelPosValue.apply + + +class AttentionStep2WithRelPosValue_v2(Function): + @staticmethod + def forward(ctx, attn, v, index0_offsets, n_max, index1, table, rel_idx): + """ + input: attn: (M, h), v: (N, h, hdim), index0_offsets: (M), index1: (M), table: (L, h, hdim, 3), rel_idx: (M, 3) + output: output: [N, h, hdim] + """ + assert ( + attn.is_contiguous() + and v.is_contiguous() + and index0_offsets.is_contiguous() + and index1.is_contiguous() + and table.is_contiguous() + and rel_idx.is_contiguous() + ) + + M, h = attn.shape + N, h, hdim = v.shape + # N_q = int(index0_offsets.max().item()) + 1 + + output = torch.cuda.FloatTensor(N, h, hdim).zero_() + pointops_cuda.attention_step2_with_rel_pos_value_forward_cuda_v2( + N, + M, + h, + hdim, + n_max, + attn, + v, + index0_offsets, + index1, + table, + rel_idx, + output, + ) + + # print("attn[:5,:5]: ", attn[:5, :5]) + + ctx.n_max = n_max + ctx.save_for_backward(attn, v, index0_offsets, index1, table, rel_idx) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_output: (N, h, C//h) + output: (M, h), (N, h, C//h), None, None, (L, h, hdim, 3), None + """ + n_max = ctx.n_max + attn, v, index0_offsets, index1, table, rel_idx = ctx.saved_tensors + N, h, hdim = grad_output.shape + N = v.shape[0] + M = attn.shape[0] + L = table.shape[0] + + # grad_output = grad_output.contiguous() + # print("grad_output.is_contiguous(): ", grad_output.is_contiguous()) + assert ( + attn.is_contiguous() + and v.is_contiguous() + and index0_offsets.is_contiguous() + and index1.is_contiguous() + and grad_output.is_contiguous() + and table.is_contiguous() + and rel_idx.is_contiguous() + ) + + # print("back: attn[:5,:5]: ", attn[:5, :5]) + + # print("attn.shape: {} v.shape: {}, index0_offsets.shape: {}, index1.shape: {}".format(attn.shape, v.shape, index0_offsets.shape, index1.shape)) + + grad_attn = torch.cuda.FloatTensor(M, h).zero_() + grad_v = torch.cuda.FloatTensor(N, h, hdim).zero_() + grad_table = torch.cuda.FloatTensor(L, h, hdim, 3).zero_() + + # print("attn.shape: {}, grad_attn.shape: {}".format(attn.shape, grad_attn.shape)) + # print("v.shape: {}, grad_v.shape: {}".format(v.shape, grad_v.shape)) + # print("table.shape: {}, grad_table.shape: {}".format(table.shape, grad_table.shape)) + + # torch.cuda.synchronize() + # start = time.time() + + pointops_cuda.attention_step2_with_rel_pos_value_backward_cuda_v2( + N, + M, + h, + hdim, + n_max, + grad_output, + index0_offsets, + index1, + attn, + v, + table, + rel_idx, + grad_attn, + grad_v, + grad_table, + ) + + # torch.cuda.synchronize() + # end = time.time() + # print("time v10: {}".format(end - start)) + + return grad_attn, grad_v, None, None, None, grad_table, None + + +attention_step2_with_rel_pos_value_v2 = AttentionStep2WithRelPosValue_v2.apply + + +def queryandgroup( + nsample, + xyz, + new_xyz, + feat, + idx, + offset, + new_offset, + use_xyz=True, + return_indx=False, +): + """ + input: xyz: (n, 3), new_xyz: (m, 3), feat: (n, c), idx: (m, nsample), offset: (b), new_offset: (b) + output: new_feat: (m, c+3, nsample), grouped_idx: (m, nsample) + """ + assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous() + if new_xyz is None: + new_xyz = xyz + if idx is None: + idx, _ = knnquery(nsample, xyz, new_xyz, offset, new_offset) # (m, nsample) + + n, m, c = 
xyz.shape[0], new_xyz.shape[0], feat.shape[1]
+ grouped_xyz = xyz[idx.view(-1).long(), :].view(m, nsample, 3)  # (m, nsample, 3)
+ # grouped_xyz = grouping(xyz, idx) # (m, nsample, 3)
+ # relative position: re-center neighbor coordinates on the query point
+ grouped_xyz -= new_xyz.unsqueeze(1)  # (m, nsample, 3)
+ grouped_feat = feat[idx.view(-1).long(), :].view(m, nsample, c)  # (m, nsample, c)
+ # grouped_feat = grouping(feat, idx) # (m, nsample, c)
+ if use_xyz:
+ if return_indx:
+ return torch.cat((grouped_xyz, grouped_feat), -1), idx  # (m, nsample, 3+c)
+ else:
+ return torch.cat((grouped_xyz, grouped_feat), -1)
+ else:
+ if return_indx:
+ return grouped_feat, idx
+ else:
+ return grouped_feat
+
+
+ def Divide2Patch(nsample, xyz, offset, return_offset=False, anchor_scale=None):
+ # nsample: 16 xyz: (n, 3) offset: (b)
+ downsample_scale = anchor_scale or nsample
+ new_offset, count = [offset[0].item() // downsample_scale], offset[
+ 0
+ ].item() // downsample_scale
+ for i in range(1, offset.shape[0]):
+ count += (offset[i].item() - offset[i - 1].item()) // downsample_scale
+ new_offset.append(count)
+ # print("down sample scale:", downsample_scale, "offset:", offset, "newoffset:", new_offset)
+ new_offset = torch.cuda.IntTensor(new_offset)
+ idx = furthestsampling(xyz, offset, new_offset)  # (m)
+ new_xyz = xyz[idx.long()]
+ p_idx, _ = knnquery(nsample, xyz, new_xyz, offset, new_offset)  # (m, nsample)
+ if return_offset:
+ return p_idx, new_offset
+ else:
+ return p_idx
+
+
+ class Subtraction(Function):
+ @staticmethod
+ def forward(ctx, input1, input2, idx):
+ """
+ input: input1: (n, c), input2: (n, c), idx: (n, nsample)
+ output: (n, nsample, c)
+ """
+ assert input1.is_contiguous() and input2.is_contiguous()
+ n, c = input1.shape
+ nsample = idx.shape[-1]
+ output = torch.cuda.FloatTensor(n, nsample, c).zero_()
+ pointops_cuda.subtraction_forward_cuda(
+ n, nsample, c, input1, input2, idx, output
+ )
+ ctx.save_for_backward(idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_out: (n, nsample, c)
+ output: grad_input1: (n, c), grad_input2: (n, c)
+ """
+ (idx,) = ctx.saved_tensors
+ n, nsample, c = grad_output.shape
+ grad_input1 = torch.cuda.FloatTensor(n, c).zero_()
+ grad_input2 = torch.cuda.FloatTensor(n, c).zero_()
+ pointops_cuda.subtraction_backward_cuda(
+ n, nsample, c, idx, grad_output, grad_input1, grad_input2
+ )
+ return grad_input1, grad_input2, None
+
+
+ subtraction = Subtraction.apply
+
+
+ class Aggregation(Function):
+ @staticmethod
+ def forward(ctx, input, position, weight, idx):
+ """
+ input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, c'), idx: (n, nsample)
+ output: (n, c)
+ """
+ assert (
+ input.is_contiguous()
+ and position.is_contiguous()
+ and weight.is_contiguous()
+ )
+ n, nsample, c = position.shape
+ w_c = weight.shape[-1]
+ output = torch.cuda.FloatTensor(n, c).zero_()
+ pointops_cuda.aggregation_forward_cuda(
+ n, nsample, c, w_c, input, position, weight, idx, output
+ )
+ ctx.save_for_backward(input, position, weight, idx)
+ return output
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ input: grad_out: (n, c)
+ output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, c')
+ """
+ input, position, weight, idx = ctx.saved_tensors
+ n, nsample, c = position.shape
+ w_c = weight.shape[-1]
+ grad_input = torch.cuda.FloatTensor(n, c).zero_()
+ grad_position = torch.cuda.FloatTensor(n, nsample, c).zero_()
+ grad_weight = torch.cuda.FloatTensor(n, nsample, w_c).zero_()
+ pointops_cuda.aggregation_backward_cuda(
+ 
n, + nsample, + c, + w_c, + input, + position, + weight, + idx, + grad_output, + grad_input, + grad_position, + grad_weight, + ) + return grad_input, grad_position, grad_weight, None + + +aggregation = Aggregation.apply + + +def interpolation(xyz, new_xyz, feat, offset, new_offset, k=3): + """ + input: xyz: (m, 3), new_xyz: (n, 3), feat: (m, c), offset: (b), new_offset: (b) + output: (n, c) + """ + assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous() + idx, dist = knnquery(k, xyz, new_xyz, offset, new_offset) # (n, 3), (n, 3) + dist_recip = 1.0 / (dist + 1e-8) # (n, 3) + norm = torch.sum(dist_recip, dim=1, keepdim=True) + weight = dist_recip / norm # (n, 3) + + new_feat = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_() + for i in range(k): + new_feat += feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1) + return new_feat + + +def interpolation_v2(xyz, new_xyz, feat, offset, new_offset, k=3): + """ + input: xyz: (m, 3), new_xyz: (n, 3), feat: (m, c), offset: (b), new_offset: (b) + output: (n, c) + """ + assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous() + + idx, _ = knnquery(k, xyz, new_xyz, offset, new_offset) # (n, 3), (n, 3) + + # print("e3: idx.shape: {}, idx[:5]: {}".format(idx.shape, idx[:5])) + + dist = torch.sqrt(((new_xyz.unsqueeze(1) - xyz[idx.long()]) ** 2).sum(-1) + 1e-8) + + # print("e4: dist.shape: {}, dist[:5]: {}".format(dist.shape, dist[:5])) + # print("((_-dist)**2).max(): ", ((_-dist)**2).max()) + # input() + + dist_recip = 1.0 / (dist + 1e-8) # (n, 3) + norm = torch.sum(dist_recip, dim=1, keepdim=True) + weight = dist_recip / norm # (n, 3) + + new_feat = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_() + for i in range(k): + new_feat += feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1) + return new_feat + + +class Interpolation(Function): + @staticmethod + def forward(ctx, xyz, new_xyz, input, offset, new_offset, k=3): + """ + input: xyz: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b) + output: (n, c) + """ + assert xyz.is_contiguous() and new_xyz.is_contiguous() and input.is_contiguous() + idx, dist = knnquery(k, xyz, new_xyz, offset, new_offset) # (n, k), (n, k) + dist_recip = 1.0 / (dist + 1e-8) # (n, k) + norm = torch.sum(dist_recip, dim=1, keepdim=True) + weight = dist_recip / norm # (n, k) + + n, c, m = new_xyz.shape[0], input.shape[1], input.shape[0] + output = torch.cuda.FloatTensor(n, c).zero_() + pointops_cuda.interpolation_forward_cuda(n, c, k, input, idx, weight, output) + ctx.m, ctx.k = m, k + ctx.save_for_backward(idx, weight) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: xyz: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b) + output: (n, c) + """ + m, k = ctx.m, ctx.k + idx, weight = ctx.saved_tensors + n, c = grad_output.shape + grad_input = torch.cuda.FloatTensor(m, c).zero_() + pointops_cuda.interpolation_backward_cuda( + n, c, k, grad_output, idx, weight, grad_input + ) + return None, None, grad_input, None, None, None + + +interpolation2 = Interpolation.apply diff --git a/Pointcept/libs/pointops2/functions/pointops2.py b/Pointcept/libs/pointops2/functions/pointops2.py new file mode 100644 index 0000000000000000000000000000000000000000..e019eca4235e014421f0df3097c93bcec2d3a3d2 --- /dev/null +++ b/Pointcept/libs/pointops2/functions/pointops2.py @@ -0,0 +1,253 @@ +from typing import Tuple + +import torch +from torch.autograd import Function +import torch.nn as nn + +import 
pointops2_cuda as pointops_cuda + + +class FurthestSampling(Function): + @staticmethod + def forward(ctx, xyz, offset, new_offset): + """ + input: xyz: (n, 3), offset: (b), new_offset: (b) + output: idx: (m) + """ + assert xyz.is_contiguous() + n, b, n_max = xyz.shape[0], offset.shape[0], offset[0] + for i in range(1, b): + n_max = max(offset[i] - offset[i - 1], n_max) + idx = torch.cuda.IntTensor(new_offset[b - 1].item()).zero_() + tmp = torch.cuda.FloatTensor(n).fill_(1e10) + pointops_cuda.furthestsampling_cuda(b, n_max, xyz, offset, new_offset, tmp, idx) + del tmp + return idx + + +furthestsampling = FurthestSampling.apply + + +class KNNQuery(Function): + @staticmethod + def forward(ctx, nsample, xyz, new_xyz, offset, new_offset): + """ + input: xyz: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b) + output: idx: (m, nsample), dist2: (m, nsample) + """ + if new_xyz is None: + new_xyz = xyz + assert xyz.is_contiguous() and new_xyz.is_contiguous() + m = new_xyz.shape[0] + idx = torch.cuda.IntTensor(m, nsample).zero_() + dist2 = torch.cuda.FloatTensor(m, nsample).zero_() + pointops_cuda.knnquery_cuda( + m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2 + ) + return idx, torch.sqrt(dist2) + + +knnquery = KNNQuery.apply + + +class Grouping(Function): + @staticmethod + def forward(ctx, input, idx): + """ + input: input: (n, c), idx : (m, nsample) + output: (m, nsample, c) + """ + assert input.is_contiguous() and idx.is_contiguous() + m, nsample, n, c = idx.shape[0], idx.shape[1], input.shape[0], input.shape[1] + output = torch.cuda.FloatTensor(m, nsample, c) + pointops_cuda.grouping_forward_cuda(m, nsample, c, input, idx, output) + ctx.n = n + ctx.save_for_backward(idx) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_out: (m, c, nsample) + output: (n, c), None + """ + n = ctx.n + (idx,) = ctx.saved_tensors + m, nsample, c = grad_output.shape + grad_input = torch.cuda.FloatTensor(n, c).zero_() + pointops_cuda.grouping_backward_cuda( + m, nsample, c, grad_output, idx, grad_input + ) + return grad_input, None + + +grouping = Grouping.apply + + +def queryandgroup(nsample, xyz, new_xyz, feat, idx, offset, new_offset, use_xyz=True): + """ + input: xyz: (n, 3), new_xyz: (m, 3), feat: (n, c), idx: (m, nsample), offset: (b), new_offset: (b) + output: new_feat: (m, c+3, nsample), grouped_idx: (m, nsample) + """ + assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous() + if new_xyz is None: + new_xyz = xyz + if idx is None: + idx, _ = knnquery(nsample, xyz, new_xyz, offset, new_offset) # (m, nsample) + + n, m, c = xyz.shape[0], new_xyz.shape[0], feat.shape[1] + grouped_xyz = xyz[idx.view(-1).long(), :].view(m, nsample, 3) # (m, nsample, 3) + # grouped_xyz = grouping(xyz, idx) # (m, nsample, 3) + grouped_xyz -= new_xyz.unsqueeze(1) # (m, nsample, 3) + grouped_feat = feat[idx.view(-1).long(), :].view(m, nsample, c) # (m, nsample, c) + # grouped_feat = grouping(feat, idx) # (m, nsample, c) + + if use_xyz: + return torch.cat((grouped_xyz, grouped_feat), -1) # (m, nsample, 3+c) + else: + return grouped_feat + + +class Subtraction(Function): + @staticmethod + def forward(ctx, input1, input2, idx): + """ + input: input1: (n, c), input2: (n, c), idx: (n, nsample) + output: (n, nsample, c) + """ + assert input1.is_contiguous() and input2.is_contiguous() + n, c = input1.shape + nsample = idx.shape[-1] + output = torch.cuda.FloatTensor(n, nsample, c).zero_() + pointops_cuda.subtraction_forward_cuda( + n, nsample, c, input1, input2, idx, 
output + ) + ctx.save_for_backward(idx) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_out: (n, nsample, c) + output: grad_input1: (n, c), grad_input2: (n, c) + """ + (idx,) = ctx.saved_tensors + n, nsample, c = grad_output.shape + grad_input1 = torch.cuda.FloatTensor(n, c).zero_() + grad_input2 = torch.cuda.FloatTensor(n, c).zero_() + pointops_cuda.subtraction_backward_cuda( + n, nsample, c, idx, grad_output, grad_input1, grad_input2 + ) + return grad_input1, grad_input2, None + + +subtraction = Subtraction.apply + + +class Aggregation(Function): + @staticmethod + def forward(ctx, input, position, weight, idx): + """ + input: input: (n, c), position: (n, nsample, c), weight : (n, nsample, c'), idx: (n, nsample) + output: (n, c) + """ + assert ( + input.is_contiguous() + and position.is_contiguous() + and weight.is_contiguous() + ) + n, nsample, c = position.shape + w_c = weight.shape[-1] + output = torch.cuda.FloatTensor(n, c).zero_() + pointops_cuda.aggregation_forward_cuda( + n, nsample, c, w_c, input, position, weight, idx, output + ) + ctx.save_for_backward(input, position, weight, idx) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_out: (n, c) + output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight : (n, nsample, c') + """ + input, position, weight, idx = ctx.saved_tensors + n, nsample, c = position.shape + w_c = weight.shape[-1] + grad_input = torch.cuda.FloatTensor(n, c).zero_() + grad_position = torch.cuda.FloatTensor(n, nsample, c).zero_() + grad_weight = torch.cuda.FloatTensor(n, nsample, w_c).zero_() + pointops_cuda.aggregation_backward_cuda( + n, + nsample, + c, + w_c, + input, + position, + weight, + idx, + grad_output, + grad_input, + grad_position, + grad_weight, + ) + return grad_input, grad_position, grad_weight, None + + +aggregation = Aggregation.apply + + +def interpolation(xyz, new_xyz, feat, offset, new_offset, k=3): + """ + input: xyz: (m, 3), new_xyz: (n, 3), feat: (m, c), offset: (b), new_offset: (b) + output: (n, c) + """ + assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous() + idx, dist = knnquery(k, xyz, new_xyz, offset, new_offset) # (n, 3), (n, 3) + dist_recip = 1.0 / (dist + 1e-8) # (n, 3) + norm = torch.sum(dist_recip, dim=1, keepdim=True) + weight = dist_recip / norm # (n, 3) + + new_feat = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_() + for i in range(k): + new_feat += feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1) + return new_feat + + +class Interpolation(Function): + @staticmethod + def forward(ctx, xyz, new_xyz, input, offset, new_offset, k=3): + """ + input: xyz: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b) + output: (n, c) + """ + assert xyz.is_contiguous() and new_xyz.is_contiguous() and input.is_contiguous() + idx, dist = knnquery(k, xyz, new_xyz, offset, new_offset) # (n, k), (n, k) + dist_recip = 1.0 / (dist + 1e-8) # (n, k) + norm = torch.sum(dist_recip, dim=1, keepdim=True) + weight = dist_recip / norm # (n, k) + + n, c, m = new_xyz.shape[0], input.shape[1], input.shape[0] + output = torch.cuda.FloatTensor(n, c).zero_() + pointops_cuda.interpolation_forward_cuda(n, c, k, input, idx, weight, output) + ctx.m, ctx.k = m, k + ctx.save_for_backward(idx, weight) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: xyz: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b) + output: (n, c) + """ + m, k = ctx.m, ctx.k + idx, weight = 
ctx.saved_tensors + n, c = grad_output.shape + grad_input = torch.cuda.FloatTensor(m, c).zero_() + pointops_cuda.interpolation_backward_cuda( + n, c, k, grad_output, idx, weight, grad_input + ) + return None, None, grad_input, None, None, None + + +interpolation2 = Interpolation.apply diff --git a/Pointcept/libs/pointops2/functions/pointops_ablation.py b/Pointcept/libs/pointops2/functions/pointops_ablation.py new file mode 100644 index 0000000000000000000000000000000000000000..abfcc8bc1fb99379ff6d0fd97e19b7ca7fb0e723 --- /dev/null +++ b/Pointcept/libs/pointops2/functions/pointops_ablation.py @@ -0,0 +1,256 @@ +from typing import Tuple + +import torch +from torch.autograd import Function +import torch.nn as nn + +import pointops2_cuda as pointops_cuda + + +class FurthestSampling(Function): + @staticmethod + def forward(ctx, xyz, offset, new_offset): + """ + input: xyz: (n, 3), offset: (b), new_offset: (b) + output: idx: (m) + """ + assert xyz.is_contiguous() + n, b, n_max = xyz.shape[0], offset.shape[0], offset[0] + for i in range(1, b): + n_max = max(offset[i] - offset[i - 1], n_max) + idx = torch.cuda.IntTensor(new_offset[b - 1].item()).zero_() + tmp = torch.cuda.FloatTensor(n).fill_(1e10) + pointops_cuda.furthestsampling_cuda(b, n_max, xyz, offset, new_offset, tmp, idx) + del tmp + return idx + + +furthestsampling = FurthestSampling.apply + + +class KNNQuery(Function): + @staticmethod + def forward(ctx, nsample, xyz, new_xyz, offset, new_offset): + """ + input: xyz: (n, 3), new_xyz: (m, 3), offset: (b), new_offset: (b) + output: idx: (m, nsample), dist2: (m, nsample) + """ + if new_xyz is None: + new_xyz = xyz + assert xyz.is_contiguous() and new_xyz.is_contiguous() + m = new_xyz.shape[0] + idx = torch.cuda.IntTensor(m, nsample).zero_() + dist2 = torch.cuda.FloatTensor(m, nsample).zero_() + pointops_cuda.knnquery_cuda( + m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2 + ) + return idx, torch.sqrt(dist2) + + +knnquery = KNNQuery.apply + + +class Grouping(Function): + @staticmethod + def forward(ctx, input, idx): + """ + input: input: (n, c), idx : (m, nsample) + output: (m, nsample, c) + """ + assert input.is_contiguous() and idx.is_contiguous() + m, nsample, n, c = idx.shape[0], idx.shape[1], input.shape[0], input.shape[1] + output = torch.cuda.FloatTensor(m, nsample, c) + pointops_cuda.grouping_forward_cuda(m, nsample, c, input, idx, output) + ctx.n = n + ctx.save_for_backward(idx) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_out: (m, c, nsample) + output: (n, c), None + """ + n = ctx.n + (idx,) = ctx.saved_tensors + m, nsample, c = grad_output.shape + grad_input = torch.cuda.FloatTensor(n, c).zero_() + pointops_cuda.grouping_backward_cuda( + m, nsample, c, grad_output, idx, grad_input + ) + return grad_input, None + + +grouping = Grouping.apply + + +def queryandgroup( + nsample, xyz, new_xyz, feat, idx, offset, new_offset, use_xyz=True, relative=True +): + """ + input: xyz: (n, 3), new_xyz: (m, 3), feat: (n, c), idx: (m, nsample), offset: (b), new_offset: (b) + output: new_feat: (m, c+3, nsample), grouped_idx: (m, nsample) + """ + assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous() + if new_xyz is None: + new_xyz = xyz + if idx is None: + idx, _ = knnquery(nsample, xyz, new_xyz, offset, new_offset) # (m, nsample) + + n, m, c = xyz.shape[0], new_xyz.shape[0], feat.shape[1] + grouped_xyz = xyz[idx.view(-1).long(), :].view(m, nsample, 3) # (m, nsample, 3) + # grouped_xyz = grouping(xyz, idx) # (m, nsample, 3) 
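+ # Ablation switch: with relative=True (the default) the grouped neighbor
+ # coordinates are re-centered on their query point below; with
+ # relative=False the absolute coordinates are concatenated instead.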
+ if relative: + grouped_xyz -= new_xyz.unsqueeze(1) # (m, nsample, 3) + grouped_feat = feat[idx.view(-1).long(), :].view(m, nsample, c) # (m, nsample, c) + # grouped_feat = grouping(feat, idx) # (m, nsample, c) + + if use_xyz: + return torch.cat((grouped_xyz, grouped_feat), -1) # (m, nsample, 3+c) + else: + return grouped_feat + + +class Subtraction(Function): + @staticmethod + def forward(ctx, input1, input2, idx): + """ + input: input1: (n, c), input2: (n, c), idx: (n, nsample) + output: (n, nsample, c) + """ + assert input1.is_contiguous() and input2.is_contiguous() + n, c = input1.shape + nsample = idx.shape[-1] + output = torch.cuda.FloatTensor(n, nsample, c).zero_() + pointops_cuda.subtraction_forward_cuda( + n, nsample, c, input1, input2, idx, output + ) + ctx.save_for_backward(idx) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_out: (n, nsample, c) + output: grad_input1: (n, c), grad_input2: (n, c) + """ + (idx,) = ctx.saved_tensors + n, nsample, c = grad_output.shape + grad_input1 = torch.cuda.FloatTensor(n, c).zero_() + grad_input2 = torch.cuda.FloatTensor(n, c).zero_() + pointops_cuda.subtraction_backward_cuda( + n, nsample, c, idx, grad_output, grad_input1, grad_input2 + ) + return grad_input1, grad_input2, None + + +subtraction = Subtraction.apply + + +class Aggregation(Function): + @staticmethod + def forward(ctx, input, position, weight, idx): + """ + input: input: (n, c), position: (n, nsample, c), weight : (n, nsample, c'), idx: (n, nsample) + output: (n, c) + """ + assert ( + input.is_contiguous() + and position.is_contiguous() + and weight.is_contiguous() + ) + n, nsample, c = position.shape + w_c = weight.shape[-1] + output = torch.cuda.FloatTensor(n, c).zero_() + pointops_cuda.aggregation_forward_cuda( + n, nsample, c, w_c, input, position, weight, idx, output + ) + ctx.save_for_backward(input, position, weight, idx) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: grad_out: (n, c) + output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight : (n, nsample, c') + """ + input, position, weight, idx = ctx.saved_tensors + n, nsample, c = position.shape + w_c = weight.shape[-1] + grad_input = torch.cuda.FloatTensor(n, c).zero_() + grad_position = torch.cuda.FloatTensor(n, nsample, c).zero_() + grad_weight = torch.cuda.FloatTensor(n, nsample, w_c).zero_() + pointops_cuda.aggregation_backward_cuda( + n, + nsample, + c, + w_c, + input, + position, + weight, + idx, + grad_output, + grad_input, + grad_position, + grad_weight, + ) + return grad_input, grad_position, grad_weight, None + + +aggregation = Aggregation.apply + + +def interpolation(xyz, new_xyz, feat, offset, new_offset, k=3): + """ + input: xyz: (m, 3), new_xyz: (n, 3), feat: (m, c), offset: (b), new_offset: (b) + output: (n, c) + """ + assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous() + idx, dist = knnquery(k, xyz, new_xyz, offset, new_offset) # (n, 3), (n, 3) + dist_recip = 1.0 / (dist + 1e-8) # (n, 3) + norm = torch.sum(dist_recip, dim=1, keepdim=True) + weight = dist_recip / norm # (n, 3) + + new_feat = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_() + for i in range(k): + new_feat += feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1) + return new_feat + + +class Interpolation(Function): + @staticmethod + def forward(ctx, xyz, new_xyz, input, offset, new_offset, k=3): + """ + input: xyz: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b) + output: (n, c) + """ 
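+ # Inverse-distance weighting over the k nearest support points (sketch of
+ # the computation below): w_i = (1 / (d_i + 1e-8)) / sum_j (1 / (d_j + 1e-8)),
+ # so the weights sum to 1 and closer supports dominate the interpolation.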
+ assert xyz.is_contiguous() and new_xyz.is_contiguous() and input.is_contiguous() + idx, dist = knnquery(k, xyz, new_xyz, offset, new_offset) # (n, k), (n, k) + dist_recip = 1.0 / (dist + 1e-8) # (n, k) + norm = torch.sum(dist_recip, dim=1, keepdim=True) + weight = dist_recip / norm # (n, k) + + n, c, m = new_xyz.shape[0], input.shape[1], input.shape[0] + output = torch.cuda.FloatTensor(n, c).zero_() + pointops_cuda.interpolation_forward_cuda(n, c, k, input, idx, weight, output) + ctx.m, ctx.k = m, k + ctx.save_for_backward(idx, weight) + return output + + @staticmethod + def backward(ctx, grad_output): + """ + input: xyz: (m, 3), new_xyz: (n, 3), input: (m, c), offset: (b), new_offset: (b) + output: (n, c) + """ + m, k = ctx.m, ctx.k + idx, weight = ctx.saved_tensors + n, c = grad_output.shape + grad_input = torch.cuda.FloatTensor(m, c).zero_() + pointops_cuda.interpolation_backward_cuda( + n, c, k, grad_output, idx, weight, grad_input + ) + return None, None, grad_input, None, None, None + + +interpolation2 = Interpolation.apply diff --git a/Pointcept/libs/pointops2/functions/test_attention_op_step1.py b/Pointcept/libs/pointops2/functions/test_attention_op_step1.py new file mode 100644 index 0000000000000000000000000000000000000000..b2d8428c8e283811db156acc0e6ba563f92e72ce --- /dev/null +++ b/Pointcept/libs/pointops2/functions/test_attention_op_step1.py @@ -0,0 +1,106 @@ +import torch +import pointops +from torch_scatter import ( + scatter_max, + scatter_mean, + scatter_add, + scatter_min, + scatter_sum, +) + +torch.manual_seed(1) + +M = 800000 +N = 35000 +C = 96 +h = 6 +query = torch.rand(N, h, C // h).cuda() +key = torch.rand(N, h, C // h).cuda() + +index_0 = torch.rand(M) +index_0[index_0 < 0] = 0 +index_0 = (index_0 * N).long().cuda() + +index_1 = torch.rand(M) +index_1[index_1 < 0] = 0 +index_1 = (index_1 * N).long().cuda() + +query.requires_grad = True +key.requires_grad = True + +# rearrange index for acceleration +index_0, indices = torch.sort(index_0) # [M,] +index_1 = index_1[indices] # [M,] +index_0_counts = index_0.bincount() + +print("index_0_counts.shape: ", index_0_counts.shape) + +n_max = index_0_counts.max() +index_0_offsets = index_0_counts.cumsum(dim=-1) # [N] + +print("v1 index_0_offsets.shape: ", index_0_offsets.shape) + +index_0_offsets = torch.cat( + [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 +) # [N+1] + +# print("index_0[:100]: ", index_0[:100]) +print("n_max: ", n_max) +print("index_0_offsets.shape: ", index_0_offsets.shape) +# input() + +print("index_0_offsets[:100]: ", index_0_offsets[:100]) +print("index_1[300:320]: ", index_1[300:320]) + + +attn_flat = pointops.attention_step1( + query.float(), key.float(), index_0.int(), index_1.int() +) +# loss = attn_flat.sum() +# loss.backward() +print( + "attn_flat.shape: {}, attn_flat[300:320,:10]: {}".format( + attn_flat.shape, attn_flat[300:320, :10] + ) +) +# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) +# print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) +# input() + +print("query.is_contiguous(): ", query.is_contiguous()) +print("key.is_contiguous(): ", key.is_contiguous()) +print("index_0.is_contiguous(): ", index_0.is_contiguous()) +print("index_1.is_contiguous(): ", index_1.is_contiguous()) + +attn_flat_v2 = pointops.attention_step1_v2( + query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max +) +# loss = attn_flat_v2.sum() +# loss.backward() +print( + "attn_flat_v2.shape: {}, attn_flat_v2[300:320,:10]: {}".format( + attn_flat_v2.shape, attn_flat_v2[300:320, 
:10] + ) +) +# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) +# print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) +# input() + +mask = attn_flat_v2.sum(-1) != 0 +print("mask.sum(): ", mask.sum()) +print( + "attn_flat_v2[mask] - attn_flat[mask]: ", + ((attn_flat_v2[mask] - attn_flat[mask]) ** 2).max(), +) + + +print( + "((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ", + ((attn_flat - attn_flat_v2) ** 2 < 1e-8).all(), +) + +selected = 10000 +print( + "torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ", + torch.max((attn_flat[:selected] - attn_flat_v2[:selected]) ** 2, 0), +) diff --git a/Pointcept/libs/pointops2/functions/test_attention_op_step1_v2.py b/Pointcept/libs/pointops2/functions/test_attention_op_step1_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..941ea13da10fd4567aeb16b30740899535d6a0a6 --- /dev/null +++ b/Pointcept/libs/pointops2/functions/test_attention_op_step1_v2.py @@ -0,0 +1,123 @@ +import torch +import pointops +from torch_scatter import ( + scatter_max, + scatter_mean, + scatter_add, + scatter_min, + scatter_sum, +) + +torch.manual_seed(1) + +M = 800000 +N = 35000 +C = 96 +h = 6 +query = torch.rand(N, h, C // h).cuda() +key = torch.rand(N, h, C // h).cuda() + +index_0 = torch.rand(M) +index_0[index_0 < 0] = 0 +index_0 = (index_0 * N).long().cuda() + +index_1 = torch.rand(M) +index_1[index_1 < 0] = 0 +index_1 = (index_1 * N).long().cuda() + +query.requires_grad = True +key.requires_grad = True + + +attn_flat = pointops.attention_step1( + query.float(), key.float(), index_0.int(), index_1.int() +) +loss = attn_flat.sum() +loss.backward() +print( + "attn_flat.shape: {}, attn_flat[:20,:10]: {}".format( + attn_flat.shape, attn_flat[:20, :10] + ) +) +print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) +print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) +input() + + +# rearrange index for acceleration +index_0, indices = torch.sort(index_0) # [M,] +index_1 = index_1[indices] # [M,] +index_0_counts = index_0.bincount() + +print("index_0_counts.shape: ", index_0_counts.shape) + +n_max = index_0_counts.max() +index_0_offsets = index_0_counts.cumsum(dim=-1) # [N] + +print("v1 index_0_offsets.shape: ", index_0_offsets.shape) + +index_0_offsets = torch.cat( + [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 +) # [N+1] + +# print("index_0[:100]: ", index_0[:100]) +print("n_max: ", n_max) +print("index_0_offsets.shape: ", index_0_offsets.shape) +# input() + +print("index_0_offsets[:100]: ", index_0_offsets[:100]) +print("index_1[:20]: ", index_1[:20]) + + +attn_flat = pointops.attention_step1( + query.float(), key.float(), index_0.int(), index_1.int() +) +# loss = attn_flat.sum() +# loss.backward() +# # attn_flat = pointops.attention_step1(query.float(), key.float(), index_0.int(), index_1.int()) +# # loss = attn_flat.sum() +# # loss.backward() +# print("attn_flat.shape: {}, attn_flat[:20,:10]: {}".format(attn_flat.shape, attn_flat[:20,:10])) +# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) +# print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) +# input() + +print("query.is_contiguous(): ", query.is_contiguous()) +print("key.is_contiguous(): ", key.is_contiguous()) +print("index_0.is_contiguous(): ", index_0.is_contiguous()) +print("index_1.is_contiguous(): ", index_1.is_contiguous()) + +attn_flat_v2 = pointops.attention_step1_v2( + query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max +) +loss = attn_flat_v2.sum() +loss.backward() + +# attn_flat_v2 = 
pointops.attention_step1_v2(query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max) +# loss = attn_flat_v2.sum() +# loss.backward() + +print( + "attn_flat_v2.shape: {}, attn_flat_v2[:20,:10]: {}".format( + attn_flat_v2.shape, attn_flat_v2[:20, :10] + ) +) +print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) +print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) +# input() + +# mask = attn_flat_v2.sum(-1) != 0 +# print("mask.sum(): ", mask.sum()) +# print("attn_flat_v2[mask] - attn_flat[mask]: ", ((attn_flat_v2[mask] - attn_flat[mask])**2).max()) + + +print( + "((attn_flat-attn_flat_v2)**2 < 1e-8).all(): ", + ((attn_flat - attn_flat_v2) ** 2 < 1e-8).all(), +) + +selected = 10000 +print( + "torch.max((attn_flat[:selected]-attn_flat_v2[:selected])**2, 0): ", + torch.max((attn_flat[:selected] - attn_flat_v2[:selected]) ** 2, 0), +) diff --git a/Pointcept/libs/pointops2/functions/test_attention_op_step2.py b/Pointcept/libs/pointops2/functions/test_attention_op_step2.py new file mode 100644 index 0000000000000000000000000000000000000000..036340377abedda932c74bb014ec82052ef2c884 --- /dev/null +++ b/Pointcept/libs/pointops2/functions/test_attention_op_step2.py @@ -0,0 +1,62 @@ +import torch +import pointops +from torch_scatter import ( + scatter_max, + scatter_mean, + scatter_add, + scatter_min, + scatter_sum, +) + +torch.manual_seed(1) + +M = 800000 +N = 35000 +C = 96 +h = 6 +softmax_attn_flat = torch.rand(M, h).cuda() +value = torch.rand(N, h, C // h).cuda() + +index_0 = torch.rand(M) +index_0[index_0 < 0] = 0 +index_0 = (index_0 * N).long().cuda() + +index_1 = torch.rand(M) +index_1[index_1 < 0] = 0 +index_1 = (index_1 * N).long().cuda() + +softmax_attn_flat.requires_grad = True +value.requires_grad = True + +# value_flat = value[index_1] #[M, num_heads, C // num_heads] +# x = (softmax_attn_flat.unsqueeze(-1) * value_flat).reshape(M, C) +# x = scatter_sum(src=x, index=index_0, dim=0, dim_size=N) #[N, C] +# loss = x.sum() +# loss.backward() + +# print("x.shape: {}, x[:5,:10]: {}".format(x.shape, x[:5,:10])) +# print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10]) +# print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5]) +# input() + +print("softmax_attn_flat.is_contiguous(): ", softmax_attn_flat.is_contiguous()) +print("value.is_contiguous(): ", value.is_contiguous()) +print("index_0.is_contiguous(): ", index_0.is_contiguous()) +print("index_1.is_contiguous(): ", index_1.is_contiguous()) + +x_v2 = pointops.attention_step2( + softmax_attn_flat.float(), value.float(), index_0.int(), index_1.int() +) +x_v2 = x_v2.view(N, C) +loss = x_v2.sum() +loss.backward() + +print("x_v2.shape: {}, x_v2[:5,:10]: {}".format(x_v2.shape, x_v2[:5, :10])) + +print("softmax_attn_flat.grad[:5, :10]: ", softmax_attn_flat.grad[:5, :10]) +print("value.grad[:5, :3, :5]: ", value.grad[:5, :3, :5]) +input() + +print("((x-x_v2)**2 < 1e-8).all(): ", ((x - x_v2) ** 2 < 1e-8).all()) + +print("torch.max((x-x_v2)**2): ", torch.max((x - x_v2) ** 2)) diff --git a/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1.py b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1.py new file mode 100644 index 0000000000000000000000000000000000000000..145c0fcbf765e65b52afc0c9fcc49041fdfd7d0d --- /dev/null +++ b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1.py @@ -0,0 +1,65 @@ +import torch +import pointops +from torch_scatter import ( + scatter_max, + scatter_mean, + scatter_add, + scatter_min, + scatter_sum, +) + 
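+# Cross-check of pointops.dot_prod_with_idx against the pure-PyTorch reference
+# kept commented out below. For each of the M sampled pairs it computes
+#   output[m, head] = <query[index[m], head],
+#                      table[rel_index[m, 0], head, :, 0]
+#                      + table[rel_index[m, 1], head, :, 1]
+#                      + table[rel_index[m, 2], head, :, 2]>
+# The sizes (M, N, h, hdim, L) are synthetic and chosen only for this test.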
+torch.manual_seed(1) + +M = 80000 +N = 3500 +hdim = 16 +h = 6 +L = 31 +query = torch.rand(N, h, hdim).cuda() +table = torch.rand(L, h, hdim, 3).cuda() + +index = torch.rand(M) +index[index < 0] = 0 +index = (index * N).long().cuda() + +rel_index = torch.rand(M, 3) +rel_index[rel_index < 0] = 0 +rel_index = (rel_index * L).long().cuda() + +query.requires_grad = True +table.requires_grad = True + +# query_flat = query[index] #[M, h, hdim] +# table_x, table_y, table_z = table[:,:,:,0], table[:,:,:,1], table[:,:,:,2] #[L, h, hdim] +# rel_index_x, rel_index_y, rel_index_z = rel_index[:,0], rel_index[:,1], rel_index[:,2] #[M] +# rel_pos_encoding = table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z] #[M, h, hdim] +# output = (query_flat * rel_pos_encoding).sum(-1) #[M, h] +# loss = output.mean() +# loss.backward() + +# print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) +# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) +# print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) +# input() + +# print("query.is_contiguous(): ", query.is_contiguous()) +# print("key.is_contiguous(): ", key.is_contiguous()) +# print("index_0.is_contiguous(): ", index_0.is_contiguous()) +# print("index_1.is_contiguous(): ", index_1.is_contiguous()) + +output_v2 = pointops.dot_prod_with_idx(query, index.int(), table, rel_index.int()) +loss = output_v2.mean() +loss.backward() + +print( + "output_v2.shape: {}, output_v2[:5,:10]: {}".format( + output_v2.shape, output_v2[:5, :10] + ) +) +print("v2: query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) +print("v2: table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) +input() + +# print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max()) + +# print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2)) diff --git a/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1_v2.py b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..9bf9975a725bfb30e8c73449668e41e0f53917d5 --- /dev/null +++ b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1_v2.py @@ -0,0 +1,75 @@ +import torch +import pointops +from torch_scatter import ( + scatter_max, + scatter_mean, + scatter_add, + scatter_min, + scatter_sum, +) + +torch.manual_seed(1) + +M = 80000 +N = 3500 +hdim = 16 +h = 6 +L = 31 +query = torch.rand(N, h, hdim).cuda() +table_q = torch.rand(L, h, hdim, 3).cuda() +key = torch.rand(N, h, hdim).cuda() +table_k = torch.rand(L, h, hdim, 3).cuda() + +index_q = torch.rand(M) +index_q[index_q < 0] = 0 +index_q = (index_q * N).long().cuda() + +index_k = torch.rand(M) +index_k[index_k < 0] = 0 +index_k = (index_k * N).long().cuda() + +rel_index = torch.rand(M, 3) +rel_index[rel_index < 0] = 0 +rel_index = (rel_index * L).long().cuda() + +query.requires_grad = True +table_q.requires_grad = True +key.requires_grad = True +table_k.requires_grad = True + +output1 = pointops.dot_prod_with_idx(query, index_q.int(), table_q, rel_index.int()) +output2 = pointops.dot_prod_with_idx(key, index_k.int(), table_k, rel_index.int()) +output = output1 + output2 +# loss = output.mean() +# loss.backward() + +# print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) +# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) +# print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) +# print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) 
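+# The fused v2 op exercised below should reproduce the two-call reference above:
+#   dot_prod_with_idx_v2(q, iq, k, ik, Tq, Tk, r)
+#       == dot_prod_with_idx(q, iq, Tq, r) + dot_prod_with_idx(k, ik, Tk, r)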
+# print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) +# input() + +# print("query.is_contiguous(): ", query.is_contiguous()) +# print("key.is_contiguous(): ", key.is_contiguous()) +# print("index_0.is_contiguous(): ", index_0.is_contiguous()) +# print("index_1.is_contiguous(): ", index_1.is_contiguous()) + +output_v2 = pointops.dot_prod_with_idx_v2( + query, index_q.int(), key, index_k.int(), table_q, table_k, rel_index.int() +) +loss = output_v2.mean() +loss.backward() + +print( + "output_v2.shape: {}, output_v2[:5,:10]: {}".format( + output_v2.shape, output_v2[:5, :10] + ) +) +print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) +print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) +print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) +print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) +# input() + +print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max()) diff --git a/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1_v3.py b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1_v3.py new file mode 100644 index 0000000000000000000000000000000000000000..3738ba69b0d9be7aa9e890b0357f8de6cc42708b --- /dev/null +++ b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step1_v3.py @@ -0,0 +1,106 @@ +import torch +import pointops +from torch_scatter import ( + scatter_max, + scatter_mean, + scatter_add, + scatter_min, + scatter_sum, +) + +torch.manual_seed(1) + +M = 80000 +N = 3500 +# M = 80 +# N = 5 +hdim = 16 +h = 6 +L = 31 +query = torch.rand(N, h, hdim).cuda() +table_q = torch.rand(L, h, hdim, 3).cuda() +key = torch.rand(N, h, hdim).cuda() +table_k = torch.rand(L, h, hdim, 3).cuda() + +index_q = torch.rand(M) +index_q[index_q < 0] = 0 +index_q = (index_q * N).long().cuda() + +index_k = torch.rand(M) +index_k[index_k < 0] = 0 +index_k = (index_k * N).long().cuda() + +rel_index = torch.rand(M, 3) +rel_index[rel_index < 0] = 0 +rel_index = (rel_index * L).long().cuda() + + +# rearrange index for acceleration +index_q, indices = torch.sort(index_q) # [M,] +index_k = index_k[indices] # [M,] +rel_index = rel_index[indices] +index_q_counts = index_q.bincount() + +print("index_q_counts.shape: ", index_q_counts.shape) + +n_max = index_q_counts.max() +index_q_offsets = index_q_counts.cumsum(dim=-1) # [N] + +print("v1 index_q_offsets.shape: ", index_q_offsets.shape) + +index_q_offsets = torch.cat( + [torch.zeros(1, dtype=torch.long).cuda(), index_q_offsets], 0 +) # [N+1] + +# print("index_q[:100]: ", index_q[:100]) +print("n_max: ", n_max) +print("index_q_offsets.shape: ", index_q_offsets.shape) +# input() + +print("index_q_offsets[:100]: ", index_q_offsets[:100]) +print("index_k[:20]: ", index_k[:20]) + +query.requires_grad = True +table_q.requires_grad = True +key.requires_grad = True +table_k.requires_grad = True + +output1 = pointops.dot_prod_with_idx(query, index_q.int(), table_q, rel_index.int()) +output2 = pointops.dot_prod_with_idx(key, index_k.int(), table_k, rel_index.int()) +output = output1 + output2 +loss = output.mean() +loss.backward() + +# print("output.shape: {}, output[:5,:10]: {}".format(output.shape, output[:5,:10])) +# print("query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) +# print("table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) +# print("key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) +# print("table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) +# input() + +# print("query.is_contiguous(): ", 
query.is_contiguous()) +# print("key.is_contiguous(): ", key.is_contiguous()) +# print("index_q.is_contiguous(): ", index_q.is_contiguous()) +# print("index_k.is_contiguous(): ", index_k.is_contiguous()) + +output_v2 = pointops.dot_prod_with_idx_v3( + query, + index_q_offsets.int(), + n_max, + key, + index_k.int(), + table_q, + table_k, + rel_index.int(), +) +# loss = output_v2.mean() +# loss.backward() + +# print("output_v2.shape: {}, output_v2[:5,:10]: {}".format(output_v2.shape, output_v2[:5,:10])) +# print("v2 query.grad[:5, :3, :5]: ", query.grad[:5, :3, :5]) +# print("v2 table_q.grad[:5, :3, :5, :2]: ", table_q.grad[:5, :3, :5, :2]) +# print("v2 key.grad[:5, :3, :5]: ", key.grad[:5, :3, :5]) +# print("v2 table_k.grad[:5, :3, :5, :2]: ", table_k.grad[:5, :3, :5, :2]) +# input() + +print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max()) diff --git a/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step2.py b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step2.py new file mode 100644 index 0000000000000000000000000000000000000000..f1cb9ef37b4350f50f23857b365729b1a85b24f9 --- /dev/null +++ b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step2.py @@ -0,0 +1,83 @@ +import torch +import pointops +from torch_scatter import ( + scatter_max, + scatter_mean, + scatter_add, + scatter_min, + scatter_sum, +) + +torch.manual_seed(1) + +M = 80000 +N = 3500 +hdim = 16 +h = 6 +L = 31 +attn = torch.rand(M, h).cuda() +v = torch.rand(N, h, hdim).cuda() +table = torch.rand(L, h, hdim, 3).cuda() + +index_0 = torch.rand(M) +index_0[index_0 < 0] = 0 +index_0 = (index_0 * N).long().cuda() + +index_1 = torch.rand(M) +index_1[index_1 < 0] = 0 +index_1 = (index_1 * N).long().cuda() + +rel_index = torch.rand(M, 3) +rel_index[rel_index < 0] = 0 +rel_index = (rel_index * L).long().cuda() + +attn.requires_grad = True +v.requires_grad = True +table.requires_grad = True + +v_flat = v[index_1] # [M, h, hdim] +table_x, table_y, table_z = ( + table[:, :, :, 0], + table[:, :, :, 1], + table[:, :, :, 2], +) # [L, h, hdim] +rel_index_x, rel_index_y, rel_index_z = ( + rel_index[:, 0], + rel_index[:, 1], + rel_index[:, 2], +) # [M] +rel_pos_encoding = ( + table_x[rel_index_x] + table_y[rel_index_y] + table_z[rel_index_z] +) # [M, h, hdim] +v_flat_new = v_flat + rel_pos_encoding # [M, h, hdim] +output = attn.unsqueeze(-1) * v_flat_new # [M, h, hdim] +output = scatter_sum(src=output, index=index_0, dim=0, dim_size=N) # [N, h, hdim] +loss = output.mean() +loss.backward() + +print( + "output.shape: {}, output[:5,:10,:5]: {}".format(output.shape, output[:5, :10, :5]) +) +print("attn.grad[:5, :3]: ", attn.grad[:5, :3]) +print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) +print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) +input() + +# print("query.is_contiguous(): ", query.is_contiguous()) +# print("key.is_contiguous(): ", key.is_contiguous()) +# print("index_0.is_contiguous(): ", index_0.is_contiguous()) +# print("index_1.is_contiguous(): ", index_1.is_contiguous()) + +# output_v2 = pointops.attention_step2_with_rel_pos_value(attn, v, index_0.int(), index_1.int(), table, rel_index.int()) +# loss = output_v2.mean() +# loss.backward() + +# print("output_v2.shape: {}, output_v2[:5,:10,:5]: {}".format(output_v2.shape, output_v2[:5,:10,:5])) +# print("v2 attn.grad[:5, :3]: ", attn.grad[:5, :3]) +# print("v2 v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) +# print("v2 table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) +# input() + +# 
print("((output-output_v2)**2).max(): ", ((output-output_v2)**2).max()) + +# print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2)) diff --git a/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step2_v2.py b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step2_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..3090b980cf2ddf9803db40d161a21ce09edc7392 --- /dev/null +++ b/Pointcept/libs/pointops2/functions/test_relative_pos_encoding_op_step2_v2.py @@ -0,0 +1,109 @@ +import torch +import pointops +from torch_scatter import ( + scatter_max, + scatter_mean, + scatter_add, + scatter_min, + scatter_sum, +) + +torch.manual_seed(1) + +M = 80000 +N = 3500 +hdim = 16 +h = 6 +L = 31 +attn = torch.rand(M, h).cuda() +v = torch.rand(N, h, hdim).cuda() +table = torch.rand(L, h, hdim, 3).cuda() + +index_0 = torch.rand(M) +index_0[index_0 < 0] = 0 +index_0 = (index_0 * N).long().cuda() + +index_1 = torch.rand(M) +index_1[index_1 < 0] = 0 +index_1 = (index_1 * N).long().cuda() + +rel_index = torch.rand(M, 3) +rel_index[rel_index < 0] = 0 +rel_index = (rel_index * L).long().cuda() + + +# rearrange index for acceleration +index_0, indices = torch.sort(index_0) # [M,] +index_1 = index_1[indices] # [M,] +rel_index = rel_index[indices] +index_0_counts = index_0.bincount() + +print("index_0_counts.shape: ", index_0_counts.shape) + +n_max = index_0_counts.max() +index_0_offsets = index_0_counts.cumsum(dim=-1) # [N] + +print("v1 index_0_offsets.shape: ", index_0_offsets.shape) + +index_0_offsets = torch.cat( + [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 +) # [N+1] + + +attn.requires_grad = True +v.requires_grad = True +table.requires_grad = True + + +output = pointops.attention_step2_with_rel_pos_value( + attn, v, index_0.int(), index_1.int(), table, rel_index.int() +) +loss = output.mean() +loss.backward() + +print( + "output.shape: {}, output[:5,:10,:5]: {}".format(output.shape, output[:5, :10, :5]) +) +print("attn.grad[:5, :3]: ", attn.grad[:5, :3]) +print("v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) +print("table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) +# input() + +attn_grad = attn.grad.clone() +v_grad = v.grad.clone() +table_grad = table.grad.clone() + +attn.grad.zero_() +v.grad.zero_() +table.grad.zero_() + +# print("query.is_contiguous(): ", query.is_contiguous()) +# print("key.is_contiguous(): ", key.is_contiguous()) +# print("index_0.is_contiguous(): ", index_0.is_contiguous()) +# print("index_1.is_contiguous(): ", index_1.is_contiguous()) + +output_v2 = pointops.attention_step2_with_rel_pos_value_v2( + attn, v, index_0_offsets.int(), n_max, index_1.int(), table, rel_index.int() +) +loss = output_v2.mean() +loss.backward() + +print( + "output_v2.shape: {}, output_v2[:5,:10,:5]: {}".format( + output_v2.shape, output_v2[:5, :10, :5] + ) +) +print("v2 attn.grad[:5, :3]: ", attn.grad[:5, :3]) +print("v2 v.grad[:5, :3, :5]: ", v.grad[:5, :3, :5]) +print("v2 table.grad[:5, :3, :5, :2]: ", table.grad[:5, :3, :5, :2]) +# input() + +print("((output-output_v2)**2).max(): ", ((output - output_v2) ** 2).max()) + +print("((attn_grad-attn.grad)**2).max(): ", ((attn_grad - attn.grad) ** 2).max()) + +print("((v_grad-v.grad)**2).max(): ", ((v_grad - v.grad) ** 2).max()) + +print("((table_grad-table.grad)**2).max(): ", ((table_grad - table.grad) ** 2).max()) + +# print("torch.max((attn_flat-attn_flat_v2)**2): ", torch.max((attn_flat-attn_flat_v2)**2)) diff --git a/Pointcept/libs/pointops2/setup.py 
b/Pointcept/libs/pointops2/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..b33cb3b83c39302500efb464667d346d5699f0aa --- /dev/null +++ b/Pointcept/libs/pointops2/setup.py @@ -0,0 +1,33 @@ +import os +from setuptools import setup +from torch.utils.cpp_extension import BuildExtension, CUDAExtension +from distutils.sysconfig import get_config_vars + +(opt,) = get_config_vars("OPT") +os.environ["OPT"] = " ".join( + flag for flag in opt.split() if flag != "-Wstrict-prototypes" +) + +src = "src" +sources = [ + os.path.join(root, file) + for root, dirs, files in os.walk(src) + for file in files + if file.endswith(".cpp") or file.endswith(".cu") +] + +setup( + name="pointops2", + version="1.0", + install_requires=["torch", "numpy"], + packages=["pointops2"], + package_dir={"pointops2": "functions"}, + ext_modules=[ + CUDAExtension( + name="pointops2_cuda", + sources=sources, + extra_compile_args={"cxx": ["-g"], "nvcc": ["-O2"]}, + ) + ], + cmdclass={"build_ext": BuildExtension}, +) diff --git a/Pointcept/libs/pointops2/src/__init__.py b/Pointcept/libs/pointops2/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda.cpp b/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..491b6f41660edf9b5ea5656cc88edba8ed807d71 --- /dev/null +++ b/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda.cpp @@ -0,0 +1,28 @@ +#include <vector> +#include <torch/serialize/tensor.h> +#include <torch/extension.h> +#include "aggregation_cuda_kernel.h" + + +void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) +{ + const float *input = input_tensor.data_ptr<float>(); + const float *position = position_tensor.data_ptr<float>(); + const float *weight = weight_tensor.data_ptr<float>(); + const int *idx = idx_tensor.data_ptr<int>(); + float *output = output_tensor.data_ptr<float>(); + aggregation_forward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, output); +} + +void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor) +{ + const float *input = input_tensor.data_ptr<float>(); + const float *position = position_tensor.data_ptr<float>(); + const float *weight = weight_tensor.data_ptr<float>(); + const int *idx = idx_tensor.data_ptr<int>(); + const float *grad_output = grad_output_tensor.data_ptr<float>(); + float *grad_input = grad_input_tensor.data_ptr<float>(); + float *grad_position = grad_position_tensor.data_ptr<float>(); + float *grad_weight = grad_weight_tensor.data_ptr<float>(); + aggregation_backward_cuda_launcher(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight); +}
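For reference, the forward pass wrapped above fits in a few lines of PyTorch (a sketch under the shape conventions in the kernel comments: feature channel `c` uses weight channel `c % w_c`, so the `w_c` weight channels are tiled across the feature dimension):

```python
import torch

def aggregation_forward_ref(input, position, weight, idx):
    # input: (n0, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample)
    n, nsample, c = position.shape
    w = weight.repeat(1, 1, c // weight.shape[-1])   # channel c -> weight channel c % w_c
    return ((input[idx] + position) * w).sum(dim=1)  # output: (n, c)
```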
diff --git a/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda_kernel.cu b/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..8339bb7e2088abffefba02c26b248edafed6cf47 --- /dev/null +++ b/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda_kernel.cu @@ -0,0 +1,53 @@ +#include "../cuda_utils.h" +#include "aggregation_cuda_kernel.h" + + +__global__ void aggregation_forward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) { + // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c) + int index = blockIdx.x * blockDim.x + threadIdx.x; + if (index >= n * c) return; + const int c_idx = index % c; + const int n_idx = index / c; + const int w_c_idx = c_idx % w_c; + for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++) + { + int idx_idx = n_idx * nsample + nsample_idx; + int input_idx = idx[idx_idx] * c + c_idx; + int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx; + int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx; + output[index] += (input[input_idx] + position[position_idx]) * weight[weight_idx]; + } +} + +__global__ void aggregation_backward_cuda_kernel(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) { + // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c) + int index = blockIdx.x * blockDim.x + threadIdx.x; + if (index >= n * c) return; + const int c_idx = index % c; + const int n_idx = index / c; + const int w_c_idx = c_idx % w_c; + for (int nsample_idx = 0; nsample_idx < nsample; nsample_idx++) + { + int idx_idx = n_idx * nsample + nsample_idx; + int input_idx = idx[idx_idx] * c + c_idx; + int position_idx = n_idx * nsample * c + nsample_idx * c + c_idx; + int weight_idx = n_idx * nsample * w_c + nsample_idx * w_c + w_c_idx; + atomicAdd(grad_input + input_idx, grad_output[index] * weight[weight_idx]); + grad_position[position_idx] = grad_output[index] * weight[weight_idx]; + atomicAdd(grad_weight + weight_idx, grad_output[index] * (input[input_idx] + position[position_idx])); + } +} + +void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output) { + // input: input: (n, c), position: (n, nsample, c), weight: (n, nsample, w_c), idx: (n, nsample), output: (n, c) + dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + aggregation_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, output); +} + +void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight) { + // input: grad_output: (n, c), output: grad_input: (n, c), grad_position: (n, nsample, c), grad_weight: (n, nsample, w_c) + dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + aggregation_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, w_c, input, position, weight, idx, grad_output, grad_input, grad_position, grad_weight); +} diff --git a/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda_kernel.h b/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..5211a96aa2acbe0d9baf32bddc9ab4be87703072 --- /dev/null +++ b/Pointcept/libs/pointops2/src/aggregation/aggregation_cuda_kernel.h @@ -0,0 +1,20 @@ +#ifndef _AGGREGATION_CUDA_KERNEL +#define _AGGREGATION_CUDA_KERNEL +#include <vector> +#include <torch/serialize/tensor.h> +#include <torch/extension.h> + +void aggregation_forward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, 
at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); +void aggregation_backward_cuda(int n, int nsample, int c, int w_c, at::Tensor input_tensor, at::Tensor position_tensor, at::Tensor weight_tensor, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input_tensor, at::Tensor grad_position_tensor, at::Tensor grad_weight_tensor); + +#ifdef __cplusplus +extern "C" { +#endif + +void aggregation_forward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, float *output); +void aggregation_backward_cuda_launcher(int n, int nsample, int c, int w_c, const float *input, const float *position, const float *weight, const int *idx, const float *grad_output, float *grad_input, float *grad_position, float *grad_weight); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/Pointcept/libs/pointops2/src/attention/attention_cuda.cpp b/Pointcept/libs/pointops2/src/attention/attention_cuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..27493d19ebfd11b083b8f31455ac12c4416208a9 --- /dev/null +++ b/Pointcept/libs/pointops2/src/attention/attention_cuda.cpp @@ -0,0 +1,55 @@ +#include +#include +#include +#include "attention_cuda_kernel.h" + +void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, + at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor) +{ + const float *q = q_tensor.data_ptr(); + const float *k = k_tensor.data_ptr(); + const int *index0 = index0_tensor.data_ptr(); + const int *index1 = index1_tensor.data_ptr(); + float *attn = attn_tensor.data_ptr(); + attention_step1_forward_cuda_launcher(N, M, h, C, q, k, index0, index1, attn); +} + +void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, + at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, + at::Tensor grad_q_tensor, at::Tensor grad_k_tensor) +{ + const float *grad_out = grad_out_tensor.data_ptr(); + const int *index0 = index0_tensor.data_ptr(); + const int *index1 = index1_tensor.data_ptr(); + const float *q = q_tensor.data_ptr(); + const float *k = k_tensor.data_ptr(); + float *grad_q = grad_q_tensor.data_ptr(); + float *grad_k = grad_k_tensor.data_ptr(); + attention_step1_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, q, k, grad_q, grad_k); +} + +void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, + at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor) +{ + const float *attn = attn_tensor.data_ptr(); + const float *v = v_tensor.data_ptr(); + const int *index0 = index0_tensor.data_ptr(); + const int *index1 = index1_tensor.data_ptr(); + float *output = output_tensor.data_ptr(); + attention_step2_forward_cuda_launcher(N, M, h, C, attn, v, index0, index1, output); +} + + +void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, + at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, + at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor) +{ + const float *grad_out = grad_out_tensor.data_ptr(); + const int *index0 = index0_tensor.data_ptr(); + const int *index1 = index1_tensor.data_ptr(); + const float *attn = attn_tensor.data_ptr(); + const float *v = v_tensor.data_ptr(); + float *grad_attn = grad_attn_tensor.data_ptr(); + float *grad_v = 
grad_v_tensor.data_ptr(); + attention_step2_backward_cuda_launcher(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v); +} diff --git a/Pointcept/libs/pointops2/src/attention/attention_cuda_kernel.cu b/Pointcept/libs/pointops2/src/attention/attention_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..6bf8d718bf4811fd441b4df869d9498b35316976 --- /dev/null +++ b/Pointcept/libs/pointops2/src/attention/attention_cuda_kernel.cu @@ -0,0 +1,105 @@ +/* written by Xin Lai. Email: xinlai@cse.cuhk.edu.hk */ + +#include "../cuda_utils.h" +#include "attention_cuda_kernel.h" + + +__global__ void attention_step1_forward_cuda_kernel( // M, h, C//h + int N, int M, int h, int C, const float *q, const float *k, + const int *index0, const int *index1, float *attn) { + + int c_idx = blockIdx.z; + int h_idx = blockIdx.y; + int m_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (m_idx >= M || h_idx >= h || c_idx >= C / h) return; + + int idx0 = index0[m_idx]; + int idx1 = index1[m_idx]; + float val = q[idx0*C+h_idx*C/h+c_idx] * k[idx1*C+h_idx*C/h+c_idx]; + atomicAdd(attn+m_idx*h+h_idx, val); +} + +__global__ void attention_step1_backward_cuda_kernel( // M, h, C//h + int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *q, const float *k, + float *grad_q, float *grad_k) { + + int c_idx = blockIdx.z; + int h_idx = blockIdx.y; + int m_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (m_idx >= M || h_idx >= h || c_idx >= C / h) return; + + int idx0 = index0[m_idx]; + int idx1 = index1[m_idx]; + int grad_out_idx = m_idx*h+h_idx; + int q_idx = idx0*C+h_idx*C/h+c_idx; + int k_idx = idx1*C+h_idx*C/h+c_idx; + atomicAdd(grad_q+q_idx, grad_out[grad_out_idx] * k[k_idx]); + atomicAdd(grad_k+k_idx, grad_out[grad_out_idx] * q[q_idx]); +} + +void attention_step1_forward_cuda_launcher(int N, int M, int h, int C, const float *q, const float *k, + const int *index0, const int *index1, float *attn) { + // input: attn: (M, h), v: (N, h, C/h), index0: (M, ), index1: (M, ) + //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M); + dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), h, C/h); + dim3 threads(THREADS_PER_BLOCK); + attention_step1_forward_cuda_kernel<<>>(N, M, h, C, q, k, index0, index1, attn); +} + +void attention_step1_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, + const float *q, const float *k, float *grad_q, float *grad_k) { + // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) + //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M); + dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), h, C/h); + dim3 threads(THREADS_PER_BLOCK); + attention_step1_backward_cuda_kernel<<>>(N, M, h, C, grad_out, index0, index1, q, k, grad_q, grad_k); +} + +__global__ void attention_step2_forward_cuda_kernel( // M, h, C//h + int N, int M, int h, int C, const float *attn, const float *v, + const int *index0, const int *index1, float *output) { + + int c_idx = blockIdx.z; + int h_idx = blockIdx.y; + int m_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (m_idx >= M || h_idx >= h || c_idx >= C / h) return; + + int idx1 = index1[m_idx]; + float val = attn[m_idx*h+h_idx] * v[idx1*C+h_idx*C/h+c_idx]; + int idx0 = index0[m_idx]; + atomicAdd(output+idx0*C+h_idx*C/h+c_idx, val); +} + +__global__ void attention_step2_backward_cuda_kernel( // M, h, C//h + int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, 
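Both attention steps above have direct PyTorch equivalents, which is what the test scripts in `functions/` compare against; a sketch with `q`, `k`, `v` shaped `(N, h, C//h)` (using `torch_scatter`, as the tests do):

```python
import torch
from torch_scatter import scatter_sum

def attention_step1_ref(q, k, index0, index1):
    # attn[m, head] = dot(q[index0[m], head], k[index1[m], head])
    return (q[index0] * k[index1]).sum(dim=-1)          # (M, h)

def attention_step2_ref(attn, v, index0, index1, N):
    # out[i] = sum of attn[m] * v[index1[m]] over all pairs m with index0[m] == i
    msg = attn.unsqueeze(-1) * v[index1]                # (M, h, C//h)
    return scatter_sum(msg, index0, dim=0, dim_size=N)  # (N, h, C//h)
```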
+ float *grad_attn, float *grad_v) { + + int c_idx = blockIdx.z; + int h_idx = blockIdx.y; + int m_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (m_idx >= M || h_idx >= h || c_idx >= C / h) return; + + int idx0 = index0[m_idx]; + int idx1 = index1[m_idx]; + int grad_out_idx = idx0*C+h_idx*C/h+c_idx; + atomicAdd(grad_attn+m_idx*h+h_idx, grad_out[grad_out_idx] * v[idx1*C+h_idx*C/h+c_idx]); + atomicAdd(grad_v+idx1*C+h_idx*C/h+c_idx, grad_out[grad_out_idx] * attn[m_idx*h+h_idx]); +} + +void attention_step2_forward_cuda_launcher(int N, int M, int h, int C, const float *attn, const float *v, + const int *index0, const int *index1, float *output) { + // input: attn: (M, h), v: (N, h, C/h), index0: (M, ), index1: (M, ) + //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M); + dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), h, C/h); + dim3 threads(THREADS_PER_BLOCK); + attention_step2_forward_cuda_kernel<<>>(N, M, h, C, attn, v, index0, index1, output); +} + +void attention_step2_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, + const float *attn, const float *v, float *grad_attn, float *grad_v) { + // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) + //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M); + dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), h, C/h); + dim3 threads(THREADS_PER_BLOCK); + attention_step2_backward_cuda_kernel<<>>(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v); +} diff --git a/Pointcept/libs/pointops2/src/attention/attention_cuda_kernel.h b/Pointcept/libs/pointops2/src/attention/attention_cuda_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..cbd99b9b6a9c65af76aa95d00fff6306446114cd --- /dev/null +++ b/Pointcept/libs/pointops2/src/attention/attention_cuda_kernel.h @@ -0,0 +1,26 @@ +#ifndef _ATTENTION_CUDA_KERNEL +#define _ATTENTION_CUDA_KERNEL +#include +#include +#include + +void attention_step1_forward_cuda(int N, int M, int h, int C, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor); +void attention_step1_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor); + +void attention_step2_forward_cuda(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor); +void attention_step2_backward_cuda(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor); + +#ifdef __cplusplus +extern "C" { +#endif + +void attention_step1_forward_cuda_launcher(int N, int M, int h, int C, const float *q, const float *k, const int *index0, const int *index1, float *attn); +void attention_step1_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k); + +void attention_step2_forward_cuda_launcher(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output); +void attention_step2_backward_cuda_launcher(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float 
*grad_attn, float *grad_v); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/Pointcept/libs/pointops2/src/attention_v2/attention_cuda_kernel_v2.cu b/Pointcept/libs/pointops2/src/attention_v2/attention_cuda_kernel_v2.cu new file mode 100644 index 0000000000000000000000000000000000000000..52e65fc1f8b47d80212fe28fa0b811fd4442a4ce --- /dev/null +++ b/Pointcept/libs/pointops2/src/attention_v2/attention_cuda_kernel_v2.cu @@ -0,0 +1,195 @@ +/* written by Xin Lai. Email: xinlai@cse.cuhk.edu.hk */ + +#include "../cuda_utils.h" +#include "attention_cuda_kernel_v2.h" + + +template +__global__ void attention_step1_forward_cuda_kernel_v2( // M, h, C//h + int N, int M, int h, const float *q, const float *k, + const int *index0_offsets, const int *index1, float *attn) { + + int h_idx = blockIdx.y; + int q_idx = blockIdx.x; + int n_idx = threadIdx.x; + int C = h * d; + // if (m_idx >= M || h_idx >= h || c_idx >= C / h) return; + + __shared__ float query_vec[d]; + __shared__ int start, end; + + // if(n_idx == 0){ + // printf("blockDim.x: %d\n", blockDim.x); + // } + + if (n_idx == 0){ + start = index0_offsets[q_idx]; + end = index0_offsets[q_idx+1]; + // printf("start: %d, end: %d, blockDim.x: %d\n", start, end, blockDim.x); + } + for(int i = n_idx; i < d; i += blockDim.x) + query_vec[i] = q[q_idx*C + h_idx*d + i]; + + __syncthreads(); + + int m_idx = start + n_idx; + if(m_idx >= end) + return; + + float sum = 0; + for(int i = 0; i < d; i++){ + int k_idx = index1[m_idx]; + float key = k[k_idx * C + h_idx * d + i]; + sum += query_vec[i] * key; + } + attn[m_idx*h + h_idx] = sum; + // int idx0 = index0[m_idx]; + // int idx1 = index1[m_idx]; + // float val = q[idx0*C+h_idx*C/h+c_idx] * k[idx1*C+h_idx*C/h+c_idx]; + // atomicAdd(attn+m_idx*h+h_idx, val); +} + +template +__global__ void attention_step1_backward_cuda_kernel_v2( // M, h, C//h + int N, int M, int h, const float *grad_out, const int *index0_offsets, const int *index1, const float *q, const float *k, + float *grad_q, float *grad_k) { + + int h_idx = blockIdx.y; + int q_idx = blockIdx.x; + int n_idx = threadIdx.x; + int C = d * h; + + __shared__ float query_vec[d]; + __shared__ int start, end; + + if (n_idx == 0){ + start = index0_offsets[q_idx]; + end = index0_offsets[q_idx+1]; + } + for(int i = n_idx; i < d; i += blockDim.x) + query_vec[i] = q[q_idx*C + h_idx*d + i]; + + __shared__ float gradient_new[d]; + for(int i = n_idx; i < d; i += blockDim.x) + gradient_new[i] = 0; + + __syncthreads(); + + int m_idx = start + n_idx; + if(m_idx < end){ + float gradient = grad_out[m_idx*h + h_idx]; + for(int i = 0; i < d; i++){ + int k_idx = index1[m_idx]; + atomicAdd(&gradient_new[i], gradient * k[k_idx*C + h_idx*d + i]); + atomicAdd(grad_k + k_idx*C + h_idx*d + i, gradient * query_vec[i]); + } + } + __syncthreads(); + + for(int i = n_idx; i < d; i += blockDim.x) + grad_q[q_idx*C + h_idx*d + i] = gradient_new[i]; +} + +void attention_step1_forward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, + const float *q, const float *k, const int *index0_offsets, const int *index1, float *attn) { + // input: attn: (M, h), v: (N, h, C/h), index0: (M, ), index1: (M, ) + //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M); + dim3 blocks(N, h); + unsigned int n_threads = opt_n_threads(n_max); + + n_threads = n_threads == n_max ? n_threads : n_threads * 2; + // n_threads = n_threads > 1024 ? 
512 : n_threads; + + // printf("n_max: %d, n_threads: %d\n", n_max, n_threads); + + // dim3 threads(THREADS_PER_BLOCK); + // attention_step1_forward_cuda_kernel_v2<<>>(N, M, h, C, q, k, index0, index1, attn); + + switch (C / h) { + case 16: + attention_step1_forward_cuda_kernel_v2<16><<>>(N, M, h, q, k, index0_offsets, index1, attn); + break; + case 32: + attention_step1_forward_cuda_kernel_v2<32><<>>(N, M, h, q, k, index0_offsets, index1, attn); + break; + default: + throw "d != 16 and d != 32"; + } +} + +void attention_step1_backward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, + const float *grad_out, const int *index0_offsets, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k) { + // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) + //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M); + // dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), h, C/h); + // dim3 threads(THREADS_PER_BLOCK); + dim3 blocks(N, h); + unsigned int n_threads = opt_n_threads(n_max); + // attention_step1_backward_cuda_kernel_v2<<>>(N, M, h, C/h, grad_out, index0_offsets, index1, q, k, grad_q, grad_k); + + n_threads = n_threads == n_max ? n_threads : n_threads * 2; + // n_threads = n_threads > 1024 ? 512 : n_threads; + + // printf("n_max: %d, n_threads: %d\n", n_max, n_threads); + + switch (C / h) { + case 16: + attention_step1_backward_cuda_kernel_v2<16><<>>(N, M, h, grad_out, index0_offsets, index1, q, k, grad_q, grad_k); + break; + case 32: + attention_step1_backward_cuda_kernel_v2<32><<>>(N, M, h, grad_out, index0_offsets, index1, q, k, grad_q, grad_k); + break; + default: + throw "d != 16 and d != 32"; + } + +} + +__global__ void attention_step2_forward_cuda_kernel_v2( // M, h, C//h + int N, int M, int h, int C, const float *attn, const float *v, + const int *index0, const int *index1, float *output) { + + int c_idx = blockIdx.z; + int h_idx = blockIdx.y; + int m_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (m_idx >= M || h_idx >= h || c_idx >= C / h) return; + + int idx1 = index1[m_idx]; + float val = attn[m_idx*h+h_idx] * v[idx1*C+h_idx*C/h+c_idx]; + int idx0 = index0[m_idx]; + atomicAdd(output+idx0*C+h_idx*C/h+c_idx, val); +} + +__global__ void attention_step2_backward_cuda_kernel_v2( // M, h, C//h + int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, + float *grad_attn, float *grad_v) { + + int c_idx = blockIdx.z; + int h_idx = blockIdx.y; + int m_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (m_idx >= M || h_idx >= h || c_idx >= C / h) return; + + int idx0 = index0[m_idx]; + int idx1 = index1[m_idx]; + int grad_out_idx = idx0*C+h_idx*C/h+c_idx; + atomicAdd(grad_attn+m_idx*h+h_idx, grad_out[grad_out_idx] * v[idx1*C+h_idx*C/h+c_idx]); + atomicAdd(grad_v+idx1*C+h_idx*C/h+c_idx, grad_out[grad_out_idx] * attn[m_idx*h+h_idx]); +} + +void attention_step2_forward_cuda_launcher_v2(int N, int M, int h, int C, const float *attn, const float *v, + const int *index0, const int *index1, float *output) { + // input: attn: (M, h), v: (N, h, C/h), index0: (M, ), index1: (M, ) + //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M); + dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), h, C/h); + dim3 threads(THREADS_PER_BLOCK); + attention_step2_forward_cuda_kernel_v2<<>>(N, M, h, C, attn, v, index0, index1, output); +} + +void attention_step2_backward_cuda_launcher_v2(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, + const float 
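The thread count chosen above is the only data-dependent part of the launch: one block per (query, head) pair, with enough threads to cover the longest neighbor list. A Python sketch of the heuristic (assuming it mirrors `opt_n_threads` from `cuda_utils.h` combined with the doubling in the launcher above):

```python
def pick_n_threads(n_max, total_threads=1024):
    pow2 = 1 << (max(int(n_max), 1).bit_length() - 1)  # largest power of two <= n_max
    n = max(min(pow2, total_threads), 1)
    # double when rounding down lost coverage; the commented-out clamp in the
    # launcher hints this can exceed 1024 for very long neighbor lists
    return n if n == n_max else n * 2
```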
*attn, const float *v, float *grad_attn, float *grad_v) { + // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) + //dim3 blocks(DIVUP(C/h, THREADS_PER_BLOCK), h, M); + dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), h, C/h); + dim3 threads(THREADS_PER_BLOCK); + attention_step2_backward_cuda_kernel_v2<<>>(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v); +} diff --git a/Pointcept/libs/pointops2/src/attention_v2/attention_cuda_kernel_v2.h b/Pointcept/libs/pointops2/src/attention_v2/attention_cuda_kernel_v2.h new file mode 100644 index 0000000000000000000000000000000000000000..d7e7f047bc318928ddb9402acbcdf20204596450 --- /dev/null +++ b/Pointcept/libs/pointops2/src/attention_v2/attention_cuda_kernel_v2.h @@ -0,0 +1,26 @@ +#ifndef _ATTENTION_V2_CUDA_KERNEL +#define _ATTENTION_V2_CUDA_KERNEL +#include +#include +#include + +void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor); +void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor); + +void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor); +void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor); + +#ifdef __cplusplus +extern "C" { +#endif + +void attention_step1_forward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *q, const float *k, const int *index0_offsets, const int *index1, float *attn); +void attention_step1_backward_cuda_launcher_v2(int N, int M, int h, int C, const unsigned int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *q, const float *k, float *grad_q, float *grad_k); + +void attention_step2_forward_cuda_launcher_v2(int N, int M, int h, int C, const float *attn, const float *v, const int *index0, const int *index1, float *output); +void attention_step2_backward_cuda_launcher_v2(int N, int M, int h, int C, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, float *grad_attn, float *grad_v); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/Pointcept/libs/pointops2/src/attention_v2/attention_cuda_v2.cpp b/Pointcept/libs/pointops2/src/attention_v2/attention_cuda_v2.cpp new file mode 100644 index 0000000000000000000000000000000000000000..03329e5e6c4bb5ffd4320a94fc25a481785668ed --- /dev/null +++ b/Pointcept/libs/pointops2/src/attention_v2/attention_cuda_v2.cpp @@ -0,0 +1,55 @@ +#include +#include +#include +#include "attention_cuda_kernel_v2.h" + +void attention_step1_forward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor q_tensor, at::Tensor k_tensor, + at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor attn_tensor) +{ + const float *q = q_tensor.data_ptr(); + const float *k = k_tensor.data_ptr(); + const int *index0_offsets = index0_tensor_offsets.data_ptr(); + const int *index1 = index1_tensor.data_ptr(); + float *attn = 
attn_tensor.data_ptr<float>(); + attention_step1_forward_cuda_launcher_v2(N, M, h, C, n_max, q, k, index0_offsets, index1, attn); +} + +void attention_step1_backward_cuda_v2(int N, int M, int h, int C, const unsigned int n_max, at::Tensor grad_out_tensor, + at::Tensor index0_tensor_offsets, at::Tensor index1_tensor, at::Tensor q_tensor, at::Tensor k_tensor, + at::Tensor grad_q_tensor, at::Tensor grad_k_tensor) +{ + const float *grad_out = grad_out_tensor.data_ptr<float>(); + const int *index0_offsets = index0_tensor_offsets.data_ptr<int>(); + const int *index1 = index1_tensor.data_ptr<int>(); + const float *q = q_tensor.data_ptr<float>(); + const float *k = k_tensor.data_ptr<float>(); + float *grad_q = grad_q_tensor.data_ptr<float>(); + float *grad_k = grad_k_tensor.data_ptr<float>(); + attention_step1_backward_cuda_launcher_v2(N, M, h, C, n_max, grad_out, index0_offsets, index1, q, k, grad_q, grad_k); +} + +void attention_step2_forward_cuda_v2(int N, int M, int h, int C, at::Tensor attn_tensor, at::Tensor v_tensor, + at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor output_tensor) +{ + const float *attn = attn_tensor.data_ptr<float>(); + const float *v = v_tensor.data_ptr<float>(); + const int *index0 = index0_tensor.data_ptr<int>(); + const int *index1 = index1_tensor.data_ptr<int>(); + float *output = output_tensor.data_ptr<float>(); + attention_step2_forward_cuda_launcher_v2(N, M, h, C, attn, v, index0, index1, output); +} + + +void attention_step2_backward_cuda_v2(int N, int M, int h, int C, at::Tensor grad_out_tensor, + at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, + at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor) +{ + const float *grad_out = grad_out_tensor.data_ptr<float>(); + const int *index0 = index0_tensor.data_ptr<int>(); + const int *index1 = index1_tensor.data_ptr<int>(); + const float *attn = attn_tensor.data_ptr<float>(); + const float *v = v_tensor.data_ptr<float>(); + float *grad_attn = grad_attn_tensor.data_ptr<float>(); + float *grad_v = grad_v_tensor.data_ptr<float>(); + attention_step2_backward_cuda_launcher_v2(N, M, h, C, grad_out, index0, index1, attn, v, grad_attn, grad_v); +} diff --git a/Pointcept/libs/pointops2/src/cuda_utils.h b/Pointcept/libs/pointops2/src/cuda_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..e67749c4f5f8964ffb5916c13f5260cf8df45f52 --- /dev/null +++ b/Pointcept/libs/pointops2/src/cuda_utils.h @@ -0,0 +1,23 @@ +#ifndef _CUDA_UTILS_H +#define _CUDA_UTILS_H + +#include <cmath> +#include <algorithm> + +#define TOTAL_THREADS 1024 +#define THREADS_PER_BLOCK 256 +#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) + +inline int opt_n_threads(int work_size) { + const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0); + return std::max(std::min(1 << pow_2, TOTAL_THREADS), 1); +} + +inline dim3 opt_block_config(int x, int y) { + const int x_threads = opt_n_threads(x); + const int y_threads = std::max(std::min(opt_n_threads(y), TOTAL_THREADS / x_threads), 1); + dim3 block_config(x_threads, y_threads, 1); + return block_config; +} + +#endif diff --git a/Pointcept/libs/pointops2/src/grouping/grouping_cuda.cpp b/Pointcept/libs/pointops2/src/grouping/grouping_cuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6f7990adaf43f0a77050eed0d55adad19f256e10 --- /dev/null +++ b/Pointcept/libs/pointops2/src/grouping/grouping_cuda.cpp @@ -0,0 +1,21 @@ +#include <vector> +#include <torch/serialize/tensor.h> +#include <torch/extension.h> +#include "grouping_cuda_kernel.h" + + +void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor) +{ + const float *input = 
input_tensor.data_ptr(); + const int *idx = idx_tensor.data_ptr(); + float *output = output_tensor.data_ptr(); + grouping_forward_cuda_launcher(m, nsample, c, input, idx, output); +} + +void grouping_backward_cuda(int m, int nsample, int c, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor) +{ + const float *grad_output = grad_output_tensor.data_ptr(); + const int *idx = idx_tensor.data_ptr(); + float *grad_input = grad_input_tensor.data_ptr(); + grouping_backward_cuda_launcher(m, nsample, c, grad_output, idx, grad_input); +} diff --git a/Pointcept/libs/pointops2/src/grouping/grouping_cuda_kernel.cu b/Pointcept/libs/pointops2/src/grouping/grouping_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..58ec0a21a2949f9f82504ccd24597c544c50af40 --- /dev/null +++ b/Pointcept/libs/pointops2/src/grouping/grouping_cuda_kernel.cu @@ -0,0 +1,40 @@ +#include "../cuda_utils.h" +#include "grouping_cuda_kernel.h" + + +__global__ void grouping_forward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ input, const int *__restrict__ idx, float *__restrict__ output) { + // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) + int index = blockIdx.x * blockDim.x + threadIdx.x; + if (index >= m * nsample * c) return; + const int c_idx = index % c; + const int nsample_idx = (index / c) % nsample; + const int m_idx = index / nsample / c; + const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; + output[index] = input[input_idx]; +} + +__global__ void grouping_backward_cuda_kernel(int m, int nsample, int c, const float *__restrict__ grad_output, const int *__restrict__ idx, float *__restrict__ grad_input) { + // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) + int index = blockIdx.x * blockDim.x + threadIdx.x; + if (index >= m * nsample * c) return; + const int c_idx = index % c; + const int nsample_idx = (index / c) % nsample; + const int m_idx = index / nsample / c; + const int input_idx = idx[m_idx * nsample + nsample_idx] * c + c_idx; + atomicAdd(grad_input + input_idx, grad_output[index]); +} + +void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output) { + // input: input: (n, c), idx: (m, nsample), output: (m, nsample, c) + dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + grouping_forward_cuda_kernel<<>>(m, nsample, c, input, idx, output); +} + +void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input) +{ + // input: grad_output: (m, nsample, c), idx: (m, nsample), output: grad_input: (n, c) + dim3 blocks(DIVUP(m * nsample * c, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + grouping_backward_cuda_kernel<<>>(m, nsample, c, grad_output, idx, grad_input); +} diff --git a/Pointcept/libs/pointops2/src/grouping/grouping_cuda_kernel.h b/Pointcept/libs/pointops2/src/grouping/grouping_cuda_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..3db4aaa9fad5811d559d47c500e4b00f0165d9b4 --- /dev/null +++ b/Pointcept/libs/pointops2/src/grouping/grouping_cuda_kernel.h @@ -0,0 +1,20 @@ +#ifndef _GROUPING_CUDA_KERNEL +#define _GROUPING_CUDA_KERNEL +#include +#include +#include + +void grouping_forward_cuda(int m, int nsample, int c, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor output_tensor); +void grouping_backward_cuda(int m, int nsample, int c, at::Tensor 
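The grouping op itself is a plain gather: PyTorch's fancy indexing gives an equivalent forward, and autograd reproduces the `atomicAdd` scatter of the backward kernel automatically (a reference sketch, assuming `idx` is a long tensor):

```python
import torch

def grouping_forward_ref(input, idx):
    # input: (n, c), idx: (m, nsample) -> output: (m, nsample, c)
    return input[idx]
```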
grad_output_tensor, at::Tensor idx_tensor, at::Tensor grad_input_tensor); + +#ifdef __cplusplus +extern "C" { +#endif + +void grouping_forward_cuda_launcher(int m, int nsample, int c, const float *input, const int *idx, float *output); +void grouping_backward_cuda_launcher(int m, int nsample, int c, const float *grad_output, const int *idx, float *grad_input); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/Pointcept/libs/pointops2/src/interpolation/interpolation_cuda.cpp b/Pointcept/libs/pointops2/src/interpolation/interpolation_cuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f2c1b0078f4b70626705d7b3f5d1d65d37ee6de7 --- /dev/null +++ b/Pointcept/libs/pointops2/src/interpolation/interpolation_cuda.cpp @@ -0,0 +1,23 @@ +#include +#include +#include +#include "interpolation_cuda_kernel.h" + + +void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor) +{ + const float *input = input_tensor.data_ptr(); + const int *idx = idx_tensor.data_ptr(); + const float *weight = weight_tensor.data_ptr(); + float *output = output_tensor.data_ptr(); + interpolation_forward_cuda_launcher(n, c, k, input, idx, weight, output); +} + +void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor) +{ + const float *grad_output = grad_output_tensor.data_ptr(); + const int *idx = idx_tensor.data_ptr(); + const float *weight = weight_tensor.data_ptr(); + float *grad_input = grad_input_tensor.data_ptr(); + interpolation_backward_cuda_launcher(n, c, k, grad_output, idx, weight, grad_input); +} diff --git a/Pointcept/libs/pointops2/src/interpolation/interpolation_cuda_kernel.cu b/Pointcept/libs/pointops2/src/interpolation/interpolation_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..f560d8c92c6eac865b8c1e1dc27140fe3fcc2250 --- /dev/null +++ b/Pointcept/libs/pointops2/src/interpolation/interpolation_cuda_kernel.cu @@ -0,0 +1,47 @@ +#include "../cuda_utils.h" +#include "interpolation_cuda_kernel.h" + + +__global__ void interpolation_forward_cuda_kernel(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) +{ + // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) + int index = blockIdx.x * blockDim.x + threadIdx.x; + if (index >= n * c) return; + int c_idx = index % c; + int n_idx = index / c; + for (int i = 0; i < k; i++) + { + int idx_idx = n_idx * k + i; + int input_idx = idx[idx_idx] * c + c_idx; + output[index] += input[input_idx] * weight[idx_idx]; + } +} + +__global__ void interpolation_backward_cuda_kernel(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) +{ + // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) + int index = blockIdx.x * blockDim.x + threadIdx.x; + if (index >= n * c) return; + int c_idx = index % c; + int n_idx = index / c; + for (int i = 0; i < k; i++) + { + int idx_idx = n_idx * k + i; + int input_idx = idx[idx_idx] * c + c_idx; + atomicAdd(grad_input + input_idx, grad_output[index] * weight[idx_idx]); + } +} + +void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output) { + // input: input: (m, c), idx: (n, k), weight: (n, k), output: output (n, c) + dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); + dim3 
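Likewise, interpolation is a weighted mix over k source neighbors per output point; a PyTorch reference sketch of the forward pass wrapped above:

```python
import torch

def interpolation_forward_ref(input, idx, weight):
    # input: (m, c), idx: (n, k), weight: (n, k) -> output: (n, c)
    return (input[idx] * weight.unsqueeze(-1)).sum(dim=1)
```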
threads(THREADS_PER_BLOCK); + interpolation_forward_cuda_kernel<<>>(n, c, k, input, idx, weight, output); +} + +void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input) { + // input: grad_output: (n, c), idx: (n, k), weight: (n, k), output: grad_input (m, c) + dim3 blocks(DIVUP(n * c, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + interpolation_backward_cuda_kernel<<>>(n, c, k, grad_output, idx, weight, grad_input); +} diff --git a/Pointcept/libs/pointops2/src/interpolation/interpolation_cuda_kernel.h b/Pointcept/libs/pointops2/src/interpolation/interpolation_cuda_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..309e5dd0a34ccb58807bbf32389ba65e7ee6961b --- /dev/null +++ b/Pointcept/libs/pointops2/src/interpolation/interpolation_cuda_kernel.h @@ -0,0 +1,20 @@ +#ifndef _INTERPOLATION_CUDA_KERNEL +#define _INTERPOLATION_CUDA_KERNEL +#include +#include +#include + +void interpolation_forward_cuda(int n, int c, int k, at::Tensor input_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor output_tensor); +void interpolation_backward_cuda(int n, int c, int k, at::Tensor grad_output_tensor, at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_input_tensor); + +#ifdef __cplusplus +extern "C" { +#endif + +void interpolation_forward_cuda_launcher(int n, int c, int k, const float *input, const int *idx, const float *weight, float *output); +void interpolation_backward_cuda_launcher(int n, int c, int k, const float *grad_output, const int *idx, const float *weight, float *grad_input); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/Pointcept/libs/pointops2/src/knnquery/knnquery_cuda.cpp b/Pointcept/libs/pointops2/src/knnquery/knnquery_cuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a90fe9fc44f8d4963b8e0c3246ad09a8c2e01222 --- /dev/null +++ b/Pointcept/libs/pointops2/src/knnquery/knnquery_cuda.cpp @@ -0,0 +1,16 @@ +#include +#include +#include +#include "knnquery_cuda_kernel.h" + + +void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor) +{ + const float *xyz = xyz_tensor.data_ptr(); + const float *new_xyz = new_xyz_tensor.data_ptr(); + const int *offset = offset_tensor.data_ptr(); + const int *new_offset = new_offset_tensor.data_ptr(); + int *idx = idx_tensor.data_ptr(); + float *dist2 = dist2_tensor.data_ptr(); + knnquery_cuda_launcher(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); +} diff --git a/Pointcept/libs/pointops2/src/knnquery/knnquery_cuda_kernel.cu b/Pointcept/libs/pointops2/src/knnquery/knnquery_cuda_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..83762bc0110e38c7b5fa8adf0ef4ce255bc9d0b9 --- /dev/null +++ b/Pointcept/libs/pointops2/src/knnquery/knnquery_cuda_kernel.cu @@ -0,0 +1,116 @@ +#include "../cuda_utils.h" +#include "knnquery_cuda_kernel.h" + + +__device__ void swap_float(float *x, float *y) +{ + float tmp = *x; + *x = *y; + *y = tmp; +} + + +__device__ void swap_int(int *x, int *y) +{ + int tmp = *x; + *x = *y; + *y = tmp; +} + + +__device__ void reheap(float *dist, int *idx, int k) +{ + int root = 0; + int child = root * 2 + 1; + while (child < k) + { + if(child + 1 < k && dist[child+1] > dist[child]) + child++; + if(dist[root] > dist[child]) + return; + swap_float(&dist[root], &dist[child]); + swap_int(&idx[root], 
&idx[child]); + root = child; + child = root * 2 + 1; + } +} + + +__device__ void heap_sort(float *dist, int *idx, int k) +{ + int i; + for (i = k - 1; i > 0; i--) + { + swap_float(&dist[0], &dist[i]); + swap_int(&idx[0], &idx[i]); + reheap(dist, idx, i); + } +} + + +__device__ int get_bt_idx(int idx, const int *offset) +{ + int i = 0; + while (1) + { + if (idx < offset[i]) + break; + else + i++; + } + return i; +} + + +__global__ void knnquery_cuda_kernel(int m, int nsample, const float *__restrict__ xyz, const float *__restrict__ new_xyz, const int *__restrict__ offset, const int *__restrict__ new_offset, int *__restrict__ idx, float *__restrict__ dist2) { + // input: xyz (n, 3) new_xyz (m, 3) + // output: idx (m, nsample) dist2 (m, nsample) + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (pt_idx >= m) return; + + new_xyz += pt_idx * 3; + idx += pt_idx * nsample; + dist2 += pt_idx * nsample; + int bt_idx = get_bt_idx(pt_idx, new_offset); + int start; + if (bt_idx == 0) + start = 0; + else + start = offset[bt_idx - 1]; + int end = offset[bt_idx]; + + float new_x = new_xyz[0]; + float new_y = new_xyz[1]; + float new_z = new_xyz[2]; + + float best_dist[100]; + int best_idx[100]; + for(int i = 0; i < nsample; i++){ + best_dist[i] = 1e10; + best_idx[i] = start; + } + for(int i = start; i < end; i++){ + float x = xyz[i * 3 + 0]; + float y = xyz[i * 3 + 1]; + float z = xyz[i * 3 + 2]; + float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); + if (d2 < best_dist[0]){ + best_dist[0] = d2; + best_idx[0] = i; + reheap(best_dist, best_idx, nsample); + } + } + heap_sort(best_dist, best_idx, nsample); + for(int i = 0; i < nsample; i++){ + idx[i] = best_idx[i]; + dist2[i] = best_dist[i]; + } +} + + +void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2) { + // input: new_xyz: (m, 3), xyz: (n, 3), idx: (m, nsample) + dim3 blocks(DIVUP(m, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + knnquery_cuda_kernel<<>>(m, nsample, xyz, new_xyz, offset, new_offset, idx, dist2); +} diff --git a/Pointcept/libs/pointops2/src/knnquery/knnquery_cuda_kernel.h b/Pointcept/libs/pointops2/src/knnquery/knnquery_cuda_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..3c0aedfe8fbe6c427ee15bb550c2c1829e9f4b97 --- /dev/null +++ b/Pointcept/libs/pointops2/src/knnquery/knnquery_cuda_kernel.h @@ -0,0 +1,18 @@ +#ifndef _KNNQUERY_CUDA_KERNEL +#define _KNNQUERY_CUDA_KERNEL +#include +#include +#include + +void knnquery_cuda(int m, int nsample, at::Tensor xyz_tensor, at::Tensor new_xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor idx_tensor, at::Tensor dist2_tensor); + +#ifdef __cplusplus +extern "C" { +#endif + +void knnquery_cuda_launcher(int m, int nsample, const float *xyz, const float *new_xyz, const int *offset, const int *new_offset, int *idx, float *dist2); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/Pointcept/libs/pointops2/src/pointops_api.cpp b/Pointcept/libs/pointops2/src/pointops_api.cpp new file mode 100644 index 0000000000000000000000000000000000000000..812789f7d4fdf961b960641ba6c2fd660c16a654 --- /dev/null +++ b/Pointcept/libs/pointops2/src/pointops_api.cpp @@ -0,0 +1,45 @@ +#include +#include + +#include "knnquery/knnquery_cuda_kernel.h" +#include "sampling/sampling_cuda_kernel.h" +#include "grouping/grouping_cuda_kernel.h" +#include "interpolation/interpolation_cuda_kernel.h" +#include 
"aggregation/aggregation_cuda_kernel.h" +#include "subtraction/subtraction_cuda_kernel.h" +#include "attention/attention_cuda_kernel.h" +#include "rpe/relative_pos_encoding_cuda_kernel.h" +#include "attention_v2/attention_cuda_kernel_v2.h" +#include "rpe_v2/relative_pos_encoding_cuda_kernel_v2.h" + + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("knnquery_cuda", &knnquery_cuda, "knnquery_cuda"); + m.def("furthestsampling_cuda", &furthestsampling_cuda, "furthestsampling_cuda"); + m.def("grouping_forward_cuda", &grouping_forward_cuda, "grouping_forward_cuda"); + m.def("grouping_backward_cuda", &grouping_backward_cuda, "grouping_backward_cuda"); + m.def("interpolation_forward_cuda", &interpolation_forward_cuda, "interpolation_forward_cuda"); + m.def("interpolation_backward_cuda", &interpolation_backward_cuda, "interpolation_backward_cuda"); + m.def("subtraction_forward_cuda", &subtraction_forward_cuda, "subtraction_forward_cuda"); + m.def("subtraction_backward_cuda", &subtraction_backward_cuda, "subtraction_backward_cuda"); + m.def("aggregation_forward_cuda", &aggregation_forward_cuda, "aggregation_forward_cuda"); + m.def("aggregation_backward_cuda", &aggregation_backward_cuda, "aggregation_backward_cuda"); + m.def("attention_step1_forward_cuda", &attention_step1_forward_cuda, "attention_step1_forward_cuda"); + m.def("attention_step1_backward_cuda", &attention_step1_backward_cuda, "attention_step1_backward_cuda"); + m.def("attention_step2_forward_cuda", &attention_step2_forward_cuda, "attention_step2_forward_cuda"); + m.def("attention_step2_backward_cuda", &attention_step2_backward_cuda, "attention_step2_backward_cuda"); + m.def("dot_prod_with_idx_forward_cuda", &dot_prod_with_idx_forward_cuda, "dot_prod_with_idx_forward_cuda"); + m.def("dot_prod_with_idx_backward_cuda", &dot_prod_with_idx_backward_cuda, "dot_prod_with_idx_backward_cuda"); + m.def("attention_step2_with_rel_pos_value_forward_cuda", &attention_step2_with_rel_pos_value_forward_cuda, "attention_step2_with_rel_pos_value_forward_cuda"); + m.def("attention_step2_with_rel_pos_value_backward_cuda", &attention_step2_with_rel_pos_value_backward_cuda, "attention_step2_with_rel_pos_value_backward_cuda"); + m.def("attention_step1_forward_cuda_v2", &attention_step1_forward_cuda_v2, "attention_step1_forward_cuda_v2"); + m.def("attention_step1_backward_cuda_v2", &attention_step1_backward_cuda_v2, "attention_step1_backward_cuda_v2"); + m.def("attention_step2_forward_cuda_v2", &attention_step2_forward_cuda_v2, "attention_step2_forward_cuda_v2"); + m.def("attention_step2_backward_cuda_v2", &attention_step2_backward_cuda_v2, "attention_step2_backward_cuda_v2"); + m.def("dot_prod_with_idx_forward_cuda_v2", &dot_prod_with_idx_forward_cuda_v2, "dot_prod_with_idx_forward_cuda_v2"); + m.def("dot_prod_with_idx_backward_cuda_v2", &dot_prod_with_idx_backward_cuda_v2, "dot_prod_with_idx_backward_cuda_v2"); + m.def("attention_step2_with_rel_pos_value_forward_cuda_v2", &attention_step2_with_rel_pos_value_forward_cuda_v2, "attention_step2_with_rel_pos_value_forward_cuda_v2"); + m.def("attention_step2_with_rel_pos_value_backward_cuda_v2", &attention_step2_with_rel_pos_value_backward_cuda_v2, "attention_step2_with_rel_pos_value_backward_cuda_v2"); + m.def("dot_prod_with_idx_forward_cuda_v3", &dot_prod_with_idx_forward_cuda_v3, "dot_prod_with_idx_forward_cuda_v3"); + m.def("dot_prod_with_idx_backward_cuda_v3", &dot_prod_with_idx_backward_cuda_v3, "dot_prod_with_idx_backward_cuda_v3"); + } diff --git 
a/Pointcept/libs/pointops2/src/rpe/relative_pos_encoding_cuda.cpp b/Pointcept/libs/pointops2/src/rpe/relative_pos_encoding_cuda.cpp new file mode 100644 index 0000000000000000000000000000000000000000..333a1b1a083a2b1e9699917c7f74f73eab176f43 --- /dev/null +++ b/Pointcept/libs/pointops2/src/rpe/relative_pos_encoding_cuda.cpp @@ -0,0 +1,59 @@ +#include +#include +#include +#include "relative_pos_encoding_cuda_kernel.h" + +void dot_prod_with_idx_forward_cuda(int N, int M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, + at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor) +{ + const float *q = q_tensor.data_ptr(); + const float *table = table_tensor.data_ptr(); + const int *index = index_tensor.data_ptr(); + const int *rel_idx = rel_idx_tensor.data_ptr(); + float *output = output_tensor.data_ptr(); + dot_prod_with_idx_forward_cuda_launcher(N, M, h, hdim, q, index, table, rel_idx, output); +} + +void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, + at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, + at::Tensor grad_q_tensor, at::Tensor grad_table_tensor) +{ + const float *grad_out = grad_out_tensor.data_ptr(); + const float *q = q_tensor.data_ptr(); + const int *index = index_tensor.data_ptr(); + const float *table = table_tensor.data_ptr(); + const int *rel_idx = rel_idx_tensor.data_ptr(); + float *grad_q = grad_q_tensor.data_ptr(); + float *grad_table = grad_table_tensor.data_ptr(); + dot_prod_with_idx_backward_cuda_launcher(N, M, h, hdim, grad_out, q, index, table, rel_idx, grad_q, grad_table); +} + +void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, + at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor) +{ + const float *attn = attn_tensor.data_ptr(); + const float *v = v_tensor.data_ptr(); + const int *index0 = index0_tensor.data_ptr(); + const int *index1 = index1_tensor.data_ptr(); + const float *table = table_tensor.data_ptr(); + const int *rel_idx = rel_idx_tensor.data_ptr(); + float *output = output_tensor.data_ptr(); + attention_step2_with_rel_pos_value_forward_cuda_launcher(N, M, h, hdim, attn, v, index0, index1, table, rel_idx, output); +} + +void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, + at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, + at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor) +{ + const float *grad_out = grad_out_tensor.data_ptr(); + const int *index0 = index0_tensor.data_ptr(); + const int *index1 = index1_tensor.data_ptr(); + const float *attn = attn_tensor.data_ptr(); + const float *v = v_tensor.data_ptr(); + const float *table = table_tensor.data_ptr(); + const int *rel_idx = rel_idx_tensor.data_ptr(); + float *grad_attn = grad_attn_tensor.data_ptr(); + float *grad_v = grad_v_tensor.data_ptr(); + float *grad_table = grad_table_tensor.data_ptr(); + attention_step2_with_rel_pos_value_backward_cuda_launcher(N, M, h, hdim, grad_out, index0, index1, attn, v, table, rel_idx, grad_attn, grad_v, grad_table); +} diff --git a/Pointcept/libs/pointops2/src/rpe/relative_pos_encoding_cuda_kernel.cu b/Pointcept/libs/pointops2/src/rpe/relative_pos_encoding_cuda_kernel.cu new file mode 100644 index 
0000000000000000000000000000000000000000..8ccab4db11e11d57a600787e6cef8a4a281a2791 --- /dev/null +++ b/Pointcept/libs/pointops2/src/rpe/relative_pos_encoding_cuda_kernel.cu @@ -0,0 +1,136 @@ +/* written by Xin Lai. Email: xinlai@cse.cuhk.edu.hk */ + +#include "../cuda_utils.h" +#include "relative_pos_encoding_cuda_kernel.h" + + +__global__ void dot_prod_with_idx_forward_cuda_kernel( // M, h, hdim + int N, int M, int h, int hdim, const float *q, const int *index, + const float *table, const int *rel_idx, float *output) { + // input: q: (N, h, hdim), index: (M), table: (L, h, hdim, 3), rel_idx: (M, 3), output: (M, h) + + int c_idx = blockIdx.z; + int h_idx = blockIdx.y; + int thread_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_idx >= M*3 || h_idx >= h || c_idx >= hdim) return; + + int dim = thread_idx % 3; + int m_idx = thread_idx / 3; + + int q_idx = index[m_idx]; + int rel_idx_dim = rel_idx[thread_idx]; + float rel_table_val = table[rel_idx_dim*h*hdim*3+h_idx*hdim*3+c_idx*3+dim]; + float val = q[q_idx*h*hdim+h_idx*hdim+c_idx] * rel_table_val; + atomicAdd(output+m_idx*h+h_idx, val); +} + +__global__ void dot_prod_with_idx_backward_cuda_kernel( // M, h, hdim + int N, int M, int h, int hdim, const float *grad_out, const float *q, const int *index, + const float *table, const int *rel_idx, float *grad_q, float *grad_table) { + + int c_idx = blockIdx.z; + int h_idx = blockIdx.y; + int thread_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_idx >= M*3 || h_idx >= h || c_idx >= hdim) return; + + int dim = thread_idx % 3; + int m_idx = thread_idx / 3; + + int q_idx = index[m_idx]; + int rel_idx_dim = rel_idx[thread_idx]; + int grad_out_idx = m_idx*h+h_idx; + float grad_out_value = grad_out[grad_out_idx]; + + float rel_table_val = table[rel_idx_dim*h*hdim*3+h_idx*hdim*3+c_idx*3+dim]; + atomicAdd(grad_q+q_idx*h*hdim+h_idx*hdim+c_idx, grad_out_value * rel_table_val); + + float q_value = q[q_idx*h*hdim+h_idx*hdim+c_idx]; + atomicAdd(grad_table+rel_idx_dim*h*hdim*3+h_idx*hdim*3+c_idx*3+dim, grad_out_value * q_value); +} + +void dot_prod_with_idx_forward_cuda_launcher(int N, int M, int h, int hdim, const float *q, const int *index, + const float *table, const int *rel_idx, float *output) { + // input: q: (N, h, hdim), index: (M), table: (L, h, hdim, 3), rel_idx: (M, 3) + //dim3 blocks(DIVUP(hdim, THREADS_PER_BLOCK), h, M); + dim3 blocks(DIVUP(M*3, THREADS_PER_BLOCK), h, hdim); + dim3 threads(THREADS_PER_BLOCK); + dot_prod_with_idx_forward_cuda_kernel<<>>(N, M, h, hdim, q, index, table, rel_idx, output); +} + +void dot_prod_with_idx_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, + const float *q, const int *index, const float *table, const int *rel_idx, float *grad_q, float *grad_table) { + // input: grad_out: (M, h), output: grad_q: (N, h, hdim), grad_table: (L, h, hdim, 3) + //dim3 blocks(DIVUP(hdim, THREADS_PER_BLOCK), h, M); + dim3 blocks(DIVUP(M*3, THREADS_PER_BLOCK), h, hdim); + dim3 threads(THREADS_PER_BLOCK); + dot_prod_with_idx_backward_cuda_kernel<<>>(N, M, h, hdim, grad_out, q, index, table, rel_idx, grad_q, grad_table); +} + +__global__ void attention_step2_with_rel_pos_value_forward_cuda_kernel( // M, h, hdim + int N, int M, int h, int hdim, const float *attn, const float *v, + const int *index0, const int *index1, const float *table, const int *rel_idx, float *output) { + // input: attn: (M, h), v: (N, h, hdim), index0: (M, ), index1: (M, ), table: (L, h, hdim, 3), rel_idx: (M, 3) + + int c_idx = blockIdx.z; + int h_idx = blockIdx.y; + int 
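Per pair `m` and head, the forward kernel above computes the dot product between the query feature and a positional embedding assembled from the three per-axis slices of the table; a PyTorch reference sketch matching the step1 RPE test scripts:

```python
import torch

def dot_prod_with_idx_ref(q, index, table, rel_idx):
    # q: (N, h, hdim), index: (M,), table: (L, h, hdim, 3), rel_idx: (M, 3)
    tx, ty, tz = table[..., 0], table[..., 1], table[..., 2]        # (L, h, hdim)
    pe = tx[rel_idx[:, 0]] + ty[rel_idx[:, 1]] + tz[rel_idx[:, 2]]  # (M, h, hdim)
    return (q[index] * pe).sum(dim=-1)                              # (M, h)
```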
thread_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_idx >= M*3 || h_idx >= h || c_idx >= hdim) return; + + int dim = thread_idx % 3; + int m_idx = thread_idx / 3; + + int idx1 = index1[m_idx]; + + int rel_idx_dim = rel_idx[thread_idx]; + float table_val = table[rel_idx_dim*h*hdim*3+h_idx*hdim*3+c_idx*3+dim]; + + float val = attn[m_idx*h+h_idx] * (v[idx1*h*hdim+h_idx*hdim+c_idx] / 3.0 + table_val); + + int idx0 = index0[m_idx]; + atomicAdd(output+idx0*h*hdim+h_idx*hdim+c_idx, val); +} + + +__global__ void attention_step2_with_rel_pos_value_backward_cuda_kernel( // M, h, hdim + int N, int M, int h, int hdim, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, const float *table, + const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table) { + // input: attn: (M, h), v: (N, h, hdim), index0: (M, ), index1: (M, ), table: (L, h, hdim, 3), rel_idx: (M, 3) + + int c_idx = blockIdx.z; + int h_idx = blockIdx.y; + int thread_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_idx >= M*3 || h_idx >= h || c_idx >= hdim) return; + + int dim = thread_idx % 3; + int m_idx = thread_idx / 3; + + int idx0 = index0[m_idx]; + int idx1 = index1[m_idx]; + int grad_out_idx = idx0*h*hdim+h_idx*hdim+c_idx; + + int rel_idx_dim = rel_idx[thread_idx]; + float table_val = table[rel_idx_dim*h*hdim*3+h_idx*hdim*3+c_idx*3+dim]; + float grad_out_value = grad_out[grad_out_idx]; + + atomicAdd(grad_attn+m_idx*h+h_idx, grad_out_value * (v[idx1*h*hdim+h_idx*hdim+c_idx]/3 + table_val)); + atomicAdd(grad_v+idx1*h*hdim+h_idx*hdim+c_idx, grad_out_value * attn[m_idx*h+h_idx]/3); + atomicAdd(grad_table+rel_idx_dim*h*hdim*3+h_idx*hdim*3+c_idx*3+dim, grad_out_value * attn[m_idx*h+h_idx]); +} + +void attention_step2_with_rel_pos_value_forward_cuda_launcher(int N, int M, int h, int hdim, const float *attn, const float *v, const int *index0, + const int *index1, const float *table, const int *rel_idx, float *output) { + // input: attn: (M, h), v: (N, h, hdim), index0: (M, ), index1: (M, ), table: (L, h, hdim, 3), rel_idx: (M, 3) + //dim3 blocks(DIVUP(hdim, THREADS_PER_BLOCK), h, M); + dim3 blocks(DIVUP(M*3, THREADS_PER_BLOCK), h, hdim); + dim3 threads(THREADS_PER_BLOCK); + attention_step2_with_rel_pos_value_forward_cuda_kernel<<>>(N, M, h, hdim, attn, v, index0, index1, table, rel_idx, output); +} + +void attention_step2_with_rel_pos_value_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const int *index0, + const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table) { + // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c) + //dim3 blocks(DIVUP(hdim, THREADS_PER_BLOCK), h, M); + dim3 blocks(DIVUP(M*3, THREADS_PER_BLOCK), h, hdim); + dim3 threads(THREADS_PER_BLOCK); + attention_step2_with_rel_pos_value_backward_cuda_kernel<<>>(N, M, h, hdim, grad_out, index0, index1, attn, v, table, rel_idx, grad_attn, grad_v, grad_table); +} diff --git a/Pointcept/libs/pointops2/src/rpe/relative_pos_encoding_cuda_kernel.h b/Pointcept/libs/pointops2/src/rpe/relative_pos_encoding_cuda_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..cafc7b69152fff9c0c440a093346fb6005923db0 --- /dev/null +++ b/Pointcept/libs/pointops2/src/rpe/relative_pos_encoding_cuda_kernel.h @@ -0,0 +1,26 @@ +#ifndef _RPE_CUDA_KERNEL +#define _RPE_CUDA_KERNEL +#include +#include +#include + +void dot_prod_with_idx_forward_cuda(int N, int 
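The `/ 3.0` above is not part of the math: the grid is `M * 3` threads wide, so each pair is visited once per coordinate axis and `v` would otherwise be accumulated three times, while the table contribution is genuinely per-axis. The computation it realizes, as a PyTorch sketch (this mirrors `test_relative_pos_encoding_op_step2.py`):

```python
import torch
from torch_scatter import scatter_sum

def step2_with_rpe_ref(attn, v, index0, index1, table, rel_idx, N):
    tx, ty, tz = table[..., 0], table[..., 1], table[..., 2]        # (L, h, hdim)
    pe = tx[rel_idx[:, 0]] + ty[rel_idx[:, 1]] + tz[rel_idx[:, 2]]  # (M, h, hdim)
    msg = attn.unsqueeze(-1) * (v[index1] + pe)                     # (M, h, hdim)
    return scatter_sum(msg, index0, dim=0, dim_size=N)              # (N, h, hdim)
```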
M, int h, int hdim, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); +void dot_prod_with_idx_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_table_tensor); + +void attention_step2_with_rel_pos_value_forward_cuda(int N, int M, int h, int hdim, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor); +void attention_step2_with_rel_pos_value_backward_cuda(int N, int M, int h, int hdim, at::Tensor grad_out_tensor, at::Tensor index0_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor); + +#ifdef __cplusplus +extern "C" { +#endif + +void dot_prod_with_idx_forward_cuda_launcher(int N, int M, int h, int hdim, const float *q, const int *index, const float *table, const int *rel_idx, float *output); +void dot_prod_with_idx_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const float *q, const int *index, const float *table, const int *rel_idx, float *grad_q, float *grad_table); + +void attention_step2_with_rel_pos_value_forward_cuda_launcher(int N, int M, int h, int hdim, const float *attn, const float *v, const int *index0, const int *index1, const float *table, const int *rel_idx, float *output); +void attention_step2_with_rel_pos_value_backward_cuda_launcher(int N, int M, int h, int hdim, const float *grad_out, const int *index0, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/Pointcept/libs/pointops2/src/rpe_v2/relative_pos_encoding_cuda_kernel_v2.cu b/Pointcept/libs/pointops2/src/rpe_v2/relative_pos_encoding_cuda_kernel_v2.cu new file mode 100644 index 0000000000000000000000000000000000000000..afa536a480c6f20969528e34d7b9d9ea29175b06 --- /dev/null +++ b/Pointcept/libs/pointops2/src/rpe_v2/relative_pos_encoding_cuda_kernel_v2.cu @@ -0,0 +1,527 @@ +/* written by Xin Lai. 
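+// Note on the variants in this file (restored as a reading aid): the v1
+// kernels in ../rpe parallelize over (pair, head, channel) with global
+// atomics; the *_v2 kernels below assume the M query-key pairs are sorted
+// by rel_idx (sort_indices) and grouped into T tiles (rel_idx_offsets), so
+// each block sums the three table lookups once into shared memory and
+// reuses them for every pair in the tile; the *_v3 kernels instead group
+// pairs by query through the prefix-sum offsets index_q_offsets.
+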
+// N, M, h, q, index_q, k, index_k, table_q, table_k, rel_idx, rel_idx_offsets, output
+
+template <unsigned int d>
+__global__ void dot_prod_with_idx_forward_cuda_kernel_v2( // M, h, hdim
+    int N, int M, int h, const float *q, const int *index_q, const float *k, const int *index_k,
+    const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets,
+    const int *sort_indices, float *output) {
+    // input: q: (N, h, hdim), index: (M), table: (L, h, hdim, 3), rel_idx: (M, 3), output: (M, h)
+
+    int h_idx = blockIdx.y;
+    int t_idx = blockIdx.x;
+    int n_idx = threadIdx.x;
+    int C = h*d;
+
+    __shared__ int start, end;
+    if(n_idx == 0){
+        start = rel_idx_offsets[t_idx];
+        end = rel_idx_offsets[t_idx+1];
+        // printf("e2: start: %d, end: %d\n", start, end);
+    }
+
+    __syncthreads();
+
+    int m_idx_prev = start + n_idx;
+    // if(m_idx_prev >= end)
+    //     return;
+
+    __shared__ int m_idx;
+    if(n_idx == 0)
+        m_idx = sort_indices[m_idx_prev];
+
+    __syncthreads();
+
+    __shared__ int rel_idx_vec[3];
+    if(n_idx < 3)
+        rel_idx_vec[n_idx] = rel_idx[m_idx*3 + n_idx];
+
+    __syncthreads();
+
+    __shared__ float table_q_vec[d];
+    __shared__ float table_k_vec[d];
+
+    for(int i = n_idx; i < 2*d; i += blockDim.x){
+        if (i < d){
+            int ind0 = rel_idx_vec[0] * C * 3 + h_idx * d * 3 + i * 3 + 0;
+            int ind1 = rel_idx_vec[1] * C * 3 + h_idx * d * 3 + i * 3 + 1;
+            int ind2 = rel_idx_vec[2] * C * 3 + h_idx * d * 3 + i * 3 + 2;
+            table_q_vec[i] = table_q[ind0] + table_q[ind1] + table_q[ind2];
+        } else{
+            int ind0 = rel_idx_vec[0] * C * 3 + h_idx * d * 3 + (i-d) * 3 + 0;
+            int ind1 = rel_idx_vec[1] * C * 3 + h_idx * d * 3 + (i-d) * 3 + 1;
+            int ind2 = rel_idx_vec[2] * C * 3 + h_idx * d * 3 + (i-d) * 3 + 2;
+            table_k_vec[i-d] = table_k[ind0] + table_k[ind1] + table_k[ind2];
+        }
+    }
+
+    __syncthreads();
+
+    for(int i = m_idx_prev; i < end; i += blockDim.x){
+        float sum = 0;
+        int m_idx_i = sort_indices[i];
+        int q_idx = index_q[m_idx_i];
+        int k_idx = index_k[m_idx_i];
+        for(int j = 0; j < d; j++){
+            sum += q[q_idx*C + h_idx*d + j] * table_q_vec[j];
+            sum += k[k_idx*C + h_idx*d + j] * table_k_vec[j];
+        }
+        output[m_idx_i*h + h_idx] = sum;
+    }
+}
+
+// N, M, h, hdim, grad_out, q, index_q, k, index_k, table_q, table_k, rel_idx, rel_idx_offsets, sort_indices, grad_q, grad_k, grad_table_q, grad_table_k
+
+template <unsigned int d>
+__global__ void dot_prod_with_idx_backward_cuda_kernel_v2( // M, h, hdim
+    int N, int M, int h, const float *grad_out, const float *q, const int *index_q,
+    const float *k, const int *index_k, const float *table_q, const float *table_k,
+    const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *grad_q,
+    float *grad_k, float *grad_table_q, float *grad_table_k) {
+
+    int h_idx = blockIdx.y;
+    int t_idx = blockIdx.x;
+    int n_idx = threadIdx.x;
+    int C = h*d;
+
+    __shared__ int start, end;
+    if(n_idx == 0){
+        start = rel_idx_offsets[t_idx];
+        end = rel_idx_offsets[t_idx+1];
+    }
+
+    __syncthreads();
+
+    int m_idx_prev = start + n_idx;
+    // if(m_idx_prev >= end)
+    //     return;
+
+    __shared__ int m_idx;
+    if(n_idx == 0)
+        m_idx = sort_indices[m_idx_prev];
+
+    __syncthreads();
+
+    __shared__ int rel_idx_vec[3];
+    if(n_idx < 3)
+        rel_idx_vec[n_idx] = rel_idx[m_idx*3 + n_idx];
+
+    __syncthreads();
+
+    __shared__ float table_q_vec[d];
+    __shared__ float table_k_vec[d];
+
+    for(int i = n_idx; i < 2*d; i += blockDim.x){
+        if (i < d){
+            int ind0 = rel_idx_vec[0] * C * 3 + h_idx * d * 3 + i * 3 + 0;
+            int ind1 = rel_idx_vec[1] * C * 3 + h_idx * d * 3 + i * 3 + 1;
+            int ind2 = rel_idx_vec[2] * C * 3 + h_idx * d * 3 + i * 3 + 2;
+            table_q_vec[i] = table_q[ind0] + table_q[ind1] + table_q[ind2];
+        } else{
+            int ind0 = rel_idx_vec[0] * C * 3 + h_idx * d * 3 + (i-d) * 3 + 0;
+            int ind1 = rel_idx_vec[1] * C * 3 + h_idx * d * 3 + (i-d) * 3 + 1;
+            int ind2 = rel_idx_vec[2] * C * 3 + h_idx * d * 3 + (i-d) * 3 + 2;
+            table_k_vec[i-d] = table_k[ind0] + table_k[ind1] + table_k[ind2];
+        }
+    }
+
+    __shared__ float gradient_q[d];
+    __shared__ float gradient_k[d];
+    for(int i = n_idx; i < d; i += blockDim.x){
+        gradient_q[i] = 0;
+        gradient_k[i] = 0;
+    }
+
+    __syncthreads();
+
+    for(int i = m_idx_prev; i < end; i += blockDim.x){
+        int m_idx_i = sort_indices[i];
+        int q_idx = index_q[m_idx_i];
+        int k_idx = index_k[m_idx_i];
+        float grad_out_i = grad_out[m_idx_i*h+h_idx];
+        for(int j = 0; j < d; j++){
+            atomicAdd(&gradient_q[j], q[q_idx*C + h_idx*d + j] * grad_out_i);
+            atomicAdd(&gradient_k[j], k[k_idx*C + h_idx*d + j] * grad_out_i);
+            atomicAdd(grad_q + q_idx*C + h_idx*d + j, table_q_vec[j] * grad_out_i);
+            atomicAdd(grad_k + k_idx*C + h_idx*d + j, table_k_vec[j] * grad_out_i);
+        }
+    }
+
+    __syncthreads();
+
+    for(int i = n_idx; i < d*2; i += blockDim.x){
+        if(i < d){
+            atomicAdd(grad_table_q + rel_idx_vec[0] * C * 3 + h_idx * d * 3 + i * 3, gradient_q[i]);
+            atomicAdd(grad_table_q + rel_idx_vec[1] * C * 3 + h_idx * d * 3 + i * 3 + 1, gradient_q[i]);
+            atomicAdd(grad_table_q + rel_idx_vec[2] * C * 3 + h_idx * d * 3 + i * 3 + 2, gradient_q[i]);
+        }else{
+            atomicAdd(grad_table_k + rel_idx_vec[0] * C * 3 + h_idx * d * 3 + (i-d) * 3, gradient_k[i-d]);
+            atomicAdd(grad_table_k + rel_idx_vec[1] * C * 3 + h_idx * d * 3 + (i-d) * 3 + 1, gradient_k[i-d]);
+            atomicAdd(grad_table_k + rel_idx_vec[2] * C * 3 + h_idx * d * 3 + (i-d) * 3 + 2, gradient_k[i-d]);
+        }
+    }
+
+    // int c_idx = blockIdx.z;
+    // int h_idx = blockIdx.y;
+    // int thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
+    // if (thread_idx >= M*3 || h_idx >= h || c_idx >= hdim) return;
+
+    // int dim = thread_idx % 3;
+    // int m_idx = thread_idx / 3;
+
+    // int q_idx = index[m_idx];
+    // int rel_idx_dim = rel_idx[thread_idx];
+    // int grad_out_idx = m_idx*h+h_idx;
+    // float grad_out_value = grad_out[grad_out_idx];
+
+    // float rel_table_val = table[rel_idx_dim*h*hdim*3+h_idx*hdim*3+c_idx*3+dim];
+    // atomicAdd(grad_q+q_idx*h*hdim+h_idx*hdim+c_idx, grad_out_value * rel_table_val);
+
+    // float q_value = q[q_idx*h*hdim+h_idx*hdim+c_idx];
+    // atomicAdd(grad_table+rel_idx_dim*h*hdim*3+h_idx*hdim*3+c_idx*3+dim, grad_out_value * q_value);
+}
+
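+// Launch configuration for the v2 launchers below: one block per
+// (tile, head). opt_n_threads(n_max) rounds the largest tile size down to
+// a power of two, the doubling restores full coverage when n_max was not
+// an exact power of two, and counts above the 1024-thread block limit fall
+// back to 512 (the per-tile loops are strided, so fewer threads still work).
+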
+void dot_prod_with_idx_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T, const float *q,
+    const int *index_q, const float *k, const int *index_k, const float *table_q, const float *table_k,
+    const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *output)
+{
+    // input: q: (N, h, hdim), index: (M), table: (L, h, hdim, 3), rel_idx: (M, 3)
+    //dim3 blocks(DIVUP(hdim, THREADS_PER_BLOCK), h, M);
+    dim3 blocks(T, h);
+    // dim3 threads(THREADS_PER_BLOCK);
+
+    unsigned int n_threads = opt_n_threads(n_max);
+    n_threads = n_threads == n_max ? n_threads : n_threads * 2;
+    n_threads = n_threads > 1024 ? 512 : n_threads;
+
+    // printf("e1: T: %d, h: %d, n_threads: %d\n", T, h, n_threads);
+
+    switch (hdim) {
+        case 16:
+            dot_prod_with_idx_forward_cuda_kernel_v2<16><<<blocks, n_threads, 0>>>(N, M, h, q, index_q, k, index_k, table_q, table_k, rel_idx, rel_idx_offsets, sort_indices, output);
+            break;
+        case 32:
+            dot_prod_with_idx_forward_cuda_kernel_v2<32><<<blocks, n_threads, 0>>>(N, M, h, q, index_q, k, index_k, table_q, table_k, rel_idx, rel_idx_offsets, sort_indices, output);
+            break;
+        default:
+            throw "d != 16 and d != 32";
+    }
+}
+
+void dot_prod_with_idx_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T,
+    const float *grad_out, const float *q, const int *index_q, const float *k, const int *index_k,
+    const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices,
+    float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k)
+{
+    // input: grad_out: (M, h), output: grad_q: (N, h, hdim), grad_table: (L, h, hdim, 3)
+    //dim3 blocks(DIVUP(hdim, THREADS_PER_BLOCK), h, M);
+    // dim3 blocks(DIVUP(M*3, THREADS_PER_BLOCK), h, hdim);
+    // dim3 threads(THREADS_PER_BLOCK);
+
+    dim3 blocks(T, h);
+    // dim3 threads(THREADS_PER_BLOCK);
+
+    unsigned int n_threads = opt_n_threads(n_max);
+    n_threads = n_threads == n_max ? n_threads : n_threads * 2;
+    n_threads = n_threads > 1024 ? 512 : n_threads;
+
+    switch (hdim) {
+        case 16:
+            dot_prod_with_idx_backward_cuda_kernel_v2<16><<<blocks, n_threads, 0>>>(N, M, h, grad_out, q, index_q, k, index_k, table_q, table_k, rel_idx, rel_idx_offsets, sort_indices, grad_q, grad_k, grad_table_q, grad_table_k);
+            break;
+        case 32:
+            dot_prod_with_idx_backward_cuda_kernel_v2<32><<<blocks, n_threads, 0>>>(N, M, h, grad_out, q, index_q, k, index_k, table_q, table_k, rel_idx, rel_idx_offsets, sort_indices, grad_q, grad_k, grad_table_q, grad_table_k);
            break;
+        default:
+            throw "d != 16 and d != 32";
+    }
+}
+
+
+template <unsigned int d>
+__global__ void dot_prod_with_idx_forward_cuda_kernel_v3( // M, h, hdim
+    int N, int M, int h, const float *q, const int *index_q_offsets, const float *k, const int *index_k,
+    const float *table_q, const float *table_k, const int *rel_idx, float *output) {
+    // input: q: (N, h, hdim), index: (M), table: (L, h, hdim, 3), rel_idx: (M, 3), output: (M, h)
+    int q_idx = blockIdx.x;
+    int h_idx = blockIdx.y;
+    int n_idx = threadIdx.x;
+    int C = h*d;
+
+    __shared__ float query_vec[d];
+    __shared__ int start, end;
+    if (n_idx == 0){
+        start = index_q_offsets[q_idx];
+        end = index_q_offsets[q_idx+1];
+    }
+    for(int i = n_idx; i < d; i += blockDim.x)
+        query_vec[i] = q[q_idx*C + h_idx*d + i];
+
+    __syncthreads();
+
+    int m_idx = start + n_idx;
+    if(m_idx >= end)
+        return;
+
+    int k_idx = index_k[m_idx];
+    int r_idx1 = rel_idx[m_idx*3], r_idx2 = rel_idx[m_idx*3+1], r_idx3 = rel_idx[m_idx*3+2];
+    float sum = 0;
+    for(int i = 0; i < d; i++){
+        float table_q_scalar_i = table_q[r_idx1*C*3+h_idx*d*3+i*3] + table_q[r_idx2*C*3+h_idx*d*3+i*3+1] + table_q[r_idx3*C*3+h_idx*d*3+i*3+2];
+        sum += query_vec[i] * table_q_scalar_i;
+        float table_k_scalar_i = table_k[r_idx1*C*3+h_idx*d*3+i*3] + table_k[r_idx2*C*3+h_idx*d*3+i*3+1] + table_k[r_idx3*C*3+h_idx*d*3+i*3+2];
+        sum += k[k_idx*C+h_idx*d+i] * table_k_scalar_i;
+    }
+    output[m_idx*h + h_idx] = sum;
+}
+
+// N, M, h, hdim, grad_out, q, index_q, k, index_k, table_q, table_k, rel_idx, rel_idx_offsets, sort_indices, grad_q, grad_k, grad_table_q, grad_table_k
+
+template <unsigned int d>
+__global__ void dot_prod_with_idx_backward_cuda_kernel_v3( // M, h, hdim
+    int N, int M, int h, const float *grad_out, const float *q, const int *index_q_offsets,
+    const float *k, const int *index_k, const float *table_q, const float *table_k,
+    const int *rel_idx, float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k) {
+
+    int q_idx = blockIdx.x;
+    int h_idx = blockIdx.y;
+    int n_idx = threadIdx.x;
+    int C = h*d;
+
+    __shared__ float query_vec[d];
+    __shared__ int start, end;
+    if (n_idx == 0){
+        start = index_q_offsets[q_idx];
+        end = index_q_offsets[q_idx+1];
+    }
+    for(int i = n_idx; i < d; i += blockDim.x)
+        query_vec[i] = q[q_idx*C + h_idx*d + i];
+
+    __shared__ float gradients_q[d];
+    for(int i = n_idx; i < d; i += blockDim.x){
+        gradients_q[i] = 0;
+    }
+
+    __syncthreads();
+
+    int m_idx = start + n_idx;
+
+    if(m_idx < end){
+        int k_idx = index_k[m_idx];
+        int r_idx1 = rel_idx[m_idx*3], r_idx2 = rel_idx[m_idx*3+1], r_idx3 = rel_idx[m_idx*3+2];
+        float gradient = grad_out[m_idx*h + h_idx];
+        for(int i = 0; i < d; i++){
+            float table_q_scalar_i = table_q[r_idx1*C*3+h_idx*d*3+i*3] + table_q[r_idx2*C*3+h_idx*d*3+i*3+1] + table_q[r_idx3*C*3+h_idx*d*3+i*3+2];
+            float table_k_scalar_i = table_k[r_idx1*C*3+h_idx*d*3+i*3] + table_k[r_idx2*C*3+h_idx*d*3+i*3+1] + table_k[r_idx3*C*3+h_idx*d*3+i*3+2];
+            float q_scalar_i = query_vec[i];
+            float k_scalar_i = k[k_idx*C+h_idx*d+i];
+            atomicAdd(&gradients_q[i], table_q_scalar_i * gradient);
+            atomicAdd(grad_k+k_idx*C+h_idx*d+i, table_k_scalar_i * gradient);
+            atomicAdd(grad_table_q+r_idx1*C*3+h_idx*d*3+i*3, q_scalar_i * gradient);
+            atomicAdd(grad_table_q+r_idx2*C*3+h_idx*d*3+i*3+1, q_scalar_i * gradient);
+            atomicAdd(grad_table_q+r_idx3*C*3+h_idx*d*3+i*3+2, q_scalar_i * gradient);
+            atomicAdd(grad_table_k+r_idx1*C*3+h_idx*d*3+i*3, k_scalar_i * gradient);
+            atomicAdd(grad_table_k+r_idx2*C*3+h_idx*d*3+i*3+1, k_scalar_i * gradient);
+            atomicAdd(grad_table_k+r_idx3*C*3+h_idx*d*3+i*3+2, k_scalar_i * gradient);
+        }
+    }
+    __syncthreads();
+
+    for(int i = n_idx; i < d; i += blockDim.x){
+        grad_q[q_idx*C+h_idx*d+i] = gradients_q[i];
+    }
+}
+
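+// The v3 launchers below assign one block to each (query, head) pair and
+// let its threads enumerate that query's neighbors via index_q_offsets,
+// so no per-tile sorting of rel_idx is required.
+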
+void dot_prod_with_idx_forward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max, const float *q,
+    const int *index_q_offsets, const float *k, const int *index_k, const float *table_q, const float *table_k,
+    const int *rel_idx, float *output)
+{
+    // input: q: (N, h, hdim), index: (M), table: (L, h, hdim, 3), rel_idx: (M, 3)
+    //dim3 blocks(DIVUP(hdim, THREADS_PER_BLOCK), h, M);
+    dim3 blocks(N, h);
+    // dim3 threads(THREADS_PER_BLOCK);
+
+    unsigned int n_threads = opt_n_threads(n_max);
+    n_threads = n_threads == n_max ? n_threads : n_threads * 2;
+
+    // printf("e1: h: %d, n_max: %d, n_threads: %d\n", h, n_max, n_threads);
+
+    switch (hdim) {
+        case 16:
+            dot_prod_with_idx_forward_cuda_kernel_v3<16><<<blocks, n_threads, 0>>>(N, M, h, q, index_q_offsets, k, index_k, table_q, table_k, rel_idx, output);
+            break;
+        case 32:
+            dot_prod_with_idx_forward_cuda_kernel_v3<32><<<blocks, n_threads, 0>>>(N, M, h, q, index_q_offsets, k, index_k, table_q, table_k, rel_idx, output);
+            break;
+        default:
+            throw "d != 16 and d != 32";
+    }
+}
+
+void dot_prod_with_idx_backward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max,
+    const float *grad_out, const float *q, const int *index_q_offsets, const float *k, const int *index_k,
+    const float *table_q, const float *table_k, const int *rel_idx,
+    float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k)
+{
+    // input: grad_out: (M, h), output: grad_q: (N, h, hdim), grad_table: (L, h, hdim, 3)
+    //dim3 blocks(DIVUP(hdim, THREADS_PER_BLOCK), h, M);
+    // dim3 blocks(DIVUP(M*3, THREADS_PER_BLOCK), h, hdim);
+    // dim3 threads(THREADS_PER_BLOCK);
+
+    dim3 blocks(N, h);
+    // dim3 threads(THREADS_PER_BLOCK);
+
+    unsigned int n_threads = opt_n_threads(n_max);
+    n_threads = n_threads == n_max ? n_threads : n_threads * 2;
+
+    switch (hdim) {
+        case 16:
+            dot_prod_with_idx_backward_cuda_kernel_v3<16><<<blocks, n_threads, 0>>>(N, M, h, grad_out, q, index_q_offsets, k, index_k, table_q, table_k, rel_idx, grad_q, grad_k, grad_table_q, grad_table_k);
+            break;
+        case 32:
+            dot_prod_with_idx_backward_cuda_kernel_v3<32><<<blocks, n_threads, 0>>>(N, M, h, grad_out, q, index_q_offsets, k, index_k, table_q, table_k, rel_idx, grad_q, grad_k, grad_table_q, grad_table_k);
+            break;
+        default:
+            throw "d != 16 and d != 32";
+    }
+}
+
+
+template <unsigned int d>
+__global__ void attention_step2_with_rel_pos_value_forward_cuda_kernel_v2( // M, h, hdim
+    int N, int M, int h, const float *attn, const float *v,
+    const int *index0_offsets, const int *index1, const float *table, const int *rel_idx, float *output) {
+    // input: attn: (M, h), v: (N, h, hdim), index0: (M, ), index1: (M, ), table: (L, h, hdim, 3), rel_idx: (M, 3)
+
+    int q_idx = blockIdx.x;
+    int h_idx = blockIdx.y;
+    int n_idx = threadIdx.x;
+
+    int C = h*d;
+
+    __shared__ int start, end;
+    __shared__ float result[d];
+
+    if (n_idx == 0){
+        start = index0_offsets[q_idx];
+        end = index0_offsets[q_idx+1];
+    }
+    for (int i = n_idx; i < d; i += blockDim.x){
+        result[i] = 0;
+    }
+
+    __syncthreads();
+
+    int m_idx = start + n_idx;
+    if (m_idx < end){
+        float attn_scalar = attn[m_idx*h + h_idx];
+        int r_idx1 = rel_idx[m_idx*3], r_idx2 = rel_idx[m_idx*3+1], r_idx3 = rel_idx[m_idx*3+2];
+        for(int i = 0; i < d; i ++){
+            int v_idx = index1[m_idx];
+            float table_scaler_i = table[r_idx1*C*3+h_idx*d*3+i*3] + table[r_idx2*C*3+h_idx*d*3+i*3+1] + table[r_idx3*C*3+h_idx*d*3+i*3+2];
+            float value_scaler_i = v[v_idx*C + h_idx*d + i];
+            atomicAdd(&result[i], (table_scaler_i + value_scaler_i) * attn_scalar);
+        }
+    }
+
+    __syncthreads();
+
+    for (int i = n_idx; i < d; i += blockDim.x)
+        output[q_idx*C + h_idx*d + i] = result[i];
+}
+
+
+template <unsigned int d>
+__global__ void attention_step2_with_rel_pos_value_backward_cuda_kernel_v2( // M, h, hdim
+    int N, int M, int h, const float *grad_out, const int *index0_offsets, const int *index1, const float *attn, const float *v, const float *table,
+    const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table) {
+    // input: attn: (M, h), v: (N, h, hdim), index0: (M, ), index1: (M, ), table: (L, h, hdim, 3), rel_idx: (M, 3)
+
+    int q_idx = blockIdx.x;
+    int h_idx = blockIdx.y;
+    int n_idx = threadIdx.x;
+
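+    // Block (q_idx, h_idx) handles one query for one head: the grad_out
+    // slice of this query is staged in shared memory below, then each
+    // thread takes one attended pair m_idx and scatters its contributions
+    // to grad_v and grad_table with atomicAdd (grad_attn has a single
+    // writer per pair, so it is written directly).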
+    int C = h*d;
+
+    __shared__ int start, end;
+    __shared__ float gradients[d];
+
+    if (n_idx == 0){
+        start = index0_offsets[q_idx];
+        end = index0_offsets[q_idx+1];
+    }
+    for (int i = n_idx; i < d; i += blockDim.x){
+        gradients[i] = grad_out[q_idx*C + h_idx*d + i];
+    }
+
+    __syncthreads();
+
+    int m_idx = start + n_idx;
+    if (m_idx < end){
+        int v_idx = index1[m_idx];
+        int r_idx1 = rel_idx[m_idx*3], r_idx2 = rel_idx[m_idx*3+1], r_idx3 = rel_idx[m_idx*3+2];
+        float attn_scalar = attn[m_idx*h + h_idx];
+        float grad_attn_sum = 0;
+        for (int i = 0; i < d; i++){
+            float grad_out_scaler_i = gradients[i];
+            float table_scaler_i = table[r_idx1*C*3+h_idx*d*3+i*3] + table[r_idx2*C*3+h_idx*d*3+i*3+1] + table[r_idx3*C*3+h_idx*d*3+i*3+2];
+            float value_scaler_i = v[v_idx*C + h_idx*d + i];
+            grad_attn_sum += (table_scaler_i + value_scaler_i) * grad_out_scaler_i;
+            atomicAdd(grad_v + v_idx*C + h_idx*d + i, attn_scalar * grad_out_scaler_i);
+            atomicAdd(grad_table + r_idx1*C*3 + h_idx*d*3 + i*3, attn_scalar * grad_out_scaler_i);
+            atomicAdd(grad_table + r_idx2*C*3 + h_idx*d*3 + i*3 + 1, attn_scalar * grad_out_scaler_i);
+            atomicAdd(grad_table + r_idx3*C*3 + h_idx*d*3 + i*3 + 2, attn_scalar * grad_out_scaler_i);
+        }
+        grad_attn[m_idx*h + h_idx] = grad_attn_sum;
+    }
+}
+
+void attention_step2_with_rel_pos_value_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *attn, const float *v, const int *index0_offsets,
+    const int *index1, const float *table, const int *rel_idx, float *output) {
+    // input: attn: (M, h), v: (N, h, hdim), index0: (M, ), index1: (M, ), table: (L, h, hdim, 3), rel_idx: (M, 3)
+    //dim3 blocks(DIVUP(hdim, THREADS_PER_BLOCK), h, M);
+    // dim3 blocks(DIVUP(M*3, THREADS_PER_BLOCK), h, hdim);
+    // dim3 threads(THREADS_PER_BLOCK);
+    dim3 blocks(N, h);
+    unsigned int n_threads = opt_n_threads(n_max);
+    n_threads = n_threads == n_max ? n_threads : n_threads * 2;
+
+    switch (hdim) {
+        case 16:
+            attention_step2_with_rel_pos_value_forward_cuda_kernel_v2<16><<<blocks, n_threads, 0>>>(N, M, h, attn, v, index0_offsets, index1, table, rel_idx, output);
+            break;
+        case 32:
+            attention_step2_with_rel_pos_value_forward_cuda_kernel_v2<32><<<blocks, n_threads, 0>>>(N, M, h, attn, v, index0_offsets, index1, table, rel_idx, output);
+            break;
+        default:
+            throw "d != 16 and d != 32";
+    }
+}
+
+void attention_step2_with_rel_pos_value_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *grad_out, const int *index0_offsets,
+    const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table) {
+    // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
+    //dim3 blocks(DIVUP(hdim, THREADS_PER_BLOCK), h, M);
+
+    dim3 blocks(N, h);
+    unsigned int n_threads = opt_n_threads(n_max);
+    n_threads = n_threads == n_max ? n_threads : n_threads * 2;
+
+    switch (hdim) {
+        case 16:
+            attention_step2_with_rel_pos_value_backward_cuda_kernel_v2<16><<<blocks, n_threads, 0>>>(N, M, h, grad_out, index0_offsets, index1, attn, v, table, rel_idx, grad_attn, grad_v, grad_table);
+            break;
+        case 32:
+            attention_step2_with_rel_pos_value_backward_cuda_kernel_v2<32><<<blocks, n_threads, 0>>>(N, M, h, grad_out, index0_offsets, index1, attn, v, table, rel_idx, grad_attn, grad_v, grad_table);
+            break;
+        default:
+            throw "d != 16 and d != 32";
+    }
+}
diff --git a/Pointcept/libs/pointops2/src/rpe_v2/relative_pos_encoding_cuda_kernel_v2.h b/Pointcept/libs/pointops2/src/rpe_v2/relative_pos_encoding_cuda_kernel_v2.h
new file mode 100644
index 0000000000000000000000000000000000000000..648b152afe16d3011b62ff141a4e20b2a83579b4
--- /dev/null
+++ b/Pointcept/libs/pointops2/src/rpe_v2/relative_pos_encoding_cuda_kernel_v2.h
@@ -0,0 +1,32 @@
+#ifndef _RPE_V2_CUDA_KERNEL
+#define _RPE_V2_CUDA_KERNEL
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+
+void dot_prod_with_idx_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, int T, at::Tensor q_tensor, at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor, at::Tensor sort_indices_tensor, at::Tensor output_tensor);
+void dot_prod_with_idx_backward_cuda_v2(int N, int M, int h, int hdim, int n_max, int T, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor, at::Tensor sort_indices_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor);
+
+void dot_prod_with_idx_forward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor q_tensor, at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor);
+void dot_prod_with_idx_backward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor, at::Tensor q_tensor, at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor);
+
+void attention_step2_with_rel_pos_value_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor);
+void attention_step2_with_rel_pos_value_backward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor, at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void dot_prod_with_idx_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T, const float *q, const int *index_q, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *output);
+void dot_prod_with_idx_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, int T, const float *grad_out, const float *q, const int *index_q, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, const int *rel_idx_offsets, const int *sort_indices, float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k);
+
+void dot_prod_with_idx_forward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max, const float *q, const int *index_q_offsets, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, float *output);
+void dot_prod_with_idx_backward_cuda_launcher_v3(int N, int M, int h, int hdim, int n_max, const float *grad_out, const float *q, const int *index_q_offsets, const float *k, const int *index_k, const float *table_q, const float *table_k, const int *rel_idx, float *grad_q, float *grad_k, float *grad_table_q, float *grad_table_k);
+
+void attention_step2_with_rel_pos_value_forward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *attn, const float *v, const int *index0_offsets, const int *index1, const float *table, const int *rel_idx, float *output);
+void attention_step2_with_rel_pos_value_backward_cuda_launcher_v2(int N, int M, int h, int hdim, int n_max, const float *grad_out, const int *index0_offsets, const int *index1, const float *attn, const float *v, const float *table, const int *rel_idx, float *grad_attn, float *grad_v, float *grad_table);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/Pointcept/libs/pointops2/src/rpe_v2/relative_pos_encoding_cuda_v2.cpp b/Pointcept/libs/pointops2/src/rpe_v2/relative_pos_encoding_cuda_v2.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..091380881625d469d1eba681e63321e7f4ea1b2f
--- /dev/null
+++ b/Pointcept/libs/pointops2/src/rpe_v2/relative_pos_encoding_cuda_v2.cpp
@@ -0,0 +1,110 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "relative_pos_encoding_cuda_kernel_v2.h"
+
+void dot_prod_with_idx_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, int T, at::Tensor q_tensor,
+    at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor,
+    at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor, at::Tensor sort_indices_tensor, at::Tensor output_tensor)
+{
+    const float *q = q_tensor.data_ptr<float>();
+    const int *index_q = index_q_tensor.data_ptr<int>();
+    const float *k = k_tensor.data_ptr<float>();
+    const int *index_k = index_k_tensor.data_ptr<int>();
+    const float *table_q = table_q_tensor.data_ptr<float>();
+    const float *table_k = table_k_tensor.data_ptr<float>();
+    const int *rel_idx = rel_idx_tensor.data_ptr<int>();
+    const int *rel_idx_offsets = rel_idx_offsets_tensor.data_ptr<int>();
+    const int *sort_indices = sort_indices_tensor.data_ptr<int>();
+    float *output = output_tensor.data_ptr<float>();
+    dot_prod_with_idx_forward_cuda_launcher_v2(N, M, h, hdim, n_max, T, q, index_q, k, index_k, table_q, table_k, rel_idx, rel_idx_offsets, sort_indices, output);
+}
+
+void dot_prod_with_idx_backward_cuda_v2(int N, int M, int h, int hdim, int n_max, int T, at::Tensor grad_out_tensor,
+    at::Tensor q_tensor, at::Tensor index_q_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor,
+    at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor rel_idx_offsets_tensor,
+    at::Tensor sort_indices_tensor, at::Tensor grad_q_tensor, at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor)
+{
+    const float *grad_out = grad_out_tensor.data_ptr<float>();
+    const float *q = q_tensor.data_ptr<float>();
+    const int *index_q = index_q_tensor.data_ptr<int>();
+    const float *k = k_tensor.data_ptr<float>();
+    const int *index_k = index_k_tensor.data_ptr<int>();
+    const float *table_q = table_q_tensor.data_ptr<float>();
+    const float *table_k = table_k_tensor.data_ptr<float>();
+    const int *rel_idx = rel_idx_tensor.data_ptr<int>();
+    const int *rel_idx_offsets = rel_idx_offsets_tensor.data_ptr<int>();
+    const int *sort_indices = sort_indices_tensor.data_ptr<int>();
+    float *grad_q = grad_q_tensor.data_ptr<float>();
+    float *grad_k = grad_k_tensor.data_ptr<float>();
+    float *grad_table_q = grad_table_q_tensor.data_ptr<float>();
+    float *grad_table_k = grad_table_k_tensor.data_ptr<float>();
+    dot_prod_with_idx_backward_cuda_launcher_v2(N, M, h, hdim, n_max, T, grad_out, q, index_q, k, index_k, table_q, table_k, rel_idx, rel_idx_offsets, sort_indices, grad_q, grad_k, grad_table_q, grad_table_k);
+}
+
+
+void dot_prod_with_idx_forward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor q_tensor,
+    at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor, at::Tensor table_q_tensor,
+    at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor)
+{
+    const float *q = q_tensor.data_ptr<float>();
+    const int *index_q_offsets = index_q_offsets_tensor.data_ptr<int>();
+    const float *k = k_tensor.data_ptr<float>();
+    const int *index_k = index_k_tensor.data_ptr<int>();
+    const float *table_q = table_q_tensor.data_ptr<float>();
+    const float *table_k = table_k_tensor.data_ptr<float>();
+    const int *rel_idx = rel_idx_tensor.data_ptr<int>();
+    float *output = output_tensor.data_ptr<float>();
+    dot_prod_with_idx_forward_cuda_launcher_v3(N, M, h, hdim, n_max, q, index_q_offsets, k, index_k, table_q, table_k, rel_idx, output);
+}
+
+void dot_prod_with_idx_backward_cuda_v3(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor,
+    at::Tensor q_tensor, at::Tensor index_q_offsets_tensor, at::Tensor k_tensor, at::Tensor index_k_tensor,
+    at::Tensor table_q_tensor, at::Tensor table_k_tensor, at::Tensor rel_idx_tensor, at::Tensor grad_q_tensor,
+    at::Tensor grad_k_tensor, at::Tensor grad_table_q_tensor, at::Tensor grad_table_k_tensor)
+{
+    const float *grad_out = grad_out_tensor.data_ptr<float>();
+    const float *q = q_tensor.data_ptr<float>();
+    const int *index_q_offsets = index_q_offsets_tensor.data_ptr<int>();
+    const float *k = k_tensor.data_ptr<float>();
+    const int *index_k = index_k_tensor.data_ptr<int>();
+    const float *table_q = table_q_tensor.data_ptr<float>();
+    const float *table_k = table_k_tensor.data_ptr<float>();
+    const int *rel_idx = rel_idx_tensor.data_ptr<int>();
+    float *grad_q = grad_q_tensor.data_ptr<float>();
+    float *grad_k = grad_k_tensor.data_ptr<float>();
+    float *grad_table_q = grad_table_q_tensor.data_ptr<float>();
+    float *grad_table_k = grad_table_k_tensor.data_ptr<float>();
+    dot_prod_with_idx_backward_cuda_launcher_v3(N, M, h, hdim, n_max, grad_out, q, index_q_offsets, k, index_k, table_q, table_k, rel_idx, grad_q, grad_k, grad_table_q, grad_table_k);
+}
+
+
+void attention_step2_with_rel_pos_value_forward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor attn_tensor, at::Tensor v_tensor,
+    at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor table_tensor, at::Tensor rel_idx_tensor, at::Tensor output_tensor)
+{
+    const float *attn = attn_tensor.data_ptr<float>();
+    const float *v = v_tensor.data_ptr<float>();
+    const int *index0_offsets = index0_offsets_tensor.data_ptr<int>();
+    const int *index1 = index1_tensor.data_ptr<int>();
+    const float *table = table_tensor.data_ptr<float>();
+    const int *rel_idx = rel_idx_tensor.data_ptr<int>();
+    float *output = output_tensor.data_ptr<float>();
+    attention_step2_with_rel_pos_value_forward_cuda_launcher_v2(N, M, h, hdim, n_max, attn, v, index0_offsets, index1, table, rel_idx, output);
+}
+
+void attention_step2_with_rel_pos_value_backward_cuda_v2(int N, int M, int h, int hdim, int n_max, at::Tensor grad_out_tensor,
+    at::Tensor index0_offsets_tensor, at::Tensor index1_tensor, at::Tensor attn_tensor, at::Tensor v_tensor, at::Tensor table_tensor,
+    at::Tensor rel_idx_tensor, at::Tensor grad_attn_tensor, at::Tensor grad_v_tensor, at::Tensor grad_table_tensor)
+{
+    const float *grad_out = grad_out_tensor.data_ptr<float>();
+    const int *index0_offsets = index0_offsets_tensor.data_ptr<int>();
+    const int *index1 = index1_tensor.data_ptr<int>();
+    const float *attn = attn_tensor.data_ptr<float>();
+    const float *v = v_tensor.data_ptr<float>();
+    const float *table = table_tensor.data_ptr<float>();
+    const int *rel_idx = rel_idx_tensor.data_ptr<int>();
+    float *grad_attn = grad_attn_tensor.data_ptr<float>();
+    float *grad_v = grad_v_tensor.data_ptr<float>();
+    float *grad_table = grad_table_tensor.data_ptr<float>();
+    attention_step2_with_rel_pos_value_backward_cuda_launcher_v2(N, M, h, hdim, n_max, grad_out, index0_offsets, index1, attn, v, table, rel_idx, grad_attn, grad_v, grad_table);
+}
diff --git a/Pointcept/libs/pointops2/src/sampling/sampling_cuda.cpp b/Pointcept/libs/pointops2/src/sampling/sampling_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..395b446750e6c9030b3b53f7e65e3ba2b3c01e80
--- /dev/null
+++ b/Pointcept/libs/pointops2/src/sampling/sampling_cuda.cpp
@@ -0,0 +1,15 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "sampling_cuda_kernel.h"
+
+
+void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor)
+{
+    const float *xyz = xyz_tensor.data_ptr<float>();
+    const int *offset = offset_tensor.data_ptr<int>();
+    const int *new_offset = new_offset_tensor.data_ptr<int>();
+    float *tmp = tmp_tensor.data_ptr<float>();
+    int *idx = idx_tensor.data_ptr<int>();
+    furthestsampling_cuda_launcher(b, n, xyz, offset, new_offset, tmp, idx);
+}
diff --git a/Pointcept/libs/pointops2/src/sampling/sampling_cuda_kernel.cu b/Pointcept/libs/pointops2/src/sampling/sampling_cuda_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..eeb69da3d213ec17fc349334173de6ecfe07a784
--- /dev/null
+++ b/Pointcept/libs/pointops2/src/sampling/sampling_cuda_kernel.cu
@@ -0,0 +1,171 @@
+#include "../cuda_utils.h"
+#include "sampling_cuda_kernel.h"
+
+
+__device__ void __update(float *dists, int *dists_i, int idx1, int idx2) {
+    const float v1 = dists[idx1], v2 = dists[idx2];
+    const int i1 = dists_i[idx1], i2 = dists_i[idx2];
+    dists[idx1] = max(v1, v2);
+    dists_i[idx1] = v2 > v1 ? i2 : i1;
+}
+
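+// __update folds the candidate at idx2 into slot idx1, keeping the larger
+// distance together with its point index; the kernel below uses it for a
+// block-wide tree reduction over per-thread farthest candidates.
+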
+// input xyz: (n, 3), tmp: (b, n_max)
+// output idx (m)
+template <unsigned int block_size>
+__global__ void furthestsampling_cuda_kernel(const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx)
+{
+    __shared__ float dists[block_size];
+    __shared__ int dists_i[block_size];
+
+    int bid = blockIdx.x;
+    int start_n, end_n, start_m, end_m, old;
+    if (bid == 0) {
+        start_n = 0;
+        end_n = offset[0];
+        start_m = 0;
+        end_m = new_offset[0];
+        old = 0;
+    }
+    else {
+        start_n = offset[bid - 1];
+        end_n = offset[bid];
+        start_m = new_offset[bid - 1];
+        end_m = new_offset[bid];
+        old = offset[bid - 1];
+    }
+
+    const int stride = block_size;
+    int tid = threadIdx.x;
+    if (tid == 0) idx[start_m] = start_n;
+
+    __syncthreads();
+    for (int j = start_m + 1; j < end_m; j++)
+    {
+        int besti = start_n;
+        float best = -1;
+        float x1 = xyz[old * 3 + 0];
+        float y1 = xyz[old * 3 + 1];
+        float z1 = xyz[old * 3 + 2];
+        for (int k = start_n + tid; k < end_n; k += stride)
+        {
+            float x2 = xyz[k * 3 + 0];
+            float y2 = xyz[k * 3 + 1];
+            float z2 = xyz[k * 3 + 2];
+            float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
+            float d2 = min(d, tmp[k]);
+            tmp[k] = d2;
+            besti = d2 > best ? k : besti;
+            best = d2 > best ? d2 : best;
+        }
+        dists[tid] = best;
+        dists_i[tid] = besti;
+        __syncthreads();
+
+        if (block_size >= 1024) {
+            if (tid < 512) {
+                __update(dists, dists_i, tid, tid + 512);
+            }
+            __syncthreads();
+        }
+        if (block_size >= 512) {
+            if (tid < 256) {
+                __update(dists, dists_i, tid, tid + 256);
+            }
+            __syncthreads();
+        }
+        if (block_size >= 256) {
+            if (tid < 128) {
+                __update(dists, dists_i, tid, tid + 128);
+            }
+            __syncthreads();
+        }
+        if (block_size >= 128) {
+            if (tid < 64) {
+                __update(dists, dists_i, tid, tid + 64);
+            }
+            __syncthreads();
+        }
+        if (block_size >= 64) {
+            if (tid < 32) {
+                __update(dists, dists_i, tid, tid + 32);
+            }
+            __syncthreads();
+        }
+        if (block_size >= 32) {
+            if (tid < 16) {
+                __update(dists, dists_i, tid, tid + 16);
+            }
+            __syncthreads();
+        }
+        if (block_size >= 16) {
+            if (tid < 8) {
+                __update(dists, dists_i, tid, tid + 8);
+            }
+            __syncthreads();
+        }
+        if (block_size >= 8) {
+            if (tid < 4) {
+                __update(dists, dists_i, tid, tid + 4);
+            }
+            __syncthreads();
+        }
+        if (block_size >= 4) {
+            if (tid < 2) {
+                __update(dists, dists_i, tid, tid + 2);
+            }
+            __syncthreads();
+        }
+        if (block_size >= 2) {
+            if (tid < 1) {
+                __update(dists, dists_i, tid, tid + 1);
+            }
+            __syncthreads();
+        }
+
+        old = dists_i[0];
+        if (tid == 0)
+            idx[j] = old;
+    }
+}
+
+void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx)
+{
+    unsigned int n_threads = opt_n_threads(n);
+    switch (n_threads) {
+        case 1024:
+            furthestsampling_cuda_kernel<1024><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+            break;
+        case 512:
+            furthestsampling_cuda_kernel<512><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+            break;
+        case 256:
+            furthestsampling_cuda_kernel<256><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+            break;
+        case 128:
+            furthestsampling_cuda_kernel<128><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+            break;
+        case 64:
+            furthestsampling_cuda_kernel<64><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+            break;
+        case 32:
+            furthestsampling_cuda_kernel<32><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+            break;
+        case 16:
+            furthestsampling_cuda_kernel<16><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+            break;
+        case 8:
+            furthestsampling_cuda_kernel<8><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+            break;
+        case 4:
+            furthestsampling_cuda_kernel<4><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+            break;
+        case 2:
+            furthestsampling_cuda_kernel<2><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+            break;
+        case 1:
+            furthestsampling_cuda_kernel<1><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+            break;
+        default:
+            furthestsampling_cuda_kernel<512><<<b, n_threads, 0>>>(xyz, offset, new_offset, tmp, idx);
+    }
+}
diff --git a/Pointcept/libs/pointops2/src/sampling/sampling_cuda_kernel.h b/Pointcept/libs/pointops2/src/sampling/sampling_cuda_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..c903f638eb30bbf5bf01141ed2740cc0cd37452e
--- /dev/null
+++ b/Pointcept/libs/pointops2/src/sampling/sampling_cuda_kernel.h
@@ -0,0 +1,18 @@
+#ifndef _SAMPLING_CUDA_KERNEL
+#define _SAMPLING_CUDA_KERNEL
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+
+void furthestsampling_cuda(int b, int n, at::Tensor xyz_tensor, at::Tensor offset_tensor, at::Tensor new_offset_tensor, at::Tensor tmp_tensor, at::Tensor idx_tensor);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void furthestsampling_cuda_launcher(int b, int n, const float *xyz, const int *offset, const int *new_offset, float *tmp, int *idx);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/Pointcept/libs/pointops2/src/subtraction/subtraction_cuda.cpp b/Pointcept/libs/pointops2/src/subtraction/subtraction_cuda.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b343857a1671eafe5199089973e863e2ac5b618c
--- /dev/null
+++ b/Pointcept/libs/pointops2/src/subtraction/subtraction_cuda.cpp
@@ -0,0 +1,23 @@
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+#include "subtraction_cuda_kernel.h"
+
+
+void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor)
+{
+    const float *input1 = input1_tensor.data_ptr<float>();
+    const float *input2 = input2_tensor.data_ptr<float>();
+    const int *idx = idx_tensor.data_ptr<int>();
+    float *output = output_tensor.data_ptr<float>();
+    subtraction_forward_cuda_launcher(n, nsample, c, input1, input2, idx, output);
+}
+
+void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor)
+{
+    const int *idx = idx_tensor.data_ptr<int>();
+    const float *grad_output = grad_output_tensor.data_ptr<float>();
+    float *grad_input1 = grad_input1_tensor.data_ptr<float>();
+    float *grad_input2 = grad_input2_tensor.data_ptr<float>();
+    subtraction_backward_cuda_launcher(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
+}
diff --git a/Pointcept/libs/pointops2/src/subtraction/subtraction_cuda_kernel.cu b/Pointcept/libs/pointops2/src/subtraction/subtraction_cuda_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..9b8d4f752940d580ee2b49f1b2946a8d6386d11a
--- /dev/null
+++ b/Pointcept/libs/pointops2/src/subtraction/subtraction_cuda_kernel.cu
@@ -0,0 +1,44 @@
+#include "../cuda_utils.h"
+#include "subtraction_cuda_kernel.h"
+
+
+__global__ void subtraction_forward_cuda_kernel(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
+    // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+    if (index >= n * nsample * c) return;
+    const int c_idx = index % c;
+    const int nsample_idx = (index / c) % nsample;
+    const int n_idx = index / nsample / c;
+    const int idx_idx = n_idx * nsample + nsample_idx;
+    const int input1_idx = n_idx * c + c_idx;
+    const int input2_idx = idx[idx_idx] * c + c_idx;
+    output[index] = input1[input1_idx] - input2[input2_idx];
+}
+
+__global__ void subtraction_backward_cuda_kernel(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
+    // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+    if (index >= n * nsample * c) return;
+    const int c_idx = index % c;
+    const int nsample_idx = (index / c) % nsample;
+    const int n_idx = index / nsample / c;
+    const int idx_idx = n_idx * nsample + nsample_idx;
+    const int input1_idx = n_idx * c + c_idx;
+    const int input2_idx = idx[idx_idx] * c + c_idx;
+    atomicAdd(grad_input1 + input1_idx, grad_output[index]);
+    atomicAdd(grad_input2 + input2_idx, -grad_output[index]);
+}
+
+void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output) {
+    // input: input1: (n, c), input2: (n, c), idx: (n, nsample), output: (n, nsample, c)
+    dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
+    dim3 threads(THREADS_PER_BLOCK);
+    subtraction_forward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, input1, input2, idx, output);
+}
+
+void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2) {
+    // input: grad_output: (n, nsample, c), output: grad_input1: (n, c), grad_input2: (n, c)
+    dim3 blocks(DIVUP(n * nsample * c, THREADS_PER_BLOCK));
+    dim3 threads(THREADS_PER_BLOCK);
+    subtraction_backward_cuda_kernel<<<blocks, threads, 0>>>(n, nsample, c, idx, grad_output, grad_input1, grad_input2);
+}
diff --git a/Pointcept/libs/pointops2/src/subtraction/subtraction_cuda_kernel.h b/Pointcept/libs/pointops2/src/subtraction/subtraction_cuda_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..856133d97bdd3dc58f29c746ff240fc9d489c22e
--- /dev/null
+++ b/Pointcept/libs/pointops2/src/subtraction/subtraction_cuda_kernel.h
@@ -0,0 +1,20 @@
+#ifndef _SUBTRACTION_CUDA_KERNEL
+#define _SUBTRACTION_CUDA_KERNEL
+#include <vector>
+#include <torch/serialize/tensor.h>
+#include <ATen/cuda/CUDAContext.h>
+
+void subtraction_forward_cuda(int n, int nsample, int c, at::Tensor input1_tensor, at::Tensor input2_tensor, at::Tensor idx_tensor, at::Tensor output_tensor);
+void subtraction_backward_cuda(int n, int nsample, int c, at::Tensor idx_tensor, at::Tensor grad_output_tensor, at::Tensor grad_input1_tensor, at::Tensor grad_input2_tensor);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void subtraction_forward_cuda_launcher(int n, int nsample, int c, const float *input1, const float *input2, const int *idx, float *output);
+void subtraction_backward_cuda_launcher(int n, int nsample, int c, const int *idx, const float *grad_output, float *grad_input1, float *grad_input2);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/Pointcept/pointcept/__init__.py b/Pointcept/pointcept/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/Pointcept/pointcept/datasets/__init__.py b/Pointcept/pointcept/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..a24e44cc46b4067a58f2516de4d0b799e2d7517a
--- /dev/null
+++ b/Pointcept/pointcept/datasets/__init__.py
@@ -0,0 +1,23 @@
+from .defaults import DefaultDataset, ConcatDataset
+from .builder import build_dataset
+from .utils import point_collate_fn, collate_fn
+
+# indoor scene
+from .s3dis import S3DISDataset
+from .scannet import ScanNetDataset, ScanNet200Dataset
+from .scannetpp import ScanNetPPDataset
+from .scannet_pair import ScanNetPairDataset
+from .arkitscenes import ArkitScenesDataset
+from .structure3d import Structured3DDataset
+
+# outdoor scene
+from .semantic_kitti import SemanticKITTIDataset
+from .nuscenes import NuScenesDataset
+from .waymo import WaymoDataset
+
+# object
+from .modelnet import ModelNetDataset
+from .shapenet_part import ShapeNetPartDataset
+
+# dataloader
+from .dataloader import MultiDatasetDataloader
diff --git a/Pointcept/pointcept/datasets/arkitscenes.py b/Pointcept/pointcept/datasets/arkitscenes.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5481bf553351b09c5f3081b95bcafc77c37f979
--- /dev/null
+++ b/Pointcept/pointcept/datasets/arkitscenes.py
@@ -0,0 +1,114 @@
+"""
+ArkitScenes Dataset
+
+Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
+Please cite our work if the code is helpful to you.
+"""
+
+import os
+import glob
+import numpy as np
+import torch
+from copy import deepcopy
+from torch.utils.data import Dataset
+
+from pointcept.utils.logger import get_root_logger
+from .builder import DATASETS
+from .transform import Compose, TRANSFORMS
+from .preprocessing.scannet.meta_data.scannet200_constants import VALID_CLASS_IDS_200
+
+
+@DATASETS.register_module()
+class ArkitScenesDataset(Dataset):
+    def __init__(
+        self,
+        split="Training",
+        data_root="data/ARKitScenesMesh",
+        transform=None,
+        test_mode=False,
+        test_cfg=None,
+        loop=1,
+    ):
+        super(ArkitScenesDataset, self).__init__()
+        self.data_root = data_root
+        self.split = split
+        self.transform = Compose(transform)
+        self.loop = (
+            loop if not test_mode else 1
+        )  # force make loop = 1 while in test mode
+        self.test_mode = test_mode
+        self.test_cfg = test_cfg if test_mode else None
+        self.class2id = np.array(VALID_CLASS_IDS_200)
+
+        if test_mode:
+            self.test_voxelize = TRANSFORMS.build(self.test_cfg.voxelize)
+            self.test_crop = TRANSFORMS.build(self.test_cfg.crop)
+            self.post_transform = Compose(self.test_cfg.post_transform)
+            self.aug_transform = [Compose(aug) for aug in self.test_cfg.aug_transform]
+
+        self.data_list = self.get_data_list()
+        logger = get_root_logger()
+        logger.info(
+            "Totally {} x {} samples in {} set.".format(
+                len(self.data_list), self.loop, split
+            )
+        )
+
+    def get_data_list(self):
+        if isinstance(self.split, str):
+            data_list = glob.glob(os.path.join(self.data_root, self.split, "*.pth"))
+        elif isinstance(self.split, list):
+            data_list = []
+            for split in self.split:
+                data_list += glob.glob(os.path.join(self.data_root, split, "*.pth"))
+        else:
+            raise NotImplementedError
+        return data_list
+
+    def get_data(self, idx):
+        data = torch.load(self.data_list[idx % len(self.data_list)])
+        coord = data["coord"]
+        color = data["color"]
+        normal = data["normal"]
+        segment = np.zeros(coord.shape[0])
+        data_dict = dict(coord=coord, normal=normal, color=color, segment=segment)
+        return data_dict
+
+    def get_data_name(self, idx):
+        # index into data_list directly (the class keeps no separate data_idx)
+        return os.path.basename(self.data_list[idx % len(self.data_list)]).split(".")[0]
+
+    def prepare_train_data(self, idx):
+        # load data
+        data_dict = self.get_data(idx)
+        data_dict = self.transform(data_dict)
+        return data_dict
+
+    def prepare_test_data(self, idx):
+        # load data
+        data_dict = self.get_data(idx)
+        segment = data_dict.pop("segment")
+        data_dict = self.transform(data_dict)
+        data_dict_list = []
+        for aug in self.aug_transform:
+            data_dict_list.append(aug(deepcopy(data_dict)))
+
+        input_dict_list = []
+        for data in data_dict_list:
+            data_part_list = self.test_voxelize(data)
+            for data_part in data_part_list:
+                # use a distinct name for the crop result to avoid rebinding
+                # the list currently being iterated
+                crop_part_list = self.test_crop(data_part)
+                input_dict_list += crop_part_list
+
+        for i in range(len(input_dict_list)):
+            input_dict_list[i] = self.post_transform(input_dict_list[i])
+        return input_dict_list, segment
+
+    def __getitem__(self, idx):
+        if self.test_mode:
+            return self.prepare_test_data(idx)
+        else:
+            return self.prepare_train_data(idx)
+
+    def __len__(self):
+        return len(self.data_list) * self.loop
diff --git a/Pointcept/pointcept/datasets/builder.py b/Pointcept/pointcept/datasets/builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..1fa5f0ee71bf934d5c1bfe5c71446bfecba49f11
--- /dev/null
+++ b/Pointcept/pointcept/datasets/builder.py
@@ -0,0 +1,15 @@
+"""
+Dataset Builder
+
+Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
+Please cite our work if the code is helpful to you.
+"""
+
+from pointcept.utils.registry import Registry
+
+DATASETS = Registry("datasets")
+
+
+def build_dataset(cfg):
+    """Build datasets."""
+    return DATASETS.build(cfg)
diff --git a/Pointcept/pointcept/datasets/dataloader.py b/Pointcept/pointcept/datasets/dataloader.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3c8e1da41179896eb3443e91b2c49d94b62762a
--- /dev/null
+++ b/Pointcept/pointcept/datasets/dataloader.py
@@ -0,0 +1,112 @@
+from functools import partial
+import weakref
+import torch
+import torch.utils.data
+
+import pointcept.utils.comm as comm
+from pointcept.datasets.utils import point_collate_fn
+from pointcept.datasets import ConcatDataset
+from pointcept.utils.env import set_seed
+
+
+class MultiDatasetDummySampler:
+    def __init__(self):
+        self.dataloader = None
+
+    def set_epoch(self, epoch):
+        if comm.get_world_size() > 1:
+            for dataloader in self.dataloader.dataloaders:
+                dataloader.sampler.set_epoch(epoch)
+        return
+
+
+class MultiDatasetDataloader:
+    """
+    Multiple-dataset dataloader. Each batch is drawn from a single sub-dataset,
+    and the mixing ratio between sub-datasets is given by the loop of each one.
+    The overall length is determined by the main (first) dataset and the loop
+    of the concat dataset.
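+
+    Illustrative example (the ratios are placeholders): with two sub-datasets
+    whose original loops are [2, 1], __iter__ yields batches in the fixed
+    pattern A, A, B, A, A, B, ... and iteration stops once the main
+    dataset (A) is exhausted.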
+ """ + + def __init__( + self, + concat_dataset: ConcatDataset, + batch_size_per_gpu: int, + num_worker_per_gpu: int, + mix_prob=0, + seed=None, + ): + self.datasets = concat_dataset.datasets + self.ratios = [dataset.loop for dataset in self.datasets] + # reset data loop, original loop serve as ratios + for dataset in self.datasets: + dataset.loop = 1 + # determine union training epoch by main dataset + self.datasets[0].loop = concat_dataset.loop + # build sub-dataloaders + num_workers = num_worker_per_gpu // len(self.datasets) + self.dataloaders = [] + for dataset_id, dataset in enumerate(self.datasets): + if comm.get_world_size() > 1: + sampler = torch.utils.data.distributed.DistributedSampler(dataset) + else: + sampler = None + + init_fn = ( + partial( + self._worker_init_fn, + dataset_id=dataset_id, + num_workers=num_workers, + num_datasets=len(self.datasets), + rank=comm.get_rank(), + seed=seed, + ) + if seed is not None + else None + ) + self.dataloaders.append( + torch.utils.data.DataLoader( + dataset, + batch_size=batch_size_per_gpu, + shuffle=(sampler is None), + num_workers=num_worker_per_gpu, + sampler=sampler, + collate_fn=partial(point_collate_fn, mix_prob=mix_prob), + pin_memory=True, + worker_init_fn=init_fn, + drop_last=True, + persistent_workers=True, + ) + ) + self.sampler = MultiDatasetDummySampler() + self.sampler.dataloader = weakref.proxy(self) + + def __iter__(self): + iterator = [iter(dataloader) for dataloader in self.dataloaders] + while True: + for i in range(len(self.ratios)): + for _ in range(self.ratios[i]): + try: + batch = next(iterator[i]) + except StopIteration: + if i == 0: + return + else: + iterator[i] = iter(self.dataloaders[i]) + batch = next(iterator[i]) + yield batch + + def __len__(self): + main_data_loader_length = len(self.dataloaders[0]) + return ( + main_data_loader_length // self.ratios[0] * sum(self.ratios) + + main_data_loader_length % self.ratios[0] + ) + + @staticmethod + def _worker_init_fn(worker_id, num_workers, dataset_id, num_datasets, rank, seed): + worker_seed = ( + num_workers * num_datasets * rank + + num_workers * dataset_id + + worker_id + + seed + ) + set_seed(worker_seed) diff --git a/Pointcept/pointcept/datasets/defaults.py b/Pointcept/pointcept/datasets/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..7c7accd75993e3fd198f0331f534c34b6416f314 --- /dev/null +++ b/Pointcept/pointcept/datasets/defaults.py @@ -0,0 +1,214 @@ +""" +Default Datasets + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
+""" + +import os +import glob +import numpy as np +import torch +from copy import deepcopy +from torch.utils.data import Dataset +from collections.abc import Sequence + +from pointcept.utils.logger import get_root_logger +from pointcept.utils.cache import shared_dict + +from .builder import DATASETS, build_dataset +from .transform import Compose, TRANSFORMS + + +@DATASETS.register_module() +class DefaultDataset(Dataset): + VALID_ASSETS = [ + "coord", + "color", + "normal", + "strength", + "segment", + "instance", + "pose", + ] + + def __init__( + self, + split="train", + data_root="data/dataset", + transform=None, + test_mode=False, + test_cfg=None, + cache=False, + ignore_index=-1, + loop=1, + ): + super(DefaultDataset, self).__init__() + self.data_root = data_root + self.split = split + self.transform = Compose(transform) + self.cache = cache + self.ignore_index = ignore_index + self.loop = ( + loop if not test_mode else 1 + ) # force make loop = 1 while in test mode + self.test_mode = test_mode + self.test_cfg = test_cfg if test_mode else None + + if test_mode: + self.test_voxelize = TRANSFORMS.build(self.test_cfg.voxelize) + self.test_crop = ( + TRANSFORMS.build(self.test_cfg.crop) if self.test_cfg.crop else None + ) + self.post_transform = Compose(self.test_cfg.post_transform) + self.aug_transform = [Compose(aug) for aug in self.test_cfg.aug_transform] + + self.data_list = self.get_data_list() + logger = get_root_logger() + logger.info( + "Totally {} x {} samples in {} set.".format( + len(self.data_list), self.loop, split + ) + ) + + def get_data_list(self): + if isinstance(self.split, str): + data_list = glob.glob(os.path.join(self.data_root, self.split, "*")) + elif isinstance(self.split, Sequence): + data_list = [] + for split in self.split: + data_list += glob.glob(os.path.join(self.data_root, split, "*")) + else: + raise NotImplementedError + return data_list + + def get_data(self, idx): + data_path = self.data_list[idx % len(self.data_list)] + name = self.get_data_name(idx) + if self.cache: + cache_name = f"pointcept-{name}" + return shared_dict(cache_name) + + data_dict = {} + assets = os.listdir(data_path) + for asset in assets: + if not asset.endswith(".npy"): + continue + if asset[:-4] not in self.VALID_ASSETS: + continue + data_dict[asset[:-4]] = np.load(os.path.join(data_path, asset)) + data_dict["name"] = name + + if "coord" in data_dict.keys(): + data_dict["coord"] = data_dict["coord"].astype(np.float32) + + if "color" in data_dict.keys(): + data_dict["color"] = data_dict["color"].astype(np.float32) + + if "normal" in data_dict.keys(): + data_dict["normal"] = data_dict["normal"].astype(np.float32) + + if "segment" in data_dict.keys(): + data_dict["segment"] = data_dict["segment"].reshape([-1]).astype(np.int32) + else: + data_dict["segment"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + + if "instance" in data_dict.keys(): + data_dict["instance"] = data_dict["instance"].reshape([-1]).astype(np.int32) + else: + data_dict["instance"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + return data_dict + + def get_data_name(self, idx): + return os.path.basename(self.data_list[idx % len(self.data_list)]) + + def prepare_train_data(self, idx): + # load data + data_dict = self.get_data(idx) + data_dict = self.transform(data_dict) + return data_dict + + def prepare_test_data(self, idx): + # load data + data_dict = self.get_data(idx) + data_dict = self.transform(data_dict) + result_dict = dict(segment=data_dict.pop("segment"), 
+            name=data_dict.pop("name"),
+        )
+        if "origin_segment" in data_dict:
+            assert "inverse" in data_dict
+            result_dict["origin_segment"] = data_dict.pop("origin_segment")
+            result_dict["inverse"] = data_dict.pop("inverse")
+
+        data_dict_list = []
+        for aug in self.aug_transform:
+            data_dict_list.append(aug(deepcopy(data_dict)))
+
+        fragment_list = []
+        for data in data_dict_list:
+            if self.test_voxelize is not None:
+                data_part_list = self.test_voxelize(data)
+            else:
+                data["index"] = np.arange(data["coord"].shape[0])
+                data_part_list = [data]
+            for data_part in data_part_list:
+                if self.test_crop is not None:
+                    data_part = self.test_crop(data_part)
+                else:
+                    data_part = [data_part]
+                fragment_list += data_part
+
+        for i in range(len(fragment_list)):
+            fragment_list[i] = self.post_transform(fragment_list[i])
+        result_dict["fragment_list"] = fragment_list
+        return result_dict
+
+    def __getitem__(self, idx):
+        if self.test_mode:
+            return self.prepare_test_data(idx)
+        else:
+            return self.prepare_train_data(idx)
+
+    def __len__(self):
+        return len(self.data_list) * self.loop
+
+
+@DATASETS.register_module()
+class ConcatDataset(Dataset):
+    def __init__(self, datasets, loop=1):
+        super(ConcatDataset, self).__init__()
+        self.datasets = [build_dataset(dataset) for dataset in datasets]
+        self.loop = loop
+        self.data_list = self.get_data_list()
+        logger = get_root_logger()
+        logger.info(
+            "Totally {} x {} samples in the concat set.".format(
+                len(self.data_list), self.loop
+            )
+        )
+
+    def get_data_list(self):
+        data_list = []
+        for i in range(len(self.datasets)):
+            # pair every sample with its dataset index; an integer dtype is
+            # required so the pairs can be used as list indices in get_data
+            data_list.extend(
+                zip(
+                    np.ones(len(self.datasets[i]), dtype=np.int64) * i,
+                    np.arange(len(self.datasets[i])),
+                )
+            )
+        return data_list
+
+    def get_data(self, idx):
+        dataset_idx, data_idx = self.data_list[idx % len(self.data_list)]
+        return self.datasets[dataset_idx][data_idx]
+
+    def get_data_name(self, idx):
+        dataset_idx, data_idx = self.data_list[idx % len(self.data_list)]
+        return self.datasets[dataset_idx].get_data_name(data_idx)
+
+    def __getitem__(self, idx):
+        return self.get_data(idx)
+
+    def __len__(self):
+        return len(self.data_list) * self.loop
diff --git a/Pointcept/pointcept/datasets/modelnet.py b/Pointcept/pointcept/datasets/modelnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..213f3ed2dfe4d788380824b87355e6fcae1ee531
--- /dev/null
+++ b/Pointcept/pointcept/datasets/modelnet.py
@@ -0,0 +1,150 @@
+"""
+ModelNet40 Dataset
+
+Download the resampled point clouds of ModelNet40 (XYZ and normal from mesh, 10k points per shape)
+from "https://shapenet.cs.stanford.edu/media/modelnet40_normal_resampled.zip"
+
+Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
+Please cite our work if the code is helpful to you.
+""" + +import os +import numpy as np +import pointops +import torch +from torch.utils.data import Dataset +from copy import deepcopy + + +from pointcept.utils.logger import get_root_logger +from .builder import DATASETS +from .transform import Compose + + +@DATASETS.register_module() +class ModelNetDataset(Dataset): + def __init__( + self, + split="train", + data_root="data/modelnet40", + class_names=None, + transform=None, + num_points=8192, + uniform_sampling=True, + save_record=True, + test_mode=False, + test_cfg=None, + loop=1, + ): + super().__init__() + self.data_root = data_root + self.class_names = dict(zip(class_names, range(len(class_names)))) + self.split = split + self.num_point = num_points + self.uniform_sampling = uniform_sampling + self.transform = Compose(transform) + self.loop = ( + loop if not test_mode else 1 + ) # force make loop = 1 while in test mode + self.test_mode = test_mode + self.test_cfg = test_cfg if test_mode else None + if test_mode: + self.post_transform = Compose(self.test_cfg.post_transform) + self.aug_transform = [Compose(aug) for aug in self.test_cfg.aug_transform] + + self.data_list = self.get_data_list() + logger = get_root_logger() + logger.info( + "Totally {} x {} samples in {} set.".format( + len(self.data_list), self.loop, split + ) + ) + + # check, prepare record + record_name = f"modelnet40_{self.split}" + if num_points is not None: + record_name += f"_{num_points}points" + if uniform_sampling: + record_name += "_uniform" + record_path = os.path.join(self.data_root, f"{record_name}.pth") + if os.path.isfile(record_path): + logger.info(f"Loading record: {record_name} ...") + self.data = torch.load(record_path) + else: + logger.info(f"Preparing record: {record_name} ...") + self.data = {} + for idx in range(len(self.data_list)): + data_name = self.data_list[idx] + logger.info(f"Parsing data [{idx}/{len(self.data_list)}]: {data_name}") + self.data[data_name] = self.get_data(idx) + if save_record: + torch.save(self.data, record_path) + + def get_data(self, idx): + data_idx = idx % len(self.data_list) + data_name = self.data_list[data_idx] + if data_name in self.data.keys(): + return self.data[data_name] + else: + data_shape = "_".join(data_name.split("_")[0:-1]) + data_path = os.path.join( + self.data_root, data_shape, self.data_list[data_idx] + ".txt" + ) + data = np.loadtxt(data_path, delimiter=",").astype(np.float32) + if self.num_point is not None: + if self.uniform_sampling: + with torch.no_grad(): + mask = pointops.farthest_point_sampling( + torch.tensor(data).float().cuda(), + torch.tensor([len(data)]).long().cuda(), + torch.tensor([self.num_point]).long().cuda(), + ) + data = data[mask.cpu()] + else: + data = data[: self.num_point] + coord, normal = data[:, 0:3], data[:, 3:6] + category = np.array([self.class_names[data_shape]]) + return dict(coord=coord, normal=normal, category=category) + + def get_data_list(self): + assert isinstance(self.split, str) + split_path = os.path.join( + self.data_root, "modelnet40_{}.txt".format(self.split) + ) + data_list = np.loadtxt(split_path, dtype="str") + return data_list + + def get_data_name(self, idx): + data_idx = idx % len(self.data_list) + return self.data_list[data_idx] + + def __getitem__(self, idx): + if self.test_mode: + return self.prepare_test_data(idx) + else: + return self.prepare_train_data(idx) + + def __len__(self): + return len(self.data_list) * self.loop + + def prepare_train_data(self, idx): + data_dict = self.get_data(idx) + data_dict = self.transform(data_dict) + return data_dict 
+
+    def prepare_test_data(self, idx):
+        assert idx < len(self.data_list)
+        data_dict = self.get_data(idx)
+        category = data_dict.pop("category")
+        data_dict = self.transform(data_dict)
+        data_dict_list = []
+        for aug in self.aug_transform:
+            data_dict_list.append(aug(deepcopy(data_dict)))
+        for i in range(len(data_dict_list)):
+            data_dict_list[i] = self.post_transform(data_dict_list[i])
+        data_dict = dict(
+            voting_list=data_dict_list,
+            category=category,
+            name=self.get_data_name(idx),
+        )
+        return data_dict
diff --git a/Pointcept/pointcept/datasets/nuscenes.py b/Pointcept/pointcept/datasets/nuscenes.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3ab8f092fa8854baa14bad71ab5588bb402dc0c
--- /dev/null
+++ b/Pointcept/pointcept/datasets/nuscenes.py
@@ -0,0 +1,125 @@
+"""
+nuScenes Dataset
+
+Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com), Zheng Zhang
+Please cite our work if the code is helpful to you.
+"""
+
+import os
+import numpy as np
+from collections.abc import Sequence
+import pickle
+
+from .builder import DATASETS
+from .defaults import DefaultDataset
+
+
+@DATASETS.register_module()
+class NuScenesDataset(DefaultDataset):
+    def __init__(self, sweeps=10, ignore_index=-1, **kwargs):
+        self.sweeps = sweeps
+        self.ignore_index = ignore_index
+        self.learning_map = self.get_learning_map(ignore_index)
+        super().__init__(ignore_index=ignore_index, **kwargs)
+
+    def get_info_path(self, split):
+        assert split in ["train", "val", "test"]
+        if split == "train":
+            return os.path.join(
+                self.data_root, "info", f"nuscenes_infos_{self.sweeps}sweeps_train.pkl"
+            )
+        elif split == "val":
+            return os.path.join(
+                self.data_root, "info", f"nuscenes_infos_{self.sweeps}sweeps_val.pkl"
+            )
+        elif split == "test":
+            return os.path.join(
+                self.data_root, "info", f"nuscenes_infos_{self.sweeps}sweeps_test.pkl"
+            )
+        else:
+            raise NotImplementedError
+
+    def get_data_list(self):
+        if isinstance(self.split, str):
+            info_paths = [self.get_info_path(self.split)]
+        elif isinstance(self.split, Sequence):
+            info_paths = [self.get_info_path(s) for s in self.split]
+        else:
+            raise NotImplementedError
+        data_list = []
+        for info_path in info_paths:
+            with open(info_path, "rb") as f:
+                info = pickle.load(f)
+                data_list.extend(info)
+        return data_list
+
+    def get_data(self, idx):
+        data = self.data_list[idx % len(self.data_list)]
+        lidar_path = os.path.join(self.data_root, "raw", data["lidar_path"])
+        points = np.fromfile(str(lidar_path), dtype=np.float32, count=-1).reshape(
+            [-1, 5]
+        )
+        coord = points[:, :3]
+        strength = points[:, 3].reshape([-1, 1]) / 255  # scale strength to [0, 1]
+
+        if "gt_segment_path" in data.keys():
+            gt_segment_path = os.path.join(
+                self.data_root, "raw", data["gt_segment_path"]
+            )
+            segment = np.fromfile(
+                str(gt_segment_path), dtype=np.uint8, count=-1
+            ).reshape([-1])
+            segment = np.vectorize(self.learning_map.__getitem__)(segment).astype(
+                np.int64
+            )
+        else:
+            segment = np.ones((points.shape[0],), dtype=np.int64) * self.ignore_index
+        data_dict = dict(
+            coord=coord,
+            strength=strength,
+            segment=segment,
+            name=self.get_data_name(idx),
+        )
+        return data_dict
+
+    def get_data_name(self, idx):
+        # return the data name used for lidar segmentation; rework this once
+        # detection needs to be supported
+        return self.data_list[idx % len(self.data_list)]["lidar_token"]
+
+    @staticmethod
+    def get_learning_map(ignore_index):
+        learning_map = {
+            0: ignore_index,
+            1: ignore_index,
+            2: 6,
+            3: 6,
+            4: 6,
+            5: ignore_index,
+            6: 6,
+            7: ignore_index,
+            8: ignore_index,
+            9: 0,
+            10: ignore_index,
+            11: ignore_index,
+            12: 7,
+            13: ignore_index,
+            14: 1,
+            15: 2,
+            16: 2,
+            17: 3,
+            18: 4,
+            19: ignore_index,
+            20: ignore_index,
+            21: 5,
+            22: 8,
+            23: 9,
+            24: 10,
+            25: 11,
+            26: 12,
+            27: 13,
+            28: 14,
+            29: ignore_index,
+            30: 15,
+            31: ignore_index,
+        }
+        return learning_map
diff --git a/Pointcept/pointcept/datasets/preprocessing/arkitscenes/preprocess_arkitscenes_mesh.py b/Pointcept/pointcept/datasets/preprocessing/arkitscenes/preprocess_arkitscenes_mesh.py
new file mode 100644
index 0000000000000000000000000000000000000000..9bc9b3e47a35f5baa00bcf0f526d5d986b28494e
--- /dev/null
+++ b/Pointcept/pointcept/datasets/preprocessing/arkitscenes/preprocess_arkitscenes_mesh.py
@@ -0,0 +1,87 @@
+"""
+Preprocessing ARKitScenes
+"""
+
+import os
+import argparse
+import glob
+import plyfile
+import numpy as np
+import pandas as pd
+import multiprocessing as mp
+from concurrent.futures import ProcessPoolExecutor
+from itertools import repeat
+
+import torch
+
+
+def read_plymesh(filepath):
+    """Read a ply file and return (vertices, faces) as numpy arrays.
+    Returns None if the file is empty."""
+    with open(filepath, "rb") as f:
+        plydata = plyfile.PlyData.read(f)
+    if plydata.elements:
+        vertices = pd.DataFrame(plydata["vertex"].data).values
+        faces = np.stack(plydata["face"].data["vertex_indices"], axis=0)
+        return vertices, faces
+
+
+def face_normal(vertex, face):
+    v01 = vertex[face[:, 1]] - vertex[face[:, 0]]
+    v02 = vertex[face[:, 2]] - vertex[face[:, 0]]
+    vec = np.cross(v01, v02)
+    length = np.sqrt(np.sum(vec**2, axis=1, keepdims=True)) + 1.0e-8
+    nf = vec / length
+    area = length * 0.5
+    return nf, area
+
+
+def vertex_normal(vertex, face):
+    # area-weighted average of the incident face normals
+    nf, area = face_normal(vertex, face)
+    nf = nf * area
+
+    nv = np.zeros_like(vertex)
+    for i in range(face.shape[0]):
+        nv[face[i]] += nf[i]
+
+    length = np.sqrt(np.sum(nv**2, axis=1, keepdims=True)) + 1.0e-8
+    nv = nv / length
+    return nv
+
+
+def parse_scene(scene_path, output_dir):
+    print(f"Parsing scene {scene_path}")
+    split = os.path.basename(os.path.dirname(os.path.dirname(scene_path)))
+    scene_id = os.path.basename(os.path.dirname(scene_path))
+    vertices, faces = read_plymesh(scene_path)
+    coords = vertices[:, :3]
+    colors = vertices[:, 3:6]
+    data_dict = dict(coord=coords, color=colors, scene_id=scene_id)
+    data_dict["normal"] = vertex_normal(coords, faces)
+    torch.save(data_dict, os.path.join(output_dir, split, f"{scene_id}.pth"))
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--dataset_root",
+        required=True,
+        help="Path to the ARKitScenes dataset containing scene folders",
+    )
+    parser.add_argument(
+        "--output_root",
+        required=True,
+        help="Output path where train/val folders will be located",
+    )
+    opt = parser.parse_args()
+    # Create output directories
+    train_output_dir = os.path.join(opt.output_root, "Training")
+    os.makedirs(train_output_dir, exist_ok=True)
+    val_output_dir = os.path.join(opt.output_root, "Validation")
+    os.makedirs(val_output_dir, exist_ok=True)
+    # Load scene paths
+    scene_paths = sorted(glob.glob(opt.dataset_root + "/3dod/*/*/*_mesh.ply"))
+    # Preprocess data.
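+    # Scenes are independent, so parsing is spread over a process pool;
+    # repeat(opt.output_root) pairs every scene path with the same output
+    # root, i.e. each worker runs parse_scene(scene_path, output_root).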
+ pool = ProcessPoolExecutor(max_workers=mp.cpu_count()) + # pool = ProcessPoolExecutor(max_workers=1) + print("Processing scenes...") + _ = list(pool.map(parse_scene, scene_paths, repeat(opt.output_root))) diff --git a/Pointcept/pointcept/datasets/preprocessing/matterport3d/meta_data/category_mapping.tsv b/Pointcept/pointcept/datasets/preprocessing/matterport3d/meta_data/category_mapping.tsv new file mode 100644 index 0000000000000000000000000000000000000000..177c75ae021287bfc2462ae487b042886313cab1 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/matterport3d/meta_data/category_mapping.tsv @@ -0,0 +1,1660 @@ +index raw_category category count nyuId nyu40id eigen13id nyuClass nyu40class eigen13class ModelNet40 ModelNet10 ShapeNetCore55 synsetoffset wnsynsetid wnsynsetkey mpcat40index mpcat40 +1 wall wall 7667 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +2 door door 2544 28 8 12 door door Wall door n03221720 door.n.01 4 door +3 ceiling ceiling 2363 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +4 floor floor 2252 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +5 picture picture 2125 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +6 window window 2013 59 9 13 window window Window n04587648 window.n.01 9 window +7 chair chair 1947 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +8 doorframe door frame 1935 28 8 12 door door Wall door n03221720 door.n.01 4 door +9 remove remove 1622 0 0 0 void void void 0 void +10 pillow pillow 1335 119 18 7 pillow pillow Objects pillow 03938244 n03938244 pillow.n.01 8 cushion +11 object object 1145 40 7 otherprop Objects n00002684 object.n.01 39 objects +12 light light 967 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +13 cabinet cabinet 785 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +14 curtain curtain 692 89 16 13 curtain curtain Window curtain n03151077 curtain.n.01 12 curtain +15 table table 691 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +16 plant plant 632 82 40 7 plant otherprop Objects plant n00017222 plant.n.02 14 plant +17 decoration decoration 532 40 7 otherprop Objects n03169390 decoration.n.01 39 objects +18 window frame window frame 526 59 9 13 window window Window n04589593 window_frame.n.01 9 window +19 lamp lamp 540 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +20 mirror mirror 488 122 19 7 mirror mirror Objects n03773035 mirror.n.01 21 mirror +21 towel towel 437 135 27 7 towel towel Objects n04459362 towel.n.01 20 towel +22 sink sink 417 24 34 7 sink sink Objects sink n04223580 sink.n.01 15 sink +23 shelf shelf 417 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +24 couch couch 290 83 6 9 sofa sofa Sofa sofa sofa sofa 04256520 n04256520 sofa.n.01 10 sofa +25 dining chair dining chair 277 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +26 bed bed 270 157 4 1 bed bed Bed bed bed bed 02818832 n02818832 bed.n.01 11 bed +27 nightstand nightstand 275 158 32 6 night stand night stand Furniture night_stand night_stand n03015254 chest_of_drawers.n.01 13 chest_of_drawers +28 toilet toilet 268 124 33 7 toilet toilet Objects toilet toilet n04446276 toilet.n.01 18 toilet +29 sofa chair sofa chair 262 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +30 column pillar 243 94 38 7 column otherstructure Objects n03073977 column.n.07 24 column 
+31 handrail handrail 241 453 38 7 banister otherstructure Objects n02788148 bannister.n.02 30 railing +32 stair stair 241 215 38 7 stairs otherstructure Objects stairs n04314914 step.n.04 16 stairs +33 stool stool 232 150 40 7 stool otherprop Objects stool n04326896 stool.n.01 19 stool +34 armchair armchair 224 5 5 4 chair chair Chair chair chair chair 03001627 n02738535 armchair.n.01 3 chair +35 kitchen cabinet kitchen cabinet 212 3 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 7 cabinet +36 vase vase 205 78 40 7 vase otherprop Objects vase jar 03593526 n04522168 vase.n.01 39 objects +37 cushion cushion 204 119 18 7 pillow pillow Objects n03151500 cushion.n.03 8 cushion +38 tv tv 204 172 25 11 television television TV tv or monitor 03211117 n03211117 display.n.06 22 tv_monitor +39 door frame door frame 190 28 8 12 door door Wall door n03221720 door.n.01 4 door +40 unknown unknown 186 20 40 7 unknown otherprop Objects n08632096 unknown.n.01 41 unlabeled +41 pot pot 185 16 40 7 pot otherprop Objects n03991062 pot.n.04 39 objects +42 desk desk 187 36 14 10 desk desk Table desk desk table 04379243 n03179701 desk.n.01 5 table +43 painting picture 194 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +44 roof roof 173 4 22 3 ceiling ceiling Ceiling n04105068 roof.n.01 17 ceiling +45 box box 172 26 29 7 box box Objects n02883344 box.n.01 39 objects +46 shower wall shower wall 168 21 1 12 wall wall Wall n04208936 shower.n.01 23 shower +47 coffee table coffee table 159 356 39 6 coffee table otherfurniture Furniture table table table 04379243 n03063968 coffee_table.n.01 5 table +48 countertop countertop 159 7 12 6 counter counter Furniture n03118245 countertop.n.01 26 counter +49 bench bench 154 204 39 6 bench otherfurniture Furniture bench bench 02828884 n02828884 bench.n.01 34 seating +50 wall frame picture 142 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +51 trash can trashcan 131 12 39 6 garbage bin otherfurniture Furniture trash_bin 02747177 n02747177 ashcan.n.01 39 objects +52 fireplace fireplace 128 372 38 7 fireplace otherstructure Objects n03346455 fireplace.n.01 27 fireplace +53 clothes clothes 125 141 21 7 clothes clothes Objects n02728440 apparel.n.01 38 clothes +54 pillar pillar 117 94 38 7 column otherstructure Objects n03073977 column.n.07 24 column +55 bathtub bathtub 121 136 36 7 bathtub bathtub Objects bathtub bathtub tub 02808440 n02808440 bathtub.n.01 25 bathtub +56 ceiling duct ceiling duct 110 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +57 bath cabinet bath cabinet 111 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +58 book book 104 1 23 2 book books Books n02870526 book.n.11 39 objects +59 beam beam 101 38 7 otherstructure Objects n02815950 beam.n.02 29 beam +60 vent vent 101 25 38 7 air vent otherstructure Objects n04526241 vent.n.01 40 misc +61 shower floor shower floor 97 11 2 5 floor floor Floor n04208936 shower.n.01 23 shower +62 faucet faucet 97 9 40 7 faucet otherprop Objects faucet 03325088 n03325088 faucet.n.01 39 objects +63 photo photo 100 508 40 7 photo otherprop Objects n03925226 photograph.n.01 6 picture +64 delete remove 90 0 0 0 void void void 0 void +65 toilet paper toilet paper 90 139 40 7 toilet paper otherprop Objects n15075141 toilet_tissue.n.01 39 objects +66 counter counter 91 7 12 6 counter counter Furniture table table table 04379243 n03116530 counter.n.01 26 counter +67 fan fan 85 74 40 7 fan otherprop Objects n03320046 fan.n.01 39 objects +68 step 
step 86 38 7 otherstructure Objects n04314914 step.n.04 16 stairs +69 table lamp table lamp 96 144 35 7 lamp lamp Objects lamp lamp 03636649 n04380533 table_lamp.n.01 28 lighting +70 wall / other room wall /otherroom 84 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +71 washbasin washbasin 91 24 34 7 sink sink Objects sink n04553920 washbasin.n.01 15 sink +72 rail railing 83 497 38 7 railing otherstructure Objects n04047401 railing.n.01 30 railing +73 side table table 85 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +74 decor decoration 79 40 7 otherprop Objects n03169390 decoration.n.01 39 objects +75 shelves shelving 79 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +76 statue statue 78 294 40 7 sculpture otherprop Objects n04306847 statue.n.01 39 objects +77 dresser dresser 79 169 17 6 dresser dresser Furniture dresser dresser n03015254 chest_of_drawers.n.01 13 chest_of_drawers +78 stairs stair 76 215 38 7 stairs otherstructure Objects stairs n04314914 step.n.04 16 stairs +79 rug rug 77 130 40 7 rug floor mat Objects n04118021 rug.n.01 2 floor +80 ottoman ottoman 79 359 39 6 ottoman otherfurniture Furniture stool n03380724 footstool.n.01 19 stool +81 round table table 74 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +82 bottle bottle 101 2 40 7 bottle otherprop Objects bottle bottle 02876657 n02876657 bottle.n.01 39 objects +83 kitchen counter kitchen counter 73 7 12 6 counter counter Furniture table table table 04379243 n03116530 counter.n.01 26 counter +84 windowframe window frame 66 59 9 13 window window Window n04589593 window_frame.n.01 9 window +85 office chair office chair 70 5 5 4 chair chair Chair chair chair chair 03001627 n04373704 swivel_chair.n.01 3 chair +86 frame frame 66 38 7 otherstructure Objects 40 misc +87 refrigerator refrigerator 65 17 24 6 refridgerator refridgerator Furniture n04070727 refrigerator.n.01 37 appliances +88 bookshelf bookshelf 63 88 10 6 bookshelf bookshelf Furniture bookshelf bookshelf 02871439 n02871439 bookshelf.n.01 31 shelving +89 end table end table 67 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +90 wardrobe wardrobe 64 772 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 36 furniture +91 toiletry toiletry 63 40 7 otherprop Objects n04447443 toiletry.n.01 39 objects +92 windowsill window frame 62 59 9 13 window window Window n04589593 window_frame.n.01 9 window +93 pipe pipe 62 41 40 7 pipe otherprop Objects n03944672 pipe.n.02 40 misc +94 monitor monitor 60 49 40 7 monitor otherprop Objects monitor monitor tv or monitor 03211117 n03782190 monitor.n.04 22 tv_monitor +95 stand stand 62 50 39 6 stand otherfurniture Furniture table table table 04379243 n04301000 stand.n.04 5 table +96 drawers drawer 60 174 39 6 drawer otherfurniture Furniture n03233905 drawer.n.01 13 chest_of_drawers +97 dining table dining table 61 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +98 container container 59 140 40 7 container otherprop Objects n03094503 container.n.01 39 objects +99 light switch light switch 59 301 38 7 light switch otherstructure Objects n04372370 switch.n.01 39 objects +100 skylight skylight 57 59 9 13 window window Window n04232800 skylight.n.01 9 window +101 purse purse 57 181 40 7 purse otherprop Objects n02774152 bag.n.04 39 objects +102 wall /otherroom wall /otherroom 55 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +103 sofa couch 56 83 6 9 sofa 
sofa Sofa sofa sofa sofa 04256520 n04256520 sofa.n.01 10 sofa +104 books book 53 1 23 2 book books Books n02870526 book.n.11 39 objects +105 doorway doorway 54 609 38 7 door way otherstructure Objects door n03224032 doorway.n.01 4 door +106 railing railing 52 497 38 7 railing otherstructure Objects n04047401 railing.n.01 30 railing +107 wainscotting paneling 52 21 1 12 wall wall Wall n03882611 paneling.n.01 1 wall +108 basket basket 52 39 40 7 basket otherprop Objects basket 02801938 n02801938 basket.n.01 39 objects +109 closet closet 50 40 7 otherprop Objects n03148324 cupboard.n.01 7 cabinet +110 arch arch 48 40 7 otherprop Objects n02733524 arch.n.04 40 misc +111 chandelier chandelier 48 342 38 7 chandelier otherstructure Objects n03005285 chandelier.n.01 28 lighting +112 oven oven 47 238 38 7 oven otherstructure Objects n03862676 oven.n.01 37 appliances +113 clock clock 47 56 40 7 clock otherprop Objects clock 03046257 n03046257 clock.n.01 39 objects +114 footstool footstool 48 359 39 6 ottoman otherfurniture Furniture stool n03380724 footstool.n.01 19 stool +115 stove stove 44 242 38 7 stove otherstructure Objects stove 04330267 n04330267 stove.n.02 37 appliances +116 trashcan trashcan 44 12 39 6 garbage bin otherfurniture Furniture trash_bin 02747177 n02747177 ashcan.n.01 39 objects +117 drawer drawer 45 174 39 6 drawer otherfurniture Furniture n03233905 drawer.n.01 13 chest_of_drawers +118 bathroom countertop object object 43 40 7 otherprop Objects n00002684 object.n.01 39 objects +119 washing machine washing machine 43 278 39 6 washing machine otherfurniture Furniture washing_machine 04554684 n04554684 washer.n.03 37 appliances +120 shower curtain shower curtain 43 123 28 7 shower curtain shower curtain Objects curtain n04209239 shower_curtain.n.01 12 curtain +121 rack rack 41 50 39 6 stand otherfurniture Furniture n04038440 rack.n.05 31 shelving +122 art picture 40 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +123 firealarm fire alarm 37 338 40 7 fire alarm otherprop Objects n03343737 fire_alarm.n.02 39 objects +124 bin bin 37 307 40 7 bin otherprop Objects n02839910 bin.n.01 39 objects +125 chest chest 36 344 39 6 chest otherfurniture Furniture dresser dresser 40 misc +126 microwave microwave 36 13 40 7 microwave otherprop Objects microwave 03761084 n03761084 microwave.n.02 37 appliances +127 blinds blinds 36 80 13 13 blinds blinds Window n04589190 window_blind.n.01 32 blinds +128 bowl bowl 35 22 40 7 bowl otherprop Objects bowl bowl 02880940 n02880940 bowl.n.03 39 objects +129 ceiling pipe ceiling pipe 34 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +130 tree tree 34 82 40 7 plant otherprop Objects plant n13104059 tree.n.01 14 plant +131 vanity vanity 34 169 17 6 dresser dresser Furniture dresser dresser table 04379243 n03238586 dressing_table.n.01 5 table +132 ceiling fan ceiling fan 37 74 40 7 fan otherprop Objects n03320046 fan.n.01 39 objects +133 tissue box tissue box 34 138 40 7 tissue box otherprop Objects n02883344 box.n.01 39 objects +134 desk chair desk chair 34 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +135 plate plate 32 233 40 7 plate otherprop Objects n03959485 plate.n.04 39 objects +136 tv stand tv stand 32 291 39 6 tv stand otherfurniture Furniture tv_stand n03290653 entertainment_center.n.01 36 furniture +137 shoes shoe 32 149 40 7 shoe otherprop Objects n04199027 shoe.n.01 39 objects +138 heater heater 32 111 39 6 heater otherfurniture Furniture n03508101 heater.n.01 39 objects +139 
bedframe bedframe 33 157 4 1 bed bed Bed n02822579 bedstead.n.01 11 bed +140 headboard headboard 33 161 39 6 headboard otherfurniture Furniture n03502200 headboard.n.01 11 bed +141 post post 32 94 38 7 column otherstructure Objects n03988170 post.n.04 24 column +142 swivel chair swivel chair 31 5 5 4 chair chair Chair chair chair chair 03001627 n04373704 swivel_chair.n.01 3 chair +143 pedestal pedestal 31 50 39 6 stand otherfurniture Furniture 40 misc +144 fence fence 31 38 7 otherstructure Objects n03327234 fence.n.01 40 misc +145 ceiling pipes ceiling pipe 30 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +146 pew pew 28 204 39 6 bench otherfurniture Furniture bench bench 02828884 n03920867 pew.n.01 34 seating +147 decorative object decoration 28 40 7 otherprop Objects n03169390 decoration.n.01 39 objects +148 bucket bucket 28 427 40 7 bucket otherprop Objects n02909870 bucket.n.01 39 objects +149 mask decorative mask 27 40 7 otherprop Objects n03169390 decoration.n.01 39 objects +150 candle candle 27 137 40 7 candle otherprop Objects lamp n02948072 candle.n.01 39 objects +151 flowerpot flowerpot 28 146 40 7 flower pot otherprop Objects flower_pot flower pot 03991062 n03991062 pot.n.04 39 objects +152 speaker speaker 27 54 40 7 speaker otherprop Objects speaker 03691459 n03691459 loudspeaker.n.01 39 objects +153 seat seat 26 524 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 34 seating +154 sign sign 25 208 40 7 sign otherprop Objects n04217882 signboard.n.01 40 misc +155 air conditioner air conditioner 26 79 38 7 air conditioner otherstructure Objects n02686379 air_conditioner.n.01 39 objects +156 shower curtain rod shower curtain rod 25 40 7 otherprop Objects n04100174 rod.n.01 12 curtain +157 unknown / other room unknown /otherroom 24 20 40 7 unknown otherprop Objects n08632096 unknown.n.01 41 unlabeled +158 flowers plant 25 82 40 7 plant otherprop Objects plant n00017222 plant.n.02 14 plant +159 clutter clutter 24 40 7 otherprop Objects 40 misc +160 pillows pillow 24 119 18 7 pillow pillow Objects pillow 03938244 n03938244 pillow.n.01 8 cushion +161 plants plant 24 82 40 7 plant otherprop Objects plant n00017222 plant.n.02 14 plant +162 wall \other room wall /otherroom 24 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +163 fire extinguisher fire extinguisher 24 10 40 7 fire extinguisher otherprop Objects n03345837 fire_extinguisher.n.01 39 objects +164 towels towel 24 135 27 7 towel towel Objects n04459362 towel.n.01 20 towel +165 curtains curtain 23 89 16 13 curtain curtain Window curtain n03151077 curtain.n.01 12 curtain +166 curtain rod curtain rod 23 582 38 7 curtain rod otherstructure Objects n04100174 rod.n.01 12 curtain +167 kitchen countertop object object 23 40 7 otherprop Objects n00002684 object.n.01 39 objects +168 mat mat 23 143 20 5 floor mat floor mat Floor n03727837 mat.n.01 2 floor +169 flower plant 23 82 40 7 plant otherprop Objects plant n00017222 plant.n.02 14 plant +170 sculpture sculpture 23 294 40 7 sculpture otherprop Objects n04157320 sculpture.n.01 39 objects +171 shelving shelving 22 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +172 wall/other room wall /otherroom 22 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +173 knickknack object 21 40 7 otherprop Objects n00002684 object.n.01 39 objects +174 printer printer 21 66 40 7 printer otherprop Objects printer 04004475 n04004475 printer.n.03 39 objects +175 wall behind wall /otherroom 21 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +176 telephone 
telephone 21 32 40 7 telephone otherprop Objects telephone 04401088 n04401088 telephone.n.01 39 objects +177 bedside table nightstand 21 158 32 6 night stand night stand Furniture night_stand night_stand n03015254 chest_of_drawers.n.01 13 chest_of_drawers +178 moulding molding 21 38 7 otherstructure Objects n02800354 baseboard.n.01 1 wall +179 handbag handbag 21 40 7 otherprop Objects n02774152 bag.n.04 39 objects +180 wall /other room wall /otherroom 21 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +181 blanket blanket 21 312 40 7 blanket otherprop Objects n02849154 blanket.n.01 39 objects +182 shower shower 21 38 7 otherstructure Objects n04208936 shower.n.01 23 shower +183 steps step 20 38 7 otherstructure Objects n04314914 step.n.04 16 stairs +184 switch switch 21 40 7 otherprop Objects n04372370 switch.n.01 39 objects +185 toilet paper dispenser toilet paper dispenser 20 40 7 otherprop Objects 40 misc +186 objects object 21 40 7 otherprop Objects n00002684 object.n.01 39 objects +187 handle handle 20 758 40 7 handle otherprop Objects n03485997 handle.n.01 39 objects +188 frame /outside frame /outside 20 38 7 otherstructure Objects 40 misc +189 screen screen 20 89 16 13 curtain curtain Window curtain n03151077 curtain.n.01 12 curtain +190 shower head showerhead 19 650 40 7 shower head otherprop Objects n04209383 showerhead.n.01 23 shower +191 baracade barricade 19 40 7 otherprop Objects n04096848 roadblock.n.02 40 misc +192 picture frame picture frame 25 64 11 8 picture picture Picture n03931765 picture_frame.n.01 6 picture +193 soap soap 19 133 40 7 soap otherprop Objects n04253437 soap.n.01 39 objects +194 staircase railing banister 18 453 38 7 banister otherstructure Objects n02788148 bannister.n.02 30 railing +195 keyboard keyboard 18 47 40 7 keyboard otherprop Objects keyboard computer keyboard 03085013 n03085013 computer_keyboard.n.01 39 objects +196 thermostat thermostat 18 110 40 7 thermostat otherprop Objects n04422875 thermostat.n.01 39 objects +197 radiator radiator 18 236 39 6 radiator otherfurniture Furniture n04041069 radiator.n.02 39 objects +198 kitchen island kitchen island 18 456 38 7 kitchen island otherstructure Objects n03620600 kitchen_island.n.01 26 counter +199 paper towel paper towel 18 113 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 20 towel +200 wall decoration picture 17 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +201 phone telephone 17 32 40 7 telephone otherprop Objects telephone 04401088 n04401088 telephone.n.01 39 objects +202 mirror frame mirror 17 122 19 7 mirror mirror Objects n03773035 mirror.n.01 21 mirror +203 clothes dryer clothes dryer 18 39 6 otherfurniture Furniture n03251766 dryer.n.01 37 appliances +204 panel panel 17 559 40 7 sheet otherprop Objects n03882058 panel.n.01 35 board_panel +205 glass glass 16 612 38 7 glass otherstructure Objects n03438257 glass.n.02 39 objects +206 soap dispenser soap dispenser 16 40 7 otherprop Objects n04254120 soap_dispenser.n.01 39 objects +207 dishwasher dishwasher 16 8 38 7 dishwasher otherstructure Objects dishwasher 03207941 n03207941 dishwasher.n.01 37 appliances +208 cup cup 16 35 40 7 cup otherprop Objects cup cup or mug 03797390 n03797390 mug.n.04 39 objects +209 bathroom cabinet bathroom cabinet 17 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +210 ladder ladder 17 48 39 6 ladder otherfurniture Furniture stairs n03632277 ladder.n.01 16 stairs +211 garage door garage door 16 850 38 7 garage door otherstructure Objects 
door 4 door +212 hat hat 15 193 40 7 hat otherprop Objects n03497657 hat.n.01 38 clothes +213 chest of drawers chest of drawers 15 524 39 6 furniture otherfurniture Furniture dresser dresser n03015254 chest_of_drawers.n.01 13 chest_of_drawers +214 exit sign exit sign 15 86 40 7 exit sign otherprop Objects 40 misc +215 sidetable side table 15 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +216 office table office table 15 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +217 piano piano 15 298 39 6 piano otherfurniture Furniture piano piano 03928116 n03928116 piano.n.01 39 objects +218 painter picture 17 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +219 board board 15 408 38 7 board otherstructure Objects 35 board_panel +220 windows window 14 59 9 13 window window Window n04587648 window.n.01 9 window +221 archway archway 14 21 1 12 wall wall Wall n02734217 arch.n.03 4 door +222 rope rope 14 560 40 7 rope otherprop Objects n04108268 rope.n.01 39 objects +223 ball ball 15 60 40 7 ball otherprop Objects 40 misc +224 gym equipment gym equipment 14 457 39 6 excercise equipment otherfurniture Furniture n04285146 sports_equipment.n.01 33 gym_equipment +225 clothes hangers clothes hanger 13 211 40 7 hanger otherprop Objects n03057920 coat_hanger.n.01 39 objects +226 bathroom object object 13 40 7 otherprop Objects n00002684 object.n.01 39 objects +227 easy chair easy chair 13 5 5 4 chair chair Chair chair chair chair 03001627 n03262932 easy_chair.n.01 3 chair +228 lounge chair lounge chair 15 5 5 4 chair chair Chair chair chair chair 03001627 n03262932 easy_chair.n.01 3 chair +229 furniture furniture 13 524 39 6 furniture otherfurniture Furniture n03405725 furniture.n.01 36 furniture +230 cabinets cabinet 16 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +231 carpet carpet 14 130 40 7 rug floor mat Objects n04118021 rug.n.01 2 floor +232 food food 13 40 7 otherprop Objects n00021265 food.n.01 40 misc +233 plant pot pot 13 16 40 7 pot otherprop Objects n03991062 pot.n.04 39 objects +234 duct duct 13 40 7 otherprop Objects n03253398 duct.n.03 40 misc +235 ridge ridge 13 40 7 otherprop Objects 40 misc +236 candlestick candlestick 12 148 40 7 candlestick otherprop Objects n02948557 candlestick.n.01 39 objects +237 computer desk computer desk 12 36 14 10 desk desk Table desk desk table 04379243 n03179701 desk.n.01 5 table +238 shower door shower door 12 28 8 12 door door Wall door n04208936 shower.n.01 23 shower +239 trash trashcan 12 12 39 6 garbage bin otherfurniture Furniture trash_bin 02747177 n02747177 ashcan.n.01 39 objects +240 crown molding molding 12 38 7 otherstructure Objects n02800354 baseboard.n.01 1 wall +241 wall sconce sconce 12 62 38 7 light otherstructure Objects n04148703 sconce.n.03 28 lighting +242 door handle door handle 12 758 40 7 handle otherprop Objects 40 misc +243 scale scale 12 639 40 7 scale otherprop Objects n04141975 scale.n.07 39 objects +244 trash bin trashcan 12 12 39 6 garbage bin otherfurniture Furniture trash_bin 02747177 n02747177 ashcan.n.01 39 objects +245 baseboard baseboard 13 38 7 otherstructure Objects n02800354 baseboard.n.01 1 wall +246 window /otherroom window /otherroom 12 59 9 13 window window Window n04587648 window.n.01 9 window +247 bag bag 11 55 37 7 bag bag Objects suitcase 02773838 n02773838 bag.n.06 39 objects +248 laptop laptop 11 37 40 7 laptop otherprop Objects laptop laptop 03642806 n03642806 laptop.n.01 39 objects +249 
treadmill treadmill 12 458 39 6 treadmill otherfurniture Furniture n04477387 treadmill.n.01 33 gym_equipment +250 staircase staircase 11 215 38 7 stairs otherstructure Objects n04298308 stairway.n.01 16 stairs +251 guitar guitar 11 300 40 7 guitar otherprop Objects guitar guitar 03467517 n03467517 guitar.n.01 39 objects +252 light fixture light fixture 11 62 38 7 light otherstructure Objects n03665366 light.n.02 28 lighting +253 pipes pipe 11 41 40 7 pipe otherprop Objects n03944672 pipe.n.02 40 misc +254 display case display case 11 540 39 6 display case otherfurniture Furniture n02975212 case.n.20 39 objects +255 weight machine exercise equipment 10 457 39 6 excercise equipment otherfurniture Furniture n04285146 sports_equipment.n.01 33 gym_equipment +256 toilet paper holder toilet paper holder 10 647 40 7 toilet paper holder otherprop Objects 40 misc +257 basin basin 10 24 34 7 sink sink Objects sink n04223580 sink.n.01 15 sink +258 towel bar towel bar 10 51 38 7 bar otherstructure Objects n04459909 towel_rail.n.01 39 objects +259 floor behind floor /otherroom 10 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +260 wooden chair chair 10 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +261 potted plant plant 10 82 40 7 plant otherprop Objects plant n00017222 plant.n.02 14 plant +262 ceiling / other room ceiling /otherroom 10 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +263 window sill window frame 10 59 9 13 window window Window n04589593 window_frame.n.01 9 window +264 floor / other room floor /otherroom 10 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +265 cupboard cabinet 10 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +266 tray tray 10 179 40 7 tray otherprop Objects n04476259 tray.n.01 39 objects +267 urn urn 10 151 40 7 urn otherprop Objects vase jar 03593526 n04516116 urn.n.01 39 objects +268 church seating pew 10 204 39 6 bench otherfurniture Furniture bench bench 02828884 n03920867 pew.n.01 34 seating +269 decorative plate decorative plate 9 383 40 7 decorative plate otherprop Objects 40 misc +270 doors door 9 28 8 12 door door Wall door n03221720 door.n.01 4 door +271 bar bar 9 51 38 7 bar otherstructure Objects n02788689 bar.n.03 39 objects +272 stair rail banister 9 453 38 7 banister otherstructure Objects n02788148 bannister.n.02 30 railing +273 window shade window shade 9 40 7 otherprop Objects n04590129 window_shade.n.01 32 blinds +274 grass grass 11 82 40 7 plant otherprop Objects plant n12102133 grass.n.01 14 plant +275 pool table pool table 9 515 39 6 pool table otherfurniture Furniture table table table 04379243 n03982430 pool_table.n.01 5 table +276 coat coat 9 324 40 7 jacket otherprop Objects n03057021 coat.n.01 38 clothes +277 trees tree 9 82 40 7 plant otherprop Objects plant n13104059 tree.n.01 14 plant +278 cloth cloth 11 40 7 otherprop Objects n03309808 fabric.n.01 39 objects +279 bottle of soap bottle of soap 9 502 40 7 bottle of soap otherprop Objects 40 misc +280 floor lamp floor lamp 9 144 35 7 lamp lamp Objects lamp lamp 03636649 n03367059 floor_lamp.n.01 28 lighting +281 water cooler water cooler 9 509 39 6 water cooler otherfurniture Furniture n04559166 water_cooler.n.01 39 objects +282 pews pew 9 204 39 6 bench otherfurniture Furniture bench bench 02828884 n03920867 pew.n.01 34 seating +283 ledge ledge 10 38 7 otherstructure Objects n09337253 ledge.n.01 39 objects +284 kitchen shelf kitchen shelf 9 42 15 6 shelves shelves Furniture n04190052 
shelf.n.01 31 shelving +285 bathroom utencils bathroom utensil 8 267 40 7 utensil otherprop Objects n04516672 utensil.n.01 39 objects +286 hanger hanger 8 211 40 7 hanger otherprop Objects n03490884 hanger.n.02 39 objects +287 shrubbery shrubbery 8 40 7 otherprop Objects n08649067 shrubbery.n.01 39 objects +288 teapot teapot 8 678 40 7 tea pot otherprop Objects n04398044 teapot.n.01 39 objects +289 bottles bottle 8 2 40 7 bottle otherprop Objects bottle bottle 02876657 n02876657 bottle.n.01 39 objects +290 exercise equipment exercise equipment 8 457 39 6 excercise equipment otherfurniture Furniture n04285146 sports_equipment.n.01 33 gym_equipment +291 boxes box 8 26 29 7 box box Objects n02883344 box.n.01 39 objects +292 locker locker 8 3 3 6 cabinet cabinet Furniture n02933462 cabinet.n.03 40 misc +293 wall cabinet wall cabinet 8 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +294 wainscotting \other room paneling /otherroom 8 21 1 12 wall wall Wall n03882611 paneling.n.01 1 wall +295 ceiling light ceiling lamp 9 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +296 ornament ornament 8 40 7 otherprop Objects n03169390 decoration.n.01 39 objects +297 bidet bidet 8 124 33 7 toilet toilet Objects toilet toilet n02836174 bidet.n.01 18 toilet +298 shower soap shelf shower soap shelf 8 40 7 otherprop Objects 40 misc +299 window / door window/door 8 40 7 otherprop Objects 40 misc +300 stuffed animal stuffed animal 8 177 40 7 stuffed animal otherprop Objects n04399382 teddy.n.01 39 objects +301 paper towel dispenser paper towel dispenser 8 14 40 7 paper towel dispenser otherprop Objects 40 misc +302 chair bottom chair 8 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +303 fencing fencing 8 40 7 otherprop Objects n03327234 fence.n.01 40 misc +304 lampshade lampshade 8 859 40 7 lamp shade otherprop Objects n03637318 lampshade.n.01 28 lighting +305 door side door frame 12 28 8 12 door door Wall door n03221720 door.n.01 4 door +306 bust bust 7 294 40 7 sculpture otherprop Objects n02926188 bust.n.03 39 objects +307 car car 7 530 40 7 car otherprop Objects car car 02958343 n02958343 car.n.01 39 objects +308 figure figure 7 40 7 otherprop Objects 40 misc +309 sofa set sofa set 7 83 6 9 sofa sofa Sofa sofa sofa sofa 04256520 n04256520 sofa.n.01 10 sofa +310 commode toilet 7 124 33 7 toilet toilet Objects toilet toilet n04446276 toilet.n.01 18 toilet +311 toilet brush toilet brush 7 630 40 7 toilet brush otherprop Objects 40 misc +312 doll doll 7 99 40 7 doll otherprop Objects n03219135 doll.n.01 39 objects +313 drums drum 7 145 40 7 drum otherprop Objects n03249569 drum.n.01 39 objects +314 bathroom counter bathroom counter 7 7 12 6 counter counter Furniture table table table 04379243 n03116530 counter.n.01 26 counter +315 dress dress 7 40 7 otherprop Objects n03236735 dress.n.01 38 clothes +316 shower handle shower handle 7 758 40 7 handle otherprop Objects 40 misc +317 closet door closet door 7 28 8 12 door door Wall door n03221720 door.n.01 4 door +318 whiteboard whiteboard 7 45 30 7 whiteboard whiteboard Objects n03211616 display_panel.n.01 22 tv_monitor +319 garage door opener garage door opener 7 40 7 otherprop Objects 40 misc +320 range hood range hood 7 380 38 7 range hood otherstructure Objects range_hood n04053677 range_hood.n.01 39 objects +321 window curtain window curtain 7 89 16 13 curtain curtain Window curtain n03151077 curtain.n.01 12 curtain +322 easel easel 7 50 39 6 stand otherfurniture 
Furniture n03262809 easel.n.01 31 shelving +323 bowl of fruit bowl of fruit 7 22 40 7 bowl otherprop Objects bowl bowl 02880940 n02880940 bowl.n.03 39 objects +324 molding molding 10 38 7 otherstructure Objects n02800354 baseboard.n.01 1 wall +325 pool pool 7 38 7 otherstructure Objects n03982060 pool.n.01 40 misc +326 kitchen appliance kitchen appliance 6 40 7 otherprop Objects n03620052 kitchen_appliance.n.01 37 appliances +327 candelabra candelabra 6 605 40 7 candelabra otherprop Objects n02947818 candelabrum.n.01 39 objects +328 ceiling lamp ceiling lamp 6 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +329 toy toy 6 389 40 7 toy otherprop Objects n03964744 plaything.n.01 39 objects +330 top top 6 40 7 otherprop Objects 40 misc +331 wall art picture 6 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +332 highchair highchair 6 5 5 4 chair chair Chair chair chair chair 03001627 n03518445 highchair.n.01 3 chair +333 footrest footrest 6 163 39 6 foot rest otherfurniture Furniture stool n03380724 footstool.n.01 19 stool +334 bathroom sink sink 6 24 34 7 sink sink Objects sink n04223580 sink.n.01 15 sink +335 soap dish soap dish 6 638 40 7 soap dish otherprop Objects n04254009 soap_dish.n.01 39 objects +336 miscellaneous object object 6 40 7 otherprop Objects n00002684 object.n.01 39 objects +337 trim molding 7 38 7 otherstructure Objects n02800354 baseboard.n.01 1 wall +338 tabletop object object 6 40 7 otherprop Objects n00002684 object.n.01 39 objects +339 clothes hanger rod clothes hanger rod 6 40 7 otherprop Objects n04100174 rod.n.01 39 objects +340 altar altar 6 19 7 10 table table Table table table table 04379243 n02699629 altar.n.01 5 table +341 candles candle 6 137 40 7 candle otherprop Objects lamp n02948072 candle.n.01 39 objects +342 placemat place mat 6 154 40 7 placemat otherprop Objects n03952886 place_mat.n.01 39 objects +343 kitchen center island kitchen island 6 456 38 7 kitchen island otherstructure Objects n03620600 kitchen_island.n.01 26 counter +344 plate of food plate of food 6 40 7 otherprop Objects 40 misc +345 sheet sheet 7 559 40 7 sheet otherprop Objects 40 misc +346 wood wood 6 40 7 otherprop Objects 40 misc +347 robe robe 6 40 7 otherprop Objects n04097866 robe.n.01 38 clothes +348 bathroom stall bathroom stall 6 38 7 otherstructure Objects n02873839 booth.n.02 40 misc +349 tabletop decoration decoration 6 40 7 otherprop Objects n03169390 decoration.n.01 39 objects +350 plush toy plush toy 6 389 40 7 toy otherprop Objects n04399382 teddy.n.01 39 objects +351 wash basin washbasin 6 24 34 7 sink sink Objects sink n04553920 washbasin.n.01 15 sink +352 celing ceiling 6 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +353 hangers hanger 6 211 40 7 hanger otherprop Objects n03490884 hanger.n.02 39 objects +354 bushes bush 6 82 40 7 plant otherprop Objects plant n13112664 shrub.n.01 14 plant +355 floor/other room floor /otherroom 6 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +356 dinner placesetting place mat 6 154 40 7 placemat otherprop Objects n03952886 place_mat.n.01 39 objects +357 curtain valence curtain valence 6 89 16 13 curtain curtain Window curtain n03151077 curtain.n.01 12 curtain +358 control control 6 40 7 otherprop Objects n03096960 control.n.09 39 objects +359 tap tap 6 9 40 7 faucet otherprop Objects faucet 03325088 n04559451 water_faucet.n.01 39 objects +360 shampoo shampoo 6 548 40 7 cleaner otherprop Objects n04183516 shampoo.n.01 39 objects +361 computer computer 7 46 40 7 
computer otherprop Objects n03082979 computer.n.01 39 objects +362 massage bed massage bed 6 157 4 1 bed bed Bed bed bed bed 02818832 n02818832 bed.n.01 11 bed +363 knob knob 6 652 40 7 knob otherprop Objects 40 misc +364 door stopper door stopper 6 40 7 otherprop Objects 40 misc +365 bulletin board bulletin board 6 408 38 7 board otherstructure Objects n03211616 display_panel.n.01 22 tv_monitor +366 brick archway archway 6 21 1 12 wall wall Wall n02734217 arch.n.03 4 door +367 fruit bowl fruit bowl 6 22 40 7 bowl otherprop Objects bowl bowl 02880940 n02880940 bowl.n.03 39 objects +368 electric wire casing electric wire casing 5 40 7 otherprop Objects 40 misc +369 bookcase bookshelf 5 88 10 6 bookshelf bookshelf Furniture bookshelf bookshelf 02871439 n02871439 bookshelf.n.01 31 shelving +370 other room unknown /otherroom 6 20 40 7 unknown otherprop Objects n08632096 unknown.n.01 41 unlabeled +371 exercise machine exercise equipment 5 457 39 6 excercise equipment otherfurniture Furniture n04285146 sports_equipment.n.01 33 gym_equipment +372 storage shelving storage shelving 5 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +373 coffee maker coffee maker 5 40 7 otherprop Objects n03063338 coffee_maker.n.01 37 appliances +374 shower ceiling shower ceiling 5 4 22 3 ceiling ceiling Ceiling n04208936 shower.n.01 23 shower +375 fire alarm fire alarm 5 338 40 7 fire alarm otherprop Objects n03343737 fire_alarm.n.02 39 objects +376 tissue paper tissue paper 5 15 26 7 paper paper Objects 40 misc +377 projector projector 5 90 40 7 projector otherprop Objects n04009552 projector.n.02 39 objects +378 coat hanger coat hanger 5 400 40 7 coat hanger otherprop Objects n03057920 coat_hanger.n.01 39 objects +379 wall cubby wall cubby 5 40 7 otherprop Objects 40 misc +380 balcony railing balcony railing 5 497 38 7 railing otherstructure Objects 40 misc +381 shelf /w clutter shelf /w clutter 5 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +382 case case 5 851 40 7 case otherprop Objects 40 misc +383 coat rack coat rack 5 40 7 otherprop Objects n03059103 coatrack.n.01 40 misc +384 pan pan 5 589 40 7 pan otherprop Objects n03880531 pan.n.01 39 objects +385 fridge refrigerator 5 17 24 6 refridgerator refridgerator Furniture n04070727 refrigerator.n.01 37 appliances +386 suitcase luggage 5 783 40 7 luggage otherprop Objects n02774630 baggage.n.01 39 objects +387 wardrobe rod closet rod 5 40 7 otherprop Objects n04100174 rod.n.01 39 objects +388 hamper clothes hamper 5 39 40 7 basket otherprop Objects basket 02801938 n03050864 clothes_hamper.n.01 39 objects +389 trinket trinket 5 844 40 7 trinket otherprop Objects n02787435 bangle.n.02 39 objects +390 c;lothes hangers clothes hanger 5 211 40 7 hanger otherprop Objects n03057920 coat_hanger.n.01 39 objects +391 paper paper 5 15 26 7 paper paper Objects n14974264 paper.n.01 39 objects +392 back splash backsplash 5 40 7 otherprop Objects 40 misc +393 chimney chimney 5 702 38 7 chimney otherstructure Objects n03017428 chimney.n.01 40 misc +394 arc arch 5 40 7 otherprop Objects n02733524 arch.n.04 40 misc +395 shower bench shower bench 5 204 39 6 bench otherfurniture Furniture bench bench 02828884 n02828884 bench.n.01 34 seating +396 person person 5 331 31 7 person person Objects person n05217688 person.n.02 39 objects +397 tablet tablet 5 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +398 exercise mat exercise mat 5 143 20 5 floor mat floor mat Floor n03727946 mat.n.03 33 gym_equipment +399 
smoke alarm smoke alarm 5 525 40 7 alarm otherprop Objects n03343737 fire_alarm.n.02 39 objects +400 kitchen utensils kitchen utensil 5 267 40 7 utensil otherprop Objects n03621049 kitchen_utensil.n.01 39 objects +401 weights weight 5 40 7 otherprop Objects n04571292 weight.n.02 33 gym_equipment +402 display cabinet display cabinet 5 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +403 showcase display cabinet 5 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +404 bedpost bedpost 5 40 7 otherprop Objects n02821415 bedpost.n.01 11 bed +405 file cabinet file cabinet 5 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +406 umbrella umbrella 5 203 40 7 umbrella otherprop Objects n04507155 umbrella.n.01 39 objects +407 massage table massage table 5 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +408 laundry basket laundry basket 5 164 40 7 laundry basket otherprop Objects basket 02801938 n03050864 clothes_hamper.n.01 39 objects +409 jar jar 5 70 40 7 jar otherprop Objects jar 03593526 n03593526 jar.n.01 39 objects +410 exercise bike exercise bike 5 457 39 6 excercise equipment otherfurniture Furniture n03302671 exercise_bike.n.01 33 gym_equipment +411 hose hose 5 40 7 otherprop Objects n03539875 hose.n.03 40 misc +412 window dormer window dormer 5 40 7 otherprop Objects 40 misc +413 closet shelf closet shelf 5 40 7 otherprop Objects n04190052 shelf.n.01 31 shelving +414 power breaker box power breaker box 5 26 29 7 box box Objects 40 misc +415 smoke detector smoke detector 5 40 7 otherprop Objects 40 misc +416 door knob door knob 4 27 40 7 door knob otherprop Objects 40 misc +417 shelf side shelf 4 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +418 jacuzzi jacuzzi 4 40 7 otherprop Objects 40 misc +419 backpack backpack 4 206 40 7 backpack otherprop Objects n02769748 backpack.n.01 39 objects +420 wooden desk desk 4 36 14 10 desk desk Table desk desk table 04379243 n03179701 desk.n.01 5 table +421 bath mat bath mat 4 40 7 otherprop Objects n02807401 bath_mat.n.01 2 floor +422 unknown clutter unknown clutter 4 40 7 otherprop Objects 40 misc +423 hook hook 4 40 7 otherprop Objects 40 misc +424 sauna wall wall 4 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +425 elevator elevator 4 40 7 otherprop Objects 40 misc +426 tool tool 4 40 7 otherprop Objects n04451818 tool.n.01 39 objects +427 recliner recliner 4 5 5 4 chair chair Chair chair chair chair 03001627 n04062428 recliner.n.01 3 chair +428 recessed wall recessed wall 4 21 1 12 wall wall Wall 40 misc +429 closet bar closet rod 4 40 7 otherprop Objects n04100174 rod.n.01 39 objects +430 tank tank 4 40 7 otherprop Objects 40 misc +431 toaster toaster 4 251 40 7 toaster otherprop Objects n04442312 toaster.n.02 37 appliances +432 toilet brush holder toilet brush holder 4 40 7 otherprop Objects 40 misc +433 handrail /otherroom handrail /otherroom 4 453 38 7 banister otherstructure Objects n02788148 bannister.n.02 30 railing +434 landing landing 6 40 7 otherprop Objects n03638511 landing.n.01 2 floor +435 book rack book rack 4 224 39 6 bookrack otherfurniture Furniture 40 misc +436 wall mirror mirror 4 122 19 7 mirror mirror Objects n03773035 mirror.n.01 21 mirror +437 hunting trophy hunting trophy 4 547 40 7 trophy otherprop Objects 40 misc +438 rod rod 4 40 7 otherprop Objects n04100174 rod.n.01 39 objects +439 floor mat floor mat 4 143 20 5 floor mat floor mat Floor n03727837 mat.n.01 2 floor 
+440 motion detector motion detector 4 40 7 otherprop Objects 40 misc +441 clothing clothes 4 141 21 7 clothes clothes Objects n02728440 apparel.n.01 38 clothes +442 can of paint can of paint 4 40 7 otherprop Objects 40 misc +443 medicine cabinet medicine cabinet 4 3 3 6 cabinet cabinet Furniture cabinet 02933112 n03742115 medicine_chest.n.01 7 cabinet +444 sensor sensor 4 40 7 otherprop Objects n03180969 detector.n.01 39 objects +445 cart cart 4 305 40 7 cart otherprop Objects n03484083 handcart.n.01 39 objects +446 slab slab 4 38 7 otherstructure Objects n04233405 slab.n.01 39 objects +447 bean bag chair bean bag chair 4 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +448 window valence window valence 4 89 16 13 curtain curtain Window curtain n03151077 curtain.n.01 12 curtain +449 window /outside window /outside 4 59 9 13 window window Window n04587648 window.n.01 9 window +450 pole pole 4 40 7 otherprop Objects n03976657 pole.n.01 39 objects +451 picture / other room picture /otherroom 4 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +452 canister canister 4 794 40 7 canister otherprop Objects 40 misc +453 washbasin counter washbasin counter 4 7 12 6 counter counter Furniture table table table 04379243 n03116530 counter.n.01 26 counter +454 kitchen top kitchen top 4 7 12 6 counter counter Furniture n03118245 countertop.n.01 26 counter +455 unknown object object 4 40 7 otherprop Objects n00002684 object.n.01 39 objects +456 pitcher pitcher 4 273 40 7 pitcher otherprop Objects n03950228 pitcher.n.02 39 objects +457 showerhead showerhead 4 650 40 7 shower head otherprop Objects n04209383 showerhead.n.01 23 shower +458 podium podium 4 40 7 otherprop Objects n03159640 dais.n.01 39 objects +459 ceiling vent ceiling vent 4 25 38 7 air vent otherstructure Objects n04526241 vent.n.01 40 misc +460 throw pillow pillow 4 119 18 7 pillow pillow Objects pillow 03938244 n03938244 pillow.n.01 8 cushion +461 grill grill 4 700 38 7 grill otherstructure Objects n03459591 grill.n.02 40 misc +462 sink cabinet sink cabinet 4 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +463 tapestry tapestry 4 40 7 otherprop Objects n04393549 tapestry.n.02 39 objects +464 bed sheet bed sheet 5 559 40 7 sheet otherprop Objects n04188179 sheet.n.03 11 bed +465 shade shade 4 40 7 otherprop Objects n04181718 shade.n.03 39 objects +466 doorknob doorknob 4 27 40 7 door knob otherprop Objects n03222959 doorknob.n.01 4 door +467 stair railing banister 4 453 38 7 banister otherstructure Objects n02788148 bannister.n.02 30 railing +468 vacuum cleaner vacuum cleaner 4 306 40 7 vacuum cleaner otherprop Objects n04517823 vacuum.n.04 37 appliances +469 bed comforter bed comforter 4 484 40 7 comforter otherprop Objects 40 misc +470 door / other room door /otherroom 4 28 8 12 door door Wall door n03221720 door.n.01 4 door +471 pictures picture 4 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +472 bed table bed table 4 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +473 shirt shirt 4 40 7 otherprop Objects n04197391 shirt.n.01 38 clothes +474 dressing table dressing table 4 19 7 10 table table Table table table table 04379243 n03238586 dressing_table.n.01 5 table +475 shower wall cubby shower wall cubby 4 40 7 otherprop Objects n04208936 shower.n.01 23 shower +476 side wall wall 4 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +477 beside table beside table 4 19 7 10 table table Table table table 
table 04379243 n04379243 table.n.02 5 table +478 curb curb 4 40 7 otherprop Objects n03149135 curb.n.02 39 objects +479 arches arch 4 40 7 otherprop Objects n02733524 arch.n.04 40 misc +480 storage bin storage bin 4 812 40 7 storage bin otherprop Objects 40 misc +481 support beam support beam 4 40 7 otherprop Objects n02815950 beam.n.02 29 beam +482 globe globe 5 347 40 7 globe otherprop Objects 40 misc +483 pantry pantry 4 38 7 otherstructure Objects n03885535 pantry.n.01 40 misc +484 skateboard skateboard 4 408 38 7 board otherstructure Objects skateboard 04225987 n04225987 skateboard.n.01 39 objects +485 stove hood range hood 4 380 38 7 range hood otherstructure Objects range_hood n04053677 range_hood.n.01 39 objects +486 cabin cabin 4 40 7 otherprop Objects 40 misc +487 shower bar shower bar 4 51 38 7 bar otherstructure Objects 40 misc +488 chaise chaise 4 5 5 4 chair chair Chair chair chair chair 03001627 n03002711 chaise_longue.n.01 3 chair +489 flower wash flower vase 4 78 40 7 vase otherprop Objects vase jar 03593526 n04522168 vase.n.01 39 objects +490 desk and chairs desk and chairs 4 40 7 otherprop Objects 40 misc +491 plant vase flower vase 4 78 40 7 vase otherprop Objects vase jar 03593526 n04522168 vase.n.01 39 objects +492 towel rack towel rack 4 40 7 otherprop Objects n04459773 towel_rack.n.01 40 misc +493 cross cross 4 49 40 7 monitor otherprop Objects n03857828 oscilloscope.n.01 39 objects +494 sliding door sliding door 4 28 8 12 door door Wall door n04239074 sliding_door.n.01 4 door +495 cosmetics cosmetics 4 40 7 otherprop Objects n03113152 cosmetic.n.01 39 objects +496 wall other room wall /otherroom 4 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +497 kettle kettle 4 16 40 7 pot otherprop Objects n03612814 kettle.n.01 39 objects +498 junk junk 4 40 7 otherprop Objects n14857897 debris.n.01 39 objects +499 stationery stationery 3 15 26 7 paper paper Objects n06258852 stationery.n.01 39 objects +500 lights light 3 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +501 window / other room window /otherroom 3 59 9 13 window window Window n04587648 window.n.01 9 window +502 dish cabinet dish cabinet 3 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +503 wall panel wall panel 3 21 1 12 wall wall Wall n04548503 wall_panel.n.01 1 wall +504 gate gate 3 223 38 7 gate otherstructure Objects n03427296 gate.n.01 40 misc +505 safe safe 3 26 29 7 box box Objects n04125021 safe.n.01 39 objects +506 ventilation ventilation 3 40 7 otherprop Objects n04526520 ventilation.n.02 39 objects +507 logs firewood 3 40 7 otherprop Objects n15100644 firewood.n.01 40 misc +508 sliding doors sliding door 3 28 8 12 door door Wall door n04239074 sliding_door.n.01 4 door +509 shower rod shower rod 3 40 7 otherprop Objects n04100174 rod.n.01 12 curtain +510 towel shelf towel shelf 3 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +511 row of theater chairs row of theater chairs 3 40 7 otherprop Objects 40 misc +512 closet wall wall 3 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +513 bath utencils bath utensil 3 267 40 7 utensil otherprop Objects 40 misc +514 sconce sconce 3 62 38 7 light otherstructure Objects n04148703 sconce.n.03 28 lighting +515 garage light garage light 3 62 38 7 light otherstructure Objects 40 misc +516 ceiling beam beam 3 38 7 otherstructure Objects n02815950 beam.n.02 29 beam +517 toolbox toolbox 3 344 39 6 chest otherfurniture Furniture n04452615 toolbox.n.01 39 objects +518 sponge stool 
stool 3 150 40 7 stool otherprop Objects stool n04326896 stool.n.01 19 stool +519 security camera security camera 3 212 40 7 security camera otherprop Objects camera 02942699 n02942699 camera.n.01 39 objects +520 gym machine exercise equipment 3 457 39 6 excercise equipment otherfurniture Furniture n04285146 sports_equipment.n.01 33 gym_equipment +521 wall /outside wall /outside 3 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +522 mantle mantle 3 874 38 7 mantle otherstructure Objects mantel n03719343 mantel.n.01 27 fireplace +523 floor 2 floor 3 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +524 skirting skirting 3 40 7 otherprop Objects 40 misc +525 banister banister 3 453 38 7 banister otherstructure Objects n02788148 bannister.n.02 30 railing +526 lockers locker 3 3 3 6 cabinet cabinet Furniture n02933462 cabinet.n.03 40 misc +527 trophy trophy 3 547 40 7 trophy otherprop Objects 40 misc +528 tile tile 3 40 7 otherprop Objects n04435180 tile.n.01 39 objects +529 unknown / room below unknown /otherroom 3 20 40 7 unknown otherprop Objects n08632096 unknown.n.01 41 unlabeled +530 picture car picture car 3 530 40 7 car otherprop Objects car car 02958343 n02958343 car.n.01 39 objects +531 outlet outlet 3 40 7 otherprop Objects n04548771 wall_socket.n.01 39 objects +532 plant ridge plant ridge 3 40 7 otherprop Objects 40 misc +533 backsplash backsplash 3 40 7 otherprop Objects 40 misc +534 computer monitor monitor 3 49 40 7 monitor otherprop Objects monitor monitor tv or monitor 03211117 n03782190 monitor.n.04 22 tv_monitor +535 doorframe / other room doorframe /otherroom 3 615 38 7 door frame otherstructure Objects n03222722 doorframe.n.01 4 door +536 shower wall /otherroom shower wall /otherroom 3 21 1 12 wall wall Wall n04208936 shower.n.01 23 shower +537 shelves with wine shelving 3 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +538 shoe shoe 3 149 40 7 shoe otherprop Objects n04199027 shoe.n.01 39 objects +539 hedge hedge 3 40 7 otherprop Objects n03511175 hedge.n.01 40 misc +540 glass window window 3 59 9 13 window window Window n04587648 window.n.01 9 window +541 sofachair sofa chair 3 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +542 hand soap hand soap 3 133 40 7 soap otherprop Objects n04253437 soap.n.01 39 objects +543 window blinds blinds 3 80 13 13 blinds blinds Window n04589190 window_blind.n.01 32 blinds +544 flower vase flower vase 3 78 40 7 vase otherprop Objects vase jar 03593526 n04522168 vase.n.01 39 objects +545 appliance appliance 3 40 7 otherprop Objects 40 misc +546 christmas tree christmas tree 3 40 7 otherprop Objects 40 misc +547 support column column 3 94 38 7 column otherstructure Objects n03074380 column.n.06 40 misc +548 dish plate 3 233 40 7 plate otherprop Objects n03959485 plate.n.04 39 objects +549 closet floor closet floor 3 11 2 5 floor floor Floor 40 misc +550 celling ceiling 3 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +551 stuffed animals stuffed animal 3 177 40 7 stuffed animal otherprop Objects n04399382 teddy.n.01 39 objects +552 casket casket 3 40 7 otherprop Objects 40 misc +553 rowing machine exercise machine 3 220 40 7 machine otherprop Objects 40 misc +554 dining table centerpiece dining table centerpiece 3 878 40 7 centerpiece otherprop Objects 40 misc +555 bedside lamp bedside lamp 5 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +556 kitchen countertop item kitchen countertop item 3 40 7 otherprop Objects 40 misc +557 
fountain fountain 3 40 7 otherprop Objects n03388043 fountain.n.01 40 misc +558 window glass window glass 3 612 38 7 glass otherstructure Objects n03881893 pane.n.01 39 objects +559 wall soffet soffit 3 40 7 otherprop Objects n04256758 soffit.n.01 39 objects +560 urinal urinal 3 40 7 otherprop Objects n04515991 urinal.n.01 39 objects +561 kitchen hood range hood 3 380 38 7 range hood otherstructure Objects range_hood n04053677 range_hood.n.01 39 objects +562 showpiece decoration 3 40 7 otherprop Objects n03169390 decoration.n.01 39 objects +563 wall trim molding 3 38 7 otherstructure Objects n02800354 baseboard.n.01 1 wall +564 barrel barrel 3 343 39 6 barrel otherfurniture Furniture 40 misc +565 firewood firewood 3 40 7 otherprop Objects n15100644 firewood.n.01 40 misc +566 columns pillar 3 94 38 7 column otherstructure Objects n03073977 column.n.07 24 column +567 beams beam 3 38 7 otherstructure Objects n02815950 beam.n.02 29 beam +568 carpet roll carpet roll 3 40 7 otherprop Objects 40 misc +569 portrait portrait 3 64 11 8 picture picture Picture n03987079 portrait.n.02 6 picture +570 table light table light 3 62 38 7 light otherstructure Objects 40 misc +571 water heater water heater 3 588 40 7 water heater otherprop Objects n04560113 water_heater.n.01 39 objects +572 stairs 2 stair 3 215 38 7 stairs otherstructure Objects stairs n04314914 step.n.04 16 stairs +573 pouffe pouffe 3 359 39 6 ottoman otherfurniture Furniture n03858418 ottoman.n.03 34 seating +574 ceiling behind ceiling /otherroom 3 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +575 pillars pillar 3 94 38 7 column otherstructure Objects n03073977 column.n.07 24 column +576 concrete block concrete block 3 40 7 otherprop Objects 40 misc +577 range stove 3 242 38 7 stove otherstructure Objects stove 04330267 n04330267 stove.n.02 37 appliances +578 shelf with objects shelf 3 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +579 toilet seat liner dispenser toilet seat liner dispenser 3 40 7 otherprop Objects 40 misc +580 patio chair patio chair 3 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +581 folding chair folding chair 3 5 5 4 chair chair Chair chair chair chair 03001627 n03376595 folding_chair.n.01 3 chair +582 island island 4 456 38 7 kitchen island otherstructure Objects n03620600 kitchen_island.n.01 26 counter +583 toaster oven toaster oven 3 275 40 7 toaster oven otherprop Objects n04442441 toaster_oven.n.01 37 appliances +584 overlook railing railing 3 497 38 7 railing otherstructure Objects n04047401 railing.n.01 30 railing +585 bathroom mirror mirror 3 122 19 7 mirror mirror Objects n03773035 mirror.n.01 21 mirror +586 recycle bin recycle bin 3 307 40 7 bin otherprop Objects 40 misc +587 counter top countertop 3 7 12 6 counter counter Furniture n03118245 countertop.n.01 26 counter +588 rafter rafter 3 40 7 otherprop Objects n04045644 rafter.n.01 29 beam +589 dryer clothes dryer 3 39 6 otherfurniture Furniture n03251766 dryer.n.01 37 appliances +590 bed lamp bedside lamp 3 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +591 wall paneling paneling 3 21 1 12 wall wall Wall n03882611 paneling.n.01 1 wall +592 stage stage 3 40 7 otherprop Objects 40 misc +593 fire sprinkler fire sprinkler 3 40 7 otherprop Objects 40 misc +594 brush brush 3 40 7 otherprop Objects n02908217 brush.n.02 39 objects +595 wall 2 wall 3 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +596 balcony balcony 3 40 7 otherprop Objects 40 misc +597 
water tank water tank 2 263 40 7 vessel otherprop Objects n03035715 cistern.n.02 39 objects +598 pile of objects clutter 2 40 7 otherprop Objects 40 misc +599 garage door frame garage door frame 2 40 7 otherprop Objects 40 misc +600 back wall wall 2 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +601 plant soil plant soil 2 40 7 otherprop Objects 40 misc +602 globe stand globe stand 2 466 40 7 globe stand otherprop Objects 40 misc +603 bicycle bicycle 2 189 40 7 bicycle otherprop Objects bicycle 02834778 n02834778 bicycle.n.01 39 objects +604 stairframe stair frame 2 40 7 otherprop Objects 40 misc +605 separating screen partition 2 21 1 12 wall wall Wall n03894379 partition.n.01 40 misc +606 unknown wall object object 2 40 7 otherprop Objects n00002684 object.n.01 39 objects +607 air duct air duct 2 38 38 7 air duct otherstructure Objects n02690941 air_passage.n.01 40 misc +608 kitchen upper cabinet kitchen cabinet 2 3 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 7 cabinet +609 chair pillow pillow 2 119 18 7 pillow pillow Objects pillow 03938244 n03938244 pillow.n.01 8 cushion +610 led tv led tv 2 40 7 otherprop Objects 40 misc +611 deco decoration 2 40 7 otherprop Objects n03169390 decoration.n.01 39 objects +612 ceiling under staircase ceiling under staircase 2 40 7 otherprop Objects 40 misc +613 chair /w books chair /w books 2 85 23 2 books books Books 40 misc +614 bed back rest headboard 2 161 39 6 headboard otherfurniture Furniture n03502200 headboard.n.01 11 bed +615 giraffe giraffe 2 40 7 otherprop Objects n02439033 giraffe.n.01 39 objects +616 lightswitch light switch 2 301 38 7 light switch otherstructure Objects n04372370 switch.n.01 39 objects +617 doorframe \other room doorframe /otherroom 2 615 38 7 door frame otherstructure Objects n03222722 doorframe.n.01 4 door +618 object / camera? 
object 2 40 7 otherprop Objects n00002684 object.n.01 39 objects +619 grandfather clock grandfather clock 2 462 39 6 grandfather clock otherfurniture Furniture clock 03046257 n03452594 grandfather_clock.n.01 39 objects +620 jewelry box jewelry box 2 26 29 7 box box Objects 40 misc +621 bottles of wine bottles of wine 2 766 40 7 wine otherprop Objects 40 misc +622 massage table base massage table 2 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +623 hood range hood 2 380 38 7 range hood otherstructure Objects range_hood n04053677 range_hood.n.01 39 objects +624 mirrorframe mirror frame 2 122 19 7 mirror mirror Objects n03773035 mirror.n.01 21 mirror +625 wall beam wall beam 2 40 7 otherprop Objects 40 misc +626 wooden trim molding 2 38 7 otherstructure Objects n02800354 baseboard.n.01 1 wall +627 stalls stall 2 38 7 otherstructure Objects n02873839 booth.n.02 40 misc +628 partition partition 2 21 1 12 wall wall Wall n03894379 partition.n.01 40 misc +629 dog dog 2 701 40 7 dog otherprop Objects 40 misc +630 valance valance 2 40 7 otherprop Objects n03111296 cornice.n.01 40 misc +631 radio radio 2 188 40 7 radio otherprop Objects radio 40 misc +632 bush bush 2 82 40 7 plant otherprop Objects plant n13112664 shrub.n.01 14 plant +633 row of theater seats row of theater seats 2 40 7 otherprop Objects 40 misc +634 bath utencil bath utensil 2 267 40 7 utensil otherprop Objects 40 misc +635 basket of towels basket of towels 2 40 7 otherprop Objects 40 misc +636 laundry machine washing machine 2 278 39 6 washing machine otherfurniture Furniture washing_machine 04554684 n04554684 washer.n.03 37 appliances +637 mirror /otherroom mirror /otherroom 2 122 19 7 mirror mirror Objects n03773035 mirror.n.01 21 mirror +638 toilet sink toilet sink 2 24 34 7 sink sink Objects sink 40 misc +639 sauna heater sauna heater 2 111 39 6 heater otherfurniture Furniture 40 misc +640 dining bench dining bench 2 204 39 6 bench otherfurniture Furniture bench bench 02828884 n02828884 bench.n.01 34 seating +641 fume cupboard fume cupboard 2 40 7 otherprop Objects 40 misc +642 mouse mouse 2 103 40 7 mouse otherprop Objects n03793489 mouse.n.04 39 objects +643 boiler boiler 2 40 7 otherprop Objects 40 misc +644 hearth hearth 2 372 38 7 fireplace otherstructure Objects 40 misc +645 curtain darker curtain 2 89 16 13 curtain curtain Window curtain n03151077 curtain.n.01 12 curtain +646 round chair round chair 2 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +647 toilet bowl toilet 2 124 33 7 toilet toilet Objects toilet toilet n04446276 toilet.n.01 18 toilet +648 whine rack wine rack 2 299 40 7 wine rack otherprop Objects 40 misc +649 doorstep doorstep 3 28 8 12 door door Wall n03223686 doorsill.n.01 39 objects +650 binder binder 2 399 40 7 binder otherprop Objects 40 misc +651 shower door frame shower door frame 2 40 7 otherprop Objects n04208936 shower.n.01 23 shower +652 bed runner bed runner 2 157 4 1 bed bed Bed n02822579 bedstead.n.01 11 bed +653 cubicle cubicle 2 40 7 otherprop Objects 40 misc +654 fitness device exercise equipment 2 457 39 6 excercise equipment otherfurniture Furniture n04285146 sports_equipment.n.01 33 gym_equipment +655 support support 2 40 7 otherprop Objects 40 misc +656 overhang overhang 2 40 7 otherprop Objects n03864356 overhang.n.01 40 misc +657 electric box electric box 2 550 38 7 electric box otherstructure Objects 40 misc +658 bathrobe bathrobe 3 40 7 otherprop Objects n02807616 bathrobe.n.01 38 clothes +659 door mat doormat 2 
143 20 5 floor mat floor mat Floor n03223299 doormat.n.02 2 floor +660 jacket jacket 2 324 40 7 jacket otherprop Objects n03589791 jacket.n.01 38 clothes +661 cabinet table cabinet table 2 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +662 side frame frame 2 38 7 otherstructure Objects 40 misc +663 wainscoting paneling 2 21 1 12 wall wall Wall n03882611 paneling.n.01 1 wall +664 staircase trim staircase trim 2 40 7 otherprop Objects 40 misc +665 box /w books box /w books 2 85 23 2 books books Books 40 misc +666 nighstand nightstand 2 158 32 6 night stand night stand Furniture night_stand night_stand n03015254 chest_of_drawers.n.01 13 chest_of_drawers +667 window reflection window reflection 2 40 7 otherprop Objects 40 misc +668 pulpit pulpit 2 40 7 otherprop Objects n03159640 dais.n.01 39 objects +669 set of armchairs set of armchairs 2 40 7 otherprop Objects 40 misc +670 fish tank fish tank 2 782 38 7 fish tank otherstructure Objects n02732072 aquarium.n.01 40 misc +671 bathroom countertop objects objects 2 40 7 otherprop Objects n00002684 object.n.01 39 objects +672 concrete shelf shelf 2 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +673 ceiling wall ceiling wall 2 21 1 12 wall wall Wall 40 misc +674 picture \other room picture /otherroom 2 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +675 wall entry wall 2 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +676 lintel lintel 2 40 7 otherprop Objects n03503233 header.n.02 29 beam +677 wall table wall table 2 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +678 small table table 3 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +679 lighting fixture lighting fixture 2 40 7 otherprop Objects n03667380 lighting_fixture.n.01 39 objects +680 bed frame bedframe 2 157 4 1 bed bed Bed n02822579 bedstead.n.01 11 bed +681 freezer freezer 2 17 24 6 refridgerator refridgerator Furniture n03170635 deep-freeze.n.01 37 appliances +682 glass doors door 2 28 8 12 door door Wall door n03221720 door.n.01 4 door +683 extractor extractor 2 40 7 otherprop Objects 40 misc +684 flower pot flowerpot 2 146 40 7 flower pot otherprop Objects flower_pot flower pot 03991062 n03991062 pot.n.04 39 objects +685 soffet soffit 2 40 7 otherprop Objects n04256758 soffit.n.01 39 objects +686 platform platform 2 38 7 otherstructure Objects 40 misc +687 hot tub hot tub 2 136 36 7 bathtub bathtub Objects bathtub bathtub tub 02808440 n03543603 hot_tub.n.01 25 bathtub +688 paper towels paper towel 2 113 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 20 towel +689 kitchen utencils kitchen utensil 2 267 40 7 utensil otherprop Objects n03621049 kitchen_utensil.n.01 39 objects +690 shower grab bar shower grab bar 2 51 38 7 bar otherstructure Objects 40 misc +691 wall detail wall detail 2 40 7 otherprop Objects 40 misc +692 whineshelf whine shelf 2 40 7 otherprop Objects 40 misc +693 painting/other room painting /otherroom 2 64 11 8 picture picture Picture n03876519 painting.n.01 39 objects +694 television wall tv 2 40 7 otherprop Objects 40 misc +695 sauna bench bench 2 204 39 6 bench otherfurniture Furniture bench bench 02828884 n02828884 bench.n.01 34 seating +696 fruits fruit 2 286 40 7 fruit otherprop Objects n13134947 fruit.n.01 39 objects +697 picture frames picture frame 2 64 11 8 picture picture Picture n03931765 picture_frame.n.01 6 picture +698 buffet buffet 2 7 12 6 counter counter Furniture table table table 
04379243 n04247736 snack_bar.n.01 26 counter +699 billow billow 2 40 7 otherprop Objects 40 misc +700 computer chair computer chair 2 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +701 sofa seat sofa seat 2 40 7 otherprop Objects 40 misc +702 wall tv wall tv 2 40 7 otherprop Objects 40 misc +703 ground floor 2 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +704 wall2 wall 2 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +705 ceiling under stairs ceiling under stairs 2 215 38 7 stairs otherstructure Objects stairs 40 misc +706 calendar calendar 2 583 40 7 calendar otherprop Objects 40 misc +707 dome dome 2 40 7 otherprop Objects 40 misc +708 object /outside object /outside 2 40 7 otherprop Objects n00002684 object.n.01 39 objects +709 poll poll 2 40 7 otherprop Objects n01817346 poll.n.04 39 objects +710 wet bar wet bar 2 51 38 7 bar otherstructure Objects table table table 04379243 n04573513 wet_bar.n.01 26 counter +711 folding table folding table 2 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +712 stovetop stovetop 2 40 7 otherprop Objects 40 misc +713 gym equip gym equipment 2 457 39 6 excercise equipment otherfurniture Furniture n04285146 sports_equipment.n.01 33 gym_equipment +714 fruitbowl fruit bowl 2 22 40 7 bowl otherprop Objects bowl bowl 02880940 n02880940 bowl.n.03 39 objects +715 vending machine vending machine 2 220 40 7 machine otherprop Objects n04525305 vending_machine.n.01 39 objects +716 handwash hand wash 2 40 7 otherprop Objects 40 misc +717 wall clock wall clock 2 56 40 7 clock otherprop Objects clock 03046257 n04548280 wall_clock.n.01 39 objects +718 liquid soap liquid soap 2 133 40 7 soap otherprop Objects 40 misc +719 trinkets trinket 2 844 40 7 trinket otherprop Objects n02787435 bangle.n.02 39 objects +720 small table / stand small table/stand 2 40 7 otherprop Objects 40 misc +721 window shuts window shutters 2 40 7 otherprop Objects n04211356 shutter.n.02 39 objects +722 door frame 3 door frame 2 28 8 12 door door Wall door n03221720 door.n.01 4 door +723 stone stone 2 578 40 7 stones otherprop Objects n09416076 rock.n.01 39 objects +724 tripod tripod 2 50 39 6 stand otherfurniture Furniture n04485082 tripod.n.01 31 shelving +725 projector screen projector screen 2 53 38 7 projector screen otherstructure Objects 40 misc +726 window frame / other room window frame /otherroom 2 477 38 7 window frame otherstructure Objects n04589593 window_frame.n.01 9 window +727 wreath wreath 2 881 40 7 wreathe otherprop Objects n04606014 wreath.n.01 39 objects +728 door hinge door hinge 2 40 7 otherprop Objects 40 misc +729 sound speaker 2 54 40 7 speaker otherprop Objects speaker 03691459 n03691459 loudspeaker.n.01 39 objects +730 french door french door 2 28 8 12 door door Wall door n03394649 french_door.n.01 4 door +731 staircase handrail staircase handrail 2 40 7 otherprop Objects 40 misc +732 nighttable night table 2 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +733 photo in frame picure 2 64 11 8 picture picture Picture 40 misc +734 photoframe picture 2 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +735 stair wall stair wall 2 21 1 12 wall wall Wall 40 misc +736 closet ceiling closet ceiling 2 4 22 3 ceiling ceiling Ceiling 40 misc +737 stick stick 2 529 40 7 stick otherprop Objects 40 misc +738 fluorescent light fluorescent light 2 62 38 7 light otherstructure Objects 40 misc +739 wash cabinet wash cabinet 2 3 3 6 cabinet cabinet 
Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +740 shower seat shower seat 2 40 7 otherprop Objects 40 misc +741 trellis trellis 2 40 7 otherprop Objects n04478512 trellis.n.01 40 misc +742 patio patio 2 40 7 otherprop Objects n03899768 patio.n.01 40 misc +743 dart board dartboard 2 408 38 7 board otherstructure Objects n03162940 dartboard.n.01 39 objects +744 comforter comforter 2 484 40 7 comforter otherprop Objects n04033995 quilt.n.01 39 objects +745 table /w books table /w books 2 85 23 2 books books Books 40 misc +746 picture/other room picture /otherroom 2 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +747 dirt dirt 2 40 7 otherprop Objects 40 misc +748 base base 2 40 7 otherprop Objects 40 misc +749 chemical tank chemical tank 2 40 7 otherprop Objects 40 misc +750 step stool step stool 2 276 40 7 step stool otherprop Objects stool n04315713 step_stool.n.01 19 stool +751 misc misc 2 40 7 otherprop Objects 40 misc +752 sink table sink table 2 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +753 curved wall wall 2 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +754 roof beam beam 2 38 7 otherstructure Objects n02815950 beam.n.02 29 beam +755 cover cover 2 312 40 7 blanket otherprop Objects 40 misc +756 reading table reading table 2 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +757 side steps wall steps wall 2 21 1 12 wall wall Wall 40 misc +758 sideboard sideboard 2 7 12 6 counter counter Furniture 40 misc +759 wall from another room wall /otherroom 2 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +760 electric outlet electric outlet 2 98 40 7 electrical outlet otherprop Objects n04548771 wall_socket.n.01 39 objects +761 stall door stall door 2 28 8 12 door door Wall door n03221720 door.n.01 4 door +762 separator separator 2 40 7 otherprop Objects n02995998 centrifuge.n.01 39 objects +763 toilet bowl brush holder toilet bowl brush holder 2 40 7 otherprop Objects 40 misc +764 vessel vessel 2 263 40 7 vessel otherprop Objects watercraft 04530566 n04530566 vessel.n.02 39 objects +765 table clutter table clutter 2 40 7 otherprop Objects 40 misc +766 partition wall wall 2 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +767 bed curtain bed curtain 2 89 16 13 curtain curtain Window curtain 40 misc +768 stairs skirt stairs skirt 2 40 7 otherprop Objects 40 misc +769 small chair chair 2 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +770 ceiling window ceiling window 2 59 9 13 window window Window 40 misc +771 rocking chair rocking chair 2 5 5 4 chair chair Chair chair chair chair 03001627 n04099969 rocking_chair.n.01 3 chair +772 window \other room window /otherroom 2 59 9 13 window window Window n04587648 window.n.01 9 window +773 under stair under stair 2 40 7 otherprop Objects 40 misc +774 unknown outside building unknown /outside 2 20 40 7 unknown otherprop Objects n08632096 unknown.n.01 41 unlabeled +775 bath wall bath wall 2 21 1 12 wall wall Wall 40 misc +776 panel screen panel screen 2 40 7 otherprop Objects 40 misc +777 doorfra,e doorframe 2 615 38 7 door frame otherstructure Objects n03222722 doorframe.n.01 4 door +778 shower mat shower mat 2 143 20 5 floor mat floor mat Floor n03727837 mat.n.01 2 floor +779 blackboard blackboard 2 225 38 7 blackboard otherstructure Objects n02846511 blackboard.n.01 39 objects +780 drawer desk drawer desk 2 36 14 10 desk desk Table desk desk 40 misc +781 poster picture 2 64 11 8 picture picture Picture 
n03931044 picture.n.01 6 picture +782 fireplace sconce fireplace sconce 2 40 7 otherprop Objects 40 misc +783 table support table support 2 40 7 otherprop Objects 40 misc +784 wall lamp wall lamp 2 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +785 closet shelving closet shelving 5 40 7 otherprop Objects 40 misc +786 closest area closest area 2 40 7 otherprop Objects 40 misc +787 scroll scroll 2 40 7 otherprop Objects 40 misc +788 foot stand foot stand 2 50 39 6 stand otherfurniture Furniture 40 misc +789 button button 2 774 40 7 button otherprop Objects 40 misc +790 art / clutter art/clutter 2 40 7 otherprop Objects 40 misc +791 door arc arch 2 40 7 otherprop Objects n02733524 arch.n.04 40 misc +792 stairs railing stairs railing 2 497 38 7 railing otherstructure Objects 40 misc +793 floor /otherroom floor /otherroom 2 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +794 shovel shovel 2 607 40 7 shovel otherprop Objects 40 misc +795 alarm controls alarm control 2 40 7 otherprop Objects 40 misc +796 lamb lamp 2 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +797 cluttered objects clutter 2 40 7 otherprop Objects 40 misc +798 door/other room door /otherroom 2 28 8 12 door door Wall door n03221720 door.n.01 4 door +799 closet shelves closet shelf 2 40 7 otherprop Objects n04190052 shelf.n.01 31 shelving +800 scultpure sculpture 2 294 40 7 sculpture otherprop Objects n04157320 sculpture.n.01 39 objects +801 arm chair armchair 2 5 5 4 chair chair Chair chair chair chair 03001627 n02738535 armchair.n.01 3 chair +802 door behind door /otherroom 2 28 8 12 door door Wall door n03221720 door.n.01 4 door +803 exercise ball exercise ball 2 457 39 6 excercise equipment otherfurniture Furniture n04285146 sports_equipment.n.01 33 gym_equipment +804 yard yard 2 40 7 otherprop Objects 40 misc +805 semi chair semi chair 2 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +806 bouquet bouquet 2 40 7 otherprop Objects n02879087 bouquet.n.01 39 objects +807 garage door opener bar garage door opener bar 2 51 38 7 bar otherstructure Objects 40 misc +808 pots pot 2 16 40 7 pot otherprop Objects n03991062 pot.n.04 39 objects +809 decorations decoration 2 40 7 otherprop Objects n03169390 decoration.n.01 39 objects +810 unkown unknown 2 20 40 7 unknown otherprop Objects n08632096 unknown.n.01 41 unlabeled +811 kitchen decoration kitchen decoration 2 40 7 otherprop Objects n03169390 decoration.n.01 39 objects +812 archway corner archway corner 2 40 7 otherprop Objects 40 misc +813 kitchen wall kitchen wall 2 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +814 cabinet door cabinet door 2 28 8 12 door door Wall door 40 misc +815 sauna bowl sauna bowl 2 22 40 7 bowl otherprop Objects bowl bowl 02880940 n02880940 bowl.n.03 39 objects +816 bar chair bar chair 2 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +817 shelf cubby shelf cubby 2 40 7 otherprop Objects 40 misc +818 toilet plunger toilet plunger 2 563 40 7 toilet plunger otherprop Objects n03970156 plunger.n.03 39 objects +819 belts belt 2 610 40 7 belt otherprop Objects 40 misc +820 sewing machine sewing machine 2 890 40 7 sewing machine otherprop Objects n04179913 sewing_machine.n.01 37 appliances +821 hot water/cold water knob hot water/cold water knob 2 652 40 7 knob otherprop Objects 40 misc +822 barbeque barbecue 2 40 7 otherprop Objects n02790669 barbecue.n.03 40 misc +823 cutting board cutting board 2 247 40 7 cutting board 
otherprop Objects n03025513 chopping_board.n.01 39 objects +824 soapbox soapbox 2 671 40 7 soap box otherprop Objects 40 misc +825 washing stuff washing stuff 2 40 7 otherprop Objects 40 misc +826 dining table decoration decoration 2 40 7 otherprop Objects n03169390 decoration.n.01 39 objects +827 copier copier 2 40 7 otherprop Objects n03257586 duplicator.n.01 39 objects +828 unknown (picture or window) unknown picture/window 2 40 7 otherprop Objects 40 misc +829 stair handle stair handle 2 758 40 7 handle otherprop Objects n04047401 railing.n.01 30 railing +830 reflection reflection 2 64 11 8 picture picture Picture n04068976 reflection.n.05 6 picture +831 horizonal bar for exercise? exercise equipment 2 457 39 6 excercise equipment otherfurniture Furniture n04285146 sports_equipment.n.01 33 gym_equipment +832 shelving / other room shelving /otherroom 2 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +833 seats seat 2 524 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 34 seating +834 shower stall shower stall 2 40 7 otherprop Objects n04209613 shower_stall.n.01 40 misc +835 chairs chair 2 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +836 stair /otherroom stair /otherroom 2 215 38 7 stairs otherstructure Objects stairs n04314914 step.n.04 16 stairs +837 kitchen stuff clutter 2 40 7 otherprop Objects 40 misc +838 unkown object object 2 40 7 otherprop Objects n00002684 object.n.01 39 objects +839 throne throne 2 5 5 4 chair chair Chair chair chair chair 03001627 n04429376 throne.n.01 3 chair +840 socket socket 2 40 7 otherprop Objects n04255163 socket.n.02 39 objects +841 bathroom art bathroom art 2 40 7 otherprop Objects n02743547 art.n.01 39 objects +842 night table night table 2 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +843 display display 2 40 7 otherprop Objects n03211117 display.n.06 22 tv_monitor +844 tabletop tabletop 2 19 7 10 table table Table n04381860 tabletop.n.01 39 objects +845 thrash bin trash bin 2 307 40 7 bin otherprop Objects trash_bin 02747177 n02747177 ashcan.n.01 39 objects +846 l shape sofa l-shaped sofa 2 83 6 9 sofa sofa Sofa sofa sofa sofa 04256520 n04256520 sofa.n.01 10 sofa +847 cardboard box cardboard box 2 26 29 7 box box Objects 40 misc +848 wall borader wall board 1 408 38 7 board otherstructure Objects 40 misc +849 kitchen appliances kitchen appliance 1 40 7 otherprop Objects n03620052 kitchen_appliance.n.01 37 appliances +850 ceiling object object 1 40 7 otherprop Objects n00002684 object.n.01 39 objects +851 wall hanging decoration wall hanging decoration 1 40 7 otherprop Objects 40 misc +852 stand / small table stand/small table 1 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +853 kitchen ceiling kitchen ceiling 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +854 floor matt floor mat 1 143 20 5 floor mat floor mat Floor n03727837 mat.n.01 2 floor +855 wall indent wall indent 1 40 7 otherprop Objects 40 misc +856 chairir chair 1 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +857 window seat window seat 1 777 38 7 window seat otherstructure Objects bench bench 02828884 n04590021 window_seat.n.01 34 seating +858 bike bicycle 1 189 40 7 bicycle otherprop Objects bicycle 02834778 n02834778 bicycle.n.01 39 objects +859 towels (rolled) towel 1 135 27 7 towel towel Objects n04459362 towel.n.01 20 towel +860 wall \other oom wall /otherroom 1 21 1 12 wall wall Wall 
n04546855 wall.n.01 1 wall +861 shower pipe shower pipe 1 664 40 7 shower pipe otherprop Objects 40 misc +862 towel or curtain bar towel/curtain bar 1 51 38 7 bar otherstructure Objects 40 misc +863 shower glass shower glass 1 612 38 7 glass otherstructure Objects 40 misc +864 stone bench stone bench 1 204 39 6 bench otherfurniture Furniture bench bench 02828884 n02828884 bench.n.01 34 seating +865 window/other room window /otherroom 1 59 9 13 window window Window n04587648 window.n.01 9 window +866 back splash sink sink 1 24 34 7 sink sink Objects sink n04223580 sink.n.01 15 sink +867 iron board iron board 1 408 38 7 board otherstructure Objects 40 misc +868 computer equipment computer equipment 1 40 7 otherprop Objects 40 misc +869 shelf / cabinet shelf/cabinet 1 40 7 otherprop Objects 40 misc +870 stove door stove door 1 28 8 12 door door Wall door 40 misc +871 door inside door 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +872 unknown stove accessory unknown stove accessory 1 40 7 otherprop Objects 40 misc +873 circular sofa circular sofa 1 83 6 9 sofa sofa Sofa sofa sofa sofa 04256520 n04256520 sofa.n.01 10 sofa +874 dustpan dustpan 1 40 7 otherprop Objects n03259009 dustpan.n.02 39 objects +875 bathroom door door 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +876 paining painting 1 64 11 8 picture picture Picture n03876519 painting.n.01 39 objects +877 luggage luggage 1 783 40 7 luggage otherprop Objects n02774630 baggage.n.01 39 objects +878 dooframe doorframe 1 615 38 7 door frame otherstructure Objects n03222722 doorframe.n.01 4 door +879 outside wall wall 2 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +880 alarm control alarm control 1 40 7 otherprop Objects 40 misc +881 oil lamp oil lamp 1 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +882 scaffolding scaffolding 1 40 7 otherprop Objects n04141712 scaffolding.n.01 39 objects +883 bed light bed light 1 62 38 7 light otherstructure Objects 40 misc +884 baluster baluster 1 453 38 7 banister otherstructure Objects n02783994 baluster.n.01 39 objects +885 leg rest leg rest 1 40 7 otherprop Objects 40 misc +886 ceiling / upstairs room ceiling /otheroom 1 40 7 otherprop Objects 40 misc +887 tv cabinet tv stand 1 291 39 6 tv stand otherfurniture Furniture tv_stand n03290653 entertainment_center.n.01 36 furniture +888 hole hole 1 40 7 otherprop Objects n09304750 hole.n.05 39 objects +889 ping pong table ping pong table 1 625 39 6 ping pong table otherfurniture Furniture table table table 04379243 n04379243 table.n.02 5 table +890 hutch hutch 1 40 7 otherprop Objects 40 misc +891 low shelf shelf 1 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +892 foliage foliage 1 40 7 otherprop Objects 40 misc +893 windows sill window frame 1 59 9 13 window window Window n04589593 window_frame.n.01 9 window +894 toilet cistern toilet 1 124 33 7 toilet toilet Objects toilet toilet n04446276 toilet.n.01 18 toilet +895 stone circle stone circle 1 40 7 otherprop Objects 40 misc +896 bathroom sink sink 1 24 34 7 sink sink Objects sink n04223580 sink.n.01 15 sink +897 record player record player 1 220 40 7 machine otherprop Objects n04064401 record_player.n.01 39 objects +898 table cushion table cushion 1 40 7 otherprop Objects 40 misc +899 power outlet outlet 1 40 7 otherprop Objects n04548771 wall_socket.n.01 39 objects +900 machine machine 1 220 40 7 machine otherprop Objects n03699975 machine.n.01 39 objects +901 door post doorpost 1 40 7 otherprop Objects n03222857 
doorjamb.n.01 39 objects +902 briefcase briefcase 1 617 40 7 briefcase otherprop Objects n02900705 briefcase.n.01 39 objects +903 wall of doors door 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +904 showe wall shower wall 1 21 1 12 wall wall Wall n04208936 shower.n.01 23 shower +905 door stand door stand 1 50 39 6 stand otherfurniture Furniture 40 misc +906 energy box energy box 1 26 29 7 box box Objects 40 misc +907 balcony floor floor 1 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +908 bean bag beanbag 1 797 39 6 bean bag otherfurniture Furniture n02816656 beanbag.n.01 3 chair +909 kitchen extractor kitchen extractor 1 40 7 otherprop Objects 40 misc +910 toilet bin toilet bin 1 307 40 7 bin otherprop Objects 40 misc +911 wall in other room wall /otherroom 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +912 plumbing plumbing 1 40 7 otherprop Objects n03969041 plumbing.n.01 39 objects +913 moose head / sculpture / hunting trophy moose head/sculpture/hunting trophy 1 547 40 7 trophy otherprop Objects 40 misc +914 ceiling dome ceiling dome 1 40 7 otherprop Objects 40 misc +915 cabinet counter cabinet counter 1 7 12 6 counter counter Furniture 40 misc +916 cabinet \other room cabinet /otherroom 1 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +917 flowerbed flowerbed 1 157 4 1 bed bed Bed bed bed n03368352 flowerbed.n.01 39 objects +918 paingitn painting 1 64 11 8 picture picture Picture n03876519 painting.n.01 39 objects +919 bed headboard headboard 1 161 39 6 headboard otherfurniture Furniture n03502200 headboard.n.01 11 bed +920 antique clock antique clock 1 56 40 7 clock otherprop Objects clock 03046257 n03046257 clock.n.01 39 objects +921 rocks rock 1 40 7 otherprop Objects n09416076 rock.n.01 39 objects +922 shower caddy for soap etc. 
shower caddy 1 40 7 otherprop Objects 40 misc +923 window / frame window frame 1 59 9 13 window window Window n04589593 window_frame.n.01 9 window +924 media console media console 1 40 7 otherprop Objects 40 misc +925 cloths cloth 1 40 7 otherprop Objects n03309808 fabric.n.01 39 objects +926 vaccum cleaner vacuum cleaner 1 306 40 7 vacuum cleaner otherprop Objects n04517823 vacuum.n.04 37 appliances +927 door frame fireplace wall door frame 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +928 fruit dish fruit dish 1 40 7 otherprop Objects 40 misc +929 risers for theater seating risers for theater seating 1 40 7 otherprop Objects 40 misc +930 interior bathroom wall wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +931 hall wall wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +932 drawer sink table drawer sink table 1 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +933 keyboard piano keyboard piano 1 298 39 6 piano otherfurniture Furniture piano piano 03928116 n03928116 piano.n.01 39 objects +934 tree branch tree branch 1 40 7 otherprop Objects n13163803 limb.n.02 39 objects +935 tiled floor tiled floor 1 11 2 5 floor floor Floor 40 misc +936 ceiling bedroom ceiling bedroom 1 40 7 otherprop Objects 40 misc +937 chandellier chandelier 1 342 38 7 chandelier otherstructure Objects n03005285 chandelier.n.01 28 lighting +938 ceilin ceiling 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +939 hearst hearst 1 40 7 otherprop Objects n11037278 hearst.n.01 39 objects +940 random wall wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +941 condiments condiment 1 40 7 otherprop Objects 40 misc +942 shelf cubbies shelf cubby 1 40 7 otherprop Objects 40 misc +943 book display book display 1 40 7 otherprop Objects 40 misc +944 endtable end table 1 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +945 antique telephone antique telephone 1 32 40 7 telephone otherprop Objects telephone 04401088 n04401088 telephone.n.01 39 objects +946 clothes rack clothes rack 1 40 7 otherprop Objects 40 misc +947 doore door 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +948 heater piping heater piping 1 40 7 otherprop Objects 40 misc +949 tissue box stand tissue box stand 1 50 39 6 stand otherfurniture Furniture 40 misc +950 utencils utensil 1 267 40 7 utensil otherprop Objects n04516672 utensil.n.01 39 objects +951 foodtray food tray 1 179 40 7 tray otherprop Objects n04476259 tray.n.01 39 objects +952 shelves /otherroom shelves /otherroom 1 42 15 6 shelves shelves Furniture 40 misc +953 cailing ceiling 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +954 cluttered chair chair 1 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +955 countertop object object 1 40 7 otherprop Objects n00002684 object.n.01 39 objects +956 art frames art frame 1 40 7 otherprop Objects 40 misc +957 soap bottle soap bottle 1 2 40 7 bottle otherprop Objects bottle bottle 02876657 n02876657 bottle.n.01 39 objects +958 small wall wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +959 watch watch 1 384 40 7 watch otherprop Objects n04555897 watch.n.01 39 objects +960 ceil rail 1 497 38 7 railing otherstructure Objects n04047401 railing.n.01 30 railing +961 fuse box fuse box 1 26 29 7 box box Objects 40 misc +962 painting board painting 1 64 11 8 picture picture Picture n03876519 painting.n.01 39 objects +963 door locker handle door locker handle 1 758 40 7 handle 
otherprop Objects 40 misc +964 box with clutter box 1 26 29 7 box box Objects n02883344 box.n.01 39 objects +965 knive holder knife holder 1 40 7 otherprop Objects 40 misc +966 computer mouse computer mouse 1 103 40 7 mouse otherprop Objects n03793489 mouse.n.04 39 objects +967 mirror / other room mirror /otherroom 1 122 19 7 mirror mirror Objects n03773035 mirror.n.01 21 mirror +968 door back side door 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +969 ceiling bath ceiling bath 1 40 7 otherprop Objects 40 misc +970 chandelier? chandelier 1 342 38 7 chandelier otherstructure Objects n03005285 chandelier.n.01 28 lighting +971 table tray table tray 1 179 40 7 tray otherprop Objects 40 misc +972 aquarium aquarium 1 263 40 7 vessel otherprop Objects n02732072 aquarium.n.01 40 misc +973 wheelbarrow wheelbarrow 1 305 40 7 cart otherprop Objects n02797295 barrow.n.03 39 objects +974 curtain rail curtain rail 1 40 7 otherprop Objects 40 misc +975 rods / table rods/table 1 40 7 otherprop Objects 40 misc +976 counter /otherroom counter /otherroom 1 7 12 6 counter counter Furniture table table table 04379243 n03116530 counter.n.01 26 counter +977 gable gable 1 21 1 12 wall wall Wall n03409393 gable.n.01 1 wall +978 bothroom wall wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +979 balustrade balustrade 1 453 38 7 banister otherstructure Objects n02788148 bannister.n.02 30 railing +980 bathroom bathroom 1 40 7 otherprop Objects toilet toilet 40 misc +981 three three 1 40 7 otherprop Objects 40 misc +982 handcloth hand cloth 1 40 7 otherprop Objects 40 misc +983 wall curved wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +984 teapoy coffee table 1 356 39 6 coffee table otherfurniture Furniture table table table 04379243 n03063968 coffee_table.n.01 5 table +985 table with clutter table 1 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +986 back door door 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +987 rocky ground rocky ground 1 40 7 otherprop Objects 40 misc +988 doormat doormat 2 143 20 5 floor mat floor mat Floor n03223299 doormat.n.02 2 floor +989 backrest backrest 1 5 5 4 chair chair Chair n02767433 back.n.08 39 objects +990 cabinet with desk cabinet 1 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +991 faucet handles faucet 1 9 40 7 faucet otherprop Objects faucet 03325088 n03325088 faucet.n.01 39 objects +992 floor trim floor trim 1 868 38 7 floor trim otherstructure Objects 40 misc +993 toilet sliding door toilet sliding door 1 28 8 12 door door Wall door 40 misc +994 gym stuff gym equipment 1 457 39 6 excercise equipment otherfurniture Furniture n04285146 sports_equipment.n.01 33 gym_equipment +995 clothes container clothes container 1 140 40 7 container otherprop Objects 40 misc +996 basketball hoop basketball hoop 1 162 40 7 basketball hoop otherprop Objects n02802215 basket.n.03 33 gym_equipment +997 cooktop stovetop 1 40 7 otherprop Objects 40 misc +998 both tub bathtub 1 136 36 7 bathtub bathtub Objects bathtub bathtub tub 02808440 n02808440 bathtub.n.01 25 bathtub +999 fireplace heart fireplace 1 372 38 7 fireplace otherstructure Objects n03346455 fireplace.n.01 27 fireplace +1000 spice racks spice rack 1 241 38 7 spice rack otherstructure Objects n04275175 spice_rack.n.01 31 shelving +1001 wall fireplace wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1002 cabinet /w cluttered art cabinet /w cluttered art 1 40 7 otherprop Objects 40 misc +1003 inside wall wall 1 21 1 
12 wall wall Wall n04546855 wall.n.01 1 wall +1004 transformer transformer 1 40 7 otherprop Objects n04471315 transformer.n.01 39 objects +1005 wall light wall lamp 1 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +1006 bathroom shelf bathroom shelf 1 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +1007 glass french door door 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +1008 lamp / other room lamp /otherroom 1 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +1009 picutre picture 1 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +1010 gift gift 1 40 7 otherprop Objects 40 misc +1011 mirror door mirror door 1 28 8 12 door door Wall door 40 misc +1012 deorative object decoration 1 40 7 otherprop Objects n03169390 decoration.n.01 39 objects +1013 ceiling door ceiling door 1 28 8 12 door door Wall door 40 misc +1014 stack of papers stack of papers 1 483 40 7 papers otherprop Objects 40 misc +1015 holy cross holy cross 1 40 7 otherprop Objects 40 misc +1016 door window door window 1 59 9 13 window window Window 40 misc +1017 computer screen monitor 1 49 40 7 monitor otherprop Objects monitor monitor tv or monitor 03211117 n03782190 monitor.n.04 22 tv_monitor +1018 while bottles bottle 1 2 40 7 bottle otherprop Objects bottle bottle 02876657 n02876657 bottle.n.01 39 objects +1019 arcade game arcade game 1 40 7 otherprop Objects 40 misc +1020 unknown - probably part of trellis -- maybe blinds unknown - probably part of trellis -- maybe blinds 1 80 13 13 blinds blinds Window 40 misc +1021 compound wall compound wall 1 21 1 12 wall wall Wall 40 misc +1022 lamps lamp 1 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +1023 plug plug 1 40 7 otherprop Objects 40 misc +1024 round cushion round cushion 1 40 7 otherprop Objects 40 misc +1025 ceiling frame molding 1 38 7 otherstructure Objects n02800354 baseboard.n.01 1 wall +1026 magazine magazine 1 71 40 7 magazine otherprop Objects n06595351 magazine.n.01 39 objects +1027 stair case staircase 1 215 38 7 stairs otherstructure Objects n04298308 stairway.n.01 16 stairs +1028 rolling pin rolling pin 1 267 40 7 utensil otherprop Objects n04103206 rolling_pin.n.01 39 objects +1029 shower knob shower knob 1 651 40 7 shower knob otherprop Objects 40 misc +1030 wall statue wall statue 1 40 7 otherprop Objects 40 misc +1031 wal wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1032 sink / basin sink/basin 1 40 7 otherprop Objects 40 misc +1033 celling borader ceiling boarder 1 40 7 otherprop Objects 40 misc +1034 clothes bag clothes bag 1 55 37 7 bag bag Objects 40 misc +1035 perfumes perfume 1 655 40 7 perfume otherprop Objects n03916031 perfume.n.02 39 objects +1036 heat heat 1 40 7 otherprop Objects n03509025 heating_system.n.01 39 objects +1037 kitchen counter support leg kitchen counter support 1 40 7 otherprop Objects 40 misc +1038 window frame window frame 1 59 9 13 window window Window n04589593 window_frame.n.01 9 window +1039 glassdoor door 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +1040 window closet door window closet door 1 28 8 12 door door Wall door 40 misc +1041 sticks stick 1 529 40 7 stick otherprop Objects 40 misc +1042 photo frame picture frame 1 64 11 8 picture picture Picture n03931765 picture_frame.n.01 6 picture +1043 water pump water pump 1 40 7 otherprop Objects n04561965 water_pump.n.01 39 objects +1044 shower doorframe shower door frame 1 40 7 otherprop Objects n04208936 
shower.n.01 23 shower +1045 stairs behind stairs 1 215 38 7 stairs otherstructure Objects stairs n04314914 step.n.04 16 stairs +1046 flood floor 1 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +1047 columned perimeter columned perimeter 1 40 7 otherprop Objects 40 misc +1048 shrine shrine 1 40 7 otherprop Objects n04210390 shrine.n.01 40 misc +1049 papers paper 1 15 26 7 paper paper Objects n14974264 paper.n.01 39 objects +1050 fireplace /w art fireplace 1 372 38 7 fireplace otherstructure Objects n03346455 fireplace.n.01 27 fireplace +1051 canvas stand canvas stand 1 50 39 6 stand otherfurniture Furniture 40 misc +1052 art / man statue art/man statue 1 40 7 otherprop Objects 40 misc +1053 bath towel bath towel 1 135 27 7 towel towel Objects n02808304 bath_towel.n.01 20 towel +1054 cradenza credenza 1 7 12 6 counter counter Furniture n03129753 credenza.n.01 36 furniture +1055 doorr frame door frame 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +1056 column statue column statue 1 40 7 otherprop Objects 40 misc +1057 water basin water basin 1 40 7 otherprop Objects 40 misc +1058 moulidng molding 1 38 7 otherstructure Objects n02800354 baseboard.n.01 1 wall +1059 tablet computer tablet computer 1 46 40 7 computer otherprop Objects 40 misc +1060 artwork artwork 1 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +1061 towel bar shelf towel bar shelf 1 40 7 otherprop Objects 40 misc +1062 floor carpet carpet 1 130 40 7 rug floor mat Objects n04118021 rug.n.01 2 floor +1063 wine cabinet wine cabinet 1 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +1064 shower rail shower rail 1 40 7 otherprop Objects 40 misc +1065 skirting board skirting board 1 408 38 7 board otherstructure Objects n02800354 baseboard.n.01 1 wall +1066 playpen playpen 1 815 39 6 playpen otherfurniture Furniture n03964495 playpen.n.01 40 misc +1067 room door door 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +1068 doorgrame doorframe 1 615 38 7 door frame otherstructure Objects n03222722 doorframe.n.01 4 door +1069 makeup makeup 1 40 7 otherprop Objects n03714235 makeup.n.01 39 objects +1070 plant / art plant/art 1 40 7 otherprop Objects 40 misc +1071 sauna seating seat 1 524 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 34 seating +1072 bot bot 1 40 7 otherprop Objects n02311879 bot.n.01 39 objects +1073 handrail / other room handrail /otherroom 1 453 38 7 banister otherstructure Objects n02788148 bannister.n.02 30 railing +1074 rocking horse rocking horse 1 389 40 7 toy otherprop Objects n03523633 hobby.n.02 39 objects +1075 kitchen lower cabinet kitchen lower cabinet 1 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +1076 floor /other room floor /otherroom 1 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +1077 chair stand chair stand 1 50 39 6 stand otherfurniture Furniture 40 misc +1078 wall pack wall pack 1 40 7 otherprop Objects 40 misc +1079 towel box towel box 1 26 29 7 box box Objects 40 misc +1080 vessel sink vessel sink 1 24 34 7 sink sink Objects sink 40 misc +1081 can can 1 329 40 7 can otherprop Objects can 02946921 n02946921 can.n.01 39 objects +1082 ceiling lattice ceiling 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +1083 backslash backsplash 1 40 7 otherprop Objects 40 misc +1084 ire place fireplace 1 372 38 7 fireplace otherstructure Objects n03346455 fireplace.n.01 27 fireplace +1085 divider partition 1 21 1 12 wall wall Wall n03894379 partition.n.01 40 
misc +1086 pathway pathway 1 40 7 otherprop Objects n03899533 pathway.n.02 40 misc +1087 laundry laundry 1 40 7 otherprop Objects n03648219 laundry.n.01 38 clothes +1088 tablecloth tablecloth 1 292 40 7 tablecloth otherprop Objects n04380143 tablecloth.n.01 39 objects +1089 water dispenser water dispenser 1 507 40 7 water dispenser otherprop Objects n03210683 dispenser.n.01 39 objects +1090 ;photo photo 1 508 40 7 photo otherprop Objects n03925226 photograph.n.01 6 picture +1091 door frame wall entry door frame 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +1092 plastic tarp tarp 1 40 7 otherprop Objects n04395024 tarpaulin.n.01 39 objects +1093 clothing stand clothing stand 1 50 39 6 stand otherfurniture Furniture 40 misc +1094 bathtab bathtub 1 136 36 7 bathtub bathtub Objects bathtub bathtub tub 02808440 n02808440 bathtub.n.01 25 bathtub +1095 apron apron 1 40 7 otherprop Objects 40 misc +1096 ceiling bedroom entry ceiling 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +1097 ashtray ashtray 1 377 40 7 ashtray otherprop Objects n02747802 ashtray.n.01 39 objects +1098 control panel control panel 1 408 38 7 board otherstructure Objects n03098140 control_panel.n.01 39 objects +1099 ironing board ironing board 1 313 39 6 ironing board otherfurniture Furniture n03586090 ironing_board.n.01 39 objects +1100 exhaust pipe exhaust pipe 1 41 40 7 pipe otherprop Objects n03303510 exhaust_pipe.n.01 40 misc +1101 rack of theater electronics rack 1 50 39 6 stand otherfurniture Furniture n04038440 rack.n.05 31 shelving +1102 canvas canvas 1 559 40 7 sheet otherprop Objects 40 misc +1103 alarm clock alarm clock 1 156 40 7 alarm clock otherprop Objects clock 03046257 n02694662 alarm_clock.n.01 39 objects +1104 ceiling lower ceiling lower 1 40 7 otherprop Objects 40 misc +1105 yarn machine yarn machine 1 220 40 7 machine otherprop Objects 40 misc +1106 tarrace door terrace door 1 28 8 12 door door Wall door 40 misc +1107 stand/table stand/table 1 40 7 otherprop Objects 40 misc +1108 statue base statue 1 294 40 7 sculpture otherprop Objects n04306847 statue.n.01 39 objects +1109 swing swing 1 389 40 7 toy otherprop Objects n04371774 swing.n.02 39 objects +1110 extractor fan extractor fan 1 74 40 7 fan otherprop Objects 40 misc +1111 crib crib 1 485 39 6 crib otherfurniture Furniture 40 misc +1112 hallway wall wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1113 side wall 2 wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1114 side wall 5 wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1115 mens urinal urinal 1 40 7 otherprop Objects n04515991 urinal.n.01 39 objects +1116 stacked chairs stacked chair 1 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +1117 patio floor patio floor 1 11 2 5 floor floor Floor 40 misc +1118 stair steps stair step 1 40 7 otherprop Objects 40 misc +1119 electrical controller electrical controller 1 40 7 otherprop Objects 40 misc +1120 kitchencounter kitchen counter 1 7 12 6 counter counter Furniture table table table 04379243 n03116530 counter.n.01 26 counter +1121 rotunda railing rotunda railing 1 497 38 7 railing otherstructure Objects 40 misc +1122 lamp table lamp table 1 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +1123 cabinet top cabinet 1 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +1124 tray with bottles of soap and lotion tray 1 179 40 7 tray otherprop Objects n04476259 tray.n.01 39 objects +1125 speakers 
speaker 1 54 40 7 speaker otherprop Objects speaker 03691459 n03691459 loudspeaker.n.01 39 objects +1126 fancy cabinet cabinet 1 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +1127 air vent air vent 1 25 38 7 air vent otherstructure Objects n04526241 vent.n.01 40 misc +1128 sofa couch couch 1 83 6 9 sofa sofa Sofa sofa sofa sofa 04256520 n04256520 sofa.n.01 10 sofa +1129 channel channel 1 40 7 otherprop Objects 40 misc +1130 wall sign wall sign 1 208 40 7 sign otherprop Objects 40 misc +1131 bench back bench 1 204 39 6 bench otherfurniture Furniture bench bench 02828884 n02828884 bench.n.01 34 seating +1132 baby changing station baby changing station 1 40 7 otherprop Objects 40 misc +1133 paip pip 1 286 40 7 fruit otherprop Objects n11685091 pip.n.03 39 objects +1134 bed arm pillows pillow 1 119 18 7 pillow pillow Objects pillow 03938244 n03938244 pillow.n.01 8 cushion +1135 shower tap shower tap 1 132 40 7 shower cap otherprop Objects 40 misc +1136 axe axe 1 40 7 otherprop Objects n02764044 ax.n.01 39 objects +1137 table shelf shelf 1 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +1138 washcloth washcloth 1 40 7 otherprop Objects n04554523 washcloth.n.01 39 objects +1139 coffee mug coffee mug 1 263 40 7 vessel otherprop Objects cup or mug 03797390 n03063599 coffee_mug.n.01 39 objects +1140 iron panel panel 1 559 40 7 sheet otherprop Objects n03882058 panel.n.01 35 board_panel +1141 door or window frame door/window frame 1 40 7 otherprop Objects 40 misc +1142 coffe table coffee table 1 356 39 6 coffee table otherfurniture Furniture table table table 04379243 n03063968 coffee_table.n.01 5 table +1143 dice dice 1 40 7 otherprop Objects n03191029 die.n.01 39 objects +1144 title title 1 40 7 otherprop Objects 40 misc +1145 toilet seat toilet 1 124 33 7 toilet toilet Objects toilet toilet n04446276 toilet.n.01 18 toilet +1146 coffee table leg coffee table 1 356 39 6 coffee table otherfurniture Furniture table table table 04379243 n03063968 coffee_table.n.01 5 table +1147 soft chair soft chair 1 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +1148 icebox icebox 1 17 24 6 refridgerator refridgerator Furniture n04070727 refrigerator.n.01 37 appliances +1149 showerfloor shower floor 1 11 2 5 floor floor Floor n04208936 shower.n.01 23 shower +1150 floor outside floor /outside 1 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +1151 framing framing 1 40 7 otherprop Objects n03390983 frame.n.10 40 misc +1152 wall patch wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1153 decorative bowl decorative bowl 1 826 40 7 decorative bowl otherprop Objects bowl bowl 02880940 n02880940 bowl.n.03 39 objects +1154 dog bed dog bed 1 858 39 6 dog bed otherfurniture Furniture bed bed bed 02818832 n02818832 bed.n.01 11 bed +1155 pool sticks pool stick 1 529 40 7 stick otherprop Objects n03145522 cue.n.04 39 objects +1156 exericse equipment exercise equipment 1 457 39 6 excercise equipment otherfurniture Furniture n04285146 sports_equipment.n.01 33 gym_equipment +1157 remove floor behind floor /otherroom 1 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +1158 doorhandle door handle 1 758 40 7 handle otherprop Objects 40 misc +1159 dishrag dishrag 1 40 7 otherprop Objects n03207743 dishrag.n.01 39 objects +1160 behind behind 1 40 7 otherprop Objects 40 misc +1161 drinking fountain drinking fountain 1 40 7 otherprop Objects n03241335 drinking_fountain.n.01 40 misc +1162 bureau bureau 1 524 39 6 furniture 
otherfurniture Furniture dresser dresser n03015254 chest_of_drawers.n.01 13 chest_of_drawers +1163 night stand object object 1 40 7 otherprop Objects n00002684 object.n.01 39 objects +1164 tub bathtub 1 136 36 7 bathtub bathtub Objects bathtub bathtub tub 02808440 n02808440 bathtub.n.01 25 bathtub +1165 parapet parapet 1 40 7 otherprop Objects 40 misc +1166 attic door attic door 1 28 8 12 door door Wall door 40 misc +1167 wall object object 1 40 7 otherprop Objects n00002684 object.n.01 39 objects +1168 closet door knob closet door knob 1 652 40 7 knob otherprop Objects 40 misc +1169 bathroom accessory bathroom accessory 1 40 7 otherprop Objects n02671780 accessory.n.01 38 clothes +1170 teddy teddy bear 1 389 40 7 toy otherprop Objects n04399382 teddy.n.01 39 objects +1171 wall toilet paper wall toilet paper 1 15 26 7 paper paper Objects 40 misc +1172 coffee machine coffee machine 1 234 40 7 coffee machine otherprop Objects n03063338 coffee_maker.n.01 37 appliances +1173 excercise equipment / other room exercise equipment /otherroom 1 457 39 6 excercise equipment otherfurniture Furniture 40 misc +1174 storage cabinet storage cabinet 1 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +1175 toilet stall door toilet stall door 1 28 8 12 door door Wall door 40 misc +1176 bin of posters bin 1 307 40 7 bin otherprop Objects n02839910 bin.n.01 39 objects +1177 photos picture 1 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +1178 push broom broom 1 328 40 7 broom otherprop Objects n02906734 broom.n.01 39 objects +1179 art stand art stand 1 50 39 6 stand otherfurniture Furniture 40 misc +1180 shelf /w art shelf /w art 1 40 7 otherprop Objects 40 misc +1181 spiderman spiderman statue 1 40 7 otherprop Objects 40 misc +1182 mount mount 1 40 7 otherprop Objects 40 misc +1183 ac unit air conditioning 1 40 7 otherprop Objects n02686379 air_conditioner.n.01 39 objects +1184 newspaper newspaper 1 873 40 7 newspapers otherprop Objects 40 misc +1185 towel basket towel basket 1 39 40 7 basket otherprop Objects basket 02801938 n02801938 basket.n.01 39 objects +1186 base rail base rail 1 40 7 otherprop Objects 40 misc +1187 show pack show pack 1 40 7 otherprop Objects 40 misc +1188 coutertop countertop 1 7 12 6 counter counter Furniture n03118245 countertop.n.01 26 counter +1189 stair stepper stair stepper 1 40 7 otherprop Objects 40 misc +1190 car batteries car battery 1 40 7 otherprop Objects n02961225 car_battery.n.01 39 objects +1191 pad pad 1 40 7 otherprop Objects n03195485 diggings.n.02 40 misc +1192 old fireplace fireplace 1 372 38 7 fireplace otherstructure Objects n03346455 fireplace.n.01 27 fireplace +1193 desk lamp desk lamp 1 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +1194 easychair easy chair 1 5 5 4 chair chair Chair chair chair chair 03001627 n03262932 easy_chair.n.01 3 chair +1195 wees table 1 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +1196 tabletop box tabletop box 1 26 29 7 box box Objects 40 misc +1197 canopy canopy 1 40 7 otherprop Objects 40 misc +1198 wall behind door wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1199 cabinet / other room cabinet /otherroom 1 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +1200 objects / other room objects /otherroom 1 40 7 otherprop Objects n00002684 object.n.01 39 objects +1201 bar soap bar soap 1 133 40 7 soap otherprop Objects 40 misc +1202 column /otherroom column /otherroom 1 94 
38 7 column otherstructure Objects n03074380 column.n.06 40 misc +1203 outer wall wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1204 wall control wall control 1 40 7 otherprop Objects 40 misc +1205 desktop computer tower 1 46 40 7 computer otherprop Objects n03082979 computer.n.01 39 objects +1206 fire screen fire screen 1 40 7 otherprop Objects n03347037 fire_screen.n.01 39 objects +1207 fireplace counter mantel 1 58 38 7 mantel otherstructure Objects mantel n03719343 mantel.n.01 27 fireplace +1208 bread bread 1 246 40 7 bread otherprop Objects n07679356 bread.n.01 40 misc +1209 piano bench piano bench 1 460 39 6 piano bench otherfurniture Furniture bench bench 02828884 n02828884 bench.n.01 34 seating +1210 window shutter window shutter 1 40 7 otherprop Objects n04211356 shutter.n.02 39 objects +1211 draw draw 1 40 7 otherprop Objects n09269882 draw.n.01 39 objects +1212 flower wage flower vase 1 78 40 7 vase otherprop Objects vase jar 03593526 n04522168 vase.n.01 39 objects +1213 ceiling/west wall ceiling/west wall 1 21 1 12 wall wall Wall 40 misc +1214 decorative quilt decorative quilt 1 575 40 7 quilt otherprop Objects 40 misc +1215 knife knife 1 259 40 7 knife otherprop Objects knife 03624134 n03624134 knife.n.02 39 objects +1216 projector opening projector opening 1 40 7 otherprop Objects 40 misc +1217 boader boarder 1 40 7 otherprop Objects 40 misc +1218 lights / deco lights/deco 1 40 7 otherprop Objects 40 misc +1219 tv3 wall tv 1 40 7 otherprop Objects 40 misc +1220 desk clutter desk clutter 1 40 7 otherprop Objects 40 misc +1221 curatain curtain 1 89 16 13 curtain curtain Window curtain n03151077 curtain.n.01 12 curtain +1222 shoe shelves shelving 1 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +1223 mixer mixer 1 40 7 otherprop Objects 40 misc +1224 ceiling fixture ceiling fixture 1 40 7 otherprop Objects 40 misc +1225 scuplture sculpture 1 294 40 7 sculpture otherprop Objects n04157320 sculpture.n.01 39 objects +1226 countertop /otherroom countertop /otherroom 1 7 12 6 counter counter Furniture n03118245 countertop.n.01 26 counter +1227 work bench workbench 1 204 39 6 bench otherfurniture Furniture bench table 04379243 n04600486 workbench.n.01 5 table +1228 wall desk wall desk 1 36 14 10 desk desk Table desk desk 40 misc +1229 shelf of cloth shelf 1 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +1230 telescope telescope 1 467 40 7 telescope otherprop Objects n04403638 telescope.n.01 39 objects +1231 shower hoses shower hose 1 669 40 7 shower hose otherprop Objects 40 misc +1232 step; step 1 38 7 otherstructure Objects n04314914 step.n.04 16 stairs +1233 bathtub platform bathtub platform 1 40 7 otherprop Objects 40 misc +1234 sauna seats seat 1 524 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 34 seating +1235 bucker bucket 1 427 40 7 bucket otherprop Objects n02909870 bucket.n.01 39 objects +1236 ac air conditioner 1 79 38 7 air conditioner otherstructure Objects n02686379 air_conditioner.n.01 39 objects +1237 unknown / remove unknown/remove 1 40 7 otherprop Objects 40 misc +1238 art / statue art/statue 1 40 7 otherprop Objects 40 misc +1239 dinner table dinner table 1 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +1240 storage storage 1 n03744276 memory.n.04 39 objects +1241 unknown kitchen stuff unknown kitchen stuff 1 40 7 otherprop Objects 40 misc +1242 alamari wardrobe 1 772 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 36 furniture +1243 cement 
drum cement drum 1 145 40 7 drum otherprop Objects 40 misc +1244 kitchen cupboard kitchen cabinet 1 3 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 7 cabinet +1245 platter platter 1 129 40 7 platter otherprop Objects 40 misc +1246 large chunk of art chunk of art 1 40 7 otherprop Objects 40 misc +1247 dushbin dustbin 1 307 40 7 bin otherprop Objects trash_bin 02747177 n02747177 ashcan.n.01 39 objects +1248 couch pillows pillow 1 119 18 7 pillow pillow Objects pillow 03938244 n03938244 pillow.n.01 8 cushion +1249 ceiling /other room ceiling /otherroom 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +1250 object on cutting board object 1 40 7 otherprop Objects n00002684 object.n.01 39 objects +1251 show wall wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1252 robes robe 1 40 7 otherprop Objects n04097866 robe.n.01 38 clothes +1253 cusion cushion 1 119 18 7 pillow pillow Objects n03151500 cushion.n.03 8 cushion +1254 column /outside column /outside 1 94 38 7 column otherstructure Objects n03074380 column.n.06 40 misc +1255 hanging clothes hanging clothes 1 141 21 7 clothes clothes Objects n02728440 apparel.n.01 38 clothes +1256 newspaper basket newspaper basket 1 39 40 7 basket otherprop Objects basket 02801938 n02801938 basket.n.01 39 objects +1257 towels in a bowl towel 1 135 27 7 towel towel Objects n04459362 towel.n.01 20 towel +1258 detached door door 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +1259 big door door 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +1260 wall molding molding 1 38 7 otherstructure Objects n02800354 baseboard.n.01 1 wall +1261 bed / other room bed /otherroom 1 157 4 1 bed bed Bed bed bed bed 02818832 n02818832 bed.n.01 11 bed +1262 window /w pictures window /w pictures 1 40 7 otherprop Objects 40 misc +1263 ceiling corridor ceiling corridor 1 40 7 otherprop Objects 40 misc +1264 rotunda base rotunda 1 40 7 otherprop Objects 40 misc +1265 fire extinguisher' fire extinguisher 1 10 40 7 fire extinguisher otherprop Objects n03345837 fire_extinguisher.n.01 39 objects +1266 unicycle unicycle 1 40 7 otherprop Objects n04509417 unicycle.n.01 39 objects +1267 ceiling under balcony ceiling /otherroom 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +1268 laundary machines laundry machine 1 220 40 7 machine otherprop Objects 40 misc +1269 nightstand /reflection nightstand /reflection 1 158 32 6 night stand night stand Furniture night_stand night_stand n03015254 chest_of_drawers.n.01 13 chest_of_drawers +1270 pile of magazines pile of magazines 1 40 7 otherprop Objects 40 misc +1271 wall of statue inset wall of statue inset 1 40 7 otherprop Objects 40 misc +1272 cosmetic cosmetic 1 40 7 otherprop Objects n03113152 cosmetic.n.01 39 objects +1273 unknown item object 1 40 7 otherprop Objects n00002684 object.n.01 39 objects +1274 side steps 4 step 1 38 7 otherstructure Objects n04314914 step.n.04 16 stairs +1275 kitchen table kitchen table 1 19 7 10 table table Table table table table 04379243 n03620967 kitchen_table.n.01 5 table +1276 round footstool round footstool 1 40 7 otherprop Objects 40 misc +1277 firewood holder firewood holder 1 40 7 otherprop Objects 40 misc +1278 flag flag 1 405 40 7 flag otherprop Objects 40 misc +1279 window frame and shelves window frame and shelves 1 42 15 6 shelves shelves Furniture 40 misc +1280 towel ring towel ring 1 40 7 otherprop Objects n04460038 towel_ring.n.01 39 objects +1281 window frame /reflection window frame /reflection 1 477 38 7 window frame 
otherstructure Objects n04589593 window_frame.n.01 9 window +1282 basket of something basket of something 1 40 7 otherprop Objects 40 misc +1283 towel rod towel rod 1 134 38 7 towel rod otherstructure Objects 40 misc +1284 window /reflection window /reflection 1 59 9 13 window window Window n04587648 window.n.01 9 window +1285 paper storage paper storage 1 40 7 otherprop Objects 40 misc +1286 t.v tv 1 172 25 11 television television TV tv or monitor 03211117 n03211117 display.n.06 22 tv_monitor +1287 kitchen bell kitchen bell 1 40 7 otherprop Objects 40 misc +1288 ceiling from another room ceiling /otherroom 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +1289 throw blanket throw blanket 1 312 40 7 blanket otherprop Objects 40 misc +1290 wall post wall post 1 40 7 otherprop Objects 40 misc +1291 sink pipe sink pipe 1 41 40 7 pipe otherprop Objects 40 misc +1292 workstation workstation 1 46 40 7 computer otherprop Objects n04603399 workstation.n.01 39 objects +1293 room 1 wall wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1294 storage space storage space 1 645 38 7 storage space otherstructure Objects n04328946 storage_space.n.01 40 misc +1295 mattress bed 1 157 4 1 bed bed Bed bed bed bed 02818832 n02818832 bed.n.01 11 bed +1296 kitchen handle kitchen handle 1 758 40 7 handle otherprop Objects n03485997 handle.n.01 39 objects +1297 bed side table nightstand 1 158 32 6 night stand night stand Furniture night_stand night_stand n03015254 chest_of_drawers.n.01 13 chest_of_drawers +1298 lookout lookout 1 40 7 otherprop Objects n03688943 lookout.n.03 40 misc +1299 bag of sand bag of sand 1 40 7 otherprop Objects 40 misc +1300 walll wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1301 kitchen countertop object' object 1 40 7 otherprop Objects n00002684 object.n.01 39 objects +1302 bed rest headboard 1 161 39 6 headboard otherfurniture Furniture n03502200 headboard.n.01 11 bed +1303 furniture 2 furniture 1 524 39 6 furniture otherfurniture Furniture n03405725 furniture.n.01 36 furniture +1304 wall ac vent vent 1 25 38 7 air vent otherstructure Objects n04526241 vent.n.01 40 misc +1305 dumbbells dumbbell 1 40 7 otherprop Objects n03255030 dumbbell.n.01 33 gym_equipment +1306 weight weight 1 40 7 otherprop Objects n04571292 weight.n.02 33 gym_equipment +1307 fixture fixture 1 40 7 otherprop Objects n03354613 fixture.n.01 39 objects +1308 llight light 1 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +1309 show space show space 1 40 7 otherprop Objects 40 misc +1310 recessed cubby recessed cubby 1 40 7 otherprop Objects 40 misc +1311 oven hood range hood 1 380 38 7 range hood otherstructure Objects range_hood n04053677 range_hood.n.01 39 objects +1312 counter doors counter door 1 28 8 12 door door Wall door 40 misc +1313 roof grill roof grill 1 700 38 7 grill otherstructure Objects 40 misc +1314 shower dial shower dial 1 40 7 otherprop Objects 40 misc +1315 riser riser 1 40 7 otherprop Objects 40 misc +1316 plant ornament plant ornament 1 40 7 otherprop Objects 40 misc +1317 floor stand floor stand 1 50 39 6 stand otherfurniture Furniture 40 misc +1318 fruit fruit 1 286 40 7 fruit otherprop Objects n13134947 fruit.n.01 39 objects +1319 frige refrigerator 1 17 24 6 refridgerator refridgerator Furniture n04070727 refrigerator.n.01 37 appliances +1320 stairwell stairwell 1 40 7 otherprop Objects n04298661 stairwell.n.01 16 stairs +1321 trash bag trash bag 1 55 37 7 bag bag Objects 40 misc +1322 cups cup 1 35 40 7 cup otherprop 
Objects cup cup or mug 03797390 n03797390 mug.n.04 39 objects +1323 photo mount photo mount 1 40 7 otherprop Objects 40 misc +1324 drawers for clothes drawers for clothes 1 141 21 7 clothes clothes Objects 40 misc +1325 strange ceiling ceiling 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +1326 bilow billow 1 40 7 otherprop Objects 40 misc +1327 ceilng ceiling 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +1328 bathroom towel bathroom towel 1 135 27 7 towel towel Objects n04459362 towel.n.01 20 towel +1329 john john 1 40 7 otherprop Objects toilet toilet n04446276 toilet.n.01 18 toilet +1330 refrigerator cabinet refrigerator cabinet 1 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +1331 fish picture fish picture 1 64 11 8 picture picture Picture 40 misc +1332 painting frame painting frame 1 40 7 otherprop Objects 40 misc +1333 display table display table 1 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +1334 round seat round seat 1 40 7 otherprop Objects 40 misc +1335 lower floor floor 1 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +1336 lid lid 1 533 40 7 lid otherprop Objects 40 misc +1337 bonsai tree bonsai tree 1 40 7 otherprop Objects 40 misc +1338 window3 window 1 59 9 13 window window Window n04587648 window.n.01 9 window +1339 window2 window 1 59 9 13 window window Window n04587648 window.n.01 9 window +1340 window1 window 1 59 9 13 window window Window n04587648 window.n.01 9 window +1341 window5 window 1 59 9 13 window window Window n04587648 window.n.01 9 window +1342 shower floor surround shower floor 1 11 2 5 floor floor Floor n04208936 shower.n.01 23 shower +1343 folders folder 1 69 40 7 folder otherprop Objects n03376279 folder.n.02 39 objects +1344 decorative plant decorative plant 1 82 40 7 plant otherprop Objects plant 40 misc +1345 cooker unit cooker unit 1 40 7 otherprop Objects 40 misc +1346 towel holder towel holder 1 40 7 otherprop Objects 40 misc +1347 laundrybag laundry bag 1 55 37 7 bag bag Objects 40 misc +1348 bathroom utencil bathroom utensil 1 267 40 7 utensil otherprop Objects n04516672 utensil.n.01 39 objects +1349 wine bottles wine bottle 1 333 40 7 wine bottle otherprop Objects bottle wine bottle 04591713 n04591713 wine_bottle.n.01 39 objects +1350 high shelf high shelf 1 40 7 otherprop Objects 40 misc +1351 loveseat couch 1 83 6 9 sofa sofa Sofa sofa sofa sofa 04256520 n04256520 sofa.n.01 10 sofa +1352 couch pillow pillow 1 119 18 7 pillow pillow Objects pillow 03938244 n03938244 pillow.n.01 8 cushion +1353 ceiling molding ceiling molding 1 40 7 otherprop Objects 40 misc +1354 firewook firewood 1 40 7 otherprop Objects n15100644 firewood.n.01 40 misc +1355 door /otherroom door /otherroom 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +1356 toaster? 
toaster 1 251 40 7 toaster otherprop Objects n04442312 toaster.n.02 37 appliances +1357 foodstand food stand 1 50 39 6 stand otherfurniture Furniture 40 misc +1358 weight bench weight bench 1 457 39 6 excercise equipment otherfurniture Furniture n04285146 sports_equipment.n.01 33 gym_equipment +1359 floor / room above floor /otherroom 1 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +1360 mini fridge mini fridge 1 17 24 6 refridgerator refridgerator Furniture n03273913 electric_refrigerator.n.01 37 appliances +1361 bars bar 1 51 38 7 bar otherstructure Objects n02788689 bar.n.03 39 objects +1362 lion lion 1 594 40 7 cat otherprop Objects n02129165 lion.n.01 39 objects +1363 cuddly toy cuddly toy 1 389 40 7 toy otherprop Objects 40 misc +1364 toilet stall toilet stall 1 40 7 otherprop Objects 40 misc +1365 copier machine copier machine 1 220 40 7 machine otherprop Objects 40 misc +1366 raised platform platform 1 38 7 otherstructure Objects 40 misc +1367 bell bell 1 40 7 otherprop Objects 40 misc +1368 fireextinctioms fire extinguisher 1 10 40 7 fire extinguisher otherprop Objects n03345837 fire_extinguisher.n.01 39 objects +1369 table vase table vase 1 78 40 7 vase otherprop Objects vase 40 misc +1370 bedroom ceiling bedroom ceiling 1 4 22 3 ceiling ceiling Ceiling 40 misc +1371 ceiling \other room ceiling /otherroom 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +1372 chair parts chair part 1 40 7 otherprop Objects 40 misc +1373 door / window door/window 1 40 7 otherprop Objects 40 misc +1374 closet mirror wall closet mirror wall 1 21 1 12 wall wall Wall 40 misc +1375 beam across top of arch beam across top of arch 1 40 7 otherprop Objects 40 misc +1376 fireplace utensils fireplace utensil 1 267 40 7 utensil otherprop Objects 40 misc +1377 liight light 1 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +1378 self side self side 1 40 7 otherprop Objects 40 misc +1379 kitchen faucet faucet 1 9 40 7 faucet otherprop Objects faucet 03325088 n03325088 faucet.n.01 39 objects +1380 candle holder candle holder 1 148 40 7 candlestick otherprop Objects n02948557 candlestick.n.01 39 objects +1381 mirror/other room mirror /otherroom 1 122 19 7 mirror mirror Objects n03773035 mirror.n.01 21 mirror +1382 hall top hall top 1 40 7 otherprop Objects 40 misc +1383 decoration / other room decoration /otherroom 1 40 7 otherprop Objects n03169390 decoration.n.01 39 objects +1384 shower hose shower hose 1 669 40 7 shower hose otherprop Objects 40 misc +1385 bottle of detergent bottle of detergent 1 40 7 otherprop Objects 40 misc +1386 box opening box opening 1 40 7 otherprop Objects 40 misc +1387 hunting trohpy hunting trophy 1 547 40 7 trophy otherprop Objects 40 misc +1388 rack with pool cues rack 1 50 39 6 stand otherfurniture Furniture n04038440 rack.n.05 31 shelving +1389 stack stack 1 40 7 otherprop Objects 40 misc +1390 bed stand bed stand 1 50 39 6 stand otherfurniture Furniture 40 misc +1391 planter curb planter curb 1 40 7 otherprop Objects 40 misc +1392 garage door motor garage door motor 1 40 7 otherprop Objects 40 misc +1393 entry entry 1 40 7 otherprop Objects n03290771 entrance.n.01 40 misc +1394 a-frame sign a-frame sign 1 208 40 7 sign otherprop Objects 40 misc +1395 cuboid cuboid 1 40 7 otherprop Objects 40 misc +1396 shelf with jars shelf 1 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +1397 wall top wall top 1 40 7 otherprop Objects 40 misc +1398 window4 window 1 59 9 13 window window Window n04587648 window.n.01 9 
window +1399 bath faucet bath faucet 1 9 40 7 faucet otherprop Objects faucet 03325088 n03325088 faucet.n.01 39 objects +1400 statue / art statue/art 1 40 7 otherprop Objects 40 misc +1401 cabinet /w clutter cabinet /w clutter 1 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +1402 workout bike workout bike 1 40 7 otherprop Objects 40 misc +1403 closet area for hanging clothes closet area for hanging clothes 1 141 21 7 clothes clothes Objects 40 misc +1404 vase \other room vase /otherroom 1 78 40 7 vase otherprop Objects vase jar 03593526 n04522168 vase.n.01 39 objects +1405 art / muscle shell art/muscle shell 1 40 7 otherprop Objects 40 misc +1406 center island island 1 456 38 7 kitchen island otherstructure Objects n03620600 kitchen_island.n.01 26 counter +1407 dedore decor 1 40 7 otherprop Objects n03579355 interior_decoration.n.01 39 objects +1408 dome roof dome roof 1 40 7 otherprop Objects 40 misc +1409 chair /w clutter chair /w clutter 1 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +1410 basket of objects (popcorn?) basket 1 39 40 7 basket otherprop Objects basket 02801938 n02801938 basket.n.01 39 objects +1411 outer side wall wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1412 top floor floor 1 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +1413 pelt pelt 1 40 7 otherprop Objects n01895735 hide.n.02 39 objects +1414 washer-dryer washer-dryer 1 40 7 otherprop Objects 40 misc +1415 yellow egg shaped vase yellow egg shaped vase 1 78 40 7 vase otherprop Objects vase 40 misc +1416 entry table table 1 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +1417 domino decoration domino decoration 1 40 7 otherprop Objects 40 misc +1418 pc tower pc tower 1 40 7 otherprop Objects tower 04460130 n04460130 tower.n.01 40 misc +1419 unknown/ other room unknown /otherroom 1 20 40 7 unknown otherprop Objects n08632096 unknown.n.01 41 unlabeled +1420 bathroom glass bathroom glass 1 612 38 7 glass otherstructure Objects n03438257 glass.n.02 39 objects +1421 chanellier chandelier 1 342 38 7 chandelier otherstructure Objects n03005285 chandelier.n.01 28 lighting +1422 makeup area makeup area 1 40 7 otherprop Objects 40 misc +1423 fireplace mirror fireplace mirror 1 122 19 7 mirror mirror Objects 40 misc +1424 jug jug 1 687 40 7 jug otherprop Objects bottle bottle 02876657 n03603722 jug.n.01 39 objects +1425 bathroom window bathroom window 1 59 9 13 window window Window n04587648 window.n.01 9 window +1426 falling light falling light 1 62 38 7 light otherstructure Objects 40 misc +1427 snow globe snow globe 1 347 40 7 globe otherprop Objects 40 misc +1428 unknown kitchen appliance unknown kitchen appliance 1 40 7 otherprop Objects 40 misc +1429 boxes /w books boxes /w books 1 85 23 2 books books Books 40 misc +1430 alarm alarm 1 525 40 7 alarm otherprop Objects clock 03046257 n02694662 alarm_clock.n.01 39 objects +1431 electirc outlet electric outlet 1 98 40 7 electrical outlet otherprop Objects n04548771 wall_socket.n.01 39 objects +1432 sauna floor floor 1 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +1433 picture /otherroom picture /otherroom 1 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +1434 garage door railing garage door railing 1 497 38 7 railing otherstructure Objects 40 misc +1435 other ceiling ceiling 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +1436 lightbox frame light box frame 1 40 7 otherprop Objects 40 misc +1437 air 
hole vent 1 25 38 7 air vent otherstructure Objects n04526241 vent.n.01 40 misc +1438 mantel mantel 1 58 38 7 mantel otherstructure Objects mantel n03719343 mantel.n.01 27 fireplace +1439 cooker cooker 4 267 40 7 utensil otherprop Objects n03101156 cooker.n.01 39 objects +1440 partial partial 1 40 7 otherprop Objects 40 misc +1441 amplifier amplifier 1 40 7 otherprop Objects n02705944 amplifier.n.01 39 objects +1442 emergency exit emergency exit 1 40 7 otherprop Objects n03345658 fire_escape.n.01 16 stairs +1443 barbecue barbecue 1 40 7 otherprop Objects n02790669 barbecue.n.03 40 misc +1444 unkown / other room unknown /otherroom 1 20 40 7 unknown otherprop Objects n08632096 unknown.n.01 41 unlabeled +1445 bath sink bath sink 1 24 34 7 sink sink Objects sink 40 misc +1446 spa bench spa bench 1 204 39 6 bench otherfurniture Furniture bench bench 02828884 n02828884 bench.n.01 34 seating +1447 handsoap hand soap 1 133 40 7 soap otherprop Objects n04253437 soap.n.01 39 objects +1448 folding stand folding stand 1 50 39 6 stand otherfurniture Furniture 40 misc +1449 table plant table plant 1 82 40 7 plant otherprop Objects plant 40 misc +1450 hose outlet hose outlet 1 40 7 otherprop Objects 40 misc +1451 theater stage theater stage 1 40 7 otherprop Objects n04418818 theater_stage.n.01 39 objects +1452 bar cabinet bar cabinet 1 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +1453 washer and dryer washing machine and dryer 1 40 7 otherprop Objects 37 appliances +1454 door trim molding molding 1 38 7 otherstructure Objects n02800354 baseboard.n.01 1 wall +1455 paintings picture 1 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +1456 unknown wall part unknown wall 1 21 1 12 wall wall Wall 40 misc +1457 wooden balcony balcony 1 40 7 otherprop Objects 40 misc +1458 shelves with shoes shelving 1 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +1459 stair landing landing 1 40 7 otherprop Objects n03638511 landing.n.01 2 floor +1460 rack of excercise weights rack 1 50 39 6 stand otherfurniture Furniture n04038440 rack.n.05 31 shelving +1461 unknown - hot tub? unknown - hot tub? 
1 40 7 otherprop Objects 40 misc +1462 wall window window 1 59 9 13 window window Window n04587648 window.n.01 9 window +1463 ceiling smoke detector smoke detector 1 40 7 otherprop Objects 40 misc +1464 couch type chair sofa chair 1 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +1465 scales scale 1 639 40 7 scale otherprop Objects n04141975 scale.n.07 39 objects +1466 chair rail chair rail 1 40 7 otherprop Objects 40 misc +1467 kitchen backsplash backsplash 1 40 7 otherprop Objects 40 misc +1468 shelves / other room shelves /otherroom 1 42 15 6 shelves shelves Furniture 40 misc +1469 tools tool 1 40 7 otherprop Objects n04451818 tool.n.01 39 objects +1470 hoses for chemical tank hoses for chemical tank 1 40 7 otherprop Objects 40 misc +1471 wall behind / remove wall /otherroom 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1472 sauna heat rocks sauna heat rocks 1 40 7 otherprop Objects 40 misc +1473 camera camera 1 40 40 7 camera otherprop Objects camera 02942699 n02942699 camera.n.01 39 objects +1474 curtain lighter curtain lighter 1 40 7 otherprop Objects 40 misc +1475 table decor decoration 1 40 7 otherprop Objects n03169390 decoration.n.01 39 objects +1476 brochure brochure 1 69 40 7 folder otherprop Objects n06413889 booklet.n.01 39 objects +1477 flowerwage flower vase 1 78 40 7 vase otherprop Objects vase jar 03593526 n04522168 vase.n.01 39 objects +1478 drill drill 1 40 7 otherprop Objects 40 misc +1479 book stand bookshelf 1 88 10 6 bookshelf bookshelf Furniture bookshelf bookshelf 02871439 n02871439 bookshelf.n.01 31 shelving +1480 china cabinet cabinet 1 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +1481 remove / behind remove 1 0 0 0 void void void 0 void +1482 rotunda wall rotunda wall 1 21 1 12 wall wall Wall 40 misc +1483 stone support structure stone support structure 1 40 7 otherprop Objects 40 misc +1484 dresser decor decoration 1 40 7 otherprop Objects n03169390 decoration.n.01 39 objects +1485 roof / other room ceiling /otherroom 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +1486 elliptical elliptical 1 457 39 6 excercise equipment otherfurniture Furniture n04285146 sports_equipment.n.01 33 gym_equipment +1487 furnace furnace 1 551 39 6 furnace otherfurniture Furniture n03404449 furnace.n.01 40 misc +1488 foosball table foosball table 1 510 39 6 foosball table otherfurniture Furniture table table table 04379243 n04379243 table.n.02 5 table +1489 maracas maraca 1 40 7 otherprop Objects n03720891 maraca.n.01 39 objects +1490 box of fruit box of fruit 1 286 40 7 fruit otherprop Objects 40 misc +1491 window screen window screen 1 40 7 otherprop Objects n04589890 window_screen.n.01 9 window +1492 door tag door tag 1 218 40 7 tag otherprop Objects 40 misc +1493 fire pit fire pit 1 40 7 otherprop Objects n09280113 fire_pit.n.01 39 objects +1494 box of tissues box of tissues 1 40 7 otherprop Objects 40 misc +1495 portal portal 1 46 40 7 computer otherprop Objects n06359657 portal_site.n.01 39 objects +1496 yoga mat yoga mat 1 205 40 7 yoga mat otherprop Objects 40 misc +1497 toy duck toy duck 1 887 40 7 duck otherprop Objects 40 misc +1498 motorcycle motorcycle 1 40 7 otherprop Objects motorcycle 03790512 n03790512 motorcycle.n.01 39 objects +1499 computer tower computer tower 1 46 40 7 computer otherprop Objects n03082979 computer.n.01 39 objects +1500 frame for door frame for door 1 28 8 12 door door Wall door 40 misc +1501 theater screen theater screen 1 40 7 otherprop Objects 
40 misc +1502 swimming pool swimming pool 1 40 7 otherprop Objects n04371225 swimming_pool.n.01 40 misc +1503 shelf with clutter shelf 1 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +1504 spice rack spice rack 1 241 38 7 spice rack otherstructure Objects n04275175 spice_rack.n.01 31 shelving +1505 bed small bed small 1 40 7 otherprop Objects 40 misc +1506 object/other room object /otherroom 1 40 7 otherprop Objects n00002684 object.n.01 39 objects +1507 dinnerware dinnerware 1 40 7 otherprop Objects n03202622 dinnerware.n.01 39 objects +1508 storage shelves shelving 1 42 15 6 shelves shelves Furniture n04190052 shelf.n.01 31 shelving +1509 tanning bed tanning bed 1 157 4 1 bed bed Bed bed bed bed 02818832 n02818832 bed.n.01 11 bed +1510 grid door door 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +1511 background chair chair 1 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +1512 boarder boarder 1 40 7 otherprop Objects 40 misc +1513 closte closet 1 40 7 otherprop Objects n03148324 cupboard.n.01 7 cabinet +1514 doorframe / upstairs room doorframe /otherroom 1 615 38 7 door frame otherstructure Objects n03222722 doorframe.n.01 4 door +1515 rolling cart rolling cart 1 305 40 7 cart otherprop Objects 40 misc +1516 box of tissue box of tissue 1 648 40 7 tissue otherprop Objects 40 misc +1517 door nob door knob 1 27 40 7 door knob otherprop Objects 40 misc +1518 floor \other room floor /otherroom 1 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +1519 table w/ clutter table 1 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +1520 table vase soil table vase soil 1 40 7 otherprop Objects 40 misc +1521 light box light box 1 26 29 7 box box Objects 40 misc +1522 wines wine 1 766 40 7 wine otherprop Objects 40 misc +1523 sliding glass door sliding glass door 1 28 8 12 door door Wall door 40 misc +1524 toilet handle toilet handle 1 758 40 7 handle otherprop Objects 40 misc +1525 bath deco bath deco 1 40 7 otherprop Objects 40 misc +1526 shower hose/head shower hose/head 1 40 7 otherprop Objects 40 misc +1527 shower case shower case 1 851 40 7 case otherprop Objects 40 misc +1528 mortar mortar 1 40 7 otherprop Objects 40 misc +1529 watering can watering can 1 329 40 7 can otherprop Objects can 02946921 n02946921 can.n.01 39 objects +1530 scarf scarf 1 240 40 7 scarf otherprop Objects n04143897 scarf.n.01 38 clothes +1531 softer softer 1 40 7 otherprop Objects 40 misc +1532 soap dish cubby soap dish cubby 1 40 7 otherprop Objects 40 misc +1533 paintintg picture 1 64 11 8 picture picture Picture n03931044 picture.n.01 6 picture +1534 cleaning clutter cleaning clutter 1 40 7 otherprop Objects 40 misc +1535 fireplace tool set fireplace tool set 1 40 7 otherprop Objects 40 misc +1536 table stand table stand 1 50 39 6 stand otherfurniture Furniture 40 misc +1537 shower valve shower valve 1 40 7 otherprop Objects 40 misc +1538 long sofa couch 1 83 6 9 sofa sofa Sofa sofa sofa sofa 04256520 n04256520 sofa.n.01 10 sofa +1539 wall soap shelf wall soap shelf 1 40 7 otherprop Objects 40 misc +1540 ceiling inset for fan ceiling inset for fan 1 74 40 7 fan otherprop Objects 40 misc +1541 beanbag chair beanbag chair 1 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +1542 door bottom rail door bottom rail 1 40 7 otherprop Objects 40 misc +1543 sowing machine sowing machine 1 220 40 7 machine otherprop Objects 40 misc +1544 couch chair sofa chair 1 5 5 4 chair chair Chair chair chair 
chair 03001627 n03001627 chair.n.01 3 chair +1545 tabletop trinket tabletop trinket 1 844 40 7 trinket otherprop Objects 40 misc +1546 ceilling ceiling 3 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +1547 flowerstand flower stand 1 50 39 6 stand otherfurniture Furniture 40 misc +1548 wall stand wall stand 1 295 38 7 wall stand otherstructure Objects 40 misc +1549 shower cabin shower cabin 1 40 7 otherprop Objects 40 misc +1550 shower-bath cabinet shower-bath cabinet 1 3 3 6 cabinet cabinet Furniture cabinet 02933112 n02933112 cabinet.n.01 7 cabinet +1551 teddy bear stuffed animal 1 177 40 7 stuffed animal otherprop Objects n04399382 teddy.n.01 39 objects +1552 foor lamp floor lamp 1 144 35 7 lamp lamp Objects lamp lamp 03636649 n03367059 floor_lamp.n.01 28 lighting +1553 water fountain water fountain 1 339 38 7 water fountain otherstructure Objects n03241335 drinking_fountain.n.01 40 misc +1554 elephant sculpture elephant sculpture 1 294 40 7 sculpture otherprop Objects 40 misc +1555 grate grate 1 40 7 otherprop Objects 40 misc +1556 chicken chicken 1 40 7 otherprop Objects n01791625 chicken.n.02 39 objects +1557 bathroom fan bathroom fan 1 74 40 7 fan otherprop Objects n03320046 fan.n.01 39 objects +1558 roomba roomba 1 40 7 otherprop Objects 40 misc +1559 planter pot 1 16 40 7 pot otherprop Objects n03991062 pot.n.04 39 objects +1560 controls control 1 40 7 otherprop Objects n03096960 control.n.09 39 objects +1561 nightstand / other room nightstand /otherroom 1 158 32 6 night stand night stand Furniture night_stand night_stand n03015254 chest_of_drawers.n.01 13 chest_of_drawers +1562 worktop worktop 1 40 7 otherprop Objects 40 misc +1563 steps wall steps wall 1 21 1 12 wall wall Wall 40 misc +1564 door fireplace wall door 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +1565 floor / upstairs room floor /otherroom 1 11 2 5 floor floor Floor n03365592 floor.n.01 2 floor +1566 oven and stove oven and stove 1 242 38 7 stove otherstructure Objects stove 04330267 40 misc +1567 chest drawer chest drawer 1 174 39 6 drawer otherfurniture Furniture 40 misc +1568 door outside door /outside 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +1569 fireplace \other room fireplace /otherroom 1 372 38 7 fireplace otherstructure Objects n03346455 fireplace.n.01 27 fireplace +1570 hammock hammock 1 157 4 1 bed bed Bed bed bed bed 02818832 n03482252 hammock.n.02 11 bed +1571 dartboard dartboard 1 408 38 7 board otherstructure Objects n03162940 dartboard.n.01 39 objects +1572 fireplace brush fireplace brush 1 40 7 otherprop Objects 40 misc +1573 drum drum 1 145 40 7 drum otherprop Objects n03249569 drum.n.01 39 objects +1574 rotunda ceiling ceiling 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +1575 ceiling light fixture connection ceiling light fixture connection 1 40 7 otherprop Objects 40 misc +1576 stair fencing banister 1 453 38 7 banister otherstructure Objects n02788148 bannister.n.02 30 railing +1577 piano stool piano stool 1 150 40 7 stool otherprop Objects stool n03801880 music_stool.n.01 19 stool +1578 swing door swing door 1 28 8 12 door door Wall door n04371979 swing_door.n.01 4 door +1579 bathtub utencils bathtub utensil 1 267 40 7 utensil otherprop Objects 40 misc +1580 access area access area 1 40 7 otherprop Objects 40 misc +1581 exercise mat roll exercise mat roll 1 40 7 otherprop Objects 40 misc +1582 ceiling duct ceiling duct 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +1583 motorbike motorbike 1 40 7 
otherprop Objects motorcycle 03790512 n03769722 minibike.n.01 39 objects +1584 floor elevator floor elevator 1 40 7 otherprop Objects 40 misc +1585 kitchen countertop items kitchen countertop items 1 40 7 otherprop Objects 40 misc +1586 entertainment set entertainment set 1 40 7 otherprop Objects 40 misc +1587 shower curtain bar shower curtain bar 1 51 38 7 bar otherstructure Objects 40 misc +1588 bedroom entry walls wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1589 oven vent oven vent 1 40 7 otherprop Objects 40 misc +1590 shower floor /otherroom shower floor /otherroom 1 11 2 5 floor floor Floor n04208936 shower.n.01 23 shower +1591 washing bowl sink 1 24 34 7 sink sink Objects sink n04223580 sink.n.01 15 sink +1592 desert plate desert plate 1 233 40 7 plate otherprop Objects 40 misc +1593 fuse panel fuse panel 1 40 7 otherprop Objects 40 misc +1594 barbers chair barbers chair 1 5 5 4 chair chair Chair chair chair chair 03001627 n03001627 chair.n.01 3 chair +1595 diploma diploma 1 40 7 otherprop Objects 40 misc +1596 wall behind stove wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1597 window6 window 1 59 9 13 window window Window n04587648 window.n.01 9 window +1598 ceiling /otherroom ceiling /otherroom 1 4 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 17 ceiling +1599 ventilation hood ventilation hood 1 40 7 otherprop Objects 40 misc +1600 dirt ground dirt ground 1 40 7 otherprop Objects 40 misc +1601 awning awning 1 40 7 otherprop Objects n02763901 awning.n.01 39 objects +1602 stiarcase step step 1 38 7 otherstructure Objects n04314914 step.n.04 16 stairs +1603 rack of weights rack of weights 1 40 7 otherprop Objects 40 misc +1604 shower tub shower tub 1 675 40 7 shower tube otherprop Objects bathtub bathtub tub 02808440 n02808440 bathtub.n.01 25 bathtub +1605 washing powder washing powder 1 40 7 otherprop Objects 40 misc +1606 toilet cleaner toilet cleaner 1 548 40 7 cleaner otherprop Objects 40 misc +1607 soap displensor shelf in shower soap dispenser shelf in shower 1 40 7 otherprop Objects 40 misc +1608 window or door window/door 1 40 7 otherprop Objects 40 misc +1609 storage boxes storage box 1 26 29 7 box box Objects 40 misc +1610 closet storage area closet storage area 1 40 7 otherprop Objects 40 misc +1611 stationary bike exercise bike 1 457 39 6 excercise equipment otherfurniture Furniture n03302671 exercise_bike.n.01 33 gym_equipment +1612 bedding bedding 1 40 7 otherprop Objects n02820210 bedclothes.n.01 39 objects +1613 stair handrail banister 1 453 38 7 banister otherstructure Objects n02788148 bannister.n.02 30 railing +1614 room divider partition 1 21 1 12 wall wall Wall n03894379 partition.n.01 40 misc +1615 window frames window frame 1 59 9 13 window window Window n04589593 window_frame.n.01 9 window +1616 wall electrics wall electronics 1 40 7 otherprop Objects 40 misc +1617 other step step 1 38 7 otherstructure Objects n04314914 step.n.04 16 stairs +1618 tabletop objects object 1 40 7 otherprop Objects n00002684 object.n.01 39 objects +1619 foosball game table foosball game table 1 19 7 10 table table Table table table table 04379243 n04379243 table.n.02 5 table +1620 wall showcase wall showcase 1 40 7 otherprop Objects 40 misc +1621 kitchen utencil kitchen utensil 1 267 40 7 utensil otherprop Objects n03621049 kitchen_utensil.n.01 39 objects +1622 towels in a basket towels in a basket 1 39 40 7 basket otherprop Objects basket 02801938 n02801938 basket.n.01 39 objects +1623 lamp stand lamp stand 1 50 39 6 stand otherfurniture Furniture 
40 misc +1624 toy giraffe toy giraffe 1 40 7 otherprop Objects 40 misc +1625 big door frame door frame 1 28 8 12 door door Wall door n03221720 door.n.01 4 door +1626 unknown / probably shrubbery unknown/probably shrubbery 1 40 7 otherprop Objects 40 misc +1627 wooden wall paneling paneling 1 21 1 12 wall wall Wall n03882611 paneling.n.01 1 wall +1628 bedstead bedstead 1 157 4 1 bed bed Bed n02822579 bedstead.n.01 11 bed +1629 fire dish fire dish 1 40 7 otherprop Objects 40 misc +1630 tray with tea cups tray 1 179 40 7 tray otherprop Objects n04476259 tray.n.01 39 objects +1631 television table tv stand 1 291 39 6 tv stand otherfurniture Furniture tv_stand n03290653 entertainment_center.n.01 36 furniture +1632 tissues tissue 1 648 40 7 tissue otherprop Objects 40 misc +1633 sitting area sitting area 1 40 7 otherprop Objects 40 misc +1634 wall vent vent 1 25 38 7 air vent otherstructure Objects n04526241 vent.n.01 40 misc +1635 utensils utensil 1 267 40 7 utensil otherprop Objects n04516672 utensil.n.01 39 objects +1636 pip pip 1 286 40 7 fruit otherprop Objects n11685091 pip.n.03 39 objects +1637 fireplace wall fireplace wall 1 21 1 12 wall wall Wall 40 misc +1638 stonework stonework 1 40 7 otherprop Objects n04326799 stonework.n.01 40 misc +1639 bottom of stairs bottom of stairs 1 215 38 7 stairs otherstructure Objects stairs 40 misc +1640 star star 1 40 7 otherprop Objects n09444783 star.n.03 39 objects +1641 art / deer statue statue 1 294 40 7 sculpture otherprop Objects n04306847 statue.n.01 39 objects +1642 unknown objects object 1 40 7 otherprop Objects n00002684 object.n.01 39 objects +1643 wall side wall 1 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1644 shower glass /shifted shower glass /shifted 1 612 38 7 glass otherstructure Objects 40 misc +1645 toilet stall partition partition 1 21 1 12 wall wall Wall n03894379 partition.n.01 40 misc +1646 ceiling design 1 40 7 otherprop Objects 40 misc +1647 conference table conference table 1 19 7 10 table table Table table table table 04379243 n03090000 conference_table.n.01 5 table +1648 edge edge 1 40 7 otherprop Objects 40 misc +1649 electrical box electrical box 1 26 29 7 box box Objects n03034244 circuit_breaker.n.01 39 objects +1650 entrance entrance 1 40 7 otherprop Objects n03290771 entrance.n.01 40 misc +1651 gap gap 2 40 7 otherprop Objects n09249034 col.n.01 39 objects +1652 garage door opener motor garage door opener motor 1 40 7 otherprop Objects 40 misc +1653 lamp base lamp 1 144 35 7 lamp lamp Objects lamp lamp 03636649 n03636649 lamp.n.02 28 lighting +1654 racks rack 2 50 39 6 stand otherfurniture Furniture n04038440 rack.n.05 31 shelving +1655 stringer stringer 1 40 7 otherprop Objects 40 misc +1656 supporting structure supporting structure 1 40 7 otherprop Objects n04361095 supporting_structure.n.01 40 misc +1657 typewriter typewriter 1 376 40 7 typewriter otherprop Objects printer 04004475 n04505036 typewriter.n.01 39 objects +1658 walls wall 4 21 1 12 wall wall Wall n04546855 wall.n.01 1 wall +1659 washbasin top washbasin 1 24 34 7 sink sink Objects sink n04553920 washbasin.n.01 15 sink diff --git a/Pointcept/pointcept/datasets/preprocessing/matterport3d/meta_data/scenes_test.txt b/Pointcept/pointcept/datasets/preprocessing/matterport3d/meta_data/scenes_test.txt new file mode 100644 index 0000000000000000000000000000000000000000..e378f66e2ed767778f9e62826da0ddac058c1682 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/matterport3d/meta_data/scenes_test.txt @@ -0,0 +1,18 @@ +2t7WUuJeko7 +5ZKStnWn8Zo 
+ARNzJeq3xxb
+fzynW3qQPVF
+jtcxE69GiFV
+pa4otMbVnkk
+q9vSo1VnCiC
+rqfALeAoiTq
+UwV83HsGsw3
+wc2JMjhGNzB
+WYY7iVyf5p8
+YFuZgdQ5vWj
+yqstnuAEVhm
+YVUC4YcDtcY
+gxdoqLR6rwA
+gYvKGZ5eRqb
+RPmz2sHmrrY
+Vt2qJdWjCF2
\ No newline at end of file
diff --git a/Pointcept/pointcept/datasets/preprocessing/matterport3d/meta_data/scenes_train.txt b/Pointcept/pointcept/datasets/preprocessing/matterport3d/meta_data/scenes_train.txt
new file mode 100644
index 0000000000000000000000000000000000000000..64afe8d80d754176a0b6c68791da55b16ea36c56
--- /dev/null
+++ b/Pointcept/pointcept/datasets/preprocessing/matterport3d/meta_data/scenes_train.txt
@@ -0,0 +1,61 @@
+17DRP5sb8fy
+1LXtFkjw3qL
+1pXnuDYAj8r
+29hnd4uzFmX
+5LpN3gDmAk7
+5q7pvUzZiYa
+759xd9YjKW5
+7y3sRwLe3Va
+82sE5b5pLXE
+8WUmhLawc2A
+aayBHfsNo7d
+ac26ZMwG7aT
+B6ByNegPMKs
+b8cTxDM8gDG
+cV4RVeZvu5T
+D7N2EKCX4Sj
+e9zR4mvMWw7
+EDJbREhghzL
+GdvgFV5R1Z5
+gTV8FGcVJC9
+HxpKQynjfin
+i5noydFURQK
+JeFG25nYj2p
+JF19kD82Mey
+jh4fc5c5qoQ
+kEZ7cmS4wCh
+mJXqzFtmKg4
+p5wJjkQkbXX
+Pm6F8kyY3z2
+pRbA3pwrgk9
+PuKPg4mmafe
+PX4nDJXEHrG
+qoiz87JEwZ2
+rPc6DW4iMge
+s8pcmisQ38h
+S9hNv5qa7GM
+sKLMLpTHeUy
+SN83YJsR3w2
+sT4fr6TAbpF
+ULsKaCPVFJR
+uNb9QFRL6hY
+Uxmj2M2itWa
+V2XKFyX4ASd
+VFuaQ6m2Qom
+VVfe2KiqLaN
+Vvot9Ly1tCj
+vyrNrziPKCB
+VzqfbhrpDEA
+XcA2TqTSSAj
+2n8kARJN3HM
+D7G3Y4RVNrH
+dhjEzFoUFzH
+E9uDoFAP3SH
+gZ6f7yhEvPG
+JmbYfDe2QKZ
+r1Q1Z4BcV1o
+r47D5H71a5s
+ur6pFq6Qu1A
+VLzqgDo317F
+YmJkqBEsHnH
+ZMojNkEp431
\ No newline at end of file
diff --git a/Pointcept/pointcept/datasets/preprocessing/matterport3d/meta_data/scenes_val.txt b/Pointcept/pointcept/datasets/preprocessing/matterport3d/meta_data/scenes_val.txt
new file mode 100644
index 0000000000000000000000000000000000000000..bcec005da85cc9f481208f9730a23da9bb9acc96
--- /dev/null
+++ b/Pointcept/pointcept/datasets/preprocessing/matterport3d/meta_data/scenes_val.txt
@@ -0,0 +1,11 @@
+2azQ1b91cZZ
+8194nk5LbLH
+EU6Fwq7SyZv
+oLBMNvg9in8
+QUCTc6BB5sX
+TbHJrupSAjP
+X7HyMhZNoso
+pLe4wQe7qrG
+x8F5xyUWy9e
+Z6MFQCViBuw
+zsNo4HB9uLZ
\ No newline at end of file
diff --git a/Pointcept/pointcept/datasets/preprocessing/matterport3d/preprocess_matterport3d_mesh.py b/Pointcept/pointcept/datasets/preprocessing/matterport3d/preprocess_matterport3d_mesh.py
new file mode 100644
index 0000000000000000000000000000000000000000..82d2850b73602924e6ce93d23976a35bbe8a2ae7
--- /dev/null
+++ b/Pointcept/pointcept/datasets/preprocessing/matterport3d/preprocess_matterport3d_mesh.py
@@ -0,0 +1,240 @@
+"""
+Preprocessing Script for Matterport3D (Mesh)
+adapted from https://github.com/pengsongyou/openscene/blob/main/scripts/preprocess/preprocess_3d_matterport.py
+
+Author: Chongjie Ye (chongjieye@link.cuhk.edu.cn)
+Modified by: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
+Please cite our work if the code is helpful to you.
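+
+Example usage (a sketch; paths and the worker count are placeholders, the flags
+are the ones defined by the argument parser below):
+    python preprocess_matterport3d_mesh.py \
+        --dataset_root /path/to/matterport3d \
+        --output_root /path/to/output \
+        --num_workers 8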
+""" + +import os +import argparse +import glob +import plyfile +import numpy as np +import pandas as pd +import multiprocessing as mp +from concurrent.futures import ProcessPoolExecutor +from itertools import repeat +from pathlib import Path +import torch + +MATTERPORT_CLASS_REMAP = np.zeros(41) +MATTERPORT_CLASS_REMAP[1] = 1 +MATTERPORT_CLASS_REMAP[2] = 2 +MATTERPORT_CLASS_REMAP[3] = 3 +MATTERPORT_CLASS_REMAP[4] = 4 +MATTERPORT_CLASS_REMAP[5] = 5 +MATTERPORT_CLASS_REMAP[6] = 6 +MATTERPORT_CLASS_REMAP[7] = 7 +MATTERPORT_CLASS_REMAP[8] = 8 +MATTERPORT_CLASS_REMAP[9] = 9 +MATTERPORT_CLASS_REMAP[10] = 10 +MATTERPORT_CLASS_REMAP[11] = 11 +MATTERPORT_CLASS_REMAP[12] = 12 +MATTERPORT_CLASS_REMAP[14] = 13 +MATTERPORT_CLASS_REMAP[16] = 14 +MATTERPORT_CLASS_REMAP[22] = 21 # DIFFERENCE TO SCANNET! +MATTERPORT_CLASS_REMAP[24] = 15 +MATTERPORT_CLASS_REMAP[28] = 16 +MATTERPORT_CLASS_REMAP[33] = 17 +MATTERPORT_CLASS_REMAP[34] = 18 +MATTERPORT_CLASS_REMAP[36] = 19 +MATTERPORT_CLASS_REMAP[39] = 20 + +MATTERPORT_LABELS_21 = ( + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refrigerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "other", + "ceiling", +) +MATTERPORT_ALLOWED_NYU_CLASSES = [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 14, + 16, + 22, + 24, + 28, + 33, + 34, + 36, + 39, +] + + +def handle_process(mesh_path, output_path, mapping, train_scenes, val_scenes): + # Get the scene id and region name from the mesh path + scene_id = Path(mesh_path).parent.parent.name + region_id = Path(mesh_path).stem.removeprefix("region") + data_name = f"{scene_id}_{int(region_id):02d}" + + output_path = Path(output_path) + # Check which split the scene belongs to (train, val, or test) + if scene_id in train_scenes: + output_folder = output_path / "train" / data_name + split = "train" + elif scene_id in val_scenes: + output_folder = output_path / "val" / data_name + split = "val" + else: + output_folder = output_path / "test" / data_name + split = "test" + + # Create the output directory if it doesn't exist + os.makedirs(output_folder, exist_ok=True) + print(f"Processing: {data_name} in {split}") + + # Load the vertex data + with open(mesh_path, "rb") as f: + plydata = plyfile.PlyData.read(f) + vertex_data = plydata["vertex"].data + + # Get the coordinates, colors, and normals from the vertex data + coords = np.vstack([vertex_data["x"], vertex_data["y"], vertex_data["z"]]).T + colors = np.vstack( + [vertex_data["red"], vertex_data["green"], vertex_data["blue"]] + ).T + normals = np.vstack([vertex_data["nx"], vertex_data["ny"], vertex_data["nz"]]).T + + # Load the face data + face_data = plydata["face"].data + category_id = face_data["category_id"] + + # Replace -1 with 0 in category_id + category_id[category_id == -1] = 0 + + # Map the labels according to NYU40ID + mapped_labels = mapping[category_id] + + # Replace labels not in MATTERPORT_ALLOWED_NYU_CLASSES with 0 + mapped_labels[ + np.logical_not(np.isin(mapped_labels, MATTERPORT_ALLOWED_NYU_CLASSES)) + ] = 0 + + # Remap the labels to ScanNet 20 categories + ceiling + remapped_labels = MATTERPORT_CLASS_REMAP[mapped_labels].astype(int) + + # Calculate per-vertex labels + triangles = face_data["vertex_indices"] + vertex_labels = np.zeros((coords.shape[0], 22), dtype=np.int32) + # calculate per-vertex labels + for row_id in range(triangles.shape[0]): + for i in range(3): + vertex_labels[triangles[row_id][i], 
+
+    # Get the most frequent label for each vertex
+    vertex_labels = np.argmax(vertex_labels, axis=1)
+    # Shift labels down by one: void becomes -1, valid classes start at 0
+    vertex_labels -= 1
+
+    # Prepare the data to be saved
+    data_dict = dict(
+        coord=coords.astype("float32"),
+        color=colors.astype("uint8"),
+        normal=normals.astype("float32"),
+        segment=vertex_labels.astype("int16"),
+    )
+
+    # Save processed data
+    for key in data_dict.keys():
+        np.save(output_folder / f"{key}.npy", data_dict[key])
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--dataset_root",
+        required=True,
+        help="Path to the Matterport3D dataset containing scene folders",
+    )
+    parser.add_argument(
+        "--output_root",
+        required=True,
+        help="Output path where train/val folders will be located",
+    )
+    parser.add_argument(
+        "--num_workers",
+        default=mp.cpu_count(),
+        type=int,
+        help="Num workers for preprocessing.",
+    )
+    opt = parser.parse_args()
+    meta_root = Path(os.path.dirname(__file__)) / "meta_data"
+
+    # Load label map
+    category_mapping = pd.read_csv(
+        meta_root / "category_mapping.tsv",
+        sep="\t",
+        header=0,
+    )
+    mapping = np.insert(
+        category_mapping[["nyu40id"]].to_numpy().astype(int).flatten(), 0, 0, axis=0
+    )
+
+    # Load train/val/test splits (scenes in neither train nor val fall back to test)
+    with open(meta_root / "scenes_train.txt") as train_file:
+        train_scenes = train_file.read().splitlines()
+    with open(meta_root / "scenes_val.txt") as val_file:
+        val_scenes = val_file.read().splitlines()
+    with open(meta_root / "scenes_test.txt") as test_file:
+        test_scenes = test_file.read().splitlines()
+
+    # Create output directories
+    os.makedirs(opt.output_root, exist_ok=True)
+    train_output_dir = os.path.join(opt.output_root, "train")
+    os.makedirs(train_output_dir, exist_ok=True)
+    val_output_dir = os.path.join(opt.output_root, "val")
+    os.makedirs(val_output_dir, exist_ok=True)
+    test_output_dir = os.path.join(opt.output_root, "test")
+    os.makedirs(test_output_dir, exist_ok=True)
+
+    # Load scene paths
+    scene_paths = sorted(
+        glob.glob(
+            os.path.join(
+                opt.dataset_root, "v1", "scans", "*", "region_segmentations", "*.ply"
+            )
+        )
+    )
+
+    # Preprocess data.
+    pool = ProcessPoolExecutor(max_workers=opt.num_workers)
+    print("Processing scenes...")
+    _ = list(
+        pool.map(
+            handle_process,
+            scene_paths,
+            repeat(opt.output_root),
+            repeat(mapping),
+            repeat(train_scenes),
+            repeat(val_scenes),
+        )
+    )
diff --git a/Pointcept/pointcept/datasets/preprocessing/matterport3d/unzip_matterport3d_region_segmentation.py b/Pointcept/pointcept/datasets/preprocessing/matterport3d/unzip_matterport3d_region_segmentation.py
new file mode 100644
index 0000000000000000000000000000000000000000..eee79908262e09aeb428a5e176cf799e8647a01a
--- /dev/null
+++ b/Pointcept/pointcept/datasets/preprocessing/matterport3d/unzip_matterport3d_region_segmentation.py
@@ -0,0 +1,66 @@
+"""
+Preprocessing Script for Matterport3D (Unzipping)
+modified from the official preprocessing code.
+
+Author: Chongjie Ye (chongjieye@link.cuhk.edu.cn)
+Modified by: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
+Please cite our work if the code is helpful to you.
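+
+Example usage (a sketch; the path is a placeholder, the flags are the ones
+defined by the argument parser below):
+    python unzip_matterport3d_region_segmentation.py \
+        --dataset_root /path/to/matterport3d --num_workers 8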
+""" + +import argparse +import os +import zipfile +import glob +import multiprocessing as mp +from concurrent.futures import ProcessPoolExecutor +from itertools import repeat + + +def unzip_file(input_path, output_path): + print(f"Unzipping {input_path} ...") + os.makedirs(os.path.dirname(output_path), exist_ok=True) + with zipfile.ZipFile(input_path, "r") as zip_ref: + zip_ref.extractall(output_path) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Unzip all "region_segmentations.zip" files in a directory' + ) + parser.add_argument( + "--dataset_root", + type=str, + help="Path to input directory containing ZIP files", + required=True, + ) + parser.add_argument( + "--output_root", + type=str, + help="Path to output directory for extracted files", + default=None, + ) + parser.add_argument( + "--num_workers", + default=mp.cpu_count(), + type=int, + help="Num workers for preprocessing.", + ) + args = parser.parse_args() + if args.output_root is None: + args.output_root = args.dataset_root + args.output_root = os.path.join(args.output_root, "v1", "scans") + + file_list = glob.glob( + os.path.join(args.dataset_root, "v1", "scans", "*", "region_segmentations.zip") + ) + + # Preprocess data. + print("Unzipping region_segmentations.zip in Matterport3D...") + pool = ProcessPoolExecutor(max_workers=args.num_workers) + _ = list( + pool.map( + unzip_file, + file_list, + repeat(args.output_root), + ) + ) diff --git a/Pointcept/pointcept/datasets/preprocessing/nuscenes/preprocess_nuscenes_info.py b/Pointcept/pointcept/datasets/preprocessing/nuscenes/preprocess_nuscenes_info.py new file mode 100644 index 0000000000000000000000000000000000000000..7ed106f193a488aa76385157aa33fb65e5944a6f --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/nuscenes/preprocess_nuscenes_info.py @@ -0,0 +1,607 @@ +""" +Preprocessing Script for nuScenes Informantion +modified from OpenPCDet (https://github.com/open-mmlab/OpenPCDet) + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
+""" + +import os +from pathlib import Path +import numpy as np +import argparse +import tqdm +import pickle +from functools import reduce +from pyquaternion import Quaternion +from nuscenes.nuscenes import NuScenes +from nuscenes.utils import splits +from nuscenes.utils.geometry_utils import transform_matrix + + +map_name_from_general_to_detection = { + "human.pedestrian.adult": "pedestrian", + "human.pedestrian.child": "pedestrian", + "human.pedestrian.wheelchair": "ignore", + "human.pedestrian.stroller": "ignore", + "human.pedestrian.personal_mobility": "ignore", + "human.pedestrian.police_officer": "pedestrian", + "human.pedestrian.construction_worker": "pedestrian", + "animal": "ignore", + "vehicle.car": "car", + "vehicle.motorcycle": "motorcycle", + "vehicle.bicycle": "bicycle", + "vehicle.bus.bendy": "bus", + "vehicle.bus.rigid": "bus", + "vehicle.truck": "truck", + "vehicle.construction": "construction_vehicle", + "vehicle.emergency.ambulance": "ignore", + "vehicle.emergency.police": "ignore", + "vehicle.trailer": "trailer", + "movable_object.barrier": "barrier", + "movable_object.trafficcone": "traffic_cone", + "movable_object.pushable_pullable": "ignore", + "movable_object.debris": "ignore", + "static_object.bicycle_rack": "ignore", +} + + +cls_attr_dist = { + "barrier": { + "cycle.with_rider": 0, + "cycle.without_rider": 0, + "pedestrian.moving": 0, + "pedestrian.sitting_lying_down": 0, + "pedestrian.standing": 0, + "vehicle.moving": 0, + "vehicle.parked": 0, + "vehicle.stopped": 0, + }, + "bicycle": { + "cycle.with_rider": 2791, + "cycle.without_rider": 8946, + "pedestrian.moving": 0, + "pedestrian.sitting_lying_down": 0, + "pedestrian.standing": 0, + "vehicle.moving": 0, + "vehicle.parked": 0, + "vehicle.stopped": 0, + }, + "bus": { + "cycle.with_rider": 0, + "cycle.without_rider": 0, + "pedestrian.moving": 0, + "pedestrian.sitting_lying_down": 0, + "pedestrian.standing": 0, + "vehicle.moving": 9092, + "vehicle.parked": 3294, + "vehicle.stopped": 3881, + }, + "car": { + "cycle.with_rider": 0, + "cycle.without_rider": 0, + "pedestrian.moving": 0, + "pedestrian.sitting_lying_down": 0, + "pedestrian.standing": 0, + "vehicle.moving": 114304, + "vehicle.parked": 330133, + "vehicle.stopped": 46898, + }, + "construction_vehicle": { + "cycle.with_rider": 0, + "cycle.without_rider": 0, + "pedestrian.moving": 0, + "pedestrian.sitting_lying_down": 0, + "pedestrian.standing": 0, + "vehicle.moving": 882, + "vehicle.parked": 11549, + "vehicle.stopped": 2102, + }, + "ignore": { + "cycle.with_rider": 307, + "cycle.without_rider": 73, + "pedestrian.moving": 0, + "pedestrian.sitting_lying_down": 0, + "pedestrian.standing": 0, + "vehicle.moving": 165, + "vehicle.parked": 400, + "vehicle.stopped": 102, + }, + "motorcycle": { + "cycle.with_rider": 4233, + "cycle.without_rider": 8326, + "pedestrian.moving": 0, + "pedestrian.sitting_lying_down": 0, + "pedestrian.standing": 0, + "vehicle.moving": 0, + "vehicle.parked": 0, + "vehicle.stopped": 0, + }, + "pedestrian": { + "cycle.with_rider": 0, + "cycle.without_rider": 0, + "pedestrian.moving": 157444, + "pedestrian.sitting_lying_down": 13939, + "pedestrian.standing": 46530, + "vehicle.moving": 0, + "vehicle.parked": 0, + "vehicle.stopped": 0, + }, + "traffic_cone": { + "cycle.with_rider": 0, + "cycle.without_rider": 0, + "pedestrian.moving": 0, + "pedestrian.sitting_lying_down": 0, + "pedestrian.standing": 0, + "vehicle.moving": 0, + "vehicle.parked": 0, + "vehicle.stopped": 0, + }, + "trailer": { + "cycle.with_rider": 0, + "cycle.without_rider": 0, + 
"pedestrian.moving": 0, + "pedestrian.sitting_lying_down": 0, + "pedestrian.standing": 0, + "vehicle.moving": 3421, + "vehicle.parked": 19224, + "vehicle.stopped": 1895, + }, + "truck": { + "cycle.with_rider": 0, + "cycle.without_rider": 0, + "pedestrian.moving": 0, + "pedestrian.sitting_lying_down": 0, + "pedestrian.standing": 0, + "vehicle.moving": 21339, + "vehicle.parked": 55626, + "vehicle.stopped": 11097, + }, +} + + +def get_available_scenes(nusc): + available_scenes = [] + for scene in nusc.scene: + scene_token = scene["token"] + scene_rec = nusc.get("scene", scene_token) + sample_rec = nusc.get("sample", scene_rec["first_sample_token"]) + sd_rec = nusc.get("sample_data", sample_rec["data"]["LIDAR_TOP"]) + has_more_frames = True + scene_not_exist = False + while has_more_frames: + lidar_path, boxes, _ = nusc.get_sample_data(sd_rec["token"]) + if not Path(lidar_path).exists(): + scene_not_exist = True + break + else: + break + if scene_not_exist: + continue + available_scenes.append(scene) + return available_scenes + + +def get_sample_data(nusc, sample_data_token, selected_anntokens=None): + """ + Returns the data path as well as all annotations related to that sample_data. + Note that the boxes are transformed into the current sensor"s coordinate frame. + Args: + nusc: + sample_data_token: Sample_data token. + selected_anntokens: If provided only return the selected annotation. + + Returns: + + """ + # Retrieve sensor & pose records + sd_record = nusc.get("sample_data", sample_data_token) + cs_record = nusc.get("calibrated_sensor", sd_record["calibrated_sensor_token"]) + sensor_record = nusc.get("sensor", cs_record["sensor_token"]) + pose_record = nusc.get("ego_pose", sd_record["ego_pose_token"]) + + data_path = nusc.get_sample_data_path(sample_data_token) + + if sensor_record["modality"] == "camera": + cam_intrinsic = np.array(cs_record["camera_intrinsic"]) + else: + cam_intrinsic = None + + # Retrieve all sample annotations and map to sensor coordinate system. + if selected_anntokens is not None: + boxes = list(map(nusc.get_box, selected_anntokens)) + else: + boxes = nusc.get_boxes(sample_data_token) + + # Make list of Box objects including coord system transforms. + box_list = [] + for box in boxes: + box.velocity = nusc.box_velocity(box.token) + # Move box to ego vehicle coord system + box.translate(-np.array(pose_record["translation"])) + box.rotate(Quaternion(pose_record["rotation"]).inverse) + + # Move box to sensor coord system + box.translate(-np.array(cs_record["translation"])) + box.rotate(Quaternion(cs_record["rotation"]).inverse) + + box_list.append(box) + + return data_path, box_list, cam_intrinsic + + +def quaternion_yaw(q: Quaternion) -> float: + """ + Calculate the yaw angle from a quaternion. + Note that this only works for a quaternion that represents a box in lidar or global coordinate frame. + It does not work for a box in the camera frame. + :param q: Quaternion of interest. + :return: Yaw angle in radians. + """ + + # Project into xy plane. + v = np.dot(q.rotation_matrix, np.array([1, 0, 0])) + + # Measure yaw using arctan. + yaw = np.arctan2(v[1], v[0]) + + return yaw + + +def obtain_sensor2top( + nusc, sensor_token, l2e_t, l2e_r_mat, e2g_t, e2g_r_mat, sensor_type="lidar" +): + """Obtain the info with RT matric from general sensor to Top LiDAR. + + Args: + nusc (class): Dataset class in the nuScenes dataset. + sensor_token (str): Sample data token corresponding to the + specific sensor type. 
+ l2e_t (np.ndarray): Translation from lidar to ego in shape (1, 3). + l2e_r_mat (np.ndarray): Rotation matrix from lidar to ego + in shape (3, 3). + e2g_t (np.ndarray): Translation from ego to global in shape (1, 3). + e2g_r_mat (np.ndarray): Rotation matrix from ego to global + in shape (3, 3). + sensor_type (str): Sensor to calibrate. Default: "lidar". + + Returns: + sweep (dict): Sweep information after transformation. + """ + sd_rec = nusc.get("sample_data", sensor_token) + cs_record = nusc.get("calibrated_sensor", sd_rec["calibrated_sensor_token"]) + pose_record = nusc.get("ego_pose", sd_rec["ego_pose_token"]) + data_path = str(nusc.get_sample_data_path(sd_rec["token"])) + # if os.getcwd() in data_path: # path from lyftdataset is absolute path + # data_path = data_path.split(f"{os.getcwd()}/")[-1] # relative path + sweep = { + "data_path": data_path, + "type": sensor_type, + "sample_data_token": sd_rec["token"], + "sensor2ego_translation": cs_record["translation"], + "sensor2ego_rotation": cs_record["rotation"], + "ego2global_translation": pose_record["translation"], + "ego2global_rotation": pose_record["rotation"], + "timestamp": sd_rec["timestamp"], + } + l2e_r_s = sweep["sensor2ego_rotation"] + l2e_t_s = sweep["sensor2ego_translation"] + e2g_r_s = sweep["ego2global_rotation"] + e2g_t_s = sweep["ego2global_translation"] + + # obtain the RT from sensor to Top LiDAR + # sweep->ego->global->ego'->lidar + l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix + e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix + R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ ( + np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T + ) + T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ ( + np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T + ) + T -= ( + e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T) + + l2e_t @ np.linalg.inv(l2e_r_mat).T + ).squeeze(0) + sweep["sensor2lidar_rotation"] = R.T # points @ R.T + T + sweep["sensor2lidar_translation"] = T + return sweep + + +def fill_trainval_infos( + data_path, nusc, train_scenes, test=False, max_sweeps=10, with_camera=False +): + train_nusc_infos = [] + val_nusc_infos = [] + progress_bar = tqdm.tqdm( + total=len(nusc.sample), desc="create_info", dynamic_ncols=True + ) + + ref_chan = "LIDAR_TOP" # The radar channel from which we track back n sweeps to aggregate the point cloud. + chan = "LIDAR_TOP" # The reference channel of the current sample_rec that the point clouds are mapped to. 
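+    # Clarification: ref_chan is the reference (lidar) channel that aggregated
+    # sweeps are mapped into; chan is the channel whose "prev" records are
+    # followed to collect past sweeps. Each sweep is brought into the reference
+    # frame by composing
+    #   tm = ref_from_car @ car_from_global @ global_from_car @ car_from_current,
+    # i.e. sweep sensor -> sweep ego -> global -> reference ego -> reference sensor.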
+ + for index, sample in enumerate(nusc.sample): + progress_bar.update() + + ref_sd_token = sample["data"][ref_chan] + ref_sd_rec = nusc.get("sample_data", ref_sd_token) + ref_cs_rec = nusc.get( + "calibrated_sensor", ref_sd_rec["calibrated_sensor_token"] + ) + ref_pose_rec = nusc.get("ego_pose", ref_sd_rec["ego_pose_token"]) + ref_time = 1e-6 * ref_sd_rec["timestamp"] + + ref_lidar_path, ref_boxes, _ = get_sample_data(nusc, ref_sd_token) + + ref_cam_front_token = sample["data"]["CAM_FRONT"] + ref_cam_path, _, ref_cam_intrinsic = nusc.get_sample_data(ref_cam_front_token) + + # Homogeneous transform from ego car frame to reference frame + ref_from_car = transform_matrix( + ref_cs_rec["translation"], Quaternion(ref_cs_rec["rotation"]), inverse=True + ) + + # Homogeneous transformation matrix from global to _current_ ego car frame + car_from_global = transform_matrix( + ref_pose_rec["translation"], + Quaternion(ref_pose_rec["rotation"]), + inverse=True, + ) + info = { + "lidar_path": Path(ref_lidar_path).relative_to(data_path).__str__(), + "lidar_token": ref_sd_token, + "cam_front_path": Path(ref_cam_path).relative_to(data_path).__str__(), + "cam_intrinsic": ref_cam_intrinsic, + "token": sample["token"], + "sweeps": [], + "ref_from_car": ref_from_car, + "car_from_global": car_from_global, + "timestamp": ref_time, + } + if with_camera: + info["cams"] = dict() + l2e_r = ref_cs_rec["rotation"] + l2e_t = (ref_cs_rec["translation"],) + e2g_r = ref_pose_rec["rotation"] + e2g_t = ref_pose_rec["translation"] + l2e_r_mat = Quaternion(l2e_r).rotation_matrix + e2g_r_mat = Quaternion(e2g_r).rotation_matrix + + # obtain 6 image's information per frame + camera_types = [ + "CAM_FRONT", + "CAM_FRONT_RIGHT", + "CAM_FRONT_LEFT", + "CAM_BACK", + "CAM_BACK_LEFT", + "CAM_BACK_RIGHT", + ] + for cam in camera_types: + cam_token = sample["data"][cam] + cam_path, _, camera_intrinsics = nusc.get_sample_data(cam_token) + cam_info = obtain_sensor2top( + nusc, cam_token, l2e_t, l2e_r_mat, e2g_t, e2g_r_mat, cam + ) + cam_info["data_path"] = ( + Path(cam_info["data_path"]).relative_to(data_path).__str__() + ) + cam_info.update(camera_intrinsics=camera_intrinsics) + info["cams"].update({cam: cam_info}) + + sample_data_token = sample["data"][chan] + curr_sd_rec = nusc.get("sample_data", sample_data_token) + sweeps = [] + while len(sweeps) < max_sweeps - 1: + if curr_sd_rec["prev"] == "": + if len(sweeps) == 0: + sweep = { + "lidar_path": Path(ref_lidar_path) + .relative_to(data_path) + .__str__(), + "sample_data_token": curr_sd_rec["token"], + "transform_matrix": None, + "time_lag": curr_sd_rec["timestamp"] * 0, + } + sweeps.append(sweep) + else: + sweeps.append(sweeps[-1]) + else: + curr_sd_rec = nusc.get("sample_data", curr_sd_rec["prev"]) + + # Get past pose + current_pose_rec = nusc.get("ego_pose", curr_sd_rec["ego_pose_token"]) + global_from_car = transform_matrix( + current_pose_rec["translation"], + Quaternion(current_pose_rec["rotation"]), + inverse=False, + ) + + # Homogeneous transformation matrix from sensor coordinate frame to ego car frame. 
+ current_cs_rec = nusc.get( + "calibrated_sensor", curr_sd_rec["calibrated_sensor_token"] + ) + car_from_current = transform_matrix( + current_cs_rec["translation"], + Quaternion(current_cs_rec["rotation"]), + inverse=False, + ) + + tm = reduce( + np.dot, + [ref_from_car, car_from_global, global_from_car, car_from_current], + ) + + lidar_path = nusc.get_sample_data_path(curr_sd_rec["token"]) + + time_lag = ref_time - 1e-6 * curr_sd_rec["timestamp"] + + sweep = { + "lidar_path": Path(lidar_path).relative_to(data_path).__str__(), + "sample_data_token": curr_sd_rec["token"], + "transform_matrix": tm, + "global_from_car": global_from_car, + "car_from_current": car_from_current, + "time_lag": time_lag, + } + sweeps.append(sweep) + + info["sweeps"] = sweeps + + assert len(info["sweeps"]) == max_sweeps - 1, ( + f"sweep {curr_sd_rec['token']} only has {len(info['sweeps'])} sweeps, " + f"you should duplicate to sweep num {max_sweeps - 1}" + ) + + if not test: + # processing gt bbox + annotations = [ + nusc.get("sample_annotation", token) for token in sample["anns"] + ] + + # the filtering gives 0.5~1 map improvement + num_lidar_pts = np.array([anno["num_lidar_pts"] for anno in annotations]) + num_radar_pts = np.array([anno["num_radar_pts"] for anno in annotations]) + mask = num_lidar_pts + num_radar_pts > 0 + + locs = np.array([b.center for b in ref_boxes]).reshape(-1, 3) + dims = np.array([b.wlh for b in ref_boxes]).reshape(-1, 3)[ + :, [1, 0, 2] + ] # wlh == > dxdydz (lwh) + velocity = np.array([b.velocity for b in ref_boxes]).reshape(-1, 3) + rots = np.array([quaternion_yaw(b.orientation) for b in ref_boxes]).reshape( + -1, 1 + ) + names = np.array([b.name for b in ref_boxes]) + tokens = np.array([b.token for b in ref_boxes]) + gt_boxes = np.concatenate([locs, dims, rots, velocity[:, :2]], axis=1) + + assert len(annotations) == len(gt_boxes) == len(velocity) + + info["gt_boxes"] = gt_boxes[mask, :] + info["gt_boxes_velocity"] = velocity[mask, :] + info["gt_names"] = np.array( + [map_name_from_general_to_detection[name] for name in names] + )[mask] + info["gt_boxes_token"] = tokens[mask] + info["num_lidar_pts"] = num_lidar_pts[mask] + info["num_radar_pts"] = num_radar_pts[mask] + + # processing gt segment + segment_path = nusc.get("lidarseg", ref_sd_token)["filename"] + info["gt_segment_path"] = segment_path + + if sample["scene_token"] in train_scenes: + train_nusc_infos.append(info) + else: + val_nusc_infos.append(info) + + progress_bar.close() + return train_nusc_infos, val_nusc_infos + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset_root", required=True, help="Path to the nuScenes dataset." + ) + parser.add_argument( + "--output_root", + required=True, + help="Output path where processed information located.", + ) + parser.add_argument( + "--max_sweeps", default=10, type=int, help="Max number of sweeps. Default: 10." 
+ ) + parser.add_argument( + "--with_camera", + action="store_true", + default=False, + help="Whether use camera or not.", + ) + config = parser.parse_args() + + print(f"Loading nuScenes tables for version v1.0-trainval...") + nusc_trainval = NuScenes( + version="v1.0-trainval", dataroot=config.dataset_root, verbose=False + ) + available_scenes_trainval = get_available_scenes(nusc_trainval) + available_scene_names_trainval = [s["name"] for s in available_scenes_trainval] + print("total scene num:", len(nusc_trainval.scene)) + print("exist scene num:", len(available_scenes_trainval)) + assert len(available_scenes_trainval) == len(nusc_trainval.scene) == 850 + + print(f"Loading nuScenes tables for version v1.0-test...") + nusc_test = NuScenes( + version="v1.0-test", dataroot=config.dataset_root, verbose=False + ) + available_scenes_test = get_available_scenes(nusc_test) + available_scene_names_test = [s["name"] for s in available_scenes_test] + print("total scene num:", len(nusc_test.scene)) + print("exist scene num:", len(available_scenes_test)) + assert len(available_scenes_test) == len(nusc_test.scene) == 150 + + train_scenes = splits.train + train_scenes = set( + [ + available_scenes_trainval[available_scene_names_trainval.index(s)]["token"] + for s in train_scenes + ] + ) + test_scenes = splits.test + test_scenes = set( + [ + available_scenes_test[available_scene_names_test.index(s)]["token"] + for s in test_scenes + ] + ) + print(f"Filling trainval information...") + train_nusc_infos, val_nusc_infos = fill_trainval_infos( + config.dataset_root, + nusc_trainval, + train_scenes, + test=False, + max_sweeps=config.max_sweeps, + with_camera=config.with_camera, + ) + print(f"Filling test information...") + test_nusc_infos, _ = fill_trainval_infos( + config.dataset_root, + nusc_test, + test_scenes, + test=True, + max_sweeps=config.max_sweeps, + with_camera=config.with_camera, + ) + + print(f"Saving nuScenes information...") + os.makedirs(os.path.join(config.output_root, "info"), exist_ok=True) + print( + f"train sample: {len(train_nusc_infos)}, val sample: {len(val_nusc_infos)}, test sample: {len(test_nusc_infos)}" + ) + with open( + os.path.join( + config.output_root, + "info", + f"nuscenes_infos_{config.max_sweeps}sweeps_train.pkl", + ), + "wb", + ) as f: + pickle.dump(train_nusc_infos, f) + with open( + os.path.join( + config.output_root, + "info", + f"nuscenes_infos_{config.max_sweeps}sweeps_val.pkl", + ), + "wb", + ) as f: + pickle.dump(val_nusc_infos, f) + with open( + os.path.join( + config.output_root, + "info", + f"nuscenes_infos_{config.max_sweeps}sweeps_test.pkl", + ), + "wb", + ) as f: + pickle.dump(test_nusc_infos, f) diff --git a/Pointcept/pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py b/Pointcept/pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py new file mode 100644 index 0000000000000000000000000000000000000000..d770ad6317996c8a53cd13b2e12af3a536b6dca4 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py @@ -0,0 +1,233 @@ +""" +Preprocessing Script for S3DIS +Parsing normal vectors has a large consumption of memory. Please reduce max_workers if memory is limited. + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
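+
+Example usage (paths are placeholders; --raw_root is required only together
+with --parse_normal):
+    python preprocess_s3dis.py \
+        --splits Area_1 Area_2 Area_3 Area_4 Area_5 Area_6 \
+        --dataset_root /path/to/Stanford3dDataset_v1.2 \
+        --output_root /path/to/output \
+        --raw_root /path/to/Stanford2d3dDataset_noXYZ \
+        --align_angle \
+        --parse_normal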
+""" + +import os +import argparse +import glob +import numpy as np + +try: + import open3d +except ImportError: + import warnings + + warnings.warn("Please install open3d for parsing normal") + +try: + import trimesh +except ImportError: + import warnings + + warnings.warn("Please install trimesh for parsing normal") + +from concurrent.futures import ProcessPoolExecutor +from itertools import repeat + +area_mesh_dict = {} + + +def parse_room( + room, angle, dataset_root, output_root, align_angle=True, parse_normal=False +): + print("Parsing: {}".format(room)) + classes = [ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", + ] + class2label = {cls: i for i, cls in enumerate(classes)} + source_dir = os.path.join(dataset_root, room) + save_path = os.path.join(output_root, room) + os.makedirs(save_path, exist_ok=True) + object_path_list = sorted(glob.glob(os.path.join(source_dir, "Annotations/*.txt"))) + + room_coords = [] + room_colors = [] + room_normals = [] + room_semantic_gt = [] + room_instance_gt = [] + + for object_id, object_path in enumerate(object_path_list): + object_name = os.path.basename(object_path).split("_")[0] + obj = np.loadtxt(object_path) + coords = obj[:, :3] + colors = obj[:, 3:6] + # note: in some room there is 'stairs' class + class_name = object_name if object_name in classes else "clutter" + semantic_gt = np.repeat(class2label[class_name], coords.shape[0]) + semantic_gt = semantic_gt.reshape([-1, 1]) + instance_gt = np.repeat(object_id, coords.shape[0]) + instance_gt = instance_gt.reshape([-1, 1]) + + room_coords.append(coords) + room_colors.append(colors) + room_semantic_gt.append(semantic_gt) + room_instance_gt.append(instance_gt) + + room_coords = np.ascontiguousarray(np.vstack(room_coords)) + + if parse_normal: + x_min, z_max, y_min = np.min(room_coords, axis=0) + x_max, z_min, y_max = np.max(room_coords, axis=0) + z_max = -z_max + z_min = -z_min + max_bound = np.array([x_max, y_max, z_max]) + 0.1 + min_bound = np.array([x_min, y_min, z_min]) - 0.1 + bbox = open3d.geometry.AxisAlignedBoundingBox( + min_bound=min_bound, max_bound=max_bound + ) + # crop room + room_mesh = ( + area_mesh_dict[os.path.dirname(room)] + .crop(bbox) + .transform( + np.array([[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) + ) + ) + vertices = np.array(room_mesh.vertices) + faces = np.array(room_mesh.triangles) + vertex_normals = np.array(room_mesh.vertex_normals) + room_mesh = trimesh.Trimesh( + vertices=vertices, faces=faces, vertex_normals=vertex_normals + ) + (closest_points, distances, face_id) = room_mesh.nearest.on_surface(room_coords) + room_normals = room_mesh.face_normals[face_id] + + if align_angle: + angle = (2 - angle / 180) * np.pi + rot_cos, rot_sin = np.cos(angle), np.sin(angle) + rot_t = np.array([[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0], [0, 0, 1]]) + room_center = (np.max(room_coords, axis=0) + np.min(room_coords, axis=0)) / 2 + room_coords = (room_coords - room_center) @ np.transpose(rot_t) + room_center + if parse_normal: + room_normals = room_normals @ np.transpose(rot_t) + + room_colors = np.ascontiguousarray(np.vstack(room_colors)) + room_semantic_gt = np.ascontiguousarray(np.vstack(room_semantic_gt)) + room_instance_gt = np.ascontiguousarray(np.vstack(room_instance_gt)) + np.save(os.path.join(save_path, "coord.npy"), room_coords.astype(np.float32)) + np.save(os.path.join(save_path, "color.npy"), room_colors.astype(np.uint8)) + 
np.save(os.path.join(save_path, "segment.npy"), room_semantic_gt.astype(np.int16)) + np.save(os.path.join(save_path, "instance.npy"), room_instance_gt.astype(np.int16)) + + if parse_normal: + np.save(os.path.join(save_path, "normal.npy"), room_normals.astype(np.float32)) + + +def main_process(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--splits", + required=True, + nargs="+", + choices=["Area_1", "Area_2", "Area_3", "Area_4", "Area_5", "Area_6"], + help="Splits need to process ([Area_1, Area_2, Area_3, Area_4, Area_5, Area_6]).", + ) + parser.add_argument( + "--dataset_root", required=True, help="Path to Stanford3dDataset_v1.2 dataset" + ) + parser.add_argument( + "--output_root", + required=True, + help="Output path where area folders will be located", + ) + parser.add_argument( + "--raw_root", + default=None, + help="Path to Stanford2d3dDataset_noXYZ dataset (optional)", + ) + parser.add_argument( + "--align_angle", action="store_true", help="Whether align room angles" + ) + parser.add_argument( + "--parse_normal", action="store_true", help="Whether process normal" + ) + parser.add_argument( + "--num_workers", default=1, type=int, help="Num workers for preprocessing." + ) + args = parser.parse_args() + + if args.parse_normal: + assert args.raw_root is not None + + room_list = [] + angle_list = [] + + # Load room information + print("Loading room information ...") + for split in args.splits: + area_info = np.loadtxt( + os.path.join( + args.dataset_root, + split, + f"{split}_alignmentAngle.txt", + ), + dtype=str, + ) + room_list += [os.path.join(split, room_info[0]) for room_info in area_info] + angle_list += [int(room_info[1]) for room_info in area_info] + + if args.parse_normal: + # load raw mesh file to extract normal + print("Loading raw mesh file ...") + for split in args.splits: + if split != "Area_5": + mesh_dir = os.path.join(args.raw_root, split, "3d", "rgb.obj") + mesh = open3d.io.read_triangle_mesh(mesh_dir) + mesh.triangle_uvs.clear() + else: + mesh_a_dir = os.path.join(args.raw_root, f"{split}a", "3d", "rgb.obj") + mesh_b_dir = os.path.join(args.raw_root, f"{split}b", "3d", "rgb.obj") + mesh_a = open3d.io.read_triangle_mesh(mesh_a_dir) + mesh_a.triangle_uvs.clear() + mesh_b = open3d.io.read_triangle_mesh(mesh_b_dir) + mesh_b.triangle_uvs.clear() + mesh_b = mesh_b.transform( + np.array( + [ + [0, 0, -1, -4.09703582], + [0, 1, 0, 0], + [1, 0, 0, -6.22617759], + [0, 0, 0, 1], + ] + ) + ) + mesh = mesh_a + mesh_b + area_mesh_dict[split] = mesh + print(f"{split} mesh is loaded") + + # Preprocess data. + print("Processing scenes...") + pool = ProcessPoolExecutor( + max_workers=args.num_workers + ) # peak 110G memory when parsing normal. + _ = list( + pool.map( + parse_room, + room_list, + angle_list, + repeat(args.dataset_root), + repeat(args.output_root), + repeat(args.align_angle), + repeat(args.parse_normal), + ) + ) + + +if __name__ == "__main__": + main_process() diff --git a/Pointcept/pointcept/datasets/preprocessing/sampling_chunking_data.py b/Pointcept/pointcept/datasets/preprocessing/sampling_chunking_data.py new file mode 100644 index 0000000000000000000000000000000000000000..96536d415370bf28c0f1cc89312b2fde719c9a58 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/sampling_chunking_data.py @@ -0,0 +1,149 @@ +""" +Chunking Data + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
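+
+Example usage (paths are placeholders; chunk_range/chunk_stride are in the
+same units as the stored coordinates):
+    python sampling_chunking_data.py \
+        --dataset_root /path/to/processed/scannetpp \
+        --split train \
+        --grid_size 0.01 \
+        --chunk_range 6 6 \
+        --chunk_stride 3 3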
+""" + +import os +import argparse +import numpy as np +import multiprocessing as mp +from concurrent.futures import ProcessPoolExecutor +from itertools import repeat +from pathlib import Path + + +def chunking_scene( + name, + dataset_root, + split, + grid_size=None, + chunk_range=(6, 6), + chunk_stride=(3, 3), + chunk_minimum_size=10000, +): + print(f"Chunking scene {name} in {split} split") + dataset_root = Path(dataset_root) + scene_path = dataset_root / split / name + assets = os.listdir(scene_path) + data_dict = dict() + for asset in assets: + if not asset.endswith(".npy"): + continue + data_dict[asset[:-4]] = np.load(scene_path / asset) + coord = data_dict["coord"] - data_dict["coord"].min(axis=0) + + if grid_size is not None: + grid_coord = np.floor(coord / grid_size).astype(int) + _, idx = np.unique(grid_coord, axis=0, return_index=True) + coord = coord[idx] + for key in data_dict.keys(): + data_dict[key] = data_dict[key][idx] + + bev_range = coord.max(axis=0)[:2] + x, y = np.meshgrid( + np.arange(0, bev_range[0] + chunk_stride[0] - chunk_range[0], chunk_stride[0]), + np.arange(0, bev_range[0] + chunk_stride[0] - chunk_range[0], chunk_stride[0]), + indexing="ij", + ) + chunks = np.concatenate([x.reshape([-1, 1]), y.reshape([-1, 1])], axis=-1) + chunk_idx = 0 + for chunk in chunks: + mask = ( + (coord[:, 0] >= chunk[0]) + & (coord[:, 0] < chunk[0] + chunk_range[0]) + & (coord[:, 1] >= chunk[1]) + & (coord[:, 1] < chunk[1] + chunk_range[1]) + ) + if np.sum(mask) < chunk_minimum_size: + continue + + chunk_data_name = f"{name}_{chunk_idx}" + if grid_size is not None: + chunk_split_name = ( + f"{split}_" + f"grid{grid_size * 100:.0f}mm_" + f"chunk{chunk_range[0]}x{chunk_range[1]}_" + f"stride{chunk_stride[0]}x{chunk_stride[1]}" + ) + else: + chunk_split_name = ( + f"{split}_" + f"chunk{chunk_range[0]}x{chunk_range[1]}_" + f"stride{chunk_stride[0]}x{chunk_stride[1]}" + ) + + chunk_save_path = dataset_root / chunk_split_name / chunk_data_name + chunk_save_path.mkdir(parents=True, exist_ok=True) + for key in data_dict.keys(): + np.save(chunk_save_path / f"{key}.npy", data_dict[key][mask]) + chunk_idx += 1 + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset_root", + required=True, + help="Path to the Pointcept processed ScanNet++ dataset.", + ) + parser.add_argument( + "--split", + required=True, + default="train", + type=str, + help="Split need to process.", + ) + parser.add_argument( + "--grid_size", + default=None, + type=float, + help="Grid size for initial grid sampling", + ) + parser.add_argument( + "--chunk_range", + default=[6, 6], + type=int, + nargs="+", + help="Range of each chunk, e.g. --chunk_range 6 6", + ) + parser.add_argument( + "--chunk_stride", + default=[3, 3], + type=int, + nargs="+", + help="Stride of each chunk, e.g. 
--chunk_stride 3 3", + ) + parser.add_argument( + "--chunk_minimum_size", + default=10000, + type=int, + help="Minimum number of points in each chunk", + ) + parser.add_argument( + "--num_workers", + default=mp.cpu_count(), + type=int, + help="Num workers for preprocessing.", + ) + + config = parser.parse_args() + config.dataset_root = Path(config.dataset_root) + data_list = os.listdir(config.dataset_root / config.split) + + print("Processing scenes...") + pool = ProcessPoolExecutor(max_workers=config.num_workers) + _ = list( + pool.map( + chunking_scene, + data_list, + repeat(config.dataset_root), + repeat(config.split), + repeat(config.grid_size), + repeat(config.chunk_range), + repeat(config.chunk_stride), + repeat(config.chunk_minimum_size), + ) + ) + pool.shutdown() diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/classes_ObjClassification-ShapeNetCore55.txt b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/classes_ObjClassification-ShapeNetCore55.txt new file mode 100644 index 0000000000000000000000000000000000000000..e53f5bcb2c1480f42ee9327940246258aa434f88 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/classes_ObjClassification-ShapeNetCore55.txt @@ -0,0 +1,17 @@ +1 trash +3 basket +4 bathtub +5 bed +9 shelf +13 cabinet +18 chair +20 keyboard +22 tv +30 lamp +31 laptop +35 microwave +39 pillow +42 printer +47 sofa +48 stove +49 table diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/classes_SemVoxLabel-nyu40id.txt b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/classes_SemVoxLabel-nyu40id.txt new file mode 100644 index 0000000000000000000000000000000000000000..48e228766391e0f0234c2eed086e31f738068a4b --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/classes_SemVoxLabel-nyu40id.txt @@ -0,0 +1,20 @@ +1 wall +2 floor +3 cabinet +4 bed +5 chair +6 sofa +7 table +8 door +9 window +10 bookshelf +11 picture +12 counter +14 desk +16 curtain +24 refridgerator +28 shower curtain +33 toilet +34 sink +36 bathtub +39 otherfurniture \ No newline at end of file diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_constants.py b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_constants.py new file mode 100644 index 0000000000000000000000000000000000000000..0404fd6aa8ad14ad729354ce184d4b51834bfd1b --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_constants.py @@ -0,0 +1,704 @@ +# ScanNet Benchmark constants +VALID_CLASS_IDS_20 = ( + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 14, + 16, + 24, + 28, + 33, + 34, + 36, + 39, +) + +CLASS_LABELS_20 = ( + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "bookshelf", + "picture", + "counter", + "desk", + "curtain", + "refrigerator", + "shower curtain", + "toilet", + "sink", + "bathtub", + "otherfurniture", +) + +SCANNET_COLOR_MAP_20 = { + 0: (0.0, 0.0, 0.0), + 1: (174.0, 199.0, 232.0), + 2: (152.0, 223.0, 138.0), + 3: (31.0, 119.0, 180.0), + 4: (255.0, 187.0, 120.0), + 5: (188.0, 189.0, 34.0), + 6: (140.0, 86.0, 75.0), + 7: (255.0, 152.0, 150.0), + 8: (214.0, 39.0, 40.0), + 9: (197.0, 176.0, 213.0), + 10: (148.0, 103.0, 189.0), + 11: (196.0, 156.0, 148.0), + 12: (23.0, 190.0, 207.0), + 14: (247.0, 182.0, 210.0), + 15: (66.0, 188.0, 102.0), + 16: (219.0, 219.0, 141.0), + 17: (140.0, 57.0, 197.0), + 18: (202.0, 185.0, 52.0), + 19: (51.0, 176.0, 203.0), + 
20: (200.0, 54.0, 131.0), + 21: (92.0, 193.0, 61.0), + 22: (78.0, 71.0, 183.0), + 23: (172.0, 114.0, 82.0), + 24: (255.0, 127.0, 14.0), + 25: (91.0, 163.0, 138.0), + 26: (153.0, 98.0, 156.0), + 27: (140.0, 153.0, 101.0), + 28: (158.0, 218.0, 229.0), + 29: (100.0, 125.0, 154.0), + 30: (178.0, 127.0, 135.0), + 32: (146.0, 111.0, 194.0), + 33: (44.0, 160.0, 44.0), + 34: (112.0, 128.0, 144.0), + 35: (96.0, 207.0, 209.0), + 36: (227.0, 119.0, 194.0), + 37: (213.0, 92.0, 176.0), + 38: (94.0, 106.0, 211.0), + 39: (82.0, 84.0, 163.0), + 40: (100.0, 85.0, 144.0), +} + +# ScanNet200 Benchmark constants +VALID_CLASS_IDS_200 = ( + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 21, + 22, + 23, + 24, + 26, + 27, + 28, + 29, + 31, + 32, + 33, + 34, + 35, + 36, + 38, + 39, + 40, + 41, + 42, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 54, + 55, + 56, + 57, + 58, + 59, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 82, + 84, + 86, + 87, + 88, + 89, + 90, + 93, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 110, + 112, + 115, + 116, + 118, + 120, + 121, + 122, + 125, + 128, + 130, + 131, + 132, + 134, + 136, + 138, + 139, + 140, + 141, + 145, + 148, + 154, + 155, + 156, + 157, + 159, + 161, + 163, + 165, + 166, + 168, + 169, + 170, + 177, + 180, + 185, + 188, + 191, + 193, + 195, + 202, + 208, + 213, + 214, + 221, + 229, + 230, + 232, + 233, + 242, + 250, + 261, + 264, + 276, + 283, + 286, + 300, + 304, + 312, + 323, + 325, + 331, + 342, + 356, + 370, + 392, + 395, + 399, + 408, + 417, + 488, + 540, + 562, + 570, + 572, + 581, + 609, + 748, + 776, + 1156, + 1163, + 1164, + 1165, + 1166, + 1167, + 1168, + 1169, + 1170, + 1171, + 1172, + 1173, + 1174, + 1175, + 1176, + 1178, + 1179, + 1180, + 1181, + 1182, + 1183, + 1184, + 1185, + 1186, + 1187, + 1188, + 1189, + 1190, + 1191, +) + +CLASS_LABELS_200 = ( + "wall", + "chair", + "floor", + "table", + "door", + "couch", + "cabinet", + "shelf", + "desk", + "office chair", + "bed", + "pillow", + "sink", + "picture", + "window", + "toilet", + "bookshelf", + "monitor", + "curtain", + "book", + "armchair", + "coffee table", + "box", + "refrigerator", + "lamp", + "kitchen cabinet", + "towel", + "clothes", + "tv", + "nightstand", + "counter", + "dresser", + "stool", + "cushion", + "plant", + "ceiling", + "bathtub", + "end table", + "dining table", + "keyboard", + "bag", + "backpack", + "toilet paper", + "printer", + "tv stand", + "whiteboard", + "blanket", + "shower curtain", + "trash can", + "closet", + "stairs", + "microwave", + "stove", + "shoe", + "computer tower", + "bottle", + "bin", + "ottoman", + "bench", + "board", + "washing machine", + "mirror", + "copier", + "basket", + "sofa chair", + "file cabinet", + "fan", + "laptop", + "shower", + "paper", + "person", + "paper towel dispenser", + "oven", + "blinds", + "rack", + "plate", + "blackboard", + "piano", + "suitcase", + "rail", + "radiator", + "recycling bin", + "container", + "wardrobe", + "soap dispenser", + "telephone", + "bucket", + "clock", + "stand", + "light", + "laundry basket", + "pipe", + "clothes dryer", + "guitar", + "toilet paper holder", + "seat", + "speaker", + "column", + "bicycle", + "ladder", + "bathroom stall", + "shower wall", + "cup", + "jacket", + "storage bin", + "coffee maker", + "dishwasher", + "paper towel roll", + "machine", + "mat", + "windowsill", + "bar", + "toaster", + "bulletin board", + "ironing board", + 
"fireplace", + "soap dish", + "kitchen counter", + "doorframe", + "toilet paper dispenser", + "mini fridge", + "fire extinguisher", + "ball", + "hat", + "shower curtain rod", + "water cooler", + "paper cutter", + "tray", + "shower door", + "pillar", + "ledge", + "toaster oven", + "mouse", + "toilet seat cover dispenser", + "furniture", + "cart", + "storage container", + "scale", + "tissue box", + "light switch", + "crate", + "power outlet", + "decoration", + "sign", + "projector", + "closet door", + "vacuum cleaner", + "candle", + "plunger", + "stuffed animal", + "headphones", + "dish rack", + "broom", + "guitar case", + "range hood", + "dustpan", + "hair dryer", + "water bottle", + "handicap bar", + "purse", + "vent", + "shower floor", + "water pitcher", + "mailbox", + "bowl", + "paper bag", + "alarm clock", + "music stand", + "projector screen", + "divider", + "laundry detergent", + "bathroom counter", + "object", + "bathroom vanity", + "closet wall", + "laundry hamper", + "bathroom stall door", + "ceiling light", + "trash bin", + "dumbbell", + "stair rail", + "tube", + "bathroom cabinet", + "cd case", + "closet rod", + "coffee kettle", + "structure", + "shower head", + "keyboard piano", + "case of water bottles", + "coat rack", + "storage organizer", + "folded chair", + "fire alarm", + "power strip", + "calendar", + "poster", + "potted plant", + "luggage", + "mattress", +) + +SCANNET_COLOR_MAP_200 = { + 0: (0.0, 0.0, 0.0), + 1: (174.0, 199.0, 232.0), + 2: (188.0, 189.0, 34.0), + 3: (152.0, 223.0, 138.0), + 4: (255.0, 152.0, 150.0), + 5: (214.0, 39.0, 40.0), + 6: (91.0, 135.0, 229.0), + 7: (31.0, 119.0, 180.0), + 8: (229.0, 91.0, 104.0), + 9: (247.0, 182.0, 210.0), + 10: (91.0, 229.0, 110.0), + 11: (255.0, 187.0, 120.0), + 13: (141.0, 91.0, 229.0), + 14: (112.0, 128.0, 144.0), + 15: (196.0, 156.0, 148.0), + 16: (197.0, 176.0, 213.0), + 17: (44.0, 160.0, 44.0), + 18: (148.0, 103.0, 189.0), + 19: (229.0, 91.0, 223.0), + 21: (219.0, 219.0, 141.0), + 22: (192.0, 229.0, 91.0), + 23: (88.0, 218.0, 137.0), + 24: (58.0, 98.0, 137.0), + 26: (177.0, 82.0, 239.0), + 27: (255.0, 127.0, 14.0), + 28: (237.0, 204.0, 37.0), + 29: (41.0, 206.0, 32.0), + 31: (62.0, 143.0, 148.0), + 32: (34.0, 14.0, 130.0), + 33: (143.0, 45.0, 115.0), + 34: (137.0, 63.0, 14.0), + 35: (23.0, 190.0, 207.0), + 36: (16.0, 212.0, 139.0), + 38: (90.0, 119.0, 201.0), + 39: (125.0, 30.0, 141.0), + 40: (150.0, 53.0, 56.0), + 41: (186.0, 197.0, 62.0), + 42: (227.0, 119.0, 194.0), + 44: (38.0, 100.0, 128.0), + 45: (120.0, 31.0, 243.0), + 46: (154.0, 59.0, 103.0), + 47: (169.0, 137.0, 78.0), + 48: (143.0, 245.0, 111.0), + 49: (37.0, 230.0, 205.0), + 50: (14.0, 16.0, 155.0), + 51: (196.0, 51.0, 182.0), + 52: (237.0, 80.0, 38.0), + 54: (138.0, 175.0, 62.0), + 55: (158.0, 218.0, 229.0), + 56: (38.0, 96.0, 167.0), + 57: (190.0, 77.0, 246.0), + 58: (208.0, 49.0, 84.0), + 59: (208.0, 193.0, 72.0), + 62: (55.0, 220.0, 57.0), + 63: (10.0, 125.0, 140.0), + 64: (76.0, 38.0, 202.0), + 65: (191.0, 28.0, 135.0), + 66: (211.0, 120.0, 42.0), + 67: (118.0, 174.0, 76.0), + 68: (17.0, 242.0, 171.0), + 69: (20.0, 65.0, 247.0), + 70: (208.0, 61.0, 222.0), + 71: (162.0, 62.0, 60.0), + 72: (210.0, 235.0, 62.0), + 73: (45.0, 152.0, 72.0), + 74: (35.0, 107.0, 149.0), + 75: (160.0, 89.0, 237.0), + 76: (227.0, 56.0, 125.0), + 77: (169.0, 143.0, 81.0), + 78: (42.0, 143.0, 20.0), + 79: (25.0, 160.0, 151.0), + 80: (82.0, 75.0, 227.0), + 82: (253.0, 59.0, 222.0), + 84: (240.0, 130.0, 89.0), + 86: (123.0, 172.0, 47.0), + 87: (71.0, 194.0, 133.0), + 88: (24.0, 94.0, 
205.0), + 89: (134.0, 16.0, 179.0), + 90: (159.0, 32.0, 52.0), + 93: (213.0, 208.0, 88.0), + 95: (64.0, 158.0, 70.0), + 96: (18.0, 163.0, 194.0), + 97: (65.0, 29.0, 153.0), + 98: (177.0, 10.0, 109.0), + 99: (152.0, 83.0, 7.0), + 100: (83.0, 175.0, 30.0), + 101: (18.0, 199.0, 153.0), + 102: (61.0, 81.0, 208.0), + 103: (213.0, 85.0, 216.0), + 104: (170.0, 53.0, 42.0), + 105: (161.0, 192.0, 38.0), + 106: (23.0, 241.0, 91.0), + 107: (12.0, 103.0, 170.0), + 110: (151.0, 41.0, 245.0), + 112: (133.0, 51.0, 80.0), + 115: (184.0, 162.0, 91.0), + 116: (50.0, 138.0, 38.0), + 118: (31.0, 237.0, 236.0), + 120: (39.0, 19.0, 208.0), + 121: (223.0, 27.0, 180.0), + 122: (254.0, 141.0, 85.0), + 125: (97.0, 144.0, 39.0), + 128: (106.0, 231.0, 176.0), + 130: (12.0, 61.0, 162.0), + 131: (124.0, 66.0, 140.0), + 132: (137.0, 66.0, 73.0), + 134: (250.0, 253.0, 26.0), + 136: (55.0, 191.0, 73.0), + 138: (60.0, 126.0, 146.0), + 139: (153.0, 108.0, 234.0), + 140: (184.0, 58.0, 125.0), + 141: (135.0, 84.0, 14.0), + 145: (139.0, 248.0, 91.0), + 148: (53.0, 200.0, 172.0), + 154: (63.0, 69.0, 134.0), + 155: (190.0, 75.0, 186.0), + 156: (127.0, 63.0, 52.0), + 157: (141.0, 182.0, 25.0), + 159: (56.0, 144.0, 89.0), + 161: (64.0, 160.0, 250.0), + 163: (182.0, 86.0, 245.0), + 165: (139.0, 18.0, 53.0), + 166: (134.0, 120.0, 54.0), + 168: (49.0, 165.0, 42.0), + 169: (51.0, 128.0, 133.0), + 170: (44.0, 21.0, 163.0), + 177: (232.0, 93.0, 193.0), + 180: (176.0, 102.0, 54.0), + 185: (116.0, 217.0, 17.0), + 188: (54.0, 209.0, 150.0), + 191: (60.0, 99.0, 204.0), + 193: (129.0, 43.0, 144.0), + 195: (252.0, 100.0, 106.0), + 202: (187.0, 196.0, 73.0), + 208: (13.0, 158.0, 40.0), + 213: (52.0, 122.0, 152.0), + 214: (128.0, 76.0, 202.0), + 221: (187.0, 50.0, 115.0), + 229: (180.0, 141.0, 71.0), + 230: (77.0, 208.0, 35.0), + 232: (72.0, 183.0, 168.0), + 233: (97.0, 99.0, 203.0), + 242: (172.0, 22.0, 158.0), + 250: (155.0, 64.0, 40.0), + 261: (118.0, 159.0, 30.0), + 264: (69.0, 252.0, 148.0), + 276: (45.0, 103.0, 173.0), + 283: (111.0, 38.0, 149.0), + 286: (184.0, 9.0, 49.0), + 300: (188.0, 174.0, 67.0), + 304: (53.0, 206.0, 53.0), + 312: (97.0, 235.0, 252.0), + 323: (66.0, 32.0, 182.0), + 325: (236.0, 114.0, 195.0), + 331: (241.0, 154.0, 83.0), + 342: (133.0, 240.0, 52.0), + 356: (16.0, 205.0, 144.0), + 370: (75.0, 101.0, 198.0), + 392: (237.0, 95.0, 251.0), + 395: (191.0, 52.0, 49.0), + 399: (227.0, 254.0, 54.0), + 408: (49.0, 206.0, 87.0), + 417: (48.0, 113.0, 150.0), + 488: (125.0, 73.0, 182.0), + 540: (229.0, 32.0, 114.0), + 562: (158.0, 119.0, 28.0), + 570: (60.0, 205.0, 27.0), + 572: (18.0, 215.0, 201.0), + 581: (79.0, 76.0, 153.0), + 609: (134.0, 13.0, 116.0), + 748: (192.0, 97.0, 63.0), + 776: (108.0, 163.0, 18.0), + 1156: (95.0, 220.0, 156.0), + 1163: (98.0, 141.0, 208.0), + 1164: (144.0, 19.0, 193.0), + 1165: (166.0, 36.0, 57.0), + 1166: (212.0, 202.0, 34.0), + 1167: (23.0, 206.0, 34.0), + 1168: (91.0, 211.0, 236.0), + 1169: (79.0, 55.0, 137.0), + 1170: (182.0, 19.0, 117.0), + 1171: (134.0, 76.0, 14.0), + 1172: (87.0, 185.0, 28.0), + 1173: (82.0, 224.0, 187.0), + 1174: (92.0, 110.0, 214.0), + 1175: (168.0, 80.0, 171.0), + 1176: (197.0, 63.0, 51.0), + 1178: (175.0, 199.0, 77.0), + 1179: (62.0, 180.0, 98.0), + 1180: (8.0, 91.0, 150.0), + 1181: (77.0, 15.0, 130.0), + 1182: (154.0, 65.0, 96.0), + 1183: (197.0, 152.0, 11.0), + 1184: (59.0, 155.0, 45.0), + 1185: (12.0, 147.0, 145.0), + 1186: (54.0, 35.0, 219.0), + 1187: (210.0, 73.0, 181.0), + 1188: (221.0, 124.0, 77.0), + 1189: (149.0, 214.0, 66.0), + 1190: (72.0, 185.0, 134.0), + 
1191: (42.0, 94.0, 198.0), +} + +# For instance segmentation the non-object categories +VALID_PANOPTIC_IDS = (1, 3) + +CLASS_LABELS_PANOPTIC = ("wall", "floor") diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_splits.py b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_splits.py new file mode 100644 index 0000000000000000000000000000000000000000..39ccc3c60bf289199342332e455fadb5b22129ee --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannet200_splits.py @@ -0,0 +1,625 @@ +# This file contains the HEAD - COMMON - TAIL split category ids for ScanNet 200 + +HEAD_CATS_SCANNET_200 = [ + "tv stand", + "curtain", + "blinds", + "shower curtain", + "bookshelf", + "tv", + "kitchen cabinet", + "pillow", + "lamp", + "dresser", + "monitor", + "object", + "ceiling", + "board", + "stove", + "closet wall", + "couch", + "office chair", + "kitchen counter", + "shower", + "closet", + "doorframe", + "sofa chair", + "mailbox", + "nightstand", + "washing machine", + "picture", + "book", + "sink", + "recycling bin", + "table", + "backpack", + "shower wall", + "toilet", + "copier", + "counter", + "stool", + "refrigerator", + "window", + "file cabinet", + "chair", + "wall", + "plant", + "coffee table", + "stairs", + "armchair", + "cabinet", + "bathroom vanity", + "bathroom stall", + "mirror", + "blackboard", + "trash can", + "stair rail", + "box", + "towel", + "door", + "clothes", + "whiteboard", + "bed", + "floor", + "bathtub", + "desk", + "wardrobe", + "clothes dryer", + "radiator", + "shelf", +] +COMMON_CATS_SCANNET_200 = [ + "cushion", + "end table", + "dining table", + "keyboard", + "bag", + "toilet paper", + "printer", + "blanket", + "microwave", + "shoe", + "computer tower", + "bottle", + "bin", + "ottoman", + "bench", + "basket", + "fan", + "laptop", + "person", + "paper towel dispenser", + "oven", + "rack", + "piano", + "suitcase", + "rail", + "container", + "telephone", + "stand", + "light", + "laundry basket", + "pipe", + "seat", + "column", + "bicycle", + "ladder", + "jacket", + "storage bin", + "coffee maker", + "dishwasher", + "machine", + "mat", + "windowsill", + "bulletin board", + "fireplace", + "mini fridge", + "water cooler", + "shower door", + "pillar", + "ledge", + "furniture", + "cart", + "decoration", + "closet door", + "vacuum cleaner", + "dish rack", + "range hood", + "projector screen", + "divider", + "bathroom counter", + "laundry hamper", + "bathroom stall door", + "ceiling light", + "trash bin", + "bathroom cabinet", + "structure", + "storage organizer", + "potted plant", + "mattress", +] +TAIL_CATS_SCANNET_200 = [ + "paper", + "plate", + "soap dispenser", + "bucket", + "clock", + "guitar", + "toilet paper holder", + "speaker", + "cup", + "paper towel roll", + "bar", + "toaster", + "ironing board", + "soap dish", + "toilet paper dispenser", + "fire extinguisher", + "ball", + "hat", + "shower curtain rod", + "paper cutter", + "tray", + "toaster oven", + "mouse", + "toilet seat cover dispenser", + "storage container", + "scale", + "tissue box", + "light switch", + "crate", + "power outlet", + "sign", + "projector", + "candle", + "plunger", + "stuffed animal", + "headphones", + "broom", + "guitar case", + "dustpan", + "hair dryer", + "water bottle", + "handicap bar", + "purse", + "vent", + "shower floor", + "water pitcher", + "bowl", + "paper bag", + "alarm clock", + "music stand", + "laundry detergent", + "dumbbell", + "tube", + "cd case", + "closet rod", + "coffee kettle", + "shower head", + 
"keyboard piano", + "case of water bottles", + "coat rack", + "folded chair", + "fire alarm", + "power strip", + "calendar", + "poster", + "luggage", +] + + +# Given the different size of the official train and val sets, not all ScanNet200 categories are present in the validation set. +# Here we list of categories with labels and IDs present in both train and validation set, and the remaining categories those are present in train, but not in val +# We dont evaluate on unseen validation categories in this benchmark + +VALID_CLASS_IDS_200_VALIDATION = ( + "wall", + "chair", + "floor", + "table", + "door", + "couch", + "cabinet", + "shelf", + "desk", + "office chair", + "bed", + "pillow", + "sink", + "picture", + "window", + "toilet", + "bookshelf", + "monitor", + "curtain", + "book", + "armchair", + "coffee table", + "box", + "refrigerator", + "lamp", + "kitchen cabinet", + "towel", + "clothes", + "tv", + "nightstand", + "counter", + "dresser", + "stool", + "cushion", + "plant", + "ceiling", + "bathtub", + "end table", + "dining table", + "keyboard", + "bag", + "backpack", + "toilet paper", + "printer", + "tv stand", + "whiteboard", + "blanket", + "shower curtain", + "trash can", + "closet", + "stairs", + "microwave", + "stove", + "shoe", + "computer tower", + "bottle", + "bin", + "ottoman", + "bench", + "board", + "washing machine", + "mirror", + "copier", + "basket", + "sofa chair", + "file cabinet", + "fan", + "laptop", + "shower", + "paper", + "person", + "paper towel dispenser", + "oven", + "blinds", + "rack", + "plate", + "blackboard", + "piano", + "suitcase", + "rail", + "radiator", + "recycling bin", + "container", + "wardrobe", + "soap dispenser", + "telephone", + "bucket", + "clock", + "stand", + "light", + "laundry basket", + "pipe", + "clothes dryer", + "guitar", + "toilet paper holder", + "seat", + "speaker", + "column", + "ladder", + "bathroom stall", + "shower wall", + "cup", + "jacket", + "storage bin", + "coffee maker", + "dishwasher", + "paper towel roll", + "machine", + "mat", + "windowsill", + "bar", + "toaster", + "bulletin board", + "ironing board", + "fireplace", + "soap dish", + "kitchen counter", + "doorframe", + "toilet paper dispenser", + "mini fridge", + "fire extinguisher", + "ball", + "hat", + "shower curtain rod", + "water cooler", + "paper cutter", + "tray", + "shower door", + "pillar", + "ledge", + "toaster oven", + "mouse", + "toilet seat cover dispenser", + "furniture", + "cart", + "scale", + "tissue box", + "light switch", + "crate", + "power outlet", + "decoration", + "sign", + "projector", + "closet door", + "vacuum cleaner", + "plunger", + "stuffed animal", + "headphones", + "dish rack", + "broom", + "range hood", + "dustpan", + "hair dryer", + "water bottle", + "handicap bar", + "vent", + "shower floor", + "water pitcher", + "mailbox", + "bowl", + "paper bag", + "projector screen", + "divider", + "laundry detergent", + "bathroom counter", + "object", + "bathroom vanity", + "closet wall", + "laundry hamper", + "bathroom stall door", + "ceiling light", + "trash bin", + "dumbbell", + "stair rail", + "tube", + "bathroom cabinet", + "closet rod", + "coffee kettle", + "shower head", + "keyboard piano", + "case of water bottles", + "coat rack", + "folded chair", + "fire alarm", + "power strip", + "calendar", + "poster", + "potted plant", + "mattress", +) + +CLASS_LABELS_200_VALIDATION = ( + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 21, + 22, + 23, + 24, + 26, + 27, + 28, + 29, + 31, + 32, + 33, + 34, + 
35, + 36, + 38, + 39, + 40, + 41, + 42, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 54, + 55, + 56, + 57, + 58, + 59, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 82, + 84, + 86, + 87, + 88, + 89, + 90, + 93, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 110, + 112, + 115, + 116, + 118, + 120, + 122, + 125, + 128, + 130, + 131, + 132, + 134, + 136, + 138, + 139, + 140, + 141, + 145, + 148, + 154, + 155, + 156, + 157, + 159, + 161, + 163, + 165, + 166, + 168, + 169, + 170, + 177, + 180, + 185, + 188, + 191, + 193, + 195, + 202, + 208, + 213, + 214, + 229, + 230, + 232, + 233, + 242, + 250, + 261, + 264, + 276, + 283, + 300, + 304, + 312, + 323, + 325, + 342, + 356, + 370, + 392, + 395, + 408, + 417, + 488, + 540, + 562, + 570, + 609, + 748, + 776, + 1156, + 1163, + 1164, + 1165, + 1166, + 1167, + 1168, + 1169, + 1170, + 1171, + 1172, + 1173, + 1175, + 1176, + 1179, + 1180, + 1181, + 1182, + 1184, + 1185, + 1186, + 1187, + 1188, + 1189, + 1191, +) + +VALID_CLASS_IDS_200_TRAIN_ONLY = ( + "bicycle", + "storage container", + "candle", + "guitar case", + "purse", + "alarm clock", + "music stand", + "cd case", + "structure", + "storage organizer", + "luggage", +) + +CLASS_LABELS_200_TRAIN_ONLY = ( + 121, + 221, + 286, + 331, + 399, + 572, + 581, + 1174, + 1178, + 1183, + 1190, +) diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannet_means.npz b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannet_means.npz new file mode 100644 index 0000000000000000000000000000000000000000..e57647c9a3553ca4653a9d1e53ed4a2a58def822 Binary files /dev/null and b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannet_means.npz differ diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_test.txt b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_test.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9e7d9205321e8ca047a527466f4b7100c9c9d2c --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_test.txt @@ -0,0 +1,312 @@ +scene0568_00 +scene0568_01 +scene0568_02 +scene0304_00 +scene0488_00 +scene0488_01 +scene0412_00 +scene0412_01 +scene0217_00 +scene0019_00 +scene0019_01 +scene0414_00 +scene0575_00 +scene0575_01 +scene0575_02 +scene0426_00 +scene0426_01 +scene0426_02 +scene0426_03 +scene0549_00 +scene0549_01 +scene0578_00 +scene0578_01 +scene0578_02 +scene0665_00 +scene0665_01 +scene0050_00 +scene0050_01 +scene0050_02 +scene0257_00 +scene0025_00 +scene0025_01 +scene0025_02 +scene0583_00 +scene0583_01 +scene0583_02 +scene0701_00 +scene0701_01 +scene0701_02 +scene0580_00 +scene0580_01 +scene0565_00 +scene0169_00 +scene0169_01 +scene0655_00 +scene0655_01 +scene0655_02 +scene0063_00 +scene0221_00 +scene0221_01 +scene0591_00 +scene0591_01 +scene0591_02 +scene0678_00 +scene0678_01 +scene0678_02 +scene0462_00 +scene0427_00 +scene0595_00 +scene0193_00 +scene0193_01 +scene0164_00 +scene0164_01 +scene0164_02 +scene0164_03 +scene0598_00 +scene0598_01 +scene0598_02 +scene0599_00 +scene0599_01 +scene0599_02 +scene0328_00 +scene0300_00 +scene0300_01 +scene0354_00 +scene0458_00 +scene0458_01 +scene0423_00 +scene0423_01 +scene0423_02 +scene0307_00 +scene0307_01 +scene0307_02 +scene0606_00 +scene0606_01 +scene0606_02 +scene0432_00 +scene0432_01 +scene0608_00 +scene0608_01 +scene0608_02 +scene0651_00 +scene0651_01 +scene0651_02 +scene0430_00 
+scene0430_01 +scene0689_00 +scene0357_00 +scene0357_01 +scene0574_00 +scene0574_01 +scene0574_02 +scene0329_00 +scene0329_01 +scene0329_02 +scene0153_00 +scene0153_01 +scene0616_00 +scene0616_01 +scene0671_00 +scene0671_01 +scene0618_00 +scene0382_00 +scene0382_01 +scene0490_00 +scene0621_00 +scene0607_00 +scene0607_01 +scene0149_00 +scene0695_00 +scene0695_01 +scene0695_02 +scene0695_03 +scene0389_00 +scene0377_00 +scene0377_01 +scene0377_02 +scene0342_00 +scene0139_00 +scene0629_00 +scene0629_01 +scene0629_02 +scene0496_00 +scene0633_00 +scene0633_01 +scene0518_00 +scene0652_00 +scene0406_00 +scene0406_01 +scene0406_02 +scene0144_00 +scene0144_01 +scene0494_00 +scene0278_00 +scene0278_01 +scene0316_00 +scene0609_00 +scene0609_01 +scene0609_02 +scene0609_03 +scene0084_00 +scene0084_01 +scene0084_02 +scene0696_00 +scene0696_01 +scene0696_02 +scene0351_00 +scene0351_01 +scene0643_00 +scene0644_00 +scene0645_00 +scene0645_01 +scene0645_02 +scene0081_00 +scene0081_01 +scene0081_02 +scene0647_00 +scene0647_01 +scene0535_00 +scene0353_00 +scene0353_01 +scene0353_02 +scene0559_00 +scene0559_01 +scene0559_02 +scene0593_00 +scene0593_01 +scene0246_00 +scene0653_00 +scene0653_01 +scene0064_00 +scene0064_01 +scene0356_00 +scene0356_01 +scene0356_02 +scene0030_00 +scene0030_01 +scene0030_02 +scene0222_00 +scene0222_01 +scene0338_00 +scene0338_01 +scene0338_02 +scene0378_00 +scene0378_01 +scene0378_02 +scene0660_00 +scene0553_00 +scene0553_01 +scene0553_02 +scene0527_00 +scene0663_00 +scene0663_01 +scene0663_02 +scene0664_00 +scene0664_01 +scene0664_02 +scene0334_00 +scene0334_01 +scene0334_02 +scene0046_00 +scene0046_01 +scene0046_02 +scene0203_00 +scene0203_01 +scene0203_02 +scene0088_00 +scene0088_01 +scene0088_02 +scene0088_03 +scene0086_00 +scene0086_01 +scene0086_02 +scene0670_00 +scene0670_01 +scene0256_00 +scene0256_01 +scene0256_02 +scene0249_00 +scene0441_00 +scene0658_00 +scene0704_00 +scene0704_01 +scene0187_00 +scene0187_01 +scene0131_00 +scene0131_01 +scene0131_02 +scene0207_00 +scene0207_01 +scene0207_02 +scene0461_00 +scene0011_00 +scene0011_01 +scene0343_00 +scene0251_00 +scene0077_00 +scene0077_01 +scene0684_00 +scene0684_01 +scene0550_00 +scene0686_00 +scene0686_01 +scene0686_02 +scene0208_00 +scene0500_00 +scene0500_01 +scene0552_00 +scene0552_01 +scene0648_00 +scene0648_01 +scene0435_00 +scene0435_01 +scene0435_02 +scene0435_03 +scene0690_00 +scene0690_01 +scene0693_00 +scene0693_01 +scene0693_02 +scene0700_00 +scene0700_01 +scene0700_02 +scene0699_00 +scene0231_00 +scene0231_01 +scene0231_02 +scene0697_00 +scene0697_01 +scene0697_02 +scene0697_03 +scene0474_00 +scene0474_01 +scene0474_02 +scene0474_03 +scene0474_04 +scene0474_05 +scene0355_00 +scene0355_01 +scene0146_00 +scene0146_01 +scene0146_02 +scene0196_00 +scene0702_00 +scene0702_01 +scene0702_02 +scene0314_00 +scene0277_00 +scene0277_01 +scene0277_02 +scene0095_00 +scene0095_01 +scene0015_00 +scene0100_00 +scene0100_01 +scene0100_02 +scene0558_00 +scene0558_01 +scene0558_02 +scene0685_00 +scene0685_01 +scene0685_02 diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_train.txt b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_train.txt new file mode 100644 index 0000000000000000000000000000000000000000..7520948c8170df9ae1a9e8a40bc444fcc7cc0772 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_train.txt @@ -0,0 +1,1045 @@ +scene0191_00 +scene0191_01 +scene0191_02 +scene0119_00 +scene0230_00 +scene0528_00 +scene0528_01 
+scene0705_00 +scene0705_01 +scene0705_02 +scene0415_00 +scene0415_01 +scene0415_02 +scene0007_00 +scene0141_00 +scene0141_01 +scene0141_02 +scene0515_00 +scene0515_01 +scene0515_02 +scene0447_00 +scene0447_01 +scene0447_02 +scene0531_00 +scene0503_00 +scene0285_00 +scene0069_00 +scene0584_00 +scene0584_01 +scene0584_02 +scene0581_00 +scene0581_01 +scene0581_02 +scene0620_00 +scene0620_01 +scene0263_00 +scene0263_01 +scene0481_00 +scene0481_01 +scene0020_00 +scene0020_01 +scene0291_00 +scene0291_01 +scene0291_02 +scene0469_00 +scene0469_01 +scene0469_02 +scene0659_00 +scene0659_01 +scene0024_00 +scene0024_01 +scene0024_02 +scene0564_00 +scene0117_00 +scene0027_00 +scene0027_01 +scene0027_02 +scene0028_00 +scene0330_00 +scene0418_00 +scene0418_01 +scene0418_02 +scene0233_00 +scene0233_01 +scene0673_00 +scene0673_01 +scene0673_02 +scene0673_03 +scene0673_04 +scene0673_05 +scene0585_00 +scene0585_01 +scene0362_00 +scene0362_01 +scene0362_02 +scene0362_03 +scene0035_00 +scene0035_01 +scene0358_00 +scene0358_01 +scene0358_02 +scene0037_00 +scene0194_00 +scene0321_00 +scene0293_00 +scene0293_01 +scene0623_00 +scene0623_01 +scene0592_00 +scene0592_01 +scene0569_00 +scene0569_01 +scene0413_00 +scene0313_00 +scene0313_01 +scene0313_02 +scene0480_00 +scene0480_01 +scene0401_00 +scene0517_00 +scene0517_01 +scene0517_02 +scene0032_00 +scene0032_01 +scene0613_00 +scene0613_01 +scene0613_02 +scene0306_00 +scene0306_01 +scene0052_00 +scene0052_01 +scene0052_02 +scene0053_00 +scene0444_00 +scene0444_01 +scene0055_00 +scene0055_01 +scene0055_02 +scene0560_00 +scene0589_00 +scene0589_01 +scene0589_02 +scene0610_00 +scene0610_01 +scene0610_02 +scene0364_00 +scene0364_01 +scene0383_00 +scene0383_01 +scene0383_02 +scene0006_00 +scene0006_01 +scene0006_02 +scene0275_00 +scene0451_00 +scene0451_01 +scene0451_02 +scene0451_03 +scene0451_04 +scene0451_05 +scene0135_00 +scene0065_00 +scene0065_01 +scene0065_02 +scene0104_00 +scene0674_00 +scene0674_01 +scene0448_00 +scene0448_01 +scene0448_02 +scene0502_00 +scene0502_01 +scene0502_02 +scene0440_00 +scene0440_01 +scene0440_02 +scene0071_00 +scene0072_00 +scene0072_01 +scene0072_02 +scene0509_00 +scene0509_01 +scene0509_02 +scene0649_00 +scene0649_01 +scene0602_00 +scene0694_00 +scene0694_01 +scene0101_00 +scene0101_01 +scene0101_02 +scene0101_03 +scene0101_04 +scene0101_05 +scene0218_00 +scene0218_01 +scene0579_00 +scene0579_01 +scene0579_02 +scene0039_00 +scene0039_01 +scene0493_00 +scene0493_01 +scene0242_00 +scene0242_01 +scene0242_02 +scene0083_00 +scene0083_01 +scene0127_00 +scene0127_01 +scene0662_00 +scene0662_01 +scene0662_02 +scene0018_00 +scene0087_00 +scene0087_01 +scene0087_02 +scene0332_00 +scene0332_01 +scene0332_02 +scene0628_00 +scene0628_01 +scene0628_02 +scene0134_00 +scene0134_01 +scene0134_02 +scene0238_00 +scene0238_01 +scene0092_00 +scene0092_01 +scene0092_02 +scene0092_03 +scene0092_04 +scene0022_00 +scene0022_01 +scene0467_00 +scene0392_00 +scene0392_01 +scene0392_02 +scene0424_00 +scene0424_01 +scene0424_02 +scene0646_00 +scene0646_01 +scene0646_02 +scene0098_00 +scene0098_01 +scene0044_00 +scene0044_01 +scene0044_02 +scene0510_00 +scene0510_01 +scene0510_02 +scene0571_00 +scene0571_01 +scene0166_00 +scene0166_01 +scene0166_02 +scene0563_00 +scene0172_00 +scene0172_01 +scene0388_00 +scene0388_01 +scene0215_00 +scene0215_01 +scene0252_00 +scene0287_00 +scene0668_00 +scene0572_00 +scene0572_01 +scene0572_02 +scene0026_00 +scene0224_00 +scene0113_00 +scene0113_01 +scene0551_00 +scene0381_00 +scene0381_01 +scene0381_02 +scene0371_00 
+scene0371_01 +scene0460_00 +scene0118_00 +scene0118_01 +scene0118_02 +scene0417_00 +scene0008_00 +scene0634_00 +scene0521_00 +scene0123_00 +scene0123_01 +scene0123_02 +scene0045_00 +scene0045_01 +scene0511_00 +scene0511_01 +scene0114_00 +scene0114_01 +scene0114_02 +scene0070_00 +scene0029_00 +scene0029_01 +scene0029_02 +scene0129_00 +scene0103_00 +scene0103_01 +scene0002_00 +scene0002_01 +scene0132_00 +scene0132_01 +scene0132_02 +scene0124_00 +scene0124_01 +scene0143_00 +scene0143_01 +scene0143_02 +scene0604_00 +scene0604_01 +scene0604_02 +scene0507_00 +scene0105_00 +scene0105_01 +scene0105_02 +scene0428_00 +scene0428_01 +scene0311_00 +scene0140_00 +scene0140_01 +scene0182_00 +scene0182_01 +scene0182_02 +scene0142_00 +scene0142_01 +scene0399_00 +scene0399_01 +scene0012_00 +scene0012_01 +scene0012_02 +scene0060_00 +scene0060_01 +scene0370_00 +scene0370_01 +scene0370_02 +scene0310_00 +scene0310_01 +scene0310_02 +scene0661_00 +scene0650_00 +scene0152_00 +scene0152_01 +scene0152_02 +scene0158_00 +scene0158_01 +scene0158_02 +scene0482_00 +scene0482_01 +scene0600_00 +scene0600_01 +scene0600_02 +scene0393_00 +scene0393_01 +scene0393_02 +scene0562_00 +scene0174_00 +scene0174_01 +scene0157_00 +scene0157_01 +scene0161_00 +scene0161_01 +scene0161_02 +scene0159_00 +scene0254_00 +scene0254_01 +scene0115_00 +scene0115_01 +scene0115_02 +scene0162_00 +scene0163_00 +scene0163_01 +scene0523_00 +scene0523_01 +scene0523_02 +scene0459_00 +scene0459_01 +scene0175_00 +scene0085_00 +scene0085_01 +scene0279_00 +scene0279_01 +scene0279_02 +scene0201_00 +scene0201_01 +scene0201_02 +scene0283_00 +scene0456_00 +scene0456_01 +scene0429_00 +scene0043_00 +scene0043_01 +scene0419_00 +scene0419_01 +scene0419_02 +scene0368_00 +scene0368_01 +scene0348_00 +scene0348_01 +scene0348_02 +scene0442_00 +scene0178_00 +scene0380_00 +scene0380_01 +scene0380_02 +scene0165_00 +scene0165_01 +scene0165_02 +scene0181_00 +scene0181_01 +scene0181_02 +scene0181_03 +scene0333_00 +scene0614_00 +scene0614_01 +scene0614_02 +scene0404_00 +scene0404_01 +scene0404_02 +scene0185_00 +scene0126_00 +scene0126_01 +scene0126_02 +scene0519_00 +scene0236_00 +scene0236_01 +scene0189_00 +scene0075_00 +scene0267_00 +scene0192_00 +scene0192_01 +scene0192_02 +scene0281_00 +scene0420_00 +scene0420_01 +scene0420_02 +scene0195_00 +scene0195_01 +scene0195_02 +scene0597_00 +scene0597_01 +scene0597_02 +scene0041_00 +scene0041_01 +scene0111_00 +scene0111_01 +scene0111_02 +scene0666_00 +scene0666_01 +scene0666_02 +scene0200_00 +scene0200_01 +scene0200_02 +scene0536_00 +scene0536_01 +scene0536_02 +scene0390_00 +scene0280_00 +scene0280_01 +scene0280_02 +scene0344_00 +scene0344_01 +scene0205_00 +scene0205_01 +scene0205_02 +scene0484_00 +scene0484_01 +scene0009_00 +scene0009_01 +scene0009_02 +scene0302_00 +scene0302_01 +scene0209_00 +scene0209_01 +scene0209_02 +scene0210_00 +scene0210_01 +scene0395_00 +scene0395_01 +scene0395_02 +scene0683_00 +scene0601_00 +scene0601_01 +scene0214_00 +scene0214_01 +scene0214_02 +scene0477_00 +scene0477_01 +scene0439_00 +scene0439_01 +scene0468_00 +scene0468_01 +scene0468_02 +scene0546_00 +scene0466_00 +scene0466_01 +scene0220_00 +scene0220_01 +scene0220_02 +scene0122_00 +scene0122_01 +scene0130_00 +scene0110_00 +scene0110_01 +scene0110_02 +scene0327_00 +scene0156_00 +scene0266_00 +scene0266_01 +scene0001_00 +scene0001_01 +scene0228_00 +scene0199_00 +scene0219_00 +scene0464_00 +scene0232_00 +scene0232_01 +scene0232_02 +scene0299_00 +scene0299_01 +scene0530_00 +scene0363_00 +scene0453_00 +scene0453_01 +scene0570_00 +scene0570_01 
+scene0570_02 +scene0183_00 +scene0239_00 +scene0239_01 +scene0239_02 +scene0373_00 +scene0373_01 +scene0241_00 +scene0241_01 +scene0241_02 +scene0188_00 +scene0622_00 +scene0622_01 +scene0244_00 +scene0244_01 +scene0691_00 +scene0691_01 +scene0206_00 +scene0206_01 +scene0206_02 +scene0247_00 +scene0247_01 +scene0061_00 +scene0061_01 +scene0082_00 +scene0250_00 +scene0250_01 +scene0250_02 +scene0501_00 +scene0501_01 +scene0501_02 +scene0320_00 +scene0320_01 +scene0320_02 +scene0320_03 +scene0631_00 +scene0631_01 +scene0631_02 +scene0255_00 +scene0255_01 +scene0255_02 +scene0047_00 +scene0265_00 +scene0265_01 +scene0265_02 +scene0004_00 +scene0336_00 +scene0336_01 +scene0058_00 +scene0058_01 +scene0260_00 +scene0260_01 +scene0260_02 +scene0243_00 +scene0603_00 +scene0603_01 +scene0093_00 +scene0093_01 +scene0093_02 +scene0109_00 +scene0109_01 +scene0434_00 +scene0434_01 +scene0434_02 +scene0290_00 +scene0627_00 +scene0627_01 +scene0470_00 +scene0470_01 +scene0137_00 +scene0137_01 +scene0137_02 +scene0270_00 +scene0270_01 +scene0270_02 +scene0271_00 +scene0271_01 +scene0504_00 +scene0274_00 +scene0274_01 +scene0274_02 +scene0036_00 +scene0036_01 +scene0276_00 +scene0276_01 +scene0272_00 +scene0272_01 +scene0499_00 +scene0698_00 +scene0698_01 +scene0051_00 +scene0051_01 +scene0051_02 +scene0051_03 +scene0108_00 +scene0245_00 +scene0369_00 +scene0369_01 +scene0369_02 +scene0284_00 +scene0289_00 +scene0289_01 +scene0286_00 +scene0286_01 +scene0286_02 +scene0286_03 +scene0031_00 +scene0031_01 +scene0031_02 +scene0545_00 +scene0545_01 +scene0545_02 +scene0557_00 +scene0557_01 +scene0557_02 +scene0533_00 +scene0533_01 +scene0116_00 +scene0116_01 +scene0116_02 +scene0611_00 +scene0611_01 +scene0688_00 +scene0294_00 +scene0294_01 +scene0294_02 +scene0295_00 +scene0295_01 +scene0296_00 +scene0296_01 +scene0596_00 +scene0596_01 +scene0596_02 +scene0532_00 +scene0532_01 +scene0637_00 +scene0638_00 +scene0121_00 +scene0121_01 +scene0121_02 +scene0040_00 +scene0040_01 +scene0197_00 +scene0197_01 +scene0197_02 +scene0410_00 +scene0410_01 +scene0305_00 +scene0305_01 +scene0615_00 +scene0615_01 +scene0703_00 +scene0703_01 +scene0555_00 +scene0297_00 +scene0297_01 +scene0297_02 +scene0582_00 +scene0582_01 +scene0582_02 +scene0023_00 +scene0094_00 +scene0013_00 +scene0013_01 +scene0013_02 +scene0136_00 +scene0136_01 +scene0136_02 +scene0407_00 +scene0407_01 +scene0062_00 +scene0062_01 +scene0062_02 +scene0386_00 +scene0318_00 +scene0554_00 +scene0554_01 +scene0497_00 +scene0213_00 +scene0258_00 +scene0323_00 +scene0323_01 +scene0324_00 +scene0324_01 +scene0016_00 +scene0016_01 +scene0016_02 +scene0681_00 +scene0398_00 +scene0398_01 +scene0227_00 +scene0090_00 +scene0066_00 +scene0262_00 +scene0262_01 +scene0155_00 +scene0155_01 +scene0155_02 +scene0352_00 +scene0352_01 +scene0352_02 +scene0038_00 +scene0038_01 +scene0038_02 +scene0335_00 +scene0335_01 +scene0335_02 +scene0261_00 +scene0261_01 +scene0261_02 +scene0261_03 +scene0640_00 +scene0640_01 +scene0640_02 +scene0080_00 +scene0080_01 +scene0080_02 +scene0403_00 +scene0403_01 +scene0282_00 +scene0282_01 +scene0282_02 +scene0682_00 +scene0173_00 +scene0173_01 +scene0173_02 +scene0522_00 +scene0687_00 +scene0345_00 +scene0345_01 +scene0612_00 +scene0612_01 +scene0411_00 +scene0411_01 +scene0411_02 +scene0625_00 +scene0625_01 +scene0211_00 +scene0211_01 +scene0211_02 +scene0211_03 +scene0676_00 +scene0676_01 +scene0179_00 +scene0498_00 +scene0498_01 +scene0498_02 +scene0547_00 +scene0547_01 +scene0547_02 +scene0269_00 +scene0269_01 +scene0269_02 
+scene0366_00 +scene0680_00 +scene0680_01 +scene0588_00 +scene0588_01 +scene0588_02 +scene0588_03 +scene0346_00 +scene0346_01 +scene0359_00 +scene0359_01 +scene0014_00 +scene0120_00 +scene0120_01 +scene0212_00 +scene0212_01 +scene0212_02 +scene0176_00 +scene0049_00 +scene0259_00 +scene0259_01 +scene0586_00 +scene0586_01 +scene0586_02 +scene0309_00 +scene0309_01 +scene0125_00 +scene0455_00 +scene0177_00 +scene0177_01 +scene0177_02 +scene0326_00 +scene0372_00 +scene0171_00 +scene0171_01 +scene0374_00 +scene0654_00 +scene0654_01 +scene0445_00 +scene0445_01 +scene0475_00 +scene0475_01 +scene0475_02 +scene0349_00 +scene0349_01 +scene0234_00 +scene0669_00 +scene0669_01 +scene0375_00 +scene0375_01 +scene0375_02 +scene0387_00 +scene0387_01 +scene0387_02 +scene0312_00 +scene0312_01 +scene0312_02 +scene0384_00 +scene0385_00 +scene0385_01 +scene0385_02 +scene0000_00 +scene0000_01 +scene0000_02 +scene0376_00 +scene0376_01 +scene0376_02 +scene0301_00 +scene0301_01 +scene0301_02 +scene0322_00 +scene0542_00 +scene0079_00 +scene0079_01 +scene0099_00 +scene0099_01 +scene0476_00 +scene0476_01 +scene0476_02 +scene0394_00 +scene0394_01 +scene0147_00 +scene0147_01 +scene0067_00 +scene0067_01 +scene0067_02 +scene0397_00 +scene0397_01 +scene0337_00 +scene0337_01 +scene0337_02 +scene0431_00 +scene0223_00 +scene0223_01 +scene0223_02 +scene0010_00 +scene0010_01 +scene0402_00 +scene0268_00 +scene0268_01 +scene0268_02 +scene0679_00 +scene0679_01 +scene0405_00 +scene0128_00 +scene0408_00 +scene0408_01 +scene0190_00 +scene0107_00 +scene0076_00 +scene0167_00 +scene0361_00 +scene0361_01 +scene0361_02 +scene0216_00 +scene0202_00 +scene0303_00 +scene0303_01 +scene0303_02 +scene0446_00 +scene0446_01 +scene0089_00 +scene0089_01 +scene0089_02 +scene0360_00 +scene0150_00 +scene0150_01 +scene0150_02 +scene0421_00 +scene0421_01 +scene0421_02 +scene0454_00 +scene0626_00 +scene0626_01 +scene0626_02 +scene0186_00 +scene0186_01 +scene0538_00 +scene0479_00 +scene0479_01 +scene0479_02 +scene0656_00 +scene0656_01 +scene0656_02 +scene0656_03 +scene0525_00 +scene0525_01 +scene0525_02 +scene0308_00 +scene0396_00 +scene0396_01 +scene0396_02 +scene0624_00 +scene0292_00 +scene0292_01 +scene0632_00 +scene0253_00 +scene0021_00 +scene0325_00 +scene0325_01 +scene0437_00 +scene0437_01 +scene0438_00 +scene0590_00 +scene0590_01 +scene0400_00 +scene0400_01 +scene0541_00 +scene0541_01 +scene0541_02 +scene0677_00 +scene0677_01 +scene0677_02 +scene0443_00 +scene0315_00 +scene0288_00 +scene0288_01 +scene0288_02 +scene0422_00 +scene0672_00 +scene0672_01 +scene0184_00 +scene0449_00 +scene0449_01 +scene0449_02 +scene0048_00 +scene0048_01 +scene0138_00 +scene0452_00 +scene0452_01 +scene0452_02 +scene0667_00 +scene0667_01 +scene0667_02 +scene0463_00 +scene0463_01 +scene0078_00 +scene0078_01 +scene0078_02 +scene0636_00 +scene0457_00 +scene0457_01 +scene0457_02 +scene0465_00 +scene0465_01 +scene0577_00 +scene0151_00 +scene0151_01 +scene0339_00 +scene0573_00 +scene0573_01 +scene0154_00 +scene0096_00 +scene0096_01 +scene0096_02 +scene0235_00 +scene0168_00 +scene0168_01 +scene0168_02 +scene0594_00 +scene0587_00 +scene0587_01 +scene0587_02 +scene0587_03 +scene0229_00 +scene0229_01 +scene0229_02 +scene0512_00 +scene0106_00 +scene0106_01 +scene0106_02 +scene0472_00 +scene0472_01 +scene0472_02 +scene0489_00 +scene0489_01 +scene0489_02 +scene0425_00 +scene0425_01 +scene0641_00 +scene0526_00 +scene0526_01 +scene0317_00 +scene0317_01 +scene0544_00 +scene0017_00 +scene0017_01 +scene0017_02 +scene0042_00 +scene0042_01 +scene0042_02 +scene0576_00 +scene0576_01 
+scene0576_02 +scene0347_00 +scene0347_01 +scene0347_02 +scene0436_00 +scene0226_00 +scene0226_01 +scene0485_00 +scene0486_00 +scene0487_00 +scene0487_01 +scene0619_00 +scene0097_00 +scene0367_00 +scene0367_01 +scene0491_00 +scene0492_00 +scene0492_01 +scene0005_00 +scene0005_01 +scene0543_00 +scene0543_01 +scene0543_02 +scene0657_00 +scene0341_00 +scene0341_01 diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_val.txt b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_val.txt new file mode 100644 index 0000000000000000000000000000000000000000..965ff258035f857446c30b10e9a6be49f71d3dc7 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv1_val.txt @@ -0,0 +1,156 @@ +scene0534_00 +scene0534_01 +scene0319_00 +scene0273_00 +scene0273_01 +scene0225_00 +scene0198_00 +scene0003_00 +scene0003_01 +scene0003_02 +scene0409_00 +scene0409_01 +scene0331_00 +scene0331_01 +scene0505_00 +scene0505_01 +scene0505_02 +scene0505_03 +scene0505_04 +scene0506_00 +scene0057_00 +scene0057_01 +scene0074_00 +scene0074_01 +scene0074_02 +scene0091_00 +scene0112_00 +scene0112_01 +scene0112_02 +scene0240_00 +scene0102_00 +scene0102_01 +scene0513_00 +scene0514_00 +scene0514_01 +scene0537_00 +scene0516_00 +scene0516_01 +scene0495_00 +scene0617_00 +scene0133_00 +scene0520_00 +scene0520_01 +scene0635_00 +scene0635_01 +scene0054_00 +scene0473_00 +scene0473_01 +scene0524_00 +scene0524_01 +scene0379_00 +scene0471_00 +scene0471_01 +scene0471_02 +scene0566_00 +scene0248_00 +scene0248_01 +scene0248_02 +scene0529_00 +scene0529_01 +scene0529_02 +scene0391_00 +scene0264_00 +scene0264_01 +scene0264_02 +scene0675_00 +scene0675_01 +scene0350_00 +scene0350_01 +scene0350_02 +scene0450_00 +scene0068_00 +scene0068_01 +scene0237_00 +scene0237_01 +scene0365_00 +scene0365_01 +scene0365_02 +scene0605_00 +scene0605_01 +scene0539_00 +scene0539_01 +scene0539_02 +scene0540_00 +scene0540_01 +scene0540_02 +scene0170_00 +scene0170_01 +scene0170_02 +scene0433_00 +scene0340_00 +scene0340_01 +scene0340_02 +scene0160_00 +scene0160_01 +scene0160_02 +scene0160_03 +scene0160_04 +scene0059_00 +scene0059_01 +scene0059_02 +scene0056_00 +scene0056_01 +scene0478_00 +scene0478_01 +scene0548_00 +scene0548_01 +scene0548_02 +scene0204_00 +scene0204_01 +scene0204_02 +scene0033_00 +scene0145_00 +scene0483_00 +scene0508_00 +scene0508_01 +scene0508_02 +scene0180_00 +scene0148_00 +scene0556_00 +scene0556_01 +scene0416_00 +scene0416_01 +scene0416_02 +scene0416_03 +scene0416_04 +scene0073_00 +scene0073_01 +scene0073_02 +scene0073_03 +scene0034_00 +scene0034_01 +scene0034_02 +scene0639_00 +scene0561_00 +scene0561_01 +scene0298_00 +scene0692_00 +scene0692_01 +scene0692_02 +scene0692_03 +scene0692_04 +scene0642_00 +scene0642_01 +scene0642_02 +scene0642_03 +scene0630_00 +scene0630_01 +scene0630_02 +scene0630_03 +scene0630_04 +scene0630_05 +scene0630_06 +scene0706_00 +scene0567_00 +scene0567_01 diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels-old.combined.tsv b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels-old.combined.tsv new file mode 100644 index 0000000000000000000000000000000000000000..05c006e98066aa78d126bebcfb3654200d351b93 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels-old.combined.tsv @@ -0,0 +1,608 @@ +id raw_category category count nyu40id eigen13id nyuClass nyu40class eigen13class ModelNet40 ModelNet10 ShapeNetCore55 synsetoffset wnsynsetid wnsynsetkey 
mpcat40 mpcat40index +1 wall wall 8277 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +2 chair chair 4646 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +22 books book 1678 23 2 book books Books n02870526 book.n.11 objects 39 +3 floor floor 1553 2 5 floor floor Floor n03365592 floor.n.01 floor 2 +5 door door 1483 8 12 door door Wall door n03221720 door.n.01 door 4 +1163 object object 1313 40 7 otherprop Objects objects 39 +16 window window 1209 9 13 window window Window n04587648 window.n.01 window 9 +4 table table 1170 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +56 trash can trash can 1090 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +13 pillow pillow 937 18 7 pillow pillow Objects pillow 3938244 n03938244 pillow.n.01 cushion 8 +15 picture picture 862 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +41 ceiling ceiling 806 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 ceiling 17 +26 box box 775 29 7 box box Objects n02883344 box.n.01 objects 39 +161 doorframe doorframe 768 8 12 door door Wall door doorframe.n.01 door 4 +19 monitor monitor 765 40 7 monitor otherprop Objects monitor monitor tv or monitor 3211117 n03782190 monitor.n.04 objects 39 +7 cabinet cabinet 731 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +9 desk desk 680 14 10 desk desk Table desk desk table 4379243 n03179701 desk.n.01 table 5 +8 shelf shelf 641 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +10 office chair office chair 595 5 4 chair chair Chair chair chair chair 3001627 n04373704 swivel_chair.n.01 chair 3 +31 towel towel 570 27 7 towel towel Objects n04459362 towel.n.01 towel 20 +6 couch couch 502 6 9 sofa sofa Sofa sofa sofa sofa 4256520 n04256520 sofa.n.01 sofa 10 +14 sink sink 488 34 7 sink sink Objects sink n04223580 sink.n.01 sink 15 +48 backpack backpack 479 40 7 backpack otherprop Objects n02769748 backpack.n.01 objects 39 +28 lamp lamp 419 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +11 bed bed 370 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +18 bookshelf bookshelf 360 10 6 bookshelf bookshelf Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +71 mirror mirror 349 19 7 mirror mirror Objects n03773035 mirror.n.01 mirror 21 +21 curtain curtain 347 16 13 curtain curtain Window curtain n03151077 curtain.n.01 curtain 12 +40 plant plant 331 40 7 plant otherprop Objects plant n00017222 plant.n.02 plant 14 +52 whiteboard whiteboard 327 30 7 whiteboard whiteboard Objects n03211616 display_panel.n.01 board_panel 35 +96 radiator radiator 322 39 6 radiator otherfurniture Furniture n04041069 radiator.n.02 misc 40 +22 book book 318 23 2 book books Books n02870526 book.n.11 objects 39 +29 kitchen cabinet kitchen cabinet 310 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 cabinet 7 +49 toilet paper toilet paper 291 40 7 toilet paper otherprop Objects n15075141 toilet_tissue.n.01 objects 39 +29 kitchen cabinets kitchen cabinet 289 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +23 armchair armchair 281 5 4 chair chair Chair chair chair chair 3001627 n02738535 armchair.n.01 chair 3 +63 shoes shoe 272 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +24 coffee table coffee table 258 7 10 coffee table table Table table table table 4379243 n03063968 coffee_table.n.01 table 5 +17 toilet toilet 256 33 
7 toilet toilet Objects toilet toilet n04446276 toilet.n.01 toilet 18 +47 bag bag 252 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +32 clothes clothes 248 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +46 keyboard keyboard 246 40 7 keyboard otherprop Objects keyboard computer keyboard 3085013 n03085013 computer_keyboard.n.01 objects 39 +65 bottle bottle 226 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +97 recycling bin recycling bin 225 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +34 nightstand nightstand 224 32 6 night stand night stand Furniture night_stand night_stand n03015254 chest_of_drawers.n.01 chest_of_drawers 13 +38 stool stool 221 40 7 stool otherprop Objects stool n04326896 stool.n.01 stool 19 +33 tv tv 219 25 11 television television TV tv or monitor 3211117 n03211117 display.n.06 tv_monitor 22 +75 file cabinet file cabinet 217 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +36 dresser dresser 213 17 6 dresser dresser Furniture dresser dresser n03015254 chest_of_drawers.n.01 chest_of_drawers 13 +64 computer tower computer tower 203 40 7 computer otherprop Objects n03082979 computer.n.01 objects 39 +32 clothing clothes 165 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +101 telephone telephone 164 40 7 telephone otherprop Objects telephone 4401088 n04401088 telephone.n.01 objects 39 +130 cup cup 157 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +27 refrigerator refrigerator 154 24 6 refridgerator refridgerator Furniture n04070727 refrigerator.n.01 appliances 37 +44 end table end table 147 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +131 jacket jacket 146 40 7 jacket otherprop Objects n03589791 jacket.n.01 clothes 38 +55 shower curtain shower curtain 144 28 7 shower curtain shower curtain Objects curtain n04209239 shower_curtain.n.01 curtain 12 +42 bathtub bathtub 144 36 7 bathtub bathtub Objects bathtub bathtub tub 2808440 n02808440 bathtub.n.01 bathtub 25 +59 microwave microwave 141 40 7 microwave otherprop Objects microwave 3761084 n03761084 microwave.n.02 appliances 37 +159 kitchen counter kitchen counter 140 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26 +74 sofa chair sofa chair 129 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +82 paper towel dispenser paper towel dispenser 129 40 7 paper towel dispenser otherprop Objects objects 39 +1164 bathroom vanity bathroom vanity 126 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 table 5 +93 suitcase suitcase 118 40 7 luggage otherprop Objects n02773838 bag.n.06 objects 39 +77 laptop laptop 111 40 7 laptop otherprop Objects laptop laptop 3642806 n03642806 laptop.n.01 objects 39 +67 ottoman ottoman 111 39 6 ottoman otherfurniture Furniture stool n03380724 footstool.n.01 stool 19 +128 shower walls shower wall 109 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +50 printer printer 106 40 7 printer otherprop Objects printer 4004475 n04004475 printer.n.03 appliances 37 +35 counter counter 104 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26 +69 board board 100 38 7 board otherstructure Objects board_panel 35 +100 soap dispenser soap dispenser 99 40 7 otherprop Objects n04254120 soap_dispenser.n.01 objects 39 +62 stove stove 95 38 7 stove otherstructure Objects 
stove 4330267 n04330267 stove.n.02 appliances 37 +105 light light 93 38 7 light otherstructure Objects n03665366 light.n.02 lighting 28 +1165 closet wall closet wall 90 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +165 mini fridge mini fridge 87 24 6 refridgerator refridgerator Furniture n03273913 electric_refrigerator.n.01 appliances 37 +7 cabinets cabinet 79 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +5 doors door 76 8 12 door door Wall door n03221720 door.n.01 door 4 +76 fan fan 75 40 7 fan otherprop Objects n03320046 fan.n.01 misc 40 +230 tissue box tissue box 73 40 7 tissue box otherprop Objects n02883344 box.n.01 objects 39 +54 blanket blanket 72 40 7 blanket otherprop Objects n02849154 blanket.n.01 objects 39 +125 bathroom stall bathroom stall 71 38 7 otherstructure Objects n02873839 booth.n.02 misc 40 +72 copier copier 70 40 7 otherprop Objects n03257586 duplicator.n.01 appliances 37 +68 bench bench 66 39 6 bench otherfurniture Furniture bench bench 2828884 n02828884 bench.n.01 seating 34 +145 bar bar 66 38 7 bar otherstructure Objects n02788689 bar.n.03 misc 40 +157 soap dish soap dish 65 40 7 soap dish otherprop Objects n04254009 soap_dish.n.01 objects 39 +1166 laundry hamper laundry hamper 65 40 7 laundry basket otherprop Objects objects 39 +132 storage bin storage bin 63 40 7 storage bin otherprop Objects objects 39 +1167 bathroom stall door bathroom stall door 62 8 12 door door Wall door n03221720 door.n.01 door 4 +232 light switch light switch 61 38 7 light switch otherstructure Objects n04372370 switch.n.01 misc 40 +134 coffee maker coffee maker 61 40 7 otherprop Objects n03063338 coffee_maker.n.01 appliances 37 +51 tv stand tv stand 61 39 6 tv stand otherfurniture Furniture tv_stand n03290653 entertainment_center.n.01 furniture 36 +250 decoration decoration 60 40 7 otherprop Objects n03169390 decoration.n.01 misc 40 +1168 ceiling light ceiling light 59 38 7 light otherstructure Objects n03665366 light.n.02 lighting 28 +342 range hood range hood 59 38 7 range hood otherstructure Objects range_hood n04053677 range_hood.n.01 misc 40 +89 blackboard blackboard 58 38 7 blackboard otherstructure Objects n02846511 blackboard.n.01 board_panel 35 +103 clock clock 58 40 7 clock otherprop Objects clock 3046257 n03046257 clock.n.01 objects 39 +99 wardrobe closet wardrobe 54 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +95 rail rail 53 38 7 railing otherstructure Objects n04047401 railing.n.01 railing 30 +154 bulletin board bulletin board 53 38 7 board otherstructure Objects n03211616 display_panel.n.01 board_panel 35 +140 mat mat 52 20 5 floor mat floor mat Floor n03727837 mat.n.01 floor 2 +1169 trash bin trash bin 52 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +193 ledge ledge 51 38 7 otherstructure Objects n09337253 ledge.n.01 misc 40 +116 seat seat 49 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 furniture 36 +202 mouse mouse 49 40 7 mouse otherprop Objects n03793489 mouse.n.04 objects 39 +73 basket basket 48 40 7 basket otherprop Objects basket 2801938 n02801938 basket.n.01 objects 39 +78 shower shower 48 38 7 otherstructure Objects n04208936 shower.n.01 shower 23 +1170 dumbbell dumbbell 48 40 7 otherprop Objects n03255030 dumbbell.n.01 objects 39 +79 paper paper 46 26 7 paper paper Objects n14974264 paper.n.01 objects 39 +80 person person 46 31 7 person person Objects person n05217688 person.n.02 misc 40 +141 windowsill windowsill 45 38 7 
otherstructure Objects n04590263 windowsill.n.01 window 9 +57 closet closet 45 39 6 wardrobe otherfurniture Furniture wardrobe misc 40 +102 bucket bucket 45 40 7 bucket otherprop Objects n02909870 bucket.n.01 misc 40 +261 sign sign 44 40 7 sign otherprop Objects n04217882 signboard.n.01 objects 39 +118 speaker speaker 43 40 7 speaker otherprop Objects speaker 3691459 n03691459 loudspeaker.n.01 objects 39 +136 dishwasher dishwasher 43 38 7 dishwasher otherstructure Objects dishwasher 3207941 n03207941 dishwasher.n.01 appliances 37 +98 container container 43 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1171 stair rail stair rail 42 38 7 banister otherstructure Objects n02788148 bannister.n.02 railing 30 +170 shower curtain rod shower curtain rod 42 40 7 otherprop Objects curtain 12 +1172 tube tube 41 40 7 otherprop Objects misc 40 +1173 bathroom cabinet bathroom cabinet 39 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +79 papers paper 39 26 7 paper paper Objects n14974264 paper.n.01 objects 39 +221 storage container storage container 39 40 7 container otherprop Objects objects 39 +570 paper bag paper bag 39 37 7 bag bag Objects n04122825 sack.n.01 objects 39 +138 paper towel roll paper towel roll 39 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20 +168 ball ball 39 40 7 ball otherprop Objects objects 39 +276 closet doors closet door 38 8 12 door door Wall door n03221720 door.n.01 door 4 +106 laundry basket laundry basket 37 40 7 laundry basket otherprop Objects basket 2801938 n03050864 clothes_hamper.n.01 objects 39 +214 cart cart 37 40 7 cart otherprop Objects n03484083 handcart.n.01 shelving 31 +276 closet door closet door 35 8 12 door door Wall door n03221720 door.n.01 door 4 +323 dish rack dish rack 35 40 7 dish rack otherprop Objects n03207630 dish_rack.n.01 objects 39 +58 stairs stairs 35 38 7 stairs otherstructure Objects n04298308 stairway.n.01 stairs 16 +86 blinds blinds 35 13 13 blinds blinds Window n02851099 blind.n.03 blinds 32 +2 stack of chairs chair 35 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +399 purse purse 34 40 7 purse otherprop Objects n02774152 bag.n.04 objects 39 +121 bicycle bicycle 33 40 7 bicycle otherprop Objects bicycle 2834778 n02834778 bicycle.n.01 objects 39 +185 tray tray 32 40 7 tray otherprop Objects n04476259 tray.n.01 objects 39 +300 plunger plunger 30 40 7 otherprop Objects n03970156 plunger.n.03 objects 39 +180 paper cutter paper cutter 30 40 7 paper cutter otherprop Objects n03886940 paper_cutter.n.01 objects 39 +163 toilet paper dispenser toilet paper dispenser 29 40 7 otherprop Objects objects 39 +26 boxes box 29 29 7 box box Objects n02883344 box.n.01 objects 39 +66 bin bin 28 40 7 bin otherprop Objects n02839910 bin.n.01 objects 39 +208 toilet seat cover dispenser toilet seat cover dispenser 28 40 7 otherprop Objects objects 39 +112 guitar guitar 28 40 7 guitar otherprop Objects guitar guitar 3467517 n03467517 guitar.n.01 objects 39 +540 mailboxes mailbox 28 29 7 box box Objects mailbox 3710193 n03710193 mailbox.n.01 misc 40 +395 handicap bar handicap bar 27 38 7 bar otherstructure Objects misc 40 +166 fire extinguisher fire extinguisher 27 40 7 fire extinguisher otherprop Objects n03345837 fire_extinguisher.n.01 misc 40 +122 ladder ladder 27 39 6 ladder otherfurniture Furniture stairs n03632277 ladder.n.01 stairs 16 +120 column column 26 38 7 column otherstructure Objects n03074380 column.n.06 column 24 +107 pipe pipe 25 40 7 pipe 
otherprop Objects n03944672 pipe.n.02 misc 40 +283 vacuum cleaner vacuum cleaner 25 40 7 otherprop Objects n04517823 vacuum.n.04 objects 39 +88 plate plate 24 40 7 plate otherprop Objects n03959485 plate.n.04 objects 39 +90 piano piano 24 39 6 piano otherfurniture Furniture piano piano 3928116 n03928116 piano.n.01 furniture 36 +177 water cooler water cooler 24 39 6 water cooler otherfurniture Furniture n04559166 water_cooler.n.01 misc 40 +1174 cd case cd case 24 40 7 otherprop Objects objects 39 +562 bowl bowl 24 40 7 bowl otherprop Objects bowl bowl 2880940 n02880940 bowl.n.03 objects 39 +1175 closet rod closet rod 24 40 7 otherprop Objects n04100174 rod.n.01 misc 40 +1156 bathroom counter bathroom counter 24 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26 +84 oven oven 23 38 7 oven otherstructure Objects n03862676 oven.n.01 appliances 37 +104 stand stand 23 39 6 stand otherfurniture Furniture table table table 4379243 n04301000 stand.n.04 table 5 +229 scale scale 23 40 7 scale otherprop Objects n04141975 scale.n.07 objects 39 +70 washing machine washing machine 23 39 6 washing machine otherfurniture Furniture washing_machine 4554684 n04554684 washer.n.03 appliances 37 +325 broom broom 22 40 7 broom otherprop Objects n02906734 broom.n.01 objects 39 +169 hat hat 22 40 7 hat otherprop Objects n03497657 hat.n.01 clothes 38 +128 shower wall shower wall 22 1 12 wall wall Wall n04208936 shower.n.01 wall 1 +331 guitar case guitar case 21 40 7 guitar case otherprop Objects objects 39 +87 rack rack 21 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +488 water pitcher water pitcher 21 40 7 pitcher otherprop Objects n03950228 pitcher.n.02 objects 39 +776 laundry detergent laundry detergent 21 40 7 otherprop Objects objects 39 +370 hair dryer hair dryer 21 40 7 hair dryer otherprop Objects n03483316 hand_blower.n.01 objects 39 +191 pillar pillar 21 38 7 column otherstructure Objects n03073977 column.n.07 column 24 +748 divider divider 20 40 7 otherprop Objects wall 1 +242 power outlet power outlet 19 40 7 otherprop Objects misc 40 +45 dining table dining table 19 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +417 shower floor shower floor 19 2 5 floor floor Floor n04208936 shower.n.01 floor 2 +70 washing machines washing machine 19 39 6 washing machine otherfurniture Furniture washing_machine 4554684 n04554684 washer.n.03 appliances 37 +188 shower door shower door 19 8 12 door door Wall door n04208936 shower.n.01 door 4 +1176 coffee kettle coffee kettle 18 40 7 pot otherprop Objects n03612814 kettle.n.01 objects 39 +1177 wardrobe cabinet wardrobe 18 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +1178 structure structure 18 38 7 otherstructure Objects misc 40 +18 bookshelves bookshelf 17 10 6 bookshelf bookshelf Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +110 clothes dryer clothes dryer 17 39 6 otherfurniture Furniture n03251766 dryer.n.01 appliances 37 +148 toaster toaster 17 40 7 toaster otherprop Objects n04442312 toaster.n.02 appliances 37 +63 shoe shoe 17 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +155 ironing board ironing board 16 39 6 ironing board otherfurniture Furniture n03586090 ironing_board.n.01 objects 39 +572 alarm clock alarm clock 16 40 7 alarm clock otherprop Objects clock 3046257 n02694662 alarm_clock.n.01 objects 39 +1179 shower head shower head 15 38 7 otherstructure Objects shower 23 +28 lamp base 
lamp 15 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +392 water bottle water bottle 15 40 7 bottle otherprop Objects bottle bottle 2876657 n04557648 water_bottle.n.01 objects 39 +1180 keyboard piano keyboard piano 15 39 6 piano otherfurniture Furniture piano piano 3928116 n03928116 piano.n.01 furniture 36 +609 projector screen projector screen 15 38 7 projector screen otherstructure Objects misc 40 +1181 case of water bottles case of water bottles 15 40 7 otherprop Objects objects 39 +195 toaster oven toaster oven 14 40 7 toaster oven otherprop Objects n04442441 toaster_oven.n.01 appliances 37 +581 music stand music stand 14 39 6 music stand otherfurniture Furniture n03801760 music_stand.n.01 furniture 36 +58 staircase stairs 14 38 7 stairs otherstructure Objects n04298308 stairway.n.01 stairs 16 +1182 coat rack coat rack 14 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 3 +1183 storage organizer storage organizer 14 40 7 otherprop Objects shelving 3 +139 machine machine 14 40 7 machine otherprop Objects n03699975 machine.n.01 appliances 37 +1184 folded chair folded chair 14 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1185 fire alarm fire alarm 14 40 7 otherprop Objects n03343737 fire_alarm.n.02 misc 40 +156 fireplace fireplace 13 38 7 fireplace otherstructure Objects n03346455 fireplace.n.01 fireplace 27 +408 vent vent 13 40 7 otherprop Objects n04526241 vent.n.01 misc 40 +213 furniture furniture 13 39 6 furniture otherfurniture Furniture n03405725 furniture.n.01 furniture 36 +1186 power strip power strip 13 40 7 otherprop Objects objects 39 +1187 calendar calendar 13 40 7 otherprop Objects objects 39 +1188 poster poster 13 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +115 toilet paper holder toilet paper holder 13 40 7 toilet paper holder otherprop Objects objects 39 +1189 potted plant potted plant 12 40 7 plant otherprop Objects plant n00017222 plant.n.02 plant 14 +304 stuffed animal stuffed animal 12 40 7 stuffed animal otherprop Objects n04399382 teddy.n.01 objects 39 +1190 luggage luggage 12 40 7 luggage otherprop Objects n02774630 baggage.n.01 objects 39 +21 curtains curtain 12 16 13 curtain curtain Window curtain n03151077 curtain.n.01 curtain 12 +312 headphones headphones 12 40 7 otherprop Objects n03261776 earphone.n.01 objects 39 +233 crate crate 12 39 6 crate otherfurniture Furniture n03127925 crate.n.01 objects 39 +286 candle candle 12 40 7 candle otherprop Objects lamp n02948072 candle.n.01 objects 39 +264 projector projector 12 40 7 projector otherprop Objects n04009552 projector.n.02 objects 39 +110 clothes dryers clothes dryer 12 39 6 otherfurniture Furniture n03251766 dryer.n.01 appliances 37 +1191 mattress mattress 12 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +356 dustpan dustpan 12 40 7 otherprop Objects n03259009 dustpan.n.02 objects 39 +25 drawer drawer 11 39 6 drawer otherfurniture Furniture n03233905 drawer.n.01 furniture 36 +750 rod rod 11 40 7 otherprop Objects pistol 3948459 n03427202 gat.n.01 misc 40 +269 globe globe 11 40 7 globe otherprop Objects objects 39 +307 footrest footrest 11 39 6 foot rest otherfurniture Furniture stool n03380724 footstool.n.01 stool 19 +410 piano bench piano bench 11 39 6 piano bench otherfurniture Furniture bench bench 2828884 n02828884 bench.n.01 seating 34 +730 breakfast bar breakfast bar 11 38 7 bar otherstructure Objects counter 26 +216 step stool step stool 11 40 7 step stool otherprop Objects stool n04315713 
step_stool.n.01 stool 19 +1192 hand rail hand rail 11 38 7 railing otherstructure Objects railing 30 +119 vending machine vending machine 11 40 7 machine otherprop Objects n04525305 vending_machine.n.01 appliances 37 +682 ceiling fan ceiling fan 11 40 7 fan otherprop Objects n03320046 fan.n.01 misc 40 +434 swiffer swiffer 11 40 7 otherprop Objects objects 39 +126 foosball table foosball table 11 39 6 foosball table otherfurniture Furniture table table table 4379243 n04379243 table.n.02 table 5 +919 jar jar 11 40 7 jar otherprop Objects jar 3593526 n03593526 jar.n.01 objects 39 +85 footstool footstool 11 39 6 ottoman otherfurniture Furniture stool n03380724 footstool.n.01 stool 19 +1193 folded table folded table 10 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +108 round table round table 10 7 10 table table Table table table table 4379243 n04114554 round_table.n.02 table 5 +135 hamper hamper 10 40 7 basket otherprop Objects basket 2801938 n03482405 hamper.n.02 objects 39 +1194 poster tube poster tube 10 40 7 otherprop Objects objects 39 +432 case case 10 40 7 case otherprop Objects objects 39 +53 carpet carpet 10 40 7 rug otherprop Objects n04118021 rug.n.01 floor 2 +1195 thermostat thermostat 10 40 7 otherprop Objects n04422875 thermostat.n.01 misc 40 +111 coat coat 10 40 7 jacket otherprop Objects n03057021 coat.n.01 clothes 38 +305 water fountain water fountain 10 38 7 water fountain otherstructure Objects n03241335 drinking_fountain.n.01 misc 40 +1125 smoke detector smoke detector 10 40 7 otherprop Objects misc 40 +13 pillows pillow 9 18 7 pillow pillow Objects pillow 3938244 n03938244 pillow.n.01 cushion 8 +1196 flip flops flip flops 9 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +1197 cloth cloth 9 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +1198 banner banner 9 40 7 otherprop Objects n02788021 banner.n.01 misc 40 +1199 clothes hanger clothes hanger 9 40 7 otherprop Objects n03057920 coat_hanger.n.01 objects 39 +1200 whiteboard eraser whiteboard eraser 9 40 7 otherprop Objects objects 39 +378 iron iron 9 40 7 otherprop Objects n03584829 iron.n.04 objects 39 +591 instrument case instrument case 9 40 7 case otherprop Objects objects 39 +49 toilet paper rolls toilet paper 9 40 7 toilet paper otherprop Objects n15075141 toilet_tissue.n.01 objects 39 +92 soap soap 9 40 7 soap otherprop Objects n04253437 soap.n.01 objects 39 +1098 block block 9 40 7 otherprop Objects misc 40 +291 wall hanging wall hanging 8 40 7 otherprop Objects n03491178 hanging.n.01 picture 6 +1063 kitchen island kitchen island 8 38 7 kitchen island otherstructure Objects n03620600 kitchen_island.n.01 counter 26 +107 pipes pipe 8 38 7 otherstructure Objects misc 40 +1135 toothbrush toothbrush 8 40 7 toothbrush otherprop Objects n04453156 toothbrush.n.01 objects 39 +189 shirt shirt 8 40 7 otherprop Objects n04197391 shirt.n.01 clothes 38 +245 cutting board cutting board 8 40 7 cutting board otherprop Objects n03025513 chopping_board.n.01 objects 39 +194 vase vase 8 40 7 vase otherprop Objects vase jar 3593526 n04522168 vase.n.01 objects 39 +1201 shower control valve shower control valve 8 38 7 otherstructure Objects n04208936 shower.n.01 shower 23 +386 exercise machine exercise machine 8 40 7 machine otherprop Objects gym_equipment 33 +1202 compost bin compost bin 8 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +857 shorts shorts 8 40 7 shorts otherprop Objects clothes 38 +452 tire tire 8 40 7 otherprop 
Objects n04440749 tire.n.01 objects 39 +1203 teddy bear teddy bear 7 40 7 stuffed animal otherprop Objects n04399382 teddy.n.01 objects 39 +346 bathrobe bathrobe 7 40 7 otherprop Objects n02807616 bathrobe.n.01 clothes 38 +152 handrail handrail 7 38 7 railing otherstructure Objects n02788148 bannister.n.02 railing 30 +83 faucet faucet 7 40 7 faucet otherprop Objects faucet 3325088 n03325088 faucet.n.01 misc 40 +1204 pantry wall pantry wall 7 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +726 thermos thermos 7 40 7 flask otherprop Objects bottle bottle 2876657 n04422727 thermos.n.01 objects 39 +61 rug rug 7 40 7 rug otherprop Objects n04118021 rug.n.01 floor 2 +39 couch cushions cushion 7 18 7 pillow pillow Objects n03151500 cushion.n.03 cushion 8 +1117 tripod tripod 7 39 6 stand otherfurniture Furniture n04485082 tripod.n.01 objects 39 +540 mailbox mailbox 7 29 7 box box Objects mailbox 3710193 n03710193 mailbox.n.01 misc 40 +1205 tupperware tupperware 7 40 7 otherprop Objects objects 39 +415 shoe rack shoe rack 7 40 7 shoe rack otherprop Objects shelving 31 +31 towels towel 6 27 7 towel towel Objects n04459362 towel.n.01 towel 20 +1206 beer bottles beer bottle 6 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +153 treadmill treadmill 6 39 6 treadmill otherfurniture Furniture n04477387 treadmill.n.01 gym_equipment 33 +1207 salt salt 6 40 7 otherprop Objects objects 39 +129 chest chest 6 39 6 chest otherfurniture Furniture dresser dresser chest_of_drawers 13 +220 dispenser dispenser 6 40 7 otherprop Objects n03210683 dispenser.n.01 objects 39 +1208 mirror doors mirror door 6 8 12 door door Wall door n03221720 door.n.01 door 4 +231 remote remote 6 40 7 otherprop Objects remote_control 4074963 n04074963 remote_control.n.01 objects 39 +1209 folded ladder folded ladder 6 39 6 ladder otherfurniture Furniture stairs n03632277 ladder.n.01 misc 40 +39 cushion cushion 6 18 7 pillow pillow Objects n03151500 cushion.n.03 cushion 8 +1210 carton carton 6 40 7 otherprop Objects objects 39 +117 step step 6 38 7 otherstructure Objects n04314914 step.n.04 misc 40 +822 drying rack drying rack 6 39 6 drying rack otherfurniture Furniture shelving 31 +238 slippers slipper 6 40 7 shoe otherprop Objects n04241394 slipper.n.01 clothes 38 +143 pool table pool table 6 39 6 pool table otherfurniture Furniture table table table 4379243 n03982430 pool_table.n.01 table 5 +1211 soda stream soda stream 6 40 7 otherprop Objects objects 39 +228 toilet brush toilet brush 6 40 7 toilet brush otherprop Objects objects 39 +494 loft bed loft bed 6 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +226 cooking pot cooking pot 6 40 7 pot otherprop Objects objects 39 +91 heater heater 6 39 6 heater otherfurniture Furniture n03508101 heater.n.01 misc 40 +1072 messenger bag messenger bag 6 37 7 bag bag Objects objects 39 +435 stapler stapler 6 40 7 stapler otherprop Objects n04303497 stapler.n.01 objects 39 +1165 closet walls closet wall 5 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +345 scanner scanner 5 40 7 otherprop Objects appliances 37 +893 elliptical machine elliptical machine 5 40 7 machine otherprop Objects gym_equipment 33 +621 kettle kettle 5 40 7 pot otherprop Objects n03612814 kettle.n.01 objects 39 +1212 metronome metronome 5 40 7 otherprop Objects n03757604 metronome.n.01 objects 39 +297 dumbell dumbell 5 40 7 otherprop Objects objects 39 +1213 music book music book 5 23 2 book books Books n02870526 book.n.11 objects 39 +1214 rice cooker rice cooker 5 40 7 otherprop 
Objects objects 39 +1215 dart board dart board 5 38 7 board otherstructure Objects n03162940 dartboard.n.01 objects 39 +529 sewing machine sewing machine 5 40 7 sewing machine otherprop Objects n04179913 sewing_machine.n.01 objects 39 +1216 grab bar grab bar 5 38 7 railing otherstructure Objects railing 30 +1217 flowerpot flowerpot 5 40 7 vase otherprop Objects vase jar 3593526 n04522168 vase.n.01 objects 39 +1218 painting painting 5 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +1219 railing railing 5 38 7 railing otherstructure Objects n04047401 railing.n.01 railing 30 +1220 stair stair 5 38 7 stairs otherstructure Objects stairs n04314914 step.n.04 stairs 16 +525 toolbox toolbox 5 39 6 chest otherfurniture Furniture n04452615 toolbox.n.01 objects 39 +204 nerf gun nerf gun 5 40 7 otherprop Objects objects 39 +693 binders binder 5 40 7 binder otherprop Objects objects 39 +179 desk lamp desk lamp 5 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +1221 quadcopter quadcopter 5 40 7 otherprop Objects objects 39 +1222 pitcher pitcher 5 40 7 pitcher otherprop Objects n03950228 pitcher.n.02 objects 39 +1223 hanging hanging 5 40 7 otherprop Objects misc 40 +1224 mail mail 5 40 7 otherprop Objects misc 40 +1225 closet ceiling closet ceiling 5 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 ceiling 17 +1226 hoverboard hoverboard 5 40 7 otherprop Objects objects 39 +1227 beanbag chair beanbag chair 5 39 6 bean bag otherfurniture Furniture n02816656 beanbag.n.01 chair 3 +571 water heater water heater 5 40 7 water heater otherprop Objects n04560113 water_heater.n.01 misc 40 +1228 spray bottle spray bottle 5 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +556 rope rope 5 40 7 rope otherprop Objects n04108268 rope.n.01 objects 39 +280 plastic container plastic container 5 40 7 container otherprop Objects objects 39 +1229 soap bottle soap bottle 5 40 7 soap otherprop Objects objects 39 +1230 ikea bag ikea bag 4 37 7 bag bag Objects 2773838 n02773838 bag.n.06 objects 39 +1231 sleeping bag sleeping bag 4 40 7 otherprop Objects n04235860 sleeping_bag.n.01 objects 39 +1232 duffel bag duffel bag 4 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +746 frying pan frying pan 4 40 7 frying pan otherprop Objects n03400231 frying_pan.n.01 objects 39 +1233 oven mitt oven mitt 4 40 7 otherprop Objects objects 39 +1234 pot pot 4 40 7 pot otherprop Objects n04235860 sleeping_bag.n.01 objects 39 +144 hand dryer hand dryer 4 40 7 otherprop Objects objects 39 +282 dollhouse dollhouse 4 39 6 doll house otherfurniture Furniture n03219483 dollhouse.n.01 objects 39 +167 shampoo bottle shampoo bottle 4 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1235 hair brush hair brush 4 40 7 otherprop Objects n02908217 brush.n.02 objects 39 +1236 tennis racket tennis racket 4 40 7 otherprop Objects n04409806 tennis_racket.n.01 objects 39 +1237 display case display case 4 40 7 case otherprop Objects objects 39 +234 ping pong table ping pong table 4 39 6 ping pong table otherfurniture Furniture table table table 4379243 n04379243 table.n.02 table 5 +563 boiler boiler 4 40 7 otherprop Objects misc 40 +1238 bag of coffee beans bag of coffee beans 4 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +1239 bananas banana 4 40 7 otherprop Objects n00021265 food.n.01 objects 39 +1240 carseat carseat 4 40 7 otherprop Objects misc 40 +366 helmet helmet 4 40 7 otherprop Objects helmet 3513137 
n03513137 helmet.n.02 clothes 38 +816 umbrella umbrella 4 40 7 umbrella otherprop Objects n04507155 umbrella.n.01 objects 39 +1241 coffee box coffee box 4 40 7 otherprop Objects objects 39 +719 envelope envelope 4 40 7 envelope otherprop Objects n03291819 envelope.n.01 objects 39 +284 wet floor sign wet floor sign 4 40 7 sign otherprop Objects misc 40 +1242 clothing rack clothing rack 4 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +247 controller controller 4 40 7 otherprop Objects n03096960 control.n.09 objects 39 +1243 bath walls bathroom wall 4 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +1244 podium podium 4 39 6 otherfurniture Furniture n03159640 dais.n.01 furniture 36 +1245 storage box storage box 4 29 7 box box Objects n02883344 box.n.01 objects 39 +1246 dolly dolly 4 40 7 otherprop Objects misc 40 +1247 shampoo shampoo 3 40 7 otherprop Objects n04183516 shampoo.n.01 objects 39 +592 paper tray paper tray 3 40 7 paper tray otherprop Objects objects 39 +385 cabinet door cabinet door 3 8 12 door door Wall door door 4 +1248 changing station changing station 3 40 7 otherprop Objects misc 40 +1249 poster printer poster printer 3 40 7 printer otherprop Objects printer 4004475 n04004475 printer.n.03 appliances 37 +133 screen screen 3 40 7 otherprop Objects n03151077 curtain.n.01 curtain 12 +301 soap bar soap bar 3 38 7 bar otherstructure Objects objects 39 +1250 crutches crutches 3 40 7 otherprop Objects n03141823 crutch.n.01 objects 39 +379 studio light studio light 3 38 7 light otherstructure Objects lighting 28 +130 stack of cups cup 3 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +1251 toilet flush button toilet flush button 3 40 7 otherprop Objects objects 39 +450 trunk trunk 3 40 7 otherprop Objects misc 40 +1252 grocery bag grocery bag 3 37 7 bag bag Objects suitcase 2773838 n03461288 grocery_bag.n.01 objects 39 +316 plastic bin plastic bin 3 40 7 bin otherprop Objects objects 39 +1253 pizza box pizza box 3 29 7 box box Objects objects 39 +385 cabinet doors cabinet door 3 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 door 4 +1254 legs legs 3 31 7 person person Objects person n05217688 person.n.02 misc 40 +461 car car 3 40 7 car otherprop Objects car car 2958343 n02958343 car.n.01 misc 40 +1255 shaving cream shaving cream 3 40 7 otherprop Objects n04186051 shaving_cream.n.01 objects 39 +1256 luggage stand luggage stand 3 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +599 shredder shredder 3 40 7 otherprop Objects n04210120 shredder.n.01 objects 39 +281 statue statue 3 40 7 sculpture otherprop Objects n04306847 statue.n.01 misc 40 +1257 urinal urinal 3 33 7 toilet toilet Objects toilet toilet n04515991 urinal.n.01 toilet 18 +1258 hose hose 3 40 7 otherprop Objects n03539875 hose.n.03 misc 40 +1259 bike pump bike pump 3 40 7 otherprop Objects objects 39 +319 coatrack coatrack 3 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31 +1260 bear bear 3 40 7 otherprop Objects objects 39 +28 wall lamp lamp 3 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +1261 humidifier humidifier 3 40 7 otherprop Objects objects 39 +546 toothpaste toothpaste 3 40 7 toothpaste otherprop Objects objects 39 +1262 mouthwash bottle mouthwash bottle 3 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1263 poster cutter poster cutter 3 40 7 otherprop Objects objects 39 +1264 golf bag golf bag 3 37 7 bag bag Objects suitcase 2773838 n03445617 
golf_bag.n.01 objects 39 +1265 food container food container 3 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1266 camera camera 3 40 7 otherprop Objects objects 39 +28 table lamp lamp 3 35 7 lamp lamp Objects lamp lamp 3636649 n04380533 table_lamp.n.01 lighting 28 +1267 yoga mat yoga mat 3 20 5 floor mat floor mat Floor n03727837 mat.n.01 floor 2 +1268 card card 3 40 7 otherprop Objects objects 39 +1269 mug mug 3 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +188 shower doors shower door 3 38 7 otherstructure Objects n04208936 shower.n.01 door 4 +689 cardboard cardboard 3 40 7 otherprop Objects objects 39 +1270 rack stand rack stand 3 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +1271 boxes of paper boxes of paper 3 29 7 box box Objects n02883344 box.n.01 objects 39 +1272 flag flag 3 40 7 otherprop Objects misc 40 +354 futon futon 3 39 6 mattress otherfurniture Furniture n03408444 futon.n.01 sofa 10 +339 magazine magazine 3 40 7 magazine otherprop Objects n06595351 magazine.n.01 objects 39 +1009 exit sign exit sign 3 40 7 exit sign otherprop Objects misc 40 +1273 rolled poster rolled poster 3 40 7 otherprop Objects objects 39 +1274 wheel wheel 3 40 7 otherprop Objects objects 39 +15 pictures picture 3 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +1275 blackboard eraser blackboard eraser 3 40 7 eraser otherprop Objects n03294833 eraser.n.01 objects 39 +361 organizer organizer 3 40 7 otherprop Objects n03918737 personal_digital_assistant.n.01 objects 39 +1276 doll doll 3 40 7 toy otherprop Objects n03219135 doll.n.01 objects 39 +326 book rack book rack 3 39 6 bookrack otherfurniture Furniture objects 39 +1277 laundry bag laundry bag 3 40 7 laundry basket otherprop Objects basket 2801938 n03050864 clothes_hamper.n.01 objects 39 +1278 sponge sponge 3 40 7 otherprop Objects n01906749 sponge.n.04 objects 39 +116 seating seat 3 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 furniture 36 +1184 folded chairs folded chair 2 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1279 lotion bottle lotion bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +212 can can 2 40 7 can otherprop Objects can 2946921 n02946921 can.n.01 objects 39 +1280 lunch box lunch box 2 40 7 otherprop Objects objects 39 +1281 food display food display 2 40 7 otherprop Objects misc 40 +794 storage shelf storage shelf 2 40 7 otherprop Objects shelving 31 +1282 sliding wood door sliding wood door 2 40 7 otherprop Objects door 4 +955 pants pants 2 40 7 otherprop Objects n04489008 trouser.n.01 clothes 38 +387 wood wood 2 40 7 otherprop Objects misc 40 +69 boards board 2 38 7 board otherstructure Objects board_panel 35 +65 bottles bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +523 washcloth washcloth 2 40 7 otherprop Objects n04554523 washcloth.n.01 towel 20 +389 workbench workbench 2 39 6 bench otherfurniture Furniture bench table 4379243 n04600486 workbench.n.01 table 5 +29 open kitchen cabinet kitchen cabinet 2 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 cabinet 7 +1283 organizer shelf organizer shelf 2 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +146 frame frame 2 38 7 otherstructure Objects misc 40 +130 cups cup 2 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +372 exercise ball exercise ball 2 40 7 ball 
otherprop Objects n04285146 sports_equipment.n.01 gym_equipment 33 +289 easel easel 2 39 6 stand otherfurniture Furniture n03262809 easel.n.01 furniture 36 +440 garbage bag garbage bag 2 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +321 roomba roomba 2 40 7 otherprop Objects objects 39 +976 garage door garage door 2 38 7 garage door otherstructure Objects door door 4 +1256 luggage rack luggage stand 2 39 6 stand otherfurniture Furniture n04038440 shelving 31 +1284 bike lock bike lock 2 40 7 otherprop Objects objects 39 +1285 briefcase briefcase 2 40 7 otherprop Objects n02900705 briefcase.n.01 objects 39 +357 hand towel hand towel 2 27 7 towel towel Objects n03490006 hand_towel.n.01 towel 20 +1286 bath products bath product 2 40 7 otherprop Objects objects 39 +1287 star star 2 40 7 otherprop Objects n09444783 star.n.03 misc 40 +365 map map 2 40 7 map otherprop Objects n03720163 map.n.01 misc 40 +1288 coffee bean bag coffee bean bag 2 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +81 headboard headboard 2 39 6 headboard otherfurniture Furniture n03502200 headboard.n.01 bed 11 +1289 ipad ipad 2 40 7 otherprop Objects objects 39 +1290 display rack display rack 2 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +948 traffic cone traffic cone 2 40 7 cone otherprop Objects cone objects 39 +174 toiletry toiletry 2 40 7 otherprop Objects n04447443 toiletry.n.01 objects 39 +1028 canopy canopy 2 40 7 otherprop Objects misc 40 +1291 massage chair massage chair 2 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1292 paper organizer paper organizer 2 40 7 otherprop Objects objects 39 +1005 barricade barricade 2 40 7 otherprop Objects misc 40 +235 platform platform 2 38 7 otherstructure Objects misc 40 +1293 cap cap 2 40 7 hat otherprop Objects n03497657 hat.n.01 clothes 38 +1294 dumbbell plates dumbbell plates 2 40 7 otherprop Objects objects 39 +1295 elevator elevator 2 38 7 otherstructure Objects misc 40 +1296 cooking pan cooking pan 2 40 7 pan otherprop Objects n03880531 pan.n.01 objects 39 +1297 trash bag trash bag 2 37 7 bag bag Objects objects 39 +1298 santa santa 2 40 7 otherprop Objects misc 40 +1299 jewelry box jewelry box 2 29 7 box box Objects n02883344 box.n.01 objects 39 +1300 boat boat 2 40 7 otherprop Objects misc 40 +1301 sock sock 2 21 7 clothes clothes Objects n04254777 sock.n.01 clothes 38 +1051 kinect kinect 2 40 7 kinect otherprop Objects objects 39 +566 crib crib 2 39 6 crib otherfurniture Furniture furniture 36 +1302 plastic storage bin plastic storage bin 2 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1062 cooler cooler 2 24 6 refridgerator refridgerator Furniture n03102654 cooler.n.01 appliances 37 +1303 kitchen apron kitchen apron 2 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +1304 dishwashing soap bottle dishwashing soap bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1305 xbox controller xbox controller 2 40 7 otherprop Objects objects 39 +1306 banana holder banana holder 2 40 7 otherprop Objects objects 39 +298 ping pong paddle ping pong paddle 2 40 7 otherprop Objects table 5 +1307 airplane airplane 2 40 7 otherprop Objects misc 40 +1308 conditioner bottle conditioner bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1309 tea kettle tea kettle 2 40 7 tea kettle otherprop Objects n04397768 teakettle.n.01 objects 39 +43 bedframe bedframe 2 39 6 
otherfurniture Furniture n02822579 bedstead.n.01 bed 11 +1310 wood beam wood beam 2 38 7 otherstructure Objects beam 29 +593 toilet paper package toilet paper package 2 40 7 otherprop Objects objects 39 +1311 wall mounted coat rack wall mounted coat rack 2 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31 +1312 film light film light 2 40 7 otherprop Objects lighting 28 +749 ceiling lamp ceiling lamp 1 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +623 chain chain 1 40 7 otherprop Objects chair 3 +1313 sofa sofa 1 6 9 sofa sofa Sofa sofa sofa sofa 4256520 n04256520 sofa.n.01 sofa 10 +99 closet wardrobe wardrobe 1 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +265 sweater sweater 1 40 7 otherprop Objects n04370048 sweater.n.01 clothes 38 +1314 kitchen mixer kitchen mixer 1 40 7 otherprop Objects appliances 37 +99 wardrobe wardrobe 1 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +1315 water softener water softener 1 40 7 otherprop Objects misc 40 +448 banister banister 1 38 7 banister otherstructure Objects n02788148 bannister.n.02 railing 30 +257 trolley trolley 1 40 7 trolley otherprop Objects n04335435 streetcar.n.01 misc 40 +1316 pantry shelf pantry shelf 1 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +786 sofa bed sofa bed 1 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +801 loofa loofa 1 40 7 otherprop Objects objects 39 +972 shower faucet handle shower faucet handle 1 40 7 handle otherprop Objects shower 23 +1317 toy piano toy piano 1 40 7 toy otherprop Objects n03964744 plaything.n.01 objects 39 +1318 fish fish 1 40 7 otherprop Objects n02512053 fish.n.01 objects 39 +75 file cabinets file cabinet 1 3 6 cabinet cabinet Furniture cabinet 2933112 n03337140 file.n.03 cabinet 7 +657 cat litter box cat litter box 1 29 7 box box Objects objects 39 +561 electric panel electric panel 1 40 7 otherprop Objects misc 40 +93 suitcases suitcase 1 40 7 luggage otherprop Objects n02774630 baggage.n.01 objects 39 +513 curtain rod curtain rod 1 38 7 curtain rod otherstructure Objects curtain 12 +411 bunk bed bunk bed 1 39 6 bunk bed otherfurniture Furniture bed bed bed 2818832 n02920259 bunk_bed.n.01 bed 11 +1122 chandelier chandelier 1 38 7 chandelier otherstructure Objects n03005285 chandelier.n.01 lighting 28 +922 tape tape 1 40 7 tape otherprop Objects objects 39 +88 plates plate 1 40 7 otherprop Objects n03959485 plate.n.04 objects 39 +518 alarm alarm 1 40 7 alarm otherprop Objects clock 3046257 n02694662 alarm_clock.n.01 objects 39 +814 fire hose fire hose 1 40 7 otherprop Objects n03346004 fire_hose.n.01 misc 40 +1319 toy dinosaur toy dinosaur 1 40 7 toy otherprop Objects n03964744 plaything.n.01 objects 39 +1320 cone cone 1 40 7 otherprop Objects objects 39 +649 glass doors glass door 1 8 12 door door Wall door n03221720 door.n.01 door 4 +607 hatrack hatrack 1 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31 +819 subwoofer subwoofer 1 40 7 speaker otherprop Objects speaker 3691459 n04349401 subwoofer.n.01 objects 39 +1321 fire sprinkler fire sprinkler 1 40 7 otherprop Objects misc 40 +1322 trash cabinet trash cabinet 1 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +1204 pantry walls pantry wall 1 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +227 photo photo 1 40 7 photo otherprop Objects n03925226 photograph.n.01 picture 6 +817 barrier barrier 1 40 7 otherprop Objects 
n02796623 barrier.n.01 misc 40 +130 stacks of cups cup 1 40 7 otherprop Objects n03147509 cup.n.01 objects 39 +712 beachball beachball 1 40 7 ball otherprop Objects n02814224 beach_ball.n.01 objects 39 +1323 folded boxes folded boxes 1 40 7 otherprop Objects objects 39 +1324 contact lens solution bottle contact lens solution bottle 1 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +673 covered box covered box 1 29 7 box box Objects objects 39 +459 folder folder 1 40 7 folder otherprop Objects n03376279 folder.n.02 objects 39 +643 mail trays mail tray 1 40 7 mail tray otherprop Objects objects 39 +238 slipper slipper 1 40 7 otherprop Objects n04241394 slipper.n.01 clothes 38 +765 magazine rack magazine rack 1 39 6 stand otherfurniture Furniture n03704549 magazine_rack.n.01 shelving 31 +1008 sticker sticker 1 40 7 sticker otherprop Objects n07272545 gummed_label.n.01 objects 39 +225 lotion lotion 1 40 7 otherprop Objects n03690938 lotion.n.01 objects 39 +1083 buddha buddha 1 40 7 otherprop Objects objects 39 +813 file organizer file organizer 1 40 7 otherprop Objects objects 39 +138 paper towel rolls paper towel roll 1 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20 +1145 night lamp night lamp 1 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +796 fuse box fuse box 1 40 7 otherprop Objects misc 40 +1325 knife block knife block 1 40 7 otherprop Objects objects 39 +363 furnace furnace 1 39 6 furnace otherfurniture Furniture n03404449 furnace.n.01 +1174 cd cases cd case 1 40 7 otherprop Objects objects 39 +38 stools stool 1 40 7 stool otherprop Objects stool n04326896 stool.n.01 stool 19 +1326 hand sanitzer dispenser hand sanitzer dispenser 1 40 7 otherprop Objects n04254120 soap_dispenser.n.01 objects 39 +997 teapot teapot 1 40 7 tea pot otherprop Objects n04398044 teapot.n.01 objects 39 +1327 pen holder pen holder 1 40 7 otherprop Objects objects 39 +1328 tray rack tray rack 1 40 7 otherprop Objects objects 39 +1329 wig wig 1 40 7 otherprop Objects n04584207 wig.n.01 objects 39 +182 switch switch 1 40 7 otherprop Objects n04372370 switch.n.01 misc 40 +280 plastic containers plastic container 1 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1330 night light night light 1 40 7 otherprop Objects lighting 28 +1331 notepad notepad 1 40 7 otherprop Objects objects 39 +1332 mail bin mail bin 1 40 7 otherprop Objects misc 40 +1333 elevator button elevator button 1 40 7 otherprop Objects misc 40 +939 gaming wheel gaming wheel 1 40 7 otherprop Objects objects 39 +1334 drum set drum set 1 40 7 otherprop Objects objects 39 +480 cosmetic bag cosmetic bag 1 37 7 bag bag Objects objects 39 +907 coffee mug coffee mug 1 40 7 vessel otherprop Objects cup or mug 3797390 n03063599 coffee_mug.n.01 objects 39 +1335 closet shelf closet shelf 1 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +1336 baby mobile baby mobile 1 40 7 otherprop Objects objects 39 +829 diaper bin diaper bin 1 40 7 bin otherprop Objects objects 39 +947 door wall door wall 1 1 12 wall wall Wall wall 1 +1116 stepstool stepstool 1 40 7 step stool otherprop Objects objects 39 +599 paper shredder shredder 1 40 7 otherprop Objects n04210120 shredder.n.01 objects 39 +733 dress rack dress rack 1 40 7 otherprop Objects n03238762 dress_rack.n.01 misc 40 +123 cover cover 1 40 7 blanket otherprop Objects objects 39 +506 shopping bag shopping bag 1 37 7 bag bag Objects n04204081 shopping_bag.n.01 
objects 39 +569 sliding door sliding door 1 8 12 door door Wall door n04239074 sliding_door.n.01 door 4 +1337 exercise bike exercise bike 1 40 7 machine otherprop Objects n04210120 shredder.n.01 gym_equipment 33 +1338 recliner chair recliner chair 1 5 4 chair chair Chair chair chair chair 3001627 n03238762 dress_rack.n.01 chair 3 +1314 kitchenaid mixer kitchen mixer 1 40 7 otherprop Objects appliances 37 +1339 soda can soda can 1 40 7 can otherprop Objects can 2946921 n02946921 can.n.01 objects 39 +1340 stovetop stovetop 1 38 7 stove otherstructure Objects stove 4330267 n04330267 stove.n.02 appliances 37 +851 stepladder stepladder 1 39 6 ladder otherfurniture Furniture stairs n04315599 step_ladder.n.01 stairs 16 +142 tap tap 1 40 7 faucet otherprop Objects faucet 3325088 n04559451 water_faucet.n.01 objects 39 +436 cable cable 1 40 7 cables otherprop Objects objects 39 +1341 baby changing station baby changing station 1 39 6 otherfurniture Furniture furniture 36 +1342 costume costume 1 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +885 rocking chair rocking chair 1 5 4 chair chair Chair chair chair chair 3001627 n04099969 rocking_chair.n.01 chair 3 +693 binder binder 1 40 7 binder otherprop Objects objects 39 +815 media center media center 1 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +401 towel rack towel rack 1 40 7 otherprop Objects n04459773 towel_rack.n.01 misc 40 +1343 medal medal 1 40 7 otherprop Objects objects 39 +1184 stack of folded chairs folded chair 1 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1344 telescope telescope 1 40 7 otherprop Objects n04403638 telescope.n.01 objects 39 +1345 closet doorframe closet doorframe 1 8 12 door door Wall door door 4 +160 glass glass 1 38 7 glass otherstructure Objects n03438257 glass.n.02 misc 40 +1126 baseball cap baseball cap 1 40 7 otherprop Objects cap 2954340 n02799323 baseball_cap.n.01 clothes 38 +1346 battery disposal jar battery disposal jar 1 40 7 jar otherprop Objects jar 3593526 n03593526 jar.n.01 objects 39 +332 mop mop 1 40 7 otherprop Objects n04367480 swab.n.02 objects 39 +397 tank tank 1 40 7 otherprop Objects objects 39 +643 mail tray mail tray 1 40 7 mail tray otherprop Objects objects 39 +551 centerpiece centerpiece 1 40 7 centerpiece otherprop Objects n02994419 centerpiece.n.02 objects 39 +1163 stick stick 1 40 7 stick otherprop Objects objects 39 +1347 closet floor closet floor 1 2 5 floor floor Floor n03365592 floor.n.01 floor 2 +1348 dryer sheets dryer sheets 1 40 7 otherprop Objects objects 39 +803 bycicle bycicle 1 40 7 otherprop Objects misc 40 +484 flower stand flower stand 1 39 6 stand otherfurniture Furniture furniture 36 +1349 air mattress air mattress 1 4 1 bed bed Bed bed bed bed 2818832 n02690809 air_mattress.n.01 bed 11 +1350 clip clip 1 40 7 otherprop Objects objects 39 +222 side table side table 1 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +1253 pizza boxes pizza box 1 29 7 box box Objects n02883344 box.n.01 objects 39 +1351 display display 1 39 7 otherfurniture Furniture n03211117 display.n.06 misc 40 +1352 postcard postcard 1 40 7 otherprop Objects objects 39 +828 display sign display sign 1 40 7 sign otherprop Objects misc 40 +1353 paper towel paper towel 1 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20 +612 boots boot 1 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +1354 tennis racket bag tennis racket bag 1 40 7 otherprop Objects objects 39 +1355 
air hockey table air hockey table 1 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +1301 socks sock 1 21 7 clothes clothes Objects n04254777 sock.n.01 clothes 38 +1356 food bag food bag 1 37 7 bag bag Objects objects 39 +1199 clothes hangers clothes hanger 1 40 7 otherprop Objects n03057920 coat_hanger.n.01 misc 40 +1357 starbucks cup starbucks cup 1 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels.combined.tsv b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels.combined.tsv new file mode 100644 index 0000000000000000000000000000000000000000..cff61b132f3ebf4edd513445b76fd39db54462d2 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2-labels.combined.tsv @@ -0,0 +1,608 @@ +id raw_category category count nyu40id eigen13id nyuClass nyu40class eigen13class ModelNet40 ModelNet10 ShapeNetCore55 synsetoffset wnsynsetid wnsynsetkey mpcat40 mpcat40index +1 wall wall 8277 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +2 chair chair 4646 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +22 books book 1678 23 2 book books Books n02870526 book.n.11 objects 39 +3 floor floor 1553 2 5 floor floor Floor n03365592 floor.n.01 floor 2 +5 door door 1483 8 12 door door Wall door n03221720 door.n.01 door 4 +1163 object object 1313 40 7 otherprop Objects objects 39 +16 window window 1209 9 13 window window Window n04587648 window.n.01 window 9 +4 table table 1170 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +56 trash can trash can 1090 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +13 pillow pillow 937 18 7 pillow pillow Objects pillow 3938244 n03938244 pillow.n.01 cushion 8 +15 picture picture 862 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +41 ceiling ceiling 806 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 ceiling 17 +26 box box 775 29 7 box box Objects n02883344 box.n.01 objects 39 +161 doorframe doorframe 768 8 12 door door Wall door doorframe.n.01 door 4 +19 monitor monitor 765 40 7 monitor otherprop Objects monitor monitor tv or monitor 3211117 n03782190 monitor.n.04 objects 39 +7 cabinet cabinet 731 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +9 desk desk 680 14 10 desk desk Table desk desk table 4379243 n03179701 desk.n.01 table 5 +8 shelf shelf 641 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +10 office chair office chair 595 5 4 chair chair Chair chair chair chair 3001627 n04373704 swivel_chair.n.01 chair 3 +31 towel towel 570 27 7 towel towel Objects n04459362 towel.n.01 towel 20 +6 couch couch 502 6 9 sofa sofa Sofa sofa sofa sofa 4256520 n04256520 sofa.n.01 sofa 10 +14 sink sink 488 34 7 sink sink Objects sink n04223580 sink.n.01 sink 15 +48 backpack backpack 479 40 7 backpack otherprop Objects n02769748 backpack.n.01 objects 39 +28 lamp lamp 419 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +11 bed bed 370 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +18 bookshelf bookshelf 360 10 6 bookshelf bookshelf Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +71 mirror mirror 349 19 7 mirror mirror Objects n03773035 mirror.n.01 mirror 21 +21 curtain curtain 347 16 13 curtain curtain Window curtain 
n03151077 curtain.n.01 curtain 12 +40 plant plant 331 40 7 plant otherprop Objects plant n00017222 plant.n.02 plant 14 +52 whiteboard whiteboard 327 30 7 whiteboard whiteboard Objects n03211616 display_panel.n.01 board_panel 35 +96 radiator radiator 322 39 6 radiator otherfurniture Furniture n04041069 radiator.n.02 misc 40 +22 book book 318 23 2 book books Books n02870526 book.n.11 objects 39 +29 kitchen cabinet kitchen cabinet 310 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 cabinet 7 +49 toilet paper toilet paper 291 40 7 toilet paper otherprop Objects n15075141 toilet_tissue.n.01 objects 39 +29 kitchen cabinets kitchen cabinet 289 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +23 armchair armchair 281 5 4 chair chair Chair chair chair chair 3001627 n02738535 armchair.n.01 chair 3 +63 shoes shoe 272 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +24 coffee table coffee table 258 7 10 coffee table table Table table table table 4379243 n03063968 coffee_table.n.01 table 5 +17 toilet toilet 256 33 7 toilet toilet Objects toilet toilet n04446276 toilet.n.01 toilet 18 +47 bag bag 252 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +32 clothes clothes 248 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +46 keyboard keyboard 246 40 7 keyboard otherprop Objects keyboard computer keyboard 3085013 n03085013 computer_keyboard.n.01 objects 39 +65 bottle bottle 226 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +97 recycling bin recycling bin 225 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +34 nightstand nightstand 224 32 6 night stand night stand Furniture night_stand night_stand n03015254 chest_of_drawers.n.01 chest_of_drawers 13 +38 stool stool 221 40 7 stool otherprop Objects stool n04326896 stool.n.01 stool 19 +33 tv tv 219 25 11 television television TV tv or monitor 3211117 n03211117 display.n.06 tv_monitor 22 +75 file cabinet file cabinet 217 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +36 dresser dresser 213 17 6 dresser dresser Furniture dresser dresser n03015254 chest_of_drawers.n.01 chest_of_drawers 13 +64 computer tower computer tower 203 40 7 computer otherprop Objects n03082979 computer.n.01 objects 39 +32 clothing clothes 165 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +101 telephone telephone 164 40 7 telephone otherprop Objects telephone 4401088 n04401088 telephone.n.01 objects 39 +130 cup cup 157 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +27 refrigerator refrigerator 154 24 6 refridgerator refridgerator Furniture n04070727 refrigerator.n.01 appliances 37 +44 end table end table 147 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +131 jacket jacket 146 40 7 jacket otherprop Objects n03589791 jacket.n.01 clothes 38 +55 shower curtain shower curtain 144 28 7 shower curtain shower curtain Objects curtain n04209239 shower_curtain.n.01 curtain 12 +42 bathtub bathtub 144 36 7 bathtub bathtub Objects bathtub bathtub tub 2808440 n02808440 bathtub.n.01 bathtub 25 +59 microwave microwave 141 40 7 microwave otherprop Objects microwave 3761084 n03761084 microwave.n.02 appliances 37 +159 kitchen counter kitchen counter 140 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26 +74 sofa chair sofa chair 129 5 4 chair chair Chair chair chair chair 3001627 n03001627 
chair.n.01 chair 3 +82 paper towel dispenser paper towel dispenser 129 40 7 paper towel dispenser otherprop Objects objects 39 +1164 bathroom vanity bathroom vanity 126 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 table 5 +93 suitcase suitcase 118 40 7 luggage otherprop Objects n02773838 bag.n.06 objects 39 +77 laptop laptop 111 40 7 laptop otherprop Objects laptop laptop 3642806 n03642806 laptop.n.01 objects 39 +67 ottoman ottoman 111 39 6 ottoman otherfurniture Furniture stool n03380724 footstool.n.01 stool 19 +128 shower walls shower wall 109 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +50 printer printer 106 40 7 printer otherprop Objects printer 4004475 n04004475 printer.n.03 appliances 37 +35 counter counter 104 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26 +69 board board 100 38 7 board otherstructure Objects board_panel 35 +100 soap dispenser soap dispenser 99 40 7 otherprop Objects n04254120 soap_dispenser.n.01 objects 39 +62 stove stove 95 38 7 stove otherstructure Objects stove 4330267 n04330267 stove.n.02 appliances 37 +105 light light 93 38 7 light otherstructure Objects n03665366 light.n.02 lighting 28 +1165 closet wall closet wall 90 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +165 mini fridge mini fridge 87 24 6 refridgerator refridgerator Furniture n03273913 electric_refrigerator.n.01 appliances 37 +7 cabinets cabinet 79 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +5 doors door 76 8 12 door door Wall door n03221720 door.n.01 door 4 +76 fan fan 75 40 7 fan otherprop Objects n03320046 fan.n.01 misc 40 +230 tissue box tissue box 73 40 7 tissue box otherprop Objects n02883344 box.n.01 objects 39 +54 blanket blanket 72 40 7 blanket otherprop Objects n02849154 blanket.n.01 objects 39 +125 bathroom stall bathroom stall 71 38 7 otherstructure Objects n02873839 booth.n.02 misc 40 +72 copier copier 70 40 7 otherprop Objects n03257586 duplicator.n.01 appliances 37 +68 bench bench 66 39 6 bench otherfurniture Furniture bench bench 2828884 n02828884 bench.n.01 seating 34 +145 bar bar 66 38 7 bar otherstructure Objects n02788689 bar.n.03 misc 40 +157 soap dish soap dish 65 40 7 soap dish otherprop Objects n04254009 soap_dish.n.01 objects 39 +1166 laundry hamper laundry hamper 65 40 7 laundry basket otherprop Objects objects 39 +132 storage bin storage bin 63 40 7 storage bin otherprop Objects objects 39 +1167 bathroom stall door bathroom stall door 62 8 12 door door Wall door n03221720 door.n.01 door 4 +232 light switch light switch 61 38 7 light switch otherstructure Objects n04372370 switch.n.01 misc 40 +134 coffee maker coffee maker 61 40 7 otherprop Objects n03063338 coffee_maker.n.01 appliances 37 +51 tv stand tv stand 61 39 6 tv stand otherfurniture Furniture tv_stand n03290653 entertainment_center.n.01 furniture 36 +250 decoration decoration 60 40 7 otherprop Objects n03169390 decoration.n.01 misc 40 +1168 ceiling light ceiling light 59 38 7 light otherstructure Objects n03665366 light.n.02 lighting 28 +342 range hood range hood 59 38 7 range hood otherstructure Objects range_hood n04053677 range_hood.n.01 misc 40 +89 blackboard blackboard 58 38 7 blackboard otherstructure Objects n02846511 blackboard.n.01 board_panel 35 +103 clock clock 58 40 7 clock otherprop Objects clock 3046257 n03046257 clock.n.01 objects 39 +99 wardrobe closet wardrobe 54 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +95 rail rail 53 38 7 railing 
otherstructure Objects n04047401 railing.n.01 railing 30 +154 bulletin board bulletin board 53 38 7 board otherstructure Objects n03211616 display_panel.n.01 board_panel 35 +140 mat mat 52 20 5 floor mat floor mat Floor n03727837 mat.n.01 floor 2 +1169 trash bin trash bin 52 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +193 ledge ledge 51 38 7 otherstructure Objects n09337253 ledge.n.01 misc 40 +116 seat seat 49 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 furniture 36 +202 mouse mouse 49 40 7 mouse otherprop Objects n03793489 mouse.n.04 objects 39 +73 basket basket 48 40 7 basket otherprop Objects basket 2801938 n02801938 basket.n.01 objects 39 +78 shower shower 48 38 7 otherstructure Objects n04208936 shower.n.01 shower 23 +1170 dumbbell dumbbell 48 40 7 otherprop Objects n03255030 dumbbell.n.01 objects 39 +79 paper paper 46 26 7 paper paper Objects n14974264 paper.n.01 objects 39 +80 person person 46 31 7 person person Objects person n05217688 person.n.02 misc 40 +141 windowsill windowsill 45 38 7 otherstructure Objects n04590263 windowsill.n.01 window 9 +57 closet closet 45 39 6 wardrobe otherfurniture Furniture wardrobe misc 40 +102 bucket bucket 45 40 7 bucket otherprop Objects n02909870 bucket.n.01 misc 40 +261 sign sign 44 40 7 sign otherprop Objects n04217882 signboard.n.01 objects 39 +118 speaker speaker 43 40 7 speaker otherprop Objects speaker 3691459 n03691459 loudspeaker.n.01 objects 39 +136 dishwasher dishwasher 43 38 7 dishwasher otherstructure Objects dishwasher 3207941 n03207941 dishwasher.n.01 appliances 37 +98 container container 43 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1171 stair rail stair rail 42 38 7 banister otherstructure Objects n02788148 bannister.n.02 railing 30 +170 shower curtain rod shower curtain rod 42 40 7 otherprop Objects curtain 12 +1172 tube tube 41 40 7 otherprop Objects misc 40 +1173 bathroom cabinet bathroom cabinet 39 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +79 papers paper 39 26 7 paper paper Objects n14974264 paper.n.01 objects 39 +221 storage container storage container 39 40 7 container otherprop Objects objects 39 +570 paper bag paper bag 39 37 7 bag bag Objects n04122825 sack.n.01 objects 39 +138 paper towel roll paper towel roll 39 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20 +168 ball ball 39 40 7 ball otherprop Objects objects 39 +276 closet doors closet door 38 8 12 door door Wall door n03221720 door.n.01 door 4 +106 laundry basket laundry basket 37 40 7 laundry basket otherprop Objects basket 2801938 n03050864 clothes_hamper.n.01 objects 39 +214 cart cart 37 40 7 cart otherprop Objects n03484083 handcart.n.01 shelving 31 +276 closet door closet door 35 8 12 door door Wall door n03221720 door.n.01 door 4 +323 dish rack dish rack 35 40 7 dish rack otherprop Objects n03207630 dish_rack.n.01 objects 39 +58 stairs stairs 35 38 7 stairs otherstructure Objects n04298308 stairway.n.01 stairs 16 +86 blinds blinds 35 13 13 blinds blinds Window n02851099 blind.n.03 blinds 32 +2 stack of chairs chair 35 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +399 purse purse 34 40 7 purse otherprop Objects n02774152 bag.n.04 objects 39 +121 bicycle bicycle 33 40 7 bicycle otherprop Objects bicycle 2834778 n02834778 bicycle.n.01 objects 39 +185 tray tray 32 40 7 tray otherprop Objects n04476259 tray.n.01 objects 39 +300 plunger plunger 30 40 7 otherprop Objects n03970156 
plunger.n.03 objects 39 +180 paper cutter paper cutter 30 40 7 paper cutter otherprop Objects n03886940 paper_cutter.n.01 objects 39 +163 toilet paper dispenser toilet paper dispenser 29 40 7 otherprop Objects objects 39 +26 boxes box 29 29 7 box box Objects n02883344 box.n.01 objects 39 +66 bin bin 28 40 7 bin otherprop Objects n02839910 bin.n.01 objects 39 +208 toilet seat cover dispenser toilet seat cover dispenser 28 40 7 otherprop Objects objects 39 +112 guitar guitar 28 40 7 guitar otherprop Objects guitar guitar 3467517 n03467517 guitar.n.01 objects 39 +540 mailboxes mailbox 28 29 7 box box Objects mailbox 3710193 n03710193 mailbox.n.01 misc 40 +395 handicap bar handicap bar 27 38 7 bar otherstructure Objects misc 40 +166 fire extinguisher fire extinguisher 27 40 7 fire extinguisher otherprop Objects n03345837 fire_extinguisher.n.01 misc 40 +122 ladder ladder 27 39 6 ladder otherfurniture Furniture stairs n03632277 ladder.n.01 stairs 16 +120 column column 26 38 7 column otherstructure Objects n03074380 column.n.06 column 24 +107 pipe pipe 25 40 7 pipe otherprop Objects n03944672 pipe.n.02 misc 40 +283 vacuum cleaner vacuum cleaner 25 40 7 otherprop Objects n04517823 vacuum.n.04 objects 39 +88 plate plate 24 40 7 plate otherprop Objects n03959485 plate.n.04 objects 39 +90 piano piano 24 39 6 piano otherfurniture Furniture piano piano 3928116 n03928116 piano.n.01 furniture 36 +177 water cooler water cooler 24 39 6 water cooler otherfurniture Furniture n04559166 water_cooler.n.01 misc 40 +1174 cd case cd case 24 40 7 otherprop Objects objects 39 +562 bowl bowl 24 40 7 bowl otherprop Objects bowl bowl 2880940 n02880940 bowl.n.03 objects 39 +1175 closet rod closet rod 24 40 7 otherprop Objects n04100174 rod.n.01 misc 40 +1156 bathroom counter bathroom counter 24 12 6 counter counter Furniture table table table 4379243 n03116530 counter.n.01 counter 26 +84 oven oven 23 38 7 oven otherstructure Objects n03862676 oven.n.01 appliances 37 +104 stand stand 23 39 6 stand otherfurniture Furniture table table table 4379243 n04301000 stand.n.04 table 5 +229 scale scale 23 40 7 scale otherprop Objects n04141975 scale.n.07 objects 39 +70 washing machine washing machine 23 39 6 washing machine otherfurniture Furniture washing_machine 4554684 n04554684 washer.n.03 appliances 37 +325 broom broom 22 40 7 broom otherprop Objects n02906734 broom.n.01 objects 39 +169 hat hat 22 40 7 hat otherprop Objects n03497657 hat.n.01 clothes 38 +128 shower wall shower wall 22 1 12 wall wall Wall n04208936 shower.n.01 wall 1 +331 guitar case guitar case 21 40 7 guitar case otherprop Objects objects 39 +87 rack rack 21 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +488 water pitcher water pitcher 21 40 7 pitcher otherprop Objects n03950228 pitcher.n.02 objects 39 +776 laundry detergent laundry detergent 21 40 7 otherprop Objects objects 39 +370 hair dryer hair dryer 21 40 7 hair dryer otherprop Objects n03483316 hand_blower.n.01 objects 39 +191 pillar pillar 21 38 7 column otherstructure Objects n03073977 column.n.07 column 24 +748 divider divider 20 40 7 otherprop Objects wall 1 +242 power outlet power outlet 19 40 7 otherprop Objects misc 40 +45 dining table dining table 19 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +417 shower floor shower floor 19 2 5 floor floor Floor n04208936 shower.n.01 floor 2 +70 washing machines washing machine 19 39 6 washing machine otherfurniture Furniture washing_machine 4554684 n04554684 washer.n.03 appliances 37 +188 shower door 
shower door 19 8 12 door door Wall door n04208936 shower.n.01 door 4 +1176 coffee kettle coffee kettle 18 40 7 pot otherprop Objects n03612814 kettle.n.01 objects 39 +1177 wardrobe cabinet wardrobe 18 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +1178 structure structure 18 38 7 otherstructure Objects misc 40 +18 bookshelves bookshelf 17 10 6 bookshelf bookshelf Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +110 clothes dryer clothes dryer 17 39 6 otherfurniture Furniture n03251766 dryer.n.01 appliances 37 +148 toaster toaster 17 40 7 toaster otherprop Objects n04442312 toaster.n.02 appliances 37 +63 shoe shoe 17 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +155 ironing board ironing board 16 39 6 ironing board otherfurniture Furniture n03586090 ironing_board.n.01 objects 39 +572 alarm clock alarm clock 16 40 7 alarm clock otherprop Objects clock 3046257 n02694662 alarm_clock.n.01 objects 39 +1179 shower head shower head 15 38 7 otherstructure Objects shower 23 +28 lamp base lamp 15 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +392 water bottle water bottle 15 40 7 bottle otherprop Objects bottle bottle 2876657 n04557648 water_bottle.n.01 objects 39 +1180 keyboard piano keyboard piano 15 39 6 piano otherfurniture Furniture piano piano 3928116 n03928116 piano.n.01 furniture 36 +609 projector screen projector screen 15 38 7 projector screen otherstructure Objects misc 40 +1181 case of water bottles case of water bottles 15 40 7 otherprop Objects objects 39 +195 toaster oven toaster oven 14 40 7 toaster oven otherprop Objects n04442441 toaster_oven.n.01 appliances 37 +581 music stand music stand 14 39 6 music stand otherfurniture Furniture n03801760 music_stand.n.01 furniture 36 +58 staircase stairs 14 38 7 stairs otherstructure Objects n04298308 stairway.n.01 stairs 16 +1182 coat rack coat rack 14 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 3 +1183 storage organizer storage organizer 14 40 7 otherprop Objects shelving 3 +139 machine machine 14 40 7 machine otherprop Objects n03699975 machine.n.01 appliances 37 +1184 folded chair folded chair 14 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1185 fire alarm fire alarm 14 40 7 otherprop Objects n03343737 fire_alarm.n.02 misc 40 +156 fireplace fireplace 13 38 7 fireplace otherstructure Objects n03346455 fireplace.n.01 fireplace 27 +408 vent vent 13 40 7 otherprop Objects n04526241 vent.n.01 misc 40 +213 furniture furniture 13 39 6 furniture otherfurniture Furniture n03405725 furniture.n.01 furniture 36 +1186 power strip power strip 13 40 7 otherprop Objects objects 39 +1187 calendar calendar 13 40 7 otherprop Objects objects 39 +1188 poster poster 13 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +115 toilet paper holder toilet paper holder 13 40 7 toilet paper holder otherprop Objects objects 39 +1189 potted plant potted plant 12 40 7 plant otherprop Objects plant n00017222 plant.n.02 plant 14 +304 stuffed animal stuffed animal 12 40 7 stuffed animal otherprop Objects n04399382 teddy.n.01 objects 39 +1190 luggage luggage 12 40 7 luggage otherprop Objects n02774630 baggage.n.01 objects 39 +21 curtains curtain 12 16 13 curtain curtain Window curtain n03151077 curtain.n.01 curtain 12 +312 headphones headphones 12 40 7 otherprop Objects n03261776 earphone.n.01 objects 39 +233 crate crate 12 39 6 crate otherfurniture Furniture n03127925 crate.n.01 objects 39 +286 candle candle 12 40 
7 candle otherprop Objects lamp n02948072 candle.n.01 objects 39 +264 projector projector 12 40 7 projector otherprop Objects n04009552 projector.n.02 objects 39 +110 clothes dryers clothes dryer 12 39 6 otherfurniture Furniture n03251766 dryer.n.01 appliances 37 +1191 mattress mattress 12 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +356 dustpan dustpan 12 40 7 otherprop Objects n03259009 dustpan.n.02 objects 39 +25 drawer drawer 11 39 6 drawer otherfurniture Furniture n03233905 drawer.n.01 furniture 36 +750 rod rod 11 40 7 otherprop Objects pistol 3948459 n03427202 gat.n.01 misc 40 +269 globe globe 11 40 7 globe otherprop Objects objects 39 +307 footrest footrest 11 39 6 foot rest otherfurniture Furniture stool n03380724 footstool.n.01 stool 19 +410 piano bench piano bench 11 39 6 piano bench otherfurniture Furniture bench bench 2828884 n02828884 bench.n.01 seating 34 +730 breakfast bar breakfast bar 11 38 7 bar otherstructure Objects counter 26 +216 step stool step stool 11 40 7 step stool otherprop Objects stool n04315713 step_stool.n.01 stool 19 +1192 hand rail hand rail 11 38 7 railing otherstructure Objects railing 30 +119 vending machine vending machine 11 40 7 machine otherprop Objects n04525305 vending_machine.n.01 appliances 37 +682 ceiling fan ceiling fan 11 40 7 fan otherprop Objects n03320046 fan.n.01 misc 40 +434 swiffer swiffer 11 40 7 otherprop Objects objects 39 +126 foosball table foosball table 11 39 6 foosball table otherfurniture Furniture table table table 4379243 n04379243 table.n.02 table 5 +919 jar jar 11 40 7 jar otherprop Objects jar 3593526 n03593526 jar.n.01 objects 39 +85 footstool footstool 11 39 6 ottoman otherfurniture Furniture stool n03380724 footstool.n.01 stool 19 +1193 folded table folded table 10 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +108 round table round table 10 7 10 table table Table table table table 4379243 n04114554 round_table.n.02 table 5 +135 hamper hamper 10 40 7 basket otherprop Objects basket 2801938 n03482405 hamper.n.02 objects 39 +1194 poster tube poster tube 10 40 7 otherprop Objects objects 39 +432 case case 10 40 7 case otherprop Objects objects 39 +53 carpet carpet 10 40 7 rug otherprop Objects n04118021 rug.n.01 floor 2 +1195 thermostat thermostat 10 40 7 otherprop Objects n04422875 thermostat.n.01 misc 40 +111 coat coat 10 40 7 jacket otherprop Objects n03057021 coat.n.01 clothes 38 +305 water fountain water fountain 10 38 7 water fountain otherstructure Objects n03241335 drinking_fountain.n.01 misc 40 +1125 smoke detector smoke detector 10 40 7 otherprop Objects misc 40 +13 pillows pillow 9 18 7 pillow pillow Objects pillow 3938244 n03938244 pillow.n.01 cushion 8 +1196 flip flops flip flops 9 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +1197 cloth cloth 9 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +1198 banner banner 9 40 7 otherprop Objects n02788021 banner.n.01 misc 40 +1199 clothes hanger clothes hanger 9 40 7 otherprop Objects n03057920 coat_hanger.n.01 objects 39 +1200 whiteboard eraser whiteboard eraser 9 40 7 otherprop Objects objects 39 +378 iron iron 9 40 7 otherprop Objects n03584829 iron.n.04 objects 39 +591 instrument case instrument case 9 40 7 case otherprop Objects objects 39 +49 toilet paper rolls toilet paper 9 40 7 toilet paper otherprop Objects n15075141 toilet_tissue.n.01 objects 39 +92 soap soap 9 40 7 soap otherprop Objects n04253437 soap.n.01 objects 39 +1098 block block 9 40 7 otherprop Objects misc 40 +291 wall 
hanging wall hanging 8 40 7 otherprop Objects n03491178 hanging.n.01 picture 6 +1063 kitchen island kitchen island 8 38 7 kitchen island otherstructure Objects n03620600 kitchen_island.n.01 counter 26 +107 pipes pipe 8 38 7 otherstructure Objects misc 40 +1135 toothbrush toothbrush 8 40 7 toothbrush otherprop Objects n04453156 toothbrush.n.01 objects 39 +189 shirt shirt 8 40 7 otherprop Objects n04197391 shirt.n.01 clothes 38 +245 cutting board cutting board 8 40 7 cutting board otherprop Objects n03025513 chopping_board.n.01 objects 39 +194 vase vase 8 40 7 vase otherprop Objects vase jar 3593526 n04522168 vase.n.01 objects 39 +1201 shower control valve shower control valve 8 38 7 otherstructure Objects n04208936 shower.n.01 shower 23 +386 exercise machine exercise machine 8 40 7 machine otherprop Objects gym_equipment 33 +1202 compost bin compost bin 8 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39 +857 shorts shorts 8 40 7 shorts otherprop Objects clothes 38 +452 tire tire 8 40 7 otherprop Objects n04440749 tire.n.01 objects 39 +1203 teddy bear teddy bear 7 40 7 stuffed animal otherprop Objects n04399382 teddy.n.01 objects 39 +346 bathrobe bathrobe 7 40 7 otherprop Objects n02807616 bathrobe.n.01 clothes 38 +152 handrail handrail 7 38 7 railing otherstructure Objects n02788148 bannister.n.02 railing 30 +83 faucet faucet 7 40 7 faucet otherprop Objects faucet 3325088 n03325088 faucet.n.01 misc 40 +1204 pantry wall pantry wall 7 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +726 thermos thermos 7 40 7 flask otherprop Objects bottle bottle 2876657 n04422727 thermos.n.01 objects 39 +61 rug rug 7 40 7 rug otherprop Objects n04118021 rug.n.01 floor 2 +39 couch cushions cushion 7 18 7 pillow pillow Objects n03151500 cushion.n.03 cushion 8 +1117 tripod tripod 7 39 6 stand otherfurniture Furniture n04485082 tripod.n.01 objects 39 +540 mailbox mailbox 7 29 7 box box Objects mailbox 3710193 n03710193 mailbox.n.01 misc 40 +1205 tupperware tupperware 7 40 7 otherprop Objects objects 39 +415 shoe rack shoe rack 7 40 7 shoe rack otherprop Objects shelving 31 +31 towels towel 6 27 7 towel towel Objects n04459362 towel.n.01 towel 20 +1206 beer bottles beer bottle 6 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +153 treadmill treadmill 6 39 6 treadmill otherfurniture Furniture n04477387 treadmill.n.01 gym_equipment 33 +1207 salt salt 6 40 7 otherprop Objects objects 39 +129 chest chest 6 39 6 chest otherfurniture Furniture dresser dresser chest_of_drawers 13 +220 dispenser dispenser 6 40 7 otherprop Objects n03210683 dispenser.n.01 objects 39 +1208 mirror doors mirror door 6 8 12 door door Wall door n03221720 door.n.01 door 4 +231 remote remote 6 40 7 otherprop Objects remote_control 4074963 n04074963 remote_control.n.01 objects 39 +1209 folded ladder folded ladder 6 39 6 ladder otherfurniture Furniture stairs n03632277 ladder.n.01 misc 40 +39 cushion cushion 6 18 7 pillow pillow Objects n03151500 cushion.n.03 cushion 8 +1210 carton carton 6 40 7 otherprop Objects objects 39 +117 step step 6 38 7 otherstructure Objects n04314914 step.n.04 misc 40 +822 drying rack drying rack 6 39 6 drying rack otherfurniture Furniture shelving 31 +238 slippers slipper 6 40 7 shoe otherprop Objects n04241394 slipper.n.01 clothes 38 +143 pool table pool table 6 39 6 pool table otherfurniture Furniture table table table 4379243 n03982430 pool_table.n.01 table 5 +1211 soda stream soda stream 6 40 7 otherprop Objects objects 39 +228 toilet 
brush toilet brush 6 40 7 toilet brush otherprop Objects objects 39 +494 loft bed loft bed 6 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +226 cooking pot cooking pot 6 40 7 pot otherprop Objects objects 39 +91 heater heater 6 39 6 heater otherfurniture Furniture n03508101 heater.n.01 misc 40 +1072 messenger bag messenger bag 6 37 7 bag bag Objects objects 39 +435 stapler stapler 6 40 7 stapler otherprop Objects n04303497 stapler.n.01 objects 39 +1165 closet walls closet wall 5 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +345 scanner scanner 5 40 7 otherprop Objects appliances 37 +893 elliptical machine elliptical machine 5 40 7 machine otherprop Objects gym_equipment 33 +621 kettle kettle 5 40 7 pot otherprop Objects n03612814 kettle.n.01 objects 39 +1212 metronome metronome 5 40 7 otherprop Objects n03757604 metronome.n.01 objects 39 +297 dumbell dumbell 5 40 7 otherprop Objects objects 39 +1213 music book music book 5 23 2 book books Books n02870526 book.n.11 objects 39 +1214 rice cooker rice cooker 5 40 7 otherprop Objects objects 39 +1215 dart board dart board 5 38 7 board otherstructure Objects n03162940 dartboard.n.01 objects 39 +529 sewing machine sewing machine 5 40 7 sewing machine otherprop Objects n04179913 sewing_machine.n.01 objects 39 +1216 grab bar grab bar 5 38 7 railing otherstructure Objects railing 30 +1217 flowerpot flowerpot 5 40 7 vase otherprop Objects vase jar 3593526 n04522168 vase.n.01 objects 39 +1218 painting painting 5 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +1219 railing railing 5 38 7 railing otherstructure Objects n04047401 railing.n.01 railing 30 +1220 stair stair 5 38 7 stairs otherstructure Objects stairs n04314914 step.n.04 stairs 16 +525 toolbox toolbox 5 39 6 chest otherfurniture Furniture n04452615 toolbox.n.01 objects 39 +204 nerf gun nerf gun 5 40 7 otherprop Objects objects 39 +693 binders binder 5 40 7 binder otherprop Objects objects 39 +179 desk lamp desk lamp 5 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +1221 quadcopter quadcopter 5 40 7 otherprop Objects objects 39 +1222 pitcher pitcher 5 40 7 pitcher otherprop Objects n03950228 pitcher.n.02 objects 39 +1223 hanging hanging 5 40 7 otherprop Objects misc 40 +1224 mail mail 5 40 7 otherprop Objects misc 40 +1225 closet ceiling closet ceiling 5 22 3 ceiling ceiling Ceiling n02990373 ceiling.n.01 ceiling 17 +1226 hoverboard hoverboard 5 40 7 otherprop Objects objects 39 +1227 beanbag chair beanbag chair 5 39 6 bean bag otherfurniture Furniture n02816656 beanbag.n.01 chair 3 +571 water heater water heater 5 40 7 water heater otherprop Objects n04560113 water_heater.n.01 misc 40 +1228 spray bottle spray bottle 5 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +556 rope rope 5 40 7 rope otherprop Objects n04108268 rope.n.01 objects 39 +280 plastic container plastic container 5 40 7 container otherprop Objects objects 39 +1229 soap bottle soap bottle 5 40 7 soap otherprop Objects objects 39 +1230 ikea bag ikea bag 4 37 7 bag bag Objects 2773838 n02773838 bag.n.06 objects 39 +1231 sleeping bag sleeping bag 4 40 7 otherprop Objects n04235860 sleeping_bag.n.01 objects 39 +1232 duffel bag duffel bag 4 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +746 frying pan frying pan 4 40 7 frying pan otherprop Objects n03400231 frying_pan.n.01 objects 39 +1233 oven mitt oven mitt 4 40 7 otherprop Objects objects 39 +1234 pot pot 4 40 7 pot otherprop Objects n04235860 sleeping_bag.n.01 
objects 39 +144 hand dryer hand dryer 4 40 7 otherprop Objects objects 39 +282 dollhouse dollhouse 4 39 6 doll house otherfurniture Furniture n03219483 dollhouse.n.01 objects 39 +167 shampoo bottle shampoo bottle 4 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1235 hair brush hair brush 4 40 7 otherprop Objects n02908217 brush.n.02 objects 39 +1236 tennis racket tennis racket 4 40 7 otherprop Objects n04409806 tennis_racket.n.01 objects 39 +1237 display case display case 4 40 7 case otherprop Objects objects 39 +234 ping pong table ping pong table 4 39 6 ping pong table otherfurniture Furniture table table table 4379243 n04379243 table.n.02 table 5 +563 boiler boiler 4 40 7 otherprop Objects misc 40 +1238 bag of coffee beans bag of coffee beans 4 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +1239 bananas banana 4 40 7 otherprop Objects n00021265 food.n.01 objects 39 +1240 carseat carseat 4 40 7 otherprop Objects misc 40 +366 helmet helmet 4 40 7 otherprop Objects helmet 3513137 n03513137 helmet.n.02 clothes 38 +816 umbrella umbrella 4 40 7 umbrella otherprop Objects n04507155 umbrella.n.01 objects 39 +1241 coffee box coffee box 4 40 7 otherprop Objects objects 39 +719 envelope envelope 4 40 7 envelope otherprop Objects n03291819 envelope.n.01 objects 39 +284 wet floor sign wet floor sign 4 40 7 sign otherprop Objects misc 40 +1242 clothing rack clothing rack 4 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +247 controller controller 4 40 7 otherprop Objects n03096960 control.n.09 objects 39 +1243 bath walls bathroom wall 4 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +1244 podium podium 4 39 6 otherfurniture Furniture n03159640 dais.n.01 furniture 36 +1245 storage box storage box 4 29 7 box box Objects n02883344 box.n.01 objects 39 +1246 dolly dolly 4 40 7 otherprop Objects misc 40 +1247 shampoo shampoo 3 40 7 otherprop Objects n04183516 shampoo.n.01 objects 39 +592 paper tray paper tray 3 40 7 paper tray otherprop Objects objects 39 +385 cabinet door cabinet door 3 8 12 door door Wall door door 4 +1248 changing station changing station 3 40 7 otherprop Objects misc 40 +1249 poster printer poster printer 3 40 7 printer otherprop Objects printer 4004475 n04004475 printer.n.03 appliances 37 +133 screen screen 3 40 7 otherprop Objects n03151077 curtain.n.01 curtain 12 +301 soap bar soap bar 3 38 7 bar otherstructure Objects objects 39 +1250 crutches crutches 3 40 7 otherprop Objects n03141823 crutch.n.01 objects 39 +379 studio light studio light 3 38 7 light otherstructure Objects lighting 28 +130 stack of cups cup 3 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +1251 toilet flush button toilet flush button 3 40 7 otherprop Objects objects 39 +450 trunk trunk 3 40 7 otherprop Objects misc 40 +1252 grocery bag grocery bag 3 37 7 bag bag Objects suitcase 2773838 n03461288 grocery_bag.n.01 objects 39 +316 plastic bin plastic bin 3 40 7 bin otherprop Objects objects 39 +1253 pizza box pizza box 3 29 7 box box Objects objects 39 +385 cabinet doors cabinet door 3 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 door 4 +1254 legs legs 3 31 7 person person Objects person n05217688 person.n.02 misc 40 +461 car car 3 40 7 car otherprop Objects car car 2958343 n02958343 car.n.01 misc 40 +1255 shaving cream shaving cream 3 40 7 otherprop Objects n04186051 shaving_cream.n.01 objects 39 +1256 luggage stand luggage stand 3 39 6 stand otherfurniture Furniture n04038440 
rack.n.05 shelving 31 +599 shredder shredder 3 40 7 otherprop Objects n04210120 shredder.n.01 objects 39 +281 statue statue 3 40 7 sculpture otherprop Objects n04306847 statue.n.01 misc 40 +1257 urinal urinal 3 33 7 toilet toilet Objects toilet toilet n04515991 urinal.n.01 toilet 18 +1258 hose hose 3 40 7 otherprop Objects n03539875 hose.n.03 misc 40 +1259 bike pump bike pump 3 40 7 otherprop Objects objects 39 +319 coatrack coatrack 3 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31 +1260 bear bear 3 40 7 otherprop Objects objects 39 +28 wall lamp lamp 3 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +1261 humidifier humidifier 3 40 7 otherprop Objects objects 39 +546 toothpaste toothpaste 3 40 7 toothpaste otherprop Objects objects 39 +1262 mouthwash bottle mouthwash bottle 3 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1263 poster cutter poster cutter 3 40 7 otherprop Objects objects 39 +1264 golf bag golf bag 3 37 7 bag bag Objects suitcase 2773838 n03445617 golf_bag.n.01 objects 39 +1265 food container food container 3 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1266 camera camera 3 40 7 otherprop Objects objects 39 +28 table lamp lamp 3 35 7 lamp lamp Objects lamp lamp 3636649 n04380533 table_lamp.n.01 lighting 28 +1267 yoga mat yoga mat 3 20 5 floor mat floor mat Floor n03727837 mat.n.01 floor 2 +1268 card card 3 40 7 otherprop Objects objects 39 +1269 mug mug 3 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +188 shower doors shower door 3 38 7 otherstructure Objects n04208936 shower.n.01 door 4 +689 cardboard cardboard 3 40 7 otherprop Objects objects 39 +1270 rack stand rack stand 3 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +1271 boxes of paper boxes of paper 3 29 7 box box Objects n02883344 box.n.01 objects 39 +1272 flag flag 3 40 7 otherprop Objects misc 40 +354 futon futon 3 39 6 mattress otherfurniture Furniture n03408444 futon.n.01 sofa 10 +339 magazine magazine 3 40 7 magazine otherprop Objects n06595351 magazine.n.01 objects 39 +1009 exit sign exit sign 3 40 7 exit sign otherprop Objects misc 40 +1273 rolled poster rolled poster 3 40 7 otherprop Objects objects 39 +1274 wheel wheel 3 40 7 otherprop Objects objects 39 +15 pictures picture 3 11 8 picture picture Picture n03931044 picture.n.01 picture 6 +1275 blackboard eraser blackboard eraser 3 40 7 eraser otherprop Objects n03294833 eraser.n.01 objects 39 +361 organizer organizer 3 40 7 otherprop Objects n03918737 personal_digital_assistant.n.01 objects 39 +1276 doll doll 3 40 7 toy otherprop Objects n03219135 doll.n.01 objects 39 +326 book rack book rack 3 39 6 bookrack otherfurniture Furniture objects 39 +1277 laundry bag laundry bag 3 40 7 laundry basket otherprop Objects basket 2801938 n03050864 clothes_hamper.n.01 objects 39 +1278 sponge sponge 3 40 7 otherprop Objects n01906749 sponge.n.04 objects 39 +116 seating seat 3 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 furniture 36 +1184 folded chairs folded chair 2 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1279 lotion bottle lotion bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +212 can can 2 40 7 can otherprop Objects can 2946921 n02946921 can.n.01 objects 39 +1280 lunch box lunch box 2 40 7 otherprop Objects objects 39 +1281 food display food display 2 40 7 otherprop Objects misc 40 +794 storage shelf storage shelf 
2 40 7 otherprop Objects shelving 31 +1282 sliding wood door sliding wood door 2 40 7 otherprop Objects door 4 +955 pants pants 2 40 7 otherprop Objects n04489008 trouser.n.01 clothes 38 +387 wood wood 2 40 7 otherprop Objects misc 40 +69 boards board 2 38 7 board otherstructure Objects board_panel 35 +65 bottles bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +523 washcloth washcloth 2 40 7 otherprop Objects n04554523 washcloth.n.01 towel 20 +389 workbench workbench 2 39 6 bench otherfurniture Furniture bench table 4379243 n04600486 workbench.n.01 table 5 +29 open kitchen cabinet kitchen cabinet 2 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 cabinet 7 +1283 organizer shelf organizer shelf 2 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +146 frame frame 2 38 7 otherstructure Objects misc 40 +130 cups cup 2 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 +372 exercise ball exercise ball 2 40 7 ball otherprop Objects n04285146 sports_equipment.n.01 gym_equipment 33 +289 easel easel 2 39 6 stand otherfurniture Furniture n03262809 easel.n.01 furniture 36 +440 garbage bag garbage bag 2 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +321 roomba roomba 2 40 7 otherprop Objects objects 39 +976 garage door garage door 2 38 7 garage door otherstructure Objects door door 4 +1256 luggage rack luggage stand 2 39 6 stand otherfurniture Furniture n04038440 shelving 31 +1284 bike lock bike lock 2 40 7 otherprop Objects objects 39 +1285 briefcase briefcase 2 40 7 otherprop Objects n02900705 briefcase.n.01 objects 39 +357 hand towel hand towel 2 27 7 towel towel Objects n03490006 hand_towel.n.01 towel 20 +1286 bath products bath product 2 40 7 otherprop Objects objects 39 +1287 star star 2 40 7 otherprop Objects n09444783 star.n.03 misc 40 +365 map map 2 40 7 map otherprop Objects n03720163 map.n.01 misc 40 +1288 coffee bean bag coffee bean bag 2 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39 +81 headboard headboard 2 39 6 headboard otherfurniture Furniture n03502200 headboard.n.01 bed 11 +1289 ipad ipad 2 40 7 otherprop Objects objects 39 +1290 display rack display rack 2 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31 +948 traffic cone traffic cone 2 40 7 cone otherprop Objects cone objects 39 +174 toiletry toiletry 2 40 7 otherprop Objects n04447443 toiletry.n.01 objects 39 +1028 canopy canopy 2 40 7 otherprop Objects misc 40 +1291 massage chair massage chair 2 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1292 paper organizer paper organizer 2 40 7 otherprop Objects objects 39 +1005 barricade barricade 2 40 7 otherprop Objects misc 40 +235 platform platform 2 38 7 otherstructure Objects misc 40 +1293 cap cap 2 40 7 hat otherprop Objects n03497657 hat.n.01 clothes 38 +1294 dumbbell plates dumbbell plates 2 40 7 otherprop Objects objects 39 +1295 elevator elevator 2 38 7 otherstructure Objects misc 40 +1296 cooking pan cooking pan 2 40 7 pan otherprop Objects n03880531 pan.n.01 objects 39 +1297 trash bag trash bag 2 37 7 bag bag Objects objects 39 +1298 santa santa 2 40 7 otherprop Objects misc 40 +1299 jewelry box jewelry box 2 29 7 box box Objects n02883344 box.n.01 objects 39 +1300 boat boat 2 40 7 otherprop Objects misc 40 +1301 sock sock 2 21 7 clothes clothes Objects n04254777 sock.n.01 clothes 38 +1051 kinect kinect 2 40 7 kinect otherprop Objects objects 39 +566 crib crib 
2 39 6 crib otherfurniture Furniture furniture 36 +1302 plastic storage bin plastic storage bin 2 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1062 cooler cooler 2 24 6 refridgerator refridgerator Furniture n03102654 cooler.n.01 appliances 37 +1303 kitchen apron kitchen apron 2 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +1304 dishwashing soap bottle dishwashing soap bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1305 xbox controller xbox controller 2 40 7 otherprop Objects objects 39 +1306 banana holder banana holder 2 40 7 otherprop Objects objects 39 +298 ping pong paddle ping pong paddle 2 40 7 otherprop Objects table 5 +1307 airplane airplane 2 40 7 otherprop Objects misc 40 +1308 conditioner bottle conditioner bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +1309 tea kettle tea kettle 2 40 7 tea kettle otherprop Objects n04397768 teakettle.n.01 objects 39 +43 bedframe bedframe 2 39 6 otherfurniture Furniture n02822579 bedstead.n.01 bed 11 +1310 wood beam wood beam 2 38 7 otherstructure Objects beam 29 +593 toilet paper package toilet paper package 2 40 7 otherprop Objects objects 39 +1311 wall mounted coat rack wall mounted coat rack 2 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31 +1312 film light film light 2 40 7 otherprop Objects lighting 28 +749 ceiling lamp ceiling lamp 1 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +623 chain chain 1 40 7 otherprop Objects chair 3 +1313 sofa sofa 1 6 9 sofa sofa Sofa sofa sofa sofa 4256520 n04256520 sofa.n.01 sofa 10 +99 closet wardrobe wardrobe 1 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +265 sweater sweater 1 40 7 otherprop Objects n04370048 sweater.n.01 clothes 38 +1314 kitchen mixer kitchen mixer 1 40 7 otherprop Objects appliances 37 +99 wardrobe wardrobe 1 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36 +1315 water softener water softener 1 40 7 otherprop Objects misc 40 +448 banister banister 1 38 7 banister otherstructure Objects n02788148 bannister.n.02 railing 30 +257 trolley trolley 1 40 7 trolley otherprop Objects n04335435 streetcar.n.01 misc 40 +1316 pantry shelf pantry shelf 1 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +786 sofa bed sofa bed 1 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11 +801 loofa loofa 1 40 7 otherprop Objects objects 39 +972 shower faucet handle shower faucet handle 1 40 7 handle otherprop Objects shower 23 +1317 toy piano toy piano 1 40 7 toy otherprop Objects n03964744 plaything.n.01 objects 39 +1318 fish fish 1 40 7 otherprop Objects n02512053 fish.n.01 objects 39 +75 file cabinets file cabinet 1 3 6 cabinet cabinet Furniture cabinet 2933112 n03337140 file.n.03 cabinet 7 +657 cat litter box cat litter box 1 29 7 box box Objects objects 39 +561 electric panel electric panel 1 40 7 otherprop Objects misc 40 +93 suitcases suitcase 1 40 7 luggage otherprop Objects n02774630 baggage.n.01 objects 39 +513 curtain rod curtain rod 1 38 7 curtain rod otherstructure Objects curtain 12 +411 bunk bed bunk bed 1 39 6 bunk bed otherfurniture Furniture bed bed bed 2818832 n02920259 bunk_bed.n.01 bed 11 +1122 chandelier chandelier 1 38 7 chandelier otherstructure Objects n03005285 chandelier.n.01 lighting 28 +922 tape tape 1 40 7 tape otherprop Objects objects 39 +88 plates plate 1 40 7 otherprop 
Objects n03959485 plate.n.04 objects 39 +518 alarm alarm 1 40 7 alarm otherprop Objects clock 3046257 n02694662 alarm_clock.n.01 objects 39 +814 fire hose fire hose 1 40 7 otherprop Objects n03346004 fire_hose.n.01 misc 40 +1319 toy dinosaur toy dinosaur 1 40 7 toy otherprop Objects n03964744 plaything.n.01 objects 39 +1320 cone cone 1 40 7 otherprop Objects objects 39 +649 glass doors glass door 1 8 12 door door Wall door n03221720 door.n.01 door 4 +607 hatrack hatrack 1 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31 +819 subwoofer subwoofer 1 40 7 speaker otherprop Objects speaker 3691459 n04349401 subwoofer.n.01 objects 39 +1321 fire sprinkler fire sprinkler 1 40 7 otherprop Objects misc 40 +1322 trash cabinet trash cabinet 1 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +1204 pantry walls pantry wall 1 1 12 wall wall Wall n04546855 wall.n.01 wall 1 +227 photo photo 1 40 7 photo otherprop Objects n03925226 photograph.n.01 picture 6 +817 barrier barrier 1 40 7 otherprop Objects n02796623 barrier.n.01 misc 40 +130 stacks of cups cup 1 40 7 otherprop Objects n03147509 cup.n.01 objects 39 +712 beachball beachball 1 40 7 ball otherprop Objects n02814224 beach_ball.n.01 objects 39 +1323 folded boxes folded boxes 1 40 7 otherprop Objects objects 39 +1324 contact lens solution bottle contact lens solution bottle 1 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39 +673 covered box covered box 1 29 7 box box Objects objects 39 +459 folder folder 1 40 7 folder otherprop Objects n03376279 folder.n.02 objects 39 +643 mail trays mail tray 1 40 7 mail tray otherprop Objects objects 39 +238 slipper slipper 1 40 7 otherprop Objects n04241394 slipper.n.01 clothes 38 +765 magazine rack magazine rack 1 39 6 stand otherfurniture Furniture n03704549 magazine_rack.n.01 shelving 31 +1008 sticker sticker 1 40 7 sticker otherprop Objects n07272545 gummed_label.n.01 objects 39 +225 lotion lotion 1 40 7 otherprop Objects n03690938 lotion.n.01 objects 39 +1083 buddha buddha 1 40 7 otherprop Objects objects 39 +813 file organizer file organizer 1 40 7 otherprop Objects objects 39 +138 paper towel rolls paper towel roll 1 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20 +1145 night lamp night lamp 1 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28 +796 fuse box fuse box 1 40 7 otherprop Objects misc 40 +1325 knife block knife block 1 40 7 otherprop Objects objects 39 +363 furnace furnace 1 39 6 furnace otherfurniture Furniture n03404449 furnace.n.01 +1174 cd cases cd case 1 40 7 otherprop Objects objects 39 +38 stools stool 1 40 7 stool otherprop Objects stool n04326896 stool.n.01 stool 19 +1326 hand sanitzer dispenser hand sanitzer dispenser 1 40 7 otherprop Objects n04254120 soap_dispenser.n.01 objects 39 +997 teapot teapot 1 40 7 tea pot otherprop Objects n04398044 teapot.n.01 objects 39 +1327 pen holder pen holder 1 40 7 otherprop Objects objects 39 +1328 tray rack tray rack 1 40 7 otherprop Objects objects 39 +1329 wig wig 1 40 7 otherprop Objects n04584207 wig.n.01 objects 39 +182 switch switch 1 40 7 otherprop Objects n04372370 switch.n.01 misc 40 +280 plastic containers plastic container 1 40 7 container otherprop Objects n03094503 container.n.01 objects 39 +1330 night light night light 1 40 7 otherprop Objects lighting 28 +1331 notepad notepad 1 40 7 otherprop Objects objects 39 +1332 mail bin mail bin 1 40 7 otherprop Objects misc 40 +1333 elevator button elevator button 1 40 7 
otherprop Objects misc 40 +939 gaming wheel gaming wheel 1 40 7 otherprop Objects objects 39 +1334 drum set drum set 1 40 7 otherprop Objects objects 39 +480 cosmetic bag cosmetic bag 1 37 7 bag bag Objects objects 39 +907 coffee mug coffee mug 1 40 7 vessel otherprop Objects cup or mug 3797390 n03063599 coffee_mug.n.01 objects 39 +1335 closet shelf closet shelf 1 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31 +1336 baby mobile baby mobile 1 40 7 otherprop Objects objects 39 +829 diaper bin diaper bin 1 40 7 bin otherprop Objects objects 39 +947 door wall door wall 1 1 12 wall wall Wall wall 1 +1116 stepstool stepstool 1 40 7 step stool otherprop Objects objects 39 +599 paper shredder shredder 1 40 7 otherprop Objects n04210120 shredder.n.01 objects 39 +733 dress rack dress rack 1 40 7 otherprop Objects n03238762 dress_rack.n.01 misc 40 +123 cover cover 1 40 7 blanket otherprop Objects objects 39 +506 shopping bag shopping bag 1 37 7 bag bag Objects n04204081 shopping_bag.n.01 objects 39 +569 sliding door sliding door 1 8 12 door door Wall door n04239074 sliding_door.n.01 door 4 +1337 exercise bike exercise bike 1 40 7 machine otherprop Objects n04210120 shredder.n.01 gym_equipment 33 +1338 recliner chair recliner chair 1 5 4 chair chair Chair chair chair chair 3001627 n03238762 dress_rack.n.01 chair 3 +1314 kitchenaid mixer kitchen mixer 1 40 7 otherprop Objects appliances 37 +1339 soda can soda can 1 40 7 can otherprop Objects can 2946921 n02946921 can.n.01 objects 39 +1340 stovetop stovetop 1 38 7 stove otherstructure Objects stove 4330267 n04330267 stove.n.02 appliances 37 +851 stepladder stepladder 1 39 6 ladder otherfurniture Furniture stairs n04315599 step_ladder.n.01 stairs 16 +142 tap tap 1 40 7 faucet otherprop Objects faucet 3325088 n04559451 water_faucet.n.01 objects 39 +436 cable cable 1 40 7 cables otherprop Objects objects 39 +1341 baby changing station baby changing station 1 39 6 otherfurniture Furniture furniture 36 +1342 costume costume 1 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38 +885 rocking chair rocking chair 1 5 4 chair chair Chair chair chair chair 3001627 n04099969 rocking_chair.n.01 chair 3 +693 binder binder 1 40 7 binder otherprop Objects objects 39 +815 media center media center 1 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7 +401 towel rack towel rack 1 40 7 otherprop Objects n04459773 towel_rack.n.01 misc 40 +1343 medal medal 1 40 7 otherprop Objects objects 39 +1184 stack of folded chairs folded chair 1 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3 +1344 telescope telescope 1 40 7 otherprop Objects n04403638 telescope.n.01 objects 39 +1345 closet doorframe closet doorframe 1 8 12 door door Wall door door 4 +160 glass glass 1 38 7 glass otherstructure Objects n03438257 glass.n.02 misc 40 +1126 baseball cap baseball cap 1 40 7 otherprop Objects cap 2954340 n02799323 baseball_cap.n.01 clothes 38 +1346 battery disposal jar battery disposal jar 1 40 7 jar otherprop Objects jar 3593526 n03593526 jar.n.01 objects 39 +332 mop mop 1 40 7 otherprop Objects n04367480 swab.n.02 objects 39 +397 tank tank 1 40 7 otherprop Objects objects 39 +643 mail tray mail tray 1 40 7 mail tray otherprop Objects objects 39 +551 centerpiece centerpiece 1 40 7 centerpiece otherprop Objects n02994419 centerpiece.n.02 objects 39 +1163 object stick 1 40 7 stick otherprop Objects objects 39 +1347 closet floor closet floor 1 2 5 floor floor Floor n03365592 
floor.n.01 floor 2 +1348 dryer sheets dryer sheets 1 40 7 otherprop Objects objects 39 +803 bycicle bycicle 1 40 7 otherprop Objects misc 40 +484 flower stand flower stand 1 39 6 stand otherfurniture Furniture furniture 36 +1349 air mattress air mattress 1 4 1 bed bed Bed bed bed bed 2818832 n02690809 air_mattress.n.01 bed 11 +1350 clip clip 1 40 7 otherprop Objects objects 39 +222 side table side table 1 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +1253 pizza boxes pizza box 1 29 7 box box Objects n02883344 box.n.01 objects 39 +1351 display display 1 39 7 otherfurniture Furniture n03211117 display.n.06 misc 40 +1352 postcard postcard 1 40 7 otherprop Objects objects 39 +828 display sign display sign 1 40 7 sign otherprop Objects misc 40 +1353 paper towel paper towel 1 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20 +612 boots boot 1 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38 +1354 tennis racket bag tennis racket bag 1 40 7 otherprop Objects objects 39 +1355 air hockey table air hockey table 1 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5 +1301 socks sock 1 21 7 clothes clothes Objects n04254777 sock.n.01 clothes 38 +1356 food bag food bag 1 37 7 bag bag Objects objects 39 +1199 clothes hangers clothes hanger 1 40 7 otherprop Objects n03057920 coat_hanger.n.01 misc 40 +1357 starbucks cup starbucks cup 1 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39 \ No newline at end of file diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_test.txt b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_test.txt new file mode 100644 index 0000000000000000000000000000000000000000..79d15b0ee4afa889883562a722b837b78ee8ce4b --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_test.txt @@ -0,0 +1,100 @@ +scene0707_00 +scene0708_00 +scene0709_00 +scene0710_00 +scene0711_00 +scene0712_00 +scene0713_00 +scene0714_00 +scene0715_00 +scene0716_00 +scene0717_00 +scene0718_00 +scene0719_00 +scene0720_00 +scene0721_00 +scene0722_00 +scene0723_00 +scene0724_00 +scene0725_00 +scene0726_00 +scene0727_00 +scene0728_00 +scene0729_00 +scene0730_00 +scene0731_00 +scene0732_00 +scene0733_00 +scene0734_00 +scene0735_00 +scene0736_00 +scene0737_00 +scene0738_00 +scene0739_00 +scene0740_00 +scene0741_00 +scene0742_00 +scene0743_00 +scene0744_00 +scene0745_00 +scene0746_00 +scene0747_00 +scene0748_00 +scene0749_00 +scene0750_00 +scene0751_00 +scene0752_00 +scene0753_00 +scene0754_00 +scene0755_00 +scene0756_00 +scene0757_00 +scene0758_00 +scene0759_00 +scene0760_00 +scene0761_00 +scene0762_00 +scene0763_00 +scene0764_00 +scene0765_00 +scene0766_00 +scene0767_00 +scene0768_00 +scene0769_00 +scene0770_00 +scene0771_00 +scene0772_00 +scene0773_00 +scene0774_00 +scene0775_00 +scene0776_00 +scene0777_00 +scene0778_00 +scene0779_00 +scene0780_00 +scene0781_00 +scene0782_00 +scene0783_00 +scene0784_00 +scene0785_00 +scene0786_00 +scene0787_00 +scene0788_00 +scene0789_00 +scene0790_00 +scene0791_00 +scene0792_00 +scene0793_00 +scene0794_00 +scene0795_00 +scene0796_00 +scene0797_00 +scene0798_00 +scene0799_00 +scene0800_00 +scene0801_00 +scene0802_00 +scene0803_00 +scene0804_00 +scene0805_00 +scene0806_00 diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_train.txt b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_train.txt new file mode 100644 index 
0000000000000000000000000000000000000000..ef625f120b812fea5ac507d3b7049fc7ebd2e7e4 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_train.txt @@ -0,0 +1,1201 @@ +scene0191_00 +scene0191_01 +scene0191_02 +scene0119_00 +scene0230_00 +scene0528_00 +scene0528_01 +scene0705_00 +scene0705_01 +scene0705_02 +scene0415_00 +scene0415_01 +scene0415_02 +scene0007_00 +scene0141_00 +scene0141_01 +scene0141_02 +scene0515_00 +scene0515_01 +scene0515_02 +scene0447_00 +scene0447_01 +scene0447_02 +scene0531_00 +scene0503_00 +scene0285_00 +scene0069_00 +scene0584_00 +scene0584_01 +scene0584_02 +scene0581_00 +scene0581_01 +scene0581_02 +scene0620_00 +scene0620_01 +scene0263_00 +scene0263_01 +scene0481_00 +scene0481_01 +scene0020_00 +scene0020_01 +scene0291_00 +scene0291_01 +scene0291_02 +scene0469_00 +scene0469_01 +scene0469_02 +scene0659_00 +scene0659_01 +scene0024_00 +scene0024_01 +scene0024_02 +scene0564_00 +scene0117_00 +scene0027_00 +scene0027_01 +scene0027_02 +scene0028_00 +scene0330_00 +scene0418_00 +scene0418_01 +scene0418_02 +scene0233_00 +scene0233_01 +scene0673_00 +scene0673_01 +scene0673_02 +scene0673_03 +scene0673_04 +scene0673_05 +scene0585_00 +scene0585_01 +scene0362_00 +scene0362_01 +scene0362_02 +scene0362_03 +scene0035_00 +scene0035_01 +scene0358_00 +scene0358_01 +scene0358_02 +scene0037_00 +scene0194_00 +scene0321_00 +scene0293_00 +scene0293_01 +scene0623_00 +scene0623_01 +scene0592_00 +scene0592_01 +scene0569_00 +scene0569_01 +scene0413_00 +scene0313_00 +scene0313_01 +scene0313_02 +scene0480_00 +scene0480_01 +scene0401_00 +scene0517_00 +scene0517_01 +scene0517_02 +scene0032_00 +scene0032_01 +scene0613_00 +scene0613_01 +scene0613_02 +scene0306_00 +scene0306_01 +scene0052_00 +scene0052_01 +scene0052_02 +scene0053_00 +scene0444_00 +scene0444_01 +scene0055_00 +scene0055_01 +scene0055_02 +scene0560_00 +scene0589_00 +scene0589_01 +scene0589_02 +scene0610_00 +scene0610_01 +scene0610_02 +scene0364_00 +scene0364_01 +scene0383_00 +scene0383_01 +scene0383_02 +scene0006_00 +scene0006_01 +scene0006_02 +scene0275_00 +scene0451_00 +scene0451_01 +scene0451_02 +scene0451_03 +scene0451_04 +scene0451_05 +scene0135_00 +scene0065_00 +scene0065_01 +scene0065_02 +scene0104_00 +scene0674_00 +scene0674_01 +scene0448_00 +scene0448_01 +scene0448_02 +scene0502_00 +scene0502_01 +scene0502_02 +scene0440_00 +scene0440_01 +scene0440_02 +scene0071_00 +scene0072_00 +scene0072_01 +scene0072_02 +scene0509_00 +scene0509_01 +scene0509_02 +scene0649_00 +scene0649_01 +scene0602_00 +scene0694_00 +scene0694_01 +scene0101_00 +scene0101_01 +scene0101_02 +scene0101_03 +scene0101_04 +scene0101_05 +scene0218_00 +scene0218_01 +scene0579_00 +scene0579_01 +scene0579_02 +scene0039_00 +scene0039_01 +scene0493_00 +scene0493_01 +scene0242_00 +scene0242_01 +scene0242_02 +scene0083_00 +scene0083_01 +scene0127_00 +scene0127_01 +scene0662_00 +scene0662_01 +scene0662_02 +scene0018_00 +scene0087_00 +scene0087_01 +scene0087_02 +scene0332_00 +scene0332_01 +scene0332_02 +scene0628_00 +scene0628_01 +scene0628_02 +scene0134_00 +scene0134_01 +scene0134_02 +scene0238_00 +scene0238_01 +scene0092_00 +scene0092_01 +scene0092_02 +scene0092_03 +scene0092_04 +scene0022_00 +scene0022_01 +scene0467_00 +scene0392_00 +scene0392_01 +scene0392_02 +scene0424_00 +scene0424_01 +scene0424_02 +scene0646_00 +scene0646_01 +scene0646_02 +scene0098_00 +scene0098_01 +scene0044_00 +scene0044_01 +scene0044_02 +scene0510_00 +scene0510_01 +scene0510_02 +scene0571_00 +scene0571_01 +scene0166_00 +scene0166_01 +scene0166_02 +scene0563_00 
+scene0172_00 +scene0172_01 +scene0388_00 +scene0388_01 +scene0215_00 +scene0215_01 +scene0252_00 +scene0287_00 +scene0668_00 +scene0572_00 +scene0572_01 +scene0572_02 +scene0026_00 +scene0224_00 +scene0113_00 +scene0113_01 +scene0551_00 +scene0381_00 +scene0381_01 +scene0381_02 +scene0371_00 +scene0371_01 +scene0460_00 +scene0118_00 +scene0118_01 +scene0118_02 +scene0417_00 +scene0008_00 +scene0634_00 +scene0521_00 +scene0123_00 +scene0123_01 +scene0123_02 +scene0045_00 +scene0045_01 +scene0511_00 +scene0511_01 +scene0114_00 +scene0114_01 +scene0114_02 +scene0070_00 +scene0029_00 +scene0029_01 +scene0029_02 +scene0129_00 +scene0103_00 +scene0103_01 +scene0002_00 +scene0002_01 +scene0132_00 +scene0132_01 +scene0132_02 +scene0124_00 +scene0124_01 +scene0143_00 +scene0143_01 +scene0143_02 +scene0604_00 +scene0604_01 +scene0604_02 +scene0507_00 +scene0105_00 +scene0105_01 +scene0105_02 +scene0428_00 +scene0428_01 +scene0311_00 +scene0140_00 +scene0140_01 +scene0182_00 +scene0182_01 +scene0182_02 +scene0142_00 +scene0142_01 +scene0399_00 +scene0399_01 +scene0012_00 +scene0012_01 +scene0012_02 +scene0060_00 +scene0060_01 +scene0370_00 +scene0370_01 +scene0370_02 +scene0310_00 +scene0310_01 +scene0310_02 +scene0661_00 +scene0650_00 +scene0152_00 +scene0152_01 +scene0152_02 +scene0158_00 +scene0158_01 +scene0158_02 +scene0482_00 +scene0482_01 +scene0600_00 +scene0600_01 +scene0600_02 +scene0393_00 +scene0393_01 +scene0393_02 +scene0562_00 +scene0174_00 +scene0174_01 +scene0157_00 +scene0157_01 +scene0161_00 +scene0161_01 +scene0161_02 +scene0159_00 +scene0254_00 +scene0254_01 +scene0115_00 +scene0115_01 +scene0115_02 +scene0162_00 +scene0163_00 +scene0163_01 +scene0523_00 +scene0523_01 +scene0523_02 +scene0459_00 +scene0459_01 +scene0175_00 +scene0085_00 +scene0085_01 +scene0279_00 +scene0279_01 +scene0279_02 +scene0201_00 +scene0201_01 +scene0201_02 +scene0283_00 +scene0456_00 +scene0456_01 +scene0429_00 +scene0043_00 +scene0043_01 +scene0419_00 +scene0419_01 +scene0419_02 +scene0368_00 +scene0368_01 +scene0348_00 +scene0348_01 +scene0348_02 +scene0442_00 +scene0178_00 +scene0380_00 +scene0380_01 +scene0380_02 +scene0165_00 +scene0165_01 +scene0165_02 +scene0181_00 +scene0181_01 +scene0181_02 +scene0181_03 +scene0333_00 +scene0614_00 +scene0614_01 +scene0614_02 +scene0404_00 +scene0404_01 +scene0404_02 +scene0185_00 +scene0126_00 +scene0126_01 +scene0126_02 +scene0519_00 +scene0236_00 +scene0236_01 +scene0189_00 +scene0075_00 +scene0267_00 +scene0192_00 +scene0192_01 +scene0192_02 +scene0281_00 +scene0420_00 +scene0420_01 +scene0420_02 +scene0195_00 +scene0195_01 +scene0195_02 +scene0597_00 +scene0597_01 +scene0597_02 +scene0041_00 +scene0041_01 +scene0111_00 +scene0111_01 +scene0111_02 +scene0666_00 +scene0666_01 +scene0666_02 +scene0200_00 +scene0200_01 +scene0200_02 +scene0536_00 +scene0536_01 +scene0536_02 +scene0390_00 +scene0280_00 +scene0280_01 +scene0280_02 +scene0344_00 +scene0344_01 +scene0205_00 +scene0205_01 +scene0205_02 +scene0484_00 +scene0484_01 +scene0009_00 +scene0009_01 +scene0009_02 +scene0302_00 +scene0302_01 +scene0209_00 +scene0209_01 +scene0209_02 +scene0210_00 +scene0210_01 +scene0395_00 +scene0395_01 +scene0395_02 +scene0683_00 +scene0601_00 +scene0601_01 +scene0214_00 +scene0214_01 +scene0214_02 +scene0477_00 +scene0477_01 +scene0439_00 +scene0439_01 +scene0468_00 +scene0468_01 +scene0468_02 +scene0546_00 +scene0466_00 +scene0466_01 +scene0220_00 +scene0220_01 +scene0220_02 +scene0122_00 +scene0122_01 +scene0130_00 +scene0110_00 +scene0110_01 +scene0110_02 
+scene0327_00 +scene0156_00 +scene0266_00 +scene0266_01 +scene0001_00 +scene0001_01 +scene0228_00 +scene0199_00 +scene0219_00 +scene0464_00 +scene0232_00 +scene0232_01 +scene0232_02 +scene0299_00 +scene0299_01 +scene0530_00 +scene0363_00 +scene0453_00 +scene0453_01 +scene0570_00 +scene0570_01 +scene0570_02 +scene0183_00 +scene0239_00 +scene0239_01 +scene0239_02 +scene0373_00 +scene0373_01 +scene0241_00 +scene0241_01 +scene0241_02 +scene0188_00 +scene0622_00 +scene0622_01 +scene0244_00 +scene0244_01 +scene0691_00 +scene0691_01 +scene0206_00 +scene0206_01 +scene0206_02 +scene0247_00 +scene0247_01 +scene0061_00 +scene0061_01 +scene0082_00 +scene0250_00 +scene0250_01 +scene0250_02 +scene0501_00 +scene0501_01 +scene0501_02 +scene0320_00 +scene0320_01 +scene0320_02 +scene0320_03 +scene0631_00 +scene0631_01 +scene0631_02 +scene0255_00 +scene0255_01 +scene0255_02 +scene0047_00 +scene0265_00 +scene0265_01 +scene0265_02 +scene0004_00 +scene0336_00 +scene0336_01 +scene0058_00 +scene0058_01 +scene0260_00 +scene0260_01 +scene0260_02 +scene0243_00 +scene0603_00 +scene0603_01 +scene0093_00 +scene0093_01 +scene0093_02 +scene0109_00 +scene0109_01 +scene0434_00 +scene0434_01 +scene0434_02 +scene0290_00 +scene0627_00 +scene0627_01 +scene0470_00 +scene0470_01 +scene0137_00 +scene0137_01 +scene0137_02 +scene0270_00 +scene0270_01 +scene0270_02 +scene0271_00 +scene0271_01 +scene0504_00 +scene0274_00 +scene0274_01 +scene0274_02 +scene0036_00 +scene0036_01 +scene0276_00 +scene0276_01 +scene0272_00 +scene0272_01 +scene0499_00 +scene0698_00 +scene0698_01 +scene0051_00 +scene0051_01 +scene0051_02 +scene0051_03 +scene0108_00 +scene0245_00 +scene0369_00 +scene0369_01 +scene0369_02 +scene0284_00 +scene0289_00 +scene0289_01 +scene0286_00 +scene0286_01 +scene0286_02 +scene0286_03 +scene0031_00 +scene0031_01 +scene0031_02 +scene0545_00 +scene0545_01 +scene0545_02 +scene0557_00 +scene0557_01 +scene0557_02 +scene0533_00 +scene0533_01 +scene0116_00 +scene0116_01 +scene0116_02 +scene0611_00 +scene0611_01 +scene0688_00 +scene0294_00 +scene0294_01 +scene0294_02 +scene0295_00 +scene0295_01 +scene0296_00 +scene0296_01 +scene0596_00 +scene0596_01 +scene0596_02 +scene0532_00 +scene0532_01 +scene0637_00 +scene0638_00 +scene0121_00 +scene0121_01 +scene0121_02 +scene0040_00 +scene0040_01 +scene0197_00 +scene0197_01 +scene0197_02 +scene0410_00 +scene0410_01 +scene0305_00 +scene0305_01 +scene0615_00 +scene0615_01 +scene0703_00 +scene0703_01 +scene0555_00 +scene0297_00 +scene0297_01 +scene0297_02 +scene0582_00 +scene0582_01 +scene0582_02 +scene0023_00 +scene0094_00 +scene0013_00 +scene0013_01 +scene0013_02 +scene0136_00 +scene0136_01 +scene0136_02 +scene0407_00 +scene0407_01 +scene0062_00 +scene0062_01 +scene0062_02 +scene0386_00 +scene0318_00 +scene0554_00 +scene0554_01 +scene0497_00 +scene0213_00 +scene0258_00 +scene0323_00 +scene0323_01 +scene0324_00 +scene0324_01 +scene0016_00 +scene0016_01 +scene0016_02 +scene0681_00 +scene0398_00 +scene0398_01 +scene0227_00 +scene0090_00 +scene0066_00 +scene0262_00 +scene0262_01 +scene0155_00 +scene0155_01 +scene0155_02 +scene0352_00 +scene0352_01 +scene0352_02 +scene0038_00 +scene0038_01 +scene0038_02 +scene0335_00 +scene0335_01 +scene0335_02 +scene0261_00 +scene0261_01 +scene0261_02 +scene0261_03 +scene0640_00 +scene0640_01 +scene0640_02 +scene0080_00 +scene0080_01 +scene0080_02 +scene0403_00 +scene0403_01 +scene0282_00 +scene0282_01 +scene0282_02 +scene0682_00 +scene0173_00 +scene0173_01 +scene0173_02 +scene0522_00 +scene0687_00 +scene0345_00 +scene0345_01 +scene0612_00 +scene0612_01 
+scene0411_00 +scene0411_01 +scene0411_02 +scene0625_00 +scene0625_01 +scene0211_00 +scene0211_01 +scene0211_02 +scene0211_03 +scene0676_00 +scene0676_01 +scene0179_00 +scene0498_00 +scene0498_01 +scene0498_02 +scene0547_00 +scene0547_01 +scene0547_02 +scene0269_00 +scene0269_01 +scene0269_02 +scene0366_00 +scene0680_00 +scene0680_01 +scene0588_00 +scene0588_01 +scene0588_02 +scene0588_03 +scene0346_00 +scene0346_01 +scene0359_00 +scene0359_01 +scene0014_00 +scene0120_00 +scene0120_01 +scene0212_00 +scene0212_01 +scene0212_02 +scene0176_00 +scene0049_00 +scene0259_00 +scene0259_01 +scene0586_00 +scene0586_01 +scene0586_02 +scene0309_00 +scene0309_01 +scene0125_00 +scene0455_00 +scene0177_00 +scene0177_01 +scene0177_02 +scene0326_00 +scene0372_00 +scene0171_00 +scene0171_01 +scene0374_00 +scene0654_00 +scene0654_01 +scene0445_00 +scene0445_01 +scene0475_00 +scene0475_01 +scene0475_02 +scene0349_00 +scene0349_01 +scene0234_00 +scene0669_00 +scene0669_01 +scene0375_00 +scene0375_01 +scene0375_02 +scene0387_00 +scene0387_01 +scene0387_02 +scene0312_00 +scene0312_01 +scene0312_02 +scene0384_00 +scene0385_00 +scene0385_01 +scene0385_02 +scene0000_00 +scene0000_01 +scene0000_02 +scene0376_00 +scene0376_01 +scene0376_02 +scene0301_00 +scene0301_01 +scene0301_02 +scene0322_00 +scene0542_00 +scene0079_00 +scene0079_01 +scene0099_00 +scene0099_01 +scene0476_00 +scene0476_01 +scene0476_02 +scene0394_00 +scene0394_01 +scene0147_00 +scene0147_01 +scene0067_00 +scene0067_01 +scene0067_02 +scene0397_00 +scene0397_01 +scene0337_00 +scene0337_01 +scene0337_02 +scene0431_00 +scene0223_00 +scene0223_01 +scene0223_02 +scene0010_00 +scene0010_01 +scene0402_00 +scene0268_00 +scene0268_01 +scene0268_02 +scene0679_00 +scene0679_01 +scene0405_00 +scene0128_00 +scene0408_00 +scene0408_01 +scene0190_00 +scene0107_00 +scene0076_00 +scene0167_00 +scene0361_00 +scene0361_01 +scene0361_02 +scene0216_00 +scene0202_00 +scene0303_00 +scene0303_01 +scene0303_02 +scene0446_00 +scene0446_01 +scene0089_00 +scene0089_01 +scene0089_02 +scene0360_00 +scene0150_00 +scene0150_01 +scene0150_02 +scene0421_00 +scene0421_01 +scene0421_02 +scene0454_00 +scene0626_00 +scene0626_01 +scene0626_02 +scene0186_00 +scene0186_01 +scene0538_00 +scene0479_00 +scene0479_01 +scene0479_02 +scene0656_00 +scene0656_01 +scene0656_02 +scene0656_03 +scene0525_00 +scene0525_01 +scene0525_02 +scene0308_00 +scene0396_00 +scene0396_01 +scene0396_02 +scene0624_00 +scene0292_00 +scene0292_01 +scene0632_00 +scene0253_00 +scene0021_00 +scene0325_00 +scene0325_01 +scene0437_00 +scene0437_01 +scene0438_00 +scene0590_00 +scene0590_01 +scene0400_00 +scene0400_01 +scene0541_00 +scene0541_01 +scene0541_02 +scene0677_00 +scene0677_01 +scene0677_02 +scene0443_00 +scene0315_00 +scene0288_00 +scene0288_01 +scene0288_02 +scene0422_00 +scene0672_00 +scene0672_01 +scene0184_00 +scene0449_00 +scene0449_01 +scene0449_02 +scene0048_00 +scene0048_01 +scene0138_00 +scene0452_00 +scene0452_01 +scene0452_02 +scene0667_00 +scene0667_01 +scene0667_02 +scene0463_00 +scene0463_01 +scene0078_00 +scene0078_01 +scene0078_02 +scene0636_00 +scene0457_00 +scene0457_01 +scene0457_02 +scene0465_00 +scene0465_01 +scene0577_00 +scene0151_00 +scene0151_01 +scene0339_00 +scene0573_00 +scene0573_01 +scene0154_00 +scene0096_00 +scene0096_01 +scene0096_02 +scene0235_00 +scene0168_00 +scene0168_01 +scene0168_02 +scene0594_00 +scene0587_00 +scene0587_01 +scene0587_02 +scene0587_03 +scene0229_00 +scene0229_01 +scene0229_02 +scene0512_00 +scene0106_00 +scene0106_01 +scene0106_02 +scene0472_00 
+scene0472_01 +scene0472_02 +scene0489_00 +scene0489_01 +scene0489_02 +scene0425_00 +scene0425_01 +scene0641_00 +scene0526_00 +scene0526_01 +scene0317_00 +scene0317_01 +scene0544_00 +scene0017_00 +scene0017_01 +scene0017_02 +scene0042_00 +scene0042_01 +scene0042_02 +scene0576_00 +scene0576_01 +scene0576_02 +scene0347_00 +scene0347_01 +scene0347_02 +scene0436_00 +scene0226_00 +scene0226_01 +scene0485_00 +scene0486_00 +scene0487_00 +scene0487_01 +scene0619_00 +scene0097_00 +scene0367_00 +scene0367_01 +scene0491_00 +scene0492_00 +scene0492_01 +scene0005_00 +scene0005_01 +scene0543_00 +scene0543_01 +scene0543_02 +scene0657_00 +scene0341_00 +scene0341_01 +scene0534_00 +scene0534_01 +scene0319_00 +scene0273_00 +scene0273_01 +scene0225_00 +scene0198_00 +scene0003_00 +scene0003_01 +scene0003_02 +scene0409_00 +scene0409_01 +scene0331_00 +scene0331_01 +scene0505_00 +scene0505_01 +scene0505_02 +scene0505_03 +scene0505_04 +scene0506_00 +scene0057_00 +scene0057_01 +scene0074_00 +scene0074_01 +scene0074_02 +scene0091_00 +scene0112_00 +scene0112_01 +scene0112_02 +scene0240_00 +scene0102_00 +scene0102_01 +scene0513_00 +scene0514_00 +scene0514_01 +scene0537_00 +scene0516_00 +scene0516_01 +scene0495_00 +scene0617_00 +scene0133_00 +scene0520_00 +scene0520_01 +scene0635_00 +scene0635_01 +scene0054_00 +scene0473_00 +scene0473_01 +scene0524_00 +scene0524_01 +scene0379_00 +scene0471_00 +scene0471_01 +scene0471_02 +scene0566_00 +scene0248_00 +scene0248_01 +scene0248_02 +scene0529_00 +scene0529_01 +scene0529_02 +scene0391_00 +scene0264_00 +scene0264_01 +scene0264_02 +scene0675_00 +scene0675_01 +scene0350_00 +scene0350_01 +scene0350_02 +scene0450_00 +scene0068_00 +scene0068_01 +scene0237_00 +scene0237_01 +scene0365_00 +scene0365_01 +scene0365_02 +scene0605_00 +scene0605_01 +scene0539_00 +scene0539_01 +scene0539_02 +scene0540_00 +scene0540_01 +scene0540_02 +scene0170_00 +scene0170_01 +scene0170_02 +scene0433_00 +scene0340_00 +scene0340_01 +scene0340_02 +scene0160_00 +scene0160_01 +scene0160_02 +scene0160_03 +scene0160_04 +scene0059_00 +scene0059_01 +scene0059_02 +scene0056_00 +scene0056_01 +scene0478_00 +scene0478_01 +scene0548_00 +scene0548_01 +scene0548_02 +scene0204_00 +scene0204_01 +scene0204_02 +scene0033_00 +scene0145_00 +scene0483_00 +scene0508_00 +scene0508_01 +scene0508_02 +scene0180_00 +scene0148_00 +scene0556_00 +scene0556_01 +scene0416_00 +scene0416_01 +scene0416_02 +scene0416_03 +scene0416_04 +scene0073_00 +scene0073_01 +scene0073_02 +scene0073_03 +scene0034_00 +scene0034_01 +scene0034_02 +scene0639_00 +scene0561_00 +scene0561_01 +scene0298_00 +scene0692_00 +scene0692_01 +scene0692_02 +scene0692_03 +scene0692_04 +scene0642_00 +scene0642_01 +scene0642_02 +scene0642_03 +scene0630_00 +scene0630_01 +scene0630_02 +scene0630_03 +scene0630_04 +scene0630_05 +scene0630_06 +scene0706_00 +scene0567_00 +scene0567_01 diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_val.txt b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_val.txt new file mode 100644 index 0000000000000000000000000000000000000000..b9e7d9205321e8ca047a527466f4b7100c9c9d2c --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/meta_data/scannetv2_val.txt @@ -0,0 +1,312 @@ +scene0568_00 +scene0568_01 +scene0568_02 +scene0304_00 +scene0488_00 +scene0488_01 +scene0412_00 +scene0412_01 +scene0217_00 +scene0019_00 +scene0019_01 +scene0414_00 +scene0575_00 +scene0575_01 +scene0575_02 +scene0426_00 +scene0426_01 +scene0426_02 +scene0426_03 +scene0549_00 +scene0549_01 +scene0578_00 
+scene0578_01 +scene0578_02 +scene0665_00 +scene0665_01 +scene0050_00 +scene0050_01 +scene0050_02 +scene0257_00 +scene0025_00 +scene0025_01 +scene0025_02 +scene0583_00 +scene0583_01 +scene0583_02 +scene0701_00 +scene0701_01 +scene0701_02 +scene0580_00 +scene0580_01 +scene0565_00 +scene0169_00 +scene0169_01 +scene0655_00 +scene0655_01 +scene0655_02 +scene0063_00 +scene0221_00 +scene0221_01 +scene0591_00 +scene0591_01 +scene0591_02 +scene0678_00 +scene0678_01 +scene0678_02 +scene0462_00 +scene0427_00 +scene0595_00 +scene0193_00 +scene0193_01 +scene0164_00 +scene0164_01 +scene0164_02 +scene0164_03 +scene0598_00 +scene0598_01 +scene0598_02 +scene0599_00 +scene0599_01 +scene0599_02 +scene0328_00 +scene0300_00 +scene0300_01 +scene0354_00 +scene0458_00 +scene0458_01 +scene0423_00 +scene0423_01 +scene0423_02 +scene0307_00 +scene0307_01 +scene0307_02 +scene0606_00 +scene0606_01 +scene0606_02 +scene0432_00 +scene0432_01 +scene0608_00 +scene0608_01 +scene0608_02 +scene0651_00 +scene0651_01 +scene0651_02 +scene0430_00 +scene0430_01 +scene0689_00 +scene0357_00 +scene0357_01 +scene0574_00 +scene0574_01 +scene0574_02 +scene0329_00 +scene0329_01 +scene0329_02 +scene0153_00 +scene0153_01 +scene0616_00 +scene0616_01 +scene0671_00 +scene0671_01 +scene0618_00 +scene0382_00 +scene0382_01 +scene0490_00 +scene0621_00 +scene0607_00 +scene0607_01 +scene0149_00 +scene0695_00 +scene0695_01 +scene0695_02 +scene0695_03 +scene0389_00 +scene0377_00 +scene0377_01 +scene0377_02 +scene0342_00 +scene0139_00 +scene0629_00 +scene0629_01 +scene0629_02 +scene0496_00 +scene0633_00 +scene0633_01 +scene0518_00 +scene0652_00 +scene0406_00 +scene0406_01 +scene0406_02 +scene0144_00 +scene0144_01 +scene0494_00 +scene0278_00 +scene0278_01 +scene0316_00 +scene0609_00 +scene0609_01 +scene0609_02 +scene0609_03 +scene0084_00 +scene0084_01 +scene0084_02 +scene0696_00 +scene0696_01 +scene0696_02 +scene0351_00 +scene0351_01 +scene0643_00 +scene0644_00 +scene0645_00 +scene0645_01 +scene0645_02 +scene0081_00 +scene0081_01 +scene0081_02 +scene0647_00 +scene0647_01 +scene0535_00 +scene0353_00 +scene0353_01 +scene0353_02 +scene0559_00 +scene0559_01 +scene0559_02 +scene0593_00 +scene0593_01 +scene0246_00 +scene0653_00 +scene0653_01 +scene0064_00 +scene0064_01 +scene0356_00 +scene0356_01 +scene0356_02 +scene0030_00 +scene0030_01 +scene0030_02 +scene0222_00 +scene0222_01 +scene0338_00 +scene0338_01 +scene0338_02 +scene0378_00 +scene0378_01 +scene0378_02 +scene0660_00 +scene0553_00 +scene0553_01 +scene0553_02 +scene0527_00 +scene0663_00 +scene0663_01 +scene0663_02 +scene0664_00 +scene0664_01 +scene0664_02 +scene0334_00 +scene0334_01 +scene0334_02 +scene0046_00 +scene0046_01 +scene0046_02 +scene0203_00 +scene0203_01 +scene0203_02 +scene0088_00 +scene0088_01 +scene0088_02 +scene0088_03 +scene0086_00 +scene0086_01 +scene0086_02 +scene0670_00 +scene0670_01 +scene0256_00 +scene0256_01 +scene0256_02 +scene0249_00 +scene0441_00 +scene0658_00 +scene0704_00 +scene0704_01 +scene0187_00 +scene0187_01 +scene0131_00 +scene0131_01 +scene0131_02 +scene0207_00 +scene0207_01 +scene0207_02 +scene0461_00 +scene0011_00 +scene0011_01 +scene0343_00 +scene0251_00 +scene0077_00 +scene0077_01 +scene0684_00 +scene0684_01 +scene0550_00 +scene0686_00 +scene0686_01 +scene0686_02 +scene0208_00 +scene0500_00 +scene0500_01 +scene0552_00 +scene0552_01 +scene0648_00 +scene0648_01 +scene0435_00 +scene0435_01 +scene0435_02 +scene0435_03 +scene0690_00 +scene0690_01 +scene0693_00 +scene0693_01 +scene0693_02 +scene0700_00 +scene0700_01 +scene0700_02 +scene0699_00 +scene0231_00 
+scene0231_01 +scene0231_02 +scene0697_00 +scene0697_01 +scene0697_02 +scene0697_03 +scene0474_00 +scene0474_01 +scene0474_02 +scene0474_03 +scene0474_04 +scene0474_05 +scene0355_00 +scene0355_01 +scene0146_00 +scene0146_01 +scene0146_02 +scene0196_00 +scene0702_00 +scene0702_01 +scene0702_02 +scene0314_00 +scene0277_00 +scene0277_01 +scene0277_02 +scene0095_00 +scene0095_01 +scene0015_00 +scene0100_00 +scene0100_01 +scene0100_02 +scene0558_00 +scene0558_01 +scene0558_02 +scene0685_00 +scene0685_01 +scene0685_02 diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/preprocess_scannet.py b/Pointcept/pointcept/datasets/preprocessing/scannet/preprocess_scannet.py new file mode 100644 index 0000000000000000000000000000000000000000..549a4261080b0dfb47f31ed390f821446d35322e --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/preprocess_scannet.py @@ -0,0 +1,253 @@ +""" +Preprocessing Script for ScanNet 20/200 + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import warnings + +warnings.filterwarnings("ignore", category=DeprecationWarning) + +import os +import argparse +import glob +import json +import plyfile +import numpy as np +import pandas as pd +import multiprocessing as mp +from concurrent.futures import ProcessPoolExecutor +from itertools import repeat +from pathlib import Path + +# Load external constants +from meta_data.scannet200_constants import VALID_CLASS_IDS_200, VALID_CLASS_IDS_20 + +CLOUD_FILE_PFIX = "_vh_clean_2" +SEGMENTS_FILE_PFIX = ".0.010000.segs.json" +AGGREGATIONS_FILE_PFIX = ".aggregation.json" +CLASS_IDS200 = VALID_CLASS_IDS_200 +CLASS_IDS20 = VALID_CLASS_IDS_20 +IGNORE_INDEX = -1 + + +def read_plymesh(filepath): + """Read ply file and return it as numpy array. 
Returns None if empty.""" + with open(filepath, "rb") as f: + plydata = plyfile.PlyData.read(f) + if plydata.elements: + vertices = pd.DataFrame(plydata["vertex"].data).values + faces = np.stack(plydata["face"].data["vertex_indices"], axis=0) + return vertices, faces + + +# Map a raw aggregation group to its point indices and label ids +def point_indices_from_group(seg_indices, group, labels_pd): + group_segments = np.array(group["segments"]) + label = group["label"] + + # Map the category name to id + label_id20 = labels_pd[labels_pd["raw_category"] == label]["nyu40id"] + label_id20 = int(label_id20.iloc[0]) if len(label_id20) > 0 else 0 + label_id200 = labels_pd[labels_pd["raw_category"] == label]["id"] + label_id200 = int(label_id200.iloc[0]) if len(label_id200) > 0 else 0 + + # Only store for the valid categories + if label_id20 in CLASS_IDS20: + label_id20 = CLASS_IDS20.index(label_id20) + else: + label_id20 = IGNORE_INDEX + + if label_id200 in CLASS_IDS200: + label_id200 = CLASS_IDS200.index(label_id200) + else: + label_id200 = IGNORE_INDEX + + # Get the points whose segment indices (points labelled with segment ids) are in the group's segment list + point_idx = np.where(np.isin(seg_indices, group_segments))[0] + return point_idx, label_id20, label_id200 + + +def face_normal(vertex, face): + v01 = vertex[face[:, 1]] - vertex[face[:, 0]] + v02 = vertex[face[:, 2]] - vertex[face[:, 0]] + vec = np.cross(v01, v02) + length = np.sqrt(np.sum(vec**2, axis=1, keepdims=True)) + 1.0e-8 + nf = vec / length + area = length * 0.5 + return nf, area + + +def vertex_normal(vertex, face): + nf, area = face_normal(vertex, face) + nf = nf * area + + nv = np.zeros_like(vertex) + for i in range(face.shape[0]): + nv[face[i]] += nf[i] + + length = np.sqrt(np.sum(nv**2, axis=1, keepdims=True)) + 1.0e-8 + nv = nv / length + return nv + + +def handle_process( + scene_path, output_path, labels_pd, train_scenes, val_scenes, parse_normals=True +): + scene_id = os.path.basename(scene_path) + mesh_path = os.path.join(scene_path, f"{scene_id}{CLOUD_FILE_PFIX}.ply") + segments_file = os.path.join( + scene_path, f"{scene_id}{CLOUD_FILE_PFIX}{SEGMENTS_FILE_PFIX}" + ) + aggregations_file = os.path.join(scene_path, f"{scene_id}{AGGREGATIONS_FILE_PFIX}") + info_file = os.path.join(scene_path, f"{scene_id}.txt") + + if scene_id in train_scenes: + output_path = os.path.join(output_path, "train", f"{scene_id}") + split_name = "train" + elif scene_id in val_scenes: + output_path = os.path.join(output_path, "val", f"{scene_id}") + split_name = "val" + else: + output_path = os.path.join(output_path, "test", f"{scene_id}") + split_name = "test" + + print(f"Processing: {scene_id} in {split_name}") + + vertices, faces = read_plymesh(mesh_path) + coords = vertices[:, :3] + colors = vertices[:, 3:6] + save_dict = dict( + coord=coords.astype(np.float32), + color=colors.astype(np.uint8), + ) + + # # Rotating the mesh to axis aligned + # info_dict = {} + # with open(info_file) as f: + # for line in f: + # (key, val) = line.split(" = ") + # info_dict[key] = np.fromstring(val, sep=' ') + # + # if 'axisAlignment' not in info_dict: + # rot_matrix = np.identity(4) + # else: + # rot_matrix = info_dict['axisAlignment'].reshape(4, 4) + # r_coords = coords.transpose() + # r_coords = np.append(r_coords, np.ones((1, r_coords.shape[1])), axis=0) + # r_coords = np.dot(rot_matrix, r_coords) + # coords = r_coords + + # Parse Normals + if parse_normals: + save_dict["normal"] = vertex_normal(coords, faces).astype(np.float32) + + # Load segments file + if split_name != "test":
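+ # Editor's note: segment/aggregation annotation files ship only for train/val scans, so the label generation below is skipped for the test split.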
+ with open(segments_file) as f: + segments = json.load(f) + seg_indices = np.array(segments["segIndices"]) + + # Load Aggregations file + with open(aggregations_file) as f: + aggregation = json.load(f) + seg_groups = np.array(aggregation["segGroups"]) + + # Generate new labels + semantic_gt20 = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX + semantic_gt200 = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX + instance_ids = np.ones((vertices.shape[0]), dtype=np.int16) * IGNORE_INDEX + for group in seg_groups: + point_idx, label_id20, label_id200 = point_indices_from_group( + seg_indices, group, labels_pd + ) + + semantic_gt20[point_idx] = label_id20 + semantic_gt200[point_idx] = label_id200 + instance_ids[point_idx] = group["id"] + + semantic_gt20 = semantic_gt20.astype(int) + semantic_gt200 = semantic_gt200.astype(int) + instance_ids = instance_ids.astype(int) + + save_dict["segment20"] = semantic_gt20 + save_dict["segment200"] = semantic_gt200 + save_dict["instance"] = instance_ids + + # Stack the generated labels for the sanity check below + processed_vertices = np.hstack((semantic_gt200, instance_ids)) + + if np.any(np.isnan(processed_vertices)) or not np.all( + np.isfinite(processed_vertices) + ): + raise ValueError(f"Found NaN in scene: {scene_id}") + + # Save processed data + os.makedirs(output_path, exist_ok=True) + for key in save_dict.keys(): + np.save(os.path.join(output_path, f"{key}.npy"), save_dict[key]) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset_root", + required=True, + help="Path to the ScanNet dataset containing scene folders", + ) + parser.add_argument( + "--output_root", + required=True, + help="Output path where train/val folders will be located", + ) + parser.add_argument( + "--parse_normals", + default=True, + # note: a plain type=bool would treat any non-empty string (even "False") as True + type=lambda x: str(x).lower() in ("true", "1", "yes"), + help="Whether to parse point normals (true/false)", + ) + parser.add_argument( + "--num_workers", + default=mp.cpu_count(), + type=int, + help="Number of workers for preprocessing.", + ) + config = parser.parse_args() + meta_root = Path(os.path.dirname(__file__)) / "meta_data" + + # Load label map + labels_pd = pd.read_csv( + meta_root / "scannetv2-labels.combined.tsv", + sep="\t", + header=0, + ) + + # Load train/val splits + with open(meta_root / "scannetv2_train.txt") as train_file: + train_scenes = train_file.read().splitlines() + with open(meta_root / "scannetv2_val.txt") as val_file: + val_scenes = val_file.read().splitlines() + + # Create output directories + train_output_dir = os.path.join(config.output_root, "train") + os.makedirs(train_output_dir, exist_ok=True) + val_output_dir = os.path.join(config.output_root, "val") + os.makedirs(val_output_dir, exist_ok=True) + test_output_dir = os.path.join(config.output_root, "test") + os.makedirs(test_output_dir, exist_ok=True) + + # Load scene paths + scene_paths = sorted(glob.glob(config.dataset_root + "/scans*/scene*")) + + # Preprocess data.
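+ # Example invocation (editor's sketch; the paths are placeholders, not from the original docs): + # python preprocess_scannet.py --dataset_root /data/scannetv2 --output_root /data/scannetv2_processed --num_workers 16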
+ print("Processing scenes...") + pool = ProcessPoolExecutor(max_workers=config.num_workers) + _ = list( + pool.map( + handle_process, + scene_paths, + repeat(config.output_root), + repeat(labels_pd), + repeat(train_scenes), + repeat(val_scenes), + repeat(config.parse_normals), + ) + ) diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/SensorData.py b/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/SensorData.py new file mode 100644 index 0000000000000000000000000000000000000000..d90c8770e812f782e4735cc7095c100cd6258bf6 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/SensorData.py @@ -0,0 +1,183 @@ +import os, struct +import numpy as np +import zlib +import imageio +import cv2 + +COMPRESSION_TYPE_COLOR = {-1: "unknown", 0: "raw", 1: "png", 2: "jpeg"} +COMPRESSION_TYPE_DEPTH = { + -1: "unknown", + 0: "raw_ushort", + 1: "zlib_ushort", + 2: "occi_ushort", +} + + +class RGBDFrame: + def load(self, file_handle): + self.camera_to_world = np.asarray( + struct.unpack("f" * 16, file_handle.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.timestamp_color = struct.unpack("Q", file_handle.read(8))[0] + self.timestamp_depth = struct.unpack("Q", file_handle.read(8))[0] + self.color_size_bytes = struct.unpack("Q", file_handle.read(8))[0] + self.depth_size_bytes = struct.unpack("Q", file_handle.read(8))[0] + self.color_data = b"".join( + struct.unpack( + "c" * self.color_size_bytes, file_handle.read(self.color_size_bytes) + ) + ) + self.depth_data = b"".join( + struct.unpack( + "c" * self.depth_size_bytes, file_handle.read(self.depth_size_bytes) + ) + ) + + def decompress_depth(self, compression_type): + if compression_type == "zlib_ushort": + return self.decompress_depth_zlib() + else: + raise ValueError("unsupported depth compression type: " + str(compression_type)) + + def decompress_depth_zlib(self): + return zlib.decompress(self.depth_data) + + def decompress_color(self, compression_type): + if compression_type == "jpeg": + return self.decompress_color_jpeg() + else: + raise ValueError("unsupported color compression type: " + str(compression_type)) + + def decompress_color_jpeg(self): + return imageio.imread(self.color_data) + + +class SensorData: + def __init__(self, filename): + self.version = 4 + self.load(filename) + + def load(self, filename): + with open(filename, "rb") as f: + version = struct.unpack("I", f.read(4))[0] + assert self.version == version + strlen = struct.unpack("Q", f.read(8))[0] + self.sensor_name = b"".join(struct.unpack("c" * strlen, f.read(strlen))) + self.intrinsic_color = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.extrinsic_color = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.intrinsic_depth = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.extrinsic_depth = np.asarray( + struct.unpack("f" * 16, f.read(16 * 4)), dtype=np.float32 + ).reshape(4, 4) + self.color_compression_type = COMPRESSION_TYPE_COLOR[ + struct.unpack("i", f.read(4))[0] + ] + self.depth_compression_type = COMPRESSION_TYPE_DEPTH[ + struct.unpack("i", f.read(4))[0] + ] + self.color_width = struct.unpack("I", f.read(4))[0] + self.color_height = struct.unpack("I", f.read(4))[0] + self.depth_width = struct.unpack("I", f.read(4))[0] + self.depth_height = struct.unpack("I", f.read(4))[0] + self.depth_shift = struct.unpack("f", f.read(4))[0] + num_frames = struct.unpack("Q", f.read(8))[0] + self.frames = [] + for i in range(num_frames): + frame = RGBDFrame() + frame.load(f) + self.frames.append(frame) + + def export_depth_images(self, output_path, image_size=None, frame_skip=1): + if not os.path.exists(output_path): + os.makedirs(output_path) + print( + "exporting", len(self.frames) // frame_skip, "depth frames to", output_path + ) + for f in range(0, len(self.frames), frame_skip): + if os.path.exists(os.path.join(output_path, str(f) + ".png")): + continue + if f % 100 == 0: + print( + "exporting depth frame", + f, + "to", + os.path.join(output_path, str(f) + ".png"), + ) + + depth_data = self.frames[f].decompress_depth(self.depth_compression_type) + depth = np.frombuffer(depth_data, dtype=np.uint16).reshape( + self.depth_height, self.depth_width + ) + if image_size is not None: + depth = cv2.resize( + depth, + (image_size[1], image_size[0]), + interpolation=cv2.INTER_NEAREST, + ) + imageio.imwrite(os.path.join(output_path, str(f) + ".png"), depth) + + def export_color_images(self, output_path, image_size=None, frame_skip=1): + if not os.path.exists(output_path): + os.makedirs(output_path) + print( + "exporting", len(self.frames) // frame_skip, "color frames to", output_path + ) + for f in range(0, len(self.frames), frame_skip): + if os.path.exists(os.path.join(output_path, str(f) + ".png")): + continue + if f % 100 == 0: + print( + "exporting color frame", + f, + "to", + os.path.join(output_path, str(f) + ".png"), + ) + color = self.frames[f].decompress_color(self.color_compression_type) + if image_size is not None: + color = cv2.resize( + color, + (image_size[1], image_size[0]), + interpolation=cv2.INTER_NEAREST, + ) + # imageio.imwrite(os.path.join(output_path, str(f) + '.jpg'), color) + imageio.imwrite(os.path.join(output_path, str(f) + ".png"), color) + + def save_mat_to_file(self, matrix, filename): + with open(filename, "w") as f: + for line in matrix: + np.savetxt(f, line[np.newaxis], fmt="%f") + + def export_poses(self, output_path, frame_skip=1): + if not os.path.exists(output_path): + os.makedirs(output_path) + print( + "exporting", len(self.frames) // frame_skip, "camera poses to", output_path + ) + for f in range(0, len(self.frames), frame_skip): + self.save_mat_to_file( + self.frames[f].camera_to_world, + os.path.join(output_path, str(f) + ".txt"), + ) + + def export_intrinsics(self, output_path): + if not os.path.exists(output_path): + os.makedirs(output_path) + print("exporting camera intrinsics to", output_path) + self.save_mat_to_file( + self.intrinsic_color, os.path.join(output_path, "intrinsic_color.txt") + ) + self.save_mat_to_file( + self.extrinsic_color, os.path.join(output_path, "extrinsic_color.txt") + ) + self.save_mat_to_file( + self.intrinsic_depth, os.path.join(output_path, "intrinsic_depth.txt") + ) + self.save_mat_to_file( + self.extrinsic_depth, os.path.join(output_path, "extrinsic_depth.txt") + ) diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/compute_full_overlapping.py b/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/compute_full_overlapping.py new file mode 100644 index 0000000000000000000000000000000000000000..a6b407eebad280f2817805d15ec43b9f7f6afbf4 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/compute_full_overlapping.py @@ -0,0 +1,91 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree.
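+# Editor's summary of the module below: it loads every frame point cloud of a scene from /pcd/*.pth, voxel-downsamples it, estimates pairwise overlap as the fraction of points with a neighbour within 1.5 * voxel_size (KD-tree radius search), and writes one "name name overlap" row per pair to pcd/overlap.txt.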
+ +import copy +import torch +import numpy as np +import math +import glob, os +import argparse +import open3d as o3d + + +def make_open3d_point_cloud(xyz, color=None, voxel_size=None): + if np.isnan(xyz).any(): + return None + + xyz = xyz[:, :3] + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(xyz) + if color is not None: + pcd.colors = o3d.utility.Vector3dVector(color) + if voxel_size is not None: + pcd = pcd.voxel_down_sample(voxel_size) + + return pcd + + +def compute_overlap_ratio(pcd0, pcd1, voxel_size): + pcd0_down = pcd0.voxel_down_sample(voxel_size) + pcd1_down = pcd1.voxel_down_sample(voxel_size) + # build KD-trees explicitly: get_matching_indices searches a KDTreeFlann, not a raw point cloud + matching01 = get_matching_indices( + pcd0_down, o3d.geometry.KDTreeFlann(pcd1_down), voxel_size * 1.5, 1 + ) + matching10 = get_matching_indices( + pcd1_down, o3d.geometry.KDTreeFlann(pcd0_down), voxel_size * 1.5, 1 + ) + overlap0 = float(len(matching01)) / float(len(pcd0_down.points)) + overlap1 = float(len(matching10)) / float(len(pcd1_down.points)) + return max(overlap0, overlap1) + + +def get_matching_indices(source, pcd_tree, search_voxel_size, K=None): + match_inds = [] + for i, point in enumerate(source.points): + [_, idx, _] = pcd_tree.search_radius_vector_3d(point, search_voxel_size) + if K is not None: + idx = idx[:K] + for j in idx: + match_inds.append((i, j)) + return match_inds + + +def compute_full_overlapping(data_root, scene_id, voxel_size=0.05): + _points = [ + ( + pcd_name, + make_open3d_point_cloud( + torch.load(pcd_name)["coord"], voxel_size=voxel_size + ), + ) + for pcd_name in glob.glob(os.path.join(data_root, scene_id, "pcd", "*.pth")) + ] + points = [(pcd_name, pcd) for (pcd_name, pcd) in _points if pcd is not None] + print( + "loaded {} point clouds ({} invalid ones filtered), computing matching/overlapping".format( + len(points), len(_points) - len(points) + ) + ) + + matching_matrix = np.zeros((len(points), len(points))) + for i, (pcd0_name, pcd0) in enumerate(points): + print("matching to...{}".format(pcd0_name)) + pcd0_tree = o3d.geometry.KDTreeFlann(copy.deepcopy(pcd0)) + for j, (pcd1_name, pcd1) in enumerate(points): + if i == j: + continue + matching_matrix[i, j] = float( + len(get_matching_indices(pcd1, pcd0_tree, 1.5 * voxel_size, 1)) + ) / float(len(pcd1.points)) + + # write to file + with open(os.path.join(data_root, scene_id, "pcd", "overlap.txt"), "w") as f: + for i, (pcd0_name, pcd0) in enumerate(points): + for j, (pcd1_name, pcd1) in enumerate(points): + if i < j: + overlap = max(matching_matrix[i, j], matching_matrix[j, i]) + f.write( + "{} {} {}\n".format( + pcd0_name.replace(data_root, ""), + pcd1_name.replace(data_root, ""), + overlap, + ) + ) diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/generage_list.py b/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/generage_list.py new file mode 100644 index 0000000000000000000000000000000000000000..a8943ba040cc24fc8d3130bd8784052cb57ce6c9 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/generage_list.py @@ -0,0 +1,33 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree.
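+# Editor's note: "generage" in this filename appears to be an upstream typo, kept so existing paths keep working. The script merges all per-scene pcd/overlap.txt files and keeps view pairs with overlap >= 0.3, e.g.: + # python generage_list.py --target_dir /data/scannet_pair (the path is a placeholder)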
+ + +import argparse +import glob, os, sys + +from SensorData import SensorData + +# params +parser = argparse.ArgumentParser() +# data paths +parser.add_argument("--target_dir", required=True, help="path to the target dir") + +opt = parser.parse_args() +print(opt) + + +def main(): + overlaps = glob.glob(os.path.join(opt.target_dir, "*/pcd/overlap.txt")) + with open(os.path.join(opt.target_dir, "overlap30.txt"), "w") as f: + for fo in overlaps: + for line in open(fo): + pcd0, pcd1, op = line.strip().split() + if float(op) >= 0.3: + print("{} {} {}".format(pcd0, pcd1, op), file=f) + print("done") + + +if __name__ == "__main__": + main() diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/plyfile.py b/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/plyfile.py new file mode 100644 index 0000000000000000000000000000000000000000..17400c4bd28764829d248e90dc141182fa1d8f03 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/plyfile.py @@ -0,0 +1,894 @@ +# Copyright 2014 Darsh Ranjan +# +# This file is part of python-plyfile. +# +# python-plyfile is free software: you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# python-plyfile is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with python-plyfile. If not, see +# <http://www.gnu.org/licenses/>. + +from itertools import islice as _islice + +import numpy as _np +from sys import byteorder as _byteorder + + +try: + _range = xrange +except NameError: + _range = range + + +# Many-many relation +_data_type_relation = [ + ("int8", "i1"), + ("char", "i1"), + ("uint8", "u1"), + ("uchar", "b1"), + ("uchar", "u1"), + ("int16", "i2"), + ("short", "i2"), + ("uint16", "u2"), + ("ushort", "u2"), + ("int32", "i4"), + ("int", "i4"), + ("uint32", "u4"), + ("uint", "u4"), + ("float32", "f4"), + ("float", "f4"), + ("float64", "f8"), + ("double", "f8"), +] + +_data_types = dict(_data_type_relation) +_data_type_reverse = dict((b, a) for (a, b) in _data_type_relation) + +_types_list = [] +_types_set = set() +for _a, _b in _data_type_relation: + if _a not in _types_set: + _types_list.append(_a) + _types_set.add(_a) + if _b not in _types_set: + _types_list.append(_b) + _types_set.add(_b) + + +_byte_order_map = {"ascii": "=", "binary_little_endian": "<", "binary_big_endian": ">"} + +_byte_order_reverse = {"<": "binary_little_endian", ">": "binary_big_endian"} + +_native_byte_order = {"little": "<", "big": ">"}[_byteorder] + + +def _lookup_type(type_str): + if type_str not in _data_type_reverse: + try: + type_str = _data_types[type_str] + except KeyError: + raise ValueError("field type %r not in %r" % (type_str, _types_list)) + + return _data_type_reverse[type_str] + + +def _split_line(line, n): + fields = line.split(None, n) + if len(fields) == n: + fields.append("") + + assert len(fields) == n + 1 + + return fields + + +def make2d(array, cols=None, dtype=None): + """ + Make a 2D array from an array of arrays. The `cols' and `dtype' + arguments can be omitted if the array is not empty.
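+ For example (editor's note), make2d applied to a list of two + length-3 arrays returns a (2, 3) array whose rows are those arrays.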
+ + """ + if (cols is None or dtype is None) and not len(array): + raise RuntimeError("cols and dtype must be specified for empty array") + + if cols is None: + cols = len(array[0]) + + if dtype is None: + dtype = array[0].dtype + + return _np.fromiter(array, [("_", dtype, (cols,))], count=len(array))["_"] + + +class PlyParseError(Exception): + """ + Raised when a PLY file cannot be parsed. + + The attributes `element', `row', `property', and `message' give + additional information. + + """ + + def __init__(self, message, element=None, row=None, prop=None): + self.message = message + self.element = element + self.row = row + self.prop = prop + + s = "" + if self.element: + s += "element %r: " % self.element.name + if self.row is not None: + s += "row %d: " % self.row + if self.prop: + s += "property %r: " % self.prop.name + s += self.message + + Exception.__init__(self, s) + + def __repr__(self): + return "PlyParseError(%r, element=%r, row=%r, prop=%r)" % ( + self.message, + self.element, + self.row, + self.prop, + ) + + +class PlyData(object): + """ + PLY file header and data. + + A PlyData instance is created in one of two ways: by the static + method PlyData.read (to read a PLY file), or directly from __init__ + given a sequence of elements (which can then be written to a PLY + file). + + """ + + def __init__( + self, elements=[], text=False, byte_order="=", comments=[], obj_info=[] + ): + """ + elements: sequence of PlyElement instances. + + text: whether the resulting PLY file will be text (True) or + binary (False). + + byte_order: '<' for little-endian, '>' for big-endian, or '=' + for native. This is only relevant if `text' is False. + + comments: sequence of strings that will be placed in the header + between the 'ply' and 'format ...' lines. + + obj_info: like comments, but will be placed in the header with + "obj_info ..." instead of "comment ...". + + """ + if byte_order == "=" and not text: + byte_order = _native_byte_order + + self.byte_order = byte_order + self.text = text + + self.comments = list(comments) + self.obj_info = list(obj_info) + self.elements = elements + + def _get_elements(self): + return self._elements + + def _set_elements(self, elements): + self._elements = tuple(elements) + self._index() + + elements = property(_get_elements, _set_elements) + + def _get_byte_order(self): + return self._byte_order + + def _set_byte_order(self, byte_order): + if byte_order not in ["<", ">", "="]: + raise ValueError("byte order must be '<', '>', or '='") + + self._byte_order = byte_order + + byte_order = property(_get_byte_order, _set_byte_order) + + def _index(self): + self._element_lookup = dict((elt.name, elt) for elt in self._elements) + if len(self._element_lookup) != len(self._elements): + raise ValueError("two elements with same name") + + @staticmethod + def _parse_header(stream): + """ + Parse a PLY header from a readable file-like stream.
+ + """ + lines = [] + comments = {"comment": [], "obj_info": []} + while True: + line = stream.readline().decode("ascii").strip() + fields = _split_line(line, 1) + + if fields[0] == "end_header": + break + + elif fields[0] in comments.keys(): + lines.append(fields) + else: + lines.append(line.split()) + + a = 0 + if lines[a] != ["ply"]: + raise PlyParseError("expected 'ply'") + + a += 1 + while lines[a][0] in comments.keys(): + comments[lines[a][0]].append(lines[a][1]) + a += 1 + + if lines[a][0] != "format": + raise PlyParseError("expected 'format'") + + if lines[a][2] != "1.0": + raise PlyParseError("expected version '1.0'") + + if len(lines[a]) != 3: + raise PlyParseError("too many fields after 'format'") + + fmt = lines[a][1] + + if fmt not in _byte_order_map: + raise PlyParseError("don't understand format %r" % fmt) + + byte_order = _byte_order_map[fmt] + text = fmt == "ascii" + + a += 1 + while a < len(lines) and lines[a][0] in comments.keys(): + comments[lines[a][0]].append(lines[a][1]) + a += 1 + + return PlyData( + PlyElement._parse_multi(lines[a:]), + text, + byte_order, + comments["comment"], + comments["obj_info"], + ) + + @staticmethod + def read(stream): + """ + Read PLY data from a readable file-like object or filename. + + """ + (must_close, stream) = _open_stream(stream, "read") + try: + data = PlyData._parse_header(stream) + for elt in data: + elt._read(stream, data.text, data.byte_order) + finally: + if must_close: + stream.close() + + return data + + def write(self, stream): + """ + Write PLY data to a writeable file-like object or filename. + + """ + (must_close, stream) = _open_stream(stream, "write") + try: + stream.write(self.header.encode("ascii")) + stream.write(b"\r\n") + for elt in self: + elt._write(stream, self.text, self.byte_order) + finally: + if must_close: + stream.close() + + @property + def header(self): + """ + Provide PLY-formatted metadata for the instance. + + """ + lines = ["ply"] + + if self.text: + lines.append("format ascii 1.0") + else: + lines.append("format " + _byte_order_reverse[self.byte_order] + " 1.0") + + # Some information is lost here, since all comments are placed + # between the 'format' line and the first element. + for c in self.comments: + lines.append("comment " + c) + + for c in self.obj_info: + lines.append("obj_info " + c) + + lines.extend(elt.header for elt in self.elements) + lines.append("end_header") + return "\r\n".join(lines) + + def __iter__(self): + return iter(self.elements) + + def __len__(self): + return len(self.elements) + + def __contains__(self, name): + return name in self._element_lookup + + def __getitem__(self, name): + return self._element_lookup[name] + + def __str__(self): + return self.header + + def __repr__(self): + return "PlyData(%r, text=%r, byte_order=%r, " "comments=%r, obj_info=%r)" % ( + self.elements, + self.text, + self.byte_order, + self.comments, + self.obj_info, + ) + + +def _open_stream(stream, read_or_write): + if hasattr(stream, read_or_write): + return (False, stream) + try: + return (True, open(stream, read_or_write[0] + "b")) + except TypeError: + raise RuntimeError("expected open file or filename") + + +class PlyElement(object): + """ + PLY file element. + + A client of this library doesn't normally need to instantiate this + directly, so the following is only for the sake of documenting the + internals. 
+ + Creating a PlyElement instance is generally done in one of two ways: + as a byproduct of PlyData.read (when reading a PLY file) and by + PlyElement.describe (before writing a PLY file). + + """ + + def __init__(self, name, properties, count, comments=[]): + """ + This is not part of the public interface. The preferred methods + of obtaining PlyElement instances are PlyData.read (to read from + a file) and PlyElement.describe (to construct from a numpy + array). + + """ + self._name = str(name) + self._check_name() + self._count = count + + self._properties = tuple(properties) + self._index() + + self.comments = list(comments) + + self._have_list = any(isinstance(p, PlyListProperty) for p in self.properties) + + @property + def count(self): + return self._count + + def _get_data(self): + return self._data + + def _set_data(self, data): + self._data = data + self._count = len(data) + self._check_sanity() + + data = property(_get_data, _set_data) + + def _check_sanity(self): + for prop in self.properties: + if prop.name not in self._data.dtype.fields: + raise ValueError("dangling property %r" % prop.name) + + def _get_properties(self): + return self._properties + + def _set_properties(self, properties): + self._properties = tuple(properties) + self._check_sanity() + self._index() + + properties = property(_get_properties, _set_properties) + + def _index(self): + self._property_lookup = dict((prop.name, prop) for prop in self._properties) + if len(self._property_lookup) != len(self._properties): + raise ValueError("two properties with same name") + + def ply_property(self, name): + return self._property_lookup[name] + + @property + def name(self): + return self._name + + def _check_name(self): + if any(c.isspace() for c in self._name): + msg = "element name %r contains spaces" % self._name + raise ValueError(msg) + + def dtype(self, byte_order="="): + """ + Return the numpy dtype of the in-memory representation of the + data. (If there are no list properties, and the PLY format is + binary, then this also accurately describes the on-disk + representation of the element.) + + """ + return [(prop.name, prop.dtype(byte_order)) for prop in self.properties] + + @staticmethod + def _parse_multi(header_lines): + """ + Parse a list of PLY element definitions. + + """ + elements = [] + while header_lines: + (elt, header_lines) = PlyElement._parse_one(header_lines) + elements.append(elt) + + return elements + + @staticmethod + def _parse_one(lines): + """ + Consume one element definition. The unconsumed input is + returned along with a PlyElement instance. + + """ + a = 0 + line = lines[a] + + if line[0] != "element": + raise PlyParseError("expected 'element'") + if len(line) > 3: + raise PlyParseError("too many fields after 'element'") + if len(line) < 3: + raise PlyParseError("too few fields after 'element'") + + (name, count) = (line[1], int(line[2])) + + comments = [] + properties = [] + while True: + a += 1 + if a >= len(lines): + break + + if lines[a][0] == "comment": + comments.append(lines[a][1]) + elif lines[a][0] == "property": + properties.append(PlyProperty._parse_one(lines[a])) + else: + break + + return (PlyElement(name, properties, count, comments), lines[a:]) + + @staticmethod + def describe(data, name, len_types={}, val_types={}, comments=[]): + """ + Construct a PlyElement from an array's metadata. + + len_types and val_types can be given as mappings from list + property names to type strings (like 'u1', 'f4', etc., or + 'int8', 'float32', etc.). 
These can be used to define the length + and value types of list properties. List property lengths + always default to type 'u1' (8-bit unsigned integer), and value + types default to 'i4' (32-bit integer). + + """ + if not isinstance(data, _np.ndarray): + raise TypeError("only numpy arrays are supported") + + if len(data.shape) != 1: + raise ValueError("only one-dimensional arrays are " "supported") + + count = len(data) + + properties = [] + descr = data.dtype.descr + + for t in descr: + if not isinstance(t[1], str): + raise ValueError("nested records not supported") + + if not t[0]: + raise ValueError("field with empty name") + + if len(t) != 2 or t[1][1] == "O": + # non-scalar field, which corresponds to a list + # property in PLY. + + if t[1][1] == "O": + if len(t) != 2: + raise ValueError("non-scalar object fields not " "supported") + + len_str = _data_type_reverse[len_types.get(t[0], "u1")] + if t[1][1] == "O": + val_type = val_types.get(t[0], "i4") + val_str = _lookup_type(val_type) + else: + val_str = _lookup_type(t[1][1:]) + + prop = PlyListProperty(t[0], len_str, val_str) + else: + val_str = _lookup_type(t[1][1:]) + prop = PlyProperty(t[0], val_str) + + properties.append(prop) + + elt = PlyElement(name, properties, count, comments) + elt.data = data + + return elt + + def _read(self, stream, text, byte_order): + """ + Read the actual data from a PLY file. + + """ + if text: + self._read_txt(stream) + else: + if self._have_list: + # There are list properties, so a simple load is + # impossible. + self._read_bin(stream, byte_order) + else: + # There are no list properties, so loading the data is + # much more straightforward. + self._data = _np.fromfile(stream, self.dtype(byte_order), self.count) + + if len(self._data) < self.count: + k = len(self._data) + del self._data + raise PlyParseError("early end-of-file", self, k) + + self._check_sanity() + + def _write(self, stream, text, byte_order): + """ + Write the data to a PLY file. + + """ + if text: + self._write_txt(stream) + else: + if self._have_list: + # There are list properties, so serialization is + # slightly complicated. + self._write_bin(stream, byte_order) + else: + # no list properties, so serialization is + # straightforward. + self.data.astype(self.dtype(byte_order), copy=False).tofile(stream) + + def _read_txt(self, stream): + """ + Load a PLY element from an ASCII-format PLY file. The element + may contain list properties. + + """ + self._data = _np.empty(self.count, dtype=self.dtype()) + + k = 0 + for line in _islice(iter(stream.readline, b""), self.count): + fields = iter(line.strip().split()) + for prop in self.properties: + try: + self._data[prop.name][k] = prop._from_fields(fields) + except StopIteration: + raise PlyParseError("early end-of-line", self, k, prop) + except ValueError: + raise PlyParseError("malformed input", self, k, prop) + try: + next(fields) + except StopIteration: + pass + else: + raise PlyParseError("expected end-of-line", self, k) + k += 1 + + if k < self.count: + del self._data + raise PlyParseError("early end-of-file", self, k) + + def _write_txt(self, stream): + """ + Save a PLY element to an ASCII-format PLY file. The element may + contain list properties. + + """ + for rec in self.data: + fields = [] + for prop in self.properties: + fields.extend(prop._to_fields(rec[prop.name])) + + _np.savetxt(stream, [fields], "%.18g", newline="\r\n") + + def _read_bin(self, stream, byte_order): + """ + Load a PLY element from a binary PLY file. The element may + contain list properties. 
+ + """ + self._data = _np.empty(self.count, dtype=self.dtype(byte_order)) + + for k in _range(self.count): + for prop in self.properties: + try: + self._data[prop.name][k] = prop._read_bin(stream, byte_order) + except StopIteration: + raise PlyParseError("early end-of-file", self, k, prop) + + def _write_bin(self, stream, byte_order): + """ + Save a PLY element to a binary PLY file. The element may + contain list properties. + + """ + for rec in self.data: + for prop in self.properties: + prop._write_bin(rec[prop.name], stream, byte_order) + + @property + def header(self): + """ + Format this element's metadata as it would appear in a PLY + header. + + """ + lines = ["element %s %d" % (self.name, self.count)] + + # Some information is lost here, since all comments are placed + # between the 'element' line and the first property definition. + for c in self.comments: + lines.append("comment " + c) + + lines.extend(list(map(str, self.properties))) + + return "\r\n".join(lines) + + def __getitem__(self, key): + return self.data[key] + + def __setitem__(self, key, value): + self.data[key] = value + + def __str__(self): + return self.header + + def __repr__(self): + return "PlyElement(%r, %r, count=%d, comments=%r)" % ( + self.name, + self.properties, + self.count, + self.comments, + ) + + +class PlyProperty(object): + """ + PLY property description. This class is pure metadata; the data + itself is contained in PlyElement instances. + + """ + + def __init__(self, name, val_dtype): + self._name = str(name) + self._check_name() + self.val_dtype = val_dtype + + def _get_val_dtype(self): + return self._val_dtype + + def _set_val_dtype(self, val_dtype): + self._val_dtype = _data_types[_lookup_type(val_dtype)] + + val_dtype = property(_get_val_dtype, _set_val_dtype) + + @property + def name(self): + return self._name + + def _check_name(self): + if any(c.isspace() for c in self._name): + msg = "Error: property name %r contains spaces" % self._name + raise RuntimeError(msg) + + @staticmethod + def _parse_one(line): + assert line[0] == "property" + + if line[1] == "list": + if len(line) > 5: + raise PlyParseError("too many fields after " "'property list'") + if len(line) < 5: + raise PlyParseError("too few fields after " "'property list'") + + return PlyListProperty(line[4], line[2], line[3]) + + else: + if len(line) > 3: + raise PlyParseError("too many fields after " "'property'") + if len(line) < 3: + raise PlyParseError("too few fields after " "'property'") + + return PlyProperty(line[2], line[1]) + + def dtype(self, byte_order="="): + """ + Return the numpy dtype description for this property (as a tuple + of strings). + + """ + return byte_order + self.val_dtype + + def _from_fields(self, fields): + """ + Parse from generator. Raise StopIteration if the property could + not be read. + + """ + return _np.dtype(self.dtype()).type(next(fields)) + + def _to_fields(self, data): + """ + Return generator over one item. + + """ + yield _np.dtype(self.dtype()).type(data) + + def _read_bin(self, stream, byte_order): + """ + Read data from a binary stream. Raise StopIteration if the + property could not be read. + + """ + try: + return _np.fromfile(stream, self.dtype(byte_order), 1)[0] + except IndexError: + raise StopIteration + + def _write_bin(self, data, stream, byte_order): + """ + Write data to a binary stream. 
+ + """ + _np.dtype(self.dtype(byte_order)).type(data).tofile(stream) + + def __str__(self): + val_str = _data_type_reverse[self.val_dtype] + return "property %s %s" % (val_str, self.name) + + def __repr__(self): + return "PlyProperty(%r, %r)" % (self.name, _lookup_type(self.val_dtype)) + + +class PlyListProperty(PlyProperty): + """ + PLY list property description. + + """ + + def __init__(self, name, len_dtype, val_dtype): + PlyProperty.__init__(self, name, val_dtype) + + self.len_dtype = len_dtype + + def _get_len_dtype(self): + return self._len_dtype + + def _set_len_dtype(self, len_dtype): + self._len_dtype = _data_types[_lookup_type(len_dtype)] + + len_dtype = property(_get_len_dtype, _set_len_dtype) + + def dtype(self, byte_order="="): + """ + List properties always have a numpy dtype of "object". + + """ + return "|O" + + def list_dtype(self, byte_order="="): + """ + Return the pair (len_dtype, val_dtype) (both numpy-friendly + strings). + + """ + return (byte_order + self.len_dtype, byte_order + self.val_dtype) + + def _from_fields(self, fields): + (len_t, val_t) = self.list_dtype() + + n = int(_np.dtype(len_t).type(next(fields))) + + data = _np.loadtxt(list(_islice(fields, n)), val_t, ndmin=1) + if len(data) < n: + raise StopIteration + + return data + + def _to_fields(self, data): + """ + Return generator over the (numerical) PLY representation of the + list data (length followed by actual data). + + """ + (len_t, val_t) = self.list_dtype() + + data = _np.asarray(data, dtype=val_t).ravel() + + yield _np.dtype(len_t).type(data.size) + for x in data: + yield x + + def _read_bin(self, stream, byte_order): + (len_t, val_t) = self.list_dtype(byte_order) + + try: + n = _np.fromfile(stream, len_t, 1)[0] + except IndexError: + raise StopIteration + + data = _np.fromfile(stream, val_t, n) + if len(data) < n: + raise StopIteration + + return data + + def _write_bin(self, data, stream, byte_order): + """ + Write data to a binary stream. + + """ + (len_t, val_t) = self.list_dtype(byte_order) + + data = _np.asarray(data, dtype=val_t).ravel() + + _np.array(data.size, dtype=len_t).tofile(stream) + data.tofile(stream) + + def __str__(self): + len_str = _data_type_reverse[self.len_dtype] + val_str = _data_type_reverse[self.val_dtype] + return "property list %s %s %s" % (len_str, val_str, self.name) + + def __repr__(self): + return "PlyListProperty(%r, %r, %r)" % ( + self.name, + _lookup_type(self.len_dtype), + _lookup_type(self.val_dtype), + ) diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/point_cloud_extractor.py b/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/point_cloud_extractor.py new file mode 100644 index 0000000000000000000000000000000000000000..1cbff78d9453bac7efe8359448dabcd6edb60452 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/point_cloud_extractor.py @@ -0,0 +1,98 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+
+
+import glob, os
+import numpy as np
+import cv2
+import torch
+
+
+def extractor(input_path, output_path):
+    if not os.path.exists(output_path):
+        os.mkdir(output_path)
+
+    # Load Depth Camera Intrinsic
+    depth_intrinsic = np.loadtxt(input_path + "/intrinsic/intrinsic_depth.txt")
+    print("Depth intrinsic: ")
+    print(depth_intrinsic)
+
+    # Compute Camera Distance (just for demo, so you can choose the camera distance in frame sampling)
+    poses = sorted(
+        glob.glob(input_path + "/pose/*.txt"),
+        key=lambda a: int(os.path.basename(a).split(".")[0]),
+    )
+    depths = sorted(
+        glob.glob(input_path + "/depth/*.png"),
+        key=lambda a: int(os.path.basename(a).split(".")[0]),
+    )
+    colors = sorted(
+        glob.glob(input_path + "/color/*.png"),
+        key=lambda a: int(os.path.basename(a).split(".")[0]),
+    )
+
+    # Get Aligned Point Clouds.
+    for ind, (pose, depth, color) in enumerate(zip(poses, depths, colors)):
+        name = os.path.basename(pose).split(".")[0]
+
+        # Skip frames that have already been extracted (output is saved as .pth below).
+        if os.path.exists(output_path + "/{}.pth".format(name)):
+            continue
+
+        try:
+            print("=" * 50, ": {}".format(pose))
+            depth_img = cv2.imread(depth, -1)  # read 16bit grayscale image
+            mask = depth_img != 0
+            color_image = cv2.imread(color)
+            color_image = cv2.resize(color_image, (640, 480))
+            color_image = np.reshape(color_image[mask], [-1, 3])
+            # OpenCV loads images as BGR; reorder the channels to RGB.
+            color_rgb = np.zeros_like(color_image)
+            color_rgb[:, 0] = color_image[:, 2]
+            color_rgb[:, 1] = color_image[:, 1]
+            color_rgb[:, 2] = color_image[:, 0]
+
+            pose_matrix = np.loadtxt(poses[ind])
+            print("Camera pose: ")
+            print(pose_matrix)
+
+            depth_shift = 1000.0
+            x, y = np.meshgrid(
+                np.linspace(0, depth_img.shape[1] - 1, depth_img.shape[1]),
+                np.linspace(0, depth_img.shape[0] - 1, depth_img.shape[0]),
+            )
+            uv_depth = np.zeros((depth_img.shape[0], depth_img.shape[1], 3))
+            uv_depth[:, :, 0] = x
+            uv_depth[:, :, 1] = y
+            uv_depth[:, :, 2] = depth_img / depth_shift
+            uv_depth = np.reshape(uv_depth, [-1, 3])
+            uv_depth = uv_depth[np.where(uv_depth[:, 2] != 0), :].squeeze()
+
+            fx = depth_intrinsic[0, 0]
+            fy = depth_intrinsic[1, 1]
+            cx = depth_intrinsic[0, 2]
+            cy = depth_intrinsic[1, 2]
+            bx = depth_intrinsic[0, 3]
+            by = depth_intrinsic[1, 3]
+            n = uv_depth.shape[0]
+            points = np.ones((n, 4))
+            X = (uv_depth[:, 0] - cx) * uv_depth[:, 2] / fx + bx
+            Y = (uv_depth[:, 1] - cy) * uv_depth[:, 2] / fy + by
+            points[:, 0] = X
+            points[:, 1] = Y
+            points[:, 2] = uv_depth[:, 2]
+            points_world = np.dot(points, np.transpose(pose_matrix))
+            print(points_world.shape)
+
+            pcd = dict(coord=points_world[:, :3], color=color_rgb)
+            torch.save(pcd, output_path + "/{}.pth".format(name))
+        except Exception as e:
+            # Don't silently swallow errors; report the frame that failed.
+            print("Skipping frame {} due to error: {}".format(name, e))
+            continue
diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/preprocess.py b/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..818c369fa60b841736012950895c6dbaebdbef86
--- /dev/null
+++ b/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/preprocess.py
@@ -0,0 +1,51 @@
+import os
+import argparse
+import glob
+import multiprocessing as mp
+from concurrent.futures import ProcessPoolExecutor
+from itertools import repeat
+from reader import reader
+from point_cloud_extractor import extractor
+from compute_full_overlapping import compute_full_overlapping
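+
+# Example usage (illustrative; the paths are placeholders):
+#
+#     python preprocess.py \
+#         --dataset_root /path/to/scannet \
+#         --output_root /path/to/scannet_pair_output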
+
+
+frame_skip = 25  # sample every 25th frame when exporting images/poses
+
+
+def parse_sens(sens_dir, output_dir):
+    scene_id = os.path.basename(os.path.dirname(sens_dir))
+    print(f"Parsing sens data: {sens_dir}")
+    reader(
+        sens_dir,
+        os.path.join(output_dir, scene_id),
+        frame_skip,
+        export_color_images=True,
+        export_depth_images=True,
+        export_poses=True,
+        export_intrinsics=True,
+    )
+    extractor(
+        os.path.join(output_dir, scene_id), os.path.join(output_dir, scene_id, "pcd")
+    )
+    compute_full_overlapping(output_dir, scene_id)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--dataset_root",
+        required=True,
+        help="Path to the ScanNet dataset containing scene folders",
+    )
+    parser.add_argument(
+        "--output_root",
+        required=True,
+        help="Output path where train/val folders will be located",
+    )
+    opt = parser.parse_args()
+    sens_list = sorted(glob.glob(os.path.join(opt.dataset_root, "scans/scene*/*.sens")))
+    # Preprocess data.
+    pool = ProcessPoolExecutor(max_workers=mp.cpu_count())
+    print("Processing scenes...")
+    _ = list(pool.map(parse_sens, sens_list, repeat(opt.output_root)))
+    pool.shutdown()
diff --git a/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/reader.py b/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/reader.py
new file mode 100644
index 0000000000000000000000000000000000000000..d21aa0ce88006f34775edc9d9aaf4e750d523197
--- /dev/null
+++ b/Pointcept/pointcept/datasets/preprocessing/scannet/scannet_pair/reader.py
@@ -0,0 +1,33 @@
+import os
+
+from SensorData import SensorData
+
+
+def reader(
+    filename,
+    output_path,
+    frame_skip,
+    export_color_images=False,
+    export_depth_images=False,
+    export_poses=False,
+    export_intrinsics=False,
+):
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    # load the data
+    print("loading %s..." % filename)
+    sd = SensorData(filename)
+    if export_depth_images:
+        sd.export_depth_images(
+            os.path.join(output_path, "depth"), frame_skip=frame_skip
+        )
+    if export_color_images:
+        sd.export_color_images(
+            os.path.join(output_path, "color"), frame_skip=frame_skip
+        )
+    if export_poses:
+        sd.export_poses(os.path.join(output_path, "pose"), frame_skip=frame_skip)
+    if export_intrinsics:
+        sd.export_intrinsics(os.path.join(output_path, "intrinsic"))
diff --git a/Pointcept/pointcept/datasets/preprocessing/scannetpp/preprocess_scannetpp.py b/Pointcept/pointcept/datasets/preprocessing/scannetpp/preprocess_scannetpp.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad820029016ff16c53ab0ad872ede9449add6a7b
--- /dev/null
+++ b/Pointcept/pointcept/datasets/preprocessing/scannetpp/preprocess_scannetpp.py
@@ -0,0 +1,252 @@
+"""
+Preprocessing Script for ScanNet++
+Modified from the official preprocessing code.
+
+Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
+Please cite our work if the code is helpful to you.
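+
+Example usage (illustrative; the flags mirror the argparse setup in
+__main__ below, and the paths are placeholders):
+
+    python preprocess_scannetpp.py \
+        --dataset_root /path/to/scannetpp \
+        --output_root /path/to/scannetpp_processed \
+        --num_workers 16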
+""" + +import argparse +import json +import numpy as np +import pandas as pd +import open3d as o3d +import multiprocessing as mp +from collections import OrderedDict +from concurrent.futures import ProcessPoolExecutor +from itertools import repeat +from pathlib import Path + + +def parse_scene( + name, + split, + dataset_root, + output_root, + label_mapping, + class2idx, + ignore_index=-1, +): + print(f"Parsing scene {name} in {split} split") + dataset_root = Path(dataset_root) + output_root = Path(output_root) + scene_path = dataset_root / "data" / name / "scans" + mesh_path = scene_path / "mesh_aligned_0.05.ply" + segs_path = scene_path / "segments.json" + anno_path = scene_path / "segments_anno.json" + + # load mesh vertices and colors + mesh = o3d.io.read_triangle_mesh(str(mesh_path)) + + # extract mesh information + mesh.compute_vertex_normals(normalized=True) + coord = np.array(mesh.vertices).astype(np.float32) + color = (np.array(mesh.vertex_colors) * 255).astype(np.uint8) + normal = np.array(mesh.vertex_normals).astype(np.float32) + + save_path = output_root / split / name + save_path.mkdir(parents=True, exist_ok=True) + np.save(save_path / "coord.npy", coord) + np.save(save_path / "color.npy", color) + np.save(save_path / "normal.npy", normal) + + if split == "test": + return + + # get label on vertices + # load segments = vertices per segment ID + with open(segs_path) as f: + segments = json.load(f) + # load anno = (instance, groups of segments) + with open(anno_path) as f: + anno = json.load(f) + seg_indices = np.array(segments["segIndices"], dtype=np.uint32) + num_vertices = len(seg_indices) + assert num_vertices == len(coord) + semantic_gt = np.ones((num_vertices, 3), dtype=np.int16) * ignore_index + instance_gt = np.ones((num_vertices, 3), dtype=np.int16) * ignore_index + + # number of labels are used per vertex. 
+    # increment each time a new label is added
+    labels_used = np.zeros(num_vertices, dtype=np.int16)
+
+    # keep track of the size of the instance (#vertices) assigned to each vertex
+    # later, keep the label of the smallest instance for major label of vertices
+    # store inf initially so that we can pick the smallest instance
+    instance_size = np.ones((num_vertices, 3), dtype=np.int16) * np.inf
+
+    for idx, instance in enumerate(anno["segGroups"]):
+        label = instance["label"]
+        instance["label_orig"] = label
+        # remap label
+        instance["label"] = label_mapping.get(label, None)
+        instance["label_index"] = class2idx.get(label, ignore_index)
+
+        if instance["label_index"] == ignore_index:
+            continue
+        # get all the vertices with segment index in this instance
+        # and max number of labels not yet applied
+        mask = np.isin(seg_indices, instance["segments"]) & (labels_used < 3)
+        size = mask.sum()
+        if size == 0:
+            continue
+
+        # get the position to add the label - 0, 1, 2
+        label_position = labels_used[mask]
+        semantic_gt[mask, label_position] = instance["label_index"]
+        # store all valid instance (include ignored instance)
+        instance_gt[mask, label_position] = instance["objectId"]
+        instance_size[mask, label_position] = size
+        labels_used[mask] += 1
+
+    # major label is the label of smallest instance for each vertex
+    # use major label for single class segmentation
+    # shift major label to the first column
+    mask = labels_used > 1
+    if mask.sum() > 0:
+        major_label_position = np.argmin(instance_size[mask], axis=1)
+
+        major_semantic_label = semantic_gt[mask, major_label_position]
+        semantic_gt[mask, major_label_position] = semantic_gt[:, 0][mask]
+        semantic_gt[:, 0][mask] = major_semantic_label
+
+        major_instance_label = instance_gt[mask, major_label_position]
+        instance_gt[mask, major_label_position] = instance_gt[:, 0][mask]
+        instance_gt[:, 0][mask] = major_instance_label
+
+    np.save(save_path / "segment.npy", semantic_gt)
+    np.save(save_path / "instance.npy", instance_gt)
+
+
+def filter_map_classes(mapping, count_thresh, count_type, mapping_type):
+    mapping = mapping[mapping[count_type] >= count_thresh]
+    if mapping_type == "semantic":
+        map_key = "semantic_map_to"
+    elif mapping_type == "instance":
+        map_key = "instance_map_to"
+    else:
+        raise NotImplementedError
+    # create a dict with classes to be mapped
+    # classes that don't have mapping are entered as x->x
+    # otherwise x->y
+    map_dict = OrderedDict()
+
+    for i in range(mapping.shape[0]):
+        row = mapping.iloc[i]
+        class_name = row["class"]
+        map_target = row[map_key]
+
+        # map_target is a string when an explicit mapping exists:
+        # "None" -> drop this class, anything else -> remap to map_target
+        try:
+            if len(map_target) > 0:
+                # map to None -> don't use this class
+                if map_target == "None":
+                    pass
+                else:
+                    # map to something else -> use this class
+                    map_dict[class_name] = map_target
+        except TypeError:
+            # nan values -> no mapping, keep label as is
+            if class_name not in map_dict:
+                map_dict[class_name] = class_name
+
+    return map_dict
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--dataset_root",
+        required=True,
+        help="Path to the ScanNet++ dataset containing data/metadata/splits.",
+    )
+    parser.add_argument(
+        "--output_root",
+        required=True,
+        help="Output path where train/val/test folders will be located.",
+    )
+    parser.add_argument(
+        "--ignore_index",
+        default=-1,
+        type=int,
+        help="Default ignore index.",
+    )
+    parser.add_argument(
+        "--num_workers",
+        default=mp.cpu_count(),
+        type=int,
+        help="Num workers for preprocessing.",
preprocessing.", + ) + config = parser.parse_args() + + print("Loading meta data...") + config.dataset_root = Path(config.dataset_root) + config.output_root = Path(config.output_root) + + train_list = np.loadtxt( + config.dataset_root / "splits" / "nvs_sem_train.txt", + dtype=str, + ) + print("Num samples in training split:", len(train_list)) + + val_list = np.loadtxt( + config.dataset_root / "splits" / "nvs_sem_val.txt", + dtype=str, + ) + print("Num samples in validation split:", len(val_list)) + + test_list = np.loadtxt( + config.dataset_root / "splits" / "sem_test.txt", + dtype=str, + ) + print("Num samples in testing split:", len(test_list)) + + data_list = np.concatenate([train_list, val_list, test_list]) + split_list = np.concatenate( + [ + np.full_like(train_list, "train"), + np.full_like(val_list, "val"), + np.full_like(test_list, "test"), + ] + ) + + # Parsing label information and mapping + segment_class_names = np.loadtxt( + config.dataset_root / "metadata" / "semantic_benchmark" / "top100.txt", + dtype=str, + delimiter=".", # dummy delimiter to replace " " + ) + print("Num classes in segment class list:", len(segment_class_names)) + + instance_class_names = np.loadtxt( + config.dataset_root / "metadata" / "semantic_benchmark" / "top100_instance.txt", + dtype=str, + delimiter=".", # dummy delimiter to replace " " + ) + print("Num classes in instance class list:", len(instance_class_names)) + + label_mapping = pd.read_csv( + config.dataset_root / "metadata" / "semantic_benchmark" / "map_benchmark.csv" + ) + label_mapping = filter_map_classes( + label_mapping, count_thresh=0, count_type="count", mapping_type="semantic" + ) + class2idx = { + class_name: idx for (idx, class_name) in enumerate(segment_class_names) + } + + print("Processing scenes...") + pool = ProcessPoolExecutor(max_workers=config.num_workers) + _ = list( + pool.map( + parse_scene, + data_list, + split_list, + repeat(config.dataset_root), + repeat(config.output_root), + repeat(label_mapping), + repeat(class2idx), + repeat(config.ignore_index), + ) + ) + pool.shutdown() diff --git a/Pointcept/pointcept/datasets/preprocessing/structured3d/preprocess_structured3d.py b/Pointcept/pointcept/datasets/preprocessing/structured3d/preprocess_structured3d.py new file mode 100644 index 0000000000000000000000000000000000000000..6924dc9abf3a3c253ee80dd3b3d85454e3df35d2 --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/structured3d/preprocess_structured3d.py @@ -0,0 +1,420 @@ +""" +Preprocessing Script for Structured3D + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
+""" + +import argparse +import io +import os +import PIL +from PIL import Image +import cv2 +import zipfile +import numpy as np +import multiprocessing as mp +from concurrent.futures import ProcessPoolExecutor +from itertools import repeat + + +VALID_CLASS_IDS_25 = ( + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 11, + 14, + 15, + 16, + 17, + 18, + 19, + 22, + 24, + 25, + 32, + 34, + 35, + 38, + 39, + 40, +) +CLASS_LABELS_25 = ( + "wall", + "floor", + "cabinet", + "bed", + "chair", + "sofa", + "table", + "door", + "window", + "picture", + "desk", + "shelves", + "curtain", + "dresser", + "pillow", + "mirror", + "ceiling", + "refrigerator", + "television", + "nightstand", + "sink", + "lamp", + "otherstructure", + "otherfurniture", + "otherprop", +) + + +def normal_from_cross_product(points_2d: np.ndarray) -> np.ndarray: + xyz_points_pad = np.pad(points_2d, ((0, 1), (0, 1), (0, 0)), mode="symmetric") + xyz_points_ver = (xyz_points_pad[:, :-1, :] - xyz_points_pad[:, 1:, :])[:-1, :, :] + xyz_points_hor = (xyz_points_pad[:-1, :, :] - xyz_points_pad[1:, :, :])[:, :-1, :] + xyz_normal = np.cross(xyz_points_hor, xyz_points_ver) + xyz_dist = np.linalg.norm(xyz_normal, axis=-1, keepdims=True) + xyz_normal = np.divide( + xyz_normal, xyz_dist, out=np.zeros_like(xyz_normal), where=xyz_dist != 0 + ) + return xyz_normal + + +class Structured3DReader: + def __init__(self, files): + super().__init__() + if isinstance(files, str): + files = [files] + self.readers = [zipfile.ZipFile(f, "r") for f in files] + self.names_mapper = dict() + for idx, reader in enumerate(self.readers): + for name in reader.namelist(): + self.names_mapper[name] = idx + + def filelist(self): + return list(self.names_mapper.keys()) + + def listdir(self, dir_name): + dir_name = dir_name.lstrip(os.path.sep).rstrip(os.path.sep) + file_list = list( + np.unique( + [ + f.replace(dir_name + os.path.sep, "", 1).split(os.path.sep)[0] + for f in self.filelist() + if f.startswith(dir_name + os.path.sep) + ] + ) + ) + if "" in file_list: + file_list.remove("") + return file_list + + def read(self, file_name): + split = self.names_mapper[file_name] + return self.readers[split].read(file_name) + + def read_camera(self, camera_path): + z2y_top_m = np.array([[0, 1, 0], [0, 0, 1], [1, 0, 0]], dtype=np.float32) + cam_extr = np.fromstring(self.read(camera_path), dtype=np.float32, sep=" ") + cam_t = np.matmul(z2y_top_m, cam_extr[:3] / 1000) + if cam_extr.shape[0] > 3: + cam_front, cam_up = cam_extr[3:6], cam_extr[6:9] + cam_n = np.cross(cam_front, cam_up) + cam_r = np.stack((cam_front, cam_up, cam_n), axis=1).astype(np.float32) + cam_r = np.matmul(z2y_top_m, cam_r) + cam_f = cam_extr[9:11] + else: + cam_r = np.eye(3, dtype=np.float32) + cam_f = None + return cam_r, cam_t, cam_f + + def read_depth(self, depth_path): + depth = cv2.imdecode( + np.frombuffer(self.read(depth_path), np.uint8), cv2.IMREAD_UNCHANGED + )[..., np.newaxis] + depth[depth == 0] = 65535 + return depth + + def read_color(self, color_path): + color = cv2.imdecode( + np.frombuffer(self.read(color_path), np.uint8), cv2.IMREAD_UNCHANGED + )[..., :3][..., ::-1] + return color + + def read_segment(self, segment_path): + segment = np.array(PIL.Image.open(io.BytesIO(self.read(segment_path))))[ + ..., np.newaxis + ] + return segment + + +def parse_scene( + scene, + dataset_root, + output_root, + ignore_index=-1, + grid_size=None, + fuse_prsp=True, + fuse_pano=True, + vis=False, +): + assert fuse_prsp or fuse_pano + reader = Structured3DReader( + [ + os.path.join(dataset_root, f) + for f in 
os.listdir(dataset_root)
+            if f.endswith(".zip")
+        ]
+    )
+    scene_id = int(os.path.basename(scene).split("_")[-1])
+    if scene_id < 3000:
+        split = "train"
+    elif 3000 <= scene_id < 3250:
+        split = "val"
+    else:
+        split = "test"
+
+    print(f"Processing: {scene} in {split}")
+    rooms = reader.listdir(os.path.join("Structured3D", scene, "2D_rendering"))
+    for room in rooms:
+        room_path = os.path.join("Structured3D", scene, "2D_rendering", room)
+        coord_list = list()
+        color_list = list()
+        normal_list = list()
+        segment_list = list()
+        if fuse_prsp:
+            prsp_path = os.path.join(room_path, "perspective", "full")
+            frames = reader.listdir(prsp_path)
+
+            for frame in frames:
+                try:
+                    cam_r, cam_t, cam_f = reader.read_camera(
+                        os.path.join(prsp_path, frame, "camera_pose.txt")
+                    )
+                    depth = reader.read_depth(
+                        os.path.join(prsp_path, frame, "depth.png")
+                    )
+                    color = reader.read_color(
+                        os.path.join(prsp_path, frame, "rgb_rawlight.png")
+                    )
+                    segment = reader.read_segment(
+                        os.path.join(prsp_path, frame, "semantic.png")
+                    )
+                except Exception as e:
+                    print(
+                        f"Skipping {scene}_room{room}_frame{frame} perspective view due to loading error: {e}"
+                    )
+                else:
+                    fx, fy = cam_f
+                    height, width = depth.shape[0], depth.shape[1]
+                    pixel = np.transpose(np.indices((width, height)), (2, 1, 0))
+                    pixel = pixel.reshape((-1, 2))
+                    pixel = np.hstack((pixel, np.ones((pixel.shape[0], 1))))
+                    k = np.diag([1.0, 1.0, 1.0])
+
+                    k[0, 2] = width / 2
+                    k[1, 2] = height / 2
+
+                    k[0, 0] = k[0, 2] / np.tan(fx)
+                    k[1, 1] = k[1, 2] / np.tan(fy)
+                    coord = (
+                        depth.reshape((-1, 1)) * (np.linalg.inv(k) @ pixel.T).T
+                    ).reshape(height, width, 3)
+                    coord = coord @ np.array([[0, 0, 1], [0, -1, 0], [1, 0, 0]])
+                    normal = normal_from_cross_product(coord)
+
+                    # Filtering invalid points
+                    view_dist = np.maximum(
+                        np.linalg.norm(coord, axis=-1, keepdims=True), float(10e-5)
+                    )
+                    cosine_dist = np.sum(
+                        (coord * normal / view_dist), axis=-1, keepdims=True
+                    )
+                    cosine_dist = np.abs(cosine_dist)
+                    mask = ((cosine_dist > 0.15) & (depth < 65535) & (segment > 0))[
+                        ..., 0
+                    ].reshape(-1)
+
+                    coord = np.matmul(coord / 1000, cam_r.T) + cam_t
+                    normal = normal_from_cross_product(coord)
+
+                    if sum(mask) > 0:
+                        coord_list.append(coord.reshape(-1, 3)[mask])
+                        color_list.append(color.reshape(-1, 3)[mask])
+                        normal_list.append(normal.reshape(-1, 3)[mask])
+                        segment_list.append(segment.reshape(-1, 1)[mask])
+                    else:
+                        print(
+                            f"Skipping {scene}_room{room}_frame{frame} perspective view because all points were filtered out"
+                        )
+
+        if fuse_pano:
+            pano_path = os.path.join(room_path, "panorama")
+            try:
+                _, cam_t, _ = reader.read_camera(
+                    os.path.join(pano_path, "camera_xyz.txt")
+                )
+                depth = reader.read_depth(os.path.join(pano_path, "full", "depth.png"))
+                color = reader.read_color(
+                    os.path.join(pano_path, "full", "rgb_rawlight.png")
+                )
+                segment = reader.read_segment(
+                    os.path.join(pano_path, "full", "semantic.png")
+                )
+            except Exception as e:
+                print(
+                    f"Skipping {scene}_room{room} panorama view due to loading error: {e}"
+                )
+            else:
+                p_h, p_w = depth.shape[:2]
+                p_a = np.arange(p_w, dtype=np.float32) / p_w * 2 * np.pi - np.pi
+                p_b = np.arange(p_h, dtype=np.float32) / p_h * np.pi * -1 + np.pi / 2
+                p_a = np.tile(p_a[None], [p_h, 1])[..., np.newaxis]
+                p_b = np.tile(p_b[:, None], [1, p_w])[..., np.newaxis]
+                p_a_sin, p_a_cos, p_b_sin, p_b_cos = (
+                    np.sin(p_a),
+                    np.cos(p_a),
+                    np.sin(p_b),
+                    np.cos(p_b),
+                )
+                x = depth * p_a_cos * p_b_cos
+                y = depth * p_b_sin
+                z = depth * p_a_sin * p_b_cos
+                coord = np.concatenate([x, y, z], axis=-1) / 1000
+                normal = normal_from_cross_product(coord)
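+                # Note: as in the perspective branch above, the mask below drops
+                # pixels whose viewing ray is nearly tangent to the estimated
+                # surface (|cosine| <= 0.15), pixels with missing depth (65535),
+                # and unlabeled pixels, since their geometry is unreliable.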
+
+                # Filtering invalid points
+                view_dist = np.maximum(
+                    np.linalg.norm(coord, axis=-1, keepdims=True), float(10e-5)
+                )
+                cosine_dist = np.sum(
+                    (coord * normal / view_dist), axis=-1, keepdims=True
+                )
+                cosine_dist = np.abs(cosine_dist)
+                mask = ((cosine_dist > 0.15) & (depth < 65535) & (segment > 0))[
+                    ..., 0
+                ].reshape(-1)
+                coord = coord + cam_t
+
+                if sum(mask) > 0:
+                    coord_list.append(coord.reshape(-1, 3)[mask])
+                    color_list.append(color.reshape(-1, 3)[mask])
+                    normal_list.append(normal.reshape(-1, 3)[mask])
+                    segment_list.append(segment.reshape(-1, 1)[mask])
+                else:
+                    print(
+                        f"Skipping {scene}_room{room} panorama view because all points were filtered out"
+                    )
+
+        if len(coord_list) > 0:
+            coord = np.concatenate(coord_list, axis=0)
+            coord = coord @ np.array([[1, 0, 0], [0, 0, 1], [0, 1, 0]])
+            color = np.concatenate(color_list, axis=0)
+            normal = np.concatenate(normal_list, axis=0)
+            normal = normal @ np.array([[1, 0, 0], [0, 0, 1], [0, 1, 0]])
+            segment = np.concatenate(segment_list, axis=0)
+            segment25 = np.ones_like(segment, dtype=np.int64) * ignore_index
+            for idx, value in enumerate(VALID_CLASS_IDS_25):
+                mask = np.all(segment == value, axis=-1)
+                segment25[mask] = idx
+
+            data_dict = dict(
+                coord=coord.astype(np.float32),
+                color=color.astype(np.uint8),
+                normal=normal.astype(np.float32),
+                segment=segment25.astype(np.int16),
+            )
+            # Grid sampling data
+            if grid_size is not None:
+                grid_coord = np.floor(coord / grid_size).astype(int)
+                _, idx = np.unique(grid_coord, axis=0, return_index=True)
+                coord = coord[idx]
+                for key in data_dict.keys():
+                    data_dict[key] = data_dict[key][idx]
+
+            # Save data
+            save_path = os.path.join(
+                output_root, split, os.path.basename(scene), f"room_{room}"
+            )
+            os.makedirs(save_path, exist_ok=True)
+            for key in data_dict.keys():
+                np.save(os.path.join(save_path, f"{key}.npy"), data_dict[key])
+
+            if vis:
+                from pointcept.utils.visualization import save_point_cloud
+
+                os.makedirs("./vis", exist_ok=True)
+                save_point_cloud(
+                    coord, color / 255, f"./vis/{scene}_room{room}_color.ply"
+                )
+                save_point_cloud(
+                    coord, (normal + 1) / 2, f"./vis/{scene}_room{room}_normal.ply"
+                )
+        else:
+            print(f"Skipping {scene}_room{room} due to no valid points")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--dataset_root",
+        required=True,
+        help="Path to the Structured3D dataset (folder of zip files).",
+    )
+    parser.add_argument(
+        "--output_root",
+        required=True,
+        help="Output path where train/val folders will be located.",
+    )
+    parser.add_argument(
+        "--num_workers",
+        default=mp.cpu_count(),
+        type=int,
+        help="Num workers for preprocessing.",
+    )
+    parser.add_argument(
+        "--grid_size", default=None, type=float, help="Grid size for grid sampling."
+    )
+    parser.add_argument("--ignore_index", default=-1, type=int, help="Ignore index.")
+    parser.add_argument(
+        "--fuse_prsp", action="store_true", help="Whether to fuse the perspective views."
+    )
+    parser.add_argument(
+        "--fuse_pano", action="store_true", help="Whether to fuse the panorama views."
+ ) + config = parser.parse_args() + + reader = Structured3DReader( + [ + os.path.join(config.dataset_root, f) + for f in os.listdir(config.dataset_root) + if f.endswith(".zip") + ] + ) + + scenes_list = reader.listdir("Structured3D") + scenes_list = sorted(scenes_list) + os.makedirs(os.path.join(config.output_root, "train"), exist_ok=True) + os.makedirs(os.path.join(config.output_root, "val"), exist_ok=True) + os.makedirs(os.path.join(config.output_root, "test"), exist_ok=True) + + # Preprocess data. + print("Processing scenes...") + pool = ProcessPoolExecutor(max_workers=config.num_workers) + _ = list( + pool.map( + parse_scene, + scenes_list, + repeat(config.dataset_root), + repeat(config.output_root), + repeat(config.ignore_index), + repeat(config.grid_size), + repeat(config.fuse_prsp), + repeat(config.fuse_pano), + ) + ) + pool.shutdown() diff --git a/Pointcept/pointcept/datasets/preprocessing/waymo/3d_semseg_test_set_frames.txt b/Pointcept/pointcept/datasets/preprocessing/waymo/3d_semseg_test_set_frames.txt new file mode 100644 index 0000000000000000000000000000000000000000..e25dc615331aaff261c25bfc3e6b930fccf880ac --- /dev/null +++ b/Pointcept/pointcept/datasets/preprocessing/waymo/3d_semseg_test_set_frames.txt @@ -0,0 +1,2982 @@ +2830680430134047327_1720_000_1740_000,1558034229922468 +2830680430134047327_1720_000_1740_000,1558034232422787 +2830680430134047327_1720_000_1740_000,1558034222922333 +2830680430134047327_1720_000_1740_000,1558034223422411 +2830680430134047327_1720_000_1740_000,1558034232922788 +2830680430134047327_1720_000_1740_000,1558034234422988 +2830680430134047327_1720_000_1740_000,1558034220422269 +2830680430134047327_1720_000_1740_000,1558034224422390 +2830680430134047327_1720_000_1740_000,1558034230422275 +2830680430134047327_1720_000_1740_000,1558034221922367 +2830680430134047327_1720_000_1740_000,1558034231422462 +2830680430134047327_1720_000_1740_000,1558034233422761 +2830680430134047327_1720_000_1740_000,1558034220922343 +2830680430134047327_1720_000_1740_000,1558034223922363 +2830680430134047327_1720_000_1740_000,1558034221422343 +2830680430134047327_1720_000_1740_000,1558034222422363 +2830680430134047327_1720_000_1740_000,1558034233922841 +2830680430134047327_1720_000_1740_000,1558034231922698 +2830680430134047327_1720_000_1740_000,1558034219922361 +2830680430134047327_1720_000_1740_000,1558034230922233 +14586026017427828517_700_000_720_000,1557363460737615 +14586026017427828517_700_000_720_000,1557363461737535 +14586026017427828517_700_000_720_000,1557363470737492 +14586026017427828517_700_000_720_000,1557363458737857 +14586026017427828517_700_000_720_000,1557363471237472 +14586026017427828517_700_000_720_000,1557363459237861 +14586026017427828517_700_000_720_000,1557363460237653 +14586026017427828517_700_000_720_000,1557363467737151 +14586026017427828517_700_000_720_000,1557363469237474 +14586026017427828517_700_000_720_000,1557363468237103 +14586026017427828517_700_000_720_000,1557363461237576 +14586026017427828517_700_000_720_000,1557363457237421 +14586026017427828517_700_000_720_000,1557363467237306 +14586026017427828517_700_000_720_000,1557363468737287 +14586026017427828517_700_000_720_000,1557363459737792 +14586026017427828517_700_000_720_000,1557363469737664 +14586026017427828517_700_000_720_000,1557363458237773 +14586026017427828517_700_000_720_000,1557363471737431 +14586026017427828517_700_000_720_000,1557363470237558 +14586026017427828517_700_000_720_000,1557363457737578 +6079272500228273268_2480_000_2500_000,1557546171272882 
+6079272500228273268_2480_000_2500_000,1557546172772631 +6079272500228273268_2480_000_2500_000,1557546159772245 +6079272500228273268_2480_000_2500_000,1557546169773016 +6079272500228273268_2480_000_2500_000,1557546162272567 +6079272500228273268_2480_000_2500_000,1557546170272998 +6079272500228273268_2480_000_2500_000,1557546161272265 +6079272500228273268_2480_000_2500_000,1557546173772741 +6079272500228273268_2480_000_2500_000,1557546170772958 +6079272500228273268_2480_000_2500_000,1557546163272476 +6079272500228273268_2480_000_2500_000,1557546161772481 +6079272500228273268_2480_000_2500_000,1557546171772762 +6079272500228273268_2480_000_2500_000,1557546163772228 +6079272500228273268_2480_000_2500_000,1557546160772102 +6079272500228273268_2480_000_2500_000,1557546162772554 +6079272500228273268_2480_000_2500_000,1557546172272635 +6079272500228273268_2480_000_2500_000,1557546159272357 +6079272500228273268_2480_000_2500_000,1557546173272639 +6079272500228273268_2480_000_2500_000,1557546169272728 +6079272500228273268_2480_000_2500_000,1557546160272024 +1936395688683397781_2580_000_2600_000,1557546260797397 +1936395688683397781_2580_000_2600_000,1557546263297538 +1936395688683397781_2580_000_2600_000,1557546273797478 +1936395688683397781_2580_000_2600_000,1557546259797381 +1936395688683397781_2580_000_2600_000,1557546261797507 +1936395688683397781_2580_000_2600_000,1557546269797705 +1936395688683397781_2580_000_2600_000,1557546271297274 +1936395688683397781_2580_000_2600_000,1557546260297363 +1936395688683397781_2580_000_2600_000,1557546273297464 +1936395688683397781_2580_000_2600_000,1557546272297368 +1936395688683397781_2580_000_2600_000,1557546261297445 +1936395688683397781_2580_000_2600_000,1557546263797578 +1936395688683397781_2580_000_2600_000,1557546270297559 +1936395688683397781_2580_000_2600_000,1557546269297756 +1936395688683397781_2580_000_2600_000,1557546270797376 +1936395688683397781_2580_000_2600_000,1557546262797541 +1936395688683397781_2580_000_2600_000,1557546259297393 +1936395688683397781_2580_000_2600_000,1557546272797434 +1936395688683397781_2580_000_2600_000,1557546262297511 +1936395688683397781_2580_000_2600_000,1557546271797317 +12537711031998520792_3080_000_3100_000,1559178584137607 +12537711031998520792_3080_000_3100_000,1559178596138055 +12537711031998520792_3080_000_3100_000,1559178598137718 +12537711031998520792_3080_000_3100_000,1559178588137379 +12537711031998520792_3080_000_3100_000,1559178588637547 +12537711031998520792_3080_000_3100_000,1559178597637619 +12537711031998520792_3080_000_3100_000,1559178587137003 +12537711031998520792_3080_000_3100_000,1559178594138804 +12537711031998520792_3080_000_3100_000,1559178584637645 +12537711031998520792_3080_000_3100_000,1559178587637113 +12537711031998520792_3080_000_3100_000,1559178598637642 +12537711031998520792_3080_000_3100_000,1559178595638405 +12537711031998520792_3080_000_3100_000,1559178594638876 +12537711031998520792_3080_000_3100_000,1559178585137535 +12537711031998520792_3080_000_3100_000,1559178586637145 +12537711031998520792_3080_000_3100_000,1559178595138665 +12537711031998520792_3080_000_3100_000,1559178585637309 +12537711031998520792_3080_000_3100_000,1559178586137201 +12537711031998520792_3080_000_3100_000,1559178597137539 +12537711031998520792_3080_000_3100_000,1559178596637690 +614453665074997770_1060_000_1080_000,1557449600160586 +614453665074997770_1060_000_1080_000,1557449609661257 +614453665074997770_1060_000_1080_000,1557449599161330 +614453665074997770_1060_000_1080_000,1557449608161401 
+614453665074997770_1060_000_1080_000,1557449607661221 +614453665074997770_1060_000_1080_000,1557449598662064 +614453665074997770_1060_000_1080_000,1557449600660450 +614453665074997770_1060_000_1080_000,1557449596170831 +614453665074997770_1060_000_1080_000,1557449610161229 +614453665074997770_1060_000_1080_000,1557449607161133 +614453665074997770_1060_000_1080_000,1557449610661288 +614453665074997770_1060_000_1080_000,1557449597165803 +614453665074997770_1060_000_1080_000,1557449606161418 +614453665074997770_1060_000_1080_000,1557449599660888 +614453665074997770_1060_000_1080_000,1557449597664327 +614453665074997770_1060_000_1080_000,1557449609161403 +614453665074997770_1060_000_1080_000,1557449606661312 +614453665074997770_1060_000_1080_000,1557449596667769 +614453665074997770_1060_000_1080_000,1557449598163004 +614453665074997770_1060_000_1080_000,1557449608661487 +10488772413132920574_680_000_700_000,1557276689370724 +10488772413132920574_680_000_700_000,1557276681870683 +10488772413132920574_680_000_700_000,1557276687370626 +10488772413132920574_680_000_700_000,1557276691870689 +10488772413132920574_680_000_700_000,1557276677372053 +10488772413132920574_680_000_700_000,1557276688370712 +10488772413132920574_680_000_700_000,1557276689870718 +10488772413132920574_680_000_700_000,1557276687870742 +10488772413132920574_680_000_700_000,1557276690370698 +10488772413132920574_680_000_700_000,1557276690870678 +10488772413132920574_680_000_700_000,1557276677871875 +10488772413132920574_680_000_700_000,1557276679370967 +10488772413132920574_680_000_700_000,1557276678871230 +10488772413132920574_680_000_700_000,1557276688870677 +10488772413132920574_680_000_700_000,1557276691370685 +10488772413132920574_680_000_700_000,1557276680370783 +10488772413132920574_680_000_700_000,1557276678371569 +10488772413132920574_680_000_700_000,1557276679870868 +10488772413132920574_680_000_700_000,1557276680870747 +10488772413132920574_680_000_700_000,1557276681370633 +17174012103392027911_3500_000_3520_000,1570909461425651 +17174012103392027911_3500_000_3520_000,1570909452924866 +17174012103392027911_3500_000_3520_000,1570909453924848 +17174012103392027911_3500_000_3520_000,1570909451425527 +17174012103392027911_3500_000_3520_000,1570909450426192 +17174012103392027911_3500_000_3520_000,1570909460425382 +17174012103392027911_3500_000_3520_000,1570909449427011 +17174012103392027911_3500_000_3520_000,1570909449926640 +17174012103392027911_3500_000_3520_000,1570909459925389 +17174012103392027911_3500_000_3520_000,1570909459425389 +17174012103392027911_3500_000_3520_000,1570909461925903 +17174012103392027911_3500_000_3520_000,1570909462926099 +17174012103392027911_3500_000_3520_000,1570909452425024 +17174012103392027911_3500_000_3520_000,1570909450925798 +17174012103392027911_3500_000_3520_000,1570909463926171 +17174012103392027911_3500_000_3520_000,1570909451925298 +17174012103392027911_3500_000_3520_000,1570909463426072 +17174012103392027911_3500_000_3520_000,1570909462426098 +17174012103392027911_3500_000_3520_000,1570909453424839 +17174012103392027911_3500_000_3520_000,1570909460925464 +16062780403777359835_2580_000_2600_000,1570323463399590 +16062780403777359835_2580_000_2600_000,1570323461899536 +16062780403777359835_2580_000_2600_000,1570323456400014 +16062780403777359835_2580_000_2600_000,1570323465899811 +16062780403777359835_2580_000_2600_000,1570323452400102 +16062780403777359835_2580_000_2600_000,1570323454400277 +16062780403777359835_2580_000_2600_000,1570323454900086 
+16062780403777359835_2580_000_2600_000,1570323464399655 +16062780403777359835_2580_000_2600_000,1570323452900168 +16062780403777359835_2580_000_2600_000,1570323453400256 +16062780403777359835_2580_000_2600_000,1570323462899433 +16062780403777359835_2580_000_2600_000,1570323451900017 +16062780403777359835_2580_000_2600_000,1570323466399934 +16062780403777359835_2580_000_2600_000,1570323464899679 +16062780403777359835_2580_000_2600_000,1570323455399863 +16062780403777359835_2580_000_2600_000,1570323453900317 +16062780403777359835_2580_000_2600_000,1570323462399389 +16062780403777359835_2580_000_2600_000,1570323455899851 +16062780403777359835_2580_000_2600_000,1570323465399682 +16062780403777359835_2580_000_2600_000,1570323463899688 +1376304843325714018_3420_000_3440_000,1557855926972381 +1376304843325714018_3420_000_3440_000,1557855912972456 +1376304843325714018_3420_000_3440_000,1557855925972176 +1376304843325714018_3420_000_3440_000,1557855927472462 +1376304843325714018_3420_000_3440_000,1557855917472088 +1376304843325714018_3420_000_3440_000,1557855914472215 +1376304843325714018_3420_000_3440_000,1557855923972406 +1376304843325714018_3420_000_3440_000,1557855914972144 +1376304843325714018_3420_000_3440_000,1557855915972108 +1376304843325714018_3420_000_3440_000,1557855924472251 +1376304843325714018_3420_000_3440_000,1557855926472255 +1376304843325714018_3420_000_3440_000,1557855913472347 +1376304843325714018_3420_000_3440_000,1557855923472548 +1376304843325714018_3420_000_3440_000,1557855915472102 +1376304843325714018_3420_000_3440_000,1557855922972694 +1376304843325714018_3420_000_3440_000,1557855924972252 +1376304843325714018_3420_000_3440_000,1557855916472106 +1376304843325714018_3420_000_3440_000,1557855925472198 +1376304843325714018_3420_000_3440_000,1557855913972269 +1376304843325714018_3420_000_3440_000,1557855916972142 +5648007586817904385_3220_000_3240_000,1569901291300092 +5648007586817904385_3220_000_3240_000,1569901302299589 +5648007586817904385_3220_000_3240_000,1569901290300004 +5648007586817904385_3220_000_3240_000,1569901302799659 +5648007586817904385_3220_000_3240_000,1569901301799512 +5648007586817904385_3220_000_3240_000,1569901290800085 +5648007586817904385_3220_000_3240_000,1569901293800265 +5648007586817904385_3220_000_3240_000,1569901292800206 +5648007586817904385_3220_000_3240_000,1569901300799428 +5648007586817904385_3220_000_3240_000,1569901293300205 +5648007586817904385_3220_000_3240_000,1569901294300108 +5648007586817904385_3220_000_3240_000,1569901301299422 +5648007586817904385_3220_000_3240_000,1569901303299757 +5648007586817904385_3220_000_3240_000,1569901304799913 +5648007586817904385_3220_000_3240_000,1569901303799751 +5648007586817904385_3220_000_3240_000,1569901291800157 +5648007586817904385_3220_000_3240_000,1569901304299911 +5648007586817904385_3220_000_3240_000,1569901292300126 +5648007586817904385_3220_000_3240_000,1569901300299426 +14470988792985854683_760_000_780_000,1567607330924872 +14470988792985854683_760_000_780_000,1567607341924939 +14470988792985854683_760_000_780_000,1567607339424824 +14470988792985854683_760_000_780_000,1567607339924818 +14470988792985854683_760_000_780_000,1567607332924840 +14470988792985854683_760_000_780_000,1567607329924795 +14470988792985854683_760_000_780_000,1567607332424911 +14470988792985854683_760_000_780_000,1567607340924888 +14470988792985854683_760_000_780_000,1567607330424797 +14470988792985854683_760_000_780_000,1567607342924753 +14470988792985854683_760_000_780_000,1567607340424823 
+14470988792985854683_760_000_780_000,1567607342424852 +14470988792985854683_760_000_780_000,1567607331424817 +14470988792985854683_760_000_780_000,1567607329424827 +14470988792985854683_760_000_780_000,1567607338924788 +14470988792985854683_760_000_780_000,1567607338424807 +14470988792985854683_760_000_780_000,1567607331924837 +14470988792985854683_760_000_780_000,1567607341424947 +14470988792985854683_760_000_780_000,1567607328424803 +14470988792985854683_760_000_780_000,1567607328924826 +16951245307634830999_1400_000_1420_000,1568599891824038 +16951245307634830999_1400_000_1420_000,1568599901824641 +16951245307634830999_1400_000_1420_000,1568599904324788 +16951245307634830999_1400_000_1420_000,1568599903324933 +16951245307634830999_1400_000_1420_000,1568599902824777 +16951245307634830999_1400_000_1420_000,1568599904824728 +16951245307634830999_1400_000_1420_000,1568599895824670 +16951245307634830999_1400_000_1420_000,1568599891324145 +16951245307634830999_1400_000_1420_000,1568599894824764 +16951245307634830999_1400_000_1420_000,1568599893824574 +16951245307634830999_1400_000_1420_000,1568599894324795 +16951245307634830999_1400_000_1420_000,1568599905824790 +16951245307634830999_1400_000_1420_000,1568599903824893 +16951245307634830999_1400_000_1420_000,1568599902324620 +16951245307634830999_1400_000_1420_000,1568599905324768 +16951245307634830999_1400_000_1420_000,1568599893324265 +16951245307634830999_1400_000_1420_000,1568599892824082 +16951245307634830999_1400_000_1420_000,1568599895324702 +16951245307634830999_1400_000_1420_000,1568599892323941 +17835886859721116155_1860_000_1880_000,1558151678787439 +17835886859721116155_1860_000_1880_000,1558151676287513 +17835886859721116155_1860_000_1880_000,1558151670787584 +17835886859721116155_1860_000_1880_000,1558151680287177 +17835886859721116155_1860_000_1880_000,1558151670287284 +17835886859721116155_1860_000_1880_000,1558151679287439 +17835886859721116155_1860_000_1880_000,1558151679787332 +17835886859721116155_1860_000_1880_000,1558151680787183 +17835886859721116155_1860_000_1880_000,1558151668786681 +17835886859721116155_1860_000_1880_000,1558151678287466 +17835886859721116155_1860_000_1880_000,1558151667787272 +17835886859721116155_1860_000_1880_000,1558151677287479 +17835886859721116155_1860_000_1880_000,1558151669286533 +17835886859721116155_1860_000_1880_000,1558151669786756 +17835886859721116155_1860_000_1880_000,1558151676787561 +17835886859721116155_1860_000_1880_000,1558151668286995 +17835886859721116155_1860_000_1880_000,1558151666786923 +17835886859721116155_1860_000_1880_000,1558151677787410 +17835886859721116155_1860_000_1880_000,1558151667287257 +17835886859721116155_1860_000_1880_000,1558151666286432 +9145030426583202228_1060_000_1080_000,1557424274778866 +9145030426583202228_1060_000_1080_000,1557424275778987 +9145030426583202228_1060_000_1080_000,1557424266779291 +9145030426583202228_1060_000_1080_000,1557424278279219 +9145030426583202228_1060_000_1080_000,1557424276779170 +9145030426583202228_1060_000_1080_000,1557424279279575 +9145030426583202228_1060_000_1080_000,1557424268279175 +9145030426583202228_1060_000_1080_000,1557424277779106 +9145030426583202228_1060_000_1080_000,1557424266279249 +9145030426583202228_1060_000_1080_000,1557424269279152 +9145030426583202228_1060_000_1080_000,1557424268779150 +9145030426583202228_1060_000_1080_000,1557424277279133 +9145030426583202228_1060_000_1080_000,1557424275278791 +9145030426583202228_1060_000_1080_000,1557424265779130 
+9145030426583202228_1060_000_1080_000,1557424264779014 +9145030426583202228_1060_000_1080_000,1557424265279048 +9145030426583202228_1060_000_1080_000,1557424267279322 +9145030426583202228_1060_000_1080_000,1557424276279143 +9145030426583202228_1060_000_1080_000,1557424278779413 +9145030426583202228_1060_000_1080_000,1557424267779293 +13781857304705519152_2740_000_2760_000,1558018015472758 +13781857304705519152_2740_000_2760_000,1558018006972289 +13781857304705519152_2740_000_2760_000,1558018007472306 +13781857304705519152_2740_000_2760_000,1558018014472458 +13781857304705519152_2740_000_2760_000,1558018017472179 +13781857304705519152_2740_000_2760_000,1558018014972710 +13781857304705519152_2740_000_2760_000,1558018008472276 +13781857304705519152_2740_000_2760_000,1558018006472299 +13781857304705519152_2740_000_2760_000,1558018004472242 +13781857304705519152_2740_000_2760_000,1558018017972558 +13781857304705519152_2740_000_2760_000,1558018004972259 +13781857304705519152_2740_000_2760_000,1558018007972307 +13781857304705519152_2740_000_2760_000,1558018013972483 +13781857304705519152_2740_000_2760_000,1558018005972338 +13781857304705519152_2740_000_2760_000,1558018016972032 +13781857304705519152_2740_000_2760_000,1558018015972514 +13781857304705519152_2740_000_2760_000,1558018005472310 +13781857304705519152_2740_000_2760_000,1558018003972238 +13781857304705519152_2740_000_2760_000,1558018018472666 +13781857304705519152_2740_000_2760_000,1558018016472185 +5154724129640787887_4840_000_4860_000,1557342396562648 +5154724129640787887_4840_000_4860_000,1557342399062810 +5154724129640787887_4840_000_4860_000,1557342395062806 +5154724129640787887_4840_000_4860_000,1557342405062520 +5154724129640787887_4840_000_4860_000,1557342399562770 +5154724129640787887_4840_000_4860_000,1557342395562652 +5154724129640787887_4840_000_4860_000,1557342406562476 +5154724129640787887_4840_000_4860_000,1557342408562474 +5154724129640787887_4840_000_4860_000,1557342406062444 +5154724129640787887_4840_000_4860_000,1557342397562592 +5154724129640787887_4840_000_4860_000,1557342407562646 +5154724129640787887_4840_000_4860_000,1557342396062602 +5154724129640787887_4840_000_4860_000,1557342409562395 +5154724129640787887_4840_000_4860_000,1557342397062617 +5154724129640787887_4840_000_4860_000,1557342409062401 +5154724129640787887_4840_000_4860_000,1557342398062702 +5154724129640787887_4840_000_4860_000,1557342407062596 +5154724129640787887_4840_000_4860_000,1557342405562490 +5154724129640787887_4840_000_4860_000,1557342408062539 +5154724129640787887_4840_000_4860_000,1557342398562701 +12892154548237137398_2820_000_2840_000,1558018087522764 +12892154548237137398_2820_000_2840_000,1558018098022390 +12892154548237137398_2820_000_2840_000,1558018088022638 +12892154548237137398_2820_000_2840_000,1558018095522691 +12892154548237137398_2820_000_2840_000,1558018087022717 +12892154548237137398_2820_000_2840_000,1558018086022213 +12892154548237137398_2820_000_2840_000,1558018086522385 +12892154548237137398_2820_000_2840_000,1558018085522203 +12892154548237137398_2820_000_2840_000,1558018094522190 +12892154548237137398_2820_000_2840_000,1558018084022848 +12892154548237137398_2820_000_2840_000,1558018085022352 +12892154548237137398_2820_000_2840_000,1558018088522537 +12892154548237137398_2820_000_2840_000,1558018084522834 +12892154548237137398_2820_000_2840_000,1558018097022451 +12892154548237137398_2820_000_2840_000,1558018097522376 +12892154548237137398_2820_000_2840_000,1558018098522395 
+12892154548237137398_2820_000_2840_000,1558018096022561 +12892154548237137398_2820_000_2840_000,1558018096522494 +12892154548237137398_2820_000_2840_000,1558018094021934 +12892154548237137398_2820_000_2840_000,1558018095022568 +17262030607996041518_540_000_560_000,1558150357737631 +17262030607996041518_540_000_560_000,1558150360737468 +17262030607996041518_540_000_560_000,1558150358737355 +17262030607996041518_540_000_560_000,1558150346737340 +17262030607996041518_540_000_560_000,1558150350737099 +17262030607996041518_540_000_560_000,1558150347237353 +17262030607996041518_540_000_560_000,1558150349237231 +17262030607996041518_540_000_560_000,1558150348237167 +17262030607996041518_540_000_560_000,1558150359237305 +17262030607996041518_540_000_560_000,1558150348737035 +17262030607996041518_540_000_560_000,1558150359737335 +17262030607996041518_540_000_560_000,1558150347737351 +17262030607996041518_540_000_560_000,1558150350237481 +17262030607996041518_540_000_560_000,1558150356237309 +17262030607996041518_540_000_560_000,1558150349737529 +17262030607996041518_540_000_560_000,1558150356737414 +17262030607996041518_540_000_560_000,1558150346237488 +17262030607996041518_540_000_560_000,1558150358237512 +17262030607996041518_540_000_560_000,1558150360237386 +17262030607996041518_540_000_560_000,1558150357237609 +1735154401471216485_440_000_460_000,1566351679575063 +1735154401471216485_440_000_460_000,1566351680574951 +1735154401471216485_440_000_460_000,1566351667075023 +1735154401471216485_440_000_460_000,1566351668074924 +1735154401471216485_440_000_460_000,1566351681074884 +1735154401471216485_440_000_460_000,1566351679075007 +1735154401471216485_440_000_460_000,1566351671574819 +1735154401471216485_440_000_460_000,1566351670575041 +1735154401471216485_440_000_460_000,1566351681574847 +1735154401471216485_440_000_460_000,1566351678574927 +1735154401471216485_440_000_460_000,1566351667575012 +1735154401471216485_440_000_460_000,1566351668574986 +1735154401471216485_440_000_460_000,1566351678074851 +1735154401471216485_440_000_460_000,1566351670075165 +1735154401471216485_440_000_460_000,1566351671074932 +1735154401471216485_440_000_460_000,1566351680075032 +1735154401471216485_440_000_460_000,1566351677075266 +1735154401471216485_440_000_460_000,1566351669075103 +1735154401471216485_440_000_460_000,1566351669575114 +1735154401471216485_440_000_460_000,1566351677575057 +16721473705085324478_2580_000_2600_000,1559143954073968 +16721473705085324478_2580_000_2600_000,1559143946067629 +16721473705085324478_2580_000_2600_000,1559143948570004 +16721473705085324478_2580_000_2600_000,1559143957574188 +16721473705085324478_2580_000_2600_000,1559143945567167 +16721473705085324478_2580_000_2600_000,1559143945066818 +16721473705085324478_2580_000_2600_000,1559143947068541 +16721473705085324478_2580_000_2600_000,1559143956574149 +16721473705085324478_2580_000_2600_000,1559143958574172 +16721473705085324478_2580_000_2600_000,1559143955573951 +16721473705085324478_2580_000_2600_000,1559143957074228 +16721473705085324478_2580_000_2600_000,1559143947568997 +16721473705085324478_2580_000_2600_000,1559143944066354 +16721473705085324478_2580_000_2600_000,1559143954573995 +16721473705085324478_2580_000_2600_000,1559143946568047 +16721473705085324478_2580_000_2600_000,1559143956074028 +16721473705085324478_2580_000_2600_000,1559143948069446 +16721473705085324478_2580_000_2600_000,1559143944566550 +16721473705085324478_2580_000_2600_000,1559143955073909 +16721473705085324478_2580_000_2600_000,1559143958074171 
+5046614299208670619_1760_000_1780_000,1557859931896818 +5046614299208670619_1760_000_1780_000,1557859942397098 +5046614299208670619_1760_000_1780_000,1557859941397484 +5046614299208670619_1760_000_1780_000,1557859941897278 +5046614299208670619_1760_000_1780_000,1557859939397451 +5046614299208670619_1760_000_1780_000,1557859929394856 +5046614299208670619_1760_000_1780_000,1557859938397438 +5046614299208670619_1760_000_1780_000,1557859931396740 +5046614299208670619_1760_000_1780_000,1557859940397424 +5046614299208670619_1760_000_1780_000,1557859930896546 +5046614299208670619_1760_000_1780_000,1557859939897429 +5046614299208670619_1760_000_1780_000,1557859929895843 +5046614299208670619_1760_000_1780_000,1557859928893310 +5046614299208670619_1760_000_1780_000,1557859938897438 +5046614299208670619_1760_000_1780_000,1557859940897554 +5046614299208670619_1760_000_1780_000,1557859942897115 +5046614299208670619_1760_000_1780_000,1557859932897120 +5046614299208670619_1760_000_1780_000,1557859930396386 +5046614299208670619_1760_000_1780_000,1557859928391171 +5046614299208670619_1760_000_1780_000,1557859932396854 +6259508587655502768_780_000_800_000,1557843985062519 +6259508587655502768_780_000_800_000,1557843985562521 +6259508587655502768_780_000_800_000,1557843976562766 +6259508587655502768_780_000_800_000,1557843976062713 +6259508587655502768_780_000_800_000,1557843978562284 +6259508587655502768_780_000_800_000,1557843989062285 +6259508587655502768_780_000_800_000,1557843979062370 +6259508587655502768_780_000_800_000,1557843988562341 +6259508587655502768_780_000_800_000,1557843977562120 +6259508587655502768_780_000_800_000,1557843975562542 +6259508587655502768_780_000_800_000,1557843977062493 +6259508587655502768_780_000_800_000,1557843978062117 +6259508587655502768_780_000_800_000,1557843986562332 +6259508587655502768_780_000_800_000,1557843975062365 +6259508587655502768_780_000_800_000,1557843988062465 +6259508587655502768_780_000_800_000,1557843986062494 +6259508587655502768_780_000_800_000,1557843987062399 +6259508587655502768_780_000_800_000,1557843979562469 +6259508587655502768_780_000_800_000,1557843987562501 +6259508587655502768_780_000_800_000,1557843989562412 +11436803605426256250_1720_000_1740_000,1558151527787782 +11436803605426256250_1720_000_1740_000,1558151526787865 +11436803605426256250_1720_000_1740_000,1558151528287716 +11436803605426256250_1720_000_1740_000,1558151530287466 +11436803605426256250_1720_000_1740_000,1558151537786930 +11436803605426256250_1720_000_1740_000,1558151528787637 +11436803605426256250_1720_000_1740_000,1558151538786570 +11436803605426256250_1720_000_1740_000,1558151540786822 +11436803605426256250_1720_000_1740_000,1558151530787441 +11436803605426256250_1720_000_1740_000,1558151527287885 +11436803605426256250_1720_000_1740_000,1558151539786751 +11436803605426256250_1720_000_1740_000,1558151529787489 +11436803605426256250_1720_000_1740_000,1558151539286648 +11436803605426256250_1720_000_1740_000,1558151526287909 +11436803605426256250_1720_000_1740_000,1558151536786870 +11436803605426256250_1720_000_1740_000,1558151536287214 +11436803605426256250_1720_000_1740_000,1558151529287531 +11436803605426256250_1720_000_1740_000,1558151540286973 +11436803605426256250_1720_000_1740_000,1558151538286751 +11436803605426256250_1720_000_1740_000,1558151537286653 +15410814825574326536_2620_000_2640_000,1557860798372836 +15410814825574326536_2620_000_2640_000,1557860800872838 +15410814825574326536_2620_000_2640_000,1557860790372597 
+15410814825574326536_2620_000_2640_000,1557860791372832 +15410814825574326536_2620_000_2640_000,1557860799872854 +15410814825574326536_2620_000_2640_000,1557860789372743 +15410814825574326536_2620_000_2640_000,1557860791872904 +15410814825574326536_2620_000_2640_000,1557860798872877 +15410814825574326536_2620_000_2640_000,1557860788372735 +15410814825574326536_2620_000_2640_000,1557860801372803 +15410814825574326536_2620_000_2640_000,1557860802372685 +15410814825574326536_2620_000_2640_000,1557860801872720 +15410814825574326536_2620_000_2640_000,1557860802872671 +15410814825574326536_2620_000_2640_000,1557860792372830 +15410814825574326536_2620_000_2640_000,1557860790872704 +15410814825574326536_2620_000_2640_000,1557860799372902 +15410814825574326536_2620_000_2640_000,1557860792872709 +15410814825574326536_2620_000_2640_000,1557860788872750 +15410814825574326536_2620_000_2640_000,1557860800372906 +15410814825574326536_2620_000_2640_000,1557860789872662 +13585389505831587326_2560_000_2580_000,1557241472137342 +13585389505831587326_2560_000_2580_000,1557241476637531 +13585389505831587326_2560_000_2580_000,1557241484636865 +13585389505831587326_2560_000_2580_000,1557241473137454 +13585389505831587326_2560_000_2580_000,1557241476137536 +13585389505831587326_2560_000_2580_000,1557241472637386 +13585389505831587326_2560_000_2580_000,1557241485637136 +13585389505831587326_2560_000_2580_000,1557241484136968 +13585389505831587326_2560_000_2580_000,1557241485137091 +13585389505831587326_2560_000_2580_000,1557241473637451 +13585389505831587326_2560_000_2580_000,1557241482137115 +13585389505831587326_2560_000_2580_000,1557241475637469 +13585389505831587326_2560_000_2580_000,1557241483636983 +13585389505831587326_2560_000_2580_000,1557241474637506 +13585389505831587326_2560_000_2580_000,1557241483136950 +13585389505831587326_2560_000_2580_000,1557241486137285 +13585389505831587326_2560_000_2580_000,1557241474137501 +13585389505831587326_2560_000_2580_000,1557241486637439 +13585389505831587326_2560_000_2580_000,1557241475137435 +13585389505831587326_2560_000_2580_000,1557241482636985 +15739335479094705947_1420_000_1440_000,1557240344647374 +15739335479094705947_1420_000_1440_000,1557240333147825 +15739335479094705947_1420_000_1440_000,1557240332647832 +15739335479094705947_1420_000_1440_000,1557240336647687 +15739335479094705947_1420_000_1440_000,1557240345147370 +15739335479094705947_1420_000_1440_000,1557240334147846 +15739335479094705947_1420_000_1440_000,1557240335648112 +15739335479094705947_1420_000_1440_000,1557240345647376 +15739335479094705947_1420_000_1440_000,1557240332147799 +15739335479094705947_1420_000_1440_000,1557240344147429 +15739335479094705947_1420_000_1440_000,1557240342147432 +15739335479094705947_1420_000_1440_000,1557240343647467 +15739335479094705947_1420_000_1440_000,1557240346647461 +15739335479094705947_1420_000_1440_000,1557240343147461 +15739335479094705947_1420_000_1440_000,1557240333647840 +15739335479094705947_1420_000_1440_000,1557240335147955 +15739335479094705947_1420_000_1440_000,1557240342647438 +15739335479094705947_1420_000_1440_000,1557240334647920 +15739335479094705947_1420_000_1440_000,1557240346147451 +15739335479094705947_1420_000_1440_000,1557240336147836 +16743182245734335352_1260_000_1280_000,1557888790949495 +16743182245734335352_1260_000_1280_000,1557888787449383 +16743182245734335352_1260_000_1280_000,1557888788948833 +16743182245734335352_1260_000_1280_000,1557888786949263 +16743182245734335352_1260_000_1280_000,1557888776449903 
+16743182245734335352_1260_000_1280_000,1557888780449779 +16743182245734335352_1260_000_1280_000,1557888786448960 +16743182245734335352_1260_000_1280_000,1557888777950853 +16743182245734335352_1260_000_1280_000,1557888789448778 +16743182245734335352_1260_000_1280_000,1557888790449312 +16743182245734335352_1260_000_1280_000,1557888779950298 +16743182245734335352_1260_000_1280_000,1557888778451116 +16743182245734335352_1260_000_1280_000,1557888788449105 +16743182245734335352_1260_000_1280_000,1557888779450837 +16743182245734335352_1260_000_1280_000,1557888776950096 +16743182245734335352_1260_000_1280_000,1557888789949015 +16743182245734335352_1260_000_1280_000,1557888787949303 +16743182245734335352_1260_000_1280_000,1557888778951257 +16743182245734335352_1260_000_1280_000,1557888780949350 +16743182245734335352_1260_000_1280_000,1557888777450467 +4037952268810331899_2420_000_2440_000,1567028476924185 +4037952268810331899_2420_000_2440_000,1567028464925058 +4037952268810331899_2420_000_2440_000,1567028466425018 +4037952268810331899_2420_000_2440_000,1567028477924371 +4037952268810331899_2420_000_2440_000,1567028475423773 +4037952268810331899_2420_000_2440_000,1567028475923773 +4037952268810331899_2420_000_2440_000,1567028478424492 +4037952268810331899_2420_000_2440_000,1567028468424910 +4037952268810331899_2420_000_2440_000,1567028466924954 +4037952268810331899_2420_000_2440_000,1567028477424335 +4037952268810331899_2420_000_2440_000,1567028465925047 +4037952268810331899_2420_000_2440_000,1567028476424000 +4037952268810331899_2420_000_2440_000,1567028474424271 +4037952268810331899_2420_000_2440_000,1567028467924880 +4037952268810331899_2420_000_2440_000,1567028478924633 +4037952268810331899_2420_000_2440_000,1567028467424848 +4037952268810331899_2420_000_2440_000,1567028465425099 +4037952268810331899_2420_000_2440_000,1567028464424994 +4037952268810331899_2420_000_2440_000,1567028468924846 +4037952268810331899_2420_000_2440_000,1567028474924011 +17052666463197337241_4560_000_4580_000,1558019835965165 +17052666463197337241_4560_000_4580_000,1558019834964122 +17052666463197337241_4560_000_4580_000,1558019826962706 +17052666463197337241_4560_000_4580_000,1558019837466540 +17052666463197337241_4560_000_4580_000,1558019823962469 +17052666463197337241_4560_000_4580_000,1558019826462862 +17052666463197337241_4560_000_4580_000,1558019834463718 +17052666463197337241_4560_000_4580_000,1558019827962424 +17052666463197337241_4560_000_4580_000,1558019836465729 +17052666463197337241_4560_000_4580_000,1558019827462613 +17052666463197337241_4560_000_4580_000,1558019833963377 +17052666463197337241_4560_000_4580_000,1558019824462615 +17052666463197337241_4560_000_4580_000,1558019836966268 +17052666463197337241_4560_000_4580_000,1558019835464590 +17052666463197337241_4560_000_4580_000,1558019828462295 +17052666463197337241_4560_000_4580_000,1558019825962899 +17052666463197337241_4560_000_4580_000,1558019824962730 +17052666463197337241_4560_000_4580_000,1558019837966298 +17052666463197337241_4560_000_4580_000,1558019825462832 +17052666463197337241_4560_000_4580_000,1558019838465664 +8197312656120253218_3120_000_3140_000,1569346275474782 +8197312656120253218_3120_000_3140_000,1569346279974791 +8197312656120253218_3120_000_3140_000,1569346268974889 +8197312656120253218_3120_000_3140_000,1569346266474964 +8197312656120253218_3120_000_3140_000,1569346267974935 +8197312656120253218_3120_000_3140_000,1569346269974854 +8197312656120253218_3120_000_3140_000,1569346268474908 
+8197312656120253218_3120_000_3140_000,1569346266975023 +8197312656120253218_3120_000_3140_000,1569346265475116 +8197312656120253218_3120_000_3140_000,1569346267475024 +8197312656120253218_3120_000_3140_000,1569346276974820 +8197312656120253218_3120_000_3140_000,1569346275974860 +8197312656120253218_3120_000_3140_000,1569346276474878 +8197312656120253218_3120_000_3140_000,1569346279474792 +8197312656120253218_3120_000_3140_000,1569346269474905 +8197312656120253218_3120_000_3140_000,1569346278974783 +8197312656120253218_3120_000_3140_000,1569346265975042 +8197312656120253218_3120_000_3140_000,1569346277974754 +8197312656120253218_3120_000_3140_000,1569346278474771 +8197312656120253218_3120_000_3140_000,1569346277474745 +7844300897851889216_500_000_520_000,1569180269849584 +7844300897851889216_500_000_520_000,1569180283349326 +7844300897851889216_500_000_520_000,1569180270349514 +7844300897851889216_500_000_520_000,1569180281349367 +7844300897851889216_500_000_520_000,1569180273349112 +7844300897851889216_500_000_520_000,1569180280349315 +7844300897851889216_500_000_520_000,1569180280849273 +7844300897851889216_500_000_520_000,1569180283849207 +7844300897851889216_500_000_520_000,1569180272849305 +7844300897851889216_500_000_520_000,1569180270849484 +7844300897851889216_500_000_520_000,1569180282849497 +7844300897851889216_500_000_520_000,1569180271349596 +7844300897851889216_500_000_520_000,1569180271849879 +7844300897851889216_500_000_520_000,1569180284349457 +7844300897851889216_500_000_520_000,1569180282349589 +7844300897851889216_500_000_520_000,1569180281849491 +7844300897851889216_500_000_520_000,1569180272349632 +7844300897851889216_500_000_520_000,1569180274349414 +7844300897851889216_500_000_520_000,1569180279849307 +7844300897851889216_500_000_520_000,1569180273849225 +14918167237855418464_1420_000_1440_000,1557265451487590 +14918167237855418464_1420_000_1440_000,1557265453487513 +14918167237855418464_1420_000_1440_000,1557265440987220 +14918167237855418464_1420_000_1440_000,1557265452987516 +14918167237855418464_1420_000_1440_000,1557265441487272 +14918167237855418464_1420_000_1440_000,1557265449987389 +14918167237855418464_1420_000_1440_000,1557265450487458 +14918167237855418464_1420_000_1440_000,1557265450987504 +14918167237855418464_1420_000_1440_000,1557265440487216 +14918167237855418464_1420_000_1440_000,1557265452487693 +14918167237855418464_1420_000_1440_000,1557265443487465 +14918167237855418464_1420_000_1440_000,1557265451987681 +14918167237855418464_1420_000_1440_000,1557265453987788 +14918167237855418464_1420_000_1440_000,1557265449487404 +14918167237855418464_1420_000_1440_000,1557265442487348 +14918167237855418464_1420_000_1440_000,1557265439487550 +14918167237855418464_1420_000_1440_000,1557265441987298 +14918167237855418464_1420_000_1440_000,1557265439987371 +14918167237855418464_1420_000_1440_000,1557265443987430 +14918167237855418464_1420_000_1440_000,1557265442987426 +1765211916310163252_4400_000_4420_000,1557548091247400 +1765211916310163252_4400_000_4420_000,1557548092247422 +1765211916310163252_4400_000_4420_000,1557548082747340 +1765211916310163252_4400_000_4420_000,1557548080247436 +1765211916310163252_4400_000_4420_000,1557548081747442 +1765211916310163252_4400_000_4420_000,1557548079747433 +1765211916310163252_4400_000_4420_000,1557548093747379 +1765211916310163252_4400_000_4420_000,1557548079247435 +1765211916310163252_4400_000_4420_000,1557548089247264 +1765211916310163252_4400_000_4420_000,1557548092747360 
+1765211916310163252_4400_000_4420_000,1557548093247395 +1765211916310163252_4400_000_4420_000,1557548090747296 +1765211916310163252_4400_000_4420_000,1557548083747413 +1765211916310163252_4400_000_4420_000,1557548091747409 +1765211916310163252_4400_000_4420_000,1557548080747512 +1765211916310163252_4400_000_4420_000,1557548090247209 +1765211916310163252_4400_000_4420_000,1557548089747220 +1765211916310163252_4400_000_4420_000,1557548082247344 +1765211916310163252_4400_000_4420_000,1557548081247513 +1765211916310163252_4400_000_4420_000,1557548083247412 +365416647046203224_1080_000_1100_000,1557424297779078 +365416647046203224_1080_000_1100_000,1557424298279187 +365416647046203224_1080_000_1100_000,1557424284779145 +365416647046203224_1080_000_1100_000,1557424299279496 +365416647046203224_1080_000_1100_000,1557424285779375 +365416647046203224_1080_000_1100_000,1557424286279493 +365416647046203224_1080_000_1100_000,1557424288279208 +365416647046203224_1080_000_1100_000,1557424289279220 +365416647046203224_1080_000_1100_000,1557424286779477 +365416647046203224_1080_000_1100_000,1557424294779296 +365416647046203224_1080_000_1100_000,1557424297279126 +365416647046203224_1080_000_1100_000,1557424288779176 +365416647046203224_1080_000_1100_000,1557424287779352 +365416647046203224_1080_000_1100_000,1557424296779274 +365416647046203224_1080_000_1100_000,1557424298779408 +365416647046203224_1080_000_1100_000,1557424295779354 +365416647046203224_1080_000_1100_000,1557424295279343 +365416647046203224_1080_000_1100_000,1557424287279453 +365416647046203224_1080_000_1100_000,1557424285279259 +365416647046203224_1080_000_1100_000,1557424296279315 +3122599254941105215_2980_000_3000_000,1557267013486064 +3122599254941105215_2980_000_3000_000,1557266999471976 +3122599254941105215_2980_000_3000_000,1557267003971991 +3122599254941105215_2980_000_3000_000,1557267002972068 +3122599254941105215_2980_000_3000_000,1557267011978743 +3122599254941105215_2980_000_3000_000,1557267010473667 +3122599254941105215_2980_000_3000_000,1557267001472099 +3122599254941105215_2980_000_3000_000,1557267009973013 +3122599254941105215_2980_000_3000_000,1557267001972106 +3122599254941105215_2980_000_3000_000,1557267009472852 +3122599254941105215_2980_000_3000_000,1557267013987647 +3122599254941105215_2980_000_3000_000,1557267000972170 +3122599254941105215_2980_000_3000_000,1557267011476593 +3122599254941105215_2980_000_3000_000,1557267012983667 +3122599254941105215_2980_000_3000_000,1557266999972086 +3122599254941105215_2980_000_3000_000,1557267012481088 +3122599254941105215_2980_000_3000_000,1557267010974840 +3122599254941105215_2980_000_3000_000,1557267000472146 +3122599254941105215_2980_000_3000_000,1557267002472069 +3122599254941105215_2980_000_3000_000,1557267003472050 +11672844176539348333_4440_000_4460_000,1557548130247264 +11672844176539348333_4440_000_4460_000,1557548119247298 +11672844176539348333_4440_000_4460_000,1557548120747400 +11672844176539348333_4440_000_4460_000,1557548129247403 +11672844176539348333_4440_000_4460_000,1557548121747436 +11672844176539348333_4440_000_4460_000,1557548131747575 +11672844176539348333_4440_000_4460_000,1557548122747361 +11672844176539348333_4440_000_4460_000,1557548132247553 +11672844176539348333_4440_000_4460_000,1557548129747331 +11672844176539348333_4440_000_4460_000,1557548119747262 +11672844176539348333_4440_000_4460_000,1557548121247414 +11672844176539348333_4440_000_4460_000,1557548133747542 +11672844176539348333_4440_000_4460_000,1557548131247534 
+11672844176539348333_4440_000_4460_000,1557548122247407 +11672844176539348333_4440_000_4460_000,1557548120247254 +11672844176539348333_4440_000_4460_000,1557548132747504 +11672844176539348333_4440_000_4460_000,1557548123247374 +11672844176539348333_4440_000_4460_000,1557548133247537 +11672844176539348333_4440_000_4460_000,1557548130747376 +11672844176539348333_4440_000_4460_000,1557548123747487 +17212025549630306883_2500_000_2520_000,1558035014396914 +17212025549630306883_2500_000_2520_000,1558035010397071 +17212025549630306883_2500_000_2520_000,1558035000879571 +17212025549630306883_2500_000_2520_000,1558035010897075 +17212025549630306883_2500_000_2520_000,1558035003389800 +17212025549630306883_2500_000_2520_000,1558034999877494 +17212025549630306883_2500_000_2520_000,1558035001883076 +17212025549630306883_2500_000_2520_000,1558035013896904 +17212025549630306883_2500_000_2520_000,1558035002385104 +17212025549630306883_2500_000_2520_000,1558035013397429 +17212025549630306883_2500_000_2520_000,1558035012398066 +17212025549630306883_2500_000_2520_000,1558035009897309 +17212025549630306883_2500_000_2520_000,1558035011397333 +17212025549630306883_2500_000_2520_000,1558035003892217 +17212025549630306883_2500_000_2520_000,1558035002887308 +17212025549630306883_2500_000_2520_000,1558035004394149 +17212025549630306883_2500_000_2520_000,1558035001381105 +17212025549630306883_2500_000_2520_000,1558035012897961 +17212025549630306883_2500_000_2520_000,1558035011897875 +17212025549630306883_2500_000_2520_000,1558035000378455 +5444585006397501511_160_000_180_000,1557843369612501 +5444585006397501511_160_000_180_000,1557843356612649 +5444585006397501511_160_000_180_000,1557843357612587 +5444585006397501511_160_000_180_000,1557843366112688 +5444585006397501511_160_000_180_000,1557843369112577 +5444585006397501511_160_000_180_000,1557843356112502 +5444585006397501511_160_000_180_000,1557843357112699 +5444585006397501511_160_000_180_000,1557843359112424 +5444585006397501511_160_000_180_000,1557843368612608 +5444585006397501511_160_000_180_000,1557843358612418 +5444585006397501511_160_000_180_000,1557843359612545 +5444585006397501511_160_000_180_000,1557843365112636 +5444585006397501511_160_000_180_000,1557843365612657 +5444585006397501511_160_000_180_000,1557843367112626 +5444585006397501511_160_000_180_000,1557843366612681 +5444585006397501511_160_000_180_000,1557843367612623 +5444585006397501511_160_000_180_000,1557843358112458 +5444585006397501511_160_000_180_000,1557843355112397 +5444585006397501511_160_000_180_000,1557843355612457 +5444585006397501511_160_000_180_000,1557843368112622 +17595457728136868510_860_000_880_000,1568570142949954 +17595457728136868510_860_000_880_000,1568570144449980 +17595457728136868510_860_000_880_000,1568570133450011 +17595457728136868510_860_000_880_000,1568570132449985 +17595457728136868510_860_000_880_000,1568570146949999 +17595457728136868510_860_000_880_000,1568570145450102 +17595457728136868510_860_000_880_000,1568570136950003 +17595457728136868510_860_000_880_000,1568570146449992 +17595457728136868510_860_000_880_000,1568570145950029 +17595457728136868510_860_000_880_000,1568570134450024 +17595457728136868510_860_000_880_000,1568570135449980 +17595457728136868510_860_000_880_000,1568570133950026 +17595457728136868510_860_000_880_000,1568570143449845 +17595457728136868510_860_000_880_000,1568570143949863 +17595457728136868510_860_000_880_000,1568570144950031 +17595457728136868510_860_000_880_000,1568570132950020 +17595457728136868510_860_000_880_000,1568570142449990 
+17595457728136868510_860_000_880_000,1568570135950008 +17595457728136868510_860_000_880_000,1568570134950042 +17595457728136868510_860_000_880_000,1568570136450044 +10534368980139017457_4480_000_4500_000,1557548163747266 +10534368980139017457_4480_000_4500_000,1557548173324042 +10534368980139017457_4480_000_4500_000,1557548171335072 +10534368980139017457_4480_000_4500_000,1557548171831184 +10534368980139017457_4480_000_4500_000,1557548172327947 +10534368980139017457_4480_000_4500_000,1557548160747474 +10534368980139017457_4480_000_4500_000,1557548159747321 +10534368980139017457_4480_000_4500_000,1557548170342486 +10534368980139017457_4480_000_4500_000,1557548169845348 +10534368980139017457_4480_000_4500_000,1557548170838932 +10534368980139017457_4480_000_4500_000,1557548162247217 +10534368980139017457_4480_000_4500_000,1557548169346776 +10534368980139017457_4480_000_4500_000,1557548173822919 +10534368980139017457_4480_000_4500_000,1557548162747305 +10534368980139017457_4480_000_4500_000,1557548160247434 +10534368980139017457_4480_000_4500_000,1557548163247304 +10534368980139017457_4480_000_4500_000,1557548159247329 +10534368980139017457_4480_000_4500_000,1557548161247379 +10534368980139017457_4480_000_4500_000,1557548161747254 +10534368980139017457_4480_000_4500_000,1557548172825501 +4593468568253300598_1620_000_1640_000,1558034119947167 +4593468568253300598_1620_000_1640_000,1558034131947521 +4593468568253300598_1620_000_1640_000,1558034130447767 +4593468568253300598_1620_000_1640_000,1558034123947147 +4593468568253300598_1620_000_1640_000,1558034123447155 +4593468568253300598_1620_000_1640_000,1558034131447564 +4593468568253300598_1620_000_1640_000,1558034132447509 +4593468568253300598_1620_000_1640_000,1558034133947605 +4593468568253300598_1620_000_1640_000,1558034130947609 +4593468568253300598_1620_000_1640_000,1558034120947198 +4593468568253300598_1620_000_1640_000,1558034129947874 +4593468568253300598_1620_000_1640_000,1558034121947243 +4593468568253300598_1620_000_1640_000,1558034134447535 +4593468568253300598_1620_000_1640_000,1558034122447204 +4593468568253300598_1620_000_1640_000,1558034120447070 +4593468568253300598_1620_000_1640_000,1558034132947552 +4593468568253300598_1620_000_1640_000,1558034121447241 +4593468568253300598_1620_000_1640_000,1558034124447344 +4593468568253300598_1620_000_1640_000,1558034122947127 +4593468568253300598_1620_000_1640_000,1558034133447706 +5810494922060252082_3720_000_3740_000,1557324791637281 +5810494922060252082_3720_000_3740_000,1557324799137989 +5810494922060252082_3720_000_3740_000,1557324792137386 +5810494922060252082_3720_000_3740_000,1557324793137531 +5810494922060252082_3720_000_3740_000,1557324802137484 +5810494922060252082_3720_000_3740_000,1557324802637768 +5810494922060252082_3720_000_3740_000,1557324793637492 +5810494922060252082_3720_000_3740_000,1557324789137692 +5810494922060252082_3720_000_3740_000,1557324803137953 +5810494922060252082_3720_000_3740_000,1557324800137837 +5810494922060252082_3720_000_3740_000,1557324789637648 +5810494922060252082_3720_000_3740_000,1557324800637433 +5810494922060252082_3720_000_3740_000,1557324792637516 +5810494922060252082_3720_000_3740_000,1557324803638106 +5810494922060252082_3720_000_3740_000,1557324791137526 +5810494922060252082_3720_000_3740_000,1557324790637757 +5810494922060252082_3720_000_3740_000,1557324801637223 +5810494922060252082_3720_000_3740_000,1557324801137028 +5810494922060252082_3720_000_3740_000,1557324799638056 +5810494922060252082_3720_000_3740_000,1557324790137645 
+2942662230423855469_880_000_900_000,1559348363724177 +2942662230423855469_880_000_900_000,1559348365724257 +2942662230423855469_880_000_900_000,1559348367224483 +2942662230423855469_880_000_900_000,1559348356718454 +2942662230423855469_880_000_900_000,1559348367724406 +2942662230423855469_880_000_900_000,1559348355717289 +2942662230423855469_880_000_900_000,1559348357719518 +2942662230423855469_880_000_900_000,1559348357218926 +2942662230423855469_880_000_900_000,1559348363224053 +2942662230423855469_880_000_900_000,1559348365224224 +2942662230423855469_880_000_900_000,1559348364724292 +2942662230423855469_880_000_900_000,1559348353716247 +2942662230423855469_880_000_900_000,1559348354716626 +2942662230423855469_880_000_900_000,1559348366224290 +2942662230423855469_880_000_900_000,1559348366724409 +2942662230423855469_880_000_900_000,1559348353216332 +2942662230423855469_880_000_900_000,1559348355216840 +2942662230423855469_880_000_900_000,1559348364224254 +2942662230423855469_880_000_900_000,1559348356217995 +2942662230423855469_880_000_900_000,1559348354216438 +5927928428387529213_1640_000_1660_000,1557240564663983 +5927928428387529213_1640_000_1660_000,1557240563163984 +5927928428387529213_1640_000_1660_000,1557240553162670 +5927928428387529213_1640_000_1660_000,1557240566163388 +5927928428387529213_1640_000_1660_000,1557240556162436 +5927928428387529213_1640_000_1660_000,1557240554162851 +5927928428387529213_1640_000_1660_000,1557240552662244 +5927928428387529213_1640_000_1660_000,1557240555162405 +5927928428387529213_1640_000_1660_000,1557240564164016 +5927928428387529213_1640_000_1660_000,1557240552162020 +5927928428387529213_1640_000_1660_000,1557240554662508 +5927928428387529213_1640_000_1660_000,1557240562163098 +5927928428387529213_1640_000_1660_000,1557240566663035 +5927928428387529213_1640_000_1660_000,1557240555662402 +5927928428387529213_1640_000_1660_000,1557240565663746 +5927928428387529213_1640_000_1660_000,1557240562663614 +5927928428387529213_1640_000_1660_000,1557240563664057 +5927928428387529213_1640_000_1660_000,1557240556662471 +5927928428387529213_1640_000_1660_000,1557240553662931 +5927928428387529213_1640_000_1660_000,1557240565163970 +3645211352574995740_3540_000_3560_000,1558018817992298 +3645211352574995740_3540_000_3560_000,1558018804471765 +3645211352574995740_3540_000_3560_000,1558018808472120 +3645211352574995740_3540_000_3560_000,1558018807972072 +3645211352574995740_3540_000_3560_000,1558018815475883 +3645211352574995740_3540_000_3560_000,1558018804971761 +3645211352574995740_3540_000_3560_000,1558018816984976 +3645211352574995740_3540_000_3560_000,1558018815978327 +3645211352574995740_3540_000_3560_000,1558018816481398 +3645211352574995740_3540_000_3560_000,1558018818494946 +3645211352574995740_3540_000_3560_000,1558018817488679 +3645211352574995740_3540_000_3560_000,1558018805471754 +3645211352574995740_3540_000_3560_000,1558018806471940 +3645211352574995740_3540_000_3560_000,1558018807472066 +3645211352574995740_3540_000_3560_000,1558018805971789 +3645211352574995740_3540_000_3560_000,1558018806972056 +3645211352574995740_3540_000_3560_000,1558018814473516 +3645211352574995740_3540_000_3560_000,1558018813973212 +3645211352574995740_3540_000_3560_000,1558018814974307 +3645211352574995740_3540_000_3560_000,1558018803972077 +3510690431623954420_7700_000_7720_000,1567022016599663 +3510690431623954420_7700_000_7720_000,1567022018599669 +3510690431623954420_7700_000_7720_000,1567022028099832 +3510690431623954420_7700_000_7720_000,1567022017099671 
+3510690431623954420_7700_000_7720_000,1567022021099696 +3510690431623954420_7700_000_7720_000,1567022019599567 +3510690431623954420_7700_000_7720_000,1567022020599579 +3510690431623954420_7700_000_7720_000,1567022029600020 +3510690431623954420_7700_000_7720_000,1567022017599659 +3510690431623954420_7700_000_7720_000,1567022026599976 +3510690431623954420_7700_000_7720_000,1567022030099989 +3510690431623954420_7700_000_7720_000,1567022028599676 +3510690431623954420_7700_000_7720_000,1567022019099605 +3510690431623954420_7700_000_7720_000,1567022018099661 +3510690431623954420_7700_000_7720_000,1567022030599800 +3510690431623954420_7700_000_7720_000,1567022027599919 +3510690431623954420_7700_000_7720_000,1567022029099795 +3510690431623954420_7700_000_7720_000,1567022020099574 +3510690431623954420_7700_000_7720_000,1567022027099913 +3510690431623954420_7700_000_7720_000,1567022031099758 +39847154216997509_6440_000_6460_000,1568954824924191 +39847154216997509_6440_000_6460_000,1568954813424574 +39847154216997509_6440_000_6460_000,1568954813924259 +39847154216997509_6440_000_6460_000,1568954811424618 +39847154216997509_6440_000_6460_000,1568954822924591 +39847154216997509_6440_000_6460_000,1568954812924890 +39847154216997509_6440_000_6460_000,1568954820924315 +39847154216997509_6440_000_6460_000,1568954810424785 +39847154216997509_6440_000_6460_000,1568954811924639 +39847154216997509_6440_000_6460_000,1568954810924694 +39847154216997509_6440_000_6460_000,1568954814924374 +39847154216997509_6440_000_6460_000,1568954823424463 +39847154216997509_6440_000_6460_000,1568954824424244 +39847154216997509_6440_000_6460_000,1568954814424260 +39847154216997509_6440_000_6460_000,1568954821924250 +39847154216997509_6440_000_6460_000,1568954821424322 +39847154216997509_6440_000_6460_000,1568954820424237 +39847154216997509_6440_000_6460_000,1568954823924349 +39847154216997509_6440_000_6460_000,1568954812424884 +39847154216997509_6440_000_6460_000,1568954822424440 +8623236016759087157_3500_000_3520_000,1557324582561015 +8623236016759087157_3500_000_3520_000,1557324569137429 +8623236016759087157_3500_000_3520_000,1557324582058259 +8623236016759087157_3500_000_3520_000,1557324583062491 +8623236016759087157_3500_000_3520_000,1557324573638623 +8623236016759087157_3500_000_3520_000,1557324572138081 +8623236016759087157_3500_000_3520_000,1557324583562892 +8623236016759087157_3500_000_3520_000,1557324570637898 +8623236016759087157_3500_000_3520_000,1557324581052320 +8623236016759087157_3500_000_3520_000,1557324571638078 +8623236016759087157_3500_000_3520_000,1557324570137931 +8623236016759087157_3500_000_3520_000,1557324580549644 +8623236016759087157_3500_000_3520_000,1557324571138013 +8623236016759087157_3500_000_3520_000,1557324579042078 +8623236016759087157_3500_000_3520_000,1557324581555154 +8623236016759087157_3500_000_3520_000,1557324572638279 +8623236016759087157_3500_000_3520_000,1557324579544532 +8623236016759087157_3500_000_3520_000,1557324580047183 +8623236016759087157_3500_000_3520_000,1557324569637588 +8623236016759087157_3500_000_3520_000,1557324573138464 +8920841445900141920_1700_000_1720_000,1557859882950427 +8920841445900141920_1700_000_1720_000,1557859870947785 +8920841445900141920_1700_000_1720_000,1557859868947629 +8920841445900141920_1700_000_1720_000,1557859882449903 +8920841445900141920_1700_000_1720_000,1557859878447989 +8920841445900141920_1700_000_1720_000,1557859872447754 +8920841445900141920_1700_000_1720_000,1557859879448016 +8920841445900141920_1700_000_1720_000,1557859879948093 
+8920841445900141920_1700_000_1720_000,1557859869447788 +8920841445900141920_1700_000_1720_000,1557859881448912 +8920841445900141920_1700_000_1720_000,1557859870447773 +8920841445900141920_1700_000_1720_000,1557859880448231 +8920841445900141920_1700_000_1720_000,1557859878947993 +8920841445900141920_1700_000_1720_000,1557859880948478 +8920841445900141920_1700_000_1720_000,1557859869947772 +8920841445900141920_1700_000_1720_000,1557859881949366 +8920841445900141920_1700_000_1720_000,1557859872947901 +8920841445900141920_1700_000_1720_000,1557859871947716 +8920841445900141920_1700_000_1720_000,1557859871447819 +8920841445900141920_1700_000_1720_000,1557859868447474 +1417898473608326362_2560_000_2580_000,1557546242797474 +1417898473608326362_2560_000_2580_000,1557546239797468 +1417898473608326362_2560_000_2580_000,1557546241797368 +1417898473608326362_2560_000_2580_000,1557546252797649 +1417898473608326362_2560_000_2580_000,1557546252297680 +1417898473608326362_2560_000_2580_000,1557546239297163 +1417898473608326362_2560_000_2580_000,1557546253797788 +1417898473608326362_2560_000_2580_000,1557546249297427 +1417898473608326362_2560_000_2580_000,1557546242297446 +1417898473608326362_2560_000_2580_000,1557546251297740 +1417898473608326362_2560_000_2580_000,1557546240297658 +1417898473608326362_2560_000_2580_000,1557546240797643 +1417898473608326362_2560_000_2580_000,1557546250297550 +1417898473608326362_2560_000_2580_000,1557546249797555 +1417898473608326362_2560_000_2580_000,1557546251797725 +1417898473608326362_2560_000_2580_000,1557546250797666 +1417898473608326362_2560_000_2580_000,1557546253297756 +1417898473608326362_2560_000_2580_000,1557546243797028 +1417898473608326362_2560_000_2580_000,1557546243297291 +1417898473608326362_2560_000_2580_000,1557546241297483 +9584760613582366524_1620_000_1640_000,1557879417399208 +9584760613582366524_1620_000_1640_000,1557879416899412 +9584760613582366524_1620_000_1640_000,1557879428399102 +9584760613582366524_1620_000_1640_000,1557879420399302 +9584760613582366524_1620_000_1640_000,1557879427399045 +9584760613582366524_1620_000_1640_000,1557879420899353 +9584760613582366524_1620_000_1640_000,1557879426899061 +9584760613582366524_1620_000_1640_000,1557879418899485 +9584760613582366524_1620_000_1640_000,1557879418399553 +9584760613582366524_1620_000_1640_000,1557879429898992 +9584760613582366524_1620_000_1640_000,1557879428899097 +9584760613582366524_1620_000_1640_000,1557879430898987 +9584760613582366524_1620_000_1640_000,1557879429399097 +9584760613582366524_1620_000_1640_000,1557879421399451 +9584760613582366524_1620_000_1640_000,1557879431398990 +9584760613582366524_1620_000_1640_000,1557879419899335 +9584760613582366524_1620_000_1640_000,1557879419399372 +9584760613582366524_1620_000_1640_000,1557879430398927 +9584760613582366524_1620_000_1640_000,1557879417899434 +9584760613582366524_1620_000_1640_000,1557879427899058 +6503078254504013503_3440_000_3460_000,1557855947547440 +6503078254504013503_3440_000_3460_000,1557855934472627 +6503078254504013503_3440_000_3460_000,1557855932972711 +6503078254504013503_3440_000_3460_000,1557855934972072 +6503078254504013503_3440_000_3460_000,1557855946547513 +6503078254504013503_3440_000_3460_000,1557855933972741 +6503078254504013503_3440_000_3460_000,1557855945047402 +6503078254504013503_3440_000_3460_000,1557855936962356 +6503078254504013503_3440_000_3460_000,1557855945547411 +6503078254504013503_3440_000_3460_000,1557855947047525 +6503078254504013503_3440_000_3460_000,1557855944547167 
+6503078254504013503_3440_000_3460_000,1557855944046932 +6503078254504013503_3440_000_3460_000,1557855937459144 +6503078254504013503_3440_000_3460_000,1557855933472775 +6503078254504013503_3440_000_3460_000,1557855946047387 +6503078254504013503_3440_000_3460_000,1557855935470483 +6503078254504013503_3440_000_3460_000,1557855943047114 +6503078254504013503_3440_000_3460_000,1557855935968223 +6503078254504013503_3440_000_3460_000,1557855943547034 +6503078254504013503_3440_000_3460_000,1557855936465449 +11867874114645674271_600_000_620_000,1556074736854433 +11867874114645674271_600_000_620_000,1556074726349701 +11867874114645674271_600_000_620_000,1556074735851657 +11867874114645674271_600_000_620_000,1556074725849692 +11867874114645674271_600_000_620_000,1556074737859199 +11867874114645674271_600_000_620_000,1556074738362254 +11867874114645674271_600_000_620_000,1556074727849804 +11867874114645674271_600_000_620_000,1556074733850341 +11867874114645674271_600_000_620_000,1556074724350381 +11867874114645674271_600_000_620_000,1556074735350931 +11867874114645674271_600_000_620_000,1556074728349730 +11867874114645674271_600_000_620_000,1556074724849999 +11867874114645674271_600_000_620_000,1556074725349782 +11867874114645674271_600_000_620_000,1556074726849817 +11867874114645674271_600_000_620_000,1556074727349951 +11867874114645674271_600_000_620_000,1556074723850636 +11867874114645674271_600_000_620_000,1556074736352936 +11867874114645674271_600_000_620_000,1556074737356543 +11867874114645674271_600_000_620_000,1556074734850605 +2374138435300423201_2600_000_2620_000,1557546281297269 +2374138435300423201_2600_000_2620_000,1557546290797384 +2374138435300423201_2600_000_2620_000,1557546279797210 +2374138435300423201_2600_000_2620_000,1557546282797429 +2374138435300423201_2600_000_2620_000,1557546279297244 +2374138435300423201_2600_000_2620_000,1557546280797280 +2374138435300423201_2600_000_2620_000,1557546280297328 +2374138435300423201_2600_000_2620_000,1557546289297324 +2374138435300423201_2600_000_2620_000,1557546289797335 +2374138435300423201_2600_000_2620_000,1557546283297421 +2374138435300423201_2600_000_2620_000,1557546293797422 +2374138435300423201_2600_000_2620_000,1557546283797387 +2374138435300423201_2600_000_2620_000,1557546291297442 +2374138435300423201_2600_000_2620_000,1557546292797289 +2374138435300423201_2600_000_2620_000,1557546293297352 +2374138435300423201_2600_000_2620_000,1557546282297473 +2374138435300423201_2600_000_2620_000,1557546290297367 +2374138435300423201_2600_000_2620_000,1557546281797389 +2374138435300423201_2600_000_2620_000,1557546292297340 +2374138435300423201_2600_000_2620_000,1557546291797459 +16050146835908439029_4500_000_4520_000,1557862669362069 +16050146835908439029_4500_000_4520_000,1557862668362475 +16050146835908439029_4500_000_4520_000,1557862680862489 +16050146835908439029_4500_000_4520_000,1557862682362527 +16050146835908439029_4500_000_4520_000,1557862679362451 +16050146835908439029_4500_000_4520_000,1557862669862200 +16050146835908439029_4500_000_4520_000,1557862680362483 +16050146835908439029_4500_000_4520_000,1557862670362417 +16050146835908439029_4500_000_4520_000,1557862668862219 +16050146835908439029_4500_000_4520_000,1557862682862598 +16050146835908439029_4500_000_4520_000,1557862681362512 +16050146835908439029_4500_000_4520_000,1557862672362384 +16050146835908439029_4500_000_4520_000,1557862672862388 +16050146835908439029_4500_000_4520_000,1557862670862532 +16050146835908439029_4500_000_4520_000,1557862671862452 
+16050146835908439029_4500_000_4520_000,1557862681862522 +16050146835908439029_4500_000_4520_000,1557862678862529 +16050146835908439029_4500_000_4520_000,1557862671362531 +16050146835908439029_4500_000_4520_000,1557862679862445 +16050146835908439029_4500_000_4520_000,1557862678362527 +3400465735719851775_1400_000_1420_000,1572136113149873 +3400465735719851775_1400_000_1420_000,1572136101149909 +3400465735719851775_1400_000_1420_000,1572136102649696 +3400465735719851775_1400_000_1420_000,1572136113649695 +3400465735719851775_1400_000_1420_000,1572136110649929 +3400465735719851775_1400_000_1420_000,1572136111649912 +3400465735719851775_1400_000_1420_000,1572136112649991 +3400465735719851775_1400_000_1420_000,1572136114149800 +3400465735719851775_1400_000_1420_000,1572136100649954 +3400465735719851775_1400_000_1420_000,1572136114649927 +3400465735719851775_1400_000_1420_000,1572136103149884 +3400465735719851775_1400_000_1420_000,1572136112149956 +3400465735719851775_1400_000_1420_000,1572136105149812 +3400465735719851775_1400_000_1420_000,1572136103650115 +3400465735719851775_1400_000_1420_000,1572136115150038 +3400465735719851775_1400_000_1420_000,1572136102149777 +3400465735719851775_1400_000_1420_000,1572136111149884 +3400465735719851775_1400_000_1420_000,1572136104150206 +3400465735719851775_1400_000_1420_000,1572136101649856 +3400465735719851775_1400_000_1420_000,1572136104650062 +13347759874869607317_1540_000_1560_000,1557240455162639 +13347759874869607317_1540_000_1560_000,1557240466662585 +13347759874869607317_1540_000_1560_000,1557240462162692 +13347759874869607317_1540_000_1560_000,1557240454162643 +13347759874869607317_1540_000_1560_000,1557240453162676 +13347759874869607317_1540_000_1560_000,1557240464162672 +13347759874869607317_1540_000_1560_000,1557240462662650 +13347759874869607317_1540_000_1560_000,1557240463162605 +13347759874869607317_1540_000_1560_000,1557240466162631 +13347759874869607317_1540_000_1560_000,1557240452662663 +13347759874869607317_1540_000_1560_000,1557240465662701 +13347759874869607317_1540_000_1560_000,1557240464662665 +13347759874869607317_1540_000_1560_000,1557240452162656 +13347759874869607317_1540_000_1560_000,1557240455662596 +13347759874869607317_1540_000_1560_000,1557240456662719 +13347759874869607317_1540_000_1560_000,1557240456162634 +13347759874869607317_1540_000_1560_000,1557240463662676 +13347759874869607317_1540_000_1560_000,1557240465162708 +13347759874869607317_1540_000_1560_000,1557240454662705 +13347759874869607317_1540_000_1560_000,1557240453662610 +792520390268391604_780_000_800_000,1557276779322398 +792520390268391604_780_000_800_000,1557276789819241 +792520390268391604_780_000_800_000,1557276781822470 +792520390268391604_780_000_800_000,1557276790814750 +792520390268391604_780_000_800_000,1557276787822228 +792520390268391604_780_000_800_000,1557276777822335 +792520390268391604_780_000_800_000,1557276779822444 +792520390268391604_780_000_800_000,1557276791809911 +792520390268391604_780_000_800_000,1557276787322288 +792520390268391604_780_000_800_000,1557276781322325 +792520390268391604_780_000_800_000,1557276778822345 +792520390268391604_780_000_800_000,1557276788821750 +792520390268391604_780_000_800_000,1557276791312453 +792520390268391604_780_000_800_000,1557276780822385 +792520390268391604_780_000_800_000,1557276789320894 +792520390268391604_780_000_800_000,1557276788322108 +792520390268391604_780_000_800_000,1557276778322306 +792520390268391604_780_000_800_000,1557276790317051 +792520390268391604_780_000_800_000,1557276780322459 
+792520390268391604_780_000_800_000,1557276777322444 +12555145882162126399_1180_000_1200_000,1558016457446728 +12555145882162126399_1180_000_1200_000,1558016457946731 +12555145882162126399_1180_000_1200_000,1558016443947236 +12555145882162126399_1180_000_1200_000,1558016455946753 +12555145882162126399_1180_000_1200_000,1558016456946665 +12555145882162126399_1180_000_1200_000,1558016445447090 +12555145882162126399_1180_000_1200_000,1558016446446923 +12555145882162126399_1180_000_1200_000,1558016453946646 +12555145882162126399_1180_000_1200_000,1558016446946859 +12555145882162126399_1180_000_1200_000,1558016445947010 +12555145882162126399_1180_000_1200_000,1558016444947161 +12555145882162126399_1180_000_1200_000,1558016455446712 +12555145882162126399_1180_000_1200_000,1558016448446785 +12555145882162126399_1180_000_1200_000,1558016447946858 +12555145882162126399_1180_000_1200_000,1558016458446676 +12555145882162126399_1180_000_1200_000,1558016444447200 +12555145882162126399_1180_000_1200_000,1558016454446636 +12555145882162126399_1180_000_1200_000,1558016454946704 +12555145882162126399_1180_000_1200_000,1558016447446885 +12555145882162126399_1180_000_1200_000,1558016456446713 +2363225200168330815_760_000_780_000,1557363529737707 +2363225200168330815_760_000_780_000,1557363527237757 +2363225200168330815_760_000_780_000,1557363531737831 +2363225200168330815_760_000_780_000,1557363517737551 +2363225200168330815_760_000_780_000,1557363521737684 +2363225200168330815_760_000_780_000,1557363520237879 +2363225200168330815_760_000_780_000,1557363520737853 +2363225200168330815_760_000_780_000,1557363530737748 +2363225200168330815_760_000_780_000,1557363530237741 +2363225200168330815_760_000_780_000,1557363527737802 +2363225200168330815_760_000_780_000,1557363519238031 +2363225200168330815_760_000_780_000,1557363518738025 +2363225200168330815_760_000_780_000,1557363519737941 +2363225200168330815_760_000_780_000,1557363528237770 +2363225200168330815_760_000_780_000,1557363517237246 +2363225200168330815_760_000_780_000,1557363518237827 +2363225200168330815_760_000_780_000,1557363528737726 +2363225200168330815_760_000_780_000,1557363529237740 +2363225200168330815_760_000_780_000,1557363521237784 +2363225200168330815_760_000_780_000,1557363531237786 +3328513486129168664_2080_000_2100_000,1567831757349830 +3328513486129168664_2080_000_2100_000,1567831748349777 +3328513486129168664_2080_000_2100_000,1567831749349801 +3328513486129168664_2080_000_2100_000,1567831759349962 +3328513486129168664_2080_000_2100_000,1567831748849802 +3328513486129168664_2080_000_2100_000,1567831755849834 +3328513486129168664_2080_000_2100_000,1567831745849801 +3328513486129168664_2080_000_2100_000,1567831756349773 +3328513486129168664_2080_000_2100_000,1567831746849942 +3328513486129168664_2080_000_2100_000,1567831750350037 +3328513486129168664_2080_000_2100_000,1567831749849925 +3328513486129168664_2080_000_2100_000,1567831759849964 +3328513486129168664_2080_000_2100_000,1567831747849819 +3328513486129168664_2080_000_2100_000,1567831747349894 +3328513486129168664_2080_000_2100_000,1567831758849989 +3328513486129168664_2080_000_2100_000,1567831758350003 +3328513486129168664_2080_000_2100_000,1567831746349855 +3328513486129168664_2080_000_2100_000,1567831757849928 +3328513486129168664_2080_000_2100_000,1567831756849772 +3328513486129168664_2080_000_2100_000,1567831760349924 +4632556232973423919_2940_000_2960_000,1557266971471958 +4632556232973423919_2940_000_2960_000,1557266969972031 
+4632556232973423919_2940_000_2960_000,1557266970971991 +4632556232973423919_2940_000_2960_000,1557266961970295 +4632556232973423919_2940_000_2960_000,1557266959471575 +4632556232973423919_2940_000_2960_000,1557266973972043 +4632556232973423919_2940_000_2960_000,1557266970471994 +4632556232973423919_2940_000_2960_000,1557266959971091 +4632556232973423919_2940_000_2960_000,1557266969472028 +4632556232973423919_2940_000_2960_000,1557266973471990 +4632556232973423919_2940_000_2960_000,1557266962470698 +4632556232973423919_2940_000_2960_000,1557266972972019 +4632556232973423919_2940_000_2960_000,1557266961470258 +4632556232973423919_2940_000_2960_000,1557266972472025 +4632556232973423919_2940_000_2960_000,1557266963471197 +4632556232973423919_2940_000_2960_000,1557266963971575 +4632556232973423919_2940_000_2960_000,1557266962971008 +4632556232973423919_2940_000_2960_000,1557266960970249 +4632556232973423919_2940_000_2960_000,1557266960470534 +4632556232973423919_2940_000_2960_000,1557266971972014 +7855150647548977812_3900_000_3920_000,1557963222297587 +7855150647548977812_3900_000_3920_000,1557963229797360 +7855150647548977812_3900_000_3920_000,1557963219297462 +7855150647548977812_3900_000_3920_000,1557963222797358 +7855150647548977812_3900_000_3920_000,1557963219797630 +7855150647548977812_3900_000_3920_000,1557963221297729 +7855150647548977812_3900_000_3920_000,1557963228797686 +7855150647548977812_3900_000_3920_000,1557963232797406 +7855150647548977812_3900_000_3920_000,1557963230297234 +7855150647548977812_3900_000_3920_000,1557963230797164 +7855150647548977812_3900_000_3920_000,1557963232297399 +7855150647548977812_3900_000_3920_000,1557963221797707 +7855150647548977812_3900_000_3920_000,1557963231797409 +7855150647548977812_3900_000_3920_000,1557963220297852 +7855150647548977812_3900_000_3920_000,1557963233297506 +7855150647548977812_3900_000_3920_000,1557963218797413 +7855150647548977812_3900_000_3920_000,1557963231297227 +7855150647548977812_3900_000_3920_000,1557963229297664 +7855150647548977812_3900_000_3920_000,1557963220797815 +7855150647548977812_3900_000_3920_000,1557963223297048 +6228701001600487900_720_000_740_000,1557196124797396 +6228701001600487900_720_000_740_000,1557196115797133 +6228701001600487900_720_000_740_000,1557196127297339 +6228701001600487900_720_000_740_000,1557196118797295 +6228701001600487900_720_000_740_000,1557196114797019 +6228701001600487900_720_000_740_000,1557196125297368 +6228701001600487900_720_000_740_000,1557196117797593 +6228701001600487900_720_000_740_000,1557196128297444 +6228701001600487900_720_000_740_000,1557196126797262 +6228701001600487900_720_000_740_000,1557196116297146 +6228701001600487900_720_000_740_000,1557196114297509 +6228701001600487900_720_000_740_000,1557196125797316 +6228701001600487900_720_000_740_000,1557196124297381 +6228701001600487900_720_000_740_000,1557196128797516 +6228701001600487900_720_000_740_000,1557196126297244 +6228701001600487900_720_000_740_000,1557196118297420 +6228701001600487900_720_000_740_000,1557196117297567 +6228701001600487900_720_000_740_000,1557196116797296 +6228701001600487900_720_000_740_000,1557196127797382 +6228701001600487900_720_000_740_000,1557196115297027 +5683383258122801095_1040_000_1060_000,1557363799237684 +5683383258122801095_1040_000_1060_000,1557363798737684 +5683383258122801095_1040_000_1060_000,1557363809737739 +5683383258122801095_1040_000_1060_000,1557363801237681 +5683383258122801095_1040_000_1060_000,1557363808237726 +5683383258122801095_1040_000_1060_000,1557363810237674 
+5683383258122801095_1040_000_1060_000,1557363797237694 +5683383258122801095_1040_000_1060_000,1557363808737723 +5683383258122801095_1040_000_1060_000,1557363801737692 +5683383258122801095_1040_000_1060_000,1557363807737730 +5683383258122801095_1040_000_1060_000,1557363797737739 +5683383258122801095_1040_000_1060_000,1557363800737713 +5683383258122801095_1040_000_1060_000,1557363799737724 +5683383258122801095_1040_000_1060_000,1557363811737549 +5683383258122801095_1040_000_1060_000,1557363798237739 +5683383258122801095_1040_000_1060_000,1557363810737600 +5683383258122801095_1040_000_1060_000,1557363807237704 +5683383258122801095_1040_000_1060_000,1557363811237550 +5683383258122801095_1040_000_1060_000,1557363800237778 +5683383258122801095_1040_000_1060_000,1557363809237695 +14631629219048194483_2720_000_2740_000,1558017994972187 +14631629219048194483_2720_000_2740_000,1558017997472318 +14631629219048194483_2720_000_2740_000,1558017985972625 +14631629219048194483_2720_000_2740_000,1558017985472656 +14631629219048194483_2720_000_2740_000,1558017986972355 +14631629219048194483_2720_000_2740_000,1558017998472347 +14631629219048194483_2720_000_2740_000,1558017996472360 +14631629219048194483_2720_000_2740_000,1558017983971974 +14631629219048194483_2720_000_2740_000,1558017987972319 +14631629219048194483_2720_000_2740_000,1558017984472180 +14631629219048194483_2720_000_2740_000,1558017984972436 +14631629219048194483_2720_000_2740_000,1558017995972332 +14631629219048194483_2720_000_2740_000,1558017997972298 +14631629219048194483_2720_000_2740_000,1558017994472307 +14631629219048194483_2720_000_2740_000,1558017995472189 +14631629219048194483_2720_000_2740_000,1558017988472347 +14631629219048194483_2720_000_2740_000,1558017986472464 +14631629219048194483_2720_000_2740_000,1558017987472321 +14631629219048194483_2720_000_2740_000,1558017996972405 +14631629219048194483_2720_000_2740_000,1558017993972404 +2906594041697319079_3040_000_3060_000,1557267072486781 +2906594041697319079_3040_000_3060_000,1557267060487498 +2906594041697319079_3040_000_3060_000,1557267073986709 +2906594041697319079_3040_000_3060_000,1557267070986791 +2906594041697319079_3040_000_3060_000,1557267059987458 +2906594041697319079_3040_000_3060_000,1557267071486876 +2906594041697319079_3040_000_3060_000,1557267062487451 +2906594041697319079_3040_000_3060_000,1557267063987482 +2906594041697319079_3040_000_3060_000,1557267063487438 +2906594041697319079_3040_000_3060_000,1557267071986868 +2906594041697319079_3040_000_3060_000,1557267072986667 +2906594041697319079_3040_000_3060_000,1557267069487459 +2906594041697319079_3040_000_3060_000,1557267073486626 +2906594041697319079_3040_000_3060_000,1557267062987469 +2906594041697319079_3040_000_3060_000,1557267061487517 +2906594041697319079_3040_000_3060_000,1557267061987452 +2906594041697319079_3040_000_3060_000,1557267060987578 +2906594041697319079_3040_000_3060_000,1557267070487093 +2906594041697319079_3040_000_3060_000,1557267069987397 +2906594041697319079_3040_000_3060_000,1557267059487462 +2383902674438058857_4420_000_4440_000,1567796626524800 +2383902674438058857_4420_000_4440_000,1567796634024717 +2383902674438058857_4420_000_4440_000,1567796623525141 +2383902674438058857_4420_000_4440_000,1567796634524720 +2383902674438058857_4420_000_4440_000,1567796637024790 +2383902674438058857_4420_000_4440_000,1567796633524726 +2383902674438058857_4420_000_4440_000,1567796623025071 +2383902674438058857_4420_000_4440_000,1567796624525076 +2383902674438058857_4420_000_4440_000,1567796627024804 
+2383902674438058857_4420_000_4440_000,1567796627524846 +2383902674438058857_4420_000_4440_000,1567796635024610 +2383902674438058857_4420_000_4440_000,1567796624025081 +2383902674438058857_4420_000_4440_000,1567796625524889 +2383902674438058857_4420_000_4440_000,1567796635524621 +2383902674438058857_4420_000_4440_000,1567796626024858 +2383902674438058857_4420_000_4440_000,1567796636024637 +2383902674438058857_4420_000_4440_000,1567796625024984 +2383902674438058857_4420_000_4440_000,1567796633024733 +2383902674438058857_4420_000_4440_000,1567796637524761 +2383902674438058857_4420_000_4440_000,1567796636524720 +6862795755554967162_2280_000_2300_000,1558152098797641 +6862795755554967162_2280_000_2300_000,1558152096797728 +6862795755554967162_2280_000_2300_000,1558152098297483 +6862795755554967162_2280_000_2300_000,1558152086297472 +6862795755554967162_2280_000_2300_000,1558152088297543 +6862795755554967162_2280_000_2300_000,1558152090297619 +6862795755554967162_2280_000_2300_000,1558152088797546 +6862795755554967162_2280_000_2300_000,1558152096297876 +6862795755554967162_2280_000_2300_000,1558152087797448 +6862795755554967162_2280_000_2300_000,1558152100297819 +6862795755554967162_2280_000_2300_000,1558152089297513 +6862795755554967162_2280_000_2300_000,1558152086797503 +6862795755554967162_2280_000_2300_000,1558152097297600 +6862795755554967162_2280_000_2300_000,1558152099297843 +6862795755554967162_2280_000_2300_000,1558152089797536 +6862795755554967162_2280_000_2300_000,1558152090797668 +6862795755554967162_2280_000_2300_000,1558152099797835 +6862795755554967162_2280_000_2300_000,1558152100797780 +6862795755554967162_2280_000_2300_000,1558152097797483 +6862795755554967162_2280_000_2300_000,1558152087297497 +8085856200343017603_4120_000_4140_000,1557963441810800 +8085856200343017603_4120_000_4140_000,1557963452811392 +8085856200343017603_4120_000_4140_000,1557963442310429 +8085856200343017603_4120_000_4140_000,1557963448811394 +8085856200343017603_4120_000_4140_000,1557963440312587 +8085856200343017603_4120_000_4140_000,1557963452311343 +8085856200343017603_4120_000_4140_000,1557963438812840 +8085856200343017603_4120_000_4140_000,1557963449311428 +8085856200343017603_4120_000_4140_000,1557963450311446 +8085856200343017603_4120_000_4140_000,1557963450811460 +8085856200343017603_4120_000_4140_000,1557963451311480 +8085856200343017603_4120_000_4140_000,1557963441311391 +8085856200343017603_4120_000_4140_000,1557963439312755 +8085856200343017603_4120_000_4140_000,1557963442810720 +8085856200343017603_4120_000_4140_000,1557963453311401 +8085856200343017603_4120_000_4140_000,1557963449811379 +8085856200343017603_4120_000_4140_000,1557963439812771 +8085856200343017603_4120_000_4140_000,1557963443310973 +8085856200343017603_4120_000_4140_000,1557963451811373 +8085856200343017603_4120_000_4140_000,1557963440812062 +15370024704033662533_1240_000_1260_000,1558016507522714 +15370024704033662533_1240_000_1260_000,1558016504022513 +15370024704033662533_1240_000_1260_000,1558016508023021 +15370024704033662533_1240_000_1260_000,1558016516522659 +15370024704033662533_1240_000_1260_000,1558016518522508 +15370024704033662533_1240_000_1260_000,1558016506522488 +15370024704033662533_1240_000_1260_000,1558016516022182 +15370024704033662533_1240_000_1260_000,1558016518022743 +15370024704033662533_1240_000_1260_000,1558016517022970 +15370024704033662533_1240_000_1260_000,1558016514522028 +15370024704033662533_1240_000_1260_000,1558016507022487 +15370024704033662533_1240_000_1260_000,1558016505022580 
+15370024704033662533_1240_000_1260_000,1558016517522896 +15370024704033662533_1240_000_1260_000,1558016506022489 +15370024704033662533_1240_000_1260_000,1558016504522546 +15370024704033662533_1240_000_1260_000,1558016514022344 +15370024704033662533_1240_000_1260_000,1558016505522521 +15370024704033662533_1240_000_1260_000,1558016515022010 +15370024704033662533_1240_000_1260_000,1558016515522158 +15370024704033662533_1240_000_1260_000,1558016508523056 +13887882285811432765_740_000_760_000,1557427670612416 +13887882285811432765_740_000_760_000,1557427657104220 +13887882285811432765_740_000_760_000,1557427656098234 +13887882285811432765_740_000_760_000,1557427670112488 +13887882285811432765_740_000_760_000,1557427657607241 +13887882285811432765_740_000_760_000,1557427659611359 +13887882285811432765_740_000_760_000,1557427668112690 +13887882285811432765_740_000_760_000,1557427669112938 +13887882285811432765_740_000_760_000,1557427668612977 +13887882285811432765_740_000_760_000,1557427667112287 +13887882285811432765_740_000_760_000,1557427667612500 +13887882285811432765_740_000_760_000,1557427660611691 +13887882285811432765_740_000_760_000,1557427658610018 +13887882285811432765_740_000_760_000,1557427660111611 +13887882285811432765_740_000_760_000,1557427658109105 +13887882285811432765_740_000_760_000,1557427656601007 +13887882285811432765_740_000_760_000,1557427659110867 +13887882285811432765_740_000_760_000,1557427666112373 +13887882285811432765_740_000_760_000,1557427666612282 +13887882285811432765_740_000_760_000,1557427669612708 +7886090431228432618_1060_000_1080_000,1557427978090024 +7886090431228432618_1060_000_1080_000,1557427986587357 +7886090431228432618_1060_000_1080_000,1557427979089033 +7886090431228432618_1060_000_1080_000,1557427980587825 +7886090431228432618_1060_000_1080_000,1557427988586899 +7886090431228432618_1060_000_1080_000,1557427989086904 +7886090431228432618_1060_000_1080_000,1557427977091041 +7886090431228432618_1060_000_1080_000,1557427976591045 +7886090431228432618_1060_000_1080_000,1557427987587267 +7886090431228432618_1060_000_1080_000,1557427980088231 +7886090431228432618_1060_000_1080_000,1557427987087350 +7886090431228432618_1060_000_1080_000,1557427990587971 +7886090431228432618_1060_000_1080_000,1557427978589494 +7886090431228432618_1060_000_1080_000,1557427979588581 +7886090431228432618_1060_000_1080_000,1557427977590623 +7886090431228432618_1060_000_1080_000,1557427990087424 +7886090431228432618_1060_000_1080_000,1557427988087048 +7886090431228432618_1060_000_1080_000,1557427989587118 +7886090431228432618_1060_000_1080_000,1557427986087349 +7886090431228432618_1060_000_1080_000,1557427976090905 +11096867396355523348_1460_000_1480_000,1557240385647315 +11096867396355523348_1460_000_1480_000,1557240376147639 +11096867396355523348_1460_000_1480_000,1557240383646953 +11096867396355523348_1460_000_1480_000,1557240373147399 +11096867396355523348_1460_000_1480_000,1557240385147284 +11096867396355523348_1460_000_1480_000,1557240383147053 +11096867396355523348_1460_000_1480_000,1557240375647537 +11096867396355523348_1460_000_1480_000,1557240376647555 +11096867396355523348_1460_000_1480_000,1557240382647278 +11096867396355523348_1460_000_1480_000,1557240374147381 +11096867396355523348_1460_000_1480_000,1557240373647402 +11096867396355523348_1460_000_1480_000,1557240382147351 +11096867396355523348_1460_000_1480_000,1557240375147338 +11096867396355523348_1460_000_1480_000,1557240386147261 +11096867396355523348_1460_000_1480_000,1557240384647073 
+11096867396355523348_1460_000_1480_000,1557240372647451 +11096867396355523348_1460_000_1480_000,1557240384146914 +11096867396355523348_1460_000_1480_000,1557240386647265 +11096867396355523348_1460_000_1480_000,1557240374647330 +11096867396355523348_1460_000_1480_000,1557240372147515 +5993415832220804439_1020_000_1040_000,1557427938162330 +5993415832220804439_1020_000_1040_000,1557427940162319 +5993415832220804439_1020_000_1040_000,1557427937662244 +5993415832220804439_1020_000_1040_000,1557427946662314 +5993415832220804439_1020_000_1040_000,1557427946162333 +5993415832220804439_1020_000_1040_000,1557427938662319 +5993415832220804439_1020_000_1040_000,1557427948162669 +5993415832220804439_1020_000_1040_000,1557427947162431 +5993415832220804439_1020_000_1040_000,1557427947662672 +5993415832220804439_1020_000_1040_000,1557427949662420 +5993415832220804439_1020_000_1040_000,1557427950162677 +5993415832220804439_1020_000_1040_000,1557427948662689 +5993415832220804439_1020_000_1040_000,1557427950662930 +5993415832220804439_1020_000_1040_000,1557427940662334 +5993415832220804439_1020_000_1040_000,1557427939662313 +5993415832220804439_1020_000_1040_000,1557427936661931 +5993415832220804439_1020_000_1040_000,1557427936161893 +5993415832220804439_1020_000_1040_000,1557427939162305 +5993415832220804439_1020_000_1040_000,1557427937162046 +5993415832220804439_1020_000_1040_000,1557427949162510 +684234579698396203_2540_000_2560_000,1557546221272675 +684234579698396203_2540_000_2560_000,1557546223272676 +684234579698396203_2540_000_2560_000,1557546229272374 +684234579698396203_2540_000_2560_000,1557546232272632 +684234579698396203_2540_000_2560_000,1557546222772668 +684234579698396203_2540_000_2560_000,1557546233775554 +684234579698396203_2540_000_2560_000,1557546230272562 +684234579698396203_2540_000_2560_000,1557546219772629 +684234579698396203_2540_000_2560_000,1557546231272784 +684234579698396203_2540_000_2560_000,1557546221772604 +684234579698396203_2540_000_2560_000,1557546229772445 +684234579698396203_2540_000_2560_000,1557546233273525 +684234579698396203_2540_000_2560_000,1557546220772768 +684234579698396203_2540_000_2560_000,1557546230772716 +684234579698396203_2540_000_2560_000,1557546223772715 +684234579698396203_2540_000_2560_000,1557546231772736 +684234579698396203_2540_000_2560_000,1557546232772749 +684234579698396203_2540_000_2560_000,1557546222272631 +684234579698396203_2540_000_2560_000,1557546220272744 +684234579698396203_2540_000_2560_000,1557546219272563 +16367045247642649300_3060_000_3080_000,1557267091988308 +16367045247642649300_3060_000_3080_000,1557267090487889 +16367045247642649300_3060_000_3080_000,1557267089487659 +16367045247642649300_3060_000_3080_000,1557267093487520 +16367045247642649300_3060_000_3080_000,1557267093987555 +16367045247642649300_3060_000_3080_000,1557267082986958 +16367045247642649300_3060_000_3080_000,1557267080987657 +16367045247642649300_3060_000_3080_000,1557267083987136 +16367045247642649300_3060_000_3080_000,1557267082487269 +16367045247642649300_3060_000_3080_000,1557267080487535 +16367045247642649300_3060_000_3080_000,1557267081987538 +16367045247642649300_3060_000_3080_000,1557267083486940 +16367045247642649300_3060_000_3080_000,1557267079987387 +16367045247642649300_3060_000_3080_000,1557267079487248 +16367045247642649300_3060_000_3080_000,1557267089987808 +16367045247642649300_3060_000_3080_000,1557267092987360 +16367045247642649300_3060_000_3080_000,1557267092487706 +16367045247642649300_3060_000_3080_000,1557267090987837 
+16367045247642649300_3060_000_3080_000,1557267081487585 +16367045247642649300_3060_000_3080_000,1557267091488223 +10940141908690367388_4420_000_4440_000,1557325501087726 +10940141908690367388_4420_000_4440_000,1557325493087410 +10940141908690367388_4420_000_4440_000,1557325490587432 +10940141908690367388_4420_000_4440_000,1557325503087783 +10940141908690367388_4420_000_4440_000,1557325501587681 +10940141908690367388_4420_000_4440_000,1557325492087435 +10940141908690367388_4420_000_4440_000,1557325503587721 +10940141908690367388_4420_000_4440_000,1557325491587341 +10940141908690367388_4420_000_4440_000,1557325489587388 +10940141908690367388_4420_000_4440_000,1557325489087347 +10940141908690367388_4420_000_4440_000,1557325490087423 +10940141908690367388_4420_000_4440_000,1557325499587637 +10940141908690367388_4420_000_4440_000,1557325491087364 +10940141908690367388_4420_000_4440_000,1557325493587440 +10940141908690367388_4420_000_4440_000,1557325502087695 +10940141908690367388_4420_000_4440_000,1557325500087574 +10940141908690367388_4420_000_4440_000,1557325502587743 +10940141908690367388_4420_000_4440_000,1557325492587377 +10940141908690367388_4420_000_4440_000,1557325500587663 +10940141908690367388_4420_000_4440_000,1557325499087629 +15865907199900332614_760_000_780_000,1559313080537412 +15865907199900332614_760_000_780_000,1559313078037376 +15865907199900332614_760_000_780_000,1559313080037454 +15865907199900332614_760_000_780_000,1559313079537512 +15865907199900332614_760_000_780_000,1559313078537459 +15865907199900332614_760_000_780_000,1559313089537338 +15865907199900332614_760_000_780_000,1559313077537461 +15865907199900332614_760_000_780_000,1559313091537372 +15865907199900332614_760_000_780_000,1559313081037481 +15865907199900332614_760_000_780_000,1559313087537628 +15865907199900332614_760_000_780_000,1559313077037424 +15865907199900332614_760_000_780_000,1559313079037502 +15865907199900332614_760_000_780_000,1559313090537600 +15865907199900332614_760_000_780_000,1559313089037261 +15865907199900332614_760_000_780_000,1559313088037246 +15865907199900332614_760_000_780_000,1559313091037429 +15865907199900332614_760_000_780_000,1559313087037841 +15865907199900332614_760_000_780_000,1559313081537390 +15865907199900332614_760_000_780_000,1559313090037603 +15865907199900332614_760_000_780_000,1559313088537022 +16418654553014119039_4340_000_4360_000,1557548032247842 +16418654553014119039_4340_000_4360_000,1557548021247344 +16418654553014119039_4340_000_4360_000,1557548020747349 +16418654553014119039_4340_000_4360_000,1557548019247610 +16418654553014119039_4340_000_4360_000,1557548019747557 +16418654553014119039_4340_000_4360_000,1557548022747669 +16418654553014119039_4340_000_4360_000,1557548032748077 +16418654553014119039_4340_000_4360_000,1557548022247554 +16418654553014119039_4340_000_4360_000,1557548020247425 +16418654553014119039_4340_000_4360_000,1557548031247283 +16418654553014119039_4340_000_4360_000,1557548031747513 +16418654553014119039_4340_000_4360_000,1557548021747406 +16418654553014119039_4340_000_4360_000,1557548023747615 +16418654553014119039_4340_000_4360_000,1557548029247116 +16418654553014119039_4340_000_4360_000,1557548030247196 +16418654553014119039_4340_000_4360_000,1557548030747259 +16418654553014119039_4340_000_4360_000,1557548023247650 +16418654553014119039_4340_000_4360_000,1557548029747131 +16418654553014119039_4340_000_4360_000,1557548033248036 +16418654553014119039_4340_000_4360_000,1557548033747756 +2795127582672852315_4140_000_4160_000,1557963462811402 
+2795127582672852315_4140_000_4160_000,1557963459811328 +2795127582672852315_4140_000_4160_000,1557963461311393 +2795127582672852315_4140_000_4160_000,1557963468811200 +2795127582672852315_4140_000_4160_000,1557963460311323 +2795127582672852315_4140_000_4160_000,1557963472811254 +2795127582672852315_4140_000_4160_000,1557963459311361 +2795127582672852315_4140_000_4160_000,1557963472311331 +2795127582672852315_4140_000_4160_000,1557963469811253 +2795127582672852315_4140_000_4160_000,1557963473311173 +2795127582672852315_4140_000_4160_000,1557963458811300 +2795127582672852315_4140_000_4160_000,1557963461811317 +2795127582672852315_4140_000_4160_000,1557963460811362 +2795127582672852315_4140_000_4160_000,1557963471811333 +2795127582672852315_4140_000_4160_000,1557963462311357 +2795127582672852315_4140_000_4160_000,1557963463311436 +2795127582672852315_4140_000_4160_000,1557963469311205 +2795127582672852315_4140_000_4160_000,1557963470811412 +2795127582672852315_4140_000_4160_000,1557963471311372 +2795127582672852315_4140_000_4160_000,1557963470311335 +10084636266401282188_1120_000_1140_000,1558407846397548 +10084636266401282188_1120_000_1140_000,1558407843897545 +10084636266401282188_1120_000_1140_000,1558407844397659 +10084636266401282188_1120_000_1140_000,1558407855397331 +10084636266401282188_1120_000_1140_000,1558407854397201 +10084636266401282188_1120_000_1140_000,1558407856897229 +10084636266401282188_1120_000_1140_000,1558407843397428 +10084636266401282188_1120_000_1140_000,1558407857397306 +10084636266401282188_1120_000_1140_000,1558407845897532 +10084636266401282188_1120_000_1140_000,1558407846897582 +10084636266401282188_1120_000_1140_000,1558407855897228 +10084636266401282188_1120_000_1140_000,1558407852897242 +10084636266401282188_1120_000_1140_000,1558407845397550 +10084636266401282188_1120_000_1140_000,1558407856397205 +10084636266401282188_1120_000_1140_000,1558407853897063 +10084636266401282188_1120_000_1140_000,1558407844897621 +10084636266401282188_1120_000_1140_000,1558407847397707 +10084636266401282188_1120_000_1140_000,1558407854897351 +10084636266401282188_1120_000_1140_000,1558407853397165 +10084636266401282188_1120_000_1140_000,1558407842897345 +2709541197299883157_1140_000_1160_000,1558407875897558 +2709541197299883157_1140_000_1160_000,1558407877397532 +2709541197299883157_1140_000_1160_000,1558407873397482 +2709541197299883157_1140_000_1160_000,1558407866897397 +2709541197299883157_1140_000_1160_000,1558407865397535 +2709541197299883157_1140_000_1160_000,1558407862897305 +2709541197299883157_1140_000_1160_000,1558407865897598 +2709541197299883157_1140_000_1160_000,1558407867397220 +2709541197299883157_1140_000_1160_000,1558407866397538 +2709541197299883157_1140_000_1160_000,1558407874397414 +2709541197299883157_1140_000_1160_000,1558407876897664 +2709541197299883157_1140_000_1160_000,1558407876397661 +2709541197299883157_1140_000_1160_000,1558407874897399 +2709541197299883157_1140_000_1160_000,1558407864897431 +2709541197299883157_1140_000_1160_000,1558407863397357 +2709541197299883157_1140_000_1160_000,1558407863897366 +2709541197299883157_1140_000_1160_000,1558407873897410 +2709541197299883157_1140_000_1160_000,1558407872897442 +2709541197299883157_1140_000_1160_000,1558407875397469 +2709541197299883157_1140_000_1160_000,1558407864397400 +13849332693800388551_960_000_980_000,1557264991038089 +13849332693800388551_960_000_980_000,1557264981037854 +13849332693800388551_960_000_980_000,1557264980537799 +13849332693800388551_960_000_980_000,1557264990038023 
+13849332693800388551_960_000_980_000,1557264981537583 +13849332693800388551_960_000_980_000,1557264990537919 +13849332693800388551_960_000_980_000,1557264989537908 +13849332693800388551_960_000_980_000,1557264993538114 +13849332693800388551_960_000_980_000,1557264992037794 +13849332693800388551_960_000_980_000,1557264982537109 +13849332693800388551_960_000_980_000,1557264991537831 +13849332693800388551_960_000_980_000,1557264983537470 +13849332693800388551_960_000_980_000,1557264984037733 +13849332693800388551_960_000_980_000,1557264980037600 +13849332693800388551_960_000_980_000,1557264979537445 +13849332693800388551_960_000_980_000,1557264983037216 +13849332693800388551_960_000_980_000,1557264992537947 +13849332693800388551_960_000_980_000,1557264993038041 +13849332693800388551_960_000_980_000,1557264994038073 +13849332693800388551_960_000_980_000,1557264982037313 +10649066155322078676_1660_000_1680_000,1557240584087768 +10649066155322078676_1660_000_1680_000,1557240585587367 +10649066155322078676_1660_000_1680_000,1557240573663029 +10649066155322078676_1660_000_1680_000,1557240584587589 +10649066155322078676_1660_000_1680_000,1557240586087486 +10649066155322078676_1660_000_1680_000,1557240585087446 +10649066155322078676_1660_000_1680_000,1557240575671293 +10649066155322078676_1660_000_1680_000,1557240576677868 +10649066155322078676_1660_000_1680_000,1557240576174505 +10649066155322078676_1660_000_1680_000,1557240582087726 +10649066155322078676_1660_000_1680_000,1557240574666010 +10649066155322078676_1660_000_1680_000,1557240572662201 +10649066155322078676_1660_000_1680_000,1557240572162174 +10649066155322078676_1660_000_1680_000,1557240583587849 +10649066155322078676_1660_000_1680_000,1557240573162360 +10649066155322078676_1660_000_1680_000,1557240582587734 +10649066155322078676_1660_000_1680_000,1557240586587594 +10649066155322078676_1660_000_1680_000,1557240574164269 +10649066155322078676_1660_000_1680_000,1557240575168313 +10649066155322078676_1660_000_1680_000,1557240583087847 +14386836877680112549_4460_000_4480_000,1559179974137579 +14386836877680112549_4460_000_4480_000,1559179965637497 +14386836877680112549_4460_000_4480_000,1559179975137452 +14386836877680112549_4460_000_4480_000,1559179965137491 +14386836877680112549_4460_000_4480_000,1559179967137475 +14386836877680112549_4460_000_4480_000,1559179968137424 +14386836877680112549_4460_000_4480_000,1559179968637431 +14386836877680112549_4460_000_4480_000,1559179977137567 +14386836877680112549_4460_000_4480_000,1559179977637531 +14386836877680112549_4460_000_4480_000,1559179974637544 +14386836877680112549_4460_000_4480_000,1559179975637343 +14386836877680112549_4460_000_4480_000,1559179966637434 +14386836877680112549_4460_000_4480_000,1559179964137409 +14386836877680112549_4460_000_4480_000,1559179967637439 +14386836877680112549_4460_000_4480_000,1559179976637532 +14386836877680112549_4460_000_4480_000,1559179978137338 +14386836877680112549_4460_000_4480_000,1559179978637228 +14386836877680112549_4460_000_4480_000,1559179964637420 +14386836877680112549_4460_000_4480_000,1559179966137487 +14386836877680112549_4460_000_4480_000,1559179976137422 +1703056599550681101_4380_000_4400_000,1557548063747285 +1703056599550681101_4380_000_4400_000,1557548069747442 +1703056599550681101_4380_000_4400_000,1557548060747134 +1703056599550681101_4380_000_4400_000,1557548059247135 +1703056599550681101_4380_000_4400_000,1557548062747196 +1703056599550681101_4380_000_4400_000,1557548061747138 +1703056599550681101_4380_000_4400_000,1557548059747103 
+1703056599550681101_4380_000_4400_000,1557548071747485 +1703056599550681101_4380_000_4400_000,1557548062247198 +1703056599550681101_4380_000_4400_000,1557548071247487 +1703056599550681101_4380_000_4400_000,1557548070747406 +1703056599550681101_4380_000_4400_000,1557548073247485 +1703056599550681101_4380_000_4400_000,1557548072747519 +1703056599550681101_4380_000_4400_000,1557548061247054 +1703056599550681101_4380_000_4400_000,1557548070247363 +1703056599550681101_4380_000_4400_000,1557548063247235 +1703056599550681101_4380_000_4400_000,1557548060247093 +1703056599550681101_4380_000_4400_000,1557548072247479 +1703056599550681101_4380_000_4400_000,1557548069247567 +1703056599550681101_4380_000_4400_000,1557548073747477 +9806821842001738961_4460_000_4480_000,1557548152749185 +9806821842001738961_4460_000_4480_000,1557548152249507 +9806821842001738961_4460_000_4480_000,1557548139248527 +9806821842001738961_4460_000_4480_000,1557548139748613 +9806821842001738961_4460_000_4480_000,1557548149748710 +9806821842001738961_4460_000_4480_000,1557548143745069 +9806821842001738961_4460_000_4480_000,1557548141247955 +9806821842001738961_4460_000_4480_000,1557548150749859 +9806821842001738961_4460_000_4480_000,1557548153248836 +9806821842001738961_4460_000_4480_000,1557548142746485 +9806821842001738961_4460_000_4480_000,1557548151749796 +9806821842001738961_4460_000_4480_000,1557548140248466 +9806821842001738961_4460_000_4480_000,1557548143245860 +9806821842001738961_4460_000_4480_000,1557548141747585 +9806821842001738961_4460_000_4480_000,1557548149247731 +9806821842001738961_4460_000_4480_000,1557548153748607 +9806821842001738961_4460_000_4480_000,1557548142247085 +9806821842001738961_4460_000_4480_000,1557548150249485 +9806821842001738961_4460_000_4480_000,1557548151249946 +9806821842001738961_4460_000_4480_000,1557548140748234 +4008112367880337022_3680_000_3700_000,1569854705325111 +4008112367880337022_3680_000_3700_000,1569854713325049 +4008112367880337022_3680_000_3700_000,1569854717325186 +4008112367880337022_3680_000_3700_000,1569854717825065 +4008112367880337022_3680_000_3700_000,1569854716325211 +4008112367880337022_3680_000_3700_000,1569854716825240 +4008112367880337022_3680_000_3700_000,1569854714325134 +4008112367880337022_3680_000_3700_000,1569854706825153 +4008112367880337022_3680_000_3700_000,1569854704325165 +4008112367880337022_3680_000_3700_000,1569854714825260 +4008112367880337022_3680_000_3700_000,1569854706325106 +4008112367880337022_3680_000_3700_000,1569854705825068 +4008112367880337022_3680_000_3700_000,1569854704825168 +4008112367880337022_3680_000_3700_000,1569854707325043 +4008112367880337022_3680_000_3700_000,1569854707824970 +4008112367880337022_3680_000_3700_000,1569854715325243 +4008112367880337022_3680_000_3700_000,1569854715825244 +4008112367880337022_3680_000_3700_000,1569854703825152 +4008112367880337022_3680_000_3700_000,1569854713825019 +4008112367880337022_3680_000_3700_000,1569854703325067 +3275806206237593341_1260_000_1280_000,1557544942819254 +3275806206237593341_1260_000_1280_000,1557544950297870 +3275806206237593341_1260_000_1280_000,1557544951297442 +3275806206237593341_1260_000_1280_000,1557544951797369 +3275806206237593341_1260_000_1280_000,1557544950797707 +3275806206237593341_1260_000_1280_000,1557544952297300 +3275806206237593341_1260_000_1280_000,1557544949299141 +3275806206237593341_1260_000_1280_000,1557544940320359 +3275806206237593341_1260_000_1280_000,1557544940820248 +3275806206237593341_1260_000_1280_000,1557544942319553 
+3275806206237593341_1260_000_1280_000,1557544941320001 +3275806206237593341_1260_000_1280_000,1557544949798358 +3275806206237593341_1260_000_1280_000,1557544939320505 +3275806206237593341_1260_000_1280_000,1557544953797222 +3275806206237593341_1260_000_1280_000,1557544953297262 +3275806206237593341_1260_000_1280_000,1557544943318943 +3275806206237593341_1260_000_1280_000,1557544941819785 +3275806206237593341_1260_000_1280_000,1557544943818501 +3275806206237593341_1260_000_1280_000,1557544939820502 +3275806206237593341_1260_000_1280_000,1557544952797231 +16942495693882305487_4340_000_4360_000,1559179844137784 +16942495693882305487_4340_000_4360_000,1559179844637716 +16942495693882305487_4340_000_4360_000,1559179846637950 +16942495693882305487_4340_000_4360_000,1559179855137769 +16942495693882305487_4340_000_4360_000,1559179854137701 +16942495693882305487_4340_000_4360_000,1559179846137883 +16942495693882305487_4340_000_4360_000,1559179845637785 +16942495693882305487_4340_000_4360_000,1559179857137780 +16942495693882305487_4340_000_4360_000,1559179848137768 +16942495693882305487_4340_000_4360_000,1559179847637805 +16942495693882305487_4340_000_4360_000,1559179848637749 +16942495693882305487_4340_000_4360_000,1559179855637782 +16942495693882305487_4340_000_4360_000,1559179845137739 +16942495693882305487_4340_000_4360_000,1559179858137740 +16942495693882305487_4340_000_4360_000,1559179856637781 +16942495693882305487_4340_000_4360_000,1559179854637737 +16942495693882305487_4340_000_4360_000,1559179857637814 +16942495693882305487_4340_000_4360_000,1559179856137797 +16942495693882305487_4340_000_4360_000,1559179858637797 +16942495693882305487_4340_000_4360_000,1559179847137875 +5764319372514665214_2480_000_2500_000,1558034992472993 +5764319372514665214_2480_000_2500_000,1558034983472954 +5764319372514665214_2480_000_2500_000,1558034982972924 +5764319372514665214_2480_000_2500_000,1558034989972975 +5764319372514665214_2480_000_2500_000,1558034981473075 +5764319372514665214_2480_000_2500_000,1558034990472969 +5764319372514665214_2480_000_2500_000,1558034984472951 +5764319372514665214_2480_000_2500_000,1558034991472965 +5764319372514665214_2480_000_2500_000,1558034980973024 +5764319372514665214_2480_000_2500_000,1558034979972956 +5764319372514665214_2480_000_2500_000,1558034981973026 +5764319372514665214_2480_000_2500_000,1558034991973002 +5764319372514665214_2480_000_2500_000,1558034990972960 +5764319372514665214_2480_000_2500_000,1558034993973011 +5764319372514665214_2480_000_2500_000,1558034982472951 +5764319372514665214_2480_000_2500_000,1558034983972951 +5764319372514665214_2480_000_2500_000,1558034993473006 +5764319372514665214_2480_000_2500_000,1558034980472954 +5764319372514665214_2480_000_2500_000,1558034994473066 +5764319372514665214_2480_000_2500_000,1558034992972995 +3485136235103477552_600_000_620_000,1559312920037900 +3485136235103477552_600_000_620_000,1559312918536992 +3485136235103477552_600_000_620_000,1559312929037490 +3485136235103477552_600_000_620_000,1559312931537400 +3485136235103477552_600_000_620_000,1559312921537438 +3485136235103477552_600_000_620_000,1559312917537421 +3485136235103477552_600_000_620_000,1559312927536888 +3485136235103477552_600_000_620_000,1559312921037521 +3485136235103477552_600_000_620_000,1559312919537665 +3485136235103477552_600_000_620_000,1559312928037154 +3485136235103477552_600_000_620_000,1559312930537328 +3485136235103477552_600_000_620_000,1559312917037757 +3485136235103477552_600_000_620_000,1559312930037396 
+3485136235103477552_600_000_620_000,1559312918037188 +3485136235103477552_600_000_620_000,1559312929537548 +3485136235103477552_600_000_620_000,1559312927037001 +3485136235103477552_600_000_620_000,1559312928537375 +3485136235103477552_600_000_620_000,1559312931037329 +3485136235103477552_600_000_620_000,1559312919037170 +3485136235103477552_600_000_620_000,1559312920537711 +13732041959462600641_720_000_740_000,1558742853976814 +13732041959462600641_720_000_740_000,1558742855976028 +13732041959462600641_720_000_740_000,1558742843475326 +13732041959462600641_720_000_740_000,1558742854976703 +13732041959462600641_720_000_740_000,1558742843975547 +13732041959462600641_720_000_740_000,1558742846475978 +13732041959462600641_720_000_740_000,1558742844975697 +13732041959462600641_720_000_740_000,1558742856975912 +13732041959462600641_720_000_740_000,1558742855476179 +13732041959462600641_720_000_740_000,1558742842975141 +13732041959462600641_720_000_740_000,1558742847476056 +13732041959462600641_720_000_740_000,1558742857475609 +13732041959462600641_720_000_740_000,1558742844475636 +13732041959462600641_720_000_740_000,1558742845475848 +13732041959462600641_720_000_740_000,1558742845975911 +13732041959462600641_720_000_740_000,1558742846976015 +13732041959462600641_720_000_740_000,1558742854477097 +13732041959462600641_720_000_740_000,1558742852976440 +13732041959462600641_720_000_740_000,1558742853476695 +13732041959462600641_720_000_740_000,1558742856476018 +8684065200957554260_2700_000_2720_000,1566246362851376 +8684065200957554260_2700_000_2720_000,1566246374351315 +8684065200957554260_2700_000_2720_000,1566246373851362 +8684065200957554260_2700_000_2720_000,1566246372351287 +8684065200957554260_2700_000_2720_000,1566246363351451 +8684065200957554260_2700_000_2720_000,1566246362351295 +8684065200957554260_2700_000_2720_000,1566246363851429 +8684065200957554260_2700_000_2720_000,1566246366351318 +8684065200957554260_2700_000_2720_000,1566246375351264 +8684065200957554260_2700_000_2720_000,1566246373351328 +8684065200957554260_2700_000_2720_000,1566246376351894 +8684065200957554260_2700_000_2720_000,1566246376852628 +8684065200957554260_2700_000_2720_000,1566246364851337 +8684065200957554260_2700_000_2720_000,1566246375851419 +8684065200957554260_2700_000_2720_000,1566246365351325 +8684065200957554260_2700_000_2720_000,1566246366851318 +8684065200957554260_2700_000_2720_000,1566246365851320 +8684065200957554260_2700_000_2720_000,1566246364351329 +8684065200957554260_2700_000_2720_000,1566246372851306 +8684065200957554260_2700_000_2720_000,1566246374851263 +10410418118434245359_5140_000_5160_000,1557326223047734 +10410418118434245359_5140_000_5160_000,1557326221547648 +10410418118434245359_5140_000_5160_000,1557326223547764 +10410418118434245359_5140_000_5160_000,1557326209047560 +10410418118434245359_5140_000_5160_000,1557326213047602 +10410418118434245359_5140_000_5160_000,1557326212047572 +10410418118434245359_5140_000_5160_000,1557326221047770 +10410418118434245359_5140_000_5160_000,1557326211047663 +10410418118434245359_5140_000_5160_000,1557326211547653 +10410418118434245359_5140_000_5160_000,1557326220547772 +10410418118434245359_5140_000_5160_000,1557326212547575 +10410418118434245359_5140_000_5160_000,1557326209547585 +10410418118434245359_5140_000_5160_000,1557326210047617 +10410418118434245359_5140_000_5160_000,1557326220047729 +10410418118434245359_5140_000_5160_000,1557326222047648 +10410418118434245359_5140_000_5160_000,1557326222547699 
+10410418118434245359_5140_000_5160_000,1557326219047730 +10410418118434245359_5140_000_5160_000,1557326219547770 +10410418118434245359_5140_000_5160_000,1557326210547626 +10410418118434245359_5140_000_5160_000,1557326213547578 +7240042450405902042_580_000_600_000,1559312901037775 +7240042450405902042_580_000_600_000,1559312897037515 +7240042450405902042_580_000_600_000,1559312899537484 +7240042450405902042_580_000_600_000,1559312898537394 +7240042450405902042_580_000_600_000,1559312911537589 +7240042450405902042_580_000_600_000,1559312900037413 +7240042450405902042_580_000_600_000,1559312907037317 +7240042450405902042_580_000_600_000,1559312901538082 +7240042450405902042_580_000_600_000,1559312909537272 +7240042450405902042_580_000_600_000,1559312908537793 +7240042450405902042_580_000_600_000,1559312899037443 +7240042450405902042_580_000_600_000,1559312910036813 +7240042450405902042_580_000_600_000,1559312910537019 +7240042450405902042_580_000_600_000,1559312908037618 +7240042450405902042_580_000_600_000,1559312909037663 +7240042450405902042_580_000_600_000,1559312911037369 +7240042450405902042_580_000_600_000,1559312898037440 +7240042450405902042_580_000_600_000,1559312900537375 +7240042450405902042_580_000_600_000,1559312897537487 +7240042450405902042_580_000_600_000,1559312907537791 +5585555620508986875_720_000_740_000,1559313037538117 +5585555620508986875_720_000_740_000,1559313050537687 +5585555620508986875_720_000_740_000,1559313047537497 +5585555620508986875_720_000_740_000,1559313048037350 +5585555620508986875_720_000_740_000,1559313040037581 +5585555620508986875_720_000_740_000,1559313039037173 +5585555620508986875_720_000_740_000,1559313038037778 +5585555620508986875_720_000_740_000,1559313051537445 +5585555620508986875_720_000_740_000,1559313040537431 +5585555620508986875_720_000_740_000,1559313047037528 +5585555620508986875_720_000_740_000,1559313049537681 +5585555620508986875_720_000_740_000,1559313048537310 +5585555620508986875_720_000_740_000,1559313041537128 +5585555620508986875_720_000_740_000,1559313049037464 +5585555620508986875_720_000_740_000,1559313037038225 +5585555620508986875_720_000_740_000,1559313041037197 +5585555620508986875_720_000_740_000,1559313051037544 +5585555620508986875_720_000_740_000,1559313050037678 +5585555620508986875_720_000_740_000,1559313038537390 +5585555620508986875_720_000_740_000,1559313039537279 +2714318267497393311_480_000_500_000,1558150298237122 +2714318267497393311_480_000_500_000,1558150287237469 +2714318267497393311_480_000_500_000,1558150290237709 +2714318267497393311_480_000_500_000,1558150296737452 +2714318267497393311_480_000_500_000,1558150287737484 +2714318267497393311_480_000_500_000,1558150299237252 +2714318267497393311_480_000_500_000,1558150288237628 +2714318267497393311_480_000_500_000,1558150300237538 +2714318267497393311_480_000_500_000,1558150297737232 +2714318267497393311_480_000_500_000,1558150289737802 +2714318267497393311_480_000_500_000,1558150290737726 +2714318267497393311_480_000_500_000,1558150296237346 +2714318267497393311_480_000_500_000,1558150297237200 +2714318267497393311_480_000_500_000,1558150288737667 +2714318267497393311_480_000_500_000,1558150286237588 +2714318267497393311_480_000_500_000,1558150289237769 +2714318267497393311_480_000_500_000,1558150286737552 +2714318267497393311_480_000_500_000,1558150298737101 +2714318267497393311_480_000_500_000,1558150299737398 +2714318267497393311_480_000_500_000,1558150300737648 +13790309965076620852_6520_000_6540_000,1574126957899706 
+13790309965076620852_6520_000_6540_000,1574126959900038 +13790309965076620852_6520_000_6540_000,1574126955399851 +13790309965076620852_6520_000_6540_000,1574126968399982 +13790309965076620852_6520_000_6540_000,1574126965399821 +13790309965076620852_6520_000_6540_000,1574126958399589 +13790309965076620852_6520_000_6540_000,1574126957399943 +13790309965076620852_6520_000_6540_000,1574126967399978 +13790309965076620852_6520_000_6540_000,1574126958899663 +13790309965076620852_6520_000_6540_000,1574126956399869 +13790309965076620852_6520_000_6540_000,1574126966400006 +13790309965076620852_6520_000_6540_000,1574126956899942 +13790309965076620852_6520_000_6540_000,1574126968900008 +13790309965076620852_6520_000_6540_000,1574126966900090 +13790309965076620852_6520_000_6540_000,1574126959399883 +13790309965076620852_6520_000_6540_000,1574126965899849 +13790309965076620852_6520_000_6540_000,1574126967900033 +13790309965076620852_6520_000_6540_000,1574126955899899 +13790309965076620852_6520_000_6540_000,1574126969400087 +13790309965076620852_6520_000_6540_000,1574126969900021 +17387485694427326992_760_000_780_000,1557843958062722 +17387485694427326992_760_000_780_000,1557843968062691 +17387485694427326992_760_000_780_000,1557843968562687 +17387485694427326992_760_000_780_000,1557843959062736 +17387485694427326992_760_000_780_000,1557843967562765 +17387485694427326992_760_000_780_000,1557843956562821 +17387485694427326992_760_000_780_000,1557843955062802 +17387485694427326992_760_000_780_000,1557843965062813 +17387485694427326992_760_000_780_000,1557843969062758 +17387485694427326992_760_000_780_000,1557843969562794 +17387485694427326992_760_000_780_000,1557843966062703 +17387485694427326992_760_000_780_000,1557843967062734 +17387485694427326992_760_000_780_000,1557843965562735 +17387485694427326992_760_000_780_000,1557843959562659 +17387485694427326992_760_000_780_000,1557843957062778 +17387485694427326992_760_000_780_000,1557843957562803 +17387485694427326992_760_000_780_000,1557843966562710 +17387485694427326992_760_000_780_000,1557843956062840 +17387485694427326992_760_000_780_000,1557843958562737 +17387485694427326992_760_000_780_000,1557843955562873 +9350911198443552989_680_000_700_000,1557363451237372 +9350911198443552989_680_000_700_000,1557363441737465 +9350911198443552989_680_000_700_000,1557363449237250 +9350911198443552989_680_000_700_000,1557363439737622 +9350911198443552989_680_000_700_000,1557363438237327 +9350911198443552989_680_000_700_000,1557363440237403 +9350911198443552989_680_000_700_000,1557363441237340 +9350911198443552989_680_000_700_000,1557363447237793 +9350911198443552989_680_000_700_000,1557363451737437 +9350911198443552989_680_000_700_000,1557363449737386 +9350911198443552989_680_000_700_000,1557363437237375 +9350911198443552989_680_000_700_000,1557363437737418 +9350911198443552989_680_000_700_000,1557363440737261 +9350911198443552989_680_000_700_000,1557363448737285 +9350911198443552989_680_000_700_000,1557363439237622 +9350911198443552989_680_000_700_000,1557363447737794 +9350911198443552989_680_000_700_000,1557363438737444 +9350911198443552989_680_000_700_000,1557363450237422 +9350911198443552989_680_000_700_000,1557363450737354 +9350911198443552989_680_000_700_000,1557363448237517 +6174376739759381004_3240_000_3260_000,1557877015199162 +6174376739759381004_3240_000_3260_000,1557877006199178 +6174376739759381004_3240_000_3260_000,1557877004199181 +6174376739759381004_3240_000_3260_000,1557877005699128 +6174376739759381004_3240_000_3260_000,1557877008199313 
+6174376739759381004_3240_000_3260_000,1557877016199192 +6174376739759381004_3240_000_3260_000,1557877014699134 +6174376739759381004_3240_000_3260_000,1557877007699341 +6174376739759381004_3240_000_3260_000,1557877017199143 +6174376739759381004_3240_000_3260_000,1557877014199207 +6174376739759381004_3240_000_3260_000,1557877016699133 +6174376739759381004_3240_000_3260_000,1557877004699166 +6174376739759381004_3240_000_3260_000,1557877018699207 +6174376739759381004_3240_000_3260_000,1557877015699193 +6174376739759381004_3240_000_3260_000,1557877008699136 +6174376739759381004_3240_000_3260_000,1557877005199071 +6174376739759381004_3240_000_3260_000,1557877018199234 +6174376739759381004_3240_000_3260_000,1557877007199256 +6174376739759381004_3240_000_3260_000,1557877006699224 +6174376739759381004_3240_000_3260_000,1557877017699172 +12153647356523920032_2560_000_2580_000,1572710128774788 +12153647356523920032_2560_000_2580_000,1572710126774725 +12153647356523920032_2560_000_2580_000,1572710120774792 +12153647356523920032_2560_000_2580_000,1572710129274823 +12153647356523920032_2560_000_2580_000,1572710116774778 +12153647356523920032_2560_000_2580_000,1572710119774754 +12153647356523920032_2560_000_2580_000,1572710117774722 +12153647356523920032_2560_000_2580_000,1572710130274767 +12153647356523920032_2560_000_2580_000,1572710128274786 +12153647356523920032_2560_000_2580_000,1572710120274760 +12153647356523920032_2560_000_2580_000,1572710130774726 +12153647356523920032_2560_000_2580_000,1572710118274683 +12153647356523920032_2560_000_2580_000,1572710127274765 +12153647356523920032_2560_000_2580_000,1572710127774811 +12153647356523920032_2560_000_2580_000,1572710126274748 +12153647356523920032_2560_000_2580_000,1572710118774754 +12153647356523920032_2560_000_2580_000,1572710117274754 +12153647356523920032_2560_000_2580_000,1572710129774796 +12153647356523920032_2560_000_2580_000,1572710119274760 +12153647356523920032_2560_000_2580_000,1572710116274782 +11933765568165455008_2940_000_2960_000,1557198335387209 +11933765568165455008_2940_000_2960_000,1557198338387310 +11933765568165455008_2940_000_2960_000,1557198347387352 +11933765568165455008_2940_000_2960_000,1557198338887301 +11933765568165455008_2940_000_2960_000,1557198348387315 +11933765568165455008_2940_000_2960_000,1557198345387416 +11933765568165455008_2940_000_2960_000,1557198335887178 +11933765568165455008_2940_000_2960_000,1557198344387408 +11933765568165455008_2940_000_2960_000,1557198344887332 +11933765568165455008_2940_000_2960_000,1557198337387225 +11933765568165455008_2940_000_2960_000,1557198345887369 +11933765568165455008_2940_000_2960_000,1557198347887352 +11933765568165455008_2940_000_2960_000,1557198346887349 +11933765568165455008_2940_000_2960_000,1557198336387249 +11933765568165455008_2940_000_2960_000,1557198348887399 +11933765568165455008_2940_000_2960_000,1557198334887218 +11933765568165455008_2940_000_2960_000,1557198334387221 +11933765568165455008_2940_000_2960_000,1557198337887303 +11933765568165455008_2940_000_2960_000,1557198336887239 +11933765568165455008_2940_000_2960_000,1557198346387373 +10161761842905385678_760_000_780_000,1557196157797448 +10161761842905385678_760_000_780_000,1557196158797350 +10161761842905385678_760_000_780_000,1557196168297765 +10161761842905385678_760_000_780_000,1557196155797333 +10161761842905385678_760_000_780_000,1557196167797613 +10161761842905385678_760_000_780_000,1557196166297587 +10161761842905385678_760_000_780_000,1557196156797479 
+10161761842905385678_760_000_780_000,1557196167297622 +10161761842905385678_760_000_780_000,1557196154797258 +10161761842905385678_760_000_780_000,1557196154297327 +10161761842905385678_760_000_780_000,1557196165297463 +10161761842905385678_760_000_780_000,1557196165797474 +10161761842905385678_760_000_780_000,1557196156297413 +10161761842905385678_760_000_780_000,1557196164297460 +10161761842905385678_760_000_780_000,1557196158297419 +10161761842905385678_760_000_780_000,1557196168797617 +10161761842905385678_760_000_780_000,1557196166797651 +10161761842905385678_760_000_780_000,1557196155297293 +10161761842905385678_760_000_780_000,1557196164797422 +10161761842905385678_760_000_780_000,1557196157297472 +6922883602463663456_2220_000_2240_000,1558152040772834 +6922883602463663456_2220_000_2240_000,1558152026756411 +6922883602463663456_2220_000_2240_000,1558152028764982 +6922883602463663456_2220_000_2240_000,1558152036772202 +6922883602463663456_2220_000_2240_000,1558152029768625 +6922883602463663456_2220_000_2240_000,1558152037272084 +6922883602463663456_2220_000_2240_000,1558152038772394 +6922883602463663456_2220_000_2240_000,1558152036272158 +6922883602463663456_2220_000_2240_000,1558152030771388 +6922883602463663456_2220_000_2240_000,1558152038272239 +6922883602463663456_2220_000_2240_000,1558152040272803 +6922883602463663456_2220_000_2240_000,1558152030270137 +6922883602463663456_2220_000_2240_000,1558152037772191 +6922883602463663456_2220_000_2240_000,1558152027760810 +6922883602463663456_2220_000_2240_000,1558152027258557 +6922883602463663456_2220_000_2240_000,1558152026254441 +6922883602463663456_2220_000_2240_000,1558152039272550 +6922883602463663456_2220_000_2240_000,1558152039772680 +6922883602463663456_2220_000_2240_000,1558152029266975 +6922883602463663456_2220_000_2240_000,1558152028262942 +3341890853207909601_1020_000_1040_000,1573927803625099 +3341890853207909601_1020_000_1040_000,1573927790125189 +3341890853207909601_1020_000_1040_000,1573927802125062 +3341890853207909601_1020_000_1040_000,1573927801625067 +3341890853207909601_1020_000_1040_000,1573927794625079 +3341890853207909601_1020_000_1040_000,1573927790625242 +3341890853207909601_1020_000_1040_000,1573927792624930 +3341890853207909601_1020_000_1040_000,1573927791125208 +3341890853207909601_1020_000_1040_000,1573927800624954 +3341890853207909601_1020_000_1040_000,1573927804625096 +3341890853207909601_1020_000_1040_000,1573927800124914 +3341890853207909601_1020_000_1040_000,1573927802625074 +3341890853207909601_1020_000_1040_000,1573927792124827 +3341890853207909601_1020_000_1040_000,1573927794125084 +3341890853207909601_1020_000_1040_000,1573927801125097 +3341890853207909601_1020_000_1040_000,1573927793624995 +3341890853207909601_1020_000_1040_000,1573927793124963 +3341890853207909601_1020_000_1040_000,1573927804125097 +3341890853207909601_1020_000_1040_000,1573927803125097 +3341890853207909601_1020_000_1040_000,1573927791625026 +17756183617755834457_1940_000_1960_000,1558017204447293 +17756183617755834457_1940_000_1960_000,1558017214436996 +17756183617755834457_1940_000_1960_000,1558017215429120 +17756183617755834457_1940_000_1960_000,1558017206446333 +17756183617755834457_1940_000_1960_000,1558017207446078 +17756183617755834457_1940_000_1960_000,1558017218421930 +17756183617755834457_1940_000_1960_000,1558017213940930 +17756183617755834457_1940_000_1960_000,1558017217922014 +17756183617755834457_1940_000_1960_000,1558017206945999 +17756183617755834457_1940_000_1960_000,1558017205447104 
+17756183617755834457_1940_000_1960_000,1558017214932926 +17756183617755834457_1940_000_1960_000,1558017217422255 +17756183617755834457_1940_000_1960_000,1558017215925793 +17756183617755834457_1940_000_1960_000,1558017208447290 +17756183617755834457_1940_000_1960_000,1558017216423608 +17756183617755834457_1940_000_1960_000,1558017207946577 +17756183617755834457_1940_000_1960_000,1558017216922725 +17756183617755834457_1940_000_1960_000,1558017204947246 +17756183617755834457_1940_000_1960_000,1558017205946707 +17756183617755834457_1940_000_1960_000,1558017203947410 +2218963221891181906_4360_000_4380_000,1573932454073919 +2218963221891181906_4360_000_4380_000,1573932458574312 +2218963221891181906_4360_000_4380_000,1573932456574286 +2218963221891181906_4360_000_4380_000,1573932445149910 +2218963221891181906_4360_000_4380_000,1573932457574335 +2218963221891181906_4360_000_4380_000,1573932444649899 +2218963221891181906_4360_000_4380_000,1573932446649928 +2218963221891181906_4360_000_4380_000,1573932445649884 +2218963221891181906_4360_000_4380_000,1573932448150256 +2218963221891181906_4360_000_4380_000,1573932444149933 +2218963221891181906_4360_000_4380_000,1573932447149977 +2218963221891181906_4360_000_4380_000,1573932454574319 +2218963221891181906_4360_000_4380_000,1573932456074299 +2218963221891181906_4360_000_4380_000,1573932455574265 +2218963221891181906_4360_000_4380_000,1573932457074331 +2218963221891181906_4360_000_4380_000,1573932458074340 +2218963221891181906_4360_000_4380_000,1573932448650899 +2218963221891181906_4360_000_4380_000,1573932446149941 +2218963221891181906_4360_000_4380_000,1573932447650058 +2218963221891181906_4360_000_4380_000,1573932455074331 +10149575340910243572_2720_000_2740_000,1558035231962663 +10149575340910243572_2720_000_2740_000,1558035232462596 +10149575340910243572_2720_000_2740_000,1558035234462274 +10149575340910243572_2720_000_2740_000,1558035232962512 +10149575340910243572_2720_000_2740_000,1558035233462396 +10149575340910243572_2720_000_2740_000,1558035230462351 +10149575340910243572_2720_000_2740_000,1558035231462594 +10149575340910243572_2720_000_2740_000,1558035230962448 +10149575340910243572_2720_000_2740_000,1558035229962648 +10149575340910243572_2720_000_2740_000,1558035233962327 +3459095437766396887_1600_000_1620_000,1559177116218096 +3459095437766396887_1600_000_1620_000,1559177116717458 +3459095437766396887_1600_000_1620_000,1559177108222874 +3459095437766396887_1600_000_1620_000,1559177115219166 +3459095437766396887_1600_000_1620_000,1559177117217000 +3459095437766396887_1600_000_1620_000,1559177114719614 +3459095437766396887_1600_000_1620_000,1559177115718671 +3459095437766396887_1600_000_1620_000,1559177105721360 +3459095437766396887_1600_000_1620_000,1559177108722993 +3459095437766396887_1600_000_1620_000,1559177107221934 +3459095437766396887_1600_000_1620_000,1559177106221852 +3459095437766396887_1600_000_1620_000,1559177114219949 +3459095437766396887_1600_000_1620_000,1559177105220562 +3459095437766396887_1600_000_1620_000,1559177107722383 +3459095437766396887_1600_000_1620_000,1559177118216369 +3459095437766396887_1600_000_1620_000,1559177117716745 +3459095437766396887_1600_000_1620_000,1559177104218831 +3459095437766396887_1600_000_1620_000,1559177104719526 +3459095437766396887_1600_000_1620_000,1559177118715883 +3459095437766396887_1600_000_1620_000,1559177106721948 +8249122135171526629_520_000_540_000,1559184839587788 +8249122135171526629_520_000_540_000,1559184839087463 +8249122135171526629_520_000_540_000,1559184838086814 
+8249122135171526629_520_000_540_000,1559184829585106 +8249122135171526629_520_000_540_000,1559184829085741 +8249122135171526629_520_000_540_000,1559184841587404 +8249122135171526629_520_000_540_000,1559184832087286 +8249122135171526629_520_000_540_000,1559184831086036 +8249122135171526629_520_000_540_000,1559184830585419 +8249122135171526629_520_000_540_000,1559184838587000 +8249122135171526629_520_000_540_000,1559184842087749 +8249122135171526629_520_000_540_000,1559184827587385 +8249122135171526629_520_000_540_000,1559184828087141 +8249122135171526629_520_000_540_000,1559184837586942 +8249122135171526629_520_000_540_000,1559184840587321 +8249122135171526629_520_000_540_000,1559184830085083 +8249122135171526629_520_000_540_000,1559184828586572 +8249122135171526629_520_000_540_000,1559184841087135 +8249122135171526629_520_000_540_000,1559184840087626 +8249122135171526629_520_000_540_000,1559184831586778 +1664548685643064400_2240_000_2260_000,1572730660024796 +1664548685643064400_2240_000_2260_000,1572730661524793 +1664548685643064400_2240_000_2260_000,1572730664024893 +1664548685643064400_2240_000_2260_000,1572730661024763 +1664548685643064400_2240_000_2260_000,1572730659524712 +1664548685643064400_2240_000_2260_000,1572730651024914 +1664548685643064400_2240_000_2260_000,1572730652024805 +1664548685643064400_2240_000_2260_000,1572730663524712 +1664548685643064400_2240_000_2260_000,1572730662524607 +1664548685643064400_2240_000_2260_000,1572730654024948 +1664548685643064400_2240_000_2260_000,1572730660524763 +1664548685643064400_2240_000_2260_000,1572730649525094 +1664548685643064400_2240_000_2260_000,1572730651524841 +1664548685643064400_2240_000_2260_000,1572730653024965 +1664548685643064400_2240_000_2260_000,1572730662024682 +1664548685643064400_2240_000_2260_000,1572730652524780 +1664548685643064400_2240_000_2260_000,1572730650524867 +1664548685643064400_2240_000_2260_000,1572730663024572 +1664548685643064400_2240_000_2260_000,1572730650024950 +1664548685643064400_2240_000_2260_000,1572730653525063 +4916600861562283346_3880_000_3900_000,1559179394137429 +4916600861562283346_3880_000_3900_000,1559179396137504 +4916600861562283346_3880_000_3900_000,1559179396637496 +4916600861562283346_3880_000_3900_000,1559179398137489 +4916600861562283346_3880_000_3900_000,1559179388637375 +4916600861562283346_3880_000_3900_000,1559179398637508 +4916600861562283346_3880_000_3900_000,1559179386637413 +4916600861562283346_3880_000_3900_000,1559179386137493 +4916600861562283346_3880_000_3900_000,1559179397137450 +4916600861562283346_3880_000_3900_000,1559179387637365 +4916600861562283346_3880_000_3900_000,1559179384137390 +4916600861562283346_3880_000_3900_000,1559179387137336 +4916600861562283346_3880_000_3900_000,1559179384637499 +4916600861562283346_3880_000_3900_000,1559179388137403 +4916600861562283346_3880_000_3900_000,1559179397637459 +4916600861562283346_3880_000_3900_000,1559179395137442 +4916600861562283346_3880_000_3900_000,1559179385137537 +4916600861562283346_3880_000_3900_000,1559179385637530 +4916600861562283346_3880_000_3900_000,1559179395637456 +4916600861562283346_3880_000_3900_000,1559179394637383 +10802932587105534078_1280_000_1300_000,1557888796948097 +10802932587105534078_1280_000_1300_000,1557888798448099 +10802932587105534078_1280_000_1300_000,1557888806449251 +10802932587105534078_1280_000_1300_000,1557888809449360 +10802932587105534078_1280_000_1300_000,1557888810448859 +10802932587105534078_1280_000_1300_000,1557888800447985 
+10802932587105534078_1280_000_1300_000,1557888807948674 +10802932587105534078_1280_000_1300_000,1557888809949023 +10802932587105534078_1280_000_1300_000,1557888810949122 +10802932587105534078_1280_000_1300_000,1557888799948216 +10802932587105534078_1280_000_1300_000,1557888798948041 +10802932587105534078_1280_000_1300_000,1557888800948126 +10802932587105534078_1280_000_1300_000,1557888806949187 +10802932587105534078_1280_000_1300_000,1557888807448803 +10802932587105534078_1280_000_1300_000,1557888799448247 +10802932587105534078_1280_000_1300_000,1557888808449065 +10802932587105534078_1280_000_1300_000,1557888797948166 +10802932587105534078_1280_000_1300_000,1557888796448121 +10802932587105534078_1280_000_1300_000,1557888808949531 +10802932587105534078_1280_000_1300_000,1557888797448185 +13748565785898537200_680_000_700_000,1573621439474829 +13748565785898537200_680_000_700_000,1573621439974767 +13748565785898537200_680_000_700_000,1573621429474915 +13748565785898537200_680_000_700_000,1573621429974924 +13748565785898537200_680_000_700_000,1573621440974804 +13748565785898537200_680_000_700_000,1573621441474863 +13748565785898537200_680_000_700_000,1573621443974860 +13748565785898537200_680_000_700_000,1573621431474829 +13748565785898537200_680_000_700_000,1573621441974787 +13748565785898537200_680_000_700_000,1573621432474859 +13748565785898537200_680_000_700_000,1573621443474808 +13748565785898537200_680_000_700_000,1573621430974792 +13748565785898537200_680_000_700_000,1573621433974860 +13748565785898537200_680_000_700_000,1573621431974875 +13748565785898537200_680_000_700_000,1573621442974839 +13748565785898537200_680_000_700_000,1573621430474807 +13748565785898537200_680_000_700_000,1573621442474772 +13748565785898537200_680_000_700_000,1573621440474758 +13748565785898537200_680_000_700_000,1573621433474826 +13748565785898537200_680_000_700_000,1573621432974900 +14643284977980826278_520_000_540_000,1558150336737516 +14643284977980826278_520_000_540_000,1558150328237510 +14643284977980826278_520_000_540_000,1558150337237361 +14643284977980826278_520_000_540_000,1558150327737447 +14643284977980826278_520_000_540_000,1558150327237391 +14643284977980826278_520_000_540_000,1558150339737419 +14643284977980826278_520_000_540_000,1558150326737588 +14643284977980826278_520_000_540_000,1558150326237829 +14643284977980826278_520_000_540_000,1558150330737609 +14643284977980826278_520_000_540_000,1558150329237498 +14643284977980826278_520_000_540_000,1558150330237732 +14643284977980826278_520_000_540_000,1558150339237427 +14643284977980826278_520_000_540_000,1558150340237494 +14643284977980826278_520_000_540_000,1558150340737501 +14643284977980826278_520_000_540_000,1558150329737609 +14643284977980826278_520_000_540_000,1558150328737468 +14643284977980826278_520_000_540_000,1558150337737326 +14643284977980826278_520_000_540_000,1558150338237396 +14643284977980826278_520_000_540_000,1558150336237586 +14643284977980826278_520_000_540_000,1558150338737431 +4045613324047897473_940_000_960_000,1558493338074162 +4045613324047897473_940_000_960_000,1558493348073783 +4045613324047897473_940_000_960_000,1558493350074053 +4045613324047897473_940_000_960_000,1558493345573992 +4045613324047897473_940_000_960_000,1558493347574007 +4045613324047897473_940_000_960_000,1558493338574044 +4045613324047897473_940_000_960_000,1558493335574296 +4045613324047897473_940_000_960_000,1558493339573912 +4045613324047897473_940_000_960_000,1558493336574269 +4045613324047897473_940_000_960_000,1558493347074035 
+4045613324047897473_940_000_960_000,1558493346574102 +4045613324047897473_940_000_960_000,1558493346073989 +4045613324047897473_940_000_960_000,1558493337574148 +4045613324047897473_940_000_960_000,1558493348573778 +4045613324047897473_940_000_960_000,1558493349074012 +4045613324047897473_940_000_960_000,1558493337074219 +4045613324047897473_940_000_960_000,1558493349574122 +4045613324047897473_940_000_960_000,1558493340074053 +4045613324047897473_940_000_960_000,1558493336074290 +4045613324047897473_940_000_960_000,1558493339073948 +2257381802419655779_820_000_840_000,1558402111847622 +2257381802419655779_820_000_840_000,1558402122847222 +2257381802419655779_820_000_840_000,1558402108847992 +2257381802419655779_820_000_840_000,1558402118847287 +2257381802419655779_820_000_840_000,1558402120847365 +2257381802419655779_820_000_840_000,1558402110847064 +2257381802419655779_820_000_840_000,1558402119847426 +2257381802419655779_820_000_840_000,1558402122347099 +2257381802419655779_820_000_840_000,1558402121847019 +2257381802419655779_820_000_840_000,1558402121347177 +2257381802419655779_820_000_840_000,1558402109847716 +2257381802419655779_820_000_840_000,1558402112347811 +2257381802419655779_820_000_840_000,1558402123347308 +2257381802419655779_820_000_840_000,1558402112847819 +2257381802419655779_820_000_840_000,1558402109347833 +2257381802419655779_820_000_840_000,1558402120347479 +2257381802419655779_820_000_840_000,1558402111347219 +2257381802419655779_820_000_840_000,1558402110347368 +2257381802419655779_820_000_840_000,1558402119347368 +2257381802419655779_820_000_840_000,1558402113347613 +4054036670499089296_2300_000_2320_000,1557187714649115 +4054036670499089296_2300_000_2320_000,1557187716649135 +4054036670499089296_2300_000_2320_000,1557187704649276 +4054036670499089296_2300_000_2320_000,1557187707149136 +4054036670499089296_2300_000_2320_000,1557187716149170 +4054036670499089296_2300_000_2320_000,1557187704149193 +4054036670499089296_2300_000_2320_000,1557187717148945 +4054036670499089296_2300_000_2320_000,1557187707649076 +4054036670499089296_2300_000_2320_000,1557187706649119 +4054036670499089296_2300_000_2320_000,1557187705149208 +4054036670499089296_2300_000_2320_000,1557187715649133 +4054036670499089296_2300_000_2320_000,1557187713649046 +4054036670499089296_2300_000_2320_000,1557187706149101 +4054036670499089296_2300_000_2320_000,1557187715149153 +4054036670499089296_2300_000_2320_000,1557187703148999 +4054036670499089296_2300_000_2320_000,1557187703649173 +4054036670499089296_2300_000_2320_000,1557187713149076 +4054036670499089296_2300_000_2320_000,1557187714149098 +4054036670499089296_2300_000_2320_000,1557187717649252 +4054036670499089296_2300_000_2320_000,1557187705649134 +12056192874455954437_140_000_160_000,1557843345612667 +12056192874455954437_140_000_160_000,1557843349612578 +12056192874455954437_140_000_160_000,1557843345112543 +12056192874455954437_140_000_160_000,1557843335112508 +12056192874455954437_140_000_160_000,1557843338612551 +12056192874455954437_140_000_160_000,1557843336612494 +12056192874455954437_140_000_160_000,1557843338112693 +12056192874455954437_140_000_160_000,1557843337112658 +12056192874455954437_140_000_160_000,1557843339612639 +12056192874455954437_140_000_160_000,1557843348612302 +12056192874455954437_140_000_160_000,1557843335612429 +12056192874455954437_140_000_160_000,1557843336112396 +12056192874455954437_140_000_160_000,1557843349112419 +12056192874455954437_140_000_160_000,1557843337612796 
+12056192874455954437_140_000_160_000,1557843346612497 +12056192874455954437_140_000_160_000,1557843347612615 +12056192874455954437_140_000_160_000,1557843348112448 +12056192874455954437_140_000_160_000,1557843346112603 +12056192874455954437_140_000_160_000,1557843339112601 +12056192874455954437_140_000_160_000,1557843347112468 +13034900465317073842_1700_000_1720_000,1559143078524545 +13034900465317073842_1700_000_1720_000,1559143065016062 +13034900465317073842_1700_000_1720_000,1559143064015948 +13034900465317073842_1700_000_1720_000,1559143074021060 +13034900465317073842_1700_000_1720_000,1559143068016067 +13034900465317073842_1700_000_1720_000,1559143076523597 +13034900465317073842_1700_000_1720_000,1559143067016248 +13034900465317073842_1700_000_1720_000,1559143075522514 +13034900465317073842_1700_000_1720_000,1559143077023973 +13034900465317073842_1700_000_1720_000,1559143064515955 +13034900465317073842_1700_000_1720_000,1559143066516551 +13034900465317073842_1700_000_1720_000,1559143077524362 +13034900465317073842_1700_000_1720_000,1559143068516366 +13034900465317073842_1700_000_1720_000,1559143076023064 +13034900465317073842_1700_000_1720_000,1559143074521426 +13034900465317073842_1700_000_1720_000,1559143067516020 +13034900465317073842_1700_000_1720_000,1559143065516232 +13034900465317073842_1700_000_1720_000,1559143066016549 +13034900465317073842_1700_000_1720_000,1559143075021878 +13034900465317073842_1700_000_1720_000,1559143078024530 +7511993111693456743_3880_000_3900_000,1557963202297466 +7511993111693456743_3880_000_3900_000,1557963212297515 +7511993111693456743_3880_000_3900_000,1557963200297419 +7511993111693456743_3880_000_3900_000,1557963202797419 +7511993111693456743_3880_000_3900_000,1557963211297319 +7511993111693456743_3880_000_3900_000,1557963211797549 +7511993111693456743_3880_000_3900_000,1557963201297473 +7511993111693456743_3880_000_3900_000,1557963209797116 +7511993111693456743_3880_000_3900_000,1557963210297172 +7511993111693456743_3880_000_3900_000,1557963200797464 +7511993111693456743_3880_000_3900_000,1557963209297327 +7511993111693456743_3880_000_3900_000,1557963208797520 +7511993111693456743_3880_000_3900_000,1557963198797401 +7511993111693456743_3880_000_3900_000,1557963213297448 +7511993111693456743_3880_000_3900_000,1557963210797182 +7511993111693456743_3880_000_3900_000,1557963201797503 +7511993111693456743_3880_000_3900_000,1557963199297286 +7511993111693456743_3880_000_3900_000,1557963199797330 +7511993111693456743_3880_000_3900_000,1557963203297377 +7511993111693456743_3880_000_3900_000,1557963212797472 +9355489589631690177_4800_000_4820_000,1557342366562650 +9355489589631690177_4800_000_4820_000,1557342358062536 +9355489589631690177_4800_000_4820_000,1557342369562809 +9355489589631690177_4800_000_4820_000,1557342357562530 +9355489589631690177_4800_000_4820_000,1557342367062748 +9355489589631690177_4800_000_4820_000,1557342356562423 +9355489589631690177_4800_000_4820_000,1557342355562520 +9355489589631690177_4800_000_4820_000,1557342358562309 +9355489589631690177_4800_000_4820_000,1557342368562561 +9355489589631690177_4800_000_4820_000,1557342367562723 +9355489589631690177_4800_000_4820_000,1557342365562451 +9355489589631690177_4800_000_4820_000,1557342369062698 +9355489589631690177_4800_000_4820_000,1557342366062493 +9355489589631690177_4800_000_4820_000,1557342368062616 +9355489589631690177_4800_000_4820_000,1557342357062509 +9355489589631690177_4800_000_4820_000,1557342359062110 +9355489589631690177_4800_000_4820_000,1557342355062436 
+9355489589631690177_4800_000_4820_000,1557342359562031 +9355489589631690177_4800_000_4820_000,1557342365062568 +9355489589631690177_4800_000_4820_000,1557342356062469 +3522804493060229409_3400_000_3420_000,1557855904472271 +3522804493060229409_3400_000_3420_000,1557855907472634 +3522804493060229409_3400_000_3420_000,1557855896472328 +3522804493060229409_3400_000_3420_000,1557855892972587 +3522804493060229409_3400_000_3420_000,1557855906972551 +3522804493060229409_3400_000_3420_000,1557855905472302 +3522804493060229409_3400_000_3420_000,1557855904972296 +3522804493060229409_3400_000_3420_000,1557855905972396 +3522804493060229409_3400_000_3420_000,1557855906472495 +3522804493060229409_3400_000_3420_000,1557855893972382 +3522804493060229409_3400_000_3420_000,1557855897472206 +3522804493060229409_3400_000_3420_000,1557855902972245 +3522804493060229409_3400_000_3420_000,1557855894972377 +3522804493060229409_3400_000_3420_000,1557855893472505 +3522804493060229409_3400_000_3420_000,1557855895472388 +3522804493060229409_3400_000_3420_000,1557855896972244 +3522804493060229409_3400_000_3420_000,1557855903472293 +3522804493060229409_3400_000_3420_000,1557855895972316 +3522804493060229409_3400_000_3420_000,1557855894472345 +3522804493060229409_3400_000_3420_000,1557855903972289 +8566480970798227989_500_000_520_000,1557239425612429 +8566480970798227989_500_000_520_000,1557239414112699 +8566480970798227989_500_000_520_000,1557239413112667 +8566480970798227989_500_000_520_000,1557239415112533 +8566480970798227989_500_000_520_000,1557239416612460 +8566480970798227989_500_000_520_000,1557239423112799 +8566480970798227989_500_000_520_000,1557239415612490 +8566480970798227989_500_000_520_000,1557239422112884 +8566480970798227989_500_000_520_000,1557239412612624 +8566480970798227989_500_000_520_000,1557239424612659 +8566480970798227989_500_000_520_000,1557239412112652 +8566480970798227989_500_000_520_000,1557239422612861 +8566480970798227989_500_000_520_000,1557239416112464 +8566480970798227989_500_000_520_000,1557239423612728 +8566480970798227989_500_000_520_000,1557239413612747 +8566480970798227989_500_000_520_000,1557239426112320 +8566480970798227989_500_000_520_000,1557239426612303 +8566480970798227989_500_000_520_000,1557239414612596 +8566480970798227989_500_000_520_000,1557239425112554 +8566480970798227989_500_000_520_000,1557239424112739 +6278307160249415497_1700_000_1720_000,1558034213921937 +6278307160249415497_1700_000_1720_000,1558034201922721 +6278307160249415497_1700_000_1720_000,1558034202422649 +6278307160249415497_1700_000_1720_000,1558034202922472 +6278307160249415497_1700_000_1720_000,1558034204422154 +6278307160249415497_1700_000_1720_000,1558034214422280 +6278307160249415497_1700_000_1720_000,1558034213421817 +6278307160249415497_1700_000_1720_000,1558034211421372 +6278307160249415497_1700_000_1720_000,1558034203922216 +6278307160249415497_1700_000_1720_000,1558034200922728 +6278307160249415497_1700_000_1720_000,1558034212921821 +6278307160249415497_1700_000_1720_000,1558034210421304 +6278307160249415497_1700_000_1720_000,1558034201422689 +6278307160249415497_1700_000_1720_000,1558034211921700 +6278307160249415497_1700_000_1720_000,1558034209921189 +6278307160249415497_1700_000_1720_000,1558034212421831 +6278307160249415497_1700_000_1720_000,1558034200422683 +6278307160249415497_1700_000_1720_000,1558034210921320 +6278307160249415497_1700_000_1720_000,1558034203422353 +6278307160249415497_1700_000_1720_000,1558034199922726 +13787943721654585343_1220_000_1240_000,1558483374422389 
+13787943721654585343_1220_000_1240_000,1558483360422540 +13787943721654585343_1220_000_1240_000,1558483362922326 +13787943721654585343_1220_000_1240_000,1558483361422280 +13787943721654585343_1220_000_1240_000,1558483370422349 +13787943721654585343_1220_000_1240_000,1558483359922533 +13787943721654585343_1220_000_1240_000,1558483372922276 +13787943721654585343_1220_000_1240_000,1558483364422414 +13787943721654585343_1220_000_1240_000,1558483369922463 +13787943721654585343_1220_000_1240_000,1558483373422253 +13787943721654585343_1220_000_1240_000,1558483360922432 +13787943721654585343_1220_000_1240_000,1558483370922205 +13787943721654585343_1220_000_1240_000,1558483371922349 +13787943721654585343_1220_000_1240_000,1558483371422242 +13787943721654585343_1220_000_1240_000,1558483361922245 +13787943721654585343_1220_000_1240_000,1558483362422314 +13787943721654585343_1220_000_1240_000,1558483363422326 +13787943721654585343_1220_000_1240_000,1558483363922364 +13787943721654585343_1220_000_1240_000,1558483372422320 +13787943721654585343_1220_000_1240_000,1558483373922325 +10998289306141768318_1280_000_1300_000,1558483433397038 +10998289306141768318_1280_000_1300_000,1558483430411803 +10998289306141768318_1280_000_1300_000,1558483420422343 +10998289306141768318_1280_000_1300_000,1558483434396435 +10998289306141768318_1280_000_1300_000,1558483421422280 +10998289306141768318_1280_000_1300_000,1558483423422502 +10998289306141768318_1280_000_1300_000,1558483430908205 +10998289306141768318_1280_000_1300_000,1558483424422579 +10998289306141768318_1280_000_1300_000,1558483433896475 +10998289306141768318_1280_000_1300_000,1558483423922620 +10998289306141768318_1280_000_1300_000,1558483419922414 +10998289306141768318_1280_000_1300_000,1558483422422324 +10998289306141768318_1280_000_1300_000,1558483431404397 +10998289306141768318_1280_000_1300_000,1558483431901030 +10998289306141768318_1280_000_1300_000,1558483429915076 +10998289306141768318_1280_000_1300_000,1558483420922273 +10998289306141768318_1280_000_1300_000,1558483421922318 +10998289306141768318_1280_000_1300_000,1558483422922327 +10998289306141768318_1280_000_1300_000,1558483432398938 +10998289306141768318_1280_000_1300_000,1558483432897848 +7435516779413778621_4440_000_4460_000,1557325510087987 +7435516779413778621_4440_000_4460_000,1557325509088023 +7435516779413778621_4440_000_4460_000,1557325509588017 +7435516779413778621_4440_000_4460_000,1557325522112585 +7435516779413778621_4440_000_4460_000,1557325511088136 +7435516779413778621_4440_000_4460_000,1557325513590433 +7435516779413778621_4440_000_4460_000,1557325512588488 +7435516779413778621_4440_000_4460_000,1557325521112794 +7435516779413778621_4440_000_4460_000,1557325513089176 +7435516779413778621_4440_000_4460_000,1557325522612689 +7435516779413778621_4440_000_4460_000,1557325520112870 +7435516779413778621_4440_000_4460_000,1557325523612525 +7435516779413778621_4440_000_4460_000,1557325511588133 +7435516779413778621_4440_000_4460_000,1557325521612655 +7435516779413778621_4440_000_4460_000,1557325519113921 +7435516779413778621_4440_000_4460_000,1557325520612844 +7435516779413778621_4440_000_4460_000,1557325510588071 +7435516779413778621_4440_000_4460_000,1557325523112680 +7435516779413778621_4440_000_4460_000,1557325519613322 +7435516779413778621_4440_000_4460_000,1557325512088233 +13944616099709049906_1020_000_1040_000,1558493425524322 +13944616099709049906_1020_000_1040_000,1558493417024071 +13944616099709049906_1020_000_1040_000,1558493426024320 
+13944616099709049906_1020_000_1040_000,1558493416024098 +13944616099709049906_1020_000_1040_000,1558493429524171 +13944616099709049906_1020_000_1040_000,1558493426524287 +13944616099709049906_1020_000_1040_000,1558493419024193 +13944616099709049906_1020_000_1040_000,1558493430024138 +13944616099709049906_1020_000_1040_000,1558493427524280 +13944616099709049906_1020_000_1040_000,1558493415524136 +13944616099709049906_1020_000_1040_000,1558493427024273 +13944616099709049906_1020_000_1040_000,1558493429024223 +13944616099709049906_1020_000_1040_000,1558493428524220 +13944616099709049906_1020_000_1040_000,1558493420024171 +13944616099709049906_1020_000_1040_000,1558493418024131 +13944616099709049906_1020_000_1040_000,1558493418524161 +13944616099709049906_1020_000_1040_000,1558493417524102 +13944616099709049906_1020_000_1040_000,1558493419524165 +13944616099709049906_1020_000_1040_000,1558493416524077 +13944616099709049906_1020_000_1040_000,1558493428024253 +8229317157758012712_3860_000_3880_000,1559179375137657 +8229317157758012712_3860_000_3880_000,1559179375637448 +8229317157758012712_3860_000_3880_000,1559179366637361 +8229317157758012712_3860_000_3880_000,1559179368137382 +8229317157758012712_3860_000_3880_000,1559179367137366 +8229317157758012712_3860_000_3880_000,1559179376137327 +8229317157758012712_3860_000_3880_000,1559179378637568 +8229317157758012712_3860_000_3880_000,1559179374137643 +8229317157758012712_3860_000_3880_000,1559179374637715 +8229317157758012712_3860_000_3880_000,1559179376637419 +8229317157758012712_3860_000_3880_000,1559179364137325 +8229317157758012712_3860_000_3880_000,1559179377637503 +8229317157758012712_3860_000_3880_000,1559179366137360 +8229317157758012712_3860_000_3880_000,1559179368637389 +8229317157758012712_3860_000_3880_000,1559179377137484 +8229317157758012712_3860_000_3880_000,1559179364637326 +8229317157758012712_3860_000_3880_000,1559179365137367 +8229317157758012712_3860_000_3880_000,1559179367637354 +8229317157758012712_3860_000_3880_000,1559179378137535 +8229317157758012712_3860_000_3880_000,1559179365637366 +5638240639308158118_4220_000_4240_000,1555267988099080 +5638240639308158118_4220_000_4240_000,1555267981099003 +5638240639308158118_4220_000_4240_000,1555267980599134 +5638240639308158118_4220_000_4240_000,1555267989099215 +5638240639308158118_4220_000_4240_000,1555267977599158 +5638240639308158118_4220_000_4240_000,1555267987599108 +5638240639308158118_4220_000_4240_000,1555267986599172 +5638240639308158118_4220_000_4240_000,1555267979599132 +5638240639308158118_4220_000_4240_000,1555267988599141 +5638240639308158118_4220_000_4240_000,1555267990098844 +5638240639308158118_4220_000_4240_000,1555267990598105 +5638240639308158118_4220_000_4240_000,1555267979099131 +5638240639308158118_4220_000_4240_000,1555267978599123 +5638240639308158118_4220_000_4240_000,1555267987099206 +5638240639308158118_4220_000_4240_000,1555267976599172 +5638240639308158118_4220_000_4240_000,1555267977099159 +5638240639308158118_4220_000_4240_000,1555267989599152 +5638240639308158118_4220_000_4240_000,1555267980099165 +5638240639308158118_4220_000_4240_000,1555267978099155 +5638240639308158118_4220_000_4240_000,1555267991096855 +15272375112495403395_620_000_640_000,1559189217599985 +15272375112495403395_620_000_640_000,1559189230099846 +15272375112495403395_620_000_640_000,1559189221600285 +15272375112495403395_620_000_640_000,1559189228599908 +15272375112495403395_620_000_640_000,1559189228100026 +15272375112495403395_620_000_640_000,1559189231099755 
+15272375112495403395_620_000_640_000,1559189229599850 +15272375112495403395_620_000_640_000,1559189217099978 +15272375112495403395_620_000_640_000,1559189220599788 +15272375112495403395_620_000_640_000,1559189229099841 +15272375112495403395_620_000_640_000,1559189227100268 +15272375112495403395_620_000_640_000,1559189231599710 +15272375112495403395_620_000_640_000,1559189218599758 +15272375112495403395_620_000_640_000,1559189219599785 +15272375112495403395_620_000_640_000,1559189218099858 +15272375112495403395_620_000_640_000,1559189230599799 +15272375112495403395_620_000_640_000,1559189219099720 +15272375112495403395_620_000_640_000,1559189221099879 +15272375112495403395_620_000_640_000,1559189227600224 +15272375112495403395_620_000_640_000,1559189220099860 +8993680275027614595_2520_000_2540_000,1555280202399639 +8993680275027614595_2520_000_2540_000,1555280199899606 +8993680275027614595_2520_000_2540_000,1555280211375470 +8993680275027614595_2520_000_2540_000,1555280199399568 +8993680275027614595_2520_000_2540_000,1555280212875223 +8993680275027614595_2520_000_2540_000,1555280208875515 +8993680275027614595_2520_000_2540_000,1555280202899788 +8993680275027614595_2520_000_2540_000,1555280210374654 +8993680275027614595_2520_000_2540_000,1555280210875023 +8993680275027614595_2520_000_2540_000,1555280200899582 +8993680275027614595_2520_000_2540_000,1555280201399518 +8993680275027614595_2520_000_2540_000,1555280212375553 +8993680275027614595_2520_000_2540_000,1555280209874639 +8993680275027614595_2520_000_2540_000,1555280211875697 +8993680275027614595_2520_000_2540_000,1555280209374829 +8993680275027614595_2520_000_2540_000,1555280203399700 +8993680275027614595_2520_000_2540_000,1555280201899495 +8993680275027614595_2520_000_2540_000,1555280213374830 +8993680275027614595_2520_000_2540_000,1555280200399612 +8993680275027614595_2520_000_2540_000,1555280198899595 +8688567562597583972_940_000_960_000,1555217344950039 +8688567562597583972_940_000_960_000,1555217347949948 +8688567562597583972_940_000_960_000,1555217356449802 +8688567562597583972_940_000_960_000,1555217353949933 +8688567562597583972_940_000_960_000,1555217345450004 +8688567562597583972_940_000_960_000,1555217346449944 +8688567562597583972_940_000_960_000,1555217343450016 +8688567562597583972_940_000_960_000,1555217344449945 +8688567562597583972_940_000_960_000,1555217346949874 +8688567562597583972_940_000_960_000,1555217355449905 +8688567562597583972_940_000_960_000,1555217353449883 +8688567562597583972_940_000_960_000,1555217355949898 +8688567562597583972_940_000_960_000,1555217354949900 +8688567562597583972_940_000_960_000,1555217357449853 +8688567562597583972_940_000_960_000,1555217345949937 +8688567562597583972_940_000_960_000,1555217354449934 +8688567562597583972_940_000_960_000,1555217356949774 +8688567562597583972_940_000_960_000,1555217343949948 +8688567562597583972_940_000_960_000,1555217357949939 +8688567562597583972_940_000_960_000,1555217347449863 +7247823803417339098_2320_000_2340_000,1557197726848807 +7247823803417339098_2320_000_2340_000,1557197726349233 +7247823803417339098_2320_000_2340_000,1557197727348551 +7247823803417339098_2320_000_2340_000,1557197714347252 +7247823803417339098_2320_000_2340_000,1557197716347129 +7247823803417339098_2320_000_2340_000,1557197725349846 +7247823803417339098_2320_000_2340_000,1557197718347455 +7247823803417339098_2320_000_2340_000,1557197716847198 +7247823803417339098_2320_000_2340_000,1557197715847235 +7247823803417339098_2320_000_2340_000,1557197724349365 
+7247823803417339098_2320_000_2340_000,1557197714847182 +7247823803417339098_2320_000_2340_000,1557197717847546 +7247823803417339098_2320_000_2340_000,1557197728348372 +7247823803417339098_2320_000_2340_000,1557197715347156 +7247823803417339098_2320_000_2340_000,1557197727848417 +7247823803417339098_2320_000_2340_000,1557197718847355 +7247823803417339098_2320_000_2340_000,1557197728848372 +7247823803417339098_2320_000_2340_000,1557197724849707 +7247823803417339098_2320_000_2340_000,1557197725849623 +7247823803417339098_2320_000_2340_000,1557197717347349 +2601205676330128831_4880_000_4900_000,1555183240199075 +2601205676330128831_4880_000_4900_000,1555183251775192 +2601205676330128831_4880_000_4900_000,1555183242695259 +2601205676330128831_4880_000_4900_000,1555183239698969 +2601205676330128831_4880_000_4900_000,1555183252774590 +2601205676330128831_4880_000_4900_000,1555183239198898 +2601205676330128831_4880_000_4900_000,1555183241697881 +2601205676330128831_4880_000_4900_000,1555183250274996 +2601205676330128831_4880_000_4900_000,1555183248775035 +2601205676330128831_4880_000_4900_000,1555183242196604 +2601205676330128831_4880_000_4900_000,1555183241198707 +2601205676330128831_4880_000_4900_000,1555183252274928 +2601205676330128831_4880_000_4900_000,1555183253274584 +2601205676330128831_4880_000_4900_000,1555183249775067 +2601205676330128831_4880_000_4900_000,1555183238698908 +2601205676330128831_4880_000_4900_000,1555183240699040 +2601205676330128831_4880_000_4900_000,1555183243193747 +2601205676330128831_4880_000_4900_000,1555183251275298 +2601205676330128831_4880_000_4900_000,1555183249275187 +2601205676330128831_4880_000_4900_000,1555183250775187 +14737335824319407706_1980_000_2000_000,1556068257625722 +14737335824319407706_1980_000_2000_000,1556068264624994 +14737335824319407706_1980_000_2000_000,1556068253125108 +14737335824319407706_1980_000_2000_000,1556068256626068 +14737335824319407706_1980_000_2000_000,1556068256125917 +14737335824319407706_1980_000_2000_000,1556068267124989 +14737335824319407706_1980_000_2000_000,1556068254125759 +14737335824319407706_1980_000_2000_000,1556068265124999 +14737335824319407706_1980_000_2000_000,1556068263125013 +14737335824319407706_1980_000_2000_000,1556068266125077 +14737335824319407706_1980_000_2000_000,1556068254626070 +14737335824319407706_1980_000_2000_000,1556068265625046 +14737335824319407706_1980_000_2000_000,1556068255126360 +14737335824319407706_1980_000_2000_000,1556068267624889 +14737335824319407706_1980_000_2000_000,1556068255626085 +14737335824319407706_1980_000_2000_000,1556068266625069 +14737335824319407706_1980_000_2000_000,1556068264124922 +14737335824319407706_1980_000_2000_000,1556068257126022 +14737335824319407706_1980_000_2000_000,1556068253625378 +14737335824319407706_1980_000_2000_000,1556068263624987 +10504764403039842352_460_000_480_000,1558060925875055 +10504764403039842352_460_000_480_000,1558060940374703 +10504764403039842352_460_000_480_000,1558060939874709 +10504764403039842352_460_000_480_000,1558060937374792 +10504764403039842352_460_000_480_000,1558060927874686 +10504764403039842352_460_000_480_000,1558060926874887 +10504764403039842352_460_000_480_000,1558060930375221 +10504764403039842352_460_000_480_000,1558060926375083 +10504764403039842352_460_000_480_000,1558060935875120 +10504764403039842352_460_000_480_000,1558060936375015 +10504764403039842352_460_000_480_000,1558060936874787 +10504764403039842352_460_000_480_000,1558060938875168 +10504764403039842352_460_000_480_000,1558060928875075 
+10504764403039842352_460_000_480_000,1558060937874938 +10504764403039842352_460_000_480_000,1558060928374842 +10504764403039842352_460_000_480_000,1558060929375235 +10504764403039842352_460_000_480_000,1558060938375035 +10504764403039842352_460_000_480_000,1558060939374902 +4140965781175793864_460_000_480_000,1559189068049919 +4140965781175793864_460_000_480_000,1559189060549423 +4140965781175793864_460_000_480_000,1559189058052659 +4140965781175793864_460_000_480_000,1559189070549944 +4140965781175793864_460_000_480_000,1559189071550057 +4140965781175793864_460_000_480_000,1559189067049957 +4140965781175793864_460_000_480_000,1559189061049573 +4140965781175793864_460_000_480_000,1559189059549297 +4140965781175793864_460_000_480_000,1559189067549997 +4140965781175793864_460_000_480_000,1559189058551289 +4140965781175793864_460_000_480_000,1559189057056840 +4140965781175793864_460_000_480_000,1559189069550001 +4140965781175793864_460_000_480_000,1559189068549926 +4140965781175793864_460_000_480_000,1559189069049952 +4140965781175793864_460_000_480_000,1559189059049934 +4140965781175793864_460_000_480_000,1559189057554573 +4140965781175793864_460_000_480_000,1559189070049942 +4140965781175793864_460_000_480_000,1559189061549638 +4140965781175793864_460_000_480_000,1559189071050027 +4140965781175793864_460_000_480_000,1559189060049248 +14188689528137485670_2660_000_2680_000,1555687836099829 +14188689528137485670_2660_000_2680_000,1555687847574536 +14188689528137485670_2660_000_2680_000,1555687834599917 +14188689528137485670_2660_000_2680_000,1555687835599804 +14188689528137485670_2660_000_2680_000,1555687844576878 +14188689528137485670_2660_000_2680_000,1555687838099816 +14188689528137485670_2660_000_2680_000,1555687846574299 +14188689528137485670_2660_000_2680_000,1555687836599840 +14188689528137485670_2660_000_2680_000,1555687837099812 +14188689528137485670_2660_000_2680_000,1555687848074544 +14188689528137485670_2660_000_2680_000,1555687845075193 +14188689528137485670_2660_000_2680_000,1555687834099910 +14188689528137485670_2660_000_2680_000,1555687845574255 +14188689528137485670_2660_000_2680_000,1555687847074492 +14188689528137485670_2660_000_2680_000,1555687835099800 +14188689528137485670_2660_000_2680_000,1555687843582715 +14188689528137485670_2660_000_2680_000,1555687837599851 +14188689528137485670_2660_000_2680_000,1555687833599780 +14188689528137485670_2660_000_2680_000,1555687846074113 +14188689528137485670_2660_000_2680_000,1555687844079474 +18149616047892103767_2460_000_2480_000,1555706658299969 +18149616047892103767_2460_000_2480_000,1555706646800116 +18149616047892103767_2460_000_2480_000,1555706656800049 +18149616047892103767_2460_000_2480_000,1555706647300089 +18149616047892103767_2460_000_2480_000,1555706645799946 +18149616047892103767_2460_000_2480_000,1555706645299873 +18149616047892103767_2460_000_2480_000,1555706644299834 +18149616047892103767_2460_000_2480_000,1555706654299962 +18149616047892103767_2460_000_2480_000,1555706648799880 +18149616047892103767_2460_000_2480_000,1555706656300141 +18149616047892103767_2460_000_2480_000,1555706644799899 +18149616047892103767_2460_000_2480_000,1555706658800051 +18149616047892103767_2460_000_2480_000,1555706655300035 +18149616047892103767_2460_000_2480_000,1555706654799999 +18149616047892103767_2460_000_2480_000,1555706655800109 +18149616047892103767_2460_000_2480_000,1555706657299969 +18149616047892103767_2460_000_2480_000,1555706646300071 +18149616047892103767_2460_000_2480_000,1555706657799945 
+18149616047892103767_2460_000_2480_000,1555706647800020 +18149616047892103767_2460_000_2480_000,1555706648299913 +5026942594071056992_3120_000_3140_000,1555462125499896 +5026942594071056992_3120_000_3140_000,1555462133999526 +5026942594071056992_3120_000_3140_000,1555462131999686 +5026942594071056992_3120_000_3140_000,1555462120999711 +5026942594071056992_3120_000_3140_000,1555462123499771 +5026942594071056992_3120_000_3140_000,1555462132499693 +5026942594071056992_3120_000_3140_000,1555462124499589 +5026942594071056992_3120_000_3140_000,1555462122500198 +5026942594071056992_3120_000_3140_000,1555462123999626 +5026942594071056992_3120_000_3140_000,1555462130999515 +5026942594071056992_3120_000_3140_000,1555462123000001 +5026942594071056992_3120_000_3140_000,1555462121499912 +5026942594071056992_3120_000_3140_000,1555462132999655 +5026942594071056992_3120_000_3140_000,1555462135499500 +5026942594071056992_3120_000_3140_000,1555462124999696 +5026942594071056992_3120_000_3140_000,1555462133499574 +5026942594071056992_3120_000_3140_000,1555462122000279 +5026942594071056992_3120_000_3140_000,1555462134999525 +5026942594071056992_3120_000_3140_000,1555462131499619 +5026942594071056992_3120_000_3140_000,1555462134499515 +11987368976578218644_1340_000_1360_000,1557240254147006 +11987368976578218644_1340_000_1360_000,1557240256647136 +11987368976578218644_1340_000_1360_000,1557240253147019 +11987368976578218644_1340_000_1360_000,1557240264121600 +11987368976578218644_1340_000_1360_000,1557240266622584 +11987368976578218644_1340_000_1360_000,1557240253646981 +11987368976578218644_1340_000_1360_000,1557240263622577 +11987368976578218644_1340_000_1360_000,1557240255647121 +11987368976578218644_1340_000_1360_000,1557240266122577 +11987368976578218644_1340_000_1360_000,1557240252646979 +11987368976578218644_1340_000_1360_000,1557240256147181 +11987368976578218644_1340_000_1360_000,1557240265622400 +11987368976578218644_1340_000_1360_000,1557240263124752 +11987368976578218644_1340_000_1360_000,1557240252147007 +11987368976578218644_1340_000_1360_000,1557240254647011 +11987368976578218644_1340_000_1360_000,1557240264621606 +11987368976578218644_1340_000_1360_000,1557240265121984 +11987368976578218644_1340_000_1360_000,1557240255147121 +11987368976578218644_1340_000_1360_000,1557240262627879 +11987368976578218644_1340_000_1360_000,1557240262131544 +17136775999940024630_4860_000_4880_000,1555381565899350 +17136775999940024630_4860_000_4880_000,1555381569399418 +17136775999940024630_4860_000_4880_000,1555381577399397 +17136775999940024630_4860_000_4880_000,1555381567899452 +17136775999940024630_4860_000_4880_000,1555381579899405 +17136775999940024630_4860_000_4880_000,1555381576399429 +17136775999940024630_4860_000_4880_000,1555381566399384 +17136775999940024630_4860_000_4880_000,1555381569899411 +17136775999940024630_4860_000_4880_000,1555381579399300 +17136775999940024630_4860_000_4880_000,1555381576899420 +17136775999940024630_4860_000_4880_000,1555381565399404 +17136775999940024630_4860_000_4880_000,1555381575399420 +17136775999940024630_4860_000_4880_000,1555381578399393 +17136775999940024630_4860_000_4880_000,1555381567399421 +17136775999940024630_4860_000_4880_000,1555381575899458 +17136775999940024630_4860_000_4880_000,1555381577899394 +17136775999940024630_4860_000_4880_000,1555381568399448 +17136775999940024630_4860_000_4880_000,1555381568899445 +17136775999940024630_4860_000_4880_000,1555381578899304 +10980133015080705026_780_000_800_000,1557159347347517 
+10980133015080705026_780_000_800_000,1557159341347548
+10980133015080705026_780_000_800_000,1557159350347179
+10980133015080705026_780_000_800_000,1557159338347170
+10980133015080705026_780_000_800_000,1557159348347894
+10980133015080705026_780_000_800_000,1557159341847592
+10980133015080705026_780_000_800_000,1557159340347306
+10980133015080705026_780_000_800_000,1557159351347307
+10980133015080705026_780_000_800_000,1557159339347070
+10980133015080705026_780_000_800_000,1557159349347437
+10980133015080705026_780_000_800_000,1557159348847749
+10980133015080705026_780_000_800_000,1557159337346937
+10980133015080705026_780_000_800_000,1557159340847461
+10980133015080705026_780_000_800_000,1557159350847321
+10980133015080705026_780_000_800_000,1557159337847132
+10980133015080705026_780_000_800_000,1557159349847214
+10980133015080705026_780_000_800_000,1557159347847829
+10980133015080705026_780_000_800_000,1557159338847114
+10980133015080705026_780_000_800_000,1557159351847230
+10980133015080705026_780_000_800_000,1557159339847156
+17792628511034220885_2360_000_2380_000,1555038976374997
+17792628511034220885_2360_000_2380_000,1555038978374871
+17792628511034220885_2360_000_2380_000,1555038976874968
+17792628511034220885_2360_000_2380_000,1555038975875022
+17792628511034220885_2360_000_2380_000,1555038968860681
+17792628511034220885_2360_000_2380_000,1555038964850579
+17792628511034220885_2360_000_2380_000,1555038974875036
+17792628511034220885_2360_000_2380_000,1555038977374963
+17792628511034220885_2360_000_2380_000,1555038978874913
+17792628511034220885_2360_000_2380_000,1555038966351482
+17792628511034220885_2360_000_2380_000,1555038979375036
+17792628511034220885_2360_000_2380_000,1555038965850871
+17792628511034220885_2360_000_2380_000,1555038977874932
+17792628511034220885_2360_000_2380_000,1555038967353934
+17792628511034220885_2360_000_2380_000,1555038969363655
+17792628511034220885_2360_000_2380_000,1555038965350606
+17792628511034220885_2360_000_2380_000,1555038966852499
+17792628511034220885_2360_000_2380_000,1555038968358038
diff --git a/Pointcept/pointcept/datasets/preprocessing/waymo/preprocess_waymo.py b/Pointcept/pointcept/datasets/preprocessing/waymo/preprocess_waymo.py
new file mode 100644
index 0000000000000000000000000000000000000000..33a309fc2a03c52b63a43863ac40632ad5daa0a6
--- /dev/null
+++ b/Pointcept/pointcept/datasets/preprocessing/waymo/preprocess_waymo.py
@@ -0,0 +1,387 @@
+"""
+Preprocessing Script for Waymo Open Dataset
+
+Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com)
+Please cite our work if the code is helpful to you.
+"""
+
+import warnings
+
+warnings.filterwarnings("ignore", category=DeprecationWarning)
+
+import os
+
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+
+import argparse
+import numpy as np
+import tensorflow.compat.v1 as tf
+from pathlib import Path
+from waymo_open_dataset.utils import frame_utils
+from waymo_open_dataset.utils import transform_utils
+from waymo_open_dataset.utils import range_image_utils
+from waymo_open_dataset import dataset_pb2 as open_dataset
+import glob
+import multiprocessing as mp
+from concurrent.futures import ProcessPoolExecutor
+from itertools import repeat
+
+
+def create_lidar(frame):
+    """Parse the lidar data of a frame into a flat point array.
+    Args:
+        frame (:obj:`Frame`): Open dataset frame proto.
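+    Returns:
+        velodyne (np.ndarray): flat array of (x, y, z, intensity) values
+            concatenated over both lidar returns; callers reshape to (-1, 4).
+        valid_masks (list): range-image validity masks for the first and
+            second returns.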
+ """ + ( + range_images, + camera_projections, + segmentation_labels, + range_image_top_pose, + ) = frame_utils.parse_range_image_and_camera_projection(frame) + + points, cp_points, valid_masks = convert_range_image_to_point_cloud( + frame, + range_images, + camera_projections, + range_image_top_pose, + keep_polar_features=True, + ) + points_ri2, cp_points_ri2, valid_masks_ri2 = convert_range_image_to_point_cloud( + frame, + range_images, + camera_projections, + range_image_top_pose, + ri_index=1, + keep_polar_features=True, + ) + + # 3d points in vehicle frame. + points_all = np.concatenate(points, axis=0) + points_all_ri2 = np.concatenate(points_ri2, axis=0) + # point labels. + + points_all = np.concatenate([points_all, points_all_ri2], axis=0) + + velodyne = np.c_[points_all[:, 3:6], points_all[:, 1]] + velodyne = velodyne.reshape((velodyne.shape[0] * velodyne.shape[1])) + + valid_masks = [valid_masks, valid_masks_ri2] + return velodyne, valid_masks + + +def create_label(frame): + ( + range_images, + camera_projections, + segmentation_labels, + range_image_top_pose, + ) = frame_utils.parse_range_image_and_camera_projection(frame) + + point_labels = convert_range_image_to_point_cloud_labels( + frame, range_images, segmentation_labels + ) + point_labels_ri2 = convert_range_image_to_point_cloud_labels( + frame, range_images, segmentation_labels, ri_index=1 + ) + + # point labels. + point_labels_all = np.concatenate(point_labels, axis=0) + point_labels_all_ri2 = np.concatenate(point_labels_ri2, axis=0) + point_labels_all = np.concatenate([point_labels_all, point_labels_all_ri2], axis=0) + + labels = point_labels_all + return labels + + +def convert_range_image_to_cartesian( + frame, range_images, range_image_top_pose, ri_index=0, keep_polar_features=False +): + """Convert range images from polar coordinates to Cartesian coordinates. + + Args: + frame: open dataset frame + range_images: A dict of {laser_name, [range_image_first_return, + range_image_second_return]}. + range_image_top_pose: range image pixel pose for top lidar. + ri_index: 0 for the first return, 1 for the second return. + keep_polar_features: If true, keep the features from the polar range image + (i.e. range, intensity, and elongation) as the first features in the + output range image. + + Returns: + dict of {laser_name, (H, W, D)} range images in Cartesian coordinates. D + will be 3 if keep_polar_features is False (x, y, z) and 6 if + keep_polar_features is True (range, intensity, elongation, x, y, z). 
+ """ + cartesian_range_images = {} + frame_pose = tf.convert_to_tensor( + value=np.reshape(np.array(frame.pose.transform), [4, 4]) + ) + + # [H, W, 6] + range_image_top_pose_tensor = tf.reshape( + tf.convert_to_tensor(value=range_image_top_pose.data), + range_image_top_pose.shape.dims, + ) + # [H, W, 3, 3] + range_image_top_pose_tensor_rotation = transform_utils.get_rotation_matrix( + range_image_top_pose_tensor[..., 0], + range_image_top_pose_tensor[..., 1], + range_image_top_pose_tensor[..., 2], + ) + range_image_top_pose_tensor_translation = range_image_top_pose_tensor[..., 3:] + range_image_top_pose_tensor = transform_utils.get_transform( + range_image_top_pose_tensor_rotation, range_image_top_pose_tensor_translation + ) + + for c in frame.context.laser_calibrations: + range_image = range_images[c.name][ri_index] + if len(c.beam_inclinations) == 0: # pylint: disable=g-explicit-length-test + beam_inclinations = range_image_utils.compute_inclination( + tf.constant([c.beam_inclination_min, c.beam_inclination_max]), + height=range_image.shape.dims[0], + ) + else: + beam_inclinations = tf.constant(c.beam_inclinations) + + beam_inclinations = tf.reverse(beam_inclinations, axis=[-1]) + extrinsic = np.reshape(np.array(c.extrinsic.transform), [4, 4]) + + range_image_tensor = tf.reshape( + tf.convert_to_tensor(value=range_image.data), range_image.shape.dims + ) + pixel_pose_local = None + frame_pose_local = None + if c.name == open_dataset.LaserName.TOP: + pixel_pose_local = range_image_top_pose_tensor + pixel_pose_local = tf.expand_dims(pixel_pose_local, axis=0) + frame_pose_local = tf.expand_dims(frame_pose, axis=0) + range_image_cartesian = range_image_utils.extract_point_cloud_from_range_image( + tf.expand_dims(range_image_tensor[..., 0], axis=0), + tf.expand_dims(extrinsic, axis=0), + tf.expand_dims(tf.convert_to_tensor(value=beam_inclinations), axis=0), + pixel_pose=pixel_pose_local, + frame_pose=frame_pose_local, + ) + + range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0) + + if keep_polar_features: + # If we want to keep the polar coordinate features of range, intensity, + # and elongation, concatenate them to be the initial dimensions of the + # returned Cartesian range image. + range_image_cartesian = tf.concat( + [range_image_tensor[..., 0:3], range_image_cartesian], axis=-1 + ) + + cartesian_range_images[c.name] = range_image_cartesian + + return cartesian_range_images + + +def convert_range_image_to_point_cloud( + frame, + range_images, + camera_projections, + range_image_top_pose, + ri_index=0, + keep_polar_features=False, +): + """Convert range images to point cloud. + + Args: + frame: open dataset frame + range_images: A dict of {laser_name, [range_image_first_return, + range_image_second_return]}. + camera_projections: A dict of {laser_name, + [camera_projection_from_first_return, + camera_projection_from_second_return]}. + range_image_top_pose: range image pixel pose for top lidar. + ri_index: 0 for the first return, 1 for the second return. + keep_polar_features: If true, keep the features from the polar range image + (i.e. range, intensity, and elongation) as the first features in the + output range image. + + Returns: + points: {[N, 3]} list of 3d lidar points of length 5 (number of lidars). + (NOTE: Will be {[N, 6]} if keep_polar_features is true. + cp_points: {[N, 6]} list of camera projections of length 5 + (number of lidars). 
+ """ + calibrations = sorted(frame.context.laser_calibrations, key=lambda c: c.name) + points = [] + cp_points = [] + valid_masks = [] + + cartesian_range_images = convert_range_image_to_cartesian( + frame, range_images, range_image_top_pose, ri_index, keep_polar_features + ) + + for c in calibrations: + range_image = range_images[c.name][ri_index] + range_image_tensor = tf.reshape( + tf.convert_to_tensor(value=range_image.data), range_image.shape.dims + ) + range_image_mask = range_image_tensor[..., 0] > 0 + + range_image_cartesian = cartesian_range_images[c.name] + points_tensor = tf.gather_nd( + range_image_cartesian, tf.compat.v1.where(range_image_mask) + ) + + cp = camera_projections[c.name][ri_index] + cp_tensor = tf.reshape(tf.convert_to_tensor(value=cp.data), cp.shape.dims) + cp_points_tensor = tf.gather_nd(cp_tensor, tf.compat.v1.where(range_image_mask)) + points.append(points_tensor.numpy()) + cp_points.append(cp_points_tensor.numpy()) + valid_masks.append(range_image_mask.numpy()) + + return points, cp_points, valid_masks + + +def convert_range_image_to_point_cloud_labels( + frame, range_images, segmentation_labels, ri_index=0 +): + """Convert segmentation labels from range images to point clouds. + + Args: + frame: open dataset frame + range_images: A dict of {laser_name, [range_image_first_return, + range_image_second_return]}. + segmentation_labels: A dict of {laser_name, [range_image_first_return, + range_image_second_return]}. + ri_index: 0 for the first return, 1 for the second return. + + Returns: + point_labels: {[N, 2]} list of 3d lidar points's segmentation labels. 0 for + points that are not labeled. + """ + calibrations = sorted(frame.context.laser_calibrations, key=lambda c: c.name) + point_labels = [] + for c in calibrations: + range_image = range_images[c.name][ri_index] + range_image_tensor = tf.reshape( + tf.convert_to_tensor(range_image.data), range_image.shape.dims + ) + range_image_mask = range_image_tensor[..., 0] > 0 + + if c.name in segmentation_labels: + sl = segmentation_labels[c.name][ri_index] + sl_tensor = tf.reshape(tf.convert_to_tensor(sl.data), sl.shape.dims) + sl_points_tensor = tf.gather_nd(sl_tensor, tf.where(range_image_mask)) + else: + num_valid_point = tf.math.reduce_sum(tf.cast(range_image_mask, tf.int32)) + sl_points_tensor = tf.zeros([num_valid_point, 2], dtype=tf.int32) + + point_labels.append(sl_points_tensor.numpy()) + return point_labels + + +def handle_process(file_path, output_root, test_frame_list): + file = os.path.basename(file_path) + split = os.path.basename(os.path.dirname(file_path)) + print(f"Parsing {split}/{file}") + save_path = Path(output_root) / split / file.split(".")[0] + + data_group = tf.data.TFRecordDataset(file_path, compression_type="") + for data in data_group: + frame = open_dataset.Frame() + frame.ParseFromString(bytearray(data.numpy())) + context_name = frame.context.name + timestamp = str(frame.timestamp_micros) + + if split != "testing": + # for training and validation frame, extract labelled frame + if not frame.lasers[0].ri_return1.segmentation_label_compressed: + continue + else: + # for testing frame, extract frame in test_frame_list + if f"{context_name},{timestamp}" not in test_frame_list: + continue + + os.makedirs(save_path / timestamp, exist_ok=True) + + # extract frame pass above check + point_cloud, valid_masks = create_lidar(frame) + point_cloud = point_cloud.reshape(-1, 4) + coord = point_cloud[:, :3] + strength = np.tanh(point_cloud[:, -1].reshape([-1, 1])) + pose = 
np.array(frame.pose.transform, np.float32).reshape(4, 4) + mask = np.array(valid_masks, dtype=object) + + np.save(save_path / timestamp / "coord.npy", coord) + np.save(save_path / timestamp / "strength.npy", strength) + np.save(save_path / timestamp / "pose.npy", pose) + + # save mask for reverse prediction + if split != "training": + np.save(save_path / timestamp / "mask.npy", mask) + + # save label + if split != "testing": + # ignore TYPE_UNDEFINED, ignore_index 0 -> -1 + label = create_label(frame)[:, 1].reshape([-1]) - 1 + np.save(save_path / timestamp / "segment.npy", label) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset_root", + required=True, + help="Path to the Waymo dataset", + ) + parser.add_argument( + "--output_root", + required=True, + help="Output path where train/val folders will be located", + ) + parser.add_argument( + "--splits", + required=True, + nargs="+", + choices=["training", "validation", "testing"], + help="Splits need to process ([training, validation, testing]).", + ) + parser.add_argument( + "--num_workers", + default=mp.cpu_count(), + type=int, + help="Num workers for preprocessing.", + ) + config = parser.parse_args() + + # load file list + file_list = glob.glob( + os.path.join(os.path.abspath(config.dataset_root), "*", "*.tfrecord") + ) + assert len(file_list) == 1150 + + # Create output directories + for split in config.splits: + os.makedirs(os.path.join(config.output_root, split), exist_ok=True) + + file_list = [ + file + for file in file_list + if os.path.basename(os.path.dirname(file)) in config.splits + ] + + # Load test frame list + test_frame_file = os.path.join( + os.path.dirname(__file__), "3d_semseg_test_set_frames.txt" + ) + test_frame_list = [x.rstrip() for x in (open(test_frame_file, "r").readlines())] + + # Preprocess data. + print("Processing scenes...") + pool = ProcessPoolExecutor(max_workers=config.num_workers) + _ = list( + pool.map( + handle_process, + file_list, + repeat(config.output_root), + repeat(test_frame_list), + ) + ) diff --git a/Pointcept/pointcept/datasets/s3dis.py b/Pointcept/pointcept/datasets/s3dis.py new file mode 100644 index 0000000000000000000000000000000000000000..1cfc176380caaa42d35bbf517ae2f13bf5c86f90 --- /dev/null +++ b/Pointcept/pointcept/datasets/s3dis.py @@ -0,0 +1,18 @@ +""" +S3DIS Dataset + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os +from .defaults import DefaultDataset +from .builder import DATASETS + + +@DATASETS.register_module() +class S3DISDataset(DefaultDataset): + def get_data_name(self, idx): + remain, room_name = os.path.split(self.data_list[idx % len(self.data_list)]) + remain, area_name = os.path.split(remain) + return f"{area_name}-{room_name}" diff --git a/Pointcept/pointcept/datasets/scannet.py b/Pointcept/pointcept/datasets/scannet.py new file mode 100644 index 0000000000000000000000000000000000000000..d3f3ab8832e37d64c96cdf836741a4f0fc3ad9ff --- /dev/null +++ b/Pointcept/pointcept/datasets/scannet.py @@ -0,0 +1,116 @@ +""" +ScanNet20 / ScanNet200 / ScanNet Data Efficient Dataset + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
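+
+Supports the ScanNet data-efficient benchmark via the optional lr_file
+(limited scene reconstructions) and la_file (limited annotations) arguments.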
+""" + +import os +import glob +import numpy as np +import torch +from copy import deepcopy +from torch.utils.data import Dataset +from collections.abc import Sequence + +from pointcept.utils.logger import get_root_logger +from pointcept.utils.cache import shared_dict +from .builder import DATASETS +from .defaults import DefaultDataset +from .transform import Compose, TRANSFORMS +from .preprocessing.scannet.meta_data.scannet200_constants import ( + VALID_CLASS_IDS_20, + VALID_CLASS_IDS_200, +) + + +@DATASETS.register_module() +class ScanNetDataset(DefaultDataset): + VALID_ASSETS = [ + "coord", + "color", + "normal", + "segment20", + "instance", + ] + class2id = np.array(VALID_CLASS_IDS_20) + + def __init__( + self, + lr_file=None, + la_file=None, + **kwargs, + ): + self.lr = np.loadtxt(lr_file, dtype=str) if lr_file is not None else None + self.la = torch.load(la_file) if la_file is not None else None + super().__init__(**kwargs) + + def get_data_list(self): + if self.lr is None: + data_list = super().get_data_list() + else: + data_list = [ + os.path.join(self.data_root, "train", name) for name in self.lr + ] + return data_list + + def get_data(self, idx): + data_path = self.data_list[idx % len(self.data_list)] + name = self.get_data_name(idx) + if self.cache: + cache_name = f"pointcept-{name}" + return shared_dict(cache_name) + + data_dict = {} + assets = os.listdir(data_path) + for asset in assets: + if not asset.endswith(".npy"): + continue + if asset[:-4] not in self.VALID_ASSETS: + continue + data_dict[asset[:-4]] = np.load(os.path.join(data_path, asset)) + data_dict["name"] = name + data_dict["coord"] = data_dict["coord"].astype(np.float32) + data_dict["color"] = data_dict["color"].astype(np.float32) + data_dict["normal"] = data_dict["normal"].astype(np.float32) + + if "segment20" in data_dict.keys(): + data_dict["segment"] = ( + data_dict.pop("segment20").reshape([-1]).astype(np.int32) + ) + elif "segment200" in data_dict.keys(): + data_dict["segment"] = ( + data_dict.pop("segment200").reshape([-1]).astype(np.int32) + ) + else: + data_dict["segment"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + + if "instance" in data_dict.keys(): + data_dict["instance"] = ( + data_dict.pop("instance").reshape([-1]).astype(np.int32) + ) + else: + data_dict["instance"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + if self.la: + sampled_index = self.la[self.get_data_name(idx)] + mask = np.ones_like(data_dict["segment"], dtype=bool) + mask[sampled_index] = False + data_dict["segment"][mask] = self.ignore_index + data_dict["sampled_index"] = sampled_index + return data_dict + + +@DATASETS.register_module() +class ScanNet200Dataset(ScanNetDataset): + VALID_ASSETS = [ + "coord", + "color", + "normal", + "segment200", + "instance", + ] + class2id = np.array(VALID_CLASS_IDS_200) diff --git a/Pointcept/pointcept/datasets/scannet_pair.py b/Pointcept/pointcept/datasets/scannet_pair.py new file mode 100644 index 0000000000000000000000000000000000000000..b7fdf199aa90b113bb4e8c7643e5549f62d0c51a --- /dev/null +++ b/Pointcept/pointcept/datasets/scannet_pair.py @@ -0,0 +1,89 @@ +""" +ScanNet Pair Dataset (Frame-level contrastive view) + +Refer PointContrast + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
+""" + +import os +import glob +import numpy as np +import torch +from copy import deepcopy +from torch.utils.data import Dataset + +from pointcept.utils.logger import get_root_logger +from .builder import DATASETS +from .transform import Compose, TRANSFORMS + + +@DATASETS.register_module() +class ScanNetPairDataset(Dataset): + def __init__( + self, + data_root="data/scannet_pair", + overlap_threshold=0.3, + view1_transform=None, + view2_transform=None, + loop=1, + **kwargs + ): + super(ScanNetPairDataset, self).__init__() + self.data_root = data_root + self.overlap_threshold = overlap_threshold + self.view1_transform = Compose(view1_transform) + self.view2_transform = Compose(view2_transform) + self.loop = loop + self.data_list = self.get_data_list() + logger = get_root_logger() + logger.info("Totally {} x {} samples.".format(len(self.data_list), self.loop)) + + def get_data_list(self): + data_list = [] + overlap_list = glob.glob( + os.path.join(self.data_root, "*", "pcd", "overlap.txt") + ) + for overlap_file in overlap_list: + with open(overlap_file) as f: + overlap = f.readlines() + overlap = [pair.strip().split() for pair in overlap] + data_list.extend( + [ + pair[:2] + for pair in overlap + if float(pair[2]) > self.overlap_threshold + ] + ) + return data_list + + def get_data(self, idx): + pair = self.data_list[idx % len(self.data_list)] + view1_dict = torch.load(self.data_root + pair[0]) + view2_dict = torch.load(self.data_root + pair[1]) + return view1_dict, view2_dict + + def get_data_name(self, idx): + return os.path.basename(self.data_list[idx % len(self.data_list)]).split(".")[0] + + def prepare_train_data(self, idx): + # load data + view1_dict, view2_dict = self.get_data(idx) + view1_dict = self.view1_transform(view1_dict) + view2_dict = self.view2_transform(view2_dict) + data_dict = dict() + for key, value in view1_dict.items(): + data_dict["view1_" + key] = value + for key, value in view2_dict.items(): + data_dict["view2_" + key] = value + return data_dict + + def prepare_test_data(self, idx): + raise NotImplementedError + + def __getitem__(self, idx): + return self.prepare_train_data(idx) + + def __len__(self): + return len(self.data_list) * self.loop diff --git a/Pointcept/pointcept/datasets/scannetpp.py b/Pointcept/pointcept/datasets/scannetpp.py new file mode 100644 index 0000000000000000000000000000000000000000..5954e484081b32f7c6408e5efcf08f1af7449917 --- /dev/null +++ b/Pointcept/pointcept/datasets/scannetpp.py @@ -0,0 +1,78 @@ +""" +ScanNet++ dataset + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
+""" + +import os +import numpy as np +import glob + +from pointcept.utils.cache import shared_dict + +from .builder import DATASETS +from .defaults import DefaultDataset + + +@DATASETS.register_module() +class ScanNetPPDataset(DefaultDataset): + VALID_ASSETS = [ + "coord", + "color", + "normal", + "segment", + "instance", + ] + + def __init__( + self, + multilabel=False, + **kwargs, + ): + super().__init__(**kwargs) + self.multilabel = multilabel + + def get_data(self, idx): + data_path = self.data_list[idx % len(self.data_list)] + name = self.get_data_name(idx) + if self.cache: + cache_name = f"pointcept-{name}" + return shared_dict(cache_name) + + data_dict = {} + assets = os.listdir(data_path) + for asset in assets: + if not asset.endswith(".npy"): + continue + if asset[:-4] not in self.VALID_ASSETS: + continue + data_dict[asset[:-4]] = np.load(os.path.join(data_path, asset)) + data_dict["name"] = name + + if "coord" in data_dict.keys(): + data_dict["coord"] = data_dict["coord"].astype(np.float32) + + if "color" in data_dict.keys(): + data_dict["color"] = data_dict["color"].astype(np.float32) + + if "normal" in data_dict.keys(): + data_dict["normal"] = data_dict["normal"].astype(np.float32) + + if not self.multilabel: + if "segment" in data_dict.keys(): + data_dict["segment"] = data_dict["segment"][:, 0].astype(np.int32) + else: + data_dict["segment"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + + if "instance" in data_dict.keys(): + data_dict["instance"] = data_dict["instance"][:, 0].astype(np.int32) + else: + data_dict["instance"] = ( + np.ones(data_dict["coord"].shape[0], dtype=np.int32) * -1 + ) + else: + raise NotImplementedError + return data_dict diff --git a/Pointcept/pointcept/datasets/semantic_kitti.py b/Pointcept/pointcept/datasets/semantic_kitti.py new file mode 100644 index 0000000000000000000000000000000000000000..cd1354f6043792bf50d367c63168b6fd0455038b --- /dev/null +++ b/Pointcept/pointcept/datasets/semantic_kitti.py @@ -0,0 +1,144 @@ +""" +Semantic KITTI dataset + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
+""" + +import os +import numpy as np + +from .builder import DATASETS +from .defaults import DefaultDataset + + +@DATASETS.register_module() +class SemanticKITTIDataset(DefaultDataset): + def __init__(self, ignore_index=-1, **kwargs): + self.ignore_index = ignore_index + self.learning_map = self.get_learning_map(ignore_index) + self.learning_map_inv = self.get_learning_map_inv(ignore_index) + super().__init__(ignore_index=ignore_index, **kwargs) + + def get_data_list(self): + split2seq = dict( + train=[0, 1, 2, 3, 4, 5, 6, 7, 9, 10], + val=[8], + test=[11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21], + ) + if isinstance(self.split, str): + seq_list = split2seq[self.split] + elif isinstance(self.split, list): + seq_list = [] + for split in self.split: + seq_list += split2seq[split] + else: + raise NotImplementedError + + data_list = [] + for seq in seq_list: + seq = str(seq).zfill(2) + seq_folder = os.path.join(self.data_root, "dataset", "sequences", seq) + seq_files = sorted(os.listdir(os.path.join(seq_folder, "velodyne"))) + data_list += [ + os.path.join(seq_folder, "velodyne", file) for file in seq_files + ] + return data_list + + def get_data(self, idx): + data_path = self.data_list[idx % len(self.data_list)] + with open(data_path, "rb") as b: + scan = np.fromfile(b, dtype=np.float32).reshape(-1, 4) + coord = scan[:, :3] + strength = scan[:, -1].reshape([-1, 1]) + + label_file = data_path.replace("velodyne", "labels").replace(".bin", ".label") + if os.path.exists(label_file): + with open(label_file, "rb") as a: + segment = np.fromfile(a, dtype=np.int32).reshape(-1) + segment = np.vectorize(self.learning_map.__getitem__)( + segment & 0xFFFF + ).astype(np.int32) + else: + segment = np.zeros(scan.shape[0]).astype(np.int32) + data_dict = dict( + coord=coord, + strength=strength, + segment=segment, + name=self.get_data_name(idx), + ) + return data_dict + + def get_data_name(self, idx): + file_path = self.data_list[idx % len(self.data_list)] + dir_path, file_name = os.path.split(file_path) + sequence_name = os.path.basename(os.path.dirname(dir_path)) + frame_name = os.path.splitext(file_name)[0] + data_name = f"{sequence_name}_{frame_name}" + return data_name + + @staticmethod + def get_learning_map(ignore_index): + learning_map = { + 0: ignore_index, # "unlabeled" + 1: ignore_index, # "outlier" mapped to "unlabeled" --------------------------mapped + 10: 0, # "car" + 11: 1, # "bicycle" + 13: 4, # "bus" mapped to "other-vehicle" --------------------------mapped + 15: 2, # "motorcycle" + 16: 4, # "on-rails" mapped to "other-vehicle" ---------------------mapped + 18: 3, # "truck" + 20: 4, # "other-vehicle" + 30: 5, # "person" + 31: 6, # "bicyclist" + 32: 7, # "motorcyclist" + 40: 8, # "road" + 44: 9, # "parking" + 48: 10, # "sidewalk" + 49: 11, # "other-ground" + 50: 12, # "building" + 51: 13, # "fence" + 52: ignore_index, # "other-structure" mapped to "unlabeled" ------------------mapped + 60: 8, # "lane-marking" to "road" ---------------------------------mapped + 70: 14, # "vegetation" + 71: 15, # "trunk" + 72: 16, # "terrain" + 80: 17, # "pole" + 81: 18, # "traffic-sign" + 99: ignore_index, # "other-object" to "unlabeled" ----------------------------mapped + 252: 0, # "moving-car" to "car" ------------------------------------mapped + 253: 6, # "moving-bicyclist" to "bicyclist" ------------------------mapped + 254: 5, # "moving-person" to "person" ------------------------------mapped + 255: 7, # "moving-motorcyclist" to "motorcyclist" ------------------mapped + 256: 4, # "moving-on-rails" mapped 
to "other-vehicle" --------------mapped + 257: 4, # "moving-bus" mapped to "other-vehicle" -------------------mapped + 258: 3, # "moving-truck" to "truck" --------------------------------mapped + 259: 4, # "moving-other"-vehicle to "other-vehicle" ----------------mapped + } + return learning_map + + @staticmethod + def get_learning_map_inv(ignore_index): + learning_map_inv = { + ignore_index: ignore_index, # "unlabeled" + 0: 10, # "car" + 1: 11, # "bicycle" + 2: 15, # "motorcycle" + 3: 18, # "truck" + 4: 20, # "other-vehicle" + 5: 30, # "person" + 6: 31, # "bicyclist" + 7: 32, # "motorcyclist" + 8: 40, # "road" + 9: 44, # "parking" + 10: 48, # "sidewalk" + 11: 49, # "other-ground" + 12: 50, # "building" + 13: 51, # "fence" + 14: 70, # "vegetation" + 15: 71, # "trunk" + 16: 72, # "terrain" + 17: 80, # "pole" + 18: 81, # "traffic-sign" + } + return learning_map_inv diff --git a/Pointcept/pointcept/datasets/shapenet_part.py b/Pointcept/pointcept/datasets/shapenet_part.py new file mode 100644 index 0000000000000000000000000000000000000000..ca0c3d50adf98b643233e28b56631654479fde60 --- /dev/null +++ b/Pointcept/pointcept/datasets/shapenet_part.py @@ -0,0 +1,160 @@ +""" +ShapeNet Part Dataset (Unmaintained) + +get processed shapenet part dataset +at "https://shapenet.cs.stanford.edu/media/shapenetcore_partanno_segmentation_benchmark_v0_normal.zip" + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os +import json +import torch +import numpy as np +from copy import deepcopy +from torch.utils.data import Dataset + +from pointcept.utils.logger import get_root_logger +from .builder import DATASETS +from .transform import Compose + + +@DATASETS.register_module() +class ShapeNetPartDataset(Dataset): + def __init__( + self, + split="train", + data_root="data/shapenetcore_partanno_segmentation_benchmark_v0_normal", + transform=None, + test_mode=False, + test_cfg=None, + loop=1, + ): + super(ShapeNetPartDataset, self).__init__() + self.data_root = data_root + self.split = split + self.transform = Compose(transform) + self.loop = ( + loop if not test_mode else 1 + ) # force make loop = 1 while in test mode + self.test_mode = test_mode + self.test_cfg = test_cfg if test_mode else None + self.cache = {} + + # load categories file + self.categories = [] + self.category2part = { + "Airplane": [0, 1, 2, 3], + "Bag": [4, 5], + "Cap": [6, 7], + "Car": [8, 9, 10, 11], + "Chair": [12, 13, 14, 15], + "Earphone": [16, 17, 18], + "Guitar": [19, 20, 21], + "Knife": [22, 23], + "Lamp": [24, 25, 26, 27], + "Laptop": [28, 29], + "Motorbike": [30, 31, 32, 33, 34, 35], + "Mug": [36, 37], + "Pistol": [38, 39, 40], + "Rocket": [41, 42, 43], + "Skateboard": [44, 45, 46], + "Table": [47, 48, 49], + } + self.token2category = {} + with open(os.path.join(self.data_root, "synsetoffset2category.txt"), "r") as f: + for line in f: + ls = line.strip().split() + self.token2category[ls[1]] = len(self.categories) + self.categories.append(ls[0]) + + if test_mode: + self.post_transform = Compose(self.test_cfg.post_transform) + self.aug_transform = [Compose(aug) for aug in self.test_cfg.aug_transform] + + # load data list + if isinstance(self.split, str): + self.data_list = self.load_data_list(self.split) + elif isinstance(self.split, list): + self.data_list = [] + for s in self.split: + self.data_list += self.load_data_list(s) + else: + raise NotImplementedError + + logger = get_root_logger() + logger.info( + "Totally {} x {} samples in {} set.".format( + len(self.data_idx), 
self.loop, split + ) + ) + + def load_data_list(self, split): + split_file = os.path.join( + self.data_root, + "train_test_split", + "shuffled_{}_file_list.json".format(split), + ) + if not os.path.isfile(split_file): + raise (RuntimeError("Split file do not exist: " + split_file + "\n")) + with open(split_file, "r") as f: + # drop "shape_data/" and append ".txt" + data_list = [ + os.path.join(self.data_root, data[11:] + ".txt") + for data in json.load(f) + ] + return data_list + + def prepare_train_data(self, idx): + # load data + data_idx = idx % len(self.data_list) + if data_idx in self.cache: + coord, norm, segment, cls_token = self.cache[data_idx] + else: + data = np.loadtxt(self.data_list[data_idx]).astype(np.float32) + cls_token = self.token2category[ + os.path.basename(os.path.dirname(self.data_list[data_idx])) + ] + coord, norm, segment = ( + data[:, :3], + data[:, 3:6], + data[:, 6].astype(np.int32), + ) + self.cache[data_idx] = (coord, norm, segment, cls_token) + + data_dict = dict(coord=coord, norm=norm, segment=segment, cls_token=cls_token) + data_dict = self.transform(data_dict) + return data_dict + + def prepare_test_data(self, idx): + # load data + data_idx = self.data_idx[idx % len(self.data_idx)] + data = np.loadtxt(self.data_list[data_idx]).astype(np.float32) + cls_token = self.token2category[ + os.path.basename(os.path.dirname(self.data_list[data_idx])) + ] + coord, norm, segment = data[:, :3], data[:, 3:6], data[:, 6].astype(np.int32) + + data_dict = dict(coord=coord, norm=norm, cls_token=cls_token) + data_dict = self.transform(data_dict) + data_dict_list = [] + for aug in self.aug_transform: + data_dict_list.append(self.post_transform(aug(deepcopy(data_dict)))) + data_dict = dict( + fragment_list=data_dict_list, segment=segment, name=self.get_data_name(idx) + ) + return data_dict + + def get_data_name(self, idx): + data_idx = self.data_idx[idx % len(self.data_idx)] + return os.path.basename(self.data_list[data_idx]).split(".")[0] + + def __getitem__(self, idx): + if self.test_mode: + return self.prepare_test_data(idx) + else: + return self.prepare_train_data(idx) + + def __len__(self): + return len(self.data_idx) * self.loop diff --git a/Pointcept/pointcept/datasets/structure3d.py b/Pointcept/pointcept/datasets/structure3d.py new file mode 100644 index 0000000000000000000000000000000000000000..81649e3d83275f0419546f37da810e7508b327e0 --- /dev/null +++ b/Pointcept/pointcept/datasets/structure3d.py @@ -0,0 +1,38 @@ +""" +Structured3D Datasets + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
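+
+Scenes are expected to be laid out as <data_root>/<split>/scene_*/room_*.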
+""" + +import os +import glob +from collections.abc import Sequence + +from .defaults import DefaultDataset +from .builder import DATASETS + + +@DATASETS.register_module() +class Structured3DDataset(DefaultDataset): + def get_data_list(self): + if isinstance(self.split, str): + data_list = glob.glob( + os.path.join(self.data_root, self.split, "scene_*/room_*") + ) + elif isinstance(self.split, Sequence): + data_list = [] + for split in self.split: + data_list += glob.glob( + os.path.join(self.data_root, split, "scene_*/room_*") + ) + else: + raise NotImplementedError + return data_list + + def get_data_name(self, idx): + file_path = self.data_list[idx % len(self.data_list)] + dir_path, room_name = os.path.split(file_path) + scene_name = os.path.basename(dir_path) + data_name = f"{scene_name}_{room_name}" + return data_name diff --git a/Pointcept/pointcept/datasets/transform.py b/Pointcept/pointcept/datasets/transform.py new file mode 100644 index 0000000000000000000000000000000000000000..d1abfe4f3d88decdac17861b31647c34f9520292 --- /dev/null +++ b/Pointcept/pointcept/datasets/transform.py @@ -0,0 +1,1148 @@ +""" +3D Point Cloud Augmentation + +Inspirited by chrischoy/SpatioTemporalSegmentation + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import random +import numbers +import scipy +import scipy.ndimage +import scipy.interpolate +import scipy.stats +import numpy as np +import torch +import copy +from collections.abc import Sequence, Mapping + +from pointcept.utils.registry import Registry + +TRANSFORMS = Registry("transforms") + + +@TRANSFORMS.register_module() +class Collect(object): + def __init__(self, keys, offset_keys_dict=None, **kwargs): + """ + e.g. Collect(keys=[coord], feat_keys=[coord, color]) + """ + if offset_keys_dict is None: + offset_keys_dict = dict(offset="coord") + self.keys = keys + self.offset_keys = offset_keys_dict + self.kwargs = kwargs + + def __call__(self, data_dict): + data = dict() + if isinstance(self.keys, str): + self.keys = [self.keys] + for key in self.keys: + data[key] = data_dict[key] + for key, value in self.offset_keys.items(): + data[key] = torch.tensor([data_dict[value].shape[0]]) + for name, keys in self.kwargs.items(): + name = name.replace("_keys", "") + assert isinstance(keys, Sequence) + data[name] = torch.cat([data_dict[key].float() for key in keys], dim=1) + return data + + +@TRANSFORMS.register_module() +class Copy(object): + def __init__(self, keys_dict=None): + if keys_dict is None: + keys_dict = dict(coord="origin_coord", segment="origin_segment") + self.keys_dict = keys_dict + + def __call__(self, data_dict): + for key, value in self.keys_dict.items(): + if isinstance(data_dict[key], np.ndarray): + data_dict[value] = data_dict[key].copy() + elif isinstance(data_dict[key], torch.Tensor): + data_dict[value] = data_dict[key].clone().detach() + else: + data_dict[value] = copy.deepcopy(data_dict[key]) + return data_dict + + +@TRANSFORMS.register_module() +class ToTensor(object): + def __call__(self, data): + if isinstance(data, torch.Tensor): + return data + elif isinstance(data, str): + # note that str is also a kind of sequence, judgement should before sequence + return data + elif isinstance(data, int): + return torch.LongTensor([data]) + elif isinstance(data, float): + return torch.FloatTensor([data]) + elif isinstance(data, np.ndarray) and np.issubdtype(data.dtype, bool): + return torch.from_numpy(data) + elif isinstance(data, np.ndarray) and np.issubdtype(data.dtype, 
np.integer):
+            return torch.from_numpy(data).long()
+        elif isinstance(data, np.ndarray) and np.issubdtype(data.dtype, np.floating):
+            return torch.from_numpy(data).float()
+        elif isinstance(data, Mapping):
+            result = {sub_key: self(item) for sub_key, item in data.items()}
+            return result
+        elif isinstance(data, Sequence):
+            result = [self(item) for item in data]
+            return result
+        else:
+            raise TypeError(f"type {type(data)} cannot be converted to tensor.")
+
+
+@TRANSFORMS.register_module()
+class Add(object):
+    def __init__(self, keys_dict=None):
+        if keys_dict is None:
+            keys_dict = dict()
+        self.keys_dict = keys_dict
+
+    def __call__(self, data_dict):
+        for key, value in self.keys_dict.items():
+            data_dict[key] = value
+        return data_dict
+
+
+@TRANSFORMS.register_module()
+class NormalizeColor(object):
+    def __call__(self, data_dict):
+        if "color" in data_dict.keys():
+            data_dict["color"] = data_dict["color"] / 127.5 - 1
+        return data_dict
+
+
+@TRANSFORMS.register_module()
+class NormalizeCoord(object):
+    def __call__(self, data_dict):
+        if "coord" in data_dict.keys():
+            # modified from pointnet2
+            centroid = np.mean(data_dict["coord"], axis=0)
+            data_dict["coord"] -= centroid
+            m = np.max(np.sqrt(np.sum(data_dict["coord"] ** 2, axis=1)))
+            data_dict["coord"] = data_dict["coord"] / m
+        return data_dict
+
+
+@TRANSFORMS.register_module()
+class PositiveShift(object):
+    def __call__(self, data_dict):
+        if "coord" in data_dict.keys():
+            coord_min = np.min(data_dict["coord"], 0)
+            data_dict["coord"] -= coord_min
+        return data_dict
+
+
+@TRANSFORMS.register_module()
+class CenterShift(object):
+    def __init__(self, apply_z=True):
+        self.apply_z = apply_z
+
+    def __call__(self, data_dict):
+        if "coord" in data_dict.keys():
+            x_min, y_min, z_min = data_dict["coord"].min(axis=0)
+            x_max, y_max, _ = data_dict["coord"].max(axis=0)
+            if self.apply_z:
+                shift = [(x_min + x_max) / 2, (y_min + y_max) / 2, z_min]
+            else:
+                shift = [(x_min + x_max) / 2, (y_min + y_max) / 2, 0]
+            data_dict["coord"] -= shift
+        return data_dict
+
+
+@TRANSFORMS.register_module()
+class RandomShift(object):
+    def __init__(self, shift=((-0.2, 0.2), (-0.2, 0.2), (0, 0))):
+        self.shift = shift
+
+    def __call__(self, data_dict):
+        if "coord" in data_dict.keys():
+            shift_x = np.random.uniform(self.shift[0][0], self.shift[0][1])
+            shift_y = np.random.uniform(self.shift[1][0], self.shift[1][1])
+            shift_z = np.random.uniform(self.shift[2][0], self.shift[2][1])
+            data_dict["coord"] += [shift_x, shift_y, shift_z]
+        return data_dict
+
+
+@TRANSFORMS.register_module()
+class PointClip(object):
+    def __init__(self, point_cloud_range=(-80, -80, -3, 80, 80, 1)):
+        self.point_cloud_range = point_cloud_range
+
+    def __call__(self, data_dict):
+        if "coord" in data_dict.keys():
+            data_dict["coord"] = np.clip(
+                data_dict["coord"],
+                a_min=self.point_cloud_range[:3],
+                a_max=self.point_cloud_range[3:],
+            )
+        return data_dict
+
+
+@TRANSFORMS.register_module()
+class RandomDropout(object):
+    def __init__(self, dropout_ratio=0.2, dropout_application_ratio=0.5):
+        """
+        dropout_ratio: fraction of points dropped when dropout fires
+        dropout_application_ratio: probability of applying dropout at all
+        """
+        self.dropout_ratio = dropout_ratio
+        self.dropout_application_ratio = dropout_application_ratio
+
+    def __call__(self, data_dict):
+        if random.random() < self.dropout_application_ratio:
+            n = len(data_dict["coord"])
+            idx = np.random.choice(n, int(n * (1 - self.dropout_ratio)), replace=False)
+            if "sampled_index" in data_dict:
+                # for the ScanNet data-efficient benchmark, make sure labeled points are sampled
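+                # (the labeled indices are unioned back in, and "sampled_index"
+                # is remapped to positions within the surviving subset below)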
+ idx = np.unique(np.append(idx, data_dict["sampled_index"])) + mask = np.zeros_like(data_dict["segment"]).astype(bool) + mask[data_dict["sampled_index"]] = True + data_dict["sampled_index"] = np.where(mask[idx])[0] + if "coord" in data_dict.keys(): + data_dict["coord"] = data_dict["coord"][idx] + if "color" in data_dict.keys(): + data_dict["color"] = data_dict["color"][idx] + if "normal" in data_dict.keys(): + data_dict["normal"] = data_dict["normal"][idx] + if "strength" in data_dict.keys(): + data_dict["strength"] = data_dict["strength"][idx] + if "segment" in data_dict.keys(): + data_dict["segment"] = data_dict["segment"][idx] + if "instance" in data_dict.keys(): + data_dict["instance"] = data_dict["instance"][idx] + return data_dict + + +@TRANSFORMS.register_module() +class RandomRotate(object): + def __init__(self, angle=None, center=None, axis="z", always_apply=False, p=0.5): + self.angle = [-1, 1] if angle is None else angle + self.axis = axis + self.always_apply = always_apply + self.p = p if not self.always_apply else 1 + self.center = center + + def __call__(self, data_dict): + if random.random() > self.p: + return data_dict + angle = np.random.uniform(self.angle[0], self.angle[1]) * np.pi + rot_cos, rot_sin = np.cos(angle), np.sin(angle) + if self.axis == "x": + rot_t = np.array([[1, 0, 0], [0, rot_cos, -rot_sin], [0, rot_sin, rot_cos]]) + elif self.axis == "y": + rot_t = np.array([[rot_cos, 0, rot_sin], [0, 1, 0], [-rot_sin, 0, rot_cos]]) + elif self.axis == "z": + rot_t = np.array([[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0], [0, 0, 1]]) + else: + raise NotImplementedError + if "coord" in data_dict.keys(): + if self.center is None: + x_min, y_min, z_min = data_dict["coord"].min(axis=0) + x_max, y_max, z_max = data_dict["coord"].max(axis=0) + center = [(x_min + x_max) / 2, (y_min + y_max) / 2, (z_min + z_max) / 2] + else: + center = self.center + data_dict["coord"] -= center + data_dict["coord"] = np.dot(data_dict["coord"], np.transpose(rot_t)) + data_dict["coord"] += center + if "normal" in data_dict.keys(): + data_dict["normal"] = np.dot(data_dict["normal"], np.transpose(rot_t)) + return data_dict + + +@TRANSFORMS.register_module() +class RandomRotateTargetAngle(object): + def __init__( + self, angle=(1 / 2, 1, 3 / 2), center=None, axis="z", always_apply=False, p=0.75 + ): + self.angle = angle + self.axis = axis + self.always_apply = always_apply + self.p = p if not self.always_apply else 1 + self.center = center + + def __call__(self, data_dict): + if random.random() > self.p: + return data_dict + angle = np.random.choice(self.angle) * np.pi + rot_cos, rot_sin = np.cos(angle), np.sin(angle) + if self.axis == "x": + rot_t = np.array([[1, 0, 0], [0, rot_cos, -rot_sin], [0, rot_sin, rot_cos]]) + elif self.axis == "y": + rot_t = np.array([[rot_cos, 0, rot_sin], [0, 1, 0], [-rot_sin, 0, rot_cos]]) + elif self.axis == "z": + rot_t = np.array([[rot_cos, -rot_sin, 0], [rot_sin, rot_cos, 0], [0, 0, 1]]) + else: + raise NotImplementedError + if "coord" in data_dict.keys(): + if self.center is None: + x_min, y_min, z_min = data_dict["coord"].min(axis=0) + x_max, y_max, z_max = data_dict["coord"].max(axis=0) + center = [(x_min + x_max) / 2, (y_min + y_max) / 2, (z_min + z_max) / 2] + else: + center = self.center + data_dict["coord"] -= center + data_dict["coord"] = np.dot(data_dict["coord"], np.transpose(rot_t)) + data_dict["coord"] += center + if "normal" in data_dict.keys(): + data_dict["normal"] = np.dot(data_dict["normal"], np.transpose(rot_t)) + return data_dict + + 
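+# Example (a minimal sketch): every transform in this file follows the same
+# contract. It consumes a data_dict, mutates only the keys it knows about, and
+# returns the dict, so pipelines are plain registry configs chained by the
+# Compose helper defined at the bottom of this file:
+#
+#   aug = Compose(
+#       [
+#           dict(type="CenterShift", apply_z=True),
+#           dict(type="RandomRotate", angle=[-1, 1], axis="z", p=0.5),
+#           dict(type="RandomScale", scale=[0.9, 1.1]),
+#           dict(type="RandomFlip", p=0.5),
+#       ]
+#   )
+#   data_dict = aug(dict(coord=coord, normal=normal))  # numpy (N, 3) arrays
+
+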
+@TRANSFORMS.register_module()
+class RandomScale(object):
+    def __init__(self, scale=None, anisotropic=False):
+        self.scale = scale if scale is not None else [0.95, 1.05]
+        self.anisotropic = anisotropic
+
+    def __call__(self, data_dict):
+        if "coord" in data_dict.keys():
+            scale = np.random.uniform(
+                self.scale[0], self.scale[1], 3 if self.anisotropic else 1
+            )
+            data_dict["coord"] *= scale
+        return data_dict
+
+
+@TRANSFORMS.register_module()
+class RandomFlip(object):
+    def __init__(self, p=0.5):
+        self.p = p
+
+    def __call__(self, data_dict):
+        if np.random.rand() < self.p:
+            if "coord" in data_dict.keys():
+                data_dict["coord"][:, 0] = -data_dict["coord"][:, 0]
+            if "normal" in data_dict.keys():
+                data_dict["normal"][:, 0] = -data_dict["normal"][:, 0]
+        if np.random.rand() < self.p:
+            if "coord" in data_dict.keys():
+                data_dict["coord"][:, 1] = -data_dict["coord"][:, 1]
+            if "normal" in data_dict.keys():
+                data_dict["normal"][:, 1] = -data_dict["normal"][:, 1]
+        return data_dict
+
+
+@TRANSFORMS.register_module()
+class RandomJitter(object):
+    def __init__(self, sigma=0.01, clip=0.05):
+        assert clip > 0
+        self.sigma = sigma
+        self.clip = clip
+
+    def __call__(self, data_dict):
+        if "coord" in data_dict.keys():
+            jitter = np.clip(
+                self.sigma * np.random.randn(data_dict["coord"].shape[0], 3),
+                -self.clip,
+                self.clip,
+            )
+            data_dict["coord"] += jitter
+        return data_dict
+
+
+@TRANSFORMS.register_module()
+class ClipGaussianJitter(object):
+    def __init__(self, scalar=0.02, store_jitter=False):
+        self.scalar = scalar
+        # zero-mean 3D Gaussian; multivariate_normal requires a length-3 mean vector
+        self.mean = np.zeros(3)
+        self.cov = np.identity(3)
+        self.quantile = 1.96
+        self.store_jitter = store_jitter
+
+    def __call__(self, data_dict):
+        if "coord" in data_dict.keys():
+            jitter = np.random.multivariate_normal(
+                self.mean, self.cov, data_dict["coord"].shape[0]
+            )
+            # clip at the 95% quantile, then rescale to the target magnitude
+            jitter = self.scalar * np.clip(jitter / self.quantile, -1, 1)
+            data_dict["coord"] += jitter
+            if self.store_jitter:
+                data_dict["jitter"] = jitter
+        return data_dict
+
+
+@TRANSFORMS.register_module()
+class ChromaticAutoContrast(object):
+    def __init__(self, p=0.2, blend_factor=None):
+        self.p = p
+        self.blend_factor = blend_factor
+
+    def __call__(self, data_dict):
+        if "color" in data_dict.keys() and np.random.rand() < self.p:
+            lo = np.min(data_dict["color"], 0, keepdims=True)
+            hi = np.max(data_dict["color"], 0, keepdims=True)
+            scale = 255 / (hi - lo)
+            contrast_feat = (data_dict["color"][:, :3] - lo) * scale
+            blend_factor = (
+                np.random.rand() if self.blend_factor is None else self.blend_factor
+            )
+            data_dict["color"][:, :3] = (1 - blend_factor) * data_dict["color"][
+                :, :3
+            ] + blend_factor * contrast_feat
+        return data_dict
+
+
+@TRANSFORMS.register_module()
+class ChromaticTranslation(object):
+    def __init__(self, p=0.95, ratio=0.05):
+        self.p = p
+        self.ratio = ratio
+
+    def __call__(self, data_dict):
+        if "color" in data_dict.keys() and np.random.rand() < self.p:
+            tr = (np.random.rand(1, 3) - 0.5) * 255 * 2 * self.ratio
+            data_dict["color"][:, :3] = np.clip(tr + data_dict["color"][:, :3], 0, 255)
+        return data_dict
+
+
+@TRANSFORMS.register_module()
+class ChromaticJitter(object):
+    def __init__(self, p=0.95, std=0.005):
+        self.p = p
+        self.std = std
+
+    def __call__(self, data_dict):
+        if "color" in data_dict.keys() and np.random.rand() < self.p:
+            noise = np.random.randn(data_dict["color"].shape[0], 3)
+            noise *= self.std * 255
+            data_dict["color"][:, :3] = np.clip(
+                noise + data_dict["color"][:, :3], 0, 255
+            )
+        return data_dict
+
+
+@TRANSFORMS.register_module()
+class 
RandomColorGrayScale(object): + def __init__(self, p): + self.p = p + + @staticmethod + def rgb_to_grayscale(color, num_output_channels=1): + if color.shape[-1] < 3: + raise TypeError( + "Input color should have at least 3 dimensions, but found {}".format( + color.shape[-1] + ) + ) + + if num_output_channels not in (1, 3): + raise ValueError("num_output_channels should be either 1 or 3") + + r, g, b = color[..., 0], color[..., 1], color[..., 2] + gray = (0.2989 * r + 0.587 * g + 0.114 * b).astype(color.dtype) + gray = np.expand_dims(gray, axis=-1) + + if num_output_channels == 3: + gray = np.broadcast_to(gray, color.shape) + + return gray + + def __call__(self, data_dict): + if np.random.rand() < self.p: + data_dict["color"] = self.rgb_to_grayscale(data_dict["color"], 3) + return data_dict + + +@TRANSFORMS.register_module() +class RandomColorJitter(object): + """ + Random Color Jitter for 3D point cloud (refer torchvision) + """ + + def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, p=0.95): + self.brightness = self._check_input(brightness, "brightness") + self.contrast = self._check_input(contrast, "contrast") + self.saturation = self._check_input(saturation, "saturation") + self.hue = self._check_input( + hue, "hue", center=0, bound=(-0.5, 0.5), clip_first_on_zero=False + ) + self.p = p + + @staticmethod + def _check_input( + value, name, center=1, bound=(0, float("inf")), clip_first_on_zero=True + ): + if isinstance(value, numbers.Number): + if value < 0: + raise ValueError( + "If {} is a single number, it must be non negative.".format(name) + ) + value = [center - float(value), center + float(value)] + if clip_first_on_zero: + value[0] = max(value[0], 0.0) + elif isinstance(value, (tuple, list)) and len(value) == 2: + if not bound[0] <= value[0] <= value[1] <= bound[1]: + raise ValueError("{} values should be between {}".format(name, bound)) + else: + raise TypeError( + "{} should be a single number or a list/tuple with length 2.".format( + name + ) + ) + + # if value is 0 or (1., 1.) for brightness/contrast/saturation + # or (0., 0.) 
for hue, do nothing + if value[0] == value[1] == center: + value = None + return value + + @staticmethod + def blend(color1, color2, ratio): + ratio = float(ratio) + bound = 255.0 + return ( + (ratio * color1 + (1.0 - ratio) * color2) + .clip(0, bound) + .astype(color1.dtype) + ) + + @staticmethod + def rgb2hsv(rgb): + r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2] + maxc = np.max(rgb, axis=-1) + minc = np.min(rgb, axis=-1) + eqc = maxc == minc + cr = maxc - minc + s = cr / (np.ones_like(maxc) * eqc + maxc * (1 - eqc)) + cr_divisor = np.ones_like(maxc) * eqc + cr * (1 - eqc) + rc = (maxc - r) / cr_divisor + gc = (maxc - g) / cr_divisor + bc = (maxc - b) / cr_divisor + + hr = (maxc == r) * (bc - gc) + hg = ((maxc == g) & (maxc != r)) * (2.0 + rc - bc) + hb = ((maxc != g) & (maxc != r)) * (4.0 + gc - rc) + h = hr + hg + hb + h = (h / 6.0 + 1.0) % 1.0 + return np.stack((h, s, maxc), axis=-1) + + @staticmethod + def hsv2rgb(hsv): + h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2] + i = np.floor(h * 6.0) + f = (h * 6.0) - i + i = i.astype(np.int32) + + p = np.clip((v * (1.0 - s)), 0.0, 1.0) + q = np.clip((v * (1.0 - s * f)), 0.0, 1.0) + t = np.clip((v * (1.0 - s * (1.0 - f))), 0.0, 1.0) + i = i % 6 + mask = np.expand_dims(i, axis=-1) == np.arange(6) + + a1 = np.stack((v, q, p, p, t, v), axis=-1) + a2 = np.stack((t, v, v, q, p, p), axis=-1) + a3 = np.stack((p, p, t, v, v, q), axis=-1) + a4 = np.stack((a1, a2, a3), axis=-1) + + return np.einsum("...na, ...nab -> ...nb", mask.astype(hsv.dtype), a4) + + def adjust_brightness(self, color, brightness_factor): + if brightness_factor < 0: + raise ValueError( + "brightness_factor ({}) is not non-negative.".format(brightness_factor) + ) + + return self.blend(color, np.zeros_like(color), brightness_factor) + + def adjust_contrast(self, color, contrast_factor): + if contrast_factor < 0: + raise ValueError( + "contrast_factor ({}) is not non-negative.".format(contrast_factor) + ) + mean = np.mean(RandomColorGrayScale.rgb_to_grayscale(color)) + return self.blend(color, mean, contrast_factor) + + def adjust_saturation(self, color, saturation_factor): + if saturation_factor < 0: + raise ValueError( + "saturation_factor ({}) is not non-negative.".format(saturation_factor) + ) + gray = RandomColorGrayScale.rgb_to_grayscale(color) + return self.blend(color, gray, saturation_factor) + + def adjust_hue(self, color, hue_factor): + if not (-0.5 <= hue_factor <= 0.5): + raise ValueError( + "hue_factor ({}) is not in [-0.5, 0.5].".format(hue_factor) + ) + orig_dtype = color.dtype + hsv = self.rgb2hsv(color / 255.0) + h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2] + h = (h + hue_factor) % 1.0 + hsv = np.stack((h, s, v), axis=-1) + color_hue_adj = (self.hsv2rgb(hsv) * 255.0).astype(orig_dtype) + return color_hue_adj + + @staticmethod + def get_params(brightness, contrast, saturation, hue): + fn_idx = torch.randperm(4) + b = ( + None + if brightness is None + else np.random.uniform(brightness[0], brightness[1]) + ) + c = None if contrast is None else np.random.uniform(contrast[0], contrast[1]) + s = ( + None + if saturation is None + else np.random.uniform(saturation[0], saturation[1]) + ) + h = None if hue is None else np.random.uniform(hue[0], hue[1]) + return fn_idx, b, c, s, h + + def __call__(self, data_dict): + ( + fn_idx, + brightness_factor, + contrast_factor, + saturation_factor, + hue_factor, + ) = self.get_params(self.brightness, self.contrast, self.saturation, self.hue) + + for fn_id in fn_idx: + if ( + fn_id == 0 + and brightness_factor is not None + and 
np.random.rand() < self.p + ): + data_dict["color"] = self.adjust_brightness( + data_dict["color"], brightness_factor + ) + elif ( + fn_id == 1 and contrast_factor is not None and np.random.rand() < self.p + ): + data_dict["color"] = self.adjust_contrast( + data_dict["color"], contrast_factor + ) + elif ( + fn_id == 2 + and saturation_factor is not None + and np.random.rand() < self.p + ): + data_dict["color"] = self.adjust_saturation( + data_dict["color"], saturation_factor + ) + elif fn_id == 3 and hue_factor is not None and np.random.rand() < self.p: + data_dict["color"] = self.adjust_hue(data_dict["color"], hue_factor) + return data_dict + + +@TRANSFORMS.register_module() +class HueSaturationTranslation(object): + @staticmethod + def rgb_to_hsv(rgb): + # Translated from source of colorsys.rgb_to_hsv + # r,g,b should be a numpy arrays with values between 0 and 255 + # rgb_to_hsv returns an array of floats between 0.0 and 1.0. + rgb = rgb.astype("float") + hsv = np.zeros_like(rgb) + # in case an RGBA array was passed, just copy the A channel + hsv[..., 3:] = rgb[..., 3:] + r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2] + maxc = np.max(rgb[..., :3], axis=-1) + minc = np.min(rgb[..., :3], axis=-1) + hsv[..., 2] = maxc + mask = maxc != minc + hsv[mask, 1] = (maxc - minc)[mask] / maxc[mask] + rc = np.zeros_like(r) + gc = np.zeros_like(g) + bc = np.zeros_like(b) + rc[mask] = (maxc - r)[mask] / (maxc - minc)[mask] + gc[mask] = (maxc - g)[mask] / (maxc - minc)[mask] + bc[mask] = (maxc - b)[mask] / (maxc - minc)[mask] + hsv[..., 0] = np.select( + [r == maxc, g == maxc], [bc - gc, 2.0 + rc - bc], default=4.0 + gc - rc + ) + hsv[..., 0] = (hsv[..., 0] / 6.0) % 1.0 + return hsv + + @staticmethod + def hsv_to_rgb(hsv): + # Translated from source of colorsys.hsv_to_rgb + # h,s should be a numpy arrays with values between 0.0 and 1.0 + # v should be a numpy array with values between 0.0 and 255.0 + # hsv_to_rgb returns an array of uints between 0 and 255. 
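+        # Vectorized sector logic: i picks one of six hue sectors per point and
+        # the np.select calls below choose among (v, p, q, t) per channel.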
+ rgb = np.empty_like(hsv) + rgb[..., 3:] = hsv[..., 3:] + h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2] + i = (h * 6.0).astype("uint8") + f = (h * 6.0) - i + p = v * (1.0 - s) + q = v * (1.0 - s * f) + t = v * (1.0 - s * (1.0 - f)) + i = i % 6 + conditions = [s == 0.0, i == 1, i == 2, i == 3, i == 4, i == 5] + rgb[..., 0] = np.select(conditions, [v, q, p, p, t, v], default=v) + rgb[..., 1] = np.select(conditions, [v, v, v, q, p, p], default=t) + rgb[..., 2] = np.select(conditions, [v, p, t, v, v, q], default=p) + return rgb.astype("uint8") + + def __init__(self, hue_max=0.5, saturation_max=0.2): + self.hue_max = hue_max + self.saturation_max = saturation_max + + def __call__(self, data_dict): + if "color" in data_dict.keys(): + # Assume color[:, :3] is rgb + hsv = HueSaturationTranslation.rgb_to_hsv(data_dict["color"][:, :3]) + hue_val = (np.random.rand() - 0.5) * 2 * self.hue_max + sat_ratio = 1 + (np.random.rand() - 0.5) * 2 * self.saturation_max + hsv[..., 0] = np.remainder(hue_val + hsv[..., 0] + 1, 1) + hsv[..., 1] = np.clip(sat_ratio * hsv[..., 1], 0, 1) + data_dict["color"][:, :3] = np.clip( + HueSaturationTranslation.hsv_to_rgb(hsv), 0, 255 + ) + return data_dict + + +@TRANSFORMS.register_module() +class RandomColorDrop(object): + def __init__(self, p=0.2, color_augment=0.0): + self.p = p + self.color_augment = color_augment + + def __call__(self, data_dict): + if "color" in data_dict.keys() and np.random.rand() < self.p: + data_dict["color"] *= self.color_augment + return data_dict + + def __repr__(self): + return "RandomColorDrop(color_augment: {}, p: {})".format( + self.color_augment, self.p + ) + + +@TRANSFORMS.register_module() +class ElasticDistortion(object): + def __init__(self, distortion_params=None): + self.distortion_params = ( + [[0.2, 0.4], [0.8, 1.6]] if distortion_params is None else distortion_params + ) + + @staticmethod + def elastic_distortion(coords, granularity, magnitude): + """ + Apply elastic distortion on sparse coordinate space. + pointcloud: numpy array of (number of points, at least 3 spatial dims) + granularity: size of the noise grid (in same scale[m/cm] as the voxel grid) + magnitude: noise multiplier + """ + blurx = np.ones((3, 1, 1, 1)).astype("float32") / 3 + blury = np.ones((1, 3, 1, 1)).astype("float32") / 3 + blurz = np.ones((1, 1, 3, 1)).astype("float32") / 3 + coords_min = coords.min(0) + + # Create Gaussian noise tensor of the size given by granularity. + noise_dim = ((coords - coords_min).max(0) // granularity).astype(int) + 3 + noise = np.random.randn(*noise_dim, 3).astype(np.float32) + + # Smoothing. + for _ in range(2): + noise = scipy.ndimage.filters.convolve( + noise, blurx, mode="constant", cval=0 + ) + noise = scipy.ndimage.filters.convolve( + noise, blury, mode="constant", cval=0 + ) + noise = scipy.ndimage.filters.convolve( + noise, blurz, mode="constant", cval=0 + ) + + # Trilinear interpolate noise filters for each spatial dimensions. 
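+        # Build one sample axis per spatial dimension spanning the noise grid,
+        # then evaluate the smoothed noise at every input point; the default
+        # RegularGridInterpolator method is linear, i.e. trilinear in 3D.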
+ ax = [ + np.linspace(d_min, d_max, d) + for d_min, d_max, d in zip( + coords_min - granularity, + coords_min + granularity * (noise_dim - 2), + noise_dim, + ) + ] + interp = scipy.interpolate.RegularGridInterpolator( + ax, noise, bounds_error=False, fill_value=0 + ) + coords += interp(coords) * magnitude + return coords + + def __call__(self, data_dict): + if "coord" in data_dict.keys() and self.distortion_params is not None: + if random.random() < 0.95: + for granularity, magnitude in self.distortion_params: + data_dict["coord"] = self.elastic_distortion( + data_dict["coord"], granularity, magnitude + ) + return data_dict + + +@TRANSFORMS.register_module() +class GridSample(object): + def __init__( + self, + grid_size=0.05, + hash_type="fnv", + mode="train", + keys=("coord", "color", "normal", "segment"), + return_inverse=False, + return_grid_coord=False, + return_min_coord=False, + return_displacement=False, + project_displacement=False, + ): + self.grid_size = grid_size + self.hash = self.fnv_hash_vec if hash_type == "fnv" else self.ravel_hash_vec + assert mode in ["train", "test"] + self.mode = mode + self.keys = keys + self.return_inverse = return_inverse + self.return_grid_coord = return_grid_coord + self.return_min_coord = return_min_coord + self.return_displacement = return_displacement + self.project_displacement = project_displacement + + def __call__(self, data_dict): + assert "coord" in data_dict.keys() + scaled_coord = data_dict["coord"] / np.array(self.grid_size) + grid_coord = np.floor(scaled_coord).astype(int) + min_coord = grid_coord.min(0) + grid_coord -= min_coord + scaled_coord -= min_coord + min_coord = min_coord * np.array(self.grid_size) + key = self.hash(grid_coord) + idx_sort = np.argsort(key) + key_sort = key[idx_sort] + _, inverse, count = np.unique(key_sort, return_inverse=True, return_counts=True) + if self.mode == "train": # train mode + idx_select = ( + np.cumsum(np.insert(count, 0, 0)[0:-1]) + + np.random.randint(0, count.max(), count.size) % count + ) + idx_unique = idx_sort[idx_select] + if "sampled_index" in data_dict: + # for ScanNet data efficient, we need to make sure labeled point is sampled. 
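+                # (as in RandomDropout: the labeled indices are unioned into
+                # the sampled set and "sampled_index" is remapped to positions
+                # within it below)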
+ idx_unique = np.unique( + np.append(idx_unique, data_dict["sampled_index"]) + ) + mask = np.zeros_like(data_dict["segment"]).astype(bool) + mask[data_dict["sampled_index"]] = True + data_dict["sampled_index"] = np.where(mask[idx_unique])[0] + if self.return_inverse: + data_dict["inverse"] = np.zeros_like(inverse) + data_dict["inverse"][idx_sort] = inverse + if self.return_grid_coord: + data_dict["grid_coord"] = grid_coord[idx_unique] + if self.return_min_coord: + data_dict["min_coord"] = min_coord.reshape([1, 3]) + if self.return_displacement: + displacement = ( + scaled_coord - grid_coord - 0.5 + ) # [0, 1] -> [-0.5, 0.5] displacement to center + if self.project_displacement: + displacement = np.sum( + displacement * data_dict["normal"], axis=-1, keepdims=True + ) + data_dict["displacement"] = displacement[idx_unique] + for key in self.keys: + data_dict[key] = data_dict[key][idx_unique] + return data_dict + + elif self.mode == "test": # test mode + data_part_list = [] + for i in range(count.max()): + idx_select = np.cumsum(np.insert(count, 0, 0)[0:-1]) + i % count + idx_part = idx_sort[idx_select] + data_part = dict(index=idx_part) + if self.return_inverse: + data_dict["inverse"] = np.zeros_like(inverse) + data_dict["inverse"][idx_sort] = inverse + if self.return_grid_coord: + data_part["grid_coord"] = grid_coord[idx_part] + if self.return_min_coord: + data_part["min_coord"] = min_coord.reshape([1, 3]) + if self.return_displacement: + displacement = ( + scaled_coord - grid_coord - 0.5 + ) # [0, 1] -> [-0.5, 0.5] displacement to center + if self.project_displacement: + displacement = np.sum( + displacement * data_dict["normal"], axis=-1, keepdims=True + ) + data_dict["displacement"] = displacement[idx_part] + for key in data_dict.keys(): + if key in self.keys: + data_part[key] = data_dict[key][idx_part] + else: + data_part[key] = data_dict[key] + data_part_list.append(data_part) + return data_part_list + else: + raise NotImplementedError + + @staticmethod + def ravel_hash_vec(arr): + """ + Ravel the coordinates after subtracting the min coordinates. 
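+        Equivalent to np.ravel_multi_index on each row: points that fall into
+        the same voxel produce identical keys, which GridSample groups on.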
+ """ + assert arr.ndim == 2 + arr = arr.copy() + arr -= arr.min(0) + arr = arr.astype(np.uint64, copy=False) + arr_max = arr.max(0).astype(np.uint64) + 1 + + keys = np.zeros(arr.shape[0], dtype=np.uint64) + # Fortran style indexing + for j in range(arr.shape[1] - 1): + keys += arr[:, j] + keys *= arr_max[j + 1] + keys += arr[:, -1] + return keys + + @staticmethod + def fnv_hash_vec(arr): + """ + FNV64-1A + """ + assert arr.ndim == 2 + # Floor first for negative coordinates + arr = arr.copy() + arr = arr.astype(np.uint64, copy=False) + hashed_arr = np.uint64(14695981039346656037) * np.ones( + arr.shape[0], dtype=np.uint64 + ) + for j in range(arr.shape[1]): + hashed_arr *= np.uint64(1099511628211) + hashed_arr = np.bitwise_xor(hashed_arr, arr[:, j]) + return hashed_arr + + +@TRANSFORMS.register_module() +class SphereCrop(object): + def __init__(self, point_max=80000, sample_rate=None, mode="random"): + self.point_max = point_max + self.sample_rate = sample_rate + assert mode in ["random", "center", "all"] + self.mode = mode + + def __call__(self, data_dict): + point_max = ( + int(self.sample_rate * data_dict["coord"].shape[0]) + if self.sample_rate is not None + else self.point_max + ) + + assert "coord" in data_dict.keys() + if self.mode == "all": + # TODO: Optimize + if "index" not in data_dict.keys(): + data_dict["index"] = np.arange(data_dict["coord"].shape[0]) + data_part_list = [] + # coord_list, color_list, dist2_list, idx_list, offset_list = [], [], [], [], [] + if data_dict["coord"].shape[0] > point_max: + coord_p, idx_uni = np.random.rand( + data_dict["coord"].shape[0] + ) * 1e-3, np.array([]) + while idx_uni.size != data_dict["index"].shape[0]: + init_idx = np.argmin(coord_p) + dist2 = np.sum( + np.power(data_dict["coord"] - data_dict["coord"][init_idx], 2), + 1, + ) + idx_crop = np.argsort(dist2)[:point_max] + + data_crop_dict = dict() + if "coord" in data_dict.keys(): + data_crop_dict["coord"] = data_dict["coord"][idx_crop] + if "grid_coord" in data_dict.keys(): + data_crop_dict["grid_coord"] = data_dict["grid_coord"][idx_crop] + if "normal" in data_dict.keys(): + data_crop_dict["normal"] = data_dict["normal"][idx_crop] + if "color" in data_dict.keys(): + data_crop_dict["color"] = data_dict["color"][idx_crop] + if "displacement" in data_dict.keys(): + data_crop_dict["displacement"] = data_dict["displacement"][ + idx_crop + ] + if "strength" in data_dict.keys(): + data_crop_dict["strength"] = data_dict["strength"][idx_crop] + data_crop_dict["weight"] = dist2[idx_crop] + data_crop_dict["index"] = data_dict["index"][idx_crop] + data_part_list.append(data_crop_dict) + + delta = np.square( + 1 - data_crop_dict["weight"] / np.max(data_crop_dict["weight"]) + ) + coord_p[idx_crop] += delta + idx_uni = np.unique( + np.concatenate((idx_uni, data_crop_dict["index"])) + ) + else: + data_crop_dict = data_dict.copy() + data_crop_dict["weight"] = np.zeros(data_dict["coord"].shape[0]) + data_crop_dict["index"] = data_dict["index"] + data_part_list.append(data_crop_dict) + return data_part_list + # mode is "random" or "center" + elif data_dict["coord"].shape[0] > point_max: + if self.mode == "random": + center = data_dict["coord"][ + np.random.randint(data_dict["coord"].shape[0]) + ] + elif self.mode == "center": + center = data_dict["coord"][data_dict["coord"].shape[0] // 2] + else: + raise NotImplementedError + idx_crop = np.argsort(np.sum(np.square(data_dict["coord"] - center), 1))[ + :point_max + ] + if "coord" in data_dict.keys(): + data_dict["coord"] = data_dict["coord"][idx_crop] + if 
"origin_coord" in data_dict.keys(): + data_dict["origin_coord"] = data_dict["origin_coord"][idx_crop] + if "grid_coord" in data_dict.keys(): + data_dict["grid_coord"] = data_dict["grid_coord"][idx_crop] + if "color" in data_dict.keys(): + data_dict["color"] = data_dict["color"][idx_crop] + if "normal" in data_dict.keys(): + data_dict["normal"] = data_dict["normal"][idx_crop] + if "segment" in data_dict.keys(): + data_dict["segment"] = data_dict["segment"][idx_crop] + if "instance" in data_dict.keys(): + data_dict["instance"] = data_dict["instance"][idx_crop] + if "displacement" in data_dict.keys(): + data_dict["displacement"] = data_dict["displacement"][idx_crop] + if "strength" in data_dict.keys(): + data_dict["strength"] = data_dict["strength"][idx_crop] + return data_dict + + +@TRANSFORMS.register_module() +class ShufflePoint(object): + def __call__(self, data_dict): + assert "coord" in data_dict.keys() + shuffle_index = np.arange(data_dict["coord"].shape[0]) + np.random.shuffle(shuffle_index) + if "coord" in data_dict.keys(): + data_dict["coord"] = data_dict["coord"][shuffle_index] + if "grid_coord" in data_dict.keys(): + data_dict["grid_coord"] = data_dict["grid_coord"][shuffle_index] + if "displacement" in data_dict.keys(): + data_dict["displacement"] = data_dict["displacement"][shuffle_index] + if "color" in data_dict.keys(): + data_dict["color"] = data_dict["color"][shuffle_index] + if "normal" in data_dict.keys(): + data_dict["normal"] = data_dict["normal"][shuffle_index] + if "segment" in data_dict.keys(): + data_dict["segment"] = data_dict["segment"][shuffle_index] + if "instance" in data_dict.keys(): + data_dict["instance"] = data_dict["instance"][shuffle_index] + return data_dict + + +@TRANSFORMS.register_module() +class CropBoundary(object): + def __call__(self, data_dict): + assert "segment" in data_dict + segment = data_dict["segment"].flatten() + mask = (segment != 0) * (segment != 1) + if "coord" in data_dict.keys(): + data_dict["coord"] = data_dict["coord"][mask] + if "grid_coord" in data_dict.keys(): + data_dict["grid_coord"] = data_dict["grid_coord"][mask] + if "color" in data_dict.keys(): + data_dict["color"] = data_dict["color"][mask] + if "normal" in data_dict.keys(): + data_dict["normal"] = data_dict["normal"][mask] + if "segment" in data_dict.keys(): + data_dict["segment"] = data_dict["segment"][mask] + if "instance" in data_dict.keys(): + data_dict["instance"] = data_dict["instance"][mask] + return data_dict + + +@TRANSFORMS.register_module() +class ContrastiveViewsGenerator(object): + def __init__( + self, + view_keys=("coord", "color", "normal", "origin_coord"), + view_trans_cfg=None, + ): + self.view_keys = view_keys + self.view_trans = Compose(view_trans_cfg) + + def __call__(self, data_dict): + view1_dict = dict() + view2_dict = dict() + for key in self.view_keys: + view1_dict[key] = data_dict[key].copy() + view2_dict[key] = data_dict[key].copy() + view1_dict = self.view_trans(view1_dict) + view2_dict = self.view_trans(view2_dict) + for key, value in view1_dict.items(): + data_dict["view1_" + key] = value + for key, value in view2_dict.items(): + data_dict["view2_" + key] = value + return data_dict + + +@TRANSFORMS.register_module() +class InstanceParser(object): + def __init__(self, segment_ignore_index=(-1, 0, 1), instance_ignore_index=-1): + self.segment_ignore_index = segment_ignore_index + self.instance_ignore_index = instance_ignore_index + + def __call__(self, data_dict): + coord = data_dict["coord"] + segment = data_dict["segment"] + instance = 
data_dict["instance"] + mask = ~np.in1d(segment, self.segment_ignore_index) + # mapping ignored instance to ignore index + instance[~mask] = self.instance_ignore_index + # reorder left instance + unique, inverse = np.unique(instance[mask], return_inverse=True) + instance_num = len(unique) + instance[mask] = inverse + # init instance information + centroid = np.ones((coord.shape[0], 3)) * self.instance_ignore_index + bbox = np.ones((instance_num, 8)) * self.instance_ignore_index + vacancy = [ + index for index in self.segment_ignore_index if index >= 0 + ] # vacate class index + + for instance_id in range(instance_num): + mask_ = instance == instance_id + coord_ = coord[mask_] + bbox_min = coord_.min(0) + bbox_max = coord_.max(0) + bbox_centroid = coord_.mean(0) + bbox_center = (bbox_max + bbox_min) / 2 + bbox_size = bbox_max - bbox_min + bbox_theta = np.zeros(1, dtype=coord_.dtype) + bbox_class = np.array([segment[mask_][0]], dtype=coord_.dtype) + # shift class index to fill vacate class index caused by segment ignore index + bbox_class -= np.greater(bbox_class, vacancy).sum() + + centroid[mask_] = bbox_centroid + bbox[instance_id] = np.concatenate( + [bbox_center, bbox_size, bbox_theta, bbox_class] + ) # 3 + 3 + 1 + 1 = 8 + data_dict["instance"] = instance + data_dict["instance_centroid"] = centroid + data_dict["bbox"] = bbox + return data_dict + + +class Compose(object): + def __init__(self, cfg=None): + self.cfg = cfg if cfg is not None else [] + self.transforms = [] + for t_cfg in self.cfg: + self.transforms.append(TRANSFORMS.build(t_cfg)) + + def __call__(self, data_dict): + for t in self.transforms: + data_dict = t(data_dict) + return data_dict diff --git a/Pointcept/pointcept/datasets/utils.py b/Pointcept/pointcept/datasets/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..3abb9bf88c81f5eae302468ffc91c62bd942a002 --- /dev/null +++ b/Pointcept/pointcept/datasets/utils.py @@ -0,0 +1,59 @@ +""" +Utils for Datasets + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
+""" + +import random +from collections.abc import Mapping, Sequence +import numpy as np +import torch +from torch.utils.data.dataloader import default_collate + + +def collate_fn(batch): + """ + collate function for point cloud which support dict and list, + 'coord' is necessary to determine 'offset' + """ + if not isinstance(batch, Sequence): + raise TypeError(f"{batch.dtype} is not supported.") + + if isinstance(batch[0], torch.Tensor): + return torch.cat(list(batch)) + elif isinstance(batch[0], str): + # str is also a kind of Sequence, judgement should before Sequence + return list(batch) + elif isinstance(batch[0], Sequence): + for data in batch: + data.append(torch.tensor([data[0].shape[0]])) + batch = [collate_fn(samples) for samples in zip(*batch)] + batch[-1] = torch.cumsum(batch[-1], dim=0).int() + return batch + elif isinstance(batch[0], Mapping): + batch = {key: collate_fn([d[key] for d in batch]) for key in batch[0]} + for key in batch.keys(): + if "offset" in key: + batch[key] = torch.cumsum(batch[key], dim=0) + return batch + else: + return default_collate(batch) + + +def point_collate_fn(batch, mix_prob=0): + assert isinstance( + batch[0], Mapping + ) # currently, only support input_dict, rather than input_list + batch = collate_fn(batch) + if "offset" in batch.keys(): + # Mix3d (https://arxiv.org/pdf/2110.02210.pdf) + if random.random() < mix_prob: + batch["offset"] = torch.cat( + [batch["offset"][1:-1:2], batch["offset"][-1].unsqueeze(0)], dim=0 + ) + return batch + + +def gaussian_kernel(dist2: np.array, a: float = 1, c: float = 5): + return a * np.exp(-dist2 / (2 * c**2)) diff --git a/Pointcept/pointcept/datasets/waymo.py b/Pointcept/pointcept/datasets/waymo.py new file mode 100644 index 0000000000000000000000000000000000000000..93e7f7ee1a0f18f9c932248b5d1d192dcb78f5f5 --- /dev/null +++ b/Pointcept/pointcept/datasets/waymo.py @@ -0,0 +1,104 @@ +""" +Waymo dataset + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
+""" + +import os +import numpy as np +import glob + +from .builder import DATASETS +from .defaults import DefaultDataset + + +@DATASETS.register_module() +class WaymoDataset(DefaultDataset): + def __init__( + self, + timestamp=(0,), + reference_label=True, + timing_embedding=False, + **kwargs, + ): + super().__init__(**kwargs) + assert timestamp[0] == 0 + self.timestamp = timestamp + self.reference_label = reference_label + self.timing_embedding = timing_embedding + self.data_list = sorted(self.data_list) + _, self.sequence_offset, self.sequence_index = np.unique( + [os.path.dirname(data) for data in self.data_list], + return_index=True, + return_inverse=True, + ) + self.sequence_offset = np.append(self.sequence_offset, len(self.data_list)) + + def get_data_list(self): + if isinstance(self.split, str): + self.split = [self.split] + data_list = [] + for split in self.split: + data_list += glob.glob(os.path.join(self.data_root, split, "*", "*")) + return data_list + + @staticmethod + def align_pose(coord, pose, target_pose): + coord = np.hstack((coord, np.ones_like(coord[:, :1]))) + pose_align = np.matmul(np.linalg.inv(target_pose), pose) + coord = (pose_align @ coord.T).T[:, :3] + return coord + + def get_single_frame(self, idx): + return super().get_data(idx) + + def get_data(self, idx): + idx = idx % len(self.data_list) + if self.timestamp == (0,): + return self.get_single_frame(idx) + + sequence_index = self.sequence_index[idx] + lower, upper = self.sequence_offset[[sequence_index, sequence_index + 1]] + major_frame = self.get_single_frame(idx) + name = major_frame.pop("name") + target_pose = major_frame.pop("pose") + for key in major_frame.keys(): + major_frame[key] = [major_frame[key]] + + for timestamp in self.timestamp[1:]: + refer_idx = timestamp + idx + if refer_idx < lower or upper <= refer_idx: + continue + refer_frame = self.get_single_frame(refer_idx) + refer_frame.pop("name") + pose = refer_frame.pop("pose") + refer_frame["coord"] = self.align_pose( + refer_frame["coord"], pose, target_pose + ) + if not self.reference_label: + refer_frame["segment"] = ( + np.ones_like(refer_frame["segment"]) * self.ignore_index + ) + + if self.timing_embedding: + refer_frame["strength"] = np.hstack( + ( + refer_frame["strength"], + np.ones_like(refer_frame["strength"]) * timestamp, + ) + ) + + for key in major_frame.keys(): + major_frame[key].append(refer_frame[key]) + for key in major_frame.keys(): + major_frame[key] = np.concatenate(major_frame[key], axis=0) + major_frame["name"] = name + return major_frame + + def get_data_name(self, idx): + file_path = self.data_list[idx % len(self.data_list)] + sequence_path, frame_name = os.path.split(file_path) + sequence_name = os.path.basename(sequence_path) + data_name = f"{sequence_name}_{frame_name}" + return data_name diff --git a/Pointcept/pointcept/engines/__init__.py b/Pointcept/pointcept/engines/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Pointcept/pointcept/engines/defaults.py b/Pointcept/pointcept/engines/defaults.py new file mode 100644 index 0000000000000000000000000000000000000000..d45e7925a50acb03bb510c46ec4c566f6815cc05 --- /dev/null +++ b/Pointcept/pointcept/engines/defaults.py @@ -0,0 +1,152 @@ +""" +Default training/testing logic + +modified from detectron2(https://github.com/facebookresearch/detectron2) + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
+""" + +import os +import sys +import argparse +import multiprocessing as mp +from torch.nn.parallel import DistributedDataParallel + + +import pointcept.utils.comm as comm +from pointcept.utils.env import get_random_seed, set_seed +from pointcept.utils.config import Config, DictAction + + +def create_ddp_model(model, *, fp16_compression=False, **kwargs): + """ + Create a DistributedDataParallel model if there are >1 processes. + Args: + model: a torch.nn.Module + fp16_compression: add fp16 compression hooks to the ddp object. + See more at https://pytorch.org/docs/stable/ddp_comm_hooks.html#torch.distributed.algorithms.ddp_comm_hooks.default_hooks.fp16_compress_hook + kwargs: other arguments of :module:`torch.nn.parallel.DistributedDataParallel`. + """ + if comm.get_world_size() == 1: + return model + # kwargs['find_unused_parameters'] = True + if "device_ids" not in kwargs: + kwargs["device_ids"] = [comm.get_local_rank()] + if "output_device" not in kwargs: + kwargs["output_device"] = [comm.get_local_rank()] + ddp = DistributedDataParallel(model, **kwargs) + if fp16_compression: + from torch.distributed.algorithms.ddp_comm_hooks import default as comm_hooks + + ddp.register_comm_hook(state=None, hook=comm_hooks.fp16_compress_hook) + return ddp + + +def worker_init_fn(worker_id, num_workers, rank, seed): + """Worker init func for dataloader. + + The seed of each worker equals to num_worker * rank + worker_id + user_seed + + Args: + worker_id (int): Worker id. + num_workers (int): Number of workers. + rank (int): The rank of current process. + seed (int): The random seed to use. + """ + + worker_seed = num_workers * rank + worker_id + seed + set_seed(worker_seed) + + +def default_argument_parser(epilog=None): + parser = argparse.ArgumentParser( + epilog=epilog + or f""" + Examples: + Run on single machine: + $ {sys.argv[0]} --num-gpus 8 --config-file cfg.yaml + Change some config options: + $ {sys.argv[0]} --config-file cfg.yaml MODEL.WEIGHTS /path/to/weight.pth SOLVER.BASE_LR 0.001 + Run on multiple machines: + (machine0)$ {sys.argv[0]} --machine-rank 0 --num-machines 2 --dist-url [--other-flags] + (machine1)$ {sys.argv[0]} --machine-rank 1 --num-machines 2 --dist-url [--other-flags] + """, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--config-file", default="", metavar="FILE", help="path to config file" + ) + parser.add_argument( + "--num-gpus", type=int, default=1, help="number of gpus *per machine*" + ) + parser.add_argument( + "--num-machines", type=int, default=1, help="total number of machines" + ) + parser.add_argument( + "--machine-rank", + type=int, + default=0, + help="the rank of this machine (unique per machine)", + ) + # PyTorch still may leave orphan processes in multi-gpu training. + # Therefore we use a deterministic way to obtain port, + # so that users are aware of orphan processes by seeing the port occupied. + # port = 2 ** 15 + 2 ** 14 + hash(os.getuid() if sys.platform != "win32" else 1) % 2 ** 14 + parser.add_argument( + "--dist-url", + # default="tcp://127.0.0.1:{}".format(port), + default="auto", + help="initialization URL for pytorch distributed backend. 
See " + "https://pytorch.org/docs/stable/distributed.html for details.", + ) + parser.add_argument( + "--options", nargs="+", action=DictAction, help="custom options" + ) + return parser + + +def default_config_parser(file_path, options): + # config name protocol: dataset_name/model_name-exp_name + if os.path.isfile(file_path): + cfg = Config.fromfile(file_path) + else: + sep = file_path.find("-") + cfg = Config.fromfile(os.path.join(file_path[:sep], file_path[sep + 1 :])) + + if options is not None: + cfg.merge_from_dict(options) + + if cfg.seed is None: + cfg.seed = get_random_seed() + + cfg.data.train.loop = cfg.epoch // cfg.eval_epoch + + os.makedirs(os.path.join(cfg.save_path, "model"), exist_ok=True) + if not cfg.resume: + cfg.dump(os.path.join(cfg.save_path, "config.py")) + return cfg + + +def default_setup(cfg): + # scalar by world size + world_size = comm.get_world_size() + cfg.num_worker = cfg.num_worker if cfg.num_worker is not None else mp.cpu_count() + cfg.num_worker_per_gpu = cfg.num_worker // world_size + assert cfg.batch_size % world_size == 0 + assert cfg.batch_size_val is None or cfg.batch_size_val % world_size == 0 + assert cfg.batch_size_test is None or cfg.batch_size_test % world_size == 0 + cfg.batch_size_per_gpu = cfg.batch_size // world_size + cfg.batch_size_val_per_gpu = ( + cfg.batch_size_val // world_size if cfg.batch_size_val is not None else 1 + ) + cfg.batch_size_test_per_gpu = ( + cfg.batch_size_test // world_size if cfg.batch_size_test is not None else 1 + ) + # update data loop + assert cfg.epoch % cfg.eval_epoch == 0 + # settle random seed + rank = comm.get_rank() + seed = None if cfg.seed is None else cfg.seed * cfg.num_worker_per_gpu + rank + set_seed(seed) + return cfg diff --git a/Pointcept/pointcept/engines/hooks/__init__.py b/Pointcept/pointcept/engines/hooks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1ab2c4beb7f1938d9703e572ad8619fe88bff223 --- /dev/null +++ b/Pointcept/pointcept/engines/hooks/__init__.py @@ -0,0 +1,5 @@ +from .default import HookBase +from .misc import * +from .evaluator import * + +from .builder import build_hooks diff --git a/Pointcept/pointcept/engines/hooks/builder.py b/Pointcept/pointcept/engines/hooks/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..2f4cce4871b0e18f3adc1f7430a8d5410442c77c --- /dev/null +++ b/Pointcept/pointcept/engines/hooks/builder.py @@ -0,0 +1,18 @@ +""" +Hook Builder + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from pointcept.utils.registry import Registry + + +HOOKS = Registry("hooks") + + +def build_hooks(cfg): + hooks = [] + for hook_cfg in cfg: + hooks.append(HOOKS.build(hook_cfg)) + return hooks diff --git a/Pointcept/pointcept/engines/hooks/default.py b/Pointcept/pointcept/engines/hooks/default.py new file mode 100644 index 0000000000000000000000000000000000000000..87a64415a5a66d2570dffbaa7b90707443be42e2 --- /dev/null +++ b/Pointcept/pointcept/engines/hooks/default.py @@ -0,0 +1,32 @@ +""" +Default Hook + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + + +class HookBase: + """ + Base class for hooks that can be registered with :class:`TrainerBase`. + """ + + trainer = None # A weak reference to the trainer object. 
+ + def before_train(self): + pass + + def before_epoch(self): + pass + + def before_step(self): + pass + + def after_step(self): + pass + + def after_epoch(self): + pass + + def after_train(self): + pass diff --git a/Pointcept/pointcept/engines/hooks/evaluator.py b/Pointcept/pointcept/engines/hooks/evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..02b35b3abd83e0a7b59f532f1ca8aacf70afcce2 --- /dev/null +++ b/Pointcept/pointcept/engines/hooks/evaluator.py @@ -0,0 +1,581 @@ +""" +Evaluate Hook + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import numpy as np +import torch +import torch.distributed as dist +import pointops +from uuid import uuid4 + +import pointcept.utils.comm as comm +from pointcept.utils.misc import intersection_and_union_gpu + +from .default import HookBase +from .builder import HOOKS + + +@HOOKS.register_module() +class ClsEvaluator(HookBase): + def after_epoch(self): + if self.trainer.cfg.evaluate: + self.eval() + + def eval(self): + self.trainer.logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") + self.trainer.model.eval() + for i, input_dict in enumerate(self.trainer.val_loader): + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with torch.no_grad(): + output_dict = self.trainer.model(input_dict) + output = output_dict["cls_logits"] + loss = output_dict["loss"] + pred = output.max(1)[1] + label = input_dict["category"] + intersection, union, target = intersection_and_union_gpu( + pred, + label, + self.trainer.cfg.data.num_classes, + self.trainer.cfg.data.ignore_index, + ) + if comm.get_world_size() > 1: + dist.all_reduce(intersection), dist.all_reduce(union), dist.all_reduce( + target + ) + intersection, union, target = ( + intersection.cpu().numpy(), + union.cpu().numpy(), + target.cpu().numpy(), + ) + # Here there is no need to sync since sync happened in dist.all_reduce + self.trainer.storage.put_scalar("val_intersection", intersection) + self.trainer.storage.put_scalar("val_union", union) + self.trainer.storage.put_scalar("val_target", target) + self.trainer.storage.put_scalar("val_loss", loss.item()) + self.trainer.logger.info( + "Test: [{iter}/{max_iter}] " + "Loss {loss:.4f} ".format( + iter=i + 1, max_iter=len(self.trainer.val_loader), loss=loss.item() + ) + ) + loss_avg = self.trainer.storage.history("val_loss").avg + intersection = self.trainer.storage.history("val_intersection").total + union = self.trainer.storage.history("val_union").total + target = self.trainer.storage.history("val_target").total + iou_class = intersection / (union + 1e-10) + acc_class = intersection / (target + 1e-10) + m_iou = np.mean(iou_class) + m_acc = np.mean(acc_class) + all_acc = sum(intersection) / (sum(target) + 1e-10) + self.trainer.logger.info( + "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.".format( + m_iou, m_acc, all_acc + ) + ) + for i in range(self.trainer.cfg.data.num_classes): + self.trainer.logger.info( + "Class_{idx}-{name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( + idx=i, + name=self.trainer.cfg.data.names[i], + iou=iou_class[i], + accuracy=acc_class[i], + ) + ) + current_epoch = self.trainer.epoch + 1 + if self.trainer.writer is not None: + self.trainer.writer.add_scalar("val/loss", loss_avg, current_epoch) + self.trainer.writer.add_scalar("val/mIoU", m_iou, current_epoch) + self.trainer.writer.add_scalar("val/mAcc", m_acc, current_epoch) + 
self.trainer.writer.add_scalar("val/allAcc", all_acc, current_epoch) + self.trainer.logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + self.trainer.comm_info["current_metric_value"] = all_acc # save for saver + self.trainer.comm_info["current_metric_name"] = "allAcc" # save for saver + + def after_train(self): + self.trainer.logger.info( + "Best {}: {:.4f}".format("allAcc", self.trainer.best_metric_value) + ) + + +@HOOKS.register_module() +class SemSegEvaluator(HookBase): + def after_epoch(self): + if self.trainer.cfg.evaluate: + self.eval() + + def eval(self): + self.trainer.logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") + self.trainer.model.eval() + for i, input_dict in enumerate(self.trainer.val_loader): + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with torch.no_grad(): + output_dict = self.trainer.model(input_dict) + output = output_dict["seg_logits"] + loss = output_dict["loss"] + pred = output.max(1)[1] + segment = input_dict["segment"] + if "origin_coord" in input_dict.keys(): + idx, _ = pointops.knn_query( + 1, + input_dict["coord"].float(), + input_dict["offset"].int(), + input_dict["origin_coord"].float(), + input_dict["origin_offset"].int(), + ) + pred = pred[idx.flatten().long()] + segment = input_dict["origin_segment"] + intersection, union, target = intersection_and_union_gpu( + pred, + segment, + self.trainer.cfg.data.num_classes, + self.trainer.cfg.data.ignore_index, + ) + if comm.get_world_size() > 1: + dist.all_reduce(intersection), dist.all_reduce(union), dist.all_reduce( + target + ) + intersection, union, target = ( + intersection.cpu().numpy(), + union.cpu().numpy(), + target.cpu().numpy(), + ) + # Here there is no need to sync since sync happened in dist.all_reduce + self.trainer.storage.put_scalar("val_intersection", intersection) + self.trainer.storage.put_scalar("val_union", union) + self.trainer.storage.put_scalar("val_target", target) + self.trainer.storage.put_scalar("val_loss", loss.item()) + info = "Test: [{iter}/{max_iter}] ".format( + iter=i + 1, max_iter=len(self.trainer.val_loader) + ) + if "origin_coord" in input_dict.keys(): + info = "Interp. 
" + info + self.trainer.logger.info( + info + + "Loss {loss:.4f} ".format( + iter=i + 1, max_iter=len(self.trainer.val_loader), loss=loss.item() + ) + ) + loss_avg = self.trainer.storage.history("val_loss").avg + intersection = self.trainer.storage.history("val_intersection").total + union = self.trainer.storage.history("val_union").total + target = self.trainer.storage.history("val_target").total + iou_class = intersection / (union + 1e-10) + acc_class = intersection / (target + 1e-10) + m_iou = np.mean(iou_class) + m_acc = np.mean(acc_class) + all_acc = sum(intersection) / (sum(target) + 1e-10) + self.trainer.logger.info( + "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.".format( + m_iou, m_acc, all_acc + ) + ) + for i in range(self.trainer.cfg.data.num_classes): + self.trainer.logger.info( + "Class_{idx}-{name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( + idx=i, + name=self.trainer.cfg.data.names[i], + iou=iou_class[i], + accuracy=acc_class[i], + ) + ) + current_epoch = self.trainer.epoch + 1 + if self.trainer.writer is not None: + self.trainer.writer.add_scalar("val/loss", loss_avg, current_epoch) + self.trainer.writer.add_scalar("val/mIoU", m_iou, current_epoch) + self.trainer.writer.add_scalar("val/mAcc", m_acc, current_epoch) + self.trainer.writer.add_scalar("val/allAcc", all_acc, current_epoch) + self.trainer.logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + self.trainer.comm_info["current_metric_value"] = m_iou # save for saver + self.trainer.comm_info["current_metric_name"] = "mIoU" # save for saver + + def after_train(self): + self.trainer.logger.info( + "Best {}: {:.4f}".format("mIoU", self.trainer.best_metric_value) + ) + + +@HOOKS.register_module() +class InsSegEvaluator(HookBase): + def __init__(self, segment_ignore_index=(-1,), instance_ignore_index=-1): + self.segment_ignore_index = segment_ignore_index + self.instance_ignore_index = instance_ignore_index + + self.valid_class_names = None # update in before train + self.overlaps = np.append(np.arange(0.5, 0.95, 0.05), 0.25) + self.min_region_sizes = 100 + self.distance_threshes = float("inf") + self.distance_confs = -float("inf") + + def before_train(self): + self.valid_class_names = [ + self.trainer.cfg.data.names[i] + for i in range(self.trainer.cfg.data.num_classes) + if i not in self.segment_ignore_index + ] + + def after_epoch(self): + if self.trainer.cfg.evaluate: + self.eval() + + def associate_instances(self, pred, segment, instance): + segment = segment.cpu().numpy() + instance = instance.cpu().numpy() + void_mask = np.in1d(segment, self.segment_ignore_index) + + assert ( + pred["pred_classes"].shape[0] + == pred["pred_scores"].shape[0] + == pred["pred_masks"].shape[0] + ) + assert pred["pred_masks"].shape[1] == segment.shape[0] == instance.shape[0] + # get gt instances + gt_instances = dict() + for i in range(self.trainer.cfg.data.num_classes): + if i not in self.segment_ignore_index: + gt_instances[self.trainer.cfg.data.names[i]] = [] + instance_ids, idx, counts = np.unique( + instance, return_index=True, return_counts=True + ) + segment_ids = segment[idx] + for i in range(len(instance_ids)): + if instance_ids[i] == self.instance_ignore_index: + continue + if segment_ids[i] in self.segment_ignore_index: + continue + gt_inst = dict() + gt_inst["instance_id"] = instance_ids[i] + gt_inst["segment_id"] = segment_ids[i] + gt_inst["dist_conf"] = 0.0 + gt_inst["med_dist"] = -1.0 + gt_inst["vert_count"] = counts[i] + gt_inst["matched_pred"] = [] + 
gt_instances[self.trainer.cfg.data.names[segment_ids[i]]].append(gt_inst) + + # get pred instances and associate with gt + pred_instances = dict() + for i in range(self.trainer.cfg.data.num_classes): + if i not in self.segment_ignore_index: + pred_instances[self.trainer.cfg.data.names[i]] = [] + instance_id = 0 + for i in range(len(pred["pred_classes"])): + if pred["pred_classes"][i] in self.segment_ignore_index: + continue + pred_inst = dict() + pred_inst["uuid"] = uuid4() + pred_inst["instance_id"] = instance_id + pred_inst["segment_id"] = pred["pred_classes"][i] + pred_inst["confidence"] = pred["pred_scores"][i] + pred_inst["mask"] = np.not_equal(pred["pred_masks"][i], 0) + pred_inst["vert_count"] = np.count_nonzero(pred_inst["mask"]) + pred_inst["void_intersection"] = np.count_nonzero( + np.logical_and(void_mask, pred_inst["mask"]) + ) + if pred_inst["vert_count"] < self.min_region_sizes: + continue # skip if empty + segment_name = self.trainer.cfg.data.names[pred_inst["segment_id"]] + matched_gt = [] + for gt_idx, gt_inst in enumerate(gt_instances[segment_name]): + intersection = np.count_nonzero( + np.logical_and( + instance == gt_inst["instance_id"], pred_inst["mask"] + ) + ) + if intersection > 0: + gt_inst_ = gt_inst.copy() + pred_inst_ = pred_inst.copy() + gt_inst_["intersection"] = intersection + pred_inst_["intersection"] = intersection + matched_gt.append(gt_inst_) + gt_inst["matched_pred"].append(pred_inst_) + pred_inst["matched_gt"] = matched_gt + pred_instances[segment_name].append(pred_inst) + instance_id += 1 + return gt_instances, pred_instances + + def evaluate_matches(self, scenes): + overlaps = self.overlaps + min_region_sizes = [self.min_region_sizes] + dist_threshes = [self.distance_threshes] + dist_confs = [self.distance_confs] + + # results: class x overlap + ap_table = np.zeros( + (len(dist_threshes), len(self.valid_class_names), len(overlaps)), float + ) + for di, (min_region_size, distance_thresh, distance_conf) in enumerate( + zip(min_region_sizes, dist_threshes, dist_confs) + ): + for oi, overlap_th in enumerate(overlaps): + pred_visited = {} + for scene in scenes: + for _ in scene["pred"]: + for label_name in self.valid_class_names: + for p in scene["pred"][label_name]: + if "uuid" in p: + pred_visited[p["uuid"]] = False + for li, label_name in enumerate(self.valid_class_names): + y_true = np.empty(0) + y_score = np.empty(0) + hard_false_negatives = 0 + has_gt = False + has_pred = False + for scene in scenes: + pred_instances = scene["pred"][label_name] + gt_instances = scene["gt"][label_name] + # filter groups in ground truth + gt_instances = [ + gt + for gt in gt_instances + if gt["vert_count"] >= min_region_size + and gt["med_dist"] <= distance_thresh + and gt["dist_conf"] >= distance_conf + ] + if gt_instances: + has_gt = True + if pred_instances: + has_pred = True + + cur_true = np.ones(len(gt_instances)) + cur_score = np.ones(len(gt_instances)) * (-float("inf")) + cur_match = np.zeros(len(gt_instances), dtype=bool) + # collect matches + for gti, gt in enumerate(gt_instances): + found_match = False + for pred in gt["matched_pred"]: + # greedy assignments + if pred_visited[pred["uuid"]]: + continue + overlap = float(pred["intersection"]) / ( + gt["vert_count"] + + pred["vert_count"] + - pred["intersection"] + ) + if overlap > overlap_th: + confidence = pred["confidence"] + # if already have a prediction for this gt, + # the prediction with the lower score is automatically a false positive + if cur_match[gti]: + max_score = max(cur_score[gti], confidence) 
+ min_score = min(cur_score[gti], confidence) + cur_score[gti] = max_score + # append false positive + cur_true = np.append(cur_true, 0) + cur_score = np.append(cur_score, min_score) + cur_match = np.append(cur_match, True) + # otherwise set score + else: + found_match = True + cur_match[gti] = True + cur_score[gti] = confidence + pred_visited[pred["uuid"]] = True + if not found_match: + hard_false_negatives += 1 + # remove non-matched ground truth instances + cur_true = cur_true[cur_match] + cur_score = cur_score[cur_match] + + # collect non-matched predictions as false positive + for pred in pred_instances: + found_gt = False + for gt in pred["matched_gt"]: + overlap = float(gt["intersection"]) / ( + gt["vert_count"] + + pred["vert_count"] + - gt["intersection"] + ) + if overlap > overlap_th: + found_gt = True + break + if not found_gt: + num_ignore = pred["void_intersection"] + for gt in pred["matched_gt"]: + if gt["segment_id"] in self.segment_ignore_index: + num_ignore += gt["intersection"] + # small ground truth instances + if ( + gt["vert_count"] < min_region_size + or gt["med_dist"] > distance_thresh + or gt["dist_conf"] < distance_conf + ): + num_ignore += gt["intersection"] + proportion_ignore = ( + float(num_ignore) / pred["vert_count"] + ) + # if not ignored append false positive + if proportion_ignore <= overlap_th: + cur_true = np.append(cur_true, 0) + confidence = pred["confidence"] + cur_score = np.append(cur_score, confidence) + + # append to overall results + y_true = np.append(y_true, cur_true) + y_score = np.append(y_score, cur_score) + + # compute average precision + if has_gt and has_pred: + # compute precision recall curve first + + # sorting and cumsum + score_arg_sort = np.argsort(y_score) + y_score_sorted = y_score[score_arg_sort] + y_true_sorted = y_true[score_arg_sort] + y_true_sorted_cumsum = np.cumsum(y_true_sorted) + + # unique thresholds + (thresholds, unique_indices) = np.unique( + y_score_sorted, return_index=True + ) + num_prec_recall = len(unique_indices) + 1 + + # prepare precision recall + num_examples = len(y_score_sorted) + # https://github.com/ScanNet/ScanNet/pull/26 + # all predictions are non-matched but also all of them are ignored and not counted as FP + # y_true_sorted_cumsum is empty + # num_true_examples = y_true_sorted_cumsum[-1] + num_true_examples = ( + y_true_sorted_cumsum[-1] + if len(y_true_sorted_cumsum) > 0 + else 0 + ) + precision = np.zeros(num_prec_recall) + recall = np.zeros(num_prec_recall) + + # deal with the first point + y_true_sorted_cumsum = np.append(y_true_sorted_cumsum, 0) + # deal with remaining + for idx_res, idx_scores in enumerate(unique_indices): + cumsum = y_true_sorted_cumsum[idx_scores - 1] + tp = num_true_examples - cumsum + fp = num_examples - idx_scores - tp + fn = cumsum + hard_false_negatives + p = float(tp) / (tp + fp) + r = float(tp) / (tp + fn) + precision[idx_res] = p + recall[idx_res] = r + + # first point in curve is artificial + precision[-1] = 1.0 + recall[-1] = 0.0 + + # compute average of precision-recall curve + recall_for_conv = np.copy(recall) + recall_for_conv = np.append(recall_for_conv[0], recall_for_conv) + recall_for_conv = np.append(recall_for_conv, 0.0) + + stepWidths = np.convolve( + recall_for_conv, [-0.5, 0, 0.5], "valid" + ) + # integrate is now simply a dot product + ap_current = np.dot(precision, stepWidths) + + elif has_gt: + ap_current = 0.0 + else: + ap_current = float("nan") + ap_table[di, li, oi] = ap_current + d_inf = 0 + o50 = np.where(np.isclose(self.overlaps, 0.5)) + o25 = 
np.where(np.isclose(self.overlaps, 0.25)) + oAllBut25 = np.where(np.logical_not(np.isclose(self.overlaps, 0.25))) + ap_scores = dict() + ap_scores["all_ap"] = np.nanmean(ap_table[d_inf, :, oAllBut25]) + ap_scores["all_ap_50%"] = np.nanmean(ap_table[d_inf, :, o50]) + ap_scores["all_ap_25%"] = np.nanmean(ap_table[d_inf, :, o25]) + ap_scores["classes"] = {} + for li, label_name in enumerate(self.valid_class_names): + ap_scores["classes"][label_name] = {} + ap_scores["classes"][label_name]["ap"] = np.average( + ap_table[d_inf, li, oAllBut25] + ) + ap_scores["classes"][label_name]["ap50%"] = np.average( + ap_table[d_inf, li, o50] + ) + ap_scores["classes"][label_name]["ap25%"] = np.average( + ap_table[d_inf, li, o25] + ) + return ap_scores + + def eval(self): + self.trainer.logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") + self.trainer.model.eval() + scenes = [] + for i, input_dict in enumerate(self.trainer.val_loader): + assert ( + len(input_dict["offset"]) == 1 + ) # currently only support bs 1 for each GPU + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with torch.no_grad(): + output_dict = self.trainer.model(input_dict) + + loss = output_dict["loss"] + + segment = input_dict["segment"] + instance = input_dict["instance"] + # map to origin + if "origin_coord" in input_dict.keys(): + idx, _ = pointops.knn_query( + 1, + input_dict["coord"].float(), + input_dict["offset"].int(), + input_dict["origin_coord"].float(), + input_dict["origin_offset"].int(), + ) + idx = idx.cpu().flatten().long() + output_dict["pred_masks"] = output_dict["pred_masks"][:, idx] + segment = input_dict["origin_segment"] + instance = input_dict["origin_instance"] + + gt_instances, pred_instance = self.associate_instances( + output_dict, segment, instance + ) + scenes.append(dict(gt=gt_instances, pred=pred_instance)) + + self.trainer.storage.put_scalar("val_loss", loss.item()) + self.trainer.logger.info( + "Test: [{iter}/{max_iter}] " + "Loss {loss:.4f} ".format( + iter=i + 1, max_iter=len(self.trainer.val_loader), loss=loss.item() + ) + ) + + loss_avg = self.trainer.storage.history("val_loss").avg + comm.synchronize() + scenes_sync = comm.gather(scenes, dst=0) + scenes = [scene for scenes_ in scenes_sync for scene in scenes_] + ap_scores = self.evaluate_matches(scenes) + all_ap = ap_scores["all_ap"] + all_ap_50 = ap_scores["all_ap_50%"] + all_ap_25 = ap_scores["all_ap_25%"] + self.trainer.logger.info( + "Val result: mAP/AP50/AP25 {:.4f}/{:.4f}/{:.4f}.".format( + all_ap, all_ap_50, all_ap_25 + ) + ) + for i, label_name in enumerate(self.valid_class_names): + ap = ap_scores["classes"][label_name]["ap"] + ap_50 = ap_scores["classes"][label_name]["ap50%"] + ap_25 = ap_scores["classes"][label_name]["ap25%"] + self.trainer.logger.info( + "Class_{idx}-{name} Result: AP/AP50/AP25 {AP:.4f}/{AP50:.4f}/{AP25:.4f}".format( + idx=i, name=label_name, AP=ap, AP50=ap_50, AP25=ap_25 + ) + ) + current_epoch = self.trainer.epoch + 1 + if self.trainer.writer is not None: + self.trainer.writer.add_scalar("val/loss", loss_avg, current_epoch) + self.trainer.writer.add_scalar("val/mAP", all_ap, current_epoch) + self.trainer.writer.add_scalar("val/AP50", all_ap_50, current_epoch) + self.trainer.writer.add_scalar("val/AP25", all_ap_25, current_epoch) + self.trainer.logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + self.trainer.comm_info["current_metric_value"] = all_ap_50 # save for saver + 
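            # Sanity check for the step-width integration used in evaluate_matches above
            # (a toy example, not repo code): for recall [1.0, 0.5, 0.0], the padded array
            # recall_for_conv = [1.0, 1.0, 0.5, 0.0, 0.0] and
            # np.convolve(recall_for_conv, [-0.5, 0, 0.5], "valid") give centered widths
            # (r[i-1] - r[i+1]) / 2 = [0.25, 0.5, 0.25]; with precision [0.6, 0.8, 1.0]
            # the dot product yields AP = 0.6*0.25 + 0.8*0.5 + 1.0*0.25 = 0.8.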
self.trainer.comm_info["current_metric_name"] = "AP50" # save for saver diff --git a/Pointcept/pointcept/engines/hooks/misc.py b/Pointcept/pointcept/engines/hooks/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..155bf5541fc8e5406618a801ba4ccb1e369d4308 --- /dev/null +++ b/Pointcept/pointcept/engines/hooks/misc.py @@ -0,0 +1,464 @@ +""" +Misc Hook + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import sys +import glob +import os +import shutil +import time +import torch +import torch.utils.data +from collections import OrderedDict + +if sys.version_info >= (3, 10): + from collections.abc import Sequence +else: + from collections import Sequence +from pointcept.utils.timer import Timer +from pointcept.utils.comm import is_main_process, synchronize, get_world_size +from pointcept.utils.cache import shared_dict +import pointcept.utils.comm as comm +from pointcept.engines.test import TESTERS + +from .default import HookBase +from .builder import HOOKS + + +@HOOKS.register_module() +class IterationTimer(HookBase): + def __init__(self, warmup_iter=1): + self._warmup_iter = warmup_iter + self._start_time = time.perf_counter() + self._iter_timer = Timer() + self._remain_iter = 0 + + def before_train(self): + self._start_time = time.perf_counter() + self._remain_iter = self.trainer.max_epoch * len(self.trainer.train_loader) + + def before_epoch(self): + self._iter_timer.reset() + + def before_step(self): + data_time = self._iter_timer.seconds() + self.trainer.storage.put_scalar("data_time", data_time) + + def after_step(self): + batch_time = self._iter_timer.seconds() + self._iter_timer.reset() + self.trainer.storage.put_scalar("batch_time", batch_time) + self._remain_iter -= 1 + remain_time = self._remain_iter * self.trainer.storage.history("batch_time").avg + t_m, t_s = divmod(remain_time, 60) + t_h, t_m = divmod(t_m, 60) + remain_time = "{:02d}:{:02d}:{:02d}".format(int(t_h), int(t_m), int(t_s)) + if "iter_info" in self.trainer.comm_info.keys(): + info = ( + "Data {data_time_val:.3f} ({data_time_avg:.3f}) " + "Batch {batch_time_val:.3f} ({batch_time_avg:.3f}) " + "Remain {remain_time} ".format( + data_time_val=self.trainer.storage.history("data_time").val, + data_time_avg=self.trainer.storage.history("data_time").avg, + batch_time_val=self.trainer.storage.history("batch_time").val, + batch_time_avg=self.trainer.storage.history("batch_time").avg, + remain_time=remain_time, + ) + ) + self.trainer.comm_info["iter_info"] += info + if self.trainer.comm_info["iter"] <= self._warmup_iter: + self.trainer.storage.history("data_time").reset() + self.trainer.storage.history("batch_time").reset() + + +@HOOKS.register_module() +class InformationWriter(HookBase): + def __init__(self): + self.curr_iter = 0 + self.model_output_keys = [] + + def before_train(self): + self.trainer.comm_info["iter_info"] = "" + self.curr_iter = self.trainer.start_epoch * len(self.trainer.train_loader) + + def before_step(self): + self.curr_iter += 1 + # MSC pretrain do not have offset information. 
Comment the code for support MSC + # info = "Train: [{epoch}/{max_epoch}][{iter}/{max_iter}] " \ + # "Scan {batch_size} ({points_num}) ".format( + # epoch=self.trainer.epoch + 1, max_epoch=self.trainer.max_epoch, + # iter=self.trainer.comm_info["iter"], max_iter=len(self.trainer.train_loader), + # batch_size=len(self.trainer.comm_info["input_dict"]["offset"]), + # points_num=self.trainer.comm_info["input_dict"]["offset"][-1] + # ) + info = "Train: [{epoch}/{max_epoch}][{iter}/{max_iter}] ".format( + epoch=self.trainer.epoch + 1, + max_epoch=self.trainer.max_epoch, + iter=self.trainer.comm_info["iter"] + 1, + max_iter=len(self.trainer.train_loader), + ) + self.trainer.comm_info["iter_info"] += info + + def after_step(self): + if "model_output_dict" in self.trainer.comm_info.keys(): + model_output_dict = self.trainer.comm_info["model_output_dict"] + self.model_output_keys = model_output_dict.keys() + for key in self.model_output_keys: + self.trainer.storage.put_scalar(key, model_output_dict[key].item()) + + for key in self.model_output_keys: + self.trainer.comm_info["iter_info"] += "{key}: {value:.4f} ".format( + key=key, value=self.trainer.storage.history(key).val + ) + lr = self.trainer.optimizer.state_dict()["param_groups"][0]["lr"] + self.trainer.comm_info["iter_info"] += "Lr: {lr:.5f}".format(lr=lr) + self.trainer.logger.info(self.trainer.comm_info["iter_info"]) + self.trainer.comm_info["iter_info"] = "" # reset iter info + if self.trainer.writer is not None: + self.trainer.writer.add_scalar("lr", lr, self.curr_iter) + for key in self.model_output_keys: + self.trainer.writer.add_scalar( + "train_batch/" + key, + self.trainer.storage.history(key).val, + self.curr_iter, + ) + + def after_epoch(self): + epoch_info = "Train result: " + for key in self.model_output_keys: + epoch_info += "{key}: {value:.4f} ".format( + key=key, value=self.trainer.storage.history(key).avg + ) + self.trainer.logger.info(epoch_info) + if self.trainer.writer is not None: + for key in self.model_output_keys: + self.trainer.writer.add_scalar( + "train/" + key, + self.trainer.storage.history(key).avg, + self.trainer.epoch + 1, + ) + + +@HOOKS.register_module() +class CheckpointSaver(HookBase): + def __init__(self, save_freq=None): + self.save_freq = save_freq # None or int, None indicate only save model last + + def after_epoch(self): + if is_main_process(): + is_best = False + if self.trainer.cfg.evaluate: + current_metric_value = self.trainer.comm_info["current_metric_value"] + current_metric_name = self.trainer.comm_info["current_metric_name"] + if current_metric_value > self.trainer.best_metric_value: + self.trainer.best_metric_value = current_metric_value + is_best = True + self.trainer.logger.info( + "Best validation {} updated to: {:.4f}".format( + current_metric_name, current_metric_value + ) + ) + self.trainer.logger.info( + "Currently Best {}: {:.4f}".format( + current_metric_name, self.trainer.best_metric_value + ) + ) + + filename = os.path.join( + self.trainer.cfg.save_path, "model", "model_last.pth" + ) + self.trainer.logger.info("Saving checkpoint to: " + filename) + torch.save( + { + "epoch": self.trainer.epoch + 1, + "state_dict": self.trainer.model.state_dict(), + "optimizer": self.trainer.optimizer.state_dict(), + "scheduler": self.trainer.scheduler.state_dict(), + "scaler": ( + self.trainer.scaler.state_dict() + if self.trainer.cfg.enable_amp + else None + ), + "best_metric_value": self.trainer.best_metric_value, + }, + filename + ".tmp", + ) + os.replace(filename + ".tmp", filename) + if 
is_best: + shutil.copyfile( + filename, + os.path.join(self.trainer.cfg.save_path, "model", "model_best.pth"), + ) + if self.save_freq and (self.trainer.epoch + 1) % self.save_freq == 0: + shutil.copyfile( + filename, + os.path.join( + self.trainer.cfg.save_path, + "model", + f"epoch_{self.trainer.epoch + 1}.pth", + ), + ) + + +@HOOKS.register_module() +class CheckpointLoader(HookBase): + def __init__(self, keywords="", replacement=None, strict=False): + self.keywords = keywords + self.replacement = replacement if replacement is not None else keywords + self.strict = strict + + def before_train(self): + self.trainer.logger.info("=> Loading checkpoint & weight ...") + if self.trainer.cfg.weight and os.path.isfile(self.trainer.cfg.weight): + self.trainer.logger.info(f"Loading weight at: {self.trainer.cfg.weight}") + checkpoint = torch.load( + self.trainer.cfg.weight, + map_location=lambda storage, loc: storage.cuda(), + ) + self.trainer.logger.info( + f"Loading layer weights with keyword: {self.keywords}, " + f"replace keyword with: {self.replacement}" + ) + weight = OrderedDict() + for key, value in checkpoint["state_dict"].items(): + if not key.startswith("module."): + key = "module." + key # xxx.xxx -> module.xxx.xxx + # Now all keys contain "module." no matter DDP or not. + if self.keywords in key: + key = key.replace(self.keywords, self.replacement) + if comm.get_world_size() == 1: + key = key[7:] # module.xxx.xxx -> xxx.xxx + weight[key] = value + load_state_info = self.trainer.model.load_state_dict( + weight, strict=self.strict + ) + self.trainer.logger.info(f"Missing keys: {load_state_info[0]}") + if self.trainer.cfg.resume: + self.trainer.logger.info( + f"Resuming train at eval epoch: {checkpoint['epoch']}" + ) + self.trainer.start_epoch = checkpoint["epoch"] + self.trainer.best_metric_value = checkpoint["best_metric_value"] + self.trainer.optimizer.load_state_dict(checkpoint["optimizer"]) + self.trainer.scheduler.load_state_dict(checkpoint["scheduler"]) + if self.trainer.cfg.enable_amp: + self.trainer.scaler.load_state_dict(checkpoint["scaler"]) + else: + self.trainer.logger.info(f"No weight found at: {self.trainer.cfg.weight}") + + +@HOOKS.register_module() +class PreciseEvaluator(HookBase): + def __init__(self, test_last=False): + self.test_last = test_last + + def after_train(self): + self.trainer.logger.info( + ">>>>>>>>>>>>>>>> Start Precise Evaluation >>>>>>>>>>>>>>>>" + ) + torch.cuda.empty_cache() + cfg = self.trainer.cfg + tester = TESTERS.build( + dict(type=cfg.test.type, cfg=cfg, model=self.trainer.model) + ) + if self.test_last: + self.trainer.logger.info("=> Testing on model_last ...") + else: + self.trainer.logger.info("=> Testing on model_best ...") + best_path = os.path.join( + self.trainer.cfg.save_path, "model", "model_best.pth" + ) + checkpoint = torch.load(best_path) + state_dict = checkpoint["state_dict"] + tester.model.load_state_dict(state_dict, strict=True) + tester.test() + + +@HOOKS.register_module() +class DataCacheOperator(HookBase): + def __init__(self, data_root, split): + self.data_root = data_root + self.split = split + self.data_list = self.get_data_list() + + def get_data_list(self): + if isinstance(self.split, str): + data_list = glob.glob(os.path.join(self.data_root, self.split)) + elif isinstance(self.split, Sequence): + data_list = [] + for split in self.split: + data_list += glob.glob(os.path.join(self.data_root, split)) + else: + raise NotImplementedError + return data_list + + def get_cache_name(self, data_path): + data_name = 
data_path.replace(os.path.dirname(self.data_root), "") + return "pointcept" + data_name.replace(os.path.sep, "-") + + def before_train(self): + self.trainer.logger.info( + f"=> Caching dataset: {self.data_root}, split: {self.split} ..." + ) + if is_main_process(): + dataset = self.trainer.train_loader.dataset + for i in range(len(dataset)): + data_dict = dataset[i] + name = data_dict["name"] + shared_dict(f"Pointcept-{name}", data_dict) + synchronize() + + +@HOOKS.register_module() +class RuntimeProfiler(HookBase): + def __init__( + self, + forward=True, + backward=True, + interrupt=False, + warm_up=2, + sort_by="cuda_time_total", + row_limit=30, + ): + self.forward = forward + self.backward = backward + self.interrupt = interrupt + self.warm_up = warm_up + self.sort_by = sort_by + self.row_limit = row_limit + + def before_train(self): + self.trainer.logger.info("Profiling runtime ...") + from torch.profiler import profile, record_function, ProfilerActivity + + for i, input_dict in enumerate(self.trainer.train_loader): + if i == self.warm_up + 1: + break + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + if self.forward: + with profile( + activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + record_shapes=True, + profile_memory=True, + with_stack=True, + ) as forward_prof: + with record_function("model_inference"): + output_dict = self.trainer.model(input_dict) + else: + output_dict = self.trainer.model(input_dict) + loss = output_dict["loss"] + if self.backward: + with profile( + activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + record_shapes=True, + profile_memory=True, + with_stack=True, + ) as backward_prof: + with record_function("model_inference"): + loss.backward() + self.trainer.logger.info(f"Profile: [{i + 1}/{self.warm_up + 1}]") + if self.forward: + self.trainer.logger.info( + "Forward profile: \n" + + str( + forward_prof.key_averages().table( + sort_by=self.sort_by, row_limit=self.row_limit + ) + ) + ) + forward_prof.export_chrome_trace( + os.path.join(self.trainer.cfg.save_path, "forward_trace.json") + ) + + if self.backward: + self.trainer.logger.info( + "Backward profile: \n" + + str( + backward_prof.key_averages().table( + sort_by=self.sort_by, row_limit=self.row_limit + ) + ) + ) + backward_prof.export_chrome_trace( + os.path.join(self.trainer.cfg.save_path, "backward_trace.json") + ) + if self.interrupt: + sys.exit(0) + + +@HOOKS.register_module() +class RuntimeProfilerV2(HookBase): + def __init__( + self, + interrupt=False, + wait=1, + warmup=1, + active=10, + repeat=1, + sort_by="cuda_time_total", + row_limit=30, + ): + self.interrupt = interrupt + self.wait = wait + self.warmup = warmup + self.active = active + self.repeat = repeat + self.sort_by = sort_by + self.row_limit = row_limit + + def before_train(self): + self.trainer.logger.info("Profiling runtime ...") + from torch.profiler import ( + profile, + record_function, + ProfilerActivity, + schedule, + tensorboard_trace_handler, + ) + + prof = profile( + activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], + schedule=schedule( + wait=self.wait, + warmup=self.warmup, + active=self.active, + repeat=self.repeat, + ), + on_trace_ready=tensorboard_trace_handler(self.trainer.cfg.save_path), + record_shapes=True, + profile_memory=True, + with_stack=True, + ) + prof.start() + for i, input_dict in enumerate(self.trainer.train_loader): + if i >= (self.wait + self.warmup + self.active) * self.repeat: + break + for 
key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with record_function("model_forward"): + output_dict = self.trainer.model(input_dict) + loss = output_dict["loss"] + with record_function("model_backward"): + loss.backward() + prof.step() + self.trainer.logger.info( + f"Profile: [{i + 1}/{(self.wait + self.warmup + self.active) * self.repeat}]" + ) + self.trainer.logger.info( + "Profile: \n" + + str( + prof.key_averages().table( + sort_by=self.sort_by, row_limit=self.row_limit + ) + ) + ) + prof.stop() + + if self.interrupt: + sys.exit(0) diff --git a/Pointcept/pointcept/engines/launch.py b/Pointcept/pointcept/engines/launch.py new file mode 100644 index 0000000000000000000000000000000000000000..99a8351fe5ab4393c1fab75c3bd546ba66641986 --- /dev/null +++ b/Pointcept/pointcept/engines/launch.py @@ -0,0 +1,137 @@ +""" +Launcher + +modified from detectron2(https://github.com/facebookresearch/detectron2) + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os +import logging +from datetime import timedelta +import torch +import torch.distributed as dist +import torch.multiprocessing as mp + +from pointcept.utils import comm + +__all__ = ["DEFAULT_TIMEOUT", "launch"] + +DEFAULT_TIMEOUT = timedelta(minutes=60) + + +def _find_free_port(): + import socket + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + # Binding to port 0 will cause the OS to find an available port for us + sock.bind(("", 0)) + port = sock.getsockname()[1] + sock.close() + # NOTE: there is still a chance the port could be taken by other processes. + return port + + +def launch( + main_func, + num_gpus_per_machine, + num_machines=1, + machine_rank=0, + dist_url=None, + cfg=(), + timeout=DEFAULT_TIMEOUT, +): + """ + Launch multi-gpu or distributed training. + This function must be called on all machines involved in the training. + It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine. + Args: + main_func: a function that will be called by `main_func(*args)` + num_gpus_per_machine (int): number of GPUs per machine + num_machines (int): the total number of machines + machine_rank (int): the rank of this machine + dist_url (str): url to connect to for distributed jobs, including protocol + e.g. "tcp://127.0.0.1:8686". + Can be set to "auto" to automatically select a free port on localhost + timeout (timedelta): timeout of the distributed workers + args (tuple): arguments passed to main_func + """ + world_size = num_machines * num_gpus_per_machine + if world_size > 1: + if dist_url == "auto": + assert ( + num_machines == 1 + ), "dist_url=auto not supported in multi-machine jobs." + port = _find_free_port() + dist_url = f"tcp://127.0.0.1:{port}" + if num_machines > 1 and dist_url.startswith("file://"): + logger = logging.getLogger(__name__) + logger.warning( + "file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://" + ) + + mp.spawn( + _distributed_worker, + nprocs=num_gpus_per_machine, + args=( + main_func, + world_size, + num_gpus_per_machine, + machine_rank, + dist_url, + cfg, + timeout, + ), + daemon=False, + ) + else: + main_func(*cfg) + + +def _distributed_worker( + local_rank, + main_func, + world_size, + num_gpus_per_machine, + machine_rank, + dist_url, + cfg, + timeout=DEFAULT_TIMEOUT, +): + assert ( + torch.cuda.is_available() + ), "cuda is not available. Please check your installation." 
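    # Rank layout assumed below: machine m with G GPUs per machine owns the global
    # ranks [m * G, (m + 1) * G). For example, with num_machines=2 and G=4,
    # local_rank 2 on machine 1 becomes global rank 1 * 4 + 2 = 6.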
+ global_rank = machine_rank * num_gpus_per_machine + local_rank + try: + dist.init_process_group( + backend="NCCL", + init_method=dist_url, + world_size=world_size, + rank=global_rank, + timeout=timeout, + ) + except Exception as e: + logger = logging.getLogger(__name__) + logger.error("Process group URL: {}".format(dist_url)) + raise e + + # Setup the local process group (which contains ranks within the same machine) + assert comm._LOCAL_PROCESS_GROUP is None + num_machines = world_size // num_gpus_per_machine + for i in range(num_machines): + ranks_on_i = list( + range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine) + ) + pg = dist.new_group(ranks_on_i) + if i == machine_rank: + comm._LOCAL_PROCESS_GROUP = pg + + assert num_gpus_per_machine <= torch.cuda.device_count() + torch.cuda.set_device(local_rank) + + # synchronize is needed here to prevent a possible timeout after calling init_process_group + # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 + comm.synchronize() + + main_func(*cfg) diff --git a/Pointcept/pointcept/engines/test.py b/Pointcept/pointcept/engines/test.py new file mode 100644 index 0000000000000000000000000000000000000000..d74872d8c18382ca44b09211c2f3def70fc8dfe7 --- /dev/null +++ b/Pointcept/pointcept/engines/test.py @@ -0,0 +1,640 @@ +""" +Tester + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os +import time +import numpy as np +from collections import OrderedDict +import torch +import torch.distributed as dist +import torch.nn.functional as F +import torch.utils.data + +from .defaults import create_ddp_model +import pointcept.utils.comm as comm +from pointcept.datasets import build_dataset, collate_fn +from pointcept.models import build_model +from pointcept.utils.logger import get_root_logger +from pointcept.utils.registry import Registry +from pointcept.utils.misc import ( + AverageMeter, + intersection_and_union, + intersection_and_union_gpu, + make_dirs, +) + + +TESTERS = Registry("testers") + + +class TesterBase: + def __init__(self, cfg, model=None, test_loader=None, verbose=False) -> None: + torch.multiprocessing.set_sharing_strategy("file_system") + self.logger = get_root_logger( + log_file=os.path.join(cfg.save_path, "test.log"), + file_mode="a" if cfg.resume else "w", + ) + self.logger.info("=> Loading config ...") + self.cfg = cfg + self.verbose = verbose + if self.verbose: + self.logger.info(f"Save path: {cfg.save_path}") + self.logger.info(f"Config:\n{cfg.pretty_text}") + if model is None: + self.logger.info("=> Building model ...") + self.model = self.build_model() + else: + self.model = model + if test_loader is None: + self.logger.info("=> Building test dataset & dataloader ...") + self.test_loader = self.build_test_loader() + else: + self.test_loader = test_loader + + def build_model(self): + model = build_model(self.cfg.model) + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + self.logger.info(f"Num params: {n_parameters}") + model = create_ddp_model( + model.cuda(), + broadcast_buffers=False, + find_unused_parameters=self.cfg.find_unused_parameters, + ) + if os.path.isfile(self.cfg.weight): + self.logger.info(f"Loading weight at: {self.cfg.weight}") + checkpoint = torch.load(self.cfg.weight) + weight = OrderedDict() + for key, value in checkpoint["state_dict"].items(): + if key.startswith("module."): + if comm.get_world_size() == 1: + key = key[7:] # module.xxx.xxx -> xxx.xxx + else: + if comm.get_world_size() > 
1: + key = "module." + key # xxx.xxx -> module.xxx.xxx + weight[key] = value + model.load_state_dict(weight, strict=True) + self.logger.info( + "=> Loaded weight '{}' (epoch {})".format( + self.cfg.weight, checkpoint["epoch"] + ) + ) + else: + raise RuntimeError("=> No checkpoint found at '{}'".format(self.cfg.weight)) + return model + + def build_test_loader(self): + test_dataset = build_dataset(self.cfg.data.test) + if comm.get_world_size() > 1: + test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset) + else: + test_sampler = None + test_loader = torch.utils.data.DataLoader( + test_dataset, + batch_size=self.cfg.batch_size_test_per_gpu, + shuffle=False, + num_workers=self.cfg.batch_size_test_per_gpu, + pin_memory=True, + sampler=test_sampler, + collate_fn=self.__class__.collate_fn, + ) + return test_loader + + def test(self): + raise NotImplementedError + + @staticmethod + def collate_fn(batch): + return collate_fn(batch) + + +@TESTERS.register_module() +class SemSegTester(TesterBase): + def test(self): + assert self.test_loader.batch_size == 1 + logger = get_root_logger() + logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") + + batch_time = AverageMeter() + intersection_meter = AverageMeter() + union_meter = AverageMeter() + target_meter = AverageMeter() + self.model.eval() + + save_path = os.path.join(self.cfg.save_path, "result") + make_dirs(save_path) + # create submit folder only on main process + if ( + self.cfg.data.test.type == "ScanNetDataset" + or self.cfg.data.test.type == "ScanNet200Dataset" + or self.cfg.data.test.type == "ScanNetPPDataset" + ) and comm.is_main_process(): + make_dirs(os.path.join(save_path, "submit")) + elif ( + self.cfg.data.test.type == "SemanticKITTIDataset" and comm.is_main_process() + ): + make_dirs(os.path.join(save_path, "submit")) + elif self.cfg.data.test.type == "NuScenesDataset" and comm.is_main_process(): + import json + + make_dirs(os.path.join(save_path, "submit", "lidarseg", "test")) + make_dirs(os.path.join(save_path, "submit", "test")) + submission = dict( + meta=dict( + use_camera=False, + use_lidar=True, + use_radar=False, + use_map=False, + use_external=False, + ) + ) + with open( + os.path.join(save_path, "submit", "test", "submission.json"), "w" + ) as f: + json.dump(submission, f, indent=4) + comm.synchronize() + record = {} + # fragment inference + for idx, data_dict in enumerate(self.test_loader): + end = time.time() + data_dict = data_dict[0] # currently assumes batch size is 1 + fragment_list = data_dict.pop("fragment_list") + segment = data_dict.pop("segment") + data_name = data_dict.pop("name") + pred_save_path = os.path.join(save_path, "{}_pred.npy".format(data_name)) + if os.path.isfile(pred_save_path): + logger.info( + "{}/{}: {}, loaded pred and label.".format( + idx + 1, len(self.test_loader), data_name + ) + ) + pred = np.load(pred_save_path) + if "origin_segment" in data_dict.keys(): + segment = data_dict["origin_segment"] + else: + pred = torch.zeros((segment.size, self.cfg.data.num_classes)).cuda() + for i in range(len(fragment_list)): + fragment_batch_size = 1 + s_i, e_i = i * fragment_batch_size, min( + (i + 1) * fragment_batch_size, len(fragment_list) + ) + input_dict = collate_fn(fragment_list[s_i:e_i]) + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + idx_part = input_dict["index"] + with torch.no_grad(): + pred_part = self.model(input_dict)["seg_logits"] # (n, k) + pred_part =
F.softmax(pred_part, -1) + if self.cfg.empty_cache: + torch.cuda.empty_cache() + bs = 0 + for be in input_dict["offset"]: + pred[idx_part[bs:be], :] += pred_part[bs:be] + bs = be + + logger.info( + "Test: {}/{}-{data_name}, Batch: {batch_idx}/{batch_num}".format( + idx + 1, + len(self.test_loader), + data_name=data_name, + batch_idx=i, + batch_num=len(fragment_list), + ) + ) + if self.cfg.data.test.type == "ScanNetPPDataset": + pred = pred.topk(3, dim=1)[1].data.cpu().numpy() + else: + pred = pred.max(1)[1].data.cpu().numpy() + if "origin_segment" in data_dict.keys(): + assert "inverse" in data_dict.keys() + pred = pred[data_dict["inverse"]] + segment = data_dict["origin_segment"] + np.save(pred_save_path, pred) + if ( + self.cfg.data.test.type == "ScanNetDataset" + or self.cfg.data.test.type == "ScanNet200Dataset" + ): + np.savetxt( + os.path.join(save_path, "submit", "{}.txt".format(data_name)), + self.test_loader.dataset.class2id[pred].reshape([-1, 1]), + fmt="%d", + ) + elif self.cfg.data.test.type == "ScanNetPPDataset": + np.savetxt( + os.path.join(save_path, "submit", "{}.txt".format(data_name)), + pred.astype(np.int32), + delimiter=",", + fmt="%d", + ) + pred = pred[:, 0] # for mIoU, TODO: support top3 mIoU + elif self.cfg.data.test.type == "SemanticKITTIDataset": + # 00_000000 -> 00, 000000 + sequence_name, frame_name = data_name.split("_") + os.makedirs( + os.path.join( + save_path, "submit", "sequences", sequence_name, "predictions" + ), + exist_ok=True, + ) + submit = pred.astype(np.uint32) + submit = np.vectorize( + self.test_loader.dataset.learning_map_inv.__getitem__ + )(submit).astype(np.uint32) + submit.tofile( + os.path.join( + save_path, + "submit", + "sequences", + sequence_name, + "predictions", + f"{frame_name}.label", + ) + ) + elif self.cfg.data.test.type == "NuScenesDataset": + np.array(pred + 1).astype(np.uint8).tofile( + os.path.join( + save_path, + "submit", + "lidarseg", + "test", + "{}_lidarseg.bin".format(data_name), + ) + ) + + intersection, union, target = intersection_and_union( + pred, segment, self.cfg.data.num_classes, self.cfg.data.ignore_index + ) + intersection_meter.update(intersection) + union_meter.update(union) + target_meter.update(target) + record[data_name] = dict( + intersection=intersection, union=union, target=target + ) + + mask = union != 0 + iou_class = intersection / (union + 1e-10) + iou = np.mean(iou_class[mask]) + acc = sum(intersection) / (sum(target) + 1e-10) + + m_iou = np.mean(intersection_meter.sum / (union_meter.sum + 1e-10)) + m_acc = np.mean(intersection_meter.sum / (target_meter.sum + 1e-10)) + + batch_time.update(time.time() - end) + logger.info( + "Test: {} [{}/{}]-{} " + "Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) " + "Accuracy {acc:.4f} ({m_acc:.4f}) " + "mIoU {iou:.4f} ({m_iou:.4f})".format( + data_name, + idx + 1, + len(self.test_loader), + segment.size, + batch_time=batch_time, + acc=acc, + m_acc=m_acc, + iou=iou, + m_iou=m_iou, + ) + ) + + logger.info("Syncing ...") + comm.synchronize() + record_sync = comm.gather(record, dst=0) + + if comm.is_main_process(): + record = {} + for _ in range(len(record_sync)): + r = record_sync.pop() + record.update(r) + del r + intersection = np.sum( + [meters["intersection"] for _, meters in record.items()], axis=0 + ) + union = np.sum([meters["union"] for _, meters in record.items()], axis=0) + target = np.sum([meters["target"] for _, meters in record.items()], axis=0) + + if self.cfg.data.test.type == "S3DISDataset": + torch.save( + dict(intersection=intersection, 
union=union, target=target), + os.path.join(save_path, f"{self.test_loader.dataset.split}.pth"), + ) + + iou_class = intersection / (union + 1e-10) + accuracy_class = intersection / (target + 1e-10) + mIoU = np.mean(iou_class) + mAcc = np.mean(accuracy_class) + allAcc = sum(intersection) / (sum(target) + 1e-10) + + logger.info( + "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}".format( + mIoU, mAcc, allAcc + ) + ) + for i in range(self.cfg.data.num_classes): + logger.info( + "Class_{idx} - {name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( + idx=i, + name=self.cfg.data.names[i], + iou=iou_class[i], + accuracy=accuracy_class[i], + ) + ) + logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + + @staticmethod + def collate_fn(batch): + return batch + + +@TESTERS.register_module() +class ClsTester(TesterBase): + def test(self): + logger = get_root_logger() + logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") + batch_time = AverageMeter() + intersection_meter = AverageMeter() + union_meter = AverageMeter() + target_meter = AverageMeter() + self.model.eval() + + for i, input_dict in enumerate(self.test_loader): + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + end = time.time() + with torch.no_grad(): + output_dict = self.model(input_dict) + output = output_dict["cls_logits"] + pred = output.max(1)[1] + label = input_dict["category"] + intersection, union, target = intersection_and_union_gpu( + pred, label, self.cfg.data.num_classes, self.cfg.data.ignore_index + ) + if comm.get_world_size() > 1: + dist.all_reduce(intersection), dist.all_reduce(union), dist.all_reduce( + target + ) + intersection, union, target = ( + intersection.cpu().numpy(), + union.cpu().numpy(), + target.cpu().numpy(), + ) + intersection_meter.update(intersection), union_meter.update( + union + ), target_meter.update(target) + + accuracy = sum(intersection_meter.val) / (sum(target_meter.val) + 1e-10) + batch_time.update(time.time() - end) + + logger.info( + "Test: [{}/{}] " + "Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) " + "Accuracy {accuracy:.4f} ".format( + i + 1, + len(self.test_loader), + batch_time=batch_time, + accuracy=accuracy, + ) + ) + + iou_class = intersection_meter.sum / (union_meter.sum + 1e-10) + accuracy_class = intersection_meter.sum / (target_meter.sum + 1e-10) + mIoU = np.mean(iou_class) + mAcc = np.mean(accuracy_class) + allAcc = sum(intersection_meter.sum) / (sum(target_meter.sum) + 1e-10) + logger.info( + "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}.".format( + mIoU, mAcc, allAcc + ) + ) + + for i in range(self.cfg.data.num_classes): + logger.info( + "Class_{idx} - {name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( + idx=i, + name=self.cfg.data.names[i], + iou=iou_class[i], + accuracy=accuracy_class[i], + ) + ) + logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + + @staticmethod + def collate_fn(batch): + return collate_fn(batch) + + +@TESTERS.register_module() +class ClsVotingTester(TesterBase): + def __init__( + self, + num_repeat=100, + metric="allAcc", + **kwargs, + ): + super().__init__(**kwargs) + self.num_repeat = num_repeat + self.metric = metric + self.best_idx = 0 + self.best_record = None + self.best_metric = 0 + + def test(self): + for i in range(self.num_repeat): + logger = get_root_logger() + logger.info(f">>>>>>>>>>>>>>>> Start Evaluation {i + 1} >>>>>>>>>>>>>>>>") + record = self.test_once() + if 
comm.is_main_process(): + if record[self.metric] > self.best_metric: + self.best_record = record + self.best_idx = i + self.best_metric = record[self.metric] + info = f"Current best record is Evaluation {i + 1}: " + for m in self.best_record.keys(): + info += f"{m}: {self.best_record[m]:.4f} " + logger.info(info) + + def test_once(self): + logger = get_root_logger() + batch_time = AverageMeter() + intersection_meter = AverageMeter() + target_meter = AverageMeter() + record = {} + self.model.eval() + + for idx, data_dict in enumerate(self.test_loader): + end = time.time() + data_dict = data_dict[0] # currently assumes batch size is 1 + voting_list = data_dict.pop("voting_list") + category = data_dict.pop("category") + data_name = data_dict.pop("name") + # pred = torch.zeros([1, self.cfg.data.num_classes]).cuda() + # for i in range(len(voting_list)): + # input_dict = voting_list[i] + # for key in input_dict.keys(): + # if isinstance(input_dict[key], torch.Tensor): + # input_dict[key] = input_dict[key].cuda(non_blocking=True) + # with torch.no_grad(): + # pred += F.softmax(self.model(input_dict)["cls_logits"], -1) + input_dict = collate_fn(voting_list) + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with torch.no_grad(): + pred = F.softmax(self.model(input_dict)["cls_logits"], -1).sum( + 0, keepdim=True + ) + pred = pred.max(1)[1].cpu().numpy() + intersection, union, target = intersection_and_union( + pred, category, self.cfg.data.num_classes, self.cfg.data.ignore_index + ) + intersection_meter.update(intersection) + target_meter.update(target) + record[data_name] = dict(intersection=intersection, target=target) + acc = sum(intersection) / (sum(target) + 1e-10) + m_acc = np.mean(intersection_meter.sum / (target_meter.sum + 1e-10)) + batch_time.update(time.time() - end) + logger.info( + "Test: {} [{}/{}] " + "Batch {batch_time.val:.3f} ({batch_time.avg:.3f}) " + "Accuracy {acc:.4f} ({m_acc:.4f}) ".format( + data_name, + idx + 1, + len(self.test_loader), + batch_time=batch_time, + acc=acc, + m_acc=m_acc, + ) + ) + + logger.info("Syncing ...") + comm.synchronize() + record_sync = comm.gather(record, dst=0) + + if comm.is_main_process(): + record = {} + for _ in range(len(record_sync)): + r = record_sync.pop() + record.update(r) + del r + intersection = np.sum( + [meters["intersection"] for _, meters in record.items()], axis=0 + ) + target = np.sum([meters["target"] for _, meters in record.items()], axis=0) + accuracy_class = intersection / (target + 1e-10) + mAcc = np.mean(accuracy_class) + allAcc = sum(intersection) / (sum(target) + 1e-10) + + logger.info("Val result: mAcc/allAcc {:.4f}/{:.4f}".format(mAcc, allAcc)) + for i in range(self.cfg.data.num_classes): + logger.info( + "Class_{idx} - {name} Result: accuracy {accuracy:.4f}".format( + idx=i, + name=self.cfg.data.names[i], + accuracy=accuracy_class[i], + ) + ) + return dict(mAcc=mAcc, allAcc=allAcc) + + @staticmethod + def collate_fn(batch): + return batch + + +@TESTERS.register_module() +class PartSegTester(TesterBase): + def test(self): + test_dataset = self.test_loader.dataset + logger = get_root_logger() + logger.info(">>>>>>>>>>>>>>>> Start Evaluation >>>>>>>>>>>>>>>>") + + batch_time = AverageMeter() + + num_categories = len(self.test_loader.dataset.categories) + iou_category, iou_count = np.zeros(num_categories), np.zeros(num_categories) + self.model.eval() + + save_path = os.path.join( + self.cfg.save_path, "result",
"test_epoch{}".format(self.cfg.test_epoch) + ) + make_dirs(save_path) + + for idx in range(len(test_dataset)): + end = time.time() + data_name = test_dataset.get_data_name(idx) + + data_dict_list, label = test_dataset[idx] + pred = torch.zeros((label.size, self.cfg.data.num_classes)).cuda() + batch_num = int(np.ceil(len(data_dict_list) / self.cfg.batch_size_test)) + for i in range(batch_num): + s_i, e_i = i * self.cfg.batch_size_test, min( + (i + 1) * self.cfg.batch_size_test, len(data_dict_list) + ) + input_dict = collate_fn(data_dict_list[s_i:e_i]) + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with torch.no_grad(): + pred_part = self.model(input_dict)["cls_logits"] + pred_part = F.softmax(pred_part, -1) + if self.cfg.empty_cache: + torch.cuda.empty_cache() + pred_part = pred_part.reshape(-1, label.size, self.cfg.data.num_classes) + pred = pred + pred_part.sum(dim=0) + logger.info( + "Test: {} {}/{}, Batch: {batch_idx}/{batch_num}".format( + data_name, + idx + 1, + len(test_dataset), + batch_idx=i, + batch_num=batch_num, + ) + ) + pred = pred.max(1)[1].data.cpu().numpy() + + category_index = data_dict_list[0]["cls_token"] + category = self.test_loader.dataset.categories[category_index] + parts_idx = self.test_loader.dataset.category2part[category] + parts_iou = np.zeros(len(parts_idx)) + for j, part in enumerate(parts_idx): + if (np.sum(label == part) == 0) and (np.sum(pred == part) == 0): + parts_iou[j] = 1.0 + else: + i = (label == part) & (pred == part) + u = (label == part) | (pred == part) + parts_iou[j] = np.sum(i) / (np.sum(u) + 1e-10) + iou_category[category_index] += parts_iou.mean() + iou_count[category_index] += 1 + + batch_time.update(time.time() - end) + logger.info( + "Test: {} [{}/{}] " + "Batch {batch_time.val:.3f} " + "({batch_time.avg:.3f}) ".format( + data_name, idx + 1, len(test_dataset), batch_time=batch_time + ) + ) + + ins_mIoU = iou_category.sum() / (iou_count.sum() + 1e-10) + cat_mIoU = (iou_category / (iou_count + 1e-10)).mean() + logger.info( + "Val result: ins.mIoU/cat.mIoU {:.4f}/{:.4f}.".format(ins_mIoU, cat_mIoU) + ) + for i in range(num_categories): + logger.info( + "Class_{idx}-{name} Result: iou_cat/num_sample {iou_cat:.4f}/{iou_count:d}".format( + idx=i, + name=self.test_loader.dataset.categories[i], + iou_cat=iou_category[i] / (iou_count[i] + 1e-10), + iou_count=int(iou_count[i]), + ) + ) + logger.info("<<<<<<<<<<<<<<<<< End Evaluation <<<<<<<<<<<<<<<<<") + + @staticmethod + def collate_fn(batch): + return collate_fn(batch) diff --git a/Pointcept/pointcept/engines/train.py b/Pointcept/pointcept/engines/train.py new file mode 100644 index 0000000000000000000000000000000000000000..11c543fdfe754c5473503a05f628447b7afed502 --- /dev/null +++ b/Pointcept/pointcept/engines/train.py @@ -0,0 +1,318 @@ +""" +Trainer + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you.
+""" + +import os +import sys +import weakref +import torch +import torch.nn as nn +import torch.utils.data +from functools import partial + +if sys.version_info >= (3, 10): + from collections.abc import Iterator +else: + from collections import Iterator +from tensorboardX import SummaryWriter + +from .defaults import create_ddp_model, worker_init_fn +from .hooks import HookBase, build_hooks +import pointcept.utils.comm as comm +from pointcept.datasets import build_dataset, point_collate_fn, collate_fn +from pointcept.models import build_model +from pointcept.utils.logger import get_root_logger +from pointcept.utils.optimizer import build_optimizer +from pointcept.utils.scheduler import build_scheduler +from pointcept.utils.events import EventStorage, ExceptionWriter +from pointcept.utils.registry import Registry + + +TRAINERS = Registry("trainers") + + +class TrainerBase: + def __init__(self) -> None: + self.hooks = [] + self.epoch = 0 + self.start_epoch = 0 + self.max_epoch = 0 + self.max_iter = 0 + self.comm_info = dict() + self.data_iterator: Iterator = enumerate([]) + self.storage: EventStorage + self.writer: SummaryWriter + + def register_hooks(self, hooks) -> None: + hooks = build_hooks(hooks) + for h in hooks: + assert isinstance(h, HookBase) + # To avoid circular reference, hooks and trainer cannot own each other. + # This normally does not matter, but will cause memory leak if the + # involved objects contain __del__: + # See http://engineering.hearsaysocial.com/2013/06/16/circular-references-in-python/ + h.trainer = weakref.proxy(self) + self.hooks.extend(hooks) + + def train(self): + with EventStorage() as self.storage: + # => before train + self.before_train() + for self.epoch in range(self.start_epoch, self.max_epoch): + # => before epoch + self.before_epoch() + # => run_epoch + for ( + self.comm_info["iter"], + self.comm_info["input_dict"], + ) in self.data_iterator: + # => before_step + self.before_step() + # => run_step + self.run_step() + # => after_step + self.after_step() + # => after epoch + self.after_epoch() + # => after train + self.after_train() + + def before_train(self): + for h in self.hooks: + h.before_train() + + def before_epoch(self): + for h in self.hooks: + h.before_epoch() + + def before_step(self): + for h in self.hooks: + h.before_step() + + def run_step(self): + raise NotImplementedError + + def after_step(self): + for h in self.hooks: + h.after_step() + + def after_epoch(self): + for h in self.hooks: + h.after_epoch() + self.storage.reset_histories() + + def after_train(self): + # Sync GPU before running train hooks + comm.synchronize() + for h in self.hooks: + h.after_train() + if comm.is_main_process(): + self.writer.close() + + +@TRAINERS.register_module("DefaultTrainer") +class Trainer(TrainerBase): + def __init__(self, cfg): + super(Trainer, self).__init__() + self.epoch = 0 + self.start_epoch = 0 + self.max_epoch = cfg.eval_epoch + self.best_metric_value = -torch.inf + self.logger = get_root_logger( + log_file=os.path.join(cfg.save_path, "train.log"), + file_mode="a" if cfg.resume else "w", + ) + self.logger.info("=> Loading config ...") + self.cfg = cfg + self.logger.info(f"Save path: {cfg.save_path}") + self.logger.info(f"Config:\n{cfg.pretty_text}") + self.logger.info("=> Building model ...") + self.model = self.build_model() + self.logger.info("=> Building writer ...") + self.writer = self.build_writer() + self.logger.info("=> Building train dataset & dataloader ...") + self.train_loader = self.build_train_loader() + self.logger.info("=> 
Building val dataset & dataloader ...") + self.val_loader = self.build_val_loader() + self.logger.info("=> Building optimize, scheduler, scaler(amp) ...") + self.optimizer = self.build_optimizer() + self.scheduler = self.build_scheduler() + self.scaler = self.build_scaler() + self.logger.info("=> Building hooks ...") + self.register_hooks(self.cfg.hooks) + + def train(self): + with EventStorage() as self.storage, ExceptionWriter(): + # => before train + self.before_train() + self.logger.info(">>>>>>>>>>>>>>>> Start Training >>>>>>>>>>>>>>>>") + for self.epoch in range(self.start_epoch, self.max_epoch): + # => before epoch + # TODO: optimize to iteration based + if comm.get_world_size() > 1: + self.train_loader.sampler.set_epoch(self.epoch) + self.model.train() + self.data_iterator = enumerate(self.train_loader) + self.before_epoch() + # => run_epoch + for ( + self.comm_info["iter"], + self.comm_info["input_dict"], + ) in self.data_iterator: + # => before_step + self.before_step() + # => run_step + self.run_step() + # => after_step + self.after_step() + # => after epoch + self.after_epoch() + # => after train + self.after_train() + + def run_step(self): + input_dict = self.comm_info["input_dict"] + for key in input_dict.keys(): + if isinstance(input_dict[key], torch.Tensor): + input_dict[key] = input_dict[key].cuda(non_blocking=True) + with torch.cuda.amp.autocast(enabled=self.cfg.enable_amp): + output_dict = self.model(input_dict) + loss = output_dict["loss"] + self.optimizer.zero_grad() + if self.cfg.enable_amp: + self.scaler.scale(loss).backward() + self.scaler.unscale_(self.optimizer) + if self.cfg.clip_grad is not None: + torch.nn.utils.clip_grad_norm_( + self.model.parameters(), self.cfg.clip_grad + ) + self.scaler.step(self.optimizer) + + # When enable amp, optimizer.step call are skipped if the loss scaling factor is too large. + # Fix torch warning scheduler step before optimizer step. 
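                # GradScaler.update() shrinks the scale after a step that was skipped
                # because gradients contained inf/NaN, so comparing the scale before and
                # after update() reveals whether optimizer.step() actually ran; the
                # scheduler is stepped only in that case, keeping the two in sync.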
+ scaler = self.scaler.get_scale() + self.scaler.update() + if scaler <= self.scaler.get_scale(): + self.scheduler.step() + else: + loss.backward() + if self.cfg.clip_grad is not None: + torch.nn.utils.clip_grad_norm_( + self.model.parameters(), self.cfg.clip_grad + ) + self.optimizer.step() + self.scheduler.step() + if self.cfg.empty_cache: + torch.cuda.empty_cache() + self.comm_info["model_output_dict"] = output_dict + + def after_epoch(self): + for h in self.hooks: + h.after_epoch() + self.storage.reset_histories() + if self.cfg.empty_cache_per_epoch: + torch.cuda.empty_cache() + + def build_model(self): + model = build_model(self.cfg.model) + if self.cfg.sync_bn: + model = nn.SyncBatchNorm.convert_sync_batchnorm(model) + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + # logger.info(f"Model: \n{self.model}") + self.logger.info(f"Num params: {n_parameters}") + model = create_ddp_model( + model.cuda(), + broadcast_buffers=False, + find_unused_parameters=self.cfg.find_unused_parameters, + ) + return model + + def build_writer(self): + writer = SummaryWriter(self.cfg.save_path) if comm.is_main_process() else None + self.logger.info(f"Tensorboard writer logging dir: {self.cfg.save_path}") + return writer + + def build_train_loader(self): + train_data = build_dataset(self.cfg.data.train) + + if comm.get_world_size() > 1: + train_sampler = torch.utils.data.distributed.DistributedSampler(train_data) + else: + train_sampler = None + + init_fn = ( + partial( + worker_init_fn, + num_workers=self.cfg.num_worker_per_gpu, + rank=comm.get_rank(), + seed=self.cfg.seed, + ) + if self.cfg.seed is not None + else None + ) + + train_loader = torch.utils.data.DataLoader( + train_data, + batch_size=self.cfg.batch_size_per_gpu, + shuffle=(train_sampler is None), + num_workers=self.cfg.num_worker_per_gpu, + sampler=train_sampler, + collate_fn=partial(point_collate_fn, mix_prob=self.cfg.mix_prob), + pin_memory=True, + worker_init_fn=init_fn, + drop_last=True, + persistent_workers=True, + ) + return train_loader + + def build_val_loader(self): + val_loader = None + if self.cfg.evaluate: + val_data = build_dataset(self.cfg.data.val) + if comm.get_world_size() > 1: + val_sampler = torch.utils.data.distributed.DistributedSampler(val_data) + else: + val_sampler = None + val_loader = torch.utils.data.DataLoader( + val_data, + batch_size=self.cfg.batch_size_val_per_gpu, + shuffle=False, + num_workers=self.cfg.num_worker_per_gpu, + pin_memory=True, + sampler=val_sampler, + collate_fn=collate_fn, + ) + return val_loader + + def build_optimizer(self): + return build_optimizer(self.cfg.optimizer, self.model, self.cfg.param_dicts) + + def build_scheduler(self): + assert hasattr(self, "optimizer") + assert hasattr(self, "train_loader") + self.cfg.scheduler.total_steps = len(self.train_loader) * self.cfg.eval_epoch + return build_scheduler(self.cfg.scheduler, self.optimizer) + + def build_scaler(self): + scaler = torch.cuda.amp.GradScaler() if self.cfg.enable_amp else None + return scaler + + +@TRAINERS.register_module("MultiDatasetTrainer") +class MultiDatasetTrainer(Trainer): + def build_train_loader(self): + from pointcept.datasets import MultiDatasetDataloader + + train_data = build_dataset(self.cfg.data.train) + train_loader = MultiDatasetDataloader( + train_data, + self.cfg.batch_size_per_gpu, + self.cfg.num_worker_per_gpu, + self.cfg.mix_prob, + self.cfg.seed, + ) + self.comm_info["iter_per_epoch"] = len(train_loader) + return train_loader diff --git 
a/Pointcept/pointcept/models/__init__.py b/Pointcept/pointcept/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..aea1d44143a64992d72c098bf9fecdbaf6d2f030 --- /dev/null +++ b/Pointcept/pointcept/models/__init__.py @@ -0,0 +1,24 @@ +from .builder import build_model +from .default import DefaultSegmentor, DefaultClassifier + +# Backbones +from .sparse_unet import * +from .point_transformer import * +from .point_transformer_v2 import * +from .point_transformer_v3 import * +from .stratified_transformer import * +from .spvcnn import * +from .octformer import * +from .oacnns import * + +# from .swin3d import * + +# Semantic Segmentation +from .context_aware_classifier import * + +# Instance Segmentation +from .point_group import * + +# Pretraining +from .masked_scene_contrast import * +from .point_prompt_training import * diff --git a/Pointcept/pointcept/models/builder.py b/Pointcept/pointcept/models/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..bbda24465a405a5a2094f8d0c420a53c50fe79cc --- /dev/null +++ b/Pointcept/pointcept/models/builder.py @@ -0,0 +1,16 @@ +""" +Model Builder + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from pointcept.utils.registry import Registry + +MODELS = Registry("models") +MODULES = Registry("modules") + + +def build_model(cfg): + """Build models.""" + return MODELS.build(cfg) diff --git a/Pointcept/pointcept/models/context_aware_classifier/__init__.py b/Pointcept/pointcept/models/context_aware_classifier/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8327900bbf92258fa242f4b99d5b235bc1ae44aa --- /dev/null +++ b/Pointcept/pointcept/models/context_aware_classifier/__init__.py @@ -0,0 +1 @@ +from .context_aware_classifier_v1m1_base import CACSegmentor diff --git a/Pointcept/pointcept/models/context_aware_classifier/context_aware_classifier_v1m1_base.py b/Pointcept/pointcept/models/context_aware_classifier/context_aware_classifier_v1m1_base.py new file mode 100644 index 0000000000000000000000000000000000000000..893f4c8fc6d68a1fb7dc24e1ceb3b3ace11ebec6 --- /dev/null +++ b/Pointcept/pointcept/models/context_aware_classifier/context_aware_classifier_v1m1_base.py @@ -0,0 +1,275 @@ +""" +Context-aware Classifier for Semantic Segmentation + +Author: Zhuotao Tian, Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
+""" + +import torch +import torch.nn as nn +import torch.nn.functional as F +from pointcept.models.losses import build_criteria +from pointcept.models.utils.structure import Point +from pointcept.models.builder import MODELS, build_model + + +@MODELS.register_module("CAC-v1m1") +class CACSegmentor(nn.Module): + def __init__( + self, + num_classes, + backbone_out_channels, + backbone=None, + criteria=None, + cos_temp=15, + main_weight=1, + pre_weight=1, + pre_self_weight=1, + kl_weight=1, + conf_thresh=0, + detach_pre_logits=False, + ): + super().__init__() + self.num_classes = num_classes + self.cos_temp = cos_temp + self.main_weight = main_weight + self.pre_weight = pre_weight + self.pre_self_weight = pre_self_weight + self.kl_weight = kl_weight + self.conf_thresh = conf_thresh + self.detach_pre_logits = detach_pre_logits + + # backbone + self.backbone = build_model(backbone) + # heads + self.seg_head = nn.Linear(backbone_out_channels, num_classes) + self.proj = nn.Sequential( + nn.Linear(backbone_out_channels * 2, backbone_out_channels * 2, bias=False), + nn.ReLU(inplace=True), + nn.Linear(backbone_out_channels * 2, backbone_out_channels), + ) + self.apd_proj = nn.Sequential( + nn.Linear(backbone_out_channels * 2, backbone_out_channels * 2, bias=False), + nn.ReLU(inplace=True), + nn.Linear(backbone_out_channels * 2, backbone_out_channels), + ) + self.feat_proj_layer = nn.Sequential( + nn.Linear(backbone_out_channels, backbone_out_channels, bias=False), + nn.BatchNorm1d(backbone_out_channels), + nn.ReLU(inplace=True), + nn.Linear(backbone_out_channels, backbone_out_channels), + ) + # Criteria + self.criteria = build_criteria(criteria) + + @staticmethod + def get_pred(x, proto): + # x: [n,c]; proto: [cls, c] + x = F.normalize(x, 2, 1) + proto = F.normalize(proto, 2, 1) + pred = x @ proto.permute(1, 0) # [n,c] x [c, cls] -> [n, cls] + return pred + + def get_adaptive_perspective(self, feat, target, new_proto, proto): + raw_feat = feat.clone() + # target: [n] + # feat: [n,c] + # proto: [cls, c] + unique_y = list(target.unique()) + if -1 in unique_y: + unique_y.remove(-1) + target = target.unsqueeze(-1) # [n, 1] + + for tmp_y in unique_y: + tmp_mask = (target == tmp_y).float() + tmp_proto = (feat * tmp_mask).sum(0) / (tmp_mask.sum(0) + 1e-4) # c + onehot_vec = torch.zeros(new_proto.shape[0], 1).cuda() # cls, 1 + onehot_vec[tmp_y.long()] = 1 + new_proto = ( + new_proto * (1 - onehot_vec) + tmp_proto.unsqueeze(0) * onehot_vec + ) + + new_proto = torch.cat([new_proto, proto], -1) + new_proto = self.apd_proj(new_proto) + raw_feat = self.feat_proj_layer(raw_feat) + pred = self.get_pred(raw_feat, new_proto) + return pred + + def post_refine_proto_batch(self, feat, pred, proto, offset=None): + # x: [n, c]; pred: [n, cls]; proto: [cls, c] + pred_list = [] + x = feat + raw_x = x.clone() + if self.detach_pre_logits: + pred = pred.detach() + raw_pred = pred.clone() + + if offset is None: + raw_x = x.clone() + n, n_cls = pred.shape[:] + pred = pred.view(n, n_cls) + pred = F.softmax(pred, 1).permute(1, 0) # [n, cls] -> [cls, n] + if self.conf_thresh > 0: + max_pred = ( + (pred.max(0)[0] >= self.conf_thresh).float().unsqueeze(0) + ) # 1, n + pred = pred * max_pred + pred_proto = (pred / (pred.sum(-1).unsqueeze(-1) + 1e-7)) @ raw_x # cls, c + + pred_proto = torch.cat([pred_proto, proto], -1) # cls, 2c + pred_proto = self.proj(pred_proto) + raw_x = self.feat_proj_layer(raw_x) + new_pred = self.get_pred(raw_x, pred_proto) + else: + for i in range(len(offset)): + if i == 0: + start = 0 + end = offset[i] + 
else: + start, end = offset[i - 1], offset[i] + tmp_x = raw_x[start:end] + pred = raw_pred[start:end] + n, n_cls = pred.shape[:] + pred = pred.view(n, n_cls) + pred = F.softmax(pred, 1).permute(1, 0) # [n, cls] -> [cls, n] + if self.conf_thresh > 0: + max_pred = ( + (pred.max(0)[0] >= self.conf_thresh).float().unsqueeze(0) + ) # 1, n + pred = pred * max_pred + pred_proto = ( + pred / (pred.sum(-1).unsqueeze(-1) + 1e-7) + ) @ tmp_x # cls, c + + pred_proto = torch.cat([pred_proto, proto], -1) # cls, 2c + pred_proto = self.proj(pred_proto) + tmp_x = self.feat_proj_layer(tmp_x) + new_pred = self.get_pred(tmp_x, pred_proto) + pred_list.append(new_pred) + new_pred = torch.cat(pred_list, 0) + return new_pred + + @staticmethod + def get_distill_loss(pred, soft, target, smoothness=0.5, eps=0): + """ + knowledge distillation loss + """ + n, c = soft.shape[:] + soft = soft.detach() + target = target.unsqueeze(-1) # n, 1 + onehot = target.view(-1, 1) # n, 1 + ignore_mask = (onehot == -1).float() + sm_soft = F.softmax(soft / 1, 1) # n, c + + onehot = onehot * (1 - ignore_mask) + onehot = torch.zeros(n, c).cuda().scatter_(1, onehot.long(), 1) # n, c + smoothed_label = smoothness * sm_soft + (1 - smoothness) * onehot + if eps > 0: + smoothed_label = smoothed_label * (1 - eps) + (1 - smoothed_label) * eps / ( + smoothed_label.shape[1] - 1 + ) + + loss = torch.mul(-1 * F.log_softmax(pred, dim=1), smoothed_label) # b, n, h, w + loss = loss.sum(1) + + sm_soft = F.softmax(soft / 1, 1) # n, c + entropy_mask = -1 * (sm_soft * torch.log(sm_soft + 1e-4)).sum(1) + + # for class-wise entropy estimation + target = target.squeeze(-1) + unique_classes = list(target.unique()) + if -1 in unique_classes: + unique_classes.remove(-1) + valid_mask = (target != -1).float() + entropy_mask = entropy_mask * valid_mask + loss_list = [] + weight_list = [] + for tmp_y in unique_classes: + tmp_mask = (target == tmp_y).float().squeeze() + tmp_entropy_mask = entropy_mask * tmp_mask + class_weight = 1 + tmp_loss = (loss * tmp_entropy_mask).sum() / (tmp_entropy_mask.sum() + 1e-4) + loss_list.append(class_weight * tmp_loss) + weight_list.append(class_weight) + + if len(weight_list) > 0: + loss = sum(loss_list) / (sum(weight_list) + 1e-4) + else: + loss = torch.zeros(1).cuda().mean() + return loss + + def forward(self, data_dict): + offset = data_dict["offset"] + point = self.backbone(data_dict) + if isinstance(point, Point): + feat = point.feat + else: + feat = point + seg_logits = self.seg_head(feat) + + if self.training: + target = data_dict["segment"] + pre_logits = seg_logits.clone() + refine_logits = ( + self.post_refine_proto_batch( + feat=feat, + pred=seg_logits, + proto=self.seg_head.weight.squeeze(), + offset=offset, + ) + * self.cos_temp + ) + + cac_pred = ( + self.get_adaptive_perspective( + feat=feat, + target=target, + new_proto=self.seg_head.weight.detach().data.squeeze(), + proto=self.seg_head.weight.squeeze(), + ) + * self.cos_temp + ) + + seg_loss = self.criteria(refine_logits, target) * self.main_weight + pre_loss = self.criteria(cac_pred, target) * self.pre_weight + pre_self_loss = self.criteria(pre_logits, target) * self.pre_self_weight + kl_loss = ( + self.get_distill_loss( + pred=refine_logits, soft=cac_pred.detach(), target=target + ) + * self.kl_weight + ) + loss = seg_loss + pre_loss + pre_self_loss + kl_loss + return dict( + loss=loss, + seg_loss=seg_loss, + pre_loss=pre_loss, + pre_self_loss=pre_self_loss, + kl_loss=kl_loss, + ) + + elif "segment" in data_dict.keys(): + refine_logits = ( + 
self.post_refine_proto_batch( + feat=feat, + pred=seg_logits, + proto=self.seg_head.weight.squeeze(), + offset=offset, + ) + * self.cos_temp + ) + + loss = self.criteria(seg_logits, data_dict["segment"]) + return dict(loss=loss, seg_logits=refine_logits) + + else: + refine_logits = ( + self.post_refine_proto_batch( + feat=feat, + pred=seg_logits, + proto=self.seg_head.weight.squeeze(), + offset=offset, + ) + * self.cos_temp + ) + return dict(seg_logits=refine_logits) diff --git a/Pointcept/pointcept/models/default.py b/Pointcept/pointcept/models/default.py new file mode 100644 index 0000000000000000000000000000000000000000..8dd600b9e71e70350c644755534c5ef2d7b82c58 --- /dev/null +++ b/Pointcept/pointcept/models/default.py @@ -0,0 +1,125 @@ +import torch.nn as nn +import torch_scatter + +from pointcept.models.losses import build_criteria +from pointcept.models.utils.structure import Point +from .builder import MODELS, build_model + + +@MODELS.register_module() +class DefaultSegmentor(nn.Module): + def __init__(self, backbone=None, criteria=None): + super().__init__() + self.backbone = build_model(backbone) + self.criteria = build_criteria(criteria) + + def forward(self, input_dict): + if "condition" in input_dict.keys(): + # PPT (https://arxiv.org/abs/2308.09718) + # currently, only support one batch one condition + input_dict["condition"] = input_dict["condition"][0] + seg_logits = self.backbone(input_dict) + # train + if self.training: + loss = self.criteria(seg_logits, input_dict["segment"]) + return dict(loss=loss) + # eval + elif "segment" in input_dict.keys(): + loss = self.criteria(seg_logits, input_dict["segment"]) + return dict(loss=loss, seg_logits=seg_logits) + # test + else: + return dict(seg_logits=seg_logits) + + +@MODELS.register_module() +class DefaultSegmentorV2(nn.Module): + def __init__( + self, + num_classes, + backbone_out_channels, + backbone=None, + criteria=None, + ): + super().__init__() + self.seg_head = ( + nn.Linear(backbone_out_channels, num_classes) + if num_classes > 0 + else nn.Identity() + ) + self.backbone = build_model(backbone) + self.criteria = build_criteria(criteria) + + def forward(self, input_dict): + point = Point(input_dict) + point = self.backbone(point) + # Backbone added after v1.5.0 return Point instead of feat and use DefaultSegmentorV2 + # TODO: remove this part after make all backbone return Point only. 
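+        # A Point is a dict-like container of per-point tensors; its ``feat``
+        # entry holds the [num_points, backbone_out_channels] features that
+        # the linear ``seg_head`` consumes, while pre-v1.5.0 backbones return
+        # that feature tensor directly.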
+ if isinstance(point, Point): + feat = point.feat + else: + feat = point + seg_logits = self.seg_head(feat) + # train + if self.training: + loss = self.criteria(seg_logits, input_dict["segment"]) + return dict(loss=loss) + # eval + elif "segment" in input_dict.keys(): + loss = self.criteria(seg_logits, input_dict["segment"]) + return dict(loss=loss, seg_logits=seg_logits) + # test + else: + return dict(seg_logits=seg_logits) + + +@MODELS.register_module() +class DefaultClassifier(nn.Module): + def __init__( + self, + backbone=None, + criteria=None, + num_classes=40, + backbone_embed_dim=256, + ): + super().__init__() + self.backbone = build_model(backbone) + self.criteria = build_criteria(criteria) + self.num_classes = num_classes + self.backbone_embed_dim = backbone_embed_dim + self.cls_head = nn.Sequential( + nn.Linear(backbone_embed_dim, 256), + nn.BatchNorm1d(256), + nn.ReLU(inplace=True), + nn.Dropout(p=0.5), + nn.Linear(256, 128), + nn.BatchNorm1d(128), + nn.ReLU(inplace=True), + nn.Dropout(p=0.5), + nn.Linear(128, num_classes), + ) + + def forward(self, input_dict): + point = Point(input_dict) + point = self.backbone(point) + # Backbone added after v1.5.0 return Point instead of feat + # And after v1.5.0 feature aggregation for classification operated in classifier + # TODO: remove this part after make all backbone return Point only. + if isinstance(point, Point): + point.feat = torch_scatter.segment_csr( + src=point.feat, + indptr=nn.functional.pad(point.offset, (1, 0)), + reduce="mean", + ) + feat = point.feat + else: + feat = point + cls_logits = self.cls_head(feat) + if self.training: + loss = self.criteria(cls_logits, input_dict["category"]) + return dict(loss=loss) + elif "category" in input_dict.keys(): + loss = self.criteria(cls_logits, input_dict["category"]) + return dict(loss=loss, cls_logits=cls_logits) + else: + return dict(cls_logits=cls_logits) diff --git a/Pointcept/pointcept/models/losses/__init__.py b/Pointcept/pointcept/models/losses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ee44096e13acdf7d6cf7ba9ff764b4b6af3aafe5 --- /dev/null +++ b/Pointcept/pointcept/models/losses/__init__.py @@ -0,0 +1,4 @@ +from .builder import build_criteria + +from .misc import CrossEntropyLoss, SmoothCELoss, DiceLoss, FocalLoss, BinaryFocalLoss +from .lovasz import LovaszLoss diff --git a/Pointcept/pointcept/models/losses/builder.py b/Pointcept/pointcept/models/losses/builder.py new file mode 100644 index 0000000000000000000000000000000000000000..ef642d9830622d847eb2b7e7013252a32c2b6368 --- /dev/null +++ b/Pointcept/pointcept/models/losses/builder.py @@ -0,0 +1,31 @@ +""" +Criteria Builder + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
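The `Criteria` container defined just below builds each configured loss from the `LOSSES` registry and sums them, each term already scaled by its own `loss_weight`. A hedged usage sketch; the particular loss mix and shapes are illustrative, not prescribed by the code:

```python
import torch

from pointcept.models.losses import build_criteria

criteria = build_criteria(
    [
        dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
        dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
    ]
)
seg_logits = torch.randn(1024, 13)       # [num_points, num_classes]
segment = torch.randint(0, 13, (1024,))  # [num_points] ground-truth labels
loss = criteria(seg_logits, segment)     # scalar: sum of the weighted terms
```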
+""" + +from pointcept.utils.registry import Registry + +LOSSES = Registry("losses") + + +class Criteria(object): + def __init__(self, cfg=None): + self.cfg = cfg if cfg is not None else [] + self.criteria = [] + for loss_cfg in self.cfg: + self.criteria.append(LOSSES.build(cfg=loss_cfg)) + + def __call__(self, pred, target): + if len(self.criteria) == 0: + # loss computation occur in model + return pred + loss = 0 + for c in self.criteria: + loss += c(pred, target) + return loss + + +def build_criteria(cfg): + return Criteria(cfg) diff --git a/Pointcept/pointcept/models/losses/lovasz.py b/Pointcept/pointcept/models/losses/lovasz.py new file mode 100644 index 0000000000000000000000000000000000000000..690c2ba507478853cf7ea827ac6e4a35d8f31709 --- /dev/null +++ b/Pointcept/pointcept/models/losses/lovasz.py @@ -0,0 +1,257 @@ +""" +Lovasz Loss +refer https://arxiv.org/abs/1705.08790 + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from typing import Optional +from itertools import filterfalse +import torch +import torch.nn.functional as F +from torch.nn.modules.loss import _Loss + +from .builder import LOSSES + +BINARY_MODE: str = "binary" +MULTICLASS_MODE: str = "multiclass" +MULTILABEL_MODE: str = "multilabel" + + +def _lovasz_grad(gt_sorted): + """Compute gradient of the Lovasz extension w.r.t sorted errors + See Alg. 1 in paper + """ + p = len(gt_sorted) + gts = gt_sorted.sum() + intersection = gts - gt_sorted.float().cumsum(0) + union = gts + (1 - gt_sorted).float().cumsum(0) + jaccard = 1.0 - intersection / union + if p > 1: # cover 1-pixel case + jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] + return jaccard + + +def _lovasz_hinge(logits, labels, per_image=True, ignore=None): + """ + Binary Lovasz hinge loss + logits: [B, H, W] Logits at each pixel (between -infinity and +infinity) + labels: [B, H, W] Tensor, binary ground truth masks (0 or 1) + per_image: compute the loss per image instead of per batch + ignore: void class id + """ + if per_image: + loss = mean( + _lovasz_hinge_flat( + *_flatten_binary_scores(log.unsqueeze(0), lab.unsqueeze(0), ignore) + ) + for log, lab in zip(logits, labels) + ) + else: + loss = _lovasz_hinge_flat(*_flatten_binary_scores(logits, labels, ignore)) + return loss + + +def _lovasz_hinge_flat(logits, labels): + """Binary Lovasz hinge loss + Args: + logits: [P] Logits at each prediction (between -infinity and +infinity) + labels: [P] Tensor, binary ground truth labels (0 or 1) + """ + if len(labels) == 0: + # only void pixels, the gradients should be 0 + return logits.sum() * 0.0 + signs = 2.0 * labels.float() - 1.0 + errors = 1.0 - logits * signs + errors_sorted, perm = torch.sort(errors, dim=0, descending=True) + perm = perm.data + gt_sorted = labels[perm] + grad = _lovasz_grad(gt_sorted) + loss = torch.dot(F.relu(errors_sorted), grad) + return loss + + +def _flatten_binary_scores(scores, labels, ignore=None): + """Flattens predictions in the batch (binary case) + Remove labels equal to 'ignore' + """ + scores = scores.view(-1) + labels = labels.view(-1) + if ignore is None: + return scores, labels + valid = labels != ignore + vscores = scores[valid] + vlabels = labels[valid] + return vscores, vlabels + + +def _lovasz_softmax( + probas, labels, classes="present", class_seen=None, per_image=False, ignore=None +): + """Multi-class Lovasz-Softmax loss + Args: + @param probas: [B, C, H, W] Class probabilities at each prediction (between 0 and 1). 
+ Interpreted as binary (sigmoid) output with outputs of size [B, H, W]. + @param labels: [B, H, W] Tensor, ground truth labels (between 0 and C - 1) + @param classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + @param per_image: compute the loss per image instead of per batch + @param ignore: void class labels + """ + if per_image: + loss = mean( + _lovasz_softmax_flat( + *_flatten_probas(prob.unsqueeze(0), lab.unsqueeze(0), ignore), + classes=classes + ) + for prob, lab in zip(probas, labels) + ) + else: + loss = _lovasz_softmax_flat( + *_flatten_probas(probas, labels, ignore), + classes=classes, + class_seen=class_seen + ) + return loss + + +def _lovasz_softmax_flat(probas, labels, classes="present", class_seen=None): + """Multi-class Lovasz-Softmax loss + Args: + @param probas: [P, C] Class probabilities at each prediction (between 0 and 1) + @param labels: [P] Tensor, ground truth labels (between 0 and C - 1) + @param classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + """ + if probas.numel() == 0: + # only void pixels, the gradients should be 0 + return probas * 0.0 + C = probas.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ["all", "present"] else classes + # for c in class_to_sum: + for c in labels.unique(): + if class_seen is None: + fg = (labels == c).type_as(probas) # foreground for class c + if classes == "present" and fg.sum() == 0: + continue + if C == 1: + if len(classes) > 1: + raise ValueError("Sigmoid output possible only with 1 class") + class_pred = probas[:, 0] + else: + class_pred = probas[:, c] + errors = (fg - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + losses.append(torch.dot(errors_sorted, _lovasz_grad(fg_sorted))) + else: + if c in class_seen: + fg = (labels == c).type_as(probas) # foreground for class c + if classes == "present" and fg.sum() == 0: + continue + if C == 1: + if len(classes) > 1: + raise ValueError("Sigmoid output possible only with 1 class") + class_pred = probas[:, 0] + else: + class_pred = probas[:, c] + errors = (fg - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + losses.append(torch.dot(errors_sorted, _lovasz_grad(fg_sorted))) + return mean(losses) + + +def _flatten_probas(probas, labels, ignore=None): + """Flattens predictions in the batch""" + if probas.dim() == 3: + # assumes output of a sigmoid layer + B, H, W = probas.size() + probas = probas.view(B, 1, H, W) + + C = probas.size(1) + probas = torch.movedim(probas, 1, -1) # [B, C, Di, Dj, ...] 
-> [B, Di, Dj, ..., C] + probas = probas.contiguous().view(-1, C) # [P, C] + + labels = labels.view(-1) + if ignore is None: + return probas, labels + valid = labels != ignore + vprobas = probas[valid] + vlabels = labels[valid] + return vprobas, vlabels + + +def isnan(x): + return x != x + + +def mean(values, ignore_nan=False, empty=0): + """Nan-mean compatible with generators.""" + values = iter(values) + if ignore_nan: + values = filterfalse(isnan, values) + try: + n = 1 + acc = next(values) + except StopIteration: + if empty == "raise": + raise ValueError("Empty mean") + return empty + for n, v in enumerate(values, 2): + acc += v + if n == 1: + return acc + return acc / n + + +@LOSSES.register_module() +class LovaszLoss(_Loss): + def __init__( + self, + mode: str, + class_seen: Optional[int] = None, + per_image: bool = False, + ignore_index: Optional[int] = None, + loss_weight: float = 1.0, + ): + """Lovasz loss for segmentation task. + It supports binary, multiclass and multilabel cases + Args: + mode: Loss mode 'binary', 'multiclass' or 'multilabel' + ignore_index: Label that indicates ignored pixels (does not contribute to loss) + per_image: If True loss computed per each image and then averaged, else computed per whole batch + Shape + - **y_pred** - torch.Tensor of shape (N, C, H, W) + - **y_true** - torch.Tensor of shape (N, H, W) or (N, C, H, W) + Reference + https://github.com/BloodAxe/pytorch-toolbelt + """ + assert mode in {BINARY_MODE, MULTILABEL_MODE, MULTICLASS_MODE} + super().__init__() + + self.mode = mode + self.ignore_index = ignore_index + self.per_image = per_image + self.class_seen = class_seen + self.loss_weight = loss_weight + + def forward(self, y_pred, y_true): + if self.mode in {BINARY_MODE, MULTILABEL_MODE}: + loss = _lovasz_hinge( + y_pred, y_true, per_image=self.per_image, ignore=self.ignore_index + ) + elif self.mode == MULTICLASS_MODE: + y_pred = y_pred.softmax(dim=1) + loss = _lovasz_softmax( + y_pred, + y_true, + class_seen=self.class_seen, + per_image=self.per_image, + ignore=self.ignore_index, + ) + else: + raise ValueError("Wrong mode {}.".format(self.mode)) + return loss * self.loss_weight diff --git a/Pointcept/pointcept/models/losses/misc.py b/Pointcept/pointcept/models/losses/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..ec300a54b2d920d37882d25e8c7771bce01db97c --- /dev/null +++ b/Pointcept/pointcept/models/losses/misc.py @@ -0,0 +1,223 @@ +""" +Misc Losses + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
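For reference, a minimal sketch of calling the `LovaszLoss` defined above on flattened point-wise logits; the shapes follow the `[N, C]`/`[N]` case handled by `_flatten_probas`, and the tensor values are random placeholders:

```python
import torch

from pointcept.models.losses import LovaszLoss

criterion = LovaszLoss(mode="multiclass", ignore_index=-1, loss_weight=1.0)
logits = torch.randn(2048, 13)           # [N, C] raw logits (softmax applied inside)
labels = torch.randint(-1, 13, (2048,))  # [N] labels, -1 marks void points
loss = criterion(logits, labels)         # scalar Lovasz-Softmax loss
```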
+""" + +import torch +import torch.nn as nn +import torch.nn.functional as F +from .builder import LOSSES + + +@LOSSES.register_module() +class CrossEntropyLoss(nn.Module): + def __init__( + self, + weight=None, + size_average=None, + reduce=None, + reduction="mean", + label_smoothing=0.0, + loss_weight=1.0, + ignore_index=-1, + ): + super(CrossEntropyLoss, self).__init__() + weight = torch.tensor(weight).cuda() if weight is not None else None + self.loss_weight = loss_weight + self.loss = nn.CrossEntropyLoss( + weight=weight, + size_average=size_average, + ignore_index=ignore_index, + reduce=reduce, + reduction=reduction, + label_smoothing=label_smoothing, + ) + + def forward(self, pred, target): + return self.loss(pred, target) * self.loss_weight + + +@LOSSES.register_module() +class SmoothCELoss(nn.Module): + def __init__(self, smoothing_ratio=0.1): + super(SmoothCELoss, self).__init__() + self.smoothing_ratio = smoothing_ratio + + def forward(self, pred, target): + eps = self.smoothing_ratio + n_class = pred.size(1) + one_hot = torch.zeros_like(pred).scatter(1, target.view(-1, 1), 1) + one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1) + log_prb = F.log_softmax(pred, dim=1) + loss = -(one_hot * log_prb).total(dim=1) + loss = loss[torch.isfinite(loss)].mean() + return loss + + +@LOSSES.register_module() +class BinaryFocalLoss(nn.Module): + def __init__(self, gamma=2.0, alpha=0.5, logits=True, reduce=True, loss_weight=1.0): + """Binary Focal Loss + ` + """ + super(BinaryFocalLoss, self).__init__() + assert 0 < alpha < 1 + self.gamma = gamma + self.alpha = alpha + self.logits = logits + self.reduce = reduce + self.loss_weight = loss_weight + + def forward(self, pred, target, **kwargs): + """Forward function. + Args: + pred (torch.Tensor): The prediction with shape (N) + target (torch.Tensor): The ground truth. If containing class + indices, shape (N) where each value is 0โ‰คtargets[i]โ‰ค1, If containing class probabilities, + same shape as the input. + Returns: + torch.Tensor: The calculated loss + """ + if self.logits: + bce = F.binary_cross_entropy_with_logits(pred, target, reduction="none") + else: + bce = F.binary_cross_entropy(pred, target, reduction="none") + pt = torch.exp(-bce) + alpha = self.alpha * target + (1 - self.alpha) * (1 - target) + focal_loss = alpha * (1 - pt) ** self.gamma * bce + + if self.reduce: + focal_loss = torch.mean(focal_loss) + return focal_loss * self.loss_weight + + +@LOSSES.register_module() +class FocalLoss(nn.Module): + def __init__( + self, gamma=2.0, alpha=0.5, reduction="mean", loss_weight=1.0, ignore_index=-1 + ): + """Focal Loss + ` + """ + super(FocalLoss, self).__init__() + assert reduction in ( + "mean", + "sum", + ), "AssertionError: reduction should be 'mean' or 'sum'" + assert isinstance( + alpha, (float, list) + ), "AssertionError: alpha should be of type float" + assert isinstance(gamma, float), "AssertionError: gamma should be of type float" + assert isinstance( + loss_weight, float + ), "AssertionError: loss_weight should be of type float" + assert isinstance(ignore_index, int), "ignore_index must be of type int" + self.gamma = gamma + self.alpha = alpha + self.reduction = reduction + self.loss_weight = loss_weight + self.ignore_index = ignore_index + + def forward(self, pred, target, **kwargs): + """Forward function. + Args: + pred (torch.Tensor): The prediction with shape (N, C) where C = number of classes. + target (torch.Tensor): The ground truth. 
If containing class + indices, shape (N) where each value is 0โ‰คtargets[i]โ‰คCโˆ’1, If containing class probabilities, + same shape as the input. + Returns: + torch.Tensor: The calculated loss + """ + # [B, C, d_1, d_2, ..., d_k] -> [C, B, d_1, d_2, ..., d_k] + pred = pred.transpose(0, 1) + # [C, B, d_1, d_2, ..., d_k] -> [C, N] + pred = pred.reshape(pred.size(0), -1) + # [C, N] -> [N, C] + pred = pred.transpose(0, 1).contiguous() + # (B, d_1, d_2, ..., d_k) --> (B * d_1 * d_2 * ... * d_k,) + target = target.view(-1).contiguous() + assert pred.size(0) == target.size( + 0 + ), "The shape of pred doesn't match the shape of target" + valid_mask = target != self.ignore_index + target = target[valid_mask] + pred = pred[valid_mask] + + if len(target) == 0: + return 0.0 + + num_classes = pred.size(1) + target = F.one_hot(target, num_classes=num_classes) + + alpha = self.alpha + if isinstance(alpha, list): + alpha = pred.new_tensor(alpha) + pred_sigmoid = pred.sigmoid() + target = target.type_as(pred) + one_minus_pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) + focal_weight = (alpha * target + (1 - alpha) * (1 - target)) * one_minus_pt.pow( + self.gamma + ) + + loss = ( + F.binary_cross_entropy_with_logits(pred, target, reduction="none") + * focal_weight + ) + if self.reduction == "mean": + loss = loss.mean() + elif self.reduction == "sum": + loss = loss.total() + return self.loss_weight * loss + + +@LOSSES.register_module() +class DiceLoss(nn.Module): + def __init__(self, smooth=1, exponent=2, loss_weight=1.0, ignore_index=-1): + """DiceLoss. + This loss is proposed in `V-Net: Fully Convolutional Neural Networks for + Volumetric Medical Image Segmentation `_. + """ + super(DiceLoss, self).__init__() + self.smooth = smooth + self.exponent = exponent + self.loss_weight = loss_weight + self.ignore_index = ignore_index + + def forward(self, pred, target, **kwargs): + # [B, C, d_1, d_2, ..., d_k] -> [C, B, d_1, d_2, ..., d_k] + pred = pred.transpose(0, 1) + # [C, B, d_1, d_2, ..., d_k] -> [C, N] + pred = pred.reshape(pred.size(0), -1) + # [C, N] -> [N, C] + pred = pred.transpose(0, 1).contiguous() + # (B, d_1, d_2, ..., d_k) --> (B * d_1 * d_2 * ... 
* d_k,) + target = target.view(-1).contiguous() + assert pred.size(0) == target.size( + 0 + ), "The shape of pred doesn't match the shape of target" + valid_mask = target != self.ignore_index + target = target[valid_mask] + pred = pred[valid_mask] + + pred = F.softmax(pred, dim=1) + num_classes = pred.shape[1] + target = F.one_hot( + torch.clamp(target.long(), 0, num_classes - 1), num_classes=num_classes + ) + + total_loss = 0 + for i in range(num_classes): + if i != self.ignore_index: + num = torch.sum(torch.mul(pred[:, i], target[:, i])) * 2 + self.smooth + den = ( + torch.sum( + pred[:, i].pow(self.exponent) + target[:, i].pow(self.exponent) + ) + + self.smooth + ) + dice_loss = 1 - num / den + total_loss += dice_loss + loss = total_loss / num_classes + return self.loss_weight * loss diff --git a/Pointcept/pointcept/models/masked_scene_contrast/__init__.py b/Pointcept/pointcept/models/masked_scene_contrast/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..18733d94acc5c7ed0feb7c52c77cb040d53bee7e --- /dev/null +++ b/Pointcept/pointcept/models/masked_scene_contrast/__init__.py @@ -0,0 +1,2 @@ +from .masked_scene_contrast_v1m1_base import MaskedSceneContrast +from .masked_scene_contrast_v1m2_csc import MaskedSceneContrast diff --git a/Pointcept/pointcept/models/masked_scene_contrast/masked_scene_contrast_v1m1_base.py b/Pointcept/pointcept/models/masked_scene_contrast/masked_scene_contrast_v1m1_base.py new file mode 100644 index 0000000000000000000000000000000000000000..3b18bd563a36695eca4882a6620d4ddbe246ef80 --- /dev/null +++ b/Pointcept/pointcept/models/masked_scene_contrast/masked_scene_contrast_v1m1_base.py @@ -0,0 +1,310 @@ +""" +Masked Scene Contrast +https://arxiv.org/abs/2303.14191 + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
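Both MSC variants below consume Pointcept's packed-batch convention: `offset` stores the cumulative point count of each sample in the batch, and `offset2batch` expands it into per-point batch indices. A small illustration of that mapping (the tensor values are made up):

```python
import torch

offset = torch.tensor([3, 7, 12])  # 3 samples with 3, 4 and 5 points
lengths = torch.diff(offset, prepend=offset.new_zeros(1))
batch = torch.repeat_interleave(torch.arange(len(offset)), lengths)
# batch -> tensor([0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2])
```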
+""" + +import random +from itertools import chain +import torch +import torch.nn as nn +import torch.distributed as dist +from torch_geometric.nn.pool import voxel_grid + +from timm.models.layers import trunc_normal_ +import pointops + +from pointcept.models.builder import MODELS, build_model +from pointcept.models.utils import offset2batch +from pointcept.utils.comm import get_world_size + + +@MODELS.register_module("MSC-v1m1") +class MaskedSceneContrast(nn.Module): + def __init__( + self, + backbone, + backbone_in_channels, + backbone_out_channels, + mask_grid_size=0.1, + mask_rate=0.4, + view1_mix_prob=0, + view2_mix_prob=0, + matching_max_k=8, + matching_max_radius=0.03, + matching_max_pair=8192, + nce_t=0.4, + contrast_weight=1, + reconstruct_weight=1, + reconstruct_color=True, + reconstruct_normal=True, + ): + super().__init__() + self.backbone = build_model(backbone) + self.mask_grid_size = mask_grid_size + self.mask_rate = mask_rate + self.view1_mix_prob = view1_mix_prob + self.view2_mix_prob = view2_mix_prob + self.matching_max_k = matching_max_k + self.matching_max_radius = matching_max_radius + self.matching_max_pair = matching_max_pair + self.nce_t = nce_t + self.contrast_weight = contrast_weight + self.reconstruct_weight = reconstruct_weight + self.reconstruct_color = reconstruct_color + self.reconstruct_normal = reconstruct_normal + + self.mask_token = nn.Parameter(torch.zeros(1, backbone_in_channels)) + trunc_normal_(self.mask_token, mean=0.0, std=0.02) + self.color_head = ( + nn.Linear(backbone_out_channels, 3) if reconstruct_color else None + ) + self.normal_head = ( + nn.Linear(backbone_out_channels, 3) if reconstruct_normal else None + ) + self.nce_criteria = torch.nn.CrossEntropyLoss(reduction="mean") + + @torch.no_grad() + def generate_cross_masks( + self, view1_origin_coord, view1_offset, view2_origin_coord, view2_offset + ): + # union origin coord + view1_batch = offset2batch(view1_offset) + view2_batch = offset2batch(view2_offset) + + view1_batch_count = view1_batch.bincount() + view2_batch_count = view2_batch.bincount() + view1_origin_coord_split = view1_origin_coord.split(list(view1_batch_count)) + view2_origin_coord_split = view2_origin_coord.split(list(view2_batch_count)) + union_origin_coord = torch.cat( + list( + chain.from_iterable( + zip(view1_origin_coord_split, view2_origin_coord_split) + ) + ) + ) + union_offset = torch.cat( + [view1_offset.unsqueeze(-1), view2_offset.unsqueeze(-1)], dim=-1 + ).sum(-1) + union_batch = offset2batch(union_offset) + + # grid partition + mask_patch_coord = union_origin_coord.div(self.mask_grid_size) + mask_patch_grid_coord = torch.floor(mask_patch_coord) + mask_patch_cluster = voxel_grid( + pos=mask_patch_grid_coord, size=1, batch=union_batch, start=0 + ) + unique, cluster, counts = torch.unique( + mask_patch_cluster, sorted=True, return_inverse=True, return_counts=True + ) + patch_num = unique.shape[0] + patch_max_point = counts.max().item() + patch2point_map = cluster.new_zeros(patch_num, patch_max_point) + patch2point_mask = torch.lt( + torch.arange(patch_max_point).cuda().unsqueeze(0), counts.unsqueeze(-1) + ) + sorted_cluster_value, sorted_cluster_indices = torch.sort(cluster) + patch2point_map[patch2point_mask] = sorted_cluster_indices + + # generate cross masks + assert self.mask_rate <= 0.5 + patch_mask = torch.zeros(patch_num, device=union_origin_coord.device).int() + rand_perm = torch.randperm(patch_num) + mask_patch_num = int(patch_num * self.mask_rate) + + # mask1 tag with 1, mask2 tag with 2 + 
patch_mask[rand_perm[0:mask_patch_num]] = 1 + patch_mask[rand_perm[mask_patch_num : mask_patch_num * 2]] = 2 + point_mask = torch.zeros( + union_origin_coord.shape[0], device=union_origin_coord.device + ).int() + point_mask[ + patch2point_map[patch_mask == 1][patch2point_mask[patch_mask == 1]] + ] = 1 + point_mask[ + patch2point_map[patch_mask == 2][patch2point_mask[patch_mask == 2]] + ] = 2 + + # separate mask to view1 and view2 + point_mask_split = point_mask.split( + list( + torch.cat( + [view1_batch_count.unsqueeze(-1), view2_batch_count.unsqueeze(-1)], + dim=-1, + ).flatten() + ) + ) + view1_point_mask = torch.cat(point_mask_split[0::2]) == 1 + view2_point_mask = torch.cat(point_mask_split[1::2]) == 2 + return view1_point_mask, view2_point_mask + + @torch.no_grad() + def match_contrastive_pair( + self, view1_coord, view1_offset, view2_coord, view2_offset, max_k, max_radius + ): + index, distance = pointops.knn_query( + max_k, + view2_coord.float(), + view2_offset.int(), + view1_coord.float(), + view1_offset.int(), + ) + index = torch.cat( + [ + torch.arange(index.shape[0], device=index.device, dtype=torch.long) + .view(-1, 1, 1) + .expand(-1, max_k, 1), + index.view(-1, max_k, 1), + ], + dim=-1, + )[distance.squeeze(-1) < max_radius] + unique, count = index[:, 0].unique(return_counts=True) + select = ( + torch.cumsum(count, dim=0) + - torch.randint(count.max(), count.shape, device=count.device) % count + - 1 + ) + index = index[select] + if index.shape[0] > self.matching_max_pair: + index = index[torch.randperm(index.shape[0])[: self.matching_max_pair]] + return index + + def compute_contrastive_loss( + self, view1_feat, view1_offset, view2_feat, view2_offset, match_index + ): + assert view1_offset.shape == view2_offset.shape + + view1_feat = view1_feat[match_index[:, 0]] + view2_feat = view2_feat[match_index[:, 1]] + view1_feat = view1_feat / ( + torch.norm(view1_feat, p=2, dim=1, keepdim=True) + 1e-7 + ) + view2_feat = view2_feat / ( + torch.norm(view2_feat, p=2, dim=1, keepdim=True) + 1e-7 + ) + sim = torch.mm(view1_feat, view2_feat.transpose(1, 0)) + + with torch.no_grad(): + pos_sim = torch.diagonal(sim).mean() + neg_sim = sim.mean(dim=-1).mean() - pos_sim / match_index.shape[0] + labels = torch.arange(sim.shape[0], device=view1_feat.device).long() + loss = self.nce_criteria(torch.div(sim, self.nce_t), labels) + + if get_world_size() > 1: + dist.all_reduce(loss) + dist.all_reduce(pos_sim) + dist.all_reduce(neg_sim) + return ( + loss / get_world_size(), + pos_sim / get_world_size(), + neg_sim / get_world_size(), + ) + + def forward(self, data_dict): + view1_origin_coord = data_dict["view1_origin_coord"] + view1_coord = data_dict["view1_coord"] + view1_feat = data_dict["view1_feat"] + view1_offset = data_dict["view1_offset"].int() + + view2_origin_coord = data_dict["view2_origin_coord"] + view2_coord = data_dict["view2_coord"] + view2_feat = data_dict["view2_feat"] + view2_offset = data_dict["view2_offset"].int() + + # mask generation by union original coord (without spatial augmentation) + view1_point_mask, view2_point_mask = self.generate_cross_masks( + view1_origin_coord, view1_offset, view2_origin_coord, view2_offset + ) + + view1_mask_tokens = self.mask_token.expand(view1_coord.shape[0], -1) + view1_weight = view1_point_mask.unsqueeze(-1).type_as(view1_mask_tokens) + view1_feat = view1_feat * (1 - view1_weight) + view1_mask_tokens * view1_weight + + view2_mask_tokens = self.mask_token.expand(view2_coord.shape[0], -1) + view2_weight = 
view2_point_mask.unsqueeze(-1).type_as(view2_mask_tokens) + view2_feat = view2_feat * (1 - view2_weight) + view2_mask_tokens * view2_weight + + view1_data_dict = dict( + origin_coord=view1_origin_coord, + coord=view1_coord, + feat=view1_feat, + offset=view1_offset, + ) + view2_data_dict = dict( + origin_coord=view2_origin_coord, + coord=view2_coord, + feat=view2_feat, + offset=view2_offset, + ) + + # SparseConv based method need grid coord + if "view1_grid_coord" in data_dict.keys(): + view1_data_dict["grid_coord"] = data_dict["view1_grid_coord"] + if "view2_grid_coord" in data_dict.keys(): + view2_data_dict["grid_coord"] = data_dict["view2_grid_coord"] + + # view mixing strategy + if random.random() < self.view1_mix_prob: + view1_data_dict["offset"] = torch.cat( + [view1_offset[1:-1:2], view1_offset[-1].unsqueeze(0)], dim=0 + ) + if random.random() < self.view2_mix_prob: + view2_data_dict["offset"] = torch.cat( + [view2_offset[1:-1:2], view2_offset[-1].unsqueeze(0)], dim=0 + ) + + view1_feat = self.backbone(view1_data_dict) + view2_feat = self.backbone(view2_data_dict) + match_index = self.match_contrastive_pair( + view1_origin_coord, + view1_offset, + view2_origin_coord, + view2_offset, + max_k=self.matching_max_k, + max_radius=self.matching_max_radius, + ) + nce_loss, pos_sim, neg_sim = self.compute_contrastive_loss( + view1_feat, view1_offset, view2_feat, view2_offset, match_index + ) + loss = nce_loss * self.contrast_weight + result_dict = dict(nce_loss=nce_loss, pos_sim=pos_sim, neg_sim=neg_sim) + + if self.color_head is not None: + assert "view1_color" in data_dict.keys() + assert "view2_color" in data_dict.keys() + view1_color = data_dict["view1_color"] + view2_color = data_dict["view2_color"] + view1_color_pred = self.color_head(view1_feat[view1_point_mask]) + view2_color_pred = self.color_head(view2_feat[view2_point_mask]) + color_loss = ( + torch.sum((view1_color_pred - view1_color[view1_point_mask]) ** 2) + + torch.sum((view2_color_pred - view2_color[view2_point_mask]) ** 2) + ) / (view1_color_pred.shape[0] + view2_color_pred.shape[0]) + loss = loss + color_loss * self.reconstruct_weight + result_dict["color_loss"] = color_loss + + if self.normal_head is not None: + assert "view1_normal" in data_dict.keys() + assert "view2_normal" in data_dict.keys() + view1_normal = data_dict["view1_normal"] + view2_normal = data_dict["view2_normal"] + view1_normal_pred = self.normal_head(view1_feat[view1_point_mask]) + view2_normal_pred = self.normal_head(view2_feat[view2_point_mask]) + + view1_normal_pred = view1_normal_pred / ( + torch.norm(view1_normal_pred, p=2, dim=1, keepdim=True) + 1e-10 + ) + view2_normal_pred = view2_normal_pred / ( + torch.norm(view2_normal_pred, p=2, dim=1, keepdim=True) + 1e-10 + ) + normal_loss = ( + torch.sum(view1_normal_pred * view1_normal[view1_point_mask]) + + torch.sum(view2_normal_pred * view2_normal[view2_point_mask]) + ) / (view1_normal_pred.shape[0] + view2_normal_pred.shape[0]) + loss = loss + normal_loss * self.reconstruct_weight + result_dict["normal_loss"] = normal_loss + + result_dict["loss"] = loss + return result_dict diff --git a/Pointcept/pointcept/models/masked_scene_contrast/masked_scene_contrast_v1m2_csc.py b/Pointcept/pointcept/models/masked_scene_contrast/masked_scene_contrast_v1m2_csc.py new file mode 100644 index 0000000000000000000000000000000000000000..139e26b89d88bbe8711fb9a162b5347d846fd399 --- /dev/null +++ b/Pointcept/pointcept/models/masked_scene_contrast/masked_scene_contrast_v1m2_csc.py @@ -0,0 +1,377 @@ +""" +Masked Scene 
Contrast v1m2 +contrastive learning backend with CSC (https://arxiv.org/abs/2012.09165) + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com), Chengyao Wang (cywang22@cse.cuhk.edu.hk) +Please cite our work if the code is helpful to you. +""" + +import random +from itertools import chain +import torch +import torch.nn as nn +import torch.distributed as dist +from torch_geometric.nn.pool import voxel_grid + +from timm.models.layers import trunc_normal_ +import pointops + +from pointcept.models.builder import MODELS, build_model +from pointcept.models.utils import offset2batch +from pointcept.utils.comm import get_world_size + + +@MODELS.register_module("MSC-v1m2") +class MaskedSceneContrast(nn.Module): + def __init__( + self, + backbone, + backbone_in_channels, + backbone_out_channels, + mask_grid_size=0.1, + mask_rate=0.4, + view1_mix_prob=0, + view2_mix_prob=0, + matching_max_k=8, + matching_max_radius=0.03, + matching_max_pair=8192, + nce_t=0.4, + contrast_weight=1, + reconstruct_weight=1, + reconstruct_color=True, + reconstruct_normal=True, + partitions=4, + r1=0.125, + r2=2, + ): + super().__init__() + self.backbone = build_model(backbone) + self.mask_grid_size = mask_grid_size + self.mask_rate = mask_rate + self.view1_mix_prob = view1_mix_prob + self.view2_mix_prob = view2_mix_prob + self.matching_max_k = matching_max_k + self.matching_max_radius = matching_max_radius + self.matching_max_pair = matching_max_pair + self.nce_t = nce_t + self.contrast_weight = contrast_weight + self.reconstruct_weight = reconstruct_weight + self.reconstruct_color = reconstruct_color + self.reconstruct_normal = reconstruct_normal + + # csc partition + self.partitions = partitions + self.r1 = r1 + self.r2 = r2 + + self.mask_token = nn.Parameter(torch.zeros(1, backbone_in_channels)) + trunc_normal_(self.mask_token, mean=0.0, std=0.02) + self.color_head = ( + nn.Linear(backbone_out_channels, 3) if reconstruct_color else None + ) + self.normal_head = ( + nn.Linear(backbone_out_channels, 3) if reconstruct_normal else None + ) + self.nce_criteria = torch.nn.CrossEntropyLoss(reduction="mean") + + @torch.no_grad() + def generate_cross_masks( + self, view1_origin_coord, view1_offset, view2_origin_coord, view2_offset + ): + # union origin coord + view1_batch = offset2batch(view1_offset) + view2_batch = offset2batch(view2_offset) + + view1_batch_count = view1_batch.bincount() + view2_batch_count = view2_batch.bincount() + view1_origin_coord_split = view1_origin_coord.split(list(view1_batch_count)) + view2_origin_coord_split = view2_origin_coord.split(list(view2_batch_count)) + union_origin_coord = torch.cat( + list( + chain.from_iterable( + zip(view1_origin_coord_split, view2_origin_coord_split) + ) + ) + ) + union_offset = torch.cat( + [view1_offset.unsqueeze(-1), view2_offset.unsqueeze(-1)], dim=-1 + ).sum(-1) + union_batch = offset2batch(union_offset) + + # grid partition + mask_patch_coord = union_origin_coord.div(self.mask_grid_size) + mask_patch_grid_coord = torch.floor(mask_patch_coord) + mask_patch_cluster = voxel_grid( + pos=mask_patch_grid_coord, size=1, batch=union_batch, start=0 + ) + unique, cluster, counts = torch.unique( + mask_patch_cluster, sorted=True, return_inverse=True, return_counts=True + ) + patch_num = unique.shape[0] + patch_max_point = counts.max().item() + patch2point_map = cluster.new_zeros(patch_num, patch_max_point) + patch2point_mask = torch.lt( + torch.arange(patch_max_point).cuda().unsqueeze(0), counts.unsqueeze(-1) + ) + sorted_cluster_value, sorted_cluster_indices = torch.sort(cluster) 
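+        # The sort above orders point indices by patch id; the scatter below
+        # writes them row-wise, so row p of patch2point_map lists the points
+        # of patch p, and patch2point_mask marks which slots of each row are
+        # valid.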
+ patch2point_map[patch2point_mask] = sorted_cluster_indices + + # generate cross masks + assert self.mask_rate <= 0.5 + patch_mask = torch.zeros(patch_num, device=union_origin_coord.device).int() + rand_perm = torch.randperm(patch_num) + mask_patch_num = int(patch_num * self.mask_rate) + + # mask1 tag with 1, mask2 tag with 2 + patch_mask[rand_perm[0:mask_patch_num]] = 1 + patch_mask[rand_perm[mask_patch_num : mask_patch_num * 2]] = 2 + point_mask = torch.zeros( + union_origin_coord.shape[0], device=union_origin_coord.device + ).int() + point_mask[ + patch2point_map[patch_mask == 1][patch2point_mask[patch_mask == 1]] + ] = 1 + point_mask[ + patch2point_map[patch_mask == 2][patch2point_mask[patch_mask == 2]] + ] = 2 + + # separate mask to view1 and view2 + point_mask_split = point_mask.split( + list( + torch.cat( + [view1_batch_count.unsqueeze(-1), view2_batch_count.unsqueeze(-1)], + dim=-1, + ).flatten() + ) + ) + view1_point_mask = torch.cat(point_mask_split[0::2]) == 1 + view2_point_mask = torch.cat(point_mask_split[1::2]) == 2 + return view1_point_mask, view2_point_mask + + @torch.no_grad() + def match_contrastive_pair( + self, view1_coord, view1_offset, view2_coord, view2_offset, max_k, max_radius + ): + index, distance = pointops.knn_query( + max_k, + view2_coord.float(), + view2_offset.int(), + view1_coord.float(), + view1_offset.int(), + ) + index = torch.cat( + [ + torch.arange(index.shape[0], device=index.device, dtype=torch.long) + .view(-1, 1, 1) + .expand(-1, max_k, 1), + index.view(-1, max_k, 1), + ], + dim=-1, + )[distance.squeeze(-1) < max_radius] + unique, count = index[:, 0].unique(return_counts=True) + select = ( + torch.cumsum(count, dim=0) + - torch.randint(count.max(), count.shape, device=count.device) % count + - 1 + ) + index = index[select] + if index.shape[0] > self.matching_max_pair: + index = index[torch.randperm(index.shape[0])[: self.matching_max_pair]] + return index + + def compute_partitions(self, coord1, coord2): + partition_matrix = torch.zeros((coord1.shape[0], coord2.shape[0])) + partition_matrix = partition_matrix.cuda() - 1e7 + + rel_trans = coord1.unsqueeze(0) - coord2.unsqueeze(1) + mask_up = rel_trans[:, :, 2] > 0.0 + mask_down = rel_trans[:, :, 2] < 0.0 + + distance_matrix = torch.sqrt(torch.sum(rel_trans.pow(2), 2).add(1e-7)) + + mask = (distance_matrix[:, :] > self.r1) & (distance_matrix[:, :] <= self.r2) + partition_matrix[mask & mask_up] = 0 + partition_matrix[mask & mask_down] = 1 + + mask = distance_matrix[:, :] > self.r2 + partition_matrix[mask & mask_up] = 2 + partition_matrix[mask & mask_down] = 3 + + return partition_matrix + + def compute_contrastive_loss( + self, + view1_feat, + view1_coord, + view1_offset, + view2_feat, + view2_coord, + view2_offset, + match_index, + ): + assert view1_offset.shape == view2_offset.shape + device = view1_feat.device + loss = torch.tensor(0.0, device=device) + pos_sim = torch.tensor(0.0, device=device) + neg_sim = torch.tensor(0.0, device=device) + large_num = 1e9 + + view1_feat = view1_feat[match_index[:, 0]] + view2_feat = view2_feat[match_index[:, 1]] + view1_feat = view1_feat / ( + torch.norm(view1_feat, p=2, dim=1, keepdim=True) + 1e-7 + ) + view2_feat = view2_feat / ( + torch.norm(view2_feat, p=2, dim=1, keepdim=True) + 1e-7 + ) + + view1_coord = view1_coord[match_index[:, 0]] + view2_coord = view2_coord[match_index[:, 1]] + + batch = offset2batch(view1_offset)[match_index[:, 0]] + for batch_id in batch.unique(): + batch_mask = batch == batch_id + sim = torch.mm(view1_feat[batch_mask], 
view2_feat[batch_mask].T) + + with torch.no_grad(): + pos_sim += torch.diagonal(sim).mean() + neg_sim += sim.mean(dim=-1).mean() - pos_sim / batch_mask.sum() + + labels = torch.arange(sim.shape[0], device=view1_feat.device).long() + part = self.compute_partitions( + view1_coord[batch_mask], view2_coord[batch_mask] + ) + for part_id in part.unique(): + part_mask = part == part_id + part_mask.fill_diagonal_(True) + loss += self.nce_criteria( + torch.div(sim, self.nce_t) - large_num * (~part_mask).float(), + labels, + ) + + loss /= len(view1_offset) * self.partitions + pos_sim /= len(view1_offset) + neg_sim /= len(view1_offset) + + if get_world_size() > 1: + dist.all_reduce(loss) + dist.all_reduce(pos_sim) + dist.all_reduce(neg_sim) + return ( + loss / get_world_size(), + pos_sim / get_world_size(), + neg_sim / get_world_size(), + ) + + def forward(self, data_dict): + view1_origin_coord = data_dict["view1_origin_coord"] + view1_coord = data_dict["view1_coord"] + view1_feat = data_dict["view1_feat"] + view1_offset = data_dict["view1_offset"].int() + + view2_origin_coord = data_dict["view2_origin_coord"] + view2_coord = data_dict["view2_coord"] + view2_feat = data_dict["view2_feat"] + view2_offset = data_dict["view2_offset"].int() + + # mask generation by union original coord (without spatial augmentation) + view1_point_mask, view2_point_mask = self.generate_cross_masks( + view1_origin_coord, view1_offset, view2_origin_coord, view2_offset + ) + + view1_mask_tokens = self.mask_token.expand(view1_coord.shape[0], -1) + view1_weight = view1_point_mask.unsqueeze(-1).type_as(view1_mask_tokens) + view1_feat = view1_feat * (1 - view1_weight) + view1_mask_tokens * view1_weight + + view2_mask_tokens = self.mask_token.expand(view2_coord.shape[0], -1) + view2_weight = view2_point_mask.unsqueeze(-1).type_as(view2_mask_tokens) + view2_feat = view2_feat * (1 - view2_weight) + view2_mask_tokens * view2_weight + + view1_data_dict = dict( + origin_coord=view1_origin_coord, + coord=view1_coord, + feat=view1_feat, + offset=view1_offset, + ) + view2_data_dict = dict( + origin_coord=view2_origin_coord, + coord=view2_coord, + feat=view2_feat, + offset=view2_offset, + ) + + # SparseConv based method need grid coord + if "view1_grid_coord" in data_dict.keys(): + view1_data_dict["grid_coord"] = data_dict["view1_grid_coord"] + if "view2_grid_coord" in data_dict.keys(): + view2_data_dict["grid_coord"] = data_dict["view2_grid_coord"] + + # view mixing strategy + if random.random() < self.view1_mix_prob: + view1_data_dict["offset"] = torch.cat( + [view1_offset[1:-1:2], view1_offset[-1].unsqueeze(0)], dim=0 + ) + if random.random() < self.view2_mix_prob: + view2_data_dict["offset"] = torch.cat( + [view2_offset[1:-1:2], view2_offset[-1].unsqueeze(0)], dim=0 + ) + + view1_feat = self.backbone(view1_data_dict) + view2_feat = self.backbone(view2_data_dict) + match_index = self.match_contrastive_pair( + view1_origin_coord, + view1_offset, + view2_origin_coord, + view2_offset, + max_k=self.matching_max_k, + max_radius=self.matching_max_radius, + ) + nce_loss, pos_sim, neg_sim = self.compute_contrastive_loss( + view1_feat, + view1_origin_coord, + view1_offset, + view2_feat, + view2_origin_coord, + view2_offset, + match_index, + ) + loss = nce_loss * self.contrast_weight + result_dict = dict(nce_loss=nce_loss, pos_sim=pos_sim, neg_sim=neg_sim) + + if self.color_head is not None: + assert "view1_color" in data_dict.keys() + assert "view2_color" in data_dict.keys() + view1_color = data_dict["view1_color"] + view2_color = 
data_dict["view2_color"] + view1_color_pred = self.color_head(view1_feat[view1_point_mask]) + view2_color_pred = self.color_head(view2_feat[view2_point_mask]) + color_loss = ( + torch.sum((view1_color_pred - view1_color[view1_point_mask]) ** 2) + + torch.sum((view2_color_pred - view2_color[view2_point_mask]) ** 2) + ) / (view1_color_pred.shape[0] + view2_color_pred.shape[0]) + loss = loss + color_loss * self.reconstruct_weight + result_dict["color_loss"] = color_loss + + if self.normal_head is not None: + assert "view1_normal" in data_dict.keys() + assert "view2_normal" in data_dict.keys() + view1_normal = data_dict["view1_normal"] + view2_normal = data_dict["view2_normal"] + view1_normal_pred = self.normal_head(view1_feat[view1_point_mask]) + view2_normal_pred = self.normal_head(view2_feat[view2_point_mask]) + + view1_normal_pred = view1_normal_pred / ( + torch.norm(view1_normal_pred, p=2, dim=1, keepdim=True) + 1e-10 + ) + view2_normal_pred = view2_normal_pred / ( + torch.norm(view2_normal_pred, p=2, dim=1, keepdim=True) + 1e-10 + ) + normal_loss = ( + torch.sum(view1_normal_pred * view1_normal[view1_point_mask]) + + torch.sum(view2_normal_pred * view2_normal[view2_point_mask]) + ) / (view1_normal_pred.shape[0] + view2_normal_pred.shape[0]) + loss = loss + normal_loss * self.reconstruct_weight + result_dict["normal_loss"] = normal_loss + + result_dict["loss"] = loss + return result_dict diff --git a/Pointcept/pointcept/models/modules.py b/Pointcept/pointcept/models/modules.py new file mode 100644 index 0000000000000000000000000000000000000000..8a737ae9e52620ffc4eb139a81e0bdd46cded0ed --- /dev/null +++ b/Pointcept/pointcept/models/modules.py @@ -0,0 +1,83 @@ +import sys +import torch.nn as nn +import spconv.pytorch as spconv +from collections import OrderedDict +from pointcept.models.utils.structure import Point + + +class PointModule(nn.Module): + r"""PointModule + placeholder, all module subclass from this will take Point in PointSequential. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + +class PointSequential(PointModule): + r"""A sequential container. + Modules will be added to it in the order they are passed in the constructor. + Alternatively, an ordered dict of modules can also be passed in. 
+ """ + + def __init__(self, *args, **kwargs): + super().__init__() + if len(args) == 1 and isinstance(args[0], OrderedDict): + for key, module in args[0].items(): + self.add_module(key, module) + else: + for idx, module in enumerate(args): + self.add_module(str(idx), module) + for name, module in kwargs.items(): + if sys.version_info < (3, 6): + raise ValueError("kwargs only supported in py36+") + if name in self._modules: + raise ValueError("name exists.") + self.add_module(name, module) + + def __getitem__(self, idx): + if not (-len(self) <= idx < len(self)): + raise IndexError("index {} is out of range".format(idx)) + if idx < 0: + idx += len(self) + it = iter(self._modules.values()) + for i in range(idx): + next(it) + return next(it) + + def __len__(self): + return len(self._modules) + + def add(self, module, name=None): + if name is None: + name = str(len(self._modules)) + if name in self._modules: + raise KeyError("name exists") + self.add_module(name, module) + + def forward(self, input): + for k, module in self._modules.items(): + # Point module + if isinstance(module, PointModule): + input = module(input) + # Spconv module + elif spconv.modules.is_spconv_module(module): + if isinstance(input, Point): + input.sparse_conv_feat = module(input.sparse_conv_feat) + input.feat = input.sparse_conv_feat.features + else: + input = module(input) + # PyTorch module + else: + if isinstance(input, Point): + input.feat = module(input.feat) + if "sparse_conv_feat" in input.keys(): + input.sparse_conv_feat = input.sparse_conv_feat.replace_feature( + input.feat + ) + elif isinstance(input, spconv.SparseConvTensor): + if input.indices.shape[0] != 0: + input = input.replace_feature(module(input.features)) + else: + input = module(input) + return input diff --git a/Pointcept/pointcept/models/oacnns/__init__.py b/Pointcept/pointcept/models/oacnns/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..654b767080457f3814142159a121a2bef9c682b7 --- /dev/null +++ b/Pointcept/pointcept/models/oacnns/__init__.py @@ -0,0 +1 @@ +from .oacnns_v1m1_base import OACNNs diff --git a/Pointcept/pointcept/models/oacnns/oacnns_v1m1_base.py b/Pointcept/pointcept/models/oacnns/oacnns_v1m1_base.py new file mode 100644 index 0000000000000000000000000000000000000000..bd8ee6d25ed152f04ab8b3905bbd8c2bdf127d06 --- /dev/null +++ b/Pointcept/pointcept/models/oacnns/oacnns_v1m1_base.py @@ -0,0 +1,345 @@ +from functools import partial +import torch +import torch.nn as nn +from einops import rearrange +import spconv.pytorch as spconv +from timm.models.layers import trunc_normal_ +from ..builder import MODELS +from ..utils import offset2batch +from torch_geometric.nn.pool import voxel_grid +from torch_geometric.utils import scatter + + +class BasicBlock(nn.Module): + def __init__( + self, + in_channels, + embed_channels, + norm_fn=None, + indice_key=None, + depth=4, + groups=None, + grid_size=None, + bias=False, + ): + super().__init__() + assert embed_channels % groups == 0 + self.groups = groups + self.embed_channels = embed_channels + self.proj = nn.ModuleList() + self.grid_size = grid_size + self.weight = nn.ModuleList() + self.l_w = nn.ModuleList() + self.proj.append( + nn.Sequential( + nn.Linear(embed_channels, embed_channels, bias=False), + norm_fn(embed_channels), + nn.ReLU(), + ) + ) + for _ in range(depth - 1): + self.proj.append( + nn.Sequential( + nn.Linear(embed_channels, embed_channels, bias=False), + norm_fn(embed_channels), + nn.ReLU(), + ) + ) + self.l_w.append( + nn.Sequential( + 
nn.Linear(embed_channels, embed_channels, bias=False), + norm_fn(embed_channels), + nn.ReLU(), + ) + ) + self.weight.append(nn.Linear(embed_channels, embed_channels, bias=False)) + + self.adaptive = nn.Linear(embed_channels, depth - 1, bias=False) + self.fuse = nn.Sequential( + nn.Linear(embed_channels * 2, embed_channels, bias=False), + norm_fn(embed_channels), + nn.ReLU(), + ) + self.voxel_block = spconv.SparseSequential( + spconv.SubMConv3d( + embed_channels, + embed_channels, + kernel_size=3, + stride=1, + padding=1, + indice_key=indice_key, + bias=bias, + ), + norm_fn(embed_channels), + nn.ReLU(), + spconv.SubMConv3d( + embed_channels, + embed_channels, + kernel_size=3, + stride=1, + padding=1, + indice_key=indice_key, + bias=bias, + ), + norm_fn(embed_channels), + ) + self.act = nn.ReLU() + + def forward(self, x, clusters): + feat = x.features + feats = [] + for i, cluster in enumerate(clusters): + pw = self.l_w[i](feat) + pw = pw - scatter(pw, cluster, reduce="mean")[cluster] + pw = self.weight[i](pw) + pw = torch.exp(pw - pw.max()) + pw = pw / (scatter(pw, cluster, reduce="sum", dim=0)[cluster] + 1e-6) + pfeat = self.proj[i](feat) * pw + pfeat = scatter(pfeat, cluster, reduce="sum")[cluster] + feats.append(pfeat) + adp = self.adaptive(feat) + adp = torch.softmax(adp, dim=1) + feats = torch.stack(feats, dim=1) + feats = torch.einsum("l n, l n c -> l c", adp, feats) + feat = self.proj[-1](feat) + feat = torch.cat([feat, feats], dim=1) + feat = self.fuse(feat) + x.features + res = feat + x = x.replace_feature(feat) + x = self.voxel_block(x) + x = x.replace_feature(self.act(x.features + res)) + return x + + +class DonwBlock(nn.Module): + def __init__( + self, + in_channels, + embed_channels, + depth, + sp_indice_key, + point_grid_size, + num_ref=16, + groups=None, + norm_fn=None, + sub_indice_key=None, + ): + super().__init__() + self.num_ref = num_ref + self.depth = depth + self.point_grid_size = point_grid_size + self.down = spconv.SparseSequential( + spconv.SparseConv3d( + in_channels, + embed_channels, + kernel_size=2, + stride=2, + indice_key=sp_indice_key, + bias=False, + ), + norm_fn(embed_channels), + nn.ReLU(), + ) + self.blocks = nn.ModuleList() + for _ in range(depth): + self.blocks.append( + BasicBlock( + in_channels=embed_channels, + embed_channels=embed_channels, + depth=len(point_grid_size) + 1, + groups=groups, + grid_size=point_grid_size, + norm_fn=norm_fn, + indice_key=sub_indice_key, + ) + ) + + def forward(self, x): + x = self.down(x) + coord = x.indices[:, 1:].float() + batch = x.indices[:, 0] + clusters = [] + for grid_size in self.point_grid_size: + cluster = voxel_grid(pos=coord, size=grid_size, batch=batch) + _, cluster = torch.unique(cluster, return_inverse=True) + clusters.append(cluster) + for block in self.blocks: + x = block(x, clusters) + return x + + +class UpBlock(nn.Module): + def __init__( + self, + in_channels, + skip_channels, + embed_channels, + depth, + sp_indice_key, + norm_fn=None, + down_ratio=2, + sub_indice_key=None, + ): + super().__init__() + assert depth > 0 + self.up = spconv.SparseSequential( + spconv.SparseInverseConv3d( + in_channels, + embed_channels, + kernel_size=down_ratio, + indice_key=sp_indice_key, + bias=False, + ), + norm_fn(embed_channels), + nn.ReLU(), + ) + self.blocks = nn.ModuleList() + self.fuse = nn.Sequential( + nn.Linear(skip_channels + embed_channels, embed_channels), + norm_fn(embed_channels), + nn.ReLU(), + nn.Linear(embed_channels, embed_channels), + norm_fn(embed_channels), + nn.ReLU(), + ) + + def forward(self, 
x, skip_x): + x = self.up(x) + x = x.replace_feature( + self.fuse(torch.cat([x.features, skip_x.features], dim=1)) + x.features + ) + return x + + +@MODELS.register_module() +class OACNNs(nn.Module): + def __init__( + self, + in_channels, + num_classes, + embed_channels=64, + enc_num_ref=[16, 16, 16, 16], + enc_channels=[64, 64, 128, 256], + groups=[2, 4, 8, 16], + enc_depth=[2, 3, 6, 4], + down_ratio=[2, 2, 2, 2], + dec_channels=[96, 96, 128, 256], + point_grid_size=[[16, 32, 64], [8, 16, 24], [4, 8, 12], [2, 4, 6]], + dec_depth=[2, 2, 2, 2], + ): + super().__init__() + self.in_channels = in_channels + self.num_classes = num_classes + self.num_stages = len(enc_channels) + self.embed_channels = embed_channels + norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01) + + self.stem = spconv.SparseSequential( + spconv.SubMConv3d( + in_channels, + embed_channels, + kernel_size=3, + padding=1, + indice_key="stem", + bias=False, + ), + norm_fn(embed_channels), + nn.ReLU(), + spconv.SubMConv3d( + embed_channels, + embed_channels, + kernel_size=3, + padding=1, + indice_key="stem", + bias=False, + ), + norm_fn(embed_channels), + nn.ReLU(), + spconv.SubMConv3d( + embed_channels, + embed_channels, + kernel_size=3, + padding=1, + indice_key="stem", + bias=False, + ), + norm_fn(embed_channels), + nn.ReLU(), + ) + + self.enc = nn.ModuleList() + self.dec = nn.ModuleList() + for i in range(self.num_stages): + self.enc.append( + DonwBlock( + in_channels=embed_channels if i == 0 else enc_channels[i - 1], + embed_channels=enc_channels[i], + depth=enc_depth[i], + norm_fn=norm_fn, + groups=groups[i], + point_grid_size=point_grid_size[i], + num_ref=enc_num_ref[i], + sp_indice_key=f"spconv{i}", + sub_indice_key=f"subm{i + 1}", + ) + ) + self.dec.append( + UpBlock( + in_channels=( + enc_channels[-1] + if i == self.num_stages - 1 + else dec_channels[i + 1] + ), + skip_channels=embed_channels if i == 0 else enc_channels[i - 1], + embed_channels=dec_channels[i], + depth=dec_depth[i], + norm_fn=norm_fn, + sp_indice_key=f"spconv{i}", + sub_indice_key=f"subm{i}", + ) + ) + + self.final = spconv.SubMConv3d(dec_channels[0], num_classes, kernel_size=1) + self.apply(self._init_weights) + + def forward(self, input_dict): + discrete_coord = input_dict["grid_coord"] + feat = input_dict["feat"] + offset = input_dict["offset"] + batch = offset2batch(offset) + x = spconv.SparseConvTensor( + features=feat, + indices=torch.cat([batch.unsqueeze(-1), discrete_coord], dim=1) + .int() + .contiguous(), + spatial_shape=torch.add( + torch.max(discrete_coord, dim=0).values, 1 + ).tolist(), + batch_size=batch[-1].tolist() + 1, + ) + + x = self.stem(x) + skips = [x] + for i in range(self.num_stages): + x = self.enc[i](x) + skips.append(x) + x = skips.pop(-1) + for i in reversed(range(self.num_stages)): + skip = skips.pop(-1) + x = self.dec[i](x, skip) + x = self.final(x) + return x.features + + @staticmethod + def _init_weights(m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, spconv.SubMConv3d): + trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) diff --git a/Pointcept/pointcept/models/octformer/__init__.py b/Pointcept/pointcept/models/octformer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..06bea370d10808b0bad2b68189e957765fc46081 --- /dev/null +++ 
b/Pointcept/pointcept/models/octformer/__init__.py @@ -0,0 +1 @@ +from .octformer_v1m1_base import OctFormer diff --git a/Pointcept/pointcept/models/octformer/octformer_v1m1_base.py b/Pointcept/pointcept/models/octformer/octformer_v1m1_base.py new file mode 100644 index 0000000000000000000000000000000000000000..7c0faf700126b0d589867811e871025dc9782b76 --- /dev/null +++ b/Pointcept/pointcept/models/octformer/octformer_v1m1_base.py @@ -0,0 +1,629 @@ +""" +Octree Transformer + +Modified from https://github.com/octree-nn/octformer + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from typing import Optional, List, Dict +import torch +import torch.nn as nn +from torch.utils.checkpoint import checkpoint + +try: + import ocnn + from ocnn.octree import Octree, Points +except ImportError: + from pointcept.utils.misc import DummyClass + + ocnn = None + Octree = DummyClass + Points = DummyClass + +try: + import dwconv +except ImportError: + dwconv = None + +from pointcept.models.builder import MODELS +from pointcept.models.utils import offset2batch + + +class OctreeT(Octree): + def __init__( + self, + octree: Octree, + patch_size: int = 24, + dilation: int = 4, + nempty: bool = True, + max_depth: Optional[int] = None, + start_depth: Optional[int] = None, + **kwargs + ): + super().__init__(octree.depth, octree.full_depth) + self.__dict__.update(octree.__dict__) + + self.patch_size = patch_size + self.dilation = dilation + self.nempty = nempty + self.max_depth = max_depth or self.depth + self.start_depth = start_depth or self.full_depth + self.invalid_mask_value = -1e3 + assert self.start_depth > 1 + + self.block_num = patch_size * dilation + self.nnum_t = self.nnum_nempty if nempty else self.nnum + self.nnum_a = ((self.nnum_t / self.block_num).ceil() * self.block_num).int() + + num = self.max_depth + 1 + self.batch_idx = [None] * num + self.patch_mask = [None] * num + self.dilate_mask = [None] * num + self.rel_pos = [None] * num + self.dilate_pos = [None] * num + self.build_t() + + def build_t(self): + for d in range(self.start_depth, self.max_depth + 1): + self.build_batch_idx(d) + self.build_attn_mask(d) + self.build_rel_pos(d) + + def build_batch_idx(self, depth: int): + batch = self.batch_id(depth, self.nempty) + self.batch_idx[depth] = self.patch_partition(batch, depth, self.batch_size) + + def build_attn_mask(self, depth: int): + batch = self.batch_idx[depth] + mask = batch.view(-1, self.patch_size) + self.patch_mask[depth] = self._calc_attn_mask(mask) + + mask = batch.view(-1, self.patch_size, self.dilation) + mask = mask.transpose(1, 2).reshape(-1, self.patch_size) + self.dilate_mask[depth] = self._calc_attn_mask(mask) + + def _calc_attn_mask(self, mask: torch.Tensor): + attn_mask = mask.unsqueeze(2) - mask.unsqueeze(1) + attn_mask = attn_mask.masked_fill(attn_mask != 0, self.invalid_mask_value) + return attn_mask + + def build_rel_pos(self, depth: int): + key = self.key(depth, self.nempty) + key = self.patch_partition(key, depth) + x, y, z, _ = ocnn.octree.key2xyz(key, depth) + xyz = torch.stack([x, y, z], dim=1) + + xyz = xyz.view(-1, self.patch_size, 3) + self.rel_pos[depth] = xyz.unsqueeze(2) - xyz.unsqueeze(1) + + xyz = xyz.view(-1, self.patch_size, self.dilation, 3) + xyz = xyz.transpose(1, 2).reshape(-1, self.patch_size, 3) + self.dilate_pos[depth] = xyz.unsqueeze(2) - xyz.unsqueeze(1) + + def patch_partition(self, data: torch.Tensor, depth: int, fill_value=0): + num = self.nnum_a[depth] - self.nnum_t[depth] + tail = 
data.new_full((num,) + data.shape[1:], fill_value) + return torch.cat([data, tail], dim=0) + + def patch_reverse(self, data: torch.Tensor, depth: int): + return data[: self.nnum_t[depth]] + + +class MLP(torch.nn.Module): + def __init__( + self, + in_features: int, + hidden_features: Optional[int] = None, + out_features: Optional[int] = None, + activation=torch.nn.GELU, + drop: float = 0.0, + **kwargs + ): + super().__init__() + self.in_features = in_features + self.out_features = out_features or in_features + self.hidden_features = hidden_features or in_features + + self.fc1 = torch.nn.Linear(self.in_features, self.hidden_features) + self.act = activation() + self.fc2 = torch.nn.Linear(self.hidden_features, self.out_features) + self.drop = torch.nn.Dropout(drop, inplace=True) + + def forward(self, data: torch.Tensor): + data = self.fc1(data) + data = self.act(data) + data = self.drop(data) + data = self.fc2(data) + data = self.drop(data) + return data + + +class OctreeDWConvBn(torch.nn.Module): + def __init__( + self, + in_channels: int, + kernel_size: List[int] = [3], + stride: int = 1, + nempty: bool = False, + ): + super().__init__() + self.conv = dwconv.OctreeDWConv( + in_channels, kernel_size, nempty, use_bias=False + ) + self.bn = torch.nn.BatchNorm1d(in_channels) + + def forward(self, data: torch.Tensor, octree: Octree, depth: int): + out = self.conv(data, octree, depth) + out = self.bn(out) + return out + + +class RPE(torch.nn.Module): + def __init__(self, patch_size: int, num_heads: int, dilation: int = 1): + super().__init__() + self.patch_size = patch_size + self.num_heads = num_heads + self.dilation = dilation + self.pos_bnd = self.get_pos_bnd(patch_size) + self.rpe_num = 2 * self.pos_bnd + 1 + self.rpe_table = torch.nn.Parameter(torch.zeros(3 * self.rpe_num, num_heads)) + torch.nn.init.trunc_normal_(self.rpe_table, std=0.02) + + def get_pos_bnd(self, patch_size: int): + return int(0.8 * patch_size * self.dilation**0.5) + + def xyz2idx(self, xyz: torch.Tensor): + mul = torch.arange(3, device=xyz.device) * self.rpe_num + xyz = xyz.clamp(-self.pos_bnd, self.pos_bnd) + idx = xyz + (self.pos_bnd + mul) + return idx + + def forward(self, xyz): + idx = self.xyz2idx(xyz) + out = self.rpe_table.index_select(0, idx.reshape(-1)) + out = out.view(idx.shape + (-1,)).sum(3) + out = out.permute(0, 3, 1, 2) # (N, K, K, H) -> (N, H, K, K) + return out + + def extra_repr(self) -> str: + return "num_heads={}, pos_bnd={}, dilation={}".format( + self.num_heads, self.pos_bnd, self.dilation + ) # noqa + + +class OctreeAttention(torch.nn.Module): + def __init__( + self, + dim: int, + patch_size: int, + num_heads: int, + qkv_bias: bool = True, + qk_scale: Optional[float] = None, + attn_drop: float = 0.0, + proj_drop: float = 0.0, + dilation: int = 1, + use_rpe: bool = True, + ): + super().__init__() + self.dim = dim + self.patch_size = patch_size + self.num_heads = num_heads + self.dilation = dilation + self.use_rpe = use_rpe + self.scale = qk_scale or (dim // num_heads) ** -0.5 + + self.qkv = torch.nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = torch.nn.Dropout(attn_drop) + self.proj = torch.nn.Linear(dim, dim) + self.proj_drop = torch.nn.Dropout(proj_drop) + self.softmax = torch.nn.Softmax(dim=-1) + self.rpe = RPE(patch_size, num_heads, dilation) if use_rpe else None + + def forward(self, data: torch.Tensor, octree: OctreeT, depth: int): + H = self.num_heads + K = self.patch_size + C = self.dim + D = self.dilation + + # patch partition + data = octree.patch_partition(data, depth) + if D > 
1: # dilation + rel_pos = octree.dilate_pos[depth] + mask = octree.dilate_mask[depth] + data = data.view(-1, K, D, C).transpose(1, 2).reshape(-1, C) + else: + rel_pos = octree.rel_pos[depth] + mask = octree.patch_mask[depth] + data = data.view(-1, K, C) + + # qkv + qkv = self.qkv(data).reshape(-1, K, 3, H, C // H).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # (N, H, K, C') + q = q * self.scale + + # attn + attn = q @ k.transpose(-2, -1) # (N, H, K, K) + attn = self.apply_rpe(attn, rel_pos) # (N, H, K, K) + attn = attn + mask.unsqueeze(1) + attn = self.softmax(attn) + attn = self.attn_drop(attn) + data = (attn @ v).transpose(1, 2).reshape(-1, C) + + # patch reverse + if D > 1: # dilation + data = data.view(-1, D, K, C).transpose(1, 2).reshape(-1, C) + data = octree.patch_reverse(data, depth) + + # ffn + data = self.proj(data) + data = self.proj_drop(data) + return data + + def apply_rpe(self, attn, rel_pos): + if self.use_rpe: + attn = attn + self.rpe(rel_pos) + return attn + + def extra_repr(self) -> str: + return "dim={}, patch_size={}, num_heads={}, dilation={}".format( + self.dim, self.patch_size, self.num_heads, self.dilation + ) # noqa + + +class OctFormerBlock(torch.nn.Module): + def __init__( + self, + dim: int, + num_heads: int, + patch_size: int = 32, + dilation: int = 0, + mlp_ratio: float = 4.0, + qkv_bias: bool = True, + qk_scale: Optional[float] = None, + attn_drop: float = 0.0, + proj_drop: float = 0.0, + drop_path: float = 0.0, + nempty: bool = True, + activation: torch.nn.Module = torch.nn.GELU, + **kwargs + ): + super().__init__() + self.norm1 = torch.nn.LayerNorm(dim) + self.attention = OctreeAttention( + dim, + patch_size, + num_heads, + qkv_bias, + qk_scale, + attn_drop, + proj_drop, + dilation, + ) + self.norm2 = torch.nn.LayerNorm(dim) + self.mlp = MLP(dim, int(dim * mlp_ratio), dim, activation, proj_drop) + self.drop_path = ocnn.nn.OctreeDropPath(drop_path, nempty) + self.cpe = OctreeDWConvBn(dim, nempty=nempty) + + def forward(self, data: torch.Tensor, octree: OctreeT, depth: int): + data = self.cpe(data, octree, depth) + data + attn = self.attention(self.norm1(data), octree, depth) + data = data + self.drop_path(attn, octree, depth) + ffn = self.mlp(self.norm2(data)) + data = data + self.drop_path(ffn, octree, depth) + return data + + +class OctFormerStage(torch.nn.Module): + def __init__( + self, + dim: int, + num_heads: int, + patch_size: int = 32, + dilation: int = 0, + mlp_ratio: float = 4.0, + qkv_bias: bool = True, + qk_scale: Optional[float] = None, + attn_drop: float = 0.0, + proj_drop: float = 0.0, + drop_path: float = 0.0, + nempty: bool = True, + activation: torch.nn.Module = torch.nn.GELU, + interval: int = 6, + use_checkpoint: bool = True, + num_blocks: int = 2, + octformer_block=OctFormerBlock, + **kwargs + ): + super().__init__() + self.num_blocks = num_blocks + self.use_checkpoint = use_checkpoint + self.interval = interval # normalization interval + self.num_norms = (num_blocks - 1) // self.interval + + self.blocks = torch.nn.ModuleList( + [ + octformer_block( + dim=dim, + num_heads=num_heads, + patch_size=patch_size, + dilation=1 if (i % 2 == 0) else dilation, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + drop_path=( + drop_path[i] if isinstance(drop_path, list) else drop_path + ), + nempty=nempty, + activation=activation, + ) + for i in range(num_blocks) + ] + ) + # self.norms = torch.nn.ModuleList([ + # torch.nn.BatchNorm1d(dim) for _ in range(self.num_norms)]) + + 
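+ # Blocks alternate between plain window attention (dilation=1, even indices) and dilated attention (the configured dilation, odd indices); with use_checkpoint enabled, each block is recomputed in the backward pass, trading compute for activation memory during training.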
def forward(self, data: torch.Tensor, octree: OctreeT, depth: int): + for i in range(self.num_blocks): + if self.use_checkpoint and self.training: + data = checkpoint(self.blocks[i], data, octree, depth) + else: + data = self.blocks[i](data, octree, depth) + # if i % self.interval == 0 and i != 0: + # data = self.norms[(i - 1) // self.interval](data) + return data + + +class OctFormerDecoder(torch.nn.Module): + def __init__( + self, channels: List[int], fpn_channel: int, nempty: bool, head_up: int = 1 + ): + super().__init__() + self.head_up = head_up + self.num_stages = len(channels) + self.conv1x1 = torch.nn.ModuleList( + [ + torch.nn.Linear(channels[i], fpn_channel) + for i in range(self.num_stages - 1, -1, -1) + ] + ) + self.upsample = ocnn.nn.OctreeUpsample("nearest", nempty) + self.conv3x3 = torch.nn.ModuleList( + [ + ocnn.modules.OctreeConvBnRelu( + fpn_channel, fpn_channel, kernel_size=[3], stride=1, nempty=nempty + ) + for _ in range(self.num_stages) + ] + ) + self.up_conv = torch.nn.ModuleList( + [ + ocnn.modules.OctreeDeconvBnRelu( + fpn_channel, fpn_channel, kernel_size=[3], stride=2, nempty=nempty + ) + for _ in range(self.head_up) + ] + ) + + def forward(self, features: Dict[int, torch.Tensor], octree: Octree): + depth = min(features.keys()) + depth_max = max(features.keys()) + assert self.num_stages == len(features) + + feature = self.conv1x1[0](features[depth]) + conv_out = self.conv3x3[0](feature, octree, depth) + out = self.upsample(conv_out, octree, depth, depth_max) + for i in range(1, self.num_stages): + depth_i = depth + i + feature = self.upsample(feature, octree, depth_i - 1) + feature = self.conv1x1[i](features[depth_i]) + feature + conv_out = self.conv3x3[i](feature, octree, depth_i) + out = out + self.upsample(conv_out, octree, depth_i, depth_max) + for i in range(self.head_up): + out = self.up_conv[i](out, octree, depth_max + i) + return out + + +class PatchEmbed(torch.nn.Module): + def __init__( + self, + in_channels: int = 3, + dim: int = 96, + num_down: int = 2, + nempty: bool = True, + **kwargs + ): + super().__init__() + self.num_stages = num_down + self.delta_depth = -num_down + channels = [int(dim * 2**i) for i in range(-self.num_stages, 1)] + + self.convs = torch.nn.ModuleList( + [ + ocnn.modules.OctreeConvBnRelu( + in_channels if i == 0 else channels[i], + channels[i], + kernel_size=[3], + stride=1, + nempty=nempty, + ) + for i in range(self.num_stages) + ] + ) + self.downsamples = torch.nn.ModuleList( + [ + ocnn.modules.OctreeConvBnRelu( + channels[i], + channels[i + 1], + kernel_size=[2], + stride=2, + nempty=nempty, + ) + for i in range(self.num_stages) + ] + ) + self.proj = ocnn.modules.OctreeConvBnRelu( + channels[-1], dim, kernel_size=[3], stride=1, nempty=nempty + ) + + def forward(self, data: torch.Tensor, octree: Octree, depth: int): + # TODO: reduce to single input + for i in range(self.num_stages): + depth_i = depth - i + data = self.convs[i](data, octree, depth_i) + data = self.downsamples[i](data, octree, depth_i) + data = self.proj(data, octree, depth_i - 1) + return data + + +class Downsample(torch.nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: List[int] = (2,), + nempty: bool = True, + ): + super().__init__() + self.norm = torch.nn.BatchNorm1d(out_channels) + self.conv = ocnn.nn.OctreeConv( + in_channels, + out_channels, + kernel_size, + stride=2, + nempty=nempty, + use_bias=True, + ) + + def forward(self, data: torch.Tensor, octree: Octree, depth: int): + data = self.conv(data, octree, depth) 
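+ # the stride-2 octree convolution merges child octants into their parents (depth d -> d - 1); BatchNorm then normalizes the coarsened features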
+ data = self.norm(data) + return data + + +@MODELS.register_module("OctFormer-v1m1") +class OctFormer(torch.nn.Module): + def __init__( + self, + in_channels, + num_classes, + fpn_channels=168, + channels=(96, 192, 384, 384), + num_blocks=(2, 2, 18, 2), + num_heads=(6, 12, 24, 24), + patch_size=26, + stem_down=2, + head_up=2, + dilation=4, + drop_path=0.5, + nempty=True, + octree_scale_factor=10.24, + octree_depth=11, + octree_full_depth=2, + ): + super().__init__() + assert ocnn is not None, "Please follow `README.md` to install ocnn." + assert dwconv is not None, "Please follow `README.md` to install dwconv." + + self.patch_size = patch_size + self.dilation = dilation + self.nempty = nempty + self.num_stages = len(num_blocks) + self.stem_down = stem_down + self.octree_scale_factor = octree_scale_factor + self.octree_depth = octree_depth + self.octree_full_depth = octree_full_depth + drop_ratio = torch.linspace(0, drop_path, sum(num_blocks)).tolist() + + self.patch_embed = PatchEmbed(in_channels, channels[0], stem_down, nempty) + self.layers = torch.nn.ModuleList( + [ + OctFormerStage( + dim=channels[i], + num_heads=num_heads[i], + patch_size=patch_size, + drop_path=drop_ratio[ + sum(num_blocks[:i]) : sum(num_blocks[: i + 1]) + ], + dilation=dilation, + nempty=nempty, + num_blocks=num_blocks[i], + ) + for i in range(self.num_stages) + ] + ) + self.downsamples = torch.nn.ModuleList( + [ + Downsample(channels[i], channels[i + 1], kernel_size=[2], nempty=nempty) + for i in range(self.num_stages - 1) + ] + ) + self.decoder = OctFormerDecoder( + channels=channels, fpn_channel=fpn_channels, nempty=nempty, head_up=head_up + ) + self.interp = ocnn.nn.OctreeInterp("nearest", nempty) + self.seg_head = ( + nn.Sequential( + nn.Linear(fpn_channels, fpn_channels), + torch.nn.BatchNorm1d(fpn_channels), + nn.ReLU(inplace=True), + nn.Linear(fpn_channels, num_classes), + ) + if num_classes > 0 + else nn.Identity() + ) + + def points2octree(self, points): + octree = ocnn.octree.Octree(self.octree_depth, self.octree_full_depth) + octree.build_octree(points) + return octree + + def forward(self, data_dict): + coord = data_dict["coord"] + normal = data_dict["normal"] + feat = data_dict["feat"] + offset = data_dict["offset"] + batch = offset2batch(offset) + + point = Points( + points=coord / self.octree_scale_factor, + normals=normal, + features=feat, + batch_id=batch.unsqueeze(-1), + batch_size=len(offset), + ) + octree = ocnn.octree.Octree( + depth=self.octree_depth, + full_depth=self.octree_full_depth, + batch_size=len(offset), + device=coord.device, + ) + octree.build_octree(point) + octree.construct_all_neigh() + + feat = self.patch_embed(octree.features[octree.depth], octree, octree.depth) + depth = octree.depth - self.stem_down # current octree depth + octree = OctreeT( + octree, + self.patch_size, + self.dilation, + self.nempty, + max_depth=depth, + start_depth=depth - self.num_stages + 1, + ) + features = {} + for i in range(self.num_stages): + depth_i = depth - i + feat = self.layers[i](feat, octree, depth_i) + features[depth_i] = feat + if i < self.num_stages - 1: + feat = self.downsamples[i](feat, octree, depth_i) + out = self.decoder(features, octree) + # interpolate the representation back to the input points used to build the octree + query_pts = torch.cat([point.points, point.batch_id], dim=1).contiguous() + out = self.interp(out, octree, octree.depth, query_pts) + out = self.seg_head(out) + return out diff --git a/Pointcept/pointcept/models/point_group/__init__.py b/Pointcept/pointcept/models/point_group/__init__.py 
new file mode 100644 index 0000000000000000000000000000000000000000..5d9f35f2a05f4a88043a45f2da64faf21f38f520 --- /dev/null +++ b/Pointcept/pointcept/models/point_group/__init__.py @@ -0,0 +1 @@ +from .point_group_v1m1_base import PointGroup diff --git a/Pointcept/pointcept/models/point_group/point_group_v1m1_base.py b/Pointcept/pointcept/models/point_group/point_group_v1m1_base.py new file mode 100644 index 0000000000000000000000000000000000000000..2c36d3fe009fa33136d92605f1c810cff8892959 --- /dev/null +++ b/Pointcept/pointcept/models/point_group/point_group_v1m1_base.py @@ -0,0 +1,174 @@ +""" +PointGroup for instance segmentation + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com), Chengyao Wang +Please cite our work if the code is helpful to you. +""" + +from functools import partial +import torch +import torch.nn as nn +import torch.nn.functional as F + +try: + from pointgroup_ops import ballquery_batch_p, bfs_cluster +except ImportError: + ballquery_batch_p, bfs_cluster = None, None + +from pointcept.models.utils import offset2batch, batch2offset + +from pointcept.models.builder import MODELS, build_model + + +@MODELS.register_module("PG-v1m1") +class PointGroup(nn.Module): + def __init__( + self, + backbone, + backbone_out_channels=64, + semantic_num_classes=20, + semantic_ignore_index=-1, + segment_ignore_index=(-1, 0, 1), + instance_ignore_index=-1, + cluster_thresh=1.5, + cluster_closed_points=300, + cluster_propose_points=100, + cluster_min_points=50, + voxel_size=0.02, + ): + super().__init__() + norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01) + self.semantic_num_classes = semantic_num_classes + self.segment_ignore_index = segment_ignore_index + self.semantic_ignore_index = semantic_ignore_index + self.instance_ignore_index = instance_ignore_index + self.cluster_thresh = cluster_thresh + self.cluster_closed_points = cluster_closed_points + self.cluster_propose_points = cluster_propose_points + self.cluster_min_points = cluster_min_points + self.voxel_size = voxel_size + self.backbone = build_model(backbone) + self.bias_head = nn.Sequential( + nn.Linear(backbone_out_channels, backbone_out_channels), + norm_fn(backbone_out_channels), + nn.ReLU(), + nn.Linear(backbone_out_channels, 3), + ) + self.seg_head = nn.Linear(backbone_out_channels, semantic_num_classes) + self.ce_criteria = torch.nn.CrossEntropyLoss(ignore_index=semantic_ignore_index) + + def forward(self, data_dict): + coord = data_dict["coord"] + segment = data_dict["segment"] + instance = data_dict["instance"] + instance_centroid = data_dict["instance_centroid"] + offset = data_dict["offset"] + + feat = self.backbone(data_dict) + bias_pred = self.bias_head(feat) + logit_pred = self.seg_head(feat) + + # compute loss + seg_loss = self.ce_criteria(logit_pred, segment) + + mask = (instance != self.instance_ignore_index).float() + bias_gt = instance_centroid - coord + bias_dist = torch.sum(torch.abs(bias_pred - bias_gt), dim=-1) + bias_l1_loss = torch.sum(bias_dist * mask) / (torch.sum(mask) + 1e-8) + + bias_pred_norm = bias_pred / ( + torch.norm(bias_pred, p=2, dim=1, keepdim=True) + 1e-8 + ) + bias_gt_norm = bias_gt / (torch.norm(bias_gt, p=2, dim=1, keepdim=True) + 1e-8) + cosine_similarity = -(bias_pred_norm * bias_gt_norm).sum(-1) + bias_cosine_loss = torch.sum(cosine_similarity * mask) / ( + torch.sum(mask) + 1e-8 + ) + + loss = seg_loss + bias_l1_loss + bias_cosine_loss + return_dict = dict( + loss=loss, + seg_loss=seg_loss, + bias_l1_loss=bias_l1_loss, + bias_cosine_loss=bias_cosine_loss, + ) + + if not 
self.training: + center_pred = coord + bias_pred + center_pred /= self.voxel_size + logit_pred = F.softmax(logit_pred, dim=-1) + segment_pred = torch.max(logit_pred, 1)[1] # [n] + # cluster + mask = ( + ~torch.concat( + [ + (segment_pred == index).unsqueeze(-1) + for index in self.segment_ignore_index + ], + dim=1, + ) + .sum(-1) + .bool() + ) + + if mask.sum() == 0: + proposals_idx = torch.zeros(0).int() + proposals_offset = torch.zeros(1).int() + else: + center_pred_ = center_pred[mask] + segment_pred_ = segment_pred[mask] + + batch_ = offset2batch(offset)[mask] + offset_ = nn.ConstantPad1d((1, 0), 0)(batch2offset(batch_)) + idx, start_len = ballquery_batch_p( + center_pred_, + batch_.int(), + offset_.int(), + self.cluster_thresh, + self.cluster_closed_points, + ) + proposals_idx, proposals_offset = bfs_cluster( + segment_pred_.int().cpu(), + idx.cpu(), + start_len.cpu(), + self.cluster_min_points, + ) + proposals_idx[:, 1] = ( + mask.nonzero().view(-1)[proposals_idx[:, 1].long()].int() + ) + + # get proposal + proposals_pred = torch.zeros( + (proposals_offset.shape[0] - 1, center_pred.shape[0]), dtype=torch.int + ) + proposals_pred[proposals_idx[:, 0].long(), proposals_idx[:, 1].long()] = 1 + instance_pred = segment_pred[ + proposals_idx[:, 1][proposals_offset[:-1].long()].long() + ] + proposals_point_num = proposals_pred.sum(1) + proposals_mask = proposals_point_num > self.cluster_propose_points + proposals_pred = proposals_pred[proposals_mask] + instance_pred = instance_pred[proposals_mask] + + pred_scores = [] + pred_classes = [] + pred_masks = proposals_pred.detach().cpu() + for proposal_id in range(len(proposals_pred)): + segment_ = proposals_pred[proposal_id] + confidence_ = logit_pred[ + segment_.bool(), instance_pred[proposal_id] + ].mean() + object_ = instance_pred[proposal_id] + pred_scores.append(confidence_) + pred_classes.append(object_) + if len(pred_scores) > 0: + pred_scores = torch.stack(pred_scores).cpu() + pred_classes = torch.stack(pred_classes).cpu() + else: + pred_scores = torch.tensor([]) + pred_classes = torch.tensor([]) + + return_dict["pred_scores"] = pred_scores + return_dict["pred_masks"] = pred_masks + return_dict["pred_classes"] = pred_classes + return return_dict diff --git a/Pointcept/pointcept/models/point_group/utils.py b/Pointcept/pointcept/models/point_group/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d095b5bc89291ec30418c0aaf0eb9f672fe26ccc --- /dev/null +++ b/Pointcept/pointcept/models/point_group/utils.py @@ -0,0 +1,176 @@ +import torch +from torch.autograd import Function +import pointgroup_ops + + +class BallQueryBatchP(Function): + @staticmethod + def forward(ctx, coords, batch_idxs, batch_offsets, radius, meanActive): + """ + :param ctx: + :param coords: (n, 3) float + :param batch_idxs: (n) int + :param batch_offsets: (B+1) int + :param radius: float + :param meanActive: int + :return: idx (nActive), int + :return: start_len (n, 2), int + """ + + n = coords.size(0) + + assert coords.is_contiguous() and coords.is_cuda + assert batch_idxs.is_contiguous() and batch_idxs.is_cuda + assert batch_offsets.is_contiguous() and batch_offsets.is_cuda + + while True: + idx = torch.cuda.IntTensor(n * meanActive).zero_() + start_len = torch.cuda.IntTensor(n, 2).zero_() + nActive = pointgroup_ops.ballquery_batch_p( + coords, batch_idxs, batch_offsets, idx, start_len, n, meanActive, radius + ) + if nActive <= n * meanActive: + break + meanActive = int(nActive // n + 1) + idx = idx[:nActive] + + return idx, start_len + + 
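+ # ball query is a discrete neighborhood search with no meaningful gradient, so backward returns None for every input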
@staticmethod + def backward(ctx, a=None, b=None): + return None, None, None + + +ballquery_batch_p = BallQueryBatchP.apply + + +class Clustering: + def __init__( + self, + ignored_labels, + class_mapping, + thresh=0.03, + closed_points=300, + min_points=50, + propose_points=100, + score_func=torch.max, + ) -> None: + self.ignored_labels = ignored_labels + self.thresh = thresh + self.closed_points = closed_points + self.min_points = min_points + self.class_mapping = class_mapping + self.propose_points = propose_points + self.score_func = score_func + + def cluster(self, vertices, scores): + labels = torch.max(scores, 1)[1] # (N) long, cuda + proposals_idx, proposals_offset = self.cluster_(vertices, labels) + + ## debug + # import ipdb; ipdb.set_trace() + # colors = np.array(create_color_palette())[labels.cpu()] + # write_triangle_mesh(vertices, colors, None, 'semantics.ply') + + # scatter + proposals_pred = torch.zeros( + (proposals_offset.shape[0] - 1, vertices.shape[0]), dtype=torch.int + ) # (nProposal, N), int, cuda + proposals_pred[proposals_idx[:, 0].long(), proposals_idx[:, 1].long()] = 1 + labels = labels[proposals_idx[:, 1][proposals_offset[:-1].long()].long()] + + proposals_pointnum = proposals_pred.sum(1) + npoint_mask = proposals_pointnum > self.propose_points + + proposals_pred = proposals_pred[npoint_mask] + labels = labels[npoint_mask] + return proposals_pred, labels + + def cluster_(self, vertices, labels): + """ + :param batch_idxs: (N), int, cuda + :labels: 0-19 + """ + batch_idxs = torch.zeros_like(labels) + + mask_non_ignored = torch.ones_like(labels).bool() + for ignored_label in self.ignored_labels: + mask_non_ignored = mask_non_ignored & ( + self.class_mapping[labels] != ignored_label + ) + object_idxs = mask_non_ignored.nonzero().view(-1) + + vertices_ = vertices[object_idxs].float() + labels_ = labels[object_idxs].int() + + if vertices_.numel() == 0: + return torch.zeros((0, 2)).int(), torch.zeros(1).int() + + batch_idxs_ = batch_idxs[object_idxs].int() + batch_offsets_ = torch.FloatTensor([0, object_idxs.shape[0]]).int().cuda() + + idx, start_len = ballquery_batch_p( + vertices_, batch_idxs_, batch_offsets_, self.thresh, self.closed_points + ) + proposals_idx, proposals_offset = bfs_cluster( + labels_.cpu(), idx.cpu(), start_len.cpu(), self.min_points + ) + proposals_idx[:, 1] = object_idxs[proposals_idx[:, 1].long()].int() + + return proposals_idx, proposals_offset + + def get_instances(self, vertices, scores): + proposals_pred, labels = self.cluster(vertices, scores) + instances = {} + for proposal_id in range(len(proposals_pred)): + clusters_i = proposals_pred[proposal_id] + score = scores[clusters_i.bool(), labels[proposal_id]] + score = self.score_func(score) + instances[proposal_id] = {} + instances[proposal_id]["conf"] = score.cpu().numpy() + instances[proposal_id]["label_id"] = self.class_mapping.cpu()[ + labels[proposal_id] + ] + instances[proposal_id]["pred_mask"] = clusters_i.cpu().numpy() + return instances + + +class BFSCluster(Function): + @staticmethod + def forward(ctx, semantic_label, ball_query_idxs, start_len, threshold): + """ + :param ctx: + :param semantic_label: (N), int + :param ball_query_idxs: (nActive), int + :param start_len: (N, 2), int + :return: cluster_idxs: int (sumNPoint, 2), dim 0 for cluster_id, dim 1 for corresponding point idxs in N + :return: cluster_offsets: int (nCluster + 1) + """ + + N = start_len.size(0) + + assert semantic_label.is_contiguous() + assert ball_query_idxs.is_contiguous() + assert start_len.is_contiguous() 
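+ # the outputs start as empty tensors of matching dtype/device; the CUDA kernel resizes and fills them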
+ + cluster_idxs = semantic_label.new() + cluster_offsets = semantic_label.new() + + pointgroup_ops.bfs_cluster( + semantic_label, + ball_query_idxs, + start_len, + cluster_idxs, + cluster_offsets, + N, + threshold, + ) + + return cluster_idxs, cluster_offsets + + @staticmethod + def backward(ctx, a=None): + return None + + +bfs_cluster = BFSCluster.apply diff --git a/Pointcept/pointcept/models/point_prompt_training/__init__.py b/Pointcept/pointcept/models/point_prompt_training/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f4c980b70b8c49dfc51625623071f21d6405d856 --- /dev/null +++ b/Pointcept/pointcept/models/point_prompt_training/__init__.py @@ -0,0 +1,4 @@ +from .point_prompt_training_v1m1_language_guided import * +from .point_prompt_training_v1m2_decoupled import * + +from .prompt_driven_normalization import PDNorm diff --git a/Pointcept/pointcept/models/point_prompt_training/point_prompt_training_v1m1_language_guided.py b/Pointcept/pointcept/models/point_prompt_training/point_prompt_training_v1m1_language_guided.py new file mode 100644 index 0000000000000000000000000000000000000000..443fc482ba02c319905a387c58fa7ec5032f6a07 --- /dev/null +++ b/Pointcept/pointcept/models/point_prompt_training/point_prompt_training_v1m1_language_guided.py @@ -0,0 +1,118 @@ +""" +Point Prompt Training + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from functools import partial +from collections import OrderedDict + +import torch +import torch.nn as nn +from pointcept.models.utils.structure import Point +from pointcept.models.builder import MODELS +from pointcept.models.losses import build_criteria + + +@MODELS.register_module("PPT-v1m1") +class PointPromptTraining(nn.Module): + """ + PointPromptTraining provides Data-driven Context and enables multi-dataset training with + Language-driven Categorical Alignment. PDNorm is supported by SpUNet-v1m3 to adapt the + backbone to a specific dataset with a given dataset condition and context. 
+ """ + + def __init__( + self, + backbone=None, + criteria=None, + backbone_out_channels=96, + context_channels=256, + conditions=("Structured3D", "ScanNet", "S3DIS"), + template="[x]", + clip_model="ViT-B/16", + # fmt: off + class_name=( + "wall", "floor", "cabinet", "bed", "chair", "sofa", "table", "door", + "window", "bookshelf", "bookcase", "picture", "counter", "desk", "shelves", "curtain", + "dresser", "pillow", "mirror", "ceiling", "refrigerator", "television", "shower curtain", "nightstand", + "toilet", "sink", "lamp", "bathtub", "garbagebin", "board", "beam", "column", + "clutter", "otherstructure", "otherfurniture", "otherprop", + ), + valid_index=( + (0, 1, 2, 3, 4, 5, 6, 7, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 33, 34, 35), + (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 20, 22, 24, 25, 27, 34), + (0, 1, 4, 5, 6, 7, 8, 10, 19, 29, 30, 31, 32), + ), + # fmt: on + backbone_mode=False, + ): + super().__init__() + assert len(conditions) == len(valid_index) + assert backbone.type in ["SpUNet-v1m3", "PT-v2m3", "PT-v3m1"] + self.backbone = MODELS.build(backbone) + self.criteria = build_criteria(criteria) + self.conditions = conditions + self.valid_index = valid_index + self.embedding_table = nn.Embedding(len(conditions), context_channels) + self.backbone_mode = backbone_mode + if not self.backbone_mode: + import clip + + clip_model, _ = clip.load( + clip_model, device="cpu", download_root="./.cache/clip" + ) + clip_model.requires_grad_(False) + class_prompt = [template.replace("[x]", name) for name in class_name] + class_token = clip.tokenize(class_prompt) + class_embedding = clip_model.encode_text(class_token) + class_embedding = class_embedding / class_embedding.norm( + dim=-1, keepdim=True + ) + self.register_buffer("class_embedding", class_embedding) + self.proj_head = nn.Linear( + backbone_out_channels, clip_model.text_projection.shape[1] + ) + self.logit_scale = clip_model.logit_scale + + def forward(self, data_dict): + condition = data_dict["condition"][0] + assert condition in self.conditions + context = self.embedding_table( + torch.tensor( + [self.conditions.index(condition)], device=data_dict["coord"].device + ) + ) + data_dict["context"] = context + point = self.backbone(data_dict) + # Backbone added after v1.5.0 return Point instead of feat and use DefaultSegmentorV2 + # TODO: remove this part after make all backbone return Point only. 
+ if isinstance(point, Point): + feat = point.feat + else: + feat = point + if self.backbone_mode: + # PPT serve as a multi-dataset backbone when enable backbone mode + return feat + feat = self.proj_head(feat) + feat = feat / feat.norm(dim=-1, keepdim=True) + sim = ( + feat + @ self.class_embedding[ + self.valid_index[self.conditions.index(condition)], : + ].t() + ) + logit_scale = self.logit_scale.exp() + seg_logits = logit_scale * sim + # train + if self.training: + loss = self.criteria(seg_logits, data_dict["segment"]) + return dict(loss=loss) + # eval + elif "segment" in data_dict.keys(): + loss = self.criteria(seg_logits, data_dict["segment"]) + return dict(loss=loss, seg_logits=seg_logits) + # test + else: + return dict(seg_logits=seg_logits) diff --git a/Pointcept/pointcept/models/point_prompt_training/point_prompt_training_v1m2_decoupled.py b/Pointcept/pointcept/models/point_prompt_training/point_prompt_training_v1m2_decoupled.py new file mode 100644 index 0000000000000000000000000000000000000000..9ad9c6bf1fcb5e17b139d6c06b44b589fcee816f --- /dev/null +++ b/Pointcept/pointcept/models/point_prompt_training/point_prompt_training_v1m2_decoupled.py @@ -0,0 +1,79 @@ +""" +Point Prompt Training with decoupled segmentation head + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from functools import partial +from collections import OrderedDict + +import torch +import torch.nn as nn +from pointcept.models.utils.structure import Point +from pointcept.models.builder import MODELS +from pointcept.models.losses import build_criteria + + +@MODELS.register_module("PPT-v1m2") +class PointPromptTraining(nn.Module): + """ + PointPromptTraining v1m2 provides Data-driven Context and enables multi-dataset training with + Decoupled Segmentation Head. PDNorm is supported by SpUNet-v1m3 to adapt the + backbone to a specific dataset with a given dataset condition and context. + """ + + def __init__( + self, + backbone=None, + criteria=None, + backbone_out_channels=96, + context_channels=256, + conditions=("Structured3D", "ScanNet", "S3DIS"), + num_classes=(25, 20, 13), + backbone_mode=False, + ): + super().__init__() + assert len(conditions) == len(num_classes) + assert backbone.type in ["SpUNet-v1m3", "PT-v2m3", "PT-v3m1"] + self.backbone = MODELS.build(backbone) + self.criteria = build_criteria(criteria) + self.conditions = conditions + self.embedding_table = nn.Embedding(len(conditions), context_channels) + self.backbone_mode = backbone_mode + self.seg_heads = nn.ModuleList( + [nn.Linear(backbone_out_channels, num_cls) for num_cls in num_classes] + ) + + def forward(self, data_dict): + condition = data_dict["condition"][0] + assert condition in self.conditions + context = self.embedding_table( + torch.tensor( + [self.conditions.index(condition)], device=data_dict["coord"].device + ) + ) + data_dict["context"] = context + point = self.backbone(data_dict) + # Backbone added after v1.5.0 return Point instead of feat and use DefaultSegmentorV2 + # TODO: remove this part after make all backbone return Point only. 
+ if isinstance(point, Point): + feat = point.feat + else: + feat = point + if self.backbone_mode: + # PPT serve as a multi-dataset backbone when enable backbone mode + return feat + seg_head = self.seg_heads[self.conditions.index(condition)] + seg_logits = seg_head(feat) + # train + if self.training: + loss = self.criteria(seg_logits, data_dict["segment"]) + return dict(loss=loss) + # eval + elif "segment" in data_dict.keys(): + loss = self.criteria(seg_logits, data_dict["segment"]) + return dict(loss=loss, seg_logits=seg_logits) + # test + else: + return dict(seg_logits=seg_logits) diff --git a/Pointcept/pointcept/models/point_prompt_training/prompt_driven_normalization.py b/Pointcept/pointcept/models/point_prompt_training/prompt_driven_normalization.py new file mode 100644 index 0000000000000000000000000000000000000000..5d7d0d0c01dd4ccf939afeff870b5d72cab403a3 --- /dev/null +++ b/Pointcept/pointcept/models/point_prompt_training/prompt_driven_normalization.py @@ -0,0 +1,47 @@ +import torch.nn as nn + +from pointcept.models.modules import PointModule, PointSequential +from pointcept.models.builder import MODULES + + +@MODULES.register_module() +class PDNorm(PointModule): + def __init__( + self, + num_features, + norm_layer, + context_channels=256, + conditions=("ScanNet", "S3DIS", "Structured3D"), + decouple=True, + adaptive=False, + ): + super().__init__() + self.conditions = conditions + self.decouple = decouple + self.adaptive = adaptive + if self.decouple: + self.norm = nn.ModuleList([norm_layer(num_features) for _ in conditions]) + else: + self.norm = norm_layer + if self.adaptive: + self.modulation = nn.Sequential( + nn.SiLU(), nn.Linear(context_channels, 2 * num_features, bias=True) + ) + + def forward(self, point): + assert {"feat", "condition"}.issubset(point.keys()) + if isinstance(point.condition, str): + condition = point.condition + else: + condition = point.condition[0] + if self.decouple: + assert condition in self.conditions + norm = self.norm[self.conditions.index(condition)] + else: + norm = self.norm + point.feat = norm(point.feat) + if self.adaptive: + assert "context" in point.keys() + shift, scale = self.modulation(point.context).chunk(2, dim=1) + point.feat = point.feat * (1.0 + scale) + shift + return point diff --git a/Pointcept/pointcept/models/point_transformer/__init__.py b/Pointcept/pointcept/models/point_transformer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d6493a312bfcf559642e6a2cc77d96c3770f0dd1 --- /dev/null +++ b/Pointcept/pointcept/models/point_transformer/__init__.py @@ -0,0 +1,3 @@ +from .point_transformer_seg import * +from .point_transformer_partseg import * +from .point_transformer_cls import * diff --git a/Pointcept/pointcept/models/point_transformer/point_transformer_cls.py b/Pointcept/pointcept/models/point_transformer/point_transformer_cls.py new file mode 100644 index 0000000000000000000000000000000000000000..8e12746fef73e9b3ee75b72942fbc8dc96e6e1bf --- /dev/null +++ b/Pointcept/pointcept/models/point_transformer/point_transformer_cls.py @@ -0,0 +1,131 @@ +""" +Point Transformer V1 for Object Classification + +Might be a bit different from the original paper + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
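+ +Expects a data_dict with "coord" (n, 3), "feat" (n, c), and "offset" (b,); when in_channels == 3 the coordinates themselves serve as the input features.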
+""" + +import torch +import torch.nn as nn + +from .point_transformer_seg import TransitionDown, Bottleneck +from pointcept.models.builder import MODELS + + +class PointTransformerCls(nn.Module): + def __init__(self, block, blocks, in_channels=6, num_classes=40): + super().__init__() + self.in_channels = in_channels + self.in_planes, planes = in_channels, [32, 64, 128, 256, 512] + fpn_planes, fpnhead_planes, share_planes = 128, 64, 8 + stride, nsample = [1, 4, 4, 4, 4], [8, 16, 16, 16, 16] + self.enc1 = self._make_enc( + block, + planes[0], + blocks[0], + share_planes, + stride=stride[0], + nsample=nsample[0], + ) # N/1 + self.enc2 = self._make_enc( + block, + planes[1], + blocks[1], + share_planes, + stride=stride[1], + nsample=nsample[1], + ) # N/4 + self.enc3 = self._make_enc( + block, + planes[2], + blocks[2], + share_planes, + stride=stride[2], + nsample=nsample[2], + ) # N/16 + self.enc4 = self._make_enc( + block, + planes[3], + blocks[3], + share_planes, + stride=stride[3], + nsample=nsample[3], + ) # N/64 + self.enc5 = self._make_enc( + block, + planes[4], + blocks[4], + share_planes, + stride=stride[4], + nsample=nsample[4], + ) # N/256 + self.cls = nn.Sequential( + nn.Linear(planes[4], 256), + nn.BatchNorm1d(256), + nn.ReLU(inplace=True), + nn.Dropout(p=0.5), + nn.Linear(256, 128), + nn.BatchNorm1d(128), + nn.ReLU(inplace=True), + nn.Dropout(p=0.5), + nn.Linear(128, num_classes), + ) + + def _make_enc(self, block, planes, blocks, share_planes=8, stride=1, nsample=16): + layers = [ + TransitionDown(self.in_planes, planes * block.expansion, stride, nsample) + ] + self.in_planes = planes * block.expansion + for _ in range(1, blocks): + layers.append( + block(self.in_planes, self.in_planes, share_planes, nsample=nsample) + ) + return nn.Sequential(*layers) + + def forward(self, data_dict): + p0 = data_dict["coord"] + x0 = data_dict["feat"] + o0 = data_dict["offset"].int() + x0 = p0 if self.in_channels == 3 else torch.cat((p0, x0), 1) + p1, x1, o1 = self.enc1([p0, x0, o0]) + p2, x2, o2 = self.enc2([p1, x1, o1]) + p3, x3, o3 = self.enc3([p2, x2, o2]) + p4, x4, o4 = self.enc4([p3, x3, o3]) + p5, x5, o5 = self.enc5([p4, x4, o4]) + x = [] + for i in range(o5.shape[0]): + if i == 0: + s_i, e_i, cnt = 0, o5[0], o5[0] + else: + s_i, e_i, cnt = o5[i - 1], o5[i], o5[i] - o5[i - 1] + x_b = x5[s_i:e_i, :].sum(0, True) / cnt + x.append(x_b) + x = torch.cat(x, 0) + x = self.cls(x) + return x + + +@MODELS.register_module("PointTransformer-Cls26") +class PointTransformerCls26(PointTransformerCls): + def __init__(self, **kwargs): + super(PointTransformerCls26, self).__init__( + Bottleneck, [1, 1, 1, 1, 1], **kwargs + ) + + +@MODELS.register_module("PointTransformer-Cls38") +class PointTransformerCls38(PointTransformerCls): + def __init__(self, **kwargs): + super(PointTransformerCls38, self).__init__( + Bottleneck, [1, 2, 2, 2, 2], **kwargs + ) + + +@MODELS.register_module("PointTransformer-Cls50") +class PointTransformerCls50(PointTransformerCls): + def __init__(self, **kwargs): + super(PointTransformerCls50, self).__init__( + Bottleneck, [1, 2, 3, 5, 2], **kwargs + ) diff --git a/Pointcept/pointcept/models/point_transformer/point_transformer_partseg.py b/Pointcept/pointcept/models/point_transformer/point_transformer_partseg.py new file mode 100644 index 0000000000000000000000000000000000000000..3326a9f7d6fd62a9394e434135615339a3c679f8 --- /dev/null +++ b/Pointcept/pointcept/models/point_transformer/point_transformer_partseg.py @@ -0,0 +1,374 @@ +""" +Point Transformer V1 for Part Segmentation + 
+Might be a bit different from the original paper + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import torch +import torch.nn as nn +import einops +import pointops + +from pointcept.models.builder import MODELS +from .utils import LayerNorm1d + + +class PointTransformerLayer(nn.Module): + def __init__(self, in_planes, out_planes, share_planes=8, nsample=16): + super().__init__() + self.mid_planes = mid_planes = out_planes // 1 + self.out_planes = out_planes + self.share_planes = share_planes + self.nsample = nsample + self.linear_q = nn.Linear(in_planes, mid_planes) + self.linear_k = nn.Linear(in_planes, mid_planes) + self.linear_v = nn.Linear(in_planes, out_planes) + self.linear_p = nn.Sequential( + nn.Linear(3, 3), + LayerNorm1d(3), + nn.ReLU(inplace=True), + nn.Linear(3, out_planes), + ) + self.linear_w = nn.Sequential( + LayerNorm1d(mid_planes), + nn.ReLU(inplace=True), + nn.Linear(mid_planes, out_planes // share_planes), + LayerNorm1d(out_planes // share_planes), + nn.ReLU(inplace=True), + nn.Linear(out_planes // share_planes, out_planes // share_planes), + ) + self.softmax = nn.Softmax(dim=1) + + def forward(self, pxo) -> torch.Tensor: + p, x, o = pxo # (n, 3), (n, c), (b) + x_q, x_k, x_v = self.linear_q(x), self.linear_k(x), self.linear_v(x) + x_k, idx = pointops.knn_query_and_group( + x_k, p, o, new_xyz=p, new_offset=o, nsample=self.nsample, with_xyz=True + ) + x_v, _ = pointops.knn_query_and_group( + x_v, + p, + o, + new_xyz=p, + new_offset=o, + idx=idx, + nsample=self.nsample, + with_xyz=False, + ) + p_r, x_k = x_k[:, :, 0:3], x_k[:, :, 3:] + p_r = self.linear_p(p_r) + r_qk = ( + x_k + - x_q.unsqueeze(1) + + einops.reduce( + p_r, "n ns (i j) -> n ns j", reduction="sum", j=self.mid_planes + ) + ) + w = self.linear_w(r_qk) # (n, nsample, c) + w = self.softmax(w) + x = torch.einsum( + "n t s i, n t i -> n s i", + einops.rearrange(x_v + p_r, "n ns (s i) -> n ns s i", s=self.share_planes), + w, + ) + x = einops.rearrange(x, "n s i -> n (s i)") + return x + + +class TransitionDown(nn.Module): + def __init__(self, in_planes, out_planes, stride=1, nsample=16): + super().__init__() + self.stride, self.nsample = stride, nsample + if stride != 1: + self.linear = nn.Linear(3 + in_planes, out_planes, bias=False) + self.pool = nn.MaxPool1d(nsample) + else: + self.linear = nn.Linear(in_planes, out_planes, bias=False) + self.bn = nn.BatchNorm1d(out_planes) + self.relu = nn.ReLU(inplace=True) + + def forward(self, pxo): + p, x, o = pxo # (n, 3), (n, c), (b) + if self.stride != 1: + n_o, count = [o[0].item() // self.stride], o[0].item() // self.stride + for i in range(1, o.shape[0]): + count += (o[i].item() - o[i - 1].item()) // self.stride + n_o.append(count) + n_o = torch.cuda.IntTensor(n_o) + idx = pointops.farthest_point_sampling(p, o, n_o) # (m) + n_p = p[idx.long(), :] # (m, 3) + x, _ = pointops.knn_query_and_group( + x, + p, + offset=o, + new_xyz=n_p, + new_offset=n_o, + nsample=self.nsample, + with_xyz=True, + ) + x = self.relu( + self.bn(self.linear(x).transpose(1, 2).contiguous()) + ) # (m, c, nsample) + x = self.pool(x).squeeze(-1) # (m, c) + p, o = n_p, n_o + else: + x = self.relu(self.bn(self.linear(x))) # (n, c) + return [p, x, o] + + +class TransitionUp(nn.Module): + def __init__(self, in_planes, out_planes=None, num_shape_class=None): + super().__init__() + if out_planes is None: + self.num_shape_class = num_shape_class + if num_shape_class is not None: + self.linear1 = nn.Sequential( + nn.Linear(2 * in_planes + 1024, 
in_planes), + nn.BatchNorm1d(in_planes), + nn.ReLU(inplace=True), + ) + else: + self.linear1 = nn.Sequential( + nn.Linear(2 * in_planes, in_planes), + nn.BatchNorm1d(in_planes), + nn.ReLU(inplace=True), + ) + + self.linear2 = nn.Sequential( + nn.Linear(in_planes, in_planes), nn.ReLU(inplace=True) + ) + if num_shape_class is not None: + self.linear3 = nn.Sequential( + nn.Linear(num_shape_class, 1024), nn.ReLU(inplace=True) + ) + else: + self.linear1 = nn.Sequential( + nn.Linear(out_planes, out_planes), + nn.BatchNorm1d(out_planes), + nn.ReLU(inplace=True), + ) + self.linear2 = nn.Sequential( + nn.Linear(in_planes, out_planes), + nn.BatchNorm1d(out_planes), + nn.ReLU(inplace=True), + ) + + def forward(self, pxo1, pxo2=None, y=None): + if pxo2 is None: + _, x, o = pxo1 # (n, 3), (n, c), (b) + x_tmp = [] + for i in range(o.shape[0]): + if i == 0: + s_i, e_i, cnt = 0, o[0], o[0] + else: + s_i, e_i, cnt = o[i - 1], o[i], o[i] - o[i - 1] + x_b = x[s_i:e_i, :] + y_b = y[i].unsqueeze(-1).unsqueeze(-1).long() + y_onehot = torch.zeros(1, self.num_shape_class).cuda() # (1, l) + y_onehot.scatter_(1, y_b, 1) # (1, l) + x_b = torch.cat( + ( + x_b, + self.linear2(x_b.sum(0, True) / cnt).repeat(cnt, 1), + self.linear3(y_onehot).repeat(cnt, 1), + ), + dim=1, + ) + x_tmp.append(x_b) + x = torch.cat(x_tmp, 0) + x = self.linear1(x) + else: + p1, x1, o1 = pxo1 + p2, x2, o2 = pxo2 + x = self.linear1(x1) + pointops.interpolation( + p2, p1, self.linear2(x2), o2, o1 + ) + return x + + +class Bottleneck(nn.Module): + expansion = 1 + + def __init__(self, in_planes, planes, share_planes=8, nsample=16): + super(Bottleneck, self).__init__() + self.linear1 = nn.Linear(in_planes, planes, bias=False) + self.bn1 = nn.BatchNorm1d(planes) + self.transformer = PointTransformerLayer(planes, planes, share_planes, nsample) + self.bn2 = nn.BatchNorm1d(planes) + self.linear3 = nn.Linear(planes, planes * self.expansion, bias=False) + self.bn3 = nn.BatchNorm1d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + + def forward(self, pxo): + p, x, o = pxo # (n, 3), (n, c), (b) + identity = x + x = self.relu(self.bn1(self.linear1(x))) + x = self.relu(self.bn2(self.transformer([p, x, o]))) + x = self.bn3(self.linear3(x)) + x += identity + x = self.relu(x) + return [p, x, o] + + +class PointTransformerSeg(nn.Module): + def __init__( + self, block, blocks, in_channels=6, num_classes=50, num_shape_classes=None + ): + super().__init__() + self.in_channels = in_channels + self.num_classes = num_classes + self.num_shape_classes = num_shape_classes + self.in_planes, planes = in_channels, [32, 64, 128, 256, 512] + fpn_planes, fpnhead_planes, share_planes = 128, 64, 8 + stride, nsample = [1, 4, 4, 4, 4], [8, 16, 16, 16, 16] + self.enc1 = self._make_enc( + block, + planes[0], + blocks[0], + share_planes, + stride=stride[0], + nsample=nsample[0], + ) # N/1 + self.enc2 = self._make_enc( + block, + planes[1], + blocks[1], + share_planes, + stride=stride[1], + nsample=nsample[1], + ) # N/4 + self.enc3 = self._make_enc( + block, + planes[2], + blocks[2], + share_planes, + stride=stride[2], + nsample=nsample[2], + ) # N/16 + self.enc4 = self._make_enc( + block, + planes[3], + blocks[3], + share_planes, + stride=stride[3], + nsample=nsample[3], + ) # N/64 + self.enc5 = self._make_enc( + block, + planes[4], + blocks[4], + share_planes, + stride=stride[4], + nsample=nsample[4], + ) # N/256 + self.dec5 = self._make_dec( + block, + planes[4], + 1, + share_planes, + num_shape_classes=num_shape_classes, + nsample=nsample[4], + is_head=True, + ) # 
transform p5 + self.dec4 = self._make_dec( + block, planes[3], 1, share_planes, nsample=nsample[3] + ) # fusion p5 and p4 + self.dec3 = self._make_dec( + block, planes[2], 1, share_planes, nsample=nsample[2] + ) # fusion p4 and p3 + self.dec2 = self._make_dec( + block, planes[1], 1, share_planes, nsample=nsample[1] + ) # fusion p3 and p2 + self.dec1 = self._make_dec( + block, planes[0], 1, share_planes, nsample=nsample[0] + ) # fusion p2 and p1 + self.cls = nn.Sequential( + nn.Linear(planes[0], planes[0]), + nn.BatchNorm1d(planes[0]), + nn.ReLU(inplace=True), + nn.Linear(planes[0], num_classes), + ) + + def _make_enc(self, block, planes, blocks, share_planes=8, stride=1, nsample=16): + layers = [ + TransitionDown(self.in_planes, planes * block.expansion, stride, nsample) + ] + self.in_planes = planes * block.expansion + for _ in range(blocks): + layers.append( + block(self.in_planes, self.in_planes, share_planes, nsample=nsample) + ) + return nn.Sequential(*layers) + + def _make_dec( + self, + block, + planes, + blocks, + share_planes=8, + num_shape_classes=None, + nsample=16, + is_head=False, + ): + layers = [ + TransitionUp( + self.in_planes, + None if is_head else planes * block.expansion, + num_shape_classes, + ) + ] + self.in_planes = planes * block.expansion + for _ in range(blocks): + layers.append( + block(self.in_planes, self.in_planes, share_planes, nsample=nsample) + ) + return nn.Sequential(*layers) + + def forward(self, data_dict): + p0 = data_dict["coord"] + x0 = data_dict["feat"] + o0 = data_dict["offset"].int() + if self.num_shape_classes is not None: + y = data_dict["cls_token"] + p1, x1, o1 = self.enc1([p0, x0, o0]) + p2, x2, o2 = self.enc2([p1, x1, o1]) + p3, x3, o3 = self.enc3([p2, x2, o2]) + p4, x4, o4 = self.enc4([p3, x3, o3]) + p5, x5, o5 = self.enc5([p4, x4, o4]) + if self.num_shape_classes is not None: + x5 = self.dec5[1:]([p5, self.dec5[0]([p5, x5, o5], y=y), o5])[1] + else: + x5 = self.dec5[1:]([p5, self.dec5[0]([p5, x5, o5]), o5])[1] + x4 = self.dec4[1:]([p4, self.dec4[0]([p4, x4, o4], [p5, x5, o5]), o4])[1] + x3 = self.dec3[1:]([p3, self.dec3[0]([p3, x3, o3], [p4, x4, o4]), o3])[1] + x2 = self.dec2[1:]([p2, self.dec2[0]([p2, x2, o2], [p3, x3, o3]), o2])[1] + x1 = self.dec1[1:]([p1, self.dec1[0]([p1, x1, o1], [p2, x2, o2]), o1])[1] + x = self.cls(x1) + return x + + +@MODELS.register_module("PointTransformer-PartSeg26") +class PointTransformerSeg26(PointTransformerSeg): + def __init__(self, **kwargs): + super(PointTransformerSeg26, self).__init__( + Bottleneck, [1, 1, 1, 1, 1], **kwargs + ) + + +@MODELS.register_module("PointTransformer-PartSeg38") +class PointTransformerSeg38(PointTransformerSeg): + def __init__(self, **kwargs): + super(PointTransformerSeg38, self).__init__( + Bottleneck, [1, 2, 2, 2, 2], **kwargs + ) + + +@MODELS.register_module("PointTransformer-PartSeg50") +class PointTransformerSeg50(PointTransformerSeg): + def __init__(self, **kwargs): + super(PointTransformerSeg50, self).__init__( + Bottleneck, [1, 2, 3, 5, 2], **kwargs + ) diff --git a/Pointcept/pointcept/models/point_transformer/point_transformer_seg.py b/Pointcept/pointcept/models/point_transformer/point_transformer_seg.py new file mode 100644 index 0000000000000000000000000000000000000000..248cacad1ade65e48fa4686560fb40617a0ea449 --- /dev/null +++ b/Pointcept/pointcept/models/point_transformer/point_transformer_seg.py @@ -0,0 +1,327 @@ +""" +Point Transformer V1 for Semantic Segmentation + +Might be a bit different from the original paper + +Author: Xiaoyang Wu 
(xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import torch +import torch.nn as nn +import einops +import pointops + +from pointcept.models.builder import MODELS +from .utils import LayerNorm1d + + +class PointTransformerLayer(nn.Module): + def __init__(self, in_planes, out_planes, share_planes=8, nsample=16): + super().__init__() + self.mid_planes = mid_planes = out_planes // 1 + self.out_planes = out_planes + self.share_planes = share_planes + self.nsample = nsample + self.linear_q = nn.Linear(in_planes, mid_planes) + self.linear_k = nn.Linear(in_planes, mid_planes) + self.linear_v = nn.Linear(in_planes, out_planes) + self.linear_p = nn.Sequential( + nn.Linear(3, 3), + LayerNorm1d(3), + nn.ReLU(inplace=True), + nn.Linear(3, out_planes), + ) + self.linear_w = nn.Sequential( + LayerNorm1d(mid_planes), + nn.ReLU(inplace=True), + nn.Linear(mid_planes, out_planes // share_planes), + LayerNorm1d(out_planes // share_planes), + nn.ReLU(inplace=True), + nn.Linear(out_planes // share_planes, out_planes // share_planes), + ) + self.softmax = nn.Softmax(dim=1) + + def forward(self, pxo) -> torch.Tensor: + p, x, o = pxo # (n, 3), (n, c), (b) + x_q, x_k, x_v = self.linear_q(x), self.linear_k(x), self.linear_v(x) + x_k, idx = pointops.knn_query_and_group( + x_k, p, o, new_xyz=p, new_offset=o, nsample=self.nsample, with_xyz=True + ) + x_v, _ = pointops.knn_query_and_group( + x_v, + p, + o, + new_xyz=p, + new_offset=o, + idx=idx, + nsample=self.nsample, + with_xyz=False, + ) + p_r, x_k = x_k[:, :, 0:3], x_k[:, :, 3:] + p_r = self.linear_p(p_r) + r_qk = ( + x_k + - x_q.unsqueeze(1) + + einops.reduce( + p_r, "n ns (i j) -> n ns j", reduction="sum", j=self.mid_planes + ) + ) + w = self.linear_w(r_qk) # (n, nsample, c) + w = self.softmax(w) + x = torch.einsum( + "n t s i, n t i -> n s i", + einops.rearrange(x_v + p_r, "n ns (s i) -> n ns s i", s=self.share_planes), + w, + ) + x = einops.rearrange(x, "n s i -> n (s i)") + return x + + +class TransitionDown(nn.Module): + def __init__(self, in_planes, out_planes, stride=1, nsample=16): + super().__init__() + self.stride, self.nsample = stride, nsample + if stride != 1: + self.linear = nn.Linear(3 + in_planes, out_planes, bias=False) + self.pool = nn.MaxPool1d(nsample) + else: + self.linear = nn.Linear(in_planes, out_planes, bias=False) + self.bn = nn.BatchNorm1d(out_planes) + self.relu = nn.ReLU(inplace=True) + + def forward(self, pxo): + p, x, o = pxo # (n, 3), (n, c), (b) + if self.stride != 1: + n_o, count = [o[0].item() // self.stride], o[0].item() // self.stride + for i in range(1, o.shape[0]): + count += (o[i].item() - o[i - 1].item()) // self.stride + n_o.append(count) + n_o = torch.cuda.IntTensor(n_o) + idx = pointops.farthest_point_sampling(p, o, n_o) # (m) + n_p = p[idx.long(), :] # (m, 3) + x, _ = pointops.knn_query_and_group( + x, + p, + offset=o, + new_xyz=n_p, + new_offset=n_o, + nsample=self.nsample, + with_xyz=True, + ) + x = self.relu( + self.bn(self.linear(x).transpose(1, 2).contiguous()) + ) # (m, c, nsample) + x = self.pool(x).squeeze(-1) # (m, c) + p, o = n_p, n_o + else: + x = self.relu(self.bn(self.linear(x))) # (n, c) + return [p, x, o] + + +class TransitionUp(nn.Module): + def __init__(self, in_planes, out_planes=None): + super().__init__() + if out_planes is None: + self.linear1 = nn.Sequential( + nn.Linear(2 * in_planes, in_planes), + nn.BatchNorm1d(in_planes), + nn.ReLU(inplace=True), + ) + self.linear2 = nn.Sequential( + nn.Linear(in_planes, in_planes), nn.ReLU(inplace=True) + ) + else: + 
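+ # non-head stage: linear1 maps the fine-level skip features and linear2 maps the coarser features, which forward() interpolates up and adds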
self.linear1 = nn.Sequential( + nn.Linear(out_planes, out_planes), + nn.BatchNorm1d(out_planes), + nn.ReLU(inplace=True), + ) + self.linear2 = nn.Sequential( + nn.Linear(in_planes, out_planes), + nn.BatchNorm1d(out_planes), + nn.ReLU(inplace=True), + ) + + def forward(self, pxo1, pxo2=None): + if pxo2 is None: + _, x, o = pxo1 # (n, 3), (n, c), (b) + x_tmp = [] + for i in range(o.shape[0]): + if i == 0: + s_i, e_i, cnt = 0, o[0], o[0] + else: + s_i, e_i, cnt = o[i - 1], o[i], o[i] - o[i - 1] + x_b = x[s_i:e_i, :] + x_b = torch.cat( + (x_b, self.linear2(x_b.sum(0, True) / cnt).repeat(cnt, 1)), 1 + ) + x_tmp.append(x_b) + x = torch.cat(x_tmp, 0) + x = self.linear1(x) + else: + p1, x1, o1 = pxo1 + p2, x2, o2 = pxo2 + x = self.linear1(x1) + pointops.interpolation( + p2, p1, self.linear2(x2), o2, o1 + ) + return x + + +class Bottleneck(nn.Module): + expansion = 1 + + def __init__(self, in_planes, planes, share_planes=8, nsample=16): + super(Bottleneck, self).__init__() + self.linear1 = nn.Linear(in_planes, planes, bias=False) + self.bn1 = nn.BatchNorm1d(planes) + self.transformer = PointTransformerLayer(planes, planes, share_planes, nsample) + self.bn2 = nn.BatchNorm1d(planes) + self.linear3 = nn.Linear(planes, planes * self.expansion, bias=False) + self.bn3 = nn.BatchNorm1d(planes * self.expansion) + self.relu = nn.ReLU(inplace=True) + + def forward(self, pxo): + p, x, o = pxo # (n, 3), (n, c), (b) + identity = x + x = self.relu(self.bn1(self.linear1(x))) + x = self.relu(self.bn2(self.transformer([p, x, o]))) + x = self.bn3(self.linear3(x)) + x += identity + x = self.relu(x) + return [p, x, o] + + +class PointTransformerSeg(nn.Module): + def __init__(self, block, blocks, in_channels=6, num_classes=13): + super().__init__() + self.in_channels = in_channels + self.in_planes, planes = in_channels, [32, 64, 128, 256, 512] + fpn_planes, fpnhead_planes, share_planes = 128, 64, 8 + stride, nsample = [1, 4, 4, 4, 4], [8, 16, 16, 16, 16] + self.enc1 = self._make_enc( + block, + planes[0], + blocks[0], + share_planes, + stride=stride[0], + nsample=nsample[0], + ) # N/1 + self.enc2 = self._make_enc( + block, + planes[1], + blocks[1], + share_planes, + stride=stride[1], + nsample=nsample[1], + ) # N/4 + self.enc3 = self._make_enc( + block, + planes[2], + blocks[2], + share_planes, + stride=stride[2], + nsample=nsample[2], + ) # N/16 + self.enc4 = self._make_enc( + block, + planes[3], + blocks[3], + share_planes, + stride=stride[3], + nsample=nsample[3], + ) # N/64 + self.enc5 = self._make_enc( + block, + planes[4], + blocks[4], + share_planes, + stride=stride[4], + nsample=nsample[4], + ) # N/256 + self.dec5 = self._make_dec( + block, planes[4], 1, share_planes, nsample=nsample[4], is_head=True + ) # transform p5 + self.dec4 = self._make_dec( + block, planes[3], 1, share_planes, nsample=nsample[3] + ) # fusion p5 and p4 + self.dec3 = self._make_dec( + block, planes[2], 1, share_planes, nsample=nsample[2] + ) # fusion p4 and p3 + self.dec2 = self._make_dec( + block, planes[1], 1, share_planes, nsample=nsample[1] + ) # fusion p3 and p2 + self.dec1 = self._make_dec( + block, planes[0], 1, share_planes, nsample=nsample[0] + ) # fusion p2 and p1 + self.cls = nn.Sequential( + nn.Linear(planes[0], planes[0]), + nn.BatchNorm1d(planes[0]), + nn.ReLU(inplace=True), + nn.Linear(planes[0], num_classes), + ) + + def _make_enc(self, block, planes, blocks, share_planes=8, stride=1, nsample=16): + layers = [ + TransitionDown(self.in_planes, planes * block.expansion, stride, nsample) + ] + self.in_planes = planes * 
block.expansion + for _ in range(blocks): + layers.append( + block(self.in_planes, self.in_planes, share_planes, nsample=nsample) + ) + return nn.Sequential(*layers) + + def _make_dec( + self, block, planes, blocks, share_planes=8, nsample=16, is_head=False + ): + layers = [ + TransitionUp(self.in_planes, None if is_head else planes * block.expansion) + ] + self.in_planes = planes * block.expansion + for _ in range(blocks): + layers.append( + block(self.in_planes, self.in_planes, share_planes, nsample=nsample) + ) + return nn.Sequential(*layers) + + def forward(self, data_dict): + p0 = data_dict["coord"] + x0 = data_dict["feat"] + o0 = data_dict["offset"].int() + p1, x1, o1 = self.enc1([p0, x0, o0]) + p2, x2, o2 = self.enc2([p1, x1, o1]) + p3, x3, o3 = self.enc3([p2, x2, o2]) + p4, x4, o4 = self.enc4([p3, x3, o3]) + p5, x5, o5 = self.enc5([p4, x4, o4]) + x5 = self.dec5[1:]([p5, self.dec5[0]([p5, x5, o5]), o5])[1] + x4 = self.dec4[1:]([p4, self.dec4[0]([p4, x4, o4], [p5, x5, o5]), o4])[1] + x3 = self.dec3[1:]([p3, self.dec3[0]([p3, x3, o3], [p4, x4, o4]), o3])[1] + x2 = self.dec2[1:]([p2, self.dec2[0]([p2, x2, o2], [p3, x3, o3]), o2])[1] + x1 = self.dec1[1:]([p1, self.dec1[0]([p1, x1, o1], [p2, x2, o2]), o1])[1] + x = self.cls(x1) + return x + + +@MODELS.register_module("PointTransformer-Seg26") +class PointTransformerSeg26(PointTransformerSeg): + def __init__(self, **kwargs): + super(PointTransformerSeg26, self).__init__( + Bottleneck, [1, 1, 1, 1, 1], **kwargs + ) + + +@MODELS.register_module("PointTransformer-Seg38") +class PointTransformerSeg38(PointTransformerSeg): + def __init__(self, **kwargs): + super(PointTransformerSeg38, self).__init__( + Bottleneck, [1, 2, 2, 2, 2], **kwargs + ) + + +@MODELS.register_module("PointTransformer-Seg50") +class PointTransformerSeg50(PointTransformerSeg): + def __init__(self, **kwargs): + super(PointTransformerSeg50, self).__init__( + Bottleneck, [1, 2, 3, 5, 2], **kwargs + ) diff --git a/Pointcept/pointcept/models/point_transformer/utils.py b/Pointcept/pointcept/models/point_transformer/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c5687701835bb1f8a8936ea5ae5d52285567dc77 --- /dev/null +++ b/Pointcept/pointcept/models/point_transformer/utils.py @@ -0,0 +1,14 @@ +import torch +import torch.nn as nn + +torch.nn.LayerNorm + + +class LayerNorm1d(nn.BatchNorm1d): + def forward(self, input: torch.Tensor) -> torch.Tensor: + return ( + super() + .forward(input.transpose(1, 2).contiguous()) + .transpose(1, 2) + .contiguous() + ) diff --git a/Pointcept/pointcept/models/point_transformer_v2/__init__.py b/Pointcept/pointcept/models/point_transformer_v2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e9689fa2518b599bc6f94e6f8d0ea461859b8909 --- /dev/null +++ b/Pointcept/pointcept/models/point_transformer_v2/__init__.py @@ -0,0 +1,10 @@ +""" +Point Transformer V2 + +Copyright (c) Xiaoyang Wu (xiaoyang.wu@connect.hku.hk). All Rights Reserved. +Please cite our work if you use any part of the code. 
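+
+Three variants are exported below: m1 is the original release with the
+GroupedLinear weight encoding, m2 is the recommended base model with the
+grouped linear disabled, and m3 adds prompt-driven normalization (PDNorm)
+for Point Prompt Training.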
+""" + +from .point_transformer_v2m1_origin import * +from .point_transformer_v2m2_base import * +from .point_transformer_v2m3_pdnorm import * diff --git a/Pointcept/pointcept/models/point_transformer_v2/point_transformer_v2m1_origin.py b/Pointcept/pointcept/models/point_transformer_v2/point_transformer_v2m1_origin.py new file mode 100644 index 0000000000000000000000000000000000000000..b325d9eb7d5e1507ce62d5cbf60bb000cf83acbc --- /dev/null +++ b/Pointcept/pointcept/models/point_transformer_v2/point_transformer_v2m1_origin.py @@ -0,0 +1,614 @@ +""" +Point Transformer V2 mode 1 + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from copy import deepcopy +import math +import torch +import torch.nn as nn +from torch.utils.checkpoint import checkpoint +from torch_geometric.nn.pool import voxel_grid +from torch_scatter import segment_csr + +import einops +from timm.models.layers import DropPath +import pointops + +from pointcept.models.builder import MODELS +from pointcept.models.utils import offset2batch, batch2offset + + +class GroupedLinear(nn.Module): + __constants__ = ["in_features", "out_features", "groups"] + in_features: int + out_features: int + groups: int + weight: torch.Tensor + + def __init__( + self, in_features: int, out_features: int, groups: int, device=None, dtype=None + ) -> None: + factory_kwargs = {"device": device, "dtype": dtype} + super(GroupedLinear, self).__init__() + self.in_features = in_features + self.out_features = out_features + self.groups = groups + assert in_features & groups == 0 + assert out_features % groups == 0 + # for convenient, currently only support out_features == groups, one output + assert out_features == groups + self.weight = nn.Parameter(torch.empty((1, in_features), **factory_kwargs)) + self.reset_parameters() + + def reset_parameters(self) -> None: + nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + return ( + (input * self.weight) + .reshape( + list(input.shape[:-1]) + [self.groups, input.shape[-1] // self.groups] + ) + .sum(-1) + ) + + def extra_repr(self) -> str: + return "in_features={}, out_features={}, bias={}".format( + self.in_features, self.out_features, self.bias is not None + ) + + +class PointBatchNorm(nn.Module): + """ + Batch Normalization for Point Clouds data in shape of [B*N, C], [B*N, L, C] + """ + + def __init__(self, embed_channels): + super().__init__() + self.norm = nn.BatchNorm1d(embed_channels) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + if input.dim() == 3: + return ( + self.norm(input.transpose(1, 2).contiguous()) + .transpose(1, 2) + .contiguous() + ) + elif input.dim() == 2: + return self.norm(input) + else: + raise NotImplementedError + + +class GroupedVectorAttention(nn.Module): + def __init__( + self, + embed_channels, + groups, + attn_drop_rate=0.0, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + ): + super(GroupedVectorAttention, self).__init__() + self.embed_channels = embed_channels + self.groups = groups + assert embed_channels % groups == 0 + self.attn_drop_rate = attn_drop_rate + self.qkv_bias = qkv_bias + self.pe_multiplier = pe_multiplier + self.pe_bias = pe_bias + + self.linear_q = nn.Sequential( + nn.Linear(embed_channels, embed_channels, bias=qkv_bias), + PointBatchNorm(embed_channels), + nn.ReLU(inplace=True), + ) + self.linear_k = nn.Sequential( + nn.Linear(embed_channels, embed_channels, bias=qkv_bias), + PointBatchNorm(embed_channels), + 
nn.ReLU(inplace=True), + ) + + self.linear_v = nn.Linear(embed_channels, embed_channels, bias=qkv_bias) + + if self.pe_multiplier: + self.linear_p_multiplier = nn.Sequential( + nn.Linear(3, embed_channels), + PointBatchNorm(embed_channels), + nn.ReLU(inplace=True), + nn.Linear(embed_channels, embed_channels), + ) + if self.pe_bias: + self.linear_p_bias = nn.Sequential( + nn.Linear(3, embed_channels), + PointBatchNorm(embed_channels), + nn.ReLU(inplace=True), + nn.Linear(embed_channels, embed_channels), + ) + self.weight_encoding = nn.Sequential( + GroupedLinear(embed_channels, groups, groups), + PointBatchNorm(groups), + nn.ReLU(inplace=True), + nn.Linear(groups, groups), + ) + self.softmax = nn.Softmax(dim=1) + self.attn_drop = nn.Dropout(attn_drop_rate) + + def forward(self, feat, coord, reference_index): + query, key, value = ( + self.linear_q(feat), + self.linear_k(feat), + self.linear_v(feat), + ) + key = pointops.grouping(reference_index, key, coord, with_xyz=True) + value = pointops.grouping(reference_index, value, coord, with_xyz=False) + pos, key = key[:, :, 0:3], key[:, :, 3:] + relation_qk = key - query.unsqueeze(1) + if self.pe_multiplier: + pem = self.linear_p_multiplier(pos) + relation_qk = relation_qk * pem + if self.pe_bias: + peb = self.linear_p_bias(pos) + relation_qk = relation_qk + peb + value = value + peb + + weight = self.weight_encoding(relation_qk) + weight = self.attn_drop(self.softmax(weight)) + + mask = torch.sign(reference_index + 1) + weight = torch.einsum("n s g, n s -> n s g", weight, mask) + value = einops.rearrange(value, "n ns (g i) -> n ns g i", g=self.groups) + feat = torch.einsum("n s g i, n s g -> n g i", value, weight) + feat = einops.rearrange(feat, "n g i -> n (g i)") + return feat + + +class Block(nn.Module): + def __init__( + self, + embed_channels, + groups, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.0, + enable_checkpoint=False, + ): + super(Block, self).__init__() + self.attn = GroupedVectorAttention( + embed_channels=embed_channels, + groups=groups, + qkv_bias=qkv_bias, + attn_drop_rate=attn_drop_rate, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + ) + self.fc1 = nn.Linear(embed_channels, embed_channels, bias=False) + self.fc3 = nn.Linear(embed_channels, embed_channels, bias=False) + self.norm1 = PointBatchNorm(embed_channels) + self.norm2 = PointBatchNorm(embed_channels) + self.norm3 = PointBatchNorm(embed_channels) + self.act = nn.ReLU(inplace=True) + self.enable_checkpoint = enable_checkpoint + self.drop_path = ( + DropPath(drop_path_rate) if drop_path_rate > 0.0 else nn.Identity() + ) + + def forward(self, points, reference_index): + coord, feat, offset = points + identity = feat + feat = self.act(self.norm1(self.fc1(feat))) + feat = ( + self.attn(feat, coord, reference_index) + if not self.enable_checkpoint + else checkpoint(self.attn, feat, coord, reference_index) + ) + feat = self.act(self.norm2(feat)) + feat = self.norm3(self.fc3(feat)) + feat = identity + self.drop_path(feat) + feat = self.act(feat) + return [coord, feat, offset] + + +class BlockSequence(nn.Module): + def __init__( + self, + depth, + embed_channels, + groups, + neighbours=16, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.0, + enable_checkpoint=False, + ): + super(BlockSequence, self).__init__() + + if isinstance(drop_path_rate, list): + drop_path_rates = drop_path_rate + assert len(drop_path_rates) == depth + elif isinstance(drop_path_rate, float): + 
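+            # Stochastic-depth schedule: drop_path_rate arrives either as a
+            # per-block list (the top-level model slices a torch.linspace ramp
+            # across stages) or as a single float, which is simply repeated for
+            # every block here; any other value falls through to 0.0, i.e.
+            # DropPath disabled.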
drop_path_rates = [deepcopy(drop_path_rate) for _ in range(depth)] + else: + drop_path_rates = [0.0 for _ in range(depth)] + + self.neighbours = neighbours + self.blocks = nn.ModuleList() + for i in range(depth): + block = Block( + embed_channels=embed_channels, + groups=groups, + qkv_bias=qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate, + drop_path_rate=drop_path_rates[i], + enable_checkpoint=enable_checkpoint, + ) + self.blocks.append(block) + + def forward(self, points): + coord, feat, offset = points + # reference index query of neighbourhood attention + # for windows attention, modify reference index query method + reference_index, _ = pointops.knn_query(self.neighbours, coord, offset) + for block in self.blocks: + points = block(points, reference_index) + return points + + +class GridPool(nn.Module): + """ + Partition-based Pooling (Grid Pooling) + """ + + def __init__(self, in_channels, out_channels, grid_size, bias=False): + super(GridPool, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.grid_size = grid_size + + self.fc = nn.Linear(in_channels, out_channels, bias=bias) + self.norm = PointBatchNorm(out_channels) + self.act = nn.ReLU(inplace=True) + + def forward(self, points, start=None): + coord, feat, offset = points + batch = offset2batch(offset) + feat = self.act(self.norm(self.fc(feat))) + start = ( + segment_csr( + coord, + torch.cat([batch.new_zeros(1), torch.cumsum(batch.bincount(), dim=0)]), + reduce="min", + ) + if start is None + else start + ) + cluster = voxel_grid( + pos=coord - start[batch], size=self.grid_size, batch=batch, start=0 + ) + unique, cluster, counts = torch.unique( + cluster, sorted=True, return_inverse=True, return_counts=True + ) + _, sorted_cluster_indices = torch.sort(cluster) + idx_ptr = torch.cat([counts.new_zeros(1), torch.cumsum(counts, dim=0)]) + coord = segment_csr(coord[sorted_cluster_indices], idx_ptr, reduce="mean") + feat = segment_csr(feat[sorted_cluster_indices], idx_ptr, reduce="max") + batch = batch[idx_ptr[:-1]] + offset = batch2offset(batch) + return [coord, feat, offset], cluster + + +class UnpoolWithSkip(nn.Module): + """ + Map Unpooling with skip connection + """ + + def __init__( + self, + in_channels, + skip_channels, + out_channels, + bias=True, + skip=True, + backend="map", + ): + super(UnpoolWithSkip, self).__init__() + self.in_channels = in_channels + self.skip_channels = skip_channels + self.out_channels = out_channels + self.skip = skip + self.backend = backend + assert self.backend in ["map", "interp"] + + self.proj = nn.Sequential( + nn.Linear(in_channels, out_channels, bias=bias), + PointBatchNorm(out_channels), + nn.ReLU(inplace=True), + ) + self.proj_skip = nn.Sequential( + nn.Linear(skip_channels, out_channels, bias=bias), + PointBatchNorm(out_channels), + nn.ReLU(inplace=True), + ) + + def forward(self, points, skip_points, cluster=None): + coord, feat, offset = points + skip_coord, skip_feat, skip_offset = skip_points + if self.backend == "map" and cluster is not None: + feat = self.proj(feat)[cluster] + else: + feat = pointops.interpolation( + coord, skip_coord, self.proj(feat), offset, skip_offset + ) + if self.skip: + feat = feat + self.proj_skip(skip_feat) + return [skip_coord, feat, skip_offset] + + +class Encoder(nn.Module): + def __init__( + self, + depth, + in_channels, + embed_channels, + groups, + grid_size=None, + neighbours=16, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=None, + 
drop_path_rate=None, + enable_checkpoint=False, + ): + super(Encoder, self).__init__() + + self.down = GridPool( + in_channels=in_channels, + out_channels=embed_channels, + grid_size=grid_size, + ) + + self.blocks = BlockSequence( + depth=depth, + embed_channels=embed_channels, + groups=groups, + neighbours=neighbours, + qkv_bias=qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate if attn_drop_rate is not None else 0.0, + drop_path_rate=drop_path_rate if drop_path_rate is not None else 0.0, + enable_checkpoint=enable_checkpoint, + ) + + def forward(self, points): + points, cluster = self.down(points) + return self.blocks(points), cluster + + +class Decoder(nn.Module): + def __init__( + self, + in_channels, + skip_channels, + embed_channels, + groups, + depth, + neighbours=16, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=None, + drop_path_rate=None, + enable_checkpoint=False, + unpool_backend="map", + ): + super(Decoder, self).__init__() + + self.up = UnpoolWithSkip( + in_channels=in_channels, + out_channels=embed_channels, + skip_channels=skip_channels, + backend=unpool_backend, + ) + + self.blocks = BlockSequence( + depth=depth, + embed_channels=embed_channels, + groups=groups, + neighbours=neighbours, + qkv_bias=qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate if attn_drop_rate is not None else 0.0, + drop_path_rate=drop_path_rate if drop_path_rate is not None else 0.0, + enable_checkpoint=enable_checkpoint, + ) + + def forward(self, points, skip_points, cluster): + points = self.up(points, skip_points, cluster) + return self.blocks(points) + + +class GVAPatchEmbed(nn.Module): + def __init__( + self, + depth, + in_channels, + embed_channels, + groups, + neighbours=16, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.0, + enable_checkpoint=False, + ): + super(GVAPatchEmbed, self).__init__() + self.in_channels = in_channels + self.embed_channels = embed_channels + self.proj = nn.Sequential( + nn.Linear(in_channels, embed_channels, bias=False), + PointBatchNorm(embed_channels), + nn.ReLU(inplace=True), + ) + self.blocks = BlockSequence( + depth=depth, + embed_channels=embed_channels, + groups=groups, + neighbours=neighbours, + qkv_bias=qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate, + drop_path_rate=drop_path_rate, + enable_checkpoint=enable_checkpoint, + ) + + def forward(self, points): + coord, feat, offset = points + feat = self.proj(feat) + return self.blocks([coord, feat, offset]) + + +@MODELS.register_module("PT-v2m1") +class PointTransformerV2(nn.Module): + def __init__( + self, + in_channels, + num_classes, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.12, 0.24, 0.48), + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0, + enable_checkpoint=False, + unpool_backend="map", + ): + super(PointTransformerV2, self).__init__() + self.in_channels = in_channels + self.num_classes = num_classes + self.num_stages = len(enc_depths) + assert self.num_stages == len(dec_depths) + assert self.num_stages == 
len(enc_channels) + assert self.num_stages == len(dec_channels) + assert self.num_stages == len(enc_groups) + assert self.num_stages == len(dec_groups) + assert self.num_stages == len(enc_neighbours) + assert self.num_stages == len(dec_neighbours) + assert self.num_stages == len(grid_sizes) + self.patch_embed = GVAPatchEmbed( + in_channels=in_channels, + embed_channels=patch_embed_channels, + groups=patch_embed_groups, + depth=patch_embed_depth, + neighbours=patch_embed_neighbours, + qkv_bias=attn_qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate, + enable_checkpoint=enable_checkpoint, + ) + + enc_dp_rates = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(enc_depths)) + ] + dec_dp_rates = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(dec_depths)) + ] + enc_channels = [patch_embed_channels] + list(enc_channels) + dec_channels = list(dec_channels) + [enc_channels[-1]] + self.enc_stages = nn.ModuleList() + self.dec_stages = nn.ModuleList() + for i in range(self.num_stages): + enc = Encoder( + depth=enc_depths[i], + in_channels=enc_channels[i], + embed_channels=enc_channels[i + 1], + groups=enc_groups[i], + grid_size=grid_sizes[i], + neighbours=enc_neighbours[i], + qkv_bias=attn_qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate, + drop_path_rate=enc_dp_rates[ + sum(enc_depths[:i]) : sum(enc_depths[: i + 1]) + ], + enable_checkpoint=enable_checkpoint, + ) + dec = Decoder( + depth=dec_depths[i], + in_channels=dec_channels[i + 1], + skip_channels=enc_channels[i], + embed_channels=dec_channels[i], + groups=dec_groups[i], + neighbours=dec_neighbours[i], + qkv_bias=attn_qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate, + drop_path_rate=dec_dp_rates[ + sum(dec_depths[:i]) : sum(dec_depths[: i + 1]) + ], + enable_checkpoint=enable_checkpoint, + unpool_backend=unpool_backend, + ) + self.enc_stages.append(enc) + self.dec_stages.append(dec) + self.seg_head = ( + nn.Sequential( + nn.Linear(dec_channels[0], dec_channels[0]), + PointBatchNorm(dec_channels[0]), + nn.ReLU(inplace=True), + nn.Linear(dec_channels[0], num_classes), + ) + if num_classes > 0 + else nn.Identity() + ) + + def forward(self, data_dict): + coord = data_dict["coord"] + feat = data_dict["feat"] + offset = data_dict["offset"].int() + + # a batch of point cloud is a list of coord, feat and offset + points = [coord, feat, offset] + points = self.patch_embed(points) + skips = [[points]] + for i in range(self.num_stages): + points, cluster = self.enc_stages[i](points) + skips[-1].append(cluster) # record grid cluster of pooling + skips.append([points]) # record points info of current stage + + points = skips.pop(-1)[0] # unpooling points info in the last enc stage + for i in reversed(range(self.num_stages)): + skip_points, cluster = skips.pop(-1) + points = self.dec_stages[i](points, skip_points, cluster) + coord, feat, offset = points + seg_logits = self.seg_head(feat) + return seg_logits diff --git a/Pointcept/pointcept/models/point_transformer_v2/point_transformer_v2m2_base.py b/Pointcept/pointcept/models/point_transformer_v2/point_transformer_v2m2_base.py new file mode 100644 index 0000000000000000000000000000000000000000..dec45ff92504cf184505d0cad96d30346613df46 --- /dev/null +++ b/Pointcept/pointcept/models/point_transformer_v2/point_transformer_v2m2_base.py @@ -0,0 +1,576 @@ +""" +Point Transformer V2 Mode 2 (recommend) + +Disable Grouped Linear + +Author: Xiaoyang Wu 
(xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from copy import deepcopy +import math +import torch +import torch.nn as nn +from torch.utils.checkpoint import checkpoint +from torch_geometric.nn.pool import voxel_grid +from torch_scatter import segment_csr + +import einops +from timm.models.layers import DropPath +import pointops + +from pointcept.models.builder import MODELS +from pointcept.models.utils import offset2batch, batch2offset + + +class PointBatchNorm(nn.Module): + """ + Batch Normalization for Point Clouds data in shape of [B*N, C], [B*N, L, C] + """ + + def __init__(self, embed_channels): + super().__init__() + self.norm = nn.BatchNorm1d(embed_channels) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + if input.dim() == 3: + return ( + self.norm(input.transpose(1, 2).contiguous()) + .transpose(1, 2) + .contiguous() + ) + elif input.dim() == 2: + return self.norm(input) + else: + raise NotImplementedError + + +class GroupedVectorAttention(nn.Module): + def __init__( + self, + embed_channels, + groups, + attn_drop_rate=0.0, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + ): + super(GroupedVectorAttention, self).__init__() + self.embed_channels = embed_channels + self.groups = groups + assert embed_channels % groups == 0 + self.attn_drop_rate = attn_drop_rate + self.qkv_bias = qkv_bias + self.pe_multiplier = pe_multiplier + self.pe_bias = pe_bias + + self.linear_q = nn.Sequential( + nn.Linear(embed_channels, embed_channels, bias=qkv_bias), + PointBatchNorm(embed_channels), + nn.ReLU(inplace=True), + ) + self.linear_k = nn.Sequential( + nn.Linear(embed_channels, embed_channels, bias=qkv_bias), + PointBatchNorm(embed_channels), + nn.ReLU(inplace=True), + ) + + self.linear_v = nn.Linear(embed_channels, embed_channels, bias=qkv_bias) + + if self.pe_multiplier: + self.linear_p_multiplier = nn.Sequential( + nn.Linear(3, embed_channels), + PointBatchNorm(embed_channels), + nn.ReLU(inplace=True), + nn.Linear(embed_channels, embed_channels), + ) + if self.pe_bias: + self.linear_p_bias = nn.Sequential( + nn.Linear(3, embed_channels), + PointBatchNorm(embed_channels), + nn.ReLU(inplace=True), + nn.Linear(embed_channels, embed_channels), + ) + self.weight_encoding = nn.Sequential( + nn.Linear(embed_channels, groups), + PointBatchNorm(groups), + nn.ReLU(inplace=True), + nn.Linear(groups, groups), + ) + self.softmax = nn.Softmax(dim=1) + self.attn_drop = nn.Dropout(attn_drop_rate) + + def forward(self, feat, coord, reference_index): + query, key, value = ( + self.linear_q(feat), + self.linear_k(feat), + self.linear_v(feat), + ) + key = pointops.grouping(reference_index, key, coord, with_xyz=True) + value = pointops.grouping(reference_index, value, coord, with_xyz=False) + pos, key = key[:, :, 0:3], key[:, :, 3:] + relation_qk = key - query.unsqueeze(1) + if self.pe_multiplier: + pem = self.linear_p_multiplier(pos) + relation_qk = relation_qk * pem + if self.pe_bias: + peb = self.linear_p_bias(pos) + relation_qk = relation_qk + peb + value = value + peb + + weight = self.weight_encoding(relation_qk) + weight = self.attn_drop(self.softmax(weight)) + + mask = torch.sign(reference_index + 1) + weight = torch.einsum("n s g, n s -> n s g", weight, mask) + value = einops.rearrange(value, "n ns (g i) -> n ns g i", g=self.groups) + feat = torch.einsum("n s g i, n s g -> n g i", value, weight) + feat = einops.rearrange(feat, "n g i -> n (g i)") + return feat + + +class Block(nn.Module): + def __init__( + self, + embed_channels, + groups, + 
qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.0, + enable_checkpoint=False, + ): + super(Block, self).__init__() + self.attn = GroupedVectorAttention( + embed_channels=embed_channels, + groups=groups, + qkv_bias=qkv_bias, + attn_drop_rate=attn_drop_rate, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + ) + self.fc1 = nn.Linear(embed_channels, embed_channels, bias=False) + self.fc3 = nn.Linear(embed_channels, embed_channels, bias=False) + self.norm1 = PointBatchNorm(embed_channels) + self.norm2 = PointBatchNorm(embed_channels) + self.norm3 = PointBatchNorm(embed_channels) + self.act = nn.ReLU(inplace=True) + self.enable_checkpoint = enable_checkpoint + self.drop_path = ( + DropPath(drop_path_rate) if drop_path_rate > 0.0 else nn.Identity() + ) + + def forward(self, points, reference_index): + coord, feat, offset = points + identity = feat + feat = self.act(self.norm1(self.fc1(feat))) + feat = ( + self.attn(feat, coord, reference_index) + if not self.enable_checkpoint + else checkpoint(self.attn, feat, coord, reference_index) + ) + feat = self.act(self.norm2(feat)) + feat = self.norm3(self.fc3(feat)) + feat = identity + self.drop_path(feat) + feat = self.act(feat) + return [coord, feat, offset] + + +class BlockSequence(nn.Module): + def __init__( + self, + depth, + embed_channels, + groups, + neighbours=16, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.0, + enable_checkpoint=False, + ): + super(BlockSequence, self).__init__() + + if isinstance(drop_path_rate, list): + drop_path_rates = drop_path_rate + assert len(drop_path_rates) == depth + elif isinstance(drop_path_rate, float): + drop_path_rates = [deepcopy(drop_path_rate) for _ in range(depth)] + else: + drop_path_rates = [0.0 for _ in range(depth)] + + self.neighbours = neighbours + self.blocks = nn.ModuleList() + for i in range(depth): + block = Block( + embed_channels=embed_channels, + groups=groups, + qkv_bias=qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate, + drop_path_rate=drop_path_rates[i], + enable_checkpoint=enable_checkpoint, + ) + self.blocks.append(block) + + def forward(self, points): + coord, feat, offset = points + # reference index query of neighbourhood attention + # for windows attention, modify reference index query method + reference_index, _ = pointops.knn_query(self.neighbours, coord, offset) + for block in self.blocks: + points = block(points, reference_index) + return points + + +class GridPool(nn.Module): + """ + Partition-based Pooling (Grid Pooling) + """ + + def __init__(self, in_channels, out_channels, grid_size, bias=False): + super(GridPool, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.grid_size = grid_size + + self.fc = nn.Linear(in_channels, out_channels, bias=bias) + self.norm = PointBatchNorm(out_channels) + self.act = nn.ReLU(inplace=True) + + def forward(self, points, start=None): + coord, feat, offset = points + batch = offset2batch(offset) + feat = self.act(self.norm(self.fc(feat))) + start = ( + segment_csr( + coord, + torch.cat([batch.new_zeros(1), torch.cumsum(batch.bincount(), dim=0)]), + reduce="min", + ) + if start is None + else start + ) + cluster = voxel_grid( + pos=coord - start[batch], size=self.grid_size, batch=batch, start=0 + ) + unique, cluster, counts = torch.unique( + cluster, sorted=True, return_inverse=True, return_counts=True + ) + _, sorted_cluster_indices = torch.sort(cluster) + idx_ptr 
= torch.cat([counts.new_zeros(1), torch.cumsum(counts, dim=0)]) + coord = segment_csr(coord[sorted_cluster_indices], idx_ptr, reduce="mean") + feat = segment_csr(feat[sorted_cluster_indices], idx_ptr, reduce="max") + batch = batch[idx_ptr[:-1]] + offset = batch2offset(batch) + return [coord, feat, offset], cluster + + +class UnpoolWithSkip(nn.Module): + """ + Map Unpooling with skip connection + """ + + def __init__( + self, + in_channels, + skip_channels, + out_channels, + bias=True, + skip=True, + backend="map", + ): + super(UnpoolWithSkip, self).__init__() + self.in_channels = in_channels + self.skip_channels = skip_channels + self.out_channels = out_channels + self.skip = skip + self.backend = backend + assert self.backend in ["map", "interp"] + + self.proj = nn.Sequential( + nn.Linear(in_channels, out_channels, bias=bias), + PointBatchNorm(out_channels), + nn.ReLU(inplace=True), + ) + self.proj_skip = nn.Sequential( + nn.Linear(skip_channels, out_channels, bias=bias), + PointBatchNorm(out_channels), + nn.ReLU(inplace=True), + ) + + def forward(self, points, skip_points, cluster=None): + coord, feat, offset = points + skip_coord, skip_feat, skip_offset = skip_points + if self.backend == "map" and cluster is not None: + feat = self.proj(feat)[cluster] + else: + feat = pointops.interpolation( + coord, skip_coord, self.proj(feat), offset, skip_offset + ) + if self.skip: + feat = feat + self.proj_skip(skip_feat) + return [skip_coord, feat, skip_offset] + + +class Encoder(nn.Module): + def __init__( + self, + depth, + in_channels, + embed_channels, + groups, + grid_size=None, + neighbours=16, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=None, + drop_path_rate=None, + enable_checkpoint=False, + ): + super(Encoder, self).__init__() + + self.down = GridPool( + in_channels=in_channels, + out_channels=embed_channels, + grid_size=grid_size, + ) + + self.blocks = BlockSequence( + depth=depth, + embed_channels=embed_channels, + groups=groups, + neighbours=neighbours, + qkv_bias=qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate if attn_drop_rate is not None else 0.0, + drop_path_rate=drop_path_rate if drop_path_rate is not None else 0.0, + enable_checkpoint=enable_checkpoint, + ) + + def forward(self, points): + points, cluster = self.down(points) + return self.blocks(points), cluster + + +class Decoder(nn.Module): + def __init__( + self, + in_channels, + skip_channels, + embed_channels, + groups, + depth, + neighbours=16, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=None, + drop_path_rate=None, + enable_checkpoint=False, + unpool_backend="map", + ): + super(Decoder, self).__init__() + + self.up = UnpoolWithSkip( + in_channels=in_channels, + out_channels=embed_channels, + skip_channels=skip_channels, + backend=unpool_backend, + ) + + self.blocks = BlockSequence( + depth=depth, + embed_channels=embed_channels, + groups=groups, + neighbours=neighbours, + qkv_bias=qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate if attn_drop_rate is not None else 0.0, + drop_path_rate=drop_path_rate if drop_path_rate is not None else 0.0, + enable_checkpoint=enable_checkpoint, + ) + + def forward(self, points, skip_points, cluster): + points = self.up(points, skip_points, cluster) + return self.blocks(points) + + +class GVAPatchEmbed(nn.Module): + def __init__( + self, + depth, + in_channels, + embed_channels, + groups, + neighbours=16, + qkv_bias=True, + pe_multiplier=False, + 
pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.0, + enable_checkpoint=False, + ): + super(GVAPatchEmbed, self).__init__() + self.in_channels = in_channels + self.embed_channels = embed_channels + self.proj = nn.Sequential( + nn.Linear(in_channels, embed_channels, bias=False), + PointBatchNorm(embed_channels), + nn.ReLU(inplace=True), + ) + self.blocks = BlockSequence( + depth=depth, + embed_channels=embed_channels, + groups=groups, + neighbours=neighbours, + qkv_bias=qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate, + drop_path_rate=drop_path_rate, + enable_checkpoint=enable_checkpoint, + ) + + def forward(self, points): + coord, feat, offset = points + feat = self.proj(feat) + return self.blocks([coord, feat, offset]) + + +@MODELS.register_module("PT-v2m2") +class PointTransformerV2(nn.Module): + def __init__( + self, + in_channels, + num_classes, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.12, 0.24, 0.48), + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0, + enable_checkpoint=False, + unpool_backend="map", + ): + super(PointTransformerV2, self).__init__() + self.in_channels = in_channels + self.num_classes = num_classes + self.num_stages = len(enc_depths) + assert self.num_stages == len(dec_depths) + assert self.num_stages == len(enc_channels) + assert self.num_stages == len(dec_channels) + assert self.num_stages == len(enc_groups) + assert self.num_stages == len(dec_groups) + assert self.num_stages == len(enc_neighbours) + assert self.num_stages == len(dec_neighbours) + assert self.num_stages == len(grid_sizes) + self.patch_embed = GVAPatchEmbed( + in_channels=in_channels, + embed_channels=patch_embed_channels, + groups=patch_embed_groups, + depth=patch_embed_depth, + neighbours=patch_embed_neighbours, + qkv_bias=attn_qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate, + enable_checkpoint=enable_checkpoint, + ) + + enc_dp_rates = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(enc_depths)) + ] + dec_dp_rates = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(dec_depths)) + ] + enc_channels = [patch_embed_channels] + list(enc_channels) + dec_channels = list(dec_channels) + [enc_channels[-1]] + self.enc_stages = nn.ModuleList() + self.dec_stages = nn.ModuleList() + for i in range(self.num_stages): + enc = Encoder( + depth=enc_depths[i], + in_channels=enc_channels[i], + embed_channels=enc_channels[i + 1], + groups=enc_groups[i], + grid_size=grid_sizes[i], + neighbours=enc_neighbours[i], + qkv_bias=attn_qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate, + drop_path_rate=enc_dp_rates[ + sum(enc_depths[:i]) : sum(enc_depths[: i + 1]) + ], + enable_checkpoint=enable_checkpoint, + ) + dec = Decoder( + depth=dec_depths[i], + in_channels=dec_channels[i + 1], + skip_channels=enc_channels[i], + embed_channels=dec_channels[i], + groups=dec_groups[i], + neighbours=dec_neighbours[i], + qkv_bias=attn_qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate, + drop_path_rate=dec_dp_rates[ + sum(dec_depths[:i]) : sum(dec_depths[: i + 
1]) + ], + enable_checkpoint=enable_checkpoint, + unpool_backend=unpool_backend, + ) + self.enc_stages.append(enc) + self.dec_stages.append(dec) + self.seg_head = ( + nn.Sequential( + nn.Linear(dec_channels[0], dec_channels[0]), + PointBatchNorm(dec_channels[0]), + nn.ReLU(inplace=True), + nn.Linear(dec_channels[0], num_classes), + ) + if num_classes > 0 + else nn.Identity() + ) + + def forward(self, data_dict): + coord = data_dict["coord"] + feat = data_dict["feat"] + offset = data_dict["offset"].int() + + # a batch of point cloud is a list of coord, feat and offset + points = [coord, feat, offset] + points = self.patch_embed(points) + skips = [[points]] + for i in range(self.num_stages): + points, cluster = self.enc_stages[i](points) + skips[-1].append(cluster) # record grid cluster of pooling + skips.append([points]) # record points info of current stage + + points = skips.pop(-1)[0] # unpooling points info in the last enc stage + for i in reversed(range(self.num_stages)): + skip_points, cluster = skips.pop(-1) + points = self.dec_stages[i](points, skip_points, cluster) + coord, feat, offset = points + seg_logits = self.seg_head(feat) + return seg_logits diff --git a/Pointcept/pointcept/models/point_transformer_v2/point_transformer_v2m3_pdnorm.py b/Pointcept/pointcept/models/point_transformer_v2/point_transformer_v2m3_pdnorm.py new file mode 100644 index 0000000000000000000000000000000000000000..b944f19f2b13a73ae01bad4c094c51e4213896c4 --- /dev/null +++ b/Pointcept/pointcept/models/point_transformer_v2/point_transformer_v2m3_pdnorm.py @@ -0,0 +1,659 @@ +""" +Point Transformer V2M3 + +Enable Prompt-Driven Normalization for Point Prompt Training + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from functools import partial +from copy import deepcopy +import math +import torch +import torch.nn as nn +from torch.utils.checkpoint import checkpoint +from torch_geometric.nn.pool import voxel_grid +from torch_scatter import segment_csr + +import einops +from timm.models.layers import DropPath +import pointops + +from pointcept.models.builder import MODELS +from pointcept.models.utils import offset2batch, batch2offset + + +class PDBatchNorm(torch.nn.Module): + def __init__( + self, + num_features, + context_channels=256, + eps=1e-3, + momentum=0.01, + conditions=("ScanNet", "S3DIS", "Structured3D"), + decouple=True, + adaptive=False, + affine=True, + ): + super().__init__() + self.conditions = conditions + self.decouple = decouple + self.adaptive = adaptive + self.affine = affine + if self.decouple: + self.bns = nn.ModuleList( + [ + nn.BatchNorm1d( + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + ) + for _ in conditions + ] + ) + else: + self.bn = nn.BatchNorm1d( + num_features=num_features, eps=eps, momentum=momentum, affine=affine + ) + if self.adaptive: + self.modulation = nn.Sequential( + nn.SiLU(), nn.Linear(context_channels, 2 * num_features, bias=True) + ) + + def forward(self, feat, condition=None, context=None): + if self.decouple: + assert condition in self.conditions + bn = self.bns[self.conditions.index(condition)] + else: + bn = self.bn + feat = bn(feat) + if self.adaptive: + assert context is not None + shift, scale = self.modulation(context).chunk(2, dim=1) + feat = feat * (1.0 + scale) + shift + return feat + + +class PointBatchNorm(nn.Module): + """ + Batch Normalization for Point Clouds data in shape of [B*N, C], [B*N, L, C] + """ + + def __init__(self, embed_channels): + 
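+        # Rough usage sketch for the PDBatchNorm above (values hypothetical):
+        #   norm = PDBatchNorm(num_features=64, decouple=True, adaptive=True)
+        #   feat = norm(feat, condition="ScanNet", context=ctx)  # ctx: (N, 256)
+        # decouple keeps a separate BatchNorm1d per dataset condition, while
+        # adaptive applies a FiLM-style feat * (1 + scale) + shift modulation
+        # computed from the context embedding.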
super().__init__() + self.norm = nn.BatchNorm1d(embed_channels) + + def forward(self, input: torch.Tensor) -> torch.Tensor: + if input.dim() == 3: + return ( + self.norm(input.transpose(1, 2).contiguous()) + .transpose(1, 2) + .contiguous() + ) + elif input.dim() == 2: + return self.norm(input) + else: + raise NotImplementedError + + +class GroupedVectorAttention(nn.Module): + def __init__( + self, + embed_channels, + groups, + attn_drop_rate=0.0, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + ): + super(GroupedVectorAttention, self).__init__() + self.embed_channels = embed_channels + self.groups = groups + assert embed_channels % groups == 0 + self.attn_drop_rate = attn_drop_rate + self.qkv_bias = qkv_bias + self.pe_multiplier = pe_multiplier + self.pe_bias = pe_bias + + self.linear_q = nn.Sequential( + nn.Linear(embed_channels, embed_channels, bias=qkv_bias), + PointBatchNorm(embed_channels), + nn.ReLU(inplace=True), + ) + self.linear_k = nn.Sequential( + nn.Linear(embed_channels, embed_channels, bias=qkv_bias), + PointBatchNorm(embed_channels), + nn.ReLU(inplace=True), + ) + + self.linear_v = nn.Linear(embed_channels, embed_channels, bias=qkv_bias) + + if self.pe_multiplier: + self.linear_p_multiplier = nn.Sequential( + nn.Linear(3, embed_channels), + PointBatchNorm(embed_channels), + nn.ReLU(inplace=True), + nn.Linear(embed_channels, embed_channels), + ) + if self.pe_bias: + self.linear_p_bias = nn.Sequential( + nn.Linear(3, embed_channels), + PointBatchNorm(embed_channels), + nn.ReLU(inplace=True), + nn.Linear(embed_channels, embed_channels), + ) + self.weight_encoding = nn.Sequential( + nn.Linear(embed_channels, groups), + PointBatchNorm(groups), + nn.ReLU(inplace=True), + nn.Linear(groups, groups), + ) + self.softmax = nn.Softmax(dim=1) + self.attn_drop = nn.Dropout(attn_drop_rate) + + def forward(self, feat, coord, reference_index): + query, key, value = ( + self.linear_q(feat), + self.linear_k(feat), + self.linear_v(feat), + ) + key = pointops.grouping(reference_index, key, coord, with_xyz=True) + value = pointops.grouping(reference_index, value, coord, with_xyz=False) + pos, key = key[:, :, 0:3], key[:, :, 3:] + relation_qk = key - query.unsqueeze(1) + if self.pe_multiplier: + pem = self.linear_p_multiplier(pos) + relation_qk = relation_qk * pem + if self.pe_bias: + peb = self.linear_p_bias(pos) + relation_qk = relation_qk + peb + value = value + peb + + weight = self.weight_encoding(relation_qk) + weight = self.attn_drop(self.softmax(weight)) + + mask = torch.sign(reference_index + 1) + weight = torch.einsum("n s g, n s -> n s g", weight, mask) + value = einops.rearrange(value, "n ns (g i) -> n ns g i", g=self.groups) + feat = torch.einsum("n s g i, n s g -> n g i", value, weight) + feat = einops.rearrange(feat, "n g i -> n (g i)") + return feat + + +class Block(nn.Module): + def __init__( + self, + embed_channels, + groups, + norm_fn=None, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.0, + enable_checkpoint=False, + ): + super(Block, self).__init__() + self.attn = GroupedVectorAttention( + embed_channels=embed_channels, + groups=groups, + qkv_bias=qkv_bias, + attn_drop_rate=attn_drop_rate, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + ) + + assert norm_fn is not None + + self.fc1 = nn.Linear(embed_channels, embed_channels, bias=False) + self.fc3 = nn.Linear(embed_channels, embed_channels, bias=False) + self.norm1 = norm_fn(embed_channels) + self.norm2 = norm_fn(embed_channels) + self.norm3 = 
norm_fn(embed_channels) + self.act = nn.ReLU(inplace=True) + self.enable_checkpoint = enable_checkpoint + self.drop_path = ( + DropPath(drop_path_rate) if drop_path_rate > 0.0 else nn.Identity() + ) + + def forward(self, points, reference_index): + coord, feat, offset, condition, context = points + identity = feat + feat = self.act(self.norm1(self.fc1(feat), condition, context)) + feat = ( + self.attn(feat, coord, reference_index) + if not self.enable_checkpoint + else checkpoint(self.attn, feat, coord, reference_index) + ) + feat = self.act(self.norm2(feat, condition, context)) + feat = self.norm3(self.fc3(feat), condition, context) + feat = identity + self.drop_path(feat) + feat = self.act(feat) + return [coord, feat, offset, condition, context] + + +class BlockSequence(nn.Module): + def __init__( + self, + depth, + embed_channels, + groups, + neighbours=16, + norm_fn=None, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.0, + enable_checkpoint=False, + ): + super(BlockSequence, self).__init__() + + if isinstance(drop_path_rate, list): + drop_path_rates = drop_path_rate + assert len(drop_path_rates) == depth + elif isinstance(drop_path_rate, float): + drop_path_rates = [deepcopy(drop_path_rate) for _ in range(depth)] + else: + drop_path_rates = [0.0 for _ in range(depth)] + + self.neighbours = neighbours + self.blocks = nn.ModuleList() + for i in range(depth): + block = Block( + embed_channels=embed_channels, + groups=groups, + norm_fn=norm_fn, + qkv_bias=qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate, + drop_path_rate=drop_path_rates[i], + enable_checkpoint=enable_checkpoint, + ) + self.blocks.append(block) + + def forward(self, points): + coord, feat, offset, condition, context = points + # reference index query of neighbourhood attention + # for windows attention, modify reference index query method + reference_index, _ = pointops.knn_query(self.neighbours, coord, offset) + for block in self.blocks: + points = block(points, reference_index) + return points + + +class GridPool(nn.Module): + """ + Partition-based Pooling (Grid Pooling) + """ + + def __init__(self, in_channels, out_channels, grid_size, norm_fn, bias=False): + super(GridPool, self).__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.grid_size = grid_size + + self.fc = nn.Linear(in_channels, out_channels, bias=bias) + self.norm = norm_fn(out_channels) + self.act = nn.ReLU(inplace=True) + + def forward(self, points, start=None): + coord, feat, offset, condition, context = points + batch = offset2batch(offset) + feat = self.act(self.norm(self.fc(feat), condition, context)) + start = ( + segment_csr( + coord, + torch.cat([batch.new_zeros(1), torch.cumsum(batch.bincount(), dim=0)]), + reduce="min", + ) + if start is None + else start + ) + cluster = voxel_grid( + pos=coord - start[batch], size=self.grid_size, batch=batch, start=0 + ) + unique, cluster, counts = torch.unique( + cluster, sorted=True, return_inverse=True, return_counts=True + ) + _, sorted_cluster_indices = torch.sort(cluster) + idx_ptr = torch.cat([counts.new_zeros(1), torch.cumsum(counts, dim=0)]) + coord = segment_csr(coord[sorted_cluster_indices], idx_ptr, reduce="mean") + feat = segment_csr(feat[sorted_cluster_indices], idx_ptr, reduce="max") + batch = batch[idx_ptr[:-1]] + offset = batch2offset(batch) + return [coord, feat, offset, condition, context], cluster + + +class UnpoolWithSkip(nn.Module): + """ + Map Unpooling with skip 
connection + """ + + def __init__( + self, + in_channels, + skip_channels, + out_channels, + norm_fn, + bias=True, + skip=True, + backend="map", + ): + super(UnpoolWithSkip, self).__init__() + self.in_channels = in_channels + self.skip_channels = skip_channels + self.out_channels = out_channels + self.skip = skip + self.backend = backend + assert self.backend in ["map", "interp"] + + self.proj_linear = nn.Linear(in_channels, out_channels, bias=bias) + self.proj_norm = norm_fn(out_channels) + self.proj_act = nn.ReLU(inplace=True) + + self.proj_skip_linear = nn.Linear(skip_channels, out_channels, bias=bias) + self.proj_skip_norm = norm_fn(out_channels) + self.proj_skip_act = nn.ReLU(inplace=True) + + def forward(self, points, skip_points, cluster=None): + coord, feat, offset, condition, context = points + skip_coord, skip_feat, skip_offset, _, _ = skip_points + feat = self.proj_act(self.proj_norm(self.proj_linear(feat), condition, context)) + if self.backend == "map" and cluster is not None: + feat = feat[cluster] + else: + feat = pointops.interpolation(coord, skip_coord, feat, offset, skip_offset) + if self.skip: + feat = feat + self.proj_skip_act( + self.proj_skip_norm( + self.proj_skip_linear(skip_feat), condition, context + ) + ) + return [skip_coord, feat, skip_offset, condition, context] + + +class Encoder(nn.Module): + def __init__( + self, + depth, + in_channels, + embed_channels, + groups, + norm_fn, + grid_size=None, + neighbours=16, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=None, + drop_path_rate=None, + enable_checkpoint=False, + ): + super(Encoder, self).__init__() + + self.down = GridPool( + in_channels=in_channels, + out_channels=embed_channels, + grid_size=grid_size, + norm_fn=norm_fn, + ) + + self.blocks = BlockSequence( + depth=depth, + embed_channels=embed_channels, + groups=groups, + neighbours=neighbours, + norm_fn=norm_fn, + qkv_bias=qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate if attn_drop_rate is not None else 0.0, + drop_path_rate=drop_path_rate if drop_path_rate is not None else 0.0, + enable_checkpoint=enable_checkpoint, + ) + + def forward(self, points): + points, cluster = self.down(points) + return self.blocks(points), cluster + + +class Decoder(nn.Module): + def __init__( + self, + in_channels, + skip_channels, + embed_channels, + groups, + depth, + norm_fn, + neighbours=16, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=None, + drop_path_rate=None, + enable_checkpoint=False, + unpool_backend="map", + ): + super(Decoder, self).__init__() + + self.up = UnpoolWithSkip( + in_channels=in_channels, + out_channels=embed_channels, + skip_channels=skip_channels, + backend=unpool_backend, + norm_fn=norm_fn, + ) + + self.blocks = BlockSequence( + depth=depth, + embed_channels=embed_channels, + groups=groups, + neighbours=neighbours, + norm_fn=norm_fn, + qkv_bias=qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate if attn_drop_rate is not None else 0.0, + drop_path_rate=drop_path_rate if drop_path_rate is not None else 0.0, + enable_checkpoint=enable_checkpoint, + ) + + def forward(self, points, skip_points, cluster): + points = self.up(points, skip_points, cluster) + return self.blocks(points) + + +class GVAPatchEmbed(nn.Module): + def __init__( + self, + depth, + in_channels, + embed_channels, + groups, + norm_fn, + neighbours=16, + qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.0, 
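+        # Note: enable_checkpoint below trades compute for memory; each Block
+        # then re-runs its attention under torch.utils.checkpoint.checkpoint
+        # during backward instead of caching activations.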
+ enable_checkpoint=False, + ): + super(GVAPatchEmbed, self).__init__() + self.in_channels = in_channels + self.embed_channels = embed_channels + self.proj_linear = nn.Linear(in_channels, embed_channels, bias=False) + self.proj_norm = norm_fn(embed_channels) + self.proj_act = nn.ReLU(inplace=True) + self.blocks = BlockSequence( + depth=depth, + embed_channels=embed_channels, + groups=groups, + neighbours=neighbours, + norm_fn=norm_fn, + qkv_bias=qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate, + drop_path_rate=drop_path_rate, + enable_checkpoint=enable_checkpoint, + ) + + def forward(self, points): + coord, feat, offset, condition, context = points + feat = self.proj_act(self.proj_norm(self.proj_linear(feat), condition, context)) + return self.blocks([coord, feat, offset, condition, context]) + + +@MODELS.register_module("PT-v2m3") +class PointTransformerV2(nn.Module): + def __init__( + self, + in_channels, + num_classes, + patch_embed_depth=1, + patch_embed_channels=48, + patch_embed_groups=6, + patch_embed_neighbours=8, + enc_depths=(2, 2, 6, 2), + enc_channels=(96, 192, 384, 512), + enc_groups=(12, 24, 48, 64), + enc_neighbours=(16, 16, 16, 16), + dec_depths=(1, 1, 1, 1), + dec_channels=(48, 96, 192, 384), + dec_groups=(6, 12, 24, 48), + dec_neighbours=(16, 16, 16, 16), + grid_sizes=(0.06, 0.12, 0.24, 0.48), + attn_qkv_bias=True, + pe_multiplier=False, + pe_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0, + enable_checkpoint=False, + unpool_backend="map", + context_channels=256, + conditions=("ScanNet", "S3DIS", "Structured3D"), + norm_decouple=True, + norm_adaptive=True, + norm_affine=False, + ): + super(PointTransformerV2, self).__init__() + self.in_channels = in_channels + self.num_classes = num_classes + self.num_stages = len(enc_depths) + assert self.num_stages == len(dec_depths) + assert self.num_stages == len(enc_channels) + assert self.num_stages == len(dec_channels) + assert self.num_stages == len(enc_groups) + assert self.num_stages == len(dec_groups) + assert self.num_stages == len(enc_neighbours) + assert self.num_stages == len(dec_neighbours) + assert self.num_stages == len(grid_sizes) + + norm_fn = partial( + PDBatchNorm, + eps=1e-3, + momentum=0.01, + conditions=conditions, + context_channels=context_channels, + decouple=norm_decouple, + adaptive=norm_adaptive, + affine=norm_affine, + ) + + self.patch_embed = GVAPatchEmbed( + in_channels=in_channels, + embed_channels=patch_embed_channels, + groups=patch_embed_groups, + depth=patch_embed_depth, + neighbours=patch_embed_neighbours, + norm_fn=norm_fn, + qkv_bias=attn_qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate, + enable_checkpoint=enable_checkpoint, + ) + + enc_dp_rates = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(enc_depths)) + ] + dec_dp_rates = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(dec_depths)) + ] + enc_channels = [patch_embed_channels] + list(enc_channels) + dec_channels = list(dec_channels) + [enc_channels[-1]] + self.enc_stages = nn.ModuleList() + self.dec_stages = nn.ModuleList() + for i in range(self.num_stages): + enc = Encoder( + depth=enc_depths[i], + in_channels=enc_channels[i], + embed_channels=enc_channels[i + 1], + groups=enc_groups[i], + grid_size=grid_sizes[i], + neighbours=enc_neighbours[i], + norm_fn=norm_fn, + qkv_bias=attn_qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate, + drop_path_rate=enc_dp_rates[ + sum(enc_depths[:i]) : 
sum(enc_depths[: i + 1]) + ], + enable_checkpoint=enable_checkpoint, + ) + dec = Decoder( + depth=dec_depths[i], + in_channels=dec_channels[i + 1], + skip_channels=enc_channels[i], + embed_channels=dec_channels[i], + groups=dec_groups[i], + neighbours=dec_neighbours[i], + norm_fn=norm_fn, + qkv_bias=attn_qkv_bias, + pe_multiplier=pe_multiplier, + pe_bias=pe_bias, + attn_drop_rate=attn_drop_rate, + drop_path_rate=dec_dp_rates[ + sum(dec_depths[:i]) : sum(dec_depths[: i + 1]) + ], + enable_checkpoint=enable_checkpoint, + unpool_backend=unpool_backend, + ) + self.enc_stages.append(enc) + self.dec_stages.append(dec) + self.seg_head = ( + nn.Sequential(nn.Linear(dec_channels[0], num_classes)) + if num_classes > 0 + else nn.Identity() + ) + + def forward(self, data_dict): + coord = data_dict["coord"] + feat = data_dict["feat"] + offset = data_dict["offset"].int() + condition = data_dict["condition"][0] + context = data_dict["context"] if "context" in data_dict.keys() else None + + # a batch of point cloud is a list of coord, feat and offset + points = [coord, feat, offset, condition, context] + points = self.patch_embed(points) + skips = [[points]] + for i in range(self.num_stages): + points, cluster = self.enc_stages[i](points) + skips[-1].append(cluster) # record grid cluster of pooling + skips.append([points]) # record points info of current stage + + points = skips.pop(-1)[0] # unpooling points info in the last enc stage + for i in reversed(range(self.num_stages)): + skip_points, cluster = skips.pop(-1) + points = self.dec_stages[i](points, skip_points, cluster) + coord, feat, offset, _, _ = points + seg_logits = self.seg_head(feat) + return seg_logits diff --git a/Pointcept/pointcept/models/point_transformer_v3/__init__.py b/Pointcept/pointcept/models/point_transformer_v3/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5fe25f32abaf4d60241dcf21507f85a47e46f070 --- /dev/null +++ b/Pointcept/pointcept/models/point_transformer_v3/__init__.py @@ -0,0 +1 @@ +from .point_transformer_v3m1_base import * diff --git a/Pointcept/pointcept/models/point_transformer_v3/point_transformer_v3m1_base.py b/Pointcept/pointcept/models/point_transformer_v3/point_transformer_v3m1_base.py new file mode 100644 index 0000000000000000000000000000000000000000..f9f567162dc424324ee5c30a0803fe3ea465f9b1 --- /dev/null +++ b/Pointcept/pointcept/models/point_transformer_v3/point_transformer_v3m1_base.py @@ -0,0 +1,714 @@ +""" +Point Transformer - V3 Mode1 + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
+""" + +from functools import partial +from addict import Dict +import math +import torch +import torch.nn as nn +import spconv.pytorch as spconv +import torch_scatter +from timm.models.layers import DropPath + +try: + import flash_attn +except ImportError: + flash_attn = None + +from pointcept.models.point_prompt_training import PDNorm +from pointcept.models.builder import MODELS +from pointcept.models.utils.misc import offset2bincount +from pointcept.models.utils.structure import Point +from pointcept.models.modules import PointModule, PointSequential + + +class RPE(torch.nn.Module): + def __init__(self, patch_size, num_heads): + super().__init__() + self.patch_size = patch_size + self.num_heads = num_heads + self.pos_bnd = int((4 * patch_size) ** (1 / 3) * 2) + self.rpe_num = 2 * self.pos_bnd + 1 + self.rpe_table = torch.nn.Parameter(torch.zeros(3 * self.rpe_num, num_heads)) + torch.nn.init.trunc_normal_(self.rpe_table, std=0.02) + + def forward(self, coord): + idx = ( + coord.clamp(-self.pos_bnd, self.pos_bnd) # clamp into bnd + + self.pos_bnd # relative position to positive index + + torch.arange(3, device=coord.device) * self.rpe_num # x, y, z stride + ) + out = self.rpe_table.index_select(0, idx.reshape(-1)) + out = out.view(idx.shape + (-1,)).sum(3) + out = out.permute(0, 3, 1, 2) # (N, K, K, H) -> (N, H, K, K) + return out + + +class SerializedAttention(PointModule): + def __init__( + self, + channels, + num_heads, + patch_size, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + order_index=0, + enable_rpe=False, + enable_flash=True, + upcast_attention=True, + upcast_softmax=True, + ): + super().__init__() + assert channels % num_heads == 0 + self.channels = channels + self.num_heads = num_heads + self.scale = qk_scale or (channels // num_heads) ** -0.5 + self.order_index = order_index + self.upcast_attention = upcast_attention + self.upcast_softmax = upcast_softmax + self.enable_rpe = enable_rpe + self.enable_flash = enable_flash + if enable_flash: + assert ( + enable_rpe is False + ), "Set enable_rpe to False when enable Flash Attention" + assert ( + upcast_attention is False + ), "Set upcast_attention to False when enable Flash Attention" + assert ( + upcast_softmax is False + ), "Set upcast_softmax to False when enable Flash Attention" + assert flash_attn is not None, "Make sure flash_attn is installed." 
+ self.patch_size = patch_size + self.attn_drop = attn_drop + else: + # when disable flash attention, we still don't want to use mask + # consequently, patch size will auto set to the + # min number of patch_size_max and number of points + self.patch_size_max = patch_size + self.patch_size = 0 + self.attn_drop = torch.nn.Dropout(attn_drop) + + self.qkv = torch.nn.Linear(channels, channels * 3, bias=qkv_bias) + self.proj = torch.nn.Linear(channels, channels) + self.proj_drop = torch.nn.Dropout(proj_drop) + self.softmax = torch.nn.Softmax(dim=-1) + self.rpe = RPE(patch_size, num_heads) if self.enable_rpe else None + + @torch.no_grad() + def get_rel_pos(self, point, order): + K = self.patch_size + rel_pos_key = f"rel_pos_{self.order_index}" + if rel_pos_key not in point.keys(): + grid_coord = point.grid_coord[order] + grid_coord = grid_coord.reshape(-1, K, 3) + point[rel_pos_key] = grid_coord.unsqueeze(2) - grid_coord.unsqueeze(1) + return point[rel_pos_key] + + @torch.no_grad() + def get_padding_and_inverse(self, point): + pad_key = "pad" + unpad_key = "unpad" + cu_seqlens_key = "cu_seqlens_key" + if ( + pad_key not in point.keys() + or unpad_key not in point.keys() + or cu_seqlens_key not in point.keys() + ): + offset = point.offset + bincount = offset2bincount(offset) + bincount_pad = ( + torch.div( + bincount + self.patch_size - 1, + self.patch_size, + rounding_mode="trunc", + ) + * self.patch_size + ) + # only pad point when num of points larger than patch_size + mask_pad = bincount > self.patch_size + bincount_pad = ~mask_pad * bincount + mask_pad * bincount_pad + _offset = nn.functional.pad(offset, (1, 0)) + _offset_pad = nn.functional.pad(torch.cumsum(bincount_pad, dim=0), (1, 0)) + pad = torch.arange(_offset_pad[-1], device=offset.device) + unpad = torch.arange(_offset[-1], device=offset.device) + cu_seqlens = [] + for i in range(len(offset)): + unpad[_offset[i] : _offset[i + 1]] += _offset_pad[i] - _offset[i] + if bincount[i] != bincount_pad[i]: + pad[ + _offset_pad[i + 1] + - self.patch_size + + (bincount[i] % self.patch_size) : _offset_pad[i + 1] + ] = pad[ + _offset_pad[i + 1] + - 2 * self.patch_size + + (bincount[i] % self.patch_size) : _offset_pad[i + 1] + - self.patch_size + ] + pad[_offset_pad[i] : _offset_pad[i + 1]] -= _offset_pad[i] - _offset[i] + cu_seqlens.append( + torch.arange( + _offset_pad[i], + _offset_pad[i + 1], + step=self.patch_size, + dtype=torch.int32, + device=offset.device, + ) + ) + point[pad_key] = pad + point[unpad_key] = unpad + point[cu_seqlens_key] = nn.functional.pad( + torch.concat(cu_seqlens), (0, 1), value=_offset_pad[-1] + ) + return point[pad_key], point[unpad_key], point[cu_seqlens_key] + + def forward(self, point): + if not self.enable_flash: + self.patch_size = min( + offset2bincount(point.offset).min().tolist(), self.patch_size_max + ) + + H = self.num_heads + K = self.patch_size + C = self.channels + + pad, unpad, cu_seqlens = self.get_padding_and_inverse(point) + + order = point.serialized_order[self.order_index][pad] + inverse = unpad[point.serialized_inverse[self.order_index]] + + # padding and reshape feat and batch for serialized point patch + qkv = self.qkv(point.feat)[order] + + if not self.enable_flash: + # encode and reshape qkv: (N', K, 3, H, C') => (3, N', H, K, C') + q, k, v = ( + qkv.reshape(-1, K, 3, H, C // H).permute(2, 0, 3, 1, 4).unbind(dim=0) + ) + # attn + if self.upcast_attention: + q = q.float() + k = k.float() + attn = (q * self.scale) @ k.transpose(-2, -1) # (N', H, K, K) + if self.enable_rpe: + attn = attn + 
self.rpe(self.get_rel_pos(point, order)) + if self.upcast_softmax: + attn = attn.float() + attn = self.softmax(attn) + attn = self.attn_drop(attn).to(qkv.dtype) + feat = (attn @ v).transpose(1, 2).reshape(-1, C) + else: + feat = flash_attn.flash_attn_varlen_qkvpacked_func( + qkv.half().reshape(-1, 3, H, C // H), + cu_seqlens, + max_seqlen=self.patch_size, + dropout_p=self.attn_drop if self.training else 0, + softmax_scale=self.scale, + ).reshape(-1, C) + feat = feat.to(qkv.dtype) + feat = feat[inverse] + + # ffn + feat = self.proj(feat) + feat = self.proj_drop(feat) + point.feat = feat + return point + + +class MLP(nn.Module): + def __init__( + self, + in_channels, + hidden_channels=None, + out_channels=None, + act_layer=nn.GELU, + drop=0.0, + ): + super().__init__() + out_channels = out_channels or in_channels + hidden_channels = hidden_channels or in_channels + self.fc1 = nn.Linear(in_channels, hidden_channels) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_channels, out_channels) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Block(PointModule): + def __init__( + self, + channels, + num_heads, + patch_size=48, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + norm_layer=nn.LayerNorm, + act_layer=nn.GELU, + pre_norm=True, + order_index=0, + cpe_indice_key=None, + enable_rpe=False, + enable_flash=True, + upcast_attention=True, + upcast_softmax=True, + ): + super().__init__() + self.channels = channels + self.pre_norm = pre_norm + + self.cpe = PointSequential( + spconv.SubMConv3d( + channels, + channels, + kernel_size=3, + bias=True, + indice_key=cpe_indice_key, + ), + nn.Linear(channels, channels), + norm_layer(channels), + ) + + self.norm1 = PointSequential(norm_layer(channels)) + self.attn = SerializedAttention( + channels=channels, + patch_size=patch_size, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + order_index=order_index, + enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + ) + self.norm2 = PointSequential(norm_layer(channels)) + self.mlp = PointSequential( + MLP( + in_channels=channels, + hidden_channels=int(channels * mlp_ratio), + out_channels=channels, + act_layer=act_layer, + drop=proj_drop, + ) + ) + self.drop_path = PointSequential( + DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + ) + + def forward(self, point: Point): + shortcut = point.feat + point = self.cpe(point) + point.feat = shortcut + point.feat + shortcut = point.feat + if self.pre_norm: + point = self.norm1(point) + point = self.drop_path(self.attn(point)) + point.feat = shortcut + point.feat + if not self.pre_norm: + point = self.norm1(point) + + shortcut = point.feat + if self.pre_norm: + point = self.norm2(point) + point = self.drop_path(self.mlp(point)) + point.feat = shortcut + point.feat + if not self.pre_norm: + point = self.norm2(point) + point.sparse_conv_feat = point.sparse_conv_feat.replace_feature(point.feat) + return point + + +class SerializedPooling(PointModule): + def __init__( + self, + in_channels, + out_channels, + stride=2, + norm_layer=None, + act_layer=None, + reduce="max", + shuffle_orders=True, + traceable=True, # record parent and cluster + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + + assert stride == 2 ** 
(math.ceil(stride) - 1).bit_length() # 2, 4, 8 + # TODO: add support to grid pool (any stride) + self.stride = stride + assert reduce in ["sum", "mean", "min", "max"] + self.reduce = reduce + self.shuffle_orders = shuffle_orders + self.traceable = traceable + + self.proj = nn.Linear(in_channels, out_channels) + if norm_layer is not None: + self.norm = PointSequential(norm_layer(out_channels)) + if act_layer is not None: + self.act = PointSequential(act_layer()) + + def forward(self, point: Point): + pooling_depth = (math.ceil(self.stride) - 1).bit_length() + if pooling_depth > point.serialized_depth: + pooling_depth = 0 + assert { + "serialized_code", + "serialized_order", + "serialized_inverse", + "serialized_depth", + }.issubset( + point.keys() + ), "Run point.serialization() point cloud before SerializedPooling" + + code = point.serialized_code >> pooling_depth * 3 + code_, cluster, counts = torch.unique( + code[0], + sorted=True, + return_inverse=True, + return_counts=True, + ) + # indices of point sorted by cluster, for torch_scatter.segment_csr + _, indices = torch.sort(cluster) + # index pointer for sorted point, for torch_scatter.segment_csr + idx_ptr = torch.cat([counts.new_zeros(1), torch.cumsum(counts, dim=0)]) + # head_indices of each cluster, for reduce attr e.g. code, batch + head_indices = indices[idx_ptr[:-1]] + # generate down code, order, inverse + code = code[:, head_indices] + order = torch.argsort(code) + inverse = torch.zeros_like(order).scatter_( + dim=1, + index=order, + src=torch.arange(0, code.shape[1], device=order.device).repeat( + code.shape[0], 1 + ), + ) + + if self.shuffle_orders: + perm = torch.randperm(code.shape[0]) + code = code[perm] + order = order[perm] + inverse = inverse[perm] + + # collect information + point_dict = Dict( + feat=torch_scatter.segment_csr( + self.proj(point.feat)[indices], idx_ptr, reduce=self.reduce + ), + coord=torch_scatter.segment_csr( + point.coord[indices], idx_ptr, reduce="mean" + ), + grid_coord=point.grid_coord[head_indices] >> pooling_depth, + serialized_code=code, + serialized_order=order, + serialized_inverse=inverse, + serialized_depth=point.serialized_depth - pooling_depth, + batch=point.batch[head_indices], + ) + + if "condition" in point.keys(): + point_dict["condition"] = point.condition + if "context" in point.keys(): + point_dict["context"] = point.context + + if self.traceable: + point_dict["pooling_inverse"] = cluster + point_dict["pooling_parent"] = point + point = Point(point_dict) + if self.norm is not None: + point = self.norm(point) + if self.act is not None: + point = self.act(point) + point.sparsify() + return point + + +class SerializedUnpooling(PointModule): + def __init__( + self, + in_channels, + skip_channels, + out_channels, + norm_layer=None, + act_layer=None, + traceable=False, # record parent and cluster + ): + super().__init__() + self.proj = PointSequential(nn.Linear(in_channels, out_channels)) + self.proj_skip = PointSequential(nn.Linear(skip_channels, out_channels)) + + if norm_layer is not None: + self.proj.add(norm_layer(out_channels)) + self.proj_skip.add(norm_layer(out_channels)) + + if act_layer is not None: + self.proj.add(act_layer()) + self.proj_skip.add(act_layer()) + + self.traceable = traceable + + def forward(self, point): + assert "pooling_parent" in point.keys() + assert "pooling_inverse" in point.keys() + parent = point.pop("pooling_parent") + inverse = point.pop("pooling_inverse") + point = self.proj(point) + parent = self.proj_skip(parent) + parent.feat = parent.feat + 
point.feat[inverse] + + if self.traceable: + parent["unpooling_parent"] = point + return parent + + +class Embedding(PointModule): + def __init__( + self, + in_channels, + embed_channels, + norm_layer=None, + act_layer=None, + ): + super().__init__() + self.in_channels = in_channels + self.embed_channels = embed_channels + + # TODO: check remove spconv + self.stem = PointSequential( + conv=spconv.SubMConv3d( + in_channels, + embed_channels, + kernel_size=5, + padding=1, + bias=False, + indice_key="stem", + ) + ) + if norm_layer is not None: + self.stem.add(norm_layer(embed_channels), name="norm") + if act_layer is not None: + self.stem.add(act_layer(), name="act") + + def forward(self, point: Point): + point = self.stem(point) + return point + + +@MODELS.register_module("PT-v3m1") +class PointTransformerV3(PointModule): + def __init__( + self, + in_channels=6, + order=("z", "z-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(48, 48, 48, 48, 48), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(48, 48, 48, 48), + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + pre_norm=True, + shuffle_orders=True, + enable_rpe=False, + enable_flash=True, + upcast_attention=False, + upcast_softmax=False, + cls_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ): + super().__init__() + self.num_stages = len(enc_depths) + self.order = [order] if isinstance(order, str) else order + self.cls_mode = cls_mode + self.shuffle_orders = shuffle_orders + + assert self.num_stages == len(stride) + 1 + assert self.num_stages == len(enc_depths) + assert self.num_stages == len(enc_channels) + assert self.num_stages == len(enc_num_head) + assert self.num_stages == len(enc_patch_size) + assert self.cls_mode or self.num_stages == len(dec_depths) + 1 + assert self.cls_mode or self.num_stages == len(dec_channels) + 1 + assert self.cls_mode or self.num_stages == len(dec_num_head) + 1 + assert self.cls_mode or self.num_stages == len(dec_patch_size) + 1 + + # norm layers + if pdnorm_bn: + bn_layer = partial( + PDNorm, + norm_layer=partial( + nn.BatchNorm1d, eps=1e-3, momentum=0.01, affine=pdnorm_affine + ), + conditions=pdnorm_conditions, + decouple=pdnorm_decouple, + adaptive=pdnorm_adaptive, + ) + else: + bn_layer = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01) + if pdnorm_ln: + ln_layer = partial( + PDNorm, + norm_layer=partial(nn.LayerNorm, elementwise_affine=pdnorm_affine), + conditions=pdnorm_conditions, + decouple=pdnorm_decouple, + adaptive=pdnorm_adaptive, + ) + else: + ln_layer = nn.LayerNorm + # activation layers + act_layer = nn.GELU + + self.embedding = Embedding( + in_channels=in_channels, + embed_channels=enc_channels[0], + norm_layer=bn_layer, + act_layer=act_layer, + ) + + # encoder + enc_drop_path = [ + x.item() for x in torch.linspace(0, drop_path, sum(enc_depths)) + ] + self.enc = PointSequential() + for s in range(self.num_stages): + enc_drop_path_ = enc_drop_path[ + sum(enc_depths[:s]) : sum(enc_depths[: s + 1]) + ] + enc = PointSequential() + if s > 0: + enc.add( + SerializedPooling( + in_channels=enc_channels[s - 1], + out_channels=enc_channels[s], + stride=stride[s - 1], + norm_layer=bn_layer, + act_layer=act_layer, + ), + name="down", + ) + for i in 
range(enc_depths[s]): + enc.add( + Block( + channels=enc_channels[s], + num_heads=enc_num_head[s], + patch_size=enc_patch_size[s], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + drop_path=enc_drop_path_[i], + norm_layer=ln_layer, + act_layer=act_layer, + pre_norm=pre_norm, + order_index=i % len(self.order), + cpe_indice_key=f"stage{s}", + enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + ), + name=f"block{i}", + ) + if len(enc) != 0: + self.enc.add(module=enc, name=f"enc{s}") + + # decoder + if not self.cls_mode: + dec_drop_path = [ + x.item() for x in torch.linspace(0, drop_path, sum(dec_depths)) + ] + self.dec = PointSequential() + dec_channels = list(dec_channels) + [enc_channels[-1]] + for s in reversed(range(self.num_stages - 1)): + dec_drop_path_ = dec_drop_path[ + sum(dec_depths[:s]) : sum(dec_depths[: s + 1]) + ] + dec_drop_path_.reverse() + dec = PointSequential() + dec.add( + SerializedUnpooling( + in_channels=dec_channels[s + 1], + skip_channels=enc_channels[s], + out_channels=dec_channels[s], + norm_layer=bn_layer, + act_layer=act_layer, + ), + name="up", + ) + for i in range(dec_depths[s]): + dec.add( + Block( + channels=dec_channels[s], + num_heads=dec_num_head[s], + patch_size=dec_patch_size[s], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + drop_path=dec_drop_path_[i], + norm_layer=ln_layer, + act_layer=act_layer, + pre_norm=pre_norm, + order_index=i % len(self.order), + cpe_indice_key=f"stage{s}", + enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + ), + name=f"block{i}", + ) + self.dec.add(module=dec, name=f"dec{s}") + + def forward(self, data_dict): + point = Point(data_dict) + point.serialization(order=self.order, shuffle_orders=self.shuffle_orders) + point.sparsify() + + point = self.embedding(point) + point = self.enc(point) + if not self.cls_mode: + point = self.dec(point) + # else: + # point.feat = torch_scatter.segment_csr( + # src=point.feat, + # indptr=nn.functional.pad(point.offset, (1, 0)), + # reduce="mean", + # ) + return point diff --git a/Pointcept/pointcept/models/sparse_unet/__init__.py b/Pointcept/pointcept/models/sparse_unet/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..aafc3859f43bd47da8cd966a57d4307cc771525f --- /dev/null +++ b/Pointcept/pointcept/models/sparse_unet/__init__.py @@ -0,0 +1,4 @@ +from .mink_unet import * +from .spconv_unet_v1m1_base import * +from .spconv_unet_v1m2_bn_momentum import * +from .spconv_unet_v1m3_pdnorm import * diff --git a/Pointcept/pointcept/models/sparse_unet/mink_unet.py b/Pointcept/pointcept/models/sparse_unet/mink_unet.py new file mode 100644 index 0000000000000000000000000000000000000000..1ff8a01d0500abc207b82a8252a8131b5e671469 --- /dev/null +++ b/Pointcept/pointcept/models/sparse_unet/mink_unet.py @@ -0,0 +1,442 @@ +""" +SparseUNet Driven by MinkowskiEngine + +Modified from chrischoy/SpatioTemporalSegmentation + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
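+
+Note: MinkowskiEngine is an optional dependency; the guarded import below
+leaves ME as None, and MinkUNetBase asserts at construction time when it is
+missing.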
+""" + +import torch +import torch.nn as nn + +try: + import MinkowskiEngine as ME +except ImportError: + ME = None + +from pointcept.models.builder import MODELS + + +def offset2batch(offset): + return ( + torch.cat( + [ + ( + torch.tensor([i] * (o - offset[i - 1])) + if i > 0 + else torch.tensor([i] * o) + ) + for i, o in enumerate(offset) + ], + dim=0, + ) + .long() + .to(offset.device) + ) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__( + self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + bn_momentum=0.1, + dimension=-1, + ): + super(BasicBlock, self).__init__() + assert dimension > 0 + + self.conv1 = ME.MinkowskiConvolution( + inplanes, + planes, + kernel_size=3, + stride=stride, + dilation=dilation, + dimension=dimension, + ) + self.norm1 = ME.MinkowskiBatchNorm(planes, momentum=bn_momentum) + self.conv2 = ME.MinkowskiConvolution( + planes, + planes, + kernel_size=3, + stride=1, + dilation=dilation, + dimension=dimension, + ) + self.norm2 = ME.MinkowskiBatchNorm(planes, momentum=bn_momentum) + self.relu = ME.MinkowskiReLU(inplace=True) + self.downsample = downsample + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.norm2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__( + self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + bn_momentum=0.1, + dimension=-1, + ): + super(Bottleneck, self).__init__() + assert dimension > 0 + + self.conv1 = ME.MinkowskiConvolution( + inplanes, planes, kernel_size=1, dimension=dimension + ) + self.norm1 = ME.MinkowskiBatchNorm(planes, momentum=bn_momentum) + + self.conv2 = ME.MinkowskiConvolution( + planes, + planes, + kernel_size=3, + stride=stride, + dilation=dilation, + dimension=dimension, + ) + self.norm2 = ME.MinkowskiBatchNorm(planes, momentum=bn_momentum) + + self.conv3 = ME.MinkowskiConvolution( + planes, planes * self.expansion, kernel_size=1, dimension=dimension + ) + self.norm3 = ME.MinkowskiBatchNorm( + planes * self.expansion, momentum=bn_momentum + ) + + self.relu = ME.MinkowskiReLU(inplace=True) + self.downsample = downsample + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.norm2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.norm3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class MinkUNetBase(nn.Module): + BLOCK = None + PLANES = None + DILATIONS = (1, 1, 1, 1, 1, 1, 1, 1) + LAYERS = (2, 2, 2, 2, 2, 2, 2, 2) + PLANES = (32, 64, 128, 256, 256, 128, 96, 96) + INIT_DIM = 32 + OUT_TENSOR_STRIDE = 1 + + def __init__(self, in_channels, out_channels, dimension=3): + super().__init__() + assert ME is not None, "Please follow `README.md` to install MinkowskiEngine.`" + self.D = dimension + assert self.BLOCK is not None + # Output of the first conv concated to conv6 + self.inplanes = self.INIT_DIM + self.conv0p1s1 = ME.MinkowskiConvolution( + in_channels, self.inplanes, kernel_size=5, dimension=self.D + ) + + self.bn0 = ME.MinkowskiBatchNorm(self.inplanes) + + self.conv1p1s2 = ME.MinkowskiConvolution( + self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=self.D + ) + self.bn1 = ME.MinkowskiBatchNorm(self.inplanes) + 
+ self.block1 = self._make_layer(self.BLOCK, self.PLANES[0], self.LAYERS[0]) + + self.conv2p2s2 = ME.MinkowskiConvolution( + self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=self.D + ) + self.bn2 = ME.MinkowskiBatchNorm(self.inplanes) + + self.block2 = self._make_layer(self.BLOCK, self.PLANES[1], self.LAYERS[1]) + + self.conv3p4s2 = ME.MinkowskiConvolution( + self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=self.D + ) + + self.bn3 = ME.MinkowskiBatchNorm(self.inplanes) + self.block3 = self._make_layer(self.BLOCK, self.PLANES[2], self.LAYERS[2]) + + self.conv4p8s2 = ME.MinkowskiConvolution( + self.inplanes, self.inplanes, kernel_size=2, stride=2, dimension=self.D + ) + self.bn4 = ME.MinkowskiBatchNorm(self.inplanes) + self.block4 = self._make_layer(self.BLOCK, self.PLANES[3], self.LAYERS[3]) + + self.convtr4p16s2 = ME.MinkowskiConvolutionTranspose( + self.inplanes, self.PLANES[4], kernel_size=2, stride=2, dimension=self.D + ) + self.bntr4 = ME.MinkowskiBatchNorm(self.PLANES[4]) + + self.inplanes = self.PLANES[4] + self.PLANES[2] * self.BLOCK.expansion + self.block5 = self._make_layer(self.BLOCK, self.PLANES[4], self.LAYERS[4]) + self.convtr5p8s2 = ME.MinkowskiConvolutionTranspose( + self.inplanes, self.PLANES[5], kernel_size=2, stride=2, dimension=self.D + ) + self.bntr5 = ME.MinkowskiBatchNorm(self.PLANES[5]) + + self.inplanes = self.PLANES[5] + self.PLANES[1] * self.BLOCK.expansion + self.block6 = self._make_layer(self.BLOCK, self.PLANES[5], self.LAYERS[5]) + self.convtr6p4s2 = ME.MinkowskiConvolutionTranspose( + self.inplanes, self.PLANES[6], kernel_size=2, stride=2, dimension=self.D + ) + self.bntr6 = ME.MinkowskiBatchNorm(self.PLANES[6]) + + self.inplanes = self.PLANES[6] + self.PLANES[0] * self.BLOCK.expansion + self.block7 = self._make_layer(self.BLOCK, self.PLANES[6], self.LAYERS[6]) + self.convtr7p2s2 = ME.MinkowskiConvolutionTranspose( + self.inplanes, self.PLANES[7], kernel_size=2, stride=2, dimension=self.D + ) + self.bntr7 = ME.MinkowskiBatchNorm(self.PLANES[7]) + + self.inplanes = self.PLANES[7] + self.INIT_DIM + self.block8 = self._make_layer(self.BLOCK, self.PLANES[7], self.LAYERS[7]) + + self.final = ME.MinkowskiConvolution( + self.PLANES[7] * self.BLOCK.expansion, + out_channels, + kernel_size=1, + bias=True, + dimension=self.D, + ) + self.relu = ME.MinkowskiReLU(inplace=True) + + self.weight_initialization() + + def weight_initialization(self): + for m in self.modules(): + if isinstance(m, ME.MinkowskiConvolution): + ME.utils.kaiming_normal_(m.kernel, mode="fan_out", nonlinearity="relu") + + if isinstance(m, ME.MinkowskiBatchNorm): + nn.init.constant_(m.bn.weight, 1) + nn.init.constant_(m.bn.bias, 0) + + def _make_layer(self, block, planes, blocks, stride=1, dilation=1, bn_momentum=0.1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + ME.MinkowskiConvolution( + self.inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + dimension=self.D, + ), + ME.MinkowskiBatchNorm(planes * block.expansion), + ) + layers = [] + layers.append( + block( + self.inplanes, + planes, + stride=stride, + dilation=dilation, + downsample=downsample, + dimension=self.D, + ) + ) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block( + self.inplanes, planes, stride=1, dilation=dilation, dimension=self.D + ) + ) + + return nn.Sequential(*layers) + + def forward(self, data_dict): + grid_coord = data_dict["grid_coord"] + feat = data_dict["feat"] 
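+        # `offset` holds cumulative point counts per sample, e.g.
+        # [N1, N1 + N2, ...]; offset2batch above expands it into a per-point
+        # batch index that is prepended to the voxel coordinates of the
+        # TensorField built below.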
+ offset = data_dict["offset"] + batch = offset2batch(offset) + in_field = ME.TensorField( + feat, + coordinates=torch.cat([batch.unsqueeze(-1).int(), grid_coord.int()], dim=1), + quantization_mode=ME.SparseTensorQuantizationMode.UNWEIGHTED_AVERAGE, + minkowski_algorithm=ME.MinkowskiAlgorithm.SPEED_OPTIMIZED, + device=feat.device, + ) + x = in_field.sparse() + + out = self.conv0p1s1(x) + out = self.bn0(out) + out_p1 = self.relu(out) + + out = self.conv1p1s2(out_p1) + out = self.bn1(out) + out = self.relu(out) + out_b1p2 = self.block1(out) + + out = self.conv2p2s2(out_b1p2) + out = self.bn2(out) + out = self.relu(out) + out_b2p4 = self.block2(out) + + out = self.conv3p4s2(out_b2p4) + out = self.bn3(out) + out = self.relu(out) + out_b3p8 = self.block3(out) + + # tensor_stride=16 + out = self.conv4p8s2(out_b3p8) + out = self.bn4(out) + out = self.relu(out) + out = self.block4(out) + + # tensor_stride=8 + out = self.convtr4p16s2(out) + out = self.bntr4(out) + out = self.relu(out) + + out = ME.cat(out, out_b3p8) + out = self.block5(out) + + # tensor_stride=4 + out = self.convtr5p8s2(out) + out = self.bntr5(out) + out = self.relu(out) + + out = ME.cat(out, out_b2p4) + out = self.block6(out) + + # tensor_stride=2 + out = self.convtr6p4s2(out) + out = self.bntr6(out) + out = self.relu(out) + + out = ME.cat(out, out_b1p2) + out = self.block7(out) + + # tensor_stride=1 + out = self.convtr7p2s2(out) + out = self.bntr7(out) + out = self.relu(out) + + out = ME.cat(out, out_p1) + out = self.block8(out) + + return self.final(out).slice(in_field).F + + +@MODELS.register_module() +class MinkUNet14(MinkUNetBase): + BLOCK = BasicBlock + LAYERS = (1, 1, 1, 1, 1, 1, 1, 1) + + +@MODELS.register_module() +class MinkUNet18(MinkUNetBase): + BLOCK = BasicBlock + LAYERS = (2, 2, 2, 2, 2, 2, 2, 2) + + +@MODELS.register_module() +class MinkUNet34(MinkUNetBase): + BLOCK = BasicBlock + LAYERS = (2, 3, 4, 6, 2, 2, 2, 2) + + +@MODELS.register_module() +class MinkUNet50(MinkUNetBase): + BLOCK = Bottleneck + LAYERS = (2, 3, 4, 6, 2, 2, 2, 2) + + +@MODELS.register_module() +class MinkUNet101(MinkUNetBase): + BLOCK = Bottleneck + LAYERS = (2, 3, 4, 23, 2, 2, 2, 2) + + +@MODELS.register_module() +class MinkUNet14A(MinkUNet14): + PLANES = (32, 64, 128, 256, 128, 128, 96, 96) + + +@MODELS.register_module() +class MinkUNet14B(MinkUNet14): + PLANES = (32, 64, 128, 256, 128, 128, 128, 128) + + +@MODELS.register_module() +class MinkUNet14C(MinkUNet14): + PLANES = (32, 64, 128, 256, 192, 192, 128, 128) + + +@MODELS.register_module() +class MinkUNet14D(MinkUNet14): + PLANES = (32, 64, 128, 256, 384, 384, 384, 384) + + +@MODELS.register_module() +class MinkUNet18A(MinkUNet18): + PLANES = (32, 64, 128, 256, 128, 128, 96, 96) + + +@MODELS.register_module() +class MinkUNet18B(MinkUNet18): + PLANES = (32, 64, 128, 256, 128, 128, 128, 128) + + +@MODELS.register_module() +class MinkUNet18D(MinkUNet18): + PLANES = (32, 64, 128, 256, 384, 384, 384, 384) + + +@MODELS.register_module() +class MinkUNet34A(MinkUNet34): + PLANES = (32, 64, 128, 256, 256, 128, 96, 96) + + +@MODELS.register_module() +class MinkUNet34B(MinkUNet34): + PLANES = (32, 64, 128, 256, 256, 128, 64, 32) + + +@MODELS.register_module() +class MinkUNet34C(MinkUNet34): + PLANES = (32, 64, 128, 256, 256, 128, 96, 96) diff --git a/Pointcept/pointcept/models/sparse_unet/spconv_unet_v1m1_base.py b/Pointcept/pointcept/models/sparse_unet/spconv_unet_v1m1_base.py new file mode 100644 index 0000000000000000000000000000000000000000..dfcacb00b8dfb8a38aa9ab6968b0c9c63a63301c --- /dev/null 
+++ b/Pointcept/pointcept/models/sparse_unet/spconv_unet_v1m1_base.py @@ -0,0 +1,463 @@ +""" +SparseUNet Driven by SpConv (recommend) + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from functools import partial +from collections import OrderedDict + +import torch +import torch.nn as nn + +import spconv.pytorch as spconv +from torch_geometric.utils import scatter + +from timm.models.layers import trunc_normal_ + +from pointcept.models.builder import MODELS +from pointcept.models.utils import offset2batch + + +class BasicBlock(spconv.SparseModule): + expansion = 1 + + def __init__( + self, + in_channels, + embed_channels, + stride=1, + norm_fn=None, + indice_key=None, + bias=False, + ): + super().__init__() + + assert norm_fn is not None + + if in_channels == embed_channels: + self.proj = spconv.SparseSequential(nn.Identity()) + else: + self.proj = spconv.SparseSequential( + spconv.SubMConv3d( + in_channels, embed_channels, kernel_size=1, bias=False + ), + norm_fn(embed_channels), + ) + + self.conv1 = spconv.SubMConv3d( + in_channels, + embed_channels, + kernel_size=3, + stride=stride, + padding=1, + bias=bias, + indice_key=indice_key, + ) + self.bn1 = norm_fn(embed_channels) + self.relu = nn.ReLU() + self.conv2 = spconv.SubMConv3d( + embed_channels, + embed_channels, + kernel_size=3, + stride=stride, + padding=1, + bias=bias, + indice_key=indice_key, + ) + self.bn2 = norm_fn(embed_channels) + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = out.replace_feature(self.bn1(out.features)) + out = out.replace_feature(self.relu(out.features)) + + out = self.conv2(out) + out = out.replace_feature(self.bn2(out.features)) + + out = out.replace_feature(out.features + self.proj(residual).features) + out = out.replace_feature(self.relu(out.features)) + + return out + + +@MODELS.register_module("SpUNet-v1m1") +class SpUNetBase(nn.Module): + def __init__( + self, + in_channels, + num_classes, + base_channels=32, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + ): + super().__init__() + assert len(layers) % 2 == 0 + assert len(layers) == len(channels) + self.in_channels = in_channels + self.num_classes = num_classes + self.base_channels = base_channels + self.channels = channels + self.layers = layers + self.num_stages = len(layers) // 2 + self.cls_mode = cls_mode + + norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01) + block = BasicBlock + + self.conv_input = spconv.SparseSequential( + spconv.SubMConv3d( + in_channels, + base_channels, + kernel_size=5, + padding=1, + bias=False, + indice_key="stem", + ), + norm_fn(base_channels), + nn.ReLU(), + ) + + enc_channels = base_channels + dec_channels = channels[-1] + self.down = nn.ModuleList() + self.up = nn.ModuleList() + self.enc = nn.ModuleList() + self.dec = nn.ModuleList() if not self.cls_mode else None + + for s in range(self.num_stages): + # encode num_stages + self.down.append( + spconv.SparseSequential( + spconv.SparseConv3d( + enc_channels, + channels[s], + kernel_size=2, + stride=2, + bias=False, + indice_key=f"spconv{s + 1}", + ), + norm_fn(channels[s]), + nn.ReLU(), + ) + ) + self.enc.append( + spconv.SparseSequential( + OrderedDict( + [ + # (f"block{i}", block(enc_channels, channels[s], norm_fn=norm_fn, indice_key=f"subm{s + 1}")) + # if i == 0 else + ( + f"block{i}", + block( + channels[s], + channels[s], + norm_fn=norm_fn, + indice_key=f"subm{s + 1}", + ), + ) + for i in 
range(layers[s]) + ] + ) + ) + ) + if not self.cls_mode: + # decode num_stages + self.up.append( + spconv.SparseSequential( + spconv.SparseInverseConv3d( + channels[len(channels) - s - 2], + dec_channels, + kernel_size=2, + bias=False, + indice_key=f"spconv{s + 1}", + ), + norm_fn(dec_channels), + nn.ReLU(), + ) + ) + self.dec.append( + spconv.SparseSequential( + OrderedDict( + [ + ( + ( + f"block{i}", + block( + dec_channels + enc_channels, + dec_channels, + norm_fn=norm_fn, + indice_key=f"subm{s}", + ), + ) + if i == 0 + else ( + f"block{i}", + block( + dec_channels, + dec_channels, + norm_fn=norm_fn, + indice_key=f"subm{s}", + ), + ) + ) + for i in range(layers[len(channels) - s - 1]) + ] + ) + ) + ) + + enc_channels = channels[s] + dec_channels = channels[len(channels) - s - 2] + + final_in_channels = ( + channels[-1] if not self.cls_mode else channels[self.num_stages - 1] + ) + self.final = ( + spconv.SubMConv3d( + final_in_channels, num_classes, kernel_size=1, padding=1, bias=True + ) + if num_classes > 0 + else spconv.Identity() + ) + self.apply(self._init_weights) + + @staticmethod + def _init_weights(m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, spconv.SubMConv3d): + trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + def forward(self, input_dict): + grid_coord = input_dict["grid_coord"] + feat = input_dict["feat"] + offset = input_dict["offset"] + + batch = offset2batch(offset) + sparse_shape = torch.add(torch.max(grid_coord, dim=0).values, 96).tolist() + x = spconv.SparseConvTensor( + features=feat, + indices=torch.cat( + [batch.unsqueeze(-1).int(), grid_coord.int()], dim=1 + ).contiguous(), + spatial_shape=sparse_shape, + batch_size=batch[-1].tolist() + 1, + ) + x = self.conv_input(x) + skips = [x] + # enc forward + for s in range(self.num_stages): + x = self.down[s](x) + x = self.enc[s](x) + skips.append(x) + x = skips.pop(-1) + if not self.cls_mode: + # dec forward + for s in reversed(range(self.num_stages)): + x = self.up[s](x) + skip = skips.pop(-1) + x = x.replace_feature(torch.cat((x.features, skip.features), dim=1)) + x = self.dec[s](x) + + x = self.final(x) + if self.cls_mode: + x = x.replace_feature( + scatter(x.features, x.indices[:, 0].long(), reduce="mean", dim=0) + ) + return x.features + + +@MODELS.register_module() +class SpUNetNoSkipBase(nn.Module): + def __init__( + self, + in_channels, + out_channels, + base_channels=32, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + ): + super().__init__() + assert len(layers) % 2 == 0 + assert len(layers) == len(channels) + self.in_channels = in_channels + self.out_channels = out_channels + self.base_channels = base_channels + self.channels = channels + self.layers = layers + self.num_stages = len(layers) // 2 + + norm_fn = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01) + block = BasicBlock + + self.conv_input = spconv.SparseSequential( + spconv.SubMConv3d( + in_channels, + base_channels, + kernel_size=5, + padding=1, + bias=False, + indice_key="stem", + ), + norm_fn(base_channels), + nn.ReLU(), + ) + + enc_channels = base_channels + dec_channels = channels[-1] + self.down = nn.ModuleList() + self.up = nn.ModuleList() + self.enc = nn.ModuleList() + self.dec = nn.ModuleList() + + for s in range(self.num_stages): + # encode num_stages + 
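+            # The strided SparseConv3d below and the decoder-side
+            # SparseInverseConv3d share indice_key f"spconv{s + 1}", so the
+            # inverse conv reuses the stored index map to restore the exact
+            # pre-downsampling voxel set.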
self.down.append( + spconv.SparseSequential( + spconv.SparseConv3d( + enc_channels, + channels[s], + kernel_size=2, + stride=2, + bias=False, + indice_key=f"spconv{s + 1}", + ), + norm_fn(channels[s]), + nn.ReLU(), + ) + ) + self.enc.append( + spconv.SparseSequential( + OrderedDict( + [ + # (f"block{i}", block(enc_channels, channels[s], norm_fn=norm_fn, indice_key=f"subm{s + 1}")) + # if i == 0 else + ( + f"block{i}", + block( + channels[s], + channels[s], + norm_fn=norm_fn, + indice_key=f"subm{s + 1}", + ), + ) + for i in range(layers[s]) + ] + ) + ) + ) + + # decode num_stages + self.up.append( + spconv.SparseSequential( + spconv.SparseInverseConv3d( + channels[len(channels) - s - 2], + dec_channels, + kernel_size=2, + bias=False, + indice_key=f"spconv{s + 1}", + ), + norm_fn(dec_channels), + nn.ReLU(), + ) + ) + self.dec.append( + spconv.SparseSequential( + OrderedDict( + [ + ( + ( + f"block{i}", + block( + dec_channels, + dec_channels, + norm_fn=norm_fn, + indice_key=f"subm{s}", + ), + ) + if i == 0 + else ( + f"block{i}", + block( + dec_channels, + dec_channels, + norm_fn=norm_fn, + indice_key=f"subm{s}", + ), + ) + ) + for i in range(layers[len(channels) - s - 1]) + ] + ) + ) + ) + enc_channels = channels[s] + dec_channels = channels[len(channels) - s - 2] + + self.final = ( + spconv.SubMConv3d( + channels[-1], out_channels, kernel_size=1, padding=1, bias=True + ) + if out_channels > 0 + else spconv.Identity() + ) + self.apply(self._init_weights) + + @staticmethod + def _init_weights(m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, spconv.SubMConv3d): + trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + def forward(self, data_dict): + grid_coord = data_dict["grid_coord"] + feat = data_dict["feat"] + offset = data_dict["offset"] + batch = offset2batch(offset) + sparse_shape = torch.add(torch.max(grid_coord, dim=0).values, 1).tolist() + x = spconv.SparseConvTensor( + features=feat, + indices=torch.cat( + [batch.unsqueeze(-1).int(), grid_coord.int()], dim=1 + ).contiguous(), + spatial_shape=sparse_shape, + batch_size=batch[-1].tolist() + 1, + ) + x = self.conv_input(x) + skips = [x] + # enc forward + for s in range(self.num_stages): + x = self.down[s](x) + x = self.enc[s](x) + skips.append(x) + x = skips.pop(-1) + # dec forward + for s in reversed(range(self.num_stages)): + x = self.up[s](x) + # skip = skips.pop(-1) + # x = x.replace_feature(torch.cat((x.features, skip.features), dim=1)) + x = self.dec[s](x) + + x = self.final(x) + return x.features diff --git a/Pointcept/pointcept/models/sparse_unet/spconv_unet_v1m2_bn_momentum.py b/Pointcept/pointcept/models/sparse_unet/spconv_unet_v1m2_bn_momentum.py new file mode 100644 index 0000000000000000000000000000000000000000..979b1b8b5488d6c55bbc20ad0cede5002c8a5c67 --- /dev/null +++ b/Pointcept/pointcept/models/sparse_unet/spconv_unet_v1m2_bn_momentum.py @@ -0,0 +1,290 @@ +""" +SparseUNet Driven by SpConv (recommend) + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
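+
+V1M2 differs from V1M1 mainly in normalization: BatchNorm momentum is exposed
+as `bn_momentum` (default 0.1, with eps 1e-5), while the stem, downsampling,
+upsampling, and projection norms pin momentum=0.02.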
+""" + +from functools import partial +from collections import OrderedDict + +import torch +import torch.nn as nn + +try: + import spconv.pytorch as spconv +except ImportError: + import warnings + + warnings.warn("Please follow `README.md` to install spconv2.`") + +from timm.models.layers import trunc_normal_ +from pointcept.models.builder import MODELS + + +def offset2batch(offset): + return ( + torch.cat( + [ + ( + torch.tensor([i] * (o - offset[i - 1])) + if i > 0 + else torch.tensor([i] * o) + ) + for i, o in enumerate(offset) + ], + dim=0, + ) + .long() + .to(offset.device) + ) + + +class BasicBlock(spconv.SparseModule): + expansion = 1 + + def __init__( + self, + in_channels, + embed_channels, + stride=1, + norm_fn=None, + indice_key=None, + bias=False, + ): + super().__init__() + + assert norm_fn is not None + + if in_channels == embed_channels: + self.proj = spconv.SparseSequential(nn.Identity()) + else: + self.proj = spconv.SparseSequential( + spconv.SubMConv3d( + in_channels, embed_channels, kernel_size=1, bias=False + ), + norm_fn(embed_channels, momentum=0.02), + ) + + self.conv1 = spconv.SubMConv3d( + in_channels, + embed_channels, + kernel_size=3, + stride=stride, + padding=1, + bias=bias, + indice_key=indice_key, + ) + self.bn1 = norm_fn(embed_channels) + self.relu = nn.ReLU() + self.conv2 = spconv.SubMConv3d( + embed_channels, + embed_channels, + kernel_size=3, + stride=stride, + padding=1, + bias=bias, + indice_key=indice_key, + ) + self.bn2 = norm_fn(embed_channels) + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = out.replace_feature(self.bn1(out.features)) + out = out.replace_feature(self.relu(out.features)) + + out = self.conv2(out) + out = out.replace_feature(self.bn2(out.features)) + + out = out.replace_feature(out.features + self.proj(residual).features) + out = out.replace_feature(self.relu(out.features)) + + return out + + +@MODELS.register_module("SpUNet-v1m2") +class SpUNetBase(nn.Module): + def __init__( + self, + in_channels, + num_classes, + base_channels=32, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + bn_momentum=0.1, + ): + super().__init__() + assert len(layers) % 2 == 0 + assert len(layers) == len(channels) + self.in_channels = in_channels + self.num_classes = num_classes + self.base_channels = base_channels + self.channels = channels + self.layers = layers + self.num_stages = len(layers) // 2 + + norm_fn = partial(nn.BatchNorm1d, eps=1e-5, momentum=bn_momentum) + block = BasicBlock + + self.conv_input = spconv.SparseSequential( + spconv.SubMConv3d( + in_channels, + base_channels, + kernel_size=5, + padding=1, + bias=False, + indice_key="stem", + ), + norm_fn(base_channels, momentum=0.02), + nn.ReLU(), + ) + + enc_channels = base_channels + dec_channels = channels[-1] + self.down = nn.ModuleList() + self.up = nn.ModuleList() + self.enc = nn.ModuleList() + self.dec = nn.ModuleList() + + for s in range(self.num_stages): + # encode num_stages + self.down.append( + spconv.SparseSequential( + spconv.SparseConv3d( + enc_channels, + channels[s], + kernel_size=2, + stride=2, + bias=False, + indice_key=f"spconv{s + 1}", + ), + norm_fn(channels[s], momentum=0.02), + nn.ReLU(), + ) + ) + self.enc.append( + spconv.SparseSequential( + OrderedDict( + [ + # (f"block{i}", block(enc_channels, channels[s], norm_fn=norm_fn, indice_key=f"subm{s + 1}")) + # if i == 0 else + ( + f"block{i}", + block( + channels[s], + channels[s], + norm_fn=norm_fn, + indice_key=f"subm{s + 1}", + ), + ) + 
for i in range(layers[s]) + ] + ) + ) + ) + + # decode num_stages + self.up.append( + spconv.SparseSequential( + spconv.SparseInverseConv3d( + channels[len(channels) - s - 2], + dec_channels, + kernel_size=2, + bias=False, + indice_key=f"spconv{s + 1}", + ), + norm_fn(dec_channels, momentum=0.02), + nn.ReLU(), + ) + ) + self.dec.append( + spconv.SparseSequential( + OrderedDict( + [ + ( + ( + f"block{i}", + block( + dec_channels + enc_channels, + dec_channels, + norm_fn=norm_fn, + indice_key=f"subm{s}", + ), + ) + if i == 0 + else ( + f"block{i}", + block( + dec_channels, + dec_channels, + norm_fn=norm_fn, + indice_key=f"subm{s}", + ), + ) + ) + for i in range(layers[len(channels) - s - 1]) + ] + ) + ) + ) + enc_channels = channels[s] + dec_channels = channels[len(channels) - s - 2] + + self.final = ( + spconv.SubMConv3d( + channels[-1], num_classes, kernel_size=1, padding=1, bias=True + ) + if num_classes > 0 + else spconv.Identity() + ) + self.apply(self._init_weights) + + @staticmethod + def _init_weights(m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, spconv.SubMConv3d): + trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + def forward(self, data_dict): + grid_coord = data_dict["grid_coord"] + feat = data_dict["feat"] + offset = data_dict["offset"] + + batch = offset2batch(offset) + sparse_shape = torch.add(torch.max(grid_coord, dim=0).values, 1).tolist() + x = spconv.SparseConvTensor( + features=feat, + indices=torch.cat( + [batch.unsqueeze(-1).int(), grid_coord.int()], dim=1 + ).contiguous(), + spatial_shape=sparse_shape, + batch_size=batch[-1].tolist() + 1, + ) + x = self.conv_input(x) + skips = [x] + # enc forward + for s in range(self.num_stages): + x = self.down[s](x) + x = self.enc[s](x) + skips.append(x) + x = skips.pop(-1) + # dec forward + for s in reversed(range(self.num_stages)): + x = self.up[s](x) + skip = skips.pop(-1) + x = x.replace_feature(torch.cat((x.features, skip.features), dim=1)) + x = self.dec[s](x) + + x = self.final(x) + return x.features diff --git a/Pointcept/pointcept/models/sparse_unet/spconv_unet_v1m3_pdnorm.py b/Pointcept/pointcept/models/sparse_unet/spconv_unet_v1m3_pdnorm.py new file mode 100644 index 0000000000000000000000000000000000000000..968f8f2c5a19cf016f9427812a8071ca83d612d5 --- /dev/null +++ b/Pointcept/pointcept/models/sparse_unet/spconv_unet_v1m3_pdnorm.py @@ -0,0 +1,429 @@ +""" +SparseUNet V1M3 + +Enable Prompt-Driven Normalization for Point Prompt Training + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
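+
+PDBatchNorm keeps a separate BatchNorm1d per dataset condition when
+`decouple=True` and, when `adaptive=True`, additionally modulates the
+normalized features with a scale/shift predicted from a shared context
+embedding.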
+""" + +from functools import partial +from collections import OrderedDict + +import torch +import torch.nn as nn + +import spconv.pytorch as spconv +from torch_geometric.utils import scatter + +from timm.models.layers import trunc_normal_ + +from pointcept.models.builder import MODELS +from pointcept.models.utils import offset2batch + + +class PDBatchNorm(torch.nn.Module): + def __init__( + self, + num_features, + context_channels=256, + eps=1e-3, + momentum=0.01, + conditions=("ScanNet", "S3DIS", "Structured3D"), + decouple=True, + adaptive=False, + affine=True, + ): + super().__init__() + self.conditions = conditions + self.decouple = decouple + self.adaptive = adaptive + self.affine = affine + if self.decouple: + self.bns = nn.ModuleList( + [ + nn.BatchNorm1d( + num_features=num_features, + eps=eps, + momentum=momentum, + affine=affine, + ) + for _ in conditions + ] + ) + else: + self.bn = nn.BatchNorm1d( + num_features=num_features, eps=eps, momentum=momentum, affine=affine + ) + if self.adaptive: + self.modulation = nn.Sequential( + nn.SiLU(), nn.Linear(context_channels, 2 * num_features, bias=True) + ) + + def forward(self, feat, condition=None, context=None): + if self.decouple: + assert condition in self.conditions + bn = self.bns[self.conditions.index(condition)] + else: + bn = self.bn + feat = bn(feat) + if self.adaptive: + assert context is not None + shift, scale = self.modulation(context).chunk(2, dim=1) + feat = feat * (1.0 + scale) + shift + return feat + + +class BasicBlock(spconv.SparseModule): + expansion = 1 + + def __init__( + self, + in_channels, + embed_channels, + stride=1, + norm_fn=None, + indice_key=None, + bias=False, + ): + super().__init__() + + assert norm_fn is not None + + self.in_channels = in_channels + self.embed_channels = embed_channels + if in_channels == embed_channels: + self.proj = spconv.SparseSequential(nn.Identity()) + else: + # TODO remove norm after project + self.proj_conv = spconv.SubMConv3d( + in_channels, embed_channels, kernel_size=1, bias=False + ) + self.proj_norm = norm_fn(embed_channels) + + self.conv1 = spconv.SubMConv3d( + in_channels, + embed_channels, + kernel_size=3, + stride=stride, + padding=1, + bias=bias, + indice_key=indice_key, + ) + self.bn1 = norm_fn(embed_channels) + self.relu = nn.ReLU() + self.conv2 = spconv.SubMConv3d( + embed_channels, + embed_channels, + kernel_size=3, + stride=stride, + padding=1, + bias=bias, + indice_key=indice_key, + ) + self.bn2 = norm_fn(embed_channels) + self.stride = stride + + def forward(self, x): + x, condition, context = x + residual = x + + out = self.conv1(x) + out = out.replace_feature(self.bn1(out.features, condition, context)) + out = out.replace_feature(self.relu(out.features)) + + out = self.conv2(out) + out = out.replace_feature(self.bn2(out.features, condition, context)) + + if self.in_channels == self.embed_channels: + residual = self.proj(residual) + else: + residual = residual.replace_feature( + self.proj_norm(self.proj_conv(residual).features, condition, context) + ) + out = out.replace_feature(out.features + residual.features) + out = out.replace_feature(self.relu(out.features)) + return out, condition, context + + +class SPConvDown(nn.Module): + def __init__( + self, + in_channels, + out_channels, + indice_key, + kernel_size=2, + bias=False, + norm_fn=None, + ): + super().__init__() + self.conv = spconv.SparseConv3d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=kernel_size, + bias=bias, + indice_key=indice_key, + ) + self.bn = norm_fn(out_channels) + 
self.relu = nn.ReLU() + + def forward(self, x): + x, condition, context = x + out = self.conv(x) + out = out.replace_feature(self.bn(out.features, condition, context)) + out = out.replace_feature(self.relu(out.features)) + return out + + +class SPConvUp(nn.Module): + def __init__( + self, + in_channels, + out_channels, + indice_key, + kernel_size=2, + bias=False, + norm_fn=None, + ): + super().__init__() + self.conv = spconv.SparseInverseConv3d( + in_channels, + out_channels, + kernel_size=kernel_size, + bias=bias, + indice_key=indice_key, + ) + self.bn = norm_fn(out_channels) + self.relu = nn.ReLU() + + def forward(self, x): + x, condition, context = x + out = self.conv(x) + out = out.replace_feature(self.bn(out.features, condition, context)) + out = out.replace_feature(self.relu(out.features)) + return out + + +class SPConvPatchEmbedding(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size=5, norm_fn=None): + super().__init__() + self.conv = spconv.SubMConv3d( + in_channels, + out_channels, + kernel_size=kernel_size, + padding=1, + bias=False, + indice_key="stem", + ) + self.bn = norm_fn(out_channels) + self.relu = nn.ReLU() + + def forward(self, x): + x, condition, context = x + out = self.conv(x) + out = out.replace_feature(self.bn(out.features, condition, context)) + out = out.replace_feature(self.relu(out.features)) + return out + + +@MODELS.register_module("SpUNet-v1m3") +class SpUNetBase(nn.Module): + def __init__( + self, + in_channels, + num_classes=0, + base_channels=32, + context_channels=256, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 3, 4, 6, 2, 2, 2, 2), + cls_mode=False, + conditions=("ScanNet", "S3DIS", "Structured3D"), + zero_init=True, + norm_decouple=True, + norm_adaptive=True, + norm_affine=False, + ): + super().__init__() + assert len(layers) % 2 == 0 + assert len(layers) == len(channels) + self.in_channels = in_channels + self.num_classes = num_classes + self.base_channels = base_channels + self.channels = channels + self.layers = layers + self.num_stages = len(layers) // 2 + self.cls_mode = cls_mode + self.conditions = conditions + self.zero_init = zero_init + + norm_fn = partial( + PDBatchNorm, + eps=1e-3, + momentum=0.01, + conditions=conditions, + context_channels=context_channels, + decouple=norm_decouple, + adaptive=norm_adaptive, + affine=norm_affine, + ) + block = BasicBlock + + self.conv_input = SPConvPatchEmbedding( + in_channels, base_channels, kernel_size=5, norm_fn=norm_fn + ) + + enc_channels = base_channels + dec_channels = channels[-1] + self.down = nn.ModuleList() + self.up = nn.ModuleList() + self.enc = nn.ModuleList() + self.dec = nn.ModuleList() if not self.cls_mode else None + + for s in range(self.num_stages): + # encode num_stages + self.down.append( + SPConvDown( + enc_channels, + channels[s], + kernel_size=2, + bias=False, + indice_key=f"spconv{s + 1}", + norm_fn=norm_fn, + ) + ) + self.enc.append( + spconv.SparseSequential( + OrderedDict( + [ + # (f"block{i}", block(enc_channels, channels[s], norm_fn=norm_fn, indice_key=f"subm{s + 1}")) + # if i == 0 else + ( + f"block{i}", + block( + channels[s], + channels[s], + norm_fn=norm_fn, + indice_key=f"subm{s + 1}", + ), + ) + for i in range(layers[s]) + ] + ) + ) + ) + if not self.cls_mode: + # decode num_stages + self.up.append( + SPConvUp( + channels[len(channels) - s - 2], + dec_channels, + kernel_size=2, + bias=False, + indice_key=f"spconv{s + 1}", + norm_fn=norm_fn, + ) + ) + self.dec.append( + spconv.SparseSequential( + OrderedDict( + [ + ( + ( + 
f"block{i}", + block( + dec_channels + enc_channels, + dec_channels, + norm_fn=norm_fn, + indice_key=f"subm{s}", + ), + ) + if i == 0 + else ( + f"block{i}", + block( + dec_channels, + dec_channels, + norm_fn=norm_fn, + indice_key=f"subm{s}", + ), + ) + ) + for i in range(layers[len(channels) - s - 1]) + ] + ) + ) + ) + + enc_channels = channels[s] + dec_channels = channels[len(channels) - s - 2] + + final_in_channels = ( + channels[-1] if not self.cls_mode else channels[self.num_stages - 1] + ) + self.final = ( + spconv.SubMConv3d( + final_in_channels, num_classes, kernel_size=1, padding=1, bias=True + ) + if num_classes > 0 + else spconv.Identity() + ) + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, spconv.SubMConv3d): + trunc_normal_(m.weight, std=0.02) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm1d): + if m.affine: + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + elif isinstance(m, PDBatchNorm): + if self.zero_init: + nn.init.constant_(m.modulation[-1].weight, 0) + nn.init.constant_(m.modulation[-1].bias, 0) + + def forward(self, input_dict): + grid_coord = input_dict["grid_coord"] + feat = input_dict["feat"] + offset = input_dict["offset"] + condition = input_dict["condition"][0] + context = input_dict["context"] if "context" in input_dict.keys() else None + + batch = offset2batch(offset) + sparse_shape = torch.add(torch.max(grid_coord, dim=0).values, 96).tolist() + x = spconv.SparseConvTensor( + features=feat, + indices=torch.cat( + [batch.unsqueeze(-1).int(), grid_coord.int()], dim=1 + ).contiguous(), + spatial_shape=sparse_shape, + batch_size=batch[-1].tolist() + 1, + ) + x = self.conv_input([x, condition, context]) + skips = [x] + # enc forward + for s in range(self.num_stages): + x = self.down[s]([x, condition, context]) + x, _, _ = self.enc[s]([x, condition, context]) + skips.append(x) + x = skips.pop(-1) + if not self.cls_mode: + # dec forward + for s in reversed(range(self.num_stages)): + x = self.up[s]([x, condition, context]) + skip = skips.pop(-1) + x = x.replace_feature(torch.cat((x.features, skip.features), dim=1)) + x, _, _ = self.dec[s]([x, condition, context]) + + x = self.final(x) + if self.cls_mode: + x = x.replace_feature( + scatter(x.features, x.indices[:, 0].long(), reduce="mean", dim=0) + ) + return x.features diff --git a/Pointcept/pointcept/models/spvcnn/__init__.py b/Pointcept/pointcept/models/spvcnn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ecdc75a6878026437124c187323ca9676bf35c76 --- /dev/null +++ b/Pointcept/pointcept/models/spvcnn/__init__.py @@ -0,0 +1 @@ +from .ts_spvcnn import * diff --git a/Pointcept/pointcept/models/spvcnn/ts_spvcnn.py b/Pointcept/pointcept/models/spvcnn/ts_spvcnn.py new file mode 100644 index 0000000000000000000000000000000000000000..c26f1ea8d41a8edfbd1542b0f9e607e660441b38 --- /dev/null +++ b/Pointcept/pointcept/models/spvcnn/ts_spvcnn.py @@ -0,0 +1,438 @@ +""" +SPVCNN + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
+""" + +import torch +import torch.nn as nn + +try: + import torchsparse + import torchsparse.nn as spnn + import torchsparse.nn.functional as F + from torchsparse.nn.utils import get_kernel_offsets + from torchsparse import PointTensor, SparseTensor +except ImportError: + torchsparse = None + + +from pointcept.models.utils import offset2batch +from pointcept.models.builder import MODELS + + +def initial_voxelize(z): + pc_hash = F.sphash(torch.floor(z.C).int()) + sparse_hash = torch.unique(pc_hash) + idx_query = F.sphashquery(pc_hash, sparse_hash) + counts = F.spcount(idx_query.int(), len(sparse_hash)) + + inserted_coords = F.spvoxelize(torch.floor(z.C), idx_query, counts) + inserted_coords = torch.round(inserted_coords).int() + inserted_feat = F.spvoxelize(z.F, idx_query, counts) + + new_tensor = SparseTensor(inserted_feat, inserted_coords, 1) + new_tensor.cmaps.setdefault(new_tensor.stride, new_tensor.coords) + z.additional_features["idx_query"][1] = idx_query + z.additional_features["counts"][1] = counts + return new_tensor + + +# x: SparseTensor, z: PointTensor +# return: SparseTensor +def point_to_voxel(x, z): + if ( + z.additional_features is None + or z.additional_features.get("idx_query") is None + or z.additional_features["idx_query"].get(x.s) is None + ): + pc_hash = F.sphash( + torch.cat( + [ + torch.floor(z.C[:, :3] / x.s[0]).int() * x.s[0], + z.C[:, -1].int().view(-1, 1), + ], + 1, + ) + ) + sparse_hash = F.sphash(x.C) + idx_query = F.sphashquery(pc_hash, sparse_hash) + counts = F.spcount(idx_query.int(), x.C.shape[0]) + z.additional_features["idx_query"][x.s] = idx_query + z.additional_features["counts"][x.s] = counts + else: + idx_query = z.additional_features["idx_query"][x.s] + counts = z.additional_features["counts"][x.s] + + inserted_feat = F.spvoxelize(z.F, idx_query, counts) + new_tensor = SparseTensor(inserted_feat, x.C, x.s) + new_tensor.cmaps = x.cmaps + new_tensor.kmaps = x.kmaps + + return new_tensor + + +# x: SparseTensor, z: PointTensor +# return: PointTensor +def voxel_to_point(x, z, nearest=False): + if ( + z.idx_query is None + or z.weights is None + or z.idx_query.get(x.s) is None + or z.weights.get(x.s) is None + ): + off = spnn.utils.get_kernel_offsets(2, x.s, 1, device=z.F.device) + old_hash = F.sphash( + torch.cat( + [ + torch.floor(z.C[:, :3] / x.s[0]).int() * x.s[0], + z.C[:, -1].int().view(-1, 1), + ], + 1, + ), + off, + ) + pc_hash = F.sphash(x.C.to(z.F.device)) + idx_query = F.sphashquery(old_hash, pc_hash) + weights = ( + F.calc_ti_weights(z.C, idx_query, scale=x.s[0]).transpose(0, 1).contiguous() + ) + idx_query = idx_query.transpose(0, 1).contiguous() + if nearest: + weights[:, 1:] = 0.0 + idx_query[:, 1:] = -1 + new_feat = F.spdevoxelize(x.F, idx_query, weights) + new_tensor = PointTensor( + new_feat, z.C, idx_query=z.idx_query, weights=z.weights + ) + new_tensor.additional_features = z.additional_features + new_tensor.idx_query[x.s] = idx_query + new_tensor.weights[x.s] = weights + z.idx_query[x.s] = idx_query + z.weights[x.s] = weights + + else: + new_feat = F.spdevoxelize(x.F, z.idx_query.get(x.s), z.weights.get(x.s)) + new_tensor = PointTensor( + new_feat, z.C, idx_query=z.idx_query, weights=z.weights + ) + new_tensor.additional_features = z.additional_features + + return new_tensor + + +class BasicConvolutionBlock(nn.Module): + def __init__(self, inc, outc, ks=3, stride=1, dilation=1): + super().__init__() + self.net = nn.Sequential( + spnn.Conv3d(inc, outc, kernel_size=ks, dilation=dilation, stride=stride), + spnn.BatchNorm(outc), + 
spnn.ReLU(True), + ) + + def forward(self, x): + out = self.net(x) + return out + + +class BasicDeconvolutionBlock(nn.Module): + def __init__(self, inc, outc, ks=3, stride=1): + super().__init__() + self.net = nn.Sequential( + spnn.Conv3d(inc, outc, kernel_size=ks, stride=stride, transposed=True), + spnn.BatchNorm(outc), + spnn.ReLU(True), + ) + + def forward(self, x): + return self.net(x) + + +class ResidualBlock(nn.Module): + def __init__(self, inc, outc, ks=3, stride=1, dilation=1): + super().__init__() + self.net = nn.Sequential( + spnn.Conv3d(inc, outc, kernel_size=ks, dilation=dilation, stride=stride), + spnn.BatchNorm(outc), + spnn.ReLU(True), + spnn.Conv3d(outc, outc, kernel_size=ks, dilation=dilation, stride=1), + spnn.BatchNorm(outc), + ) + + if inc == outc and stride == 1: + self.downsample = nn.Identity() + else: + self.downsample = nn.Sequential( + spnn.Conv3d(inc, outc, kernel_size=1, dilation=1, stride=stride), + spnn.BatchNorm(outc), + ) + + self.relu = spnn.ReLU(True) + + def forward(self, x): + out = self.relu(self.net(x) + self.downsample(x)) + return out + + +@MODELS.register_module() +class SPVCNN(nn.Module): + def __init__( + self, + in_channels, + out_channels, + base_channels=32, + channels=(32, 64, 128, 256, 256, 128, 96, 96), + layers=(2, 2, 2, 2, 2, 2, 2, 2), + ): # not implement + super().__init__() + + assert ( + torchsparse is not None + ), "Please follow `README.md` to install torchsparse.`" + assert len(layers) % 2 == 0 + assert len(layers) == len(channels) + self.in_channels = in_channels + self.out_channels = out_channels + self.base_channels = base_channels + self.channels = channels + self.layers = layers + self.num_stages = len(layers) // 2 + + self.stem = nn.Sequential( + spnn.Conv3d(in_channels, base_channels, kernel_size=3, stride=1), + spnn.BatchNorm(base_channels), + spnn.ReLU(True), + spnn.Conv3d(base_channels, base_channels, kernel_size=3, stride=1), + spnn.BatchNorm(base_channels), + spnn.ReLU(True), + ) + + self.stage1 = nn.Sequential( + *[ + BasicConvolutionBlock( + base_channels, base_channels, ks=2, stride=2, dilation=1 + ), + ResidualBlock(base_channels, channels[0], ks=3, stride=1, dilation=1), + ] + + [ + ResidualBlock(channels[0], channels[0], ks=3, stride=1, dilation=1) + for _ in range(layers[0] - 1) + ] + ) + + self.stage2 = nn.Sequential( + *[ + BasicConvolutionBlock( + channels[0], channels[0], ks=2, stride=2, dilation=1 + ), + ResidualBlock(channels[0], channels[1], ks=3, stride=1, dilation=1), + ] + + [ + ResidualBlock(channels[1], channels[1], ks=3, stride=1, dilation=1) + for _ in range(layers[1] - 1) + ] + ) + + self.stage3 = nn.Sequential( + *[ + BasicConvolutionBlock( + channels[1], channels[1], ks=2, stride=2, dilation=1 + ), + ResidualBlock(channels[1], channels[2], ks=3, stride=1, dilation=1), + ] + + [ + ResidualBlock(channels[2], channels[2], ks=3, stride=1, dilation=1) + for _ in range(layers[2] - 1) + ] + ) + + self.stage4 = nn.Sequential( + *[ + BasicConvolutionBlock( + channels[2], channels[2], ks=2, stride=2, dilation=1 + ), + ResidualBlock(channels[2], channels[3], ks=3, stride=1, dilation=1), + ] + + [ + ResidualBlock(channels[3], channels[3], ks=3, stride=1, dilation=1) + for _ in range(layers[3] - 1) + ] + ) + + self.up1 = nn.ModuleList( + [ + BasicDeconvolutionBlock(channels[3], channels[4], ks=2, stride=2), + nn.Sequential( + *[ + ResidualBlock( + channels[4] + channels[2], + channels[4], + ks=3, + stride=1, + dilation=1, + ) + ] + + [ + ResidualBlock( + channels[4], channels[4], ks=3, stride=1, dilation=1 + ) 
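+                    # one skip-fusing block above (channels[4] + channels[2] in),
+                    # then layers[4] - 1 plain residual blocks at channels[4]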
+ for _ in range(layers[4] - 1) + ] + ), + ] + ) + + self.up2 = nn.ModuleList( + [ + BasicDeconvolutionBlock(channels[4], channels[5], ks=2, stride=2), + nn.Sequential( + *[ + ResidualBlock( + channels[5] + channels[1], + channels[5], + ks=3, + stride=1, + dilation=1, + ) + ] + + [ + ResidualBlock( + channels[5], channels[5], ks=3, stride=1, dilation=1 + ) + for _ in range(layers[5] - 1) + ] + ), + ] + ) + + self.up3 = nn.ModuleList( + [ + BasicDeconvolutionBlock(channels[5], channels[6], ks=2, stride=2), + nn.Sequential( + *[ + ResidualBlock( + channels[6] + channels[0], + channels[6], + ks=3, + stride=1, + dilation=1, + ) + ] + + [ + ResidualBlock( + channels[6], channels[6], ks=3, stride=1, dilation=1 + ) + for _ in range(layers[6] - 1) + ] + ), + ] + ) + + self.up4 = nn.ModuleList( + [ + BasicDeconvolutionBlock(channels[6], channels[7], ks=2, stride=2), + nn.Sequential( + *[ + ResidualBlock( + channels[7] + base_channels, + channels[7], + ks=3, + stride=1, + dilation=1, + ) + ] + + [ + ResidualBlock( + channels[7], channels[7], ks=3, stride=1, dilation=1 + ) + for _ in range(layers[7] - 1) + ] + ), + ] + ) + + self.classifier = nn.Sequential(nn.Linear(channels[7], out_channels)) + + self.point_transforms = nn.ModuleList( + [ + nn.Sequential( + nn.Linear(base_channels, channels[3]), + nn.BatchNorm1d(channels[3]), + nn.ReLU(True), + ), + nn.Sequential( + nn.Linear(channels[3], channels[5]), + nn.BatchNorm1d(channels[5]), + nn.ReLU(True), + ), + nn.Sequential( + nn.Linear(channels[5], channels[7]), + nn.BatchNorm1d(channels[7]), + nn.ReLU(True), + ), + ] + ) + + self.weight_initialization() + self.dropout = nn.Dropout(0.3, True) + + def weight_initialization(self): + for m in self.modules(): + if isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + def forward(self, data_dict): + grid_coord = data_dict["grid_coord"] + feat = data_dict["feat"] + offset = data_dict["offset"] + batch = offset2batch(offset) + + # x: SparseTensor z: PointTensor + z = PointTensor( + feat, + torch.cat( + [grid_coord.float(), batch.unsqueeze(-1).float()], dim=1 + ).contiguous(), + ) + x0 = initial_voxelize(z) + + x0 = self.stem(x0) + z0 = voxel_to_point(x0, z, nearest=False) + z0.F = z0.F + + x1 = point_to_voxel(x0, z0) + x1 = self.stage1(x1) + x2 = self.stage2(x1) + x3 = self.stage3(x2) + x4 = self.stage4(x3) + z1 = voxel_to_point(x4, z0) + z1.F = z1.F + self.point_transforms[0](z0.F) + + y1 = point_to_voxel(x4, z1) + y1.F = self.dropout(y1.F) + y1 = self.up1[0](y1) + y1 = torchsparse.cat([y1, x3]) + y1 = self.up1[1](y1) + + y2 = self.up2[0](y1) + y2 = torchsparse.cat([y2, x2]) + y2 = self.up2[1](y2) + z2 = voxel_to_point(y2, z1) + z2.F = z2.F + self.point_transforms[1](z1.F) + + y3 = point_to_voxel(y2, z2) + y3.F = self.dropout(y3.F) + y3 = self.up3[0](y3) + y3 = torchsparse.cat([y3, x1]) + y3 = self.up3[1](y3) + + y4 = self.up4[0](y3) + y4 = torchsparse.cat([y4, x0]) + y4 = self.up4[1](y4) + z3 = voxel_to_point(y4, z2) + z3.F = z3.F + self.point_transforms[2](z2.F) + + out = self.classifier(z3.F) + return out diff --git a/Pointcept/pointcept/models/stratified_transformer/__init__.py b/Pointcept/pointcept/models/stratified_transformer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..24712d2bc0fe76a52a273172fe9c4f37c807a6c3 --- /dev/null +++ b/Pointcept/pointcept/models/stratified_transformer/__init__.py @@ -0,0 +1,2 @@ +from .stratified_transformer_v1m1_origin import StratifiedTransformer +from .stratified_transformer_v1m2_refine 
import StratifiedTransformer diff --git a/Pointcept/pointcept/models/stratified_transformer/stratified_transformer_v1m1_origin.py b/Pointcept/pointcept/models/stratified_transformer/stratified_transformer_v1m1_origin.py new file mode 100644 index 0000000000000000000000000000000000000000..5bf18f71298efa1e96f0697c96e26d2127140c36 --- /dev/null +++ b/Pointcept/pointcept/models/stratified_transformer/stratified_transformer_v1m1_origin.py @@ -0,0 +1,830 @@ +import torch +import torch.nn as nn + +try: + import torch_points_kernels as tp +except ImportError: + tp = None + +try: + from torch_points3d.modules.KPConv.kernels import KPConvLayer + from torch_points3d.core.common_modules import FastBatchNorm1d +except ImportError: + KPConvLayer = None + FastBatchNorm1d = None + +from torch_scatter import scatter_softmax +from timm.models.layers import DropPath, trunc_normal_ +from torch_geometric.nn.pool import voxel_grid + +try: + import pointops2.pointops as pointops +except ImportError: + pointops = None + +from pointcept.models.builder import MODELS + + +def offset2batch(offset): + return ( + torch.cat( + [ + ( + torch.tensor([i] * (o - offset[i - 1])) + if i > 0 + else torch.tensor([i] * o) + ) + for i, o in enumerate(offset) + ], + dim=0, + ) + .long() + .to(offset.device) + ) + + +def get_indice_pairs( + p2v_map, counts, new_p2v_map, new_counts, downsample_idx, batch, xyz, window_size, i +): + # p2v_map: [n, k] + # counts: [n, ] + + n, k = p2v_map.shape + mask = torch.arange(k).unsqueeze(0).cuda() < counts.unsqueeze(-1) # [n, k] + mask_mat = mask.unsqueeze(-1) & mask.unsqueeze(-2) # [n, k, k] + index_0 = p2v_map.unsqueeze(-1).expand(-1, -1, k)[mask_mat] # [M, ] + index_1 = p2v_map.unsqueeze(1).expand(-1, k, -1)[mask_mat] # [M, ] + + downsample_mask = torch.zeros_like(batch).bool() # [N, ] + downsample_mask[downsample_idx.long()] = True + + downsample_mask = downsample_mask[new_p2v_map] # [n, k] + n, k = new_p2v_map.shape + mask = torch.arange(k).unsqueeze(0).cuda() < new_counts.unsqueeze(-1) # [n, k] + downsample_mask = downsample_mask & mask + mask_mat = mask.unsqueeze(-1) & downsample_mask.unsqueeze(-2) # [n, k, k] + xyz_min = xyz.min(0)[0] + if i % 2 == 0: + window_coord = (xyz[new_p2v_map] - xyz_min) // window_size # [n, k, 3] + else: + window_coord = ( + xyz[new_p2v_map] + 1 / 2 * window_size - xyz_min + ) // window_size # [n, k, 3] + + mask_mat_prev = (window_coord.unsqueeze(2) != window_coord.unsqueeze(1)).any( + -1 + ) # [n, k, k] + mask_mat = mask_mat & mask_mat_prev # [n, k, k] + + new_index_0 = new_p2v_map.unsqueeze(-1).expand(-1, -1, k)[mask_mat] # [M, ] + new_index_1 = new_p2v_map.unsqueeze(1).expand(-1, k, -1)[mask_mat] # [M, ] + + index_0 = torch.cat([index_0, new_index_0], 0) + index_1 = torch.cat([index_1, new_index_1], 0) + return index_0, index_1 + + +def grid_sample(pos, batch, size, start, return_p2v=True): + # pos: float [N, 3] + # batch: long [N] + # size: float [3, ] + # start: float [3, ] / None + + cluster = voxel_grid(pos, batch, size, start=start) # [N, ] + + if return_p2v == False: + unique, cluster = torch.unique(cluster, sorted=True, return_inverse=True) + return cluster + + unique, cluster, counts = torch.unique( + cluster, sorted=True, return_inverse=True, return_counts=True + ) + + # obtain p2v_map + n = unique.shape[0] + k = counts.max().item() + p2v_map = cluster.new_zeros(n, k) # [n, k] + mask = torch.arange(k).cuda().unsqueeze(0) < counts.unsqueeze(-1) # [n, k] + p2v_map[mask] = torch.argsort(cluster) + + return cluster, p2v_map, counts + + +class 
Mlp(nn.Module): + """Multilayer perceptron.""" + + def __init__( + self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0.0, + ): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop, inplace=True) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class TransitionDown(nn.Module): + def __init__(self, in_channels, out_channels, ratio, k, norm_layer=nn.LayerNorm): + super().__init__() + self.ratio = ratio + self.k = k + self.norm = norm_layer(in_channels) if norm_layer else None + self.linear = nn.Linear(in_channels, out_channels, bias=False) + self.pool = nn.MaxPool1d(k) + + def forward(self, feats, xyz, offset): + n_offset, count = [int(offset[0].item() * self.ratio) + 1], int( + offset[0].item() * self.ratio + ) + 1 + for i in range(1, offset.shape[0]): + count += ((offset[i].item() - offset[i - 1].item()) * self.ratio) + 1 + n_offset.append(count) + n_offset = torch.cuda.IntTensor(n_offset) + idx = pointops.furthestsampling(xyz, offset, n_offset) # (m) + n_xyz = xyz[idx.long(), :] # (m, 3) + + feats = pointops.queryandgroup( + self.k, xyz, n_xyz, feats, None, offset, n_offset, use_xyz=False + ) # (m, nsample, 3+c) + m, k, c = feats.shape + feats = ( + self.linear(self.norm(feats.view(m * k, c)).view(m, k, c)) + .transpose(1, 2) + .contiguous() + ) + feats = self.pool(feats).squeeze(-1) # (m, c) + + return feats, n_xyz, n_offset + + +class WindowAttention(nn.Module): + """Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. 
Default: 0.0 + """ + + def __init__( + self, + dim, + window_size, + num_heads, + quant_size, + rel_query=True, + rel_key=False, + rel_value=False, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + ): + super().__init__() + self.dim = dim + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + self.window_size = window_size + + self.quant_size = quant_size + self.rel_query = rel_query + self.rel_key = rel_key + self.rel_value = rel_value + + quant_grid_length = int((2 * window_size + 1e-4) // quant_size) + + if rel_query: + self.relative_pos_query_table = nn.Parameter( + torch.zeros(2 * quant_grid_length, num_heads, head_dim, 3) + ) + trunc_normal_(self.relative_pos_query_table, std=0.02) + if rel_key: + self.relative_pos_key_table = nn.Parameter( + torch.zeros(2 * quant_grid_length, num_heads, head_dim, 3) + ) + trunc_normal_(self.relative_pos_key_table, std=0.02) + if rel_value: + self.relative_pos_value_table = nn.Parameter( + torch.zeros(2 * quant_grid_length, num_heads, head_dim, 3) + ) + trunc_normal_(self.relative_pos_value_table, std=0.02) + + self.quant_grid_length = quant_grid_length + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop, inplace=True) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop, inplace=True) + + self.softmax = nn.Softmax(dim=-1) + + # def forward(self, feats, xyz, index_0, index_1): + def forward(self, feats, xyz, index_0, index_1, index_0_offsets, n_max): + """Forward function. + + Args: + feats: N, C + xyz: N, 3 + index_0: M, + index_1: M, + """ + + N, C = feats.shape + M = index_0.shape[0] + + assert index_0.shape[0] == index_1.shape[0] + + # Query, Key, Value + qkv = ( + self.qkv(feats) + .reshape(N, 3, self.num_heads, C // self.num_heads) + .permute(1, 0, 2, 3) + .contiguous() + ) + query, key, value = qkv[0], qkv[1], qkv[2] # [N, num_heads, C//num_heads] + query = query * self.scale + attn_flat = pointops.attention_step1_v2( + query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max + ) + + # # Position embedding + relative_position = xyz[index_0] - xyz[index_1] + relative_position = torch.round(relative_position * 100000) / 100000 + relative_position_index = ( + relative_position + 2 * self.window_size - 0.0001 + ) // self.quant_size + assert (relative_position_index >= 0).all() + assert (relative_position_index <= 2 * self.quant_grid_length - 1).all() + + assert self.rel_query and self.rel_key + if self.rel_query and self.rel_key: + relative_position_bias = pointops.dot_prod_with_idx_v3( + query.float(), + index_0_offsets.int(), + n_max, + key.float(), + index_1.int(), + self.relative_pos_query_table.float(), + self.relative_pos_key_table.float(), + relative_position_index.int(), + ) + elif self.rel_query: + relative_position_bias = pointops.dot_prod_with_idx( + query.float(), + index_0.int(), + self.relative_pos_query_table.float(), + relative_position_index.int(), + ) # [M, num_heads] + elif self.rel_key: + relative_position_bias = pointops.dot_prod_with_idx( + key.float(), + index_1.int(), + self.relative_pos_key_table.float(), + relative_position_index.int(), + ) # [M, num_heads] + else: + relative_position_bias = 0.0 + + attn_flat = attn_flat + relative_position_bias # [M, num_heads] + + softmax_attn_flat = scatter_softmax( + src=attn_flat, index=index_0, dim=0 + ) # [M, num_heads] + + if self.rel_value: + x = pointops.attention_step2_with_rel_pos_value_v2( + softmax_attn_flat.float(), + 
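+                # fused CUDA kernel: for each query i it accumulates
+                # softmax_ij * (value_j + relative-position value bias)
+                # over its neighbor set j, grouped via index_0_offsets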
value.float(), + index_0_offsets.int(), + n_max, + index_1.int(), + self.relative_pos_value_table.float(), + relative_position_index.int(), + ) + else: + x = pointops.attention_step2( + softmax_attn_flat.float(), value.float(), index_0.int(), index_1.int() + ) + + x = x.view(N, C) + + x = self.proj(x) + x = self.proj_drop(x) # [N, C] + + return x + + +class SwinTransformerBlock(nn.Module): + def __init__( + self, + dim, + num_heads, + window_size, + quant_size, + rel_query=True, + rel_key=False, + rel_value=False, + drop_path=0.0, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + mode=4, + ): # mode=4:mean + super().__init__() + self.mode = mode + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, + window_size, + num_heads=num_heads, + quant_size=quant_size, + rel_query=rel_query, + rel_key=rel_key, + rel_value=rel_value, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + ) + + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp( + in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer + ) + + def forward(self, feats, xyz, index_0, index_1, index_0_offsets, n_max): + # feats: [N, c] + # pos: [N, 3] + + short_cut = feats + + feats = self.norm1(feats) + + feats = self.attn( + feats, xyz, index_0, index_1, index_0_offsets, n_max + ) # index_0 MUST be in ascending order + + feats = short_cut + self.drop_path(feats) + feats = feats + self.drop_path(self.mlp(self.norm2(feats))) + + return feats + + +class BasicLayer(nn.Module): + def __init__( + self, + downsample_scale, + depth, + channel, + num_heads, + window_size, + grid_size, + quant_size, + rel_query=True, + rel_key=False, + rel_value=False, + drop_path=0.0, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + norm_layer=nn.LayerNorm, + downsample=None, + ratio=0.25, + k=16, + out_channels=None, + ): + super().__init__() + self.depth = depth + self.grid_size = grid_size + self.max_window_counts = 64 + self.window_size = window_size + self.downsample_scale = downsample_scale + + self.blocks = nn.ModuleList( + [ + SwinTransformerBlock( + channel, + num_heads, + window_size, + quant_size, + rel_query=rel_query, + rel_key=rel_key, + rel_value=rel_value, + drop_path=( + drop_path[i] if isinstance(drop_path, list) else drop_path + ), + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + norm_layer=norm_layer, + ) + for i in range(depth) + ] + ) + + self.downsample = ( + downsample(channel, out_channels, ratio, k) if downsample else None + ) + + def forward(self, feats, xyz, offset): + # feats: N, C + # xyz: N, 3 + + window_size = torch.tensor([self.window_size] * 3).type_as(xyz).to(xyz.device) + + offset_ = offset.clone() + offset_[1:] = offset_[1:] - offset_[:-1] + batch = ( + torch.cat([torch.tensor([ii] * o) for ii, o in enumerate(offset_)], 0) + .long() + .cuda() + ) + + v2p_map, p2v_map, counts = grid_sample(xyz, batch, window_size, start=None) + + shift_size = 1 / 2 * window_size + shift_v2p_map, shift_p2v_map, shift_counts = grid_sample( + xyz + shift_size, batch, window_size, start=xyz.min(0)[0] + ) + + downsample_scale = self.downsample_scale + new_offset, count = [offset[0].item() // downsample_scale + 1], offset[ + 0 + ].item() // downsample_scale + 1 + for i in range(1, offset.shape[0]): + count += (offset[i].item() - offset[i - 1].item()) // downsample_scale + 1 + new_offset.append(count) + + new_offset = 
torch.cuda.IntTensor(new_offset) + downsample_idx = pointops.furthestsampling( + xyz, offset.int(), new_offset.int() + ) # [N/16,] + + new_window_size = 2 * torch.tensor([self.window_size] * 3).type_as(xyz).to( + xyz.device + ) + + # offset_ = new_offset.clone() + # offset_[1:] = offset_[1:] - offset_[:-1] + # new_batch = torch.cat([torch.tensor([ii]*o) for ii,o in enumerate(offset_)], 0).long().cuda() + + new_v2p_map, new_p2v_map, new_counts = grid_sample( + xyz, batch, new_window_size, start=None + ) + + shift_size = 1 / 2 * new_window_size + shift_new_v2p_map, shift_new_p2v_map, shift_new_counts = grid_sample( + xyz + shift_size, batch, new_window_size, start=xyz.min(0)[0] + ) + + for i, blk in enumerate(self.blocks): + p2v_map_blk = p2v_map if i % 2 == 0 else shift_p2v_map + counts_blk = counts if i % 2 == 0 else shift_counts + + new_p2v_map_blk = new_p2v_map if i % 2 == 0 else shift_new_p2v_map + new_counts_blk = new_counts if i % 2 == 0 else shift_new_counts + + index_0, index_1 = get_indice_pairs( + p2v_map_blk, + counts_blk, + new_p2v_map_blk, + new_counts_blk, + downsample_idx, + batch, + xyz, + window_size, + i, + ) + + # rearrange index for acceleration + index_0, indices = torch.sort(index_0) # [M,] + index_1 = index_1[indices] # [M,] + index_0_counts = index_0.bincount() + n_max = index_0_counts.max() + index_0_offsets = index_0_counts.cumsum(dim=-1) # [N] + index_0_offsets = torch.cat( + [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 + ) # [N+1] + + feats = blk(feats, xyz, index_0, index_1, index_0_offsets, n_max) + + if self.downsample: + feats_down, xyz_down, offset_down = self.downsample(feats, xyz, offset) + else: + feats_down, xyz_down, offset_down = None, None, None + + return feats, xyz, offset, feats_down, xyz_down, offset_down + + +class Upsample(nn.Module): + def __init__(self, k, in_channels, out_channels, bn_momentum=0.02): + super().__init__() + self.k = k + self.in_channels = in_channels + self.out_channels = out_channels + + self.linear1 = nn.Sequential( + nn.LayerNorm(out_channels), nn.Linear(out_channels, out_channels) + ) + self.linear2 = nn.Sequential( + nn.LayerNorm(in_channels), nn.Linear(in_channels, out_channels) + ) + + def forward( + self, feats, xyz, support_xyz, offset, support_offset, support_feats=None + ): + feats = self.linear1(support_feats) + pointops.interpolation( + xyz, support_xyz, self.linear2(feats), offset, support_offset + ) + return feats, support_xyz, support_offset + + +class KPConvSimpleBlock(nn.Module): + def __init__( + self, + in_channels, + out_channels, + prev_grid_size, + sigma=1.0, + negative_slope=0.2, + bn_momentum=0.02, + ): + super().__init__() + self.kpconv = KPConvLayer( + in_channels, + out_channels, + point_influence=prev_grid_size * sigma, + add_one=False, + ) + self.bn = FastBatchNorm1d(out_channels, momentum=bn_momentum) + self.activation = nn.LeakyReLU(negative_slope=negative_slope) + + def forward(self, feats, xyz, batch, neighbor_idx): + # feats: [N, C] + # xyz: [N, 3] + # batch: [N,] + # neighbor_idx: [N, M] + + feats = self.kpconv(xyz, xyz, neighbor_idx, feats) + feats = self.activation(self.bn(feats)) + return feats + + +class KPConvResBlock(nn.Module): + def __init__( + self, + in_channels, + out_channels, + prev_grid_size, + sigma=1.0, + negative_slope=0.2, + bn_momentum=0.02, + ): + super().__init__() + d_2 = out_channels // 4 + activation = nn.LeakyReLU(negative_slope=negative_slope) + self.unary_1 = torch.nn.Sequential( + nn.Linear(in_channels, d_2, bias=False), + FastBatchNorm1d(d_2, 
momentum=bn_momentum), + activation, + ) + self.unary_2 = torch.nn.Sequential( + nn.Linear(d_2, out_channels, bias=False), + FastBatchNorm1d(out_channels, momentum=bn_momentum), + activation, + ) + self.kpconv = KPConvLayer( + d_2, d_2, point_influence=prev_grid_size * sigma, add_one=False + ) + self.bn = FastBatchNorm1d(out_channels, momentum=bn_momentum) + self.activation = activation + + if in_channels != out_channels: + self.shortcut_op = torch.nn.Sequential( + nn.Linear(in_channels, out_channels, bias=False), + FastBatchNorm1d(out_channels, momentum=bn_momentum), + ) + else: + self.shortcut_op = nn.Identity() + + def forward(self, feats, xyz, batch, neighbor_idx): + # feats: [N, C] + # xyz: [N, 3] + # batch: [N,] + # neighbor_idx: [N, M] + + shortcut = feats + feats = self.unary_1(feats) + feats = self.kpconv(xyz, xyz, neighbor_idx, feats) + feats = self.unary_2(feats) + shortcut = self.shortcut_op(shortcut) + feats += shortcut + return feats + + +@MODELS.register_module("ST-v1m1") +class StratifiedTransformer(nn.Module): + def __init__( + self, + downsample_scale, + depths, + channels, + num_heads, + window_size, + up_k, + grid_sizes, + quant_sizes, + rel_query=True, + rel_key=False, + rel_value=False, + drop_path_rate=0.2, + num_layers=4, + concat_xyz=False, + num_classes=13, + ratio=0.25, + k=16, + prev_grid_size=0.04, + sigma=1.0, + stem_transformer=False, + kp_ball_radius=0.02 * 2.5, + kp_max_neighbor=34, + ): + super().__init__() + assert ( + KPConvLayer is not None and FastBatchNorm1d is not None + ), "Please make sure torch_points3d is installed" + assert tp is not None, "Please make sure torch_points_kernels is installed" + assert pointops is not None, "Please make sure pointops2 is installed" + + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + + self.kp_ball_radius = kp_ball_radius + self.kp_max_neighbor = kp_max_neighbor + if stem_transformer: + self.stem_layer = nn.ModuleList( + [ + KPConvSimpleBlock( + 3 if not concat_xyz else 6, + channels[0], + prev_grid_size, + sigma=sigma, + ) + ] + ) + self.layer_start = 0 + else: + self.stem_layer = nn.ModuleList( + [ + KPConvSimpleBlock( + 3 if not concat_xyz else 6, + channels[0], + prev_grid_size, + sigma=sigma, + ), + KPConvResBlock( + channels[0], channels[0], prev_grid_size, sigma=sigma + ), + ] + ) + self.downsample = TransitionDown(channels[0], channels[1], ratio, k) + self.layer_start = 1 + + self.layers = nn.ModuleList( + [ + BasicLayer( + downsample_scale, + depths[i], + channels[i], + num_heads[i], + window_size[i], + grid_sizes[i], + quant_sizes[i], + rel_query=rel_query, + rel_key=rel_key, + rel_value=rel_value, + drop_path=dpr[sum(depths[:i]) : sum(depths[: i + 1])], + downsample=TransitionDown if i < num_layers - 1 else None, + ratio=ratio, + k=k, + out_channels=channels[i + 1] if i < num_layers - 1 else None, + ) + for i in range(self.layer_start, num_layers) + ] + ) + + self.upsamples = nn.ModuleList( + [ + Upsample(up_k, channels[i], channels[i - 1]) + for i in range(num_layers - 1, 0, -1) + ] + ) + + self.classifier = nn.Sequential( + nn.Linear(channels[0], channels[0]), + nn.BatchNorm1d(channels[0]), + nn.ReLU(inplace=True), + nn.Linear(channels[0], num_classes), + ) + + self.init_weights() + + def forward(self, data_dict): + feats = data_dict["feat"] + xyz = data_dict["coord"] + offset = data_dict["offset"].int() + batch = offset2batch(offset) + neighbor_idx = tp.ball_query( + self.kp_ball_radius, + self.kp_max_neighbor, + xyz, + xyz, + 
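+            # radius search of the cloud against itself: up to
+            # kp_max_neighbor neighbor indices within kp_ball_radius per
+            # point, shared by all KPConv stem layers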
mode="partial_dense", + batch_x=batch, + batch_y=batch, + )[0] + + feats_stack = [] + xyz_stack = [] + offset_stack = [] + + for i, layer in enumerate(self.stem_layer): + feats = layer(feats, xyz, batch, neighbor_idx) + + feats = feats.contiguous() + + if self.layer_start == 1: + feats_stack.append(feats) + xyz_stack.append(xyz) + offset_stack.append(offset) + feats, xyz, offset = self.downsample(feats, xyz, offset) + + for i, layer in enumerate(self.layers): + feats, xyz, offset, feats_down, xyz_down, offset_down = layer( + feats, xyz, offset + ) + + feats_stack.append(feats) + xyz_stack.append(xyz) + offset_stack.append(offset) + + feats = feats_down + xyz = xyz_down + offset = offset_down + + feats = feats_stack.pop() + xyz = xyz_stack.pop() + offset = offset_stack.pop() + + for i, upsample in enumerate(self.upsamples): + feats, xyz, offset = upsample( + feats, + xyz, + xyz_stack.pop(), + offset, + offset_stack.pop(), + support_feats=feats_stack.pop(), + ) + + out = self.classifier(feats) + + return out + + def init_weights(self): + """Initialize the weights in backbone.""" + + def _init_weights(m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm) or isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + self.apply(_init_weights) diff --git a/Pointcept/pointcept/models/stratified_transformer/stratified_transformer_v1m2_refine.py b/Pointcept/pointcept/models/stratified_transformer/stratified_transformer_v1m2_refine.py new file mode 100644 index 0000000000000000000000000000000000000000..234afc12a7be6ea1feb87259c8c77e1bf0a8b3d3 --- /dev/null +++ b/Pointcept/pointcept/models/stratified_transformer/stratified_transformer_v1m2_refine.py @@ -0,0 +1,763 @@ +""" +Stratified Transformer + +Modified from https://github.com/dvlab-research/Stratified-Transformer + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
+""" + +from copy import deepcopy +import torch +import torch.nn as nn + +try: + import torch_points_kernels as tp +except ImportError: + tp = None + +try: + from torch_points3d.modules.KPConv.kernels import KPConvLayer + from torch_points3d.core.common_modules import FastBatchNorm1d +except ImportError: + KPConvLayer = None + FastBatchNorm1d = None + +from torch_scatter import scatter_softmax +from timm.models.layers import DropPath, trunc_normal_ +from torch_geometric.nn.pool import voxel_grid + +try: + import pointops2.pointops as pointops +except ImportError: + pointops = None + +from pointcept.models.builder import MODELS + + +def offset2batch(offset): + return ( + torch.cat( + [ + ( + torch.tensor([i] * (o - offset[i - 1])) + if i > 0 + else torch.tensor([i] * o) + ) + for i, o in enumerate(offset) + ], + dim=0, + ) + .long() + .to(offset.device) + ) + + +def grid_sample(coords, batch, size, start, return_p2v=True): + cluster = voxel_grid(coords, batch, size, start=start) + + if not return_p2v: + unique, cluster = torch.unique(cluster, sorted=True, return_inverse=True) + return cluster + else: + unique, cluster, counts = torch.unique( + cluster, sorted=True, return_inverse=True, return_counts=True + ) + + # obtain p2v_map + n = unique.shape[0] + k = counts.max().item() + p2v_map = cluster.new_zeros(n, k) + mask = torch.arange(k).cuda().unsqueeze(0) < counts.unsqueeze(-1) + p2v_map[mask] = torch.argsort(cluster) + return cluster, p2v_map, counts + + +class WindowAttention(nn.Module): + """Window based multi-head self attention (W-MSA) module with relative position bias. + It supports both of shifted and non-shifted window. + """ + + def __init__( + self, + embed_channels, + num_heads, + window_size, + quant_size, + attn_drop=0.0, + proj_drop=0.0, + scale=None, + rel_query=True, + rel_key=True, + rel_value=True, + qkv_bias=True, + ): + super().__init__() + self.embed_channels = embed_channels + self.head_channels = embed_channels // num_heads + self.num_heads = num_heads + self.scale = scale or self.head_channels**-0.5 + + self.window_size = window_size + self.quant_size = quant_size + + self.rel_query = rel_query + self.rel_key = rel_key + self.rel_value = rel_value + + self.quant_grid_length = int((2 * window_size + 1e-4) // quant_size) + + assert self.rel_query and self.rel_key + if rel_query: + self.relative_pos_query_table = nn.Parameter( + torch.zeros( + 2 * self.quant_grid_length, self.num_heads, self.head_channels, 3 + ) + ) + trunc_normal_(self.relative_pos_query_table, std=0.02) + + if rel_key: + self.relative_pos_key_table = nn.Parameter( + torch.zeros( + 2 * self.quant_grid_length, self.num_heads, self.head_channels, 3 + ) + ) + trunc_normal_(self.relative_pos_query_table, std=0.02) + + if rel_value: + self.relative_pos_value_table = nn.Parameter( + torch.zeros( + 2 * self.quant_grid_length, self.num_heads, self.head_channels, 3 + ) + ) + trunc_normal_(self.relative_pos_query_table, std=0.02) + + self.qkv = nn.Linear(embed_channels, embed_channels * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop, inplace=True) + self.proj = nn.Linear(embed_channels, embed_channels) + self.proj_drop = nn.Dropout(proj_drop, inplace=True) + + self.softmax = nn.Softmax(dim=-1) + + def forward(self, feats, coords, index_0, index_1, index_0_offsets, n_max): + n, c = feats.shape + m = index_0.shape[0] + + assert index_0.shape[0] == index_1.shape[0] + + qkv = ( + self.qkv(feats) + .reshape(n, 3, self.num_heads, c // self.num_heads) + .permute(1, 0, 2, 3) + .contiguous() + ) + query, key, 
value = qkv[0], qkv[1], qkv[2] + query = query * self.scale + attn_flat = pointops.attention_step1_v2( + query.float(), key.float(), index_1.int(), index_0_offsets.int(), n_max + ) + + # Position embedding + relative_position = coords[index_0] - coords[index_1] + relative_position = torch.round(relative_position * 100000) / 100000 + relative_position_index = torch.div( + relative_position + 2 * self.window_size - 1e-4, + self.quant_size, + rounding_mode="trunc", + ) + # relative_position_index = (relative_position + 2 * self.window_size - 1e-4) // self.quant_size + assert (relative_position_index >= 0).all() + assert (relative_position_index <= 2 * self.quant_grid_length - 1).all() + + if self.rel_query and self.rel_key: + relative_position_bias = pointops.dot_prod_with_idx_v3( + query.float(), + index_0_offsets.int(), + n_max, + key.float(), + index_1.int(), + self.relative_pos_query_table.float(), + self.relative_pos_key_table.float(), + relative_position_index.int(), + ) + elif self.rel_query: + relative_position_bias = pointops.dot_prod_with_idx( + query.float(), + index_0.int(), + self.relative_pos_query_table.float(), + relative_position_index.int(), + ) # [M, num_heads] + elif self.rel_key: + relative_position_bias = pointops.dot_prod_with_idx( + key.float(), + index_1.int(), + self.relative_pos_key_table.float(), + relative_position_index.int(), + ) # [M, num_heads] + else: + relative_position_bias = 0.0 + + attn_flat += relative_position_bias + softmax_attn_flat = scatter_softmax(src=attn_flat, index=index_0, dim=0) + + if self.rel_value: + x = pointops.attention_step2_with_rel_pos_value_v2( + softmax_attn_flat.float(), + value.float(), + index_0_offsets.int(), + n_max, + index_1.int(), + self.relative_pos_value_table.float(), + relative_position_index.int(), + ) + else: + x = pointops.attention_step2( + softmax_attn_flat.float(), value.float(), index_0.int(), index_1.int() + ) + + x = x.view(n, c) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class MLP(nn.Module): + def __init__(self, in_channels, hidden_channels=None, out_channels=None, drop=0.0): + super().__init__() + out_channels = out_channels or in_channels + hidden_channels = hidden_channels or in_channels + self.fc1 = nn.Linear(in_channels, hidden_channels) + self.act = nn.GELU() + self.fc2 = nn.Linear(hidden_channels, out_channels) + self.drop = nn.Dropout(drop, inplace=True) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Block(nn.Module): + def __init__( + self, + embed_channels, + num_heads, + window_size, + quant_size, + mlp_expend_ratio=4.0, + drop_path=0.0, + qk_scale=None, + rel_query=True, + rel_key=True, + rel_value=True, + qkv_bias=True, + ): + super().__init__() + self.norm1 = nn.LayerNorm(embed_channels) + self.attn = WindowAttention( + embed_channels, + num_heads, + window_size, + quant_size, + scale=qk_scale, + rel_query=rel_query, + rel_key=rel_key, + rel_value=rel_value, + qkv_bias=qkv_bias, + ) + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = nn.LayerNorm(embed_channels) + self.mlp = MLP( + in_channels=embed_channels, + hidden_channels=int(embed_channels * mlp_expend_ratio), + ) + + def forward(self, feats, coords, index_0, index_1, index_0_offsets, n_max): + short_cut = feats + feats = self.norm1(feats) + feats = self.attn(feats, coords, index_0, index_1, index_0_offsets, n_max) + + feats = short_cut + self.drop_path(feats) + feats += 
self.drop_path(self.mlp(self.norm2(feats))) + return feats + + +class BasicLayer(nn.Module): + def __init__( + self, + embed_channels, + out_channels, + depth, + num_heads, + window_size, + quant_size, + mlp_expend_ratio=4.0, + down_ratio=0.25, + down_num_sample=16, + drop_path=None, + qk_scale=None, + down=True, + rel_query=True, + rel_key=True, + rel_value=True, + qkv_bias=True, + ): + super().__init__() + self.depth = depth + self.window_size = window_size + self.quant_size = quant_size + self.down_ratio = down_ratio + + if isinstance(drop_path, list): + drop_path = drop_path + assert len(drop_path) == depth + elif isinstance(drop_path, float): + drop_path = [deepcopy(drop_path) for _ in range(depth)] + else: + drop_path = [0.0 for _ in range(depth)] + + self.blocks = nn.ModuleList() + for i in range(depth): + block = Block( + embed_channels, + num_heads, + window_size, + quant_size, + mlp_expend_ratio=mlp_expend_ratio, + drop_path=drop_path[i], + qk_scale=qk_scale, + rel_query=rel_query, + rel_key=rel_key, + rel_value=rel_value, + qkv_bias=qkv_bias, + ) + self.blocks.append(block) + + self.down = ( + TransitionDown(embed_channels, out_channels, down_ratio, down_num_sample) + if down + else None + ) + + def forward(self, feats, coords, offset): + # window_size -> [window_size, window_size, window_size] + window_size = torch.tensor( + [self.window_size] * 3, dtype=coords.dtype, device=coords.device + ) + new_window_size = 2 * torch.tensor( + [self.window_size] * 3, dtype=coords.dtype, device=coords.device + ) + batch = offset2batch(offset) + + # compute new offset + new_offset = [int(offset[0].item() * self.down_ratio) + 1] + count = int(offset[0].item() * self.down_ratio) + 1 + for i in range(1, offset.shape[0]): + count += ( + int((offset[i].item() - offset[i - 1].item()) * self.down_ratio) + 1 + ) + new_offset.append(count) + new_offset = torch.cuda.IntTensor(new_offset) + down_idx = pointops.furthestsampling(coords, offset.int(), new_offset.int()) + + # compute window mapping + coords_min = coords.min(0).values + v2p_map, p2v_map, counts = grid_sample(coords, batch, window_size, start=None) + shift_size = window_size * 1 / 2 + shift_v2p_map, shift_p2v_map, shift_counts = grid_sample( + coords + shift_size, batch, window_size, start=coords_min + ) + + new_v2p_map, new_p2v_map, new_counts = grid_sample( + coords, batch, new_window_size, start=None + ) + shift_size = new_window_size * 1 / 2 + shift_new_v2p_map, shift_new_p2v_map, shift_new_counts = grid_sample( + coords + shift_size, batch, new_window_size, start=coords_min + ) + + # stratified attention + for i, blk in enumerate(self.blocks): + p2v_map_blk = p2v_map if i % 2 == 0 else shift_p2v_map + counts_blk = counts if i % 2 == 0 else shift_counts + + new_p2v_map_blk = new_p2v_map if i % 2 == 0 else shift_new_p2v_map + new_counts_blk = new_counts if i % 2 == 0 else shift_new_counts + + n, k = p2v_map_blk.shape + mask = torch.arange(k).unsqueeze(0).cuda() < counts_blk.unsqueeze(-1) + mask_mat = mask.unsqueeze(-1) & mask.unsqueeze(-2) + index_0 = p2v_map_blk.unsqueeze(-1).expand(-1, -1, k)[mask_mat] + index_1 = p2v_map_blk.unsqueeze(1).expand(-1, k, -1)[mask_mat] + + down_mask = torch.zeros_like(batch).bool() + down_mask[down_idx.long()] = True + down_mask = down_mask[new_p2v_map_blk] # [n, k], down sample mask + n, k = new_p2v_map_blk.shape + mask = torch.arange(k).unsqueeze(0).cuda() < new_counts_blk.unsqueeze( + -1 + ) # [n, k] + down_mask = down_mask & mask # down sample and window mask + # [n, k, k] query: dense point in large 
windows; key: sparse point in large windows + mask_mat = mask.unsqueeze(-1) & down_mask.unsqueeze(-2) + + if i % 2 == 0: + # [n, k, 3] + # window_coord = (coords[new_p2v_map_blk] - coords_min) // window_size + window_coord = torch.div( + coords[new_p2v_map_blk] - coords_min, + window_size, + rounding_mode="trunc", + ) + else: + # [n, k, 3] + # window_coord = (coords[new_p2v_map_blk] - coords_min + 1/2 * window_size) // window_size + window_coord = torch.div( + coords[new_p2v_map_blk] - coords_min + 1 / 2 * window_size, + window_size, + rounding_mode="trunc", + ) + # [n, k, k], whether pair points are in same small windows + mask_mat_prev = ( + window_coord.unsqueeze(2) != window_coord.unsqueeze(1) + ).any(-1) + mask_mat = mask_mat & mask_mat_prev + + new_index_0 = new_p2v_map_blk.unsqueeze(-1).expand(-1, -1, k)[mask_mat] + new_index_1 = new_p2v_map_blk.unsqueeze(1).expand(-1, k, -1)[mask_mat] + + index_0 = torch.cat([index_0, new_index_0], 0) + index_1 = torch.cat([index_1, new_index_1], 0) + + # rearrange index for acceleration + index_0, indices = torch.sort(index_0) + index_1 = index_1[indices] + index_0_counts = index_0.bincount() + n_max = index_0_counts.max() + index_0_offsets = index_0_counts.cumsum(dim=-1) + index_0_offsets = torch.cat( + [torch.zeros(1, dtype=torch.long).cuda(), index_0_offsets], 0 + ) + + feats = blk(feats, coords, index_0, index_1, index_0_offsets, n_max) + + if self.down: + feats_down, coords_down, offset_down = self.down(feats, coords, offset) + else: + feats_down, coords_down, offset_down = None, None, None + + return feats, coords, offset, feats_down, coords_down, offset_down + + +class TransitionDown(nn.Module): + def __init__(self, in_channels, out_channels, ratio, k, norm_layer=nn.LayerNorm): + super().__init__() + self.ratio = ratio + self.k = k + self.norm = norm_layer(in_channels) if norm_layer else None + self.linear = nn.Linear(in_channels, out_channels, bias=False) + self.pool = nn.MaxPool1d(k) + + def forward(self, feats, coords, offset): + new_offset, count = [int(offset[0].item() * self.ratio) + 1], int( + offset[0].item() * self.ratio + ) + 1 + for i in range(1, offset.shape[0]): + count += ((offset[i].item() - offset[i - 1].item()) * self.ratio) + 1 + new_offset.append(count) + new_offset = torch.cuda.IntTensor(new_offset) + idx = pointops.furthestsampling(coords, offset, new_offset) # (m) + new_coords = coords[idx.long(), :] # (m, 3) + + feats = pointops.queryandgroup( + self.k, coords, new_coords, feats, None, offset, new_offset, use_xyz=False + ) # (m, nsample, 3+c) + m, k, c = feats.shape + feats = ( + self.linear(self.norm(feats.view(m * k, c)).view(m, k, c)) + .transpose(1, 2) + .contiguous() + ) + feats = self.pool(feats).squeeze(-1) # (m, c) + return feats, new_coords, new_offset + + +class TransitionUp(nn.Module): + def __init__(self, in_channels, out_channels): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + + self.linear1 = nn.Sequential( + nn.LayerNorm(out_channels), nn.Linear(out_channels, out_channels) + ) + + self.linear2 = nn.Sequential( + nn.LayerNorm(in_channels), nn.Linear(in_channels, out_channels) + ) + + def forward(self, feats, coords, offset, skip_feats, skip_coords, skip_offset): + feats = self.linear1(skip_feats) + pointops.interpolation( + coords, skip_coords, self.linear2(feats), offset, skip_offset + ) + return feats, skip_coords, skip_offset + + +class KPConvSimpleBlock(nn.Module): + def __init__( + self, + in_channels, + out_channels, + prev_grid_size, + sigma=1.0, + 
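+        # prev_grid_size * sigma below sets the point_influence radius of the
+        # KPConv kernel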
negative_slope=0.2, + bn_momentum=0.02, + ): + super().__init__() + self.kpconv = KPConvLayer( + in_channels, + out_channels, + point_influence=prev_grid_size * sigma, + add_one=False, + ) + self.bn = FastBatchNorm1d(out_channels, momentum=bn_momentum) + self.activation = nn.LeakyReLU(negative_slope=negative_slope) + + def forward(self, feats, xyz, batch, neighbor_idx): + # feats: [N, C] + # coords: [N, 3] + # batch: [N,] + # neighbor_idx: [N, M] + + feats = self.kpconv(xyz, xyz, neighbor_idx, feats) + feats = self.activation(self.bn(feats)) + return feats + + +class KPConvResBlock(nn.Module): + def __init__( + self, + in_channels, + out_channels, + prev_grid_size, + sigma=1.0, + negative_slope=0.2, + bn_momentum=0.02, + ): + super().__init__() + d_2 = out_channels // 4 + activation = nn.LeakyReLU(negative_slope=negative_slope) + self.unary_1 = torch.nn.Sequential( + nn.Linear(in_channels, d_2, bias=False), + FastBatchNorm1d(d_2, momentum=bn_momentum), + activation, + ) + self.unary_2 = torch.nn.Sequential( + nn.Linear(d_2, out_channels, bias=False), + FastBatchNorm1d(out_channels, momentum=bn_momentum), + activation, + ) + self.kpconv = KPConvLayer( + d_2, d_2, point_influence=prev_grid_size * sigma, add_one=False + ) + self.bn = FastBatchNorm1d(out_channels, momentum=bn_momentum) + self.activation = activation + + if in_channels != out_channels: + self.shortcut_op = torch.nn.Sequential( + nn.Linear(in_channels, out_channels, bias=False), + FastBatchNorm1d(out_channels, momentum=bn_momentum), + ) + else: + self.shortcut_op = nn.Identity() + + def forward(self, feats, xyz, batch, neighbor_idx): + # feats: [N, C] + # coords: [N, 3] + # batch: [N,] + # neighbor_idx: [N, M] + + shortcut = feats + feats = self.unary_1(feats) + feats = self.kpconv(xyz, xyz, neighbor_idx, feats) + feats = self.unary_2(feats) + shortcut = self.shortcut_op(shortcut) + feats += shortcut + return feats + + +@MODELS.register_module("ST-v1m2") +class StratifiedTransformer(nn.Module): + def __init__( + self, + in_channels, + num_classes, + channels=(48, 96, 192, 384, 384), + num_heads=(6, 12, 24, 24), + depths=(3, 9, 3, 3), + window_size=(0.2, 0.4, 0.8, 1.6), + quant_size=(0.01, 0.02, 0.04, 0.08), + mlp_expend_ratio=4.0, + down_ratio=0.25, + down_num_sample=16, + kp_ball_radius=2.5 * 0.02, + kp_max_neighbor=34, + kp_grid_size=0.02, + kp_sigma=1.0, + drop_path_rate=0.2, + rel_query=True, + rel_key=True, + rel_value=True, + qkv_bias=True, + stem=True, + ): + super().__init__() + assert ( + KPConvLayer is not None and FastBatchNorm1d is not None + ), "Please make sure torch_points3d is installed" + assert tp is not None, "Please make sure torch_points_kernels is installed" + assert pointops is not None, "Please make sure pointops2 is installed" + # stochastic depth decay rule + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))] + self.kp_ball_radius = kp_ball_radius + self.kp_max_neighbor = kp_max_neighbor + self.stem = stem + if stem: + self.point_embed = nn.ModuleList( + [ + KPConvSimpleBlock( + in_channels, channels[0], kp_grid_size, sigma=kp_sigma + ), + KPConvResBlock( + channels[0], channels[0], kp_grid_size, sigma=kp_sigma + ), + ] + ) + self.down = TransitionDown( + channels[0], channels[1], down_ratio, down_num_sample + ) + else: + assert channels[0] == channels[1] + self.point_embed = nn.ModuleList( + [ + KPConvSimpleBlock( + in_channels, channels[1], kp_grid_size, sigma=kp_sigma + ), + ] + ) + + num_layers = len(depths) + self.layers = nn.ModuleList() + for i in range(num_layers): + layer 
= BasicLayer( + embed_channels=channels[i + 1], + out_channels=channels[i + 2] if i < num_layers - 1 else channels[i + 1], + depth=depths[i], + num_heads=num_heads[i], + window_size=window_size[i], + quant_size=quant_size[i], + mlp_expend_ratio=mlp_expend_ratio, + down_ratio=down_ratio, + down_num_sample=down_num_sample, + drop_path=dpr[sum(depths[:i]) : sum(depths[: i + 1])], + rel_query=rel_query, + rel_key=rel_key, + rel_value=rel_value, + qkv_bias=qkv_bias, + down=True if i < num_layers - 1 else False, + ) + self.layers.append(layer) + + self.up = nn.ModuleList( + [ + TransitionUp(channels[i + 1], channels[i]) + for i in reversed(range(1, num_layers)) + ] + ) + if self.stem: + self.up.append(TransitionUp(channels[1], channels[0])) + + self.classifier = nn.Sequential( + nn.Linear(channels[0], channels[0]), + nn.BatchNorm1d(channels[0]), + nn.ReLU(inplace=True), + nn.Linear(channels[0], num_classes), + ) + + self.init_weights() + + def forward(self, data_dict): + feats = data_dict["feat"] + coords = data_dict["coord"] + offset = data_dict["offset"].int() + batch = offset2batch(offset) + neighbor_idx = tp.ball_query( + self.kp_ball_radius, + self.kp_max_neighbor, + coords, + coords, + mode="partial_dense", + batch_x=batch, + batch_y=batch, + )[0] + + feats_stack = [] + coords_stack = [] + offset_stack = [] + + for i, layer in enumerate(self.point_embed): + feats = layer(feats, coords, batch, neighbor_idx) + + feats = feats.contiguous() + if self.stem: + feats_stack.append(feats) + coords_stack.append(coords) + offset_stack.append(offset) + feats, coords, offset = self.down(feats, coords, offset) + + for i, layer in enumerate(self.layers): + feats, coords, offset, feats_down, coords_down, offset_down = layer( + feats, coords, offset + ) + + feats_stack.append(feats) + coords_stack.append(coords) + offset_stack.append(offset) + + feats = feats_down + coords = coords_down + offset = offset_down + + feats = feats_stack.pop() + coords = coords_stack.pop() + offset = offset_stack.pop() + + for i, up in enumerate(self.up): + feats, coords, offset = up( + feats, + coords, + offset, + feats_stack.pop(), + coords_stack.pop(), + offset_stack.pop(), + ) + + out = self.classifier(feats) + return out + + def init_weights(self): + """Initialize the weights in backbone.""" + + def _init_weights(m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm) or isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + self.apply(_init_weights) diff --git a/Pointcept/pointcept/models/swin3d/__init__.py b/Pointcept/pointcept/models/swin3d/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..36050969d9abb027778008e4d6d8f77710f52392 --- /dev/null +++ b/Pointcept/pointcept/models/swin3d/__init__.py @@ -0,0 +1 @@ +from .swin3d_v1m1_base import Swin3DUNet diff --git a/Pointcept/pointcept/models/swin3d/mink_layers.py b/Pointcept/pointcept/models/swin3d/mink_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..ee3e8cfc002e8311ac196335592c337644659612 --- /dev/null +++ b/Pointcept/pointcept/models/swin3d/mink_layers.py @@ -0,0 +1,249 @@ +""" +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
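+#
+# Thin MinkowskiEngine wrappers (conv/BN/ReLU blocks, sparse linear/norm
+# layers, and residual blocks) shared by the Swin3D backbone.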
+""" + +import torch +import torch.nn as nn +import torch.nn.functional as F +import MinkowskiEngine as ME +import numpy as np + + +def assign_feats(sp, x): + return ME.SparseTensor( + features=x.float(), + coordinate_map_key=sp.coordinate_map_key, + coordinate_manager=sp.coordinate_manager, + ) + + +class MinkConvBN(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size=3, + stride=1, + dilation=1, + bias=False, + dimension=3, + ): + super().__init__() + self.conv_layers = nn.Sequential( + ME.MinkowskiConvolution( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + bias=bias, + dimension=dimension, + ), + ME.MinkowskiBatchNorm(out_channels), + ) + + def forward(self, x): + x = self.conv_layers(x) + return x + + +class MinkConvBNRelu(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size=3, + stride=1, + dilation=1, + bias=False, + dimension=3, + ): + super().__init__() + self.conv_layers = nn.Sequential( + ME.MinkowskiConvolution( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + bias=bias, + dimension=dimension, + ), + ME.MinkowskiBatchNorm(out_channels), + ME.MinkowskiReLU(inplace=True), + ) + + def forward(self, x): + x = self.conv_layers(x) + if x.F.dtype == torch.float16: + x = assign_feats(x, x.F.float()) + return x + + +class MinkDeConvBNRelu(nn.Module): + def __init__( + self, + in_channels, + out_channels, + kernel_size, + stride, + dilation=1, + bias=False, + dimension=3, + ): + super().__init__() + self.conv_layers = nn.Sequential( + ME.MinkowskiConvolutionTranspose( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + bias=bias, + dimension=dimension, + ), + ME.MinkowskiBatchNorm(out_channels), + ME.MinkowskiReLU(), + ) + + def forward(self, x): + x = self.conv_layers(x) + return x + + +class MinkResBlock(nn.Module): + def __init__(self, in_channels, out_channels, stride=1, dilation=1): + super(MinkResBlock, self).__init__() + + self.conv1 = ME.MinkowskiConvolution( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + dilation=dilation, + bias=False, + dimension=3, + ) + self.norm1 = ME.MinkowskiBatchNorm(out_channels) + self.conv2 = ME.MinkowskiConvolution( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=1, + dilation=dilation, + bias=False, + dimension=3, + ) + + self.norm2 = ME.MinkowskiBatchNorm(out_channels) + self.relu = ME.MinkowskiReLU(inplace=True) + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.norm2(out) + + out += residual + out = self.relu(out) + + return out + + +class SparseTensorLinear(nn.Module): + def __init__(self, in_channels, out_channels, bias=False): + super().__init__() + self.linear = nn.Linear(in_channels, out_channels, bias=bias) + + def forward(self, sp): + x = self.linear(sp.F) + return assign_feats(sp, x.float()) + + +class SparseTensorLayerNorm(nn.Module): + def __init__(self, dim): + super().__init__() + self.norm = nn.LayerNorm(dim) + + def forward(self, sp): + x = self.norm(sp.F) + return assign_feats(sp, x.float()) + + +class MinkResBlock_v2(nn.Module): + def __init__(self, in_channels, out_channels): + super().__init__() + d_2 = out_channels // 4 + self.conv1 = torch.nn.Sequential( + 
SparseTensorLinear(in_channels, d_2, bias=False),
+            ME.MinkowskiBatchNorm(d_2),
+            ME.MinkowskiReLU(),
+        )
+        self.unary_2 = torch.nn.Sequential(
+            SparseTensorLinear(d_2, out_channels, bias=False),
+            ME.MinkowskiBatchNorm(out_channels),
+            ME.MinkowskiReLU(),
+        )
+        self.spconv = ME.MinkowskiConvolution(
+            in_channels=d_2,
+            out_channels=d_2,
+            kernel_size=5,
+            stride=1,
+            dilation=1,
+            bias=False,
+            dimension=3,
+        )
+        if in_channels != out_channels:
+            self.shortcut_op = torch.nn.Sequential(
+                SparseTensorLinear(in_channels, out_channels, bias=False),
+                ME.MinkowskiBatchNorm(out_channels),
+            )
+        else:
+            self.shortcut_op = nn.Identity()
+
+    def forward(self, x):
+        # x: ME.SparseTensor with features [N, C]
+        # bottleneck: 1x1 reduce -> 5x5x5 sparse conv -> 1x1 expand, plus shortcut
+        shortcut = x
+        x = self.conv1(x)
+        x = self.spconv(x)
+        x = self.unary_2(x)
+        shortcut = self.shortcut_op(shortcut)
+        x += shortcut
+        return x
+
+
+class MinkResBlock_BottleNeck(nn.Module):
+    def __init__(self, in_channels, out_channels):
+        super(MinkResBlock_BottleNeck, self).__init__()
+        bottle_neck = out_channels // 4
+        self.conv1x1a = MinkConvBNRelu(
+            in_channels, bottle_neck, kernel_size=1, stride=1
+        )
+        self.conv3x3 = MinkConvBNRelu(bottle_neck, bottle_neck, kernel_size=3, stride=1)
+        self.conv1x1b = MinkConvBN(bottle_neck, out_channels, kernel_size=1, stride=1)
+        if in_channels != out_channels:
+            self.conv1x1c = MinkConvBN(
+                in_channels, out_channels, kernel_size=1, stride=1
+            )
+        else:
+            self.conv1x1c = None
+        self.relu = ME.MinkowskiReLU(inplace=True)
+
+    def forward(self, x):
+        residual = x
+        out = self.conv1x1a(x)
+        out = self.conv3x3(out)
+        out = self.conv1x1b(out)
+        if self.conv1x1c is not None:
+            residual = self.conv1x1c(residual)
+        out = self.relu(out + residual)
+
+        return out
diff --git a/Pointcept/pointcept/models/swin3d/swin3d_layers.py b/Pointcept/pointcept/models/swin3d/swin3d_layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..e737e9677ae93f8f5f9188ba774fcd1d0fa42443
--- /dev/null
+++ b/Pointcept/pointcept/models/swin3d/swin3d_layers.py
@@ -0,0 +1,876 @@
+"""
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
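A hedged smoke test for the bottleneck residual block above; it assumes MinkowskiEngine is installed, and the coordinates and channel widths are made up for illustration:

```python
import torch
import MinkowskiEngine as ME

# three voxels, batch index in column 0, 32-dim features
coords = torch.tensor([[0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 1]], dtype=torch.int32)
x = ME.SparseTensor(features=torch.randn(3, 32), coordinates=coords)

block = MinkResBlock_BottleNeck(in_channels=32, out_channels=64)
out = block(x)
print(out.F.shape)  # torch.Size([3, 64]); coordinates unchanged (all strides are 1)
```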
+""" + +import numpy as np +import torch +import torch.nn as nn +from timm.models.layers import DropPath, trunc_normal_ +import MinkowskiEngine as ME +from MinkowskiEngine import SparseTensor +from Swin3D.sparse_dl.attn.attn_coff import ( + SelfAttnAIOFunction, + PosEmb, + TableDims, + IndexMode, + PrecisionMode, +) +import Swin3D.sparse_dl.knn +from Swin3D.sparse_dl.knn import KNN + +from .mink_layers import ( + assign_feats, + SparseTensorLayerNorm, + SparseTensorLinear, +) + + +def query_knn_feature( + K, src_xyz, query_xyz, src_feat, src_offset, query_offset, return_idx=False +): + """ + gather feature in the KNN neighborhood + """ + assert ( + src_xyz.is_contiguous() + and query_xyz.is_contiguous() + and src_feat.is_contiguous() + ) + if query_xyz is None: + query_xyz = src_xyz + query_offset = src_offset + + idx, _ = KNN.apply(K, src_xyz, query_xyz, src_offset, query_offset) + + n, m, c = src_xyz.shape[0], query_xyz.shape[0], src_feat.shape[1] + grouped_feat = src_feat[idx.view(-1).long(), :].view(m, K, c) + + if return_idx: + return grouped_feat, idx + else: + return grouped_feat + + +def knn_linear_interpolation( + src_xyz, query_xyz, src_feat, src_offset, query_offset, K=3 +): + """ + interpolation feature using distance in KNN neighborhood + """ + N, C = query_xyz.shape[0], src_feat.shape[1] + assert ( + src_xyz.is_contiguous() + and query_xyz.is_contiguous() + and src_feat.is_contiguous() + ) + # (N, K) + idx, dist = KNN.apply(K, src_xyz, query_xyz, src_offset, query_offset) + weight = 1.0 / (dist + 1e-8) + norm = torch.sum(weight, dim=1, keepdim=True) + weight = weight / norm + query_feat = torch.zeros((N, C), dtype=src_feat.dtype, device=src_feat.device) + for i in range(K): + query_feat += src_feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1) + return query_feat + + +def sparse_self_attention( + w_w_id: torch.Tensor, w_sizes: torch.Tensor, protocol: str = "v1" +): + """ + Args: + indices [torch.Tensor]: sparse window index with shape [N, 2], N is the total + number of non-empty voxels with indices (window_id, within_window_id). window_id + is ordered and starts from 0; within_window_id is a sparse index to indicate the + offset of kernel_size ** 3. + feats [torch.Tensor]: sprase features of each non-empty voxel with shape [N, C] + Outputs: + [M, 3]: sparse indices of cofficient matrix (window_id, att_a_id, att_b_id). 
att_a_id + and att_b_id are the within_window_id + [M, 1]: the sparse coffient matrix + + Spaces: + W: total number of windows + N: total number of input voxels + M: total number of output cofficients + """ + w_sizes_2 = w_sizes**2 + + # w2n_indices - [W], mapping window index to window global offset in input + # space + w_cumsum = torch.cumsum(w_sizes, dim=-1) + w2n_indices = torch.cat( + [torch.zeros(1, dtype=w_cumsum.dtype, device=w_cumsum.device), w_cumsum[:-1]] + ) + + # w2m indices - [W], mapping window index to window global offset in output + # space + w2_cumsum = torch.cumsum(w_sizes_2, dim=-1) + w2m_indices = torch.cat( + [torch.zeros(1, dtype=w2_cumsum.dtype, device=w2_cumsum.device), w2_cumsum[:-1]] + ) + + # m2w indices - [M], mapping element global offset to the window index + m2w_indices = torch.zeros( + [w2_cumsum[-1]], dtype=w_sizes.dtype, device=w_sizes.device + ) + m2w_offset = torch.zeros( + [w2_cumsum[-1]], dtype=w_sizes.dtype, device=w_sizes.device + ) + m2w_indices[w2m_indices[1:]] = 1 + m2w_offset[w2m_indices[1:]] = w_sizes_2[:-1] + m2w_indices = torch.cumsum(m2w_indices, dim=-1) + m2w_offset = torch.cumsum(m2w_offset, dim=-1) + + # m_indices = [M], element global offset in output space + m_indices = torch.arange( + 0, w2_cumsum[-1], dtype=w_sizes.dtype, device=w_sizes.device + ) + + # m2n_indices - [M], mapping element global offset to the window global offset + # in input space + m2n_indices = w2n_indices[m2w_indices] + + m_offset = m_indices - m2w_offset + m2w_sizes = w_sizes[m2w_indices] + + # print_log_main("m_offset:", m_offset, m_offset.shape) + # print_log_main("m2n_indices:", m2n_indices, m2n_indices.shape) + + y_offset = m2n_indices + m_offset % m2w_sizes + x_offset = m2n_indices + torch.div(m_offset, m2w_sizes, rounding_mode="floor") + + # print_log_main("=================================") + # print_log_main(w_sizes[:5]) + # print_log_main(x_offset[:50]) + # print_log_main(y_offset[:50]) + # coord = torch.stack([m2w_indices, w_w_id[x_offset], w_w_id[y_offset]], axis=-1) + if protocol == "v1": + return x_offset, y_offset + elif protocol == "v2": + return x_offset, y_offset, m2w_indices, w_sizes, w2n_indices, w2m_indices + + +class Mlp(nn.Module): + def __init__( + self, + in_features, + hidden_features=None, + out_features=None, + act_layer=nn.GELU, + drop=0.0, + ): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class GridCoordsDown(nn.Module): + """ + downsample the grid coordinates + keep the nearest point to the average point of the downsampled grid + """ + + def __init__(self, stride): + super().__init__() + self.stride = stride + self.avg_pool = ME.MinkowskiAvgPooling( + kernel_size=self.stride, stride=self.stride, dimension=3 + ) + self.unpool = ME.MinkowskiPoolingTranspose( + kernel_size=stride, stride=stride, dimension=3 + ) + self.max_pool = ME.MinkowskiMaxPooling( + kernel_size=self.stride, stride=self.stride, dimension=3 + ) + + def forward(self, coords_sp, sp, return_map=False): + device = sp.C.device + # is_pool = True means pooling map + # is_pool = False means conv map (query as center) + + N = sp.shape[0] + avg_coords_sp = self.avg_pool(coords_sp) + dist_sp = 
self.unpool(avg_coords_sp) - coords_sp + dist = dist_sp.F + dist = -torch.sqrt((dist**2).sum(dim=1)).unsqueeze(1) + dist_sp = assign_feats(dist_sp, dist) + min_dist_sp = self.max_pool(dist_sp) + map_pair = sp.coordinate_manager.kernel_map( + dist_sp.coordinate_map_key, + min_dist_sp.coordinate_map_key, + stride=self.stride, + kernel_size=self.stride, + is_pool=True, + )[0] + in_map, out_map = map_pair + broad_min_dist_sp = self.unpool(min_dist_sp) + mask = (broad_min_dist_sp.F == dist_sp.F).squeeze(1) + in_map = in_map[mask].long() + out_map = out_map[mask].long() + downsample_map = torch.zeros(N, dtype=torch.long, device=device) - 1 + downsample_map[out_map] = in_map + assert (downsample_map >= 0).all() + assert (dist_sp.F[downsample_map] == min_dist_sp.F).all() + new_coords = coords_sp.F[downsample_map] + new_coords_sp = assign_feats(sp, new_coords) + if return_map: + return new_coords_sp, downsample_map + else: + return new_coords_sp + + +def get_offset(batch): + offset = [] + bs = batch.max() + 1 + for i in range(bs): + offset.append(torch.sum(batch == i)) + offset = torch.cuda.IntTensor(offset) + offset = offset.cumsum(dim=0).int() + return offset + + +class GridDownsample(nn.Module): + """ + use stride to downsample voxel + use grid maxpooling with kernel_size + """ + + def __init__(self, in_channels, out_channels, kernel_size=2, stride=2): + super().__init__() + self.kernel_size = kernel_size + self.stride = stride + self.in_channels = in_channels + self.out_channels = out_channels + self.sp_pool = ME.MinkowskiMaxPooling( + kernel_size=kernel_size, stride=stride, dimension=3 + ) + self.coords_pool = GridCoordsDown(stride=stride) + self.norm = SparseTensorLayerNorm(in_channels) + self.linear = SparseTensorLinear(in_channels, out_channels) + + def forward(self, sp, coords_sp): + sp_down = self.sp_pool(self.linear(self.norm(sp))) + coords_sp_down = self.coords_pool(coords_sp, sp_down) + return sp_down, coords_sp_down + + def extra_repr(self) -> str: + return f"kernel_size={self.kernel_size}, stride={self.stride}, in_channels={self.in_channels}, out_channels={self.out_channels}" + + +class GridKNNDownsample(nn.Module): + """ + use stride to downsample voxel + use KNN to do maxpooling + """ + + def __init__(self, in_channels, out_channels, kernel_size=2, stride=2): + super().__init__() + self.stride = stride + self.in_channels = in_channels + self.out_channels = out_channels + self.k = 16 + self.sp_pool = ME.MinkowskiMaxPooling( + kernel_size=stride, stride=stride, dimension=3 + ) + self.coords_pool = GridCoordsDown(stride=stride) + self.norm = nn.LayerNorm(in_channels) + self.linear = nn.Linear(in_channels, out_channels, bias=False) + self.pool = nn.MaxPool1d(self.k) + + def forward(self, sp, coords_sp): + # calculate the voxel + sp_down = self.sp_pool(sp) + # for downsampled cRSE + coords_sp_down = self.coords_pool(coords_sp, sp_down) + offset = get_offset(sp.C[:, 0]) + n_offset = get_offset(sp_down.C[:, 0]) + + xyz = coords_sp.F[:, 1:4].detach().contiguous() + n_xyz = coords_sp_down.F[:, 1:4].detach().contiguous() + feats = query_knn_feature(self.k, xyz, n_xyz, sp.F, offset, n_offset) + m, k, c = feats.shape + feats = ( + self.linear(self.norm(feats.view(m * k, c)).view(m, k, c)) + .transpose(1, 2) + .contiguous() + ) + feats = self.pool(feats).squeeze(-1) + sp = assign_feats(sp_down, feats.float()) + coords_sp = coords_sp_down + return sp, coords_sp + + def extra_repr(self) -> str: + return f"kernel_size={self.k}, stride={self.stride}, in_channels={self.in_channels}, 
out_channels={self.out_channels}" + + +class Upsample(nn.Module): + """ + upsample using trilinear interpolation + follower by attn block according to self.attn + """ + + def __init__( + self, + in_channels, + out_channels, + num_heads, + window_size, + quant_size, + attn=True, + up_k=3, + cRSE="XYZ_RGB", + fp16_mode=0, + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + + self.linear1 = nn.Sequential( + nn.LayerNorm(out_channels), nn.Linear(out_channels, out_channels) + ) + self.linear2 = nn.Sequential( + nn.LayerNorm(in_channels), nn.Linear(in_channels, out_channels) + ) + self.up_k = up_k + self.attn = attn and window_size > 0 + if self.attn: + self.block = BasicLayer( + dim=out_channels, + depth=1, + num_heads=num_heads, + window_size=window_size, + quant_size=quant_size, + drop_path=0.1, + downsample=None, + out_channels=None, + cRSE=cRSE, + fp16_mode=fp16_mode, + ) + + def forward(self, sp, coords_sp, sp_up, coords_sp_up): + feats = sp.F + support_feats = sp_up.F + xyz = coords_sp.F[:, 1:4].detach().contiguous() + support_xyz = coords_sp_up.F[:, 1:4].detach().contiguous() + offset = get_offset(sp.C[:, 0]) + support_offset = get_offset(sp_up.C[:, 0]) + + feats = self.linear1(support_feats) + knn_linear_interpolation( + xyz, support_xyz, self.linear2(feats), offset, support_offset, K=self.up_k + ) + sp_up = assign_feats(sp_up, feats) + if self.attn: + sp_up, _, _ = self.block(sp_up, coords_sp_up) + return sp_up + + def extra_repr(self) -> str: + return f"up_k={self.up_k}, in_channels={self.in_channels}, out_channels={self.out_channels}, attn={self.attn}" + + +class WindowAttention(nn.Module): + """ + Window based multi-head self attention (W-MSA) module with cRSE. + Designed for sparse structure + It supports both of shifted and non-shifted window. + + Args: + dim (int): Number of input channels. + window_size (tuple[int]): The height and width of the window. + quant_size (int): quant_size for for finer cRSE table + num_heads (int): Number of attention heads. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set + attn_drop (float, optional): Dropout ratio of attention weight. Default: 0.0 + proj_drop (float, optional): Dropout ratio of output. Default: 0.0 + cRSE (str | 'XYZ', 'XYZ_RGB', 'XYZ_RGB_NORM'): cRSE mode. 
Default: 'XYZ_RGB' + fp16_mode (int | 0, 1, 2): fp16 mode for attention module, Default: 0 + 0: fp32 forward and fp32 backward + 1: fp16 forward and fp32 backward + 2: fp16 forward and fp16 backward + """ + + def __init__( + self, + dim, + window_size, + quant_size, + num_heads, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + cRSE="XYZ_RGB", + fp16_mode=0, + ): + super().__init__() + self.dim = dim + self.window_size = window_size + self.num_heads = num_heads + head_dim = dim // num_heads + self.scale = qk_scale or head_dim**-0.5 + + # color in [-1, 1], color_windowsize = 2 + # normal in [-1, 1], normal_windowsize = 2 + self.color_windowsize = 2 + self.normal_windowsize = 2 + + self.fp16_mode = fp16_mode + + table_offsets = [] + self.cRSE = cRSE + if "XYZ" in cRSE: + self.xyz_quant_size = quant_size + quant_grid_length_xyz = window_size * self.xyz_quant_size + table_shape_xyz = (3, 2 * quant_grid_length_xyz, num_heads, head_dim) + self.query_xyz_table = nn.Parameter(torch.zeros(table_shape_xyz)) + trunc_normal_(self.query_xyz_table, std=0.02) + self.key_xyz_table = nn.Parameter(torch.zeros(table_shape_xyz)) + trunc_normal_(self.key_xyz_table, std=0.02) + self.value_xyz_table = nn.Parameter(torch.zeros(table_shape_xyz)) + trunc_normal_(self.value_xyz_table, std=0.02) + table_offsets += [np.prod(table_shape_xyz[1:])] * 3 + + if "RGB" in cRSE: + self.color_quant_size = quant_size * 2 + quant_grid_length_rgb = self.color_windowsize * self.color_quant_size + table_shape_rgb = (3, 2 * quant_grid_length_rgb, num_heads, head_dim) + self.query_rgb_table = nn.Parameter(torch.zeros(table_shape_rgb)) + trunc_normal_(self.query_rgb_table, std=0.02) + self.key_rgb_table = nn.Parameter(torch.zeros(table_shape_rgb)) + trunc_normal_(self.key_rgb_table, std=0.02) + self.value_rgb_table = nn.Parameter(torch.zeros(table_shape_rgb)) + trunc_normal_(self.value_rgb_table, std=0.02) + table_offsets += [np.prod(table_shape_rgb[1:])] * 3 + + if "NORM" in cRSE: + self.normal_quant_size = quant_size * 2 + quant_grid_length_norm = self.normal_windowsize * self.normal_quant_size + table_shape_norm = (3, 2 * quant_grid_length_norm, num_heads, head_dim) + self.query_norm_table = nn.Parameter(torch.zeros(table_shape_norm)) + trunc_normal_(self.query_norm_table, std=0.02) + self.key_norm_table = nn.Parameter(torch.zeros(table_shape_norm)) + trunc_normal_(self.key_norm_table, std=0.02) + self.value_norm_table = nn.Parameter(torch.zeros(table_shape_norm)) + trunc_normal_(self.value_norm_table, std=0.02) + table_offsets += [np.prod(table_shape_norm[1:])] * 3 + + self.table_offsets = table_offsets + + self.quant_size = quant_size + + self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop, inplace=True) + self.proj = nn.Linear(dim, dim) + self.proj_drop = nn.Dropout(proj_drop, inplace=True) + + self.softmax = nn.Softmax(dim=-1) + + def forward(self, feats: torch.Tensor, attn_args): + """Forward function. 
+ + Args: + feats: N, C + attn_args: arguments for computing attention + """ + num_v, _ = feats.shape + num_sc = self.dim // self.num_heads + + ( + x_offset, + y_offset, + m2w_indices, + w_sizes, + w2n_indices, + n2n_indices, + w2m_indices, + n_coords, + ) = attn_args + + # Query, Key, Value + qkv = self.qkv(feats) + qkv = ( + qkv.reshape(num_v, 3, self.num_heads, num_sc) + .permute(1, 0, 2, 3) + .contiguous() + ) + query, key, value = qkv[0], qkv[1], qkv[2] # [N, num_heads, C//num_heads] + query = query * self.scale + + table_offsets = torch.IntTensor(self.table_offsets).cuda() + query_table, key_table, value_table = [], [], [] + n_cRSE = [] + if "XYZ" in self.cRSE: + n_xyz = n_coords[:, 0:3] + n_xyz = n_xyz * self.quant_size + n_cRSE.append(n_xyz) + query_table.append(self.query_xyz_table.view(-1)) + key_table.append(self.key_xyz_table.view(-1)) + value_table.append(self.value_xyz_table.view(-1)) + if "RGB" in self.cRSE: + n_rgb = n_coords[:, 3:6] + n_rgb = n_rgb * self.color_quant_size + n_cRSE.append(n_rgb) + query_table.append(self.query_rgb_table.view(-1)) + key_table.append(self.key_rgb_table.view(-1)) + value_table.append(self.value_rgb_table.view(-1)) + if "NORM" in self.cRSE: + n_norm = n_coords[:, 6:9] + n_norm = n_norm * self.normal_quant_size + n_cRSE.append(n_norm) + query_table.append(self.query_norm_table.view(-1)) + key_table.append(self.key_norm_table.view(-1)) + value_table.append(self.value_norm_table.view(-1)) + + n_cRSE = torch.cat(n_cRSE, dim=1) + + indices = [m2w_indices, w_sizes, w2m_indices, w2n_indices, n2n_indices, n_cRSE] + query_table = torch.cat(query_table) + key_table = torch.cat(key_table) + value_table = torch.cat(value_table) + + if self.fp16_mode == 0: + # do not use fp16 + # cast q,k,v to fp32 in forward and backward + fp16_mode = PrecisionMode.HALF_NONE + elif self.fp16_mode == 1: + # use fp16 only in forward + fp16_mode = PrecisionMode.HALF_FORWARD + elif self.fp16_mode == 2: + # use fp16 both in forward and backward + fp16_mode = PrecisionMode.HALF_ALL + + updated_values = SelfAttnAIOFunction.apply( + query, + key, + value, + query_table, + key_table, + value_table, + table_offsets, + indices, + PosEmb.SEPARATE, + TableDims.D0, + IndexMode.INDIRECT, + fp16_mode, + ) + + updated_values = updated_values.flatten(1) + updated_feats = updated_values.view(num_v, self.dim) + + updated_feats = self.proj(updated_feats) + updated_feats = self.proj_drop(updated_feats) # [N, C] + + return updated_feats + + +class SwinTransformerBlock(nn.Module): + def __init__( + self, + dim, + num_heads, + window_size, + quant_size, + drop_path=0.0, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + act_layer=nn.GELU, + norm_layer=nn.LayerNorm, + cRSE="XYZ_RGB", + fp16_mode=0, + ): + super().__init__() + self.window_size = window_size + + self.norm1 = norm_layer(dim) + self.attn = WindowAttention( + dim, + window_size=self.window_size, + quant_size=quant_size, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + cRSE=cRSE, + fp16_mode=fp16_mode, + ) + + self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp( + in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer + ) + + def forward(self, feats, attn_args): + # feats: [N, c] + short_cut = feats + feats = self.norm1(feats) + feats = self.attn(feats, attn_args) # [N, c] + + feats = short_cut + self.drop_path(feats) + feats = feats + self.drop_path(self.mlp(self.norm2(feats))) + + return feats 
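`SwinTransformerBlock` above follows the standard pre-norm residual layout: attention and MLP are each wrapped as `x + drop_path(f(norm(x)))`. A dense-tensor sketch of the same control flow, with a linear layer standing in for the sparse window attention so it runs without the custom kernels:

```python
import torch
import torch.nn as nn

class PreNormBlockSketch(nn.Module):
    def __init__(self, dim: int, mlp_ratio: float = 4.0):
        super().__init__()
        self.norm1 = nn.LayerNorm(dim)
        self.mixer = nn.Linear(dim, dim)  # stand-in for WindowAttention
        self.norm2 = nn.LayerNorm(dim)
        self.mlp = nn.Sequential(
            nn.Linear(dim, int(dim * mlp_ratio)),
            nn.GELU(),
            nn.Linear(int(dim * mlp_ratio), dim),
        )

    def forward(self, feats: torch.Tensor) -> torch.Tensor:
        feats = feats + self.mixer(self.norm1(feats))  # first residual branch
        feats = feats + self.mlp(self.norm2(feats))    # second residual branch
        return feats

print(PreNormBlockSketch(48)(torch.randn(10, 48)).shape)  # torch.Size([10, 48])
```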
+ + +class BasicLayer(nn.Module): + """A basic Swin3D layer for one stage. + + Args: + dim (int): Number of input channels. + depth (int): Number of blocks. + num_heads (int): Number of attention heads. + window_size (int): Local window size. + quant_size (int): quant_size for for finer cRSE table + mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. + qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True + qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set. + drop_path (float | tuple[float], optional): Stochastic depth rate. Default: 0.0 + norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm + downsample (nn.Module | None, optional): Downsample layer at the end of the layer. Default: None + cRSE (str | 'XYZ', 'XYZ_RGB', 'XYZ_RGB_NORM'): cRSE mode. Default: 'XYZ_RGB' + fp16_mode (int | 0, 1, 2): fp16 mode for attention module, Default: 0 + 0: fp32 forward and fp32 backward + 1: fp16 forward and fp32 backward + 2: fp16 forward and fp16 backward + """ + + def __init__( + self, + dim, + depth, + num_heads, + window_size, + quant_size, + out_channels=None, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + drop_path=0.0, + norm_layer=nn.LayerNorm, + downsample=None, + down_stride=2, + cRSE="XYZ_RGB", + fp16_mode=0, + ): + super().__init__() + self.window_size = window_size + self.depth = depth + self.dim = dim + self.num_heads = num_heads + self.quant_size = quant_size + self.cRSE = cRSE + self.fp16_mode = fp16_mode + + self.shift_size = window_size // 2 + # build blocks + self.blocks = nn.ModuleList( + [ + SwinTransformerBlock( + dim, + num_heads, + window_size, + quant_size, + drop_path=( + drop_path[i] if isinstance(drop_path, list) else drop_path + ), + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + norm_layer=norm_layer, + cRSE=cRSE, + fp16_mode=fp16_mode, + ) + for i in range(depth) + ] + ) + + self.pool = ME.MinkowskiMaxPooling( + kernel_size=self.window_size, stride=self.window_size, dimension=3 + ) + + if downsample is not None: + if out_channels is None: + out_channels = dim * 2 + self.downsample = downsample( + dim, out_channels, kernel_size=down_stride, stride=down_stride + ) + else: + self.downsample = None + + def get_map_pair(self, sp): + """ + use minkowski pool to calculate windows + get the mapping from voxel to window + """ + window_size = [self.window_size] * 3 + pool_sp = self.pool(sp) + windows = pool_sp.C + window_N = windows.shape[0] + + stride_in = sp.coordinate_map_key.get_tensor_stride() + x, y, z = [ + torch.arange(window_size[i], device=self.device) * stride_in[i] + for i in range(3) + ] + x, y, z = torch.meshgrid(x, y, z) + i = torch.zeros_like(x, device=self.device) + local_window = torch.stack([i, x, y, z], dim=-1).flatten(0, -2) + all_windows = windows.unsqueeze(1) + local_window.unsqueeze(0) + all_windows = all_windows.flatten(0, -2).int() + cm = sp.coordinate_manager + query_key, (map, inverse_map) = cm.insert_and_map( + all_windows, tensor_stride=stride_in + ) + map_pair = cm.kernel_map(query_key, sp.coordinate_map_key, kernel_size=1)[0] + return map_pair, window_N + + def get_window_mapping(self, sp): + """ + calculate the relationshape in the window: + w_w_id: non-empty idx inside the window(sorted by window) + w_w_xyz: xyz inside the window(sorted by window) + nempty_num: non-empty voxel number in each window + sort_idx: sort voxel according to window_id, to gather the point inside the same window + inv_sort_idx: inverse sort 
index + """ + map_pair, window_N = self.get_map_pair(sp) + window_size = self.window_size + nW = window_size**3 + in_map, out_map = map_pair + in_map, sort_idx = torch.sort(in_map) + # assert out_map == arange(out_map.shape[0]) + out_map = out_map[sort_idx] + sort_idx = out_map.long() + inv_sort_idx = torch.zeros_like(sort_idx) + inv_sort_idx[sort_idx] = torch.arange( + sort_idx.shape[0], dtype=sort_idx.dtype, device=self.device + ) + N = window_N * nW + v2w_mask = torch.zeros(N, dtype=torch.bool, device=self.device) + w_id = ( + torch.arange(window_N, dtype=torch.long, device=self.device) + .unsqueeze(1) + .repeat(1, nW) + .view(-1) + ) + w_w_id = ( + torch.arange(nW, dtype=torch.long, device=self.device) + .unsqueeze(0) + .repeat(window_N, 1) + .view(-1) + ) + v2w_mask[in_map.long()] = True + nempty_num = v2w_mask.view(-1, nW).sum(dim=-1) + w_id = w_id[in_map.long()] + w_w_id = w_w_id[in_map.long()] + w_w_xyz = torch.stack( + [ + w_w_id // window_size // window_size, + w_w_id // window_size % window_size, + w_w_id % window_size, + ], + dim=-1, + ) + return w_w_id, w_w_xyz, nempty_num, sort_idx, inv_sort_idx + + def get_index01(self, sp, local_xyz, colors): + """ + calculate the arguments for sparse attention + """ + ( + w_w_id, + w_w_xyz, + nempty_num, + n2n_indices, + inv_sort_idx, + ) = self.get_window_mapping(sp) + local_xyz = local_xyz[n2n_indices] + colors = colors[n2n_indices] + # recover the relative pos in the voxel + n_coords = w_w_xyz + local_xyz + n_coords = torch.cat([n_coords, colors], dim=1) + ( + x_offset, + y_offset, + m2w_indices, + w_sizes, + w2n_indices, + w2m_indices, + ) = sparse_self_attention(w_w_id, nempty_num, protocol="v2") + return ( + x_offset, + y_offset, + m2w_indices, + w_sizes, + w2n_indices, + n2n_indices, + w2m_indices, + n_coords, + ) + + def get_shifted_sp(self, sp): + """ + get the shifted sparse tensor for shift-window + """ + stride_in = sp.coordinate_map_key.get_tensor_stride() + shift_size = self.shift_size * stride_in[0] + shifted_C = sp.C.clone() + shifted_C[:, 1:] += shift_size + shifted_sp = SparseTensor( + features=sp.F, + coordinates=shifted_C, + device=self.device, + tensor_stride=stride_in, + ) + return shifted_sp + + def get_window_pos(self, sp): + stride_in = sp.coordinate_map_key.get_tensor_stride() + return (sp.C[:, 1:] / stride_in[0]) % self.window_size + + def forward(self, sp, coords_sp): + """ + xyz: position of point inside voxel + colors: other signal for cRSE, include colors and normals + local_xyz: relative position of point indide voxel(using for finer cRSE table) + """ + colors = coords_sp.F[:, 4:] + xyz = coords_sp.F[:, :4] + local_xyz = (xyz - coords_sp.C)[ + :, 1: + ] / coords_sp.coordinate_map_key.get_tensor_stride()[0] + self.device = sp.device + sp_shift = self.get_shifted_sp(sp) + + attn_args = self.get_index01(sp, local_xyz, colors) + attn_args_shift = self.get_index01(sp_shift, local_xyz, colors) + + feats = sp.F + for i, blk in enumerate(self.blocks): + attn_args_blk = attn_args if i % 2 == 0 else attn_args_shift + feats = blk(feats, attn_args_blk) # [N, C] + + sp = assign_feats(sp, feats) + if self.downsample is not None: + sp_down, coords_sp = self.downsample(sp, coords_sp) + return sp, sp_down, coords_sp + else: + return sp, sp, coords_sp + + def extra_repr(self) -> str: + return f"window_size={self.window_size}, depth={self.depth}, channel={self.dim}, num_heads={self.num_heads}, quant_size={self.quant_size}, cRSE={self.cRSE}, fp16_mode={self.fp16_mode}" diff --git 
a/Pointcept/pointcept/models/swin3d/swin3d_v1m1_base.py b/Pointcept/pointcept/models/swin3d/swin3d_v1m1_base.py new file mode 100644 index 0000000000000000000000000000000000000000..1295e5d791e8ac33d3d4c43be03d4f08ade1345f --- /dev/null +++ b/Pointcept/pointcept/models/swin3d/swin3d_v1m1_base.py @@ -0,0 +1,190 @@ +import torch +import torch.nn as nn +import MinkowskiEngine as ME +from MinkowskiEngine import SparseTensor +from timm.models.layers import trunc_normal_ + +from .mink_layers import MinkConvBNRelu, MinkResBlock +from .swin3d_layers import GridDownsample, GridKNNDownsample, BasicLayer, Upsample +from pointcept.models.builder import MODELS +from pointcept.models.utils import offset2batch, batch2offset + + +@MODELS.register_module("Swin3D-v1m1") +class Swin3DUNet(nn.Module): + def __init__( + self, + in_channels, + num_classes, + base_grid_size, + depths, + channels, + num_heads, + window_sizes, + quant_size, + drop_path_rate=0.2, + up_k=3, + num_layers=5, + stem_transformer=True, + down_stride=2, + upsample="linear", + knn_down=True, + cRSE="XYZ_RGB", + fp16_mode=0, + ): + super().__init__() + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + if knn_down: + downsample = GridKNNDownsample + else: + downsample = GridDownsample + + self.cRSE = cRSE + if stem_transformer: + self.stem_layer = MinkConvBNRelu( + in_channels=in_channels, + out_channels=channels[0], + kernel_size=3, + stride=1, + ) + self.layer_start = 0 + else: + self.stem_layer = nn.Sequential( + MinkConvBNRelu( + in_channels=in_channels, + out_channels=channels[0], + kernel_size=3, + stride=1, + ), + MinkResBlock(in_channels=channels[0], out_channels=channels[0]), + ) + self.downsample = downsample( + channels[0], channels[1], kernel_size=down_stride, stride=down_stride + ) + self.layer_start = 1 + self.layers = nn.ModuleList( + [ + BasicLayer( + dim=channels[i], + depth=depths[i], + num_heads=num_heads[i], + window_size=window_sizes[i], + quant_size=quant_size, + drop_path=dpr[sum(depths[:i]) : sum(depths[: i + 1])], + downsample=downsample if i < num_layers - 1 else None, + down_stride=down_stride if i == 0 else 2, + out_channels=channels[i + 1] if i < num_layers - 1 else None, + cRSE=cRSE, + fp16_mode=fp16_mode, + ) + for i in range(self.layer_start, num_layers) + ] + ) + + if "attn" in upsample: + up_attn = True + else: + up_attn = False + + self.upsamples = nn.ModuleList( + [ + Upsample( + channels[i], + channels[i - 1], + num_heads[i - 1], + window_sizes[i - 1], + quant_size, + attn=up_attn, + up_k=up_k, + cRSE=cRSE, + fp16_mode=fp16_mode, + ) + for i in range(num_layers - 1, 0, -1) + ] + ) + + self.classifier = nn.Sequential( + nn.Linear(channels[0], channels[0]), + nn.BatchNorm1d(channels[0]), + nn.ReLU(inplace=True), + nn.Linear(channels[0], num_classes), + ) + self.num_classes = num_classes + self.base_grid_size = base_grid_size + self.init_weights() + + def forward(self, data_dict): + grid_coord = data_dict["grid_coord"] + feat = data_dict["feat"] + coord_feat = data_dict["coord_feat"] + coord = data_dict["coord"] + offset = data_dict["offset"] + batch = offset2batch(offset) + in_field = ME.TensorField( + features=torch.cat( + [ + batch.unsqueeze(-1), + coord / self.base_grid_size, + coord_feat / 1.001, + feat, + ], + dim=1, + ), + coordinates=torch.cat([batch.unsqueeze(-1).int(), grid_coord.int()], dim=1), + quantization_mode=ME.SparseTensorQuantizationMode.UNWEIGHTED_AVERAGE, + minkowski_algorithm=ME.MinkowskiAlgorithm.SPEED_OPTIMIZED, + 
device=feat.device, + ) + + sp = in_field.sparse() + coords_sp = SparseTensor( + features=sp.F[:, : coord_feat.shape[-1] + 4], + coordinate_map_key=sp.coordinate_map_key, + coordinate_manager=sp.coordinate_manager, + ) + sp = SparseTensor( + features=sp.F[:, coord_feat.shape[-1] + 4 :], + coordinate_map_key=sp.coordinate_map_key, + coordinate_manager=sp.coordinate_manager, + ) + sp_stack = [] + coords_sp_stack = [] + sp = self.stem_layer(sp) + if self.layer_start > 0: + sp_stack.append(sp) + coords_sp_stack.append(coords_sp) + sp, coords_sp = self.downsample(sp, coords_sp) + + for i, layer in enumerate(self.layers): + coords_sp_stack.append(coords_sp) + sp, sp_down, coords_sp = layer(sp, coords_sp) + sp_stack.append(sp) + assert (coords_sp.C == sp_down.C).all() + sp = sp_down + + sp = sp_stack.pop() + coords_sp = coords_sp_stack.pop() + for i, upsample in enumerate(self.upsamples): + sp_i = sp_stack.pop() + coords_sp_i = coords_sp_stack.pop() + sp = upsample(sp, coords_sp, sp_i, coords_sp_i) + coords_sp = coords_sp_i + + output = self.classifier(sp.slice(in_field).F) + return output + + def init_weights(self): + """Initialize the weights in backbone.""" + + def _init_weights(m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=0.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm) or isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + self.apply(_init_weights) diff --git a/Pointcept/pointcept/models/utils/__init__.py b/Pointcept/pointcept/models/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..66e6bc0f62993abb3625a9598f54e7775aeb0008 --- /dev/null +++ b/Pointcept/pointcept/models/utils/__init__.py @@ -0,0 +1,4 @@ +from .misc import offset2batch, offset2bincount, batch2offset, off_diagonal +from .checkpoint import checkpoint +from .serialization import encode, decode +from .structure import Point diff --git a/Pointcept/pointcept/models/utils/checkpoint.py b/Pointcept/pointcept/models/utils/checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..58820352bd5d1b37b3905b038816323253ffd3de --- /dev/null +++ b/Pointcept/pointcept/models/utils/checkpoint.py @@ -0,0 +1,57 @@ +""" +Checkpoint Utils for Models + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import torch + + +class CheckpointFunction(torch.autograd.Function): + @staticmethod + def forward(ctx, run_function, length, *args): + ctx.run_function = run_function + ctx.input_tensors = list(args[:length]) + ctx.input_params = list(args[length:]) + + with torch.no_grad(): + output_tensors = ctx.run_function(*ctx.input_tensors) + return output_tensors + + @staticmethod + def backward(ctx, *output_grads): + ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors] + with torch.enable_grad(): + # Fixes a bug where the first op in run_function modifies the + # Tensor storage in place, which is not allowed for detach()'d + # Tensors. 
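A sketch of how the `checkpoint` helper defined just below is typically wired into a module: pass the callable, its tensor inputs, the parameters it closes over, and a flag toggling recomputation. The module and names here are hypothetical:

```python
import torch
import torch.nn as nn

class CheckpointedStage(nn.Module):
    def __init__(self, dim: int, enable_checkpoint: bool = True):
        super().__init__()
        self.block = nn.Sequential(nn.Linear(dim, dim), nn.ReLU())
        self.enable_checkpoint = enable_checkpoint

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # when the flag is True, activations of self.block are recomputed
        # during backward instead of being cached
        return checkpoint(
            self.block, (x,), self.block.parameters(), self.enable_checkpoint
        )

stage = CheckpointedStage(8)
y = stage(torch.randn(4, 8, requires_grad=True))
y.sum().backward()  # gradients flow through the recomputed graph
```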
+ shallow_copies = [x.view_as(x) for x in ctx.input_tensors] + output_tensors = ctx.run_function(*shallow_copies) + input_grads = torch.autograd.grad( + output_tensors, + ctx.input_tensors + ctx.input_params, + output_grads, + allow_unused=True, + ) + del ctx.input_tensors + del ctx.input_params + del output_tensors + return (None, None) + input_grads + + +def checkpoint(func, inputs, params, flag): + """ + Evaluate a function without caching intermediate activations, allowing for + reduced memory at the expense of extra compute in the backward pass. + :param func: the function to evaluate. + :param inputs: the argument sequence to pass to `func`. + :param params: a sequence of parameters `func` depends on but does not + explicitly take as arguments. + :param flag: if False, disable gradient checkpointing. + """ + if flag: + args = tuple(inputs) + tuple(params) + return CheckpointFunction.apply(func, len(inputs), *args) + else: + return func(*inputs) diff --git a/Pointcept/pointcept/models/utils/misc.py b/Pointcept/pointcept/models/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..61dfdfb44a82fc0ef585ca5732518fe85e466889 --- /dev/null +++ b/Pointcept/pointcept/models/utils/misc.py @@ -0,0 +1,35 @@ +""" +General Utils for Models + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import torch + + +@torch.inference_mode() +def offset2bincount(offset): + return torch.diff( + offset, prepend=torch.tensor([0], device=offset.device, dtype=torch.long) + ) + + +@torch.inference_mode() +def offset2batch(offset): + bincount = offset2bincount(offset) + return torch.arange( + len(bincount), device=offset.device, dtype=torch.long + ).repeat_interleave(bincount) + + +@torch.inference_mode() +def batch2offset(batch): + return torch.cumsum(batch.bincount(), dim=0).long() + + +def off_diagonal(x): + # return a flattened view of the off-diagonal elements of a square matrix + n, m = x.shape + assert n == m + return x.flatten()[:-1].view(n - 1, n + 1)[:, 1:].flatten() diff --git a/Pointcept/pointcept/models/utils/serialization/__init__.py b/Pointcept/pointcept/models/utils/serialization/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..058c5e1001c76d9c7014bf0bbb824eec4f54f476 --- /dev/null +++ b/Pointcept/pointcept/models/utils/serialization/__init__.py @@ -0,0 +1,8 @@ +from .default import ( + encode, + decode, + z_order_encode, + z_order_decode, + hilbert_encode, + hilbert_decode, +) diff --git a/Pointcept/pointcept/models/utils/serialization/default.py b/Pointcept/pointcept/models/utils/serialization/default.py new file mode 100644 index 0000000000000000000000000000000000000000..15898b55625fc0e1125db9b713e900892f04176c --- /dev/null +++ b/Pointcept/pointcept/models/utils/serialization/default.py @@ -0,0 +1,59 @@ +import torch +from .z_order import xyz2key as z_order_encode_ +from .z_order import key2xyz as z_order_decode_ +from .hilbert import encode as hilbert_encode_ +from .hilbert import decode as hilbert_decode_ + + +@torch.inference_mode() +def encode(grid_coord, batch=None, depth=16, order="z"): + assert order in {"z", "z-trans", "hilbert", "hilbert-trans"} + if order == "z": + code = z_order_encode(grid_coord, depth=depth) + elif order == "z-trans": + code = z_order_encode(grid_coord[:, [1, 0, 2]], depth=depth) + elif order == "hilbert": + code = hilbert_encode(grid_coord, depth=depth) + elif order == "hilbert-trans": + code = hilbert_encode(grid_coord[:, [1, 0, 2]], 
depth=depth) + else: + raise NotImplementedError + if batch is not None: + batch = batch.long() + code = batch << depth * 3 | code + return code + + +@torch.inference_mode() +def decode(code, depth=16, order="z"): + assert order in {"z", "hilbert"} + batch = code >> depth * 3 + code = code & ((1 << depth * 3) - 1) + if order == "z": + grid_coord = z_order_decode(code, depth=depth) + elif order == "hilbert": + grid_coord = hilbert_decode(code, depth=depth) + else: + raise NotImplementedError + return grid_coord, batch + + +def z_order_encode(grid_coord: torch.Tensor, depth: int = 16): + x, y, z = grid_coord[:, 0].long(), grid_coord[:, 1].long(), grid_coord[:, 2].long() + # we block the support to batch, maintain batched code in Point class + code = z_order_encode_(x, y, z, b=None, depth=depth) + return code + + +def z_order_decode(code: torch.Tensor, depth): + x, y, z = z_order_decode_(code, depth=depth) + grid_coord = torch.stack([x, y, z], dim=-1) # (N, 3) + return grid_coord + + +def hilbert_encode(grid_coord: torch.Tensor, depth: int = 16): + return hilbert_encode_(grid_coord, num_dims=3, num_bits=depth) + + +def hilbert_decode(code: torch.Tensor, depth: int = 16): + return hilbert_decode_(code, num_dims=3, num_bits=depth) diff --git a/Pointcept/pointcept/models/utils/serialization/hilbert.py b/Pointcept/pointcept/models/utils/serialization/hilbert.py new file mode 100644 index 0000000000000000000000000000000000000000..c96a3a9e15be64059811eb86139f28c6016ad0fe --- /dev/null +++ b/Pointcept/pointcept/models/utils/serialization/hilbert.py @@ -0,0 +1,303 @@ +""" +Hilbert Order +Modified from https://github.com/PrincetonLIPS/numpy-hilbert-curve + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com), Kaixin Xu +Please cite our work if the code is helpful to you. +""" + +import torch + + +def right_shift(binary, k=1, axis=-1): + """Right shift an array of binary values. + + Parameters: + ----------- + binary: An ndarray of binary values. + + k: The number of bits to shift. Default 1. + + axis: The axis along which to shift. Default -1. + + Returns: + -------- + Returns an ndarray with zero prepended and the ends truncated, along + whatever axis was specified.""" + + # If we're shifting the whole thing, just return zeros. + if binary.shape[axis] <= k: + return torch.zeros_like(binary) + + # Determine the padding pattern. + # padding = [(0,0)] * len(binary.shape) + # padding[axis] = (k,0) + + # Determine the slicing pattern to eliminate just the last one. + slicing = [slice(None)] * len(binary.shape) + slicing[axis] = slice(None, -k) + shifted = torch.nn.functional.pad( + binary[tuple(slicing)], (k, 0), mode="constant", value=0 + ) + + return shifted + + +def binary2gray(binary, axis=-1): + """Convert an array of binary values into Gray codes. + + This uses the classic X ^ (X >> 1) trick to compute the Gray code. + + Parameters: + ----------- + binary: An ndarray of binary values. + + axis: The axis along which to compute the gray code. Default=-1. + + Returns: + -------- + Returns an ndarray of Gray codes. + """ + shifted = right_shift(binary, axis=axis) + + # Do the X ^ (X >> 1) trick. + gray = torch.logical_xor(binary, shifted) + + return gray + + +def gray2binary(gray, axis=-1): + """Convert an array of Gray codes back into binary values. + + Parameters: + ----------- + gray: An ndarray of gray codes. + + axis: The axis along which to perform Gray decoding. Default=-1. + + Returns: + -------- + Returns an ndarray of binary values. 
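A quick roundtrip check of the two Gray-code helpers (`binary2gray` above, `gray2binary` just below). The bool inputs are illustrative; `encode`/`decode` further down feed these helpers byte tensors:

```python
import torch

binary = torch.tensor([[0, 1, 1, 0], [1, 0, 1, 1]], dtype=torch.bool)
gray = binary2gray(binary)        # classic X ^ (X >> 1)
recovered = gray2binary(gray)     # iterated shift-and-xor inverse
assert torch.equal(recovered, binary)
```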
+ """ + + # Loop the log2(bits) number of times necessary, with shift and xor. + shift = 2 ** (torch.Tensor([gray.shape[axis]]).log2().ceil().int() - 1) + while shift > 0: + gray = torch.logical_xor(gray, right_shift(gray, shift)) + shift = torch.div(shift, 2, rounding_mode="floor") + return gray + + +def encode(locs, num_dims, num_bits): + """Decode an array of locations in a hypercube into a Hilbert integer. + + This is a vectorized-ish version of the Hilbert curve implementation by John + Skilling as described in: + + Skilling, J. (2004, April). Programming the Hilbert curve. In AIP Conference + Proceedings (Vol. 707, No. 1, pp. 381-387). American Institute of Physics. + + Params: + ------- + locs - An ndarray of locations in a hypercube of num_dims dimensions, in + which each dimension runs from 0 to 2**num_bits-1. The shape can + be arbitrary, as long as the last dimension of the same has size + num_dims. + + num_dims - The dimensionality of the hypercube. Integer. + + num_bits - The number of bits for each dimension. Integer. + + Returns: + -------- + The output is an ndarray of uint64 integers with the same shape as the + input, excluding the last dimension, which needs to be num_dims. + """ + + # Keep around the original shape for later. + orig_shape = locs.shape + bitpack_mask = 1 << torch.arange(0, 8).to(locs.device) + bitpack_mask_rev = bitpack_mask.flip(-1) + + if orig_shape[-1] != num_dims: + raise ValueError( + """ + The shape of locs was surprising in that the last dimension was of size + %d, but num_dims=%d. These need to be equal. + """ + % (orig_shape[-1], num_dims) + ) + + if num_dims * num_bits > 63: + raise ValueError( + """ + num_dims=%d and num_bits=%d for %d bits total, which can't be encoded + into a int64. Are you sure you need that many points on your Hilbert + curve? + """ + % (num_dims, num_bits, num_dims * num_bits) + ) + + # Treat the location integers as 64-bit unsigned and then split them up into + # a sequence of uint8s. Preserve the association by dimension. + locs_uint8 = locs.long().view(torch.uint8).reshape((-1, num_dims, 8)).flip(-1) + + # Now turn these into bits and truncate to num_bits. + gray = ( + locs_uint8.unsqueeze(-1) + .bitwise_and(bitpack_mask_rev) + .ne(0) + .byte() + .flatten(-2, -1)[..., -num_bits:] + ) + + # Run the decoding process the other way. + # Iterate forwards through the bits. + for bit in range(0, num_bits): + # Iterate forwards through the dimensions. + for dim in range(0, num_dims): + # Identify which ones have this bit active. + mask = gray[:, dim, bit] + + # Where this bit is on, invert the 0 dimension for lower bits. + gray[:, 0, bit + 1 :] = torch.logical_xor( + gray[:, 0, bit + 1 :], mask[:, None] + ) + + # Where the bit is off, exchange the lower bits with the 0 dimension. + to_flip = torch.logical_and( + torch.logical_not(mask[:, None]).repeat(1, gray.shape[2] - bit - 1), + torch.logical_xor(gray[:, 0, bit + 1 :], gray[:, dim, bit + 1 :]), + ) + gray[:, dim, bit + 1 :] = torch.logical_xor( + gray[:, dim, bit + 1 :], to_flip + ) + gray[:, 0, bit + 1 :] = torch.logical_xor(gray[:, 0, bit + 1 :], to_flip) + + # Now flatten out. + gray = gray.swapaxes(1, 2).reshape((-1, num_bits * num_dims)) + + # Convert Gray back to binary. + hh_bin = gray2binary(gray) + + # Pad back out to 64 bits. + extra_dims = 64 - num_bits * num_dims + padded = torch.nn.functional.pad(hh_bin, (extra_dims, 0), "constant", 0) + + # Convert binary values into uint8s. 
+    hh_uint8 = (
+        (padded.flip(-1).reshape((-1, 8, 8)) * bitpack_mask)
+        .sum(2)
+        .squeeze()
+        .type(torch.uint8)
+    )
+
+    # Convert uint8s into uint64s.
+    hh_uint64 = hh_uint8.view(torch.int64).squeeze()
+
+    return hh_uint64
+
+
+def decode(hilberts, num_dims, num_bits):
+    """Decode an array of Hilbert integers into locations in a hypercube.
+
+    This is a vectorized-ish version of the Hilbert curve implementation by John
+    Skilling as described in:
+
+    Skilling, J. (2004, April). Programming the Hilbert curve. In AIP Conference
+    Proceedings (Vol. 707, No. 1, pp. 381-387). American Institute of Physics.
+
+    Params:
+    -------
+    hilberts - An ndarray of Hilbert integers. Must be an integer dtype and
+               cannot have fewer bits than num_dims * num_bits.
+
+    num_dims - The dimensionality of the hypercube. Integer.
+
+    num_bits - The number of bits for each dimension. Integer.
+
+    Returns:
+    --------
+    The output is an ndarray of unsigned integers with the same shape as hilberts
+    but with an additional dimension of size num_dims.
+    """
+
+    if num_dims * num_bits > 64:
+        raise ValueError(
+            """
+            num_dims=%d and num_bits=%d for %d bits total, which can't be encoded
+            into a uint64. Are you sure you need that many points on your Hilbert
+            curve?
+            """
+            % (num_dims, num_bits, num_dims * num_bits)
+        )
+
+    # Handle the case where we got handed a naked integer.
+    hilberts = torch.atleast_1d(hilberts)
+
+    # Keep around the shape for later.
+    orig_shape = hilberts.shape
+    bitpack_mask = 2 ** torch.arange(0, 8).to(hilberts.device)
+    bitpack_mask_rev = bitpack_mask.flip(-1)
+
+    # Treat each of the hilberts as a sequence of eight uint8.
+    # This treats all of the inputs as uint64 and makes things uniform.
+    hh_uint8 = (
+        hilberts.ravel().type(torch.int64).view(torch.uint8).reshape((-1, 8)).flip(-1)
+    )
+
+    # Turn these lists of uints into lists of bits and then truncate to the size
+    # we actually need for using Skilling's procedure.
+    hh_bits = (
+        hh_uint8.unsqueeze(-1)
+        .bitwise_and(bitpack_mask_rev)
+        .ne(0)
+        .byte()
+        .flatten(-2, -1)[:, -num_dims * num_bits :]
+    )
+
+    # Take the sequence of bits and Gray-code it.
+    gray = binary2gray(hh_bits)
+
+    # There has got to be a better way to do this.
+    # I could index them differently, but the eventual packbits likes it this way.
+    gray = gray.reshape((-1, num_bits, num_dims)).swapaxes(1, 2)
+
+    # Iterate backwards through the bits.
+    for bit in range(num_bits - 1, -1, -1):
+        # Iterate backwards through the dimensions.
+        for dim in range(num_dims - 1, -1, -1):
+            # Identify which ones have this bit active.
+            mask = gray[:, dim, bit]
+
+            # Where this bit is on, invert the 0 dimension for lower bits.
+            gray[:, 0, bit + 1 :] = torch.logical_xor(
+                gray[:, 0, bit + 1 :], mask[:, None]
+            )
+
+            # Where the bit is off, exchange the lower bits with the 0 dimension.
+            to_flip = torch.logical_and(
+                torch.logical_not(mask[:, None]),
+                torch.logical_xor(gray[:, 0, bit + 1 :], gray[:, dim, bit + 1 :]),
+            )
+            gray[:, dim, bit + 1 :] = torch.logical_xor(
+                gray[:, dim, bit + 1 :], to_flip
+            )
+            gray[:, 0, bit + 1 :] = torch.logical_xor(gray[:, 0, bit + 1 :], to_flip)
+
+    # Pad back out to 64 bits.
+    extra_dims = 64 - num_bits
+    padded = torch.nn.functional.pad(gray, (extra_dims, 0), "constant", 0)
+
+    # Now chop these up into blocks of 8.
+    locs_chopped = padded.flip(-1).reshape((-1, num_dims, 8, 8))
+
+    # Take those blocks and turn them into uint8s.
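`decode` finishes just below by packing the recovered bits back into int64 coordinates, inverting `encode`. A hedged roundtrip sketch with toy coordinates and 4 bits per axis:

```python
import torch

locs = torch.tensor([[0, 0, 0], [1, 2, 3], [7, 5, 1]], dtype=torch.int64)
keys = encode(locs, num_dims=3, num_bits=4)    # one Hilbert key per point
back = decode(keys, num_dims=3, num_bits=4)    # shape (3, 3)
assert torch.equal(back, locs)
```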
+ # from IPython import embed; embed() + locs_uint8 = (locs_chopped * bitpack_mask).sum(3).squeeze().type(torch.uint8) + + # Finally, treat these as uint64s. + flat_locs = locs_uint8.view(torch.int64) + + # Return them in the expected shape. + return flat_locs.reshape((*orig_shape, num_dims)) diff --git a/Pointcept/pointcept/models/utils/serialization/z_order.py b/Pointcept/pointcept/models/utils/serialization/z_order.py new file mode 100644 index 0000000000000000000000000000000000000000..6fd01a5bcf4b6c76c5d75db4999326e174409ee3 --- /dev/null +++ b/Pointcept/pointcept/models/utils/serialization/z_order.py @@ -0,0 +1,126 @@ +# -------------------------------------------------------- +# Octree-based Sparse Convolutional Neural Networks +# Copyright (c) 2022 Peng-Shuai Wang +# Licensed under The MIT License [see LICENSE for details] +# Written by Peng-Shuai Wang +# -------------------------------------------------------- + +import torch +from typing import Optional, Union + + +class KeyLUT: + def __init__(self): + r256 = torch.arange(256, dtype=torch.int64) + r512 = torch.arange(512, dtype=torch.int64) + zero = torch.zeros(256, dtype=torch.int64) + device = torch.device("cpu") + + self._encode = { + device: ( + self.xyz2key(r256, zero, zero, 8), + self.xyz2key(zero, r256, zero, 8), + self.xyz2key(zero, zero, r256, 8), + ) + } + self._decode = {device: self.key2xyz(r512, 9)} + + def encode_lut(self, device=torch.device("cpu")): + if device not in self._encode: + cpu = torch.device("cpu") + self._encode[device] = tuple(e.to(device) for e in self._encode[cpu]) + return self._encode[device] + + def decode_lut(self, device=torch.device("cpu")): + if device not in self._decode: + cpu = torch.device("cpu") + self._decode[device] = tuple(e.to(device) for e in self._decode[cpu]) + return self._decode[device] + + def xyz2key(self, x, y, z, depth): + key = torch.zeros_like(x) + for i in range(depth): + mask = 1 << i + key = ( + key + | ((x & mask) << (2 * i + 2)) + | ((y & mask) << (2 * i + 1)) + | ((z & mask) << (2 * i + 0)) + ) + return key + + def key2xyz(self, key, depth): + x = torch.zeros_like(key) + y = torch.zeros_like(key) + z = torch.zeros_like(key) + for i in range(depth): + x = x | ((key & (1 << (3 * i + 2))) >> (2 * i + 2)) + y = y | ((key & (1 << (3 * i + 1))) >> (2 * i + 1)) + z = z | ((key & (1 << (3 * i + 0))) >> (2 * i + 0)) + return x, y, z + + +_key_lut = KeyLUT() + + +def xyz2key( + x: torch.Tensor, + y: torch.Tensor, + z: torch.Tensor, + b: Optional[Union[torch.Tensor, int]] = None, + depth: int = 16, +): + r"""Encodes :attr:`x`, :attr:`y`, :attr:`z` coordinates to the shuffled keys + based on pre-computed look up tables. The speed of this function is much + faster than the method based on for-loop. + + Args: + x (torch.Tensor): The x coordinate. + y (torch.Tensor): The y coordinate. + z (torch.Tensor): The z coordinate. + b (torch.Tensor or int): The batch index of the coordinates, and should be + smaller than 32768. If :attr:`b` is :obj:`torch.Tensor`, the size of + :attr:`b` must be the same as :attr:`x`, :attr:`y`, and :attr:`z`. + depth (int): The depth of the shuffled key, and must be smaller than 17 (< 17). 
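The lookup tables above implement standard Morton (z-order) bit interleaving: bit `i` of x, y, z lands at key bits `3i+2`, `3i+1`, `3i` respectively. A plain-Python reference for a single coordinate triple, mirroring the loop in `KeyLUT.xyz2key`:

```python
def morton3d(x: int, y: int, z: int, depth: int = 4) -> int:
    key = 0
    for i in range(depth):
        mask = 1 << i
        key |= (
            ((x & mask) << (2 * i + 2))    # bit i of x -> key bit 3i + 2
            | ((y & mask) << (2 * i + 1))  # bit i of y -> key bit 3i + 1
            | ((z & mask) << (2 * i))      # bit i of z -> key bit 3i
        )
    return key

print(bin(morton3d(1, 0, 0)))  # 0b100
print(bin(morton3d(3, 1, 2)))  # 0b101110: interleaved bits of x=0b11, y=0b01, z=0b10
```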
+ """ + + EX, EY, EZ = _key_lut.encode_lut(x.device) + x, y, z = x.long(), y.long(), z.long() + + mask = 255 if depth > 8 else (1 << depth) - 1 + key = EX[x & mask] | EY[y & mask] | EZ[z & mask] + if depth > 8: + mask = (1 << (depth - 8)) - 1 + key16 = EX[(x >> 8) & mask] | EY[(y >> 8) & mask] | EZ[(z >> 8) & mask] + key = key16 << 24 | key + + if b is not None: + b = b.long() + key = b << 48 | key + + return key + + +def key2xyz(key: torch.Tensor, depth: int = 16): + r"""Decodes the shuffled key to :attr:`x`, :attr:`y`, :attr:`z` coordinates + and the batch index based on pre-computed look up tables. + + Args: + key (torch.Tensor): The shuffled key. + depth (int): The depth of the shuffled key, and must be smaller than 17 (< 17). + """ + + DX, DY, DZ = _key_lut.decode_lut(key.device) + x, y, z = torch.zeros_like(key), torch.zeros_like(key), torch.zeros_like(key) + + b = key >> 48 + key = key & ((1 << 48) - 1) + + n = (depth + 2) // 3 + for i in range(n): + k = key >> (i * 9) & 511 + x = x | (DX[k] << (i * 3)) + y = y | (DY[k] << (i * 3)) + z = z | (DZ[k] << (i * 3)) + + return x, y, z, b diff --git a/Pointcept/pointcept/models/utils/structure.py b/Pointcept/pointcept/models/utils/structure.py new file mode 100644 index 0000000000000000000000000000000000000000..47fcd054067967f1ce5953d32df288ecc41c7aae --- /dev/null +++ b/Pointcept/pointcept/models/utils/structure.py @@ -0,0 +1,180 @@ +import torch +import spconv.pytorch as spconv + +try: + import ocnn +except ImportError: + ocnn = None +from addict import Dict + +from pointcept.models.utils.serialization import encode, decode +from pointcept.models.utils import offset2batch, batch2offset + + +class Point(Dict): + """ + Point Structure of Pointcept + + A Point (point cloud) in Pointcept is a dictionary that contains various properties of + a batched point cloud. 
The properties with the following names have a specific definition
+    as follows:
+
+    - "coord": original coordinates of the point cloud;
+    - "grid_coord": grid coordinates for a specific grid size (related to GridSampling);
+    Point also supports the following optional attributes:
+    - "offset": if absent, initialized assuming the batch size is 1;
+    - "batch": if absent, initialized assuming the batch size is 1;
+    - "feat": features of the point cloud, the default model input;
+    - "grid_size": grid size of the point cloud (related to GridSampling);
+    (related to Serialization)
+    - "serialized_depth": depth of serialization; 2 ** depth * grid_size describes the maximum point cloud range;
+    - "serialized_code": a list of serialization codes;
+    - "serialized_order": a list of serialization orders determined by the codes;
+    - "serialized_inverse": a list of inverse mappings determined by the codes;
+    (related to Sparsify: SpConv)
+    - "sparse_shape": sparse shape for the SparseConvTensor;
+    - "sparse_conv_feat": SparseConvTensor initialized with information provided by Point;
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # If one of "offset" or "batch" is missing, generate it from the other
+        if "batch" not in self.keys() and "offset" in self.keys():
+            self["batch"] = offset2batch(self.offset)
+        elif "offset" not in self.keys() and "batch" in self.keys():
+            self["offset"] = batch2offset(self.batch)
+
+    def serialization(self, order="z", depth=None, shuffle_orders=False):
+        """
+        Point Cloud Serialization
+
+        relies on ["grid_coord" or "coord" + "grid_size", "batch", "feat"]
+        """
+        assert "batch" in self.keys()
+        if "grid_coord" not in self.keys():
+            # if you don't want to operate GridSampling in data augmentation,
+            # please add the following augmentation into your pipeline:
+            # dict(type="Copy", keys_dict={"grid_size": 0.01}),
+            # (adjust `grid_size` to what you want)
+            assert {"grid_size", "coord"}.issubset(self.keys())
+            self["grid_coord"] = torch.div(
+                self.coord - self.coord.min(0)[0], self.grid_size, rounding_mode="trunc"
+            ).int()
+
+        if depth is None:
+            # Adaptively measure the depth of the serialization cube (length = 2 ^ depth)
+            depth = int(self.grid_coord.max()).bit_length()
+        self["serialized_depth"] = depth
+        # Maximum bit length for the serialization code is 63 (int64)
+        assert depth * 3 + len(self.offset).bit_length() <= 63
+        # Here we follow OCNN and set the depth limit to 16 (48 bits) for the point position.
+        # Although depth is limited to 16, we can still encode a 655.36^3 (2^16 * 0.01) meter^3
+        # cube with a grid size of 0.01 meter, which we consider sufficient for now.
+        # The limit can be lifted by optimizing the z-order encoding function if necessary.
+        assert depth <= 16
+
+        # The serialization codes are arranged in the following structure:
+        # [Order1 ([n]),
+        #  Order2 ([n]),
+        #   ...
+ # OrderN ([n])] (k, n) + code = [ + encode(self.grid_coord, self.batch, depth, order=order_) for order_ in order + ] + code = torch.stack(code) + order = torch.argsort(code) + inverse = torch.zeros_like(order).scatter_( + dim=1, + index=order, + src=torch.arange(0, code.shape[1], device=order.device).repeat( + code.shape[0], 1 + ), + ) + + if shuffle_orders: + perm = torch.randperm(code.shape[0]) + code = code[perm] + order = order[perm] + inverse = inverse[perm] + + self["serialized_code"] = code + self["serialized_order"] = order + self["serialized_inverse"] = inverse + + def sparsify(self, pad=96): + """ + Point Cloud Serialization + + Point cloud is sparse, here we use "sparsify" to specifically refer to + preparing "spconv.SparseConvTensor" for SpConv. + + relay on ["grid_coord" or "coord" + "grid_size", "batch", "feat"] + + pad: padding sparse for sparse shape. + """ + assert {"feat", "batch"}.issubset(self.keys()) + if "grid_coord" not in self.keys(): + # if you don't want to operate GridSampling in data augmentation, + # please add the following augmentation into your pipline: + # dict(type="Copy", keys_dict={"grid_size": 0.01}), + # (adjust `grid_size` to what your want) + assert {"grid_size", "coord"}.issubset(self.keys()) + self["grid_coord"] = torch.div( + self.coord - self.coord.min(0)[0], self.grid_size, rounding_mode="trunc" + ).int() + if "sparse_shape" in self.keys(): + sparse_shape = self.sparse_shape + else: + sparse_shape = torch.add( + torch.max(self.grid_coord, dim=0).values, pad + ).tolist() + sparse_conv_feat = spconv.SparseConvTensor( + features=self.feat, + indices=torch.cat( + [self.batch.unsqueeze(-1).int(), self.grid_coord.int()], dim=1 + ).contiguous(), + spatial_shape=sparse_shape, + batch_size=self.batch[-1].tolist() + 1, + ) + self["sparse_shape"] = sparse_shape + self["sparse_conv_feat"] = sparse_conv_feat + + def octreetization(self, depth=None, full_depth=None): + """ + Point Cloud Octreelization + + Generate octree with OCNN + relay on ["grid_coord", "batch", "feat"] + """ + assert ( + ocnn is not None + ), "Please follow https://github.com/octree-nn/ocnn-pytorch install ocnn." + assert {"grid_coord", "feat", "batch"}.issubset(self.keys()) + # add 1 to make grid space support shift order + if depth is None: + if "depth" in self.keys(): + depth = self.depth + else: + depth = int(self.grid_coord.max() + 1).bit_length() + if full_depth is None: + full_depth = 2 + self["depth"] = depth + assert depth <= 16 # maximum in ocnn + + # [0, 2**depth] -> [0, 2] -> [-1, 1] + coord = self.grid_coord / 2 ** (self.depth - 1) - 1.0 + point = ocnn.octree.Points( + points=coord, + features=self.feat, + batch_id=self.batch.unsqueeze(-1), + batch_size=self.batch[-1] + 1, + ) + octree = ocnn.octree.Octree( + depth=depth, + full_depth=full_depth, + batch_size=self.batch[-1] + 1, + device=coord.device, + ) + octree.build_octree(point) + octree.construct_all_neigh() + self["octree"] = octree diff --git a/Pointcept/pointcept/utils/__init__.py b/Pointcept/pointcept/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Pointcept/pointcept/utils/cache.py b/Pointcept/pointcept/utils/cache.py new file mode 100644 index 0000000000000000000000000000000000000000..623897e42a7a4256a65a1a0e9a7b5c0c46ce5a3e --- /dev/null +++ b/Pointcept/pointcept/utils/cache.py @@ -0,0 +1,56 @@ +""" +Data Cache Utils + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
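A minimal usage sketch for `Point.serialization`, assuming the package layout added in this diff; the cloud sizes and grid size are made up:

```python
import torch
from pointcept.models.utils import Point

# two clouds of 3 and 2 points; grid_size lets serialization() derive grid_coord
point = Point(
    coord=torch.rand(5, 3),
    feat=torch.rand(5, 6),
    offset=torch.tensor([3, 5]),
    grid_size=0.01,
)
point.serialization(order=["z", "hilbert"])
print(point.serialized_code.shape)  # torch.Size([2, 5]): one code per order, per point
print(point.serialized_depth)       # adaptive depth derived from grid_coord.max()
```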
+""" + +import os +import SharedArray + +try: + from multiprocessing.shared_memory import ShareableList +except ImportError: + import warnings + + warnings.warn("Please update python version >= 3.8 to enable shared_memory") +import numpy as np + + +def shared_array(name, var=None): + if var is not None: + # check exist + if os.path.exists(f"/dev/shm/{name}"): + return SharedArray.attach(f"shm://{name}") + # create shared_array + data = SharedArray.create(f"shm://{name}", var.shape, dtype=var.dtype) + data[...] = var[...] + data.flags.writeable = False + else: + data = SharedArray.attach(f"shm://{name}").copy() + return data + + +def shared_dict(name, var=None): + name = str(name) + assert "." not in name # '.' is used as sep flag + data = {} + if var is not None: + assert isinstance(var, dict) + keys = var.keys() + # current version only cache np.array + keys_valid = [] + for key in keys: + if isinstance(var[key], np.ndarray): + keys_valid.append(key) + keys = keys_valid + + ShareableList(sequence=keys, name=name + ".keys") + for key in keys: + if isinstance(var[key], np.ndarray): + data[key] = shared_array(name=f"{name}.{key}", var=var[key]) + else: + keys = list(ShareableList(name=name + ".keys")) + for key in keys: + data[key] = shared_array(name=f"{name}.{key}") + return data diff --git a/Pointcept/pointcept/utils/comm.py b/Pointcept/pointcept/utils/comm.py new file mode 100644 index 0000000000000000000000000000000000000000..69e29e7c690fe0500d3d9a84b6a8749e2f4f4655 --- /dev/null +++ b/Pointcept/pointcept/utils/comm.py @@ -0,0 +1,198 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +""" +This file contains primitives for multi-gpu communication. +This is useful when doing distributed training. +Modified from detectron2(https://github.com/facebookresearch/detectron2) + +Copyright (c) Xiaoyang Wu (xiaoyang.wu@connect.hku.hk). All Rights Reserved. +Please cite our work if you use any part of the code. +""" + +import functools +import numpy as np +import torch +import torch.distributed as dist + +_LOCAL_PROCESS_GROUP = None +""" +A torch process group which only includes processes that on the same machine as the current process. +This variable is set when processes are spawned by `launch()` in "engine/launch.py". +""" + + +def get_world_size() -> int: + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank() -> int: + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank() + + +def get_local_rank() -> int: + """ + Returns: + The rank of the current process within the local (per-machine) process group. + """ + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + assert ( + _LOCAL_PROCESS_GROUP is not None + ), "Local process group is not created! Please use launch() to spawn processes!" + return dist.get_rank(group=_LOCAL_PROCESS_GROUP) + + +def get_local_size() -> int: + """ + Returns: + The size of the per-machine process group, + i.e. the number of processes per machine. 
+ """ + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) + + +def is_main_process() -> bool: + return get_rank() == 0 + + +def synchronize(): + """ + Helper function to synchronize (barrier) among all processes when + using distributed training + """ + if not dist.is_available(): + return + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + if world_size == 1: + return + if dist.get_backend() == dist.Backend.NCCL: + # This argument is needed to avoid warnings. + # It's valid only for NCCL backend. + dist.barrier(device_ids=[torch.cuda.current_device()]) + else: + dist.barrier() + + +@functools.lru_cache() +def _get_global_gloo_group(): + """ + Return a process group based on gloo backend, containing all the ranks + The result is cached. + """ + if dist.get_backend() == "nccl": + return dist.new_group(backend="gloo") + else: + return dist.group.WORLD + + +def all_gather(data, group=None): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors). + Args: + data: any picklable object + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + Returns: + list[data]: list of data gathered from each rank + """ + if get_world_size() == 1: + return [data] + if group is None: + group = ( + _get_global_gloo_group() + ) # use CPU group by default, to reduce GPU RAM usage. + world_size = dist.get_world_size(group) + if world_size == 1: + return [data] + + output = [None for _ in range(world_size)] + dist.all_gather_object(output, data, group=group) + return output + + +def gather(data, dst=0, group=None): + """ + Run gather on arbitrary picklable data (not necessarily tensors). + Args: + data: any picklable object + dst (int): destination rank + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + Returns: + list[data]: on dst, a list of data gathered from each rank. Otherwise, + an empty list. + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + world_size = dist.get_world_size(group=group) + if world_size == 1: + return [data] + rank = dist.get_rank(group=group) + + if rank == dst: + output = [None for _ in range(world_size)] + dist.gather_object(data, output, dst=dst, group=group) + return output + else: + dist.gather_object(data, None, dst=dst, group=group) + return [] + + +def shared_random_seed(): + """ + Returns: + int: a random number that is the same across all workers. + If workers need a shared RNG, they can use this shared seed to + create one. + All workers must call this function, otherwise it will deadlock. + """ + ints = np.random.randint(2**31) + all_ints = all_gather(ints) + return all_ints[0] + + +def reduce_dict(input_dict, average=True): + """ + Reduce the values in the dictionary from all processes so that process with rank + 0 has the reduced results. + Args: + input_dict (dict): inputs to be reduced. All the values must be scalar CUDA Tensor. + average (bool): whether to do average or sum + Returns: + a dict with the same keys as input_dict, after reduction. 
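+
+    Example (a sketch; assumes two ranks holding loss=1.0 and loss=3.0):
+        out = reduce_dict({"loss": torch.tensor(1.0, device="cuda")})
+        # on rank 0: out["loss"] is tensor(2.), the average across ranks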
+ """ + world_size = get_world_size() + if world_size < 2: + return input_dict + with torch.no_grad(): + names = [] + values = [] + # sort the keys so that they are consistent across processes + for k in sorted(input_dict.keys()): + names.append(k) + values.append(input_dict[k]) + values = torch.stack(values, dim=0) + dist.reduce(values, dst=0) + if dist.get_rank() == 0 and average: + # only main process gets accumulated, so only divide by + # world_size in this case + values /= world_size + reduced_dict = {k: v for k, v in zip(names, values)} + return reduced_dict diff --git a/Pointcept/pointcept/utils/config.py b/Pointcept/pointcept/utils/config.py new file mode 100644 index 0000000000000000000000000000000000000000..316dd458b3760b38feeb33d941ad9ad060364a61 --- /dev/null +++ b/Pointcept/pointcept/utils/config.py @@ -0,0 +1,694 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import ast +import copy +import os +import os.path as osp +import platform +import shutil +import sys +import tempfile +import uuid +import warnings +from argparse import Action, ArgumentParser +from collections import abc +from importlib import import_module + +from addict import Dict +from yapf.yapflib.yapf_api import FormatCode + +from .misc import import_modules_from_strings +from .path import check_file_exist + +if platform.system() == "Windows": + import regex as re +else: + import re + +BASE_KEY = "_base_" +DELETE_KEY = "_delete_" +DEPRECATION_KEY = "_deprecation_" +RESERVED_KEYS = ["filename", "text", "pretty_text"] + + +class ConfigDict(Dict): + def __missing__(self, name): + raise KeyError(name) + + def __getattr__(self, name): + try: + value = super(ConfigDict, self).__getattr__(name) + except KeyError: + ex = AttributeError( + f"'{self.__class__.__name__}' object has no " f"attribute '{name}'" + ) + except Exception as e: + ex = e + else: + return value + raise ex + + +def add_args(parser, cfg, prefix=""): + for k, v in cfg.items(): + if isinstance(v, str): + parser.add_argument("--" + prefix + k) + elif isinstance(v, int): + parser.add_argument("--" + prefix + k, type=int) + elif isinstance(v, float): + parser.add_argument("--" + prefix + k, type=float) + elif isinstance(v, bool): + parser.add_argument("--" + prefix + k, action="store_true") + elif isinstance(v, dict): + add_args(parser, v, prefix + k + ".") + elif isinstance(v, abc.Iterable): + parser.add_argument("--" + prefix + k, type=type(v[0]), nargs="+") + else: + print(f"cannot parse key {prefix + k} of type {type(v)}") + return parser + + +class Config: + """A facility for config and config files. + + It supports common file formats as configs: python/json/yaml. The interface + is the same as a dict object and also allows access config values as + attributes. 
+ + Example: + >>> cfg = Config(dict(a=1, b=dict(b1=[0, 1]))) + >>> cfg.a + 1 + >>> cfg.b + {'b1': [0, 1]} + >>> cfg.b.b1 + [0, 1] + >>> cfg = Config.fromfile('tests/data/config/a.py') + >>> cfg.filename + "/home/kchen/projects/mmcv/tests/data/config/a.py" + >>> cfg.item4 + 'test' + >>> cfg + "Config [path: /home/kchen/projects/mmcv/tests/data/config/a.py]: " + "{'item1': [1, 2], 'item2': {'a': 0}, 'item3': True, 'item4': 'test'}" + """ + + @staticmethod + def _validate_py_syntax(filename): + with open(filename, "r", encoding="utf-8") as f: + # Setting encoding explicitly to resolve coding issue on windows + content = f.read() + try: + ast.parse(content) + except SyntaxError as e: + raise SyntaxError( + "There are syntax errors in config " f"file {filename}: {e}" + ) + + @staticmethod + def _substitute_predefined_vars(filename, temp_config_name): + file_dirname = osp.dirname(filename) + file_basename = osp.basename(filename) + file_basename_no_extension = osp.splitext(file_basename)[0] + file_extname = osp.splitext(filename)[1] + support_templates = dict( + fileDirname=file_dirname, + fileBasename=file_basename, + fileBasenameNoExtension=file_basename_no_extension, + fileExtname=file_extname, + ) + with open(filename, "r", encoding="utf-8") as f: + # Setting encoding explicitly to resolve coding issue on windows + config_file = f.read() + for key, value in support_templates.items(): + regexp = r"\{\{\s*" + str(key) + r"\s*\}\}" + value = value.replace("\\", "/") + config_file = re.sub(regexp, value, config_file) + with open(temp_config_name, "w", encoding="utf-8") as tmp_config_file: + tmp_config_file.write(config_file) + + @staticmethod + def _pre_substitute_base_vars(filename, temp_config_name): + """Substitute base variable placehoders to string, so that parsing + would work.""" + with open(filename, "r", encoding="utf-8") as f: + # Setting encoding explicitly to resolve coding issue on windows + config_file = f.read() + base_var_dict = {} + regexp = r"\{\{\s*" + BASE_KEY + r"\.([\w\.]+)\s*\}\}" + base_vars = set(re.findall(regexp, config_file)) + for base_var in base_vars: + randstr = f"_{base_var}_{uuid.uuid4().hex.lower()[:6]}" + base_var_dict[randstr] = base_var + regexp = r"\{\{\s*" + BASE_KEY + r"\." 
+ base_var + r"\s*\}\}" + config_file = re.sub(regexp, f'"{randstr}"', config_file) + with open(temp_config_name, "w", encoding="utf-8") as tmp_config_file: + tmp_config_file.write(config_file) + return base_var_dict + + @staticmethod + def _substitute_base_vars(cfg, base_var_dict, base_cfg): + """Substitute variable strings to their actual values.""" + cfg = copy.deepcopy(cfg) + + if isinstance(cfg, dict): + for k, v in cfg.items(): + if isinstance(v, str) and v in base_var_dict: + new_v = base_cfg + for new_k in base_var_dict[v].split("."): + new_v = new_v[new_k] + cfg[k] = new_v + elif isinstance(v, (list, tuple, dict)): + cfg[k] = Config._substitute_base_vars(v, base_var_dict, base_cfg) + elif isinstance(cfg, tuple): + cfg = tuple( + Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg + ) + elif isinstance(cfg, list): + cfg = [ + Config._substitute_base_vars(c, base_var_dict, base_cfg) for c in cfg + ] + elif isinstance(cfg, str) and cfg in base_var_dict: + new_v = base_cfg + for new_k in base_var_dict[cfg].split("."): + new_v = new_v[new_k] + cfg = new_v + + return cfg + + @staticmethod + def _file2dict(filename, use_predefined_variables=True): + filename = osp.abspath(osp.expanduser(filename)) + check_file_exist(filename) + fileExtname = osp.splitext(filename)[1] + if fileExtname not in [".py", ".json", ".yaml", ".yml"]: + raise IOError("Only py/yml/yaml/json type are supported now!") + + with tempfile.TemporaryDirectory() as temp_config_dir: + temp_config_file = tempfile.NamedTemporaryFile( + dir=temp_config_dir, suffix=fileExtname + ) + if platform.system() == "Windows": + temp_config_file.close() + temp_config_name = osp.basename(temp_config_file.name) + # Substitute predefined variables + if use_predefined_variables: + Config._substitute_predefined_vars(filename, temp_config_file.name) + else: + shutil.copyfile(filename, temp_config_file.name) + # Substitute base variables from placeholders to strings + base_var_dict = Config._pre_substitute_base_vars( + temp_config_file.name, temp_config_file.name + ) + + if filename.endswith(".py"): + temp_module_name = osp.splitext(temp_config_name)[0] + sys.path.insert(0, temp_config_dir) + Config._validate_py_syntax(filename) + mod = import_module(temp_module_name) + sys.path.pop(0) + cfg_dict = { + name: value + for name, value in mod.__dict__.items() + if not name.startswith("__") + } + # delete imported module + del sys.modules[temp_module_name] + elif filename.endswith((".yml", ".yaml", ".json")): + raise NotImplementedError + # close temp file + temp_config_file.close() + + # check deprecation information + if DEPRECATION_KEY in cfg_dict: + deprecation_info = cfg_dict.pop(DEPRECATION_KEY) + warning_msg = ( + f"The config file {filename} will be deprecated " "in the future." + ) + if "expected" in deprecation_info: + warning_msg += f' Please use {deprecation_info["expected"]} ' "instead." 
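+            # a config opts in by defining, e.g. (hypothetical values):
+            #   _deprecation_ = dict(expected="new_config.py", reference="https://...")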
+ if "reference" in deprecation_info: + warning_msg += ( + " More information can be found at " + f'{deprecation_info["reference"]}' + ) + warnings.warn(warning_msg) + + cfg_text = filename + "\n" + with open(filename, "r", encoding="utf-8") as f: + # Setting encoding explicitly to resolve coding issue on windows + cfg_text += f.read() + + if BASE_KEY in cfg_dict: + cfg_dir = osp.dirname(filename) + base_filename = cfg_dict.pop(BASE_KEY) + base_filename = ( + base_filename if isinstance(base_filename, list) else [base_filename] + ) + + cfg_dict_list = list() + cfg_text_list = list() + for f in base_filename: + _cfg_dict, _cfg_text = Config._file2dict(osp.join(cfg_dir, f)) + cfg_dict_list.append(_cfg_dict) + cfg_text_list.append(_cfg_text) + + base_cfg_dict = dict() + for c in cfg_dict_list: + duplicate_keys = base_cfg_dict.keys() & c.keys() + if len(duplicate_keys) > 0: + raise KeyError( + "Duplicate key is not allowed among bases. " + f"Duplicate keys: {duplicate_keys}" + ) + base_cfg_dict.update(c) + + # Substitute base variables from strings to their actual values + cfg_dict = Config._substitute_base_vars( + cfg_dict, base_var_dict, base_cfg_dict + ) + + base_cfg_dict = Config._merge_a_into_b(cfg_dict, base_cfg_dict) + cfg_dict = base_cfg_dict + + # merge cfg_text + cfg_text_list.append(cfg_text) + cfg_text = "\n".join(cfg_text_list) + + return cfg_dict, cfg_text + + @staticmethod + def _merge_a_into_b(a, b, allow_list_keys=False): + """merge dict ``a`` into dict ``b`` (non-inplace). + + Values in ``a`` will overwrite ``b``. ``b`` is copied first to avoid + in-place modifications. + + Args: + a (dict): The source dict to be merged into ``b``. + b (dict): The origin dict to be fetch keys from ``a``. + allow_list_keys (bool): If True, int string keys (e.g. '0', '1') + are allowed in source ``a`` and will replace the element of the + corresponding index in b if b is a list. Default: False. + + Returns: + dict: The modified dict of ``b`` using ``a``. + + Examples: + # Normally merge a into b. + >>> Config._merge_a_into_b( + ... dict(obj=dict(a=2)), dict(obj=dict(a=1))) + {'obj': {'a': 2}} + + # Delete b first and merge a into b. + >>> Config._merge_a_into_b( + ... dict(obj=dict(_delete_=True, a=2)), dict(obj=dict(a=1))) + {'obj': {'a': 2}} + + # b is a list + >>> Config._merge_a_into_b( + ... {'0': dict(a=2)}, [dict(a=1), dict(b=2)], True) + [{'a': 2}, {'b': 2}] + """ + b = b.copy() + for k, v in a.items(): + if allow_list_keys and k.isdigit() and isinstance(b, list): + k = int(k) + if len(b) <= k: + raise KeyError(f"Index {k} exceeds the length of list {b}") + b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) + elif isinstance(v, dict) and k in b and not v.pop(DELETE_KEY, False): + allowed_types = (dict, list) if allow_list_keys else dict + if not isinstance(b[k], allowed_types): + raise TypeError( + f"{k}={v} in child config cannot inherit from base " + f"because {k} is a dict in the child config but is of " + f"type {type(b[k])} in base config. 
You may set " + f"`{DELETE_KEY}=True` to ignore the base config" + ) + b[k] = Config._merge_a_into_b(v, b[k], allow_list_keys) + else: + b[k] = v + return b + + @staticmethod + def fromfile(filename, use_predefined_variables=True, import_custom_modules=True): + cfg_dict, cfg_text = Config._file2dict(filename, use_predefined_variables) + if import_custom_modules and cfg_dict.get("custom_imports", None): + import_modules_from_strings(**cfg_dict["custom_imports"]) + return Config(cfg_dict, cfg_text=cfg_text, filename=filename) + + @staticmethod + def fromstring(cfg_str, file_format): + """Generate config from config str. + + Args: + cfg_str (str): Config str. + file_format (str): Config file format corresponding to the + config str. Only py/yml/yaml/json type are supported now! + + Returns: + obj:`Config`: Config obj. + """ + if file_format not in [".py", ".json", ".yaml", ".yml"]: + raise IOError("Only py/yml/yaml/json type are supported now!") + if file_format != ".py" and "dict(" in cfg_str: + # check if users specify a wrong suffix for python + warnings.warn('Please check "file_format", the file format may be .py') + with tempfile.NamedTemporaryFile( + "w", encoding="utf-8", suffix=file_format, delete=False + ) as temp_file: + temp_file.write(cfg_str) + # on windows, previous implementation cause error + # see PR 1077 for details + cfg = Config.fromfile(temp_file.name) + os.remove(temp_file.name) + return cfg + + @staticmethod + def auto_argparser(description=None): + """Generate argparser from config file automatically (experimental)""" + partial_parser = ArgumentParser(description=description) + partial_parser.add_argument("config", help="config file path") + cfg_file = partial_parser.parse_known_args()[0].config + cfg = Config.fromfile(cfg_file) + parser = ArgumentParser(description=description) + parser.add_argument("config", help="config file path") + add_args(parser, cfg) + return parser, cfg + + def __init__(self, cfg_dict=None, cfg_text=None, filename=None): + if cfg_dict is None: + cfg_dict = dict() + elif not isinstance(cfg_dict, dict): + raise TypeError("cfg_dict must be a dict, but " f"got {type(cfg_dict)}") + for key in cfg_dict: + if key in RESERVED_KEYS: + raise KeyError(f"{key} is reserved for config file") + + super(Config, self).__setattr__("_cfg_dict", ConfigDict(cfg_dict)) + super(Config, self).__setattr__("_filename", filename) + if cfg_text: + text = cfg_text + elif filename: + with open(filename, "r") as f: + text = f.read() + else: + text = "" + super(Config, self).__setattr__("_text", text) + + @property + def filename(self): + return self._filename + + @property + def text(self): + return self._text + + @property + def pretty_text(self): + indent = 4 + + def _indent(s_, num_spaces): + s = s_.split("\n") + if len(s) == 1: + return s_ + first = s.pop(0) + s = [(num_spaces * " ") + line for line in s] + s = "\n".join(s) + s = first + "\n" + s + return s + + def _format_basic_types(k, v, use_mapping=False): + if isinstance(v, str): + v_str = f"'{v}'" + else: + v_str = str(v) + + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f"{k_str}: {v_str}" + else: + attr_str = f"{str(k)}={v_str}" + attr_str = _indent(attr_str, indent) + + return attr_str + + def _format_list(k, v, use_mapping=False): + # check if all items in the list are dict + if all(isinstance(_, dict) for _ in v): + v_str = "[\n" + v_str += "\n".join( + f"dict({_indent(_format_dict(v_), indent)})," for v_ in v + ).rstrip(",") + if use_mapping: + k_str = f"'{k}'" if 
isinstance(k, str) else str(k) + attr_str = f"{k_str}: {v_str}" + else: + attr_str = f"{str(k)}={v_str}" + attr_str = _indent(attr_str, indent) + "]" + else: + attr_str = _format_basic_types(k, v, use_mapping) + return attr_str + + def _contain_invalid_identifier(dict_str): + contain_invalid_identifier = False + for key_name in dict_str: + contain_invalid_identifier |= not str(key_name).isidentifier() + return contain_invalid_identifier + + def _format_dict(input_dict, outest_level=False): + r = "" + s = [] + + use_mapping = _contain_invalid_identifier(input_dict) + if use_mapping: + r += "{" + for idx, (k, v) in enumerate(input_dict.items()): + is_last = idx >= len(input_dict) - 1 + end = "" if outest_level or is_last else "," + if isinstance(v, dict): + v_str = "\n" + _format_dict(v) + if use_mapping: + k_str = f"'{k}'" if isinstance(k, str) else str(k) + attr_str = f"{k_str}: dict({v_str}" + else: + attr_str = f"{str(k)}=dict({v_str}" + attr_str = _indent(attr_str, indent) + ")" + end + elif isinstance(v, list): + attr_str = _format_list(k, v, use_mapping) + end + else: + attr_str = _format_basic_types(k, v, use_mapping) + end + + s.append(attr_str) + r += "\n".join(s) + if use_mapping: + r += "}" + return r + + cfg_dict = self._cfg_dict.to_dict() + text = _format_dict(cfg_dict, outest_level=True) + # copied from setup.cfg + yapf_style = dict( + based_on_style="pep8", + blank_line_before_nested_class_or_def=True, + split_before_expression_after_opening_paren=True, + ) + text, _ = FormatCode(text, style_config=yapf_style, verify=True) + + return text + + def __repr__(self): + return f"Config (path: {self.filename}): {self._cfg_dict.__repr__()}" + + def __len__(self): + return len(self._cfg_dict) + + def __getattr__(self, name): + return getattr(self._cfg_dict, name) + + def __getitem__(self, name): + return self._cfg_dict.__getitem__(name) + + def __setattr__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setattr__(name, value) + + def __setitem__(self, name, value): + if isinstance(value, dict): + value = ConfigDict(value) + self._cfg_dict.__setitem__(name, value) + + def __iter__(self): + return iter(self._cfg_dict) + + def __getstate__(self): + return (self._cfg_dict, self._filename, self._text) + + def __setstate__(self, state): + _cfg_dict, _filename, _text = state + super(Config, self).__setattr__("_cfg_dict", _cfg_dict) + super(Config, self).__setattr__("_filename", _filename) + super(Config, self).__setattr__("_text", _text) + + def dump(self, file=None): + cfg_dict = super(Config, self).__getattribute__("_cfg_dict").to_dict() + if self.filename.endswith(".py"): + if file is None: + return self.pretty_text + else: + with open(file, "w", encoding="utf-8") as f: + f.write(self.pretty_text) + else: + import mmcv + + if file is None: + file_format = self.filename.split(".")[-1] + return mmcv.dump(cfg_dict, file_format=file_format) + else: + mmcv.dump(cfg_dict, file) + + def merge_from_dict(self, options, allow_list_keys=True): + """Merge list into cfg_dict. + + Merge the dict parsed by MultipleKVAction into this cfg. + + Examples: + >>> options = {'models.backbone.depth': 50, + ... 'models.backbone.with_cp':True} + >>> cfg = Config(dict(models=dict(backbone=dict(type='ResNet')))) + >>> cfg.merge_from_dict(options) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict( + ... models=dict(backbone=dict(depth=50, with_cp=True))) + + # Merge list element + >>> cfg = Config(dict(pipeline=[ + ... 
dict(type='LoadImage'), dict(type='LoadAnnotations')])) + >>> options = dict(pipeline={'0': dict(type='SelfLoadImage')}) + >>> cfg.merge_from_dict(options, allow_list_keys=True) + >>> cfg_dict = super(Config, self).__getattribute__('_cfg_dict') + >>> assert cfg_dict == dict(pipeline=[ + ... dict(type='SelfLoadImage'), dict(type='LoadAnnotations')]) + + Args: + options (dict): dict of configs to merge from. + allow_list_keys (bool): If True, int string keys (e.g. '0', '1') + are allowed in ``options`` and will replace the element of the + corresponding index in the config if the config is a list. + Default: True. + """ + option_cfg_dict = {} + for full_key, v in options.items(): + d = option_cfg_dict + key_list = full_key.split(".") + for subkey in key_list[:-1]: + d.setdefault(subkey, ConfigDict()) + d = d[subkey] + subkey = key_list[-1] + d[subkey] = v + + cfg_dict = super(Config, self).__getattribute__("_cfg_dict") + super(Config, self).__setattr__( + "_cfg_dict", + Config._merge_a_into_b( + option_cfg_dict, cfg_dict, allow_list_keys=allow_list_keys + ), + ) + + +class DictAction(Action): + """ + argparse action to split an argument into KEY=VALUE form + on the first = and append to a dictionary. List options can + be passed as comma separated values, i.e 'KEY=V1,V2,V3', or with explicit + brackets, i.e. 'KEY=[V1,V2,V3]'. It also support nested brackets to build + list/tuple values. e.g. 'KEY=[(V1,V2),(V3,V4)]' + """ + + @staticmethod + def _parse_int_float_bool(val): + try: + return int(val) + except ValueError: + pass + try: + return float(val) + except ValueError: + pass + if val.lower() in ["true", "false"]: + return True if val.lower() == "true" else False + return val + + @staticmethod + def _parse_iterable(val): + """Parse iterable values in the string. + + All elements inside '()' or '[]' are treated as iterable values. + + Args: + val (str): Value string. + + Returns: + list | tuple: The expanded list or tuple from the string. + + Examples: + >>> DictAction._parse_iterable('1,2,3') + [1, 2, 3] + >>> DictAction._parse_iterable('[a, b, c]') + ['a', 'b', 'c'] + >>> DictAction._parse_iterable('[(1, 2, 3), [a, b], c]') + [(1, 2, 3), ['a', 'b'], 'c'] + """ + + def find_next_comma(string): + """Find the position of next comma in the string. + + If no ',' is found in the string, return the string length. All + chars inside '()' and '[]' are treated as one element and thus ',' + inside these brackets are ignored. + """ + assert (string.count("(") == string.count(")")) and ( + string.count("[") == string.count("]") + ), f"Imbalanced brackets exist in {string}" + end = len(string) + for idx, char in enumerate(string): + pre = string[:idx] + # The string before this ',' is balanced + if ( + (char == ",") + and (pre.count("(") == pre.count(")")) + and (pre.count("[") == pre.count("]")) + ): + end = idx + break + return end + + # Strip ' and " characters and replace whitespace. 
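+        # e.g. '[(1,2),[a,b],c]' is peeled bracket by bracket into
+        # [(1, 2), ['a', 'b'], 'c'] (see the examples in the docstring above)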
+ val = val.strip("'\"").replace(" ", "") + is_tuple = False + if val.startswith("(") and val.endswith(")"): + is_tuple = True + val = val[1:-1] + elif val.startswith("[") and val.endswith("]"): + val = val[1:-1] + elif "," not in val: + # val is a single value + return DictAction._parse_int_float_bool(val) + + values = [] + while len(val) > 0: + comma_idx = find_next_comma(val) + element = DictAction._parse_iterable(val[:comma_idx]) + values.append(element) + val = val[comma_idx + 1 :] + if is_tuple: + values = tuple(values) + return values + + def __call__(self, parser, namespace, values, option_string=None): + options = {} + for kv in values: + key, val = kv.split("=", maxsplit=1) + options[key] = self._parse_iterable(val) + setattr(namespace, self.dest, options) diff --git a/Pointcept/pointcept/utils/env.py b/Pointcept/pointcept/utils/env.py new file mode 100644 index 0000000000000000000000000000000000000000..653f007dde5c4a7564e732da88dd47e7d37adf97 --- /dev/null +++ b/Pointcept/pointcept/utils/env.py @@ -0,0 +1,36 @@ +""" +Environment Utils + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os +import random +import numpy as np +import torch +import torch.backends.cudnn as cudnn + +from datetime import datetime + + +def get_random_seed(): + seed = ( + os.getpid() + + int(datetime.now().strftime("%S%f")) + + int.from_bytes(os.urandom(2), "big") + ) + return seed + + +def set_seed(seed=None): + if seed is None: + seed = get_random_seed() + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + cudnn.benchmark = False + cudnn.deterministic = True + os.environ["PYTHONHASHSEED"] = str(seed) diff --git a/Pointcept/pointcept/utils/events.py b/Pointcept/pointcept/utils/events.py new file mode 100644 index 0000000000000000000000000000000000000000..718ee9191e5cce9343383baa1aae99f22c3d0734 --- /dev/null +++ b/Pointcept/pointcept/utils/events.py @@ -0,0 +1,612 @@ +""" +Events Utils + +Modified from Detectron2 + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import datetime +import json +import logging +import os +import time +import torch +import numpy as np +import traceback +import sys + +from typing import List, Optional, Tuple +from collections import defaultdict +from contextlib import contextmanager + +__all__ = [ + "get_event_storage", + "JSONWriter", + "TensorboardXWriter", + "CommonMetricPrinter", + "EventStorage", + "ExceptionWriter", +] + +_CURRENT_STORAGE_STACK = [] + + +def get_event_storage(): + """ + Returns: + The :class:`EventStorage` object that's currently being used. + Throws an error if no :class:`EventStorage` is currently enabled. + """ + assert len( + _CURRENT_STORAGE_STACK + ), "get_event_storage() has to be called inside a 'with EventStorage(...)' context!" + return _CURRENT_STORAGE_STACK[-1] + + +class EventWriter: + """ + Base class for writers that obtain events from :class:`EventStorage` and process them. + """ + + def write(self): + raise NotImplementedError + + def close(self): + pass + + +class JSONWriter(EventWriter): + """ + Write scalars to a json file. + It saves scalars as one json per line (instead of a big json) for easy parsing. 
+ Examples parsing such a json file: + :: + $ cat metrics.json | jq -s '.[0:2]' + [ + { + "data_time": 0.008433341979980469, + "iteration": 19, + "loss": 1.9228371381759644, + "loss_box_reg": 0.050025828182697296, + "loss_classifier": 0.5316952466964722, + "loss_mask": 0.7236229181289673, + "loss_rpn_box": 0.0856662318110466, + "loss_rpn_cls": 0.48198649287223816, + "lr": 0.007173333333333333, + "time": 0.25401854515075684 + }, + { + "data_time": 0.007216215133666992, + "iteration": 39, + "loss": 1.282649278640747, + "loss_box_reg": 0.06222952902317047, + "loss_classifier": 0.30682939291000366, + "loss_mask": 0.6970193982124329, + "loss_rpn_box": 0.038663312792778015, + "loss_rpn_cls": 0.1471673548221588, + "lr": 0.007706666666666667, + "time": 0.2490077018737793 + } + ] + $ cat metrics.json | jq '.loss_mask' + 0.7126231789588928 + 0.689423680305481 + 0.6776131987571716 + ... + """ + + def __init__(self, json_file, window_size=20): + """ + Args: + json_file (str): path to the json file. New data will be appended if the file exists. + window_size (int): the window size of median smoothing for the scalars whose + `smoothing_hint` are True. + """ + self._file_handle = open(json_file, "a") + self._window_size = window_size + self._last_write = -1 + + def write(self): + storage = get_event_storage() + to_save = defaultdict(dict) + + for k, (v, iter) in storage.latest_with_smoothing_hint( + self._window_size + ).items(): + # keep scalars that have not been written + if iter <= self._last_write: + continue + to_save[iter][k] = v + if len(to_save): + all_iters = sorted(to_save.keys()) + self._last_write = max(all_iters) + + for itr, scalars_per_iter in to_save.items(): + scalars_per_iter["iteration"] = itr + self._file_handle.write(json.dumps(scalars_per_iter, sort_keys=True) + "\n") + self._file_handle.flush() + try: + os.fsync(self._file_handle.fileno()) + except AttributeError: + pass + + def close(self): + self._file_handle.close() + + +class TensorboardXWriter(EventWriter): + """ + Write all scalars to a tensorboard file. + """ + + def __init__(self, log_dir: str, window_size: int = 20, **kwargs): + """ + Args: + log_dir (str): the directory to save the output events + window_size (int): the scalars will be median-smoothed by this window size + kwargs: other arguments passed to `torch.utils.tensorboard.SummaryWriter(...)` + """ + self._window_size = window_size + from torch.utils.tensorboard import SummaryWriter + + self._writer = SummaryWriter(log_dir, **kwargs) + self._last_write = -1 + + def write(self): + storage = get_event_storage() + new_last_write = self._last_write + for k, (v, iter) in storage.latest_with_smoothing_hint( + self._window_size + ).items(): + if iter > self._last_write: + self._writer.add_scalar(k, v, iter) + new_last_write = max(new_last_write, iter) + self._last_write = new_last_write + + # storage.put_{image,histogram} is only meant to be used by + # tensorboard writer. So we access its internal fields directly from here. + if len(storage._vis_data) >= 1: + for img_name, img, step_num in storage._vis_data: + self._writer.add_image(img_name, img, step_num) + # Storage stores all image data and rely on this writer to clear them. + # As a result it assumes only one writer will use its image data. + # An alternative design is to let storage store limited recent + # data (e.g. only the most recent image) that all writers can access. + # In that case a writer may not see all image data if its period is long. 
+ storage.clear_images() + + if len(storage._histograms) >= 1: + for params in storage._histograms: + self._writer.add_histogram_raw(**params) + storage.clear_histograms() + + def close(self): + if hasattr(self, "_writer"): # doesn't exist when the code fails at import + self._writer.close() + + +class CommonMetricPrinter(EventWriter): + """ + Print **common** metrics to the terminal, including + iteration time, ETA, memory, all losses, and the learning rate. + It also applies smoothing using a window of 20 elements. + It's meant to print common metrics in common ways. + To print something in more customized ways, please implement a similar printer by yourself. + """ + + def __init__(self, max_iter: Optional[int] = None, window_size: int = 20): + """ + Args: + max_iter: the maximum number of iterations to train. + Used to compute ETA. If not given, ETA will not be printed. + window_size (int): the losses will be median-smoothed by this window size + """ + self.logger = logging.getLogger(__name__) + self._max_iter = max_iter + self._window_size = window_size + self._last_write = ( + None # (step, time) of last call to write(). Used to compute ETA + ) + + def _get_eta(self, storage) -> Optional[str]: + if self._max_iter is None: + return "" + iteration = storage.iter + try: + eta_seconds = storage.history("time").median(1000) * ( + self._max_iter - iteration - 1 + ) + storage.put_scalar("eta_seconds", eta_seconds, smoothing_hint=False) + return str(datetime.timedelta(seconds=int(eta_seconds))) + except KeyError: + # estimate eta on our own - more noisy + eta_string = None + if self._last_write is not None: + estimate_iter_time = (time.perf_counter() - self._last_write[1]) / ( + iteration - self._last_write[0] + ) + eta_seconds = estimate_iter_time * (self._max_iter - iteration - 1) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + self._last_write = (iteration, time.perf_counter()) + return eta_string + + def write(self): + storage = get_event_storage() + iteration = storage.iter + if iteration == self._max_iter: + # This hook only reports training progress (loss, ETA, etc) but not other data, + # therefore do not write anything after training succeeds, even if this method + # is called. 
+ return + + try: + data_time = storage.history("data_time").avg(20) + except KeyError: + # they may not exist in the first few iterations (due to warmup) + # or when SimpleTrainer is not used + data_time = None + try: + iter_time = storage.history("time").global_avg() + except KeyError: + iter_time = None + try: + lr = "{:.5g}".format(storage.history("lr").latest()) + except KeyError: + lr = "N/A" + + eta_string = self._get_eta(storage) + + if torch.cuda.is_available(): + max_mem_mb = torch.cuda.max_memory_allocated() / 1024.0 / 1024.0 + else: + max_mem_mb = None + + # NOTE: max_mem is parsed by grep in "dev/parse_results.sh" + self.logger.info( + " {eta}iter: {iter} {losses} {time}{data_time}lr: {lr} {memory}".format( + eta=f"eta: {eta_string} " if eta_string else "", + iter=iteration, + losses=" ".join( + [ + "{}: {:.4g}".format(k, v.median(self._window_size)) + for k, v in storage.histories().items() + if "loss" in k + ] + ), + time=( + "time: {:.4f} ".format(iter_time) if iter_time is not None else "" + ), + data_time=( + "data_time: {:.4f} ".format(data_time) + if data_time is not None + else "" + ), + lr=lr, + memory=( + "max_mem: {:.0f}M".format(max_mem_mb) + if max_mem_mb is not None + else "" + ), + ) + ) + + +class EventStorage: + """ + The user-facing class that provides metric storage functionalities. + In the future we may add support for storing / logging other types of data if needed. + """ + + def __init__(self, start_iter=0): + """ + Args: + start_iter (int): the iteration number to start with + """ + self._history = defaultdict(AverageMeter) + self._smoothing_hints = {} + self._latest_scalars = {} + self._iter = start_iter + self._current_prefix = "" + self._vis_data = [] + self._histograms = [] + + # def put_image(self, img_name, img_tensor): + # """ + # Add an `img_tensor` associated with `img_name`, to be shown on + # tensorboard. + # Args: + # img_name (str): The name of the image to put into tensorboard. + # img_tensor (torch.Tensor or numpy.array): An `uint8` or `float` + # Tensor of shape `[channel, height, width]` where `channel` is + # 3. The image format should be RGB. The elements in img_tensor + # can either have values in [0, 1] (float32) or [0, 255] (uint8). + # The `img_tensor` will be visualized in tensorboard. + # """ + # self._vis_data.append((img_name, img_tensor, self._iter)) + + def put_scalar(self, name, value, n=1, smoothing_hint=False): + """ + Add a scalar `value` to the `HistoryBuffer` associated with `name`. + Args: + smoothing_hint (bool): a 'hint' on whether this scalar is noisy and should be + smoothed when logged. The hint will be accessible through + :meth:`EventStorage.smoothing_hints`. A writer may ignore the hint + and apply custom smoothing rule. + It defaults to True because most scalars we save need to be smoothed to + provide any useful signal. + """ + name = self._current_prefix + name + history = self._history[name] + history.update(value, n) + self._latest_scalars[name] = (value, self._iter) + + existing_hint = self._smoothing_hints.get(name) + if existing_hint is not None: + assert ( + existing_hint == smoothing_hint + ), "Scalar {} was put with a different smoothing_hint!".format(name) + else: + self._smoothing_hints[name] = smoothing_hint + + # def put_scalars(self, *, smoothing_hint=True, **kwargs): + # """ + # Put multiple scalars from keyword arguments. 
+ # Examples: + # storage.put_scalars(loss=my_loss, accuracy=my_accuracy, smoothing_hint=True) + # """ + # for k, v in kwargs.items(): + # self.put_scalar(k, v, smoothing_hint=smoothing_hint) + # + # def put_histogram(self, hist_name, hist_tensor, bins=1000): + # """ + # Create a histogram from a tensor. + # Args: + # hist_name (str): The name of the histogram to put into tensorboard. + # hist_tensor (torch.Tensor): A Tensor of arbitrary shape to be converted + # into a histogram. + # bins (int): Number of histogram bins. + # """ + # ht_min, ht_max = hist_tensor.min().item(), hist_tensor.max().item() + # + # # Create a histogram with PyTorch + # hist_counts = torch.histc(hist_tensor, bins=bins) + # hist_edges = torch.linspace(start=ht_min, end=ht_max, steps=bins + 1, dtype=torch.float32) + # + # # Parameter for the add_histogram_raw function of SummaryWriter + # hist_params = dict( + # tag=hist_name, + # min=ht_min, + # max=ht_max, + # num=len(hist_tensor), + # sum=float(hist_tensor.sum()), + # sum_squares=float(torch.sum(hist_tensor**2)), + # bucket_limits=hist_edges[1:].tolist(), + # bucket_counts=hist_counts.tolist(), + # global_step=self._iter, + # ) + # self._histograms.append(hist_params) + + def history(self, name): + """ + Returns: + AverageMeter: the history for name + """ + ret = self._history.get(name, None) + if ret is None: + raise KeyError("No history metric available for {}!".format(name)) + return ret + + def histories(self): + """ + Returns: + dict[name -> HistoryBuffer]: the HistoryBuffer for all scalars + """ + return self._history + + def latest(self): + """ + Returns: + dict[str -> (float, int)]: mapping from the name of each scalar to the most + recent value and the iteration number its added. + """ + return self._latest_scalars + + def latest_with_smoothing_hint(self, window_size=20): + """ + Similar to :meth:`latest`, but the returned values + are either the un-smoothed original latest value, + or a median of the given window_size, + depend on whether the smoothing_hint is True. + This provides a default behavior that other writers can use. + """ + result = {} + for k, (v, itr) in self._latest_scalars.items(): + result[k] = ( + self._history[k].median(window_size) if self._smoothing_hints[k] else v, + itr, + ) + return result + + def smoothing_hints(self): + """ + Returns: + dict[name -> bool]: the user-provided hint on whether the scalar + is noisy and needs smoothing. + """ + return self._smoothing_hints + + def step(self): + """ + User should either: (1) Call this function to increment storage.iter when needed. Or + (2) Set `storage.iter` to the correct iteration number before each iteration. + The storage will then be able to associate the new data with an iteration number. + """ + self._iter += 1 + + @property + def iter(self): + """ + Returns: + int: The current iteration number. When used together with a trainer, + this is ensured to be the same as trainer.iter. + """ + return self._iter + + @iter.setter + def iter(self, val): + self._iter = int(val) + + @property + def iteration(self): + # for backward compatibility + return self._iter + + def __enter__(self): + _CURRENT_STORAGE_STACK.append(self) + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + assert _CURRENT_STORAGE_STACK[-1] == self + _CURRENT_STORAGE_STACK.pop() + + @contextmanager + def name_scope(self, name): + """ + Yields: + A context within which all the events added to this storage + will be prefixed by the name scope. 
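+
+        Example (a sketch):
+            with storage.name_scope("val"):
+                storage.put_scalar("loss", 0.5)  # recorded as "val/loss"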
+ """ + old_prefix = self._current_prefix + self._current_prefix = name.rstrip("/") + "/" + yield + self._current_prefix = old_prefix + + def clear_images(self): + """ + Delete all the stored images for visualization. This should be called + after images are written to tensorboard. + """ + self._vis_data = [] + + def clear_histograms(self): + """ + Delete all the stored histograms for visualization. + This should be called after histograms are written to tensorboard. + """ + self._histograms = [] + + def reset_history(self, name): + ret = self._history.get(name, None) + if ret is None: + raise KeyError("No history metric available for {}!".format(name)) + ret.reset() + + def reset_histories(self): + for name in self._history.keys(): + self._history[name].reset() + + +class AverageMeter: + """Computes and stores the average and current value""" + + def __init__(self): + self.val = 0 + self.avg = 0 + self.total = 0 + self.count = 0 + + def reset(self): + self.val = 0 + self.avg = 0 + self.total = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.total += val * n + self.count += n + self.avg = self.total / self.count + + +class HistoryBuffer: + """ + Track a series of scalar values and provide access to smoothed values over a + window or the global average of the series. + """ + + def __init__(self, max_length: int = 1000000) -> None: + """ + Args: + max_length: maximal number of values that can be stored in the + buffer. When the capacity of the buffer is exhausted, old + values will be removed. + """ + self._max_length: int = max_length + self._data: List[Tuple[float, float]] = [] # (value, iteration) pairs + self._count: int = 0 + self._global_avg: float = 0 + + def update(self, value: float, iteration: Optional[float] = None) -> None: + """ + Add a new scalar value produced at certain iteration. If the length + of the buffer exceeds self._max_length, the oldest element will be + removed from the buffer. + """ + if iteration is None: + iteration = self._count + if len(self._data) == self._max_length: + self._data.pop(0) + self._data.append((value, iteration)) + + self._count += 1 + self._global_avg += (value - self._global_avg) / self._count + + def latest(self) -> float: + """ + Return the latest scalar value added to the buffer. + """ + return self._data[-1][0] + + def median(self, window_size: int) -> float: + """ + Return the median of the latest `window_size` values in the buffer. + """ + return np.median([x[0] for x in self._data[-window_size:]]) + + def avg(self, window_size: int) -> float: + """ + Return the mean of the latest `window_size` values in the buffer. + """ + return np.mean([x[0] for x in self._data[-window_size:]]) + + def global_avg(self) -> float: + """ + Return the mean of all the elements in the buffer. Note that this + includes those getting removed due to limited buffer storage. + """ + return self._global_avg + + def values(self) -> List[Tuple[float, float]]: + """ + Returns: + list[(number, iteration)]: content of the current buffer. 
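+
+        Example (a sketch):
+            buf = HistoryBuffer()
+            buf.update(1.0)
+            buf.update(3.0)
+            buf.avg(2)         # 2.0
+            buf.global_avg()   # 2.0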
+ """ + return self._data + + +class ExceptionWriter: + + def __init__(self): + self.logger = logging.getLogger(__name__) + + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_val, exc_tb): + if exc_type: + tb = traceback.format_exception(exc_type, exc_val, exc_tb) + formatted_tb_str = "".join(tb) + self.logger.error(formatted_tb_str) + sys.exit(1) # This prevents double logging the error to the console diff --git a/Pointcept/pointcept/utils/logger.py b/Pointcept/pointcept/utils/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..ddaf2c5a765c9f1325737c3cbc73e1169f13cdd4 --- /dev/null +++ b/Pointcept/pointcept/utils/logger.py @@ -0,0 +1,172 @@ +""" +Logger Utils + +Modified from mmcv + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import logging +import torch +import torch.distributed as dist + +from termcolor import colored + +logger_initialized = {} +root_status = 0 + + +class _ColorfulFormatter(logging.Formatter): + def __init__(self, *args, **kwargs): + self._root_name = kwargs.pop("root_name") + "." + super(_ColorfulFormatter, self).__init__(*args, **kwargs) + + def formatMessage(self, record): + log = super(_ColorfulFormatter, self).formatMessage(record) + if record.levelno == logging.WARNING: + prefix = colored("WARNING", "red", attrs=["blink"]) + elif record.levelno == logging.ERROR or record.levelno == logging.CRITICAL: + prefix = colored("ERROR", "red", attrs=["blink", "underline"]) + else: + return log + return prefix + " " + log + + +def get_logger(name, log_file=None, log_level=logging.INFO, file_mode="a", color=False): + """Initialize and get a logger by name. + + If the logger has not been initialized, this method will initialize the + logger by adding one or two handlers, otherwise the initialized logger will + be directly returned. During initialization, a StreamHandler will always be + added. If `log_file` is specified and the process rank is 0, a FileHandler + will also be added. + + Args: + name (str): Logger name. + log_file (str | None): The log filename. If specified, a FileHandler + will be added to the logger. + log_level (int): The logger level. Note that only the process of + rank 0 is affected, and other processes will set the level to + "Error" thus be silent most of the time. + file_mode (str): The file mode used in opening log file. + Defaults to 'a'. + color (bool): Colorful log output. Defaults to True + + Returns: + logging.Logger: The expected logger. + """ + logger = logging.getLogger(name) + + if name in logger_initialized: + return logger + # handle hierarchical names + # e.g., logger "a" is initialized, then logger "a.b" will skip the + # initialization since it is a child of "a". + for logger_name in logger_initialized: + if name.startswith(logger_name): + return logger + + logger.propagate = False + + stream_handler = logging.StreamHandler() + handlers = [stream_handler] + + if dist.is_available() and dist.is_initialized(): + rank = dist.get_rank() + else: + rank = 0 + + # only rank 0 will add a FileHandler + if rank == 0 and log_file is not None: + # Here, the default behaviour of the official logger is 'a'. Thus, we + # provide an interface to change the file mode to the default + # behaviour. 
+ file_handler = logging.FileHandler(log_file, file_mode) + handlers.append(file_handler) + + plain_formatter = logging.Formatter( + "[%(asctime)s %(levelname)s %(filename)s line %(lineno)d %(process)d] %(message)s" + ) + if color: + formatter = _ColorfulFormatter( + colored("[%(asctime)s %(name)s]: ", "green") + "%(message)s", + datefmt="%m/%d %H:%M:%S", + root_name=name, + ) + else: + formatter = plain_formatter + for handler in handlers: + handler.setFormatter(formatter) + handler.setLevel(log_level) + logger.addHandler(handler) + + if rank == 0: + logger.setLevel(log_level) + else: + logger.setLevel(logging.ERROR) + + logger_initialized[name] = True + + return logger + + +def print_log(msg, logger=None, level=logging.INFO): + """Print a log message. + + Args: + msg (str): The message to be logged. + logger (logging.Logger | str | None): The logger to be used. + Some special loggers are: + - "silent": no message will be printed. + - other str: the logger obtained with `get_root_logger(logger)`. + - None: The `print()` method will be used to print log messages. + level (int): Logging level. Only available when `logger` is a Logger + object or "root". + """ + if logger is None: + print(msg) + elif isinstance(logger, logging.Logger): + logger.log(level, msg) + elif logger == "silent": + pass + elif isinstance(logger, str): + _logger = get_logger(logger) + _logger.log(level, msg) + else: + raise TypeError( + "logger should be either a logging.Logger object, str, " + f'"silent" or None, but got {type(logger)}' + ) + + +def get_root_logger(log_file=None, log_level=logging.INFO, file_mode="a"): + """Get the root logger. + + The logger will be initialized if it has not been initialized. By default a + StreamHandler will be added. If `log_file` is specified, a FileHandler will + also be added. The name of the root logger is the top-level package name. + + Args: + log_file (str | None): The log filename. If specified, a FileHandler + will be added to the root logger. + log_level (int): The root logger level. Note that only the process of + rank 0 is affected, while other processes will set the level to + "Error" and be silent most of the time. + file_mode (str): File Mode of logger. (w or a) + + Returns: + logging.Logger: The root logger. + """ + logger = get_logger( + name="pointcept", log_file=log_file, log_level=log_level, file_mode=file_mode + ) + return logger + + +def _log_api_usage(identifier: str): + """ + Internal function used to log the usage of different detectron2 components + inside facebook's infra. + """ + torch._C._log_api_usage_once("pointcept." + identifier) diff --git a/Pointcept/pointcept/utils/misc.py b/Pointcept/pointcept/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..3177bae3882ccad347002165d2b34d5dc2540359 --- /dev/null +++ b/Pointcept/pointcept/utils/misc.py @@ -0,0 +1,164 @@ +""" +Misc + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. 
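+
+Example (a minimal sketch of the IoU helpers defined below):
+
+    import numpy as np
+    pred = np.array([0, 1, 1])
+    gt = np.array([0, 1, 0])
+    i, u, t = intersection_and_union(pred, gt, K=2)
+    miou = (i / (u + 1e-10)).mean()  # 0.5 for this toy case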
+""" + +import os +import warnings +from collections import abc +import numpy as np +import torch +from importlib import import_module + + +class AverageMeter(object): + """Computes and stores the average and current value""" + + def __init__(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count + + +def intersection_and_union(output, target, K, ignore_index=-1): + # 'K' classes, output and target sizes are N or N * L or N * H * W, each value in range 0 to K - 1. + assert output.ndim in [1, 2, 3] + assert output.shape == target.shape + output = output.reshape(output.size).copy() + target = target.reshape(target.size) + output[np.where(target == ignore_index)[0]] = ignore_index + intersection = output[np.where(output == target)[0]] + area_intersection, _ = np.histogram(intersection, bins=np.arange(K + 1)) + area_output, _ = np.histogram(output, bins=np.arange(K + 1)) + area_target, _ = np.histogram(target, bins=np.arange(K + 1)) + area_union = area_output + area_target - area_intersection + return area_intersection, area_union, area_target + + +def intersection_and_union_gpu(output, target, k, ignore_index=-1): + # 'K' classes, output and target sizes are N or N * L or N * H * W, each value in range 0 to K - 1. + assert output.dim() in [1, 2, 3] + assert output.shape == target.shape + output = output.view(-1) + target = target.view(-1) + output[target == ignore_index] = ignore_index + intersection = output[output == target] + area_intersection = torch.histc(intersection, bins=k, min=0, max=k - 1) + area_output = torch.histc(output, bins=k, min=0, max=k - 1) + area_target = torch.histc(target, bins=k, min=0, max=k - 1) + area_union = area_output + area_target - area_intersection + return area_intersection, area_union, area_target + + +def make_dirs(dir_name): + if not os.path.exists(dir_name): + os.makedirs(dir_name, exist_ok=True) + + +def find_free_port(): + import socket + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + # Binding to port 0 will cause the OS to find an available port for us + sock.bind(("", 0)) + port = sock.getsockname()[1] + sock.close() + # NOTE: there is still a chance the port could be taken by other processes. + return port + + +def is_seq_of(seq, expected_type, seq_type=None): + """Check whether it is a sequence of some type. + + Args: + seq (Sequence): The sequence to be checked. + expected_type (type): Expected type of sequence items. + seq_type (type, optional): Expected sequence type. + + Returns: + bool: Whether the sequence is valid. + """ + if seq_type is None: + exp_seq_type = abc.Sequence + else: + assert isinstance(seq_type, type) + exp_seq_type = seq_type + if not isinstance(seq, exp_seq_type): + return False + for item in seq: + if not isinstance(item, expected_type): + return False + return True + + +def is_str(x): + """Whether the input is an string instance. + + Note: This method is deprecated since python 2 is no longer supported. + """ + return isinstance(x, str) + + +def import_modules_from_strings(imports, allow_failed_imports=False): + """Import modules from the given list of strings. + + Args: + imports (list | str | None): The given module names to be imported. + allow_failed_imports (bool): If True, the failed imports will return + None. Otherwise, an ImportError is raise. Default: False. 
+ + Returns: + list[module] | module | None: The imported modules. + + Examples: + >>> osp, sys = import_modules_from_strings( + ... ['os.path', 'sys']) + >>> import os.path as osp_ + >>> import sys as sys_ + >>> assert osp == osp_ + >>> assert sys == sys_ + """ + if not imports: + return + single_import = False + if isinstance(imports, str): + single_import = True + imports = [imports] + if not isinstance(imports, list): + raise TypeError(f"custom_imports must be a list but got type {type(imports)}") + imported = [] + for imp in imports: + if not isinstance(imp, str): + raise TypeError(f"{imp} is of type {type(imp)} and cannot be imported.") + try: + imported_tmp = import_module(imp) + except ImportError: + if allow_failed_imports: + warnings.warn(f"{imp} failed to import and is ignored.", UserWarning) + imported_tmp = None + else: + raise ImportError + imported.append(imported_tmp) + if single_import: + imported = imported[0] + return imported + + +class DummyClass: + def __init__(self): + pass diff --git a/Pointcept/pointcept/utils/optimizer.py b/Pointcept/pointcept/utils/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..355ec8916ad041ca02b404029983b0f59933fb8c --- /dev/null +++ b/Pointcept/pointcept/utils/optimizer.py @@ -0,0 +1,55 @@ +""" +Optimizer + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import torch +from pointcept.utils.logger import get_root_logger +from pointcept.utils.registry import Registry + +OPTIMIZERS = Registry("optimizers") + + +OPTIMIZERS.register_module(module=torch.optim.SGD, name="SGD") +OPTIMIZERS.register_module(module=torch.optim.Adam, name="Adam") +OPTIMIZERS.register_module(module=torch.optim.AdamW, name="AdamW") + + +def build_optimizer(cfg, model, param_dicts=None): + if param_dicts is None: + cfg.params = model.parameters() + else: + cfg.params = [dict(names=[], params=[], lr=cfg.lr)] + for i in range(len(param_dicts)): + param_group = dict(names=[], params=[]) + if "lr" in param_dicts[i].keys(): + param_group["lr"] = param_dicts[i].lr + if "momentum" in param_dicts[i].keys(): + param_group["momentum"] = param_dicts[i].momentum + if "weight_decay" in param_dicts[i].keys(): + param_group["weight_decay"] = param_dicts[i].weight_decay + cfg.params.append(param_group) + + for n, p in model.named_parameters(): + flag = False + for i in range(len(param_dicts)): + if param_dicts[i].keyword in n: + cfg.params[i + 1]["names"].append(n) + cfg.params[i + 1]["params"].append(p) + flag = True + break + if not flag: + cfg.params[0]["names"].append(n) + cfg.params[0]["params"].append(p) + + logger = get_root_logger() + for i in range(len(cfg.params)): + param_names = cfg.params[i].pop("names") + message = "" + for key in cfg.params[i].keys(): + if key != "params": + message += f" {key}: {cfg.params[i][key]};" + logger.info(f"Params Group {i+1} -{message} Params: {param_names}.") + return OPTIMIZERS.build(cfg=cfg) diff --git a/Pointcept/pointcept/utils/path.py b/Pointcept/pointcept/utils/path.py new file mode 100644 index 0000000000000000000000000000000000000000..ce98fa5fd0dfbf6e1d61e833ecc35fea4ab2782b --- /dev/null +++ b/Pointcept/pointcept/utils/path.py @@ -0,0 +1,103 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
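+# Example (a sketch): iterate all ".py" files under a directory recursively
+#   for rel_path in scandir("pointcept/utils", suffix=".py", recursive=True):
+#       print(rel_path)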
+import os +import os.path as osp +from pathlib import Path + +from .misc import is_str + + +def is_filepath(x): + return is_str(x) or isinstance(x, Path) + + +def fopen(filepath, *args, **kwargs): + if is_str(filepath): + return open(filepath, *args, **kwargs) + elif isinstance(filepath, Path): + return filepath.open(*args, **kwargs) + raise ValueError("`filepath` should be a string or a Path") + + +def check_file_exist(filename, msg_tmpl='file "{}" does not exist'): + if not osp.isfile(filename): + raise FileNotFoundError(msg_tmpl.format(filename)) + + +def mkdir_or_exist(dir_name, mode=0o777): + if dir_name == "": + return + dir_name = osp.expanduser(dir_name) + os.makedirs(dir_name, mode=mode, exist_ok=True) + + +def symlink(src, dst, overwrite=True, **kwargs): + if os.path.lexists(dst) and overwrite: + os.remove(dst) + os.symlink(src, dst, **kwargs) + + +def scandir(dir_path, suffix=None, recursive=False, case_sensitive=True): + """Scan a directory to find files of interest. + + Args: + dir_path (str | obj:`Path`): Path of the directory. + suffix (str | tuple(str), optional): File suffix that we are + interested in. Default: None. + recursive (bool, optional): If set to True, recursively scan the + directory. Default: False. + case_sensitive (bool, optional): If set to False, ignore the case of + suffix. Default: True. + + Returns: + A generator for all files of interest, with paths relative to + ``dir_path``. + """ + if isinstance(dir_path, (str, Path)): + dir_path = str(dir_path) + else: + raise TypeError('"dir_path" must be a string or Path object') + + if (suffix is not None) and not isinstance(suffix, (str, tuple)): + raise TypeError('"suffix" must be a string or tuple of strings') + + if suffix is not None and not case_sensitive: + suffix = ( + suffix.lower() + if isinstance(suffix, str) + else tuple(item.lower() for item in suffix) + ) + + root = dir_path + + def _scandir(dir_path, suffix, recursive, case_sensitive): + for entry in os.scandir(dir_path): + if not entry.name.startswith(".") and entry.is_file(): + rel_path = osp.relpath(entry.path, root) + _rel_path = rel_path if case_sensitive else rel_path.lower() + if suffix is None or _rel_path.endswith(suffix): + yield rel_path + elif recursive and os.path.isdir(entry.path): + # scan recursively if entry.path is a directory + yield from _scandir(entry.path, suffix, recursive, case_sensitive) + + return _scandir(dir_path, suffix, recursive, case_sensitive) + + +def find_vcs_root(path, markers=(".git",)): + """Find the closest ancestor directory (including ``path`` itself) that + contains one of the specified markers. + + Args: + path (str): Path of directory or file. + markers (list[str], optional): List of file or directory names. + + Returns: + The directory containing one of the markers, or None if not found. + """ + if osp.isfile(path): + path = osp.dirname(path) + + prev, cur = None, osp.abspath(osp.expanduser(path)) + while cur != prev: + if any(osp.exists(osp.join(cur, marker)) for marker in markers): + return cur + prev, cur = cur, osp.split(cur)[0] + return None
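Since `scandir` above returns a lazy generator of relative paths, here is a short illustration; the directory argument is arbitrary:

```python
from pointcept.utils.path import scandir

# List every .py file under a tree, paths relative to the scanned root.
for rel_path in scandir("Pointcept/pointcept/utils", suffix=".py", recursive=True):
    print(rel_path)
```

diff --git a/Pointcept/pointcept/utils/registry.py b/Pointcept/pointcept/utils/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..7ac308a87d38ff61da14d6b4d5c73b4c68c15a58 --- /dev/null +++ b/Pointcept/pointcept/utils/registry.py @@ -0,0 +1,316 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import inspect +import warnings +from functools import partial + +from .misc import is_seq_of + + +def build_from_cfg(cfg, registry, default_args=None): + """Build a module from configs dict.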
+ + Args: + cfg (dict): Config dict. It should at least contain the key "type". + registry (:obj:`Registry`): The registry to search the type from. + default_args (dict, optional): Default initialization arguments. + + Returns: + object: The constructed object. + """ + if not isinstance(cfg, dict): + raise TypeError(f"cfg must be a dict, but got {type(cfg)}") + if "type" not in cfg: + if default_args is None or "type" not in default_args: + raise KeyError( + '`cfg` or `default_args` must contain the key "type", ' + f"but got {cfg}\n{default_args}" + ) + if not isinstance(registry, Registry): + raise TypeError( + "registry must be a Registry object, " f"but got {type(registry)}" + ) + if not (isinstance(default_args, dict) or default_args is None): + raise TypeError( + "default_args must be a dict or None, " f"but got {type(default_args)}" + ) + + args = cfg.copy() + + if default_args is not None: + for name, value in default_args.items(): + args.setdefault(name, value) + + obj_type = args.pop("type") + if isinstance(obj_type, str): + obj_cls = registry.get(obj_type) + if obj_cls is None: + raise KeyError(f"{obj_type} is not in the {registry.name} registry") + elif inspect.isclass(obj_type): + obj_cls = obj_type + else: + raise TypeError(f"type must be a str or valid type, but got {type(obj_type)}") + try: + return obj_cls(**args) + except Exception as e: + # Normal TypeError does not print class name. + raise type(e)(f"{obj_cls.__name__}: {e}") + + +class Registry: + """A registry to map strings to classes. + + Registered objects can be built from the registry. + Example: + >>> MODELS = Registry('models') + >>> @MODELS.register_module() + >>> class ResNet: + >>> pass + >>> resnet = MODELS.build(dict(type='ResNet')) + + Please refer to + https://mmcv.readthedocs.io/en/latest/understand_mmcv/registry.html for + advanced usage. + + Args: + name (str): Registry name. + build_func (func, optional): Build function to construct instance from + Registry. :func:`build_from_cfg` is used if neither ``parent`` nor + ``build_func`` is specified. If ``parent`` is specified and + ``build_func`` is not given, ``build_func`` will be inherited + from ``parent``. Default: None. + parent (Registry, optional): Parent registry. The class registered in + a children registry can be built from the parent. Default: None. + scope (str, optional): The scope of registry. It is the key to search + for children registry. If not specified, scope will be the name of + the package where class is defined, e.g. mmdet, mmcls, mmseg. + Default: None. + """ + + def __init__(self, name, build_func=None, parent=None, scope=None): + self._name = name + self._module_dict = dict() + self._children = dict() + self._scope = self.infer_scope() if scope is None else scope + + # self.build_func will be set with the following priority: + # 1. build_func + # 2. parent.build_func + # 3.
build_from_cfg + if build_func is None: + if parent is not None: + self.build_func = parent.build_func + else: + self.build_func = build_from_cfg + else: + self.build_func = build_func + if parent is not None: + assert isinstance(parent, Registry) + parent._add_children(self) + self.parent = parent + else: + self.parent = None + + def __len__(self): + return len(self._module_dict) + + def __contains__(self, key): + return self.get(key) is not None + + def __repr__(self): + format_str = ( + self.__class__.__name__ + f"(name={self._name}, " + f"items={self._module_dict})" + ) + return format_str + + @staticmethod + def infer_scope(): + """Infer the scope of registry. + + The name of the package where registry is defined will be returned. + + Example: + # in mmdet/models/backbone/resnet.py + >>> MODELS = Registry('models') + >>> @MODELS.register_module() + >>> class ResNet: + >>> pass + The scope of ``ResNet`` will be ``mmdet``. + + + Returns: + scope (str): The inferred scope name. + """ + # inspect.stack() traces where this function is called; index 2 + # indicates the frame where `infer_scope()` is called + filename = inspect.getmodule(inspect.stack()[2][0]).__name__ + split_filename = filename.split(".") + return split_filename[0] + + @staticmethod + def split_scope_key(key): + """Split scope and key. + + The first scope will be split from key. + + Examples: + >>> Registry.split_scope_key('mmdet.ResNet') + 'mmdet', 'ResNet' + >>> Registry.split_scope_key('ResNet') + None, 'ResNet' + + Returns: + scope (str, None): The first scope. + key (str): The remaining key. + """ + split_index = key.find(".") + if split_index != -1: + return key[:split_index], key[split_index + 1 :] + else: + return None, key + + @property + def name(self): + return self._name + + @property + def scope(self): + return self._scope + + @property + def module_dict(self): + return self._module_dict + + @property + def children(self): + return self._children + + def get(self, key): + """Get the registry record. + + Args: + key (str): The class name in string format. + + Returns: + class: The corresponding class. + """ + scope, real_key = self.split_scope_key(key) + if scope is None or scope == self._scope: + # get from self + if real_key in self._module_dict: + return self._module_dict[real_key] + else: + # get from self._children + if scope in self._children: + return self._children[scope].get(real_key) + else: + # goto root + parent = self.parent + while parent.parent is not None: + parent = parent.parent + return parent.get(key) + + def build(self, *args, **kwargs): + return self.build_func(*args, **kwargs, registry=self)
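The scope mechanism in `get` above resolves dotted keys like `"mmdet.ResNet"` through the registry tree. A minimal sketch of the behavior; the registry names and the `mmdet` scope are illustrative:

```python
from pointcept.utils.registry import Registry

MODELS = Registry("models")                               # root registry
child = Registry("models", parent=MODELS, scope="mmdet")  # scoped child

@child.register_module()
class ResNet:
    pass

assert MODELS.get("mmdet.ResNet") is ResNet  # parent resolves the child scope
assert child.get("ResNet") is ResNet         # plain key within the child
```

+ + def _add_children(self, registry): + """Add children for a registry. + + The ``registry`` will be added as children based on its scope. + The parent registry can build objects from its children.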
+ + Example: + >>> models = Registry('models') + >>> mmdet_models = Registry('models', parent=models) + >>> @mmdet_models.register_module() + >>> class ResNet: + >>> pass + >>> resnet = models.build(dict(type='mmdet.ResNet')) + """ + + assert isinstance(registry, Registry) + assert registry.scope is not None + assert ( + registry.scope not in self.children + ), f"scope {registry.scope} exists in {self.name} registry" + self.children[registry.scope] = registry + + def _register_module(self, module_class, module_name=None, force=False): + if not inspect.isclass(module_class): + raise TypeError("module must be a class, " f"but got {type(module_class)}") + + if module_name is None: + module_name = module_class.__name__ + if isinstance(module_name, str): + module_name = [module_name] + for name in module_name: + if not force and name in self._module_dict: + raise KeyError(f"{name} is already registered " f"in {self.name}") + self._module_dict[name] = module_class + + def deprecated_register_module(self, cls=None, force=False): + warnings.warn( + "The old API of register_module(module, force=False) " + "is deprecated and will be removed, please use the new API " + "register_module(name=None, force=False, module=None) instead." + ) + if cls is None: + return partial(self.deprecated_register_module, force=force) + self._register_module(cls, force=force) + return cls + + def register_module(self, name=None, force=False, module=None): + """Register a module. + + A record will be added to `self._module_dict`, whose key is the class + name or the specified name, and value is the class itself. + It can be used as a decorator or a normal function. + + Example: + >>> backbones = Registry('backbone') + >>> @backbones.register_module() + >>> class ResNet: + >>> pass + + >>> backbones = Registry('backbone') + >>> @backbones.register_module(name='mnet') + >>> class MobileNet: + >>> pass + + >>> backbones = Registry('backbone') + >>> class ResNet: + >>> pass + >>> backbones.register_module(ResNet) + + Args: + name (str | None): The module name to be registered. If not + specified, the class name will be used. + force (bool, optional): Whether to override an existing class with + the same name. Default: False. + module (type): Module class to be registered. + """ + if not isinstance(force, bool): + raise TypeError(f"force must be a boolean, but got {type(force)}") + # NOTE: This is a workaround to be compatible with the old API, + # while it may introduce unexpected bugs.
+ if isinstance(name, type): + return self.deprecated_register_module(name, force=force) + + # raise the error ahead of time + if not (name is None or isinstance(name, str) or is_seq_of(name, str)): + raise TypeError( + "name must be either of None, an instance of str or a sequence" + f" of str, but got {type(name)}" + ) + + # use it as a normal method: x.register_module(module=SomeClass) + if module is not None: + self._register_module(module_class=module, module_name=name, force=force) + return module + + # use it as a decorator: @x.register_module() + def _register(cls): + self._register_module(module_class=cls, module_name=name, force=force) + return cls + + return _register diff --git a/Pointcept/pointcept/utils/scheduler.py b/Pointcept/pointcept/utils/scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..3e2e29fdde2e2668c023af36afdb89e73fb9ce53 --- /dev/null +++ b/Pointcept/pointcept/utils/scheduler.py @@ -0,0 +1,147 @@ +""" +Scheduler + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import torch.optim.lr_scheduler as lr_scheduler +from .registry import Registry + +SCHEDULERS = Registry("schedulers") + + +@SCHEDULERS.register_module() +class MultiStepLR(lr_scheduler.MultiStepLR): + def __init__( + self, + optimizer, + milestones, + total_steps, + gamma=0.1, + last_epoch=-1, + verbose=False, + ): + super().__init__( + optimizer=optimizer, + milestones=[rate * total_steps for rate in milestones], + gamma=gamma, + last_epoch=last_epoch, + verbose=verbose, + ) + + +@SCHEDULERS.register_module() +class MultiStepWithWarmupLR(lr_scheduler.LambdaLR): + def __init__( + self, + optimizer, + milestones, + total_steps, + gamma=0.1, + warmup_rate=0.05, + warmup_scale=1e-6, + last_epoch=-1, + verbose=False, + ): + milestones = [rate * total_steps for rate in milestones] + + def multi_step_with_warmup(s): + factor = 1.0 + for i in range(len(milestones)): + if s < milestones[i]: + break + factor *= gamma + + if s <= warmup_rate * total_steps: + warmup_coefficient = 1 - (1 - s / warmup_rate / total_steps) * ( + 1 - warmup_scale + ) + else: + warmup_coefficient = 1.0 + return warmup_coefficient * factor + + super().__init__( + optimizer=optimizer, + lr_lambda=multi_step_with_warmup, + last_epoch=last_epoch, + verbose=verbose, + ) + + +@SCHEDULERS.register_module() +class PolyLR(lr_scheduler.LambdaLR): + def __init__(self, optimizer, total_steps, power=0.9, last_epoch=-1, verbose=False): + super().__init__( + optimizer=optimizer, + lr_lambda=lambda s: (1 - s / (total_steps + 1)) ** power, + last_epoch=last_epoch, + verbose=verbose, + ) + + +@SCHEDULERS.register_module() +class ExpLR(lr_scheduler.LambdaLR): + def __init__(self, optimizer, total_steps, gamma=0.9, last_epoch=-1, verbose=False): + super().__init__( + optimizer=optimizer, + lr_lambda=lambda s: gamma ** (s / total_steps), + last_epoch=last_epoch, + verbose=verbose, + ) + + +@SCHEDULERS.register_module() +class CosineAnnealingLR(lr_scheduler.CosineAnnealingLR): + def __init__(self, optimizer, total_steps, eta_min=0, last_epoch=-1, verbose=False): + super().__init__( + optimizer=optimizer, + T_max=total_steps, + eta_min=eta_min, + last_epoch=last_epoch, + verbose=verbose, + ) + + +@SCHEDULERS.register_module() +class OneCycleLR(lr_scheduler.OneCycleLR): + r""" + torch.optim.lr_scheduler.OneCycleLR, Block total_steps + """ + + def __init__( + self, + optimizer, + max_lr, + total_steps=None, + pct_start=0.3, + anneal_strategy="cos", + cycle_momentum=True, + 
base_momentum=0.85, + max_momentum=0.95, + div_factor=25.0, + final_div_factor=1e4, + three_phase=False, + last_epoch=-1, + verbose=False, + ): + super().__init__( + optimizer=optimizer, + max_lr=max_lr, + total_steps=total_steps, + pct_start=pct_start, + anneal_strategy=anneal_strategy, + cycle_momentum=cycle_momentum, + base_momentum=base_momentum, + max_momentum=max_momentum, + div_factor=div_factor, + final_div_factor=final_div_factor, + three_phase=three_phase, + last_epoch=last_epoch, + verbose=verbose, + ) + + +def build_scheduler(cfg, optimizer): + cfg.optimizer = optimizer + return SCHEDULERS.build(cfg=cfg) diff --git a/Pointcept/pointcept/utils/timer.py b/Pointcept/pointcept/utils/timer.py new file mode 100644 index 0000000000000000000000000000000000000000..3de4a16e33c43fe61ea3088f82216fd62eb6e959 --- /dev/null +++ b/Pointcept/pointcept/utils/timer.py @@ -0,0 +1,70 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# -*- coding: utf-8 -*- + +from time import perf_counter +from typing import Optional + + +class Timer: + """ + A timer which computes the time elapsed since the start/reset of the timer. + """ + + def __init__(self) -> None: + self.reset() + + def reset(self) -> None: + """ + Reset the timer. + """ + self._start = perf_counter() + self._paused: Optional[float] = None + self._total_paused = 0 + self._count_start = 1 + + def pause(self) -> None: + """ + Pause the timer. + """ + if self._paused is not None: + raise ValueError("Trying to pause a Timer that is already paused!") + self._paused = perf_counter() + + def is_paused(self) -> bool: + """ + Returns: + bool: whether the timer is currently paused + """ + return self._paused is not None + + def resume(self) -> None: + """ + Resume the timer. + """ + if self._paused is None: + raise ValueError("Trying to resume a Timer that is not paused!") + # pyre-fixme[58]: `-` is not supported for operand types `float` and + # `Optional[float]`. + self._total_paused += perf_counter() - self._paused + self._paused = None + self._count_start += 1 + + def seconds(self) -> float: + """ + Returns: + (float): the total number of seconds since the start/reset of the + timer, excluding the time when the timer is paused. + """ + if self._paused is not None: + end_time: float = self._paused # type: ignore + else: + end_time = perf_counter() + return end_time - self._start - self._total_paused + + def avg_seconds(self) -> float: + """ + Returns: + (float): the average number of seconds between every start/reset and + pause. + """ + return self.seconds() / self._count_start
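A short illustration of the `Timer` semantics above; `sleep` stands in for real work and the durations are arbitrary. Paused intervals are excluded from `seconds()`:

```python
from time import sleep
from pointcept.utils.timer import Timer

timer = Timer()
sleep(0.2)          # counted
timer.pause()
sleep(0.5)          # not counted while paused
timer.resume()
print(f"{timer.seconds():.2f}s")  # roughly 0.20
```

diff --git a/Pointcept/pointcept/utils/visualization.py b/Pointcept/pointcept/utils/visualization.py new file mode 100644 index 0000000000000000000000000000000000000000..7a010dd8289f60119d1bfbccdff65edb908e24f6 --- /dev/null +++ b/Pointcept/pointcept/utils/visualization.py @@ -0,0 +1,89 @@ +""" +Visualization Utils + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you.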
+""" + +import os +import open3d as o3d +import numpy as np +import torch + + +def to_numpy(x): + if isinstance(x, torch.Tensor): + x = x.clone().detach().cpu().numpy() + assert isinstance(x, np.ndarray) + return x + + +def save_point_cloud(coord, color=None, file_path="pc.ply", logger=None): + os.makedirs(os.path.dirname(file_path), exist_ok=True) + coord = to_numpy(coord) + if color is not None: + color = to_numpy(color) + pcd = o3d.geometry.PointCloud() + pcd.points = o3d.utility.Vector3dVector(coord) + pcd.colors = o3d.utility.Vector3dVector( + np.ones_like(coord) if color is None else color + ) + o3d.io.write_point_cloud(file_path, pcd) + if logger is not None: + logger.info(f"Save Point Cloud to: {file_path}") + + +def save_bounding_boxes( + bboxes_corners, color=(1.0, 0.0, 0.0), file_path="bbox.ply", logger=None +): + bboxes_corners = to_numpy(bboxes_corners) + # point list + points = bboxes_corners.reshape(-1, 3) + # line list + box_lines = np.array( + [ + [0, 1], + [1, 2], + [2, 3], + [3, 0], + [4, 5], + [5, 6], + [6, 7], + [7, 0], + [0, 4], + [1, 5], + [2, 6], + [3, 7], + ] + ) + lines = [] + for i, _ in enumerate(bboxes_corners): + lines.append(box_lines + i * 8) + lines = np.concatenate(lines) + # color list + color = np.array([color for _ in range(len(lines))]) + # generate line set + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + line_set.colors = o3d.utility.Vector3dVector(color) + o3d.io.write_line_set(file_path, line_set) + + if logger is not None: + logger.info(f"Save Boxes to: {file_path}") + + +def save_lines( + points, lines, color=(1.0, 0.0, 0.0), file_path="lines.ply", logger=None +): + points = to_numpy(points) + lines = to_numpy(lines) + colors = np.array([color for _ in range(len(lines))]) + line_set = o3d.geometry.LineSet() + line_set.points = o3d.utility.Vector3dVector(points) + line_set.lines = o3d.utility.Vector2iVector(lines) + line_set.colors = o3d.utility.Vector3dVector(colors) + o3d.io.write_line_set(file_path, line_set) + + if logger is not None: + logger.info(f"Save Lines to: {file_path}") diff --git a/Pointcept/scripts/build_image.sh b/Pointcept/scripts/build_image.sh new file mode 100644 index 0000000000000000000000000000000000000000..31a6a7fc23e57b3b738450d5c42fed4cc45b9b65 --- /dev/null +++ b/Pointcept/scripts/build_image.sh @@ -0,0 +1,83 @@ +TORCH_VERSION=2.0.1 +CUDA_VERSION=11.7 +CUDNN_VERSION=8 + +ARGS=`getopt -o t:c: -l torch:,cuda:,cudnn: -n "$0" -- "$@"` +[ $? 
!= 0 ] && exit 1 +eval set -- "${ARGS}" +while true ; do + case "$1" in + -t | --torch) + TORCH_VERSION=$2 + shift 2 + ;; + -c | --cuda) + CUDA_VERSION=$2 + shift 2 + ;; + --cudnn) + CUDNN_VERSION=$2 + shift 2 + ;; + --) + break + ;; + *) + echo "Invalid option: $1" + exit 1 + ;; + esac +done + +CUDA_VERSION_NO_DOT=`echo ${CUDA_VERSION} | tr -d "."` +BASE_TORCH_TAG=${TORCH_VERSION}-cuda${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel +IMG_TAG=pointcept/pointcept:pytorch${BASE_TORCH_TAG} + +echo "TORCH VERSION: ${TORCH_VERSION}" +echo "CUDA VERSION: ${CUDA_VERSION}" +echo "CUDNN VERSION: ${CUDNN_VERSION}" + + +cat > ./Dockerfile <<- EOM +FROM pytorch/pytorch:${BASE_TORCH_TAG} + +# Fix nvidia-key error issue (NO_PUBKEY A4B469963BF863CC) +RUN rm /etc/apt/sources.list.d/*.list + +# Installing apt packages +RUN export DEBIAN_FRONTEND=noninteractive \ + && apt -y update \ + && apt -y install --no-install-recommends \ + git wget tmux vim zsh build-essential cmake ninja-build libopenblas-dev libsparsehash-dev \ + && apt autoremove -y \ + && apt clean -y \ + && export DEBIAN_FRONTEND=dialog + +# Install Pointcept environment +RUN conda install h5py pyyaml -c anaconda -y +RUN conda install sharedarray tensorboard tensorboardx yapf addict einops scipy plyfile termcolor timm -c conda-forge -y +RUN conda install pytorch-cluster pytorch-scatter pytorch-sparse -c pyg -y + +RUN pip install --upgrade pip +RUN pip install torch-geometric +RUN pip install spconv-cu${CUDA_VERSION_NO_DOT} +RUN pip install open3d + +# Build MinkowskiEngine +RUN git clone https://github.com/NVIDIA/MinkowskiEngine.git +WORKDIR /workspace/MinkowskiEngine +RUN TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0+PTX 8.0" python setup.py install --blas=openblas --force_cuda +WORKDIR /workspace + +# Build pointops +RUN git clone https://github.com/Pointcept/Pointcept.git +RUN TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0+PTX 8.0" pip install Pointcept/libs/pointops -v + +# Build pointgroup_ops +RUN TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0+PTX 8.0" pip install Pointcept/libs/pointgroup_ops -v + +# Build swin3d +RUN TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX 8.0" pip install -U git+https://github.com/microsoft/Swin3D.git -v +EOM + +docker build . -f ./Dockerfile -t $IMG_TAG \ No newline at end of file
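As a usage note for the script above: a typical invocation is `bash scripts/build_image.sh -t 2.0.1 -c 11.7 --cudnn 8` (these are also the defaults), which writes a temporary Dockerfile and builds the tag `pointcept/pointcept:pytorch2.0.1-cuda11.7-cudnn8-devel`.

diff --git a/Pointcept/scripts/create_tars.sh b/Pointcept/scripts/create_tars.sh new file mode 100644 index 0000000000000000000000000000000000000000..8bd990b2fc6d3448202a04db63c2adb707c2652b --- /dev/null +++ b/Pointcept/scripts/create_tars.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +# Variables +SOURCE_DIR=$1 +DEST_DIR=$2 +MAX_SIZE=$(awk "BEGIN {printf \"%d\", $3 * 1024 * 1024}") # Convert GB to KB as an integer + +# Get the base name of the source directory to use as TAR_NAME +TAR_NAME=$(basename "$SOURCE_DIR") + +# Create destination directory if it doesn't exist +mkdir -p "$DEST_DIR" + +# Function to create a new tar file +create_tar() { + tar_number=$1 + file_list=$2 + tar_name=$(printf "%s/${TAR_NAME}_%0${width}d.tar.gz" "$DEST_DIR" "$tar_number") + tar -zcvf "$tar_name" -C "$SOURCE_DIR" -T "$file_list" +} + +# Initialize +tar_number=1 +current_size=0 +temp_dir=$(mktemp -d) +file_list="$temp_dir/file_list_$tar_number" +echo Start indexing "file_list_$tar_number" + +cd "$SOURCE_DIR" || exit 1 + +# Iterate over all files in the source directory +find .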
-type f | while IFS= read -r file; do + file_size=$(du -k "$file" | cut -f1) + + if [ $(( current_size + file_size )) -gt $MAX_SIZE ]; then + tar_number=$((tar_number + 1)) + file_list="$temp_dir/file_list_$tar_number" + echo Start indexing "file_list_$tar_number" + current_size=0 + fi + + echo "$file" >> "$file_list" + current_size=$((current_size + file_size)) +done + +# Determine the width for the tar file numbers +total_files=$(find "$temp_dir" -name 'file_list_*' | wc -l) +width=${#total_files} + +# Set PARALLEL_PROCESSES to the number of file lists if not provided +PARALLEL_PROCESSES=${4:-$total_files} + +# Debug information +echo "Total files: $total_files" +echo "Width: $width" +echo "Parallel processes: $PARALLEL_PROCESSES" + +# Run tar creation in parallel +find "$temp_dir" -name 'file_list_*' | xargs -P "$PARALLEL_PROCESSES" -I {} sh -c ' + file_list={} + tar_number=$(basename "$file_list" | cut -d_ -f3) + tar_name=$(printf "%s/'"$TAR_NAME"'_%0'"$width"'d.tar.gz" "'"$DEST_DIR"'" "$tar_number") + tar -zcvf "$tar_name" -C "'"$SOURCE_DIR"'" -T "$file_list" +' + +# Clean up +rm -rf "$temp_dir" \ No newline at end of file diff --git a/Pointcept/scripts/test.sh b/Pointcept/scripts/test.sh new file mode 100644 index 0000000000000000000000000000000000000000..a104f98e67873c7741711b63da6cdbd8c88b73f4 --- /dev/null +++ b/Pointcept/scripts/test.sh @@ -0,0 +1,74 @@ +#!/bin/sh + +cd $(dirname $(dirname "$0")) || exit +PYTHON=python + +TEST_CODE=test.py + +DATASET=scannet +CONFIG="None" +EXP_NAME=debug +WEIGHT=model_best +GPU=None + +while getopts "p:d:c:n:w:g:" opt; do + case $opt in + p) + PYTHON=$OPTARG + ;; + d) + DATASET=$OPTARG + ;; + c) + CONFIG=$OPTARG + ;; + n) + EXP_NAME=$OPTARG + ;; + w) + WEIGHT=$OPTARG + ;; + g) + GPU=$OPTARG + ;; + \?) + echo "Invalid option: -$OPTARG" + ;; + esac +done + +if [ "${GPU}" = 'None' ] +then + GPU=`$PYTHON -c 'import torch; print(torch.cuda.device_count())'` +fi + +echo "Experiment name: $EXP_NAME" +echo "Python interpreter: $PYTHON" +echo "Dataset: $DATASET" +echo "GPU Num: $GPU" + +EXP_DIR=exp/${DATASET}/${EXP_NAME} +MODEL_DIR=${EXP_DIR}/model +CODE_DIR=${EXP_DIR}/code +CONFIG_DIR=${EXP_DIR}/config.py + +if [ "${CONFIG}" = "None" ] +then + CONFIG_DIR=${EXP_DIR}/config.py +else + CONFIG_DIR=configs/${DATASET}/${CONFIG}.py +fi + +echo "Loading config in:" $CONFIG_DIR +#export PYTHONPATH=./$CODE_DIR +export PYTHONPATH=./ +echo "Running code in: $CODE_DIR" + + +echo " =========> RUN TASK <=========" + +#$PYTHON -u "$CODE_DIR"/tools/$TEST_CODE \ +$PYTHON -u tools/$TEST_CODE \ + --config-file "$CONFIG_DIR" \ + --num-gpus "$GPU" \ + --options save_path="$EXP_DIR" weight="${MODEL_DIR}"/"${WEIGHT}".pth
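As an illustration of how these scripts are driven (the flag values below are examples only): `sh scripts/test.sh -p python -d scannet -n my_exp -w model_best -g 4` evaluates a finished run, and the train.sh script that follows takes the same `-p/-d/-c/-n/-w/-g` flags plus `-r true` to resume from `model_last.pth`.

diff --git a/Pointcept/scripts/train.sh b/Pointcept/scripts/train.sh new file mode 100644 index 0000000000000000000000000000000000000000..2910ba1e92423ce8decf40eeeb4d5115da60b8b9 --- /dev/null +++ b/Pointcept/scripts/train.sh @@ -0,0 +1,92 @@ +#!/bin/sh + +cd $(dirname $(dirname "$0")) || exit +ROOT_DIR=$(pwd) +PYTHON=python + +TRAIN_CODE=train.py + +DATASET=scannet +CONFIG="None" +EXP_NAME=debug +WEIGHT="None" +RESUME=false +GPU=None + + +while getopts "p:d:c:n:w:g:r:" opt; do + case $opt in + p) + PYTHON=$OPTARG + ;; + d) + DATASET=$OPTARG + ;; + c) + CONFIG=$OPTARG + ;; + n) + EXP_NAME=$OPTARG + ;; + w) + WEIGHT=$OPTARG + ;; + r) + RESUME=$OPTARG + ;; + g) + GPU=$OPTARG + ;; + \?)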
+ echo "Invalid option: -$OPTARG" + ;; + esac +done + +if [ "${GPU}" = 'None' ] +then + GPU=`$PYTHON -c 'import torch; print(torch.cuda.device_count())'` +fi + +echo "Experiment name: $EXP_NAME" +echo "Python interpreter: $PYTHON" +echo "Dataset: $DATASET" +echo "Config: $CONFIG" +echo "GPU Num: $GPU" + +EXP_DIR=exp/${DATASET}/${EXP_NAME} +MODEL_DIR=${EXP_DIR}/model +CODE_DIR=${EXP_DIR}/code +CONFIG_DIR=configs/${DATASET}/${CONFIG}.py + + +echo " =========> CREATE EXP DIR <=========" +echo "Experiment dir: $ROOT_DIR/$EXP_DIR" +if ${RESUME} +then + CONFIG_DIR=${EXP_DIR}/config.py + WEIGHT=$MODEL_DIR/model_last.pth +else + mkdir -p "$MODEL_DIR" "$CODE_DIR" + cp -r scripts tools pointcept "$CODE_DIR" +fi + +echo "Loading config in:" $CONFIG_DIR +export PYTHONPATH=./$CODE_DIR +echo "Running code in: $CODE_DIR" + + +echo " =========> RUN TASK <=========" + +if [ "${WEIGHT}" = "None" ] +then + $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \ + --config-file "$CONFIG_DIR" \ + --num-gpus "$GPU" \ + --options save_path="$EXP_DIR" +else + $PYTHON "$CODE_DIR"/tools/$TRAIN_CODE \ + --config-file "$CONFIG_DIR" \ + --num-gpus "$GPU" \ + --options save_path="$EXP_DIR" resume="$RESUME" weight="$WEIGHT" +fi \ No newline at end of file diff --git a/Pointcept/tools/create_waymo_semseg_submission.py b/Pointcept/tools/create_waymo_semseg_submission.py new file mode 100644 index 0000000000000000000000000000000000000000..ded9f68bde40015a1bc7d1b7197ae909ff5831fe --- /dev/null +++ b/Pointcept/tools/create_waymo_semseg_submission.py @@ -0,0 +1,131 @@ +""" +Script for Creating Waymo Semantic Segmentation Submission + +The Waymo dataset toolkit relies on an old version of Tensorflow +which shares conflicting dependencies with the Pointcept environment; +therefore we detach the submission generation from the test process, +and the script requires the following environment: + +```bash +conda create -n waymo python=3.8 -y +conda activate waymo +pip3 install waymo-open-dataset-tf-2-11-0 +``` + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import os +import tqdm +import argparse +import numpy as np +import zlib +import waymo_open_dataset.dataset_pb2 as open_dataset +from waymo_open_dataset.protos import segmentation_metrics_pb2 +from waymo_open_dataset.protos import segmentation_submission_pb2 + + +def compress_array(array: np.ndarray, is_int32: bool = False): + """Compress a numpy array to a zlib-compressed, serialized MatrixFloat/Int32. + + Args: + array: A numpy array. + is_int32: If true, use MatrixInt32, otherwise use MatrixFloat. + + Returns: + The compressed bytes.
+ """ + if is_int32: + m = open_dataset.MatrixInt32() + else: + m = open_dataset.MatrixFloat() + m.shape.dims.extend(list(array.shape)) + m.data.extend(array.reshape([-1]).tolist()) + return zlib.compress(m.SerializeToString()) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--record_path", + required=True, + help="Path to the prediction result folder of Waymo dataset", + ) + parser.add_argument( + "--dataset_path", + required=True, + help="Path to the processed Waymo dataset", + ) + parser.add_argument( + "--split", + required=True, + choices=["validation", "testing"], + help="Split of the prediction ([training, validation, testing]).", + ) + args = parser.parse_args() + file_list = [file for file in os.listdir(args.record_path) if file.endswith(".npy")] + submission = segmentation_submission_pb2.SemanticSegmentationSubmission() + frames = segmentation_metrics_pb2.SegmentationFrameList() + bar = tqdm.tqdm(file_list) + for file in bar: + bar.set_postfix(file=file) + context_name, frame_timestamp_micros = file.strip("segment-*_pred.npy").split( + "_with_camera_labels_" + ) + # Load prediction. + # In Pointcept waymo dataset, we minus 1 to label to ignore UNLABELLED class (0 -> -1) + pred = np.load(os.path.join(args.record_path, file)) + 1 + masks = np.load( + os.path.join( + args.dataset_path, + args.split, + f"segment-{context_name}_with_camera_labels", + frame_timestamp_micros, + "mask.npy", + ), + allow_pickle=True, + ) + offset = np.cumsum([mask.sum() for mask in masks.reshape(-1)]) + pred = np.split(pred[: offset[-1]], offset[:-1]) + pred_ri1 = pred[0] + pred_ri2 = pred[5] + mask_ri1 = np.expand_dims(masks[0, 0], -1) + mask_ri2 = np.expand_dims(masks[1, 0], -1) + range_dummy = np.zeros_like(mask_ri1, dtype=np.int32) + range_pred_ri1 = np.zeros_like(mask_ri1, dtype=np.int32) + range_pred_ri1[mask_ri1] = pred_ri1 + range_pred_ri1 = np.concatenate([range_dummy, range_pred_ri1], axis=-1) + range_pred_ri2 = np.zeros_like(mask_ri2, dtype=np.int32) + range_pred_ri2[mask_ri2] = pred_ri2 + range_pred_ri2 = np.concatenate([range_dummy, range_pred_ri2], axis=-1) + + # generate frame submission + segmentation_label = open_dataset.Laser() + segmentation_label.name = open_dataset.LaserName.TOP + segmentation_label.ri_return1.segmentation_label_compressed = compress_array( + range_pred_ri1, is_int32=True + ) + segmentation_label.ri_return2.segmentation_label_compressed = compress_array( + range_pred_ri2, is_int32=True + ) + frame = segmentation_metrics_pb2.SegmentationFrame() + frame.segmentation_labels.append(segmentation_label) + frame.context_name = context_name + frame.frame_timestamp_micros = int(frame_timestamp_micros) + frames.frames.append(frame) + submission.account_name = "***" + submission.unique_method_name = "***" + submission.authors.append("***") + submission.affiliation = "***" + submission.method_link = "***" + submission.sensor_type = ( + segmentation_submission_pb2.SemanticSegmentationSubmission.LIDAR_ALL + ) + submission.number_past_frames_exclude_current = 0 + submission.number_future_frames_exclude_current = 0 + submission.inference_results.CopyFrom(frames) + output_filename = os.path.join(args.record_path, "submission.bin") + f = open(output_filename, "wb") + f.write(submission.SerializeToString()) + f.close() diff --git a/Pointcept/tools/test.py b/Pointcept/tools/test.py new file mode 100644 index 0000000000000000000000000000000000000000..c66708d417082451f23cb635bf4dd1c59082f625 --- /dev/null +++ b/Pointcept/tools/test.py @@ -0,0 
+1,38 @@ +""" +Main Testing Script + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from pointcept.engines.defaults import ( + default_argument_parser, + default_config_parser, + default_setup, +) +from pointcept.engines.test import TESTERS +from pointcept.engines.launch import launch + + +def main_worker(cfg): + cfg = default_setup(cfg) + tester = TESTERS.build(dict(type=cfg.test.type, cfg=cfg)) + tester.test() + + +def main(): + args = default_argument_parser().parse_args() + cfg = default_config_parser(args.config_file, args.options) + + launch( + main_worker, + num_gpus_per_machine=args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + cfg=(cfg,), + ) + + +if __name__ == "__main__": + main() diff --git a/Pointcept/tools/test_s3dis_6fold.py b/Pointcept/tools/test_s3dis_6fold.py new file mode 100644 index 0000000000000000000000000000000000000000..711ad42c956412cb9cb68adf596b679e25f48d19 --- /dev/null +++ b/Pointcept/tools/test_s3dis_6fold.py @@ -0,0 +1,102 @@ +""" +Test script for S3DIS 6-fold cross validation + +Gathering Area_X.pth from result folder of experiment record of each area as follows: +|- RECORDS_PATH + |- Area_1.pth + |- Area_2.pth + |- Area_3.pth + |- Area_4.pth + |- Area_5.pth + |- Area_6.pth + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import argparse +import os + +import torch +import numpy as np +import glob +from pointcept.utils.logger import get_root_logger + +CLASS_NAMES = [ + "ceiling", + "floor", + "wall", + "beam", + "column", + "window", + "door", + "table", + "chair", + "sofa", + "bookcase", + "board", + "clutter", +] + + +def evaluation(intersection, union, target, logger=None): + iou_class = intersection / (union + 1e-10) + accuracy_class = intersection / (target + 1e-10) + mIoU = np.mean(iou_class) + mAcc = np.mean(accuracy_class) + allAcc = sum(intersection) / (sum(target) + 1e-10) + + if logger is not None: + logger.info( + "Val result: mIoU/mAcc/allAcc {:.4f}/{:.4f}/{:.4f}".format( + mIoU, mAcc, allAcc + ) + ) + for i in range(len(CLASS_NAMES)): + logger.info( + "Class_{idx} - {name} Result: iou/accuracy {iou:.4f}/{accuracy:.4f}".format( + idx=i, + name=CLASS_NAMES[i], + iou=iou_class[i], + accuracy=accuracy_class[i], + ) + ) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--record_root", + required=True, + help="Path to the S3DIS record of each split", + ) + config = parser.parse_args() + logger = get_root_logger( + log_file=os.path.join(config.record_root, "6-fold.log"), + file_mode="w", + ) + + records = sorted(glob.glob(os.path.join(config.record_root, "Area_*.pth"))) + assert len(records) == 6 + intersection_ = np.zeros(len(CLASS_NAMES), dtype=int) + union_ = np.zeros(len(CLASS_NAMES), dtype=int) + target_ = np.zeros(len(CLASS_NAMES), dtype=int) + + for record in records: + area = os.path.basename(record).split(".")[0] + info = torch.load(record) + logger.info(f"<<<<<<<<<<<<<<<<< Parsing {area} <<<<<<<<<<<<<<<<<") + intersection = info["intersection"] + union = info["union"] + target = info["target"] + evaluation(intersection, union, target, logger=logger) + intersection_ += intersection + union_ += union + target_ += target + + logger.info(f"<<<<<<<<<<<<<<<<< Parsing 6-fold <<<<<<<<<<<<<<<<<") + evaluation(intersection_, union_, target_, logger=logger) + + +if __name__ == "__main__": + main() diff --git a/Pointcept/tools/train.py 
b/Pointcept/tools/train.py new file mode 100644 index 0000000000000000000000000000000000000000..e3ed749c4d0bae2c3ad26487d92c46c5695341a2 --- /dev/null +++ b/Pointcept/tools/train.py @@ -0,0 +1,38 @@ +""" +Main Training Script + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +from pointcept.engines.defaults import ( + default_argument_parser, + default_config_parser, + default_setup, +) +from pointcept.engines.train import TRAINERS +from pointcept.engines.launch import launch + + +def main_worker(cfg): + cfg = default_setup(cfg) + trainer = TRAINERS.build(dict(type=cfg.train.type, cfg=cfg)) + trainer.train() + + +def main(): + args = default_argument_parser().parse_args() + cfg = default_config_parser(args.config_file, args.options) + + launch( + main_worker, + num_gpus_per_machine=args.num_gpus, + num_machines=args.num_machines, + machine_rank=args.machine_rank, + dist_url=args.dist_url, + cfg=(cfg,), + ) + + +if __name__ == "__main__": + main() diff --git a/README copy.md b/README copy.md new file mode 100644 index 0000000000000000000000000000000000000000..1b67df7da8814e787af387c2345f1dc303762836 --- /dev/null +++ b/README copy.md @@ -0,0 +1,12 @@ +--- +title: Find3D +emoji: ๐Ÿ”ฅ +colorFrom: gray +colorTo: indigo +sdk: gradio +sdk_version: 5.5.0 +app_file: app.py +pinned: false +--- + +Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..df6b856f2f6e704ef3343a15978ac82d025b225e --- /dev/null +++ b/app.py @@ -0,0 +1,178 @@ +import gradio as gr +import re +from utils import read_pcd, render_point_cloud, render_pcd_file +from inference.utils import get_legend +from inference.inference import segment_obj, get_heatmap +from huggingface_hub import login +import os + + +os.chdir("Pointcept/libs/pointops") +os.system("python setup.py install") +os.chdir("../../../") + +login(token=os.getenv('hfkey')) + +parts_dict = { + "fireplug": "bonnet of a fireplug,side cap of a fireplug,barrel of a fireplug,base of a fireplug", + "mickey": "ear,head,arms,hands,body,legs", + "motorvehicle": "wheel of a motor vehicle,seat of a motor vehicle,handle of a motor vehicle", + "teddy": "head,body,arms,legs", + "lamppost": "lighting of a lamppost,pole of a lamppost", + "shirt": "sleeve of a shirt,collar of a shirt,body of a shirt", + "capybara": "hat worn by a capybara,head,body,feet", + "corgi": "head,leg,body,ear", + "pushcar": "wheel,body,handle", + "plant": "pot,plant", + "chair": "back of chair,leg,seat" +} + +source_dict = { + "fireplug":"objaverse", + "mickey":"objaverse", + "motorvehicle":"objaverse", + "teddy":"objaverse", + "lamppost":"objaverse", + "shirt":"objaverse", + "capybara": "wild", + "corgi": "wild", + "pushcar": "wild", + "plant": "wild", + "chair": "wild" +} + +def predict(pcd_path, inference_mode, part_queries): + xyz, rgb, normal = read_pcd(pcd_path) + if inference_mode == "Segmentation": + parts = [part.strip(" ") for part in re.split(r'[,;.|]', part_queries)] + seg_rgb = segment_obj(xyz, rgb, normal, parts).cpu().numpy() + legend = get_legend(parts) + return render_point_cloud(xyz, seg_rgb, legend=legend) + elif inference_mode == "Localization": + heatmap_rgb = get_heatmap(xyz, rgb, normal, part_queries).cpu().numpy() + return render_point_cloud(xyz, heatmap_rgb) + else: + return None + +def on_select(evt: gr.SelectData): + obj_name = evt.value['image']['orig_name'][:-4] + src = 
source_dict[obj_name] + return [f"examples/{src}/{obj_name}.pcd", parts_dict[obj_name]] + + +with gr.Blocks(theme=gr.themes.Default(text_size="lg", radius_size="none")) as demo: + gr.HTML( + '''

<h1 style="text-align: center;">Find Any Part in 3D</h1> + <p> + This is a demo for Find3D: Find Any Part in 3D! Two modes are supported: segmentation and localization. + For segmentation mode, please provide multiple part queries in the "queries" text box, in the format of a comma-separated string, such as "part1,part2,part3". + After hitting "Run", the model will segment the object into the provided parts. + For localization mode, please provide only one query string in the "queries" text box. After hitting "Run", the model will generate a heatmap for the provided query text. + Please click on the buttons below "Objaverse" and "In the Wild" for some examples. You can also upload your own .pcd files. + </p>
+ ''' + ) + + with gr.Row(variant="panel"): + with gr.Column(scale=4): + file_upload = gr.File( + label="Upload Point Cloud File", + type="filepath", + file_types=[".pcd"], + value="examples/objaverse/lamppost.pcd" + ) + inference_mode = gr.Radio( + choices=["Segmentation", "Localization"], + label="Inference Mode", + value="Segmentation", + ) + part_queries = gr.Textbox( + label="Part Queries", + value="lighting of a lamppost,pole of a lamppost", + ) + run_button = gr.Button( + value="Run", + variant="primary", + ) + + with gr.Column(scale=4): + input_image = gr.Image(label="Input Image", visible=False, type='pil', image_mode='RGBA', height=290) + input_point_cloud = gr.Plot(label="Input Point Cloud") + + with gr.Column(scale=4): + output_point_cloud = gr.Plot(label="Output Result") + + with gr.Row(variant="panel"): + with gr.Column(scale=6): + title = gr.HTML('''

<h2 style="text-align: center;">Objaverse</h2> + <p style="text-align: center;">Online 3D assets from Objaverse!</p>
+ ''') + gallery_objaverse = gr.Gallery([("examples/objaverse/lamppost.jpg", "lamppost"), + ("examples/objaverse/fireplug.jpg", "fireplug"), + ("examples/objaverse/mickey.jpg", "Mickey"), + ("examples/objaverse/motorvehicle.jpg", "motor vehicle"), + ("examples/objaverse/teddy.jpg", "teddy bear"), + ("examples/objaverse/shirt.jpg", "shirt")], + columns=3, + allow_preview=False) + gallery_objaverse.select(fn=on_select, + inputs=None, + outputs=[file_upload, part_queries]) + ''' + gr.Examples( + inputs=[file_upload, part_queries], + examples=[ + ["examples/objaverse/fireplug.pcd", "bonnet of a fireplug,side cap of a fireplug,barrel of a fireplug,base of a fireplug"], + ["examples/objaverse/mickey.pcd", "ear,head,arms,hands,body,legs"], + ["examples/objaverse/motorvehicle.pcd", "wheel of a motor vehicle,seat of a motor vehicle,handle of a motor vehicle"], + ["examples/objaverse/teddy.pcd", "head,body,arms,legs"], + ["examples/objaverse/lamppost.pcd", "lighting of a lamppost,pole of a lamppost"], + ["examples/objaverse/shirt.pcd", "sleeve of a shirt,collar of a shirt,body of a shirt"] + ], + example_labels=["fireplug", "Mickey", "motor vehicle", "teddy bear", "lamppost", "shirt"], + label="" + ) + ''' + with gr.Column(scale=6): + title = gr.HTML("""

<h2 style="text-align: center;">In the Wild</h2> + <p style="text-align: center;">Challenging in-the-wild reconstructions from iPhone photos & AI-generated images!</p>
+ """) + gallery_wild = gr.Gallery([("examples/wild/capybara.png", "DALLE-capybara"), + ("examples/wild/corgi.jpg", "DALLE-corgi"), + ("examples/wild/plant.jpg", "iPhone-plant"), + ("examples/wild/pushcar.jpg", "iPhone-pushcar"), + ("examples/wild/chair.jpg", "iPhone-chair")], + columns=3, + allow_preview=False) + gallery_wild.select(fn=on_select, + inputs=None, + outputs=[file_upload, part_queries]) + ''' + gr.Examples( + inputs=[file_upload, part_queries], + examples=[ + ["examples/wild/mcc_dalle_capybara.pcd", "hat worn by a capybara,head,body,feet"], + ["examples/wild/mcc_dalle_corgi.pcd", "head,leg,body,ear"], + ["examples/wild/mcc_iphone_pushcar.pcd", "wheel,body,handle"], + ["examples/wild/mcc_iphone_plant.pcd", "pot,plant"], + ["examples/wild/mcc_iphone_chair.pcd", "back of chair,leg,seat"], + ], + example_labels=["DALLE-capybara", "DALLE-corgi", "iPhone-pushcar", "iPhone-plant", "iPhone-chair"], + label="" + ) + ''' + + file_upload.change( + fn=render_pcd_file, + inputs=[file_upload], + outputs=[input_point_cloud], + ) + run_button.click( + fn=predict, + inputs=[file_upload, inference_mode, part_queries], + outputs=[output_point_cloud], + ) + demo.load( + fn=render_pcd_file, + inputs=[file_upload], + outputs=[input_point_cloud]) # initialize + +demo.launch() diff --git a/examples/objaverse/fireplug.jpg b/examples/objaverse/fireplug.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c8afe8251ac134217558588011678e65df5c5ef4 Binary files /dev/null and b/examples/objaverse/fireplug.jpg differ diff --git a/examples/objaverse/fireplug.pcd b/examples/objaverse/fireplug.pcd new file mode 100644 index 0000000000000000000000000000000000000000..2b2bcc9bf480069fd5068dfb5de7fed3d648ad83 Binary files /dev/null and b/examples/objaverse/fireplug.pcd differ diff --git a/examples/objaverse/lamppost.jpg b/examples/objaverse/lamppost.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ab175351e8923f65bcba1d21fec11b70b3e7b118 Binary files /dev/null and b/examples/objaverse/lamppost.jpg differ diff --git a/examples/objaverse/lamppost.pcd b/examples/objaverse/lamppost.pcd new file mode 100644 index 0000000000000000000000000000000000000000..81a8d981811b72b7a3fa6196a5177741f72d2337 Binary files /dev/null and b/examples/objaverse/lamppost.pcd differ diff --git a/examples/objaverse/mickey.jpg b/examples/objaverse/mickey.jpg new file mode 100644 index 0000000000000000000000000000000000000000..1663bcf4ddbdce86be1158766fd2e66871310341 Binary files /dev/null and b/examples/objaverse/mickey.jpg differ diff --git a/examples/objaverse/mickey.pcd b/examples/objaverse/mickey.pcd new file mode 100644 index 0000000000000000000000000000000000000000..939ecca3a3f6525481363e2ce2d6a4f21b8a7aec Binary files /dev/null and b/examples/objaverse/mickey.pcd differ diff --git a/examples/objaverse/motorvehicle.jpg b/examples/objaverse/motorvehicle.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d255d69d63024a61e9e4d9c69b0de1f9d5819698 Binary files /dev/null and b/examples/objaverse/motorvehicle.jpg differ diff --git a/examples/objaverse/motorvehicle.pcd b/examples/objaverse/motorvehicle.pcd new file mode 100644 index 0000000000000000000000000000000000000000..fde4fa26f13221a390446d3b32f86029b76d4045 Binary files /dev/null and b/examples/objaverse/motorvehicle.pcd differ diff --git a/examples/objaverse/penguin.jpg b/examples/objaverse/penguin.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cb0d08c6eff0bbdc0c5172557360ea134e65626c Binary 
files /dev/null and b/examples/objaverse/penguin.jpg differ diff --git a/examples/objaverse/shirt.jpg b/examples/objaverse/shirt.jpg new file mode 100644 index 0000000000000000000000000000000000000000..f9ae89b7f4b192b6b0eb6990067d9d4b3c2d4a63 Binary files /dev/null and b/examples/objaverse/shirt.jpg differ diff --git a/examples/objaverse/shirt.pcd b/examples/objaverse/shirt.pcd new file mode 100644 index 0000000000000000000000000000000000000000..d5a519955ae924b750351f3c2bd19e3afd104f83 Binary files /dev/null and b/examples/objaverse/shirt.pcd differ diff --git a/examples/objaverse/teddy.jpg b/examples/objaverse/teddy.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d2a5011a69909c2e4bb8079e14e1385f0bd7698f Binary files /dev/null and b/examples/objaverse/teddy.jpg differ diff --git a/examples/objaverse/teddy.pcd b/examples/objaverse/teddy.pcd new file mode 100644 index 0000000000000000000000000000000000000000..e3081ad00441e9dd3f949ea00472f6adee999f38 Binary files /dev/null and b/examples/objaverse/teddy.pcd differ diff --git a/examples/wild/capybara.pcd b/examples/wild/capybara.pcd new file mode 100644 index 0000000000000000000000000000000000000000..40c6a2ab82acfd879b54ceb0fc629ca24251286d Binary files /dev/null and b/examples/wild/capybara.pcd differ diff --git a/examples/wild/capybara.png b/examples/wild/capybara.png new file mode 100644 index 0000000000000000000000000000000000000000..f81b07cce69b15864de0cbc43d2365ac7978727b Binary files /dev/null and b/examples/wild/capybara.png differ diff --git a/examples/wild/chair.jpg b/examples/wild/chair.jpg new file mode 100644 index 0000000000000000000000000000000000000000..89b5f663a00d9c6286f3022d5586c68489ec8d61 Binary files /dev/null and b/examples/wild/chair.jpg differ diff --git a/examples/wild/chair.pcd b/examples/wild/chair.pcd new file mode 100644 index 0000000000000000000000000000000000000000..5fe291071ea9142d20ba01c000bd31756cc5825e Binary files /dev/null and b/examples/wild/chair.pcd differ diff --git a/examples/wild/corgi.jpg b/examples/wild/corgi.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4977e0dc891d45022545ee73203f886171f9f1e8 Binary files /dev/null and b/examples/wild/corgi.jpg differ diff --git a/examples/wild/corgi.pcd b/examples/wild/corgi.pcd new file mode 100644 index 0000000000000000000000000000000000000000..26aec368144ecb76693fcb70a5084243c28a3e05 Binary files /dev/null and b/examples/wild/corgi.pcd differ diff --git a/examples/wild/cup.pcd b/examples/wild/cup.pcd new file mode 100644 index 0000000000000000000000000000000000000000..fd52380948afe96a97f06e7e9c64b929d92e112a Binary files /dev/null and b/examples/wild/cup.pcd differ diff --git a/examples/wild/mcc_cup.pcd b/examples/wild/mcc_cup.pcd new file mode 100644 index 0000000000000000000000000000000000000000..fd52380948afe96a97f06e7e9c64b929d92e112a Binary files /dev/null and b/examples/wild/mcc_cup.pcd differ diff --git a/examples/wild/mcc_dalle_capybara.pcd b/examples/wild/mcc_dalle_capybara.pcd new file mode 100644 index 0000000000000000000000000000000000000000..40c6a2ab82acfd879b54ceb0fc629ca24251286d Binary files /dev/null and b/examples/wild/mcc_dalle_capybara.pcd differ diff --git a/examples/wild/mcc_dalle_corgi.pcd b/examples/wild/mcc_dalle_corgi.pcd new file mode 100644 index 0000000000000000000000000000000000000000..26aec368144ecb76693fcb70a5084243c28a3e05 Binary files /dev/null and b/examples/wild/mcc_dalle_corgi.pcd differ diff --git a/examples/wild/mcc_iphone_chair.pcd b/examples/wild/mcc_iphone_chair.pcd 
new file mode 100644 index 0000000000000000000000000000000000000000..5fe291071ea9142d20ba01c000bd31756cc5825e Binary files /dev/null and b/examples/wild/mcc_iphone_chair.pcd differ diff --git a/examples/wild/mcc_iphone_plant.pcd b/examples/wild/mcc_iphone_plant.pcd new file mode 100644 index 0000000000000000000000000000000000000000..8c0ea19868bf2488756492e41dd48bbc66678b37 Binary files /dev/null and b/examples/wild/mcc_iphone_plant.pcd differ diff --git a/examples/wild/mcc_iphone_pushcar.pcd b/examples/wild/mcc_iphone_pushcar.pcd new file mode 100644 index 0000000000000000000000000000000000000000..a81d9850190d776a925f80429b573a287bf0df15 Binary files /dev/null and b/examples/wild/mcc_iphone_pushcar.pcd differ diff --git a/examples/wild/mcc_toyplane1.pcd b/examples/wild/mcc_toyplane1.pcd new file mode 100644 index 0000000000000000000000000000000000000000..99a6ea2130e2c4d6b0885d599609091c4711ca62 Binary files /dev/null and b/examples/wild/mcc_toyplane1.pcd differ diff --git a/examples/wild/mcc_toyplane2.pcd b/examples/wild/mcc_toyplane2.pcd new file mode 100644 index 0000000000000000000000000000000000000000..5ae23c93221a4142d07c32bb1012453035b22486 Binary files /dev/null and b/examples/wild/mcc_toyplane2.pcd differ diff --git a/examples/wild/plant.jpg b/examples/wild/plant.jpg new file mode 100644 index 0000000000000000000000000000000000000000..6dd9b5752e2dba096fca98f40da4d6bfb3b0eb4c Binary files /dev/null and b/examples/wild/plant.jpg differ diff --git a/examples/wild/plant.pcd b/examples/wild/plant.pcd new file mode 100644 index 0000000000000000000000000000000000000000..8c0ea19868bf2488756492e41dd48bbc66678b37 Binary files /dev/null and b/examples/wild/plant.pcd differ diff --git a/examples/wild/pushcar.jpg b/examples/wild/pushcar.jpg new file mode 100644 index 0000000000000000000000000000000000000000..631fa33ce77a7205984ab55c0ffb7f0a30c038c0 Binary files /dev/null and b/examples/wild/pushcar.jpg differ diff --git a/examples/wild/pushcar.pcd b/examples/wild/pushcar.pcd new file mode 100644 index 0000000000000000000000000000000000000000..a81d9850190d776a925f80429b573a287bf0df15 Binary files /dev/null and b/examples/wild/pushcar.pcd differ diff --git a/examples/wild/spyro.pcd b/examples/wild/spyro.pcd new file mode 100644 index 0000000000000000000000000000000000000000..3c594ebc40d7d5d20806c725612b3719d2b48461 Binary files /dev/null and b/examples/wild/spyro.pcd differ diff --git a/examples/wild/spyro06.pcd b/examples/wild/spyro06.pcd new file mode 100644 index 0000000000000000000000000000000000000000..3c594ebc40d7d5d20806c725612b3719d2b48461 Binary files /dev/null and b/examples/wild/spyro06.pcd differ diff --git a/examples/wild/toyplane1.pcd b/examples/wild/toyplane1.pcd new file mode 100644 index 0000000000000000000000000000000000000000..99a6ea2130e2c4d6b0885d599609091c4711ca62 Binary files /dev/null and b/examples/wild/toyplane1.pcd differ diff --git a/examples/wild/toyplane2.pcd b/examples/wild/toyplane2.pcd new file mode 100644 index 0000000000000000000000000000000000000000..5ae23c93221a4142d07c32bb1012453035b22486 Binary files /dev/null and b/examples/wild/toyplane2.pcd differ diff --git a/inference/inference.py b/inference/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..9ab2a1597f0fb17c0c0c9f98e91adfc6202b6f65 --- /dev/null +++ b/inference/inference.py @@ -0,0 +1,91 @@ +import torch +import numpy as np +import matplotlib.pyplot as plt +from inference.utils import get_seg_color, load_model, preprocess_pcd, encode_text + +DEVICE = "cpu" +if 
torch.cuda.is_available(): + DEVICE = "cuda:0" + +def pred_3d_upsample( + pred, # n_subsampled_pts, feat_dim + part_text_embeds, # n_parts, feat_dim + temperature, + xyz_sub, + xyz_full, # n_pts, 3 + N_CHUNKS=1 + ): + xyz_full = xyz_full.squeeze() + logits = pred @ part_text_embeds.T # n_subsampled_pts, n_parts + + logits_prepend0 = torch.cat([torch.zeros(logits.shape[0],1).to(DEVICE), logits],axis=1) + pred_softmax = torch.nn.Softmax(dim=1)(logits_prepend0 * temperature) + + chunk_len = xyz_full.shape[0]//N_CHUNKS+1 + closest_idx_list = [] + for i in range(N_CHUNKS): + cur_chunk = xyz_full[chunk_len*i:chunk_len*(i+1)] + dist_all = (xyz_sub.unsqueeze(0) - cur_chunk.to(DEVICE).unsqueeze(1))**2 # 300k,5k,3 + cur_dist = (dist_all.sum(dim=-1))**0.5 # 300k,5k + min_idxs = torch.min(cur_dist, 1)[1] + del cur_dist + closest_idx_list.append(min_idxs) + all_nn_idxs = torch.cat(closest_idx_list,axis=0) + # nearest-neighbor upsampling: each full-resolution point takes the + # prediction of its closest subsampled point + all_probs = pred_softmax[all_nn_idxs] + all_logits = logits[all_nn_idxs] + pred_full = all_probs.argmax(dim=1).cpu() # here, 0 is unlabeled, 1,...n_part correspond to actual part assignment + return all_logits, all_probs, pred_full + +def get_segmentation_rgb(model, data, N_CHUNKS=5): # evaluate loader can only have batch size=1 + temperature = np.exp(model.ln_logit_scale.item()) + with torch.no_grad(): + for key in data.keys(): + if isinstance(data[key], torch.Tensor) and "full" not in key: + data[key] = data[key].to(DEVICE) + net_out = model(x=data) + text_embeds = data['label_embeds'] + xyz_sub = data["coord"] + xyz_full = data["xyz_full"] + _, _, pred_full = pred_3d_upsample(net_out, # n_subsampled_pts, feat_dim + text_embeds, # n_parts, feat_dim + temperature, + xyz_sub, + xyz_full, # n_pts, 3 + N_CHUNKS=N_CHUNKS) + seg_rgb = get_seg_color(pred_full.cpu()) + return seg_rgb + +def get_heatmap_rgb(model, data, N_CHUNKS=5): # evaluate loader can only have batch size=1 + temperature = np.exp(model.ln_logit_scale.item()) + with torch.no_grad(): + for key in data.keys(): + if isinstance(data[key], torch.Tensor) and "full" not in key: + data[key] = data[key].to(DEVICE) + net_out = model(x=data) + text_embeds = data['label_embeds'] + xyz_sub = data["coord"] + xyz_full = data["xyz_full"] + all_logits, _, _ = pred_3d_upsample(net_out, # n_subsampled_pts, feat_dim + text_embeds, # n_parts, feat_dim + temperature, + xyz_sub, + xyz_full, # n_pts, 3 + N_CHUNKS=N_CHUNKS) + scores = all_logits.squeeze().cpu() + heatmap_rgb = torch.tensor(plt.cm.jet(scores.numpy())[:,:3]).squeeze() + return heatmap_rgb + +def segment_obj(xyz, rgb, normal, queries): + model = load_model() + data_dict = preprocess_pcd(torch.tensor(xyz).float().to(DEVICE), torch.tensor(rgb).float().to(DEVICE), torch.tensor(normal).float().to(DEVICE)) + data_dict["label_embeds"] = encode_text(queries) + seg_rgb = get_segmentation_rgb(model, data_dict) + return seg_rgb + +def get_heatmap(xyz, rgb, normal, query): + model = load_model() + data_dict = preprocess_pcd(torch.tensor(xyz).float().to(DEVICE), torch.tensor(rgb).float().to(DEVICE), torch.tensor(normal).float().to(DEVICE)) + data_dict["label_embeds"] = encode_text([query]) + heatmap_rgb = get_heatmap_rgb(model, data_dict) + return heatmap_rgb \ No newline at end of file
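For orientation, `segment_obj` and `get_heatmap` above are the two entry points the Gradio app calls. A minimal sketch using one of the example assets bundled with this Space; the file path and part names are taken from the demo defaults:

```python
from utils import read_pcd
from inference.inference import segment_obj, get_heatmap

xyz, rgb, normal = read_pcd("examples/objaverse/lamppost.pcd")
parts = ["lighting of a lamppost", "pole of a lamppost"]
seg_rgb = segment_obj(xyz, rgb, normal, parts).cpu().numpy()      # per-point colors
heat_rgb = get_heatmap(xyz, rgb, normal, parts[0]).cpu().numpy()  # jet heatmap colors
```

diff --git a/inference/utils.py b/inference/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5c42283e15c16dfe91a1b4cdd04379bda5579835 --- /dev/null +++ b/inference/utils.py @@ -0,0 +1,164 @@ +import torch +import torch.nn.functional as F +from model.model import PointSemSeg,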
Find3D +import numpy as np +import random +from transformers import AutoTokenizer, AutoModel + +DEVICE = "cpu" +if torch.cuda.is_available(): + DEVICE = "cuda:0" + +def get_seg_color(labels): + part_num = labels.max() + cmap_matrix = torch.tensor([[1,1,1], [1,0,0], [0,1,0], [0,0,1], [1,1,0], [1,0,1], + [0,1,1], [0.5,0.5,0.5], [0.5,0.5,0], [0.5,0,0.5],[0,0.5,0.5], + [0.1,0.2,0.3],[0.2,0.5,0.3], [0.6,0.3,0.2], [0.5,0.3,0.5], + [0.6,0.7,0.2],[0.5,0.8,0.3]])[:part_num+1,:] + onehot = F.one_hot(labels.long(), num_classes=part_num+1) * 1.0 # n_pts, part_num+1, each row 00.010.0, first place is unlabeled (0 originally) + pts_rgb = torch.matmul(onehot, cmap_matrix) + return pts_rgb + +def get_legend(parts): + colors = ["white", "red", "green", "blue", "yellow", "magenta", "cyan","grey", "olive", + "purple", "teal", "navy", "darkgreen", "brown", "pinkpurple", "yellowgreen", "limegreen"] + legends = [] + i = 1 + for part in parts: + cur_color = colors[i] + legends.append(f"{cur_color}:{part}") + i += 1 + legend = " ".join(legends) + return legend + + +def load_model(): + model = Find3D.from_pretrained("ziqima/find3d-checkpt0", dim_output=768) + #model.load_state_dict(torch.load("find3d_checkpoint.pth")["model_state_dict"]) + model.eval() + model = model.to(DEVICE) + return model + +def set_seed(seed): + torch.manual_seed(seed) + if DEVICE != "cpu": + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + np.random.seed(seed) + random.seed(seed) + +def fnv_hash_vec(arr): + """ + FNV64-1A + """ + assert arr.ndim == 2 + # Floor first for negative coordinates + arr = arr.copy() + arr = arr.astype(np.uint64, copy=False) + hashed_arr = np.uint64(14695981039346656037) * np.ones( + arr.shape[0], dtype=np.uint64 + ) + for j in range(arr.shape[1]): + hashed_arr *= np.uint64(1099511628211) + hashed_arr = np.bitwise_xor(hashed_arr, arr[:, j]) + return hashed_arr + + +def grid_sample_numpy(xyz, rgb, normal, grid_size): # this should hopefully be 5000 or close + xyz = xyz.cpu().numpy() + rgb = rgb.cpu().numpy() + normal = normal.cpu().numpy() + + scaled_coord = xyz / np.array(grid_size) + grid_coord = np.floor(scaled_coord).astype(int) + min_coord = grid_coord.min(0) + grid_coord -= min_coord + scaled_coord -= min_coord + min_coord = min_coord * np.array(grid_size) + key = fnv_hash_vec(grid_coord) + idx_sort = np.argsort(key) + key_sort = key[idx_sort] + _, inverse, count = np.unique(key_sort, return_inverse=True, return_counts=True) + idx_select = ( + np.cumsum(np.insert(count, 0, 0)[0:-1]) + + np.random.randint(0, count.max(), count.size) % count + ) + idx_unique = idx_sort[idx_select] + + grid_coord = grid_coord[idx_unique] + + xyz = torch.tensor(xyz[idx_unique]).to(DEVICE) + rgb = torch.tensor(rgb[idx_unique]).to(DEVICE) + normal = torch.tensor(normal[idx_unique]).to(DEVICE) + grid_coord = torch.tensor(grid_coord).to(DEVICE) + + return xyz, rgb, normal, grid_coord + + +def encode_text(texts): + siglip = AutoModel.from_pretrained("google/siglip-base-patch16-224") # dim 768 #"google/siglip-so400m-patch14-384") + tokenizer = AutoTokenizer.from_pretrained("google/siglip-base-patch16-224")#"google/siglip-so400m-patch14-384") + inputs = tokenizer(texts, padding="max_length", return_tensors="pt") + for key in inputs: + inputs[key] = inputs[key].to(DEVICE) + with torch.no_grad(): + text_feat = siglip.to(DEVICE).get_text_features(**inputs) + text_feat = text_feat / (text_feat.norm(dim=-1, keepdim=True) + 1e-12) + return text_feat + + +def preprocess_pcd(xyz, rgb, normal): # rgb should be 0-1 + assert 
rgb.max() <=1 + # normalize + # this is the same preprocessing I do before training + center = xyz.mean(0) + scale = max((xyz - center).abs().max(0)[0]) + xyz -= center + xyz *= (0.75 / float(scale)) # put in 0.75-size box + + # axis swap + xyz = torch.cat([-xyz[:,0].reshape(-1,1), xyz[:,2].reshape(-1,1), xyz[:,1].reshape(-1,1)], dim=1) + + # center shift + xyz_min = xyz.min(dim=0)[0] + xyz_max = xyz.max(dim=0)[0] + xyz_max[2] = 0 + shift = (xyz_min+xyz_max)/2 + xyz -= shift + + # subsample/upsample to 5000 pts for grid sampling + if xyz.shape[0] != 5000: + random_indices = torch.randint(0, xyz.shape[0], (5000,)) + pts_xyz_subsampled = xyz[random_indices] + pts_rgb_subsampled = rgb[random_indices] + normal_subsampled = normal[random_indices] + else: + pts_xyz_subsampled = xyz + pts_rgb_subsampled = rgb + normal_subsampled = normal + + # grid sampling + pts_xyz_gridsampled, pts_rgb_gridsampled, normal_gridsampled, grid_coord = grid_sample_numpy(pts_xyz_subsampled, pts_rgb_subsampled, normal_subsampled, 0.02) + + # another center shift, z=false + xyz_min = pts_xyz_gridsampled.min(dim=0)[0] + xyz_min[2] = 0 + xyz_max = pts_xyz_gridsampled.max(dim=0)[0] + xyz_max[2] = 0 + shift = (xyz_min+xyz_max)/2 + pts_xyz_gridsampled -= shift + xyz -= shift + + # normalize color + pts_rgb_gridsampled = pts_rgb_gridsampled / 0.5 - 1 + + # combine color and normal as feat + feat = torch.cat([pts_rgb_gridsampled, normal_gridsampled], dim=1) + + data_dict = {} + data_dict["coord"] = pts_xyz_gridsampled + data_dict["feat"] = feat + data_dict["grid_coord"] = grid_coord + data_dict["xyz_full"] = xyz + data_dict["offset"] = torch.tensor([pts_xyz_gridsampled.shape[0]]) + return data_dict \ No newline at end of file diff --git a/model/model.py b/model/model.py new file mode 100644 index 0000000000000000000000000000000000000000..51be7c5eff76428d81db5d818401aedc45894f5d --- /dev/null +++ b/model/model.py @@ -0,0 +1,1063 @@ +""" +Point Transformer - V3 Mode1 +Pointcept detached version + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com) +Please cite our work if the code is helpful to you. +""" + +import sys +from functools import partial +from addict import Dict +import math +import torch +import torch.nn as nn +import spconv.pytorch as spconv +import torch_scatter +from timm.models.layers import DropPath +from collections import OrderedDict +import numpy as np +import torch.nn.functional as F +try: + import flash_attn +except ImportError: + flash_attn = None +from model.serialization import encode +from huggingface_hub import PyTorchModelHubMixin + +@torch.inference_mode() +def offset2bincount(offset): + return torch.diff( + offset, prepend=torch.tensor([0], device=offset.device, dtype=torch.long) + ) + + +@torch.inference_mode() +def offset2batch(offset): + bincount = offset2bincount(offset) + return torch.arange( + len(bincount), device=offset.device, dtype=torch.long + ).repeat_interleave(bincount) + + +@torch.inference_mode() +def batch2offset(batch): + return torch.cumsum(batch.bincount(), dim=0).long() + + +class Point(Dict): + """ + Point Structure of Pointcept + + A Point (point cloud) in Pointcept is a dictionary that contains various properties of + a batched point cloud. 
Properties with the following names have a specific definition + as follows: + + - "coord": original coordinate of point cloud; + - "grid_coord": grid coordinate for a specific grid size (related to GridSampling); + Point also supports the following optional attributes: + - "offset": if missing, generated from "batch"; + - "batch": if missing, generated from "offset"; + - "feat": feature of point cloud, the default input of the model; + - "grid_size": grid size of point cloud (related to GridSampling); + (related to Serialization) + - "serialized_depth": depth of serialization; 2 ** depth * grid_size bounds the point cloud range; + - "serialized_code": a list of serialization codes; + - "serialized_order": a list of serialization orders determined by code; + - "serialized_inverse": a list of inverse mappings determined by code; + (related to Sparsify: SpConv) + - "sparse_shape": sparse shape for the Sparse Conv Tensor; + - "sparse_conv_feat": SparseConvTensor initialized with information provided by Point; + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # If one of "offset" or "batch" is missing, generate it from the other + if "batch" not in self.keys() and "offset" in self.keys(): + self["batch"] = offset2batch(self.offset) + elif "offset" not in self.keys() and "batch" in self.keys(): + self["offset"] = batch2offset(self.batch) + + def serialization(self, order="z", depth=None, shuffle_orders=False): + """ + Point Cloud Serialization + + relies on ["grid_coord" or "coord" + "grid_size", "batch", "feat"] + """ + assert "batch" in self.keys() + if "grid_coord" not in self.keys(): + # if you don't want to run GridSampling during data augmentation, + # add the following augmentation to your pipeline: + # dict(type="Copy", keys_dict={"grid_size": 0.01}), + # (adjust `grid_size` to what you want) + assert {"grid_size", "coord"}.issubset(self.keys()) + self["grid_coord"] = torch.div( + self.coord - self.coord.min(0)[0], self.grid_size, rounding_mode="trunc" + ).int() + + if depth is None: + # Adaptively measure the depth of the serialization cube (side length = 2 ^ depth) + depth = int(self.grid_coord.max()).bit_length() + self["serialized_depth"] = depth + # Maximum bit length for a serialization code is 63 (int64) + assert depth * 3 + len(self.offset).bit_length() <= 63 + # Here we follow OCNN and limit the depth to 16 (48 bits) for the point position. + # Even with depth limited to 16, we can encode a 655.36^3 (2^16 * 0.01) meter^3 + # cube at a grid size of 0.01 meter, which we consider enough for the current stage. + # The limit can be lifted by optimizing the z-order encoding function if necessary. + assert depth <= 16 + + # The serialization codes are arranged in the following structure: + # [Order1 ([n]), + # Order2 ([n]), + # ... 
+ OrderN ([n])] (k, n) + code = [ + encode(self.grid_coord, self.batch, depth, order=order_) for order_ in order + ] + code = torch.stack(code) + order = torch.argsort(code) + inverse = torch.zeros_like(order).scatter_( + dim=1, + index=order, + src=torch.arange(0, code.shape[1], device=order.device).repeat( + code.shape[0], 1 + ), + ) + + if shuffle_orders: + perm = torch.randperm(code.shape[0]) + code = code[perm] + order = order[perm] + inverse = inverse[perm] + + self["serialized_code"] = code + self["serialized_order"] = order + self["serialized_inverse"] = inverse + + def sparsify(self, pad=96): + """ + Point Cloud Sparsification + + Point clouds are sparse; here we use "sparsify" to specifically refer to + preparing "spconv.SparseConvTensor" for SpConv. + + relies on ["grid_coord" or "coord" + "grid_size", "batch", "feat"] + + pad: padding added to the sparse shape. + """ + assert {"feat", "batch"}.issubset(self.keys()) + if "grid_coord" not in self.keys(): + # if you don't want to run GridSampling during data augmentation, + # add the following augmentation to your pipeline: + # dict(type="Copy", keys_dict={"grid_size": 0.01}), + # (adjust `grid_size` to what you want) + assert {"grid_size", "coord"}.issubset(self.keys()) + self["grid_coord"] = torch.div( + self.coord - self.coord.min(0)[0], self.grid_size, rounding_mode="trunc" + ).int() + if "sparse_shape" in self.keys(): + sparse_shape = self.sparse_shape + else: + sparse_shape = torch.add( + torch.max(self.grid_coord, dim=0).values, pad + ).tolist() + sparse_conv_feat = spconv.SparseConvTensor( + features=self.feat, + indices=torch.cat( + [self.batch.unsqueeze(-1).int(), self.grid_coord.int()], dim=1 + ).contiguous(), + spatial_shape=sparse_shape, + batch_size=self.batch[-1].tolist() + 1, + ) + self["sparse_shape"] = sparse_shape + self["sparse_conv_feat"] = sparse_conv_feat + + +class PointModule(nn.Module): + r"""PointModule + placeholder; any module subclassing this takes a Point when used in PointSequential. + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + +class PointSequential(PointModule): + r"""A sequential container. + Modules will be added to it in the order they are passed in the constructor. + Alternatively, an ordered dict of modules can also be passed in. 
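+ Unlike torch.nn.Sequential, forward() dispatches on module type: PointModule + instances receive the whole Point, spconv modules run on point.sparse_conv_feat, + and plain nn.Module layers are applied to point.feat.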
+ """ + + def __init__(self, *args, **kwargs): + super().__init__() + if len(args) == 1 and isinstance(args[0], OrderedDict): + for key, module in args[0].items(): + self.add_module(key, module) + else: + for idx, module in enumerate(args): + self.add_module(str(idx), module) + for name, module in kwargs.items(): + if sys.version_info < (3, 6): + raise ValueError("kwargs only supported in py36+") + if name in self._modules: + raise ValueError("name exists.") + self.add_module(name, module) + + def __getitem__(self, idx): + if not (-len(self) <= idx < len(self)): + raise IndexError("index {} is out of range".format(idx)) + if idx < 0: + idx += len(self) + it = iter(self._modules.values()) + for i in range(idx): + next(it) + return next(it) + + def __len__(self): + return len(self._modules) + + def add(self, module, name=None): + if name is None: + name = str(len(self._modules)) + if name in self._modules: + raise KeyError("name exists") + self.add_module(name, module) + + def forward(self, input): + for k, module in self._modules.items(): + # Point module + if isinstance(module, PointModule): + input = module(input) + # Spconv module + elif spconv.modules.is_spconv_module(module): + if isinstance(input, Point): + input.sparse_conv_feat = module(input.sparse_conv_feat) + input.feat = input.sparse_conv_feat.features + else: + input = module(input) + # PyTorch module + else: + if isinstance(input, Point): + input.feat = module(input.feat) + if "sparse_conv_feat" in input.keys(): + input.sparse_conv_feat = input.sparse_conv_feat.replace_feature( + input.feat + ) + elif isinstance(input, spconv.SparseConvTensor): + if input.indices.shape[0] != 0: + input = input.replace_feature(module(input.features)) + else: + input = module(input) + return input + + +class PDNorm(PointModule): + def __init__( + self, + num_features, + norm_layer, + context_channels=256, + conditions=("ScanNet", "S3DIS", "Structured3D"), + decouple=True, + adaptive=False, + ): + super().__init__() + self.conditions = conditions + self.decouple = decouple + self.adaptive = adaptive + if self.decouple: + self.norm = nn.ModuleList([norm_layer(num_features) for _ in conditions]) + else: + self.norm = norm_layer + if self.adaptive: + self.modulation = nn.Sequential( + nn.SiLU(), nn.Linear(context_channels, 2 * num_features, bias=True) + ) + + def forward(self, point): + assert {"feat", "condition"}.issubset(point.keys()) + if isinstance(point.condition, str): + condition = point.condition + else: + condition = point.condition[0] + if self.decouple: + assert condition in self.conditions + norm = self.norm[self.conditions.index(condition)] + else: + norm = self.norm + point.feat = norm(point.feat) + if self.adaptive: + assert "context" in point.keys() + shift, scale = self.modulation(point.context).chunk(2, dim=1) + point.feat = point.feat * (1.0 + scale) + shift + return point + + +class RPE(torch.nn.Module): + def __init__(self, patch_size, num_heads): + super().__init__() + self.patch_size = patch_size + self.num_heads = num_heads + self.pos_bnd = int((4 * patch_size) ** (1 / 3) * 2) + self.rpe_num = 2 * self.pos_bnd + 1 + self.rpe_table = torch.nn.Parameter(torch.zeros(3 * self.rpe_num, num_heads)) + torch.nn.init.trunc_normal_(self.rpe_table, std=0.02) + + def forward(self, coord): + idx = ( + coord.clamp(-self.pos_bnd, self.pos_bnd) # clamp into bnd + + self.pos_bnd # relative position to positive index + + torch.arange(3, device=coord.device) * self.rpe_num # x, y, z stride + ) + out = self.rpe_table.index_select(0, 
idx.reshape(-1)) + out = out.view(idx.shape + (-1,)).sum(3) + out = out.permute(0, 3, 1, 2) # (N, K, K, H) -> (N, H, K, K) + return out + + +class SerializedAttention(PointModule): + def __init__( + self, + channels, + num_heads, + patch_size, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + order_index=0, + enable_rpe=False, + enable_flash=True, + upcast_attention=True, + upcast_softmax=True, + ): + super().__init__() + assert channels % num_heads == 0 + self.channels = channels + self.num_heads = num_heads + self.scale = qk_scale or (channels // num_heads) ** -0.5 + self.order_index = order_index + self.upcast_attention = upcast_attention + self.upcast_softmax = upcast_softmax + self.enable_rpe = enable_rpe + self.enable_flash = enable_flash + if enable_flash: + assert ( + enable_rpe is False + ), "Set enable_rpe to False when enable Flash Attention" + assert ( + upcast_attention is False + ), "Set upcast_attention to False when enable Flash Attention" + assert ( + upcast_softmax is False + ), "Set upcast_softmax to False when enable Flash Attention" + #assert flash_attn is not None, "Make sure flash_attn is installed." + self.patch_size = patch_size + self.attn_drop = attn_drop + else: + # when disable flash attention, we still don't want to use mask + # consequently, patch size will auto set to the + # min number of patch_size_max and number of points + self.patch_size_max = patch_size + self.patch_size = 0 + self.attn_drop = torch.nn.Dropout(attn_drop) + + self.qkv = torch.nn.Linear(channels, channels * 3, bias=qkv_bias) + self.proj = torch.nn.Linear(channels, channels) + self.proj_drop = torch.nn.Dropout(proj_drop) + self.softmax = torch.nn.Softmax(dim=-1) + self.rpe = RPE(patch_size, num_heads) if self.enable_rpe else None + + @torch.no_grad() + def get_rel_pos(self, point, order): + K = self.patch_size + rel_pos_key = f"rel_pos_{self.order_index}" + if rel_pos_key not in point.keys(): + grid_coord = point.grid_coord[order] + grid_coord = grid_coord.reshape(-1, K, 3) + point[rel_pos_key] = grid_coord.unsqueeze(2) - grid_coord.unsqueeze(1) + return point[rel_pos_key] + + @torch.no_grad() + def get_padding_and_inverse(self, point): + pad_key = "pad" + unpad_key = "unpad" + cu_seqlens_key = "cu_seqlens_key" + if ( + pad_key not in point.keys() + or unpad_key not in point.keys() + or cu_seqlens_key not in point.keys() + ): + offset = point.offset + bincount = offset2bincount(offset) + bincount_pad = ( + torch.div( + bincount + self.patch_size - 1, + self.patch_size, + rounding_mode="trunc", + ) + * self.patch_size + ) + # only pad point when num of points larger than patch_size + mask_pad = bincount > self.patch_size + bincount_pad = ~mask_pad * bincount + mask_pad * bincount_pad + _offset = nn.functional.pad(offset, (1, 0)) + _offset_pad = nn.functional.pad(torch.cumsum(bincount_pad, dim=0), (1, 0)) + pad = torch.arange(_offset_pad[-1], device=offset.device) + unpad = torch.arange(_offset[-1], device=offset.device) + cu_seqlens = [] + for i in range(len(offset)): + unpad[_offset[i] : _offset[i + 1]] += _offset_pad[i] - _offset[i] + if bincount[i] != bincount_pad[i]: + pad[ + _offset_pad[i + 1] + - self.patch_size + + (bincount[i] % self.patch_size) : _offset_pad[i + 1] + ] = pad[ + _offset_pad[i + 1] + - 2 * self.patch_size + + (bincount[i] % self.patch_size) : _offset_pad[i + 1] + - self.patch_size + ] + pad[_offset_pad[i] : _offset_pad[i + 1]] -= _offset_pad[i] - _offset[i] + cu_seqlens.append( + torch.arange( + _offset_pad[i], + _offset_pad[i + 1], + 
step=self.patch_size, + dtype=torch.int32, + device=offset.device, + ) + ) + point[pad_key] = pad + point[unpad_key] = unpad + point[cu_seqlens_key] = nn.functional.pad( + torch.concat(cu_seqlens), (0, 1), value=_offset_pad[-1] + ) + return point[pad_key], point[unpad_key], point[cu_seqlens_key] + + def forward(self, point): + if not self.enable_flash: + self.patch_size = min( + offset2bincount(point.offset).min().tolist(), self.patch_size_max + ) + + H = self.num_heads + K = self.patch_size + C = self.channels + + pad, unpad, cu_seqlens = self.get_padding_and_inverse(point) + + order = point.serialized_order[self.order_index][pad] + inverse = unpad[point.serialized_inverse[self.order_index]] + + # padding and reshape feat and batch for serialized point patch + qkv = self.qkv(point.feat)[order] + + if not self.enable_flash: + # encode and reshape qkv: (N', K, 3, H, C') => (3, N', H, K, C') + q, k, v = ( + qkv.reshape(-1, K, 3, H, C // H).permute(2, 0, 3, 1, 4).unbind(dim=0) + ) + # attn + if self.upcast_attention: + q = q.float() + k = k.float() + attn = (q * self.scale) @ k.transpose(-2, -1) # (N', H, K, K) + if self.enable_rpe: + attn = attn + self.rpe(self.get_rel_pos(point, order)) + if self.upcast_softmax: + attn = attn.float() + attn = self.softmax(attn) + attn = self.attn_drop(attn).to(qkv.dtype) + feat = (attn @ v).transpose(1, 2).reshape(-1, C) + else: + feat = flash_attn.flash_attn_varlen_qkvpacked_func( + qkv.half().reshape(-1, 3, H, C // H), + cu_seqlens, + max_seqlen=self.patch_size, + dropout_p=self.attn_drop if self.training else 0, + softmax_scale=self.scale, + ).reshape(-1, C) + feat = feat.to(qkv.dtype) + feat = feat[inverse] + + # ffn + feat = self.proj(feat) + feat = self.proj_drop(feat) + point.feat = feat + return point + + +class MLP(nn.Module): + def __init__( + self, + in_channels, + hidden_channels=None, + out_channels=None, + act_layer=nn.GELU, + drop=0.0, + ): + super().__init__() + out_channels = out_channels or in_channels + hidden_channels = hidden_channels or in_channels + self.fc1 = nn.Linear(in_channels, hidden_channels) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_channels, out_channels) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +class Block(PointModule): + def __init__( + self, + channels, + num_heads, + patch_size=48, + mlp_ratio=4.0, + qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.0, + norm_layer=nn.LayerNorm, + act_layer=nn.GELU, + pre_norm=True, + order_index=0, + cpe_indice_key=None, + enable_rpe=False, + enable_flash=True, + upcast_attention=True, + upcast_softmax=True, + ): + super().__init__() + self.channels = channels + self.pre_norm = pre_norm + + self.cpe = PointSequential( + spconv.SubMConv3d( + channels, + channels, + kernel_size=3, + bias=True, + indice_key=cpe_indice_key, + ), + nn.Linear(channels, channels), + norm_layer(channels), + ) + + self.norm1 = PointSequential(norm_layer(channels)) + self.attn = SerializedAttention( + channels=channels, + patch_size=patch_size, + num_heads=num_heads, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + order_index=order_index, + enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + ) + self.norm2 = PointSequential(norm_layer(channels)) + self.mlp = PointSequential( + MLP( + in_channels=channels, + hidden_channels=int(channels * 
mlp_ratio), + out_channels=channels, + act_layer=act_layer, + drop=proj_drop, + ) + ) + self.drop_path = PointSequential( + DropPath(drop_path) if drop_path > 0.0 else nn.Identity() + ) + + def forward(self, point: Point): + shortcut = point.feat + point = self.cpe(point) + point.feat = shortcut + point.feat + shortcut = point.feat + if self.pre_norm: + point = self.norm1(point) + point = self.drop_path(self.attn(point)) + point.feat = shortcut + point.feat + if not self.pre_norm: + point = self.norm1(point) + + shortcut = point.feat + if self.pre_norm: + point = self.norm2(point) + point = self.drop_path(self.mlp(point)) + point.feat = shortcut + point.feat + if not self.pre_norm: + point = self.norm2(point) + point.sparse_conv_feat = point.sparse_conv_feat.replace_feature(point.feat) + #point.sparse_conv_feat.replace_feature(point.feat) old version + return point + + +class SerializedPooling(PointModule): + def __init__( + self, + in_channels, + out_channels, + stride=2, + norm_layer=None, + act_layer=None, + reduce="max", + shuffle_orders=True, + traceable=True, # record parent and cluster + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + + assert stride == 2 ** (math.ceil(stride) - 1).bit_length() # 2, 4, 8 + # TODO: add support to grid pool (any stride) + self.stride = stride + assert reduce in ["sum", "mean", "min", "max"] + self.reduce = reduce + self.shuffle_orders = shuffle_orders + self.traceable = traceable + + self.proj = nn.Linear(in_channels, out_channels) + if norm_layer is not None: + self.norm = PointSequential(norm_layer(out_channels)) + if act_layer is not None: + self.act = PointSequential(act_layer()) + + def forward(self, point: Point): + pooling_depth = (math.ceil(self.stride) - 1).bit_length() + if pooling_depth > point.serialized_depth: + pooling_depth = 0 + assert { + "serialized_code", + "serialized_order", + "serialized_inverse", + "serialized_depth", + }.issubset( + point.keys() + ), "Run point.serialization() point cloud before SerializedPooling" + code = point.serialized_code >> pooling_depth * 3 # if pooling depth=1, right shift 3 i.e. divide by 8 + # this is divide by 2^(pooling_depth+2) i.e. 4*stride + # this is because it's 3d, shift index by 8 means half + code_, cluster, counts = torch.unique( + code[0], + sorted=True, + return_inverse=True, + return_counts=True, + ) + # indices of point sorted by cluster, for torch_scatter.segment_csr + _, indices = torch.sort(cluster) + # index pointer for sorted point, for torch_scatter.segment_csr + idx_ptr = torch.cat([counts.new_zeros(1), torch.cumsum(counts, dim=0)]) + # head_indices of each cluster, for reduce attr e.g. 
code, batch + head_indices = indices[idx_ptr[:-1]] + # generate down code, order, inverse + code = code[:, head_indices] # these are the unique entries + order = torch.argsort(code) + inverse = torch.zeros_like(order).scatter_( + dim=1, + index=order, + src=torch.arange(0, code.shape[1], device=order.device).repeat( + code.shape[0], 1 + ), + ) + + if self.shuffle_orders: + perm = torch.randperm(code.shape[0]) + code = code[perm] + order = order[perm] + inverse = inverse[perm] + # coordinate is also halved - the space is sparser + # collect information + point_dict = Dict( + feat=torch_scatter.segment_csr( + self.proj(point.feat)[indices], idx_ptr, reduce=self.reduce + ), + coord=torch_scatter.segment_csr( + point.coord[indices], idx_ptr, reduce="mean" + ), + grid_coord=point.grid_coord[head_indices] >> pooling_depth, + serialized_code=code, + serialized_order=order, + serialized_inverse=inverse, + serialized_depth=point.serialized_depth - pooling_depth, + batch=point.batch[head_indices], + ) + + if "condition" in point.keys(): + point_dict["condition"] = point.condition + if "context" in point.keys(): + point_dict["context"] = point.context + + if self.traceable: + point_dict["pooling_inverse"] = cluster + point_dict["pooling_parent"] = point + point = Point(point_dict) + if self.norm is not None: + point = self.norm(point) + if self.act is not None: + point = self.act(point) + point.sparsify() + return point + + +class SerializedUnpooling(PointModule): + def __init__( + self, + in_channels, + skip_channels, + out_channels, + norm_layer=None, + act_layer=None, + traceable=False, # record parent and cluster + ): + super().__init__() + self.proj = PointSequential(nn.Linear(in_channels, out_channels)) + self.proj_skip = PointSequential(nn.Linear(skip_channels, out_channels)) + + if norm_layer is not None: + self.proj.add(norm_layer(out_channels)) + self.proj_skip.add(norm_layer(out_channels)) + + if act_layer is not None: + self.proj.add(act_layer()) + self.proj_skip.add(act_layer()) + + self.traceable = traceable + + def forward(self, point): + assert "pooling_parent" in point.keys() + assert "pooling_inverse" in point.keys() + parent = point.pop("pooling_parent") + inverse = point.pop("pooling_inverse") + point = self.proj(point) + parent = self.proj_skip(parent) + parent.feat = parent.feat + point.feat[inverse] + + if self.traceable: + parent["unpooling_parent"] = point + return parent + + +class Embedding(PointModule): + def __init__( + self, + in_channels, + embed_channels, + norm_layer=None, + act_layer=None, + ): + super().__init__() + self.in_channels = in_channels + self.embed_channels = embed_channels + + # TODO: check remove spconv + self.stem = PointSequential( + conv=spconv.SubMConv3d( + in_channels, + embed_channels, + kernel_size=5, + padding=1, + bias=False, + indice_key="stem", + ) + ) + if norm_layer is not None: + self.stem.add(norm_layer(embed_channels), name="norm") + if act_layer is not None: + self.stem.add(act_layer(), name="act") + + def forward(self, point: Point): + point = self.stem(point) + return point + + +class PointTransformerV3(PointModule): + def __init__( + self, + in_channels=6, + order=("z", "z-trans", "hilbert", "hilbert-trans"), + stride=(2, 2, 2, 2), + enc_depths=(2, 2, 2, 6, 2), + enc_channels=(32, 64, 128, 256, 512), + enc_num_head=(2, 4, 8, 16, 32), + enc_patch_size=(1024, 1024, 1024, 1024, 1024), + dec_depths=(2, 2, 2, 2), + dec_channels=(64, 64, 128, 256), + dec_num_head=(4, 4, 8, 16), + dec_patch_size=(1024, 1024, 1024, 1024), + mlp_ratio=4, + 
qkv_bias=True, + qk_scale=None, + attn_drop=0.0, + proj_drop=0.0, + drop_path=0.3, + pre_norm=True, + shuffle_orders=True, + enable_rpe=False, + enable_flash=False,#True, + upcast_attention=False, + upcast_softmax=False, + cls_mode=False, + pdnorm_bn=False, + pdnorm_ln=False, + pdnorm_decouple=True, + pdnorm_adaptive=False, + pdnorm_affine=True, + pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"), + ): + super().__init__() + self.num_stages = len(enc_depths) + self.order = [order] if isinstance(order, str) else order + self.cls_mode = cls_mode + self.shuffle_orders = shuffle_orders + + assert self.num_stages == len(stride) + 1 + assert self.num_stages == len(enc_depths) + assert self.num_stages == len(enc_channels) + assert self.num_stages == len(enc_num_head) + assert self.num_stages == len(enc_patch_size) + assert self.cls_mode or self.num_stages == len(dec_depths) + 1 + assert self.cls_mode or self.num_stages == len(dec_channels) + 1 + assert self.cls_mode or self.num_stages == len(dec_num_head) + 1 + assert self.cls_mode or self.num_stages == len(dec_patch_size) + 1 + + # norm layers + if pdnorm_bn: + bn_layer = partial( + PDNorm, + norm_layer=partial( + nn.BatchNorm1d, eps=1e-3, momentum=0.01, affine=pdnorm_affine + ), + conditions=pdnorm_conditions, + decouple=pdnorm_decouple, + adaptive=pdnorm_adaptive, + ) + else: + bn_layer = partial(nn.BatchNorm1d, eps=1e-3, momentum=0.01) + if pdnorm_ln: + ln_layer = partial( + PDNorm, + norm_layer=partial(nn.LayerNorm, elementwise_affine=pdnorm_affine), + conditions=pdnorm_conditions, + decouple=pdnorm_decouple, + adaptive=pdnorm_adaptive, + ) + else: + ln_layer = nn.LayerNorm + # activation layers + act_layer = nn.GELU + + self.embedding = Embedding( + in_channels=in_channels, + embed_channels=enc_channels[0], + norm_layer=bn_layer, + act_layer=act_layer, + ) + + # encoder + enc_drop_path = [ + x.item() for x in torch.linspace(0, drop_path, sum(enc_depths)) + ] + self.enc = PointSequential() + for s in range(self.num_stages): + enc_drop_path_ = enc_drop_path[ + sum(enc_depths[:s]) : sum(enc_depths[: s + 1]) + ] + enc = PointSequential() + if s > 0: + enc.add( + SerializedPooling( + in_channels=enc_channels[s - 1], + out_channels=enc_channels[s], + stride=stride[s - 1], + norm_layer=bn_layer, + act_layer=act_layer, + ), + name="down", + ) + for i in range(enc_depths[s]): + enc.add( + Block( + channels=enc_channels[s], + num_heads=enc_num_head[s], + patch_size=enc_patch_size[s], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + drop_path=enc_drop_path_[i], + norm_layer=ln_layer, + act_layer=act_layer, + pre_norm=pre_norm, + order_index=i % len(self.order), + cpe_indice_key=f"stage{s}", + enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + ), + name=f"block{i}", + ) + if len(enc) != 0: + self.enc.add(module=enc, name=f"enc{s}") + + # decoder + if not self.cls_mode: + dec_drop_path = [ + x.item() for x in torch.linspace(0, drop_path, sum(dec_depths)) + ] + self.dec = PointSequential() + dec_channels = list(dec_channels) + [enc_channels[-1]] + for s in reversed(range(self.num_stages - 1)): + dec_drop_path_ = dec_drop_path[ + sum(dec_depths[:s]) : sum(dec_depths[: s + 1]) + ] + dec_drop_path_.reverse() + dec = PointSequential() + dec.add( + SerializedUnpooling( + in_channels=dec_channels[s + 1], + skip_channels=enc_channels[s], + out_channels=dec_channels[s], + norm_layer=bn_layer, + act_layer=act_layer, + ), + 
name="up", + ) + for i in range(dec_depths[s]): + dec.add( + Block( + channels=dec_channels[s], + num_heads=dec_num_head[s], + patch_size=dec_patch_size[s], + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop=attn_drop, + proj_drop=proj_drop, + drop_path=dec_drop_path_[i], + norm_layer=ln_layer, + act_layer=act_layer, + pre_norm=pre_norm, + order_index=i % len(self.order), + cpe_indice_key=f"stage{s}", + enable_rpe=enable_rpe, + enable_flash=enable_flash, + upcast_attention=upcast_attention, + upcast_softmax=upcast_softmax, + ), + name=f"block{i}", + ) + self.dec.add(module=dec, name=f"dec{s}") + + def forward(self, data_dict): + """ + A data_dict is a dictionary containing properties of a batched point cloud. + It should contain the following properties for PTv3: + 1. "feat": feature of point cloud + 2. "grid_coord": discrete coordinate after grid sampling (voxelization) or "coord" + "grid_size" + 3. "offset" or "batch": https://github.com/Pointcept/Pointcept?tab=readme-ov-file#offset + """ + point = Point(data_dict) + point.serialization(order=self.order, shuffle_orders=self.shuffle_orders) + point.sparsify() + point = self.embedding(point) + point = self.enc(point) #23,512 + if not self.cls_mode: + point = self.dec(point) #n_pts, 64 + return point + + +class PointSemSeg(nn.Module): + def __init__(self, args, dim_output, emb=64, init_logit_scale=np.log(1 / 0.07)): + super().__init__() + + self.dim_output = dim_output + + # define the extractor + self.extractor = PointTransformerV3() # this outputs a 64-dim feature per point + + # define logit scale + self.ln_logit_scale = nn.Parameter(torch.ones([]) * init_logit_scale) + + self.fc1 = nn.Linear(emb, emb) + self.fc2 = nn.Linear(emb, emb) + self.fc3 = nn.Linear(emb, emb) + self.fc4 = nn.Linear(emb, dim_output) + + def distillation_head(self, x): + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + x = F.relu(self.fc3(x)) + x = self.fc4(x) + return x + + def freeze_extractor(self): + for param in self.extractor.parameters(): + param.requires_grad = False + + def forward(self, x, return_pts_feat=False): + pointall = self.extractor(x) + feature = pointall["feat"] #[n_pts_cur_batch, 64] + + x = self.distillation_head(feature) #[n_pts_cur_batch, dim_out] + + if return_pts_feat: + return x, feature + else: + return x + + +class Find3D(nn.Module, PyTorchModelHubMixin): + def __init__(self, dim_output, emb=64, init_logit_scale=np.log(1 / 0.07)): + super().__init__() + + self.dim_output = dim_output + + # define the extractor + self.extractor = PointTransformerV3() # this outputs a 64-dim feature per point + + # define logit scale + self.ln_logit_scale = nn.Parameter(torch.ones([]) * init_logit_scale) + + self.fc1 = nn.Linear(emb, emb) + self.fc2 = nn.Linear(emb, emb) + self.fc3 = nn.Linear(emb, emb) + self.fc4 = nn.Linear(emb, dim_output) + + def distillation_head(self, x): + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + x = F.relu(self.fc3(x)) + x = self.fc4(x) + return x + + def freeze_extractor(self): + for param in self.extractor.parameters(): + param.requires_grad = False + + def forward(self, x, return_pts_feat=False): + pointall = self.extractor(x) + feature = pointall["feat"] #[n_pts_cur_batch, 64] + + x = self.distillation_head(feature) #[n_pts_cur_batch, dim_out] + + if return_pts_feat: + return x, feature + else: + return x \ No newline at end of file diff --git a/model/serialization/__init__.py b/model/serialization/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..86159d07ca3405e01de235dc8bf921a2752b8ce8 --- /dev/null +++ b/model/serialization/__init__.py @@ -0,0 +1,8 @@ +from .default import ( + encode, + decode, + z_order_encode, + z_order_decode, + hilbert_encode, + hilbert_decode, +) diff --git a/model/serialization/default.py b/model/serialization/default.py new file mode 100644 index 0000000000000000000000000000000000000000..ea76c88e2eef3308e700230bc11ae3cab77c43e9 --- /dev/null +++ b/model/serialization/default.py @@ -0,0 +1,59 @@ +import torch +from .z_order import xyz2key as z_order_encode_ +from .z_order import key2xyz as z_order_decode_ +from .hilbert import encode as hilbert_encode_ +from .hilbert import decode as hilbert_decode_ + + +@torch.inference_mode() +def encode(grid_coord, batch=None, depth=16, order="z"): + assert order in {"z", "z-trans", "hilbert", "hilbert-trans"} + if order == "z": + code = z_order_encode(grid_coord, depth=depth) + elif order == "z-trans": + code = z_order_encode(grid_coord[:, [1, 0, 2]], depth=depth) + elif order == "hilbert": + code = hilbert_encode(grid_coord, depth=depth) + elif order == "hilbert-trans": + code = hilbert_encode(grid_coord[:, [1, 0, 2]], depth=depth) + else: + raise NotImplementedError + if batch is not None: + batch = batch.long() + code = batch << depth * 3 | code + return code + + +@torch.inference_mode() +def decode(code, depth=16, order="z"): + assert order in {"z", "hilbert"} + batch = code >> depth * 3 + code = code & ((1 << depth * 3) - 1) + if order == "z": + grid_coord = z_order_decode(code, depth=depth) + elif order == "hilbert": + grid_coord = hilbert_decode(code, depth=depth) + else: + raise NotImplementedError + return grid_coord, batch + + +def z_order_encode(grid_coord: torch.Tensor, depth: int = 16): + x, y, z = grid_coord[:, 0].long(), grid_coord[:, 1].long(), grid_coord[:, 2].long() + # we block the support to batch, maintain batched code in Point class + code = z_order_encode_(x, y, z, b=None, depth=depth) + return code + + +def z_order_decode(code: torch.Tensor, depth): + x, y, z = z_order_decode_(code, depth=depth) + grid_coord = torch.stack([x, y, z], dim=-1) # (N, 3) + return grid_coord + + +def hilbert_encode(grid_coord: torch.Tensor, depth: int = 16): + return hilbert_encode_(grid_coord, num_dims=3, num_bits=depth) + + +def hilbert_decode(code: torch.Tensor, depth: int = 16): + return hilbert_decode_(code, num_dims=3, num_bits=depth) diff --git a/model/serialization/hilbert.py b/model/serialization/hilbert.py new file mode 100644 index 0000000000000000000000000000000000000000..682be19e296beaa26448f9485d0a226e1adc9f0b --- /dev/null +++ b/model/serialization/hilbert.py @@ -0,0 +1,303 @@ +""" +Hilbert Order +Modified from https://github.com/PrincetonLIPS/numpy-hilbert-curve + +Author: Xiaoyang Wu (xiaoyang.wu.cs@gmail.com), Kaixin Xu +Please cite our work if the code is helpful to you. +""" + +import torch + + +def right_shift(binary, k=1, axis=-1): + """Right shift an array of binary values. + + Parameters: + ----------- + binary: An ndarray of binary values. + + k: The number of bits to shift. Default 1. + + axis: The axis along which to shift. Default -1. + + Returns: + -------- + Returns an ndarray with zero prepended and the ends truncated, along + whatever axis was specified.""" + + # If we're shifting the whole thing, just return zeros. + if binary.shape[axis] <= k: + return torch.zeros_like(binary) + + # Determine the padding pattern. 
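+ # (The numpy-style pad spec kept below is for reference only; this torch + # port pads with torch.nn.functional.pad instead.)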
+ # padding = [(0,0)] * len(binary.shape) + # padding[axis] = (k,0) + + # Determine the slicing pattern to eliminate just the last one. + slicing = [slice(None)] * len(binary.shape) + slicing[axis] = slice(None, -k) + shifted = torch.nn.functional.pad( + binary[tuple(slicing)], (k, 0), mode="constant", value=0 + ) + + return shifted + + +def binary2gray(binary, axis=-1): + """Convert an array of binary values into Gray codes. + + This uses the classic X ^ (X >> 1) trick to compute the Gray code. + + Parameters: + ----------- + binary: An ndarray of binary values. + + axis: The axis along which to compute the gray code. Default=-1. + + Returns: + -------- + Returns an ndarray of Gray codes. + """ + shifted = right_shift(binary, axis=axis) + + # Do the X ^ (X >> 1) trick. + gray = torch.logical_xor(binary, shifted) + + return gray + + +def gray2binary(gray, axis=-1): + """Convert an array of Gray codes back into binary values. + + Parameters: + ----------- + gray: An ndarray of gray codes. + + axis: The axis along which to perform Gray decoding. Default=-1. + + Returns: + -------- + Returns an ndarray of binary values. + """ + + # Loop the log2(bits) number of times necessary, with shift and xor. + shift = 2 ** (torch.Tensor([gray.shape[axis]]).log2().ceil().int() - 1) + while shift > 0: + gray = torch.logical_xor(gray, right_shift(gray, shift)) + shift = torch.div(shift, 2, rounding_mode="floor") + return gray + + +def encode(locs, num_dims, num_bits): + """Decode an array of locations in a hypercube into a Hilbert integer. + + This is a vectorized-ish version of the Hilbert curve implementation by John + Skilling as described in: + + Skilling, J. (2004, April). Programming the Hilbert curve. In AIP Conference + Proceedings (Vol. 707, No. 1, pp. 381-387). American Institute of Physics. + + Params: + ------- + locs - An ndarray of locations in a hypercube of num_dims dimensions, in + which each dimension runs from 0 to 2**num_bits-1. The shape can + be arbitrary, as long as the last dimension of the same has size + num_dims. + + num_dims - The dimensionality of the hypercube. Integer. + + num_bits - The number of bits for each dimension. Integer. + + Returns: + -------- + The output is an ndarray of uint64 integers with the same shape as the + input, excluding the last dimension, which needs to be num_dims. + """ + + # Keep around the original shape for later. + orig_shape = locs.shape + bitpack_mask = 1 << torch.arange(0, 8).to(locs.device) + bitpack_mask_rev = bitpack_mask.flip(-1) + + if orig_shape[-1] != num_dims: + raise ValueError( + """ + The shape of locs was surprising in that the last dimension was of size + %d, but num_dims=%d. These need to be equal. + """ + % (orig_shape[-1], num_dims) + ) + + if num_dims * num_bits > 63: + raise ValueError( + """ + num_dims=%d and num_bits=%d for %d bits total, which can't be encoded + into a int64. Are you sure you need that many points on your Hilbert + curve? + """ + % (num_dims, num_bits, num_dims * num_bits) + ) + + # Treat the location integers as 64-bit unsigned and then split them up into + # a sequence of uint8s. Preserve the association by dimension. + locs_uint8 = locs.long().view(torch.uint8).reshape((-1, num_dims, 8)).flip(-1) + + # Now turn these into bits and truncate to num_bits. + gray = ( + locs_uint8.unsqueeze(-1) + .bitwise_and(bitpack_mask_rev) + .ne(0) + .byte() + .flatten(-2, -1)[..., -num_bits:] + ) + + # Run the decoding process the other way. + # Iterate forwards through the bits. 
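+ # Each step applies Skilling's transform with masked XORs: where a dimension's + # bit is set, invert the lower bits of dimension 0; where it is clear, swap + # that dimension's lower bits with dimension 0's.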
+ for bit in range(0, num_bits): + # Iterate forwards through the dimensions. + for dim in range(0, num_dims): + # Identify which ones have this bit active. + mask = gray[:, dim, bit] + + # Where this bit is on, invert the 0 dimension for lower bits. + gray[:, 0, bit + 1 :] = torch.logical_xor( + gray[:, 0, bit + 1 :], mask[:, None] + ) + + # Where the bit is off, exchange the lower bits with the 0 dimension. + to_flip = torch.logical_and( + torch.logical_not(mask[:, None]).repeat(1, gray.shape[2] - bit - 1), + torch.logical_xor(gray[:, 0, bit + 1 :], gray[:, dim, bit + 1 :]), + ) + gray[:, dim, bit + 1 :] = torch.logical_xor( + gray[:, dim, bit + 1 :], to_flip + ) + gray[:, 0, bit + 1 :] = torch.logical_xor(gray[:, 0, bit + 1 :], to_flip) + + # Now flatten out. + gray = gray.swapaxes(1, 2).reshape((-1, num_bits * num_dims)) + + # Convert Gray back to binary. + hh_bin = gray2binary(gray) + + # Pad back out to 64 bits. + extra_dims = 64 - num_bits * num_dims + padded = torch.nn.functional.pad(hh_bin, (extra_dims, 0), "constant", 0) + + # Convert binary values into uint8s. + hh_uint8 = ( + (padded.flip(-1).reshape((-1, 8, 8)) * bitpack_mask) + .sum(2) + .squeeze() + .type(torch.uint8) + ) + + # Convert uint8s into uint64s. + hh_uint64 = hh_uint8.view(torch.int64).squeeze() + + return hh_uint64 + + +def decode(hilberts, num_dims, num_bits): + """Decode an array of Hilbert integers into locations in a hypercube. + + This is a vectorized-ish version of the Hilbert curve implementation by John + Skilling as described in: + + Skilling, J. (2004, April). Programming the Hilbert curve. In AIP Conference + Proceedings (Vol. 707, No. 1, pp. 381-387). American Institute of Physics. + + Params: + ------- + hilberts - An ndarray of Hilbert integers. Must be an integer dtype and + cannot have fewer bits than num_dims * num_bits. + + num_dims - The dimensionality of the hypercube. Integer. + + num_bits - The number of bits for each dimension. Integer. + + Returns: + -------- + The output is an ndarray of unsigned integers with the same shape as hilberts + but with an additional dimension of size num_dims. + """ + + if num_dims * num_bits > 64: + raise ValueError( + """ + num_dims=%d and num_bits=%d for %d bits total, which can't be encoded + into a uint64. Are you sure you need that many points on your Hilbert + curve? + """ + % (num_dims, num_bits, num_dims * num_bits) + ) + + # Handle the case where we got handed a naked integer. + hilberts = torch.atleast_1d(hilberts) + + # Keep around the shape for later. + orig_shape = hilberts.shape + bitpack_mask = 2 ** torch.arange(0, 8).to(hilberts.device) + bitpack_mask_rev = bitpack_mask.flip(-1) + + # Treat each of the hilberts as a sequence of eight uint8. + # This treats all of the inputs as uint64 and makes things uniform. + hh_uint8 = ( + hilberts.ravel().type(torch.int64).view(torch.uint8).reshape((-1, 8)).flip(-1) + ) + + # Turn these lists of uints into lists of bits and then truncate to the size + # we actually need for using Skilling's procedure. + hh_bits = ( + hh_uint8.unsqueeze(-1) + .bitwise_and(bitpack_mask_rev) + .ne(0) + .byte() + .flatten(-2, -1)[:, -num_dims * num_bits :] + ) + + # Take the sequence of bits and Gray-code it. + gray = binary2gray(hh_bits) + + # There has got to be a better way to do this. + # I could index them differently, but the eventual packbits likes it this way. + gray = gray.reshape((-1, num_bits, num_dims)).swapaxes(1, 2) + + # Iterate backwards through the bits. 
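+ # Undo Skilling's transform: the same masked invert/swap of lower bits as in + # encode(), applied in the opposite bit and dimension order to invert it.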
+ for bit in range(num_bits - 1, -1, -1): + # Iterate backwards through the dimensions. + for dim in range(num_dims - 1, -1, -1): + # Identify which ones have this bit active. + mask = gray[:, dim, bit] + + # Where this bit is on, invert the 0 dimension for lower bits. + gray[:, 0, bit + 1 :] = torch.logical_xor( + gray[:, 0, bit + 1 :], mask[:, None] + ) + + # Where the bit is off, exchange the lower bits with the 0 dimension. + to_flip = torch.logical_and( + torch.logical_not(mask[:, None]), + torch.logical_xor(gray[:, 0, bit + 1 :], gray[:, dim, bit + 1 :]), + ) + gray[:, dim, bit + 1 :] = torch.logical_xor( + gray[:, dim, bit + 1 :], to_flip + ) + gray[:, 0, bit + 1 :] = torch.logical_xor(gray[:, 0, bit + 1 :], to_flip) + + # Pad back out to 64 bits. + extra_dims = 64 - num_bits + padded = torch.nn.functional.pad(gray, (extra_dims, 0), "constant", 0) + + # Now chop these up into blocks of 8. + locs_chopped = padded.flip(-1).reshape((-1, num_dims, 8, 8)) + + # Take those blocks and turn them unto uint8s. + # from IPython import embed; embed() + locs_uint8 = (locs_chopped * bitpack_mask).sum(3).squeeze().type(torch.uint8) + + # Finally, treat these as uint64s. + flat_locs = locs_uint8.view(torch.int64) + + # Return them in the expected shape. + return flat_locs.reshape((*orig_shape, num_dims)) diff --git a/model/serialization/z_order.py b/model/serialization/z_order.py new file mode 100644 index 0000000000000000000000000000000000000000..2fa498fafc371365c5462f678adf35f88cae2c8b --- /dev/null +++ b/model/serialization/z_order.py @@ -0,0 +1,126 @@ +# -------------------------------------------------------- +# Octree-based Sparse Convolutional Neural Networks +# Copyright (c) 2022 Peng-Shuai Wang +# Licensed under The MIT License [see LICENSE for details] +# Written by Peng-Shuai Wang +# -------------------------------------------------------- + +import torch +from typing import Optional, Union + + +class KeyLUT: + def __init__(self): + r256 = torch.arange(256, dtype=torch.int64) + r512 = torch.arange(512, dtype=torch.int64) + zero = torch.zeros(256, dtype=torch.int64) + device = torch.device("cpu") + + self._encode = { + device: ( + self.xyz2key(r256, zero, zero, 8), + self.xyz2key(zero, r256, zero, 8), + self.xyz2key(zero, zero, r256, 8), + ) + } + self._decode = {device: self.key2xyz(r512, 9)} + + def encode_lut(self, device=torch.device("cpu")): + if device not in self._encode: + cpu = torch.device("cpu") + self._encode[device] = tuple(e.to(device) for e in self._encode[cpu]) + return self._encode[device] + + def decode_lut(self, device=torch.device("cpu")): + if device not in self._decode: + cpu = torch.device("cpu") + self._decode[device] = tuple(e.to(device) for e in self._decode[cpu]) + return self._decode[device] + + def xyz2key(self, x, y, z, depth): + key = torch.zeros_like(x) + for i in range(depth): + mask = 1 << i + key = ( + key + | ((x & mask) << (2 * i + 2)) + | ((y & mask) << (2 * i + 1)) + | ((z & mask) << (2 * i + 0)) + ) + return key + + def key2xyz(self, key, depth): + x = torch.zeros_like(key) + y = torch.zeros_like(key) + z = torch.zeros_like(key) + for i in range(depth): + x = x | ((key & (1 << (3 * i + 2))) >> (2 * i + 2)) + y = y | ((key & (1 << (3 * i + 1))) >> (2 * i + 1)) + z = z | ((key & (1 << (3 * i + 0))) >> (2 * i + 0)) + return x, y, z + + +_key_lut = KeyLUT() + + +def xyz2key( + x: torch.Tensor, + y: torch.Tensor, + z: torch.Tensor, + b: Optional[Union[torch.Tensor, int]] = None, + depth: int = 16, +): + r"""Encodes :attr:`x`, :attr:`y`, :attr:`z` 
coordinates to the shuffled keys + based on pre-computed look up tables. The speed of this function is much + faster than the method based on for-loop. + + Args: + x (torch.Tensor): The x coordinate. + y (torch.Tensor): The y coordinate. + z (torch.Tensor): The z coordinate. + b (torch.Tensor or int): The batch index of the coordinates, and should be + smaller than 32768. If :attr:`b` is :obj:`torch.Tensor`, the size of + :attr:`b` must be the same as :attr:`x`, :attr:`y`, and :attr:`z`. + depth (int): The depth of the shuffled key, and must be smaller than 17 (< 17). + """ + + EX, EY, EZ = _key_lut.encode_lut(x.device) + x, y, z = x.long(), y.long(), z.long() + + mask = 255 if depth > 8 else (1 << depth) - 1 + key = EX[x & mask] | EY[y & mask] | EZ[z & mask] + if depth > 8: + mask = (1 << (depth - 8)) - 1 + key16 = EX[(x >> 8) & mask] | EY[(y >> 8) & mask] | EZ[(z >> 8) & mask] + key = key16 << 24 | key + + if b is not None: + b = b.long() + key = b << 48 | key + + return key + + +def key2xyz(key: torch.Tensor, depth: int = 16): + r"""Decodes the shuffled key to :attr:`x`, :attr:`y`, :attr:`z` coordinates + and the batch index based on pre-computed look up tables. + + Args: + key (torch.Tensor): The shuffled key. + depth (int): The depth of the shuffled key, and must be smaller than 17 (< 17). + """ + + DX, DY, DZ = _key_lut.decode_lut(key.device) + x, y, z = torch.zeros_like(key), torch.zeros_like(key), torch.zeros_like(key) + + b = key >> 48 + key = key & ((1 << 48) - 1) + + n = (depth + 2) // 3 + for i in range(n): + k = key >> (i * 9) & 511 + x = x | (DX[k] << (i * 3)) + y = y | (DY[k] << (i * 3)) + z = z | (DZ[k] << (i * 3)) + + return x, y, z, b diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..802cf128ec288d3a89ae4860fdc2f538562af7cb --- /dev/null +++ b/requirements.txt @@ -0,0 +1,32 @@ +--extra-index-url https://download.pytorch.org/whl/cu118 +torch==2.0.0 +torchvision==0.15.0 +torchmetrics==0.10.3 +--find-links https://data.pyg.org/whl/torch-2.0.0+cu118.html +torch-cluster +torch-scatter +torch-sparse +torch-geometric +plotly==5.24.1 +omegaconf +fvcore +iopath +xformers==0.0.18 +h5py +pyyaml +sharedarray +tensorboard +tensorboardx +yapf +addict +einops +scipy +plyfile +termcolor +timm +spconv +transformers +open3d +sentencepiece +numpy==1.24.1 +gradio==5.5.0 \ No newline at end of file diff --git a/utils.py b/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..44ea034e6d1f3a9d31a1fd737d5e716a499fc1cc --- /dev/null +++ b/utils.py @@ -0,0 +1,97 @@ +import plotly.graph_objects as go +import open3d as o3d +import numpy as np +import textwrap + +def read_pcd(pcd_path): + pcd = o3d.io.read_point_cloud(pcd_path) + xyz = np.asarray(pcd.points) + rgb = np.asarray(pcd.colors) + if not pcd.has_normals(): + pcd.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(radius=0.1, max_nn=100)) + normal = np.asarray(pcd.normals) + return xyz, rgb, normal + +def render_pcd_file(pcd_path): + pcd = o3d.io.read_point_cloud(pcd_path) + xyz = np.asarray(pcd.points) + rgb = np.asarray(pcd.colors) + return render_point_cloud(xyz, rgb) + +def render_point_cloud(xyz, rgb, legend=None): + x = xyz[:, 0] + y = xyz[:, 1] + z = xyz[:, 2] + rgb = rgb * 255 + hex_colors = [f'#{int(r):02x}{int(g):02x}{int(b):02x}' for r, g, b in rgb] + + fig = go.Figure(data=[go.Scatter3d(x=x, y=y, z=z, mode='markers', + marker=dict(size=2, color=hex_colors, colorscale='Viridis', opacity=0.8))]) + + if legend: + 
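# Wrap the legend text and pin it above the plot as a paper-anchored annotation. + 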
fig.add_annotation(x=0.5, y=1.25, text="<br>
".join(textwrap.wrap(legend, width=30)), showarrow=False, xref="paper", yref="paper") + + # Customize layout + fig.update_layout( + scene=dict( + xaxis=dict(title='x', showgrid=False, zeroline=False, visible=False), + yaxis=dict(title='y', showgrid=False, zeroline=False, visible=False), + zaxis=dict(title='z', showgrid=False, zeroline=False, visible=False), + aspectmode='manual'), + scene_camera=dict( + up=dict(x=0, y=1, z=0), # Adjust these values for your point cloud + eye=dict(x=0, y=-0.9, z=2), # Increase the values to move further away + center = dict(x=0,y=0,z=0) + ) + ) + # fig.update_layout( + # height=450, + # autosize=True, + # hovermode=False, + # margin=go.layout.Margin(l=0, r=0, b=0, t=0), + # showlegend=False, + # legend=dict( + # yanchor='bottom', + # y=0.01, + # xanchor='right', + # x=0.99, + # ), + # scene=dict( + # aspectmode='manual', + # aspectratio=dict(x=1, y=1, z=1.0), + # camera=dict( + # eye=dict(x=base_radius - 1.6, y=0.0, z=0.6), + # center=dict(x=0.0, y=0.0, z=0.0), + # up=dict(x=0.0, y=0.0, z=1.0)), + # xaxis_title='', + # yaxis_title='', + # zaxis_title='', + # xaxis=dict( + # range=[-scene_bounds, scene_bounds], + # showticklabels=False, + # showgrid=True, + # zeroline=False, + # showbackground=True, + # showspikes=False, + # showline=False, + # ticks=''), + # yaxis=dict( + # range=[-scene_bounds, scene_bounds], + # showticklabels=False, + # showgrid=True, + # zeroline=False, + # showbackground=True, + # showspikes=False, + # showline=False, + # ticks=''), + # zaxis=dict( + # range=[-scene_bounds, scene_bounds], + # showticklabels=False, + # showgrid=True, + # zeroline=False, + # showbackground=True, + # showspikes=False, + # showline=False, + # ticks=''))) + + return fig \ No newline at end of file