Spaces: Transfiner (status: Runtime error)

lkeab committed · commit a13a033 · 1 parent: cbe011a

update

Note: this view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
- README.md +5 -5
- app.py +84 -0
- configs/Base-RCNN-C4.yaml +18 -0
- configs/Base-RCNN-DilatedC5.yaml +31 -0
- configs/Base-RCNN-FPN.yaml +43 -0
- configs/Base-RetinaNet.yaml +25 -0
- configs/Cityscapes/mask_rcnn_R_50_FPN.yaml +27 -0
- configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml +27 -0
- configs/Detectron1-Comparisons/README.md +84 -0
- configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml +17 -0
- configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml +27 -0
- configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml +20 -0
- configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml +19 -0
- configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml +19 -0
- configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +19 -0
- configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml +23 -0
- configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml +22 -0
- configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml +22 -0
- configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml +26 -0
- configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml +12 -0
- configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml +15 -0
- configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml +36 -0
- configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml +10 -0
- configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml +8 -0
- configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml +11 -0
- configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml +11 -0
- configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml +21 -0
- configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml +24 -0
- configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py +151 -0
- configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml +26 -0
- configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml +13 -0
- configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml +19 -0
- configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml +19 -0
- configs/Misc/semantic_R_50_FPN_1x.yaml +11 -0
- configs/Misc/torchvision_imagenet_R_50.py +150 -0
- configs/common/README.md +6 -0
- configs/common/coco_schedule.py +47 -0
- configs/common/data/coco.py +48 -0
- configs/common/data/coco_keypoint.py +13 -0
- configs/common/data/coco_panoptic_separated.py +26 -0
- configs/common/models/cascade_rcnn.py +36 -0
- configs/common/models/keypoint_rcnn_fpn.py +33 -0
- configs/common/models/mask_rcnn_c4.py +88 -0
- configs/common/models/mask_rcnn_fpn.py +93 -0
- configs/common/models/panoptic_fpn.py +20 -0
- configs/common/models/retinanet.py +52 -0
- configs/common/optim.py +15 -0
- configs/common/train.py +18 -0
- configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py +9 -0
- configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py +14 -0
README.md
CHANGED
@@ -1,13 +1,13 @@
 ---
 title: Transfiner
-emoji:
+emoji: 📊
-colorFrom:
+colorFrom: red
-colorTo:
+colorTo: green
 sdk: gradio
-sdk_version:
+sdk_version: 2.9.3
 app_file: app.py
 pinned: false
 license: apache-2.0
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py
ADDED
@@ -0,0 +1,84 @@
+#try:
+#  import detectron2
+#except:
+import os
+os.system('pip install git+https://github.com/SysCV/transfiner.git')
+
+from matplotlib.pyplot import axis
+import gradio as gr
+import requests
+import numpy as np
+from torch import nn
+import requests
+
+import torch
+
+from detectron2 import model_zoo
+from detectron2.engine import DefaultPredictor
+from detectron2.config import get_cfg
+from detectron2.utils.visualizer import Visualizer
+from detectron2.data import MetadataCatalog
+
+
+model_name='./configs/transfiner/mask_rcnn_R_101_FPN_3x_deform.yaml'
+
+
+cfg = get_cfg()
+# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
+cfg.merge_from_file(model_name)
+cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
+cfg.VIS_PERIOD = 100
+# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
+#cfg.MODEL.WEIGHTS = './output_3x_transfiner_r50.pth'
+cfg.MODEL.WEIGHTS = './output_3x_transfiner_r101_deform.pth'
+
+if not torch.cuda.is_available():
+    cfg.MODEL.DEVICE='cpu'
+
+predictor = DefaultPredictor(cfg)
+
+
+def inference(image):
+    width, height = image.size
+    if width > 1300:
+        ratio = float(height) / float(width)
+        width = 1300
+        height = int(ratio * width)
+        image = image.resize((width, height))
+
+    img = np.asarray(image)
+
+    #img = np.array(image)
+    outputs = predictor(img)
+
+    v = Visualizer(img, MetadataCatalog.get(cfg.DATASETS.TRAIN[0]))
+    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
+
+    return out.get_image()
+
+
+
+title = "Mask Transfiner [CVPR, 2022]"
+description = "Demo for <a target='_blank' href='https://arxiv.org/abs/2111.13673'>Mask Transfiner for High-Quality Instance Segmentation, CVPR 2022</a> based on R50-FPN. To use it, simply upload your image, or click one of the examples to load them. Note that it runs in the <b>CPU environment</b> provided by Hugging Face so the processing speed may be slow."
+article = "<p style='text-align: center'><a target='_blank' href='https://arxiv.org/abs/2111.13673'>Mask Transfiner for High-Quality Instance Segmentation, CVPR 2022</a> | <a target='_blank' href='https://github.com/SysCV/transfiner'>Mask Transfiner Github Code</a></p>"
+
+gr.Interface(
+    inference,
+    [gr.inputs.Image(type="pil", label="Input")],
+    gr.outputs.Image(type="numpy", label="Output"),
+    title=title,
+    description=description,
+    article=article,
+    examples=[
+        ["demo/sample_imgs/000000131444.jpg"],
+        ["demo/sample_imgs/000000157365.jpg"],
+        ["demo/sample_imgs/000000176037.jpg"],
+        ["demo/sample_imgs/000000018737.jpg"],
+        ["demo/sample_imgs/000000224200.jpg"],
+        ["demo/sample_imgs/000000558073.jpg"],
+        ["demo/sample_imgs/000000404922.jpg"],
+        ["demo/sample_imgs/000000252776.jpg"],
+        ["demo/sample_imgs/000000482477.jpg"],
+        ["demo/sample_imgs/000000344909.jpg"]
+    ]).launch()
+
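For readers reconstructing this demo locally, a minimal sketch of exercising the predictor above without Gradio. It assumes the same cfg/predictor/inference objects built in app.py, with Transfiner weights in place; the output filename is illustrative. gr.inputs.Image(type="pil") hands inference() a Pillow image, which is what we open here:

    # Sketch: run the demo's inference() on a bundled sample image.
    from PIL import Image

    image = Image.open("demo/sample_imgs/000000131444.jpg").convert("RGB")
    result = inference(image)              # RGB numpy array with drawn instance masks
    Image.fromarray(result).save("out.png")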
configs/Base-RCNN-C4.yaml
ADDED
@@ -0,0 +1,18 @@
+MODEL:
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  RPN:
+    PRE_NMS_TOPK_TEST: 6000
+    POST_NMS_TOPK_TEST: 1000
+  ROI_HEADS:
+    NAME: "Res5ROIHeads"
+DATASETS:
+  TRAIN: ("coco_2017_train",)
+  TEST: ("coco_2017_val",)
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.02
+  STEPS: (60000, 80000)
+  MAX_ITER: 90000
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
configs/Base-RCNN-DilatedC5.yaml
ADDED
@@ -0,0 +1,31 @@
+MODEL:
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  RESNETS:
+    OUT_FEATURES: ["res5"]
+    RES5_DILATION: 2
+  RPN:
+    IN_FEATURES: ["res5"]
+    PRE_NMS_TOPK_TEST: 6000
+    POST_NMS_TOPK_TEST: 1000
+  ROI_HEADS:
+    NAME: "StandardROIHeads"
+    IN_FEATURES: ["res5"]
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_FC: 2
+    POOLER_RESOLUTION: 7
+  ROI_MASK_HEAD:
+    NAME: "MaskRCNNConvUpsampleHead"
+    NUM_CONV: 4
+    POOLER_RESOLUTION: 14
+DATASETS:
+  TRAIN: ("coco_2017_train",)
+  TEST: ("coco_2017_val",)
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.02
+  STEPS: (60000, 80000)
+  MAX_ITER: 90000
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
configs/Base-RCNN-FPN.yaml
ADDED
@@ -0,0 +1,43 @@
+MODEL:
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  BACKBONE:
+    NAME: "build_resnet_fpn_backbone"
+  RESNETS:
+    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
+  FPN:
+    IN_FEATURES: ["res2", "res3", "res4", "res5"]
+  ANCHOR_GENERATOR:
+    SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
+    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
+  RPN:
+    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
+    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
+    # Detectron1 uses 2000 proposals per-batch,
+    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+    POST_NMS_TOPK_TRAIN: 1000
+    POST_NMS_TOPK_TEST: 1000
+  ROI_HEADS:
+    NAME: "StandardROIHeads"
+    IN_FEATURES: ["p2", "p3", "p4", "p5"]
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_FC: 2
+    POOLER_RESOLUTION: 7
+  ROI_MASK_HEAD:
+    NAME: "MaskRCNNConvUpsampleHead"
+    NUM_CONV: 4
+    POOLER_RESOLUTION: 14
+DATASETS:
+  TRAIN: ("coco_2017_train",)
+  #TEST: ("coco_2017_val",)
+  TEST: ("coco_2017_test-dev",)
+SOLVER:
+  IMS_PER_BATCH: 16  #16
+  BASE_LR: 0.02
+  STEPS: (60000, 80000)
+  MAX_ITER: 90000
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
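A brief note on how these base files are consumed: the derived YAMLs below reference them via `_BASE_`, and detectron2's config loader resolves the whole chain before applying the file's own overrides. A minimal sketch, assuming detectron2 is installed and the working directory is the repo root:

    from detectron2.config import get_cfg

    cfg = get_cfg()
    # merge_from_file resolves the _BASE_ chain first, then applies this file's overrides
    cfg.merge_from_file("configs/Cityscapes/mask_rcnn_R_50_FPN.yaml")
    print(cfg.MODEL.ROI_HEADS.NUM_CLASSES)  # 8, overriding the COCO default
    print(cfg.SOLVER.BASE_LR)               # 0.01, overriding 0.02 from Base-RCNN-FPN.yaml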
configs/Base-RetinaNet.yaml
ADDED
@@ -0,0 +1,25 @@
+MODEL:
+  META_ARCHITECTURE: "RetinaNet"
+  BACKBONE:
+    NAME: "build_retinanet_resnet_fpn_backbone"
+  RESNETS:
+    OUT_FEATURES: ["res3", "res4", "res5"]
+  ANCHOR_GENERATOR:
+    SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
+  FPN:
+    IN_FEATURES: ["res3", "res4", "res5"]
+  RETINANET:
+    IOU_THRESHOLDS: [0.4, 0.5]
+    IOU_LABELS: [0, -1, 1]
+    SMOOTH_L1_LOSS_BETA: 0.0
+DATASETS:
+  TRAIN: ("coco_2017_train",)
+  TEST: ("coco_2017_val",)
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.01  # Note that RetinaNet uses a different default learning rate
+  STEPS: (60000, 80000)
+  MAX_ITER: 90000
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+VERSION: 2
configs/Cityscapes/mask_rcnn_R_50_FPN.yaml
ADDED
@@ -0,0 +1,27 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  # For better, more stable performance initialize from COCO
+  WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+  MASK_ON: True
+  ROI_HEADS:
+    NUM_CLASSES: 8
+# This is similar to the setting used in Mask R-CNN paper, Appendix A
+# But there are some differences, e.g., we did not initialize the output
+# layer using the corresponding classes from COCO
+INPUT:
+  MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
+  MIN_SIZE_TRAIN_SAMPLING: "choice"
+  MIN_SIZE_TEST: 1024
+  MAX_SIZE_TRAIN: 2048
+  MAX_SIZE_TEST: 2048
+DATASETS:
+  TRAIN: ("cityscapes_fine_instance_seg_train",)
+  TEST: ("cityscapes_fine_instance_seg_val",)
+SOLVER:
+  BASE_LR: 0.01
+  STEPS: (18000,)
+  MAX_ITER: 24000
+  IMS_PER_BATCH: 8
+TEST:
+  EVAL_PERIOD: 8000
configs/Cityscapes/mask_rcnn_R_50_FPN_4gpu.yaml
ADDED
@@ -0,0 +1,27 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  # For better, more stable performance initialize from COCO
+  WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
+  MASK_ON: True
+  ROI_HEADS:
+    NUM_CLASSES: 8
+# This is similar to the setting used in Mask R-CNN paper, Appendix A
+# But there are some differences, e.g., we did not initialize the output
+# layer using the corresponding classes from COCO
+INPUT:
+  MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
+  MIN_SIZE_TRAIN_SAMPLING: "choice"
+  MIN_SIZE_TEST: 1024
+  MAX_SIZE_TRAIN: 2048
+  MAX_SIZE_TEST: 2048
+DATASETS:
+  TRAIN: ("cityscapes_fine_instance_seg_train",)
+  TEST: ("cityscapes_fine_instance_seg_val",)
+SOLVER:
+  BASE_LR: 0.005
+  STEPS: (36000,)
+  MAX_ITER: 48000
+  IMS_PER_BATCH: 4
+TEST:
+  EVAL_PERIOD: 48000
configs/Detectron1-Comparisons/README.md
ADDED
@@ -0,0 +1,84 @@
+
+Detectron2 model zoo's experimental settings and a few implementation details are different from Detectron.
+
+The differences in implementation details are shared in
+[Compatibility with Other Libraries](../../docs/notes/compatibility.md).
+
+The differences in model zoo's experimental settings include:
+* Use scale augmentation during training. This improves AP with lower training cost.
+* Use L1 loss instead of smooth L1 loss for simplicity. This sometimes improves box AP but may
+  affect other AP.
+* Use `POOLER_SAMPLING_RATIO=0` instead of 2. This does not significantly affect AP.
+* Use `ROIAlignV2`. This does not significantly affect AP.
+
+In this directory, we provide a few configs that __do not__ have the above changes.
+They mimic Detectron's behavior as closely as possible,
+and provide a fair comparison of accuracy and speed against Detectron.
+
+<!--
+./gen_html_table.py --config 'Detectron1-Comparisons/*.yaml' --name "Faster R-CNN" "Keypoint R-CNN" "Mask R-CNN" --fields lr_sched train_speed inference_speed mem box_AP mask_AP keypoint_AP --base-dir ../../../configs/Detectron1-Comparisons
+-->
+
+
+<table><tbody>
+<!-- START TABLE -->
+<!-- TABLE HEADER -->
+<th valign="bottom">Name</th>
+<th valign="bottom">lr<br/>sched</th>
+<th valign="bottom">train<br/>time<br/>(s/iter)</th>
+<th valign="bottom">inference<br/>time<br/>(s/im)</th>
+<th valign="bottom">train<br/>mem<br/>(GB)</th>
+<th valign="bottom">box<br/>AP</th>
+<th valign="bottom">mask<br/>AP</th>
+<th valign="bottom">kp.<br/>AP</th>
+<th valign="bottom">model id</th>
+<th valign="bottom">download</th>
+<!-- TABLE BODY -->
+<!-- ROW: faster_rcnn_R_50_FPN_noaug_1x -->
+<tr><td align="left"><a href="faster_rcnn_R_50_FPN_noaug_1x.yaml">Faster R-CNN</a></td>
+<td align="center">1x</td>
+<td align="center">0.219</td>
+<td align="center">0.038</td>
+<td align="center">3.1</td>
+<td align="center">36.9</td>
+<td align="center"></td>
+<td align="center"></td>
+<td align="center">137781054</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/model_final_7ab50c.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x/137781054/metrics.json">metrics</a></td>
+</tr>
+<!-- ROW: keypoint_rcnn_R_50_FPN_1x -->
+<tr><td align="left"><a href="keypoint_rcnn_R_50_FPN_1x.yaml">Keypoint R-CNN</a></td>
+<td align="center">1x</td>
+<td align="center">0.313</td>
+<td align="center">0.071</td>
+<td align="center">5.0</td>
+<td align="center">53.1</td>
+<td align="center"></td>
+<td align="center">64.2</td>
+<td align="center">137781195</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/model_final_cce136.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x/137781195/metrics.json">metrics</a></td>
+</tr>
+<!-- ROW: mask_rcnn_R_50_FPN_noaug_1x -->
+<tr><td align="left"><a href="mask_rcnn_R_50_FPN_noaug_1x.yaml">Mask R-CNN</a></td>
+<td align="center">1x</td>
+<td align="center">0.273</td>
+<td align="center">0.043</td>
+<td align="center">3.4</td>
+<td align="center">37.8</td>
+<td align="center">34.9</td>
+<td align="center"></td>
+<td align="center">137781281</td>
+<td align="center"><a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/model_final_62ca52.pkl">model</a> | <a href="https://dl.fbaipublicfiles.com/detectron2/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x/137781281/metrics.json">metrics</a></td>
+</tr>
+</tbody></table>
+
+## Comparisons:
+
+* Faster R-CNN: Detectron's AP is 36.7, similar to ours.
+* Keypoint R-CNN: Detectron's AP is box 53.6, keypoint 64.2. Fixing a Detectron
+  [bug](https://github.com/facebookresearch/Detectron/issues/459) leads to a drop in box AP, which can be
+  compensated for by some parameter tuning.
+* Mask R-CNN: Detectron's AP is box 37.7, mask 33.9. We're 1 AP better in mask AP, due to a more correct implementation.
+  See [this article](https://ppwwyyxx.com/blog/2021/Where-are-Pixels/) for details.
+
+For speed comparison, see [benchmarks](https://detectron2.readthedocs.io/notes/benchmarks.html).
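For concreteness, a minimal sketch of how the Detectron1-style settings listed in that README map onto detectron2 config keys, assuming the repo's config tree (cf. the noaug YAMLs that follow):

    from detectron2.config import get_cfg

    cfg = get_cfg()
    cfg.merge_from_file("configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml")
    # Detectron1-compatible choices restored by this config:
    print(cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO)  # 2, instead of the new default 0
    print(cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE)            # "ROIAlign", instead of "ROIAlignV2"
    print(cfg.INPUT.MIN_SIZE_TRAIN)                      # (800,): no scale augmentation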
configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml
ADDED
@@ -0,0 +1,17 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: False
+  RESNETS:
+    DEPTH: 50
+  # Detectron1 uses smooth L1 loss with some magic beta values.
+  # The defaults are changed to L1 loss in Detectron2.
+  RPN:
+    SMOOTH_L1_BETA: 0.1111
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 1.0
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+INPUT:
+  # no scale augmentation
+  MIN_SIZE_TRAIN: (800, )
configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml
ADDED
@@ -0,0 +1,27 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  KEYPOINT_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NUM_CLASSES: 1
+  ROI_KEYPOINT_HEAD:
+    POOLER_RESOLUTION: 14
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+  # Detectron1 uses smooth L1 loss with some magic beta values.
+  # The defaults are changed to L1 loss in Detectron2.
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 1.0
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+  RPN:
+    SMOOTH_L1_BETA: 0.1111
+    # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2
+    # 1000 proposals per-image is found to hurt box AP.
+    # Therefore we increase it to 1500 per-image.
+    POST_NMS_TOPK_TRAIN: 1500
+DATASETS:
+  TRAIN: ("keypoints_coco_2017_train",)
+  TEST: ("keypoints_coco_2017_val",)
configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml
ADDED
@@ -0,0 +1,20 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  # Detectron1 uses smooth L1 loss with some magic beta values.
+  # The defaults are changed to L1 loss in Detectron2.
+  RPN:
+    SMOOTH_L1_BETA: 0.1111
+  ROI_BOX_HEAD:
+    SMOOTH_L1_BETA: 1.0
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+  ROI_MASK_HEAD:
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+INPUT:
+  # no scale augmentation
+  MIN_SIZE_TRAIN: (800, )
configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
ADDED
@@ -0,0 +1,19 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 101
+  ROI_HEADS:
+    NUM_CLASSES: 1230
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v0.5_train",)
+  TEST: ("lvis_v0.5_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x_finetune.yaml
ADDED
@@ -0,0 +1,19 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "./model_final_824ab5.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 101
+  ROI_HEADS:
+    NUM_CLASSES: 1230
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v0.5_train",)
+  TEST: ("lvis_v0.5_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 150  #300 # LVIS allows up to 300
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
ADDED
@@ -0,0 +1,19 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NUM_CLASSES: 1230
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v0.5_train",)
+  TEST: ("lvis_v0.5_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 150  # LVIS allows up to 300
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
ADDED
@@ -0,0 +1,23 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+  PIXEL_STD: [57.375, 57.120, 58.395]
+  MASK_ON: True
+  RESNETS:
+    STRIDE_IN_1X1: False  # this is a C2 model
+    NUM_GROUPS: 32
+    WIDTH_PER_GROUP: 8
+    DEPTH: 101
+  ROI_HEADS:
+    NUM_CLASSES: 1230
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v0.5_train",)
+  TEST: ("lvis_v0.5_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml
ADDED
@@ -0,0 +1,22 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 101
+  ROI_HEADS:
+    NUM_CLASSES: 1203
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v1_train",)
+  TEST: ("lvis_v1_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+SOLVER:
+  STEPS: (120000, 160000)
+  MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml
ADDED
@@ -0,0 +1,22 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NUM_CLASSES: 1203
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v1_train",)
+  TEST: ("lvis_v1_val",)
+TEST:
+  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+SOLVER:
+  STEPS: (120000, 160000)
+  MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml
ADDED
@@ -0,0 +1,26 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
+  PIXEL_STD: [57.375, 57.120, 58.395]
+  MASK_ON: True
+  RESNETS:
+    STRIDE_IN_1X1: False  # this is a C2 model
+    NUM_GROUPS: 32
+    WIDTH_PER_GROUP: 8
+    DEPTH: 101
+  ROI_HEADS:
+    NUM_CLASSES: 1203
+    SCORE_THRESH_TEST: 0.0001
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
+DATASETS:
+  TRAIN: ("lvis_v1_train",)
+  TEST: ("lvis_v1_val",)
+SOLVER:
+  STEPS: (120000, 160000)
+  MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
+TEST:
+  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
+DATALOADER:
+  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
+  REPEAT_THRESHOLD: 0.001
configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml
ADDED
@@ -0,0 +1,12 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NAME: CascadeROIHeads
+  ROI_BOX_HEAD:
+    CLS_AGNOSTIC_BBOX_REG: True
+  RPN:
+    POST_NMS_TOPK_TRAIN: 2000
configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml
ADDED
@@ -0,0 +1,15 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_HEADS:
+    NAME: CascadeROIHeads
+  ROI_BOX_HEAD:
+    CLS_AGNOSTIC_BBOX_REG: True
+  RPN:
+    POST_NMS_TOPK_TRAIN: 2000
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml
ADDED
@@ -0,0 +1,36 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  MASK_ON: True
+  WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
+  RESNETS:
+    STRIDE_IN_1X1: False  # this is a C2 model
+    NUM_GROUPS: 32
+    WIDTH_PER_GROUP: 8
+    DEPTH: 152
+    DEFORM_ON_PER_STAGE: [False, True, True, True]
+  ROI_HEADS:
+    NAME: "CascadeROIHeads"
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_CONV: 4
+    NUM_FC: 1
+    NORM: "GN"
+    CLS_AGNOSTIC_BBOX_REG: True
+  ROI_MASK_HEAD:
+    NUM_CONV: 8
+    NORM: "GN"
+  RPN:
+    POST_NMS_TOPK_TRAIN: 2000
+SOLVER:
+  IMS_PER_BATCH: 128
+  STEPS: (35000, 45000)
+  MAX_ITER: 50000
+  BASE_LR: 0.16
+INPUT:
+  MIN_SIZE_TRAIN: (640, 864)
+  MIN_SIZE_TRAIN_SAMPLING: "range"
+  MAX_SIZE_TRAIN: 1440
+  CROP:
+    ENABLED: True
+TEST:
+  EVAL_PERIOD: 2500
configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml
ADDED
@@ -0,0 +1,10 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+  ROI_BOX_HEAD:
+    CLS_AGNOSTIC_BBOX_REG: True
+  ROI_MASK_HEAD:
+    CLS_AGNOSTIC_MASK: True
configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml
ADDED
@@ -0,0 +1,8 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+    DEFORM_ON_PER_STAGE: [False, True, True, True]  # on Res3,Res4,Res5
+    DEFORM_MODULATED: False
configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml
ADDED
@@ -0,0 +1,11 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+    DEFORM_ON_PER_STAGE: [False, True, True, True]  # on Res3,Res4,Res5
+    DEFORM_MODULATED: False
+SOLVER:
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5_4gpu.yaml
ADDED
@@ -0,0 +1,11 @@
+_BASE_: "../Base-RCNN-FPN-4gpu.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+    DEFORM_ON_PER_STAGE: [False, True, True, True]  # on Res3,Res4,Res5
+    DEFORM_MODULATED: False
+SOLVER:
+  STEPS: (420000, 500000)  # (210000, 250000)
+  MAX_ITER: 540000  # 270000
configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml
ADDED
@@ -0,0 +1,21 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+    NORM: "GN"
+    STRIDE_IN_1X1: False
+  FPN:
+    NORM: "GN"
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_CONV: 4
+    NUM_FC: 1
+    NORM: "GN"
+  ROI_MASK_HEAD:
+    NORM: "GN"
+SOLVER:
+  # 3x schedule
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml
ADDED
@@ -0,0 +1,24 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  MASK_ON: True
+  RESNETS:
+    DEPTH: 50
+    NORM: "SyncBN"
+    STRIDE_IN_1X1: True
+  FPN:
+    NORM: "SyncBN"
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_CONV: 4
+    NUM_FC: 1
+    NORM: "SyncBN"
+  ROI_MASK_HEAD:
+    NORM: "SyncBN"
+SOLVER:
+  # 3x schedule
+  STEPS: (210000, 250000)
+  MAX_ITER: 270000
+TEST:
+  PRECISE_BN:
+    ENABLED: True
configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py
ADDED
@@ -0,0 +1,151 @@
+# An example config to train a mmdetection model using detectron2.
+
+from ..common.data.coco import dataloader
+from ..common.coco_schedule import lr_multiplier_1x as lr_multiplier
+from ..common.optim import SGD as optimizer
+from ..common.train import train
+
+from detectron2.modeling.mmdet_wrapper import MMDetDetector
+from detectron2.config import LazyCall as L
+
+model = L(MMDetDetector)(
+    detector=dict(
+        type="MaskRCNN",
+        pretrained="torchvision://resnet50",
+        backbone=dict(
+            type="ResNet",
+            depth=50,
+            num_stages=4,
+            out_indices=(0, 1, 2, 3),
+            frozen_stages=1,
+            norm_cfg=dict(type="BN", requires_grad=True),
+            norm_eval=True,
+            style="pytorch",
+        ),
+        neck=dict(type="FPN", in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5),
+        rpn_head=dict(
+            type="RPNHead",
+            in_channels=256,
+            feat_channels=256,
+            anchor_generator=dict(
+                type="AnchorGenerator",
+                scales=[8],
+                ratios=[0.5, 1.0, 2.0],
+                strides=[4, 8, 16, 32, 64],
+            ),
+            bbox_coder=dict(
+                type="DeltaXYWHBBoxCoder",
+                target_means=[0.0, 0.0, 0.0, 0.0],
+                target_stds=[1.0, 1.0, 1.0, 1.0],
+            ),
+            loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
+            loss_bbox=dict(type="L1Loss", loss_weight=1.0),
+        ),
+        roi_head=dict(
+            type="StandardRoIHead",
+            bbox_roi_extractor=dict(
+                type="SingleRoIExtractor",
+                roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0),
+                out_channels=256,
+                featmap_strides=[4, 8, 16, 32],
+            ),
+            bbox_head=dict(
+                type="Shared2FCBBoxHead",
+                in_channels=256,
+                fc_out_channels=1024,
+                roi_feat_size=7,
+                num_classes=80,
+                bbox_coder=dict(
+                    type="DeltaXYWHBBoxCoder",
+                    target_means=[0.0, 0.0, 0.0, 0.0],
+                    target_stds=[0.1, 0.1, 0.2, 0.2],
+                ),
+                reg_class_agnostic=False,
+                loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
+                loss_bbox=dict(type="L1Loss", loss_weight=1.0),
+            ),
+            mask_roi_extractor=dict(
+                type="SingleRoIExtractor",
+                roi_layer=dict(type="RoIAlign", output_size=14, sampling_ratio=0),
+                out_channels=256,
+                featmap_strides=[4, 8, 16, 32],
+            ),
+            mask_head=dict(
+                type="FCNMaskHead",
+                num_convs=4,
+                in_channels=256,
+                conv_out_channels=256,
+                num_classes=80,
+                loss_mask=dict(type="CrossEntropyLoss", use_mask=True, loss_weight=1.0),
+            ),
+        ),
+        # model training and testing settings
+        train_cfg=dict(
+            rpn=dict(
+                assigner=dict(
+                    type="MaxIoUAssigner",
+                    pos_iou_thr=0.7,
+                    neg_iou_thr=0.3,
+                    min_pos_iou=0.3,
+                    match_low_quality=True,
+                    ignore_iof_thr=-1,
+                ),
+                sampler=dict(
+                    type="RandomSampler",
+                    num=256,
+                    pos_fraction=0.5,
+                    neg_pos_ub=-1,
+                    add_gt_as_proposals=False,
+                ),
+                allowed_border=-1,
+                pos_weight=-1,
+                debug=False,
+            ),
+            rpn_proposal=dict(
+                nms_pre=2000,
+                max_per_img=1000,
+                nms=dict(type="nms", iou_threshold=0.7),
+                min_bbox_size=0,
+            ),
+            rcnn=dict(
+                assigner=dict(
+                    type="MaxIoUAssigner",
+                    pos_iou_thr=0.5,
+                    neg_iou_thr=0.5,
+                    min_pos_iou=0.5,
+                    match_low_quality=True,
+                    ignore_iof_thr=-1,
+                ),
+                sampler=dict(
+                    type="RandomSampler",
+                    num=512,
+                    pos_fraction=0.25,
+                    neg_pos_ub=-1,
+                    add_gt_as_proposals=True,
+                ),
+                mask_size=28,
+                pos_weight=-1,
+                debug=False,
+            ),
+        ),
+        test_cfg=dict(
+            rpn=dict(
+                nms_pre=1000,
+                max_per_img=1000,
+                nms=dict(type="nms", iou_threshold=0.7),
+                min_bbox_size=0,
+            ),
+            rcnn=dict(
+                score_thr=0.05,
+                nms=dict(type="nms", iou_threshold=0.5),
+                max_per_img=100,
+                mask_thr_binary=0.5,
+            ),
+        ),
+    ),
+    pixel_mean=[123.675, 116.280, 103.530],
+    pixel_std=[58.395, 57.120, 57.375],
+)
+
+dataloader.train.mapper.image_format = "RGB"  # torchvision pretrained model
+train.init_checkpoint = None  # pretrained model is loaded inside backbone
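Like the other lazy configs in this commit, this file is meant to be loaded rather than merged. A minimal sketch of consuming it programmatically, assuming detectron2 and mmdet are both installed and the path is relative to the repo root:

    from detectron2.config import LazyConfig, instantiate

    cfg = LazyConfig.load("configs/Misc/mmdet_mask_rcnn_R_50_FPN_1x.py")
    cfg.model.detector.roi_head.bbox_head.num_classes = 80  # fields stay editable before build
    mmdet_model = instantiate(cfg.model)  # builds the MMDetDetector wrapper around mmdet's MaskRCNN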
configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml
ADDED
@@ -0,0 +1,26 @@
+# A large PanopticFPN for demo purposes.
+# Use GN on backbone to support semantic seg.
+# Use Cascade + Deform Conv to improve localization.
+_BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml"
+MODEL:
+  WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN"
+  RESNETS:
+    DEPTH: 101
+    NORM: "GN"
+    DEFORM_ON_PER_STAGE: [False, True, True, True]
+    STRIDE_IN_1X1: False
+  FPN:
+    NORM: "GN"
+  ROI_HEADS:
+    NAME: CascadeROIHeads
+  ROI_BOX_HEAD:
+    CLS_AGNOSTIC_BBOX_REG: True
+  ROI_MASK_HEAD:
+    NORM: "GN"
+  RPN:
+    POST_NMS_TOPK_TRAIN: 2000
+SOLVER:
+  STEPS: (105000, 125000)
+  MAX_ITER: 135000
+  IMS_PER_BATCH: 32
+  BASE_LR: 0.04
configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml
ADDED
@@ -0,0 +1,13 @@
+_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
+MODEL:
+  # Train from random initialization.
+  WEIGHTS: ""
+  # It makes sense to divide by STD when training from scratch
+  # But it seems to make no difference on the results and C2's models didn't do this.
+  # So we keep things consistent with C2.
+  # PIXEL_STD: [57.375, 57.12, 58.395]
+  MASK_ON: True
+  BACKBONE:
+    FREEZE_AT: 0
+# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+# to learn what you need for training from scratch.
configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml
ADDED
@@ -0,0 +1,19 @@
+_BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
+MODEL:
+  PIXEL_STD: [57.375, 57.12, 58.395]
+  WEIGHTS: ""
+  MASK_ON: True
+  RESNETS:
+    STRIDE_IN_1X1: False
+  BACKBONE:
+    FREEZE_AT: 0
+SOLVER:
+  # 9x schedule
+  IMS_PER_BATCH: 64  # 4x the standard
+  STEPS: (187500, 197500)  # last 60/4==15k and last 20/4==5k
+  MAX_ITER: 202500  # 90k * 9 / 4
+  BASE_LR: 0.08
+TEST:
+  EVAL_PERIOD: 2500
+# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+# to learn what you need for training from scratch.
configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml
ADDED
@@ -0,0 +1,19 @@
+_BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml"
+MODEL:
+  PIXEL_STD: [57.375, 57.12, 58.395]
+  WEIGHTS: ""
+  MASK_ON: True
+  RESNETS:
+    STRIDE_IN_1X1: False
+  BACKBONE:
+    FREEZE_AT: 0
+SOLVER:
+  # 9x schedule
+  IMS_PER_BATCH: 64  # 4x the standard
+  STEPS: (187500, 197500)  # last 60/4==15k and last 20/4==5k
+  MAX_ITER: 202500  # 90k * 9 / 4
+  BASE_LR: 0.08
+TEST:
+  EVAL_PERIOD: 2500
+# NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
+# to learn what you need for training from scratch.
configs/Misc/semantic_R_50_FPN_1x.yaml
ADDED
@@ -0,0 +1,11 @@
+_BASE_: "../Base-RCNN-FPN.yaml"
+MODEL:
+  META_ARCHITECTURE: "SemanticSegmentor"
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+DATASETS:
+  TRAIN: ("coco_2017_train_panoptic_stuffonly",)
+  TEST: ("coco_2017_val_panoptic_stuffonly",)
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
configs/Misc/torchvision_imagenet_R_50.py
ADDED
@@ -0,0 +1,150 @@
+"""
+An example config file to train an ImageNet classifier with detectron2.
+Model and dataloader both come from torchvision.
+This shows how to use detectron2 as a general engine for any new models and tasks.
+
+To run, use the following command:
+
+python tools/lazyconfig_train_net.py --config-file configs/Misc/torchvision_imagenet_R_50.py \
+    --num-gpus 8 dataloader.train.dataset.root=/path/to/imagenet/
+
+"""
+
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+from omegaconf import OmegaConf
+import torchvision
+from torchvision.transforms import transforms as T
+from torchvision.models.resnet import ResNet, Bottleneck
+from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+from detectron2.solver import WarmupParamScheduler
+from detectron2.solver.build import get_default_optimizer_params
+from detectron2.config import LazyCall as L
+from detectron2.model_zoo import get_config
+from detectron2.data.samplers import TrainingSampler, InferenceSampler
+from detectron2.evaluation import DatasetEvaluator
+from detectron2.utils import comm
+
+
+"""
+Note: Here we put reusable code (models, evaluation, data) together with configs just as a
+proof-of-concept, to easily demonstrate what's needed to train an ImageNet classifier in detectron2.
+Writing code in configs offers extreme flexibility but is often not a good engineering practice.
+In practice, you might want to put code in your project and import them instead.
+"""
+
+
+def build_data_loader(dataset, batch_size, num_workers, training=True):
+    return torch.utils.data.DataLoader(
+        dataset,
+        sampler=(TrainingSampler if training else InferenceSampler)(len(dataset)),
+        batch_size=batch_size,
+        num_workers=num_workers,
+        pin_memory=True,
+    )
+
+
+class ClassificationNet(nn.Module):
+    def __init__(self, model: nn.Module):
+        super().__init__()
+        self.model = model
+
+    @property
+    def device(self):
+        return list(self.model.parameters())[0].device
+
+    def forward(self, inputs):
+        image, label = inputs
+        pred = self.model(image.to(self.device))
+        if self.training:
+            label = label.to(self.device)
+            return F.cross_entropy(pred, label)
+        else:
+            return pred
+
+
+class ClassificationAcc(DatasetEvaluator):
+    def reset(self):
+        self.corr = self.total = 0
+
+    def process(self, inputs, outputs):
+        image, label = inputs
+        self.corr += (outputs.argmax(dim=1).cpu() == label.cpu()).sum().item()
+        self.total += len(label)
+
+    def evaluate(self):
+        all_corr_total = comm.all_gather([self.corr, self.total])
+        corr = sum(x[0] for x in all_corr_total)
+        total = sum(x[1] for x in all_corr_total)
+        return {"accuracy": corr / total}
+
+
+# --- End of code that could be in a project and be imported
+
+
+dataloader = OmegaConf.create()
+dataloader.train = L(build_data_loader)(
+    dataset=L(torchvision.datasets.ImageNet)(
+        root="/path/to/imagenet",
+        split="train",
+        transform=L(T.Compose)(
+            transforms=[
+                L(T.RandomResizedCrop)(size=224),
+                L(T.RandomHorizontalFlip)(),
+                T.ToTensor(),
+                L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+            ]
+        ),
+    ),
+    batch_size=256 // 8,
+    num_workers=4,
+    training=True,
+)
+
+dataloader.test = L(build_data_loader)(
+    dataset=L(torchvision.datasets.ImageNet)(
+        root="${...train.dataset.root}",
+        split="val",
+        transform=L(T.Compose)(
+            transforms=[
+                L(T.Resize)(size=256),
+                L(T.CenterCrop)(size=224),
+                T.ToTensor(),
+                L(T.Normalize)(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+            ]
+        ),
+    ),
+    batch_size=256 // 8,
+    num_workers=4,
+    training=False,
+)
+
+dataloader.evaluator = L(ClassificationAcc)()
+
+model = L(ClassificationNet)(
+    model=(ResNet)(block=Bottleneck, layers=[3, 4, 6, 3], zero_init_residual=True)
+)
+
+
+optimizer = L(torch.optim.SGD)(
+    params=L(get_default_optimizer_params)(),
+    lr=0.1,
+    momentum=0.9,
+    weight_decay=1e-4,
+)
+
+lr_multiplier = L(WarmupParamScheduler)(
+    scheduler=L(MultiStepParamScheduler)(
+        values=[1.0, 0.1, 0.01, 0.001], milestones=[30, 60, 90, 100]
+    ),
+    warmup_length=1 / 100,
+    warmup_factor=0.1,
+)
+
+
+train = get_config("common/train.py").train
+train.init_checkpoint = None
+train.max_iter = 100 * 1281167 // 256
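A quick note on the last line above: 1281167 is the size of the ImageNet-1k train split and 256 the global batch size, so train.max_iter corresponds to 100 epochs. A one-line check:

    # iterations for 100 epochs at global batch size 256
    max_iter = 100 * 1281167 // 256   # = 500455 iterations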
configs/common/README.md
ADDED
@@ -0,0 +1,6 @@
+This directory provides definitions for a few common models, dataloaders, scheduler,
+and optimizers that are often used in training.
+The definitions of these objects are provided in the form of lazy instantiation:
+their arguments can be edited by users before constructing the objects.
+
+They can be imported, or loaded by `model_zoo.get_config` API in users' own configs.
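To make the lazy-instantiation idea concrete, a minimal sketch using detectron2's LazyCall and instantiate (editorial illustration; the module and learning rates are arbitrary):

    from detectron2.config import LazyCall as L, instantiate
    import torch
    from torch import nn

    net = nn.Linear(4, 2)
    # A LazyCall records the callable and its arguments without invoking it.
    opt_cfg = L(torch.optim.SGD)(params=net.parameters(), lr=0.1, momentum=0.9)
    opt_cfg.lr = 0.02                 # arguments stay editable before construction
    optimizer = instantiate(opt_cfg)  # only now is torch.optim.SGD actually called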
configs/common/coco_schedule.py
ADDED
@@ -0,0 +1,47 @@
+from fvcore.common.param_scheduler import MultiStepParamScheduler
+
+from detectron2.config import LazyCall as L
+from detectron2.solver import WarmupParamScheduler
+
+
+def default_X_scheduler(num_X):
+    """
+    Returns the config for a default multi-step LR scheduler such as "1x", "3x",
+    commonly referred to in papers, where every 1x has the total length of 1440k
+    training images (~12 COCO epochs). LR is decayed twice at the end of training
+    following the strategy defined in "Rethinking ImageNet Pretraining", Sec 4.
+
+    Args:
+        num_X: a positive real number
+
+    Returns:
+        DictConfig: configs that define the multiplier for LR during training
+    """
+    # total number of iterations assuming 16 batch size, using 1440000/16=90000
+    total_steps_16bs = num_X * 90000
+
+    if num_X <= 2:
+        scheduler = L(MultiStepParamScheduler)(
+            values=[1.0, 0.1, 0.01],
+            # note that scheduler is scale-invariant. This is equivalent to
+            # milestones=[6, 8, 9]
+            milestones=[60000, 80000, 90000],
+        )
+    else:
+        scheduler = L(MultiStepParamScheduler)(
+            values=[1.0, 0.1, 0.01],
+            milestones=[total_steps_16bs - 60000, total_steps_16bs - 20000, total_steps_16bs],
+        )
+    return L(WarmupParamScheduler)(
+        scheduler=scheduler,
+        warmup_length=1000 / total_steps_16bs,
+        warmup_method="linear",
+        warmup_factor=0.001,
+    )
+
+
+lr_multiplier_1x = default_X_scheduler(1)
+lr_multiplier_2x = default_X_scheduler(2)
+lr_multiplier_3x = default_X_scheduler(3)
+lr_multiplier_6x = default_X_scheduler(6)
+lr_multiplier_9x = default_X_scheduler(9)
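As a quick sanity check of the arithmetic above: for a 3x schedule, total_steps_16bs = 3 * 90000 = 270000, so the decay milestones land at 210000 and 250000, matching the STEPS/MAX_ITER pairs in the 3x YAML configs earlier in this diff:

    # 3x schedule: milestones produced by default_X_scheduler(3)
    total_steps_16bs = 3 * 90000                  # 270000 iterations at batch size 16
    milestones = [total_steps_16bs - 60000,       # 210000: first LR drop (x0.1)
                  total_steps_16bs - 20000,       # 250000: second LR drop (x0.01)
                  total_steps_16bs]               # 270000: end of training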
configs/common/data/coco.py
ADDED
@@ -0,0 +1,48 @@
+from omegaconf import OmegaConf
+
+import detectron2.data.transforms as T
+from detectron2.config import LazyCall as L
+from detectron2.data import (
+    DatasetMapper,
+    build_detection_test_loader,
+    build_detection_train_loader,
+    get_detection_dataset_dicts,
+)
+from detectron2.evaluation import COCOEvaluator
+
+dataloader = OmegaConf.create()
+
+dataloader.train = L(build_detection_train_loader)(
+    dataset=L(get_detection_dataset_dicts)(names="coco_2017_train"),
+    mapper=L(DatasetMapper)(
+        is_train=True,
+        augmentations=[
+            L(T.ResizeShortestEdge)(
+                short_edge_length=(640, 672, 704, 736, 768, 800),
+                sample_style="choice",
+                max_size=1333,
+            ),
+            L(T.RandomFlip)(horizontal=True),
+        ],
+        image_format="BGR",
+        use_instance_mask=True,
+    ),
+    total_batch_size=16,
+    num_workers=4,
+)
+
+dataloader.test = L(build_detection_test_loader)(
+    dataset=L(get_detection_dataset_dicts)(names="coco_2017_val", filter_empty=False),
+    mapper=L(DatasetMapper)(
+        is_train=False,
+        augmentations=[
+            L(T.ResizeShortestEdge)(short_edge_length=800, max_size=1333),
+        ],
+        image_format="${...train.mapper.image_format}",
+    ),
+    num_workers=4,
+)
+
+dataloader.evaluator = L(COCOEvaluator)(
+    dataset_name="${..test.dataset.names}",
+)
configs/common/data/coco_keypoint.py
ADDED
@@ -0,0 +1,13 @@
+from detectron2.data.detection_utils import create_keypoint_hflip_indices
+
+from .coco import dataloader
+
+dataloader.train.dataset.min_keypoints = 1
+dataloader.train.dataset.names = "keypoints_coco_2017_train"
+dataloader.test.dataset.names = "keypoints_coco_2017_val"
+
+dataloader.train.mapper.update(
+    use_instance_mask=False,
+    use_keypoint=True,
+    keypoint_hflip_indices=create_keypoint_hflip_indices(dataloader.train.dataset.names),
+)
configs/common/data/coco_panoptic_separated.py
ADDED
@@ -0,0 +1,26 @@
+from detectron2.config import LazyCall as L
+from detectron2.evaluation import (
+    COCOEvaluator,
+    COCOPanopticEvaluator,
+    DatasetEvaluators,
+    SemSegEvaluator,
+)
+
+from .coco import dataloader
+
+dataloader.train.dataset.names = "coco_2017_train_panoptic_separated"
+dataloader.train.dataset.filter_empty = False
+dataloader.test.dataset.names = "coco_2017_val_panoptic_separated"
+
+
+dataloader.evaluator = [
+    L(COCOEvaluator)(
+        dataset_name="${...test.dataset.names}",
+    ),
+    L(SemSegEvaluator)(
+        dataset_name="${...test.dataset.names}",
+    ),
+    L(COCOPanopticEvaluator)(
+        dataset_name="${...test.dataset.names}",
+    ),
+]
configs/common/models/cascade_rcnn.py
ADDED
@@ -0,0 +1,36 @@
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.roi_heads import FastRCNNOutputLayers, FastRCNNConvFCHead, CascadeROIHeads

from .mask_rcnn_fpn import model

# arguments that don't exist for Cascade R-CNN
[model.roi_heads.pop(k) for k in ["box_head", "box_predictor", "proposal_matcher"]]

model.roi_heads.update(
    _target_=CascadeROIHeads,
    box_heads=[
        L(FastRCNNConvFCHead)(
            input_shape=ShapeSpec(channels=256, height=7, width=7),
            conv_dims=[],
            fc_dims=[1024, 1024],
        )
        for k in range(3)
    ],
    box_predictors=[
        L(FastRCNNOutputLayers)(
            input_shape=ShapeSpec(channels=1024),
            test_score_thresh=0.05,
            box2box_transform=L(Box2BoxTransform)(weights=(w1, w1, w2, w2)),
            cls_agnostic_bbox_reg=True,
            num_classes="${...num_classes}",
        )
        for (w1, w2) in [(10, 5), (20, 10), (30, 15)]
    ],
    proposal_matchers=[
        L(Matcher)(thresholds=[th], labels=[0, 1], allow_low_quality_matches=False)
        for th in [0.5, 0.6, 0.7]
    ],
)
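Illustration only: the three comprehensions above line up per cascade stage, so each stage trains with a stricter IoU matcher and tighter box-delta weights than the one before it:

# Prints the stage pairing implied by the config above (illustration only).
for i, (th, (w1, w2)) in enumerate(zip([0.5, 0.6, 0.7], [(10, 5), (20, 10), (30, 15)])):
    print(f"stage {i}: IoU >= {th}, Box2BoxTransform weights = {(w1, w1, w2, w2)}")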
configs/common/models/keypoint_rcnn_fpn.py
ADDED
@@ -0,0 +1,33 @@
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.roi_heads import KRCNNConvDeconvUpsampleHead

from .mask_rcnn_fpn import model

[model.roi_heads.pop(x) for x in ["mask_in_features", "mask_pooler", "mask_head"]]

model.roi_heads.update(
    num_classes=1,
    keypoint_in_features=["p2", "p3", "p4", "p5"],
    keypoint_pooler=L(ROIPooler)(
        output_size=14,
        scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
        sampling_ratio=0,
        pooler_type="ROIAlignV2",
    ),
    keypoint_head=L(KRCNNConvDeconvUpsampleHead)(
        input_shape=ShapeSpec(channels=256, width=14, height=14),
        num_keypoints=17,
        conv_dims=[512] * 8,
        loss_normalizer="visible",
    ),
)

# Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
# 1000 proposals per-image is found to hurt box AP.
# Therefore we increase it to 1500 per-image.
model.proposal_generator.post_nms_topk = (1500, 1000)

# Keypoint AP degrades (though box AP improves) when using plain L1 loss
model.roi_heads.box_predictor.smooth_l1_beta = 0.5
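A minimal sketch of building this model, assuming detectron2 is installed; note that post_nms_topk is a (train, test) pair, so only training uses the 1500-proposal budget:

from detectron2.config import LazyConfig, instantiate

cfg = LazyConfig.load("configs/common/models/keypoint_rcnn_fpn.py")
model = instantiate(cfg.model)  # Keypoint R-CNN with 17 COCO keypoints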
configs/common/models/mask_rcnn_c4.py
ADDED
@@ -0,0 +1,88 @@
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.meta_arch import GeneralizedRCNN
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.backbone import BasicStem, BottleneckBlock, ResNet
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
from detectron2.modeling.roi_heads import (
    FastRCNNOutputLayers,
    MaskRCNNConvUpsampleHead,
    Res5ROIHeads,
)

model = L(GeneralizedRCNN)(
    backbone=L(ResNet)(
        stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
        stages=L(ResNet.make_default_stages)(
            depth=50,
            stride_in_1x1=True,
            norm="FrozenBN",
        ),
        out_features=["res4"],
    ),
    proposal_generator=L(RPN)(
        in_features=["res4"],
        head=L(StandardRPNHead)(in_channels=1024, num_anchors=15),
        anchor_generator=L(DefaultAnchorGenerator)(
            sizes=[[32, 64, 128, 256, 512]],
            aspect_ratios=[0.5, 1.0, 2.0],
            strides=[16],
            offset=0.0,
        ),
        anchor_matcher=L(Matcher)(
            thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
        ),
        box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
        batch_size_per_image=256,
        positive_fraction=0.5,
        pre_nms_topk=(12000, 6000),
        post_nms_topk=(2000, 1000),
        nms_thresh=0.7,
    ),
    roi_heads=L(Res5ROIHeads)(
        num_classes=80,
        batch_size_per_image=512,
        positive_fraction=0.25,
        proposal_matcher=L(Matcher)(
            thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
        ),
        in_features=["res4"],
        pooler=L(ROIPooler)(
            output_size=14,
            scales=(1.0 / 16,),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        res5=L(ResNet.make_stage)(
            block_class=BottleneckBlock,
            num_blocks=3,
            stride_per_block=[2, 1, 1],
            in_channels=1024,
            bottleneck_channels=512,
            out_channels=2048,
            norm="FrozenBN",
            stride_in_1x1=True,
        ),
        box_predictor=L(FastRCNNOutputLayers)(
            input_shape=L(ShapeSpec)(channels="${...res5.out_channels}", height=1, width=1),
            test_score_thresh=0.05,
            box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
            num_classes="${..num_classes}",
        ),
        mask_head=L(MaskRCNNConvUpsampleHead)(
            input_shape=L(ShapeSpec)(
                channels="${...res5.out_channels}",
                width="${...pooler.output_size}",
                height="${...pooler.output_size}",
            ),
            num_classes="${..num_classes}",
            conv_dims=[256],
        ),
    ),
    pixel_mean=[103.530, 116.280, 123.675],
    pixel_std=[1.0, 1.0, 1.0],
    input_format="BGR",
)
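Illustration only: unlike the FPN variants below, this C4 model places every anchor on the single "res4" level, which is why the RPN head is configured with 15 anchors per location:

# 5 sizes x 3 aspect ratios on one feature level:
num_anchors = len([32, 64, 128, 256, 512]) * len([0.5, 1.0, 2.0])
assert num_anchors == 15  # matches StandardRPNHead(in_channels=1024, num_anchors=15)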
configs/common/models/mask_rcnn_fpn.py
ADDED
@@ -0,0 +1,93 @@
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.meta_arch import GeneralizedRCNN
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.backbone.fpn import LastLevelMaxPool
from detectron2.modeling.backbone import BasicStem, FPN, ResNet
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.proposal_generator import RPN, StandardRPNHead
from detectron2.modeling.roi_heads import (
    StandardROIHeads,
    FastRCNNOutputLayers,
    MaskRCNNConvUpsampleHead,
    FastRCNNConvFCHead,
)

model = L(GeneralizedRCNN)(
    backbone=L(FPN)(
        bottom_up=L(ResNet)(
            stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
            stages=L(ResNet.make_default_stages)(
                depth=50,
                stride_in_1x1=True,
                norm="FrozenBN",
            ),
            out_features=["res2", "res3", "res4", "res5"],
        ),
        in_features="${.bottom_up.out_features}",
        out_channels=256,
        top_block=L(LastLevelMaxPool)(),
    ),
    proposal_generator=L(RPN)(
        in_features=["p2", "p3", "p4", "p5", "p6"],
        head=L(StandardRPNHead)(in_channels=256, num_anchors=3),
        anchor_generator=L(DefaultAnchorGenerator)(
            sizes=[[32], [64], [128], [256], [512]],
            aspect_ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64],
            offset=0.0,
        ),
        anchor_matcher=L(Matcher)(
            thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
        ),
        box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
        batch_size_per_image=256,
        positive_fraction=0.5,
        pre_nms_topk=(2000, 1000),
        post_nms_topk=(1000, 1000),
        nms_thresh=0.7,
    ),
    roi_heads=L(StandardROIHeads)(
        num_classes=80,
        batch_size_per_image=512,
        positive_fraction=0.25,
        proposal_matcher=L(Matcher)(
            thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
        ),
        box_in_features=["p2", "p3", "p4", "p5"],
        box_pooler=L(ROIPooler)(
            output_size=7,
            scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        box_head=L(FastRCNNConvFCHead)(
            input_shape=ShapeSpec(channels=256, height=7, width=7),
            conv_dims=[],
            fc_dims=[1024, 1024],
        ),
        box_predictor=L(FastRCNNOutputLayers)(
            input_shape=ShapeSpec(channels=1024),
            test_score_thresh=0.05,
            box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
            num_classes="${..num_classes}",
        ),
        mask_in_features=["p2", "p3", "p4", "p5"],
        mask_pooler=L(ROIPooler)(
            output_size=14,
            scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        mask_head=L(MaskRCNNConvUpsampleHead)(
            input_shape=ShapeSpec(channels=256, width=14, height=14),
            num_classes="${..num_classes}",
            conv_dims=[256, 256, 256, 256, 256],
        ),
    ),
    pixel_mean=[103.530, 116.280, 123.675],
    pixel_std=[1.0, 1.0, 1.0],
    input_format="BGR",
)
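Because box_predictor and mask_head both interpolate "${..num_classes}", retargeting this model to another dataset needs only one assignment. A sketch with a hypothetical 20-class dataset:

# Hypothetical override, e.g. in a downstream config that imports `model`:
model.roi_heads.num_classes = 20  # box_predictor and mask_head follow automatically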
configs/common/models/panoptic_fpn.py
ADDED
@@ -0,0 +1,20 @@
from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling import PanopticFPN
from detectron2.modeling.meta_arch.semantic_seg import SemSegFPNHead

from .mask_rcnn_fpn import model

model._target_ = PanopticFPN
model.sem_seg_head = L(SemSegFPNHead)(
    input_shape={
        f: L(ShapeSpec)(stride=s, channels="${....backbone.out_channels}")
        for f, s in zip(["p2", "p3", "p4", "p5"], [4, 8, 16, 32])
    },
    ignore_value=255,
    num_classes=54,  # COCO stuff + 1
    conv_dims=128,
    common_stride=4,
    loss_weight=0.5,
    norm="GN",
)
configs/common/models/retinanet.py
ADDED
@@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-

from detectron2.config import LazyCall as L
from detectron2.layers import ShapeSpec
from detectron2.modeling.meta_arch import RetinaNet
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.backbone.fpn import LastLevelP6P7
from detectron2.modeling.backbone import BasicStem, FPN, ResNet
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.meta_arch.retinanet import RetinaNetHead

model = L(RetinaNet)(
    backbone=L(FPN)(
        bottom_up=L(ResNet)(
            stem=L(BasicStem)(in_channels=3, out_channels=64, norm="FrozenBN"),
            stages=L(ResNet.make_default_stages)(
                depth=50,
                stride_in_1x1=True,
                norm="FrozenBN",
            ),
            out_features=["res3", "res4", "res5"],
        ),
        in_features=["res3", "res4", "res5"],
        out_channels=256,
        top_block=L(LastLevelP6P7)(in_channels=2048, out_channels="${..out_channels}"),
    ),
    head=L(RetinaNetHead)(
        input_shape=[ShapeSpec(channels=256)],
        num_classes="${..num_classes}",
        conv_dims=[256, 256, 256, 256],
        prior_prob=0.01,
        num_anchors=9,
    ),
    anchor_generator=L(DefaultAnchorGenerator)(
        sizes=[[x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] for x in [32, 64, 128, 256, 512]],
        aspect_ratios=[0.5, 1.0, 2.0],
        strides=[8, 16, 32, 64, 128],
        offset=0.0,
    ),
    box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
    anchor_matcher=L(Matcher)(
        thresholds=[0.4, 0.5], labels=[0, -1, 1], allow_low_quality_matches=True
    ),
    num_classes=80,
    head_in_features=["p3", "p4", "p5", "p6", "p7"],
    focal_loss_alpha=0.25,
    focal_loss_gamma=2.0,
    pixel_mean=[103.530, 116.280, 123.675],
    pixel_std=[1.0, 1.0, 1.0],
    input_format="BGR",
)
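Illustration only: the size comprehension above gives each pyramid level three anchor scales spaced a third of an octave apart, which together with the three aspect ratios yields the head's num_anchors=9. For the p3 base size of 32:

sizes_p3 = [32 * 2 ** (k / 3) for k in range(3)]    # ~ [32.0, 40.3, 50.8]
num_anchors = len(sizes_p3) * len([0.5, 1.0, 2.0])  # 9, matching RetinaNetHead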
configs/common/optim.py
ADDED
@@ -0,0 +1,15 @@
import torch

from detectron2.config import LazyCall as L
from detectron2.solver.build import get_default_optimizer_params

SGD = L(torch.optim.SGD)(
    params=L(get_default_optimizer_params)(
        # params.model is meant to be set to the model object, before instantiating
        # the optimizer.
        weight_decay_norm=0.0
    ),
    lr=0.02,
    momentum=0.9,
    weight_decay=1e-4,
)
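Following the comment above, the model is attached to params just before the optimizer is built. A minimal sketch, assuming detectron2 is installed and `model` is an already-built nn.Module:

from detectron2.config import LazyConfig, instantiate

cfg = LazyConfig.load("configs/common/optim.py")
cfg.SGD.params.model = model  # must be set before instantiate()
optimizer = instantiate(cfg.SGD)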
configs/common/train.py
ADDED
@@ -0,0 +1,18 @@
# Common training-related configs that are designed for "tools/lazyconfig_train_net.py"
# You can use your own instead, together with your own train_net.py
train = dict(
    output_dir="./output",
    init_checkpoint="detectron2://ImageNetPretrained/MSRA/R-50.pkl",
    max_iter=90000,
    amp=dict(enabled=False),  # options for Automatic Mixed Precision
    ddp=dict(  # options for DistributedDataParallel
        broadcast_buffers=False,
        find_unused_parameters=False,
        fp16_compression=False,
    ),
    checkpointer=dict(period=5000, max_to_keep=100),  # options for PeriodicCheckpointer
    eval_period=5000,
    log_period=20,
    device="cuda"
    # ...
)
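These defaults can be overridden the same way tools/lazyconfig_train_net.py does from the command line, via LazyConfig.apply_overrides. A minimal sketch:

from detectron2.config import LazyConfig

cfg = LazyConfig.load("configs/common/train.py")
# e.g. a longer schedule with mixed precision enabled:
cfg = LazyConfig.apply_overrides(cfg, ["train.max_iter=270000", "train.amp.enabled=True"])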
configs/new_baselines/mask_rcnn_R_101_FPN_100ep_LSJ.py
ADDED
@@ -0,0 +1,9 @@
from .mask_rcnn_R_50_FPN_100ep_LSJ import (
    dataloader,
    lr_multiplier,
    model,
    optimizer,
    train,
)

model.backbone.bottom_up.stages.depth = 101
configs/new_baselines/mask_rcnn_R_101_FPN_200ep_LSJ.py
ADDED
@@ -0,0 +1,14 @@
from .mask_rcnn_R_101_FPN_100ep_LSJ import (
    dataloader,
    lr_multiplier,
    model,
    optimizer,
    train,
)

train.max_iter *= 2  # 100ep -> 200ep

lr_multiplier.scheduler.milestones = [
    milestone * 2 for milestone in lr_multiplier.scheduler.milestones
]
lr_multiplier.scheduler.num_updates = train.max_iter
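Illustration only: scaling each milestone together with max_iter keeps the LR drops at the same relative points of the run. The concrete values below are assumed from the upstream 100ep R-50 schedule, which is not shown in this diff:

milestones_100ep = [163889, 177546]                   # assumed base values
milestones_200ep = [m * 2 for m in milestones_100ep]  # [327778, 355092]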