# Detector definition: DETR with a ResNet-50 backbone and a DETRHead.
model = dict(
    type='DETR',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        # Only the last stage's feature map is passed on.
        out_indices=(3, ),
        # frozen_stages equals num_stages, BN has requires_grad=False and
        # norm_eval=True: the backbone is configured as fixed.
        frozen_stages=4,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    bbox_head=dict(
        type='DETRHead',
        # Same count as the 256 'cluster_N' names listed in
        # data.train.classes.
        num_classes=256,
        in_channels=2048,
        transformer=dict(
            type='Transformer',
            encoder=dict(
                type='DetrTransformerEncoder',
                num_layers=6,
                transformerlayers=dict(
                    type='BaseTransformerLayer',
                    attn_cfgs=[
                        dict(
                            type='MultiheadAttention',
                            embed_dims=256,
                            num_heads=8,
                            dropout=0.1)
                    ],
                    feedforward_channels=2048,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
            decoder=dict(
                type='DetrTransformerDecoder',
                return_intermediate=True,
                num_layers=6,
                transformerlayers=dict(
                    type='DetrTransformerDecoderLayer',
                    attn_cfgs=dict(
                        type='MultiheadAttention',
                        embed_dims=256,
                        num_heads=8,
                        dropout=0.1),
                    feedforward_channels=2048,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                                     'ffn', 'norm')))),
        positional_encoding=dict(
            type='SinePositionalEncoding', num_feats=128, normalize=True),
        loss_cls=dict(
            type='CrossEntropyLoss',
            bg_cls_weight=0.1,
            use_sigmoid=False,
            loss_weight=1.0,
            class_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=5.0),
        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),
    train_cfg=dict(
        # The matching cost weights (1.0 / 5.0 / 2.0) mirror the loss weights
        # of loss_cls / loss_bbox / loss_iou above.
        assigner=dict(
            type='HungarianAssigner',
            cls_cost=dict(type='ClassificationCost', weight=1.0),
            reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),
            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),
    test_cfg=dict(max_per_img=100))
|
# Dataset class name and root directory for the COCO data.
dataset_type = 'CocoDataset'
data_root = 'data/coco/'

# Per-channel normalisation statistics; the same numbers are repeated inline
# in every 'Normalize' step of the pipelines in this file.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
# Training pipeline: load image and boxes, random flip, AutoAugment with two
# candidate policies, normalise, pad, then bundle the training inputs.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(
        type='AutoAugment',
        policies=[
            # Policy 1: one multi-scale resize (short side 480..800, step 32).
            [
                dict(
                    type='Resize',
                    img_scale=[(480, 1333), (512, 1333), (544, 1333),
                               (576, 1333), (608, 1333), (640, 1333),
                               (672, 1333), (704, 1333), (736, 1333),
                               (768, 1333), (800, 1333)],
                    multiscale_mode='value',
                    keep_ratio=True)
            ],
            # Policy 2: coarse resize -> random crop -> multi-scale resize.
            [
                dict(
                    type='Resize',
                    img_scale=[(400, 1333), (500, 1333), (600, 1333)],
                    multiscale_mode='value',
                    keep_ratio=True),
                dict(
                    type='RandomCrop',
                    crop_type='absolute_range',
                    crop_size=(384, 600),
                    allow_negative_crop=True),
                dict(
                    type='Resize',
                    img_scale=[(480, 1333), (512, 1333), (544, 1333),
                               (576, 1333), (608, 1333), (640, 1333),
                               (672, 1333), (704, 1333), (736, 1333),
                               (768, 1333), (800, 1333)],
                    multiscale_mode='value',
                    # Second Resize in the same policy, hence override=True.
                    override=True,
                    keep_ratio=True)
            ]
        ]),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size_divisor=1),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
|
# Test-time pipeline: single scale (1333, 800), flipping disabled.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            # NOTE(review): padding here uses size_divisor=32 whereas the
            # training pipeline uses size_divisor=1 - confirm this is intended.
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
|
# Dataloader / dataset settings. Pipelines are written out inline so this
# block does not depend on any other variable in the file.
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='CocoDataset',
        # NOTE(review): unlike val/test, this annotation path has no
        # directory prefix - confirm where it is resolved from.
        ann_file='train2017_ratio3size0008@0.5_cluster-id-as-class.json',
        img_prefix='data/coco/train2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations', with_bbox=True),
            dict(type='RandomFlip', flip_ratio=0.5),
            dict(
                type='AutoAugment',
                policies=[
                    # Policy 1: one multi-scale resize.
                    [
                        dict(
                            type='Resize',
                            img_scale=[(480, 1333), (512, 1333), (544, 1333),
                                       (576, 1333), (608, 1333), (640, 1333),
                                       (672, 1333), (704, 1333), (736, 1333),
                                       (768, 1333), (800, 1333)],
                            multiscale_mode='value',
                            keep_ratio=True)
                    ],
                    # Policy 2: coarse resize -> random crop -> resize.
                    [
                        dict(
                            type='Resize',
                            img_scale=[(400, 1333), (500, 1333), (600, 1333)],
                            multiscale_mode='value',
                            keep_ratio=True),
                        dict(
                            type='RandomCrop',
                            crop_type='absolute_range',
                            crop_size=(384, 600),
                            allow_negative_crop=True),
                        dict(
                            type='Resize',
                            img_scale=[(480, 1333), (512, 1333), (544, 1333),
                                       (576, 1333), (608, 1333), (640, 1333),
                                       (672, 1333), (704, 1333), (736, 1333),
                                       (768, 1333), (800, 1333)],
                            multiscale_mode='value',
                            override=True,
                            keep_ratio=True)
                    ]
                ]),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size_divisor=1),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
        ],
        # 'cluster_1' .. 'cluster_256', in order (the comprehension variable
        # does not leak into the module namespace on Python 3).
        classes=['cluster_%d' % i for i in range(1, 257)]),
    # val and test are configured identically and, unlike train, carry no
    # `classes` override.
    val=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]),
    test=dict(
        type='CocoDataset',
        ann_file='data/coco/annotations/instances_val2017.json',
        img_prefix='data/coco/val2017/',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(1333, 800),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='Pad', size_divisor=32),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ]))
|
# NOTE(review): interval=65535 is far larger than runner.max_epochs (50) -
# presumably this effectively disables periodic evaluation; confirm.
evaluation = dict(
    interval=65535, metric='bbox', save_best='auto', gpu_collect=True)

# Checkpointing and text logging intervals.
checkpoint_config = dict(interval=1)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])

# Extra hooks: class-count check plus Weights & Biases logging with no
# evaluation images and no checkpoint upload.
custom_hooks = [
    dict(type='NumClassCheckHook'),
    dict(
        type='MMDetWandbHook',
        init_kwargs=dict(project='I2B', group='finetune'),
        interval=50,
        num_eval_images=0,
        log_checkpoint=False)
]
|
# Runtime settings: distributed backend, logging level, checkpoint loading
# and workflow.
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'

# base_batch_size=64 equals 32 GPUs (gpu_ids) x 2 samples_per_gpu as
# configured elsewhere in this file.
auto_scale_lr = dict(enable=True, base_batch_size=64)
|
# Modules to import before the config is used; with
# allow_failed_imports=False a module that cannot be imported is an error.
custom_imports = dict(
    imports=[
        'mmselfsup.datasets.pipelines',
        'selfsup.core.hook.momentum_update_hook',
        'selfsup.datasets.pipelines.selfsup_pipelines',
        'selfsup.datasets.pipelines.rand_aug',
        'selfsup.datasets.single_view_coco',
        'selfsup.datasets.multi_view_coco',
        'selfsup.models.losses.contrastive_loss',
        'selfsup.models.dense_heads.fcos_head',
        'selfsup.models.dense_heads.retina_head',
        'selfsup.models.dense_heads.detr_head',
        'selfsup.models.dense_heads.deformable_detr_head',
        'selfsup.models.roi_heads.bbox_heads.convfc_bbox_head',
        'selfsup.models.roi_heads.standard_roi_head',
        'selfsup.models.detectors.selfsup_detector',
        'selfsup.models.detectors.selfsup_fcos',
        'selfsup.models.detectors.selfsup_detr',
        'selfsup.models.detectors.selfsup_deformable_detr',
        'selfsup.models.detectors.selfsup_retinanet',
        'selfsup.models.detectors.selfsup_mask_rcnn',
        'selfsup.core.bbox.assigners.hungarian_assigner',
        'selfsup.core.bbox.assigners.pseudo_hungarian_assigner',
        'selfsup.core.bbox.match_costs.match_cost'
    ],
    allow_failed_imports=False)
|
# Class names 'cluster_1' .. 'cluster_256', in order. Generated rather than
# hand-listed; the comprehension variable does not leak into the module
# namespace on Python 3, so no extra config key is created.
classes = ['cluster_%d' % i for i in range(1, 257)]
|
# AdamW optimiser; parameters matching the 'backbone' key get lr_mult=0 and
# decay_mult=0.
optimizer = dict(
    type='AdamW',
    lr=0.0002,
    weight_decay=0.0001,
    paramwise_cfg=dict(
        custom_keys=dict(backbone=dict(lr_mult=0, decay_mult=0))))

# Gradient clipping by L2 norm with max_norm=0.1.
optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))

# Step LR schedule with a single step at epoch 40 of 50.
lr_config = dict(policy='step', step=[40])
runner = dict(type='EpochBasedRunner', max_epochs=50)

# Output directory and launch settings.
work_dir = 'work_dirs/selfsup_detr_cluster-ids-as-pseudo-labels'
auto_resume = False
gpu_ids = range(0, 32)