|
# Inherit the default runtime settings from the mmpose package.
_base_ = ['mmpose::_base_/default_runtime.py']

# Schedule constants; they are reused by the optimizer, LR scheduler and
# hooks further down in this config.
max_epochs = 420
stage2_num_epochs = 30
base_lr = 4e-3

# Validation is run every 10 epochs.
train_cfg = dict(max_epochs=max_epochs, val_interval=10)
# Fixed seed for reproducibility.
randomness = dict(seed=21)
|
|
|
|
|
# Optimizer: AdamW at `base_lr`; the decay multiplier is set to 0 for
# normalization layers and biases.
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
|
|
|
|
|
# Learning-rate schedule.
param_scheduler = [
    # Iteration-based linear ramp over iterations [0, 1000).
    dict(
        type='LinearLR',
        start_factor=1.0e-5,
        by_epoch=False,
        begin=0,
        end=1000),
    # Cosine annealing over the second half of training
    # (from max_epochs // 2 to max_epochs), down to 5% of base_lr.
    dict(
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=max_epochs // 2,
        end=max_epochs,
        T_max=max_epochs // 2,
        by_epoch=True,
        convert_to_iter_based=True),
]
|
|
|
|
|
# Reference batch size used for automatic LR scaling.
auto_scale_lr = dict(base_batch_size=1024)
|
|
|
|
|
# Codec settings; shared by the head (as decoder) and by the training
# pipelines (as target encoder).
codec = dict(
    type='SimCCLabel',
    input_size=(192, 256),
    sigma=(4.9, 5.66),
    simcc_split_ratio=2.0,
    normalize=False,
    use_dark=False)
|
|
|
|
|
# Model settings: top-down pose estimator with a CSPNeXt backbone (taken
# from the mmdet scope) and an RTMCC head predicting 17 keypoints.
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True),
    backbone=dict(
        _scope_='mmdet',
        type='CSPNeXt',
        arch='P5',
        expand_ratio=0.5,
        deepen_factor=1.,
        widen_factor=1.,
        out_indices=(4, ),
        channel_attention=True,
        norm_cfg=dict(type='SyncBN'),
        act_cfg=dict(type='SiLU'),
        # Backbone weights are initialised from a pretrained checkpoint;
        # only keys under the 'backbone.' prefix are loaded.
        init_cfg=dict(
            type='Pretrained',
            prefix='backbone.',
            checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
            'rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth'  # noqa: E501
        )),
    head=dict(
        type='RTMCCHead',
        in_channels=1024,
        out_channels=17,
        input_size=codec['input_size'],
        # NOTE(review): presumably input_size / 32 for the selected
        # backbone stage -- confirm if input_size is changed.
        in_featuremap_size=(6, 8),
        simcc_split_ratio=codec['simcc_split_ratio'],
        final_layer_kernel_size=7,
        gau_cfg=dict(
            hidden_dims=256,
            s=128,
            expansion_factor=2,
            dropout_rate=0.,
            drop_path=0.,
            act_fn='SiLU',
            use_rel_bias=False,
            pos_enc=False),
        loss=dict(
            type='KLDiscretLoss',
            use_target_weight=True,
            beta=10.,
            label_softmax=True),
        decoder=codec),
    test_cfg=dict(flip_test=True))
|
|
|
|
|
# Base dataset settings shared by the train/val dataloaders.
dataset_type = 'CocoDataset'
data_mode = 'topdown'
data_root = 'data/coco/'

# Files are read from the local filesystem.
backend_args = dict(backend='local')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Training pipeline used by `train_dataloader`. CoarseDropout is always
# applied here (p=1.).
train_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=1.),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
|
# Validation/test pipeline: no random augmentation and no target
# generation, only loading, bbox normalisation and the affine crop.
val_pipeline = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='PackPoseInputs')
]
|
|
|
# Second-stage training pipeline (handed to the PipelineSwitchHook).
# Compared with `train_pipeline` it uses no bbox shift, a narrower
# scale/rotation range, and applies CoarseDropout with p=0.5 instead of 1.
train_pipeline_stage2 = [
    dict(type='LoadImage', backend_args=backend_args),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(
        type='RandomBBoxTransform',
        shift_factor=0.,
        scale_factor=[0.75, 1.25],
        rotate_factor=60),
    dict(type='TopdownAffine', input_size=codec['input_size']),
    dict(type='mmdet.YOLOXHSVRandomAug'),
    dict(
        type='Albumentation',
        transforms=[
            dict(type='Blur', p=0.1),
            dict(type='MedianBlur', p=0.1),
            dict(
                type='CoarseDropout',
                max_holes=1,
                max_height=0.4,
                max_width=0.4,
                min_holes=1,
                min_height=0.2,
                min_width=0.2,
                p=0.5),
        ]),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='PackPoseInputs')
]
|
|
|
|
|
# Training dataloader: COCO train2017 keypoints, shuffled, 256 samples per
# batch per process.
train_dataloader = dict(
    batch_size=256,
    num_workers=10,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_train2017.json',
        data_prefix=dict(img='train2017/'),
        pipeline=train_pipeline,
    ))
|
# Validation dataloader: COCO val2017 keypoints, in order, with no padding
# or dropping of samples (round_up=False, drop_last=False).
val_dataloader = dict(
    batch_size=64,
    num_workers=10,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_val2017.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ))
# Testing reuses the validation dataloader unchanged.
test_dataloader = val_dataloader
|
|
|
|
|
# Checkpointing: track the best checkpoint by 'coco/AP' (higher is better)
# and keep at most one regular checkpoint.
default_hooks = dict(
    checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
|
|
|
# Extra hooks: an EMA of the model weights, and a pipeline switch to
# `train_pipeline_stage2` at epoch `max_epochs - stage2_num_epochs`.
custom_hooks = [
    dict(
        type='EMAHook',
        ema_type='ExpMomentumEMA',
        momentum=0.0002,
        update_buffers=True,
        priority=49),
    dict(
        type='mmdet.PipelineSwitchHook',
        switch_epoch=max_epochs - stage2_num_epochs,
        switch_pipeline=train_pipeline_stage2)
]
|
|
|
|
|
# Evaluators: COCO keypoint metric against the val2017 annotations.
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
# Testing reuses the validation evaluator unchanged.
test_evaluator = val_evaluator
|
|