# YOLO-World-Seg / third_party /mmyolo /configs /yolov5 /voc /yolov5_s-v61_fast_1xb64-50e_voc.py
# onuralpszr's picture
# feat: ✨ YOLO-World-Seg files uploaded
# b291f6a verified
# raw
# history blame contribute delete
# No virus
# 8.54 kB
# Parent config; values are read from it further down via `_base_.<name>`.
_base_ = '../yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py'

# ---- dataset ----
data_root = 'data/VOCdevkit/'
dataset_type = 'YOLOv5VOCDataset'

# ---- values that are most often tuned ----
num_classes = 20
img_scale = (512, 512)  # (width, height)
max_epochs = 50
train_batch_size_per_gpu = 64
train_num_workers = 8
val_batch_size_per_gpu = 1
val_num_workers = 2

# Switch this to False whenever num_workers is 0.
persistent_workers = True

lr_factor = 0.15135
affine_scale = 0.75544

# Validation only: built from the first component of img_scale.
batch_shapes_cfg = {'img_size': img_scale[0]}

# Three groups of three size pairs, fed to the prior generator as
# `base_sizes`; presumably one group per detection layer (num_det_layers).
anchors = [
    [(26, 44), (67, 57), (61, 130)],
    [(121, 118), (120, 239), (206, 182)],
    [(376, 161), (234, 324), (428, 322)],
]
num_det_layers = 3

# Checkpoint of the COCO-trained YOLOv5-s model that training starts from.
load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth'  # noqa

# Scales used to build the test-time-augmentation resize transforms.
tta_img_scales = [img_scale, (416, 416), (640, 640)]
# The numeric hyperparameters below are taken from:
# https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.VOC.yaml
model = {
    'bbox_head': {
        'head_module': {'num_classes': num_classes},
        'prior_generator': {'base_sizes': anchors},
        'loss_cls': {
            # Scaled by the class count relative to 80 and by 3 / layers.
            'loss_weight': 0.21638 * (num_classes / 80 * 3 / num_det_layers),
            'class_weight': 0.5,
        },
        'loss_bbox': {'loss_weight': 0.02 * (3 / num_det_layers)},
        'loss_obj': {
            # Scaled by the squared ratio of img_scale[0] to 640 and by
            # 3 / layers.
            'loss_weight': (
                0.51728 * ((img_scale[0] / 640)**2 * 3 / num_det_layers)),
            'class_weight': 0.67198,
        },
        # Different from COCO
        'prior_match_thr': 3.3744,
    },
    'test_cfg': {'nms': {'iou_threshold': 0.6}},
}
# Transform list taken unchanged from the base config; it is passed to the
# `mmdet.Albu` step of the training pipeline.
albu_train_transforms = _base_.albu_train_transforms
# Steps taken unchanged from the base config; they are unpacked at the start
# of the training pipeline and handed to Mosaic / MixUp as `pre_transform`.
pre_transform = _base_.pre_transform
# Augmentation branch that uses Mosaic: Mosaic, then a random affine, then a
# low-probability MixUp whose second image goes through the same two steps.
# NOTE: the spelling "mosiac" is kept because the name is a config key.
with_mosiac_pipeline = [
    {
        'type': 'Mosaic',
        'img_scale': img_scale,
        'pad_val': 114.0,
        'pre_transform': pre_transform,
    },
    {
        'type': 'YOLOv5RandomAffine',
        'max_rotate_degree': 0.0,
        'max_translate_ratio': 0.04591,
        'max_shear_degree': 0.0,
        'scaling_ratio_range': (1 - affine_scale, 1 + affine_scale),
        # img_scale is (width, height)
        'border': (-img_scale[0] // 2, -img_scale[1] // 2),
        'border_val': (114, 114, 114),
    },
    {
        'type': 'YOLOv5MixUp',
        'prob': 0.04266,
        'pre_transform': [
            *pre_transform,
            {
                'type': 'Mosaic',
                'img_scale': img_scale,
                'pad_val': 114.0,
                'pre_transform': pre_transform,
            },
            {
                'type': 'YOLOv5RandomAffine',
                'max_rotate_degree': 0.0,
                'max_translate_ratio': 0.04591,
                'max_shear_degree': 0.0,
                'scaling_ratio_range': (1 - affine_scale, 1 + affine_scale),
                # img_scale is (width, height)
                'border': (-img_scale[0] // 2, -img_scale[1] // 2),
                'border_val': (114, 114, 114),
            },
        ],
    },
]
# Augmentation branch without Mosaic: a random affine with a zero border,
# followed by a letterbox resize to img_scale.
without_mosaic_pipeline = [
    {
        'type': 'YOLOv5RandomAffine',
        'max_rotate_degree': 0.0,
        'max_translate_ratio': 0.04591,
        'max_shear_degree': 0.0,
        'scaling_ratio_range': (1 - affine_scale, 1 + affine_scale),
        'border': (0, 0),
        'border_val': (114, 114, 114),
    },
    {
        'type': 'LetterResize',
        'scale': img_scale,
        'allow_scale_up': True,
        'pad_val': {'img': 114},
    },
]
# The affine `border` value differs between the mosaic and non-mosaic
# branches, so the two branches are wrapped in a `RandomChoice` with the
# probabilities listed in `prob`.
randchoice_mosaic_pipeline = {
    'type': 'RandomChoice',
    'transforms': [with_mosiac_pipeline, without_mosaic_pipeline],
    'prob': [0.85834, 0.14166],
}
# Full training pipeline: base pre-transforms, the mosaic / non-mosaic
# choice, Albumentations transforms, HSV jitter, random flip, then packing.
train_pipeline = [
    *pre_transform,
    randchoice_mosaic_pipeline,
    {
        'type': 'mmdet.Albu',
        'transforms': albu_train_transforms,
        'bbox_params': {
            'type': 'BboxParams',
            'format': 'pascal_voc',
            'label_fields': ['gt_bboxes_labels', 'gt_ignore_flags'],
        },
        # Maps the pipeline's result keys to the names the Albu wrapper uses.
        'keymap': {'img': 'image', 'gt_bboxes': 'bboxes'},
    },
    {
        'type': 'YOLOv5HSVRandomAug',
        'hue_delta': 0.01041,
        'saturation_delta': 0.54703,
        'value_delta': 0.27739,
    },
    {'type': 'mmdet.RandomFlip', 'prob': 0.5},
    {
        'type': 'mmdet.PackDetInputs',
        'meta_keys': (
            'img_id',
            'img_path',
            'ori_shape',
            'img_shape',
            'flip',
            'flip_direction',
        ),
    },
]
# Training loader over the concatenation of the VOC2007 and VOC2012
# trainval splits. `_delete_` is set so the base config's train_dataloader
# is not merged into this one.
train_dataloader = {
    '_delete_': True,
    'batch_size': train_batch_size_per_gpu,
    'num_workers': train_num_workers,
    'persistent_workers': persistent_workers,
    'pin_memory': True,
    'sampler': {'type': 'DefaultSampler', 'shuffle': True},
    'dataset': {
        'type': 'ConcatDataset',
        # One sub-dataset per (annotation list, sub data root) pair; all
        # other settings are shared.
        'datasets': [
            {
                'type': dataset_type,
                'data_root': data_root,
                'ann_file': split_file,
                'data_prefix': {'sub_data_root': sub_root},
                'filter_cfg': {'filter_empty_gt': False, 'min_size': 32},
                'pipeline': train_pipeline,
            } for split_file, sub_root in (
                ('VOC2007/ImageSets/Main/trainval.txt', 'VOC2007/'),
                ('VOC2012/ImageSets/Main/trainval.txt', 'VOC2012/'),
            )
        ],
        # Use ignore_keys so that `ConcatDataset` does not compare this
        # metainfo entry between the sub-datasets.
        'ignore_keys': 'dataset_type',
    },
    'collate_fn': {'type': 'yolov5_collate'},
}
# Evaluation pipeline: load the image, resize keeping the aspect ratio,
# letterbox to img_scale without upscaling, load boxes, then pack.
test_pipeline = [
    {'type': 'LoadImageFromFile', 'backend_args': _base_.backend_args},
    {'type': 'YOLOv5KeepRatioResize', 'scale': img_scale},
    {
        'type': 'LetterResize',
        'scale': img_scale,
        'allow_scale_up': False,
        'pad_val': {'img': 114},
    },
    {'type': 'LoadAnnotations', 'with_bbox': True, '_scope_': 'mmdet'},
    {
        'type': 'mmdet.PackDetInputs',
        'meta_keys': (
            'img_id',
            'img_path',
            'ori_shape',
            'img_shape',
            'scale_factor',
            'pad_param',
        ),
    },
]
# Validation loader over the VOC2007 test split, in a fixed (unshuffled)
# order and using the evaluation pipeline.
val_dataloader = {
    'batch_size': val_batch_size_per_gpu,
    'num_workers': val_num_workers,
    'persistent_workers': persistent_workers,
    'pin_memory': True,
    'drop_last': False,
    'sampler': {'type': 'DefaultSampler', 'shuffle': False},
    'dataset': {
        'type': dataset_type,
        'data_root': data_root,
        'ann_file': 'VOC2007/ImageSets/Main/test.txt',
        'data_prefix': {'sub_data_root': 'VOC2007/'},
        'test_mode': True,
        'pipeline': test_pipeline,
        'batch_shapes_cfg': batch_shapes_cfg,
    },
}

# Testing reuses the very same loader configuration object.
test_dataloader = val_dataloader
# No top-level scheduler list is defined here.
# NOTE(review): presumably the schedule is driven by the `param_scheduler`
# entry of `default_hooks` instead - confirm against the base config.
param_scheduler = None

# Optimizer overrides; batch_size_per_gpu mirrors the training batch size.
optim_wrapper = {
    'optimizer': {
        'lr': 0.00334,
        'momentum': 0.74832,
        'weight_decay': 0.00025,
        'batch_size_per_gpu': train_batch_size_per_gpu,
    },
}
# Overrides for the `param_scheduler` entry of the default hooks: final LR
# factor, schedule length and warm-up settings.
default_hooks = {
    'param_scheduler': {
        'lr_factor': lr_factor,
        'max_epochs': max_epochs,
        'warmup_epochs': 3.3835,
        'warmup_momentum': 0.59462,
        'warmup_bias_lr': 0.18657,
    },
}
# A single EMA hook using the `ExpMomentumEMA` averaging type.
custom_hooks = [
    {
        'type': 'EMAHook',
        'ema_type': 'ExpMomentumEMA',
        'momentum': 0.0001,
        'update_buffers': True,
        # `strict_load=False` is required so that the COCO pretrained
        # model can be loaded.
        'strict_load': False,
        'priority': 49,
    },
]
# TODO: Support using coco metric in voc dataset
# `_delete_` is set so the base config's evaluator is not merged into this
# VOC-style mAP metric.
val_evaluator = {
    '_delete_': True,
    'type': 'mmdet.VOCMetric',
    'metric': 'mAP',
    'eval_mode': 'area',
}

# Testing shares the very same evaluator configuration object.
test_evaluator = val_evaluator
# Training-loop override: only the epoch count is set here.
train_cfg = {'max_epochs': max_epochs}
# Test Time Augmentation (TTA) settings.
# One `Compose` per entry of tta_img_scales: an aspect-preserving resize
# followed by a letterbox to that scale, without upscaling.
_multiscale_resize_transforms = [
    {
        'type': 'Compose',
        'transforms': [
            {'type': 'YOLOv5KeepRatioResize', 'scale': tta_scale},
            {
                'type': 'LetterResize',
                'scale': tta_scale,
                'allow_scale_up': False,
                'pad_val': {'img': 114},
            },
        ],
    } for tta_scale in tta_img_scales
]
# TTA pipeline: load the image, then let `TestTimeAug` work through four
# groups of alternatives - resize scale, flip (prob 1 or 0), annotation
# loading, and packing.
tta_pipeline = [
    {'type': 'LoadImageFromFile', 'backend_args': _base_.backend_args},
    {
        'type': 'TestTimeAug',
        'transforms': [
            _multiscale_resize_transforms,
            [
                {'type': 'mmdet.RandomFlip', 'prob': 1.},
                {'type': 'mmdet.RandomFlip', 'prob': 0.},
            ],
            [{'type': 'mmdet.LoadAnnotations', 'with_bbox': True}],
            [
                {
                    'type': 'mmdet.PackDetInputs',
                    'meta_keys': (
                        'img_id',
                        'img_path',
                        'ori_shape',
                        'img_shape',
                        'scale_factor',
                        'pad_param',
                        'flip',
                        'flip_direction',
                    ),
                },
            ],
        ],
    },
]