# Model settings.
# Normalization-layer config; the same dict object is passed to both the
# backbone and the decode head below.
norm_cfg = dict(type='SyncBN', requires_grad=True)

model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='VIT_MLA',
        model_name='vit_large_patch16_384',
        # Matches decode_head.img_size below.
        img_size=768,
        patch_size=16,
        in_chans=3,
        embed_dim=1024,
        depth=24,
        num_heads=16,
        # Matches decode_head.num_classes below.
        num_classes=19,
        drop_rate=0.1,
        norm_cfg=norm_cfg,
        pos_embed_interp=True,
        align_corners=False,
        # Matches decode_head.mla_channels below.
        mla_channels=256,
        # NOTE(review): presumably zero-based indices of the transformer
        # layers (depth=24) whose outputs are tapped -- confirm against the
        # VIT_MLA implementation.
        mla_index=(5, 11, 17, 23),
    ),
    decode_head=dict(
        type='VIT_MLAHead',
        # NOTE(review): equals backbone.embed_dim -- presumably must stay in
        # sync with it; confirm against the VIT_MLAHead implementation.
        in_channels=1024,
        channels=512,
        img_size=768,
        mla_channels=256,
        mlahead_channels=128,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0,
        ),
    ),
)

# Model training and testing settings.
train_cfg = dict()
test_cfg = dict(mode='whole')