aaronb commited on
Commit
16381c9
·
1 Parent(s): 6fa5e3d

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_173902.log +1143 -0
  2. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_173902.log.json +7 -0
  3. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_174053.log +0 -0
  4. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_174053.log.json +161 -0
  5. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_184631.log +1139 -0
  6. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_184631.log.json +1 -0
  7. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_190322.log +1139 -0
  8. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_190322.log.json +1 -0
  9. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_211228.log +0 -0
  10. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_211228.log.json +0 -0
  11. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits.py +184 -0
  12. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/best_mIoU_iter_72000.pth +3 -0
  13. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_16000.pth +3 -0
  14. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_24000.pth +3 -0
  15. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_32000.pth +3 -0
  16. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_40000.pth +3 -0
  17. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_48000.pth +3 -0
  18. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_56000.pth +3 -0
  19. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_64000.pth +3 -0
  20. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_72000.pth +3 -0
  21. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_8000.pth +3 -0
  22. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_80000.pth +3 -0
  23. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/latest.pth +3 -0
  24. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/20230304_103602.log +1151 -0
  25. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/20230304_103602.log.json +15 -0
  26. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/20230304_103934.log +0 -0
  27. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/20230304_103934.log.json +161 -0
  28. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/20230304_122534.log +0 -0
  29. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/20230304_122534.log.json +0 -0
  30. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask.py +184 -0
  31. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/best_mIoU_iter_80000.pth +3 -0
  32. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_16000.pth +3 -0
  33. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_24000.pth +3 -0
  34. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_32000.pth +3 -0
  35. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_40000.pth +3 -0
  36. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_48000.pth +3 -0
  37. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_56000.pth +3 -0
  38. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_64000.pth +3 -0
  39. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_72000.pth +3 -0
  40. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_8000.pth +3 -0
  41. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_80000.pth +3 -0
  42. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/latest.pth +3 -0
  43. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/20230305_231050.log +1152 -0
  44. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/20230305_231050.log.json +1 -0
  45. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/20230305_231207.log +0 -0
  46. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/20230305_231207.log.json +0 -0
  47. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce.py +195 -0
  48. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/best_mIoU_iter_32000.pth +3 -0
  49. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/iter_160000.pth +3 -0
  50. ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/latest.pth +3 -0
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_173902.log ADDED
@@ -0,0 +1,1143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-03-04 17:39:02,644 - mmseg - INFO - Multi-processing start method is `None`
2
+ 2023-03-04 17:39:02,657 - mmseg - INFO - OpenCV num_threads is `128
3
+ 2023-03-04 17:39:02,657 - mmseg - INFO - OMP num threads is 1
4
+ 2023-03-04 17:39:02,719 - mmseg - INFO - Environment info:
5
+ ------------------------------------------------------------
6
+ sys.platform: linux
7
+ Python: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]
8
+ CUDA available: True
9
+ GPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB
10
+ CUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch
11
+ NVCC: Cuda compilation tools, release 11.6, V11.6.124
12
+ GCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)
13
+ PyTorch: 1.13.1
14
+ PyTorch compiling details: PyTorch built with:
15
+ - GCC 9.3
16
+ - C++ Version: 201402
17
+ - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications
18
+ - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
19
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
20
+ - LAPACK is enabled (usually provided by MKL)
21
+ - NNPACK is enabled
22
+ - CPU capability usage: AVX2
23
+ - CUDA Runtime 11.6
24
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37
25
+ - CuDNN 8.3.2 (built against CUDA 11.5)
26
+ - Magma 2.6.1
27
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
28
+
29
+ TorchVision: 0.14.1
30
+ OpenCV: 4.7.0
31
+ MMCV: 1.7.1
32
+ MMCV Compiler: GCC 9.3
33
+ MMCV CUDA Compiler: 11.6
34
+ MMSegmentation: 0.30.0+6749699
35
+ ------------------------------------------------------------
36
+
37
+ 2023-03-04 17:39:02,719 - mmseg - INFO - Distributed training: True
38
+ 2023-03-04 17:39:03,384 - mmseg - INFO - Config:
39
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
40
+ checkpoint = 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'
41
+ model = dict(
42
+ type='EncoderDecoderFreeze',
43
+ freeze_parameters=['backbone', 'decode_head'],
44
+ pretrained=
45
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
46
+ backbone=dict(
47
+ type='MixVisionTransformerCustomInitWeights',
48
+ in_channels=3,
49
+ embed_dims=64,
50
+ num_stages=4,
51
+ num_layers=[3, 4, 6, 3],
52
+ num_heads=[1, 2, 5, 8],
53
+ patch_sizes=[7, 3, 3, 3],
54
+ sr_ratios=[8, 4, 2, 1],
55
+ out_indices=(0, 1, 2, 3),
56
+ mlp_ratio=4,
57
+ qkv_bias=True,
58
+ drop_rate=0.0,
59
+ attn_drop_rate=0.0,
60
+ drop_path_rate=0.1),
61
+ decode_head=dict(
62
+ type='SegformerHeadUnetFCHeadSingleStepLogits',
63
+ pretrained=
64
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
65
+ dim=128,
66
+ out_dim=256,
67
+ unet_channels=166,
68
+ dim_mults=[1, 1, 1],
69
+ cat_embedding_dim=16,
70
+ in_channels=[64, 128, 320, 512],
71
+ in_index=[0, 1, 2, 3],
72
+ channels=256,
73
+ dropout_ratio=0.1,
74
+ num_classes=151,
75
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
76
+ align_corners=False,
77
+ ignore_index=0,
78
+ loss_decode=dict(
79
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
80
+ train_cfg=dict(),
81
+ test_cfg=dict(mode='whole'))
82
+ dataset_type = 'ADE20K151Dataset'
83
+ data_root = 'data/ade/ADEChallengeData2016'
84
+ img_norm_cfg = dict(
85
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
86
+ crop_size = (512, 512)
87
+ train_pipeline = [
88
+ dict(type='LoadImageFromFile'),
89
+ dict(type='LoadAnnotations', reduce_zero_label=False),
90
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
91
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
92
+ dict(type='RandomFlip', prob=0.5),
93
+ dict(type='PhotoMetricDistortion'),
94
+ dict(
95
+ type='Normalize',
96
+ mean=[123.675, 116.28, 103.53],
97
+ std=[58.395, 57.12, 57.375],
98
+ to_rgb=True),
99
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
100
+ dict(type='DefaultFormatBundle'),
101
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
102
+ ]
103
+ test_pipeline = [
104
+ dict(type='LoadImageFromFile'),
105
+ dict(
106
+ type='MultiScaleFlipAug',
107
+ img_scale=(2048, 512),
108
+ flip=False,
109
+ transforms=[
110
+ dict(type='Resize', keep_ratio=True),
111
+ dict(type='RandomFlip'),
112
+ dict(
113
+ type='Normalize',
114
+ mean=[123.675, 116.28, 103.53],
115
+ std=[58.395, 57.12, 57.375],
116
+ to_rgb=True),
117
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
118
+ dict(type='ImageToTensor', keys=['img']),
119
+ dict(type='Collect', keys=['img'])
120
+ ])
121
+ ]
122
+ data = dict(
123
+ samples_per_gpu=4,
124
+ workers_per_gpu=4,
125
+ train=dict(
126
+ type='ADE20K151Dataset',
127
+ data_root='data/ade/ADEChallengeData2016',
128
+ img_dir='images/training',
129
+ ann_dir='annotations/training',
130
+ pipeline=[
131
+ dict(type='LoadImageFromFile'),
132
+ dict(type='LoadAnnotations', reduce_zero_label=False),
133
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
134
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
135
+ dict(type='RandomFlip', prob=0.5),
136
+ dict(type='PhotoMetricDistortion'),
137
+ dict(
138
+ type='Normalize',
139
+ mean=[123.675, 116.28, 103.53],
140
+ std=[58.395, 57.12, 57.375],
141
+ to_rgb=True),
142
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
143
+ dict(type='DefaultFormatBundle'),
144
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
145
+ ]),
146
+ val=dict(
147
+ type='ADE20K151Dataset',
148
+ data_root='data/ade/ADEChallengeData2016',
149
+ img_dir='images/validation',
150
+ ann_dir='annotations/validation',
151
+ pipeline=[
152
+ dict(type='LoadImageFromFile'),
153
+ dict(
154
+ type='MultiScaleFlipAug',
155
+ img_scale=(2048, 512),
156
+ flip=False,
157
+ transforms=[
158
+ dict(type='Resize', keep_ratio=True),
159
+ dict(type='RandomFlip'),
160
+ dict(
161
+ type='Normalize',
162
+ mean=[123.675, 116.28, 103.53],
163
+ std=[58.395, 57.12, 57.375],
164
+ to_rgb=True),
165
+ dict(
166
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
167
+ dict(type='ImageToTensor', keys=['img']),
168
+ dict(type='Collect', keys=['img'])
169
+ ])
170
+ ]),
171
+ test=dict(
172
+ type='ADE20K151Dataset',
173
+ data_root='data/ade/ADEChallengeData2016',
174
+ img_dir='images/validation',
175
+ ann_dir='annotations/validation',
176
+ pipeline=[
177
+ dict(type='LoadImageFromFile'),
178
+ dict(
179
+ type='MultiScaleFlipAug',
180
+ img_scale=(2048, 512),
181
+ flip=False,
182
+ transforms=[
183
+ dict(type='Resize', keep_ratio=True),
184
+ dict(type='RandomFlip'),
185
+ dict(
186
+ type='Normalize',
187
+ mean=[123.675, 116.28, 103.53],
188
+ std=[58.395, 57.12, 57.375],
189
+ to_rgb=True),
190
+ dict(
191
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
192
+ dict(type='ImageToTensor', keys=['img']),
193
+ dict(type='Collect', keys=['img'])
194
+ ])
195
+ ]))
196
+ log_config = dict(
197
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
198
+ dist_params = dict(backend='nccl')
199
+ log_level = 'INFO'
200
+ load_from = None
201
+ resume_from = None
202
+ workflow = [('train', 1)]
203
+ cudnn_benchmark = True
204
+ optimizer = dict(
205
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
206
+ optimizer_config = dict()
207
+ lr_config = dict(
208
+ policy='step',
209
+ warmup='linear',
210
+ warmup_iters=1000,
211
+ warmup_ratio=1e-06,
212
+ step=10000,
213
+ gamma=0.5,
214
+ min_lr=1e-06,
215
+ by_epoch=False)
216
+ runner = dict(type='IterBasedRunner', max_iters=80000)
217
+ checkpoint_config = dict(by_epoch=False, interval=8000)
218
+ evaluation = dict(
219
+ interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')
220
+ work_dir = './work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits'
221
+ gpu_ids = range(0, 8)
222
+ auto_resume = True
223
+
224
+ 2023-03-04 17:39:07,974 - mmseg - INFO - Set random seed to 984079870, deterministic: False
225
+ 2023-03-04 17:39:08,230 - mmseg - INFO - Parameters in backbone freezed!
226
+ 2023-03-04 17:39:08,230 - mmseg - INFO - Trainable parameters in SegformerHeadUnetFCHeadSingleStep: ['unet.init_conv.weight', 'unet.init_conv.bias', 'unet.time_mlp.1.weight', 'unet.time_mlp.1.bias', 'unet.time_mlp.3.weight', 'unet.time_mlp.3.bias', 'unet.downs.0.0.mlp.1.weight', 'unet.downs.0.0.mlp.1.bias', 'unet.downs.0.0.block1.proj.weight', 'unet.downs.0.0.block1.proj.bias', 'unet.downs.0.0.block1.norm.weight', 'unet.downs.0.0.block1.norm.bias', 'unet.downs.0.0.block2.proj.weight', 'unet.downs.0.0.block2.proj.bias', 'unet.downs.0.0.block2.norm.weight', 'unet.downs.0.0.block2.norm.bias', 'unet.downs.0.1.mlp.1.weight', 'unet.downs.0.1.mlp.1.bias', 'unet.downs.0.1.block1.proj.weight', 'unet.downs.0.1.block1.proj.bias', 'unet.downs.0.1.block1.norm.weight', 'unet.downs.0.1.block1.norm.bias', 'unet.downs.0.1.block2.proj.weight', 'unet.downs.0.1.block2.proj.bias', 'unet.downs.0.1.block2.norm.weight', 'unet.downs.0.1.block2.norm.bias', 'unet.downs.0.2.fn.fn.to_qkv.weight', 'unet.downs.0.2.fn.fn.to_out.0.weight', 'unet.downs.0.2.fn.fn.to_out.0.bias', 'unet.downs.0.2.fn.fn.to_out.1.g', 'unet.downs.0.2.fn.norm.g', 'unet.downs.0.3.weight', 'unet.downs.0.3.bias', 'unet.downs.1.0.mlp.1.weight', 'unet.downs.1.0.mlp.1.bias', 'unet.downs.1.0.block1.proj.weight', 'unet.downs.1.0.block1.proj.bias', 'unet.downs.1.0.block1.norm.weight', 'unet.downs.1.0.block1.norm.bias', 'unet.downs.1.0.block2.proj.weight', 'unet.downs.1.0.block2.proj.bias', 'unet.downs.1.0.block2.norm.weight', 'unet.downs.1.0.block2.norm.bias', 'unet.downs.1.1.mlp.1.weight', 'unet.downs.1.1.mlp.1.bias', 'unet.downs.1.1.block1.proj.weight', 'unet.downs.1.1.block1.proj.bias', 'unet.downs.1.1.block1.norm.weight', 'unet.downs.1.1.block1.norm.bias', 'unet.downs.1.1.block2.proj.weight', 'unet.downs.1.1.block2.proj.bias', 'unet.downs.1.1.block2.norm.weight', 'unet.downs.1.1.block2.norm.bias', 'unet.downs.1.2.fn.fn.to_qkv.weight', 'unet.downs.1.2.fn.fn.to_out.0.weight', 'unet.downs.1.2.fn.fn.to_out.0.bias', 'unet.downs.1.2.fn.fn.to_out.1.g', 'unet.downs.1.2.fn.norm.g', 'unet.downs.1.3.weight', 'unet.downs.1.3.bias', 'unet.downs.2.0.mlp.1.weight', 'unet.downs.2.0.mlp.1.bias', 'unet.downs.2.0.block1.proj.weight', 'unet.downs.2.0.block1.proj.bias', 'unet.downs.2.0.block1.norm.weight', 'unet.downs.2.0.block1.norm.bias', 'unet.downs.2.0.block2.proj.weight', 'unet.downs.2.0.block2.proj.bias', 'unet.downs.2.0.block2.norm.weight', 'unet.downs.2.0.block2.norm.bias', 'unet.downs.2.1.mlp.1.weight', 'unet.downs.2.1.mlp.1.bias', 'unet.downs.2.1.block1.proj.weight', 'unet.downs.2.1.block1.proj.bias', 'unet.downs.2.1.block1.norm.weight', 'unet.downs.2.1.block1.norm.bias', 'unet.downs.2.1.block2.proj.weight', 'unet.downs.2.1.block2.proj.bias', 'unet.downs.2.1.block2.norm.weight', 'unet.downs.2.1.block2.norm.bias', 'unet.downs.2.2.fn.fn.to_qkv.weight', 'unet.downs.2.2.fn.fn.to_out.0.weight', 'unet.downs.2.2.fn.fn.to_out.0.bias', 'unet.downs.2.2.fn.fn.to_out.1.g', 'unet.downs.2.2.fn.norm.g', 'unet.downs.2.3.weight', 'unet.downs.2.3.bias', 'unet.ups.0.0.mlp.1.weight', 'unet.ups.0.0.mlp.1.bias', 'unet.ups.0.0.block1.proj.weight', 'unet.ups.0.0.block1.proj.bias', 'unet.ups.0.0.block1.norm.weight', 'unet.ups.0.0.block1.norm.bias', 'unet.ups.0.0.block2.proj.weight', 'unet.ups.0.0.block2.proj.bias', 'unet.ups.0.0.block2.norm.weight', 'unet.ups.0.0.block2.norm.bias', 'unet.ups.0.0.res_conv.weight', 'unet.ups.0.0.res_conv.bias', 'unet.ups.0.1.mlp.1.weight', 'unet.ups.0.1.mlp.1.bias', 'unet.ups.0.1.block1.proj.weight', 'unet.ups.0.1.block1.proj.bias', 'unet.ups.0.1.block1.norm.weight', 'unet.ups.0.1.block1.norm.bias', 'unet.ups.0.1.block2.proj.weight', 'unet.ups.0.1.block2.proj.bias', 'unet.ups.0.1.block2.norm.weight', 'unet.ups.0.1.block2.norm.bias', 'unet.ups.0.1.res_conv.weight', 'unet.ups.0.1.res_conv.bias', 'unet.ups.0.2.fn.fn.to_qkv.weight', 'unet.ups.0.2.fn.fn.to_out.0.weight', 'unet.ups.0.2.fn.fn.to_out.0.bias', 'unet.ups.0.2.fn.fn.to_out.1.g', 'unet.ups.0.2.fn.norm.g', 'unet.ups.0.3.1.weight', 'unet.ups.0.3.1.bias', 'unet.ups.1.0.mlp.1.weight', 'unet.ups.1.0.mlp.1.bias', 'unet.ups.1.0.block1.proj.weight', 'unet.ups.1.0.block1.proj.bias', 'unet.ups.1.0.block1.norm.weight', 'unet.ups.1.0.block1.norm.bias', 'unet.ups.1.0.block2.proj.weight', 'unet.ups.1.0.block2.proj.bias', 'unet.ups.1.0.block2.norm.weight', 'unet.ups.1.0.block2.norm.bias', 'unet.ups.1.0.res_conv.weight', 'unet.ups.1.0.res_conv.bias', 'unet.ups.1.1.mlp.1.weight', 'unet.ups.1.1.mlp.1.bias', 'unet.ups.1.1.block1.proj.weight', 'unet.ups.1.1.block1.proj.bias', 'unet.ups.1.1.block1.norm.weight', 'unet.ups.1.1.block1.norm.bias', 'unet.ups.1.1.block2.proj.weight', 'unet.ups.1.1.block2.proj.bias', 'unet.ups.1.1.block2.norm.weight', 'unet.ups.1.1.block2.norm.bias', 'unet.ups.1.1.res_conv.weight', 'unet.ups.1.1.res_conv.bias', 'unet.ups.1.2.fn.fn.to_qkv.weight', 'unet.ups.1.2.fn.fn.to_out.0.weight', 'unet.ups.1.2.fn.fn.to_out.0.bias', 'unet.ups.1.2.fn.fn.to_out.1.g', 'unet.ups.1.2.fn.norm.g', 'unet.ups.1.3.1.weight', 'unet.ups.1.3.1.bias', 'unet.ups.2.0.mlp.1.weight', 'unet.ups.2.0.mlp.1.bias', 'unet.ups.2.0.block1.proj.weight', 'unet.ups.2.0.block1.proj.bias', 'unet.ups.2.0.block1.norm.weight', 'unet.ups.2.0.block1.norm.bias', 'unet.ups.2.0.block2.proj.weight', 'unet.ups.2.0.block2.proj.bias', 'unet.ups.2.0.block2.norm.weight', 'unet.ups.2.0.block2.norm.bias', 'unet.ups.2.0.res_conv.weight', 'unet.ups.2.0.res_conv.bias', 'unet.ups.2.1.mlp.1.weight', 'unet.ups.2.1.mlp.1.bias', 'unet.ups.2.1.block1.proj.weight', 'unet.ups.2.1.block1.proj.bias', 'unet.ups.2.1.block1.norm.weight', 'unet.ups.2.1.block1.norm.bias', 'unet.ups.2.1.block2.proj.weight', 'unet.ups.2.1.block2.proj.bias', 'unet.ups.2.1.block2.norm.weight', 'unet.ups.2.1.block2.norm.bias', 'unet.ups.2.1.res_conv.weight', 'unet.ups.2.1.res_conv.bias', 'unet.ups.2.2.fn.fn.to_qkv.weight', 'unet.ups.2.2.fn.fn.to_out.0.weight', 'unet.ups.2.2.fn.fn.to_out.0.bias', 'unet.ups.2.2.fn.fn.to_out.1.g', 'unet.ups.2.2.fn.norm.g', 'unet.ups.2.3.weight', 'unet.ups.2.3.bias', 'unet.mid_block1.mlp.1.weight', 'unet.mid_block1.mlp.1.bias', 'unet.mid_block1.block1.proj.weight', 'unet.mid_block1.block1.proj.bias', 'unet.mid_block1.block1.norm.weight', 'unet.mid_block1.block1.norm.bias', 'unet.mid_block1.block2.proj.weight', 'unet.mid_block1.block2.proj.bias', 'unet.mid_block1.block2.norm.weight', 'unet.mid_block1.block2.norm.bias', 'unet.mid_attn.fn.fn.to_qkv.weight', 'unet.mid_attn.fn.fn.to_out.weight', 'unet.mid_attn.fn.fn.to_out.bias', 'unet.mid_attn.fn.norm.g', 'unet.mid_block2.mlp.1.weight', 'unet.mid_block2.mlp.1.bias', 'unet.mid_block2.block1.proj.weight', 'unet.mid_block2.block1.proj.bias', 'unet.mid_block2.block1.norm.weight', 'unet.mid_block2.block1.norm.bias', 'unet.mid_block2.block2.proj.weight', 'unet.mid_block2.block2.proj.bias', 'unet.mid_block2.block2.norm.weight', 'unet.mid_block2.block2.norm.bias', 'unet.final_res_block.mlp.1.weight', 'unet.final_res_block.mlp.1.bias', 'unet.final_res_block.block1.proj.weight', 'unet.final_res_block.block1.proj.bias', 'unet.final_res_block.block1.norm.weight', 'unet.final_res_block.block1.norm.bias', 'unet.final_res_block.block2.proj.weight', 'unet.final_res_block.block2.proj.bias', 'unet.final_res_block.block2.norm.weight', 'unet.final_res_block.block2.norm.bias', 'unet.final_res_block.res_conv.weight', 'unet.final_res_block.res_conv.bias', 'unet.final_conv.weight', 'unet.final_conv.bias', 'conv_seg_new.weight', 'conv_seg_new.bias']
227
+ 2023-03-04 17:39:08,231 - mmseg - INFO - Parameters in decode_head freezed!
228
+ 2023-03-04 17:39:08,250 - mmseg - INFO - load checkpoint from local path: pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth
229
+ 2023-03-04 17:39:08,491 - mmseg - WARNING - The model and loaded state dict do not match exactly
230
+
231
+ unexpected key in source state_dict: decode_head.conv_seg.weight, decode_head.conv_seg.bias, decode_head.convs.0.conv.weight, decode_head.convs.0.bn.weight, decode_head.convs.0.bn.bias, decode_head.convs.0.bn.running_mean, decode_head.convs.0.bn.running_var, decode_head.convs.0.bn.num_batches_tracked, decode_head.convs.1.conv.weight, decode_head.convs.1.bn.weight, decode_head.convs.1.bn.bias, decode_head.convs.1.bn.running_mean, decode_head.convs.1.bn.running_var, decode_head.convs.1.bn.num_batches_tracked, decode_head.convs.2.conv.weight, decode_head.convs.2.bn.weight, decode_head.convs.2.bn.bias, decode_head.convs.2.bn.running_mean, decode_head.convs.2.bn.running_var, decode_head.convs.2.bn.num_batches_tracked, decode_head.convs.3.conv.weight, decode_head.convs.3.bn.weight, decode_head.convs.3.bn.bias, decode_head.convs.3.bn.running_mean, decode_head.convs.3.bn.running_var, decode_head.convs.3.bn.num_batches_tracked, decode_head.fusion_conv.conv.weight, decode_head.fusion_conv.bn.weight, decode_head.fusion_conv.bn.bias, decode_head.fusion_conv.bn.running_mean, decode_head.fusion_conv.bn.running_var, decode_head.fusion_conv.bn.num_batches_tracked
232
+
233
+ 2023-03-04 17:39:08,504 - mmseg - INFO - load checkpoint from local path: pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth
234
+ 2023-03-04 17:39:08,721 - mmseg - WARNING - The model and loaded state dict do not match exactly
235
+
236
+ unexpected key in source state_dict: backbone.layers.0.0.projection.weight, backbone.layers.0.0.projection.bias, backbone.layers.0.0.norm.weight, backbone.layers.0.0.norm.bias, backbone.layers.0.1.0.norm1.weight, backbone.layers.0.1.0.norm1.bias, backbone.layers.0.1.0.attn.attn.in_proj_weight, backbone.layers.0.1.0.attn.attn.in_proj_bias, backbone.layers.0.1.0.attn.attn.out_proj.weight, backbone.layers.0.1.0.attn.attn.out_proj.bias, backbone.layers.0.1.0.attn.sr.weight, backbone.layers.0.1.0.attn.sr.bias, backbone.layers.0.1.0.attn.norm.weight, backbone.layers.0.1.0.attn.norm.bias, backbone.layers.0.1.0.norm2.weight, backbone.layers.0.1.0.norm2.bias, backbone.layers.0.1.0.ffn.layers.0.weight, backbone.layers.0.1.0.ffn.layers.0.bias, backbone.layers.0.1.0.ffn.layers.1.weight, backbone.layers.0.1.0.ffn.layers.1.bias, backbone.layers.0.1.0.ffn.layers.4.weight, backbone.layers.0.1.0.ffn.layers.4.bias, backbone.layers.0.1.1.norm1.weight, backbone.layers.0.1.1.norm1.bias, backbone.layers.0.1.1.attn.attn.in_proj_weight, backbone.layers.0.1.1.attn.attn.in_proj_bias, backbone.layers.0.1.1.attn.attn.out_proj.weight, backbone.layers.0.1.1.attn.attn.out_proj.bias, backbone.layers.0.1.1.attn.sr.weight, backbone.layers.0.1.1.attn.sr.bias, backbone.layers.0.1.1.attn.norm.weight, backbone.layers.0.1.1.attn.norm.bias, backbone.layers.0.1.1.norm2.weight, backbone.layers.0.1.1.norm2.bias, backbone.layers.0.1.1.ffn.layers.0.weight, backbone.layers.0.1.1.ffn.layers.0.bias, backbone.layers.0.1.1.ffn.layers.1.weight, backbone.layers.0.1.1.ffn.layers.1.bias, backbone.layers.0.1.1.ffn.layers.4.weight, backbone.layers.0.1.1.ffn.layers.4.bias, backbone.layers.0.1.2.norm1.weight, backbone.layers.0.1.2.norm1.bias, backbone.layers.0.1.2.attn.attn.in_proj_weight, backbone.layers.0.1.2.attn.attn.in_proj_bias, backbone.layers.0.1.2.attn.attn.out_proj.weight, backbone.layers.0.1.2.attn.attn.out_proj.bias, backbone.layers.0.1.2.attn.sr.weight, backbone.layers.0.1.2.attn.sr.bias, backbone.layers.0.1.2.attn.norm.weight, backbone.layers.0.1.2.attn.norm.bias, backbone.layers.0.1.2.norm2.weight, backbone.layers.0.1.2.norm2.bias, backbone.layers.0.1.2.ffn.layers.0.weight, backbone.layers.0.1.2.ffn.layers.0.bias, backbone.layers.0.1.2.ffn.layers.1.weight, backbone.layers.0.1.2.ffn.layers.1.bias, backbone.layers.0.1.2.ffn.layers.4.weight, backbone.layers.0.1.2.ffn.layers.4.bias, backbone.layers.0.2.weight, backbone.layers.0.2.bias, backbone.layers.1.0.projection.weight, backbone.layers.1.0.projection.bias, backbone.layers.1.0.norm.weight, backbone.layers.1.0.norm.bias, backbone.layers.1.1.0.norm1.weight, backbone.layers.1.1.0.norm1.bias, backbone.layers.1.1.0.attn.attn.in_proj_weight, backbone.layers.1.1.0.attn.attn.in_proj_bias, backbone.layers.1.1.0.attn.attn.out_proj.weight, backbone.layers.1.1.0.attn.attn.out_proj.bias, backbone.layers.1.1.0.attn.sr.weight, backbone.layers.1.1.0.attn.sr.bias, backbone.layers.1.1.0.attn.norm.weight, backbone.layers.1.1.0.attn.norm.bias, backbone.layers.1.1.0.norm2.weight, backbone.layers.1.1.0.norm2.bias, backbone.layers.1.1.0.ffn.layers.0.weight, backbone.layers.1.1.0.ffn.layers.0.bias, backbone.layers.1.1.0.ffn.layers.1.weight, backbone.layers.1.1.0.ffn.layers.1.bias, backbone.layers.1.1.0.ffn.layers.4.weight, backbone.layers.1.1.0.ffn.layers.4.bias, backbone.layers.1.1.1.norm1.weight, backbone.layers.1.1.1.norm1.bias, backbone.layers.1.1.1.attn.attn.in_proj_weight, backbone.layers.1.1.1.attn.attn.in_proj_bias, backbone.layers.1.1.1.attn.attn.out_proj.weight, backbone.layers.1.1.1.attn.attn.out_proj.bias, backbone.layers.1.1.1.attn.sr.weight, backbone.layers.1.1.1.attn.sr.bias, backbone.layers.1.1.1.attn.norm.weight, backbone.layers.1.1.1.attn.norm.bias, backbone.layers.1.1.1.norm2.weight, backbone.layers.1.1.1.norm2.bias, backbone.layers.1.1.1.ffn.layers.0.weight, backbone.layers.1.1.1.ffn.layers.0.bias, backbone.layers.1.1.1.ffn.layers.1.weight, backbone.layers.1.1.1.ffn.layers.1.bias, backbone.layers.1.1.1.ffn.layers.4.weight, backbone.layers.1.1.1.ffn.layers.4.bias, backbone.layers.1.1.2.norm1.weight, backbone.layers.1.1.2.norm1.bias, backbone.layers.1.1.2.attn.attn.in_proj_weight, backbone.layers.1.1.2.attn.attn.in_proj_bias, backbone.layers.1.1.2.attn.attn.out_proj.weight, backbone.layers.1.1.2.attn.attn.out_proj.bias, backbone.layers.1.1.2.attn.sr.weight, backbone.layers.1.1.2.attn.sr.bias, backbone.layers.1.1.2.attn.norm.weight, backbone.layers.1.1.2.attn.norm.bias, backbone.layers.1.1.2.norm2.weight, backbone.layers.1.1.2.norm2.bias, backbone.layers.1.1.2.ffn.layers.0.weight, backbone.layers.1.1.2.ffn.layers.0.bias, backbone.layers.1.1.2.ffn.layers.1.weight, backbone.layers.1.1.2.ffn.layers.1.bias, backbone.layers.1.1.2.ffn.layers.4.weight, backbone.layers.1.1.2.ffn.layers.4.bias, backbone.layers.1.1.3.norm1.weight, backbone.layers.1.1.3.norm1.bias, backbone.layers.1.1.3.attn.attn.in_proj_weight, backbone.layers.1.1.3.attn.attn.in_proj_bias, backbone.layers.1.1.3.attn.attn.out_proj.weight, backbone.layers.1.1.3.attn.attn.out_proj.bias, backbone.layers.1.1.3.attn.sr.weight, backbone.layers.1.1.3.attn.sr.bias, backbone.layers.1.1.3.attn.norm.weight, backbone.layers.1.1.3.attn.norm.bias, backbone.layers.1.1.3.norm2.weight, backbone.layers.1.1.3.norm2.bias, backbone.layers.1.1.3.ffn.layers.0.weight, backbone.layers.1.1.3.ffn.layers.0.bias, backbone.layers.1.1.3.ffn.layers.1.weight, backbone.layers.1.1.3.ffn.layers.1.bias, backbone.layers.1.1.3.ffn.layers.4.weight, backbone.layers.1.1.3.ffn.layers.4.bias, backbone.layers.1.2.weight, backbone.layers.1.2.bias, backbone.layers.2.0.projection.weight, backbone.layers.2.0.projection.bias, backbone.layers.2.0.norm.weight, backbone.layers.2.0.norm.bias, backbone.layers.2.1.0.norm1.weight, backbone.layers.2.1.0.norm1.bias, backbone.layers.2.1.0.attn.attn.in_proj_weight, backbone.layers.2.1.0.attn.attn.in_proj_bias, backbone.layers.2.1.0.attn.attn.out_proj.weight, backbone.layers.2.1.0.attn.attn.out_proj.bias, backbone.layers.2.1.0.attn.sr.weight, backbone.layers.2.1.0.attn.sr.bias, backbone.layers.2.1.0.attn.norm.weight, backbone.layers.2.1.0.attn.norm.bias, backbone.layers.2.1.0.norm2.weight, backbone.layers.2.1.0.norm2.bias, backbone.layers.2.1.0.ffn.layers.0.weight, backbone.layers.2.1.0.ffn.layers.0.bias, backbone.layers.2.1.0.ffn.layers.1.weight, backbone.layers.2.1.0.ffn.layers.1.bias, backbone.layers.2.1.0.ffn.layers.4.weight, backbone.layers.2.1.0.ffn.layers.4.bias, backbone.layers.2.1.1.norm1.weight, backbone.layers.2.1.1.norm1.bias, backbone.layers.2.1.1.attn.attn.in_proj_weight, backbone.layers.2.1.1.attn.attn.in_proj_bias, backbone.layers.2.1.1.attn.attn.out_proj.weight, backbone.layers.2.1.1.attn.attn.out_proj.bias, backbone.layers.2.1.1.attn.sr.weight, backbone.layers.2.1.1.attn.sr.bias, backbone.layers.2.1.1.attn.norm.weight, backbone.layers.2.1.1.attn.norm.bias, backbone.layers.2.1.1.norm2.weight, backbone.layers.2.1.1.norm2.bias, backbone.layers.2.1.1.ffn.layers.0.weight, backbone.layers.2.1.1.ffn.layers.0.bias, backbone.layers.2.1.1.ffn.layers.1.weight, backbone.layers.2.1.1.ffn.layers.1.bias, backbone.layers.2.1.1.ffn.layers.4.weight, backbone.layers.2.1.1.ffn.layers.4.bias, backbone.layers.2.1.2.norm1.weight, backbone.layers.2.1.2.norm1.bias, backbone.layers.2.1.2.attn.attn.in_proj_weight, backbone.layers.2.1.2.attn.attn.in_proj_bias, backbone.layers.2.1.2.attn.attn.out_proj.weight, backbone.layers.2.1.2.attn.attn.out_proj.bias, backbone.layers.2.1.2.attn.sr.weight, backbone.layers.2.1.2.attn.sr.bias, backbone.layers.2.1.2.attn.norm.weight, backbone.layers.2.1.2.attn.norm.bias, backbone.layers.2.1.2.norm2.weight, backbone.layers.2.1.2.norm2.bias, backbone.layers.2.1.2.ffn.layers.0.weight, backbone.layers.2.1.2.ffn.layers.0.bias, backbone.layers.2.1.2.ffn.layers.1.weight, backbone.layers.2.1.2.ffn.layers.1.bias, backbone.layers.2.1.2.ffn.layers.4.weight, backbone.layers.2.1.2.ffn.layers.4.bias, backbone.layers.2.1.3.norm1.weight, backbone.layers.2.1.3.norm1.bias, backbone.layers.2.1.3.attn.attn.in_proj_weight, backbone.layers.2.1.3.attn.attn.in_proj_bias, backbone.layers.2.1.3.attn.attn.out_proj.weight, backbone.layers.2.1.3.attn.attn.out_proj.bias, backbone.layers.2.1.3.attn.sr.weight, backbone.layers.2.1.3.attn.sr.bias, backbone.layers.2.1.3.attn.norm.weight, backbone.layers.2.1.3.attn.norm.bias, backbone.layers.2.1.3.norm2.weight, backbone.layers.2.1.3.norm2.bias, backbone.layers.2.1.3.ffn.layers.0.weight, backbone.layers.2.1.3.ffn.layers.0.bias, backbone.layers.2.1.3.ffn.layers.1.weight, backbone.layers.2.1.3.ffn.layers.1.bias, backbone.layers.2.1.3.ffn.layers.4.weight, backbone.layers.2.1.3.ffn.layers.4.bias, backbone.layers.2.1.4.norm1.weight, backbone.layers.2.1.4.norm1.bias, backbone.layers.2.1.4.attn.attn.in_proj_weight, backbone.layers.2.1.4.attn.attn.in_proj_bias, backbone.layers.2.1.4.attn.attn.out_proj.weight, backbone.layers.2.1.4.attn.attn.out_proj.bias, backbone.layers.2.1.4.attn.sr.weight, backbone.layers.2.1.4.attn.sr.bias, backbone.layers.2.1.4.attn.norm.weight, backbone.layers.2.1.4.attn.norm.bias, backbone.layers.2.1.4.norm2.weight, backbone.layers.2.1.4.norm2.bias, backbone.layers.2.1.4.ffn.layers.0.weight, backbone.layers.2.1.4.ffn.layers.0.bias, backbone.layers.2.1.4.ffn.layers.1.weight, backbone.layers.2.1.4.ffn.layers.1.bias, backbone.layers.2.1.4.ffn.layers.4.weight, backbone.layers.2.1.4.ffn.layers.4.bias, backbone.layers.2.1.5.norm1.weight, backbone.layers.2.1.5.norm1.bias, backbone.layers.2.1.5.attn.attn.in_proj_weight, backbone.layers.2.1.5.attn.attn.in_proj_bias, backbone.layers.2.1.5.attn.attn.out_proj.weight, backbone.layers.2.1.5.attn.attn.out_proj.bias, backbone.layers.2.1.5.attn.sr.weight, backbone.layers.2.1.5.attn.sr.bias, backbone.layers.2.1.5.attn.norm.weight, backbone.layers.2.1.5.attn.norm.bias, backbone.layers.2.1.5.norm2.weight, backbone.layers.2.1.5.norm2.bias, backbone.layers.2.1.5.ffn.layers.0.weight, backbone.layers.2.1.5.ffn.layers.0.bias, backbone.layers.2.1.5.ffn.layers.1.weight, backbone.layers.2.1.5.ffn.layers.1.bias, backbone.layers.2.1.5.ffn.layers.4.weight, backbone.layers.2.1.5.ffn.layers.4.bias, backbone.layers.2.2.weight, backbone.layers.2.2.bias, backbone.layers.3.0.projection.weight, backbone.layers.3.0.projection.bias, backbone.layers.3.0.norm.weight, backbone.layers.3.0.norm.bias, backbone.layers.3.1.0.norm1.weight, backbone.layers.3.1.0.norm1.bias, backbone.layers.3.1.0.attn.attn.in_proj_weight, backbone.layers.3.1.0.attn.attn.in_proj_bias, backbone.layers.3.1.0.attn.attn.out_proj.weight, backbone.layers.3.1.0.attn.attn.out_proj.bias, backbone.layers.3.1.0.norm2.weight, backbone.layers.3.1.0.norm2.bias, backbone.layers.3.1.0.ffn.layers.0.weight, backbone.layers.3.1.0.ffn.layers.0.bias, backbone.layers.3.1.0.ffn.layers.1.weight, backbone.layers.3.1.0.ffn.layers.1.bias, backbone.layers.3.1.0.ffn.layers.4.weight, backbone.layers.3.1.0.ffn.layers.4.bias, backbone.layers.3.1.1.norm1.weight, backbone.layers.3.1.1.norm1.bias, backbone.layers.3.1.1.attn.attn.in_proj_weight, backbone.layers.3.1.1.attn.attn.in_proj_bias, backbone.layers.3.1.1.attn.attn.out_proj.weight, backbone.layers.3.1.1.attn.attn.out_proj.bias, backbone.layers.3.1.1.norm2.weight, backbone.layers.3.1.1.norm2.bias, backbone.layers.3.1.1.ffn.layers.0.weight, backbone.layers.3.1.1.ffn.layers.0.bias, backbone.layers.3.1.1.ffn.layers.1.weight, backbone.layers.3.1.1.ffn.layers.1.bias, backbone.layers.3.1.1.ffn.layers.4.weight, backbone.layers.3.1.1.ffn.layers.4.bias, backbone.layers.3.1.2.norm1.weight, backbone.layers.3.1.2.norm1.bias, backbone.layers.3.1.2.attn.attn.in_proj_weight, backbone.layers.3.1.2.attn.attn.in_proj_bias, backbone.layers.3.1.2.attn.attn.out_proj.weight, backbone.layers.3.1.2.attn.attn.out_proj.bias, backbone.layers.3.1.2.norm2.weight, backbone.layers.3.1.2.norm2.bias, backbone.layers.3.1.2.ffn.layers.0.weight, backbone.layers.3.1.2.ffn.layers.0.bias, backbone.layers.3.1.2.ffn.layers.1.weight, backbone.layers.3.1.2.ffn.layers.1.bias, backbone.layers.3.1.2.ffn.layers.4.weight, backbone.layers.3.1.2.ffn.layers.4.bias, backbone.layers.3.2.weight, backbone.layers.3.2.bias
237
+
238
+ missing keys in source state_dict: unet.init_conv.weight, unet.init_conv.bias, unet.time_mlp.1.weight, unet.time_mlp.1.bias, unet.time_mlp.3.weight, unet.time_mlp.3.bias, unet.downs.0.0.mlp.1.weight, unet.downs.0.0.mlp.1.bias, unet.downs.0.0.block1.proj.weight, unet.downs.0.0.block1.proj.bias, unet.downs.0.0.block1.norm.weight, unet.downs.0.0.block1.norm.bias, unet.downs.0.0.block2.proj.weight, unet.downs.0.0.block2.proj.bias, unet.downs.0.0.block2.norm.weight, unet.downs.0.0.block2.norm.bias, unet.downs.0.1.mlp.1.weight, unet.downs.0.1.mlp.1.bias, unet.downs.0.1.block1.proj.weight, unet.downs.0.1.block1.proj.bias, unet.downs.0.1.block1.norm.weight, unet.downs.0.1.block1.norm.bias, unet.downs.0.1.block2.proj.weight, unet.downs.0.1.block2.proj.bias, unet.downs.0.1.block2.norm.weight, unet.downs.0.1.block2.norm.bias, unet.downs.0.2.fn.fn.to_qkv.weight, unet.downs.0.2.fn.fn.to_out.0.weight, unet.downs.0.2.fn.fn.to_out.0.bias, unet.downs.0.2.fn.fn.to_out.1.g, unet.downs.0.2.fn.norm.g, unet.downs.0.3.weight, unet.downs.0.3.bias, unet.downs.1.0.mlp.1.weight, unet.downs.1.0.mlp.1.bias, unet.downs.1.0.block1.proj.weight, unet.downs.1.0.block1.proj.bias, unet.downs.1.0.block1.norm.weight, unet.downs.1.0.block1.norm.bias, unet.downs.1.0.block2.proj.weight, unet.downs.1.0.block2.proj.bias, unet.downs.1.0.block2.norm.weight, unet.downs.1.0.block2.norm.bias, unet.downs.1.1.mlp.1.weight, unet.downs.1.1.mlp.1.bias, unet.downs.1.1.block1.proj.weight, unet.downs.1.1.block1.proj.bias, unet.downs.1.1.block1.norm.weight, unet.downs.1.1.block1.norm.bias, unet.downs.1.1.block2.proj.weight, unet.downs.1.1.block2.proj.bias, unet.downs.1.1.block2.norm.weight, unet.downs.1.1.block2.norm.bias, unet.downs.1.2.fn.fn.to_qkv.weight, unet.downs.1.2.fn.fn.to_out.0.weight, unet.downs.1.2.fn.fn.to_out.0.bias, unet.downs.1.2.fn.fn.to_out.1.g, unet.downs.1.2.fn.norm.g, unet.downs.1.3.weight, unet.downs.1.3.bias, unet.downs.2.0.mlp.1.weight, unet.downs.2.0.mlp.1.bias, unet.downs.2.0.block1.proj.weight, unet.downs.2.0.block1.proj.bias, unet.downs.2.0.block1.norm.weight, unet.downs.2.0.block1.norm.bias, unet.downs.2.0.block2.proj.weight, unet.downs.2.0.block2.proj.bias, unet.downs.2.0.block2.norm.weight, unet.downs.2.0.block2.norm.bias, unet.downs.2.1.mlp.1.weight, unet.downs.2.1.mlp.1.bias, unet.downs.2.1.block1.proj.weight, unet.downs.2.1.block1.proj.bias, unet.downs.2.1.block1.norm.weight, unet.downs.2.1.block1.norm.bias, unet.downs.2.1.block2.proj.weight, unet.downs.2.1.block2.proj.bias, unet.downs.2.1.block2.norm.weight, unet.downs.2.1.block2.norm.bias, unet.downs.2.2.fn.fn.to_qkv.weight, unet.downs.2.2.fn.fn.to_out.0.weight, unet.downs.2.2.fn.fn.to_out.0.bias, unet.downs.2.2.fn.fn.to_out.1.g, unet.downs.2.2.fn.norm.g, unet.downs.2.3.weight, unet.downs.2.3.bias, unet.ups.0.0.mlp.1.weight, unet.ups.0.0.mlp.1.bias, unet.ups.0.0.block1.proj.weight, unet.ups.0.0.block1.proj.bias, unet.ups.0.0.block1.norm.weight, unet.ups.0.0.block1.norm.bias, unet.ups.0.0.block2.proj.weight, unet.ups.0.0.block2.proj.bias, unet.ups.0.0.block2.norm.weight, unet.ups.0.0.block2.norm.bias, unet.ups.0.0.res_conv.weight, unet.ups.0.0.res_conv.bias, unet.ups.0.1.mlp.1.weight, unet.ups.0.1.mlp.1.bias, unet.ups.0.1.block1.proj.weight, unet.ups.0.1.block1.proj.bias, unet.ups.0.1.block1.norm.weight, unet.ups.0.1.block1.norm.bias, unet.ups.0.1.block2.proj.weight, unet.ups.0.1.block2.proj.bias, unet.ups.0.1.block2.norm.weight, unet.ups.0.1.block2.norm.bias, unet.ups.0.1.res_conv.weight, unet.ups.0.1.res_conv.bias, unet.ups.0.2.fn.fn.to_qkv.weight, unet.ups.0.2.fn.fn.to_out.0.weight, unet.ups.0.2.fn.fn.to_out.0.bias, unet.ups.0.2.fn.fn.to_out.1.g, unet.ups.0.2.fn.norm.g, unet.ups.0.3.1.weight, unet.ups.0.3.1.bias, unet.ups.1.0.mlp.1.weight, unet.ups.1.0.mlp.1.bias, unet.ups.1.0.block1.proj.weight, unet.ups.1.0.block1.proj.bias, unet.ups.1.0.block1.norm.weight, unet.ups.1.0.block1.norm.bias, unet.ups.1.0.block2.proj.weight, unet.ups.1.0.block2.proj.bias, unet.ups.1.0.block2.norm.weight, unet.ups.1.0.block2.norm.bias, unet.ups.1.0.res_conv.weight, unet.ups.1.0.res_conv.bias, unet.ups.1.1.mlp.1.weight, unet.ups.1.1.mlp.1.bias, unet.ups.1.1.block1.proj.weight, unet.ups.1.1.block1.proj.bias, unet.ups.1.1.block1.norm.weight, unet.ups.1.1.block1.norm.bias, unet.ups.1.1.block2.proj.weight, unet.ups.1.1.block2.proj.bias, unet.ups.1.1.block2.norm.weight, unet.ups.1.1.block2.norm.bias, unet.ups.1.1.res_conv.weight, unet.ups.1.1.res_conv.bias, unet.ups.1.2.fn.fn.to_qkv.weight, unet.ups.1.2.fn.fn.to_out.0.weight, unet.ups.1.2.fn.fn.to_out.0.bias, unet.ups.1.2.fn.fn.to_out.1.g, unet.ups.1.2.fn.norm.g, unet.ups.1.3.1.weight, unet.ups.1.3.1.bias, unet.ups.2.0.mlp.1.weight, unet.ups.2.0.mlp.1.bias, unet.ups.2.0.block1.proj.weight, unet.ups.2.0.block1.proj.bias, unet.ups.2.0.block1.norm.weight, unet.ups.2.0.block1.norm.bias, unet.ups.2.0.block2.proj.weight, unet.ups.2.0.block2.proj.bias, unet.ups.2.0.block2.norm.weight, unet.ups.2.0.block2.norm.bias, unet.ups.2.0.res_conv.weight, unet.ups.2.0.res_conv.bias, unet.ups.2.1.mlp.1.weight, unet.ups.2.1.mlp.1.bias, unet.ups.2.1.block1.proj.weight, unet.ups.2.1.block1.proj.bias, unet.ups.2.1.block1.norm.weight, unet.ups.2.1.block1.norm.bias, unet.ups.2.1.block2.proj.weight, unet.ups.2.1.block2.proj.bias, unet.ups.2.1.block2.norm.weight, unet.ups.2.1.block2.norm.bias, unet.ups.2.1.res_conv.weight, unet.ups.2.1.res_conv.bias, unet.ups.2.2.fn.fn.to_qkv.weight, unet.ups.2.2.fn.fn.to_out.0.weight, unet.ups.2.2.fn.fn.to_out.0.bias, unet.ups.2.2.fn.fn.to_out.1.g, unet.ups.2.2.fn.norm.g, unet.ups.2.3.weight, unet.ups.2.3.bias, unet.mid_block1.mlp.1.weight, unet.mid_block1.mlp.1.bias, unet.mid_block1.block1.proj.weight, unet.mid_block1.block1.proj.bias, unet.mid_block1.block1.norm.weight, unet.mid_block1.block1.norm.bias, unet.mid_block1.block2.proj.weight, unet.mid_block1.block2.proj.bias, unet.mid_block1.block2.norm.weight, unet.mid_block1.block2.norm.bias, unet.mid_attn.fn.fn.to_qkv.weight, unet.mid_attn.fn.fn.to_out.weight, unet.mid_attn.fn.fn.to_out.bias, unet.mid_attn.fn.norm.g, unet.mid_block2.mlp.1.weight, unet.mid_block2.mlp.1.bias, unet.mid_block2.block1.proj.weight, unet.mid_block2.block1.proj.bias, unet.mid_block2.block1.norm.weight, unet.mid_block2.block1.norm.bias, unet.mid_block2.block2.proj.weight, unet.mid_block2.block2.proj.bias, unet.mid_block2.block2.norm.weight, unet.mid_block2.block2.norm.bias, unet.final_res_block.mlp.1.weight, unet.final_res_block.mlp.1.bias, unet.final_res_block.block1.proj.weight, unet.final_res_block.block1.proj.bias, unet.final_res_block.block1.norm.weight, unet.final_res_block.block1.norm.bias, unet.final_res_block.block2.proj.weight, unet.final_res_block.block2.proj.bias, unet.final_res_block.block2.norm.weight, unet.final_res_block.block2.norm.bias, unet.final_res_block.res_conv.weight, unet.final_res_block.res_conv.bias, unet.final_conv.weight, unet.final_conv.bias, conv_seg_new.weight, conv_seg_new.bias, embed.weight
239
+
240
+ 2023-03-04 17:39:08,744 - mmseg - INFO - EncoderDecoderFreeze(
241
+ (backbone): MixVisionTransformerCustomInitWeights(
242
+ (layers): ModuleList(
243
+ (0): ModuleList(
244
+ (0): PatchEmbed(
245
+ (projection): Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
246
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
247
+ )
248
+ (1): ModuleList(
249
+ (0): TransformerEncoderLayer(
250
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
251
+ (attn): EfficientMultiheadAttention(
252
+ (attn): MultiheadAttention(
253
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
254
+ )
255
+ (proj_drop): Dropout(p=0.0, inplace=False)
256
+ (dropout_layer): DropPath()
257
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
258
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
259
+ )
260
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
261
+ (ffn): MixFFN(
262
+ (activate): GELU(approximate='none')
263
+ (layers): Sequential(
264
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
265
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
266
+ (2): GELU(approximate='none')
267
+ (3): Dropout(p=0.0, inplace=False)
268
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
269
+ (5): Dropout(p=0.0, inplace=False)
270
+ )
271
+ (dropout_layer): DropPath()
272
+ )
273
+ )
274
+ (1): TransformerEncoderLayer(
275
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
276
+ (attn): EfficientMultiheadAttention(
277
+ (attn): MultiheadAttention(
278
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
279
+ )
280
+ (proj_drop): Dropout(p=0.0, inplace=False)
281
+ (dropout_layer): DropPath()
282
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
283
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
284
+ )
285
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
286
+ (ffn): MixFFN(
287
+ (activate): GELU(approximate='none')
288
+ (layers): Sequential(
289
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
290
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
291
+ (2): GELU(approximate='none')
292
+ (3): Dropout(p=0.0, inplace=False)
293
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
294
+ (5): Dropout(p=0.0, inplace=False)
295
+ )
296
+ (dropout_layer): DropPath()
297
+ )
298
+ )
299
+ (2): TransformerEncoderLayer(
300
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
301
+ (attn): EfficientMultiheadAttention(
302
+ (attn): MultiheadAttention(
303
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
304
+ )
305
+ (proj_drop): Dropout(p=0.0, inplace=False)
306
+ (dropout_layer): DropPath()
307
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
308
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
309
+ )
310
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
311
+ (ffn): MixFFN(
312
+ (activate): GELU(approximate='none')
313
+ (layers): Sequential(
314
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
315
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
316
+ (2): GELU(approximate='none')
317
+ (3): Dropout(p=0.0, inplace=False)
318
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
319
+ (5): Dropout(p=0.0, inplace=False)
320
+ )
321
+ (dropout_layer): DropPath()
322
+ )
323
+ )
324
+ )
325
+ (2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
326
+ )
327
+ (1): ModuleList(
328
+ (0): PatchEmbed(
329
+ (projection): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
330
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
331
+ )
332
+ (1): ModuleList(
333
+ (0): TransformerEncoderLayer(
334
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
335
+ (attn): EfficientMultiheadAttention(
336
+ (attn): MultiheadAttention(
337
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
338
+ )
339
+ (proj_drop): Dropout(p=0.0, inplace=False)
340
+ (dropout_layer): DropPath()
341
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
342
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
343
+ )
344
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
345
+ (ffn): MixFFN(
346
+ (activate): GELU(approximate='none')
347
+ (layers): Sequential(
348
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
349
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
350
+ (2): GELU(approximate='none')
351
+ (3): Dropout(p=0.0, inplace=False)
352
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
353
+ (5): Dropout(p=0.0, inplace=False)
354
+ )
355
+ (dropout_layer): DropPath()
356
+ )
357
+ )
358
+ (1): TransformerEncoderLayer(
359
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
360
+ (attn): EfficientMultiheadAttention(
361
+ (attn): MultiheadAttention(
362
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
363
+ )
364
+ (proj_drop): Dropout(p=0.0, inplace=False)
365
+ (dropout_layer): DropPath()
366
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
367
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
368
+ )
369
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
370
+ (ffn): MixFFN(
371
+ (activate): GELU(approximate='none')
372
+ (layers): Sequential(
373
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
374
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
375
+ (2): GELU(approximate='none')
376
+ (3): Dropout(p=0.0, inplace=False)
377
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
378
+ (5): Dropout(p=0.0, inplace=False)
379
+ )
380
+ (dropout_layer): DropPath()
381
+ )
382
+ )
383
+ (2): TransformerEncoderLayer(
384
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
385
+ (attn): EfficientMultiheadAttention(
386
+ (attn): MultiheadAttention(
387
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
388
+ )
389
+ (proj_drop): Dropout(p=0.0, inplace=False)
390
+ (dropout_layer): DropPath()
391
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
392
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
393
+ )
394
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
395
+ (ffn): MixFFN(
396
+ (activate): GELU(approximate='none')
397
+ (layers): Sequential(
398
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
399
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
400
+ (2): GELU(approximate='none')
401
+ (3): Dropout(p=0.0, inplace=False)
402
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
403
+ (5): Dropout(p=0.0, inplace=False)
404
+ )
405
+ (dropout_layer): DropPath()
406
+ )
407
+ )
408
+ (3): TransformerEncoderLayer(
409
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
410
+ (attn): EfficientMultiheadAttention(
411
+ (attn): MultiheadAttention(
412
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
413
+ )
414
+ (proj_drop): Dropout(p=0.0, inplace=False)
415
+ (dropout_layer): DropPath()
416
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
417
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
418
+ )
419
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
420
+ (ffn): MixFFN(
421
+ (activate): GELU(approximate='none')
422
+ (layers): Sequential(
423
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
424
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
425
+ (2): GELU(approximate='none')
426
+ (3): Dropout(p=0.0, inplace=False)
427
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
428
+ (5): Dropout(p=0.0, inplace=False)
429
+ )
430
+ (dropout_layer): DropPath()
431
+ )
432
+ )
433
+ )
434
+ (2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
435
+ )
436
+ (2): ModuleList(
437
+ (0): PatchEmbed(
438
+ (projection): Conv2d(128, 320, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
439
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
440
+ )
441
+ (1): ModuleList(
442
+ (0): TransformerEncoderLayer(
443
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
444
+ (attn): EfficientMultiheadAttention(
445
+ (attn): MultiheadAttention(
446
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
447
+ )
448
+ (proj_drop): Dropout(p=0.0, inplace=False)
449
+ (dropout_layer): DropPath()
450
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
451
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
452
+ )
453
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
454
+ (ffn): MixFFN(
455
+ (activate): GELU(approximate='none')
456
+ (layers): Sequential(
457
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
458
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
459
+ (2): GELU(approximate='none')
460
+ (3): Dropout(p=0.0, inplace=False)
461
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
462
+ (5): Dropout(p=0.0, inplace=False)
463
+ )
464
+ (dropout_layer): DropPath()
465
+ )
466
+ )
467
+ (1): TransformerEncoderLayer(
468
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
469
+ (attn): EfficientMultiheadAttention(
470
+ (attn): MultiheadAttention(
471
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
472
+ )
473
+ (proj_drop): Dropout(p=0.0, inplace=False)
474
+ (dropout_layer): DropPath()
475
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
476
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
477
+ )
478
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
479
+ (ffn): MixFFN(
480
+ (activate): GELU(approximate='none')
481
+ (layers): Sequential(
482
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
483
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
484
+ (2): GELU(approximate='none')
485
+ (3): Dropout(p=0.0, inplace=False)
486
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
487
+ (5): Dropout(p=0.0, inplace=False)
488
+ )
489
+ (dropout_layer): DropPath()
490
+ )
491
+ )
492
+ (2): TransformerEncoderLayer(
493
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
494
+ (attn): EfficientMultiheadAttention(
495
+ (attn): MultiheadAttention(
496
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
497
+ )
498
+ (proj_drop): Dropout(p=0.0, inplace=False)
499
+ (dropout_layer): DropPath()
500
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
501
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
502
+ )
503
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
504
+ (ffn): MixFFN(
505
+ (activate): GELU(approximate='none')
506
+ (layers): Sequential(
507
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
508
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
509
+ (2): GELU(approximate='none')
510
+ (3): Dropout(p=0.0, inplace=False)
511
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
512
+ (5): Dropout(p=0.0, inplace=False)
513
+ )
514
+ (dropout_layer): DropPath()
515
+ )
516
+ )
517
+ (3): TransformerEncoderLayer(
518
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
519
+ (attn): EfficientMultiheadAttention(
520
+ (attn): MultiheadAttention(
521
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
522
+ )
523
+ (proj_drop): Dropout(p=0.0, inplace=False)
524
+ (dropout_layer): DropPath()
525
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
526
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
527
+ )
528
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
529
+ (ffn): MixFFN(
530
+ (activate): GELU(approximate='none')
531
+ (layers): Sequential(
532
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
533
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
534
+ (2): GELU(approximate='none')
535
+ (3): Dropout(p=0.0, inplace=False)
536
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
537
+ (5): Dropout(p=0.0, inplace=False)
538
+ )
539
+ (dropout_layer): DropPath()
540
+ )
541
+ )
542
+ (4): TransformerEncoderLayer(
543
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
544
+ (attn): EfficientMultiheadAttention(
545
+ (attn): MultiheadAttention(
546
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
547
+ )
548
+ (proj_drop): Dropout(p=0.0, inplace=False)
549
+ (dropout_layer): DropPath()
550
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
551
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
552
+ )
553
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
554
+ (ffn): MixFFN(
555
+ (activate): GELU(approximate='none')
556
+ (layers): Sequential(
557
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
558
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
559
+ (2): GELU(approximate='none')
560
+ (3): Dropout(p=0.0, inplace=False)
561
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
562
+ (5): Dropout(p=0.0, inplace=False)
563
+ )
564
+ (dropout_layer): DropPath()
565
+ )
566
+ )
567
+ (5): TransformerEncoderLayer(
568
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
569
+ (attn): EfficientMultiheadAttention(
570
+ (attn): MultiheadAttention(
571
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
572
+ )
573
+ (proj_drop): Dropout(p=0.0, inplace=False)
574
+ (dropout_layer): DropPath()
575
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
576
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
577
+ )
578
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
579
+ (ffn): MixFFN(
580
+ (activate): GELU(approximate='none')
581
+ (layers): Sequential(
582
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
583
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
584
+ (2): GELU(approximate='none')
585
+ (3): Dropout(p=0.0, inplace=False)
586
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
587
+ (5): Dropout(p=0.0, inplace=False)
588
+ )
589
+ (dropout_layer): DropPath()
590
+ )
591
+ )
592
+ )
593
+ (2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
594
+ )
595
+ (3): ModuleList(
596
+ (0): PatchEmbed(
597
+ (projection): Conv2d(320, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
598
+ (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
599
+ )
600
+ (1): ModuleList(
601
+ (0): TransformerEncoderLayer(
602
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
603
+ (attn): EfficientMultiheadAttention(
604
+ (attn): MultiheadAttention(
605
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
606
+ )
607
+ (proj_drop): Dropout(p=0.0, inplace=False)
608
+ (dropout_layer): DropPath()
609
+ )
610
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
611
+ (ffn): MixFFN(
612
+ (activate): GELU(approximate='none')
613
+ (layers): Sequential(
614
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
615
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
616
+ (2): GELU(approximate='none')
617
+ (3): Dropout(p=0.0, inplace=False)
618
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
619
+ (5): Dropout(p=0.0, inplace=False)
620
+ )
621
+ (dropout_layer): DropPath()
622
+ )
623
+ )
624
+ (1): TransformerEncoderLayer(
625
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
626
+ (attn): EfficientMultiheadAttention(
627
+ (attn): MultiheadAttention(
628
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
629
+ )
630
+ (proj_drop): Dropout(p=0.0, inplace=False)
631
+ (dropout_layer): DropPath()
632
+ )
633
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
634
+ (ffn): MixFFN(
635
+ (activate): GELU(approximate='none')
636
+ (layers): Sequential(
637
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
638
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
639
+ (2): GELU(approximate='none')
640
+ (3): Dropout(p=0.0, inplace=False)
641
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
642
+ (5): Dropout(p=0.0, inplace=False)
643
+ )
644
+ (dropout_layer): DropPath()
645
+ )
646
+ )
647
+ (2): TransformerEncoderLayer(
648
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
649
+ (attn): EfficientMultiheadAttention(
650
+ (attn): MultiheadAttention(
651
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
652
+ )
653
+ (proj_drop): Dropout(p=0.0, inplace=False)
654
+ (dropout_layer): DropPath()
655
+ )
656
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
657
+ (ffn): MixFFN(
658
+ (activate): GELU(approximate='none')
659
+ (layers): Sequential(
660
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
661
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
662
+ (2): GELU(approximate='none')
663
+ (3): Dropout(p=0.0, inplace=False)
664
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
665
+ (5): Dropout(p=0.0, inplace=False)
666
+ )
667
+ (dropout_layer): DropPath()
668
+ )
669
+ )
670
+ )
671
+ (2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
672
+ )
673
+ )
674
+ )
675
+ init_cfg={'type': 'Pretrained', 'checkpoint': 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'}
676
+ (decode_head): SegformerHeadUnetFCHeadSingleStepLogits(
677
+ input_transform=multiple_select, ignore_index=0, align_corners=False
678
+ (loss_decode): CrossEntropyLoss(avg_non_ignore=False)
679
+ (conv_seg): Conv2d(256, 150, kernel_size=(1, 1), stride=(1, 1))
680
+ (dropout): Dropout2d(p=0.1, inplace=False)
681
+ (convs): ModuleList(
682
+ (0): ConvModule(
683
+ (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
684
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
685
+ (activate): ReLU(inplace=True)
686
+ )
687
+ (1): ConvModule(
688
+ (conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
689
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
690
+ (activate): ReLU(inplace=True)
691
+ )
692
+ (2): ConvModule(
693
+ (conv): Conv2d(320, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
694
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
695
+ (activate): ReLU(inplace=True)
696
+ )
697
+ (3): ConvModule(
698
+ (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
699
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
700
+ (activate): ReLU(inplace=True)
701
+ )
702
+ )
703
+ (fusion_conv): ConvModule(
704
+ (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
705
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
706
+ (activate): ReLU(inplace=True)
707
+ )
708
+ (unet): Unet(
709
+ (init_conv): Conv2d(166, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
710
+ (time_mlp): Sequential(
711
+ (0): SinusoidalPosEmb()
712
+ (1): Linear(in_features=128, out_features=512, bias=True)
713
+ (2): GELU(approximate='none')
714
+ (3): Linear(in_features=512, out_features=512, bias=True)
715
+ )
716
+ (downs): ModuleList(
717
+ (0): ModuleList(
718
+ (0): ResnetBlock(
719
+ (mlp): Sequential(
720
+ (0): SiLU()
721
+ (1): Linear(in_features=512, out_features=256, bias=True)
722
+ )
723
+ (block1): Block(
724
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
725
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
726
+ (act): SiLU()
727
+ )
728
+ (block2): Block(
729
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
730
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
731
+ (act): SiLU()
732
+ )
733
+ (res_conv): Identity()
734
+ )
735
+ (1): ResnetBlock(
736
+ (mlp): Sequential(
737
+ (0): SiLU()
738
+ (1): Linear(in_features=512, out_features=256, bias=True)
739
+ )
740
+ (block1): Block(
741
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
742
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
743
+ (act): SiLU()
744
+ )
745
+ (block2): Block(
746
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
747
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
748
+ (act): SiLU()
749
+ )
750
+ (res_conv): Identity()
751
+ )
752
+ (2): Residual(
753
+ (fn): PreNorm(
754
+ (fn): LinearAttention(
755
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
756
+ (to_out): Sequential(
757
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
758
+ (1): LayerNorm()
759
+ )
760
+ )
761
+ (norm): LayerNorm()
762
+ )
763
+ )
764
+ (3): Conv2d(128, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
765
+ )
766
+ (1): ModuleList(
767
+ (0): ResnetBlock(
768
+ (mlp): Sequential(
769
+ (0): SiLU()
770
+ (1): Linear(in_features=512, out_features=256, bias=True)
771
+ )
772
+ (block1): Block(
773
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
774
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
775
+ (act): SiLU()
776
+ )
777
+ (block2): Block(
778
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
779
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
780
+ (act): SiLU()
781
+ )
782
+ (res_conv): Identity()
783
+ )
784
+ (1): ResnetBlock(
785
+ (mlp): Sequential(
786
+ (0): SiLU()
787
+ (1): Linear(in_features=512, out_features=256, bias=True)
788
+ )
789
+ (block1): Block(
790
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
791
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
792
+ (act): SiLU()
793
+ )
794
+ (block2): Block(
795
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
796
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
797
+ (act): SiLU()
798
+ )
799
+ (res_conv): Identity()
800
+ )
801
+ (2): Residual(
802
+ (fn): PreNorm(
803
+ (fn): LinearAttention(
804
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
805
+ (to_out): Sequential(
806
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
807
+ (1): LayerNorm()
808
+ )
809
+ )
810
+ (norm): LayerNorm()
811
+ )
812
+ )
813
+ (3): Conv2d(128, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
814
+ )
815
+ (2): ModuleList(
816
+ (0): ResnetBlock(
817
+ (mlp): Sequential(
818
+ (0): SiLU()
819
+ (1): Linear(in_features=512, out_features=256, bias=True)
820
+ )
821
+ (block1): Block(
822
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
823
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
824
+ (act): SiLU()
825
+ )
826
+ (block2): Block(
827
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
828
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
829
+ (act): SiLU()
830
+ )
831
+ (res_conv): Identity()
832
+ )
833
+ (1): ResnetBlock(
834
+ (mlp): Sequential(
835
+ (0): SiLU()
836
+ (1): Linear(in_features=512, out_features=256, bias=True)
837
+ )
838
+ (block1): Block(
839
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
840
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
841
+ (act): SiLU()
842
+ )
843
+ (block2): Block(
844
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
845
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
846
+ (act): SiLU()
847
+ )
848
+ (res_conv): Identity()
849
+ )
850
+ (2): Residual(
851
+ (fn): PreNorm(
852
+ (fn): LinearAttention(
853
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
854
+ (to_out): Sequential(
855
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
856
+ (1): LayerNorm()
857
+ )
858
+ )
859
+ (norm): LayerNorm()
860
+ )
861
+ )
862
+ (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
863
+ )
864
+ )
865
+ (ups): ModuleList(
866
+ (0): ModuleList(
867
+ (0): ResnetBlock(
868
+ (mlp): Sequential(
869
+ (0): SiLU()
870
+ (1): Linear(in_features=512, out_features=256, bias=True)
871
+ )
872
+ (block1): Block(
873
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
874
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
875
+ (act): SiLU()
876
+ )
877
+ (block2): Block(
878
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
879
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
880
+ (act): SiLU()
881
+ )
882
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
883
+ )
884
+ (1): ResnetBlock(
885
+ (mlp): Sequential(
886
+ (0): SiLU()
887
+ (1): Linear(in_features=512, out_features=256, bias=True)
888
+ )
889
+ (block1): Block(
890
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
891
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
892
+ (act): SiLU()
893
+ )
894
+ (block2): Block(
895
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
896
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
897
+ (act): SiLU()
898
+ )
899
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
900
+ )
901
+ (2): Residual(
902
+ (fn): PreNorm(
903
+ (fn): LinearAttention(
904
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
905
+ (to_out): Sequential(
906
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
907
+ (1): LayerNorm()
908
+ )
909
+ )
910
+ (norm): LayerNorm()
911
+ )
912
+ )
913
+ (3): Sequential(
914
+ (0): Upsample(scale_factor=2.0, mode=nearest)
915
+ (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
916
+ )
917
+ )
918
+ (1): ModuleList(
919
+ (0): ResnetBlock(
920
+ (mlp): Sequential(
921
+ (0): SiLU()
922
+ (1): Linear(in_features=512, out_features=256, bias=True)
923
+ )
924
+ (block1): Block(
925
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
926
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
927
+ (act): SiLU()
928
+ )
929
+ (block2): Block(
930
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
931
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
932
+ (act): SiLU()
933
+ )
934
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
935
+ )
936
+ (1): ResnetBlock(
937
+ (mlp): Sequential(
938
+ (0): SiLU()
939
+ (1): Linear(in_features=512, out_features=256, bias=True)
940
+ )
941
+ (block1): Block(
942
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
943
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
944
+ (act): SiLU()
945
+ )
946
+ (block2): Block(
947
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
948
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
949
+ (act): SiLU()
950
+ )
951
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
952
+ )
953
+ (2): Residual(
954
+ (fn): PreNorm(
955
+ (fn): LinearAttention(
956
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
957
+ (to_out): Sequential(
958
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
959
+ (1): LayerNorm()
960
+ )
961
+ )
962
+ (norm): LayerNorm()
963
+ )
964
+ )
965
+ (3): Sequential(
966
+ (0): Upsample(scale_factor=2.0, mode=nearest)
967
+ (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
968
+ )
969
+ )
970
+ (2): ModuleList(
971
+ (0): ResnetBlock(
972
+ (mlp): Sequential(
973
+ (0): SiLU()
974
+ (1): Linear(in_features=512, out_features=256, bias=True)
975
+ )
976
+ (block1): Block(
977
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
978
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
979
+ (act): SiLU()
980
+ )
981
+ (block2): Block(
982
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
983
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
984
+ (act): SiLU()
985
+ )
986
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
987
+ )
988
+ (1): ResnetBlock(
989
+ (mlp): Sequential(
990
+ (0): SiLU()
991
+ (1): Linear(in_features=512, out_features=256, bias=True)
992
+ )
993
+ (block1): Block(
994
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
995
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
996
+ (act): SiLU()
997
+ )
998
+ (block2): Block(
999
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1000
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1001
+ (act): SiLU()
1002
+ )
1003
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
1004
+ )
1005
+ (2): Residual(
1006
+ (fn): PreNorm(
1007
+ (fn): LinearAttention(
1008
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1009
+ (to_out): Sequential(
1010
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
1011
+ (1): LayerNorm()
1012
+ )
1013
+ )
1014
+ (norm): LayerNorm()
1015
+ )
1016
+ )
1017
+ (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1018
+ )
1019
+ )
1020
+ (mid_block1): ResnetBlock(
1021
+ (mlp): Sequential(
1022
+ (0): SiLU()
1023
+ (1): Linear(in_features=512, out_features=256, bias=True)
1024
+ )
1025
+ (block1): Block(
1026
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1027
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1028
+ (act): SiLU()
1029
+ )
1030
+ (block2): Block(
1031
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1032
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1033
+ (act): SiLU()
1034
+ )
1035
+ (res_conv): Identity()
1036
+ )
1037
+ (mid_attn): Residual(
1038
+ (fn): PreNorm(
1039
+ (fn): Attention(
1040
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1041
+ (to_out): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
1042
+ )
1043
+ (norm): LayerNorm()
1044
+ )
1045
+ )
1046
+ (mid_block2): ResnetBlock(
1047
+ (mlp): Sequential(
1048
+ (0): SiLU()
1049
+ (1): Linear(in_features=512, out_features=256, bias=True)
1050
+ )
1051
+ (block1): Block(
1052
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1053
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1054
+ (act): SiLU()
1055
+ )
1056
+ (block2): Block(
1057
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1058
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1059
+ (act): SiLU()
1060
+ )
1061
+ (res_conv): Identity()
1062
+ )
1063
+ (final_res_block): ResnetBlock(
1064
+ (mlp): Sequential(
1065
+ (0): SiLU()
1066
+ (1): Linear(in_features=512, out_features=256, bias=True)
1067
+ )
1068
+ (block1): Block(
1069
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1070
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1071
+ (act): SiLU()
1072
+ )
1073
+ (block2): Block(
1074
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1075
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1076
+ (act): SiLU()
1077
+ )
1078
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
1079
+ )
1080
+ (final_conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
1081
+ )
1082
+ (conv_seg_new): Conv2d(256, 151, kernel_size=(1, 1), stride=(1, 1))
1083
+ (embed): Embedding(151, 16)
1084
+ )
1085
+ init_cfg={'type': 'Pretrained', 'checkpoint': 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'}
1086
+ )
1087
+ 2023-03-04 17:39:09,635 - mmseg - INFO - Loaded 20210 images
1088
+ 2023-03-04 17:39:10,639 - mmseg - INFO - Loaded 2000 images
1089
+ 2023-03-04 17:39:10,642 - mmseg - INFO - Start running, host: laizeqiang@SH-IDC1-10-140-37-130, work_dir: /mnt/petrelfs/laizeqiang/mmseg-baseline/work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits
1090
+ 2023-03-04 17:39:10,642 - mmseg - INFO - Hooks will be executed in the following order:
1091
+ before_run:
1092
+ (VERY_HIGH ) StepLrUpdaterHook
1093
+ (NORMAL ) CheckpointHook
1094
+ (LOW ) DistEvalHookMultiSteps
1095
+ (VERY_LOW ) TextLoggerHook
1096
+ --------------------
1097
+ before_train_epoch:
1098
+ (VERY_HIGH ) StepLrUpdaterHook
1099
+ (LOW ) IterTimerHook
1100
+ (LOW ) DistEvalHookMultiSteps
1101
+ (VERY_LOW ) TextLoggerHook
1102
+ --------------------
1103
+ before_train_iter:
1104
+ (VERY_HIGH ) StepLrUpdaterHook
1105
+ (LOW ) IterTimerHook
1106
+ (LOW ) DistEvalHookMultiSteps
1107
+ --------------------
1108
+ after_train_iter:
1109
+ (ABOVE_NORMAL) OptimizerHook
1110
+ (NORMAL ) CheckpointHook
1111
+ (LOW ) IterTimerHook
1112
+ (LOW ) DistEvalHookMultiSteps
1113
+ (VERY_LOW ) TextLoggerHook
1114
+ --------------------
1115
+ after_train_epoch:
1116
+ (NORMAL ) CheckpointHook
1117
+ (LOW ) DistEvalHookMultiSteps
1118
+ (VERY_LOW ) TextLoggerHook
1119
+ --------------------
1120
+ before_val_epoch:
1121
+ (LOW ) IterTimerHook
1122
+ (VERY_LOW ) TextLoggerHook
1123
+ --------------------
1124
+ before_val_iter:
1125
+ (LOW ) IterTimerHook
1126
+ --------------------
1127
+ after_val_iter:
1128
+ (LOW ) IterTimerHook
1129
+ --------------------
1130
+ after_val_epoch:
1131
+ (VERY_LOW ) TextLoggerHook
1132
+ --------------------
1133
+ after_run:
1134
+ (VERY_LOW ) TextLoggerHook
1135
+ --------------------
1136
+ 2023-03-04 17:39:10,642 - mmseg - INFO - workflow: [('train', 1)], max: 80000 iters
1137
+ 2023-03-04 17:39:10,642 - mmseg - INFO - Checkpoints will be saved to /mnt/petrelfs/laizeqiang/mmseg-baseline/work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits by HardDiskBackend.
1138
+ 2023-03-04 17:39:48,984 - mmseg - INFO - Iter [50/80000] lr: 7.350e-06, eta: 6:37:37, time: 0.298, data_time: 0.015, memory: 19750, decode.loss_ce: 4.0785, decode.acc_seg: 8.5126, loss: 4.0785
1139
+ 2023-03-04 17:39:57,554 - mmseg - INFO - Iter [100/80000] lr: 1.485e-05, eta: 5:12:50, time: 0.171, data_time: 0.007, memory: 19750, decode.loss_ce: 2.9187, decode.acc_seg: 27.5140, loss: 2.9187
1140
+ 2023-03-04 17:40:06,040 - mmseg - INFO - Iter [150/80000] lr: 2.235e-05, eta: 4:43:42, time: 0.170, data_time: 0.007, memory: 19750, decode.loss_ce: 2.3354, decode.acc_seg: 43.1981, loss: 2.3354
1141
+ 2023-03-04 17:40:14,295 - mmseg - INFO - Iter [200/80000] lr: 2.985e-05, eta: 4:27:32, time: 0.165, data_time: 0.007, memory: 19750, decode.loss_ce: 1.8341, decode.acc_seg: 55.2996, loss: 1.8341
1142
+ 2023-03-04 17:40:22,579 - mmseg - INFO - Iter [250/80000] lr: 3.735e-05, eta: 4:17:56, time: 0.166, data_time: 0.007, memory: 19750, decode.loss_ce: 1.5030, decode.acc_seg: 63.0600, loss: 1.5030
1143
+ 2023-03-04 17:40:30,864 - mmseg - INFO - Iter [300/80000] lr: 4.485e-05, eta: 4:11:29, time: 0.166, data_time: 0.006, memory: 19750, decode.loss_ce: 1.2782, decode.acc_seg: 67.0304, loss: 1.2782
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_173902.log.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"env_info": "sys.platform: linux\nPython: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB\nCUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch\nNVCC: Cuda compilation tools, release 11.6, V11.6.124\nGCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)\nPyTorch: 1.13.1\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.6\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.14.1\nOpenCV: 4.7.0\nMMCV: 1.7.1\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.6\nMMSegmentation: 0.30.0+6749699", "seed": 984079870, "exp_name": "ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits.py", "mmseg_version": "0.30.0+6749699", "config": "norm_cfg = dict(type='SyncBN', requires_grad=True)\ncheckpoint = 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'\nmodel = dict(\n type='EncoderDecoderFreeze',\n freeze_parameters=['backbone', 'decode_head'],\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',\n backbone=dict(\n type='MixVisionTransformerCustomInitWeights',\n in_channels=3,\n embed_dims=64,\n num_stages=4,\n num_layers=[3, 4, 6, 3],\n num_heads=[1, 2, 5, 8],\n patch_sizes=[7, 3, 3, 3],\n sr_ratios=[8, 4, 2, 1],\n out_indices=(0, 1, 2, 3),\n mlp_ratio=4,\n qkv_bias=True,\n drop_rate=0.0,\n attn_drop_rate=0.0,\n drop_path_rate=0.1,\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'\n ),\n decode_head=dict(\n type='SegformerHeadUnetFCHeadSingleStepLogits',\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',\n dim=128,\n out_dim=256,\n unet_channels=166,\n dim_mults=[1, 1, 1],\n cat_embedding_dim=16,\n in_channels=[64, 128, 320, 512],\n in_index=[0, 1, 2, 3],\n channels=256,\n dropout_ratio=0.1,\n num_classes=151,\n norm_cfg=dict(type='SyncBN', requires_grad=True),\n align_corners=False,\n ignore_index=0,\n loss_decode=dict(\n type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),\n train_cfg=dict(),\n test_cfg=dict(mode='whole'))\ndataset_type = 'ADE20K151Dataset'\ndata_root = 'data/ade/ADEChallengeData2016'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ncrop_size = (512, 512)\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=4,\n workers_per_gpu=4,\n train=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/training',\n ann_dir='annotations/training',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n ]),\n val=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]),\n test=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]))\nlog_config = dict(\n interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\ncudnn_benchmark = True\noptimizer = dict(\n type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)\noptimizer_config = dict()\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=1000,\n warmup_ratio=1e-06,\n step=10000,\n gamma=0.5,\n min_lr=1e-06,\n by_epoch=False)\nrunner = dict(type='IterBasedRunner', max_iters=80000)\ncheckpoint_config = dict(by_epoch=False, interval=8000)\nevaluation = dict(\n interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')\nwork_dir = './work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits'\ngpu_ids = range(0, 8)\nauto_resume = True\ndevice = 'cuda'\nseed = 984079870\n", "CLASSES": ["background", "wall", "building", "sky", "floor", "tree", "ceiling", "road", "bed ", "windowpane", "grass", "cabinet", "sidewalk", "person", "earth", "door", "table", "mountain", "plant", "curtain", "chair", "car", "water", "painting", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock", "wardrobe", "lamp", "bathtub", "railing", "cushion", "base", "box", "column", "signboard", "chest of drawers", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator", "grandstand", "path", "stairs", "runway", "case", "pool table", "pillow", "screen door", "stairway", "river", "bridge", "bookcase", "blind", "coffee table", "toilet", "flower", "book", "hill", "bench", "countertop", "stove", "palm", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel", "bus", "towel", "light", "truck", "tower", "chandelier", "awning", "streetlight", "booth", "television receiver", "airplane", "dirt track", "apparel", "pole", "land", "bannister", "escalator", "ottoman", "bottle", "buffet", "poster", "stage", "van", "ship", "fountain", "conveyer belt", "canopy", "washer", "plaything", "swimming pool", "stool", "barrel", "basket", "waterfall", "tent", "bag", "minibike", "cradle", "oven", "ball", "food", "step", "tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket", "sculpture", "hood", "sconce", "vase", "traffic light", "tray", "ashcan", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass", "clock", "flag"], "PALETTE": [[0, 0, 0], [120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], [102, 255, 0], [92, 0, 255]], "hook_msgs": {}}
2
+ {"mode": "train", "epoch": 1, "iter": 50, "lr": 1e-05, "memory": 19750, "data_time": 0.01458, "decode.loss_ce": 4.07853, "decode.acc_seg": 8.51256, "loss": 4.07853, "time": 0.2984}
3
+ {"mode": "train", "epoch": 1, "iter": 100, "lr": 1e-05, "memory": 19750, "data_time": 0.00696, "decode.loss_ce": 2.91874, "decode.acc_seg": 27.51403, "loss": 2.91874, "time": 0.17144}
4
+ {"mode": "train", "epoch": 1, "iter": 150, "lr": 2e-05, "memory": 19750, "data_time": 0.00722, "decode.loss_ce": 2.33541, "decode.acc_seg": 43.1981, "loss": 2.33541, "time": 0.16968}
5
+ {"mode": "train", "epoch": 1, "iter": 200, "lr": 3e-05, "memory": 19750, "data_time": 0.00706, "decode.loss_ce": 1.83407, "decode.acc_seg": 55.29959, "loss": 1.83407, "time": 0.16511}
6
+ {"mode": "train", "epoch": 1, "iter": 250, "lr": 4e-05, "memory": 19750, "data_time": 0.00692, "decode.loss_ce": 1.50299, "decode.acc_seg": 63.05997, "loss": 1.50299, "time": 0.16567}
7
+ {"mode": "train", "epoch": 1, "iter": 300, "lr": 4e-05, "memory": 19750, "data_time": 0.00637, "decode.loss_ce": 1.27818, "decode.acc_seg": 67.03043, "loss": 1.27818, "time": 0.16569}
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_174053.log ADDED
The diff for this file is too large to render. See raw diff
 
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_174053.log.json ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"env_info": "sys.platform: linux\nPython: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB\nCUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch\nNVCC: Cuda compilation tools, release 11.6, V11.6.124\nGCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)\nPyTorch: 1.13.1\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.6\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.14.1\nOpenCV: 4.7.0\nMMCV: 1.7.1\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.6\nMMSegmentation: 0.30.0+6749699", "seed": 358795777, "exp_name": "ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits.py", "mmseg_version": "0.30.0+6749699", "config": "norm_cfg = dict(type='SyncBN', requires_grad=True)\ncheckpoint = 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'\nmodel = dict(\n type='EncoderDecoderFreeze',\n freeze_parameters=['backbone', 'decode_head'],\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',\n backbone=dict(\n type='MixVisionTransformerCustomInitWeights',\n in_channels=3,\n embed_dims=64,\n num_stages=4,\n num_layers=[3, 4, 6, 3],\n num_heads=[1, 2, 5, 8],\n patch_sizes=[7, 3, 3, 3],\n sr_ratios=[8, 4, 2, 1],\n out_indices=(0, 1, 2, 3),\n mlp_ratio=4,\n qkv_bias=True,\n drop_rate=0.0,\n attn_drop_rate=0.0,\n drop_path_rate=0.1,\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'\n ),\n decode_head=dict(\n type='SegformerHeadUnetFCHeadSingleStepLogits',\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',\n dim=128,\n out_dim=256,\n unet_channels=166,\n dim_mults=[1, 1, 1],\n cat_embedding_dim=16,\n in_channels=[64, 128, 320, 512],\n in_index=[0, 1, 2, 3],\n channels=256,\n dropout_ratio=0.1,\n num_classes=151,\n norm_cfg=dict(type='SyncBN', requires_grad=True),\n align_corners=False,\n ignore_index=0,\n loss_decode=dict(\n type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),\n train_cfg=dict(),\n test_cfg=dict(mode='whole'))\ndataset_type = 'ADE20K151Dataset'\ndata_root = 'data/ade/ADEChallengeData2016'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ncrop_size = (512, 512)\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=4,\n workers_per_gpu=4,\n train=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/training',\n ann_dir='annotations/training',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n ]),\n val=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]),\n test=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]))\nlog_config = dict(\n interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\ncudnn_benchmark = True\noptimizer = dict(\n type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)\noptimizer_config = dict()\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=1000,\n warmup_ratio=1e-06,\n step=10000,\n gamma=0.5,\n min_lr=1e-06,\n by_epoch=False)\nrunner = dict(type='IterBasedRunner', max_iters=80000)\ncheckpoint_config = dict(by_epoch=False, interval=8000)\nevaluation = dict(\n interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')\nwork_dir = './work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits'\ngpu_ids = range(0, 8)\nauto_resume = True\ndevice = 'cuda'\nseed = 358795777\n", "CLASSES": ["background", "wall", "building", "sky", "floor", "tree", "ceiling", "road", "bed ", "windowpane", "grass", "cabinet", "sidewalk", "person", "earth", "door", "table", "mountain", "plant", "curtain", "chair", "car", "water", "painting", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock", "wardrobe", "lamp", "bathtub", "railing", "cushion", "base", "box", "column", "signboard", "chest of drawers", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator", "grandstand", "path", "stairs", "runway", "case", "pool table", "pillow", "screen door", "stairway", "river", "bridge", "bookcase", "blind", "coffee table", "toilet", "flower", "book", "hill", "bench", "countertop", "stove", "palm", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel", "bus", "towel", "light", "truck", "tower", "chandelier", "awning", "streetlight", "booth", "television receiver", "airplane", "dirt track", "apparel", "pole", "land", "bannister", "escalator", "ottoman", "bottle", "buffet", "poster", "stage", "van", "ship", "fountain", "conveyer belt", "canopy", "washer", "plaything", "swimming pool", "stool", "barrel", "basket", "waterfall", "tent", "bag", "minibike", "cradle", "oven", "ball", "food", "step", "tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket", "sculpture", "hood", "sconce", "vase", "traffic light", "tray", "ashcan", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass", "clock", "flag"], "PALETTE": [[0, 0, 0], [120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], [102, 255, 0], [92, 0, 255]], "hook_msgs": {}}
2
+ {"mode": "train", "epoch": 1, "iter": 50, "lr": 1e-05, "memory": 19750, "data_time": 0.01536, "decode.loss_ce": 3.70671, "decode.acc_seg": 12.7455, "loss": 3.70671, "time": 0.29152}
3
+ {"mode": "train", "epoch": 1, "iter": 100, "lr": 1e-05, "memory": 19750, "data_time": 0.0067, "decode.loss_ce": 2.83767, "decode.acc_seg": 33.79282, "loss": 2.83767, "time": 0.17411}
4
+ {"mode": "train", "epoch": 1, "iter": 150, "lr": 2e-05, "memory": 19750, "data_time": 0.00681, "decode.loss_ce": 2.29515, "decode.acc_seg": 45.9183, "loss": 2.29515, "time": 0.17116}
5
+ {"mode": "train", "epoch": 1, "iter": 200, "lr": 3e-05, "memory": 19750, "data_time": 0.00602, "decode.loss_ce": 1.87201, "decode.acc_seg": 55.43423, "loss": 1.87201, "time": 0.1687}
6
+ {"mode": "train", "epoch": 1, "iter": 250, "lr": 4e-05, "memory": 19750, "data_time": 0.0063, "decode.loss_ce": 1.58648, "decode.acc_seg": 61.11375, "loss": 1.58648, "time": 0.16766}
7
+ {"mode": "train", "epoch": 1, "iter": 300, "lr": 4e-05, "memory": 19750, "data_time": 0.00626, "decode.loss_ce": 1.29469, "decode.acc_seg": 67.78908, "loss": 1.29469, "time": 0.16954}
8
+ {"mode": "train", "epoch": 1, "iter": 350, "lr": 5e-05, "memory": 19750, "data_time": 0.00687, "decode.loss_ce": 1.17611, "decode.acc_seg": 69.68662, "loss": 1.17611, "time": 0.16891}
9
+ {"mode": "train", "epoch": 1, "iter": 400, "lr": 6e-05, "memory": 19750, "data_time": 0.00608, "decode.loss_ce": 1.04045, "decode.acc_seg": 72.46196, "loss": 1.04045, "time": 0.16652}
10
+ {"mode": "train", "epoch": 1, "iter": 450, "lr": 7e-05, "memory": 19750, "data_time": 0.00665, "decode.loss_ce": 0.90291, "decode.acc_seg": 73.95125, "loss": 0.90291, "time": 0.16492}
11
+ {"mode": "train", "epoch": 1, "iter": 500, "lr": 7e-05, "memory": 19750, "data_time": 0.00686, "decode.loss_ce": 0.84245, "decode.acc_seg": 75.08308, "loss": 0.84245, "time": 0.16755}
12
+ {"mode": "train", "epoch": 1, "iter": 550, "lr": 8e-05, "memory": 19750, "data_time": 0.00668, "decode.loss_ce": 0.72894, "decode.acc_seg": 77.32032, "loss": 0.72894, "time": 0.17103}
13
+ {"mode": "train", "epoch": 1, "iter": 600, "lr": 9e-05, "memory": 19750, "data_time": 0.00692, "decode.loss_ce": 0.70877, "decode.acc_seg": 77.56479, "loss": 0.70877, "time": 0.16587}
14
+ {"mode": "train", "epoch": 2, "iter": 650, "lr": 0.0001, "memory": 19750, "data_time": 0.05444, "decode.loss_ce": 0.70565, "decode.acc_seg": 77.28747, "loss": 0.70565, "time": 0.21387}
15
+ {"mode": "train", "epoch": 2, "iter": 700, "lr": 0.0001, "memory": 19750, "data_time": 0.00651, "decode.loss_ce": 0.60632, "decode.acc_seg": 79.73918, "loss": 0.60632, "time": 0.16371}
16
+ {"mode": "train", "epoch": 2, "iter": 750, "lr": 0.00011, "memory": 19750, "data_time": 0.00707, "decode.loss_ce": 0.59834, "decode.acc_seg": 79.62232, "loss": 0.59834, "time": 0.16193}
17
+ {"mode": "train", "epoch": 2, "iter": 800, "lr": 0.00012, "memory": 19750, "data_time": 0.00665, "decode.loss_ce": 0.60848, "decode.acc_seg": 79.37198, "loss": 0.60848, "time": 0.16804}
18
+ {"mode": "train", "epoch": 2, "iter": 850, "lr": 0.00013, "memory": 19750, "data_time": 0.0067, "decode.loss_ce": 0.55236, "decode.acc_seg": 80.7622, "loss": 0.55236, "time": 0.17566}
19
+ {"mode": "train", "epoch": 2, "iter": 900, "lr": 0.00013, "memory": 19750, "data_time": 0.00684, "decode.loss_ce": 0.51953, "decode.acc_seg": 81.87914, "loss": 0.51953, "time": 0.17055}
20
+ {"mode": "train", "epoch": 2, "iter": 950, "lr": 0.00014, "memory": 19750, "data_time": 0.00679, "decode.loss_ce": 0.58242, "decode.acc_seg": 80.09418, "loss": 0.58242, "time": 0.1658}
21
+ {"mode": "train", "epoch": 2, "iter": 1000, "lr": 0.00015, "memory": 19750, "data_time": 0.00717, "decode.loss_ce": 0.50039, "decode.acc_seg": 82.35546, "loss": 0.50039, "time": 0.16939}
22
+ {"mode": "train", "epoch": 2, "iter": 1050, "lr": 0.00015, "memory": 19750, "data_time": 0.00674, "decode.loss_ce": 0.56778, "decode.acc_seg": 80.58647, "loss": 0.56778, "time": 0.16986}
23
+ {"mode": "train", "epoch": 2, "iter": 1100, "lr": 0.00015, "memory": 19750, "data_time": 0.00664, "decode.loss_ce": 0.49441, "decode.acc_seg": 82.70061, "loss": 0.49441, "time": 0.16401}
24
+ {"mode": "train", "epoch": 2, "iter": 1150, "lr": 0.00015, "memory": 19750, "data_time": 0.00655, "decode.loss_ce": 0.51378, "decode.acc_seg": 82.00246, "loss": 0.51378, "time": 0.16508}
25
+ {"mode": "train", "epoch": 2, "iter": 1200, "lr": 0.00015, "memory": 19750, "data_time": 0.0066, "decode.loss_ce": 0.50145, "decode.acc_seg": 82.46899, "loss": 0.50145, "time": 0.17358}
26
+ {"mode": "train", "epoch": 2, "iter": 1250, "lr": 0.00015, "memory": 19750, "data_time": 0.00695, "decode.loss_ce": 0.47563, "decode.acc_seg": 83.09324, "loss": 0.47563, "time": 0.16785}
27
+ {"mode": "train", "epoch": 3, "iter": 1300, "lr": 0.00015, "memory": 19750, "data_time": 0.05795, "decode.loss_ce": 0.4762, "decode.acc_seg": 83.24759, "loss": 0.4762, "time": 0.23026}
28
+ {"mode": "train", "epoch": 3, "iter": 1350, "lr": 0.00015, "memory": 19750, "data_time": 0.00627, "decode.loss_ce": 0.44346, "decode.acc_seg": 84.0603, "loss": 0.44346, "time": 0.17177}
29
+ {"mode": "train", "epoch": 3, "iter": 1400, "lr": 0.00015, "memory": 19750, "data_time": 0.00717, "decode.loss_ce": 0.47084, "decode.acc_seg": 83.08637, "loss": 0.47084, "time": 0.16471}
30
+ {"mode": "train", "epoch": 3, "iter": 1450, "lr": 0.00015, "memory": 19750, "data_time": 0.00639, "decode.loss_ce": 0.48639, "decode.acc_seg": 83.01241, "loss": 0.48639, "time": 0.16727}
31
+ {"mode": "train", "epoch": 3, "iter": 1500, "lr": 0.00015, "memory": 19750, "data_time": 0.00674, "decode.loss_ce": 0.41735, "decode.acc_seg": 84.91397, "loss": 0.41735, "time": 0.16454}
32
+ {"mode": "train", "epoch": 3, "iter": 1550, "lr": 0.00015, "memory": 19750, "data_time": 0.00674, "decode.loss_ce": 0.43834, "decode.acc_seg": 84.30104, "loss": 0.43834, "time": 0.16501}
33
+ {"mode": "train", "epoch": 3, "iter": 1600, "lr": 0.00015, "memory": 19750, "data_time": 0.00687, "decode.loss_ce": 0.42182, "decode.acc_seg": 84.60363, "loss": 0.42182, "time": 0.16472}
34
+ {"mode": "train", "epoch": 3, "iter": 1650, "lr": 0.00015, "memory": 19750, "data_time": 0.00682, "decode.loss_ce": 0.44227, "decode.acc_seg": 84.10865, "loss": 0.44227, "time": 0.17544}
35
+ {"mode": "train", "epoch": 3, "iter": 1700, "lr": 0.00015, "memory": 19750, "data_time": 0.00676, "decode.loss_ce": 0.42305, "decode.acc_seg": 84.69608, "loss": 0.42305, "time": 0.16529}
36
+ {"mode": "train", "epoch": 3, "iter": 1750, "lr": 0.00015, "memory": 19750, "data_time": 0.00635, "decode.loss_ce": 0.40065, "decode.acc_seg": 85.28233, "loss": 0.40065, "time": 0.17015}
37
+ {"mode": "train", "epoch": 3, "iter": 1800, "lr": 0.00015, "memory": 19750, "data_time": 0.00687, "decode.loss_ce": 0.44377, "decode.acc_seg": 84.24874, "loss": 0.44377, "time": 0.17315}
38
+ {"mode": "train", "epoch": 3, "iter": 1850, "lr": 0.00015, "memory": 19750, "data_time": 0.00669, "decode.loss_ce": 0.38964, "decode.acc_seg": 85.73132, "loss": 0.38964, "time": 0.16534}
39
+ {"mode": "train", "epoch": 4, "iter": 1900, "lr": 0.00015, "memory": 19750, "data_time": 0.05387, "decode.loss_ce": 0.40727, "decode.acc_seg": 85.29138, "loss": 0.40727, "time": 0.21898}
40
+ {"mode": "train", "epoch": 4, "iter": 1950, "lr": 0.00015, "memory": 19750, "data_time": 0.00683, "decode.loss_ce": 0.40309, "decode.acc_seg": 85.37785, "loss": 0.40309, "time": 0.16413}
41
+ {"mode": "train", "epoch": 4, "iter": 2000, "lr": 0.00015, "memory": 19750, "data_time": 0.00651, "decode.loss_ce": 0.39313, "decode.acc_seg": 85.66311, "loss": 0.39313, "time": 0.16628}
42
+ {"mode": "train", "epoch": 4, "iter": 2050, "lr": 0.00015, "memory": 19750, "data_time": 0.00657, "decode.loss_ce": 0.37103, "decode.acc_seg": 86.2932, "loss": 0.37103, "time": 0.1699}
43
+ {"mode": "train", "epoch": 4, "iter": 2100, "lr": 0.00015, "memory": 19750, "data_time": 0.00694, "decode.loss_ce": 0.41258, "decode.acc_seg": 84.80687, "loss": 0.41258, "time": 0.16672}
44
+ {"mode": "train", "epoch": 4, "iter": 2150, "lr": 0.00015, "memory": 19750, "data_time": 0.00683, "decode.loss_ce": 0.37237, "decode.acc_seg": 85.9416, "loss": 0.37237, "time": 0.16772}
45
+ {"mode": "train", "epoch": 4, "iter": 2200, "lr": 0.00015, "memory": 19750, "data_time": 0.0067, "decode.loss_ce": 0.36418, "decode.acc_seg": 86.28289, "loss": 0.36418, "time": 0.16504}
46
+ {"mode": "train", "epoch": 4, "iter": 2250, "lr": 0.00015, "memory": 19750, "data_time": 0.00682, "decode.loss_ce": 0.37703, "decode.acc_seg": 86.25602, "loss": 0.37703, "time": 0.16477}
47
+ {"mode": "train", "epoch": 4, "iter": 2300, "lr": 0.00015, "memory": 19750, "data_time": 0.00658, "decode.loss_ce": 0.38606, "decode.acc_seg": 85.83164, "loss": 0.38606, "time": 0.17027}
48
+ {"mode": "train", "epoch": 4, "iter": 2350, "lr": 0.00015, "memory": 19750, "data_time": 0.00701, "decode.loss_ce": 0.36377, "decode.acc_seg": 86.44438, "loss": 0.36377, "time": 0.16446}
49
+ {"mode": "train", "epoch": 4, "iter": 2400, "lr": 0.00015, "memory": 19750, "data_time": 0.0069, "decode.loss_ce": 0.37793, "decode.acc_seg": 85.84639, "loss": 0.37793, "time": 0.16814}
50
+ {"mode": "train", "epoch": 4, "iter": 2450, "lr": 0.00015, "memory": 19750, "data_time": 0.00671, "decode.loss_ce": 0.38427, "decode.acc_seg": 85.96406, "loss": 0.38427, "time": 0.16932}
51
+ {"mode": "train", "epoch": 4, "iter": 2500, "lr": 0.00015, "memory": 19750, "data_time": 0.00823, "decode.loss_ce": 0.39177, "decode.acc_seg": 85.56283, "loss": 0.39177, "time": 0.17777}
52
+ {"mode": "train", "epoch": 5, "iter": 2550, "lr": 0.00015, "memory": 19750, "data_time": 0.05398, "decode.loss_ce": 0.38108, "decode.acc_seg": 86.09614, "loss": 0.38108, "time": 0.21458}
53
+ {"mode": "train", "epoch": 5, "iter": 2600, "lr": 0.00015, "memory": 19750, "data_time": 0.00655, "decode.loss_ce": 0.36287, "decode.acc_seg": 86.33698, "loss": 0.36287, "time": 0.16329}
54
+ {"mode": "train", "epoch": 5, "iter": 2650, "lr": 0.00015, "memory": 19750, "data_time": 0.00676, "decode.loss_ce": 0.35487, "decode.acc_seg": 86.52721, "loss": 0.35487, "time": 0.16696}
55
+ {"mode": "train", "epoch": 5, "iter": 2700, "lr": 0.00015, "memory": 19750, "data_time": 0.00703, "decode.loss_ce": 0.34173, "decode.acc_seg": 86.93155, "loss": 0.34173, "time": 0.16757}
56
+ {"mode": "train", "epoch": 5, "iter": 2750, "lr": 0.00015, "memory": 19750, "data_time": 0.00765, "decode.loss_ce": 0.367, "decode.acc_seg": 86.25868, "loss": 0.367, "time": 0.17299}
57
+ {"mode": "train", "epoch": 5, "iter": 2800, "lr": 0.00015, "memory": 19750, "data_time": 0.00677, "decode.loss_ce": 0.34542, "decode.acc_seg": 87.12672, "loss": 0.34542, "time": 0.16933}
58
+ {"mode": "train", "epoch": 5, "iter": 2850, "lr": 0.00015, "memory": 19750, "data_time": 0.00673, "decode.loss_ce": 0.33885, "decode.acc_seg": 86.94783, "loss": 0.33885, "time": 0.17544}
59
+ {"mode": "train", "epoch": 5, "iter": 2900, "lr": 0.00015, "memory": 19750, "data_time": 0.00719, "decode.loss_ce": 0.37236, "decode.acc_seg": 86.18583, "loss": 0.37236, "time": 0.16504}
60
+ {"mode": "train", "epoch": 5, "iter": 2950, "lr": 0.00015, "memory": 19750, "data_time": 0.00744, "decode.loss_ce": 0.35252, "decode.acc_seg": 86.69874, "loss": 0.35252, "time": 0.16951}
61
+ {"mode": "train", "epoch": 5, "iter": 3000, "lr": 0.00015, "memory": 19750, "data_time": 0.00674, "decode.loss_ce": 0.35913, "decode.acc_seg": 86.69249, "loss": 0.35913, "time": 0.16899}
62
+ {"mode": "train", "epoch": 5, "iter": 3050, "lr": 0.00015, "memory": 19750, "data_time": 0.00722, "decode.loss_ce": 0.34791, "decode.acc_seg": 86.99021, "loss": 0.34791, "time": 0.17252}
63
+ {"mode": "train", "epoch": 5, "iter": 3100, "lr": 0.00015, "memory": 19750, "data_time": 0.007, "decode.loss_ce": 0.3334, "decode.acc_seg": 87.31124, "loss": 0.3334, "time": 0.16188}
64
+ {"mode": "train", "epoch": 5, "iter": 3150, "lr": 0.00015, "memory": 19750, "data_time": 0.00757, "decode.loss_ce": 0.34055, "decode.acc_seg": 87.3015, "loss": 0.34055, "time": 0.16971}
65
+ {"mode": "train", "epoch": 6, "iter": 3200, "lr": 0.00015, "memory": 19750, "data_time": 0.05379, "decode.loss_ce": 0.36563, "decode.acc_seg": 86.3573, "loss": 0.36563, "time": 0.21416}
66
+ {"mode": "train", "epoch": 6, "iter": 3250, "lr": 0.00015, "memory": 19750, "data_time": 0.00682, "decode.loss_ce": 0.31579, "decode.acc_seg": 88.11451, "loss": 0.31579, "time": 0.17126}
67
+ {"mode": "train", "epoch": 6, "iter": 3300, "lr": 0.00015, "memory": 19750, "data_time": 0.00661, "decode.loss_ce": 0.36865, "decode.acc_seg": 86.31161, "loss": 0.36865, "time": 0.16823}
68
+ {"mode": "train", "epoch": 6, "iter": 3350, "lr": 0.00015, "memory": 19750, "data_time": 0.00672, "decode.loss_ce": 0.34042, "decode.acc_seg": 87.06849, "loss": 0.34042, "time": 0.17189}
69
+ {"mode": "train", "epoch": 6, "iter": 3400, "lr": 0.00015, "memory": 19750, "data_time": 0.00766, "decode.loss_ce": 0.33385, "decode.acc_seg": 87.29016, "loss": 0.33385, "time": 0.17741}
70
+ {"mode": "train", "epoch": 6, "iter": 3450, "lr": 0.00015, "memory": 19750, "data_time": 0.00694, "decode.loss_ce": 0.33919, "decode.acc_seg": 86.97043, "loss": 0.33919, "time": 0.16819}
71
+ {"mode": "train", "epoch": 6, "iter": 3500, "lr": 0.00015, "memory": 19750, "data_time": 0.00677, "decode.loss_ce": 0.33378, "decode.acc_seg": 87.10703, "loss": 0.33378, "time": 0.16973}
72
+ {"mode": "train", "epoch": 6, "iter": 3550, "lr": 0.00015, "memory": 19750, "data_time": 0.00703, "decode.loss_ce": 0.32859, "decode.acc_seg": 87.48225, "loss": 0.32859, "time": 0.17087}
73
+ {"mode": "train", "epoch": 6, "iter": 3600, "lr": 0.00015, "memory": 19750, "data_time": 0.00689, "decode.loss_ce": 0.34604, "decode.acc_seg": 86.52245, "loss": 0.34604, "time": 0.17393}
74
+ {"mode": "train", "epoch": 6, "iter": 3650, "lr": 0.00015, "memory": 19750, "data_time": 0.00642, "decode.loss_ce": 0.34195, "decode.acc_seg": 87.02483, "loss": 0.34195, "time": 0.17442}
75
+ {"mode": "train", "epoch": 6, "iter": 3700, "lr": 0.00015, "memory": 19750, "data_time": 0.00727, "decode.loss_ce": 0.3416, "decode.acc_seg": 87.16491, "loss": 0.3416, "time": 0.17331}
76
+ {"mode": "train", "epoch": 6, "iter": 3750, "lr": 0.00015, "memory": 19750, "data_time": 0.00681, "decode.loss_ce": 0.34954, "decode.acc_seg": 86.81752, "loss": 0.34954, "time": 0.16843}
77
+ {"mode": "train", "epoch": 7, "iter": 3800, "lr": 0.00015, "memory": 19750, "data_time": 0.05296, "decode.loss_ce": 0.34112, "decode.acc_seg": 87.0386, "loss": 0.34112, "time": 0.21411}
78
+ {"mode": "train", "epoch": 7, "iter": 3850, "lr": 0.00015, "memory": 19750, "data_time": 0.00656, "decode.loss_ce": 0.34441, "decode.acc_seg": 86.98191, "loss": 0.34441, "time": 0.16859}
79
+ {"mode": "train", "epoch": 7, "iter": 3900, "lr": 0.00015, "memory": 19750, "data_time": 0.00633, "decode.loss_ce": 0.3237, "decode.acc_seg": 87.60705, "loss": 0.3237, "time": 0.17053}
80
+ {"mode": "train", "epoch": 7, "iter": 3950, "lr": 0.00015, "memory": 19750, "data_time": 0.00643, "decode.loss_ce": 0.34264, "decode.acc_seg": 87.06793, "loss": 0.34264, "time": 0.16719}
81
+ {"mode": "train", "epoch": 7, "iter": 4000, "lr": 0.00015, "memory": 19750, "data_time": 0.00677, "decode.loss_ce": 0.33751, "decode.acc_seg": 86.86388, "loss": 0.33751, "time": 0.17013}
82
+ {"mode": "train", "epoch": 7, "iter": 4050, "lr": 0.00015, "memory": 19750, "data_time": 0.00773, "decode.loss_ce": 0.32156, "decode.acc_seg": 87.75299, "loss": 0.32156, "time": 0.17056}
83
+ {"mode": "train", "epoch": 7, "iter": 4100, "lr": 0.00015, "memory": 19750, "data_time": 0.00679, "decode.loss_ce": 0.33281, "decode.acc_seg": 87.0733, "loss": 0.33281, "time": 0.16674}
84
+ {"mode": "train", "epoch": 7, "iter": 4150, "lr": 0.00015, "memory": 19750, "data_time": 0.00678, "decode.loss_ce": 0.29832, "decode.acc_seg": 88.33082, "loss": 0.29832, "time": 0.17049}
85
+ {"mode": "train", "epoch": 7, "iter": 4200, "lr": 0.00015, "memory": 19750, "data_time": 0.00734, "decode.loss_ce": 0.31208, "decode.acc_seg": 88.04595, "loss": 0.31208, "time": 0.17608}
86
+ {"mode": "train", "epoch": 7, "iter": 4250, "lr": 0.00015, "memory": 19750, "data_time": 0.00708, "decode.loss_ce": 0.33336, "decode.acc_seg": 87.25302, "loss": 0.33336, "time": 0.16545}
87
+ {"mode": "train", "epoch": 7, "iter": 4300, "lr": 0.00015, "memory": 19750, "data_time": 0.00665, "decode.loss_ce": 0.32272, "decode.acc_seg": 87.632, "loss": 0.32272, "time": 0.16879}
88
+ {"mode": "train", "epoch": 7, "iter": 4350, "lr": 0.00015, "memory": 19750, "data_time": 0.00716, "decode.loss_ce": 0.32937, "decode.acc_seg": 87.48962, "loss": 0.32937, "time": 0.17403}
89
+ {"mode": "train", "epoch": 7, "iter": 4400, "lr": 0.00015, "memory": 19750, "data_time": 0.00672, "decode.loss_ce": 0.30072, "decode.acc_seg": 88.43203, "loss": 0.30072, "time": 0.17436}
90
+ {"mode": "train", "epoch": 8, "iter": 4450, "lr": 0.00015, "memory": 19750, "data_time": 0.05521, "decode.loss_ce": 0.33604, "decode.acc_seg": 87.22331, "loss": 0.33604, "time": 0.21903}
91
+ {"mode": "train", "epoch": 8, "iter": 4500, "lr": 0.00015, "memory": 19750, "data_time": 0.00658, "decode.loss_ce": 0.34547, "decode.acc_seg": 86.72177, "loss": 0.34547, "time": 0.1719}
92
+ {"mode": "train", "epoch": 8, "iter": 4550, "lr": 0.00015, "memory": 19750, "data_time": 0.00647, "decode.loss_ce": 0.3248, "decode.acc_seg": 87.55375, "loss": 0.3248, "time": 0.16413}
93
+ {"mode": "train", "epoch": 8, "iter": 4600, "lr": 0.00015, "memory": 19750, "data_time": 0.00625, "decode.loss_ce": 0.32307, "decode.acc_seg": 87.64411, "loss": 0.32307, "time": 0.16697}
94
+ {"mode": "train", "epoch": 8, "iter": 4650, "lr": 0.00015, "memory": 19750, "data_time": 0.00598, "decode.loss_ce": 0.31148, "decode.acc_seg": 87.99871, "loss": 0.31148, "time": 0.18192}
95
+ {"mode": "train", "epoch": 8, "iter": 4700, "lr": 0.00015, "memory": 19750, "data_time": 0.00623, "decode.loss_ce": 0.31103, "decode.acc_seg": 87.92522, "loss": 0.31103, "time": 0.17117}
96
+ {"mode": "train", "epoch": 8, "iter": 4750, "lr": 0.00015, "memory": 19750, "data_time": 0.00687, "decode.loss_ce": 0.31458, "decode.acc_seg": 87.946, "loss": 0.31458, "time": 0.17047}
97
+ {"mode": "train", "epoch": 8, "iter": 4800, "lr": 0.00015, "memory": 19750, "data_time": 0.00641, "decode.loss_ce": 0.32922, "decode.acc_seg": 87.34764, "loss": 0.32922, "time": 0.17031}
98
+ {"mode": "train", "epoch": 8, "iter": 4850, "lr": 0.00015, "memory": 19750, "data_time": 0.00626, "decode.loss_ce": 0.3118, "decode.acc_seg": 87.8422, "loss": 0.3118, "time": 0.16599}
99
+ {"mode": "train", "epoch": 8, "iter": 4900, "lr": 0.00015, "memory": 19750, "data_time": 0.00597, "decode.loss_ce": 0.29622, "decode.acc_seg": 88.33316, "loss": 0.29622, "time": 0.17134}
100
+ {"mode": "train", "epoch": 8, "iter": 4950, "lr": 0.00015, "memory": 19750, "data_time": 0.00671, "decode.loss_ce": 0.31746, "decode.acc_seg": 87.63461, "loss": 0.31746, "time": 0.1665}
101
+ {"mode": "train", "epoch": 8, "iter": 5000, "lr": 0.00015, "memory": 19750, "data_time": 0.00622, "decode.loss_ce": 0.30714, "decode.acc_seg": 88.24625, "loss": 0.30714, "time": 0.17579}
102
+ {"mode": "train", "epoch": 9, "iter": 5050, "lr": 0.00015, "memory": 19750, "data_time": 0.05469, "decode.loss_ce": 0.29698, "decode.acc_seg": 88.53975, "loss": 0.29698, "time": 0.21976}
103
+ {"mode": "train", "epoch": 9, "iter": 5100, "lr": 0.00015, "memory": 19750, "data_time": 0.00639, "decode.loss_ce": 0.30807, "decode.acc_seg": 88.01767, "loss": 0.30807, "time": 0.16527}
104
+ {"mode": "train", "epoch": 9, "iter": 5150, "lr": 0.00015, "memory": 19750, "data_time": 0.00624, "decode.loss_ce": 0.3155, "decode.acc_seg": 88.08894, "loss": 0.3155, "time": 0.17069}
105
+ {"mode": "train", "epoch": 9, "iter": 5200, "lr": 0.00015, "memory": 19750, "data_time": 0.00592, "decode.loss_ce": 0.29819, "decode.acc_seg": 88.41932, "loss": 0.29819, "time": 0.16355}
106
+ {"mode": "train", "epoch": 9, "iter": 5250, "lr": 0.00015, "memory": 19750, "data_time": 0.00656, "decode.loss_ce": 0.2976, "decode.acc_seg": 88.17167, "loss": 0.2976, "time": 0.17297}
107
+ {"mode": "train", "epoch": 9, "iter": 5300, "lr": 0.00015, "memory": 19750, "data_time": 0.00595, "decode.loss_ce": 0.31431, "decode.acc_seg": 87.96738, "loss": 0.31431, "time": 0.17553}
108
+ {"mode": "train", "epoch": 9, "iter": 5350, "lr": 0.00015, "memory": 19750, "data_time": 0.00632, "decode.loss_ce": 0.31812, "decode.acc_seg": 87.89434, "loss": 0.31812, "time": 0.16414}
109
+ {"mode": "train", "epoch": 9, "iter": 5400, "lr": 0.00015, "memory": 19750, "data_time": 0.00631, "decode.loss_ce": 0.32172, "decode.acc_seg": 87.40795, "loss": 0.32172, "time": 0.16415}
110
+ {"mode": "train", "epoch": 9, "iter": 5450, "lr": 0.00015, "memory": 19750, "data_time": 0.00636, "decode.loss_ce": 0.29955, "decode.acc_seg": 88.12618, "loss": 0.29955, "time": 0.1643}
111
+ {"mode": "train", "epoch": 9, "iter": 5500, "lr": 0.00015, "memory": 19750, "data_time": 0.00622, "decode.loss_ce": 0.29215, "decode.acc_seg": 88.49556, "loss": 0.29215, "time": 0.16871}
112
+ {"mode": "train", "epoch": 9, "iter": 5550, "lr": 0.00015, "memory": 19750, "data_time": 0.00596, "decode.loss_ce": 0.32296, "decode.acc_seg": 87.6716, "loss": 0.32296, "time": 0.17221}
113
+ {"mode": "train", "epoch": 9, "iter": 5600, "lr": 0.00015, "memory": 19750, "data_time": 0.00725, "decode.loss_ce": 0.30732, "decode.acc_seg": 88.08342, "loss": 0.30732, "time": 0.17205}
114
+ {"mode": "train", "epoch": 9, "iter": 5650, "lr": 0.00015, "memory": 19750, "data_time": 0.00639, "decode.loss_ce": 0.31748, "decode.acc_seg": 87.74626, "loss": 0.31748, "time": 0.17152}
115
+ {"mode": "train", "epoch": 10, "iter": 5700, "lr": 0.00015, "memory": 19750, "data_time": 0.05224, "decode.loss_ce": 0.2968, "decode.acc_seg": 88.33484, "loss": 0.2968, "time": 0.22297}
116
+ {"mode": "train", "epoch": 10, "iter": 5750, "lr": 0.00015, "memory": 19750, "data_time": 0.00603, "decode.loss_ce": 0.2844, "decode.acc_seg": 88.79802, "loss": 0.2844, "time": 0.16865}
117
+ {"mode": "train", "epoch": 10, "iter": 5800, "lr": 0.00015, "memory": 19750, "data_time": 0.00642, "decode.loss_ce": 0.30396, "decode.acc_seg": 88.12515, "loss": 0.30396, "time": 0.16799}
118
+ {"mode": "train", "epoch": 10, "iter": 5850, "lr": 0.00015, "memory": 19750, "data_time": 0.00617, "decode.loss_ce": 0.30582, "decode.acc_seg": 87.97682, "loss": 0.30582, "time": 0.1653}
119
+ {"mode": "train", "epoch": 10, "iter": 5900, "lr": 0.00015, "memory": 19750, "data_time": 0.00661, "decode.loss_ce": 0.29512, "decode.acc_seg": 88.46608, "loss": 0.29512, "time": 0.16527}
120
+ {"mode": "train", "epoch": 10, "iter": 5950, "lr": 0.00015, "memory": 19750, "data_time": 0.00597, "decode.loss_ce": 0.31263, "decode.acc_seg": 87.92211, "loss": 0.31263, "time": 0.17091}
121
+ {"mode": "train", "epoch": 10, "iter": 6000, "lr": 0.00015, "memory": 19750, "data_time": 0.00653, "decode.loss_ce": 0.29545, "decode.acc_seg": 88.35079, "loss": 0.29545, "time": 0.16982}
122
+ {"mode": "train", "epoch": 10, "iter": 6050, "lr": 0.00015, "memory": 19750, "data_time": 0.00631, "decode.loss_ce": 0.28433, "decode.acc_seg": 88.93129, "loss": 0.28433, "time": 0.16613}
123
+ {"mode": "train", "epoch": 10, "iter": 6100, "lr": 0.00015, "memory": 19750, "data_time": 0.00695, "decode.loss_ce": 0.30451, "decode.acc_seg": 88.19103, "loss": 0.30451, "time": 0.16886}
124
+ {"mode": "train", "epoch": 10, "iter": 6150, "lr": 0.00015, "memory": 19750, "data_time": 0.00623, "decode.loss_ce": 0.30189, "decode.acc_seg": 88.29146, "loss": 0.30189, "time": 0.16576}
125
+ {"mode": "train", "epoch": 10, "iter": 6200, "lr": 0.00015, "memory": 19750, "data_time": 0.0065, "decode.loss_ce": 0.30363, "decode.acc_seg": 88.20825, "loss": 0.30363, "time": 0.17004}
126
+ {"mode": "train", "epoch": 10, "iter": 6250, "lr": 0.00015, "memory": 19750, "data_time": 0.00627, "decode.loss_ce": 0.30805, "decode.acc_seg": 88.04902, "loss": 0.30805, "time": 0.16293}
127
+ {"mode": "train", "epoch": 10, "iter": 6300, "lr": 0.00015, "memory": 19750, "data_time": 0.00655, "decode.loss_ce": 0.30825, "decode.acc_seg": 88.29885, "loss": 0.30825, "time": 0.16624}
128
+ {"mode": "train", "epoch": 11, "iter": 6350, "lr": 0.00015, "memory": 19750, "data_time": 0.0546, "decode.loss_ce": 0.30881, "decode.acc_seg": 87.92228, "loss": 0.30881, "time": 0.21774}
129
+ {"mode": "train", "epoch": 11, "iter": 6400, "lr": 0.00015, "memory": 19750, "data_time": 0.0062, "decode.loss_ce": 0.30339, "decode.acc_seg": 88.08356, "loss": 0.30339, "time": 0.17795}
130
+ {"mode": "train", "epoch": 11, "iter": 6450, "lr": 0.00015, "memory": 19750, "data_time": 0.00668, "decode.loss_ce": 0.28164, "decode.acc_seg": 88.88014, "loss": 0.28164, "time": 0.16356}
131
+ {"mode": "train", "epoch": 11, "iter": 6500, "lr": 0.00015, "memory": 19750, "data_time": 0.00635, "decode.loss_ce": 0.29401, "decode.acc_seg": 88.49332, "loss": 0.29401, "time": 0.17147}
132
+ {"mode": "train", "epoch": 11, "iter": 6550, "lr": 0.00015, "memory": 19750, "data_time": 0.00621, "decode.loss_ce": 0.29546, "decode.acc_seg": 88.55466, "loss": 0.29546, "time": 0.17142}
133
+ {"mode": "train", "epoch": 11, "iter": 6600, "lr": 0.00015, "memory": 19750, "data_time": 0.00676, "decode.loss_ce": 0.32099, "decode.acc_seg": 87.74059, "loss": 0.32099, "time": 0.16338}
134
+ {"mode": "train", "epoch": 11, "iter": 6650, "lr": 0.00015, "memory": 19750, "data_time": 0.00609, "decode.loss_ce": 0.31259, "decode.acc_seg": 87.85196, "loss": 0.31259, "time": 0.17431}
135
+ {"mode": "train", "epoch": 11, "iter": 6700, "lr": 0.00015, "memory": 19750, "data_time": 0.00688, "decode.loss_ce": 0.28936, "decode.acc_seg": 88.58093, "loss": 0.28936, "time": 0.16549}
136
+ {"mode": "train", "epoch": 11, "iter": 6750, "lr": 0.00015, "memory": 19750, "data_time": 0.00625, "decode.loss_ce": 0.31332, "decode.acc_seg": 88.07452, "loss": 0.31332, "time": 0.17283}
137
+ {"mode": "train", "epoch": 11, "iter": 6800, "lr": 0.00015, "memory": 19750, "data_time": 0.00632, "decode.loss_ce": 0.30062, "decode.acc_seg": 88.2473, "loss": 0.30062, "time": 0.16795}
138
+ {"mode": "train", "epoch": 11, "iter": 6850, "lr": 0.00015, "memory": 19750, "data_time": 0.00607, "decode.loss_ce": 0.30173, "decode.acc_seg": 88.37736, "loss": 0.30173, "time": 0.17001}
139
+ {"mode": "train", "epoch": 11, "iter": 6900, "lr": 0.00015, "memory": 19750, "data_time": 0.00638, "decode.loss_ce": 0.28008, "decode.acc_seg": 88.99362, "loss": 0.28008, "time": 0.16775}
140
+ {"mode": "train", "epoch": 12, "iter": 6950, "lr": 0.00015, "memory": 19750, "data_time": 0.05632, "decode.loss_ce": 0.28848, "decode.acc_seg": 88.79085, "loss": 0.28848, "time": 0.22683}
141
+ {"mode": "train", "epoch": 12, "iter": 7000, "lr": 0.00015, "memory": 19750, "data_time": 0.00659, "decode.loss_ce": 0.29346, "decode.acc_seg": 88.4865, "loss": 0.29346, "time": 0.16459}
142
+ {"mode": "train", "epoch": 12, "iter": 7050, "lr": 0.00015, "memory": 19750, "data_time": 0.00658, "decode.loss_ce": 0.28864, "decode.acc_seg": 88.43235, "loss": 0.28864, "time": 0.17538}
143
+ {"mode": "train", "epoch": 12, "iter": 7100, "lr": 0.00015, "memory": 19750, "data_time": 0.00727, "decode.loss_ce": 0.29303, "decode.acc_seg": 88.31878, "loss": 0.29303, "time": 0.16954}
144
+ {"mode": "train", "epoch": 12, "iter": 7150, "lr": 0.00015, "memory": 19750, "data_time": 0.00733, "decode.loss_ce": 0.2804, "decode.acc_seg": 89.1287, "loss": 0.2804, "time": 0.16794}
145
+ {"mode": "train", "epoch": 12, "iter": 7200, "lr": 0.00015, "memory": 19750, "data_time": 0.00645, "decode.loss_ce": 0.29176, "decode.acc_seg": 88.75452, "loss": 0.29176, "time": 0.16516}
146
+ {"mode": "train", "epoch": 12, "iter": 7250, "lr": 0.00015, "memory": 19750, "data_time": 0.00696, "decode.loss_ce": 0.27544, "decode.acc_seg": 89.21126, "loss": 0.27544, "time": 0.16841}
147
+ {"mode": "train", "epoch": 12, "iter": 7300, "lr": 0.00015, "memory": 19750, "data_time": 0.00684, "decode.loss_ce": 0.28735, "decode.acc_seg": 88.76757, "loss": 0.28735, "time": 0.16692}
148
+ {"mode": "train", "epoch": 12, "iter": 7350, "lr": 0.00015, "memory": 19750, "data_time": 0.00643, "decode.loss_ce": 0.3013, "decode.acc_seg": 88.42977, "loss": 0.3013, "time": 0.1705}
149
+ {"mode": "train", "epoch": 12, "iter": 7400, "lr": 0.00015, "memory": 19750, "data_time": 0.00599, "decode.loss_ce": 0.27759, "decode.acc_seg": 88.99402, "loss": 0.27759, "time": 0.17244}
150
+ {"mode": "train", "epoch": 12, "iter": 7450, "lr": 0.00015, "memory": 19750, "data_time": 0.00631, "decode.loss_ce": 0.29072, "decode.acc_seg": 88.72828, "loss": 0.29072, "time": 0.16272}
151
+ {"mode": "train", "epoch": 12, "iter": 7500, "lr": 0.00015, "memory": 19750, "data_time": 0.00608, "decode.loss_ce": 0.28911, "decode.acc_seg": 88.82428, "loss": 0.28911, "time": 0.16917}
152
+ {"mode": "train", "epoch": 12, "iter": 7550, "lr": 0.00015, "memory": 19750, "data_time": 0.00694, "decode.loss_ce": 0.26827, "decode.acc_seg": 89.13525, "loss": 0.26827, "time": 0.1668}
153
+ {"mode": "train", "epoch": 13, "iter": 7600, "lr": 0.00015, "memory": 19750, "data_time": 0.05774, "decode.loss_ce": 0.2865, "decode.acc_seg": 88.82241, "loss": 0.2865, "time": 0.22449}
154
+ {"mode": "train", "epoch": 13, "iter": 7650, "lr": 0.00015, "memory": 19750, "data_time": 0.00617, "decode.loss_ce": 0.29029, "decode.acc_seg": 88.68461, "loss": 0.29029, "time": 0.16747}
155
+ {"mode": "train", "epoch": 13, "iter": 7700, "lr": 0.00015, "memory": 19750, "data_time": 0.00615, "decode.loss_ce": 0.27594, "decode.acc_seg": 88.94087, "loss": 0.27594, "time": 0.17627}
156
+ {"mode": "train", "epoch": 13, "iter": 7750, "lr": 0.00015, "memory": 19750, "data_time": 0.00641, "decode.loss_ce": 0.29474, "decode.acc_seg": 88.3407, "loss": 0.29474, "time": 0.16154}
157
+ {"mode": "train", "epoch": 13, "iter": 7800, "lr": 0.00015, "memory": 19750, "data_time": 0.0066, "decode.loss_ce": 0.29451, "decode.acc_seg": 88.5218, "loss": 0.29451, "time": 0.16433}
158
+ {"mode": "train", "epoch": 13, "iter": 7850, "lr": 0.00015, "memory": 19750, "data_time": 0.00625, "decode.loss_ce": 0.29442, "decode.acc_seg": 88.53064, "loss": 0.29442, "time": 0.17425}
159
+ {"mode": "train", "epoch": 13, "iter": 7900, "lr": 0.00015, "memory": 19750, "data_time": 0.0062, "decode.loss_ce": 0.27688, "decode.acc_seg": 89.11096, "loss": 0.27688, "time": 0.17571}
160
+ {"mode": "train", "epoch": 13, "iter": 7950, "lr": 0.00015, "memory": 19750, "data_time": 0.0068, "decode.loss_ce": 0.28287, "decode.acc_seg": 88.81269, "loss": 0.28287, "time": 0.16731}
161
+ {"mode": "train", "epoch": 13, "iter": 8000, "lr": 0.00015, "memory": 19750, "data_time": 0.00737, "decode.loss_ce": 0.28287, "decode.acc_seg": 88.78357, "loss": 0.28287, "time": 0.17956}
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_184631.log ADDED
@@ -0,0 +1,1139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-03-04 18:46:31,289 - mmseg - INFO - Multi-processing start method is `None`
2
+ 2023-03-04 18:46:31,301 - mmseg - INFO - OpenCV num_threads is `128
3
+ 2023-03-04 18:46:31,301 - mmseg - INFO - OMP num threads is 1
4
+ 2023-03-04 18:46:31,368 - mmseg - INFO - Environment info:
5
+ ------------------------------------------------------------
6
+ sys.platform: linux
7
+ Python: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]
8
+ CUDA available: True
9
+ GPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB
10
+ CUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch
11
+ NVCC: Cuda compilation tools, release 11.6, V11.6.124
12
+ GCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)
13
+ PyTorch: 1.13.1
14
+ PyTorch compiling details: PyTorch built with:
15
+ - GCC 9.3
16
+ - C++ Version: 201402
17
+ - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications
18
+ - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
19
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
20
+ - LAPACK is enabled (usually provided by MKL)
21
+ - NNPACK is enabled
22
+ - CPU capability usage: AVX2
23
+ - CUDA Runtime 11.6
24
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37
25
+ - CuDNN 8.3.2 (built against CUDA 11.5)
26
+ - Magma 2.6.1
27
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
28
+
29
+ TorchVision: 0.14.1
30
+ OpenCV: 4.7.0
31
+ MMCV: 1.7.1
32
+ MMCV Compiler: GCC 9.3
33
+ MMCV CUDA Compiler: 11.6
34
+ MMSegmentation: 0.30.0+6749699
35
+ ------------------------------------------------------------
36
+
37
+ 2023-03-04 18:46:31,368 - mmseg - INFO - Distributed training: True
38
+ 2023-03-04 18:46:32,081 - mmseg - INFO - Config:
39
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
40
+ checkpoint = 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'
41
+ model = dict(
42
+ type='EncoderDecoderFreeze',
43
+ freeze_parameters=['backbone', 'decode_head'],
44
+ pretrained=
45
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
46
+ backbone=dict(
47
+ type='MixVisionTransformerCustomInitWeights',
48
+ in_channels=3,
49
+ embed_dims=64,
50
+ num_stages=4,
51
+ num_layers=[3, 4, 6, 3],
52
+ num_heads=[1, 2, 5, 8],
53
+ patch_sizes=[7, 3, 3, 3],
54
+ sr_ratios=[8, 4, 2, 1],
55
+ out_indices=(0, 1, 2, 3),
56
+ mlp_ratio=4,
57
+ qkv_bias=True,
58
+ drop_rate=0.0,
59
+ attn_drop_rate=0.0,
60
+ drop_path_rate=0.1),
61
+ decode_head=dict(
62
+ type='SegformerHeadUnetFCHeadSingleStepLogits',
63
+ pretrained=
64
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
65
+ dim=128,
66
+ out_dim=256,
67
+ unet_channels=166,
68
+ dim_mults=[1, 1, 1],
69
+ cat_embedding_dim=16,
70
+ in_channels=[64, 128, 320, 512],
71
+ in_index=[0, 1, 2, 3],
72
+ channels=256,
73
+ dropout_ratio=0.1,
74
+ num_classes=151,
75
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
76
+ align_corners=False,
77
+ ignore_index=0,
78
+ loss_decode=dict(
79
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
80
+ train_cfg=dict(),
81
+ test_cfg=dict(mode='whole'))
82
+ dataset_type = 'ADE20K151Dataset'
83
+ data_root = 'data/ade/ADEChallengeData2016'
84
+ img_norm_cfg = dict(
85
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
86
+ crop_size = (512, 512)
87
+ train_pipeline = [
88
+ dict(type='LoadImageFromFile'),
89
+ dict(type='LoadAnnotations', reduce_zero_label=False),
90
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
91
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
92
+ dict(type='RandomFlip', prob=0.5),
93
+ dict(type='PhotoMetricDistortion'),
94
+ dict(
95
+ type='Normalize',
96
+ mean=[123.675, 116.28, 103.53],
97
+ std=[58.395, 57.12, 57.375],
98
+ to_rgb=True),
99
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
100
+ dict(type='DefaultFormatBundle'),
101
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
102
+ ]
103
+ test_pipeline = [
104
+ dict(type='LoadImageFromFile'),
105
+ dict(
106
+ type='MultiScaleFlipAug',
107
+ img_scale=(2048, 512),
108
+ flip=False,
109
+ transforms=[
110
+ dict(type='Resize', keep_ratio=True),
111
+ dict(type='RandomFlip'),
112
+ dict(
113
+ type='Normalize',
114
+ mean=[123.675, 116.28, 103.53],
115
+ std=[58.395, 57.12, 57.375],
116
+ to_rgb=True),
117
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
118
+ dict(type='ImageToTensor', keys=['img']),
119
+ dict(type='Collect', keys=['img'])
120
+ ])
121
+ ]
122
+ data = dict(
123
+ samples_per_gpu=4,
124
+ workers_per_gpu=4,
125
+ train=dict(
126
+ type='ADE20K151Dataset',
127
+ data_root='data/ade/ADEChallengeData2016',
128
+ img_dir='images/training',
129
+ ann_dir='annotations/training',
130
+ pipeline=[
131
+ dict(type='LoadImageFromFile'),
132
+ dict(type='LoadAnnotations', reduce_zero_label=False),
133
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
134
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
135
+ dict(type='RandomFlip', prob=0.5),
136
+ dict(type='PhotoMetricDistortion'),
137
+ dict(
138
+ type='Normalize',
139
+ mean=[123.675, 116.28, 103.53],
140
+ std=[58.395, 57.12, 57.375],
141
+ to_rgb=True),
142
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
143
+ dict(type='DefaultFormatBundle'),
144
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
145
+ ]),
146
+ val=dict(
147
+ type='ADE20K151Dataset',
148
+ data_root='data/ade/ADEChallengeData2016',
149
+ img_dir='images/validation',
150
+ ann_dir='annotations/validation',
151
+ pipeline=[
152
+ dict(type='LoadImageFromFile'),
153
+ dict(
154
+ type='MultiScaleFlipAug',
155
+ img_scale=(2048, 512),
156
+ flip=False,
157
+ transforms=[
158
+ dict(type='Resize', keep_ratio=True),
159
+ dict(type='RandomFlip'),
160
+ dict(
161
+ type='Normalize',
162
+ mean=[123.675, 116.28, 103.53],
163
+ std=[58.395, 57.12, 57.375],
164
+ to_rgb=True),
165
+ dict(
166
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
167
+ dict(type='ImageToTensor', keys=['img']),
168
+ dict(type='Collect', keys=['img'])
169
+ ])
170
+ ]),
171
+ test=dict(
172
+ type='ADE20K151Dataset',
173
+ data_root='data/ade/ADEChallengeData2016',
174
+ img_dir='images/validation',
175
+ ann_dir='annotations/validation',
176
+ pipeline=[
177
+ dict(type='LoadImageFromFile'),
178
+ dict(
179
+ type='MultiScaleFlipAug',
180
+ img_scale=(2048, 512),
181
+ flip=False,
182
+ transforms=[
183
+ dict(type='Resize', keep_ratio=True),
184
+ dict(type='RandomFlip'),
185
+ dict(
186
+ type='Normalize',
187
+ mean=[123.675, 116.28, 103.53],
188
+ std=[58.395, 57.12, 57.375],
189
+ to_rgb=True),
190
+ dict(
191
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
192
+ dict(type='ImageToTensor', keys=['img']),
193
+ dict(type='Collect', keys=['img'])
194
+ ])
195
+ ]))
196
+ log_config = dict(
197
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
198
+ dist_params = dict(backend='nccl')
199
+ log_level = 'INFO'
200
+ load_from = None
201
+ resume_from = None
202
+ workflow = [('train', 1)]
203
+ cudnn_benchmark = True
204
+ optimizer = dict(
205
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
206
+ optimizer_config = dict()
207
+ lr_config = dict(
208
+ policy='step',
209
+ warmup='linear',
210
+ warmup_iters=1000,
211
+ warmup_ratio=1e-06,
212
+ step=10000,
213
+ gamma=0.5,
214
+ min_lr=1e-06,
215
+ by_epoch=False)
216
+ runner = dict(type='IterBasedRunner', max_iters=80000)
217
+ checkpoint_config = dict(by_epoch=False, interval=8000)
218
+ evaluation = dict(
219
+ interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')
220
+ work_dir = './work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits'
221
+ gpu_ids = range(0, 8)
222
+ auto_resume = True
223
+
224
+ 2023-03-04 18:46:38,332 - mmseg - INFO - Set random seed to 1082958590, deterministic: False
225
+ 2023-03-04 18:46:38,583 - mmseg - INFO - Parameters in backbone freezed!
226
+ 2023-03-04 18:46:38,583 - mmseg - INFO - Trainable parameters in SegformerHeadUnetFCHeadSingleStep: ['unet.init_conv.weight', 'unet.init_conv.bias', 'unet.time_mlp.1.weight', 'unet.time_mlp.1.bias', 'unet.time_mlp.3.weight', 'unet.time_mlp.3.bias', 'unet.downs.0.0.mlp.1.weight', 'unet.downs.0.0.mlp.1.bias', 'unet.downs.0.0.block1.proj.weight', 'unet.downs.0.0.block1.proj.bias', 'unet.downs.0.0.block1.norm.weight', 'unet.downs.0.0.block1.norm.bias', 'unet.downs.0.0.block2.proj.weight', 'unet.downs.0.0.block2.proj.bias', 'unet.downs.0.0.block2.norm.weight', 'unet.downs.0.0.block2.norm.bias', 'unet.downs.0.1.mlp.1.weight', 'unet.downs.0.1.mlp.1.bias', 'unet.downs.0.1.block1.proj.weight', 'unet.downs.0.1.block1.proj.bias', 'unet.downs.0.1.block1.norm.weight', 'unet.downs.0.1.block1.norm.bias', 'unet.downs.0.1.block2.proj.weight', 'unet.downs.0.1.block2.proj.bias', 'unet.downs.0.1.block2.norm.weight', 'unet.downs.0.1.block2.norm.bias', 'unet.downs.0.2.fn.fn.to_qkv.weight', 'unet.downs.0.2.fn.fn.to_out.0.weight', 'unet.downs.0.2.fn.fn.to_out.0.bias', 'unet.downs.0.2.fn.fn.to_out.1.g', 'unet.downs.0.2.fn.norm.g', 'unet.downs.0.3.weight', 'unet.downs.0.3.bias', 'unet.downs.1.0.mlp.1.weight', 'unet.downs.1.0.mlp.1.bias', 'unet.downs.1.0.block1.proj.weight', 'unet.downs.1.0.block1.proj.bias', 'unet.downs.1.0.block1.norm.weight', 'unet.downs.1.0.block1.norm.bias', 'unet.downs.1.0.block2.proj.weight', 'unet.downs.1.0.block2.proj.bias', 'unet.downs.1.0.block2.norm.weight', 'unet.downs.1.0.block2.norm.bias', 'unet.downs.1.1.mlp.1.weight', 'unet.downs.1.1.mlp.1.bias', 'unet.downs.1.1.block1.proj.weight', 'unet.downs.1.1.block1.proj.bias', 'unet.downs.1.1.block1.norm.weight', 'unet.downs.1.1.block1.norm.bias', 'unet.downs.1.1.block2.proj.weight', 'unet.downs.1.1.block2.proj.bias', 'unet.downs.1.1.block2.norm.weight', 'unet.downs.1.1.block2.norm.bias', 'unet.downs.1.2.fn.fn.to_qkv.weight', 'unet.downs.1.2.fn.fn.to_out.0.weight', 'unet.downs.1.2.fn.fn.to_out.0.bias', 'unet.downs.1.2.fn.fn.to_out.1.g', 'unet.downs.1.2.fn.norm.g', 'unet.downs.1.3.weight', 'unet.downs.1.3.bias', 'unet.downs.2.0.mlp.1.weight', 'unet.downs.2.0.mlp.1.bias', 'unet.downs.2.0.block1.proj.weight', 'unet.downs.2.0.block1.proj.bias', 'unet.downs.2.0.block1.norm.weight', 'unet.downs.2.0.block1.norm.bias', 'unet.downs.2.0.block2.proj.weight', 'unet.downs.2.0.block2.proj.bias', 'unet.downs.2.0.block2.norm.weight', 'unet.downs.2.0.block2.norm.bias', 'unet.downs.2.1.mlp.1.weight', 'unet.downs.2.1.mlp.1.bias', 'unet.downs.2.1.block1.proj.weight', 'unet.downs.2.1.block1.proj.bias', 'unet.downs.2.1.block1.norm.weight', 'unet.downs.2.1.block1.norm.bias', 'unet.downs.2.1.block2.proj.weight', 'unet.downs.2.1.block2.proj.bias', 'unet.downs.2.1.block2.norm.weight', 'unet.downs.2.1.block2.norm.bias', 'unet.downs.2.2.fn.fn.to_qkv.weight', 'unet.downs.2.2.fn.fn.to_out.0.weight', 'unet.downs.2.2.fn.fn.to_out.0.bias', 'unet.downs.2.2.fn.fn.to_out.1.g', 'unet.downs.2.2.fn.norm.g', 'unet.downs.2.3.weight', 'unet.downs.2.3.bias', 'unet.ups.0.0.mlp.1.weight', 'unet.ups.0.0.mlp.1.bias', 'unet.ups.0.0.block1.proj.weight', 'unet.ups.0.0.block1.proj.bias', 'unet.ups.0.0.block1.norm.weight', 'unet.ups.0.0.block1.norm.bias', 'unet.ups.0.0.block2.proj.weight', 'unet.ups.0.0.block2.proj.bias', 'unet.ups.0.0.block2.norm.weight', 'unet.ups.0.0.block2.norm.bias', 'unet.ups.0.0.res_conv.weight', 'unet.ups.0.0.res_conv.bias', 'unet.ups.0.1.mlp.1.weight', 'unet.ups.0.1.mlp.1.bias', 'unet.ups.0.1.block1.proj.weight', 'unet.ups.0.1.block1.proj.bias', 'unet.ups.0.1.block1.norm.weight', 'unet.ups.0.1.block1.norm.bias', 'unet.ups.0.1.block2.proj.weight', 'unet.ups.0.1.block2.proj.bias', 'unet.ups.0.1.block2.norm.weight', 'unet.ups.0.1.block2.norm.bias', 'unet.ups.0.1.res_conv.weight', 'unet.ups.0.1.res_conv.bias', 'unet.ups.0.2.fn.fn.to_qkv.weight', 'unet.ups.0.2.fn.fn.to_out.0.weight', 'unet.ups.0.2.fn.fn.to_out.0.bias', 'unet.ups.0.2.fn.fn.to_out.1.g', 'unet.ups.0.2.fn.norm.g', 'unet.ups.0.3.1.weight', 'unet.ups.0.3.1.bias', 'unet.ups.1.0.mlp.1.weight', 'unet.ups.1.0.mlp.1.bias', 'unet.ups.1.0.block1.proj.weight', 'unet.ups.1.0.block1.proj.bias', 'unet.ups.1.0.block1.norm.weight', 'unet.ups.1.0.block1.norm.bias', 'unet.ups.1.0.block2.proj.weight', 'unet.ups.1.0.block2.proj.bias', 'unet.ups.1.0.block2.norm.weight', 'unet.ups.1.0.block2.norm.bias', 'unet.ups.1.0.res_conv.weight', 'unet.ups.1.0.res_conv.bias', 'unet.ups.1.1.mlp.1.weight', 'unet.ups.1.1.mlp.1.bias', 'unet.ups.1.1.block1.proj.weight', 'unet.ups.1.1.block1.proj.bias', 'unet.ups.1.1.block1.norm.weight', 'unet.ups.1.1.block1.norm.bias', 'unet.ups.1.1.block2.proj.weight', 'unet.ups.1.1.block2.proj.bias', 'unet.ups.1.1.block2.norm.weight', 'unet.ups.1.1.block2.norm.bias', 'unet.ups.1.1.res_conv.weight', 'unet.ups.1.1.res_conv.bias', 'unet.ups.1.2.fn.fn.to_qkv.weight', 'unet.ups.1.2.fn.fn.to_out.0.weight', 'unet.ups.1.2.fn.fn.to_out.0.bias', 'unet.ups.1.2.fn.fn.to_out.1.g', 'unet.ups.1.2.fn.norm.g', 'unet.ups.1.3.1.weight', 'unet.ups.1.3.1.bias', 'unet.ups.2.0.mlp.1.weight', 'unet.ups.2.0.mlp.1.bias', 'unet.ups.2.0.block1.proj.weight', 'unet.ups.2.0.block1.proj.bias', 'unet.ups.2.0.block1.norm.weight', 'unet.ups.2.0.block1.norm.bias', 'unet.ups.2.0.block2.proj.weight', 'unet.ups.2.0.block2.proj.bias', 'unet.ups.2.0.block2.norm.weight', 'unet.ups.2.0.block2.norm.bias', 'unet.ups.2.0.res_conv.weight', 'unet.ups.2.0.res_conv.bias', 'unet.ups.2.1.mlp.1.weight', 'unet.ups.2.1.mlp.1.bias', 'unet.ups.2.1.block1.proj.weight', 'unet.ups.2.1.block1.proj.bias', 'unet.ups.2.1.block1.norm.weight', 'unet.ups.2.1.block1.norm.bias', 'unet.ups.2.1.block2.proj.weight', 'unet.ups.2.1.block2.proj.bias', 'unet.ups.2.1.block2.norm.weight', 'unet.ups.2.1.block2.norm.bias', 'unet.ups.2.1.res_conv.weight', 'unet.ups.2.1.res_conv.bias', 'unet.ups.2.2.fn.fn.to_qkv.weight', 'unet.ups.2.2.fn.fn.to_out.0.weight', 'unet.ups.2.2.fn.fn.to_out.0.bias', 'unet.ups.2.2.fn.fn.to_out.1.g', 'unet.ups.2.2.fn.norm.g', 'unet.ups.2.3.weight', 'unet.ups.2.3.bias', 'unet.mid_block1.mlp.1.weight', 'unet.mid_block1.mlp.1.bias', 'unet.mid_block1.block1.proj.weight', 'unet.mid_block1.block1.proj.bias', 'unet.mid_block1.block1.norm.weight', 'unet.mid_block1.block1.norm.bias', 'unet.mid_block1.block2.proj.weight', 'unet.mid_block1.block2.proj.bias', 'unet.mid_block1.block2.norm.weight', 'unet.mid_block1.block2.norm.bias', 'unet.mid_attn.fn.fn.to_qkv.weight', 'unet.mid_attn.fn.fn.to_out.weight', 'unet.mid_attn.fn.fn.to_out.bias', 'unet.mid_attn.fn.norm.g', 'unet.mid_block2.mlp.1.weight', 'unet.mid_block2.mlp.1.bias', 'unet.mid_block2.block1.proj.weight', 'unet.mid_block2.block1.proj.bias', 'unet.mid_block2.block1.norm.weight', 'unet.mid_block2.block1.norm.bias', 'unet.mid_block2.block2.proj.weight', 'unet.mid_block2.block2.proj.bias', 'unet.mid_block2.block2.norm.weight', 'unet.mid_block2.block2.norm.bias', 'unet.final_res_block.mlp.1.weight', 'unet.final_res_block.mlp.1.bias', 'unet.final_res_block.block1.proj.weight', 'unet.final_res_block.block1.proj.bias', 'unet.final_res_block.block1.norm.weight', 'unet.final_res_block.block1.norm.bias', 'unet.final_res_block.block2.proj.weight', 'unet.final_res_block.block2.proj.bias', 'unet.final_res_block.block2.norm.weight', 'unet.final_res_block.block2.norm.bias', 'unet.final_res_block.res_conv.weight', 'unet.final_res_block.res_conv.bias', 'unet.final_conv.weight', 'unet.final_conv.bias', 'conv_seg_new.weight', 'conv_seg_new.bias']
227
+ 2023-03-04 18:46:38,584 - mmseg - INFO - Parameters in decode_head freezed!
228
+ 2023-03-04 18:46:38,606 - mmseg - INFO - load checkpoint from local path: pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth
229
+ 2023-03-04 18:46:38,852 - mmseg - WARNING - The model and loaded state dict do not match exactly
230
+
231
+ unexpected key in source state_dict: decode_head.conv_seg.weight, decode_head.conv_seg.bias, decode_head.convs.0.conv.weight, decode_head.convs.0.bn.weight, decode_head.convs.0.bn.bias, decode_head.convs.0.bn.running_mean, decode_head.convs.0.bn.running_var, decode_head.convs.0.bn.num_batches_tracked, decode_head.convs.1.conv.weight, decode_head.convs.1.bn.weight, decode_head.convs.1.bn.bias, decode_head.convs.1.bn.running_mean, decode_head.convs.1.bn.running_var, decode_head.convs.1.bn.num_batches_tracked, decode_head.convs.2.conv.weight, decode_head.convs.2.bn.weight, decode_head.convs.2.bn.bias, decode_head.convs.2.bn.running_mean, decode_head.convs.2.bn.running_var, decode_head.convs.2.bn.num_batches_tracked, decode_head.convs.3.conv.weight, decode_head.convs.3.bn.weight, decode_head.convs.3.bn.bias, decode_head.convs.3.bn.running_mean, decode_head.convs.3.bn.running_var, decode_head.convs.3.bn.num_batches_tracked, decode_head.fusion_conv.conv.weight, decode_head.fusion_conv.bn.weight, decode_head.fusion_conv.bn.bias, decode_head.fusion_conv.bn.running_mean, decode_head.fusion_conv.bn.running_var, decode_head.fusion_conv.bn.num_batches_tracked
232
+
233
+ 2023-03-04 18:46:38,865 - mmseg - INFO - load checkpoint from local path: pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth
234
+ 2023-03-04 18:46:39,075 - mmseg - WARNING - The model and loaded state dict do not match exactly
235
+
236
+ unexpected key in source state_dict: backbone.layers.0.0.projection.weight, backbone.layers.0.0.projection.bias, backbone.layers.0.0.norm.weight, backbone.layers.0.0.norm.bias, backbone.layers.0.1.0.norm1.weight, backbone.layers.0.1.0.norm1.bias, backbone.layers.0.1.0.attn.attn.in_proj_weight, backbone.layers.0.1.0.attn.attn.in_proj_bias, backbone.layers.0.1.0.attn.attn.out_proj.weight, backbone.layers.0.1.0.attn.attn.out_proj.bias, backbone.layers.0.1.0.attn.sr.weight, backbone.layers.0.1.0.attn.sr.bias, backbone.layers.0.1.0.attn.norm.weight, backbone.layers.0.1.0.attn.norm.bias, backbone.layers.0.1.0.norm2.weight, backbone.layers.0.1.0.norm2.bias, backbone.layers.0.1.0.ffn.layers.0.weight, backbone.layers.0.1.0.ffn.layers.0.bias, backbone.layers.0.1.0.ffn.layers.1.weight, backbone.layers.0.1.0.ffn.layers.1.bias, backbone.layers.0.1.0.ffn.layers.4.weight, backbone.layers.0.1.0.ffn.layers.4.bias, backbone.layers.0.1.1.norm1.weight, backbone.layers.0.1.1.norm1.bias, backbone.layers.0.1.1.attn.attn.in_proj_weight, backbone.layers.0.1.1.attn.attn.in_proj_bias, backbone.layers.0.1.1.attn.attn.out_proj.weight, backbone.layers.0.1.1.attn.attn.out_proj.bias, backbone.layers.0.1.1.attn.sr.weight, backbone.layers.0.1.1.attn.sr.bias, backbone.layers.0.1.1.attn.norm.weight, backbone.layers.0.1.1.attn.norm.bias, backbone.layers.0.1.1.norm2.weight, backbone.layers.0.1.1.norm2.bias, backbone.layers.0.1.1.ffn.layers.0.weight, backbone.layers.0.1.1.ffn.layers.0.bias, backbone.layers.0.1.1.ffn.layers.1.weight, backbone.layers.0.1.1.ffn.layers.1.bias, backbone.layers.0.1.1.ffn.layers.4.weight, backbone.layers.0.1.1.ffn.layers.4.bias, backbone.layers.0.1.2.norm1.weight, backbone.layers.0.1.2.norm1.bias, backbone.layers.0.1.2.attn.attn.in_proj_weight, backbone.layers.0.1.2.attn.attn.in_proj_bias, backbone.layers.0.1.2.attn.attn.out_proj.weight, backbone.layers.0.1.2.attn.attn.out_proj.bias, backbone.layers.0.1.2.attn.sr.weight, backbone.layers.0.1.2.attn.sr.bias, backbone.layers.0.1.2.attn.norm.weight, backbone.layers.0.1.2.attn.norm.bias, backbone.layers.0.1.2.norm2.weight, backbone.layers.0.1.2.norm2.bias, backbone.layers.0.1.2.ffn.layers.0.weight, backbone.layers.0.1.2.ffn.layers.0.bias, backbone.layers.0.1.2.ffn.layers.1.weight, backbone.layers.0.1.2.ffn.layers.1.bias, backbone.layers.0.1.2.ffn.layers.4.weight, backbone.layers.0.1.2.ffn.layers.4.bias, backbone.layers.0.2.weight, backbone.layers.0.2.bias, backbone.layers.1.0.projection.weight, backbone.layers.1.0.projection.bias, backbone.layers.1.0.norm.weight, backbone.layers.1.0.norm.bias, backbone.layers.1.1.0.norm1.weight, backbone.layers.1.1.0.norm1.bias, backbone.layers.1.1.0.attn.attn.in_proj_weight, backbone.layers.1.1.0.attn.attn.in_proj_bias, backbone.layers.1.1.0.attn.attn.out_proj.weight, backbone.layers.1.1.0.attn.attn.out_proj.bias, backbone.layers.1.1.0.attn.sr.weight, backbone.layers.1.1.0.attn.sr.bias, backbone.layers.1.1.0.attn.norm.weight, backbone.layers.1.1.0.attn.norm.bias, backbone.layers.1.1.0.norm2.weight, backbone.layers.1.1.0.norm2.bias, backbone.layers.1.1.0.ffn.layers.0.weight, backbone.layers.1.1.0.ffn.layers.0.bias, backbone.layers.1.1.0.ffn.layers.1.weight, backbone.layers.1.1.0.ffn.layers.1.bias, backbone.layers.1.1.0.ffn.layers.4.weight, backbone.layers.1.1.0.ffn.layers.4.bias, backbone.layers.1.1.1.norm1.weight, backbone.layers.1.1.1.norm1.bias, backbone.layers.1.1.1.attn.attn.in_proj_weight, backbone.layers.1.1.1.attn.attn.in_proj_bias, backbone.layers.1.1.1.attn.attn.out_proj.weight, backbone.layers.1.1.1.attn.attn.out_proj.bias, backbone.layers.1.1.1.attn.sr.weight, backbone.layers.1.1.1.attn.sr.bias, backbone.layers.1.1.1.attn.norm.weight, backbone.layers.1.1.1.attn.norm.bias, backbone.layers.1.1.1.norm2.weight, backbone.layers.1.1.1.norm2.bias, backbone.layers.1.1.1.ffn.layers.0.weight, backbone.layers.1.1.1.ffn.layers.0.bias, backbone.layers.1.1.1.ffn.layers.1.weight, backbone.layers.1.1.1.ffn.layers.1.bias, backbone.layers.1.1.1.ffn.layers.4.weight, backbone.layers.1.1.1.ffn.layers.4.bias, backbone.layers.1.1.2.norm1.weight, backbone.layers.1.1.2.norm1.bias, backbone.layers.1.1.2.attn.attn.in_proj_weight, backbone.layers.1.1.2.attn.attn.in_proj_bias, backbone.layers.1.1.2.attn.attn.out_proj.weight, backbone.layers.1.1.2.attn.attn.out_proj.bias, backbone.layers.1.1.2.attn.sr.weight, backbone.layers.1.1.2.attn.sr.bias, backbone.layers.1.1.2.attn.norm.weight, backbone.layers.1.1.2.attn.norm.bias, backbone.layers.1.1.2.norm2.weight, backbone.layers.1.1.2.norm2.bias, backbone.layers.1.1.2.ffn.layers.0.weight, backbone.layers.1.1.2.ffn.layers.0.bias, backbone.layers.1.1.2.ffn.layers.1.weight, backbone.layers.1.1.2.ffn.layers.1.bias, backbone.layers.1.1.2.ffn.layers.4.weight, backbone.layers.1.1.2.ffn.layers.4.bias, backbone.layers.1.1.3.norm1.weight, backbone.layers.1.1.3.norm1.bias, backbone.layers.1.1.3.attn.attn.in_proj_weight, backbone.layers.1.1.3.attn.attn.in_proj_bias, backbone.layers.1.1.3.attn.attn.out_proj.weight, backbone.layers.1.1.3.attn.attn.out_proj.bias, backbone.layers.1.1.3.attn.sr.weight, backbone.layers.1.1.3.attn.sr.bias, backbone.layers.1.1.3.attn.norm.weight, backbone.layers.1.1.3.attn.norm.bias, backbone.layers.1.1.3.norm2.weight, backbone.layers.1.1.3.norm2.bias, backbone.layers.1.1.3.ffn.layers.0.weight, backbone.layers.1.1.3.ffn.layers.0.bias, backbone.layers.1.1.3.ffn.layers.1.weight, backbone.layers.1.1.3.ffn.layers.1.bias, backbone.layers.1.1.3.ffn.layers.4.weight, backbone.layers.1.1.3.ffn.layers.4.bias, backbone.layers.1.2.weight, backbone.layers.1.2.bias, backbone.layers.2.0.projection.weight, backbone.layers.2.0.projection.bias, backbone.layers.2.0.norm.weight, backbone.layers.2.0.norm.bias, backbone.layers.2.1.0.norm1.weight, backbone.layers.2.1.0.norm1.bias, backbone.layers.2.1.0.attn.attn.in_proj_weight, backbone.layers.2.1.0.attn.attn.in_proj_bias, backbone.layers.2.1.0.attn.attn.out_proj.weight, backbone.layers.2.1.0.attn.attn.out_proj.bias, backbone.layers.2.1.0.attn.sr.weight, backbone.layers.2.1.0.attn.sr.bias, backbone.layers.2.1.0.attn.norm.weight, backbone.layers.2.1.0.attn.norm.bias, backbone.layers.2.1.0.norm2.weight, backbone.layers.2.1.0.norm2.bias, backbone.layers.2.1.0.ffn.layers.0.weight, backbone.layers.2.1.0.ffn.layers.0.bias, backbone.layers.2.1.0.ffn.layers.1.weight, backbone.layers.2.1.0.ffn.layers.1.bias, backbone.layers.2.1.0.ffn.layers.4.weight, backbone.layers.2.1.0.ffn.layers.4.bias, backbone.layers.2.1.1.norm1.weight, backbone.layers.2.1.1.norm1.bias, backbone.layers.2.1.1.attn.attn.in_proj_weight, backbone.layers.2.1.1.attn.attn.in_proj_bias, backbone.layers.2.1.1.attn.attn.out_proj.weight, backbone.layers.2.1.1.attn.attn.out_proj.bias, backbone.layers.2.1.1.attn.sr.weight, backbone.layers.2.1.1.attn.sr.bias, backbone.layers.2.1.1.attn.norm.weight, backbone.layers.2.1.1.attn.norm.bias, backbone.layers.2.1.1.norm2.weight, backbone.layers.2.1.1.norm2.bias, backbone.layers.2.1.1.ffn.layers.0.weight, backbone.layers.2.1.1.ffn.layers.0.bias, backbone.layers.2.1.1.ffn.layers.1.weight, backbone.layers.2.1.1.ffn.layers.1.bias, backbone.layers.2.1.1.ffn.layers.4.weight, backbone.layers.2.1.1.ffn.layers.4.bias, backbone.layers.2.1.2.norm1.weight, backbone.layers.2.1.2.norm1.bias, backbone.layers.2.1.2.attn.attn.in_proj_weight, backbone.layers.2.1.2.attn.attn.in_proj_bias, backbone.layers.2.1.2.attn.attn.out_proj.weight, backbone.layers.2.1.2.attn.attn.out_proj.bias, backbone.layers.2.1.2.attn.sr.weight, backbone.layers.2.1.2.attn.sr.bias, backbone.layers.2.1.2.attn.norm.weight, backbone.layers.2.1.2.attn.norm.bias, backbone.layers.2.1.2.norm2.weight, backbone.layers.2.1.2.norm2.bias, backbone.layers.2.1.2.ffn.layers.0.weight, backbone.layers.2.1.2.ffn.layers.0.bias, backbone.layers.2.1.2.ffn.layers.1.weight, backbone.layers.2.1.2.ffn.layers.1.bias, backbone.layers.2.1.2.ffn.layers.4.weight, backbone.layers.2.1.2.ffn.layers.4.bias, backbone.layers.2.1.3.norm1.weight, backbone.layers.2.1.3.norm1.bias, backbone.layers.2.1.3.attn.attn.in_proj_weight, backbone.layers.2.1.3.attn.attn.in_proj_bias, backbone.layers.2.1.3.attn.attn.out_proj.weight, backbone.layers.2.1.3.attn.attn.out_proj.bias, backbone.layers.2.1.3.attn.sr.weight, backbone.layers.2.1.3.attn.sr.bias, backbone.layers.2.1.3.attn.norm.weight, backbone.layers.2.1.3.attn.norm.bias, backbone.layers.2.1.3.norm2.weight, backbone.layers.2.1.3.norm2.bias, backbone.layers.2.1.3.ffn.layers.0.weight, backbone.layers.2.1.3.ffn.layers.0.bias, backbone.layers.2.1.3.ffn.layers.1.weight, backbone.layers.2.1.3.ffn.layers.1.bias, backbone.layers.2.1.3.ffn.layers.4.weight, backbone.layers.2.1.3.ffn.layers.4.bias, backbone.layers.2.1.4.norm1.weight, backbone.layers.2.1.4.norm1.bias, backbone.layers.2.1.4.attn.attn.in_proj_weight, backbone.layers.2.1.4.attn.attn.in_proj_bias, backbone.layers.2.1.4.attn.attn.out_proj.weight, backbone.layers.2.1.4.attn.attn.out_proj.bias, backbone.layers.2.1.4.attn.sr.weight, backbone.layers.2.1.4.attn.sr.bias, backbone.layers.2.1.4.attn.norm.weight, backbone.layers.2.1.4.attn.norm.bias, backbone.layers.2.1.4.norm2.weight, backbone.layers.2.1.4.norm2.bias, backbone.layers.2.1.4.ffn.layers.0.weight, backbone.layers.2.1.4.ffn.layers.0.bias, backbone.layers.2.1.4.ffn.layers.1.weight, backbone.layers.2.1.4.ffn.layers.1.bias, backbone.layers.2.1.4.ffn.layers.4.weight, backbone.layers.2.1.4.ffn.layers.4.bias, backbone.layers.2.1.5.norm1.weight, backbone.layers.2.1.5.norm1.bias, backbone.layers.2.1.5.attn.attn.in_proj_weight, backbone.layers.2.1.5.attn.attn.in_proj_bias, backbone.layers.2.1.5.attn.attn.out_proj.weight, backbone.layers.2.1.5.attn.attn.out_proj.bias, backbone.layers.2.1.5.attn.sr.weight, backbone.layers.2.1.5.attn.sr.bias, backbone.layers.2.1.5.attn.norm.weight, backbone.layers.2.1.5.attn.norm.bias, backbone.layers.2.1.5.norm2.weight, backbone.layers.2.1.5.norm2.bias, backbone.layers.2.1.5.ffn.layers.0.weight, backbone.layers.2.1.5.ffn.layers.0.bias, backbone.layers.2.1.5.ffn.layers.1.weight, backbone.layers.2.1.5.ffn.layers.1.bias, backbone.layers.2.1.5.ffn.layers.4.weight, backbone.layers.2.1.5.ffn.layers.4.bias, backbone.layers.2.2.weight, backbone.layers.2.2.bias, backbone.layers.3.0.projection.weight, backbone.layers.3.0.projection.bias, backbone.layers.3.0.norm.weight, backbone.layers.3.0.norm.bias, backbone.layers.3.1.0.norm1.weight, backbone.layers.3.1.0.norm1.bias, backbone.layers.3.1.0.attn.attn.in_proj_weight, backbone.layers.3.1.0.attn.attn.in_proj_bias, backbone.layers.3.1.0.attn.attn.out_proj.weight, backbone.layers.3.1.0.attn.attn.out_proj.bias, backbone.layers.3.1.0.norm2.weight, backbone.layers.3.1.0.norm2.bias, backbone.layers.3.1.0.ffn.layers.0.weight, backbone.layers.3.1.0.ffn.layers.0.bias, backbone.layers.3.1.0.ffn.layers.1.weight, backbone.layers.3.1.0.ffn.layers.1.bias, backbone.layers.3.1.0.ffn.layers.4.weight, backbone.layers.3.1.0.ffn.layers.4.bias, backbone.layers.3.1.1.norm1.weight, backbone.layers.3.1.1.norm1.bias, backbone.layers.3.1.1.attn.attn.in_proj_weight, backbone.layers.3.1.1.attn.attn.in_proj_bias, backbone.layers.3.1.1.attn.attn.out_proj.weight, backbone.layers.3.1.1.attn.attn.out_proj.bias, backbone.layers.3.1.1.norm2.weight, backbone.layers.3.1.1.norm2.bias, backbone.layers.3.1.1.ffn.layers.0.weight, backbone.layers.3.1.1.ffn.layers.0.bias, backbone.layers.3.1.1.ffn.layers.1.weight, backbone.layers.3.1.1.ffn.layers.1.bias, backbone.layers.3.1.1.ffn.layers.4.weight, backbone.layers.3.1.1.ffn.layers.4.bias, backbone.layers.3.1.2.norm1.weight, backbone.layers.3.1.2.norm1.bias, backbone.layers.3.1.2.attn.attn.in_proj_weight, backbone.layers.3.1.2.attn.attn.in_proj_bias, backbone.layers.3.1.2.attn.attn.out_proj.weight, backbone.layers.3.1.2.attn.attn.out_proj.bias, backbone.layers.3.1.2.norm2.weight, backbone.layers.3.1.2.norm2.bias, backbone.layers.3.1.2.ffn.layers.0.weight, backbone.layers.3.1.2.ffn.layers.0.bias, backbone.layers.3.1.2.ffn.layers.1.weight, backbone.layers.3.1.2.ffn.layers.1.bias, backbone.layers.3.1.2.ffn.layers.4.weight, backbone.layers.3.1.2.ffn.layers.4.bias, backbone.layers.3.2.weight, backbone.layers.3.2.bias
237
+
238
+ missing keys in source state_dict: unet.init_conv.weight, unet.init_conv.bias, unet.time_mlp.1.weight, unet.time_mlp.1.bias, unet.time_mlp.3.weight, unet.time_mlp.3.bias, unet.downs.0.0.mlp.1.weight, unet.downs.0.0.mlp.1.bias, unet.downs.0.0.block1.proj.weight, unet.downs.0.0.block1.proj.bias, unet.downs.0.0.block1.norm.weight, unet.downs.0.0.block1.norm.bias, unet.downs.0.0.block2.proj.weight, unet.downs.0.0.block2.proj.bias, unet.downs.0.0.block2.norm.weight, unet.downs.0.0.block2.norm.bias, unet.downs.0.1.mlp.1.weight, unet.downs.0.1.mlp.1.bias, unet.downs.0.1.block1.proj.weight, unet.downs.0.1.block1.proj.bias, unet.downs.0.1.block1.norm.weight, unet.downs.0.1.block1.norm.bias, unet.downs.0.1.block2.proj.weight, unet.downs.0.1.block2.proj.bias, unet.downs.0.1.block2.norm.weight, unet.downs.0.1.block2.norm.bias, unet.downs.0.2.fn.fn.to_qkv.weight, unet.downs.0.2.fn.fn.to_out.0.weight, unet.downs.0.2.fn.fn.to_out.0.bias, unet.downs.0.2.fn.fn.to_out.1.g, unet.downs.0.2.fn.norm.g, unet.downs.0.3.weight, unet.downs.0.3.bias, unet.downs.1.0.mlp.1.weight, unet.downs.1.0.mlp.1.bias, unet.downs.1.0.block1.proj.weight, unet.downs.1.0.block1.proj.bias, unet.downs.1.0.block1.norm.weight, unet.downs.1.0.block1.norm.bias, unet.downs.1.0.block2.proj.weight, unet.downs.1.0.block2.proj.bias, unet.downs.1.0.block2.norm.weight, unet.downs.1.0.block2.norm.bias, unet.downs.1.1.mlp.1.weight, unet.downs.1.1.mlp.1.bias, unet.downs.1.1.block1.proj.weight, unet.downs.1.1.block1.proj.bias, unet.downs.1.1.block1.norm.weight, unet.downs.1.1.block1.norm.bias, unet.downs.1.1.block2.proj.weight, unet.downs.1.1.block2.proj.bias, unet.downs.1.1.block2.norm.weight, unet.downs.1.1.block2.norm.bias, unet.downs.1.2.fn.fn.to_qkv.weight, unet.downs.1.2.fn.fn.to_out.0.weight, unet.downs.1.2.fn.fn.to_out.0.bias, unet.downs.1.2.fn.fn.to_out.1.g, unet.downs.1.2.fn.norm.g, unet.downs.1.3.weight, unet.downs.1.3.bias, unet.downs.2.0.mlp.1.weight, unet.downs.2.0.mlp.1.bias, unet.downs.2.0.block1.proj.weight, unet.downs.2.0.block1.proj.bias, unet.downs.2.0.block1.norm.weight, unet.downs.2.0.block1.norm.bias, unet.downs.2.0.block2.proj.weight, unet.downs.2.0.block2.proj.bias, unet.downs.2.0.block2.norm.weight, unet.downs.2.0.block2.norm.bias, unet.downs.2.1.mlp.1.weight, unet.downs.2.1.mlp.1.bias, unet.downs.2.1.block1.proj.weight, unet.downs.2.1.block1.proj.bias, unet.downs.2.1.block1.norm.weight, unet.downs.2.1.block1.norm.bias, unet.downs.2.1.block2.proj.weight, unet.downs.2.1.block2.proj.bias, unet.downs.2.1.block2.norm.weight, unet.downs.2.1.block2.norm.bias, unet.downs.2.2.fn.fn.to_qkv.weight, unet.downs.2.2.fn.fn.to_out.0.weight, unet.downs.2.2.fn.fn.to_out.0.bias, unet.downs.2.2.fn.fn.to_out.1.g, unet.downs.2.2.fn.norm.g, unet.downs.2.3.weight, unet.downs.2.3.bias, unet.ups.0.0.mlp.1.weight, unet.ups.0.0.mlp.1.bias, unet.ups.0.0.block1.proj.weight, unet.ups.0.0.block1.proj.bias, unet.ups.0.0.block1.norm.weight, unet.ups.0.0.block1.norm.bias, unet.ups.0.0.block2.proj.weight, unet.ups.0.0.block2.proj.bias, unet.ups.0.0.block2.norm.weight, unet.ups.0.0.block2.norm.bias, unet.ups.0.0.res_conv.weight, unet.ups.0.0.res_conv.bias, unet.ups.0.1.mlp.1.weight, unet.ups.0.1.mlp.1.bias, unet.ups.0.1.block1.proj.weight, unet.ups.0.1.block1.proj.bias, unet.ups.0.1.block1.norm.weight, unet.ups.0.1.block1.norm.bias, unet.ups.0.1.block2.proj.weight, unet.ups.0.1.block2.proj.bias, unet.ups.0.1.block2.norm.weight, unet.ups.0.1.block2.norm.bias, unet.ups.0.1.res_conv.weight, unet.ups.0.1.res_conv.bias, unet.ups.0.2.fn.fn.to_qkv.weight, unet.ups.0.2.fn.fn.to_out.0.weight, unet.ups.0.2.fn.fn.to_out.0.bias, unet.ups.0.2.fn.fn.to_out.1.g, unet.ups.0.2.fn.norm.g, unet.ups.0.3.1.weight, unet.ups.0.3.1.bias, unet.ups.1.0.mlp.1.weight, unet.ups.1.0.mlp.1.bias, unet.ups.1.0.block1.proj.weight, unet.ups.1.0.block1.proj.bias, unet.ups.1.0.block1.norm.weight, unet.ups.1.0.block1.norm.bias, unet.ups.1.0.block2.proj.weight, unet.ups.1.0.block2.proj.bias, unet.ups.1.0.block2.norm.weight, unet.ups.1.0.block2.norm.bias, unet.ups.1.0.res_conv.weight, unet.ups.1.0.res_conv.bias, unet.ups.1.1.mlp.1.weight, unet.ups.1.1.mlp.1.bias, unet.ups.1.1.block1.proj.weight, unet.ups.1.1.block1.proj.bias, unet.ups.1.1.block1.norm.weight, unet.ups.1.1.block1.norm.bias, unet.ups.1.1.block2.proj.weight, unet.ups.1.1.block2.proj.bias, unet.ups.1.1.block2.norm.weight, unet.ups.1.1.block2.norm.bias, unet.ups.1.1.res_conv.weight, unet.ups.1.1.res_conv.bias, unet.ups.1.2.fn.fn.to_qkv.weight, unet.ups.1.2.fn.fn.to_out.0.weight, unet.ups.1.2.fn.fn.to_out.0.bias, unet.ups.1.2.fn.fn.to_out.1.g, unet.ups.1.2.fn.norm.g, unet.ups.1.3.1.weight, unet.ups.1.3.1.bias, unet.ups.2.0.mlp.1.weight, unet.ups.2.0.mlp.1.bias, unet.ups.2.0.block1.proj.weight, unet.ups.2.0.block1.proj.bias, unet.ups.2.0.block1.norm.weight, unet.ups.2.0.block1.norm.bias, unet.ups.2.0.block2.proj.weight, unet.ups.2.0.block2.proj.bias, unet.ups.2.0.block2.norm.weight, unet.ups.2.0.block2.norm.bias, unet.ups.2.0.res_conv.weight, unet.ups.2.0.res_conv.bias, unet.ups.2.1.mlp.1.weight, unet.ups.2.1.mlp.1.bias, unet.ups.2.1.block1.proj.weight, unet.ups.2.1.block1.proj.bias, unet.ups.2.1.block1.norm.weight, unet.ups.2.1.block1.norm.bias, unet.ups.2.1.block2.proj.weight, unet.ups.2.1.block2.proj.bias, unet.ups.2.1.block2.norm.weight, unet.ups.2.1.block2.norm.bias, unet.ups.2.1.res_conv.weight, unet.ups.2.1.res_conv.bias, unet.ups.2.2.fn.fn.to_qkv.weight, unet.ups.2.2.fn.fn.to_out.0.weight, unet.ups.2.2.fn.fn.to_out.0.bias, unet.ups.2.2.fn.fn.to_out.1.g, unet.ups.2.2.fn.norm.g, unet.ups.2.3.weight, unet.ups.2.3.bias, unet.mid_block1.mlp.1.weight, unet.mid_block1.mlp.1.bias, unet.mid_block1.block1.proj.weight, unet.mid_block1.block1.proj.bias, unet.mid_block1.block1.norm.weight, unet.mid_block1.block1.norm.bias, unet.mid_block1.block2.proj.weight, unet.mid_block1.block2.proj.bias, unet.mid_block1.block2.norm.weight, unet.mid_block1.block2.norm.bias, unet.mid_attn.fn.fn.to_qkv.weight, unet.mid_attn.fn.fn.to_out.weight, unet.mid_attn.fn.fn.to_out.bias, unet.mid_attn.fn.norm.g, unet.mid_block2.mlp.1.weight, unet.mid_block2.mlp.1.bias, unet.mid_block2.block1.proj.weight, unet.mid_block2.block1.proj.bias, unet.mid_block2.block1.norm.weight, unet.mid_block2.block1.norm.bias, unet.mid_block2.block2.proj.weight, unet.mid_block2.block2.proj.bias, unet.mid_block2.block2.norm.weight, unet.mid_block2.block2.norm.bias, unet.final_res_block.mlp.1.weight, unet.final_res_block.mlp.1.bias, unet.final_res_block.block1.proj.weight, unet.final_res_block.block1.proj.bias, unet.final_res_block.block1.norm.weight, unet.final_res_block.block1.norm.bias, unet.final_res_block.block2.proj.weight, unet.final_res_block.block2.proj.bias, unet.final_res_block.block2.norm.weight, unet.final_res_block.block2.norm.bias, unet.final_res_block.res_conv.weight, unet.final_res_block.res_conv.bias, unet.final_conv.weight, unet.final_conv.bias, conv_seg_new.weight, conv_seg_new.bias, embed.weight
239
+
240
+ 2023-03-04 18:46:39,098 - mmseg - INFO - EncoderDecoderFreeze(
241
+ (backbone): MixVisionTransformerCustomInitWeights(
242
+ (layers): ModuleList(
243
+ (0): ModuleList(
244
+ (0): PatchEmbed(
245
+ (projection): Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
246
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
247
+ )
248
+ (1): ModuleList(
249
+ (0): TransformerEncoderLayer(
250
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
251
+ (attn): EfficientMultiheadAttention(
252
+ (attn): MultiheadAttention(
253
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
254
+ )
255
+ (proj_drop): Dropout(p=0.0, inplace=False)
256
+ (dropout_layer): DropPath()
257
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
258
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
259
+ )
260
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
261
+ (ffn): MixFFN(
262
+ (activate): GELU(approximate='none')
263
+ (layers): Sequential(
264
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
265
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
266
+ (2): GELU(approximate='none')
267
+ (3): Dropout(p=0.0, inplace=False)
268
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
269
+ (5): Dropout(p=0.0, inplace=False)
270
+ )
271
+ (dropout_layer): DropPath()
272
+ )
273
+ )
274
+ (1): TransformerEncoderLayer(
275
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
276
+ (attn): EfficientMultiheadAttention(
277
+ (attn): MultiheadAttention(
278
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
279
+ )
280
+ (proj_drop): Dropout(p=0.0, inplace=False)
281
+ (dropout_layer): DropPath()
282
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
283
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
284
+ )
285
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
286
+ (ffn): MixFFN(
287
+ (activate): GELU(approximate='none')
288
+ (layers): Sequential(
289
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
290
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
291
+ (2): GELU(approximate='none')
292
+ (3): Dropout(p=0.0, inplace=False)
293
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
294
+ (5): Dropout(p=0.0, inplace=False)
295
+ )
296
+ (dropout_layer): DropPath()
297
+ )
298
+ )
299
+ (2): TransformerEncoderLayer(
300
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
301
+ (attn): EfficientMultiheadAttention(
302
+ (attn): MultiheadAttention(
303
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
304
+ )
305
+ (proj_drop): Dropout(p=0.0, inplace=False)
306
+ (dropout_layer): DropPath()
307
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
308
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
309
+ )
310
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
311
+ (ffn): MixFFN(
312
+ (activate): GELU(approximate='none')
313
+ (layers): Sequential(
314
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
315
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
316
+ (2): GELU(approximate='none')
317
+ (3): Dropout(p=0.0, inplace=False)
318
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
319
+ (5): Dropout(p=0.0, inplace=False)
320
+ )
321
+ (dropout_layer): DropPath()
322
+ )
323
+ )
324
+ )
325
+ (2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
326
+ )
327
+ (1): ModuleList(
328
+ (0): PatchEmbed(
329
+ (projection): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
330
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
331
+ )
332
+ (1): ModuleList(
333
+ (0): TransformerEncoderLayer(
334
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
335
+ (attn): EfficientMultiheadAttention(
336
+ (attn): MultiheadAttention(
337
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
338
+ )
339
+ (proj_drop): Dropout(p=0.0, inplace=False)
340
+ (dropout_layer): DropPath()
341
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
342
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
343
+ )
344
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
345
+ (ffn): MixFFN(
346
+ (activate): GELU(approximate='none')
347
+ (layers): Sequential(
348
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
349
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
350
+ (2): GELU(approximate='none')
351
+ (3): Dropout(p=0.0, inplace=False)
352
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
353
+ (5): Dropout(p=0.0, inplace=False)
354
+ )
355
+ (dropout_layer): DropPath()
356
+ )
357
+ )
358
+ (1): TransformerEncoderLayer(
359
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
360
+ (attn): EfficientMultiheadAttention(
361
+ (attn): MultiheadAttention(
362
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
363
+ )
364
+ (proj_drop): Dropout(p=0.0, inplace=False)
365
+ (dropout_layer): DropPath()
366
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
367
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
368
+ )
369
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
370
+ (ffn): MixFFN(
371
+ (activate): GELU(approximate='none')
372
+ (layers): Sequential(
373
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
374
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
375
+ (2): GELU(approximate='none')
376
+ (3): Dropout(p=0.0, inplace=False)
377
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
378
+ (5): Dropout(p=0.0, inplace=False)
379
+ )
380
+ (dropout_layer): DropPath()
381
+ )
382
+ )
383
+ (2): TransformerEncoderLayer(
384
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
385
+ (attn): EfficientMultiheadAttention(
386
+ (attn): MultiheadAttention(
387
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
388
+ )
389
+ (proj_drop): Dropout(p=0.0, inplace=False)
390
+ (dropout_layer): DropPath()
391
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
392
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
393
+ )
394
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
395
+ (ffn): MixFFN(
396
+ (activate): GELU(approximate='none')
397
+ (layers): Sequential(
398
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
399
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
400
+ (2): GELU(approximate='none')
401
+ (3): Dropout(p=0.0, inplace=False)
402
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
403
+ (5): Dropout(p=0.0, inplace=False)
404
+ )
405
+ (dropout_layer): DropPath()
406
+ )
407
+ )
408
+ (3): TransformerEncoderLayer(
409
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
410
+ (attn): EfficientMultiheadAttention(
411
+ (attn): MultiheadAttention(
412
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
413
+ )
414
+ (proj_drop): Dropout(p=0.0, inplace=False)
415
+ (dropout_layer): DropPath()
416
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
417
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
418
+ )
419
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
420
+ (ffn): MixFFN(
421
+ (activate): GELU(approximate='none')
422
+ (layers): Sequential(
423
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
424
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
425
+ (2): GELU(approximate='none')
426
+ (3): Dropout(p=0.0, inplace=False)
427
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
428
+ (5): Dropout(p=0.0, inplace=False)
429
+ )
430
+ (dropout_layer): DropPath()
431
+ )
432
+ )
433
+ )
434
+ (2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
435
+ )
436
+ (2): ModuleList(
437
+ (0): PatchEmbed(
438
+ (projection): Conv2d(128, 320, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
439
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
440
+ )
441
+ (1): ModuleList(
442
+ (0): TransformerEncoderLayer(
443
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
444
+ (attn): EfficientMultiheadAttention(
445
+ (attn): MultiheadAttention(
446
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
447
+ )
448
+ (proj_drop): Dropout(p=0.0, inplace=False)
449
+ (dropout_layer): DropPath()
450
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
451
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
452
+ )
453
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
454
+ (ffn): MixFFN(
455
+ (activate): GELU(approximate='none')
456
+ (layers): Sequential(
457
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
458
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
459
+ (2): GELU(approximate='none')
460
+ (3): Dropout(p=0.0, inplace=False)
461
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
462
+ (5): Dropout(p=0.0, inplace=False)
463
+ )
464
+ (dropout_layer): DropPath()
465
+ )
466
+ )
467
+ (1): TransformerEncoderLayer(
468
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
469
+ (attn): EfficientMultiheadAttention(
470
+ (attn): MultiheadAttention(
471
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
472
+ )
473
+ (proj_drop): Dropout(p=0.0, inplace=False)
474
+ (dropout_layer): DropPath()
475
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
476
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
477
+ )
478
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
479
+ (ffn): MixFFN(
480
+ (activate): GELU(approximate='none')
481
+ (layers): Sequential(
482
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
483
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
484
+ (2): GELU(approximate='none')
485
+ (3): Dropout(p=0.0, inplace=False)
486
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
487
+ (5): Dropout(p=0.0, inplace=False)
488
+ )
489
+ (dropout_layer): DropPath()
490
+ )
491
+ )
492
+ (2): TransformerEncoderLayer(
493
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
494
+ (attn): EfficientMultiheadAttention(
495
+ (attn): MultiheadAttention(
496
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
497
+ )
498
+ (proj_drop): Dropout(p=0.0, inplace=False)
499
+ (dropout_layer): DropPath()
500
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
501
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
502
+ )
503
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
504
+ (ffn): MixFFN(
505
+ (activate): GELU(approximate='none')
506
+ (layers): Sequential(
507
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
508
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
509
+ (2): GELU(approximate='none')
510
+ (3): Dropout(p=0.0, inplace=False)
511
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
512
+ (5): Dropout(p=0.0, inplace=False)
513
+ )
514
+ (dropout_layer): DropPath()
515
+ )
516
+ )
517
+ (3): TransformerEncoderLayer(
518
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
519
+ (attn): EfficientMultiheadAttention(
520
+ (attn): MultiheadAttention(
521
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
522
+ )
523
+ (proj_drop): Dropout(p=0.0, inplace=False)
524
+ (dropout_layer): DropPath()
525
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
526
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
527
+ )
528
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
529
+ (ffn): MixFFN(
530
+ (activate): GELU(approximate='none')
531
+ (layers): Sequential(
532
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
533
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
534
+ (2): GELU(approximate='none')
535
+ (3): Dropout(p=0.0, inplace=False)
536
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
537
+ (5): Dropout(p=0.0, inplace=False)
538
+ )
539
+ (dropout_layer): DropPath()
540
+ )
541
+ )
542
+ (4): TransformerEncoderLayer(
543
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
544
+ (attn): EfficientMultiheadAttention(
545
+ (attn): MultiheadAttention(
546
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
547
+ )
548
+ (proj_drop): Dropout(p=0.0, inplace=False)
549
+ (dropout_layer): DropPath()
550
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
551
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
552
+ )
553
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
554
+ (ffn): MixFFN(
555
+ (activate): GELU(approximate='none')
556
+ (layers): Sequential(
557
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
558
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
559
+ (2): GELU(approximate='none')
560
+ (3): Dropout(p=0.0, inplace=False)
561
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
562
+ (5): Dropout(p=0.0, inplace=False)
563
+ )
564
+ (dropout_layer): DropPath()
565
+ )
566
+ )
567
+ (5): TransformerEncoderLayer(
568
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
569
+ (attn): EfficientMultiheadAttention(
570
+ (attn): MultiheadAttention(
571
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
572
+ )
573
+ (proj_drop): Dropout(p=0.0, inplace=False)
574
+ (dropout_layer): DropPath()
575
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
576
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
577
+ )
578
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
579
+ (ffn): MixFFN(
580
+ (activate): GELU(approximate='none')
581
+ (layers): Sequential(
582
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
583
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
584
+ (2): GELU(approximate='none')
585
+ (3): Dropout(p=0.0, inplace=False)
586
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
587
+ (5): Dropout(p=0.0, inplace=False)
588
+ )
589
+ (dropout_layer): DropPath()
590
+ )
591
+ )
592
+ )
593
+ (2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
594
+ )
595
+ (3): ModuleList(
596
+ (0): PatchEmbed(
597
+ (projection): Conv2d(320, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
598
+ (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
599
+ )
600
+ (1): ModuleList(
601
+ (0): TransformerEncoderLayer(
602
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
603
+ (attn): EfficientMultiheadAttention(
604
+ (attn): MultiheadAttention(
605
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
606
+ )
607
+ (proj_drop): Dropout(p=0.0, inplace=False)
608
+ (dropout_layer): DropPath()
609
+ )
610
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
611
+ (ffn): MixFFN(
612
+ (activate): GELU(approximate='none')
613
+ (layers): Sequential(
614
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
615
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
616
+ (2): GELU(approximate='none')
617
+ (3): Dropout(p=0.0, inplace=False)
618
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
619
+ (5): Dropout(p=0.0, inplace=False)
620
+ )
621
+ (dropout_layer): DropPath()
622
+ )
623
+ )
624
+ (1): TransformerEncoderLayer(
625
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
626
+ (attn): EfficientMultiheadAttention(
627
+ (attn): MultiheadAttention(
628
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
629
+ )
630
+ (proj_drop): Dropout(p=0.0, inplace=False)
631
+ (dropout_layer): DropPath()
632
+ )
633
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
634
+ (ffn): MixFFN(
635
+ (activate): GELU(approximate='none')
636
+ (layers): Sequential(
637
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
638
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
639
+ (2): GELU(approximate='none')
640
+ (3): Dropout(p=0.0, inplace=False)
641
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
642
+ (5): Dropout(p=0.0, inplace=False)
643
+ )
644
+ (dropout_layer): DropPath()
645
+ )
646
+ )
647
+ (2): TransformerEncoderLayer(
648
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
649
+ (attn): EfficientMultiheadAttention(
650
+ (attn): MultiheadAttention(
651
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
652
+ )
653
+ (proj_drop): Dropout(p=0.0, inplace=False)
654
+ (dropout_layer): DropPath()
655
+ )
656
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
657
+ (ffn): MixFFN(
658
+ (activate): GELU(approximate='none')
659
+ (layers): Sequential(
660
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
661
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
662
+ (2): GELU(approximate='none')
663
+ (3): Dropout(p=0.0, inplace=False)
664
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
665
+ (5): Dropout(p=0.0, inplace=False)
666
+ )
667
+ (dropout_layer): DropPath()
668
+ )
669
+ )
670
+ )
671
+ (2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
672
+ )
673
+ )
674
+ )
675
+ init_cfg={'type': 'Pretrained', 'checkpoint': 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'}
676
+ (decode_head): SegformerHeadUnetFCHeadSingleStepLogits(
677
+ input_transform=multiple_select, ignore_index=0, align_corners=False
678
+ (loss_decode): CrossEntropyLoss(avg_non_ignore=False)
679
+ (conv_seg): Conv2d(256, 150, kernel_size=(1, 1), stride=(1, 1))
680
+ (dropout): Dropout2d(p=0.1, inplace=False)
681
+ (convs): ModuleList(
682
+ (0): ConvModule(
683
+ (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
684
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
685
+ (activate): ReLU(inplace=True)
686
+ )
687
+ (1): ConvModule(
688
+ (conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
689
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
690
+ (activate): ReLU(inplace=True)
691
+ )
692
+ (2): ConvModule(
693
+ (conv): Conv2d(320, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
694
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
695
+ (activate): ReLU(inplace=True)
696
+ )
697
+ (3): ConvModule(
698
+ (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
699
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
700
+ (activate): ReLU(inplace=True)
701
+ )
702
+ )
703
+ (fusion_conv): ConvModule(
704
+ (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
705
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
706
+ (activate): ReLU(inplace=True)
707
+ )
708
+ (unet): Unet(
709
+ (init_conv): Conv2d(166, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
710
+ (time_mlp): Sequential(
711
+ (0): SinusoidalPosEmb()
712
+ (1): Linear(in_features=128, out_features=512, bias=True)
713
+ (2): GELU(approximate='none')
714
+ (3): Linear(in_features=512, out_features=512, bias=True)
715
+ )
716
+ (downs): ModuleList(
717
+ (0): ModuleList(
718
+ (0): ResnetBlock(
719
+ (mlp): Sequential(
720
+ (0): SiLU()
721
+ (1): Linear(in_features=512, out_features=256, bias=True)
722
+ )
723
+ (block1): Block(
724
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
725
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
726
+ (act): SiLU()
727
+ )
728
+ (block2): Block(
729
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
730
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
731
+ (act): SiLU()
732
+ )
733
+ (res_conv): Identity()
734
+ )
735
+ (1): ResnetBlock(
736
+ (mlp): Sequential(
737
+ (0): SiLU()
738
+ (1): Linear(in_features=512, out_features=256, bias=True)
739
+ )
740
+ (block1): Block(
741
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
742
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
743
+ (act): SiLU()
744
+ )
745
+ (block2): Block(
746
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
747
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
748
+ (act): SiLU()
749
+ )
750
+ (res_conv): Identity()
751
+ )
752
+ (2): Residual(
753
+ (fn): PreNorm(
754
+ (fn): LinearAttention(
755
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
756
+ (to_out): Sequential(
757
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
758
+ (1): LayerNorm()
759
+ )
760
+ )
761
+ (norm): LayerNorm()
762
+ )
763
+ )
764
+ (3): Conv2d(128, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
765
+ )
766
+ (1): ModuleList(
767
+ (0): ResnetBlock(
768
+ (mlp): Sequential(
769
+ (0): SiLU()
770
+ (1): Linear(in_features=512, out_features=256, bias=True)
771
+ )
772
+ (block1): Block(
773
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
774
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
775
+ (act): SiLU()
776
+ )
777
+ (block2): Block(
778
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
779
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
780
+ (act): SiLU()
781
+ )
782
+ (res_conv): Identity()
783
+ )
784
+ (1): ResnetBlock(
785
+ (mlp): Sequential(
786
+ (0): SiLU()
787
+ (1): Linear(in_features=512, out_features=256, bias=True)
788
+ )
789
+ (block1): Block(
790
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
791
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
792
+ (act): SiLU()
793
+ )
794
+ (block2): Block(
795
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
796
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
797
+ (act): SiLU()
798
+ )
799
+ (res_conv): Identity()
800
+ )
801
+ (2): Residual(
802
+ (fn): PreNorm(
803
+ (fn): LinearAttention(
804
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
805
+ (to_out): Sequential(
806
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
807
+ (1): LayerNorm()
808
+ )
809
+ )
810
+ (norm): LayerNorm()
811
+ )
812
+ )
813
+ (3): Conv2d(128, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
814
+ )
815
+ (2): ModuleList(
816
+ (0): ResnetBlock(
817
+ (mlp): Sequential(
818
+ (0): SiLU()
819
+ (1): Linear(in_features=512, out_features=256, bias=True)
820
+ )
821
+ (block1): Block(
822
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
823
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
824
+ (act): SiLU()
825
+ )
826
+ (block2): Block(
827
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
828
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
829
+ (act): SiLU()
830
+ )
831
+ (res_conv): Identity()
832
+ )
833
+ (1): ResnetBlock(
834
+ (mlp): Sequential(
835
+ (0): SiLU()
836
+ (1): Linear(in_features=512, out_features=256, bias=True)
837
+ )
838
+ (block1): Block(
839
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
840
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
841
+ (act): SiLU()
842
+ )
843
+ (block2): Block(
844
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
845
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
846
+ (act): SiLU()
847
+ )
848
+ (res_conv): Identity()
849
+ )
850
+ (2): Residual(
851
+ (fn): PreNorm(
852
+ (fn): LinearAttention(
853
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
854
+ (to_out): Sequential(
855
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
856
+ (1): LayerNorm()
857
+ )
858
+ )
859
+ (norm): LayerNorm()
860
+ )
861
+ )
862
+ (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
863
+ )
864
+ )
865
+ (ups): ModuleList(
866
+ (0): ModuleList(
867
+ (0): ResnetBlock(
868
+ (mlp): Sequential(
869
+ (0): SiLU()
870
+ (1): Linear(in_features=512, out_features=256, bias=True)
871
+ )
872
+ (block1): Block(
873
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
874
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
875
+ (act): SiLU()
876
+ )
877
+ (block2): Block(
878
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
879
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
880
+ (act): SiLU()
881
+ )
882
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
883
+ )
884
+ (1): ResnetBlock(
885
+ (mlp): Sequential(
886
+ (0): SiLU()
887
+ (1): Linear(in_features=512, out_features=256, bias=True)
888
+ )
889
+ (block1): Block(
890
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
891
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
892
+ (act): SiLU()
893
+ )
894
+ (block2): Block(
895
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
896
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
897
+ (act): SiLU()
898
+ )
899
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
900
+ )
901
+ (2): Residual(
902
+ (fn): PreNorm(
903
+ (fn): LinearAttention(
904
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
905
+ (to_out): Sequential(
906
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
907
+ (1): LayerNorm()
908
+ )
909
+ )
910
+ (norm): LayerNorm()
911
+ )
912
+ )
913
+ (3): Sequential(
914
+ (0): Upsample(scale_factor=2.0, mode=nearest)
915
+ (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
916
+ )
917
+ )
918
+ (1): ModuleList(
919
+ (0): ResnetBlock(
920
+ (mlp): Sequential(
921
+ (0): SiLU()
922
+ (1): Linear(in_features=512, out_features=256, bias=True)
923
+ )
924
+ (block1): Block(
925
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
926
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
927
+ (act): SiLU()
928
+ )
929
+ (block2): Block(
930
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
931
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
932
+ (act): SiLU()
933
+ )
934
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
935
+ )
936
+ (1): ResnetBlock(
937
+ (mlp): Sequential(
938
+ (0): SiLU()
939
+ (1): Linear(in_features=512, out_features=256, bias=True)
940
+ )
941
+ (block1): Block(
942
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
943
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
944
+ (act): SiLU()
945
+ )
946
+ (block2): Block(
947
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
948
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
949
+ (act): SiLU()
950
+ )
951
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
952
+ )
953
+ (2): Residual(
954
+ (fn): PreNorm(
955
+ (fn): LinearAttention(
956
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
957
+ (to_out): Sequential(
958
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
959
+ (1): LayerNorm()
960
+ )
961
+ )
962
+ (norm): LayerNorm()
963
+ )
964
+ )
965
+ (3): Sequential(
966
+ (0): Upsample(scale_factor=2.0, mode=nearest)
967
+ (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
968
+ )
969
+ )
970
+ (2): ModuleList(
971
+ (0): ResnetBlock(
972
+ (mlp): Sequential(
973
+ (0): SiLU()
974
+ (1): Linear(in_features=512, out_features=256, bias=True)
975
+ )
976
+ (block1): Block(
977
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
978
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
979
+ (act): SiLU()
980
+ )
981
+ (block2): Block(
982
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
983
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
984
+ (act): SiLU()
985
+ )
986
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
987
+ )
988
+ (1): ResnetBlock(
989
+ (mlp): Sequential(
990
+ (0): SiLU()
991
+ (1): Linear(in_features=512, out_features=256, bias=True)
992
+ )
993
+ (block1): Block(
994
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
995
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
996
+ (act): SiLU()
997
+ )
998
+ (block2): Block(
999
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1000
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1001
+ (act): SiLU()
1002
+ )
1003
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
1004
+ )
1005
+ (2): Residual(
1006
+ (fn): PreNorm(
1007
+ (fn): LinearAttention(
1008
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1009
+ (to_out): Sequential(
1010
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
1011
+ (1): LayerNorm()
1012
+ )
1013
+ )
1014
+ (norm): LayerNorm()
1015
+ )
1016
+ )
1017
+ (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1018
+ )
1019
+ )
1020
+ (mid_block1): ResnetBlock(
1021
+ (mlp): Sequential(
1022
+ (0): SiLU()
1023
+ (1): Linear(in_features=512, out_features=256, bias=True)
1024
+ )
1025
+ (block1): Block(
1026
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1027
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1028
+ (act): SiLU()
1029
+ )
1030
+ (block2): Block(
1031
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1032
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1033
+ (act): SiLU()
1034
+ )
1035
+ (res_conv): Identity()
1036
+ )
1037
+ (mid_attn): Residual(
1038
+ (fn): PreNorm(
1039
+ (fn): Attention(
1040
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1041
+ (to_out): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
1042
+ )
1043
+ (norm): LayerNorm()
1044
+ )
1045
+ )
1046
+ (mid_block2): ResnetBlock(
1047
+ (mlp): Sequential(
1048
+ (0): SiLU()
1049
+ (1): Linear(in_features=512, out_features=256, bias=True)
1050
+ )
1051
+ (block1): Block(
1052
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1053
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1054
+ (act): SiLU()
1055
+ )
1056
+ (block2): Block(
1057
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1058
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1059
+ (act): SiLU()
1060
+ )
1061
+ (res_conv): Identity()
1062
+ )
1063
+ (final_res_block): ResnetBlock(
1064
+ (mlp): Sequential(
1065
+ (0): SiLU()
1066
+ (1): Linear(in_features=512, out_features=256, bias=True)
1067
+ )
1068
+ (block1): Block(
1069
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1070
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1071
+ (act): SiLU()
1072
+ )
1073
+ (block2): Block(
1074
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1075
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1076
+ (act): SiLU()
1077
+ )
1078
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
1079
+ )
1080
+ (final_conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
1081
+ )
1082
+ (conv_seg_new): Conv2d(256, 151, kernel_size=(1, 1), stride=(1, 1))
1083
+ (embed): Embedding(151, 16)
1084
+ )
1085
+ init_cfg={'type': 'Pretrained', 'checkpoint': 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'}
1086
+ )
1087
+ 2023-03-04 18:46:40,019 - mmseg - INFO - Loaded 20210 images
1088
+ 2023-03-04 18:46:41,028 - mmseg - INFO - Loaded 2000 images
1089
+ 2023-03-04 18:46:41,033 - mmseg - INFO - load checkpoint from local path: ./work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/latest.pth
1090
+ 2023-03-04 18:46:41,696 - mmseg - INFO - resumed from epoch: 13, iter 7999
1091
+ 2023-03-04 18:46:41,697 - mmseg - INFO - Start running, host: laizeqiang@SH-IDC1-10-140-37-114, work_dir: /mnt/petrelfs/laizeqiang/mmseg-baseline/work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits
1092
+ 2023-03-04 18:46:41,697 - mmseg - INFO - Hooks will be executed in the following order:
1093
+ before_run:
1094
+ (VERY_HIGH ) StepLrUpdaterHook
1095
+ (NORMAL ) CheckpointHook
1096
+ (LOW ) DistEvalHook
1097
+ (VERY_LOW ) TextLoggerHook
1098
+ --------------------
1099
+ before_train_epoch:
1100
+ (VERY_HIGH ) StepLrUpdaterHook
1101
+ (LOW ) IterTimerHook
1102
+ (LOW ) DistEvalHook
1103
+ (VERY_LOW ) TextLoggerHook
1104
+ --------------------
1105
+ before_train_iter:
1106
+ (VERY_HIGH ) StepLrUpdaterHook
1107
+ (LOW ) IterTimerHook
1108
+ (LOW ) DistEvalHook
1109
+ --------------------
1110
+ after_train_iter:
1111
+ (ABOVE_NORMAL) OptimizerHook
1112
+ (NORMAL ) CheckpointHook
1113
+ (LOW ) IterTimerHook
1114
+ (LOW ) DistEvalHook
1115
+ (VERY_LOW ) TextLoggerHook
1116
+ --------------------
1117
+ after_train_epoch:
1118
+ (NORMAL ) CheckpointHook
1119
+ (LOW ) DistEvalHook
1120
+ (VERY_LOW ) TextLoggerHook
1121
+ --------------------
1122
+ before_val_epoch:
1123
+ (LOW ) IterTimerHook
1124
+ (VERY_LOW ) TextLoggerHook
1125
+ --------------------
1126
+ before_val_iter:
1127
+ (LOW ) IterTimerHook
1128
+ --------------------
1129
+ after_val_iter:
1130
+ (LOW ) IterTimerHook
1131
+ --------------------
1132
+ after_val_epoch:
1133
+ (VERY_LOW ) TextLoggerHook
1134
+ --------------------
1135
+ after_run:
1136
+ (VERY_LOW ) TextLoggerHook
1137
+ --------------------
1138
+ 2023-03-04 18:46:41,698 - mmseg - INFO - workflow: [('train', 1)], max: 80000 iters
1139
+ 2023-03-04 18:46:41,698 - mmseg - INFO - Checkpoints will be saved to /mnt/petrelfs/laizeqiang/mmseg-baseline/work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits by HardDiskBackend.
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_184631.log.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"env_info": "sys.platform: linux\nPython: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB\nCUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch\nNVCC: Cuda compilation tools, release 11.6, V11.6.124\nGCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)\nPyTorch: 1.13.1\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.6\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.14.1\nOpenCV: 4.7.0\nMMCV: 1.7.1\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.6\nMMSegmentation: 0.30.0+6749699", "seed": 1082958590, "exp_name": "ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits.py", "mmseg_version": "0.30.0+6749699", "config": "norm_cfg = dict(type='SyncBN', requires_grad=True)\ncheckpoint = 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'\nmodel = dict(\n type='EncoderDecoderFreeze',\n freeze_parameters=['backbone', 'decode_head'],\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',\n backbone=dict(\n type='MixVisionTransformerCustomInitWeights',\n in_channels=3,\n embed_dims=64,\n num_stages=4,\n num_layers=[3, 4, 6, 3],\n num_heads=[1, 2, 5, 8],\n patch_sizes=[7, 3, 3, 3],\n sr_ratios=[8, 4, 2, 1],\n out_indices=(0, 1, 2, 3),\n mlp_ratio=4,\n qkv_bias=True,\n drop_rate=0.0,\n attn_drop_rate=0.0,\n drop_path_rate=0.1,\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'\n ),\n decode_head=dict(\n type='SegformerHeadUnetFCHeadSingleStepLogits',\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',\n dim=128,\n out_dim=256,\n unet_channels=166,\n dim_mults=[1, 1, 1],\n cat_embedding_dim=16,\n in_channels=[64, 128, 320, 512],\n in_index=[0, 1, 2, 3],\n channels=256,\n dropout_ratio=0.1,\n num_classes=151,\n norm_cfg=dict(type='SyncBN', requires_grad=True),\n align_corners=False,\n ignore_index=0,\n loss_decode=dict(\n type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),\n train_cfg=dict(),\n test_cfg=dict(mode='whole'))\ndataset_type = 'ADE20K151Dataset'\ndata_root = 'data/ade/ADEChallengeData2016'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ncrop_size = (512, 512)\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=4,\n workers_per_gpu=4,\n train=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/training',\n ann_dir='annotations/training',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n ]),\n val=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]),\n test=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]))\nlog_config = dict(\n interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\ncudnn_benchmark = True\noptimizer = dict(\n type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)\noptimizer_config = dict()\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=1000,\n warmup_ratio=1e-06,\n step=10000,\n gamma=0.5,\n min_lr=1e-06,\n by_epoch=False)\nrunner = dict(type='IterBasedRunner', max_iters=80000)\ncheckpoint_config = dict(by_epoch=False, interval=8000)\nevaluation = dict(\n interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')\nwork_dir = './work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits'\ngpu_ids = range(0, 8)\nauto_resume = True\ndevice = 'cuda'\nseed = 1082958590\n", "CLASSES": ["background", "wall", "building", "sky", "floor", "tree", "ceiling", "road", "bed ", "windowpane", "grass", "cabinet", "sidewalk", "person", "earth", "door", "table", "mountain", "plant", "curtain", "chair", "car", "water", "painting", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock", "wardrobe", "lamp", "bathtub", "railing", "cushion", "base", "box", "column", "signboard", "chest of drawers", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator", "grandstand", "path", "stairs", "runway", "case", "pool table", "pillow", "screen door", "stairway", "river", "bridge", "bookcase", "blind", "coffee table", "toilet", "flower", "book", "hill", "bench", "countertop", "stove", "palm", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel", "bus", "towel", "light", "truck", "tower", "chandelier", "awning", "streetlight", "booth", "television receiver", "airplane", "dirt track", "apparel", "pole", "land", "bannister", "escalator", "ottoman", "bottle", "buffet", "poster", "stage", "van", "ship", "fountain", "conveyer belt", "canopy", "washer", "plaything", "swimming pool", "stool", "barrel", "basket", "waterfall", "tent", "bag", "minibike", "cradle", "oven", "ball", "food", "step", "tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket", "sculpture", "hood", "sconce", "vase", "traffic light", "tray", "ashcan", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass", "clock", "flag"], "PALETTE": [[0, 0, 0], [120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], [102, 255, 0], [92, 0, 255]], "hook_msgs": {}}
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_190322.log ADDED
@@ -0,0 +1,1139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-03-04 19:03:22,024 - mmseg - INFO - Multi-processing start method is `None`
2
+ 2023-03-04 19:03:22,039 - mmseg - INFO - OpenCV num_threads is `128
3
+ 2023-03-04 19:03:22,039 - mmseg - INFO - OMP num threads is 1
4
+ 2023-03-04 19:03:22,100 - mmseg - INFO - Environment info:
5
+ ------------------------------------------------------------
6
+ sys.platform: linux
7
+ Python: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]
8
+ CUDA available: True
9
+ GPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB
10
+ CUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch
11
+ NVCC: Cuda compilation tools, release 11.6, V11.6.124
12
+ GCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)
13
+ PyTorch: 1.13.1
14
+ PyTorch compiling details: PyTorch built with:
15
+ - GCC 9.3
16
+ - C++ Version: 201402
17
+ - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications
18
+ - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
19
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
20
+ - LAPACK is enabled (usually provided by MKL)
21
+ - NNPACK is enabled
22
+ - CPU capability usage: AVX2
23
+ - CUDA Runtime 11.6
24
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37
25
+ - CuDNN 8.3.2 (built against CUDA 11.5)
26
+ - Magma 2.6.1
27
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
28
+
29
+ TorchVision: 0.14.1
30
+ OpenCV: 4.7.0
31
+ MMCV: 1.7.1
32
+ MMCV Compiler: GCC 9.3
33
+ MMCV CUDA Compiler: 11.6
34
+ MMSegmentation: 0.30.0+6749699
35
+ ------------------------------------------------------------
36
+
37
+ 2023-03-04 19:03:22,100 - mmseg - INFO - Distributed training: True
38
+ 2023-03-04 19:03:22,820 - mmseg - INFO - Config:
39
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
40
+ checkpoint = 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'
41
+ model = dict(
42
+ type='EncoderDecoderFreeze',
43
+ freeze_parameters=['backbone', 'decode_head'],
44
+ pretrained=
45
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
46
+ backbone=dict(
47
+ type='MixVisionTransformerCustomInitWeights',
48
+ in_channels=3,
49
+ embed_dims=64,
50
+ num_stages=4,
51
+ num_layers=[3, 4, 6, 3],
52
+ num_heads=[1, 2, 5, 8],
53
+ patch_sizes=[7, 3, 3, 3],
54
+ sr_ratios=[8, 4, 2, 1],
55
+ out_indices=(0, 1, 2, 3),
56
+ mlp_ratio=4,
57
+ qkv_bias=True,
58
+ drop_rate=0.0,
59
+ attn_drop_rate=0.0,
60
+ drop_path_rate=0.1),
61
+ decode_head=dict(
62
+ type='SegformerHeadUnetFCHeadSingleStepLogits',
63
+ pretrained=
64
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
65
+ dim=128,
66
+ out_dim=256,
67
+ unet_channels=166,
68
+ dim_mults=[1, 1, 1],
69
+ cat_embedding_dim=16,
70
+ in_channels=[64, 128, 320, 512],
71
+ in_index=[0, 1, 2, 3],
72
+ channels=256,
73
+ dropout_ratio=0.1,
74
+ num_classes=151,
75
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
76
+ align_corners=False,
77
+ ignore_index=0,
78
+ loss_decode=dict(
79
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
80
+ train_cfg=dict(),
81
+ test_cfg=dict(mode='whole'))
82
+ dataset_type = 'ADE20K151Dataset'
83
+ data_root = 'data/ade/ADEChallengeData2016'
84
+ img_norm_cfg = dict(
85
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
86
+ crop_size = (512, 512)
87
+ train_pipeline = [
88
+ dict(type='LoadImageFromFile'),
89
+ dict(type='LoadAnnotations', reduce_zero_label=False),
90
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
91
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
92
+ dict(type='RandomFlip', prob=0.5),
93
+ dict(type='PhotoMetricDistortion'),
94
+ dict(
95
+ type='Normalize',
96
+ mean=[123.675, 116.28, 103.53],
97
+ std=[58.395, 57.12, 57.375],
98
+ to_rgb=True),
99
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
100
+ dict(type='DefaultFormatBundle'),
101
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
102
+ ]
103
+ test_pipeline = [
104
+ dict(type='LoadImageFromFile'),
105
+ dict(
106
+ type='MultiScaleFlipAug',
107
+ img_scale=(2048, 512),
108
+ flip=False,
109
+ transforms=[
110
+ dict(type='Resize', keep_ratio=True),
111
+ dict(type='RandomFlip'),
112
+ dict(
113
+ type='Normalize',
114
+ mean=[123.675, 116.28, 103.53],
115
+ std=[58.395, 57.12, 57.375],
116
+ to_rgb=True),
117
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
118
+ dict(type='ImageToTensor', keys=['img']),
119
+ dict(type='Collect', keys=['img'])
120
+ ])
121
+ ]
122
+ data = dict(
123
+ samples_per_gpu=4,
124
+ workers_per_gpu=4,
125
+ train=dict(
126
+ type='ADE20K151Dataset',
127
+ data_root='data/ade/ADEChallengeData2016',
128
+ img_dir='images/training',
129
+ ann_dir='annotations/training',
130
+ pipeline=[
131
+ dict(type='LoadImageFromFile'),
132
+ dict(type='LoadAnnotations', reduce_zero_label=False),
133
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
134
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
135
+ dict(type='RandomFlip', prob=0.5),
136
+ dict(type='PhotoMetricDistortion'),
137
+ dict(
138
+ type='Normalize',
139
+ mean=[123.675, 116.28, 103.53],
140
+ std=[58.395, 57.12, 57.375],
141
+ to_rgb=True),
142
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
143
+ dict(type='DefaultFormatBundle'),
144
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
145
+ ]),
146
+ val=dict(
147
+ type='ADE20K151Dataset',
148
+ data_root='data/ade/ADEChallengeData2016',
149
+ img_dir='images/validation',
150
+ ann_dir='annotations/validation',
151
+ pipeline=[
152
+ dict(type='LoadImageFromFile'),
153
+ dict(
154
+ type='MultiScaleFlipAug',
155
+ img_scale=(2048, 512),
156
+ flip=False,
157
+ transforms=[
158
+ dict(type='Resize', keep_ratio=True),
159
+ dict(type='RandomFlip'),
160
+ dict(
161
+ type='Normalize',
162
+ mean=[123.675, 116.28, 103.53],
163
+ std=[58.395, 57.12, 57.375],
164
+ to_rgb=True),
165
+ dict(
166
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
167
+ dict(type='ImageToTensor', keys=['img']),
168
+ dict(type='Collect', keys=['img'])
169
+ ])
170
+ ]),
171
+ test=dict(
172
+ type='ADE20K151Dataset',
173
+ data_root='data/ade/ADEChallengeData2016',
174
+ img_dir='images/validation',
175
+ ann_dir='annotations/validation',
176
+ pipeline=[
177
+ dict(type='LoadImageFromFile'),
178
+ dict(
179
+ type='MultiScaleFlipAug',
180
+ img_scale=(2048, 512),
181
+ flip=False,
182
+ transforms=[
183
+ dict(type='Resize', keep_ratio=True),
184
+ dict(type='RandomFlip'),
185
+ dict(
186
+ type='Normalize',
187
+ mean=[123.675, 116.28, 103.53],
188
+ std=[58.395, 57.12, 57.375],
189
+ to_rgb=True),
190
+ dict(
191
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
192
+ dict(type='ImageToTensor', keys=['img']),
193
+ dict(type='Collect', keys=['img'])
194
+ ])
195
+ ]))
196
+ log_config = dict(
197
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
198
+ dist_params = dict(backend='nccl')
199
+ log_level = 'INFO'
200
+ load_from = None
201
+ resume_from = None
202
+ workflow = [('train', 1)]
203
+ cudnn_benchmark = True
204
+ optimizer = dict(
205
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
206
+ optimizer_config = dict()
207
+ lr_config = dict(
208
+ policy='step',
209
+ warmup='linear',
210
+ warmup_iters=1000,
211
+ warmup_ratio=1e-06,
212
+ step=10000,
213
+ gamma=0.5,
214
+ min_lr=1e-06,
215
+ by_epoch=False)
216
+ runner = dict(type='IterBasedRunner', max_iters=80000)
217
+ checkpoint_config = dict(by_epoch=False, interval=8000)
218
+ evaluation = dict(
219
+ interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')
220
+ work_dir = './work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits'
221
+ gpu_ids = range(0, 8)
222
+ auto_resume = True
223
+
224
+ 2023-03-04 19:03:27,162 - mmseg - INFO - Set random seed to 1480177113, deterministic: False
225
+ 2023-03-04 19:03:27,413 - mmseg - INFO - Parameters in backbone freezed!
226
+ 2023-03-04 19:03:27,414 - mmseg - INFO - Trainable parameters in SegformerHeadUnetFCHeadSingleStep: ['unet.init_conv.weight', 'unet.init_conv.bias', 'unet.time_mlp.1.weight', 'unet.time_mlp.1.bias', 'unet.time_mlp.3.weight', 'unet.time_mlp.3.bias', 'unet.downs.0.0.mlp.1.weight', 'unet.downs.0.0.mlp.1.bias', 'unet.downs.0.0.block1.proj.weight', 'unet.downs.0.0.block1.proj.bias', 'unet.downs.0.0.block1.norm.weight', 'unet.downs.0.0.block1.norm.bias', 'unet.downs.0.0.block2.proj.weight', 'unet.downs.0.0.block2.proj.bias', 'unet.downs.0.0.block2.norm.weight', 'unet.downs.0.0.block2.norm.bias', 'unet.downs.0.1.mlp.1.weight', 'unet.downs.0.1.mlp.1.bias', 'unet.downs.0.1.block1.proj.weight', 'unet.downs.0.1.block1.proj.bias', 'unet.downs.0.1.block1.norm.weight', 'unet.downs.0.1.block1.norm.bias', 'unet.downs.0.1.block2.proj.weight', 'unet.downs.0.1.block2.proj.bias', 'unet.downs.0.1.block2.norm.weight', 'unet.downs.0.1.block2.norm.bias', 'unet.downs.0.2.fn.fn.to_qkv.weight', 'unet.downs.0.2.fn.fn.to_out.0.weight', 'unet.downs.0.2.fn.fn.to_out.0.bias', 'unet.downs.0.2.fn.fn.to_out.1.g', 'unet.downs.0.2.fn.norm.g', 'unet.downs.0.3.weight', 'unet.downs.0.3.bias', 'unet.downs.1.0.mlp.1.weight', 'unet.downs.1.0.mlp.1.bias', 'unet.downs.1.0.block1.proj.weight', 'unet.downs.1.0.block1.proj.bias', 'unet.downs.1.0.block1.norm.weight', 'unet.downs.1.0.block1.norm.bias', 'unet.downs.1.0.block2.proj.weight', 'unet.downs.1.0.block2.proj.bias', 'unet.downs.1.0.block2.norm.weight', 'unet.downs.1.0.block2.norm.bias', 'unet.downs.1.1.mlp.1.weight', 'unet.downs.1.1.mlp.1.bias', 'unet.downs.1.1.block1.proj.weight', 'unet.downs.1.1.block1.proj.bias', 'unet.downs.1.1.block1.norm.weight', 'unet.downs.1.1.block1.norm.bias', 'unet.downs.1.1.block2.proj.weight', 'unet.downs.1.1.block2.proj.bias', 'unet.downs.1.1.block2.norm.weight', 'unet.downs.1.1.block2.norm.bias', 'unet.downs.1.2.fn.fn.to_qkv.weight', 'unet.downs.1.2.fn.fn.to_out.0.weight', 'unet.downs.1.2.fn.fn.to_out.0.bias', 'unet.downs.1.2.fn.fn.to_out.1.g', 'unet.downs.1.2.fn.norm.g', 'unet.downs.1.3.weight', 'unet.downs.1.3.bias', 'unet.downs.2.0.mlp.1.weight', 'unet.downs.2.0.mlp.1.bias', 'unet.downs.2.0.block1.proj.weight', 'unet.downs.2.0.block1.proj.bias', 'unet.downs.2.0.block1.norm.weight', 'unet.downs.2.0.block1.norm.bias', 'unet.downs.2.0.block2.proj.weight', 'unet.downs.2.0.block2.proj.bias', 'unet.downs.2.0.block2.norm.weight', 'unet.downs.2.0.block2.norm.bias', 'unet.downs.2.1.mlp.1.weight', 'unet.downs.2.1.mlp.1.bias', 'unet.downs.2.1.block1.proj.weight', 'unet.downs.2.1.block1.proj.bias', 'unet.downs.2.1.block1.norm.weight', 'unet.downs.2.1.block1.norm.bias', 'unet.downs.2.1.block2.proj.weight', 'unet.downs.2.1.block2.proj.bias', 'unet.downs.2.1.block2.norm.weight', 'unet.downs.2.1.block2.norm.bias', 'unet.downs.2.2.fn.fn.to_qkv.weight', 'unet.downs.2.2.fn.fn.to_out.0.weight', 'unet.downs.2.2.fn.fn.to_out.0.bias', 'unet.downs.2.2.fn.fn.to_out.1.g', 'unet.downs.2.2.fn.norm.g', 'unet.downs.2.3.weight', 'unet.downs.2.3.bias', 'unet.ups.0.0.mlp.1.weight', 'unet.ups.0.0.mlp.1.bias', 'unet.ups.0.0.block1.proj.weight', 'unet.ups.0.0.block1.proj.bias', 'unet.ups.0.0.block1.norm.weight', 'unet.ups.0.0.block1.norm.bias', 'unet.ups.0.0.block2.proj.weight', 'unet.ups.0.0.block2.proj.bias', 'unet.ups.0.0.block2.norm.weight', 'unet.ups.0.0.block2.norm.bias', 'unet.ups.0.0.res_conv.weight', 'unet.ups.0.0.res_conv.bias', 'unet.ups.0.1.mlp.1.weight', 'unet.ups.0.1.mlp.1.bias', 'unet.ups.0.1.block1.proj.weight', 'unet.ups.0.1.block1.proj.bias', 'unet.ups.0.1.block1.norm.weight', 'unet.ups.0.1.block1.norm.bias', 'unet.ups.0.1.block2.proj.weight', 'unet.ups.0.1.block2.proj.bias', 'unet.ups.0.1.block2.norm.weight', 'unet.ups.0.1.block2.norm.bias', 'unet.ups.0.1.res_conv.weight', 'unet.ups.0.1.res_conv.bias', 'unet.ups.0.2.fn.fn.to_qkv.weight', 'unet.ups.0.2.fn.fn.to_out.0.weight', 'unet.ups.0.2.fn.fn.to_out.0.bias', 'unet.ups.0.2.fn.fn.to_out.1.g', 'unet.ups.0.2.fn.norm.g', 'unet.ups.0.3.1.weight', 'unet.ups.0.3.1.bias', 'unet.ups.1.0.mlp.1.weight', 'unet.ups.1.0.mlp.1.bias', 'unet.ups.1.0.block1.proj.weight', 'unet.ups.1.0.block1.proj.bias', 'unet.ups.1.0.block1.norm.weight', 'unet.ups.1.0.block1.norm.bias', 'unet.ups.1.0.block2.proj.weight', 'unet.ups.1.0.block2.proj.bias', 'unet.ups.1.0.block2.norm.weight', 'unet.ups.1.0.block2.norm.bias', 'unet.ups.1.0.res_conv.weight', 'unet.ups.1.0.res_conv.bias', 'unet.ups.1.1.mlp.1.weight', 'unet.ups.1.1.mlp.1.bias', 'unet.ups.1.1.block1.proj.weight', 'unet.ups.1.1.block1.proj.bias', 'unet.ups.1.1.block1.norm.weight', 'unet.ups.1.1.block1.norm.bias', 'unet.ups.1.1.block2.proj.weight', 'unet.ups.1.1.block2.proj.bias', 'unet.ups.1.1.block2.norm.weight', 'unet.ups.1.1.block2.norm.bias', 'unet.ups.1.1.res_conv.weight', 'unet.ups.1.1.res_conv.bias', 'unet.ups.1.2.fn.fn.to_qkv.weight', 'unet.ups.1.2.fn.fn.to_out.0.weight', 'unet.ups.1.2.fn.fn.to_out.0.bias', 'unet.ups.1.2.fn.fn.to_out.1.g', 'unet.ups.1.2.fn.norm.g', 'unet.ups.1.3.1.weight', 'unet.ups.1.3.1.bias', 'unet.ups.2.0.mlp.1.weight', 'unet.ups.2.0.mlp.1.bias', 'unet.ups.2.0.block1.proj.weight', 'unet.ups.2.0.block1.proj.bias', 'unet.ups.2.0.block1.norm.weight', 'unet.ups.2.0.block1.norm.bias', 'unet.ups.2.0.block2.proj.weight', 'unet.ups.2.0.block2.proj.bias', 'unet.ups.2.0.block2.norm.weight', 'unet.ups.2.0.block2.norm.bias', 'unet.ups.2.0.res_conv.weight', 'unet.ups.2.0.res_conv.bias', 'unet.ups.2.1.mlp.1.weight', 'unet.ups.2.1.mlp.1.bias', 'unet.ups.2.1.block1.proj.weight', 'unet.ups.2.1.block1.proj.bias', 'unet.ups.2.1.block1.norm.weight', 'unet.ups.2.1.block1.norm.bias', 'unet.ups.2.1.block2.proj.weight', 'unet.ups.2.1.block2.proj.bias', 'unet.ups.2.1.block2.norm.weight', 'unet.ups.2.1.block2.norm.bias', 'unet.ups.2.1.res_conv.weight', 'unet.ups.2.1.res_conv.bias', 'unet.ups.2.2.fn.fn.to_qkv.weight', 'unet.ups.2.2.fn.fn.to_out.0.weight', 'unet.ups.2.2.fn.fn.to_out.0.bias', 'unet.ups.2.2.fn.fn.to_out.1.g', 'unet.ups.2.2.fn.norm.g', 'unet.ups.2.3.weight', 'unet.ups.2.3.bias', 'unet.mid_block1.mlp.1.weight', 'unet.mid_block1.mlp.1.bias', 'unet.mid_block1.block1.proj.weight', 'unet.mid_block1.block1.proj.bias', 'unet.mid_block1.block1.norm.weight', 'unet.mid_block1.block1.norm.bias', 'unet.mid_block1.block2.proj.weight', 'unet.mid_block1.block2.proj.bias', 'unet.mid_block1.block2.norm.weight', 'unet.mid_block1.block2.norm.bias', 'unet.mid_attn.fn.fn.to_qkv.weight', 'unet.mid_attn.fn.fn.to_out.weight', 'unet.mid_attn.fn.fn.to_out.bias', 'unet.mid_attn.fn.norm.g', 'unet.mid_block2.mlp.1.weight', 'unet.mid_block2.mlp.1.bias', 'unet.mid_block2.block1.proj.weight', 'unet.mid_block2.block1.proj.bias', 'unet.mid_block2.block1.norm.weight', 'unet.mid_block2.block1.norm.bias', 'unet.mid_block2.block2.proj.weight', 'unet.mid_block2.block2.proj.bias', 'unet.mid_block2.block2.norm.weight', 'unet.mid_block2.block2.norm.bias', 'unet.final_res_block.mlp.1.weight', 'unet.final_res_block.mlp.1.bias', 'unet.final_res_block.block1.proj.weight', 'unet.final_res_block.block1.proj.bias', 'unet.final_res_block.block1.norm.weight', 'unet.final_res_block.block1.norm.bias', 'unet.final_res_block.block2.proj.weight', 'unet.final_res_block.block2.proj.bias', 'unet.final_res_block.block2.norm.weight', 'unet.final_res_block.block2.norm.bias', 'unet.final_res_block.res_conv.weight', 'unet.final_res_block.res_conv.bias', 'unet.final_conv.weight', 'unet.final_conv.bias', 'conv_seg_new.weight', 'conv_seg_new.bias']
227
+ 2023-03-04 19:03:27,414 - mmseg - INFO - Parameters in decode_head freezed!
228
+ 2023-03-04 19:03:27,436 - mmseg - INFO - load checkpoint from local path: pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth
229
+ 2023-03-04 19:03:27,682 - mmseg - WARNING - The model and loaded state dict do not match exactly
230
+
231
+ unexpected key in source state_dict: decode_head.conv_seg.weight, decode_head.conv_seg.bias, decode_head.convs.0.conv.weight, decode_head.convs.0.bn.weight, decode_head.convs.0.bn.bias, decode_head.convs.0.bn.running_mean, decode_head.convs.0.bn.running_var, decode_head.convs.0.bn.num_batches_tracked, decode_head.convs.1.conv.weight, decode_head.convs.1.bn.weight, decode_head.convs.1.bn.bias, decode_head.convs.1.bn.running_mean, decode_head.convs.1.bn.running_var, decode_head.convs.1.bn.num_batches_tracked, decode_head.convs.2.conv.weight, decode_head.convs.2.bn.weight, decode_head.convs.2.bn.bias, decode_head.convs.2.bn.running_mean, decode_head.convs.2.bn.running_var, decode_head.convs.2.bn.num_batches_tracked, decode_head.convs.3.conv.weight, decode_head.convs.3.bn.weight, decode_head.convs.3.bn.bias, decode_head.convs.3.bn.running_mean, decode_head.convs.3.bn.running_var, decode_head.convs.3.bn.num_batches_tracked, decode_head.fusion_conv.conv.weight, decode_head.fusion_conv.bn.weight, decode_head.fusion_conv.bn.bias, decode_head.fusion_conv.bn.running_mean, decode_head.fusion_conv.bn.running_var, decode_head.fusion_conv.bn.num_batches_tracked
232
+
233
+ 2023-03-04 19:03:27,695 - mmseg - INFO - load checkpoint from local path: pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth
234
+ 2023-03-04 19:03:27,908 - mmseg - WARNING - The model and loaded state dict do not match exactly
235
+
236
+ unexpected key in source state_dict: backbone.layers.0.0.projection.weight, backbone.layers.0.0.projection.bias, backbone.layers.0.0.norm.weight, backbone.layers.0.0.norm.bias, backbone.layers.0.1.0.norm1.weight, backbone.layers.0.1.0.norm1.bias, backbone.layers.0.1.0.attn.attn.in_proj_weight, backbone.layers.0.1.0.attn.attn.in_proj_bias, backbone.layers.0.1.0.attn.attn.out_proj.weight, backbone.layers.0.1.0.attn.attn.out_proj.bias, backbone.layers.0.1.0.attn.sr.weight, backbone.layers.0.1.0.attn.sr.bias, backbone.layers.0.1.0.attn.norm.weight, backbone.layers.0.1.0.attn.norm.bias, backbone.layers.0.1.0.norm2.weight, backbone.layers.0.1.0.norm2.bias, backbone.layers.0.1.0.ffn.layers.0.weight, backbone.layers.0.1.0.ffn.layers.0.bias, backbone.layers.0.1.0.ffn.layers.1.weight, backbone.layers.0.1.0.ffn.layers.1.bias, backbone.layers.0.1.0.ffn.layers.4.weight, backbone.layers.0.1.0.ffn.layers.4.bias, backbone.layers.0.1.1.norm1.weight, backbone.layers.0.1.1.norm1.bias, backbone.layers.0.1.1.attn.attn.in_proj_weight, backbone.layers.0.1.1.attn.attn.in_proj_bias, backbone.layers.0.1.1.attn.attn.out_proj.weight, backbone.layers.0.1.1.attn.attn.out_proj.bias, backbone.layers.0.1.1.attn.sr.weight, backbone.layers.0.1.1.attn.sr.bias, backbone.layers.0.1.1.attn.norm.weight, backbone.layers.0.1.1.attn.norm.bias, backbone.layers.0.1.1.norm2.weight, backbone.layers.0.1.1.norm2.bias, backbone.layers.0.1.1.ffn.layers.0.weight, backbone.layers.0.1.1.ffn.layers.0.bias, backbone.layers.0.1.1.ffn.layers.1.weight, backbone.layers.0.1.1.ffn.layers.1.bias, backbone.layers.0.1.1.ffn.layers.4.weight, backbone.layers.0.1.1.ffn.layers.4.bias, backbone.layers.0.1.2.norm1.weight, backbone.layers.0.1.2.norm1.bias, backbone.layers.0.1.2.attn.attn.in_proj_weight, backbone.layers.0.1.2.attn.attn.in_proj_bias, backbone.layers.0.1.2.attn.attn.out_proj.weight, backbone.layers.0.1.2.attn.attn.out_proj.bias, backbone.layers.0.1.2.attn.sr.weight, backbone.layers.0.1.2.attn.sr.bias, backbone.layers.0.1.2.attn.norm.weight, backbone.layers.0.1.2.attn.norm.bias, backbone.layers.0.1.2.norm2.weight, backbone.layers.0.1.2.norm2.bias, backbone.layers.0.1.2.ffn.layers.0.weight, backbone.layers.0.1.2.ffn.layers.0.bias, backbone.layers.0.1.2.ffn.layers.1.weight, backbone.layers.0.1.2.ffn.layers.1.bias, backbone.layers.0.1.2.ffn.layers.4.weight, backbone.layers.0.1.2.ffn.layers.4.bias, backbone.layers.0.2.weight, backbone.layers.0.2.bias, backbone.layers.1.0.projection.weight, backbone.layers.1.0.projection.bias, backbone.layers.1.0.norm.weight, backbone.layers.1.0.norm.bias, backbone.layers.1.1.0.norm1.weight, backbone.layers.1.1.0.norm1.bias, backbone.layers.1.1.0.attn.attn.in_proj_weight, backbone.layers.1.1.0.attn.attn.in_proj_bias, backbone.layers.1.1.0.attn.attn.out_proj.weight, backbone.layers.1.1.0.attn.attn.out_proj.bias, backbone.layers.1.1.0.attn.sr.weight, backbone.layers.1.1.0.attn.sr.bias, backbone.layers.1.1.0.attn.norm.weight, backbone.layers.1.1.0.attn.norm.bias, backbone.layers.1.1.0.norm2.weight, backbone.layers.1.1.0.norm2.bias, backbone.layers.1.1.0.ffn.layers.0.weight, backbone.layers.1.1.0.ffn.layers.0.bias, backbone.layers.1.1.0.ffn.layers.1.weight, backbone.layers.1.1.0.ffn.layers.1.bias, backbone.layers.1.1.0.ffn.layers.4.weight, backbone.layers.1.1.0.ffn.layers.4.bias, backbone.layers.1.1.1.norm1.weight, backbone.layers.1.1.1.norm1.bias, backbone.layers.1.1.1.attn.attn.in_proj_weight, backbone.layers.1.1.1.attn.attn.in_proj_bias, backbone.layers.1.1.1.attn.attn.out_proj.weight, backbone.layers.1.1.1.attn.attn.out_proj.bias, backbone.layers.1.1.1.attn.sr.weight, backbone.layers.1.1.1.attn.sr.bias, backbone.layers.1.1.1.attn.norm.weight, backbone.layers.1.1.1.attn.norm.bias, backbone.layers.1.1.1.norm2.weight, backbone.layers.1.1.1.norm2.bias, backbone.layers.1.1.1.ffn.layers.0.weight, backbone.layers.1.1.1.ffn.layers.0.bias, backbone.layers.1.1.1.ffn.layers.1.weight, backbone.layers.1.1.1.ffn.layers.1.bias, backbone.layers.1.1.1.ffn.layers.4.weight, backbone.layers.1.1.1.ffn.layers.4.bias, backbone.layers.1.1.2.norm1.weight, backbone.layers.1.1.2.norm1.bias, backbone.layers.1.1.2.attn.attn.in_proj_weight, backbone.layers.1.1.2.attn.attn.in_proj_bias, backbone.layers.1.1.2.attn.attn.out_proj.weight, backbone.layers.1.1.2.attn.attn.out_proj.bias, backbone.layers.1.1.2.attn.sr.weight, backbone.layers.1.1.2.attn.sr.bias, backbone.layers.1.1.2.attn.norm.weight, backbone.layers.1.1.2.attn.norm.bias, backbone.layers.1.1.2.norm2.weight, backbone.layers.1.1.2.norm2.bias, backbone.layers.1.1.2.ffn.layers.0.weight, backbone.layers.1.1.2.ffn.layers.0.bias, backbone.layers.1.1.2.ffn.layers.1.weight, backbone.layers.1.1.2.ffn.layers.1.bias, backbone.layers.1.1.2.ffn.layers.4.weight, backbone.layers.1.1.2.ffn.layers.4.bias, backbone.layers.1.1.3.norm1.weight, backbone.layers.1.1.3.norm1.bias, backbone.layers.1.1.3.attn.attn.in_proj_weight, backbone.layers.1.1.3.attn.attn.in_proj_bias, backbone.layers.1.1.3.attn.attn.out_proj.weight, backbone.layers.1.1.3.attn.attn.out_proj.bias, backbone.layers.1.1.3.attn.sr.weight, backbone.layers.1.1.3.attn.sr.bias, backbone.layers.1.1.3.attn.norm.weight, backbone.layers.1.1.3.attn.norm.bias, backbone.layers.1.1.3.norm2.weight, backbone.layers.1.1.3.norm2.bias, backbone.layers.1.1.3.ffn.layers.0.weight, backbone.layers.1.1.3.ffn.layers.0.bias, backbone.layers.1.1.3.ffn.layers.1.weight, backbone.layers.1.1.3.ffn.layers.1.bias, backbone.layers.1.1.3.ffn.layers.4.weight, backbone.layers.1.1.3.ffn.layers.4.bias, backbone.layers.1.2.weight, backbone.layers.1.2.bias, backbone.layers.2.0.projection.weight, backbone.layers.2.0.projection.bias, backbone.layers.2.0.norm.weight, backbone.layers.2.0.norm.bias, backbone.layers.2.1.0.norm1.weight, backbone.layers.2.1.0.norm1.bias, backbone.layers.2.1.0.attn.attn.in_proj_weight, backbone.layers.2.1.0.attn.attn.in_proj_bias, backbone.layers.2.1.0.attn.attn.out_proj.weight, backbone.layers.2.1.0.attn.attn.out_proj.bias, backbone.layers.2.1.0.attn.sr.weight, backbone.layers.2.1.0.attn.sr.bias, backbone.layers.2.1.0.attn.norm.weight, backbone.layers.2.1.0.attn.norm.bias, backbone.layers.2.1.0.norm2.weight, backbone.layers.2.1.0.norm2.bias, backbone.layers.2.1.0.ffn.layers.0.weight, backbone.layers.2.1.0.ffn.layers.0.bias, backbone.layers.2.1.0.ffn.layers.1.weight, backbone.layers.2.1.0.ffn.layers.1.bias, backbone.layers.2.1.0.ffn.layers.4.weight, backbone.layers.2.1.0.ffn.layers.4.bias, backbone.layers.2.1.1.norm1.weight, backbone.layers.2.1.1.norm1.bias, backbone.layers.2.1.1.attn.attn.in_proj_weight, backbone.layers.2.1.1.attn.attn.in_proj_bias, backbone.layers.2.1.1.attn.attn.out_proj.weight, backbone.layers.2.1.1.attn.attn.out_proj.bias, backbone.layers.2.1.1.attn.sr.weight, backbone.layers.2.1.1.attn.sr.bias, backbone.layers.2.1.1.attn.norm.weight, backbone.layers.2.1.1.attn.norm.bias, backbone.layers.2.1.1.norm2.weight, backbone.layers.2.1.1.norm2.bias, backbone.layers.2.1.1.ffn.layers.0.weight, backbone.layers.2.1.1.ffn.layers.0.bias, backbone.layers.2.1.1.ffn.layers.1.weight, backbone.layers.2.1.1.ffn.layers.1.bias, backbone.layers.2.1.1.ffn.layers.4.weight, backbone.layers.2.1.1.ffn.layers.4.bias, backbone.layers.2.1.2.norm1.weight, backbone.layers.2.1.2.norm1.bias, backbone.layers.2.1.2.attn.attn.in_proj_weight, backbone.layers.2.1.2.attn.attn.in_proj_bias, backbone.layers.2.1.2.attn.attn.out_proj.weight, backbone.layers.2.1.2.attn.attn.out_proj.bias, backbone.layers.2.1.2.attn.sr.weight, backbone.layers.2.1.2.attn.sr.bias, backbone.layers.2.1.2.attn.norm.weight, backbone.layers.2.1.2.attn.norm.bias, backbone.layers.2.1.2.norm2.weight, backbone.layers.2.1.2.norm2.bias, backbone.layers.2.1.2.ffn.layers.0.weight, backbone.layers.2.1.2.ffn.layers.0.bias, backbone.layers.2.1.2.ffn.layers.1.weight, backbone.layers.2.1.2.ffn.layers.1.bias, backbone.layers.2.1.2.ffn.layers.4.weight, backbone.layers.2.1.2.ffn.layers.4.bias, backbone.layers.2.1.3.norm1.weight, backbone.layers.2.1.3.norm1.bias, backbone.layers.2.1.3.attn.attn.in_proj_weight, backbone.layers.2.1.3.attn.attn.in_proj_bias, backbone.layers.2.1.3.attn.attn.out_proj.weight, backbone.layers.2.1.3.attn.attn.out_proj.bias, backbone.layers.2.1.3.attn.sr.weight, backbone.layers.2.1.3.attn.sr.bias, backbone.layers.2.1.3.attn.norm.weight, backbone.layers.2.1.3.attn.norm.bias, backbone.layers.2.1.3.norm2.weight, backbone.layers.2.1.3.norm2.bias, backbone.layers.2.1.3.ffn.layers.0.weight, backbone.layers.2.1.3.ffn.layers.0.bias, backbone.layers.2.1.3.ffn.layers.1.weight, backbone.layers.2.1.3.ffn.layers.1.bias, backbone.layers.2.1.3.ffn.layers.4.weight, backbone.layers.2.1.3.ffn.layers.4.bias, backbone.layers.2.1.4.norm1.weight, backbone.layers.2.1.4.norm1.bias, backbone.layers.2.1.4.attn.attn.in_proj_weight, backbone.layers.2.1.4.attn.attn.in_proj_bias, backbone.layers.2.1.4.attn.attn.out_proj.weight, backbone.layers.2.1.4.attn.attn.out_proj.bias, backbone.layers.2.1.4.attn.sr.weight, backbone.layers.2.1.4.attn.sr.bias, backbone.layers.2.1.4.attn.norm.weight, backbone.layers.2.1.4.attn.norm.bias, backbone.layers.2.1.4.norm2.weight, backbone.layers.2.1.4.norm2.bias, backbone.layers.2.1.4.ffn.layers.0.weight, backbone.layers.2.1.4.ffn.layers.0.bias, backbone.layers.2.1.4.ffn.layers.1.weight, backbone.layers.2.1.4.ffn.layers.1.bias, backbone.layers.2.1.4.ffn.layers.4.weight, backbone.layers.2.1.4.ffn.layers.4.bias, backbone.layers.2.1.5.norm1.weight, backbone.layers.2.1.5.norm1.bias, backbone.layers.2.1.5.attn.attn.in_proj_weight, backbone.layers.2.1.5.attn.attn.in_proj_bias, backbone.layers.2.1.5.attn.attn.out_proj.weight, backbone.layers.2.1.5.attn.attn.out_proj.bias, backbone.layers.2.1.5.attn.sr.weight, backbone.layers.2.1.5.attn.sr.bias, backbone.layers.2.1.5.attn.norm.weight, backbone.layers.2.1.5.attn.norm.bias, backbone.layers.2.1.5.norm2.weight, backbone.layers.2.1.5.norm2.bias, backbone.layers.2.1.5.ffn.layers.0.weight, backbone.layers.2.1.5.ffn.layers.0.bias, backbone.layers.2.1.5.ffn.layers.1.weight, backbone.layers.2.1.5.ffn.layers.1.bias, backbone.layers.2.1.5.ffn.layers.4.weight, backbone.layers.2.1.5.ffn.layers.4.bias, backbone.layers.2.2.weight, backbone.layers.2.2.bias, backbone.layers.3.0.projection.weight, backbone.layers.3.0.projection.bias, backbone.layers.3.0.norm.weight, backbone.layers.3.0.norm.bias, backbone.layers.3.1.0.norm1.weight, backbone.layers.3.1.0.norm1.bias, backbone.layers.3.1.0.attn.attn.in_proj_weight, backbone.layers.3.1.0.attn.attn.in_proj_bias, backbone.layers.3.1.0.attn.attn.out_proj.weight, backbone.layers.3.1.0.attn.attn.out_proj.bias, backbone.layers.3.1.0.norm2.weight, backbone.layers.3.1.0.norm2.bias, backbone.layers.3.1.0.ffn.layers.0.weight, backbone.layers.3.1.0.ffn.layers.0.bias, backbone.layers.3.1.0.ffn.layers.1.weight, backbone.layers.3.1.0.ffn.layers.1.bias, backbone.layers.3.1.0.ffn.layers.4.weight, backbone.layers.3.1.0.ffn.layers.4.bias, backbone.layers.3.1.1.norm1.weight, backbone.layers.3.1.1.norm1.bias, backbone.layers.3.1.1.attn.attn.in_proj_weight, backbone.layers.3.1.1.attn.attn.in_proj_bias, backbone.layers.3.1.1.attn.attn.out_proj.weight, backbone.layers.3.1.1.attn.attn.out_proj.bias, backbone.layers.3.1.1.norm2.weight, backbone.layers.3.1.1.norm2.bias, backbone.layers.3.1.1.ffn.layers.0.weight, backbone.layers.3.1.1.ffn.layers.0.bias, backbone.layers.3.1.1.ffn.layers.1.weight, backbone.layers.3.1.1.ffn.layers.1.bias, backbone.layers.3.1.1.ffn.layers.4.weight, backbone.layers.3.1.1.ffn.layers.4.bias, backbone.layers.3.1.2.norm1.weight, backbone.layers.3.1.2.norm1.bias, backbone.layers.3.1.2.attn.attn.in_proj_weight, backbone.layers.3.1.2.attn.attn.in_proj_bias, backbone.layers.3.1.2.attn.attn.out_proj.weight, backbone.layers.3.1.2.attn.attn.out_proj.bias, backbone.layers.3.1.2.norm2.weight, backbone.layers.3.1.2.norm2.bias, backbone.layers.3.1.2.ffn.layers.0.weight, backbone.layers.3.1.2.ffn.layers.0.bias, backbone.layers.3.1.2.ffn.layers.1.weight, backbone.layers.3.1.2.ffn.layers.1.bias, backbone.layers.3.1.2.ffn.layers.4.weight, backbone.layers.3.1.2.ffn.layers.4.bias, backbone.layers.3.2.weight, backbone.layers.3.2.bias
237
+
238
+ missing keys in source state_dict: unet.init_conv.weight, unet.init_conv.bias, unet.time_mlp.1.weight, unet.time_mlp.1.bias, unet.time_mlp.3.weight, unet.time_mlp.3.bias, unet.downs.0.0.mlp.1.weight, unet.downs.0.0.mlp.1.bias, unet.downs.0.0.block1.proj.weight, unet.downs.0.0.block1.proj.bias, unet.downs.0.0.block1.norm.weight, unet.downs.0.0.block1.norm.bias, unet.downs.0.0.block2.proj.weight, unet.downs.0.0.block2.proj.bias, unet.downs.0.0.block2.norm.weight, unet.downs.0.0.block2.norm.bias, unet.downs.0.1.mlp.1.weight, unet.downs.0.1.mlp.1.bias, unet.downs.0.1.block1.proj.weight, unet.downs.0.1.block1.proj.bias, unet.downs.0.1.block1.norm.weight, unet.downs.0.1.block1.norm.bias, unet.downs.0.1.block2.proj.weight, unet.downs.0.1.block2.proj.bias, unet.downs.0.1.block2.norm.weight, unet.downs.0.1.block2.norm.bias, unet.downs.0.2.fn.fn.to_qkv.weight, unet.downs.0.2.fn.fn.to_out.0.weight, unet.downs.0.2.fn.fn.to_out.0.bias, unet.downs.0.2.fn.fn.to_out.1.g, unet.downs.0.2.fn.norm.g, unet.downs.0.3.weight, unet.downs.0.3.bias, unet.downs.1.0.mlp.1.weight, unet.downs.1.0.mlp.1.bias, unet.downs.1.0.block1.proj.weight, unet.downs.1.0.block1.proj.bias, unet.downs.1.0.block1.norm.weight, unet.downs.1.0.block1.norm.bias, unet.downs.1.0.block2.proj.weight, unet.downs.1.0.block2.proj.bias, unet.downs.1.0.block2.norm.weight, unet.downs.1.0.block2.norm.bias, unet.downs.1.1.mlp.1.weight, unet.downs.1.1.mlp.1.bias, unet.downs.1.1.block1.proj.weight, unet.downs.1.1.block1.proj.bias, unet.downs.1.1.block1.norm.weight, unet.downs.1.1.block1.norm.bias, unet.downs.1.1.block2.proj.weight, unet.downs.1.1.block2.proj.bias, unet.downs.1.1.block2.norm.weight, unet.downs.1.1.block2.norm.bias, unet.downs.1.2.fn.fn.to_qkv.weight, unet.downs.1.2.fn.fn.to_out.0.weight, unet.downs.1.2.fn.fn.to_out.0.bias, unet.downs.1.2.fn.fn.to_out.1.g, unet.downs.1.2.fn.norm.g, unet.downs.1.3.weight, unet.downs.1.3.bias, unet.downs.2.0.mlp.1.weight, unet.downs.2.0.mlp.1.bias, unet.downs.2.0.block1.proj.weight, unet.downs.2.0.block1.proj.bias, unet.downs.2.0.block1.norm.weight, unet.downs.2.0.block1.norm.bias, unet.downs.2.0.block2.proj.weight, unet.downs.2.0.block2.proj.bias, unet.downs.2.0.block2.norm.weight, unet.downs.2.0.block2.norm.bias, unet.downs.2.1.mlp.1.weight, unet.downs.2.1.mlp.1.bias, unet.downs.2.1.block1.proj.weight, unet.downs.2.1.block1.proj.bias, unet.downs.2.1.block1.norm.weight, unet.downs.2.1.block1.norm.bias, unet.downs.2.1.block2.proj.weight, unet.downs.2.1.block2.proj.bias, unet.downs.2.1.block2.norm.weight, unet.downs.2.1.block2.norm.bias, unet.downs.2.2.fn.fn.to_qkv.weight, unet.downs.2.2.fn.fn.to_out.0.weight, unet.downs.2.2.fn.fn.to_out.0.bias, unet.downs.2.2.fn.fn.to_out.1.g, unet.downs.2.2.fn.norm.g, unet.downs.2.3.weight, unet.downs.2.3.bias, unet.ups.0.0.mlp.1.weight, unet.ups.0.0.mlp.1.bias, unet.ups.0.0.block1.proj.weight, unet.ups.0.0.block1.proj.bias, unet.ups.0.0.block1.norm.weight, unet.ups.0.0.block1.norm.bias, unet.ups.0.0.block2.proj.weight, unet.ups.0.0.block2.proj.bias, unet.ups.0.0.block2.norm.weight, unet.ups.0.0.block2.norm.bias, unet.ups.0.0.res_conv.weight, unet.ups.0.0.res_conv.bias, unet.ups.0.1.mlp.1.weight, unet.ups.0.1.mlp.1.bias, unet.ups.0.1.block1.proj.weight, unet.ups.0.1.block1.proj.bias, unet.ups.0.1.block1.norm.weight, unet.ups.0.1.block1.norm.bias, unet.ups.0.1.block2.proj.weight, unet.ups.0.1.block2.proj.bias, unet.ups.0.1.block2.norm.weight, unet.ups.0.1.block2.norm.bias, unet.ups.0.1.res_conv.weight, unet.ups.0.1.res_conv.bias, unet.ups.0.2.fn.fn.to_qkv.weight, unet.ups.0.2.fn.fn.to_out.0.weight, unet.ups.0.2.fn.fn.to_out.0.bias, unet.ups.0.2.fn.fn.to_out.1.g, unet.ups.0.2.fn.norm.g, unet.ups.0.3.1.weight, unet.ups.0.3.1.bias, unet.ups.1.0.mlp.1.weight, unet.ups.1.0.mlp.1.bias, unet.ups.1.0.block1.proj.weight, unet.ups.1.0.block1.proj.bias, unet.ups.1.0.block1.norm.weight, unet.ups.1.0.block1.norm.bias, unet.ups.1.0.block2.proj.weight, unet.ups.1.0.block2.proj.bias, unet.ups.1.0.block2.norm.weight, unet.ups.1.0.block2.norm.bias, unet.ups.1.0.res_conv.weight, unet.ups.1.0.res_conv.bias, unet.ups.1.1.mlp.1.weight, unet.ups.1.1.mlp.1.bias, unet.ups.1.1.block1.proj.weight, unet.ups.1.1.block1.proj.bias, unet.ups.1.1.block1.norm.weight, unet.ups.1.1.block1.norm.bias, unet.ups.1.1.block2.proj.weight, unet.ups.1.1.block2.proj.bias, unet.ups.1.1.block2.norm.weight, unet.ups.1.1.block2.norm.bias, unet.ups.1.1.res_conv.weight, unet.ups.1.1.res_conv.bias, unet.ups.1.2.fn.fn.to_qkv.weight, unet.ups.1.2.fn.fn.to_out.0.weight, unet.ups.1.2.fn.fn.to_out.0.bias, unet.ups.1.2.fn.fn.to_out.1.g, unet.ups.1.2.fn.norm.g, unet.ups.1.3.1.weight, unet.ups.1.3.1.bias, unet.ups.2.0.mlp.1.weight, unet.ups.2.0.mlp.1.bias, unet.ups.2.0.block1.proj.weight, unet.ups.2.0.block1.proj.bias, unet.ups.2.0.block1.norm.weight, unet.ups.2.0.block1.norm.bias, unet.ups.2.0.block2.proj.weight, unet.ups.2.0.block2.proj.bias, unet.ups.2.0.block2.norm.weight, unet.ups.2.0.block2.norm.bias, unet.ups.2.0.res_conv.weight, unet.ups.2.0.res_conv.bias, unet.ups.2.1.mlp.1.weight, unet.ups.2.1.mlp.1.bias, unet.ups.2.1.block1.proj.weight, unet.ups.2.1.block1.proj.bias, unet.ups.2.1.block1.norm.weight, unet.ups.2.1.block1.norm.bias, unet.ups.2.1.block2.proj.weight, unet.ups.2.1.block2.proj.bias, unet.ups.2.1.block2.norm.weight, unet.ups.2.1.block2.norm.bias, unet.ups.2.1.res_conv.weight, unet.ups.2.1.res_conv.bias, unet.ups.2.2.fn.fn.to_qkv.weight, unet.ups.2.2.fn.fn.to_out.0.weight, unet.ups.2.2.fn.fn.to_out.0.bias, unet.ups.2.2.fn.fn.to_out.1.g, unet.ups.2.2.fn.norm.g, unet.ups.2.3.weight, unet.ups.2.3.bias, unet.mid_block1.mlp.1.weight, unet.mid_block1.mlp.1.bias, unet.mid_block1.block1.proj.weight, unet.mid_block1.block1.proj.bias, unet.mid_block1.block1.norm.weight, unet.mid_block1.block1.norm.bias, unet.mid_block1.block2.proj.weight, unet.mid_block1.block2.proj.bias, unet.mid_block1.block2.norm.weight, unet.mid_block1.block2.norm.bias, unet.mid_attn.fn.fn.to_qkv.weight, unet.mid_attn.fn.fn.to_out.weight, unet.mid_attn.fn.fn.to_out.bias, unet.mid_attn.fn.norm.g, unet.mid_block2.mlp.1.weight, unet.mid_block2.mlp.1.bias, unet.mid_block2.block1.proj.weight, unet.mid_block2.block1.proj.bias, unet.mid_block2.block1.norm.weight, unet.mid_block2.block1.norm.bias, unet.mid_block2.block2.proj.weight, unet.mid_block2.block2.proj.bias, unet.mid_block2.block2.norm.weight, unet.mid_block2.block2.norm.bias, unet.final_res_block.mlp.1.weight, unet.final_res_block.mlp.1.bias, unet.final_res_block.block1.proj.weight, unet.final_res_block.block1.proj.bias, unet.final_res_block.block1.norm.weight, unet.final_res_block.block1.norm.bias, unet.final_res_block.block2.proj.weight, unet.final_res_block.block2.proj.bias, unet.final_res_block.block2.norm.weight, unet.final_res_block.block2.norm.bias, unet.final_res_block.res_conv.weight, unet.final_res_block.res_conv.bias, unet.final_conv.weight, unet.final_conv.bias, conv_seg_new.weight, conv_seg_new.bias, embed.weight
239
+
240
+ 2023-03-04 19:03:27,934 - mmseg - INFO - EncoderDecoderFreeze(
241
+ (backbone): MixVisionTransformerCustomInitWeights(
242
+ (layers): ModuleList(
243
+ (0): ModuleList(
244
+ (0): PatchEmbed(
245
+ (projection): Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
246
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
247
+ )
248
+ (1): ModuleList(
249
+ (0): TransformerEncoderLayer(
250
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
251
+ (attn): EfficientMultiheadAttention(
252
+ (attn): MultiheadAttention(
253
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
254
+ )
255
+ (proj_drop): Dropout(p=0.0, inplace=False)
256
+ (dropout_layer): DropPath()
257
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
258
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
259
+ )
260
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
261
+ (ffn): MixFFN(
262
+ (activate): GELU(approximate='none')
263
+ (layers): Sequential(
264
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
265
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
266
+ (2): GELU(approximate='none')
267
+ (3): Dropout(p=0.0, inplace=False)
268
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
269
+ (5): Dropout(p=0.0, inplace=False)
270
+ )
271
+ (dropout_layer): DropPath()
272
+ )
273
+ )
274
+ (1): TransformerEncoderLayer(
275
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
276
+ (attn): EfficientMultiheadAttention(
277
+ (attn): MultiheadAttention(
278
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
279
+ )
280
+ (proj_drop): Dropout(p=0.0, inplace=False)
281
+ (dropout_layer): DropPath()
282
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
283
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
284
+ )
285
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
286
+ (ffn): MixFFN(
287
+ (activate): GELU(approximate='none')
288
+ (layers): Sequential(
289
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
290
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
291
+ (2): GELU(approximate='none')
292
+ (3): Dropout(p=0.0, inplace=False)
293
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
294
+ (5): Dropout(p=0.0, inplace=False)
295
+ )
296
+ (dropout_layer): DropPath()
297
+ )
298
+ )
299
+ (2): TransformerEncoderLayer(
300
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
301
+ (attn): EfficientMultiheadAttention(
302
+ (attn): MultiheadAttention(
303
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
304
+ )
305
+ (proj_drop): Dropout(p=0.0, inplace=False)
306
+ (dropout_layer): DropPath()
307
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
308
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
309
+ )
310
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
311
+ (ffn): MixFFN(
312
+ (activate): GELU(approximate='none')
313
+ (layers): Sequential(
314
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
315
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
316
+ (2): GELU(approximate='none')
317
+ (3): Dropout(p=0.0, inplace=False)
318
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
319
+ (5): Dropout(p=0.0, inplace=False)
320
+ )
321
+ (dropout_layer): DropPath()
322
+ )
323
+ )
324
+ )
325
+ (2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
326
+ )
327
+ (1): ModuleList(
328
+ (0): PatchEmbed(
329
+ (projection): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
330
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
331
+ )
332
+ (1): ModuleList(
333
+ (0): TransformerEncoderLayer(
334
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
335
+ (attn): EfficientMultiheadAttention(
336
+ (attn): MultiheadAttention(
337
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
338
+ )
339
+ (proj_drop): Dropout(p=0.0, inplace=False)
340
+ (dropout_layer): DropPath()
341
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
342
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
343
+ )
344
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
345
+ (ffn): MixFFN(
346
+ (activate): GELU(approximate='none')
347
+ (layers): Sequential(
348
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
349
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
350
+ (2): GELU(approximate='none')
351
+ (3): Dropout(p=0.0, inplace=False)
352
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
353
+ (5): Dropout(p=0.0, inplace=False)
354
+ )
355
+ (dropout_layer): DropPath()
356
+ )
357
+ )
358
+ (1): TransformerEncoderLayer(
359
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
360
+ (attn): EfficientMultiheadAttention(
361
+ (attn): MultiheadAttention(
362
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
363
+ )
364
+ (proj_drop): Dropout(p=0.0, inplace=False)
365
+ (dropout_layer): DropPath()
366
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
367
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
368
+ )
369
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
370
+ (ffn): MixFFN(
371
+ (activate): GELU(approximate='none')
372
+ (layers): Sequential(
373
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
374
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
375
+ (2): GELU(approximate='none')
376
+ (3): Dropout(p=0.0, inplace=False)
377
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
378
+ (5): Dropout(p=0.0, inplace=False)
379
+ )
380
+ (dropout_layer): DropPath()
381
+ )
382
+ )
383
+ (2): TransformerEncoderLayer(
384
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
385
+ (attn): EfficientMultiheadAttention(
386
+ (attn): MultiheadAttention(
387
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
388
+ )
389
+ (proj_drop): Dropout(p=0.0, inplace=False)
390
+ (dropout_layer): DropPath()
391
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
392
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
393
+ )
394
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
395
+ (ffn): MixFFN(
396
+ (activate): GELU(approximate='none')
397
+ (layers): Sequential(
398
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
399
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
400
+ (2): GELU(approximate='none')
401
+ (3): Dropout(p=0.0, inplace=False)
402
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
403
+ (5): Dropout(p=0.0, inplace=False)
404
+ )
405
+ (dropout_layer): DropPath()
406
+ )
407
+ )
408
+ (3): TransformerEncoderLayer(
409
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
410
+ (attn): EfficientMultiheadAttention(
411
+ (attn): MultiheadAttention(
412
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
413
+ )
414
+ (proj_drop): Dropout(p=0.0, inplace=False)
415
+ (dropout_layer): DropPath()
416
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
417
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
418
+ )
419
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
420
+ (ffn): MixFFN(
421
+ (activate): GELU(approximate='none')
422
+ (layers): Sequential(
423
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
424
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
425
+ (2): GELU(approximate='none')
426
+ (3): Dropout(p=0.0, inplace=False)
427
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
428
+ (5): Dropout(p=0.0, inplace=False)
429
+ )
430
+ (dropout_layer): DropPath()
431
+ )
432
+ )
433
+ )
434
+ (2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
435
+ )
436
+ (2): ModuleList(
437
+ (0): PatchEmbed(
438
+ (projection): Conv2d(128, 320, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
439
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
440
+ )
441
+ (1): ModuleList(
442
+ (0): TransformerEncoderLayer(
443
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
444
+ (attn): EfficientMultiheadAttention(
445
+ (attn): MultiheadAttention(
446
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
447
+ )
448
+ (proj_drop): Dropout(p=0.0, inplace=False)
449
+ (dropout_layer): DropPath()
450
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
451
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
452
+ )
453
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
454
+ (ffn): MixFFN(
455
+ (activate): GELU(approximate='none')
456
+ (layers): Sequential(
457
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
458
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
459
+ (2): GELU(approximate='none')
460
+ (3): Dropout(p=0.0, inplace=False)
461
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
462
+ (5): Dropout(p=0.0, inplace=False)
463
+ )
464
+ (dropout_layer): DropPath()
465
+ )
466
+ )
467
+ (1): TransformerEncoderLayer(
468
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
469
+ (attn): EfficientMultiheadAttention(
470
+ (attn): MultiheadAttention(
471
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
472
+ )
473
+ (proj_drop): Dropout(p=0.0, inplace=False)
474
+ (dropout_layer): DropPath()
475
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
476
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
477
+ )
478
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
479
+ (ffn): MixFFN(
480
+ (activate): GELU(approximate='none')
481
+ (layers): Sequential(
482
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
483
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
484
+ (2): GELU(approximate='none')
485
+ (3): Dropout(p=0.0, inplace=False)
486
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
487
+ (5): Dropout(p=0.0, inplace=False)
488
+ )
489
+ (dropout_layer): DropPath()
490
+ )
491
+ )
492
+ (2): TransformerEncoderLayer(
493
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
494
+ (attn): EfficientMultiheadAttention(
495
+ (attn): MultiheadAttention(
496
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
497
+ )
498
+ (proj_drop): Dropout(p=0.0, inplace=False)
499
+ (dropout_layer): DropPath()
500
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
501
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
502
+ )
503
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
504
+ (ffn): MixFFN(
505
+ (activate): GELU(approximate='none')
506
+ (layers): Sequential(
507
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
508
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
509
+ (2): GELU(approximate='none')
510
+ (3): Dropout(p=0.0, inplace=False)
511
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
512
+ (5): Dropout(p=0.0, inplace=False)
513
+ )
514
+ (dropout_layer): DropPath()
515
+ )
516
+ )
517
+ (3): TransformerEncoderLayer(
518
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
519
+ (attn): EfficientMultiheadAttention(
520
+ (attn): MultiheadAttention(
521
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
522
+ )
523
+ (proj_drop): Dropout(p=0.0, inplace=False)
524
+ (dropout_layer): DropPath()
525
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
526
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
527
+ )
528
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
529
+ (ffn): MixFFN(
530
+ (activate): GELU(approximate='none')
531
+ (layers): Sequential(
532
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
533
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
534
+ (2): GELU(approximate='none')
535
+ (3): Dropout(p=0.0, inplace=False)
536
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
537
+ (5): Dropout(p=0.0, inplace=False)
538
+ )
539
+ (dropout_layer): DropPath()
540
+ )
541
+ )
542
+ (4): TransformerEncoderLayer(
543
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
544
+ (attn): EfficientMultiheadAttention(
545
+ (attn): MultiheadAttention(
546
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
547
+ )
548
+ (proj_drop): Dropout(p=0.0, inplace=False)
549
+ (dropout_layer): DropPath()
550
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
551
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
552
+ )
553
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
554
+ (ffn): MixFFN(
555
+ (activate): GELU(approximate='none')
556
+ (layers): Sequential(
557
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
558
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
559
+ (2): GELU(approximate='none')
560
+ (3): Dropout(p=0.0, inplace=False)
561
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
562
+ (5): Dropout(p=0.0, inplace=False)
563
+ )
564
+ (dropout_layer): DropPath()
565
+ )
566
+ )
567
+ (5): TransformerEncoderLayer(
568
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
569
+ (attn): EfficientMultiheadAttention(
570
+ (attn): MultiheadAttention(
571
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
572
+ )
573
+ (proj_drop): Dropout(p=0.0, inplace=False)
574
+ (dropout_layer): DropPath()
575
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
576
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
577
+ )
578
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
579
+ (ffn): MixFFN(
580
+ (activate): GELU(approximate='none')
581
+ (layers): Sequential(
582
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
583
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
584
+ (2): GELU(approximate='none')
585
+ (3): Dropout(p=0.0, inplace=False)
586
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
587
+ (5): Dropout(p=0.0, inplace=False)
588
+ )
589
+ (dropout_layer): DropPath()
590
+ )
591
+ )
592
+ )
593
+ (2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
594
+ )
595
+ (3): ModuleList(
596
+ (0): PatchEmbed(
597
+ (projection): Conv2d(320, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
598
+ (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
599
+ )
600
+ (1): ModuleList(
601
+ (0): TransformerEncoderLayer(
602
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
603
+ (attn): EfficientMultiheadAttention(
604
+ (attn): MultiheadAttention(
605
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
606
+ )
607
+ (proj_drop): Dropout(p=0.0, inplace=False)
608
+ (dropout_layer): DropPath()
609
+ )
610
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
611
+ (ffn): MixFFN(
612
+ (activate): GELU(approximate='none')
613
+ (layers): Sequential(
614
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
615
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
616
+ (2): GELU(approximate='none')
617
+ (3): Dropout(p=0.0, inplace=False)
618
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
619
+ (5): Dropout(p=0.0, inplace=False)
620
+ )
621
+ (dropout_layer): DropPath()
622
+ )
623
+ )
624
+ (1): TransformerEncoderLayer(
625
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
626
+ (attn): EfficientMultiheadAttention(
627
+ (attn): MultiheadAttention(
628
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
629
+ )
630
+ (proj_drop): Dropout(p=0.0, inplace=False)
631
+ (dropout_layer): DropPath()
632
+ )
633
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
634
+ (ffn): MixFFN(
635
+ (activate): GELU(approximate='none')
636
+ (layers): Sequential(
637
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
638
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
639
+ (2): GELU(approximate='none')
640
+ (3): Dropout(p=0.0, inplace=False)
641
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
642
+ (5): Dropout(p=0.0, inplace=False)
643
+ )
644
+ (dropout_layer): DropPath()
645
+ )
646
+ )
647
+ (2): TransformerEncoderLayer(
648
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
649
+ (attn): EfficientMultiheadAttention(
650
+ (attn): MultiheadAttention(
651
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
652
+ )
653
+ (proj_drop): Dropout(p=0.0, inplace=False)
654
+ (dropout_layer): DropPath()
655
+ )
656
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
657
+ (ffn): MixFFN(
658
+ (activate): GELU(approximate='none')
659
+ (layers): Sequential(
660
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
661
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
662
+ (2): GELU(approximate='none')
663
+ (3): Dropout(p=0.0, inplace=False)
664
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
665
+ (5): Dropout(p=0.0, inplace=False)
666
+ )
667
+ (dropout_layer): DropPath()
668
+ )
669
+ )
670
+ )
671
+ (2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
672
+ )
673
+ )
674
+ )
675
+ init_cfg={'type': 'Pretrained', 'checkpoint': 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'}
676
+ (decode_head): SegformerHeadUnetFCHeadSingleStepLogits(
677
+ input_transform=multiple_select, ignore_index=0, align_corners=False
678
+ (loss_decode): CrossEntropyLoss(avg_non_ignore=False)
679
+ (conv_seg): Conv2d(256, 150, kernel_size=(1, 1), stride=(1, 1))
680
+ (dropout): Dropout2d(p=0.1, inplace=False)
681
+ (convs): ModuleList(
682
+ (0): ConvModule(
683
+ (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
684
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
685
+ (activate): ReLU(inplace=True)
686
+ )
687
+ (1): ConvModule(
688
+ (conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
689
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
690
+ (activate): ReLU(inplace=True)
691
+ )
692
+ (2): ConvModule(
693
+ (conv): Conv2d(320, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
694
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
695
+ (activate): ReLU(inplace=True)
696
+ )
697
+ (3): ConvModule(
698
+ (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
699
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
700
+ (activate): ReLU(inplace=True)
701
+ )
702
+ )
703
+ (fusion_conv): ConvModule(
704
+ (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
705
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
706
+ (activate): ReLU(inplace=True)
707
+ )
708
+ (unet): Unet(
709
+ (init_conv): Conv2d(166, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
710
+ (time_mlp): Sequential(
711
+ (0): SinusoidalPosEmb()
712
+ (1): Linear(in_features=128, out_features=512, bias=True)
713
+ (2): GELU(approximate='none')
714
+ (3): Linear(in_features=512, out_features=512, bias=True)
715
+ )
716
+ (downs): ModuleList(
717
+ (0): ModuleList(
718
+ (0): ResnetBlock(
719
+ (mlp): Sequential(
720
+ (0): SiLU()
721
+ (1): Linear(in_features=512, out_features=256, bias=True)
722
+ )
723
+ (block1): Block(
724
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
725
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
726
+ (act): SiLU()
727
+ )
728
+ (block2): Block(
729
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
730
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
731
+ (act): SiLU()
732
+ )
733
+ (res_conv): Identity()
734
+ )
735
+ (1): ResnetBlock(
736
+ (mlp): Sequential(
737
+ (0): SiLU()
738
+ (1): Linear(in_features=512, out_features=256, bias=True)
739
+ )
740
+ (block1): Block(
741
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
742
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
743
+ (act): SiLU()
744
+ )
745
+ (block2): Block(
746
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
747
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
748
+ (act): SiLU()
749
+ )
750
+ (res_conv): Identity()
751
+ )
752
+ (2): Residual(
753
+ (fn): PreNorm(
754
+ (fn): LinearAttention(
755
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
756
+ (to_out): Sequential(
757
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
758
+ (1): LayerNorm()
759
+ )
760
+ )
761
+ (norm): LayerNorm()
762
+ )
763
+ )
764
+ (3): Conv2d(128, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
765
+ )
766
+ (1): ModuleList(
767
+ (0): ResnetBlock(
768
+ (mlp): Sequential(
769
+ (0): SiLU()
770
+ (1): Linear(in_features=512, out_features=256, bias=True)
771
+ )
772
+ (block1): Block(
773
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
774
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
775
+ (act): SiLU()
776
+ )
777
+ (block2): Block(
778
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
779
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
780
+ (act): SiLU()
781
+ )
782
+ (res_conv): Identity()
783
+ )
784
+ (1): ResnetBlock(
785
+ (mlp): Sequential(
786
+ (0): SiLU()
787
+ (1): Linear(in_features=512, out_features=256, bias=True)
788
+ )
789
+ (block1): Block(
790
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
791
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
792
+ (act): SiLU()
793
+ )
794
+ (block2): Block(
795
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
796
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
797
+ (act): SiLU()
798
+ )
799
+ (res_conv): Identity()
800
+ )
801
+ (2): Residual(
802
+ (fn): PreNorm(
803
+ (fn): LinearAttention(
804
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
805
+ (to_out): Sequential(
806
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
807
+ (1): LayerNorm()
808
+ )
809
+ )
810
+ (norm): LayerNorm()
811
+ )
812
+ )
813
+ (3): Conv2d(128, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
814
+ )
815
+ (2): ModuleList(
816
+ (0): ResnetBlock(
817
+ (mlp): Sequential(
818
+ (0): SiLU()
819
+ (1): Linear(in_features=512, out_features=256, bias=True)
820
+ )
821
+ (block1): Block(
822
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
823
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
824
+ (act): SiLU()
825
+ )
826
+ (block2): Block(
827
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
828
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
829
+ (act): SiLU()
830
+ )
831
+ (res_conv): Identity()
832
+ )
833
+ (1): ResnetBlock(
834
+ (mlp): Sequential(
835
+ (0): SiLU()
836
+ (1): Linear(in_features=512, out_features=256, bias=True)
837
+ )
838
+ (block1): Block(
839
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
840
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
841
+ (act): SiLU()
842
+ )
843
+ (block2): Block(
844
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
845
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
846
+ (act): SiLU()
847
+ )
848
+ (res_conv): Identity()
849
+ )
850
+ (2): Residual(
851
+ (fn): PreNorm(
852
+ (fn): LinearAttention(
853
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
854
+ (to_out): Sequential(
855
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
856
+ (1): LayerNorm()
857
+ )
858
+ )
859
+ (norm): LayerNorm()
860
+ )
861
+ )
862
+ (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
863
+ )
864
+ )
865
+ (ups): ModuleList(
866
+ (0): ModuleList(
867
+ (0): ResnetBlock(
868
+ (mlp): Sequential(
869
+ (0): SiLU()
870
+ (1): Linear(in_features=512, out_features=256, bias=True)
871
+ )
872
+ (block1): Block(
873
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
874
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
875
+ (act): SiLU()
876
+ )
877
+ (block2): Block(
878
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
879
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
880
+ (act): SiLU()
881
+ )
882
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
883
+ )
884
+ (1): ResnetBlock(
885
+ (mlp): Sequential(
886
+ (0): SiLU()
887
+ (1): Linear(in_features=512, out_features=256, bias=True)
888
+ )
889
+ (block1): Block(
890
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
891
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
892
+ (act): SiLU()
893
+ )
894
+ (block2): Block(
895
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
896
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
897
+ (act): SiLU()
898
+ )
899
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
900
+ )
901
+ (2): Residual(
902
+ (fn): PreNorm(
903
+ (fn): LinearAttention(
904
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
905
+ (to_out): Sequential(
906
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
907
+ (1): LayerNorm()
908
+ )
909
+ )
910
+ (norm): LayerNorm()
911
+ )
912
+ )
913
+ (3): Sequential(
914
+ (0): Upsample(scale_factor=2.0, mode=nearest)
915
+ (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
916
+ )
917
+ )
918
+ (1): ModuleList(
919
+ (0): ResnetBlock(
920
+ (mlp): Sequential(
921
+ (0): SiLU()
922
+ (1): Linear(in_features=512, out_features=256, bias=True)
923
+ )
924
+ (block1): Block(
925
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
926
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
927
+ (act): SiLU()
928
+ )
929
+ (block2): Block(
930
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
931
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
932
+ (act): SiLU()
933
+ )
934
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
935
+ )
936
+ (1): ResnetBlock(
937
+ (mlp): Sequential(
938
+ (0): SiLU()
939
+ (1): Linear(in_features=512, out_features=256, bias=True)
940
+ )
941
+ (block1): Block(
942
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
943
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
944
+ (act): SiLU()
945
+ )
946
+ (block2): Block(
947
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
948
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
949
+ (act): SiLU()
950
+ )
951
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
952
+ )
953
+ (2): Residual(
954
+ (fn): PreNorm(
955
+ (fn): LinearAttention(
956
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
957
+ (to_out): Sequential(
958
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
959
+ (1): LayerNorm()
960
+ )
961
+ )
962
+ (norm): LayerNorm()
963
+ )
964
+ )
965
+ (3): Sequential(
966
+ (0): Upsample(scale_factor=2.0, mode=nearest)
967
+ (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
968
+ )
969
+ )
970
+ (2): ModuleList(
971
+ (0): ResnetBlock(
972
+ (mlp): Sequential(
973
+ (0): SiLU()
974
+ (1): Linear(in_features=512, out_features=256, bias=True)
975
+ )
976
+ (block1): Block(
977
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
978
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
979
+ (act): SiLU()
980
+ )
981
+ (block2): Block(
982
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
983
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
984
+ (act): SiLU()
985
+ )
986
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
987
+ )
988
+ (1): ResnetBlock(
989
+ (mlp): Sequential(
990
+ (0): SiLU()
991
+ (1): Linear(in_features=512, out_features=256, bias=True)
992
+ )
993
+ (block1): Block(
994
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
995
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
996
+ (act): SiLU()
997
+ )
998
+ (block2): Block(
999
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1000
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1001
+ (act): SiLU()
1002
+ )
1003
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
1004
+ )
1005
+ (2): Residual(
1006
+ (fn): PreNorm(
1007
+ (fn): LinearAttention(
1008
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1009
+ (to_out): Sequential(
1010
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
1011
+ (1): LayerNorm()
1012
+ )
1013
+ )
1014
+ (norm): LayerNorm()
1015
+ )
1016
+ )
1017
+ (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1018
+ )
1019
+ )
1020
+ (mid_block1): ResnetBlock(
1021
+ (mlp): Sequential(
1022
+ (0): SiLU()
1023
+ (1): Linear(in_features=512, out_features=256, bias=True)
1024
+ )
1025
+ (block1): Block(
1026
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1027
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1028
+ (act): SiLU()
1029
+ )
1030
+ (block2): Block(
1031
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1032
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1033
+ (act): SiLU()
1034
+ )
1035
+ (res_conv): Identity()
1036
+ )
1037
+ (mid_attn): Residual(
1038
+ (fn): PreNorm(
1039
+ (fn): Attention(
1040
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1041
+ (to_out): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
1042
+ )
1043
+ (norm): LayerNorm()
1044
+ )
1045
+ )
1046
+ (mid_block2): ResnetBlock(
1047
+ (mlp): Sequential(
1048
+ (0): SiLU()
1049
+ (1): Linear(in_features=512, out_features=256, bias=True)
1050
+ )
1051
+ (block1): Block(
1052
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1053
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1054
+ (act): SiLU()
1055
+ )
1056
+ (block2): Block(
1057
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1058
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1059
+ (act): SiLU()
1060
+ )
1061
+ (res_conv): Identity()
1062
+ )
1063
+ (final_res_block): ResnetBlock(
1064
+ (mlp): Sequential(
1065
+ (0): SiLU()
1066
+ (1): Linear(in_features=512, out_features=256, bias=True)
1067
+ )
1068
+ (block1): Block(
1069
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1070
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1071
+ (act): SiLU()
1072
+ )
1073
+ (block2): Block(
1074
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1075
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1076
+ (act): SiLU()
1077
+ )
1078
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
1079
+ )
1080
+ (final_conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
1081
+ )
1082
+ (conv_seg_new): Conv2d(256, 151, kernel_size=(1, 1), stride=(1, 1))
1083
+ (embed): Embedding(151, 16)
1084
+ )
1085
+ init_cfg={'type': 'Pretrained', 'checkpoint': 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'}
1086
+ )
1087
+ 2023-03-04 19:03:28,858 - mmseg - INFO - Loaded 20210 images
1088
+ 2023-03-04 19:03:29,858 - mmseg - INFO - Loaded 2000 images
1089
+ 2023-03-04 19:03:29,859 - mmseg - INFO - load checkpoint from local path: ./work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/latest.pth
1090
+ 2023-03-04 19:03:30,494 - mmseg - INFO - resumed from epoch: 13, iter 7999
1091
+ 2023-03-04 19:03:30,496 - mmseg - INFO - Start running, host: laizeqiang@SH-IDC1-10-140-37-114, work_dir: /mnt/petrelfs/laizeqiang/mmseg-baseline/work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits
1092
+ 2023-03-04 19:03:30,496 - mmseg - INFO - Hooks will be executed in the following order:
1093
+ before_run:
1094
+ (VERY_HIGH ) StepLrUpdaterHook
1095
+ (NORMAL ) CheckpointHook
1096
+ (LOW ) DistEvalHook
1097
+ (VERY_LOW ) TextLoggerHook
1098
+ --------------------
1099
+ before_train_epoch:
1100
+ (VERY_HIGH ) StepLrUpdaterHook
1101
+ (LOW ) IterTimerHook
1102
+ (LOW ) DistEvalHook
1103
+ (VERY_LOW ) TextLoggerHook
1104
+ --------------------
1105
+ before_train_iter:
1106
+ (VERY_HIGH ) StepLrUpdaterHook
1107
+ (LOW ) IterTimerHook
1108
+ (LOW ) DistEvalHook
1109
+ --------------------
1110
+ after_train_iter:
1111
+ (ABOVE_NORMAL) OptimizerHook
1112
+ (NORMAL ) CheckpointHook
1113
+ (LOW ) IterTimerHook
1114
+ (LOW ) DistEvalHook
1115
+ (VERY_LOW ) TextLoggerHook
1116
+ --------------------
1117
+ after_train_epoch:
1118
+ (NORMAL ) CheckpointHook
1119
+ (LOW ) DistEvalHook
1120
+ (VERY_LOW ) TextLoggerHook
1121
+ --------------------
1122
+ before_val_epoch:
1123
+ (LOW ) IterTimerHook
1124
+ (VERY_LOW ) TextLoggerHook
1125
+ --------------------
1126
+ before_val_iter:
1127
+ (LOW ) IterTimerHook
1128
+ --------------------
1129
+ after_val_iter:
1130
+ (LOW ) IterTimerHook
1131
+ --------------------
1132
+ after_val_epoch:
1133
+ (VERY_LOW ) TextLoggerHook
1134
+ --------------------
1135
+ after_run:
1136
+ (VERY_LOW ) TextLoggerHook
1137
+ --------------------
1138
+ 2023-03-04 19:03:30,496 - mmseg - INFO - workflow: [('train', 1)], max: 80000 iters
1139
+ 2023-03-04 19:03:30,496 - mmseg - INFO - Checkpoints will be saved to /mnt/petrelfs/laizeqiang/mmseg-baseline/work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits by HardDiskBackend.
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_190322.log.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"env_info": "sys.platform: linux\nPython: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB\nCUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch\nNVCC: Cuda compilation tools, release 11.6, V11.6.124\nGCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)\nPyTorch: 1.13.1\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.6\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.14.1\nOpenCV: 4.7.0\nMMCV: 1.7.1\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.6\nMMSegmentation: 0.30.0+6749699", "seed": 1480177113, "exp_name": "ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits.py", "mmseg_version": "0.30.0+6749699", "config": "norm_cfg = dict(type='SyncBN', requires_grad=True)\ncheckpoint = 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'\nmodel = dict(\n type='EncoderDecoderFreeze',\n freeze_parameters=['backbone', 'decode_head'],\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',\n backbone=dict(\n type='MixVisionTransformerCustomInitWeights',\n in_channels=3,\n embed_dims=64,\n num_stages=4,\n num_layers=[3, 4, 6, 3],\n num_heads=[1, 2, 5, 8],\n patch_sizes=[7, 3, 3, 3],\n sr_ratios=[8, 4, 2, 1],\n out_indices=(0, 1, 2, 3),\n mlp_ratio=4,\n qkv_bias=True,\n drop_rate=0.0,\n attn_drop_rate=0.0,\n drop_path_rate=0.1,\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'\n ),\n decode_head=dict(\n type='SegformerHeadUnetFCHeadSingleStepLogits',\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',\n dim=128,\n out_dim=256,\n unet_channels=166,\n dim_mults=[1, 1, 1],\n cat_embedding_dim=16,\n in_channels=[64, 128, 320, 512],\n in_index=[0, 1, 2, 3],\n channels=256,\n dropout_ratio=0.1,\n num_classes=151,\n norm_cfg=dict(type='SyncBN', requires_grad=True),\n align_corners=False,\n ignore_index=0,\n loss_decode=dict(\n type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),\n train_cfg=dict(),\n test_cfg=dict(mode='whole'))\ndataset_type = 'ADE20K151Dataset'\ndata_root = 'data/ade/ADEChallengeData2016'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ncrop_size = (512, 512)\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=4,\n workers_per_gpu=4,\n train=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/training',\n ann_dir='annotations/training',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n ]),\n val=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]),\n test=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]))\nlog_config = dict(\n interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\ncudnn_benchmark = True\noptimizer = dict(\n type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)\noptimizer_config = dict()\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=1000,\n warmup_ratio=1e-06,\n step=10000,\n gamma=0.5,\n min_lr=1e-06,\n by_epoch=False)\nrunner = dict(type='IterBasedRunner', max_iters=80000)\ncheckpoint_config = dict(by_epoch=False, interval=8000)\nevaluation = dict(\n interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')\nwork_dir = './work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits'\ngpu_ids = range(0, 8)\nauto_resume = True\ndevice = 'cuda'\nseed = 1480177113\n", "CLASSES": ["background", "wall", "building", "sky", "floor", "tree", "ceiling", "road", "bed ", "windowpane", "grass", "cabinet", "sidewalk", "person", "earth", "door", "table", "mountain", "plant", "curtain", "chair", "car", "water", "painting", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock", "wardrobe", "lamp", "bathtub", "railing", "cushion", "base", "box", "column", "signboard", "chest of drawers", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator", "grandstand", "path", "stairs", "runway", "case", "pool table", "pillow", "screen door", "stairway", "river", "bridge", "bookcase", "blind", "coffee table", "toilet", "flower", "book", "hill", "bench", "countertop", "stove", "palm", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel", "bus", "towel", "light", "truck", "tower", "chandelier", "awning", "streetlight", "booth", "television receiver", "airplane", "dirt track", "apparel", "pole", "land", "bannister", "escalator", "ottoman", "bottle", "buffet", "poster", "stage", "van", "ship", "fountain", "conveyer belt", "canopy", "washer", "plaything", "swimming pool", "stool", "barrel", "basket", "waterfall", "tent", "bag", "minibike", "cradle", "oven", "ball", "food", "step", "tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket", "sculpture", "hood", "sconce", "vase", "traffic light", "tray", "ashcan", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass", "clock", "flag"], "PALETTE": [[0, 0, 0], [120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], [102, 255, 0], [92, 0, 255]], "hook_msgs": {}}
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_211228.log ADDED
The diff for this file is too large to render. See raw diff
 
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/20230304_211228.log.json ADDED
The diff for this file is too large to render. See raw diff
 
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
2
+ checkpoint = 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'
3
+ model = dict(
4
+ type='EncoderDecoderFreeze',
5
+ freeze_parameters=['backbone', 'decode_head'],
6
+ pretrained=
7
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
8
+ backbone=dict(
9
+ type='MixVisionTransformerCustomInitWeights',
10
+ in_channels=3,
11
+ embed_dims=64,
12
+ num_stages=4,
13
+ num_layers=[3, 4, 6, 3],
14
+ num_heads=[1, 2, 5, 8],
15
+ patch_sizes=[7, 3, 3, 3],
16
+ sr_ratios=[8, 4, 2, 1],
17
+ out_indices=(0, 1, 2, 3),
18
+ mlp_ratio=4,
19
+ qkv_bias=True,
20
+ drop_rate=0.0,
21
+ attn_drop_rate=0.0,
22
+ drop_path_rate=0.1),
23
+ decode_head=dict(
24
+ type='SegformerHeadUnetFCHeadSingleStepLogits',
25
+ pretrained=
26
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
27
+ dim=128,
28
+ out_dim=256,
29
+ unet_channels=166,
30
+ dim_mults=[1, 1, 1],
31
+ cat_embedding_dim=16,
32
+ in_channels=[64, 128, 320, 512],
33
+ in_index=[0, 1, 2, 3],
34
+ channels=256,
35
+ dropout_ratio=0.1,
36
+ num_classes=151,
37
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
38
+ align_corners=False,
39
+ ignore_index=0,
40
+ loss_decode=dict(
41
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
42
+ train_cfg=dict(),
43
+ test_cfg=dict(mode='whole'))
44
+ dataset_type = 'ADE20K151Dataset'
45
+ data_root = 'data/ade/ADEChallengeData2016'
46
+ img_norm_cfg = dict(
47
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
48
+ crop_size = (512, 512)
49
+ train_pipeline = [
50
+ dict(type='LoadImageFromFile'),
51
+ dict(type='LoadAnnotations', reduce_zero_label=False),
52
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
53
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
54
+ dict(type='RandomFlip', prob=0.5),
55
+ dict(type='PhotoMetricDistortion'),
56
+ dict(
57
+ type='Normalize',
58
+ mean=[123.675, 116.28, 103.53],
59
+ std=[58.395, 57.12, 57.375],
60
+ to_rgb=True),
61
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
62
+ dict(type='DefaultFormatBundle'),
63
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
64
+ ]
65
+ test_pipeline = [
66
+ dict(type='LoadImageFromFile'),
67
+ dict(
68
+ type='MultiScaleFlipAug',
69
+ img_scale=(2048, 512),
70
+ flip=False,
71
+ transforms=[
72
+ dict(type='Resize', keep_ratio=True),
73
+ dict(type='RandomFlip'),
74
+ dict(
75
+ type='Normalize',
76
+ mean=[123.675, 116.28, 103.53],
77
+ std=[58.395, 57.12, 57.375],
78
+ to_rgb=True),
79
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
80
+ dict(type='ImageToTensor', keys=['img']),
81
+ dict(type='Collect', keys=['img'])
82
+ ])
83
+ ]
84
+ data = dict(
85
+ samples_per_gpu=4,
86
+ workers_per_gpu=4,
87
+ train=dict(
88
+ type='ADE20K151Dataset',
89
+ data_root='data/ade/ADEChallengeData2016',
90
+ img_dir='images/training',
91
+ ann_dir='annotations/training',
92
+ pipeline=[
93
+ dict(type='LoadImageFromFile'),
94
+ dict(type='LoadAnnotations', reduce_zero_label=False),
95
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
96
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
97
+ dict(type='RandomFlip', prob=0.5),
98
+ dict(type='PhotoMetricDistortion'),
99
+ dict(
100
+ type='Normalize',
101
+ mean=[123.675, 116.28, 103.53],
102
+ std=[58.395, 57.12, 57.375],
103
+ to_rgb=True),
104
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
105
+ dict(type='DefaultFormatBundle'),
106
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
107
+ ]),
108
+ val=dict(
109
+ type='ADE20K151Dataset',
110
+ data_root='data/ade/ADEChallengeData2016',
111
+ img_dir='images/validation',
112
+ ann_dir='annotations/validation',
113
+ pipeline=[
114
+ dict(type='LoadImageFromFile'),
115
+ dict(
116
+ type='MultiScaleFlipAug',
117
+ img_scale=(2048, 512),
118
+ flip=False,
119
+ transforms=[
120
+ dict(type='Resize', keep_ratio=True),
121
+ dict(type='RandomFlip'),
122
+ dict(
123
+ type='Normalize',
124
+ mean=[123.675, 116.28, 103.53],
125
+ std=[58.395, 57.12, 57.375],
126
+ to_rgb=True),
127
+ dict(
128
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
129
+ dict(type='ImageToTensor', keys=['img']),
130
+ dict(type='Collect', keys=['img'])
131
+ ])
132
+ ]),
133
+ test=dict(
134
+ type='ADE20K151Dataset',
135
+ data_root='data/ade/ADEChallengeData2016',
136
+ img_dir='images/validation',
137
+ ann_dir='annotations/validation',
138
+ pipeline=[
139
+ dict(type='LoadImageFromFile'),
140
+ dict(
141
+ type='MultiScaleFlipAug',
142
+ img_scale=(2048, 512),
143
+ flip=False,
144
+ transforms=[
145
+ dict(type='Resize', keep_ratio=True),
146
+ dict(type='RandomFlip'),
147
+ dict(
148
+ type='Normalize',
149
+ mean=[123.675, 116.28, 103.53],
150
+ std=[58.395, 57.12, 57.375],
151
+ to_rgb=True),
152
+ dict(
153
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
154
+ dict(type='ImageToTensor', keys=['img']),
155
+ dict(type='Collect', keys=['img'])
156
+ ])
157
+ ]))
158
+ log_config = dict(
159
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
160
+ dist_params = dict(backend='nccl')
161
+ log_level = 'INFO'
162
+ load_from = None
163
+ resume_from = None
164
+ workflow = [('train', 1)]
165
+ cudnn_benchmark = True
166
+ optimizer = dict(
167
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
168
+ optimizer_config = dict()
169
+ lr_config = dict(
170
+ policy='step',
171
+ warmup='linear',
172
+ warmup_iters=1000,
173
+ warmup_ratio=1e-06,
174
+ step=10000,
175
+ gamma=0.5,
176
+ min_lr=1e-06,
177
+ by_epoch=False)
178
+ runner = dict(type='IterBasedRunner', max_iters=80000)
179
+ checkpoint_config = dict(by_epoch=False, interval=8000)
180
+ evaluation = dict(
181
+ interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')
182
+ work_dir = './work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits'
183
+ gpu_ids = range(0, 8)
184
+ auto_resume = True
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/best_mIoU_iter_72000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd124854833d3fc6ae7e9bebd9cd5f44f52b356d910c3636420abfb59c287ac2
3
+ size 227725084
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_16000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f049b01364b5f98b74b11cdd8da2a822c43f907429f22e65833516119b27227
3
+ size 227725084
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_24000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bc558779cba0cb559853e558a4e3807455668b4a4b75cae67985c5090052e19
3
+ size 227725084
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_32000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:126f85ffef4d1ca315447b2f44f3b2392a6957a63c6d8c7c44f537fe558085c1
3
+ size 227725084
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_40000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2a1744562fc230053f45aa4f72a8c66bc1ecfa943ea5c7e3ad0275b6019ca55
3
+ size 227725084
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_48000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:513e0b4afde17af815906fb34ba17e59e0b0c5806b3a7afb0a470406f5ea7cb4
3
+ size 227725084
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_56000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:798e0514054aac62f2a077252794ccb936572c2017ed5cd1ec98ff4576acecee
3
+ size 227725084
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_64000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1a02a98200eaa06575a1502e5da9c2523750268d07cb712a8bf14305e0c20c6
3
+ size 227725084
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_72000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72cdcc0410e7f0e9eadb7a993d874da8207eb8df86a67afca7bc34a37523c3c7
3
+ size 227725084
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_8000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:276c86afdb00e16ac411fb2392aaa4c1ae96a0723072f21bec023394f86f4415
3
+ size 227724444
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/iter_80000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ffe6453b32f3af928bbda5aed4bb23c23a6368d6e82f5e8ab5bb3fb7f185bf4
3
+ size 227725084
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_logits/latest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ffe6453b32f3af928bbda5aed4bb23c23a6368d6e82f5e8ab5bb3fb7f185bf4
3
+ size 227725084
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/20230304_103602.log ADDED
@@ -0,0 +1,1151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-03-04 10:36:02,337 - mmseg - INFO - Multi-processing start method is `None`
2
+ 2023-03-04 10:36:02,353 - mmseg - INFO - OpenCV num_threads is `128
3
+ 2023-03-04 10:36:02,353 - mmseg - INFO - OMP num threads is 1
4
+ 2023-03-04 10:36:02,407 - mmseg - INFO - Environment info:
5
+ ------------------------------------------------------------
6
+ sys.platform: linux
7
+ Python: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]
8
+ CUDA available: True
9
+ GPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB
10
+ CUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch
11
+ NVCC: Cuda compilation tools, release 11.6, V11.6.124
12
+ GCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)
13
+ PyTorch: 1.13.1
14
+ PyTorch compiling details: PyTorch built with:
15
+ - GCC 9.3
16
+ - C++ Version: 201402
17
+ - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications
18
+ - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
19
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
20
+ - LAPACK is enabled (usually provided by MKL)
21
+ - NNPACK is enabled
22
+ - CPU capability usage: AVX2
23
+ - CUDA Runtime 11.6
24
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37
25
+ - CuDNN 8.3.2 (built against CUDA 11.5)
26
+ - Magma 2.6.1
27
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
28
+
29
+ TorchVision: 0.14.1
30
+ OpenCV: 4.7.0
31
+ MMCV: 1.7.1
32
+ MMCV Compiler: GCC 9.3
33
+ MMCV CUDA Compiler: 11.6
34
+ MMSegmentation: 0.30.0+d4f0cb3
35
+ ------------------------------------------------------------
36
+
37
+ 2023-03-04 10:36:02,407 - mmseg - INFO - Distributed training: True
38
+ 2023-03-04 10:36:03,072 - mmseg - INFO - Config:
39
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
40
+ checkpoint = 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'
41
+ model = dict(
42
+ type='EncoderDecoderFreeze',
43
+ freeze_parameters=['backbone', 'decode_head'],
44
+ pretrained=
45
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
46
+ backbone=dict(
47
+ type='MixVisionTransformerCustomInitWeights',
48
+ in_channels=3,
49
+ embed_dims=64,
50
+ num_stages=4,
51
+ num_layers=[3, 4, 6, 3],
52
+ num_heads=[1, 2, 5, 8],
53
+ patch_sizes=[7, 3, 3, 3],
54
+ sr_ratios=[8, 4, 2, 1],
55
+ out_indices=(0, 1, 2, 3),
56
+ mlp_ratio=4,
57
+ qkv_bias=True,
58
+ drop_rate=0.0,
59
+ attn_drop_rate=0.0,
60
+ drop_path_rate=0.1),
61
+ decode_head=dict(
62
+ type='SegformerHeadUnetFCHeadSingleStepMask',
63
+ pretrained=
64
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
65
+ dim=128,
66
+ out_dim=256,
67
+ unet_channels=272,
68
+ dim_mults=[1, 1, 1],
69
+ cat_embedding_dim=16,
70
+ in_channels=[64, 128, 320, 512],
71
+ in_index=[0, 1, 2, 3],
72
+ channels=256,
73
+ dropout_ratio=0.1,
74
+ num_classes=151,
75
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
76
+ align_corners=False,
77
+ ignore_index=0,
78
+ loss_decode=dict(
79
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
80
+ train_cfg=dict(),
81
+ test_cfg=dict(mode='whole'))
82
+ dataset_type = 'ADE20K151Dataset'
83
+ data_root = 'data/ade/ADEChallengeData2016'
84
+ img_norm_cfg = dict(
85
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
86
+ crop_size = (512, 512)
87
+ train_pipeline = [
88
+ dict(type='LoadImageFromFile'),
89
+ dict(type='LoadAnnotations', reduce_zero_label=False),
90
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
91
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
92
+ dict(type='RandomFlip', prob=0.5),
93
+ dict(type='PhotoMetricDistortion'),
94
+ dict(
95
+ type='Normalize',
96
+ mean=[123.675, 116.28, 103.53],
97
+ std=[58.395, 57.12, 57.375],
98
+ to_rgb=True),
99
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
100
+ dict(type='DefaultFormatBundle'),
101
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
102
+ ]
103
+ test_pipeline = [
104
+ dict(type='LoadImageFromFile'),
105
+ dict(
106
+ type='MultiScaleFlipAug',
107
+ img_scale=(2048, 512),
108
+ flip=False,
109
+ transforms=[
110
+ dict(type='Resize', keep_ratio=True),
111
+ dict(type='RandomFlip'),
112
+ dict(
113
+ type='Normalize',
114
+ mean=[123.675, 116.28, 103.53],
115
+ std=[58.395, 57.12, 57.375],
116
+ to_rgb=True),
117
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
118
+ dict(type='ImageToTensor', keys=['img']),
119
+ dict(type='Collect', keys=['img'])
120
+ ])
121
+ ]
122
+ data = dict(
123
+ samples_per_gpu=4,
124
+ workers_per_gpu=4,
125
+ train=dict(
126
+ type='ADE20K151Dataset',
127
+ data_root='data/ade/ADEChallengeData2016',
128
+ img_dir='images/training',
129
+ ann_dir='annotations/training',
130
+ pipeline=[
131
+ dict(type='LoadImageFromFile'),
132
+ dict(type='LoadAnnotations', reduce_zero_label=False),
133
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
134
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
135
+ dict(type='RandomFlip', prob=0.5),
136
+ dict(type='PhotoMetricDistortion'),
137
+ dict(
138
+ type='Normalize',
139
+ mean=[123.675, 116.28, 103.53],
140
+ std=[58.395, 57.12, 57.375],
141
+ to_rgb=True),
142
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
143
+ dict(type='DefaultFormatBundle'),
144
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
145
+ ]),
146
+ val=dict(
147
+ type='ADE20K151Dataset',
148
+ data_root='data/ade/ADEChallengeData2016',
149
+ img_dir='images/validation',
150
+ ann_dir='annotations/validation',
151
+ pipeline=[
152
+ dict(type='LoadImageFromFile'),
153
+ dict(
154
+ type='MultiScaleFlipAug',
155
+ img_scale=(2048, 512),
156
+ flip=False,
157
+ transforms=[
158
+ dict(type='Resize', keep_ratio=True),
159
+ dict(type='RandomFlip'),
160
+ dict(
161
+ type='Normalize',
162
+ mean=[123.675, 116.28, 103.53],
163
+ std=[58.395, 57.12, 57.375],
164
+ to_rgb=True),
165
+ dict(
166
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
167
+ dict(type='ImageToTensor', keys=['img']),
168
+ dict(type='Collect', keys=['img'])
169
+ ])
170
+ ]),
171
+ test=dict(
172
+ type='ADE20K151Dataset',
173
+ data_root='data/ade/ADEChallengeData2016',
174
+ img_dir='images/validation',
175
+ ann_dir='annotations/validation',
176
+ pipeline=[
177
+ dict(type='LoadImageFromFile'),
178
+ dict(
179
+ type='MultiScaleFlipAug',
180
+ img_scale=(2048, 512),
181
+ flip=False,
182
+ transforms=[
183
+ dict(type='Resize', keep_ratio=True),
184
+ dict(type='RandomFlip'),
185
+ dict(
186
+ type='Normalize',
187
+ mean=[123.675, 116.28, 103.53],
188
+ std=[58.395, 57.12, 57.375],
189
+ to_rgb=True),
190
+ dict(
191
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
192
+ dict(type='ImageToTensor', keys=['img']),
193
+ dict(type='Collect', keys=['img'])
194
+ ])
195
+ ]))
196
+ log_config = dict(
197
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
198
+ dist_params = dict(backend='nccl')
199
+ log_level = 'INFO'
200
+ load_from = None
201
+ resume_from = None
202
+ workflow = [('train', 1)]
203
+ cudnn_benchmark = True
204
+ optimizer = dict(
205
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
206
+ optimizer_config = dict()
207
+ lr_config = dict(
208
+ policy='step',
209
+ warmup='linear',
210
+ warmup_iters=1000,
211
+ warmup_ratio=1e-06,
212
+ step=10000,
213
+ gamma=0.5,
214
+ min_lr=1e-06,
215
+ by_epoch=False)
216
+ runner = dict(type='IterBasedRunner', max_iters=80000)
217
+ checkpoint_config = dict(by_epoch=False, interval=8000)
218
+ evaluation = dict(
219
+ interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')
220
+ work_dir = './work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask'
221
+ gpu_ids = range(0, 8)
222
+ auto_resume = True
223
+
224
+ 2023-03-04 10:36:07,359 - mmseg - INFO - Set random seed to 1470787464, deterministic: False
225
+ 2023-03-04 10:36:07,710 - mmseg - INFO - Parameters in backbone freezed!
226
+ 2023-03-04 10:36:07,712 - mmseg - INFO - Trainable parameters in SegformerHeadUnetFCHeadSingleStep: ['unet.init_conv.weight', 'unet.init_conv.bias', 'unet.time_mlp.1.weight', 'unet.time_mlp.1.bias', 'unet.time_mlp.3.weight', 'unet.time_mlp.3.bias', 'unet.downs.0.0.mlp.1.weight', 'unet.downs.0.0.mlp.1.bias', 'unet.downs.0.0.block1.proj.weight', 'unet.downs.0.0.block1.proj.bias', 'unet.downs.0.0.block1.norm.weight', 'unet.downs.0.0.block1.norm.bias', 'unet.downs.0.0.block2.proj.weight', 'unet.downs.0.0.block2.proj.bias', 'unet.downs.0.0.block2.norm.weight', 'unet.downs.0.0.block2.norm.bias', 'unet.downs.0.1.mlp.1.weight', 'unet.downs.0.1.mlp.1.bias', 'unet.downs.0.1.block1.proj.weight', 'unet.downs.0.1.block1.proj.bias', 'unet.downs.0.1.block1.norm.weight', 'unet.downs.0.1.block1.norm.bias', 'unet.downs.0.1.block2.proj.weight', 'unet.downs.0.1.block2.proj.bias', 'unet.downs.0.1.block2.norm.weight', 'unet.downs.0.1.block2.norm.bias', 'unet.downs.0.2.fn.fn.to_qkv.weight', 'unet.downs.0.2.fn.fn.to_out.0.weight', 'unet.downs.0.2.fn.fn.to_out.0.bias', 'unet.downs.0.2.fn.fn.to_out.1.g', 'unet.downs.0.2.fn.norm.g', 'unet.downs.0.3.weight', 'unet.downs.0.3.bias', 'unet.downs.1.0.mlp.1.weight', 'unet.downs.1.0.mlp.1.bias', 'unet.downs.1.0.block1.proj.weight', 'unet.downs.1.0.block1.proj.bias', 'unet.downs.1.0.block1.norm.weight', 'unet.downs.1.0.block1.norm.bias', 'unet.downs.1.0.block2.proj.weight', 'unet.downs.1.0.block2.proj.bias', 'unet.downs.1.0.block2.norm.weight', 'unet.downs.1.0.block2.norm.bias', 'unet.downs.1.1.mlp.1.weight', 'unet.downs.1.1.mlp.1.bias', 'unet.downs.1.1.block1.proj.weight', 'unet.downs.1.1.block1.proj.bias', 'unet.downs.1.1.block1.norm.weight', 'unet.downs.1.1.block1.norm.bias', 'unet.downs.1.1.block2.proj.weight', 'unet.downs.1.1.block2.proj.bias', 'unet.downs.1.1.block2.norm.weight', 'unet.downs.1.1.block2.norm.bias', 'unet.downs.1.2.fn.fn.to_qkv.weight', 'unet.downs.1.2.fn.fn.to_out.0.weight', 'unet.downs.1.2.fn.fn.to_out.0.bias', 'unet.downs.1.2.fn.fn.to_out.1.g', 'unet.downs.1.2.fn.norm.g', 'unet.downs.1.3.weight', 'unet.downs.1.3.bias', 'unet.downs.2.0.mlp.1.weight', 'unet.downs.2.0.mlp.1.bias', 'unet.downs.2.0.block1.proj.weight', 'unet.downs.2.0.block1.proj.bias', 'unet.downs.2.0.block1.norm.weight', 'unet.downs.2.0.block1.norm.bias', 'unet.downs.2.0.block2.proj.weight', 'unet.downs.2.0.block2.proj.bias', 'unet.downs.2.0.block2.norm.weight', 'unet.downs.2.0.block2.norm.bias', 'unet.downs.2.1.mlp.1.weight', 'unet.downs.2.1.mlp.1.bias', 'unet.downs.2.1.block1.proj.weight', 'unet.downs.2.1.block1.proj.bias', 'unet.downs.2.1.block1.norm.weight', 'unet.downs.2.1.block1.norm.bias', 'unet.downs.2.1.block2.proj.weight', 'unet.downs.2.1.block2.proj.bias', 'unet.downs.2.1.block2.norm.weight', 'unet.downs.2.1.block2.norm.bias', 'unet.downs.2.2.fn.fn.to_qkv.weight', 'unet.downs.2.2.fn.fn.to_out.0.weight', 'unet.downs.2.2.fn.fn.to_out.0.bias', 'unet.downs.2.2.fn.fn.to_out.1.g', 'unet.downs.2.2.fn.norm.g', 'unet.downs.2.3.weight', 'unet.downs.2.3.bias', 'unet.ups.0.0.mlp.1.weight', 'unet.ups.0.0.mlp.1.bias', 'unet.ups.0.0.block1.proj.weight', 'unet.ups.0.0.block1.proj.bias', 'unet.ups.0.0.block1.norm.weight', 'unet.ups.0.0.block1.norm.bias', 'unet.ups.0.0.block2.proj.weight', 'unet.ups.0.0.block2.proj.bias', 'unet.ups.0.0.block2.norm.weight', 'unet.ups.0.0.block2.norm.bias', 'unet.ups.0.0.res_conv.weight', 'unet.ups.0.0.res_conv.bias', 'unet.ups.0.1.mlp.1.weight', 'unet.ups.0.1.mlp.1.bias', 'unet.ups.0.1.block1.proj.weight', 'unet.ups.0.1.block1.proj.bias', 'unet.ups.0.1.block1.norm.weight', 'unet.ups.0.1.block1.norm.bias', 'unet.ups.0.1.block2.proj.weight', 'unet.ups.0.1.block2.proj.bias', 'unet.ups.0.1.block2.norm.weight', 'unet.ups.0.1.block2.norm.bias', 'unet.ups.0.1.res_conv.weight', 'unet.ups.0.1.res_conv.bias', 'unet.ups.0.2.fn.fn.to_qkv.weight', 'unet.ups.0.2.fn.fn.to_out.0.weight', 'unet.ups.0.2.fn.fn.to_out.0.bias', 'unet.ups.0.2.fn.fn.to_out.1.g', 'unet.ups.0.2.fn.norm.g', 'unet.ups.0.3.1.weight', 'unet.ups.0.3.1.bias', 'unet.ups.1.0.mlp.1.weight', 'unet.ups.1.0.mlp.1.bias', 'unet.ups.1.0.block1.proj.weight', 'unet.ups.1.0.block1.proj.bias', 'unet.ups.1.0.block1.norm.weight', 'unet.ups.1.0.block1.norm.bias', 'unet.ups.1.0.block2.proj.weight', 'unet.ups.1.0.block2.proj.bias', 'unet.ups.1.0.block2.norm.weight', 'unet.ups.1.0.block2.norm.bias', 'unet.ups.1.0.res_conv.weight', 'unet.ups.1.0.res_conv.bias', 'unet.ups.1.1.mlp.1.weight', 'unet.ups.1.1.mlp.1.bias', 'unet.ups.1.1.block1.proj.weight', 'unet.ups.1.1.block1.proj.bias', 'unet.ups.1.1.block1.norm.weight', 'unet.ups.1.1.block1.norm.bias', 'unet.ups.1.1.block2.proj.weight', 'unet.ups.1.1.block2.proj.bias', 'unet.ups.1.1.block2.norm.weight', 'unet.ups.1.1.block2.norm.bias', 'unet.ups.1.1.res_conv.weight', 'unet.ups.1.1.res_conv.bias', 'unet.ups.1.2.fn.fn.to_qkv.weight', 'unet.ups.1.2.fn.fn.to_out.0.weight', 'unet.ups.1.2.fn.fn.to_out.0.bias', 'unet.ups.1.2.fn.fn.to_out.1.g', 'unet.ups.1.2.fn.norm.g', 'unet.ups.1.3.1.weight', 'unet.ups.1.3.1.bias', 'unet.ups.2.0.mlp.1.weight', 'unet.ups.2.0.mlp.1.bias', 'unet.ups.2.0.block1.proj.weight', 'unet.ups.2.0.block1.proj.bias', 'unet.ups.2.0.block1.norm.weight', 'unet.ups.2.0.block1.norm.bias', 'unet.ups.2.0.block2.proj.weight', 'unet.ups.2.0.block2.proj.bias', 'unet.ups.2.0.block2.norm.weight', 'unet.ups.2.0.block2.norm.bias', 'unet.ups.2.0.res_conv.weight', 'unet.ups.2.0.res_conv.bias', 'unet.ups.2.1.mlp.1.weight', 'unet.ups.2.1.mlp.1.bias', 'unet.ups.2.1.block1.proj.weight', 'unet.ups.2.1.block1.proj.bias', 'unet.ups.2.1.block1.norm.weight', 'unet.ups.2.1.block1.norm.bias', 'unet.ups.2.1.block2.proj.weight', 'unet.ups.2.1.block2.proj.bias', 'unet.ups.2.1.block2.norm.weight', 'unet.ups.2.1.block2.norm.bias', 'unet.ups.2.1.res_conv.weight', 'unet.ups.2.1.res_conv.bias', 'unet.ups.2.2.fn.fn.to_qkv.weight', 'unet.ups.2.2.fn.fn.to_out.0.weight', 'unet.ups.2.2.fn.fn.to_out.0.bias', 'unet.ups.2.2.fn.fn.to_out.1.g', 'unet.ups.2.2.fn.norm.g', 'unet.ups.2.3.weight', 'unet.ups.2.3.bias', 'unet.mid_block1.mlp.1.weight', 'unet.mid_block1.mlp.1.bias', 'unet.mid_block1.block1.proj.weight', 'unet.mid_block1.block1.proj.bias', 'unet.mid_block1.block1.norm.weight', 'unet.mid_block1.block1.norm.bias', 'unet.mid_block1.block2.proj.weight', 'unet.mid_block1.block2.proj.bias', 'unet.mid_block1.block2.norm.weight', 'unet.mid_block1.block2.norm.bias', 'unet.mid_attn.fn.fn.to_qkv.weight', 'unet.mid_attn.fn.fn.to_out.weight', 'unet.mid_attn.fn.fn.to_out.bias', 'unet.mid_attn.fn.norm.g', 'unet.mid_block2.mlp.1.weight', 'unet.mid_block2.mlp.1.bias', 'unet.mid_block2.block1.proj.weight', 'unet.mid_block2.block1.proj.bias', 'unet.mid_block2.block1.norm.weight', 'unet.mid_block2.block1.norm.bias', 'unet.mid_block2.block2.proj.weight', 'unet.mid_block2.block2.proj.bias', 'unet.mid_block2.block2.norm.weight', 'unet.mid_block2.block2.norm.bias', 'unet.final_res_block.mlp.1.weight', 'unet.final_res_block.mlp.1.bias', 'unet.final_res_block.block1.proj.weight', 'unet.final_res_block.block1.proj.bias', 'unet.final_res_block.block1.norm.weight', 'unet.final_res_block.block1.norm.bias', 'unet.final_res_block.block2.proj.weight', 'unet.final_res_block.block2.proj.bias', 'unet.final_res_block.block2.norm.weight', 'unet.final_res_block.block2.norm.bias', 'unet.final_res_block.res_conv.weight', 'unet.final_res_block.res_conv.bias', 'unet.final_conv.weight', 'unet.final_conv.bias', 'conv_seg_new.weight', 'conv_seg_new.bias']
227
+ 2023-03-04 10:36:07,712 - mmseg - INFO - Parameters in decode_head freezed!
228
+ 2023-03-04 10:36:07,736 - mmseg - INFO - load checkpoint from local path: pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth
229
+ 2023-03-04 10:36:07,975 - mmseg - WARNING - The model and loaded state dict do not match exactly
230
+
231
+ unexpected key in source state_dict: decode_head.conv_seg.weight, decode_head.conv_seg.bias, decode_head.convs.0.conv.weight, decode_head.convs.0.bn.weight, decode_head.convs.0.bn.bias, decode_head.convs.0.bn.running_mean, decode_head.convs.0.bn.running_var, decode_head.convs.0.bn.num_batches_tracked, decode_head.convs.1.conv.weight, decode_head.convs.1.bn.weight, decode_head.convs.1.bn.bias, decode_head.convs.1.bn.running_mean, decode_head.convs.1.bn.running_var, decode_head.convs.1.bn.num_batches_tracked, decode_head.convs.2.conv.weight, decode_head.convs.2.bn.weight, decode_head.convs.2.bn.bias, decode_head.convs.2.bn.running_mean, decode_head.convs.2.bn.running_var, decode_head.convs.2.bn.num_batches_tracked, decode_head.convs.3.conv.weight, decode_head.convs.3.bn.weight, decode_head.convs.3.bn.bias, decode_head.convs.3.bn.running_mean, decode_head.convs.3.bn.running_var, decode_head.convs.3.bn.num_batches_tracked, decode_head.fusion_conv.conv.weight, decode_head.fusion_conv.bn.weight, decode_head.fusion_conv.bn.bias, decode_head.fusion_conv.bn.running_mean, decode_head.fusion_conv.bn.running_var, decode_head.fusion_conv.bn.num_batches_tracked
232
+
233
+ 2023-03-04 10:36:07,990 - mmseg - INFO - load checkpoint from local path: pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth
234
+ 2023-03-04 10:36:08,180 - mmseg - WARNING - The model and loaded state dict do not match exactly
235
+
236
+ unexpected key in source state_dict: backbone.layers.0.0.projection.weight, backbone.layers.0.0.projection.bias, backbone.layers.0.0.norm.weight, backbone.layers.0.0.norm.bias, backbone.layers.0.1.0.norm1.weight, backbone.layers.0.1.0.norm1.bias, backbone.layers.0.1.0.attn.attn.in_proj_weight, backbone.layers.0.1.0.attn.attn.in_proj_bias, backbone.layers.0.1.0.attn.attn.out_proj.weight, backbone.layers.0.1.0.attn.attn.out_proj.bias, backbone.layers.0.1.0.attn.sr.weight, backbone.layers.0.1.0.attn.sr.bias, backbone.layers.0.1.0.attn.norm.weight, backbone.layers.0.1.0.attn.norm.bias, backbone.layers.0.1.0.norm2.weight, backbone.layers.0.1.0.norm2.bias, backbone.layers.0.1.0.ffn.layers.0.weight, backbone.layers.0.1.0.ffn.layers.0.bias, backbone.layers.0.1.0.ffn.layers.1.weight, backbone.layers.0.1.0.ffn.layers.1.bias, backbone.layers.0.1.0.ffn.layers.4.weight, backbone.layers.0.1.0.ffn.layers.4.bias, backbone.layers.0.1.1.norm1.weight, backbone.layers.0.1.1.norm1.bias, backbone.layers.0.1.1.attn.attn.in_proj_weight, backbone.layers.0.1.1.attn.attn.in_proj_bias, backbone.layers.0.1.1.attn.attn.out_proj.weight, backbone.layers.0.1.1.attn.attn.out_proj.bias, backbone.layers.0.1.1.attn.sr.weight, backbone.layers.0.1.1.attn.sr.bias, backbone.layers.0.1.1.attn.norm.weight, backbone.layers.0.1.1.attn.norm.bias, backbone.layers.0.1.1.norm2.weight, backbone.layers.0.1.1.norm2.bias, backbone.layers.0.1.1.ffn.layers.0.weight, backbone.layers.0.1.1.ffn.layers.0.bias, backbone.layers.0.1.1.ffn.layers.1.weight, backbone.layers.0.1.1.ffn.layers.1.bias, backbone.layers.0.1.1.ffn.layers.4.weight, backbone.layers.0.1.1.ffn.layers.4.bias, backbone.layers.0.1.2.norm1.weight, backbone.layers.0.1.2.norm1.bias, backbone.layers.0.1.2.attn.attn.in_proj_weight, backbone.layers.0.1.2.attn.attn.in_proj_bias, backbone.layers.0.1.2.attn.attn.out_proj.weight, backbone.layers.0.1.2.attn.attn.out_proj.bias, backbone.layers.0.1.2.attn.sr.weight, backbone.layers.0.1.2.attn.sr.bias, backbone.layers.0.1.2.attn.norm.weight, backbone.layers.0.1.2.attn.norm.bias, backbone.layers.0.1.2.norm2.weight, backbone.layers.0.1.2.norm2.bias, backbone.layers.0.1.2.ffn.layers.0.weight, backbone.layers.0.1.2.ffn.layers.0.bias, backbone.layers.0.1.2.ffn.layers.1.weight, backbone.layers.0.1.2.ffn.layers.1.bias, backbone.layers.0.1.2.ffn.layers.4.weight, backbone.layers.0.1.2.ffn.layers.4.bias, backbone.layers.0.2.weight, backbone.layers.0.2.bias, backbone.layers.1.0.projection.weight, backbone.layers.1.0.projection.bias, backbone.layers.1.0.norm.weight, backbone.layers.1.0.norm.bias, backbone.layers.1.1.0.norm1.weight, backbone.layers.1.1.0.norm1.bias, backbone.layers.1.1.0.attn.attn.in_proj_weight, backbone.layers.1.1.0.attn.attn.in_proj_bias, backbone.layers.1.1.0.attn.attn.out_proj.weight, backbone.layers.1.1.0.attn.attn.out_proj.bias, backbone.layers.1.1.0.attn.sr.weight, backbone.layers.1.1.0.attn.sr.bias, backbone.layers.1.1.0.attn.norm.weight, backbone.layers.1.1.0.attn.norm.bias, backbone.layers.1.1.0.norm2.weight, backbone.layers.1.1.0.norm2.bias, backbone.layers.1.1.0.ffn.layers.0.weight, backbone.layers.1.1.0.ffn.layers.0.bias, backbone.layers.1.1.0.ffn.layers.1.weight, backbone.layers.1.1.0.ffn.layers.1.bias, backbone.layers.1.1.0.ffn.layers.4.weight, backbone.layers.1.1.0.ffn.layers.4.bias, backbone.layers.1.1.1.norm1.weight, backbone.layers.1.1.1.norm1.bias, backbone.layers.1.1.1.attn.attn.in_proj_weight, backbone.layers.1.1.1.attn.attn.in_proj_bias, backbone.layers.1.1.1.attn.attn.out_proj.weight, backbone.layers.1.1.1.attn.attn.out_proj.bias, backbone.layers.1.1.1.attn.sr.weight, backbone.layers.1.1.1.attn.sr.bias, backbone.layers.1.1.1.attn.norm.weight, backbone.layers.1.1.1.attn.norm.bias, backbone.layers.1.1.1.norm2.weight, backbone.layers.1.1.1.norm2.bias, backbone.layers.1.1.1.ffn.layers.0.weight, backbone.layers.1.1.1.ffn.layers.0.bias, backbone.layers.1.1.1.ffn.layers.1.weight, backbone.layers.1.1.1.ffn.layers.1.bias, backbone.layers.1.1.1.ffn.layers.4.weight, backbone.layers.1.1.1.ffn.layers.4.bias, backbone.layers.1.1.2.norm1.weight, backbone.layers.1.1.2.norm1.bias, backbone.layers.1.1.2.attn.attn.in_proj_weight, backbone.layers.1.1.2.attn.attn.in_proj_bias, backbone.layers.1.1.2.attn.attn.out_proj.weight, backbone.layers.1.1.2.attn.attn.out_proj.bias, backbone.layers.1.1.2.attn.sr.weight, backbone.layers.1.1.2.attn.sr.bias, backbone.layers.1.1.2.attn.norm.weight, backbone.layers.1.1.2.attn.norm.bias, backbone.layers.1.1.2.norm2.weight, backbone.layers.1.1.2.norm2.bias, backbone.layers.1.1.2.ffn.layers.0.weight, backbone.layers.1.1.2.ffn.layers.0.bias, backbone.layers.1.1.2.ffn.layers.1.weight, backbone.layers.1.1.2.ffn.layers.1.bias, backbone.layers.1.1.2.ffn.layers.4.weight, backbone.layers.1.1.2.ffn.layers.4.bias, backbone.layers.1.1.3.norm1.weight, backbone.layers.1.1.3.norm1.bias, backbone.layers.1.1.3.attn.attn.in_proj_weight, backbone.layers.1.1.3.attn.attn.in_proj_bias, backbone.layers.1.1.3.attn.attn.out_proj.weight, backbone.layers.1.1.3.attn.attn.out_proj.bias, backbone.layers.1.1.3.attn.sr.weight, backbone.layers.1.1.3.attn.sr.bias, backbone.layers.1.1.3.attn.norm.weight, backbone.layers.1.1.3.attn.norm.bias, backbone.layers.1.1.3.norm2.weight, backbone.layers.1.1.3.norm2.bias, backbone.layers.1.1.3.ffn.layers.0.weight, backbone.layers.1.1.3.ffn.layers.0.bias, backbone.layers.1.1.3.ffn.layers.1.weight, backbone.layers.1.1.3.ffn.layers.1.bias, backbone.layers.1.1.3.ffn.layers.4.weight, backbone.layers.1.1.3.ffn.layers.4.bias, backbone.layers.1.2.weight, backbone.layers.1.2.bias, backbone.layers.2.0.projection.weight, backbone.layers.2.0.projection.bias, backbone.layers.2.0.norm.weight, backbone.layers.2.0.norm.bias, backbone.layers.2.1.0.norm1.weight, backbone.layers.2.1.0.norm1.bias, backbone.layers.2.1.0.attn.attn.in_proj_weight, backbone.layers.2.1.0.attn.attn.in_proj_bias, backbone.layers.2.1.0.attn.attn.out_proj.weight, backbone.layers.2.1.0.attn.attn.out_proj.bias, backbone.layers.2.1.0.attn.sr.weight, backbone.layers.2.1.0.attn.sr.bias, backbone.layers.2.1.0.attn.norm.weight, backbone.layers.2.1.0.attn.norm.bias, backbone.layers.2.1.0.norm2.weight, backbone.layers.2.1.0.norm2.bias, backbone.layers.2.1.0.ffn.layers.0.weight, backbone.layers.2.1.0.ffn.layers.0.bias, backbone.layers.2.1.0.ffn.layers.1.weight, backbone.layers.2.1.0.ffn.layers.1.bias, backbone.layers.2.1.0.ffn.layers.4.weight, backbone.layers.2.1.0.ffn.layers.4.bias, backbone.layers.2.1.1.norm1.weight, backbone.layers.2.1.1.norm1.bias, backbone.layers.2.1.1.attn.attn.in_proj_weight, backbone.layers.2.1.1.attn.attn.in_proj_bias, backbone.layers.2.1.1.attn.attn.out_proj.weight, backbone.layers.2.1.1.attn.attn.out_proj.bias, backbone.layers.2.1.1.attn.sr.weight, backbone.layers.2.1.1.attn.sr.bias, backbone.layers.2.1.1.attn.norm.weight, backbone.layers.2.1.1.attn.norm.bias, backbone.layers.2.1.1.norm2.weight, backbone.layers.2.1.1.norm2.bias, backbone.layers.2.1.1.ffn.layers.0.weight, backbone.layers.2.1.1.ffn.layers.0.bias, backbone.layers.2.1.1.ffn.layers.1.weight, backbone.layers.2.1.1.ffn.layers.1.bias, backbone.layers.2.1.1.ffn.layers.4.weight, backbone.layers.2.1.1.ffn.layers.4.bias, backbone.layers.2.1.2.norm1.weight, backbone.layers.2.1.2.norm1.bias, backbone.layers.2.1.2.attn.attn.in_proj_weight, backbone.layers.2.1.2.attn.attn.in_proj_bias, backbone.layers.2.1.2.attn.attn.out_proj.weight, backbone.layers.2.1.2.attn.attn.out_proj.bias, backbone.layers.2.1.2.attn.sr.weight, backbone.layers.2.1.2.attn.sr.bias, backbone.layers.2.1.2.attn.norm.weight, backbone.layers.2.1.2.attn.norm.bias, backbone.layers.2.1.2.norm2.weight, backbone.layers.2.1.2.norm2.bias, backbone.layers.2.1.2.ffn.layers.0.weight, backbone.layers.2.1.2.ffn.layers.0.bias, backbone.layers.2.1.2.ffn.layers.1.weight, backbone.layers.2.1.2.ffn.layers.1.bias, backbone.layers.2.1.2.ffn.layers.4.weight, backbone.layers.2.1.2.ffn.layers.4.bias, backbone.layers.2.1.3.norm1.weight, backbone.layers.2.1.3.norm1.bias, backbone.layers.2.1.3.attn.attn.in_proj_weight, backbone.layers.2.1.3.attn.attn.in_proj_bias, backbone.layers.2.1.3.attn.attn.out_proj.weight, backbone.layers.2.1.3.attn.attn.out_proj.bias, backbone.layers.2.1.3.attn.sr.weight, backbone.layers.2.1.3.attn.sr.bias, backbone.layers.2.1.3.attn.norm.weight, backbone.layers.2.1.3.attn.norm.bias, backbone.layers.2.1.3.norm2.weight, backbone.layers.2.1.3.norm2.bias, backbone.layers.2.1.3.ffn.layers.0.weight, backbone.layers.2.1.3.ffn.layers.0.bias, backbone.layers.2.1.3.ffn.layers.1.weight, backbone.layers.2.1.3.ffn.layers.1.bias, backbone.layers.2.1.3.ffn.layers.4.weight, backbone.layers.2.1.3.ffn.layers.4.bias, backbone.layers.2.1.4.norm1.weight, backbone.layers.2.1.4.norm1.bias, backbone.layers.2.1.4.attn.attn.in_proj_weight, backbone.layers.2.1.4.attn.attn.in_proj_bias, backbone.layers.2.1.4.attn.attn.out_proj.weight, backbone.layers.2.1.4.attn.attn.out_proj.bias, backbone.layers.2.1.4.attn.sr.weight, backbone.layers.2.1.4.attn.sr.bias, backbone.layers.2.1.4.attn.norm.weight, backbone.layers.2.1.4.attn.norm.bias, backbone.layers.2.1.4.norm2.weight, backbone.layers.2.1.4.norm2.bias, backbone.layers.2.1.4.ffn.layers.0.weight, backbone.layers.2.1.4.ffn.layers.0.bias, backbone.layers.2.1.4.ffn.layers.1.weight, backbone.layers.2.1.4.ffn.layers.1.bias, backbone.layers.2.1.4.ffn.layers.4.weight, backbone.layers.2.1.4.ffn.layers.4.bias, backbone.layers.2.1.5.norm1.weight, backbone.layers.2.1.5.norm1.bias, backbone.layers.2.1.5.attn.attn.in_proj_weight, backbone.layers.2.1.5.attn.attn.in_proj_bias, backbone.layers.2.1.5.attn.attn.out_proj.weight, backbone.layers.2.1.5.attn.attn.out_proj.bias, backbone.layers.2.1.5.attn.sr.weight, backbone.layers.2.1.5.attn.sr.bias, backbone.layers.2.1.5.attn.norm.weight, backbone.layers.2.1.5.attn.norm.bias, backbone.layers.2.1.5.norm2.weight, backbone.layers.2.1.5.norm2.bias, backbone.layers.2.1.5.ffn.layers.0.weight, backbone.layers.2.1.5.ffn.layers.0.bias, backbone.layers.2.1.5.ffn.layers.1.weight, backbone.layers.2.1.5.ffn.layers.1.bias, backbone.layers.2.1.5.ffn.layers.4.weight, backbone.layers.2.1.5.ffn.layers.4.bias, backbone.layers.2.2.weight, backbone.layers.2.2.bias, backbone.layers.3.0.projection.weight, backbone.layers.3.0.projection.bias, backbone.layers.3.0.norm.weight, backbone.layers.3.0.norm.bias, backbone.layers.3.1.0.norm1.weight, backbone.layers.3.1.0.norm1.bias, backbone.layers.3.1.0.attn.attn.in_proj_weight, backbone.layers.3.1.0.attn.attn.in_proj_bias, backbone.layers.3.1.0.attn.attn.out_proj.weight, backbone.layers.3.1.0.attn.attn.out_proj.bias, backbone.layers.3.1.0.norm2.weight, backbone.layers.3.1.0.norm2.bias, backbone.layers.3.1.0.ffn.layers.0.weight, backbone.layers.3.1.0.ffn.layers.0.bias, backbone.layers.3.1.0.ffn.layers.1.weight, backbone.layers.3.1.0.ffn.layers.1.bias, backbone.layers.3.1.0.ffn.layers.4.weight, backbone.layers.3.1.0.ffn.layers.4.bias, backbone.layers.3.1.1.norm1.weight, backbone.layers.3.1.1.norm1.bias, backbone.layers.3.1.1.attn.attn.in_proj_weight, backbone.layers.3.1.1.attn.attn.in_proj_bias, backbone.layers.3.1.1.attn.attn.out_proj.weight, backbone.layers.3.1.1.attn.attn.out_proj.bias, backbone.layers.3.1.1.norm2.weight, backbone.layers.3.1.1.norm2.bias, backbone.layers.3.1.1.ffn.layers.0.weight, backbone.layers.3.1.1.ffn.layers.0.bias, backbone.layers.3.1.1.ffn.layers.1.weight, backbone.layers.3.1.1.ffn.layers.1.bias, backbone.layers.3.1.1.ffn.layers.4.weight, backbone.layers.3.1.1.ffn.layers.4.bias, backbone.layers.3.1.2.norm1.weight, backbone.layers.3.1.2.norm1.bias, backbone.layers.3.1.2.attn.attn.in_proj_weight, backbone.layers.3.1.2.attn.attn.in_proj_bias, backbone.layers.3.1.2.attn.attn.out_proj.weight, backbone.layers.3.1.2.attn.attn.out_proj.bias, backbone.layers.3.1.2.norm2.weight, backbone.layers.3.1.2.norm2.bias, backbone.layers.3.1.2.ffn.layers.0.weight, backbone.layers.3.1.2.ffn.layers.0.bias, backbone.layers.3.1.2.ffn.layers.1.weight, backbone.layers.3.1.2.ffn.layers.1.bias, backbone.layers.3.1.2.ffn.layers.4.weight, backbone.layers.3.1.2.ffn.layers.4.bias, backbone.layers.3.2.weight, backbone.layers.3.2.bias
237
+
238
+ missing keys in source state_dict: unet.init_conv.weight, unet.init_conv.bias, unet.time_mlp.1.weight, unet.time_mlp.1.bias, unet.time_mlp.3.weight, unet.time_mlp.3.bias, unet.downs.0.0.mlp.1.weight, unet.downs.0.0.mlp.1.bias, unet.downs.0.0.block1.proj.weight, unet.downs.0.0.block1.proj.bias, unet.downs.0.0.block1.norm.weight, unet.downs.0.0.block1.norm.bias, unet.downs.0.0.block2.proj.weight, unet.downs.0.0.block2.proj.bias, unet.downs.0.0.block2.norm.weight, unet.downs.0.0.block2.norm.bias, unet.downs.0.1.mlp.1.weight, unet.downs.0.1.mlp.1.bias, unet.downs.0.1.block1.proj.weight, unet.downs.0.1.block1.proj.bias, unet.downs.0.1.block1.norm.weight, unet.downs.0.1.block1.norm.bias, unet.downs.0.1.block2.proj.weight, unet.downs.0.1.block2.proj.bias, unet.downs.0.1.block2.norm.weight, unet.downs.0.1.block2.norm.bias, unet.downs.0.2.fn.fn.to_qkv.weight, unet.downs.0.2.fn.fn.to_out.0.weight, unet.downs.0.2.fn.fn.to_out.0.bias, unet.downs.0.2.fn.fn.to_out.1.g, unet.downs.0.2.fn.norm.g, unet.downs.0.3.weight, unet.downs.0.3.bias, unet.downs.1.0.mlp.1.weight, unet.downs.1.0.mlp.1.bias, unet.downs.1.0.block1.proj.weight, unet.downs.1.0.block1.proj.bias, unet.downs.1.0.block1.norm.weight, unet.downs.1.0.block1.norm.bias, unet.downs.1.0.block2.proj.weight, unet.downs.1.0.block2.proj.bias, unet.downs.1.0.block2.norm.weight, unet.downs.1.0.block2.norm.bias, unet.downs.1.1.mlp.1.weight, unet.downs.1.1.mlp.1.bias, unet.downs.1.1.block1.proj.weight, unet.downs.1.1.block1.proj.bias, unet.downs.1.1.block1.norm.weight, unet.downs.1.1.block1.norm.bias, unet.downs.1.1.block2.proj.weight, unet.downs.1.1.block2.proj.bias, unet.downs.1.1.block2.norm.weight, unet.downs.1.1.block2.norm.bias, unet.downs.1.2.fn.fn.to_qkv.weight, unet.downs.1.2.fn.fn.to_out.0.weight, unet.downs.1.2.fn.fn.to_out.0.bias, unet.downs.1.2.fn.fn.to_out.1.g, unet.downs.1.2.fn.norm.g, unet.downs.1.3.weight, unet.downs.1.3.bias, unet.downs.2.0.mlp.1.weight, unet.downs.2.0.mlp.1.bias, unet.downs.2.0.block1.proj.weight, unet.downs.2.0.block1.proj.bias, unet.downs.2.0.block1.norm.weight, unet.downs.2.0.block1.norm.bias, unet.downs.2.0.block2.proj.weight, unet.downs.2.0.block2.proj.bias, unet.downs.2.0.block2.norm.weight, unet.downs.2.0.block2.norm.bias, unet.downs.2.1.mlp.1.weight, unet.downs.2.1.mlp.1.bias, unet.downs.2.1.block1.proj.weight, unet.downs.2.1.block1.proj.bias, unet.downs.2.1.block1.norm.weight, unet.downs.2.1.block1.norm.bias, unet.downs.2.1.block2.proj.weight, unet.downs.2.1.block2.proj.bias, unet.downs.2.1.block2.norm.weight, unet.downs.2.1.block2.norm.bias, unet.downs.2.2.fn.fn.to_qkv.weight, unet.downs.2.2.fn.fn.to_out.0.weight, unet.downs.2.2.fn.fn.to_out.0.bias, unet.downs.2.2.fn.fn.to_out.1.g, unet.downs.2.2.fn.norm.g, unet.downs.2.3.weight, unet.downs.2.3.bias, unet.ups.0.0.mlp.1.weight, unet.ups.0.0.mlp.1.bias, unet.ups.0.0.block1.proj.weight, unet.ups.0.0.block1.proj.bias, unet.ups.0.0.block1.norm.weight, unet.ups.0.0.block1.norm.bias, unet.ups.0.0.block2.proj.weight, unet.ups.0.0.block2.proj.bias, unet.ups.0.0.block2.norm.weight, unet.ups.0.0.block2.norm.bias, unet.ups.0.0.res_conv.weight, unet.ups.0.0.res_conv.bias, unet.ups.0.1.mlp.1.weight, unet.ups.0.1.mlp.1.bias, unet.ups.0.1.block1.proj.weight, unet.ups.0.1.block1.proj.bias, unet.ups.0.1.block1.norm.weight, unet.ups.0.1.block1.norm.bias, unet.ups.0.1.block2.proj.weight, unet.ups.0.1.block2.proj.bias, unet.ups.0.1.block2.norm.weight, unet.ups.0.1.block2.norm.bias, unet.ups.0.1.res_conv.weight, unet.ups.0.1.res_conv.bias, unet.ups.0.2.fn.fn.to_qkv.weight, unet.ups.0.2.fn.fn.to_out.0.weight, unet.ups.0.2.fn.fn.to_out.0.bias, unet.ups.0.2.fn.fn.to_out.1.g, unet.ups.0.2.fn.norm.g, unet.ups.0.3.1.weight, unet.ups.0.3.1.bias, unet.ups.1.0.mlp.1.weight, unet.ups.1.0.mlp.1.bias, unet.ups.1.0.block1.proj.weight, unet.ups.1.0.block1.proj.bias, unet.ups.1.0.block1.norm.weight, unet.ups.1.0.block1.norm.bias, unet.ups.1.0.block2.proj.weight, unet.ups.1.0.block2.proj.bias, unet.ups.1.0.block2.norm.weight, unet.ups.1.0.block2.norm.bias, unet.ups.1.0.res_conv.weight, unet.ups.1.0.res_conv.bias, unet.ups.1.1.mlp.1.weight, unet.ups.1.1.mlp.1.bias, unet.ups.1.1.block1.proj.weight, unet.ups.1.1.block1.proj.bias, unet.ups.1.1.block1.norm.weight, unet.ups.1.1.block1.norm.bias, unet.ups.1.1.block2.proj.weight, unet.ups.1.1.block2.proj.bias, unet.ups.1.1.block2.norm.weight, unet.ups.1.1.block2.norm.bias, unet.ups.1.1.res_conv.weight, unet.ups.1.1.res_conv.bias, unet.ups.1.2.fn.fn.to_qkv.weight, unet.ups.1.2.fn.fn.to_out.0.weight, unet.ups.1.2.fn.fn.to_out.0.bias, unet.ups.1.2.fn.fn.to_out.1.g, unet.ups.1.2.fn.norm.g, unet.ups.1.3.1.weight, unet.ups.1.3.1.bias, unet.ups.2.0.mlp.1.weight, unet.ups.2.0.mlp.1.bias, unet.ups.2.0.block1.proj.weight, unet.ups.2.0.block1.proj.bias, unet.ups.2.0.block1.norm.weight, unet.ups.2.0.block1.norm.bias, unet.ups.2.0.block2.proj.weight, unet.ups.2.0.block2.proj.bias, unet.ups.2.0.block2.norm.weight, unet.ups.2.0.block2.norm.bias, unet.ups.2.0.res_conv.weight, unet.ups.2.0.res_conv.bias, unet.ups.2.1.mlp.1.weight, unet.ups.2.1.mlp.1.bias, unet.ups.2.1.block1.proj.weight, unet.ups.2.1.block1.proj.bias, unet.ups.2.1.block1.norm.weight, unet.ups.2.1.block1.norm.bias, unet.ups.2.1.block2.proj.weight, unet.ups.2.1.block2.proj.bias, unet.ups.2.1.block2.norm.weight, unet.ups.2.1.block2.norm.bias, unet.ups.2.1.res_conv.weight, unet.ups.2.1.res_conv.bias, unet.ups.2.2.fn.fn.to_qkv.weight, unet.ups.2.2.fn.fn.to_out.0.weight, unet.ups.2.2.fn.fn.to_out.0.bias, unet.ups.2.2.fn.fn.to_out.1.g, unet.ups.2.2.fn.norm.g, unet.ups.2.3.weight, unet.ups.2.3.bias, unet.mid_block1.mlp.1.weight, unet.mid_block1.mlp.1.bias, unet.mid_block1.block1.proj.weight, unet.mid_block1.block1.proj.bias, unet.mid_block1.block1.norm.weight, unet.mid_block1.block1.norm.bias, unet.mid_block1.block2.proj.weight, unet.mid_block1.block2.proj.bias, unet.mid_block1.block2.norm.weight, unet.mid_block1.block2.norm.bias, unet.mid_attn.fn.fn.to_qkv.weight, unet.mid_attn.fn.fn.to_out.weight, unet.mid_attn.fn.fn.to_out.bias, unet.mid_attn.fn.norm.g, unet.mid_block2.mlp.1.weight, unet.mid_block2.mlp.1.bias, unet.mid_block2.block1.proj.weight, unet.mid_block2.block1.proj.bias, unet.mid_block2.block1.norm.weight, unet.mid_block2.block1.norm.bias, unet.mid_block2.block2.proj.weight, unet.mid_block2.block2.proj.bias, unet.mid_block2.block2.norm.weight, unet.mid_block2.block2.norm.bias, unet.final_res_block.mlp.1.weight, unet.final_res_block.mlp.1.bias, unet.final_res_block.block1.proj.weight, unet.final_res_block.block1.proj.bias, unet.final_res_block.block1.norm.weight, unet.final_res_block.block1.norm.bias, unet.final_res_block.block2.proj.weight, unet.final_res_block.block2.proj.bias, unet.final_res_block.block2.norm.weight, unet.final_res_block.block2.norm.bias, unet.final_res_block.res_conv.weight, unet.final_res_block.res_conv.bias, unet.final_conv.weight, unet.final_conv.bias, conv_seg_new.weight, conv_seg_new.bias, embed.weight
239
+
240
+ 2023-03-04 10:36:08,206 - mmseg - INFO - EncoderDecoderFreeze(
241
+ (backbone): MixVisionTransformerCustomInitWeights(
242
+ (layers): ModuleList(
243
+ (0): ModuleList(
244
+ (0): PatchEmbed(
245
+ (projection): Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
246
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
247
+ )
248
+ (1): ModuleList(
249
+ (0): TransformerEncoderLayer(
250
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
251
+ (attn): EfficientMultiheadAttention(
252
+ (attn): MultiheadAttention(
253
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
254
+ )
255
+ (proj_drop): Dropout(p=0.0, inplace=False)
256
+ (dropout_layer): DropPath()
257
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
258
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
259
+ )
260
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
261
+ (ffn): MixFFN(
262
+ (activate): GELU(approximate='none')
263
+ (layers): Sequential(
264
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
265
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
266
+ (2): GELU(approximate='none')
267
+ (3): Dropout(p=0.0, inplace=False)
268
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
269
+ (5): Dropout(p=0.0, inplace=False)
270
+ )
271
+ (dropout_layer): DropPath()
272
+ )
273
+ )
274
+ (1): TransformerEncoderLayer(
275
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
276
+ (attn): EfficientMultiheadAttention(
277
+ (attn): MultiheadAttention(
278
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
279
+ )
280
+ (proj_drop): Dropout(p=0.0, inplace=False)
281
+ (dropout_layer): DropPath()
282
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
283
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
284
+ )
285
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
286
+ (ffn): MixFFN(
287
+ (activate): GELU(approximate='none')
288
+ (layers): Sequential(
289
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
290
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
291
+ (2): GELU(approximate='none')
292
+ (3): Dropout(p=0.0, inplace=False)
293
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
294
+ (5): Dropout(p=0.0, inplace=False)
295
+ )
296
+ (dropout_layer): DropPath()
297
+ )
298
+ )
299
+ (2): TransformerEncoderLayer(
300
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
301
+ (attn): EfficientMultiheadAttention(
302
+ (attn): MultiheadAttention(
303
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
304
+ )
305
+ (proj_drop): Dropout(p=0.0, inplace=False)
306
+ (dropout_layer): DropPath()
307
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
308
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
309
+ )
310
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
311
+ (ffn): MixFFN(
312
+ (activate): GELU(approximate='none')
313
+ (layers): Sequential(
314
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
315
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
316
+ (2): GELU(approximate='none')
317
+ (3): Dropout(p=0.0, inplace=False)
318
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
319
+ (5): Dropout(p=0.0, inplace=False)
320
+ )
321
+ (dropout_layer): DropPath()
322
+ )
323
+ )
324
+ )
325
+ (2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
326
+ )
327
+ (1): ModuleList(
328
+ (0): PatchEmbed(
329
+ (projection): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
330
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
331
+ )
332
+ (1): ModuleList(
333
+ (0): TransformerEncoderLayer(
334
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
335
+ (attn): EfficientMultiheadAttention(
336
+ (attn): MultiheadAttention(
337
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
338
+ )
339
+ (proj_drop): Dropout(p=0.0, inplace=False)
340
+ (dropout_layer): DropPath()
341
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
342
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
343
+ )
344
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
345
+ (ffn): MixFFN(
346
+ (activate): GELU(approximate='none')
347
+ (layers): Sequential(
348
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
349
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
350
+ (2): GELU(approximate='none')
351
+ (3): Dropout(p=0.0, inplace=False)
352
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
353
+ (5): Dropout(p=0.0, inplace=False)
354
+ )
355
+ (dropout_layer): DropPath()
356
+ )
357
+ )
358
+ (1): TransformerEncoderLayer(
359
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
360
+ (attn): EfficientMultiheadAttention(
361
+ (attn): MultiheadAttention(
362
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
363
+ )
364
+ (proj_drop): Dropout(p=0.0, inplace=False)
365
+ (dropout_layer): DropPath()
366
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
367
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
368
+ )
369
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
370
+ (ffn): MixFFN(
371
+ (activate): GELU(approximate='none')
372
+ (layers): Sequential(
373
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
374
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
375
+ (2): GELU(approximate='none')
376
+ (3): Dropout(p=0.0, inplace=False)
377
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
378
+ (5): Dropout(p=0.0, inplace=False)
379
+ )
380
+ (dropout_layer): DropPath()
381
+ )
382
+ )
383
+ (2): TransformerEncoderLayer(
384
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
385
+ (attn): EfficientMultiheadAttention(
386
+ (attn): MultiheadAttention(
387
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
388
+ )
389
+ (proj_drop): Dropout(p=0.0, inplace=False)
390
+ (dropout_layer): DropPath()
391
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
392
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
393
+ )
394
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
395
+ (ffn): MixFFN(
396
+ (activate): GELU(approximate='none')
397
+ (layers): Sequential(
398
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
399
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
400
+ (2): GELU(approximate='none')
401
+ (3): Dropout(p=0.0, inplace=False)
402
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
403
+ (5): Dropout(p=0.0, inplace=False)
404
+ )
405
+ (dropout_layer): DropPath()
406
+ )
407
+ )
408
+ (3): TransformerEncoderLayer(
409
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
410
+ (attn): EfficientMultiheadAttention(
411
+ (attn): MultiheadAttention(
412
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
413
+ )
414
+ (proj_drop): Dropout(p=0.0, inplace=False)
415
+ (dropout_layer): DropPath()
416
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
417
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
418
+ )
419
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
420
+ (ffn): MixFFN(
421
+ (activate): GELU(approximate='none')
422
+ (layers): Sequential(
423
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
424
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
425
+ (2): GELU(approximate='none')
426
+ (3): Dropout(p=0.0, inplace=False)
427
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
428
+ (5): Dropout(p=0.0, inplace=False)
429
+ )
430
+ (dropout_layer): DropPath()
431
+ )
432
+ )
433
+ )
434
+ (2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
435
+ )
436
+ (2): ModuleList(
437
+ (0): PatchEmbed(
438
+ (projection): Conv2d(128, 320, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
439
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
440
+ )
441
+ (1): ModuleList(
442
+ (0): TransformerEncoderLayer(
443
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
444
+ (attn): EfficientMultiheadAttention(
445
+ (attn): MultiheadAttention(
446
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
447
+ )
448
+ (proj_drop): Dropout(p=0.0, inplace=False)
449
+ (dropout_layer): DropPath()
450
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
451
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
452
+ )
453
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
454
+ (ffn): MixFFN(
455
+ (activate): GELU(approximate='none')
456
+ (layers): Sequential(
457
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
458
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
459
+ (2): GELU(approximate='none')
460
+ (3): Dropout(p=0.0, inplace=False)
461
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
462
+ (5): Dropout(p=0.0, inplace=False)
463
+ )
464
+ (dropout_layer): DropPath()
465
+ )
466
+ )
467
+ (1): TransformerEncoderLayer(
468
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
469
+ (attn): EfficientMultiheadAttention(
470
+ (attn): MultiheadAttention(
471
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
472
+ )
473
+ (proj_drop): Dropout(p=0.0, inplace=False)
474
+ (dropout_layer): DropPath()
475
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
476
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
477
+ )
478
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
479
+ (ffn): MixFFN(
480
+ (activate): GELU(approximate='none')
481
+ (layers): Sequential(
482
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
483
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
484
+ (2): GELU(approximate='none')
485
+ (3): Dropout(p=0.0, inplace=False)
486
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
487
+ (5): Dropout(p=0.0, inplace=False)
488
+ )
489
+ (dropout_layer): DropPath()
490
+ )
491
+ )
492
+ (2): TransformerEncoderLayer(
493
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
494
+ (attn): EfficientMultiheadAttention(
495
+ (attn): MultiheadAttention(
496
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
497
+ )
498
+ (proj_drop): Dropout(p=0.0, inplace=False)
499
+ (dropout_layer): DropPath()
500
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
501
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
502
+ )
503
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
504
+ (ffn): MixFFN(
505
+ (activate): GELU(approximate='none')
506
+ (layers): Sequential(
507
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
508
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
509
+ (2): GELU(approximate='none')
510
+ (3): Dropout(p=0.0, inplace=False)
511
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
512
+ (5): Dropout(p=0.0, inplace=False)
513
+ )
514
+ (dropout_layer): DropPath()
515
+ )
516
+ )
517
+ (3): TransformerEncoderLayer(
518
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
519
+ (attn): EfficientMultiheadAttention(
520
+ (attn): MultiheadAttention(
521
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
522
+ )
523
+ (proj_drop): Dropout(p=0.0, inplace=False)
524
+ (dropout_layer): DropPath()
525
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
526
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
527
+ )
528
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
529
+ (ffn): MixFFN(
530
+ (activate): GELU(approximate='none')
531
+ (layers): Sequential(
532
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
533
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
534
+ (2): GELU(approximate='none')
535
+ (3): Dropout(p=0.0, inplace=False)
536
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
537
+ (5): Dropout(p=0.0, inplace=False)
538
+ )
539
+ (dropout_layer): DropPath()
540
+ )
541
+ )
542
+ (4): TransformerEncoderLayer(
543
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
544
+ (attn): EfficientMultiheadAttention(
545
+ (attn): MultiheadAttention(
546
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
547
+ )
548
+ (proj_drop): Dropout(p=0.0, inplace=False)
549
+ (dropout_layer): DropPath()
550
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
551
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
552
+ )
553
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
554
+ (ffn): MixFFN(
555
+ (activate): GELU(approximate='none')
556
+ (layers): Sequential(
557
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
558
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
559
+ (2): GELU(approximate='none')
560
+ (3): Dropout(p=0.0, inplace=False)
561
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
562
+ (5): Dropout(p=0.0, inplace=False)
563
+ )
564
+ (dropout_layer): DropPath()
565
+ )
566
+ )
567
+ (5): TransformerEncoderLayer(
568
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
569
+ (attn): EfficientMultiheadAttention(
570
+ (attn): MultiheadAttention(
571
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
572
+ )
573
+ (proj_drop): Dropout(p=0.0, inplace=False)
574
+ (dropout_layer): DropPath()
575
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
576
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
577
+ )
578
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
579
+ (ffn): MixFFN(
580
+ (activate): GELU(approximate='none')
581
+ (layers): Sequential(
582
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
583
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
584
+ (2): GELU(approximate='none')
585
+ (3): Dropout(p=0.0, inplace=False)
586
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
587
+ (5): Dropout(p=0.0, inplace=False)
588
+ )
589
+ (dropout_layer): DropPath()
590
+ )
591
+ )
592
+ )
593
+ (2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
594
+ )
595
+ (3): ModuleList(
596
+ (0): PatchEmbed(
597
+ (projection): Conv2d(320, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
598
+ (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
599
+ )
600
+ (1): ModuleList(
601
+ (0): TransformerEncoderLayer(
602
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
603
+ (attn): EfficientMultiheadAttention(
604
+ (attn): MultiheadAttention(
605
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
606
+ )
607
+ (proj_drop): Dropout(p=0.0, inplace=False)
608
+ (dropout_layer): DropPath()
609
+ )
610
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
611
+ (ffn): MixFFN(
612
+ (activate): GELU(approximate='none')
613
+ (layers): Sequential(
614
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
615
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
616
+ (2): GELU(approximate='none')
617
+ (3): Dropout(p=0.0, inplace=False)
618
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
619
+ (5): Dropout(p=0.0, inplace=False)
620
+ )
621
+ (dropout_layer): DropPath()
622
+ )
623
+ )
624
+ (1): TransformerEncoderLayer(
625
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
626
+ (attn): EfficientMultiheadAttention(
627
+ (attn): MultiheadAttention(
628
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
629
+ )
630
+ (proj_drop): Dropout(p=0.0, inplace=False)
631
+ (dropout_layer): DropPath()
632
+ )
633
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
634
+ (ffn): MixFFN(
635
+ (activate): GELU(approximate='none')
636
+ (layers): Sequential(
637
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
638
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
639
+ (2): GELU(approximate='none')
640
+ (3): Dropout(p=0.0, inplace=False)
641
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
642
+ (5): Dropout(p=0.0, inplace=False)
643
+ )
644
+ (dropout_layer): DropPath()
645
+ )
646
+ )
647
+ (2): TransformerEncoderLayer(
648
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
649
+ (attn): EfficientMultiheadAttention(
650
+ (attn): MultiheadAttention(
651
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
652
+ )
653
+ (proj_drop): Dropout(p=0.0, inplace=False)
654
+ (dropout_layer): DropPath()
655
+ )
656
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
657
+ (ffn): MixFFN(
658
+ (activate): GELU(approximate='none')
659
+ (layers): Sequential(
660
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
661
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
662
+ (2): GELU(approximate='none')
663
+ (3): Dropout(p=0.0, inplace=False)
664
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
665
+ (5): Dropout(p=0.0, inplace=False)
666
+ )
667
+ (dropout_layer): DropPath()
668
+ )
669
+ )
670
+ )
671
+ (2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
672
+ )
673
+ )
674
+ )
675
+ init_cfg={'type': 'Pretrained', 'checkpoint': 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'}
676
+ (decode_head): SegformerHeadUnetFCHeadSingleStepMask(
677
+ input_transform=multiple_select, ignore_index=0, align_corners=False
678
+ (loss_decode): CrossEntropyLoss(avg_non_ignore=False)
679
+ (conv_seg): None
680
+ (dropout): Dropout2d(p=0.1, inplace=False)
681
+ (convs): ModuleList(
682
+ (0): ConvModule(
683
+ (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
684
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
685
+ (activate): ReLU(inplace=True)
686
+ )
687
+ (1): ConvModule(
688
+ (conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
689
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
690
+ (activate): ReLU(inplace=True)
691
+ )
692
+ (2): ConvModule(
693
+ (conv): Conv2d(320, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
694
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
695
+ (activate): ReLU(inplace=True)
696
+ )
697
+ (3): ConvModule(
698
+ (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
699
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
700
+ (activate): ReLU(inplace=True)
701
+ )
702
+ )
703
+ (fusion_conv): ConvModule(
704
+ (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
705
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
706
+ (activate): ReLU(inplace=True)
707
+ )
708
+ (unet): Unet(
709
+ (init_conv): Conv2d(272, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
710
+ (time_mlp): Sequential(
711
+ (0): SinusoidalPosEmb()
712
+ (1): Linear(in_features=128, out_features=512, bias=True)
713
+ (2): GELU(approximate='none')
714
+ (3): Linear(in_features=512, out_features=512, bias=True)
715
+ )
716
+ (downs): ModuleList(
717
+ (0): ModuleList(
718
+ (0): ResnetBlock(
719
+ (mlp): Sequential(
720
+ (0): SiLU()
721
+ (1): Linear(in_features=512, out_features=256, bias=True)
722
+ )
723
+ (block1): Block(
724
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
725
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
726
+ (act): SiLU()
727
+ )
728
+ (block2): Block(
729
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
730
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
731
+ (act): SiLU()
732
+ )
733
+ (res_conv): Identity()
734
+ )
735
+ (1): ResnetBlock(
736
+ (mlp): Sequential(
737
+ (0): SiLU()
738
+ (1): Linear(in_features=512, out_features=256, bias=True)
739
+ )
740
+ (block1): Block(
741
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
742
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
743
+ (act): SiLU()
744
+ )
745
+ (block2): Block(
746
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
747
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
748
+ (act): SiLU()
749
+ )
750
+ (res_conv): Identity()
751
+ )
752
+ (2): Residual(
753
+ (fn): PreNorm(
754
+ (fn): LinearAttention(
755
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
756
+ (to_out): Sequential(
757
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
758
+ (1): LayerNorm()
759
+ )
760
+ )
761
+ (norm): LayerNorm()
762
+ )
763
+ )
764
+ (3): Conv2d(128, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
765
+ )
766
+ (1): ModuleList(
767
+ (0): ResnetBlock(
768
+ (mlp): Sequential(
769
+ (0): SiLU()
770
+ (1): Linear(in_features=512, out_features=256, bias=True)
771
+ )
772
+ (block1): Block(
773
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
774
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
775
+ (act): SiLU()
776
+ )
777
+ (block2): Block(
778
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
779
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
780
+ (act): SiLU()
781
+ )
782
+ (res_conv): Identity()
783
+ )
784
+ (1): ResnetBlock(
785
+ (mlp): Sequential(
786
+ (0): SiLU()
787
+ (1): Linear(in_features=512, out_features=256, bias=True)
788
+ )
789
+ (block1): Block(
790
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
791
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
792
+ (act): SiLU()
793
+ )
794
+ (block2): Block(
795
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
796
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
797
+ (act): SiLU()
798
+ )
799
+ (res_conv): Identity()
800
+ )
801
+ (2): Residual(
802
+ (fn): PreNorm(
803
+ (fn): LinearAttention(
804
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
805
+ (to_out): Sequential(
806
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
807
+ (1): LayerNorm()
808
+ )
809
+ )
810
+ (norm): LayerNorm()
811
+ )
812
+ )
813
+ (3): Conv2d(128, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
814
+ )
815
+ (2): ModuleList(
816
+ (0): ResnetBlock(
817
+ (mlp): Sequential(
818
+ (0): SiLU()
819
+ (1): Linear(in_features=512, out_features=256, bias=True)
820
+ )
821
+ (block1): Block(
822
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
823
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
824
+ (act): SiLU()
825
+ )
826
+ (block2): Block(
827
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
828
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
829
+ (act): SiLU()
830
+ )
831
+ (res_conv): Identity()
832
+ )
833
+ (1): ResnetBlock(
834
+ (mlp): Sequential(
835
+ (0): SiLU()
836
+ (1): Linear(in_features=512, out_features=256, bias=True)
837
+ )
838
+ (block1): Block(
839
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
840
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
841
+ (act): SiLU()
842
+ )
843
+ (block2): Block(
844
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
845
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
846
+ (act): SiLU()
847
+ )
848
+ (res_conv): Identity()
849
+ )
850
+ (2): Residual(
851
+ (fn): PreNorm(
852
+ (fn): LinearAttention(
853
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
854
+ (to_out): Sequential(
855
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
856
+ (1): LayerNorm()
857
+ )
858
+ )
859
+ (norm): LayerNorm()
860
+ )
861
+ )
862
+ (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
863
+ )
864
+ )
865
+ (ups): ModuleList(
866
+ (0): ModuleList(
867
+ (0): ResnetBlock(
868
+ (mlp): Sequential(
869
+ (0): SiLU()
870
+ (1): Linear(in_features=512, out_features=256, bias=True)
871
+ )
872
+ (block1): Block(
873
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
874
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
875
+ (act): SiLU()
876
+ )
877
+ (block2): Block(
878
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
879
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
880
+ (act): SiLU()
881
+ )
882
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
883
+ )
884
+ (1): ResnetBlock(
885
+ (mlp): Sequential(
886
+ (0): SiLU()
887
+ (1): Linear(in_features=512, out_features=256, bias=True)
888
+ )
889
+ (block1): Block(
890
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
891
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
892
+ (act): SiLU()
893
+ )
894
+ (block2): Block(
895
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
896
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
897
+ (act): SiLU()
898
+ )
899
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
900
+ )
901
+ (2): Residual(
902
+ (fn): PreNorm(
903
+ (fn): LinearAttention(
904
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
905
+ (to_out): Sequential(
906
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
907
+ (1): LayerNorm()
908
+ )
909
+ )
910
+ (norm): LayerNorm()
911
+ )
912
+ )
913
+ (3): Sequential(
914
+ (0): Upsample(scale_factor=2.0, mode=nearest)
915
+ (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
916
+ )
917
+ )
918
+ (1): ModuleList(
919
+ (0): ResnetBlock(
920
+ (mlp): Sequential(
921
+ (0): SiLU()
922
+ (1): Linear(in_features=512, out_features=256, bias=True)
923
+ )
924
+ (block1): Block(
925
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
926
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
927
+ (act): SiLU()
928
+ )
929
+ (block2): Block(
930
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
931
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
932
+ (act): SiLU()
933
+ )
934
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
935
+ )
936
+ (1): ResnetBlock(
937
+ (mlp): Sequential(
938
+ (0): SiLU()
939
+ (1): Linear(in_features=512, out_features=256, bias=True)
940
+ )
941
+ (block1): Block(
942
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
943
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
944
+ (act): SiLU()
945
+ )
946
+ (block2): Block(
947
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
948
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
949
+ (act): SiLU()
950
+ )
951
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
952
+ )
953
+ (2): Residual(
954
+ (fn): PreNorm(
955
+ (fn): LinearAttention(
956
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
957
+ (to_out): Sequential(
958
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
959
+ (1): LayerNorm()
960
+ )
961
+ )
962
+ (norm): LayerNorm()
963
+ )
964
+ )
965
+ (3): Sequential(
966
+ (0): Upsample(scale_factor=2.0, mode=nearest)
967
+ (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
968
+ )
969
+ )
970
+ (2): ModuleList(
971
+ (0): ResnetBlock(
972
+ (mlp): Sequential(
973
+ (0): SiLU()
974
+ (1): Linear(in_features=512, out_features=256, bias=True)
975
+ )
976
+ (block1): Block(
977
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
978
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
979
+ (act): SiLU()
980
+ )
981
+ (block2): Block(
982
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
983
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
984
+ (act): SiLU()
985
+ )
986
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
987
+ )
988
+ (1): ResnetBlock(
989
+ (mlp): Sequential(
990
+ (0): SiLU()
991
+ (1): Linear(in_features=512, out_features=256, bias=True)
992
+ )
993
+ (block1): Block(
994
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
995
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
996
+ (act): SiLU()
997
+ )
998
+ (block2): Block(
999
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1000
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1001
+ (act): SiLU()
1002
+ )
1003
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
1004
+ )
1005
+ (2): Residual(
1006
+ (fn): PreNorm(
1007
+ (fn): LinearAttention(
1008
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1009
+ (to_out): Sequential(
1010
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
1011
+ (1): LayerNorm()
1012
+ )
1013
+ )
1014
+ (norm): LayerNorm()
1015
+ )
1016
+ )
1017
+ (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1018
+ )
1019
+ )
1020
+ (mid_block1): ResnetBlock(
1021
+ (mlp): Sequential(
1022
+ (0): SiLU()
1023
+ (1): Linear(in_features=512, out_features=256, bias=True)
1024
+ )
1025
+ (block1): Block(
1026
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1027
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1028
+ (act): SiLU()
1029
+ )
1030
+ (block2): Block(
1031
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1032
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1033
+ (act): SiLU()
1034
+ )
1035
+ (res_conv): Identity()
1036
+ )
1037
+ (mid_attn): Residual(
1038
+ (fn): PreNorm(
1039
+ (fn): Attention(
1040
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1041
+ (to_out): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
1042
+ )
1043
+ (norm): LayerNorm()
1044
+ )
1045
+ )
1046
+ (mid_block2): ResnetBlock(
1047
+ (mlp): Sequential(
1048
+ (0): SiLU()
1049
+ (1): Linear(in_features=512, out_features=256, bias=True)
1050
+ )
1051
+ (block1): Block(
1052
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1053
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1054
+ (act): SiLU()
1055
+ )
1056
+ (block2): Block(
1057
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1058
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1059
+ (act): SiLU()
1060
+ )
1061
+ (res_conv): Identity()
1062
+ )
1063
+ (final_res_block): ResnetBlock(
1064
+ (mlp): Sequential(
1065
+ (0): SiLU()
1066
+ (1): Linear(in_features=512, out_features=256, bias=True)
1067
+ )
1068
+ (block1): Block(
1069
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1070
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1071
+ (act): SiLU()
1072
+ )
1073
+ (block2): Block(
1074
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1075
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1076
+ (act): SiLU()
1077
+ )
1078
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
1079
+ )
1080
+ (final_conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
1081
+ )
1082
+ (conv_seg_new): Conv2d(256, 151, kernel_size=(1, 1), stride=(1, 1))
1083
+ (embed): Embedding(152, 16)
1084
+ )
1085
+ init_cfg={'type': 'Pretrained', 'checkpoint': 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'}
1086
+ )
1087
+ 2023-03-04 10:36:09,087 - mmseg - INFO - Loaded 20210 images
1088
+ 2023-03-04 10:36:10,053 - mmseg - INFO - Loaded 2000 images
1089
+ 2023-03-04 10:36:10,056 - mmseg - INFO - Start running, host: laizeqiang@SH-IDC1-10-140-37-113, work_dir: /mnt/petrelfs/laizeqiang/mmseg-baseline/work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask
1090
+ 2023-03-04 10:36:10,056 - mmseg - INFO - Hooks will be executed in the following order:
1091
+ before_run:
1092
+ (VERY_HIGH ) StepLrUpdaterHook
1093
+ (NORMAL ) CheckpointHook
1094
+ (LOW ) DistEvalHookMultiSteps
1095
+ (VERY_LOW ) TextLoggerHook
1096
+ --------------------
1097
+ before_train_epoch:
1098
+ (VERY_HIGH ) StepLrUpdaterHook
1099
+ (LOW ) IterTimerHook
1100
+ (LOW ) DistEvalHookMultiSteps
1101
+ (VERY_LOW ) TextLoggerHook
1102
+ --------------------
1103
+ before_train_iter:
1104
+ (VERY_HIGH ) StepLrUpdaterHook
1105
+ (LOW ) IterTimerHook
1106
+ (LOW ) DistEvalHookMultiSteps
1107
+ --------------------
1108
+ after_train_iter:
1109
+ (ABOVE_NORMAL) OptimizerHook
1110
+ (NORMAL ) CheckpointHook
1111
+ (LOW ) IterTimerHook
1112
+ (LOW ) DistEvalHookMultiSteps
1113
+ (VERY_LOW ) TextLoggerHook
1114
+ --------------------
1115
+ after_train_epoch:
1116
+ (NORMAL ) CheckpointHook
1117
+ (LOW ) DistEvalHookMultiSteps
1118
+ (VERY_LOW ) TextLoggerHook
1119
+ --------------------
1120
+ before_val_epoch:
1121
+ (LOW ) IterTimerHook
1122
+ (VERY_LOW ) TextLoggerHook
1123
+ --------------------
1124
+ before_val_iter:
1125
+ (LOW ) IterTimerHook
1126
+ --------------------
1127
+ after_val_iter:
1128
+ (LOW ) IterTimerHook
1129
+ --------------------
1130
+ after_val_epoch:
1131
+ (VERY_LOW ) TextLoggerHook
1132
+ --------------------
1133
+ after_run:
1134
+ (VERY_LOW ) TextLoggerHook
1135
+ --------------------
1136
+ 2023-03-04 10:36:10,056 - mmseg - INFO - workflow: [('train', 1)], max: 80000 iters
1137
+ 2023-03-04 10:36:10,056 - mmseg - INFO - Checkpoints will be saved to /mnt/petrelfs/laizeqiang/mmseg-baseline/work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask by HardDiskBackend.
1138
+ 2023-03-04 10:36:47,431 - mmseg - INFO - Iter [50/80000] lr: 7.350e-06, eta: 6:21:07, time: 0.286, data_time: 0.014, memory: 19783, decode.loss_ce: 3.8243, decode.acc_seg: 14.2062, loss: 3.8243
1139
+ 2023-03-04 10:36:55,947 - mmseg - INFO - Iter [100/80000] lr: 1.485e-05, eta: 5:03:50, time: 0.170, data_time: 0.007, memory: 19783, decode.loss_ce: 2.9748, decode.acc_seg: 42.5617, loss: 2.9748
1140
+ 2023-03-04 10:37:04,369 - mmseg - INFO - Iter [150/80000] lr: 2.235e-05, eta: 4:37:05, time: 0.168, data_time: 0.008, memory: 19783, decode.loss_ce: 2.2347, decode.acc_seg: 49.3174, loss: 2.2347
1141
+ 2023-03-04 10:37:12,840 - mmseg - INFO - Iter [200/80000] lr: 2.985e-05, eta: 4:24:00, time: 0.169, data_time: 0.007, memory: 19783, decode.loss_ce: 1.6971, decode.acc_seg: 60.8327, loss: 1.6971
1142
+ 2023-03-04 10:37:21,248 - mmseg - INFO - Iter [250/80000] lr: 3.735e-05, eta: 4:15:49, time: 0.168, data_time: 0.007, memory: 19783, decode.loss_ce: 1.3266, decode.acc_seg: 69.1492, loss: 1.3266
1143
+ 2023-03-04 10:37:29,768 - mmseg - INFO - Iter [300/80000] lr: 4.485e-05, eta: 4:10:46, time: 0.170, data_time: 0.007, memory: 19783, decode.loss_ce: 1.1094, decode.acc_seg: 74.3359, loss: 1.1094
1144
+ 2023-03-04 10:37:38,291 - mmseg - INFO - Iter [350/80000] lr: 5.235e-05, eta: 4:07:08, time: 0.170, data_time: 0.007, memory: 19783, decode.loss_ce: 0.8799, decode.acc_seg: 78.0994, loss: 0.8799
1145
+ 2023-03-04 10:37:46,865 - mmseg - INFO - Iter [400/80000] lr: 5.985e-05, eta: 4:04:32, time: 0.171, data_time: 0.007, memory: 19783, decode.loss_ce: 0.7519, decode.acc_seg: 80.2631, loss: 0.7519
1146
+ 2023-03-04 10:37:55,418 - mmseg - INFO - Iter [450/80000] lr: 6.735e-05, eta: 4:02:26, time: 0.171, data_time: 0.007, memory: 19783, decode.loss_ce: 0.6707, decode.acc_seg: 81.7875, loss: 0.6707
1147
+ 2023-03-04 10:38:03,650 - mmseg - INFO - Iter [500/80000] lr: 7.485e-05, eta: 3:59:52, time: 0.165, data_time: 0.007, memory: 19783, decode.loss_ce: 0.5952, decode.acc_seg: 82.6875, loss: 0.5952
1148
+ 2023-03-04 10:38:12,334 - mmseg - INFO - Iter [550/80000] lr: 8.235e-05, eta: 3:58:49, time: 0.174, data_time: 0.008, memory: 19783, decode.loss_ce: 0.5100, decode.acc_seg: 84.7401, loss: 0.5100
1149
+ 2023-03-04 10:38:20,396 - mmseg - INFO - Iter [600/80000] lr: 8.985e-05, eta: 3:56:34, time: 0.161, data_time: 0.008, memory: 19783, decode.loss_ce: 0.4445, decode.acc_seg: 85.9801, loss: 0.4445
1150
+ 2023-03-04 10:38:31,315 - mmseg - INFO - Iter [650/80000] lr: 9.735e-05, eta: 4:00:27, time: 0.218, data_time: 0.054, memory: 19783, decode.loss_ce: 0.4351, decode.acc_seg: 86.1025, loss: 0.4351
1151
+ 2023-03-04 10:38:39,745 - mmseg - INFO - Iter [700/80000] lr: 1.049e-04, eta: 3:59:03, time: 0.169, data_time: 0.007, memory: 19783, decode.loss_ce: 0.4012, decode.acc_seg: 86.6512, loss: 0.4012
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/20230304_103602.log.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"env_info": "sys.platform: linux\nPython: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB\nCUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch\nNVCC: Cuda compilation tools, release 11.6, V11.6.124\nGCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)\nPyTorch: 1.13.1\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.6\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.14.1\nOpenCV: 4.7.0\nMMCV: 1.7.1\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.6\nMMSegmentation: 0.30.0+d4f0cb3", "seed": 1470787464, "exp_name": "ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask.py", "mmseg_version": "0.30.0+d4f0cb3", "config": "norm_cfg = dict(type='SyncBN', requires_grad=True)\ncheckpoint = 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'\nmodel = dict(\n type='EncoderDecoderFreeze',\n freeze_parameters=['backbone', 'decode_head'],\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',\n backbone=dict(\n type='MixVisionTransformerCustomInitWeights',\n in_channels=3,\n embed_dims=64,\n num_stages=4,\n num_layers=[3, 4, 6, 3],\n num_heads=[1, 2, 5, 8],\n patch_sizes=[7, 3, 3, 3],\n sr_ratios=[8, 4, 2, 1],\n out_indices=(0, 1, 2, 3),\n mlp_ratio=4,\n qkv_bias=True,\n drop_rate=0.0,\n attn_drop_rate=0.0,\n drop_path_rate=0.1,\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'\n ),\n decode_head=dict(\n type='SegformerHeadUnetFCHeadSingleStepMask',\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',\n dim=128,\n out_dim=256,\n unet_channels=272,\n dim_mults=[1, 1, 1],\n cat_embedding_dim=16,\n in_channels=[64, 128, 320, 512],\n in_index=[0, 1, 2, 3],\n channels=256,\n dropout_ratio=0.1,\n num_classes=151,\n norm_cfg=dict(type='SyncBN', requires_grad=True),\n align_corners=False,\n ignore_index=0,\n loss_decode=dict(\n type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),\n train_cfg=dict(),\n test_cfg=dict(mode='whole'))\ndataset_type = 'ADE20K151Dataset'\ndata_root = 'data/ade/ADEChallengeData2016'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ncrop_size = (512, 512)\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=4,\n workers_per_gpu=4,\n train=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/training',\n ann_dir='annotations/training',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n ]),\n val=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]),\n test=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]))\nlog_config = dict(\n interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\ncudnn_benchmark = True\noptimizer = dict(\n type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)\noptimizer_config = dict()\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=1000,\n warmup_ratio=1e-06,\n step=10000,\n gamma=0.5,\n min_lr=1e-06,\n by_epoch=False)\nrunner = dict(type='IterBasedRunner', max_iters=80000)\ncheckpoint_config = dict(by_epoch=False, interval=8000)\nevaluation = dict(\n interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')\nwork_dir = './work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask'\ngpu_ids = range(0, 8)\nauto_resume = True\ndevice = 'cuda'\nseed = 1470787464\n", "CLASSES": ["background", "wall", "building", "sky", "floor", "tree", "ceiling", "road", "bed ", "windowpane", "grass", "cabinet", "sidewalk", "person", "earth", "door", "table", "mountain", "plant", "curtain", "chair", "car", "water", "painting", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock", "wardrobe", "lamp", "bathtub", "railing", "cushion", "base", "box", "column", "signboard", "chest of drawers", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator", "grandstand", "path", "stairs", "runway", "case", "pool table", "pillow", "screen door", "stairway", "river", "bridge", "bookcase", "blind", "coffee table", "toilet", "flower", "book", "hill", "bench", "countertop", "stove", "palm", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel", "bus", "towel", "light", "truck", "tower", "chandelier", "awning", "streetlight", "booth", "television receiver", "airplane", "dirt track", "apparel", "pole", "land", "bannister", "escalator", "ottoman", "bottle", "buffet", "poster", "stage", "van", "ship", "fountain", "conveyer belt", "canopy", "washer", "plaything", "swimming pool", "stool", "barrel", "basket", "waterfall", "tent", "bag", "minibike", "cradle", "oven", "ball", "food", "step", "tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket", "sculpture", "hood", "sconce", "vase", "traffic light", "tray", "ashcan", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass", "clock", "flag"], "PALETTE": [[0, 0, 0], [120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], [102, 255, 0], [92, 0, 255]], "hook_msgs": {}}
2
+ {"mode": "train", "epoch": 1, "iter": 50, "lr": 1e-05, "memory": 19783, "data_time": 0.01408, "decode.loss_ce": 3.82431, "decode.acc_seg": 14.20625, "loss": 3.82431, "time": 0.28603}
3
+ {"mode": "train", "epoch": 1, "iter": 100, "lr": 1e-05, "memory": 19783, "data_time": 0.0072, "decode.loss_ce": 2.9748, "decode.acc_seg": 42.56172, "loss": 2.9748, "time": 0.17031}
4
+ {"mode": "train", "epoch": 1, "iter": 150, "lr": 2e-05, "memory": 19783, "data_time": 0.00767, "decode.loss_ce": 2.23466, "decode.acc_seg": 49.31739, "loss": 2.23466, "time": 0.16828}
5
+ {"mode": "train", "epoch": 1, "iter": 200, "lr": 3e-05, "memory": 19783, "data_time": 0.00714, "decode.loss_ce": 1.69707, "decode.acc_seg": 60.8327, "loss": 1.69707, "time": 0.16941}
6
+ {"mode": "train", "epoch": 1, "iter": 250, "lr": 4e-05, "memory": 19783, "data_time": 0.00685, "decode.loss_ce": 1.32658, "decode.acc_seg": 69.1492, "loss": 1.32658, "time": 0.16832}
7
+ {"mode": "train", "epoch": 1, "iter": 300, "lr": 4e-05, "memory": 19783, "data_time": 0.00661, "decode.loss_ce": 1.10944, "decode.acc_seg": 74.3359, "loss": 1.10944, "time": 0.17039}
8
+ {"mode": "train", "epoch": 1, "iter": 350, "lr": 5e-05, "memory": 19783, "data_time": 0.00731, "decode.loss_ce": 0.87994, "decode.acc_seg": 78.09939, "loss": 0.87994, "time": 0.17046}
9
+ {"mode": "train", "epoch": 1, "iter": 400, "lr": 6e-05, "memory": 19783, "data_time": 0.00667, "decode.loss_ce": 0.75186, "decode.acc_seg": 80.26307, "loss": 0.75186, "time": 0.17146}
10
+ {"mode": "train", "epoch": 1, "iter": 450, "lr": 7e-05, "memory": 19783, "data_time": 0.00738, "decode.loss_ce": 0.67067, "decode.acc_seg": 81.78752, "loss": 0.67067, "time": 0.17104}
11
+ {"mode": "train", "epoch": 1, "iter": 500, "lr": 7e-05, "memory": 19783, "data_time": 0.00748, "decode.loss_ce": 0.59517, "decode.acc_seg": 82.68755, "loss": 0.59517, "time": 0.16462}
12
+ {"mode": "train", "epoch": 1, "iter": 550, "lr": 8e-05, "memory": 19783, "data_time": 0.00755, "decode.loss_ce": 0.51003, "decode.acc_seg": 84.74014, "loss": 0.51003, "time": 0.17368}
13
+ {"mode": "train", "epoch": 1, "iter": 600, "lr": 9e-05, "memory": 19783, "data_time": 0.00752, "decode.loss_ce": 0.44453, "decode.acc_seg": 85.98014, "loss": 0.44453, "time": 0.16123}
14
+ {"mode": "train", "epoch": 2, "iter": 650, "lr": 0.0001, "memory": 19783, "data_time": 0.05353, "decode.loss_ce": 0.43514, "decode.acc_seg": 86.10254, "loss": 0.43514, "time": 0.21836}
15
+ {"mode": "train", "epoch": 2, "iter": 700, "lr": 0.0001, "memory": 19783, "data_time": 0.00717, "decode.loss_ce": 0.40124, "decode.acc_seg": 86.65119, "loss": 0.40124, "time": 0.16862}
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/20230304_103934.log ADDED
The diff for this file is too large to render. See raw diff
 
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/20230304_103934.log.json ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"env_info": "sys.platform: linux\nPython: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB\nCUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch\nNVCC: Cuda compilation tools, release 11.6, V11.6.124\nGCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)\nPyTorch: 1.13.1\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.6\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.14.1\nOpenCV: 4.7.0\nMMCV: 1.7.1\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.6\nMMSegmentation: 0.30.0+d4f0cb3", "seed": 1648012630, "exp_name": "ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask.py", "mmseg_version": "0.30.0+d4f0cb3", "config": "norm_cfg = dict(type='SyncBN', requires_grad=True)\ncheckpoint = 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'\nmodel = dict(\n type='EncoderDecoderFreeze',\n freeze_parameters=['backbone', 'decode_head'],\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',\n backbone=dict(\n type='MixVisionTransformerCustomInitWeights',\n in_channels=3,\n embed_dims=64,\n num_stages=4,\n num_layers=[3, 4, 6, 3],\n num_heads=[1, 2, 5, 8],\n patch_sizes=[7, 3, 3, 3],\n sr_ratios=[8, 4, 2, 1],\n out_indices=(0, 1, 2, 3),\n mlp_ratio=4,\n qkv_bias=True,\n drop_rate=0.0,\n attn_drop_rate=0.0,\n drop_path_rate=0.1,\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'\n ),\n decode_head=dict(\n type='SegformerHeadUnetFCHeadSingleStepMask',\n pretrained=\n 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',\n dim=128,\n out_dim=256,\n unet_channels=272,\n dim_mults=[1, 1, 1],\n cat_embedding_dim=16,\n in_channels=[64, 128, 320, 512],\n in_index=[0, 1, 2, 3],\n channels=256,\n dropout_ratio=0.1,\n num_classes=151,\n norm_cfg=dict(type='SyncBN', requires_grad=True),\n align_corners=False,\n ignore_index=0,\n loss_decode=dict(\n type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),\n train_cfg=dict(),\n test_cfg=dict(mode='whole'))\ndataset_type = 'ADE20K151Dataset'\ndata_root = 'data/ade/ADEChallengeData2016'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ncrop_size = (512, 512)\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=4,\n workers_per_gpu=4,\n train=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/training',\n ann_dir='annotations/training',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n ]),\n val=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]),\n test=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]))\nlog_config = dict(\n interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\ncudnn_benchmark = True\noptimizer = dict(\n type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)\noptimizer_config = dict()\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=1000,\n warmup_ratio=1e-06,\n step=10000,\n gamma=0.5,\n min_lr=1e-06,\n by_epoch=False)\nrunner = dict(type='IterBasedRunner', max_iters=80000)\ncheckpoint_config = dict(by_epoch=False, interval=8000)\nevaluation = dict(\n interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')\nwork_dir = './work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask'\ngpu_ids = range(0, 8)\nauto_resume = True\ndevice = 'cuda'\nseed = 1648012630\n", "CLASSES": ["background", "wall", "building", "sky", "floor", "tree", "ceiling", "road", "bed ", "windowpane", "grass", "cabinet", "sidewalk", "person", "earth", "door", "table", "mountain", "plant", "curtain", "chair", "car", "water", "painting", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock", "wardrobe", "lamp", "bathtub", "railing", "cushion", "base", "box", "column", "signboard", "chest of drawers", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator", "grandstand", "path", "stairs", "runway", "case", "pool table", "pillow", "screen door", "stairway", "river", "bridge", "bookcase", "blind", "coffee table", "toilet", "flower", "book", "hill", "bench", "countertop", "stove", "palm", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel", "bus", "towel", "light", "truck", "tower", "chandelier", "awning", "streetlight", "booth", "television receiver", "airplane", "dirt track", "apparel", "pole", "land", "bannister", "escalator", "ottoman", "bottle", "buffet", "poster", "stage", "van", "ship", "fountain", "conveyer belt", "canopy", "washer", "plaything", "swimming pool", "stool", "barrel", "basket", "waterfall", "tent", "bag", "minibike", "cradle", "oven", "ball", "food", "step", "tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket", "sculpture", "hood", "sconce", "vase", "traffic light", "tray", "ashcan", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass", "clock", "flag"], "PALETTE": [[0, 0, 0], [120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], [102, 255, 0], [92, 0, 255]], "hook_msgs": {}}
2
+ {"mode": "train", "epoch": 1, "iter": 50, "lr": 1e-05, "memory": 19783, "data_time": 0.01464, "decode.loss_ce": 3.78586, "decode.acc_seg": 13.44899, "loss": 3.78586, "time": 0.28897}
3
+ {"mode": "train", "epoch": 1, "iter": 100, "lr": 1e-05, "memory": 19783, "data_time": 0.00682, "decode.loss_ce": 2.91953, "decode.acc_seg": 44.1399, "loss": 2.91953, "time": 0.17252}
4
+ {"mode": "train", "epoch": 1, "iter": 150, "lr": 2e-05, "memory": 19783, "data_time": 0.00701, "decode.loss_ce": 2.10409, "decode.acc_seg": 53.58975, "loss": 2.10409, "time": 0.17548}
5
+ {"mode": "train", "epoch": 1, "iter": 200, "lr": 3e-05, "memory": 19783, "data_time": 0.0073, "decode.loss_ce": 1.6526, "decode.acc_seg": 62.47836, "loss": 1.6526, "time": 0.16673}
6
+ {"mode": "train", "epoch": 1, "iter": 250, "lr": 4e-05, "memory": 19783, "data_time": 0.00784, "decode.loss_ce": 1.32199, "decode.acc_seg": 68.99333, "loss": 1.32199, "time": 0.18392}
7
+ {"mode": "train", "epoch": 1, "iter": 300, "lr": 4e-05, "memory": 19783, "data_time": 0.00685, "decode.loss_ce": 1.07112, "decode.acc_seg": 74.76253, "loss": 1.07112, "time": 0.16585}
8
+ {"mode": "train", "epoch": 1, "iter": 350, "lr": 5e-05, "memory": 19783, "data_time": 0.00832, "decode.loss_ce": 0.87855, "decode.acc_seg": 77.97355, "loss": 0.87855, "time": 0.17731}
9
+ {"mode": "train", "epoch": 1, "iter": 400, "lr": 6e-05, "memory": 19783, "data_time": 0.00796, "decode.loss_ce": 0.75946, "decode.acc_seg": 80.1142, "loss": 0.75946, "time": 0.16399}
10
+ {"mode": "train", "epoch": 1, "iter": 450, "lr": 7e-05, "memory": 19783, "data_time": 0.00752, "decode.loss_ce": 0.70385, "decode.acc_seg": 80.5049, "loss": 0.70385, "time": 0.16518}
11
+ {"mode": "train", "epoch": 1, "iter": 500, "lr": 7e-05, "memory": 19783, "data_time": 0.00758, "decode.loss_ce": 0.56582, "decode.acc_seg": 84.02578, "loss": 0.56582, "time": 0.16899}
12
+ {"mode": "train", "epoch": 1, "iter": 550, "lr": 8e-05, "memory": 19783, "data_time": 0.00745, "decode.loss_ce": 0.51044, "decode.acc_seg": 84.65132, "loss": 0.51044, "time": 0.17316}
13
+ {"mode": "train", "epoch": 1, "iter": 600, "lr": 9e-05, "memory": 19783, "data_time": 0.00692, "decode.loss_ce": 0.46692, "decode.acc_seg": 85.42044, "loss": 0.46692, "time": 0.17047}
14
+ {"mode": "train", "epoch": 2, "iter": 650, "lr": 0.0001, "memory": 19783, "data_time": 0.05563, "decode.loss_ce": 0.42618, "decode.acc_seg": 86.16397, "loss": 0.42618, "time": 0.22341}
15
+ {"mode": "train", "epoch": 2, "iter": 700, "lr": 0.0001, "memory": 19783, "data_time": 0.00702, "decode.loss_ce": 0.39092, "decode.acc_seg": 86.64982, "loss": 0.39092, "time": 0.16783}
16
+ {"mode": "train", "epoch": 2, "iter": 750, "lr": 0.00011, "memory": 19783, "data_time": 0.00746, "decode.loss_ce": 0.35796, "decode.acc_seg": 87.63995, "loss": 0.35796, "time": 0.16982}
17
+ {"mode": "train", "epoch": 2, "iter": 800, "lr": 0.00012, "memory": 19783, "data_time": 0.00735, "decode.loss_ce": 0.37266, "decode.acc_seg": 87.08973, "loss": 0.37266, "time": 0.16822}
18
+ {"mode": "train", "epoch": 2, "iter": 850, "lr": 0.00013, "memory": 19783, "data_time": 0.00717, "decode.loss_ce": 0.3519, "decode.acc_seg": 87.47414, "loss": 0.3519, "time": 0.17154}
19
+ {"mode": "train", "epoch": 2, "iter": 900, "lr": 0.00013, "memory": 19783, "data_time": 0.00814, "decode.loss_ce": 0.33697, "decode.acc_seg": 87.83325, "loss": 0.33697, "time": 0.17741}
20
+ {"mode": "train", "epoch": 2, "iter": 950, "lr": 0.00014, "memory": 19783, "data_time": 0.007, "decode.loss_ce": 0.33214, "decode.acc_seg": 87.90779, "loss": 0.33214, "time": 0.17481}
21
+ {"mode": "train", "epoch": 2, "iter": 1000, "lr": 0.00015, "memory": 19783, "data_time": 0.00707, "decode.loss_ce": 0.32533, "decode.acc_seg": 87.80602, "loss": 0.32533, "time": 0.17805}
22
+ {"mode": "train", "epoch": 2, "iter": 1050, "lr": 0.00015, "memory": 19783, "data_time": 0.00713, "decode.loss_ce": 0.31727, "decode.acc_seg": 88.31456, "loss": 0.31727, "time": 0.16702}
23
+ {"mode": "train", "epoch": 2, "iter": 1100, "lr": 0.00015, "memory": 19783, "data_time": 0.00683, "decode.loss_ce": 0.31942, "decode.acc_seg": 88.159, "loss": 0.31942, "time": 0.1682}
24
+ {"mode": "train", "epoch": 2, "iter": 1150, "lr": 0.00015, "memory": 19783, "data_time": 0.0074, "decode.loss_ce": 0.31511, "decode.acc_seg": 88.10372, "loss": 0.31511, "time": 0.16781}
25
+ {"mode": "train", "epoch": 2, "iter": 1200, "lr": 0.00015, "memory": 19783, "data_time": 0.00735, "decode.loss_ce": 0.29375, "decode.acc_seg": 89.09847, "loss": 0.29375, "time": 0.16453}
26
+ {"mode": "train", "epoch": 2, "iter": 1250, "lr": 0.00015, "memory": 19783, "data_time": 0.00714, "decode.loss_ce": 0.29113, "decode.acc_seg": 88.97655, "loss": 0.29113, "time": 0.16846}
27
+ {"mode": "train", "epoch": 3, "iter": 1300, "lr": 0.00015, "memory": 19783, "data_time": 0.05392, "decode.loss_ce": 0.28411, "decode.acc_seg": 89.29744, "loss": 0.28411, "time": 0.21186}
28
+ {"mode": "train", "epoch": 3, "iter": 1350, "lr": 0.00015, "memory": 19783, "data_time": 0.0071, "decode.loss_ce": 0.28474, "decode.acc_seg": 88.92053, "loss": 0.28474, "time": 0.16269}
29
+ {"mode": "train", "epoch": 3, "iter": 1400, "lr": 0.00015, "memory": 19783, "data_time": 0.00671, "decode.loss_ce": 0.28921, "decode.acc_seg": 89.11701, "loss": 0.28921, "time": 0.16276}
30
+ {"mode": "train", "epoch": 3, "iter": 1450, "lr": 0.00015, "memory": 19783, "data_time": 0.00739, "decode.loss_ce": 0.28266, "decode.acc_seg": 89.08791, "loss": 0.28266, "time": 0.16348}
31
+ {"mode": "train", "epoch": 3, "iter": 1500, "lr": 0.00015, "memory": 19783, "data_time": 0.00699, "decode.loss_ce": 0.28323, "decode.acc_seg": 88.94225, "loss": 0.28323, "time": 0.16677}
32
+ {"mode": "train", "epoch": 3, "iter": 1550, "lr": 0.00015, "memory": 19783, "data_time": 0.0075, "decode.loss_ce": 0.29209, "decode.acc_seg": 88.851, "loss": 0.29209, "time": 0.16656}
33
+ {"mode": "train", "epoch": 3, "iter": 1600, "lr": 0.00015, "memory": 19783, "data_time": 0.00727, "decode.loss_ce": 0.28026, "decode.acc_seg": 89.16342, "loss": 0.28026, "time": 0.16899}
34
+ {"mode": "train", "epoch": 3, "iter": 1650, "lr": 0.00015, "memory": 19783, "data_time": 0.00713, "decode.loss_ce": 0.27397, "decode.acc_seg": 89.47841, "loss": 0.27397, "time": 0.17142}
35
+ {"mode": "train", "epoch": 3, "iter": 1700, "lr": 0.00015, "memory": 19783, "data_time": 0.00748, "decode.loss_ce": 0.27943, "decode.acc_seg": 89.04867, "loss": 0.27943, "time": 0.17033}
36
+ {"mode": "train", "epoch": 3, "iter": 1750, "lr": 0.00015, "memory": 19783, "data_time": 0.00739, "decode.loss_ce": 0.27289, "decode.acc_seg": 89.30963, "loss": 0.27289, "time": 0.16775}
37
+ {"mode": "train", "epoch": 3, "iter": 1800, "lr": 0.00015, "memory": 19783, "data_time": 0.00709, "decode.loss_ce": 0.25431, "decode.acc_seg": 89.92175, "loss": 0.25431, "time": 0.17115}
38
+ {"mode": "train", "epoch": 3, "iter": 1850, "lr": 0.00015, "memory": 19783, "data_time": 0.00764, "decode.loss_ce": 0.27098, "decode.acc_seg": 89.38316, "loss": 0.27098, "time": 0.16868}
39
+ {"mode": "train", "epoch": 4, "iter": 1900, "lr": 0.00015, "memory": 19783, "data_time": 0.05451, "decode.loss_ce": 0.26261, "decode.acc_seg": 89.79397, "loss": 0.26261, "time": 0.2239}
40
+ {"mode": "train", "epoch": 4, "iter": 1950, "lr": 0.00015, "memory": 19783, "data_time": 0.00677, "decode.loss_ce": 0.27538, "decode.acc_seg": 89.28754, "loss": 0.27538, "time": 0.17293}
41
+ {"mode": "train", "epoch": 4, "iter": 2000, "lr": 0.00015, "memory": 19783, "data_time": 0.00683, "decode.loss_ce": 0.26254, "decode.acc_seg": 89.72531, "loss": 0.26254, "time": 0.16631}
42
+ {"mode": "train", "epoch": 4, "iter": 2050, "lr": 0.00015, "memory": 19783, "data_time": 0.00672, "decode.loss_ce": 0.26892, "decode.acc_seg": 89.60717, "loss": 0.26892, "time": 0.17634}
43
+ {"mode": "train", "epoch": 4, "iter": 2100, "lr": 0.00015, "memory": 19783, "data_time": 0.0071, "decode.loss_ce": 0.25522, "decode.acc_seg": 89.88685, "loss": 0.25522, "time": 0.16423}
44
+ {"mode": "train", "epoch": 4, "iter": 2150, "lr": 0.00015, "memory": 19783, "data_time": 0.00737, "decode.loss_ce": 0.26096, "decode.acc_seg": 89.79101, "loss": 0.26096, "time": 0.16638}
45
+ {"mode": "train", "epoch": 4, "iter": 2200, "lr": 0.00015, "memory": 19783, "data_time": 0.00718, "decode.loss_ce": 0.26273, "decode.acc_seg": 89.645, "loss": 0.26273, "time": 0.17165}
46
+ {"mode": "train", "epoch": 4, "iter": 2250, "lr": 0.00015, "memory": 19783, "data_time": 0.00694, "decode.loss_ce": 0.26784, "decode.acc_seg": 89.34889, "loss": 0.26784, "time": 0.16663}
47
+ {"mode": "train", "epoch": 4, "iter": 2300, "lr": 0.00015, "memory": 19783, "data_time": 0.00715, "decode.loss_ce": 0.26124, "decode.acc_seg": 89.65508, "loss": 0.26124, "time": 0.17081}
48
+ {"mode": "train", "epoch": 4, "iter": 2350, "lr": 0.00015, "memory": 19783, "data_time": 0.00701, "decode.loss_ce": 0.26513, "decode.acc_seg": 89.44922, "loss": 0.26513, "time": 0.16596}
49
+ {"mode": "train", "epoch": 4, "iter": 2400, "lr": 0.00015, "memory": 19783, "data_time": 0.00661, "decode.loss_ce": 0.25128, "decode.acc_seg": 89.97749, "loss": 0.25128, "time": 0.17423}
50
+ {"mode": "train", "epoch": 4, "iter": 2450, "lr": 0.00015, "memory": 19783, "data_time": 0.00745, "decode.loss_ce": 0.26641, "decode.acc_seg": 89.34108, "loss": 0.26641, "time": 0.17297}
51
+ {"mode": "train", "epoch": 4, "iter": 2500, "lr": 0.00015, "memory": 19783, "data_time": 0.0072, "decode.loss_ce": 0.2621, "decode.acc_seg": 89.7861, "loss": 0.2621, "time": 0.17021}
52
+ {"mode": "train", "epoch": 5, "iter": 2550, "lr": 0.00015, "memory": 19783, "data_time": 0.0521, "decode.loss_ce": 0.24304, "decode.acc_seg": 90.10812, "loss": 0.24304, "time": 0.21542}
53
+ {"mode": "train", "epoch": 5, "iter": 2600, "lr": 0.00015, "memory": 19783, "data_time": 0.00686, "decode.loss_ce": 0.25384, "decode.acc_seg": 90.02081, "loss": 0.25384, "time": 0.17489}
54
+ {"mode": "train", "epoch": 5, "iter": 2650, "lr": 0.00015, "memory": 19783, "data_time": 0.00716, "decode.loss_ce": 0.26031, "decode.acc_seg": 89.77231, "loss": 0.26031, "time": 0.16684}
55
+ {"mode": "train", "epoch": 5, "iter": 2700, "lr": 0.00015, "memory": 19783, "data_time": 0.0071, "decode.loss_ce": 0.2666, "decode.acc_seg": 89.22362, "loss": 0.2666, "time": 0.16719}
56
+ {"mode": "train", "epoch": 5, "iter": 2750, "lr": 0.00015, "memory": 19783, "data_time": 0.00801, "decode.loss_ce": 0.26155, "decode.acc_seg": 89.54207, "loss": 0.26155, "time": 0.16738}
57
+ {"mode": "train", "epoch": 5, "iter": 2800, "lr": 0.00015, "memory": 19783, "data_time": 0.00727, "decode.loss_ce": 0.2504, "decode.acc_seg": 89.91151, "loss": 0.2504, "time": 0.16287}
58
+ {"mode": "train", "epoch": 5, "iter": 2850, "lr": 0.00015, "memory": 19783, "data_time": 0.00674, "decode.loss_ce": 0.25434, "decode.acc_seg": 89.87366, "loss": 0.25434, "time": 0.17361}
59
+ {"mode": "train", "epoch": 5, "iter": 2900, "lr": 0.00015, "memory": 19783, "data_time": 0.00791, "decode.loss_ce": 0.2511, "decode.acc_seg": 89.8458, "loss": 0.2511, "time": 0.17571}
60
+ {"mode": "train", "epoch": 5, "iter": 2950, "lr": 0.00015, "memory": 19783, "data_time": 0.00664, "decode.loss_ce": 0.25013, "decode.acc_seg": 90.12326, "loss": 0.25013, "time": 0.17105}
61
+ {"mode": "train", "epoch": 5, "iter": 3000, "lr": 0.00015, "memory": 19783, "data_time": 0.00693, "decode.loss_ce": 0.25603, "decode.acc_seg": 89.82247, "loss": 0.25603, "time": 0.1639}
62
+ {"mode": "train", "epoch": 5, "iter": 3050, "lr": 0.00015, "memory": 19783, "data_time": 0.00718, "decode.loss_ce": 0.25956, "decode.acc_seg": 89.82373, "loss": 0.25956, "time": 0.17946}
63
+ {"mode": "train", "epoch": 5, "iter": 3100, "lr": 0.00015, "memory": 19783, "data_time": 0.00707, "decode.loss_ce": 0.25529, "decode.acc_seg": 89.77675, "loss": 0.25529, "time": 0.16852}
64
+ {"mode": "train", "epoch": 5, "iter": 3150, "lr": 0.00015, "memory": 19783, "data_time": 0.00737, "decode.loss_ce": 0.25803, "decode.acc_seg": 89.64798, "loss": 0.25803, "time": 0.17423}
65
+ {"mode": "train", "epoch": 6, "iter": 3200, "lr": 0.00015, "memory": 19783, "data_time": 0.05491, "decode.loss_ce": 0.24815, "decode.acc_seg": 90.10166, "loss": 0.24815, "time": 0.21712}
66
+ {"mode": "train", "epoch": 6, "iter": 3250, "lr": 0.00015, "memory": 19783, "data_time": 0.00732, "decode.loss_ce": 0.24073, "decode.acc_seg": 90.3456, "loss": 0.24073, "time": 0.1644}
67
+ {"mode": "train", "epoch": 6, "iter": 3300, "lr": 0.00015, "memory": 19783, "data_time": 0.00742, "decode.loss_ce": 0.24323, "decode.acc_seg": 90.39054, "loss": 0.24323, "time": 0.16306}
68
+ {"mode": "train", "epoch": 6, "iter": 3350, "lr": 0.00015, "memory": 19783, "data_time": 0.00727, "decode.loss_ce": 0.24765, "decode.acc_seg": 90.05928, "loss": 0.24765, "time": 0.16683}
69
+ {"mode": "train", "epoch": 6, "iter": 3400, "lr": 0.00015, "memory": 19783, "data_time": 0.00744, "decode.loss_ce": 0.24116, "decode.acc_seg": 90.39199, "loss": 0.24116, "time": 0.16243}
70
+ {"mode": "train", "epoch": 6, "iter": 3450, "lr": 0.00015, "memory": 19783, "data_time": 0.00781, "decode.loss_ce": 0.24984, "decode.acc_seg": 89.77229, "loss": 0.24984, "time": 0.16597}
71
+ {"mode": "train", "epoch": 6, "iter": 3500, "lr": 0.00015, "memory": 19783, "data_time": 0.00758, "decode.loss_ce": 0.2521, "decode.acc_seg": 89.79541, "loss": 0.2521, "time": 0.16949}
72
+ {"mode": "train", "epoch": 6, "iter": 3550, "lr": 0.00015, "memory": 19783, "data_time": 0.00802, "decode.loss_ce": 0.26202, "decode.acc_seg": 89.40474, "loss": 0.26202, "time": 0.16849}
73
+ {"mode": "train", "epoch": 6, "iter": 3600, "lr": 0.00015, "memory": 19783, "data_time": 0.00764, "decode.loss_ce": 0.24962, "decode.acc_seg": 89.7942, "loss": 0.24962, "time": 0.16373}
74
+ {"mode": "train", "epoch": 6, "iter": 3650, "lr": 0.00015, "memory": 19783, "data_time": 0.00706, "decode.loss_ce": 0.23575, "decode.acc_seg": 90.55114, "loss": 0.23575, "time": 0.1724}
75
+ {"mode": "train", "epoch": 6, "iter": 3700, "lr": 0.00015, "memory": 19783, "data_time": 0.00707, "decode.loss_ce": 0.25319, "decode.acc_seg": 89.95363, "loss": 0.25319, "time": 0.17091}
76
+ {"mode": "train", "epoch": 6, "iter": 3750, "lr": 0.00015, "memory": 19783, "data_time": 0.00681, "decode.loss_ce": 0.25331, "decode.acc_seg": 89.74242, "loss": 0.25331, "time": 0.1678}
77
+ {"mode": "train", "epoch": 7, "iter": 3800, "lr": 0.00015, "memory": 19783, "data_time": 0.05404, "decode.loss_ce": 0.24787, "decode.acc_seg": 89.94419, "loss": 0.24787, "time": 0.23071}
78
+ {"mode": "train", "epoch": 7, "iter": 3850, "lr": 0.00015, "memory": 19783, "data_time": 0.00738, "decode.loss_ce": 0.23945, "decode.acc_seg": 90.18102, "loss": 0.23945, "time": 0.1685}
79
+ {"mode": "train", "epoch": 7, "iter": 3900, "lr": 0.00015, "memory": 19783, "data_time": 0.0076, "decode.loss_ce": 0.23457, "decode.acc_seg": 90.46158, "loss": 0.23457, "time": 0.17543}
80
+ {"mode": "train", "epoch": 7, "iter": 3950, "lr": 0.00015, "memory": 19783, "data_time": 0.0076, "decode.loss_ce": 0.24769, "decode.acc_seg": 90.01688, "loss": 0.24769, "time": 0.16782}
81
+ {"mode": "train", "epoch": 7, "iter": 4000, "lr": 0.00015, "memory": 19783, "data_time": 0.00717, "decode.loss_ce": 0.24628, "decode.acc_seg": 90.21013, "loss": 0.24628, "time": 0.16726}
82
+ {"mode": "train", "epoch": 7, "iter": 4050, "lr": 0.00015, "memory": 19783, "data_time": 0.00754, "decode.loss_ce": 0.2341, "decode.acc_seg": 90.579, "loss": 0.2341, "time": 0.17162}
83
+ {"mode": "train", "epoch": 7, "iter": 4100, "lr": 0.00015, "memory": 19783, "data_time": 0.00746, "decode.loss_ce": 0.24573, "decode.acc_seg": 90.16542, "loss": 0.24573, "time": 0.16339}
84
+ {"mode": "train", "epoch": 7, "iter": 4150, "lr": 0.00015, "memory": 19783, "data_time": 0.00731, "decode.loss_ce": 0.24285, "decode.acc_seg": 90.28304, "loss": 0.24285, "time": 0.16864}
85
+ {"mode": "train", "epoch": 7, "iter": 4200, "lr": 0.00015, "memory": 19783, "data_time": 0.00756, "decode.loss_ce": 0.23958, "decode.acc_seg": 90.30665, "loss": 0.23958, "time": 0.16759}
86
+ {"mode": "train", "epoch": 7, "iter": 4250, "lr": 0.00015, "memory": 19783, "data_time": 0.00689, "decode.loss_ce": 0.24788, "decode.acc_seg": 90.00322, "loss": 0.24788, "time": 0.16993}
87
+ {"mode": "train", "epoch": 7, "iter": 4300, "lr": 0.00015, "memory": 19783, "data_time": 0.00734, "decode.loss_ce": 0.23125, "decode.acc_seg": 90.54874, "loss": 0.23125, "time": 0.16811}
88
+ {"mode": "train", "epoch": 7, "iter": 4350, "lr": 0.00015, "memory": 19783, "data_time": 0.00728, "decode.loss_ce": 0.24042, "decode.acc_seg": 90.47029, "loss": 0.24042, "time": 0.16826}
89
+ {"mode": "train", "epoch": 7, "iter": 4400, "lr": 0.00015, "memory": 19783, "data_time": 0.00747, "decode.loss_ce": 0.24551, "decode.acc_seg": 89.95885, "loss": 0.24551, "time": 0.17054}
90
+ {"mode": "train", "epoch": 8, "iter": 4450, "lr": 0.00015, "memory": 19783, "data_time": 0.05442, "decode.loss_ce": 0.2265, "decode.acc_seg": 90.90395, "loss": 0.2265, "time": 0.21336}
91
+ {"mode": "train", "epoch": 8, "iter": 4500, "lr": 0.00015, "memory": 19783, "data_time": 0.00733, "decode.loss_ce": 0.247, "decode.acc_seg": 90.04266, "loss": 0.247, "time": 0.16689}
92
+ {"mode": "train", "epoch": 8, "iter": 4550, "lr": 0.00015, "memory": 19783, "data_time": 0.0076, "decode.loss_ce": 0.2394, "decode.acc_seg": 90.46837, "loss": 0.2394, "time": 0.17266}
93
+ {"mode": "train", "epoch": 8, "iter": 4600, "lr": 0.00015, "memory": 19783, "data_time": 0.00739, "decode.loss_ce": 0.24674, "decode.acc_seg": 90.0508, "loss": 0.24674, "time": 0.16544}
94
+ {"mode": "train", "epoch": 8, "iter": 4650, "lr": 0.00015, "memory": 19783, "data_time": 0.00713, "decode.loss_ce": 0.24889, "decode.acc_seg": 89.97996, "loss": 0.24889, "time": 0.17361}
95
+ {"mode": "train", "epoch": 8, "iter": 4700, "lr": 0.00015, "memory": 19783, "data_time": 0.00723, "decode.loss_ce": 0.25066, "decode.acc_seg": 90.10819, "loss": 0.25066, "time": 0.17526}
96
+ {"mode": "train", "epoch": 8, "iter": 4750, "lr": 0.00015, "memory": 19783, "data_time": 0.00725, "decode.loss_ce": 0.23078, "decode.acc_seg": 90.68572, "loss": 0.23078, "time": 0.16917}
97
+ {"mode": "train", "epoch": 8, "iter": 4800, "lr": 0.00015, "memory": 19783, "data_time": 0.00754, "decode.loss_ce": 0.24164, "decode.acc_seg": 90.21678, "loss": 0.24164, "time": 0.16936}
98
+ {"mode": "train", "epoch": 8, "iter": 4850, "lr": 0.00015, "memory": 19783, "data_time": 0.00749, "decode.loss_ce": 0.24123, "decode.acc_seg": 90.2115, "loss": 0.24123, "time": 0.17225}
99
+ {"mode": "train", "epoch": 8, "iter": 4900, "lr": 0.00015, "memory": 19783, "data_time": 0.00787, "decode.loss_ce": 0.24872, "decode.acc_seg": 90.06069, "loss": 0.24872, "time": 0.16871}
100
+ {"mode": "train", "epoch": 8, "iter": 4950, "lr": 0.00015, "memory": 19783, "data_time": 0.00731, "decode.loss_ce": 0.24157, "decode.acc_seg": 90.28286, "loss": 0.24157, "time": 0.1622}
101
+ {"mode": "train", "epoch": 8, "iter": 5000, "lr": 0.00015, "memory": 19783, "data_time": 0.00723, "decode.loss_ce": 0.23888, "decode.acc_seg": 90.337, "loss": 0.23888, "time": 0.17729}
102
+ {"mode": "train", "epoch": 9, "iter": 5050, "lr": 0.00015, "memory": 19783, "data_time": 0.05372, "decode.loss_ce": 0.23444, "decode.acc_seg": 90.65998, "loss": 0.23444, "time": 0.21327}
103
+ {"mode": "train", "epoch": 9, "iter": 5100, "lr": 0.00015, "memory": 19783, "data_time": 0.00674, "decode.loss_ce": 0.24385, "decode.acc_seg": 90.19634, "loss": 0.24385, "time": 0.17532}
104
+ {"mode": "train", "epoch": 9, "iter": 5150, "lr": 0.00015, "memory": 19783, "data_time": 0.00754, "decode.loss_ce": 0.24576, "decode.acc_seg": 90.23702, "loss": 0.24576, "time": 0.16991}
105
+ {"mode": "train", "epoch": 9, "iter": 5200, "lr": 0.00015, "memory": 19783, "data_time": 0.007, "decode.loss_ce": 0.2333, "decode.acc_seg": 90.51888, "loss": 0.2333, "time": 0.16263}
106
+ {"mode": "train", "epoch": 9, "iter": 5250, "lr": 0.00015, "memory": 19783, "data_time": 0.00714, "decode.loss_ce": 0.24466, "decode.acc_seg": 90.27321, "loss": 0.24466, "time": 0.16789}
107
+ {"mode": "train", "epoch": 9, "iter": 5300, "lr": 0.00015, "memory": 19783, "data_time": 0.0078, "decode.loss_ce": 0.24114, "decode.acc_seg": 90.39097, "loss": 0.24114, "time": 0.16739}
108
+ {"mode": "train", "epoch": 9, "iter": 5350, "lr": 0.00015, "memory": 19783, "data_time": 0.0073, "decode.loss_ce": 0.23671, "decode.acc_seg": 90.32327, "loss": 0.23671, "time": 0.1613}
109
+ {"mode": "train", "epoch": 9, "iter": 5400, "lr": 0.00015, "memory": 19783, "data_time": 0.00732, "decode.loss_ce": 0.23675, "decode.acc_seg": 90.48409, "loss": 0.23675, "time": 0.16479}
110
+ {"mode": "train", "epoch": 9, "iter": 5450, "lr": 0.00015, "memory": 19783, "data_time": 0.00771, "decode.loss_ce": 0.24392, "decode.acc_seg": 90.07503, "loss": 0.24392, "time": 0.16243}
111
+ {"mode": "train", "epoch": 9, "iter": 5500, "lr": 0.00015, "memory": 19783, "data_time": 0.0071, "decode.loss_ce": 0.2508, "decode.acc_seg": 89.99546, "loss": 0.2508, "time": 0.17402}
112
+ {"mode": "train", "epoch": 9, "iter": 5550, "lr": 0.00015, "memory": 19783, "data_time": 0.00696, "decode.loss_ce": 0.24176, "decode.acc_seg": 90.22726, "loss": 0.24176, "time": 0.17048}
113
+ {"mode": "train", "epoch": 9, "iter": 5600, "lr": 0.00015, "memory": 19783, "data_time": 0.00667, "decode.loss_ce": 0.23641, "decode.acc_seg": 90.32053, "loss": 0.23641, "time": 0.17978}
114
+ {"mode": "train", "epoch": 9, "iter": 5650, "lr": 0.00015, "memory": 19783, "data_time": 0.008, "decode.loss_ce": 0.22384, "decode.acc_seg": 90.89573, "loss": 0.22384, "time": 0.17304}
115
+ {"mode": "train", "epoch": 10, "iter": 5700, "lr": 0.00015, "memory": 19783, "data_time": 0.05376, "decode.loss_ce": 0.23805, "decode.acc_seg": 90.35932, "loss": 0.23805, "time": 0.21666}
116
+ {"mode": "train", "epoch": 10, "iter": 5750, "lr": 0.00015, "memory": 19783, "data_time": 0.00731, "decode.loss_ce": 0.23584, "decode.acc_seg": 90.47929, "loss": 0.23584, "time": 0.16638}
117
+ {"mode": "train", "epoch": 10, "iter": 5800, "lr": 0.00015, "memory": 19783, "data_time": 0.00671, "decode.loss_ce": 0.25196, "decode.acc_seg": 89.81801, "loss": 0.25196, "time": 0.16849}
118
+ {"mode": "train", "epoch": 10, "iter": 5850, "lr": 0.00015, "memory": 19783, "data_time": 0.00707, "decode.loss_ce": 0.23571, "decode.acc_seg": 90.36798, "loss": 0.23571, "time": 0.17485}
119
+ {"mode": "train", "epoch": 10, "iter": 5900, "lr": 0.00015, "memory": 19783, "data_time": 0.00676, "decode.loss_ce": 0.2416, "decode.acc_seg": 90.27747, "loss": 0.2416, "time": 0.17023}
120
+ {"mode": "train", "epoch": 10, "iter": 5950, "lr": 0.00015, "memory": 19783, "data_time": 0.00715, "decode.loss_ce": 0.23559, "decode.acc_seg": 90.43211, "loss": 0.23559, "time": 0.17164}
121
+ {"mode": "train", "epoch": 10, "iter": 6000, "lr": 0.00015, "memory": 19783, "data_time": 0.00697, "decode.loss_ce": 0.2205, "decode.acc_seg": 91.05051, "loss": 0.2205, "time": 0.17556}
122
+ {"mode": "train", "epoch": 10, "iter": 6050, "lr": 0.00015, "memory": 19783, "data_time": 0.00694, "decode.loss_ce": 0.23222, "decode.acc_seg": 90.57611, "loss": 0.23222, "time": 0.17327}
123
+ {"mode": "train", "epoch": 10, "iter": 6100, "lr": 0.00015, "memory": 19783, "data_time": 0.00714, "decode.loss_ce": 0.24064, "decode.acc_seg": 90.31305, "loss": 0.24064, "time": 0.16579}
124
+ {"mode": "train", "epoch": 10, "iter": 6150, "lr": 0.00015, "memory": 19783, "data_time": 0.00751, "decode.loss_ce": 0.23976, "decode.acc_seg": 90.38844, "loss": 0.23976, "time": 0.16872}
125
+ {"mode": "train", "epoch": 10, "iter": 6200, "lr": 0.00015, "memory": 19783, "data_time": 0.00733, "decode.loss_ce": 0.23414, "decode.acc_seg": 90.44385, "loss": 0.23414, "time": 0.16422}
126
+ {"mode": "train", "epoch": 10, "iter": 6250, "lr": 0.00015, "memory": 19783, "data_time": 0.00727, "decode.loss_ce": 0.24566, "decode.acc_seg": 90.27457, "loss": 0.24566, "time": 0.17081}
127
+ {"mode": "train", "epoch": 10, "iter": 6300, "lr": 0.00015, "memory": 19783, "data_time": 0.00741, "decode.loss_ce": 0.22406, "decode.acc_seg": 90.73291, "loss": 0.22406, "time": 0.172}
128
+ {"mode": "train", "epoch": 11, "iter": 6350, "lr": 0.00015, "memory": 19783, "data_time": 0.05658, "decode.loss_ce": 0.23578, "decode.acc_seg": 90.38232, "loss": 0.23578, "time": 0.21898}
129
+ {"mode": "train", "epoch": 11, "iter": 6400, "lr": 0.00015, "memory": 19783, "data_time": 0.00723, "decode.loss_ce": 0.22355, "decode.acc_seg": 90.76867, "loss": 0.22355, "time": 0.17097}
130
+ {"mode": "train", "epoch": 11, "iter": 6450, "lr": 0.00015, "memory": 19783, "data_time": 0.00727, "decode.loss_ce": 0.22216, "decode.acc_seg": 90.82901, "loss": 0.22216, "time": 0.17446}
131
+ {"mode": "train", "epoch": 11, "iter": 6500, "lr": 0.00015, "memory": 19783, "data_time": 0.00713, "decode.loss_ce": 0.22971, "decode.acc_seg": 90.70459, "loss": 0.22971, "time": 0.17473}
132
+ {"mode": "train", "epoch": 11, "iter": 6550, "lr": 0.00015, "memory": 19783, "data_time": 0.00783, "decode.loss_ce": 0.22432, "decode.acc_seg": 90.83311, "loss": 0.22432, "time": 0.1701}
133
+ {"mode": "train", "epoch": 11, "iter": 6600, "lr": 0.00015, "memory": 19783, "data_time": 0.00738, "decode.loss_ce": 0.24216, "decode.acc_seg": 90.42721, "loss": 0.24216, "time": 0.16582}
134
+ {"mode": "train", "epoch": 11, "iter": 6650, "lr": 0.00015, "memory": 19783, "data_time": 0.00664, "decode.loss_ce": 0.23376, "decode.acc_seg": 90.60508, "loss": 0.23376, "time": 0.17496}
135
+ {"mode": "train", "epoch": 11, "iter": 6700, "lr": 0.00015, "memory": 19783, "data_time": 0.00714, "decode.loss_ce": 0.24306, "decode.acc_seg": 90.1269, "loss": 0.24306, "time": 0.16614}
136
+ {"mode": "train", "epoch": 11, "iter": 6750, "lr": 0.00015, "memory": 19783, "data_time": 0.00718, "decode.loss_ce": 0.23822, "decode.acc_seg": 90.38081, "loss": 0.23822, "time": 0.16329}
137
+ {"mode": "train", "epoch": 11, "iter": 6800, "lr": 0.00015, "memory": 19783, "data_time": 0.00724, "decode.loss_ce": 0.2379, "decode.acc_seg": 90.49255, "loss": 0.2379, "time": 0.16966}
138
+ {"mode": "train", "epoch": 11, "iter": 6850, "lr": 0.00015, "memory": 19783, "data_time": 0.00708, "decode.loss_ce": 0.24489, "decode.acc_seg": 90.19225, "loss": 0.24489, "time": 0.16918}
139
+ {"mode": "train", "epoch": 11, "iter": 6900, "lr": 0.00015, "memory": 19783, "data_time": 0.00737, "decode.loss_ce": 0.23591, "decode.acc_seg": 90.47066, "loss": 0.23591, "time": 0.16545}
140
+ {"mode": "train", "epoch": 12, "iter": 6950, "lr": 0.00015, "memory": 19783, "data_time": 0.05516, "decode.loss_ce": 0.22491, "decode.acc_seg": 90.74824, "loss": 0.22491, "time": 0.22021}
141
+ {"mode": "train", "epoch": 12, "iter": 7000, "lr": 0.00015, "memory": 19783, "data_time": 0.00698, "decode.loss_ce": 0.22866, "decode.acc_seg": 90.84347, "loss": 0.22866, "time": 0.16929}
142
+ {"mode": "train", "epoch": 12, "iter": 7050, "lr": 0.00015, "memory": 19783, "data_time": 0.00713, "decode.loss_ce": 0.23445, "decode.acc_seg": 90.53955, "loss": 0.23445, "time": 0.16909}
143
+ {"mode": "train", "epoch": 12, "iter": 7100, "lr": 0.00015, "memory": 19783, "data_time": 0.00688, "decode.loss_ce": 0.22867, "decode.acc_seg": 90.72875, "loss": 0.22867, "time": 0.17427}
144
+ {"mode": "train", "epoch": 12, "iter": 7150, "lr": 0.00015, "memory": 19783, "data_time": 0.00774, "decode.loss_ce": 0.23175, "decode.acc_seg": 90.64853, "loss": 0.23175, "time": 0.16845}
145
+ {"mode": "train", "epoch": 12, "iter": 7200, "lr": 0.00015, "memory": 19783, "data_time": 0.00753, "decode.loss_ce": 0.23831, "decode.acc_seg": 90.29184, "loss": 0.23831, "time": 0.17243}
146
+ {"mode": "train", "epoch": 12, "iter": 7250, "lr": 0.00015, "memory": 19783, "data_time": 0.00724, "decode.loss_ce": 0.23129, "decode.acc_seg": 90.67923, "loss": 0.23129, "time": 0.16382}
147
+ {"mode": "train", "epoch": 12, "iter": 7300, "lr": 0.00015, "memory": 19783, "data_time": 0.00728, "decode.loss_ce": 0.23042, "decode.acc_seg": 90.6756, "loss": 0.23042, "time": 0.17514}
148
+ {"mode": "train", "epoch": 12, "iter": 7350, "lr": 0.00015, "memory": 19783, "data_time": 0.00762, "decode.loss_ce": 0.23096, "decode.acc_seg": 90.4861, "loss": 0.23096, "time": 0.17494}
149
+ {"mode": "train", "epoch": 12, "iter": 7400, "lr": 0.00015, "memory": 19783, "data_time": 0.00722, "decode.loss_ce": 0.23648, "decode.acc_seg": 90.41459, "loss": 0.23648, "time": 0.16612}
150
+ {"mode": "train", "epoch": 12, "iter": 7450, "lr": 0.00015, "memory": 19783, "data_time": 0.00698, "decode.loss_ce": 0.23389, "decode.acc_seg": 90.56373, "loss": 0.23389, "time": 0.16687}
151
+ {"mode": "train", "epoch": 12, "iter": 7500, "lr": 0.00015, "memory": 19783, "data_time": 0.00707, "decode.loss_ce": 0.22833, "decode.acc_seg": 90.66262, "loss": 0.22833, "time": 0.16645}
152
+ {"mode": "train", "epoch": 12, "iter": 7550, "lr": 0.00015, "memory": 19783, "data_time": 0.00708, "decode.loss_ce": 0.23725, "decode.acc_seg": 90.30798, "loss": 0.23725, "time": 0.16547}
153
+ {"mode": "train", "epoch": 13, "iter": 7600, "lr": 0.00015, "memory": 19783, "data_time": 0.05558, "decode.loss_ce": 0.23152, "decode.acc_seg": 90.56859, "loss": 0.23152, "time": 0.22103}
154
+ {"mode": "train", "epoch": 13, "iter": 7650, "lr": 0.00015, "memory": 19783, "data_time": 0.00695, "decode.loss_ce": 0.23743, "decode.acc_seg": 90.40197, "loss": 0.23743, "time": 0.17243}
155
+ {"mode": "train", "epoch": 13, "iter": 7700, "lr": 0.00015, "memory": 19783, "data_time": 0.0071, "decode.loss_ce": 0.23772, "decode.acc_seg": 90.38538, "loss": 0.23772, "time": 0.16691}
156
+ {"mode": "train", "epoch": 13, "iter": 7750, "lr": 0.00015, "memory": 19783, "data_time": 0.00726, "decode.loss_ce": 0.23096, "decode.acc_seg": 90.64128, "loss": 0.23096, "time": 0.16896}
157
+ {"mode": "train", "epoch": 13, "iter": 7800, "lr": 0.00015, "memory": 19783, "data_time": 0.00713, "decode.loss_ce": 0.23442, "decode.acc_seg": 90.45397, "loss": 0.23442, "time": 0.16189}
158
+ {"mode": "train", "epoch": 13, "iter": 7850, "lr": 0.00015, "memory": 19783, "data_time": 0.007, "decode.loss_ce": 0.23541, "decode.acc_seg": 90.56818, "loss": 0.23541, "time": 0.16867}
159
+ {"mode": "train", "epoch": 13, "iter": 7900, "lr": 0.00015, "memory": 19783, "data_time": 0.00689, "decode.loss_ce": 0.23165, "decode.acc_seg": 90.62381, "loss": 0.23165, "time": 0.16922}
160
+ {"mode": "train", "epoch": 13, "iter": 7950, "lr": 0.00015, "memory": 19783, "data_time": 0.0072, "decode.loss_ce": 0.22949, "decode.acc_seg": 90.78639, "loss": 0.22949, "time": 0.16628}
161
+ {"mode": "train", "epoch": 13, "iter": 8000, "lr": 0.00015, "memory": 19783, "data_time": 0.00678, "decode.loss_ce": 0.22824, "decode.acc_seg": 90.75584, "loss": 0.22824, "time": 0.18786}
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/20230304_122534.log ADDED
The diff for this file is too large to render. See raw diff
 
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/20230304_122534.log.json ADDED
The diff for this file is too large to render. See raw diff
 
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
2
+ checkpoint = 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth'
3
+ model = dict(
4
+ type='EncoderDecoderFreeze',
5
+ freeze_parameters=['backbone', 'decode_head'],
6
+ pretrained=
7
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
8
+ backbone=dict(
9
+ type='MixVisionTransformerCustomInitWeights',
10
+ in_channels=3,
11
+ embed_dims=64,
12
+ num_stages=4,
13
+ num_layers=[3, 4, 6, 3],
14
+ num_heads=[1, 2, 5, 8],
15
+ patch_sizes=[7, 3, 3, 3],
16
+ sr_ratios=[8, 4, 2, 1],
17
+ out_indices=(0, 1, 2, 3),
18
+ mlp_ratio=4,
19
+ qkv_bias=True,
20
+ drop_rate=0.0,
21
+ attn_drop_rate=0.0,
22
+ drop_path_rate=0.1),
23
+ decode_head=dict(
24
+ type='SegformerHeadUnetFCHeadSingleStepMask',
25
+ pretrained=
26
+ 'pretrained/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth',
27
+ dim=128,
28
+ out_dim=256,
29
+ unet_channels=272,
30
+ dim_mults=[1, 1, 1],
31
+ cat_embedding_dim=16,
32
+ in_channels=[64, 128, 320, 512],
33
+ in_index=[0, 1, 2, 3],
34
+ channels=256,
35
+ dropout_ratio=0.1,
36
+ num_classes=151,
37
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
38
+ align_corners=False,
39
+ ignore_index=0,
40
+ loss_decode=dict(
41
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
42
+ train_cfg=dict(),
43
+ test_cfg=dict(mode='whole'))
44
+ dataset_type = 'ADE20K151Dataset'
45
+ data_root = 'data/ade/ADEChallengeData2016'
46
+ img_norm_cfg = dict(
47
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
48
+ crop_size = (512, 512)
49
+ train_pipeline = [
50
+ dict(type='LoadImageFromFile'),
51
+ dict(type='LoadAnnotations', reduce_zero_label=False),
52
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
53
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
54
+ dict(type='RandomFlip', prob=0.5),
55
+ dict(type='PhotoMetricDistortion'),
56
+ dict(
57
+ type='Normalize',
58
+ mean=[123.675, 116.28, 103.53],
59
+ std=[58.395, 57.12, 57.375],
60
+ to_rgb=True),
61
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
62
+ dict(type='DefaultFormatBundle'),
63
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
64
+ ]
65
+ test_pipeline = [
66
+ dict(type='LoadImageFromFile'),
67
+ dict(
68
+ type='MultiScaleFlipAug',
69
+ img_scale=(2048, 512),
70
+ flip=False,
71
+ transforms=[
72
+ dict(type='Resize', keep_ratio=True),
73
+ dict(type='RandomFlip'),
74
+ dict(
75
+ type='Normalize',
76
+ mean=[123.675, 116.28, 103.53],
77
+ std=[58.395, 57.12, 57.375],
78
+ to_rgb=True),
79
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
80
+ dict(type='ImageToTensor', keys=['img']),
81
+ dict(type='Collect', keys=['img'])
82
+ ])
83
+ ]
84
+ data = dict(
85
+ samples_per_gpu=4,
86
+ workers_per_gpu=4,
87
+ train=dict(
88
+ type='ADE20K151Dataset',
89
+ data_root='data/ade/ADEChallengeData2016',
90
+ img_dir='images/training',
91
+ ann_dir='annotations/training',
92
+ pipeline=[
93
+ dict(type='LoadImageFromFile'),
94
+ dict(type='LoadAnnotations', reduce_zero_label=False),
95
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
96
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
97
+ dict(type='RandomFlip', prob=0.5),
98
+ dict(type='PhotoMetricDistortion'),
99
+ dict(
100
+ type='Normalize',
101
+ mean=[123.675, 116.28, 103.53],
102
+ std=[58.395, 57.12, 57.375],
103
+ to_rgb=True),
104
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
105
+ dict(type='DefaultFormatBundle'),
106
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
107
+ ]),
108
+ val=dict(
109
+ type='ADE20K151Dataset',
110
+ data_root='data/ade/ADEChallengeData2016',
111
+ img_dir='images/validation',
112
+ ann_dir='annotations/validation',
113
+ pipeline=[
114
+ dict(type='LoadImageFromFile'),
115
+ dict(
116
+ type='MultiScaleFlipAug',
117
+ img_scale=(2048, 512),
118
+ flip=False,
119
+ transforms=[
120
+ dict(type='Resize', keep_ratio=True),
121
+ dict(type='RandomFlip'),
122
+ dict(
123
+ type='Normalize',
124
+ mean=[123.675, 116.28, 103.53],
125
+ std=[58.395, 57.12, 57.375],
126
+ to_rgb=True),
127
+ dict(
128
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
129
+ dict(type='ImageToTensor', keys=['img']),
130
+ dict(type='Collect', keys=['img'])
131
+ ])
132
+ ]),
133
+ test=dict(
134
+ type='ADE20K151Dataset',
135
+ data_root='data/ade/ADEChallengeData2016',
136
+ img_dir='images/validation',
137
+ ann_dir='annotations/validation',
138
+ pipeline=[
139
+ dict(type='LoadImageFromFile'),
140
+ dict(
141
+ type='MultiScaleFlipAug',
142
+ img_scale=(2048, 512),
143
+ flip=False,
144
+ transforms=[
145
+ dict(type='Resize', keep_ratio=True),
146
+ dict(type='RandomFlip'),
147
+ dict(
148
+ type='Normalize',
149
+ mean=[123.675, 116.28, 103.53],
150
+ std=[58.395, 57.12, 57.375],
151
+ to_rgb=True),
152
+ dict(
153
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
154
+ dict(type='ImageToTensor', keys=['img']),
155
+ dict(type='Collect', keys=['img'])
156
+ ])
157
+ ]))
158
+ log_config = dict(
159
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
160
+ dist_params = dict(backend='nccl')
161
+ log_level = 'INFO'
162
+ load_from = None
163
+ resume_from = None
164
+ workflow = [('train', 1)]
165
+ cudnn_benchmark = True
166
+ optimizer = dict(
167
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
168
+ optimizer_config = dict()
169
+ lr_config = dict(
170
+ policy='step',
171
+ warmup='linear',
172
+ warmup_iters=1000,
173
+ warmup_ratio=1e-06,
174
+ step=10000,
175
+ gamma=0.5,
176
+ min_lr=1e-06,
177
+ by_epoch=False)
178
+ runner = dict(type='IterBasedRunner', max_iters=80000)
179
+ checkpoint_config = dict(by_epoch=False, interval=8000)
180
+ evaluation = dict(
181
+ interval=8000, metric='mIoU', pre_eval=True, save_best='mIoU')
182
+ work_dir = './work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask'
183
+ gpu_ids = range(0, 8)
184
+ auto_resume = True
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/best_mIoU_iter_80000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:081c08833e6ff1558fecbbb2faf46fb5d8eac1ac84e8262ffdcc37b15a7a0a14
3
+ size 235548318
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_16000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d535ddd93d708f0cb46b2d4f94066a6e7b3e28a15c5ba3e149ea7faa8d23f91e
3
+ size 235548318
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_24000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d00a51da87942936bd198abc14b563649c0b16cf50b05a213c851e5796d9f0f
3
+ size 235548318
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_32000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7275faca1dd9925c1cc880b13a61d9e85021f7312bdcd0b6aa56091e1aa0a5f8
3
+ size 235548318
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_40000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7df7c1d42c842cfab9edbd862afd1922dccf0335b32af1f83faa24483a1c8191
3
+ size 235548318
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_48000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da5506d445951321beeb2ed61005415e06a3ca9f286236fa597d067971954695
3
+ size 235548318
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_56000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cd4ede6001fea5831fe892f2ba8b8e222ccf8019c592ed0763b443b86847f97
3
+ size 235548318
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_64000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae9768a3d8d2a7b90cb594ceca93ab5233c99ad513cc404b86ebb4c68801d807
3
+ size 235548318
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_72000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63d77ed6173ae933c1ba225b175550faeb7e09f1e1ae334dbce6437d88cec317
3
+ size 235548318
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_8000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:584332478ca3ec6ccb3370d1bdb91adc01484d87ffbec918c777050cda731c53
3
+ size 235547678
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/iter_80000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca82e7718d63dce0a800ea751b899f4f38cf0d4788a4378c5dc07a616e783d41
3
+ size 235548318
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151_mask/latest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca82e7718d63dce0a800ea751b899f4f38cf0d4788a4378c5dc07a616e783d41
3
+ size 235548318
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/20230305_231050.log ADDED
@@ -0,0 +1,1152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-03-05 23:10:50,099 - mmseg - INFO - Multi-processing start method is `None`
2
+ 2023-03-05 23:10:50,117 - mmseg - INFO - OpenCV num_threads is `128
3
+ 2023-03-05 23:10:50,117 - mmseg - INFO - OMP num threads is 1
4
+ 2023-03-05 23:10:50,169 - mmseg - INFO - Environment info:
5
+ ------------------------------------------------------------
6
+ sys.platform: linux
7
+ Python: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]
8
+ CUDA available: True
9
+ GPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB
10
+ CUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch
11
+ NVCC: Cuda compilation tools, release 11.6, V11.6.124
12
+ GCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)
13
+ PyTorch: 1.13.1
14
+ PyTorch compiling details: PyTorch built with:
15
+ - GCC 9.3
16
+ - C++ Version: 201402
17
+ - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications
18
+ - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
19
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
20
+ - LAPACK is enabled (usually provided by MKL)
21
+ - NNPACK is enabled
22
+ - CPU capability usage: AVX2
23
+ - CUDA Runtime 11.6
24
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37
25
+ - CuDNN 8.3.2 (built against CUDA 11.5)
26
+ - Magma 2.6.1
27
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
28
+
29
+ TorchVision: 0.14.1
30
+ OpenCV: 4.7.0
31
+ MMCV: 1.7.1
32
+ MMCV Compiler: GCC 9.3
33
+ MMCV CUDA Compiler: 11.6
34
+ MMSegmentation: 0.30.0+6db5ece
35
+ ------------------------------------------------------------
36
+
37
+ 2023-03-05 23:10:50,169 - mmseg - INFO - Distributed training: True
38
+ 2023-03-05 23:10:50,859 - mmseg - INFO - Config:
39
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
40
+ checkpoint = 'work_dirs2/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth'
41
+ model = dict(
42
+ type='EncoderDecoderDiffusion',
43
+ freeze_parameters=['backbone', 'decode_head'],
44
+ pretrained=
45
+ 'work_dirs2/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth',
46
+ backbone=dict(
47
+ type='MixVisionTransformerCustomInitWeights',
48
+ in_channels=3,
49
+ embed_dims=64,
50
+ num_stages=4,
51
+ num_layers=[3, 4, 6, 3],
52
+ num_heads=[1, 2, 5, 8],
53
+ patch_sizes=[7, 3, 3, 3],
54
+ sr_ratios=[8, 4, 2, 1],
55
+ out_indices=(0, 1, 2, 3),
56
+ mlp_ratio=4,
57
+ qkv_bias=True,
58
+ drop_rate=0.0,
59
+ attn_drop_rate=0.0,
60
+ drop_path_rate=0.1),
61
+ decode_head=dict(
62
+ type='SegformerHeadUnetFCHeadMultiStepCE',
63
+ pretrained=
64
+ 'work_dirs2/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth',
65
+ dim=128,
66
+ out_dim=256,
67
+ unet_channels=272,
68
+ dim_mults=[1, 1, 1],
69
+ cat_embedding_dim=16,
70
+ diffusion_timesteps=100,
71
+ collect_timesteps=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 99],
72
+ in_channels=[64, 128, 320, 512],
73
+ in_index=[0, 1, 2, 3],
74
+ channels=256,
75
+ dropout_ratio=0.1,
76
+ num_classes=151,
77
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
78
+ align_corners=False,
79
+ ignore_index=0,
80
+ loss_decode=dict(
81
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.1)),
82
+ train_cfg=dict(),
83
+ test_cfg=dict(mode='whole'))
84
+ dataset_type = 'ADE20K151Dataset'
85
+ data_root = 'data/ade/ADEChallengeData2016'
86
+ img_norm_cfg = dict(
87
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
88
+ crop_size = (512, 512)
89
+ train_pipeline = [
90
+ dict(type='LoadImageFromFile'),
91
+ dict(type='LoadAnnotations', reduce_zero_label=False),
92
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
93
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
94
+ dict(type='RandomFlip', prob=0.5),
95
+ dict(type='PhotoMetricDistortion'),
96
+ dict(
97
+ type='Normalize',
98
+ mean=[123.675, 116.28, 103.53],
99
+ std=[58.395, 57.12, 57.375],
100
+ to_rgb=True),
101
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
102
+ dict(type='DefaultFormatBundle'),
103
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
104
+ ]
105
+ test_pipeline = [
106
+ dict(type='LoadImageFromFile'),
107
+ dict(
108
+ type='MultiScaleFlipAug',
109
+ img_scale=(2048, 512),
110
+ flip=False,
111
+ transforms=[
112
+ dict(type='Resize', keep_ratio=True),
113
+ dict(type='RandomFlip'),
114
+ dict(
115
+ type='Normalize',
116
+ mean=[123.675, 116.28, 103.53],
117
+ std=[58.395, 57.12, 57.375],
118
+ to_rgb=True),
119
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
120
+ dict(type='ImageToTensor', keys=['img']),
121
+ dict(type='Collect', keys=['img'])
122
+ ])
123
+ ]
124
+ data = dict(
125
+ samples_per_gpu=4,
126
+ workers_per_gpu=4,
127
+ train=dict(
128
+ type='ADE20K151Dataset',
129
+ data_root='data/ade/ADEChallengeData2016',
130
+ img_dir='images/training',
131
+ ann_dir='annotations/training',
132
+ pipeline=[
133
+ dict(type='LoadImageFromFile'),
134
+ dict(type='LoadAnnotations', reduce_zero_label=False),
135
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
136
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
137
+ dict(type='RandomFlip', prob=0.5),
138
+ dict(type='PhotoMetricDistortion'),
139
+ dict(
140
+ type='Normalize',
141
+ mean=[123.675, 116.28, 103.53],
142
+ std=[58.395, 57.12, 57.375],
143
+ to_rgb=True),
144
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
145
+ dict(type='DefaultFormatBundle'),
146
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
147
+ ]),
148
+ val=dict(
149
+ type='ADE20K151Dataset',
150
+ data_root='data/ade/ADEChallengeData2016',
151
+ img_dir='images/validation',
152
+ ann_dir='annotations/validation',
153
+ pipeline=[
154
+ dict(type='LoadImageFromFile'),
155
+ dict(
156
+ type='MultiScaleFlipAug',
157
+ img_scale=(2048, 512),
158
+ flip=False,
159
+ transforms=[
160
+ dict(type='Resize', keep_ratio=True),
161
+ dict(type='RandomFlip'),
162
+ dict(
163
+ type='Normalize',
164
+ mean=[123.675, 116.28, 103.53],
165
+ std=[58.395, 57.12, 57.375],
166
+ to_rgb=True),
167
+ dict(
168
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
169
+ dict(type='ImageToTensor', keys=['img']),
170
+ dict(type='Collect', keys=['img'])
171
+ ])
172
+ ]),
173
+ test=dict(
174
+ type='ADE20K151Dataset',
175
+ data_root='data/ade/ADEChallengeData2016',
176
+ img_dir='images/validation',
177
+ ann_dir='annotations/validation',
178
+ pipeline=[
179
+ dict(type='LoadImageFromFile'),
180
+ dict(
181
+ type='MultiScaleFlipAug',
182
+ img_scale=(2048, 512),
183
+ flip=False,
184
+ transforms=[
185
+ dict(type='Resize', keep_ratio=True),
186
+ dict(type='RandomFlip'),
187
+ dict(
188
+ type='Normalize',
189
+ mean=[123.675, 116.28, 103.53],
190
+ std=[58.395, 57.12, 57.375],
191
+ to_rgb=True),
192
+ dict(
193
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
194
+ dict(type='ImageToTensor', keys=['img']),
195
+ dict(type='Collect', keys=['img'])
196
+ ])
197
+ ]))
198
+ log_config = dict(
199
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
200
+ dist_params = dict(backend='nccl')
201
+ log_level = 'INFO'
202
+ load_from = None
203
+ resume_from = None
204
+ workflow = [('train', 1)]
205
+ cudnn_benchmark = True
206
+ optimizer = dict(
207
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
208
+ optimizer_config = dict()
209
+ lr_config = dict(
210
+ policy='step',
211
+ warmup='linear',
212
+ warmup_iters=1000,
213
+ warmup_ratio=1e-06,
214
+ step=20000,
215
+ gamma=0.5,
216
+ min_lr=1e-06,
217
+ by_epoch=False)
218
+ runner = dict(type='IterBasedRunner', max_iters=160000)
219
+ checkpoint_config = dict(by_epoch=False, interval=16000, max_keep_ckpts=1)
220
+ evaluation = dict(
221
+ interval=16000, metric='mIoU', pre_eval=True, save_best='mIoU')
222
+ custom_hooks = [
223
+ dict(
224
+ type='ConstantMomentumEMAHook',
225
+ momentum=0.01,
226
+ interval=25,
227
+ eval_interval=16000,
228
+ auto_resume=True,
229
+ priority=49)
230
+ ]
231
+ work_dir = './work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce'
232
+ gpu_ids = range(0, 8)
233
+ auto_resume = True
234
+
235
+ 2023-03-05 23:10:55,198 - mmseg - INFO - Set random seed to 1580901347, deterministic: False
236
+ 2023-03-05 23:10:55,464 - mmseg - INFO - Parameters in backbone freezed!
237
+ 2023-03-05 23:10:55,465 - mmseg - INFO - Trainable parameters in SegformerHeadUnetFCHeadMultiStep: ['unet.init_conv.weight', 'unet.init_conv.bias', 'unet.time_mlp.1.weight', 'unet.time_mlp.1.bias', 'unet.time_mlp.3.weight', 'unet.time_mlp.3.bias', 'unet.downs.0.0.mlp.1.weight', 'unet.downs.0.0.mlp.1.bias', 'unet.downs.0.0.block1.proj.weight', 'unet.downs.0.0.block1.proj.bias', 'unet.downs.0.0.block1.norm.weight', 'unet.downs.0.0.block1.norm.bias', 'unet.downs.0.0.block2.proj.weight', 'unet.downs.0.0.block2.proj.bias', 'unet.downs.0.0.block2.norm.weight', 'unet.downs.0.0.block2.norm.bias', 'unet.downs.0.1.mlp.1.weight', 'unet.downs.0.1.mlp.1.bias', 'unet.downs.0.1.block1.proj.weight', 'unet.downs.0.1.block1.proj.bias', 'unet.downs.0.1.block1.norm.weight', 'unet.downs.0.1.block1.norm.bias', 'unet.downs.0.1.block2.proj.weight', 'unet.downs.0.1.block2.proj.bias', 'unet.downs.0.1.block2.norm.weight', 'unet.downs.0.1.block2.norm.bias', 'unet.downs.0.2.fn.fn.to_qkv.weight', 'unet.downs.0.2.fn.fn.to_out.0.weight', 'unet.downs.0.2.fn.fn.to_out.0.bias', 'unet.downs.0.2.fn.fn.to_out.1.g', 'unet.downs.0.2.fn.norm.g', 'unet.downs.0.3.weight', 'unet.downs.0.3.bias', 'unet.downs.1.0.mlp.1.weight', 'unet.downs.1.0.mlp.1.bias', 'unet.downs.1.0.block1.proj.weight', 'unet.downs.1.0.block1.proj.bias', 'unet.downs.1.0.block1.norm.weight', 'unet.downs.1.0.block1.norm.bias', 'unet.downs.1.0.block2.proj.weight', 'unet.downs.1.0.block2.proj.bias', 'unet.downs.1.0.block2.norm.weight', 'unet.downs.1.0.block2.norm.bias', 'unet.downs.1.1.mlp.1.weight', 'unet.downs.1.1.mlp.1.bias', 'unet.downs.1.1.block1.proj.weight', 'unet.downs.1.1.block1.proj.bias', 'unet.downs.1.1.block1.norm.weight', 'unet.downs.1.1.block1.norm.bias', 'unet.downs.1.1.block2.proj.weight', 'unet.downs.1.1.block2.proj.bias', 'unet.downs.1.1.block2.norm.weight', 'unet.downs.1.1.block2.norm.bias', 'unet.downs.1.2.fn.fn.to_qkv.weight', 'unet.downs.1.2.fn.fn.to_out.0.weight', 'unet.downs.1.2.fn.fn.to_out.0.bias', 'unet.downs.1.2.fn.fn.to_out.1.g', 'unet.downs.1.2.fn.norm.g', 'unet.downs.1.3.weight', 'unet.downs.1.3.bias', 'unet.downs.2.0.mlp.1.weight', 'unet.downs.2.0.mlp.1.bias', 'unet.downs.2.0.block1.proj.weight', 'unet.downs.2.0.block1.proj.bias', 'unet.downs.2.0.block1.norm.weight', 'unet.downs.2.0.block1.norm.bias', 'unet.downs.2.0.block2.proj.weight', 'unet.downs.2.0.block2.proj.bias', 'unet.downs.2.0.block2.norm.weight', 'unet.downs.2.0.block2.norm.bias', 'unet.downs.2.1.mlp.1.weight', 'unet.downs.2.1.mlp.1.bias', 'unet.downs.2.1.block1.proj.weight', 'unet.downs.2.1.block1.proj.bias', 'unet.downs.2.1.block1.norm.weight', 'unet.downs.2.1.block1.norm.bias', 'unet.downs.2.1.block2.proj.weight', 'unet.downs.2.1.block2.proj.bias', 'unet.downs.2.1.block2.norm.weight', 'unet.downs.2.1.block2.norm.bias', 'unet.downs.2.2.fn.fn.to_qkv.weight', 'unet.downs.2.2.fn.fn.to_out.0.weight', 'unet.downs.2.2.fn.fn.to_out.0.bias', 'unet.downs.2.2.fn.fn.to_out.1.g', 'unet.downs.2.2.fn.norm.g', 'unet.downs.2.3.weight', 'unet.downs.2.3.bias', 'unet.ups.0.0.mlp.1.weight', 'unet.ups.0.0.mlp.1.bias', 'unet.ups.0.0.block1.proj.weight', 'unet.ups.0.0.block1.proj.bias', 'unet.ups.0.0.block1.norm.weight', 'unet.ups.0.0.block1.norm.bias', 'unet.ups.0.0.block2.proj.weight', 'unet.ups.0.0.block2.proj.bias', 'unet.ups.0.0.block2.norm.weight', 'unet.ups.0.0.block2.norm.bias', 'unet.ups.0.0.res_conv.weight', 'unet.ups.0.0.res_conv.bias', 'unet.ups.0.1.mlp.1.weight', 'unet.ups.0.1.mlp.1.bias', 'unet.ups.0.1.block1.proj.weight', 'unet.ups.0.1.block1.proj.bias', 'unet.ups.0.1.block1.norm.weight', 'unet.ups.0.1.block1.norm.bias', 'unet.ups.0.1.block2.proj.weight', 'unet.ups.0.1.block2.proj.bias', 'unet.ups.0.1.block2.norm.weight', 'unet.ups.0.1.block2.norm.bias', 'unet.ups.0.1.res_conv.weight', 'unet.ups.0.1.res_conv.bias', 'unet.ups.0.2.fn.fn.to_qkv.weight', 'unet.ups.0.2.fn.fn.to_out.0.weight', 'unet.ups.0.2.fn.fn.to_out.0.bias', 'unet.ups.0.2.fn.fn.to_out.1.g', 'unet.ups.0.2.fn.norm.g', 'unet.ups.0.3.1.weight', 'unet.ups.0.3.1.bias', 'unet.ups.1.0.mlp.1.weight', 'unet.ups.1.0.mlp.1.bias', 'unet.ups.1.0.block1.proj.weight', 'unet.ups.1.0.block1.proj.bias', 'unet.ups.1.0.block1.norm.weight', 'unet.ups.1.0.block1.norm.bias', 'unet.ups.1.0.block2.proj.weight', 'unet.ups.1.0.block2.proj.bias', 'unet.ups.1.0.block2.norm.weight', 'unet.ups.1.0.block2.norm.bias', 'unet.ups.1.0.res_conv.weight', 'unet.ups.1.0.res_conv.bias', 'unet.ups.1.1.mlp.1.weight', 'unet.ups.1.1.mlp.1.bias', 'unet.ups.1.1.block1.proj.weight', 'unet.ups.1.1.block1.proj.bias', 'unet.ups.1.1.block1.norm.weight', 'unet.ups.1.1.block1.norm.bias', 'unet.ups.1.1.block2.proj.weight', 'unet.ups.1.1.block2.proj.bias', 'unet.ups.1.1.block2.norm.weight', 'unet.ups.1.1.block2.norm.bias', 'unet.ups.1.1.res_conv.weight', 'unet.ups.1.1.res_conv.bias', 'unet.ups.1.2.fn.fn.to_qkv.weight', 'unet.ups.1.2.fn.fn.to_out.0.weight', 'unet.ups.1.2.fn.fn.to_out.0.bias', 'unet.ups.1.2.fn.fn.to_out.1.g', 'unet.ups.1.2.fn.norm.g', 'unet.ups.1.3.1.weight', 'unet.ups.1.3.1.bias', 'unet.ups.2.0.mlp.1.weight', 'unet.ups.2.0.mlp.1.bias', 'unet.ups.2.0.block1.proj.weight', 'unet.ups.2.0.block1.proj.bias', 'unet.ups.2.0.block1.norm.weight', 'unet.ups.2.0.block1.norm.bias', 'unet.ups.2.0.block2.proj.weight', 'unet.ups.2.0.block2.proj.bias', 'unet.ups.2.0.block2.norm.weight', 'unet.ups.2.0.block2.norm.bias', 'unet.ups.2.0.res_conv.weight', 'unet.ups.2.0.res_conv.bias', 'unet.ups.2.1.mlp.1.weight', 'unet.ups.2.1.mlp.1.bias', 'unet.ups.2.1.block1.proj.weight', 'unet.ups.2.1.block1.proj.bias', 'unet.ups.2.1.block1.norm.weight', 'unet.ups.2.1.block1.norm.bias', 'unet.ups.2.1.block2.proj.weight', 'unet.ups.2.1.block2.proj.bias', 'unet.ups.2.1.block2.norm.weight', 'unet.ups.2.1.block2.norm.bias', 'unet.ups.2.1.res_conv.weight', 'unet.ups.2.1.res_conv.bias', 'unet.ups.2.2.fn.fn.to_qkv.weight', 'unet.ups.2.2.fn.fn.to_out.0.weight', 'unet.ups.2.2.fn.fn.to_out.0.bias', 'unet.ups.2.2.fn.fn.to_out.1.g', 'unet.ups.2.2.fn.norm.g', 'unet.ups.2.3.weight', 'unet.ups.2.3.bias', 'unet.mid_block1.mlp.1.weight', 'unet.mid_block1.mlp.1.bias', 'unet.mid_block1.block1.proj.weight', 'unet.mid_block1.block1.proj.bias', 'unet.mid_block1.block1.norm.weight', 'unet.mid_block1.block1.norm.bias', 'unet.mid_block1.block2.proj.weight', 'unet.mid_block1.block2.proj.bias', 'unet.mid_block1.block2.norm.weight', 'unet.mid_block1.block2.norm.bias', 'unet.mid_attn.fn.fn.to_qkv.weight', 'unet.mid_attn.fn.fn.to_out.weight', 'unet.mid_attn.fn.fn.to_out.bias', 'unet.mid_attn.fn.norm.g', 'unet.mid_block2.mlp.1.weight', 'unet.mid_block2.mlp.1.bias', 'unet.mid_block2.block1.proj.weight', 'unet.mid_block2.block1.proj.bias', 'unet.mid_block2.block1.norm.weight', 'unet.mid_block2.block1.norm.bias', 'unet.mid_block2.block2.proj.weight', 'unet.mid_block2.block2.proj.bias', 'unet.mid_block2.block2.norm.weight', 'unet.mid_block2.block2.norm.bias', 'unet.final_res_block.mlp.1.weight', 'unet.final_res_block.mlp.1.bias', 'unet.final_res_block.block1.proj.weight', 'unet.final_res_block.block1.proj.bias', 'unet.final_res_block.block1.norm.weight', 'unet.final_res_block.block1.norm.bias', 'unet.final_res_block.block2.proj.weight', 'unet.final_res_block.block2.proj.bias', 'unet.final_res_block.block2.norm.weight', 'unet.final_res_block.block2.norm.bias', 'unet.final_res_block.res_conv.weight', 'unet.final_res_block.res_conv.bias', 'unet.final_conv.weight', 'unet.final_conv.bias', 'conv_seg_new.weight', 'conv_seg_new.bias']
238
+ 2023-03-05 23:10:55,465 - mmseg - INFO - Parameters in decode_head freezed!
239
+ 2023-03-05 23:10:55,486 - mmseg - INFO - load checkpoint from local path: work_dirs2/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth
240
+ 2023-03-05 23:10:56,307 - mmseg - WARNING - The model and loaded state dict do not match exactly
241
+
242
+ unexpected key in source state_dict: decode_head.convs.0.conv.weight, decode_head.convs.0.bn.weight, decode_head.convs.0.bn.bias, decode_head.convs.0.bn.running_mean, decode_head.convs.0.bn.running_var, decode_head.convs.0.bn.num_batches_tracked, decode_head.convs.1.conv.weight, decode_head.convs.1.bn.weight, decode_head.convs.1.bn.bias, decode_head.convs.1.bn.running_mean, decode_head.convs.1.bn.running_var, decode_head.convs.1.bn.num_batches_tracked, decode_head.convs.2.conv.weight, decode_head.convs.2.bn.weight, decode_head.convs.2.bn.bias, decode_head.convs.2.bn.running_mean, decode_head.convs.2.bn.running_var, decode_head.convs.2.bn.num_batches_tracked, decode_head.convs.3.conv.weight, decode_head.convs.3.bn.weight, decode_head.convs.3.bn.bias, decode_head.convs.3.bn.running_mean, decode_head.convs.3.bn.running_var, decode_head.convs.3.bn.num_batches_tracked, decode_head.fusion_conv.conv.weight, decode_head.fusion_conv.bn.weight, decode_head.fusion_conv.bn.bias, decode_head.fusion_conv.bn.running_mean, decode_head.fusion_conv.bn.running_var, decode_head.fusion_conv.bn.num_batches_tracked, decode_head.unet.init_conv.weight, decode_head.unet.init_conv.bias, decode_head.unet.time_mlp.1.weight, decode_head.unet.time_mlp.1.bias, decode_head.unet.time_mlp.3.weight, decode_head.unet.time_mlp.3.bias, decode_head.unet.downs.0.0.mlp.1.weight, decode_head.unet.downs.0.0.mlp.1.bias, decode_head.unet.downs.0.0.block1.proj.weight, decode_head.unet.downs.0.0.block1.proj.bias, decode_head.unet.downs.0.0.block1.norm.weight, decode_head.unet.downs.0.0.block1.norm.bias, decode_head.unet.downs.0.0.block2.proj.weight, decode_head.unet.downs.0.0.block2.proj.bias, decode_head.unet.downs.0.0.block2.norm.weight, decode_head.unet.downs.0.0.block2.norm.bias, decode_head.unet.downs.0.1.mlp.1.weight, decode_head.unet.downs.0.1.mlp.1.bias, decode_head.unet.downs.0.1.block1.proj.weight, decode_head.unet.downs.0.1.block1.proj.bias, decode_head.unet.downs.0.1.block1.norm.weight, decode_head.unet.downs.0.1.block1.norm.bias, decode_head.unet.downs.0.1.block2.proj.weight, decode_head.unet.downs.0.1.block2.proj.bias, decode_head.unet.downs.0.1.block2.norm.weight, decode_head.unet.downs.0.1.block2.norm.bias, decode_head.unet.downs.0.2.fn.fn.to_qkv.weight, decode_head.unet.downs.0.2.fn.fn.to_out.0.weight, decode_head.unet.downs.0.2.fn.fn.to_out.0.bias, decode_head.unet.downs.0.2.fn.fn.to_out.1.g, decode_head.unet.downs.0.2.fn.norm.g, decode_head.unet.downs.0.3.weight, decode_head.unet.downs.0.3.bias, decode_head.unet.downs.1.0.mlp.1.weight, decode_head.unet.downs.1.0.mlp.1.bias, decode_head.unet.downs.1.0.block1.proj.weight, decode_head.unet.downs.1.0.block1.proj.bias, decode_head.unet.downs.1.0.block1.norm.weight, decode_head.unet.downs.1.0.block1.norm.bias, decode_head.unet.downs.1.0.block2.proj.weight, decode_head.unet.downs.1.0.block2.proj.bias, decode_head.unet.downs.1.0.block2.norm.weight, decode_head.unet.downs.1.0.block2.norm.bias, decode_head.unet.downs.1.1.mlp.1.weight, decode_head.unet.downs.1.1.mlp.1.bias, decode_head.unet.downs.1.1.block1.proj.weight, decode_head.unet.downs.1.1.block1.proj.bias, decode_head.unet.downs.1.1.block1.norm.weight, decode_head.unet.downs.1.1.block1.norm.bias, decode_head.unet.downs.1.1.block2.proj.weight, decode_head.unet.downs.1.1.block2.proj.bias, decode_head.unet.downs.1.1.block2.norm.weight, decode_head.unet.downs.1.1.block2.norm.bias, decode_head.unet.downs.1.2.fn.fn.to_qkv.weight, decode_head.unet.downs.1.2.fn.fn.to_out.0.weight, decode_head.unet.downs.1.2.fn.fn.to_out.0.bias, decode_head.unet.downs.1.2.fn.fn.to_out.1.g, decode_head.unet.downs.1.2.fn.norm.g, decode_head.unet.downs.1.3.weight, decode_head.unet.downs.1.3.bias, decode_head.unet.downs.2.0.mlp.1.weight, decode_head.unet.downs.2.0.mlp.1.bias, decode_head.unet.downs.2.0.block1.proj.weight, decode_head.unet.downs.2.0.block1.proj.bias, decode_head.unet.downs.2.0.block1.norm.weight, decode_head.unet.downs.2.0.block1.norm.bias, decode_head.unet.downs.2.0.block2.proj.weight, decode_head.unet.downs.2.0.block2.proj.bias, decode_head.unet.downs.2.0.block2.norm.weight, decode_head.unet.downs.2.0.block2.norm.bias, decode_head.unet.downs.2.1.mlp.1.weight, decode_head.unet.downs.2.1.mlp.1.bias, decode_head.unet.downs.2.1.block1.proj.weight, decode_head.unet.downs.2.1.block1.proj.bias, decode_head.unet.downs.2.1.block1.norm.weight, decode_head.unet.downs.2.1.block1.norm.bias, decode_head.unet.downs.2.1.block2.proj.weight, decode_head.unet.downs.2.1.block2.proj.bias, decode_head.unet.downs.2.1.block2.norm.weight, decode_head.unet.downs.2.1.block2.norm.bias, decode_head.unet.downs.2.2.fn.fn.to_qkv.weight, decode_head.unet.downs.2.2.fn.fn.to_out.0.weight, decode_head.unet.downs.2.2.fn.fn.to_out.0.bias, decode_head.unet.downs.2.2.fn.fn.to_out.1.g, decode_head.unet.downs.2.2.fn.norm.g, decode_head.unet.downs.2.3.weight, decode_head.unet.downs.2.3.bias, decode_head.unet.ups.0.0.mlp.1.weight, decode_head.unet.ups.0.0.mlp.1.bias, decode_head.unet.ups.0.0.block1.proj.weight, decode_head.unet.ups.0.0.block1.proj.bias, decode_head.unet.ups.0.0.block1.norm.weight, decode_head.unet.ups.0.0.block1.norm.bias, decode_head.unet.ups.0.0.block2.proj.weight, decode_head.unet.ups.0.0.block2.proj.bias, decode_head.unet.ups.0.0.block2.norm.weight, decode_head.unet.ups.0.0.block2.norm.bias, decode_head.unet.ups.0.0.res_conv.weight, decode_head.unet.ups.0.0.res_conv.bias, decode_head.unet.ups.0.1.mlp.1.weight, decode_head.unet.ups.0.1.mlp.1.bias, decode_head.unet.ups.0.1.block1.proj.weight, decode_head.unet.ups.0.1.block1.proj.bias, decode_head.unet.ups.0.1.block1.norm.weight, decode_head.unet.ups.0.1.block1.norm.bias, decode_head.unet.ups.0.1.block2.proj.weight, decode_head.unet.ups.0.1.block2.proj.bias, decode_head.unet.ups.0.1.block2.norm.weight, decode_head.unet.ups.0.1.block2.norm.bias, decode_head.unet.ups.0.1.res_conv.weight, decode_head.unet.ups.0.1.res_conv.bias, decode_head.unet.ups.0.2.fn.fn.to_qkv.weight, decode_head.unet.ups.0.2.fn.fn.to_out.0.weight, decode_head.unet.ups.0.2.fn.fn.to_out.0.bias, decode_head.unet.ups.0.2.fn.fn.to_out.1.g, decode_head.unet.ups.0.2.fn.norm.g, decode_head.unet.ups.0.3.1.weight, decode_head.unet.ups.0.3.1.bias, decode_head.unet.ups.1.0.mlp.1.weight, decode_head.unet.ups.1.0.mlp.1.bias, decode_head.unet.ups.1.0.block1.proj.weight, decode_head.unet.ups.1.0.block1.proj.bias, decode_head.unet.ups.1.0.block1.norm.weight, decode_head.unet.ups.1.0.block1.norm.bias, decode_head.unet.ups.1.0.block2.proj.weight, decode_head.unet.ups.1.0.block2.proj.bias, decode_head.unet.ups.1.0.block2.norm.weight, decode_head.unet.ups.1.0.block2.norm.bias, decode_head.unet.ups.1.0.res_conv.weight, decode_head.unet.ups.1.0.res_conv.bias, decode_head.unet.ups.1.1.mlp.1.weight, decode_head.unet.ups.1.1.mlp.1.bias, decode_head.unet.ups.1.1.block1.proj.weight, decode_head.unet.ups.1.1.block1.proj.bias, decode_head.unet.ups.1.1.block1.norm.weight, decode_head.unet.ups.1.1.block1.norm.bias, decode_head.unet.ups.1.1.block2.proj.weight, decode_head.unet.ups.1.1.block2.proj.bias, decode_head.unet.ups.1.1.block2.norm.weight, decode_head.unet.ups.1.1.block2.norm.bias, decode_head.unet.ups.1.1.res_conv.weight, decode_head.unet.ups.1.1.res_conv.bias, decode_head.unet.ups.1.2.fn.fn.to_qkv.weight, decode_head.unet.ups.1.2.fn.fn.to_out.0.weight, decode_head.unet.ups.1.2.fn.fn.to_out.0.bias, decode_head.unet.ups.1.2.fn.fn.to_out.1.g, decode_head.unet.ups.1.2.fn.norm.g, decode_head.unet.ups.1.3.1.weight, decode_head.unet.ups.1.3.1.bias, decode_head.unet.ups.2.0.mlp.1.weight, decode_head.unet.ups.2.0.mlp.1.bias, decode_head.unet.ups.2.0.block1.proj.weight, decode_head.unet.ups.2.0.block1.proj.bias, decode_head.unet.ups.2.0.block1.norm.weight, decode_head.unet.ups.2.0.block1.norm.bias, decode_head.unet.ups.2.0.block2.proj.weight, decode_head.unet.ups.2.0.block2.proj.bias, decode_head.unet.ups.2.0.block2.norm.weight, decode_head.unet.ups.2.0.block2.norm.bias, decode_head.unet.ups.2.0.res_conv.weight, decode_head.unet.ups.2.0.res_conv.bias, decode_head.unet.ups.2.1.mlp.1.weight, decode_head.unet.ups.2.1.mlp.1.bias, decode_head.unet.ups.2.1.block1.proj.weight, decode_head.unet.ups.2.1.block1.proj.bias, decode_head.unet.ups.2.1.block1.norm.weight, decode_head.unet.ups.2.1.block1.norm.bias, decode_head.unet.ups.2.1.block2.proj.weight, decode_head.unet.ups.2.1.block2.proj.bias, decode_head.unet.ups.2.1.block2.norm.weight, decode_head.unet.ups.2.1.block2.norm.bias, decode_head.unet.ups.2.1.res_conv.weight, decode_head.unet.ups.2.1.res_conv.bias, decode_head.unet.ups.2.2.fn.fn.to_qkv.weight, decode_head.unet.ups.2.2.fn.fn.to_out.0.weight, decode_head.unet.ups.2.2.fn.fn.to_out.0.bias, decode_head.unet.ups.2.2.fn.fn.to_out.1.g, decode_head.unet.ups.2.2.fn.norm.g, decode_head.unet.ups.2.3.weight, decode_head.unet.ups.2.3.bias, decode_head.unet.mid_block1.mlp.1.weight, decode_head.unet.mid_block1.mlp.1.bias, decode_head.unet.mid_block1.block1.proj.weight, decode_head.unet.mid_block1.block1.proj.bias, decode_head.unet.mid_block1.block1.norm.weight, decode_head.unet.mid_block1.block1.norm.bias, decode_head.unet.mid_block1.block2.proj.weight, decode_head.unet.mid_block1.block2.proj.bias, decode_head.unet.mid_block1.block2.norm.weight, decode_head.unet.mid_block1.block2.norm.bias, decode_head.unet.mid_attn.fn.fn.to_qkv.weight, decode_head.unet.mid_attn.fn.fn.to_out.weight, decode_head.unet.mid_attn.fn.fn.to_out.bias, decode_head.unet.mid_attn.fn.norm.g, decode_head.unet.mid_block2.mlp.1.weight, decode_head.unet.mid_block2.mlp.1.bias, decode_head.unet.mid_block2.block1.proj.weight, decode_head.unet.mid_block2.block1.proj.bias, decode_head.unet.mid_block2.block1.norm.weight, decode_head.unet.mid_block2.block1.norm.bias, decode_head.unet.mid_block2.block2.proj.weight, decode_head.unet.mid_block2.block2.proj.bias, decode_head.unet.mid_block2.block2.norm.weight, decode_head.unet.mid_block2.block2.norm.bias, decode_head.unet.final_res_block.mlp.1.weight, decode_head.unet.final_res_block.mlp.1.bias, decode_head.unet.final_res_block.block1.proj.weight, decode_head.unet.final_res_block.block1.proj.bias, decode_head.unet.final_res_block.block1.norm.weight, decode_head.unet.final_res_block.block1.norm.bias, decode_head.unet.final_res_block.block2.proj.weight, decode_head.unet.final_res_block.block2.proj.bias, decode_head.unet.final_res_block.block2.norm.weight, decode_head.unet.final_res_block.block2.norm.bias, decode_head.unet.final_res_block.res_conv.weight, decode_head.unet.final_res_block.res_conv.bias, decode_head.unet.final_conv.weight, decode_head.unet.final_conv.bias, decode_head.conv_seg_new.weight, decode_head.conv_seg_new.bias, decode_head.embed.weight
243
+
244
+ 2023-03-05 23:10:56,324 - mmseg - INFO - load checkpoint from local path: work_dirs2/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth
245
+ 2023-03-05 23:10:56,771 - mmseg - WARNING - The model and loaded state dict do not match exactly
246
+
247
+ unexpected key in source state_dict: backbone.layers.0.0.projection.weight, backbone.layers.0.0.projection.bias, backbone.layers.0.0.norm.weight, backbone.layers.0.0.norm.bias, backbone.layers.0.1.0.norm1.weight, backbone.layers.0.1.0.norm1.bias, backbone.layers.0.1.0.attn.attn.in_proj_weight, backbone.layers.0.1.0.attn.attn.in_proj_bias, backbone.layers.0.1.0.attn.attn.out_proj.weight, backbone.layers.0.1.0.attn.attn.out_proj.bias, backbone.layers.0.1.0.attn.sr.weight, backbone.layers.0.1.0.attn.sr.bias, backbone.layers.0.1.0.attn.norm.weight, backbone.layers.0.1.0.attn.norm.bias, backbone.layers.0.1.0.norm2.weight, backbone.layers.0.1.0.norm2.bias, backbone.layers.0.1.0.ffn.layers.0.weight, backbone.layers.0.1.0.ffn.layers.0.bias, backbone.layers.0.1.0.ffn.layers.1.weight, backbone.layers.0.1.0.ffn.layers.1.bias, backbone.layers.0.1.0.ffn.layers.4.weight, backbone.layers.0.1.0.ffn.layers.4.bias, backbone.layers.0.1.1.norm1.weight, backbone.layers.0.1.1.norm1.bias, backbone.layers.0.1.1.attn.attn.in_proj_weight, backbone.layers.0.1.1.attn.attn.in_proj_bias, backbone.layers.0.1.1.attn.attn.out_proj.weight, backbone.layers.0.1.1.attn.attn.out_proj.bias, backbone.layers.0.1.1.attn.sr.weight, backbone.layers.0.1.1.attn.sr.bias, backbone.layers.0.1.1.attn.norm.weight, backbone.layers.0.1.1.attn.norm.bias, backbone.layers.0.1.1.norm2.weight, backbone.layers.0.1.1.norm2.bias, backbone.layers.0.1.1.ffn.layers.0.weight, backbone.layers.0.1.1.ffn.layers.0.bias, backbone.layers.0.1.1.ffn.layers.1.weight, backbone.layers.0.1.1.ffn.layers.1.bias, backbone.layers.0.1.1.ffn.layers.4.weight, backbone.layers.0.1.1.ffn.layers.4.bias, backbone.layers.0.1.2.norm1.weight, backbone.layers.0.1.2.norm1.bias, backbone.layers.0.1.2.attn.attn.in_proj_weight, backbone.layers.0.1.2.attn.attn.in_proj_bias, backbone.layers.0.1.2.attn.attn.out_proj.weight, backbone.layers.0.1.2.attn.attn.out_proj.bias, backbone.layers.0.1.2.attn.sr.weight, backbone.layers.0.1.2.attn.sr.bias, backbone.layers.0.1.2.attn.norm.weight, backbone.layers.0.1.2.attn.norm.bias, backbone.layers.0.1.2.norm2.weight, backbone.layers.0.1.2.norm2.bias, backbone.layers.0.1.2.ffn.layers.0.weight, backbone.layers.0.1.2.ffn.layers.0.bias, backbone.layers.0.1.2.ffn.layers.1.weight, backbone.layers.0.1.2.ffn.layers.1.bias, backbone.layers.0.1.2.ffn.layers.4.weight, backbone.layers.0.1.2.ffn.layers.4.bias, backbone.layers.0.2.weight, backbone.layers.0.2.bias, backbone.layers.1.0.projection.weight, backbone.layers.1.0.projection.bias, backbone.layers.1.0.norm.weight, backbone.layers.1.0.norm.bias, backbone.layers.1.1.0.norm1.weight, backbone.layers.1.1.0.norm1.bias, backbone.layers.1.1.0.attn.attn.in_proj_weight, backbone.layers.1.1.0.attn.attn.in_proj_bias, backbone.layers.1.1.0.attn.attn.out_proj.weight, backbone.layers.1.1.0.attn.attn.out_proj.bias, backbone.layers.1.1.0.attn.sr.weight, backbone.layers.1.1.0.attn.sr.bias, backbone.layers.1.1.0.attn.norm.weight, backbone.layers.1.1.0.attn.norm.bias, backbone.layers.1.1.0.norm2.weight, backbone.layers.1.1.0.norm2.bias, backbone.layers.1.1.0.ffn.layers.0.weight, backbone.layers.1.1.0.ffn.layers.0.bias, backbone.layers.1.1.0.ffn.layers.1.weight, backbone.layers.1.1.0.ffn.layers.1.bias, backbone.layers.1.1.0.ffn.layers.4.weight, backbone.layers.1.1.0.ffn.layers.4.bias, backbone.layers.1.1.1.norm1.weight, backbone.layers.1.1.1.norm1.bias, backbone.layers.1.1.1.attn.attn.in_proj_weight, backbone.layers.1.1.1.attn.attn.in_proj_bias, backbone.layers.1.1.1.attn.attn.out_proj.weight, backbone.layers.1.1.1.attn.attn.out_proj.bias, backbone.layers.1.1.1.attn.sr.weight, backbone.layers.1.1.1.attn.sr.bias, backbone.layers.1.1.1.attn.norm.weight, backbone.layers.1.1.1.attn.norm.bias, backbone.layers.1.1.1.norm2.weight, backbone.layers.1.1.1.norm2.bias, backbone.layers.1.1.1.ffn.layers.0.weight, backbone.layers.1.1.1.ffn.layers.0.bias, backbone.layers.1.1.1.ffn.layers.1.weight, backbone.layers.1.1.1.ffn.layers.1.bias, backbone.layers.1.1.1.ffn.layers.4.weight, backbone.layers.1.1.1.ffn.layers.4.bias, backbone.layers.1.1.2.norm1.weight, backbone.layers.1.1.2.norm1.bias, backbone.layers.1.1.2.attn.attn.in_proj_weight, backbone.layers.1.1.2.attn.attn.in_proj_bias, backbone.layers.1.1.2.attn.attn.out_proj.weight, backbone.layers.1.1.2.attn.attn.out_proj.bias, backbone.layers.1.1.2.attn.sr.weight, backbone.layers.1.1.2.attn.sr.bias, backbone.layers.1.1.2.attn.norm.weight, backbone.layers.1.1.2.attn.norm.bias, backbone.layers.1.1.2.norm2.weight, backbone.layers.1.1.2.norm2.bias, backbone.layers.1.1.2.ffn.layers.0.weight, backbone.layers.1.1.2.ffn.layers.0.bias, backbone.layers.1.1.2.ffn.layers.1.weight, backbone.layers.1.1.2.ffn.layers.1.bias, backbone.layers.1.1.2.ffn.layers.4.weight, backbone.layers.1.1.2.ffn.layers.4.bias, backbone.layers.1.1.3.norm1.weight, backbone.layers.1.1.3.norm1.bias, backbone.layers.1.1.3.attn.attn.in_proj_weight, backbone.layers.1.1.3.attn.attn.in_proj_bias, backbone.layers.1.1.3.attn.attn.out_proj.weight, backbone.layers.1.1.3.attn.attn.out_proj.bias, backbone.layers.1.1.3.attn.sr.weight, backbone.layers.1.1.3.attn.sr.bias, backbone.layers.1.1.3.attn.norm.weight, backbone.layers.1.1.3.attn.norm.bias, backbone.layers.1.1.3.norm2.weight, backbone.layers.1.1.3.norm2.bias, backbone.layers.1.1.3.ffn.layers.0.weight, backbone.layers.1.1.3.ffn.layers.0.bias, backbone.layers.1.1.3.ffn.layers.1.weight, backbone.layers.1.1.3.ffn.layers.1.bias, backbone.layers.1.1.3.ffn.layers.4.weight, backbone.layers.1.1.3.ffn.layers.4.bias, backbone.layers.1.2.weight, backbone.layers.1.2.bias, backbone.layers.2.0.projection.weight, backbone.layers.2.0.projection.bias, backbone.layers.2.0.norm.weight, backbone.layers.2.0.norm.bias, backbone.layers.2.1.0.norm1.weight, backbone.layers.2.1.0.norm1.bias, backbone.layers.2.1.0.attn.attn.in_proj_weight, backbone.layers.2.1.0.attn.attn.in_proj_bias, backbone.layers.2.1.0.attn.attn.out_proj.weight, backbone.layers.2.1.0.attn.attn.out_proj.bias, backbone.layers.2.1.0.attn.sr.weight, backbone.layers.2.1.0.attn.sr.bias, backbone.layers.2.1.0.attn.norm.weight, backbone.layers.2.1.0.attn.norm.bias, backbone.layers.2.1.0.norm2.weight, backbone.layers.2.1.0.norm2.bias, backbone.layers.2.1.0.ffn.layers.0.weight, backbone.layers.2.1.0.ffn.layers.0.bias, backbone.layers.2.1.0.ffn.layers.1.weight, backbone.layers.2.1.0.ffn.layers.1.bias, backbone.layers.2.1.0.ffn.layers.4.weight, backbone.layers.2.1.0.ffn.layers.4.bias, backbone.layers.2.1.1.norm1.weight, backbone.layers.2.1.1.norm1.bias, backbone.layers.2.1.1.attn.attn.in_proj_weight, backbone.layers.2.1.1.attn.attn.in_proj_bias, backbone.layers.2.1.1.attn.attn.out_proj.weight, backbone.layers.2.1.1.attn.attn.out_proj.bias, backbone.layers.2.1.1.attn.sr.weight, backbone.layers.2.1.1.attn.sr.bias, backbone.layers.2.1.1.attn.norm.weight, backbone.layers.2.1.1.attn.norm.bias, backbone.layers.2.1.1.norm2.weight, backbone.layers.2.1.1.norm2.bias, backbone.layers.2.1.1.ffn.layers.0.weight, backbone.layers.2.1.1.ffn.layers.0.bias, backbone.layers.2.1.1.ffn.layers.1.weight, backbone.layers.2.1.1.ffn.layers.1.bias, backbone.layers.2.1.1.ffn.layers.4.weight, backbone.layers.2.1.1.ffn.layers.4.bias, backbone.layers.2.1.2.norm1.weight, backbone.layers.2.1.2.norm1.bias, backbone.layers.2.1.2.attn.attn.in_proj_weight, backbone.layers.2.1.2.attn.attn.in_proj_bias, backbone.layers.2.1.2.attn.attn.out_proj.weight, backbone.layers.2.1.2.attn.attn.out_proj.bias, backbone.layers.2.1.2.attn.sr.weight, backbone.layers.2.1.2.attn.sr.bias, backbone.layers.2.1.2.attn.norm.weight, backbone.layers.2.1.2.attn.norm.bias, backbone.layers.2.1.2.norm2.weight, backbone.layers.2.1.2.norm2.bias, backbone.layers.2.1.2.ffn.layers.0.weight, backbone.layers.2.1.2.ffn.layers.0.bias, backbone.layers.2.1.2.ffn.layers.1.weight, backbone.layers.2.1.2.ffn.layers.1.bias, backbone.layers.2.1.2.ffn.layers.4.weight, backbone.layers.2.1.2.ffn.layers.4.bias, backbone.layers.2.1.3.norm1.weight, backbone.layers.2.1.3.norm1.bias, backbone.layers.2.1.3.attn.attn.in_proj_weight, backbone.layers.2.1.3.attn.attn.in_proj_bias, backbone.layers.2.1.3.attn.attn.out_proj.weight, backbone.layers.2.1.3.attn.attn.out_proj.bias, backbone.layers.2.1.3.attn.sr.weight, backbone.layers.2.1.3.attn.sr.bias, backbone.layers.2.1.3.attn.norm.weight, backbone.layers.2.1.3.attn.norm.bias, backbone.layers.2.1.3.norm2.weight, backbone.layers.2.1.3.norm2.bias, backbone.layers.2.1.3.ffn.layers.0.weight, backbone.layers.2.1.3.ffn.layers.0.bias, backbone.layers.2.1.3.ffn.layers.1.weight, backbone.layers.2.1.3.ffn.layers.1.bias, backbone.layers.2.1.3.ffn.layers.4.weight, backbone.layers.2.1.3.ffn.layers.4.bias, backbone.layers.2.1.4.norm1.weight, backbone.layers.2.1.4.norm1.bias, backbone.layers.2.1.4.attn.attn.in_proj_weight, backbone.layers.2.1.4.attn.attn.in_proj_bias, backbone.layers.2.1.4.attn.attn.out_proj.weight, backbone.layers.2.1.4.attn.attn.out_proj.bias, backbone.layers.2.1.4.attn.sr.weight, backbone.layers.2.1.4.attn.sr.bias, backbone.layers.2.1.4.attn.norm.weight, backbone.layers.2.1.4.attn.norm.bias, backbone.layers.2.1.4.norm2.weight, backbone.layers.2.1.4.norm2.bias, backbone.layers.2.1.4.ffn.layers.0.weight, backbone.layers.2.1.4.ffn.layers.0.bias, backbone.layers.2.1.4.ffn.layers.1.weight, backbone.layers.2.1.4.ffn.layers.1.bias, backbone.layers.2.1.4.ffn.layers.4.weight, backbone.layers.2.1.4.ffn.layers.4.bias, backbone.layers.2.1.5.norm1.weight, backbone.layers.2.1.5.norm1.bias, backbone.layers.2.1.5.attn.attn.in_proj_weight, backbone.layers.2.1.5.attn.attn.in_proj_bias, backbone.layers.2.1.5.attn.attn.out_proj.weight, backbone.layers.2.1.5.attn.attn.out_proj.bias, backbone.layers.2.1.5.attn.sr.weight, backbone.layers.2.1.5.attn.sr.bias, backbone.layers.2.1.5.attn.norm.weight, backbone.layers.2.1.5.attn.norm.bias, backbone.layers.2.1.5.norm2.weight, backbone.layers.2.1.5.norm2.bias, backbone.layers.2.1.5.ffn.layers.0.weight, backbone.layers.2.1.5.ffn.layers.0.bias, backbone.layers.2.1.5.ffn.layers.1.weight, backbone.layers.2.1.5.ffn.layers.1.bias, backbone.layers.2.1.5.ffn.layers.4.weight, backbone.layers.2.1.5.ffn.layers.4.bias, backbone.layers.2.2.weight, backbone.layers.2.2.bias, backbone.layers.3.0.projection.weight, backbone.layers.3.0.projection.bias, backbone.layers.3.0.norm.weight, backbone.layers.3.0.norm.bias, backbone.layers.3.1.0.norm1.weight, backbone.layers.3.1.0.norm1.bias, backbone.layers.3.1.0.attn.attn.in_proj_weight, backbone.layers.3.1.0.attn.attn.in_proj_bias, backbone.layers.3.1.0.attn.attn.out_proj.weight, backbone.layers.3.1.0.attn.attn.out_proj.bias, backbone.layers.3.1.0.norm2.weight, backbone.layers.3.1.0.norm2.bias, backbone.layers.3.1.0.ffn.layers.0.weight, backbone.layers.3.1.0.ffn.layers.0.bias, backbone.layers.3.1.0.ffn.layers.1.weight, backbone.layers.3.1.0.ffn.layers.1.bias, backbone.layers.3.1.0.ffn.layers.4.weight, backbone.layers.3.1.0.ffn.layers.4.bias, backbone.layers.3.1.1.norm1.weight, backbone.layers.3.1.1.norm1.bias, backbone.layers.3.1.1.attn.attn.in_proj_weight, backbone.layers.3.1.1.attn.attn.in_proj_bias, backbone.layers.3.1.1.attn.attn.out_proj.weight, backbone.layers.3.1.1.attn.attn.out_proj.bias, backbone.layers.3.1.1.norm2.weight, backbone.layers.3.1.1.norm2.bias, backbone.layers.3.1.1.ffn.layers.0.weight, backbone.layers.3.1.1.ffn.layers.0.bias, backbone.layers.3.1.1.ffn.layers.1.weight, backbone.layers.3.1.1.ffn.layers.1.bias, backbone.layers.3.1.1.ffn.layers.4.weight, backbone.layers.3.1.1.ffn.layers.4.bias, backbone.layers.3.1.2.norm1.weight, backbone.layers.3.1.2.norm1.bias, backbone.layers.3.1.2.attn.attn.in_proj_weight, backbone.layers.3.1.2.attn.attn.in_proj_bias, backbone.layers.3.1.2.attn.attn.out_proj.weight, backbone.layers.3.1.2.attn.attn.out_proj.bias, backbone.layers.3.1.2.norm2.weight, backbone.layers.3.1.2.norm2.bias, backbone.layers.3.1.2.ffn.layers.0.weight, backbone.layers.3.1.2.ffn.layers.0.bias, backbone.layers.3.1.2.ffn.layers.1.weight, backbone.layers.3.1.2.ffn.layers.1.bias, backbone.layers.3.1.2.ffn.layers.4.weight, backbone.layers.3.1.2.ffn.layers.4.bias, backbone.layers.3.2.weight, backbone.layers.3.2.bias
248
+
249
+ missing keys in source state_dict: log_cumprod_at, log_cumprod_bt, log_at, log_bt
250
+
251
+ 2023-03-05 23:10:56,795 - mmseg - INFO - EncoderDecoderDiffusion(
252
+ (backbone): MixVisionTransformerCustomInitWeights(
253
+ (layers): ModuleList(
254
+ (0): ModuleList(
255
+ (0): PatchEmbed(
256
+ (projection): Conv2d(3, 64, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
257
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
258
+ )
259
+ (1): ModuleList(
260
+ (0): TransformerEncoderLayer(
261
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
262
+ (attn): EfficientMultiheadAttention(
263
+ (attn): MultiheadAttention(
264
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
265
+ )
266
+ (proj_drop): Dropout(p=0.0, inplace=False)
267
+ (dropout_layer): DropPath()
268
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
269
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
270
+ )
271
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
272
+ (ffn): MixFFN(
273
+ (activate): GELU(approximate='none')
274
+ (layers): Sequential(
275
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
276
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
277
+ (2): GELU(approximate='none')
278
+ (3): Dropout(p=0.0, inplace=False)
279
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
280
+ (5): Dropout(p=0.0, inplace=False)
281
+ )
282
+ (dropout_layer): DropPath()
283
+ )
284
+ )
285
+ (1): TransformerEncoderLayer(
286
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
287
+ (attn): EfficientMultiheadAttention(
288
+ (attn): MultiheadAttention(
289
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
290
+ )
291
+ (proj_drop): Dropout(p=0.0, inplace=False)
292
+ (dropout_layer): DropPath()
293
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
294
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
295
+ )
296
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
297
+ (ffn): MixFFN(
298
+ (activate): GELU(approximate='none')
299
+ (layers): Sequential(
300
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
301
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
302
+ (2): GELU(approximate='none')
303
+ (3): Dropout(p=0.0, inplace=False)
304
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
305
+ (5): Dropout(p=0.0, inplace=False)
306
+ )
307
+ (dropout_layer): DropPath()
308
+ )
309
+ )
310
+ (2): TransformerEncoderLayer(
311
+ (norm1): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
312
+ (attn): EfficientMultiheadAttention(
313
+ (attn): MultiheadAttention(
314
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
315
+ )
316
+ (proj_drop): Dropout(p=0.0, inplace=False)
317
+ (dropout_layer): DropPath()
318
+ (sr): Conv2d(64, 64, kernel_size=(8, 8), stride=(8, 8))
319
+ (norm): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
320
+ )
321
+ (norm2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
322
+ (ffn): MixFFN(
323
+ (activate): GELU(approximate='none')
324
+ (layers): Sequential(
325
+ (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
326
+ (1): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256)
327
+ (2): GELU(approximate='none')
328
+ (3): Dropout(p=0.0, inplace=False)
329
+ (4): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
330
+ (5): Dropout(p=0.0, inplace=False)
331
+ )
332
+ (dropout_layer): DropPath()
333
+ )
334
+ )
335
+ )
336
+ (2): LayerNorm((64,), eps=1e-06, elementwise_affine=True)
337
+ )
338
+ (1): ModuleList(
339
+ (0): PatchEmbed(
340
+ (projection): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
341
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
342
+ )
343
+ (1): ModuleList(
344
+ (0): TransformerEncoderLayer(
345
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
346
+ (attn): EfficientMultiheadAttention(
347
+ (attn): MultiheadAttention(
348
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
349
+ )
350
+ (proj_drop): Dropout(p=0.0, inplace=False)
351
+ (dropout_layer): DropPath()
352
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
353
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
354
+ )
355
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
356
+ (ffn): MixFFN(
357
+ (activate): GELU(approximate='none')
358
+ (layers): Sequential(
359
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
360
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
361
+ (2): GELU(approximate='none')
362
+ (3): Dropout(p=0.0, inplace=False)
363
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
364
+ (5): Dropout(p=0.0, inplace=False)
365
+ )
366
+ (dropout_layer): DropPath()
367
+ )
368
+ )
369
+ (1): TransformerEncoderLayer(
370
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
371
+ (attn): EfficientMultiheadAttention(
372
+ (attn): MultiheadAttention(
373
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
374
+ )
375
+ (proj_drop): Dropout(p=0.0, inplace=False)
376
+ (dropout_layer): DropPath()
377
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
378
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
379
+ )
380
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
381
+ (ffn): MixFFN(
382
+ (activate): GELU(approximate='none')
383
+ (layers): Sequential(
384
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
385
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
386
+ (2): GELU(approximate='none')
387
+ (3): Dropout(p=0.0, inplace=False)
388
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
389
+ (5): Dropout(p=0.0, inplace=False)
390
+ )
391
+ (dropout_layer): DropPath()
392
+ )
393
+ )
394
+ (2): TransformerEncoderLayer(
395
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
396
+ (attn): EfficientMultiheadAttention(
397
+ (attn): MultiheadAttention(
398
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
399
+ )
400
+ (proj_drop): Dropout(p=0.0, inplace=False)
401
+ (dropout_layer): DropPath()
402
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
403
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
404
+ )
405
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
406
+ (ffn): MixFFN(
407
+ (activate): GELU(approximate='none')
408
+ (layers): Sequential(
409
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
410
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
411
+ (2): GELU(approximate='none')
412
+ (3): Dropout(p=0.0, inplace=False)
413
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
414
+ (5): Dropout(p=0.0, inplace=False)
415
+ )
416
+ (dropout_layer): DropPath()
417
+ )
418
+ )
419
+ (3): TransformerEncoderLayer(
420
+ (norm1): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
421
+ (attn): EfficientMultiheadAttention(
422
+ (attn): MultiheadAttention(
423
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
424
+ )
425
+ (proj_drop): Dropout(p=0.0, inplace=False)
426
+ (dropout_layer): DropPath()
427
+ (sr): Conv2d(128, 128, kernel_size=(4, 4), stride=(4, 4))
428
+ (norm): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
429
+ )
430
+ (norm2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
431
+ (ffn): MixFFN(
432
+ (activate): GELU(approximate='none')
433
+ (layers): Sequential(
434
+ (0): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
435
+ (1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=512)
436
+ (2): GELU(approximate='none')
437
+ (3): Dropout(p=0.0, inplace=False)
438
+ (4): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1))
439
+ (5): Dropout(p=0.0, inplace=False)
440
+ )
441
+ (dropout_layer): DropPath()
442
+ )
443
+ )
444
+ )
445
+ (2): LayerNorm((128,), eps=1e-06, elementwise_affine=True)
446
+ )
447
+ (2): ModuleList(
448
+ (0): PatchEmbed(
449
+ (projection): Conv2d(128, 320, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
450
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
451
+ )
452
+ (1): ModuleList(
453
+ (0): TransformerEncoderLayer(
454
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
455
+ (attn): EfficientMultiheadAttention(
456
+ (attn): MultiheadAttention(
457
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
458
+ )
459
+ (proj_drop): Dropout(p=0.0, inplace=False)
460
+ (dropout_layer): DropPath()
461
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
462
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
463
+ )
464
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
465
+ (ffn): MixFFN(
466
+ (activate): GELU(approximate='none')
467
+ (layers): Sequential(
468
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
469
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
470
+ (2): GELU(approximate='none')
471
+ (3): Dropout(p=0.0, inplace=False)
472
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
473
+ (5): Dropout(p=0.0, inplace=False)
474
+ )
475
+ (dropout_layer): DropPath()
476
+ )
477
+ )
478
+ (1): TransformerEncoderLayer(
479
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
480
+ (attn): EfficientMultiheadAttention(
481
+ (attn): MultiheadAttention(
482
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
483
+ )
484
+ (proj_drop): Dropout(p=0.0, inplace=False)
485
+ (dropout_layer): DropPath()
486
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
487
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
488
+ )
489
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
490
+ (ffn): MixFFN(
491
+ (activate): GELU(approximate='none')
492
+ (layers): Sequential(
493
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
494
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
495
+ (2): GELU(approximate='none')
496
+ (3): Dropout(p=0.0, inplace=False)
497
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
498
+ (5): Dropout(p=0.0, inplace=False)
499
+ )
500
+ (dropout_layer): DropPath()
501
+ )
502
+ )
503
+ (2): TransformerEncoderLayer(
504
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
505
+ (attn): EfficientMultiheadAttention(
506
+ (attn): MultiheadAttention(
507
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
508
+ )
509
+ (proj_drop): Dropout(p=0.0, inplace=False)
510
+ (dropout_layer): DropPath()
511
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
512
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
513
+ )
514
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
515
+ (ffn): MixFFN(
516
+ (activate): GELU(approximate='none')
517
+ (layers): Sequential(
518
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
519
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
520
+ (2): GELU(approximate='none')
521
+ (3): Dropout(p=0.0, inplace=False)
522
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
523
+ (5): Dropout(p=0.0, inplace=False)
524
+ )
525
+ (dropout_layer): DropPath()
526
+ )
527
+ )
528
+ (3): TransformerEncoderLayer(
529
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
530
+ (attn): EfficientMultiheadAttention(
531
+ (attn): MultiheadAttention(
532
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
533
+ )
534
+ (proj_drop): Dropout(p=0.0, inplace=False)
535
+ (dropout_layer): DropPath()
536
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
537
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
538
+ )
539
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
540
+ (ffn): MixFFN(
541
+ (activate): GELU(approximate='none')
542
+ (layers): Sequential(
543
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
544
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
545
+ (2): GELU(approximate='none')
546
+ (3): Dropout(p=0.0, inplace=False)
547
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
548
+ (5): Dropout(p=0.0, inplace=False)
549
+ )
550
+ (dropout_layer): DropPath()
551
+ )
552
+ )
553
+ (4): TransformerEncoderLayer(
554
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
555
+ (attn): EfficientMultiheadAttention(
556
+ (attn): MultiheadAttention(
557
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
558
+ )
559
+ (proj_drop): Dropout(p=0.0, inplace=False)
560
+ (dropout_layer): DropPath()
561
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
562
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
563
+ )
564
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
565
+ (ffn): MixFFN(
566
+ (activate): GELU(approximate='none')
567
+ (layers): Sequential(
568
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
569
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
570
+ (2): GELU(approximate='none')
571
+ (3): Dropout(p=0.0, inplace=False)
572
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
573
+ (5): Dropout(p=0.0, inplace=False)
574
+ )
575
+ (dropout_layer): DropPath()
576
+ )
577
+ )
578
+ (5): TransformerEncoderLayer(
579
+ (norm1): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
580
+ (attn): EfficientMultiheadAttention(
581
+ (attn): MultiheadAttention(
582
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=320, out_features=320, bias=True)
583
+ )
584
+ (proj_drop): Dropout(p=0.0, inplace=False)
585
+ (dropout_layer): DropPath()
586
+ (sr): Conv2d(320, 320, kernel_size=(2, 2), stride=(2, 2))
587
+ (norm): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
588
+ )
589
+ (norm2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
590
+ (ffn): MixFFN(
591
+ (activate): GELU(approximate='none')
592
+ (layers): Sequential(
593
+ (0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1))
594
+ (1): Conv2d(1280, 1280, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1280)
595
+ (2): GELU(approximate='none')
596
+ (3): Dropout(p=0.0, inplace=False)
597
+ (4): Conv2d(1280, 320, kernel_size=(1, 1), stride=(1, 1))
598
+ (5): Dropout(p=0.0, inplace=False)
599
+ )
600
+ (dropout_layer): DropPath()
601
+ )
602
+ )
603
+ )
604
+ (2): LayerNorm((320,), eps=1e-06, elementwise_affine=True)
605
+ )
606
+ (3): ModuleList(
607
+ (0): PatchEmbed(
608
+ (projection): Conv2d(320, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
609
+ (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
610
+ )
611
+ (1): ModuleList(
612
+ (0): TransformerEncoderLayer(
613
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
614
+ (attn): EfficientMultiheadAttention(
615
+ (attn): MultiheadAttention(
616
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
617
+ )
618
+ (proj_drop): Dropout(p=0.0, inplace=False)
619
+ (dropout_layer): DropPath()
620
+ )
621
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
622
+ (ffn): MixFFN(
623
+ (activate): GELU(approximate='none')
624
+ (layers): Sequential(
625
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
626
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
627
+ (2): GELU(approximate='none')
628
+ (3): Dropout(p=0.0, inplace=False)
629
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
630
+ (5): Dropout(p=0.0, inplace=False)
631
+ )
632
+ (dropout_layer): DropPath()
633
+ )
634
+ )
635
+ (1): TransformerEncoderLayer(
636
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
637
+ (attn): EfficientMultiheadAttention(
638
+ (attn): MultiheadAttention(
639
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
640
+ )
641
+ (proj_drop): Dropout(p=0.0, inplace=False)
642
+ (dropout_layer): DropPath()
643
+ )
644
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
645
+ (ffn): MixFFN(
646
+ (activate): GELU(approximate='none')
647
+ (layers): Sequential(
648
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
649
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
650
+ (2): GELU(approximate='none')
651
+ (3): Dropout(p=0.0, inplace=False)
652
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
653
+ (5): Dropout(p=0.0, inplace=False)
654
+ )
655
+ (dropout_layer): DropPath()
656
+ )
657
+ )
658
+ (2): TransformerEncoderLayer(
659
+ (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
660
+ (attn): EfficientMultiheadAttention(
661
+ (attn): MultiheadAttention(
662
+ (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
663
+ )
664
+ (proj_drop): Dropout(p=0.0, inplace=False)
665
+ (dropout_layer): DropPath()
666
+ )
667
+ (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
668
+ (ffn): MixFFN(
669
+ (activate): GELU(approximate='none')
670
+ (layers): Sequential(
671
+ (0): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
672
+ (1): Conv2d(2048, 2048, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2048)
673
+ (2): GELU(approximate='none')
674
+ (3): Dropout(p=0.0, inplace=False)
675
+ (4): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1))
676
+ (5): Dropout(p=0.0, inplace=False)
677
+ )
678
+ (dropout_layer): DropPath()
679
+ )
680
+ )
681
+ )
682
+ (2): LayerNorm((512,), eps=1e-06, elementwise_affine=True)
683
+ )
684
+ )
685
+ )
686
+ init_cfg={'type': 'Pretrained', 'checkpoint': 'work_dirs2/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth'}
687
+ (decode_head): SegformerHeadUnetFCHeadMultiStepCE(
688
+ input_transform=multiple_select, ignore_index=0, align_corners=False
689
+ (loss_decode): CrossEntropyLoss(avg_non_ignore=False)
690
+ (conv_seg): None
691
+ (dropout): Dropout2d(p=0.1, inplace=False)
692
+ (convs): ModuleList(
693
+ (0): ConvModule(
694
+ (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
695
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
696
+ (activate): ReLU(inplace=True)
697
+ )
698
+ (1): ConvModule(
699
+ (conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
700
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
701
+ (activate): ReLU(inplace=True)
702
+ )
703
+ (2): ConvModule(
704
+ (conv): Conv2d(320, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
705
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
706
+ (activate): ReLU(inplace=True)
707
+ )
708
+ (3): ConvModule(
709
+ (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
710
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
711
+ (activate): ReLU(inplace=True)
712
+ )
713
+ )
714
+ (fusion_conv): ConvModule(
715
+ (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
716
+ (bn): SyncBatchNorm(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
717
+ (activate): ReLU(inplace=True)
718
+ )
719
+ (unet): Unet(
720
+ (init_conv): Conv2d(272, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
721
+ (time_mlp): Sequential(
722
+ (0): SinusoidalPosEmb()
723
+ (1): Linear(in_features=128, out_features=512, bias=True)
724
+ (2): GELU(approximate='none')
725
+ (3): Linear(in_features=512, out_features=512, bias=True)
726
+ )
727
+ (downs): ModuleList(
728
+ (0): ModuleList(
729
+ (0): ResnetBlock(
730
+ (mlp): Sequential(
731
+ (0): SiLU()
732
+ (1): Linear(in_features=512, out_features=256, bias=True)
733
+ )
734
+ (block1): Block(
735
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
736
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
737
+ (act): SiLU()
738
+ )
739
+ (block2): Block(
740
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
741
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
742
+ (act): SiLU()
743
+ )
744
+ (res_conv): Identity()
745
+ )
746
+ (1): ResnetBlock(
747
+ (mlp): Sequential(
748
+ (0): SiLU()
749
+ (1): Linear(in_features=512, out_features=256, bias=True)
750
+ )
751
+ (block1): Block(
752
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
753
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
754
+ (act): SiLU()
755
+ )
756
+ (block2): Block(
757
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
758
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
759
+ (act): SiLU()
760
+ )
761
+ (res_conv): Identity()
762
+ )
763
+ (2): Residual(
764
+ (fn): PreNorm(
765
+ (fn): LinearAttention(
766
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
767
+ (to_out): Sequential(
768
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
769
+ (1): LayerNorm()
770
+ )
771
+ )
772
+ (norm): LayerNorm()
773
+ )
774
+ )
775
+ (3): Conv2d(128, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
776
+ )
777
+ (1): ModuleList(
778
+ (0): ResnetBlock(
779
+ (mlp): Sequential(
780
+ (0): SiLU()
781
+ (1): Linear(in_features=512, out_features=256, bias=True)
782
+ )
783
+ (block1): Block(
784
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
785
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
786
+ (act): SiLU()
787
+ )
788
+ (block2): Block(
789
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
790
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
791
+ (act): SiLU()
792
+ )
793
+ (res_conv): Identity()
794
+ )
795
+ (1): ResnetBlock(
796
+ (mlp): Sequential(
797
+ (0): SiLU()
798
+ (1): Linear(in_features=512, out_features=256, bias=True)
799
+ )
800
+ (block1): Block(
801
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
802
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
803
+ (act): SiLU()
804
+ )
805
+ (block2): Block(
806
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
807
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
808
+ (act): SiLU()
809
+ )
810
+ (res_conv): Identity()
811
+ )
812
+ (2): Residual(
813
+ (fn): PreNorm(
814
+ (fn): LinearAttention(
815
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
816
+ (to_out): Sequential(
817
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
818
+ (1): LayerNorm()
819
+ )
820
+ )
821
+ (norm): LayerNorm()
822
+ )
823
+ )
824
+ (3): Conv2d(128, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
825
+ )
826
+ (2): ModuleList(
827
+ (0): ResnetBlock(
828
+ (mlp): Sequential(
829
+ (0): SiLU()
830
+ (1): Linear(in_features=512, out_features=256, bias=True)
831
+ )
832
+ (block1): Block(
833
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
834
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
835
+ (act): SiLU()
836
+ )
837
+ (block2): Block(
838
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
839
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
840
+ (act): SiLU()
841
+ )
842
+ (res_conv): Identity()
843
+ )
844
+ (1): ResnetBlock(
845
+ (mlp): Sequential(
846
+ (0): SiLU()
847
+ (1): Linear(in_features=512, out_features=256, bias=True)
848
+ )
849
+ (block1): Block(
850
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
851
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
852
+ (act): SiLU()
853
+ )
854
+ (block2): Block(
855
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
856
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
857
+ (act): SiLU()
858
+ )
859
+ (res_conv): Identity()
860
+ )
861
+ (2): Residual(
862
+ (fn): PreNorm(
863
+ (fn): LinearAttention(
864
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
865
+ (to_out): Sequential(
866
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
867
+ (1): LayerNorm()
868
+ )
869
+ )
870
+ (norm): LayerNorm()
871
+ )
872
+ )
873
+ (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
874
+ )
875
+ )
876
+ (ups): ModuleList(
877
+ (0): ModuleList(
878
+ (0): ResnetBlock(
879
+ (mlp): Sequential(
880
+ (0): SiLU()
881
+ (1): Linear(in_features=512, out_features=256, bias=True)
882
+ )
883
+ (block1): Block(
884
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
885
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
886
+ (act): SiLU()
887
+ )
888
+ (block2): Block(
889
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
890
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
891
+ (act): SiLU()
892
+ )
893
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
894
+ )
895
+ (1): ResnetBlock(
896
+ (mlp): Sequential(
897
+ (0): SiLU()
898
+ (1): Linear(in_features=512, out_features=256, bias=True)
899
+ )
900
+ (block1): Block(
901
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
902
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
903
+ (act): SiLU()
904
+ )
905
+ (block2): Block(
906
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
907
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
908
+ (act): SiLU()
909
+ )
910
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
911
+ )
912
+ (2): Residual(
913
+ (fn): PreNorm(
914
+ (fn): LinearAttention(
915
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
916
+ (to_out): Sequential(
917
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
918
+ (1): LayerNorm()
919
+ )
920
+ )
921
+ (norm): LayerNorm()
922
+ )
923
+ )
924
+ (3): Sequential(
925
+ (0): Upsample(scale_factor=2.0, mode=nearest)
926
+ (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
927
+ )
928
+ )
929
+ (1): ModuleList(
930
+ (0): ResnetBlock(
931
+ (mlp): Sequential(
932
+ (0): SiLU()
933
+ (1): Linear(in_features=512, out_features=256, bias=True)
934
+ )
935
+ (block1): Block(
936
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
937
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
938
+ (act): SiLU()
939
+ )
940
+ (block2): Block(
941
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
942
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
943
+ (act): SiLU()
944
+ )
945
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
946
+ )
947
+ (1): ResnetBlock(
948
+ (mlp): Sequential(
949
+ (0): SiLU()
950
+ (1): Linear(in_features=512, out_features=256, bias=True)
951
+ )
952
+ (block1): Block(
953
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
954
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
955
+ (act): SiLU()
956
+ )
957
+ (block2): Block(
958
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
959
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
960
+ (act): SiLU()
961
+ )
962
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
963
+ )
964
+ (2): Residual(
965
+ (fn): PreNorm(
966
+ (fn): LinearAttention(
967
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
968
+ (to_out): Sequential(
969
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
970
+ (1): LayerNorm()
971
+ )
972
+ )
973
+ (norm): LayerNorm()
974
+ )
975
+ )
976
+ (3): Sequential(
977
+ (0): Upsample(scale_factor=2.0, mode=nearest)
978
+ (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
979
+ )
980
+ )
981
+ (2): ModuleList(
982
+ (0): ResnetBlock(
983
+ (mlp): Sequential(
984
+ (0): SiLU()
985
+ (1): Linear(in_features=512, out_features=256, bias=True)
986
+ )
987
+ (block1): Block(
988
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
989
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
990
+ (act): SiLU()
991
+ )
992
+ (block2): Block(
993
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
994
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
995
+ (act): SiLU()
996
+ )
997
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
998
+ )
999
+ (1): ResnetBlock(
1000
+ (mlp): Sequential(
1001
+ (0): SiLU()
1002
+ (1): Linear(in_features=512, out_features=256, bias=True)
1003
+ )
1004
+ (block1): Block(
1005
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1006
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1007
+ (act): SiLU()
1008
+ )
1009
+ (block2): Block(
1010
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1011
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1012
+ (act): SiLU()
1013
+ )
1014
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
1015
+ )
1016
+ (2): Residual(
1017
+ (fn): PreNorm(
1018
+ (fn): LinearAttention(
1019
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1020
+ (to_out): Sequential(
1021
+ (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
1022
+ (1): LayerNorm()
1023
+ )
1024
+ )
1025
+ (norm): LayerNorm()
1026
+ )
1027
+ )
1028
+ (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1029
+ )
1030
+ )
1031
+ (mid_block1): ResnetBlock(
1032
+ (mlp): Sequential(
1033
+ (0): SiLU()
1034
+ (1): Linear(in_features=512, out_features=256, bias=True)
1035
+ )
1036
+ (block1): Block(
1037
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1038
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1039
+ (act): SiLU()
1040
+ )
1041
+ (block2): Block(
1042
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1043
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1044
+ (act): SiLU()
1045
+ )
1046
+ (res_conv): Identity()
1047
+ )
1048
+ (mid_attn): Residual(
1049
+ (fn): PreNorm(
1050
+ (fn): Attention(
1051
+ (to_qkv): Conv2d(128, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
1052
+ (to_out): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
1053
+ )
1054
+ (norm): LayerNorm()
1055
+ )
1056
+ )
1057
+ (mid_block2): ResnetBlock(
1058
+ (mlp): Sequential(
1059
+ (0): SiLU()
1060
+ (1): Linear(in_features=512, out_features=256, bias=True)
1061
+ )
1062
+ (block1): Block(
1063
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1064
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1065
+ (act): SiLU()
1066
+ )
1067
+ (block2): Block(
1068
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1069
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1070
+ (act): SiLU()
1071
+ )
1072
+ (res_conv): Identity()
1073
+ )
1074
+ (final_res_block): ResnetBlock(
1075
+ (mlp): Sequential(
1076
+ (0): SiLU()
1077
+ (1): Linear(in_features=512, out_features=256, bias=True)
1078
+ )
1079
+ (block1): Block(
1080
+ (proj): WeightStandardizedConv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1081
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1082
+ (act): SiLU()
1083
+ )
1084
+ (block2): Block(
1085
+ (proj): WeightStandardizedConv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
1086
+ (norm): GroupNorm(8, 128, eps=1e-05, affine=True)
1087
+ (act): SiLU()
1088
+ )
1089
+ (res_conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
1090
+ )
1091
+ (final_conv): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1))
1092
+ )
1093
+ (conv_seg_new): Conv2d(256, 151, kernel_size=(1, 1), stride=(1, 1))
1094
+ (embed): Embedding(151, 16)
1095
+ )
1096
+ init_cfg={'type': 'Pretrained', 'checkpoint': 'work_dirs2/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth'}
1097
+ )
1098
+ 2023-03-05 23:10:57,286 - mmseg - INFO - Loaded 20210 images
1099
+ 2023-03-05 23:11:00,862 - mmseg - INFO - Loaded 2000 images
1100
+ 2023-03-05 23:11:00,864 - mmseg - INFO - Start running, host: laizeqiang@SH-IDC1-10-140-37-110, work_dir: /mnt/petrelfs/laizeqiang/mmseg-baseline/work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce
1101
+ 2023-03-05 23:11:00,865 - mmseg - INFO - Hooks will be executed in the following order:
1102
+ before_run:
1103
+ (VERY_HIGH ) StepLrUpdaterHook
1104
+ (49 ) ConstantMomentumEMAHook
1105
+ (NORMAL ) CheckpointHook
1106
+ (LOW ) DistEvalHookMultiSteps
1107
+ (VERY_LOW ) TextLoggerHook
1108
+ --------------------
1109
+ before_train_epoch:
1110
+ (VERY_HIGH ) StepLrUpdaterHook
1111
+ (LOW ) IterTimerHook
1112
+ (LOW ) DistEvalHookMultiSteps
1113
+ (VERY_LOW ) TextLoggerHook
1114
+ --------------------
1115
+ before_train_iter:
1116
+ (VERY_HIGH ) StepLrUpdaterHook
1117
+ (49 ) ConstantMomentumEMAHook
1118
+ (LOW ) IterTimerHook
1119
+ (LOW ) DistEvalHookMultiSteps
1120
+ --------------------
1121
+ after_train_iter:
1122
+ (ABOVE_NORMAL) OptimizerHook
1123
+ (49 ) ConstantMomentumEMAHook
1124
+ (NORMAL ) CheckpointHook
1125
+ (LOW ) IterTimerHook
1126
+ (LOW ) DistEvalHookMultiSteps
1127
+ (VERY_LOW ) TextLoggerHook
1128
+ --------------------
1129
+ after_train_epoch:
1130
+ (NORMAL ) CheckpointHook
1131
+ (LOW ) DistEvalHookMultiSteps
1132
+ (VERY_LOW ) TextLoggerHook
1133
+ --------------------
1134
+ before_val_epoch:
1135
+ (LOW ) IterTimerHook
1136
+ (VERY_LOW ) TextLoggerHook
1137
+ --------------------
1138
+ before_val_iter:
1139
+ (LOW ) IterTimerHook
1140
+ --------------------
1141
+ after_val_iter:
1142
+ (LOW ) IterTimerHook
1143
+ --------------------
1144
+ after_val_epoch:
1145
+ (VERY_LOW ) TextLoggerHook
1146
+ --------------------
1147
+ after_run:
1148
+ (VERY_LOW ) TextLoggerHook
1149
+ --------------------
1150
+ 2023-03-05 23:11:00,865 - mmseg - INFO - workflow: [('train', 1)], max: 160000 iters
1151
+ 2023-03-05 23:11:00,901 - mmseg - INFO - Checkpoints will be saved to /mnt/petrelfs/laizeqiang/mmseg-baseline/work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce by HardDiskBackend.
1152
+ 2023-03-05 23:11:25,138 - mmseg - INFO - Swap parameters (before train) before iter [1]
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/20230305_231050.log.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"env_info": "sys.platform: linux\nPython: 3.7.16 (default, Jan 17 2023, 22:20:44) [GCC 11.2.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB\nCUDA_HOME: /mnt/petrelfs/laizeqiang/miniconda3/envs/torch\nNVCC: Cuda compilation tools, release 11.6, V11.6.124\nGCC: gcc (GCC) 4.8.5 20150623 (Red Hat 4.8.5-44)\nPyTorch: 1.13.1\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.6\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wunused-local-typedefs -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.13.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.14.1\nOpenCV: 4.7.0\nMMCV: 1.7.1\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.6\nMMSegmentation: 0.30.0+6db5ece", "seed": 1580901347, "exp_name": "ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce.py", "mmseg_version": "0.30.0+6db5ece", "config": "norm_cfg = dict(type='SyncBN', requires_grad=True)\ncheckpoint = 'work_dirs2/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth'\nmodel = dict(\n type='EncoderDecoderDiffusion',\n freeze_parameters=['backbone', 'decode_head'],\n pretrained=\n 'work_dirs2/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth',\n backbone=dict(\n type='MixVisionTransformerCustomInitWeights',\n in_channels=3,\n embed_dims=64,\n num_stages=4,\n num_layers=[3, 4, 6, 3],\n num_heads=[1, 2, 5, 8],\n patch_sizes=[7, 3, 3, 3],\n sr_ratios=[8, 4, 2, 1],\n out_indices=(0, 1, 2, 3),\n mlp_ratio=4,\n qkv_bias=True,\n drop_rate=0.0,\n attn_drop_rate=0.0,\n drop_path_rate=0.1,\n pretrained=\n 'work_dirs2/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth'\n ),\n decode_head=dict(\n type='SegformerHeadUnetFCHeadMultiStepCE',\n pretrained=\n 'work_dirs2/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth',\n dim=128,\n out_dim=256,\n unet_channels=272,\n dim_mults=[1, 1, 1],\n cat_embedding_dim=16,\n diffusion_timesteps=100,\n collect_timesteps=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 99],\n in_channels=[64, 128, 320, 512],\n in_index=[0, 1, 2, 3],\n channels=256,\n dropout_ratio=0.1,\n num_classes=151,\n norm_cfg=dict(type='SyncBN', requires_grad=True),\n align_corners=False,\n ignore_index=0,\n loss_decode=dict(\n type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.1)),\n train_cfg=dict(),\n test_cfg=dict(mode='whole'))\ndataset_type = 'ADE20K151Dataset'\ndata_root = 'data/ade/ADEChallengeData2016'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ncrop_size = (512, 512)\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=4,\n workers_per_gpu=4,\n train=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/training',\n ann_dir='annotations/training',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', reduce_zero_label=False),\n dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),\n dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),\n dict(type='RandomFlip', prob=0.5),\n dict(type='PhotoMetricDistortion'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_semantic_seg'])\n ]),\n val=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]),\n test=dict(\n type='ADE20K151Dataset',\n data_root='data/ade/ADEChallengeData2016',\n img_dir='images/validation',\n ann_dir='annotations/validation',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(2048, 512),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=True),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(\n type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]))\nlog_config = dict(\n interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\ncudnn_benchmark = True\noptimizer = dict(\n type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)\noptimizer_config = dict()\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=1000,\n warmup_ratio=1e-06,\n step=20000,\n gamma=0.5,\n min_lr=1e-06,\n by_epoch=False)\nrunner = dict(type='IterBasedRunner', max_iters=160000)\ncheckpoint_config = dict(by_epoch=False, interval=16000, max_keep_ckpts=1)\nevaluation = dict(\n interval=16000, metric='mIoU', pre_eval=True, save_best='mIoU')\ncustom_hooks = [\n dict(\n type='ConstantMomentumEMAHook',\n momentum=0.01,\n interval=25,\n eval_interval=16000,\n auto_resume=True,\n priority=49)\n]\nwork_dir = './work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce'\ngpu_ids = range(0, 8)\nauto_resume = True\ndevice = 'cuda'\nseed = 1580901347\n", "CLASSES": ["background", "wall", "building", "sky", "floor", "tree", "ceiling", "road", "bed ", "windowpane", "grass", "cabinet", "sidewalk", "person", "earth", "door", "table", "mountain", "plant", "curtain", "chair", "car", "water", "painting", "sofa", "shelf", "house", "sea", "mirror", "rug", "field", "armchair", "seat", "fence", "desk", "rock", "wardrobe", "lamp", "bathtub", "railing", "cushion", "base", "box", "column", "signboard", "chest of drawers", "counter", "sand", "sink", "skyscraper", "fireplace", "refrigerator", "grandstand", "path", "stairs", "runway", "case", "pool table", "pillow", "screen door", "stairway", "river", "bridge", "bookcase", "blind", "coffee table", "toilet", "flower", "book", "hill", "bench", "countertop", "stove", "palm", "kitchen island", "computer", "swivel chair", "boat", "bar", "arcade machine", "hovel", "bus", "towel", "light", "truck", "tower", "chandelier", "awning", "streetlight", "booth", "television receiver", "airplane", "dirt track", "apparel", "pole", "land", "bannister", "escalator", "ottoman", "bottle", "buffet", "poster", "stage", "van", "ship", "fountain", "conveyer belt", "canopy", "washer", "plaything", "swimming pool", "stool", "barrel", "basket", "waterfall", "tent", "bag", "minibike", "cradle", "oven", "ball", "food", "step", "tank", "trade name", "microwave", "pot", "animal", "bicycle", "lake", "dishwasher", "screen", "blanket", "sculpture", "hood", "sconce", "vase", "traffic light", "tray", "ashcan", "fan", "pier", "crt screen", "plate", "monitor", "bulletin board", "shower", "radiator", "glass", "clock", "flag"], "PALETTE": [[0, 0, 0], [120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], [102, 255, 0], [92, 0, 255]], "hook_msgs": {}}
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/20230305_231207.log ADDED
The diff for this file is too large to render. See raw diff
 
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/20230305_231207.log.json ADDED
The diff for this file is too large to render. See raw diff
 
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
2
+ checkpoint = 'work_dirs2/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth'
3
+ model = dict(
4
+ type='EncoderDecoderDiffusion',
5
+ freeze_parameters=['backbone', 'decode_head'],
6
+ pretrained=
7
+ 'work_dirs2/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth',
8
+ backbone=dict(
9
+ type='MixVisionTransformerCustomInitWeights',
10
+ in_channels=3,
11
+ embed_dims=64,
12
+ num_stages=4,
13
+ num_layers=[3, 4, 6, 3],
14
+ num_heads=[1, 2, 5, 8],
15
+ patch_sizes=[7, 3, 3, 3],
16
+ sr_ratios=[8, 4, 2, 1],
17
+ out_indices=(0, 1, 2, 3),
18
+ mlp_ratio=4,
19
+ qkv_bias=True,
20
+ drop_rate=0.0,
21
+ attn_drop_rate=0.0,
22
+ drop_path_rate=0.1),
23
+ decode_head=dict(
24
+ type='SegformerHeadUnetFCHeadMultiStepCE',
25
+ pretrained=
26
+ 'work_dirs2/segformer_mit_b2_segformer_head_unet_fc_single_step_ade_pretrained_freeze_embed_80k_ade20k151/best_mIoU_iter_64000.pth',
27
+ dim=128,
28
+ out_dim=256,
29
+ unet_channels=272,
30
+ dim_mults=[1, 1, 1],
31
+ cat_embedding_dim=16,
32
+ diffusion_timesteps=100,
33
+ collect_timesteps=[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 99],
34
+ in_channels=[64, 128, 320, 512],
35
+ in_index=[0, 1, 2, 3],
36
+ channels=256,
37
+ dropout_ratio=0.1,
38
+ num_classes=151,
39
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
40
+ align_corners=False,
41
+ ignore_index=0,
42
+ loss_decode=dict(
43
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.1)),
44
+ train_cfg=dict(),
45
+ test_cfg=dict(mode='whole'))
46
+ dataset_type = 'ADE20K151Dataset'
47
+ data_root = 'data/ade/ADEChallengeData2016'
48
+ img_norm_cfg = dict(
49
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
50
+ crop_size = (512, 512)
51
+ train_pipeline = [
52
+ dict(type='LoadImageFromFile'),
53
+ dict(type='LoadAnnotations', reduce_zero_label=False),
54
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
55
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
56
+ dict(type='RandomFlip', prob=0.5),
57
+ dict(type='PhotoMetricDistortion'),
58
+ dict(
59
+ type='Normalize',
60
+ mean=[123.675, 116.28, 103.53],
61
+ std=[58.395, 57.12, 57.375],
62
+ to_rgb=True),
63
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
64
+ dict(type='DefaultFormatBundle'),
65
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
66
+ ]
67
+ test_pipeline = [
68
+ dict(type='LoadImageFromFile'),
69
+ dict(
70
+ type='MultiScaleFlipAug',
71
+ img_scale=(2048, 512),
72
+ flip=False,
73
+ transforms=[
74
+ dict(type='Resize', keep_ratio=True),
75
+ dict(type='RandomFlip'),
76
+ dict(
77
+ type='Normalize',
78
+ mean=[123.675, 116.28, 103.53],
79
+ std=[58.395, 57.12, 57.375],
80
+ to_rgb=True),
81
+ dict(type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
82
+ dict(type='ImageToTensor', keys=['img']),
83
+ dict(type='Collect', keys=['img'])
84
+ ])
85
+ ]
86
+ data = dict(
87
+ samples_per_gpu=4,
88
+ workers_per_gpu=4,
89
+ train=dict(
90
+ type='ADE20K151Dataset',
91
+ data_root='data/ade/ADEChallengeData2016',
92
+ img_dir='images/training',
93
+ ann_dir='annotations/training',
94
+ pipeline=[
95
+ dict(type='LoadImageFromFile'),
96
+ dict(type='LoadAnnotations', reduce_zero_label=False),
97
+ dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)),
98
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
99
+ dict(type='RandomFlip', prob=0.5),
100
+ dict(type='PhotoMetricDistortion'),
101
+ dict(
102
+ type='Normalize',
103
+ mean=[123.675, 116.28, 103.53],
104
+ std=[58.395, 57.12, 57.375],
105
+ to_rgb=True),
106
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=0),
107
+ dict(type='DefaultFormatBundle'),
108
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
109
+ ]),
110
+ val=dict(
111
+ type='ADE20K151Dataset',
112
+ data_root='data/ade/ADEChallengeData2016',
113
+ img_dir='images/validation',
114
+ ann_dir='annotations/validation',
115
+ pipeline=[
116
+ dict(type='LoadImageFromFile'),
117
+ dict(
118
+ type='MultiScaleFlipAug',
119
+ img_scale=(2048, 512),
120
+ flip=False,
121
+ transforms=[
122
+ dict(type='Resize', keep_ratio=True),
123
+ dict(type='RandomFlip'),
124
+ dict(
125
+ type='Normalize',
126
+ mean=[123.675, 116.28, 103.53],
127
+ std=[58.395, 57.12, 57.375],
128
+ to_rgb=True),
129
+ dict(
130
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
131
+ dict(type='ImageToTensor', keys=['img']),
132
+ dict(type='Collect', keys=['img'])
133
+ ])
134
+ ]),
135
+ test=dict(
136
+ type='ADE20K151Dataset',
137
+ data_root='data/ade/ADEChallengeData2016',
138
+ img_dir='images/validation',
139
+ ann_dir='annotations/validation',
140
+ pipeline=[
141
+ dict(type='LoadImageFromFile'),
142
+ dict(
143
+ type='MultiScaleFlipAug',
144
+ img_scale=(2048, 512),
145
+ flip=False,
146
+ transforms=[
147
+ dict(type='Resize', keep_ratio=True),
148
+ dict(type='RandomFlip'),
149
+ dict(
150
+ type='Normalize',
151
+ mean=[123.675, 116.28, 103.53],
152
+ std=[58.395, 57.12, 57.375],
153
+ to_rgb=True),
154
+ dict(
155
+ type='Pad', size_divisor=16, pad_val=0, seg_pad_val=0),
156
+ dict(type='ImageToTensor', keys=['img']),
157
+ dict(type='Collect', keys=['img'])
158
+ ])
159
+ ]))
160
+ log_config = dict(
161
+ interval=50, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
162
+ dist_params = dict(backend='nccl')
163
+ log_level = 'INFO'
164
+ load_from = None
165
+ resume_from = None
166
+ workflow = [('train', 1)]
167
+ cudnn_benchmark = True
168
+ optimizer = dict(
169
+ type='AdamW', lr=0.00015, betas=[0.9, 0.96], weight_decay=0.045)
170
+ optimizer_config = dict()
171
+ lr_config = dict(
172
+ policy='step',
173
+ warmup='linear',
174
+ warmup_iters=1000,
175
+ warmup_ratio=1e-06,
176
+ step=20000,
177
+ gamma=0.5,
178
+ min_lr=1e-06,
179
+ by_epoch=False)
180
+ runner = dict(type='IterBasedRunner', max_iters=160000)
181
+ checkpoint_config = dict(by_epoch=False, interval=16000, max_keep_ckpts=1)
182
+ evaluation = dict(
183
+ interval=16000, metric='mIoU', pre_eval=True, save_best='mIoU')
184
+ custom_hooks = [
185
+ dict(
186
+ type='ConstantMomentumEMAHook',
187
+ momentum=0.01,
188
+ interval=25,
189
+ eval_interval=16000,
190
+ auto_resume=True,
191
+ priority=49)
192
+ ]
193
+ work_dir = './work_dirs2/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce'
194
+ gpu_ids = range(0, 8)
195
+ auto_resume = True
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/best_mIoU_iter_32000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a66cad0c00553fd60ce7f9480e3f7d1df97731fd92aa99695ddc1bf240a6d274
3
+ size 380051503
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/iter_160000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4c68bd09b726447e49b3a8d30c7a888f069b446c65d971ff6d1eb2e93130120
3
+ size 380051503
ablation/ablation_segformer_mit_b2_segformer_head_unet_fc_small_multi_step_ade_pretrained_freeze_embed_160k_ade20k151_finetune_ema_ce/latest.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4c68bd09b726447e49b3a8d30c7a888f069b446c65d971ff6d1eb2e93130120
3
+ size 380051503