yunyangx committed on
Commit
0086a54
·
verified ·
1 Parent(s): 2d145ab

Delete sam2/configs/sam2_hiera_b+.yaml

Browse files
Files changed (1) hide show
  1. sam2/configs/sam2_hiera_b+.yaml +0 -113
sam2/configs/sam2_hiera_b+.yaml DELETED
@@ -1,113 +0,0 @@
1
- # @package _global_
2
-
3
- # Model
4
- model:
5
- _target_: sam2.modeling.sam2_base.SAM2Base
6
- image_encoder:
7
- _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
8
- scalp: 1
9
- trunk:
10
- _target_: sam2.modeling.backbones.hieradet.Hiera
11
- embed_dim: 112
12
- num_heads: 2
13
- neck:
14
- _target_: sam2.modeling.backbones.image_encoder.FpnNeck
15
- position_encoding:
16
- _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
17
- num_pos_feats: 256
18
- normalize: true
19
- scale: null
20
- temperature: 10000
21
- d_model: 256
22
- backbone_channel_list: [896, 448, 224, 112]
23
- fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features
24
- fpn_interp_model: nearest
25
-
26
- memory_attention:
27
- _target_: sam2.modeling.memory_attention.MemoryAttention
28
- d_model: 256
29
- pos_enc_at_input: true
30
- layer:
31
- _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
32
- activation: relu
33
- dim_feedforward: 2048
34
- dropout: 0.1
35
- pos_enc_at_attn: false
36
- self_attention:
37
- _target_: sam2.modeling.sam.transformer.RoPEAttention
38
- rope_theta: 10000.0
39
- feat_sizes: [32, 32]
40
- embedding_dim: 256
41
- num_heads: 1
42
- downsample_rate: 1
43
- dropout: 0.1
44
- d_model: 256
45
- pos_enc_at_cross_attn_keys: true
46
- pos_enc_at_cross_attn_queries: false
47
- cross_attention:
48
- _target_: sam2.modeling.sam.transformer.RoPEAttention
49
- rope_theta: 10000.0
50
- feat_sizes: [32, 32]
51
- rope_k_repeat: True
52
- embedding_dim: 256
53
- num_heads: 1
54
- downsample_rate: 1
55
- dropout: 0.1
56
- kv_in_dim: 64
57
- num_layers: 4
58
-
59
- memory_encoder:
60
- _target_: sam2.modeling.memory_encoder.MemoryEncoder
61
- out_dim: 64
62
- position_encoding:
63
- _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
64
- num_pos_feats: 64
65
- normalize: true
66
- scale: null
67
- temperature: 10000
68
- mask_downsampler:
69
- _target_: sam2.modeling.memory_encoder.MaskDownSampler
70
- kernel_size: 3
71
- stride: 2
72
- padding: 1
73
- fuser:
74
- _target_: sam2.modeling.memory_encoder.Fuser
75
- layer:
76
- _target_: sam2.modeling.memory_encoder.CXBlock
77
- dim: 256
78
- kernel_size: 7
79
- padding: 3
80
- layer_scale_init_value: 1e-6
81
- use_dwconv: True # depth-wise convs
82
- num_layers: 2
83
-
84
- num_maskmem: 7
85
- image_size: 1024
86
- # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
87
- sigmoid_scale_for_mem_enc: 20.0
88
- sigmoid_bias_for_mem_enc: -10.0
89
- use_mask_input_as_output_without_sam: true
90
- # Memory
91
- directly_add_no_mem_embed: true
92
- # use high-resolution feature map in the SAM mask decoder
93
- use_high_res_features_in_sam: true
94
- # output 3 masks on the first click on initial conditioning frames
95
- multimask_output_in_sam: true
96
- # SAM heads
97
- iou_prediction_use_sigmoid: True
98
- # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
99
- use_obj_ptrs_in_encoder: true
100
- add_tpos_enc_to_obj_ptrs: false
101
- only_obj_ptrs_in_the_past_for_eval: true
102
- # object occlusion prediction
103
- pred_obj_scores: true
104
- pred_obj_scores_mlp: true
105
- fixed_no_obj_ptr: true
106
- # multimask tracking settings
107
- multimask_output_for_tracking: true
108
- use_multimask_token_for_obj_ptr: true
109
- multimask_min_pt_num: 0
110
- multimask_max_pt_num: 1
111
- use_mlp_for_obj_ptr_proj: true
112
- # Compilation flag
113
- compile_image_encoder: False