yunyangx committed on
Commit
dc278d9
·
verified ·
1 Parent(s): 518096e

Delete sam2/configs/sam2_hiera_s.yaml

Browse files
Files changed (1) hide show
  1. sam2/configs/sam2_hiera_s.yaml +0 -116
sam2/configs/sam2_hiera_s.yaml DELETED
@@ -1,116 +0,0 @@
1
- # @package _global_
2
-
3
- # Model
4
- model:
5
- _target_: sam2.modeling.sam2_base.SAM2Base
6
- image_encoder:
7
- _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
8
- scalp: 1
9
- trunk:
10
- _target_: sam2.modeling.backbones.hieradet.Hiera
11
- embed_dim: 96
12
- num_heads: 1
13
- stages: [1, 2, 11, 2]
14
- global_att_blocks: [7, 10, 13]
15
- window_pos_embed_bkg_spatial_size: [7, 7]
16
- neck:
17
- _target_: sam2.modeling.backbones.image_encoder.FpnNeck
18
- position_encoding:
19
- _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
20
- num_pos_feats: 256
21
- normalize: true
22
- scale: null
23
- temperature: 10000
24
- d_model: 256
25
- backbone_channel_list: [768, 384, 192, 96]
26
- fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features
27
- fpn_interp_model: nearest
28
-
29
- memory_attention:
30
- _target_: sam2.modeling.memory_attention.MemoryAttention
31
- d_model: 256
32
- pos_enc_at_input: true
33
- layer:
34
- _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
35
- activation: relu
36
- dim_feedforward: 2048
37
- dropout: 0.1
38
- pos_enc_at_attn: false
39
- self_attention:
40
- _target_: sam2.modeling.sam.transformer.RoPEAttention
41
- rope_theta: 10000.0
42
- feat_sizes: [32, 32]
43
- embedding_dim: 256
44
- num_heads: 1
45
- downsample_rate: 1
46
- dropout: 0.1
47
- d_model: 256
48
- pos_enc_at_cross_attn_keys: true
49
- pos_enc_at_cross_attn_queries: false
50
- cross_attention:
51
- _target_: sam2.modeling.sam.transformer.RoPEAttention
52
- rope_theta: 10000.0
53
- feat_sizes: [32, 32]
54
- rope_k_repeat: True
55
- embedding_dim: 256
56
- num_heads: 1
57
- downsample_rate: 1
58
- dropout: 0.1
59
- kv_in_dim: 64
60
- num_layers: 4
61
-
62
- memory_encoder:
63
- _target_: sam2.modeling.memory_encoder.MemoryEncoder
64
- out_dim: 64
65
- position_encoding:
66
- _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
67
- num_pos_feats: 64
68
- normalize: true
69
- scale: null
70
- temperature: 10000
71
- mask_downsampler:
72
- _target_: sam2.modeling.memory_encoder.MaskDownSampler
73
- kernel_size: 3
74
- stride: 2
75
- padding: 1
76
- fuser:
77
- _target_: sam2.modeling.memory_encoder.Fuser
78
- layer:
79
- _target_: sam2.modeling.memory_encoder.CXBlock
80
- dim: 256
81
- kernel_size: 7
82
- padding: 3
83
- layer_scale_init_value: 1e-6
84
- use_dwconv: True # depth-wise convs
85
- num_layers: 2
86
-
87
- num_maskmem: 7
88
- image_size: 1024
89
- # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
90
- sigmoid_scale_for_mem_enc: 20.0
91
- sigmoid_bias_for_mem_enc: -10.0
92
- use_mask_input_as_output_without_sam: true
93
- # Memory
94
- directly_add_no_mem_embed: true
95
- # use high-resolution feature map in the SAM mask decoder
96
- use_high_res_features_in_sam: true
97
- # output 3 masks on the first click on initial conditioning frames
98
- multimask_output_in_sam: true
99
- # SAM heads
100
- iou_prediction_use_sigmoid: True
101
- # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
102
- use_obj_ptrs_in_encoder: true
103
- add_tpos_enc_to_obj_ptrs: false
104
- only_obj_ptrs_in_the_past_for_eval: true
105
- # object occlusion prediction
106
- pred_obj_scores: true
107
- pred_obj_scores_mlp: true
108
- fixed_no_obj_ptr: true
109
- # multimask tracking settings
110
- multimask_output_for_tracking: true
111
- use_multimask_token_for_obj_ptr: true
112
- multimask_min_pt_num: 0
113
- multimask_max_pt_num: 1
114
- use_mlp_for_obj_ptr_proj: true
115
- # Compilation flag
116
- compile_image_encoder: False