Spaces:
Running
on
Zero
Running
on
Zero
Delete sam2/configs/sam2_hiera_l.yaml
Browse files- sam2/configs/sam2_hiera_l.yaml +0 -117
sam2/configs/sam2_hiera_l.yaml
DELETED
@@ -1,117 +0,0 @@
|
|
1 |
-
# @package _global_
|
2 |
-
|
3 |
-
# Model
|
4 |
-
model:
|
5 |
-
_target_: sam2.modeling.sam2_base.SAM2Base
|
6 |
-
image_encoder:
|
7 |
-
_target_: sam2.modeling.backbones.image_encoder.ImageEncoder
|
8 |
-
scalp: 1
|
9 |
-
trunk:
|
10 |
-
_target_: sam2.modeling.backbones.hieradet.Hiera
|
11 |
-
embed_dim: 144
|
12 |
-
num_heads: 2
|
13 |
-
stages: [2, 6, 36, 4]
|
14 |
-
global_att_blocks: [23, 33, 43]
|
15 |
-
window_pos_embed_bkg_spatial_size: [7, 7]
|
16 |
-
window_spec: [8, 4, 16, 8]
|
17 |
-
neck:
|
18 |
-
_target_: sam2.modeling.backbones.image_encoder.FpnNeck
|
19 |
-
position_encoding:
|
20 |
-
_target_: sam2.modeling.position_encoding.PositionEmbeddingSine
|
21 |
-
num_pos_feats: 256
|
22 |
-
normalize: true
|
23 |
-
scale: null
|
24 |
-
temperature: 10000
|
25 |
-
d_model: 256
|
26 |
-
backbone_channel_list: [1152, 576, 288, 144]
|
27 |
-
fpn_top_down_levels: [2, 3] # output level 0 and 1 directly use the backbone features
|
28 |
-
fpn_interp_model: nearest
|
29 |
-
|
30 |
-
memory_attention:
|
31 |
-
_target_: sam2.modeling.memory_attention.MemoryAttention
|
32 |
-
d_model: 256
|
33 |
-
pos_enc_at_input: true
|
34 |
-
layer:
|
35 |
-
_target_: sam2.modeling.memory_attention.MemoryAttentionLayer
|
36 |
-
activation: relu
|
37 |
-
dim_feedforward: 2048
|
38 |
-
dropout: 0.1
|
39 |
-
pos_enc_at_attn: false
|
40 |
-
self_attention:
|
41 |
-
_target_: sam2.modeling.sam.transformer.RoPEAttention
|
42 |
-
rope_theta: 10000.0
|
43 |
-
feat_sizes: [32, 32]
|
44 |
-
embedding_dim: 256
|
45 |
-
num_heads: 1
|
46 |
-
downsample_rate: 1
|
47 |
-
dropout: 0.1
|
48 |
-
d_model: 256
|
49 |
-
pos_enc_at_cross_attn_keys: true
|
50 |
-
pos_enc_at_cross_attn_queries: false
|
51 |
-
cross_attention:
|
52 |
-
_target_: sam2.modeling.sam.transformer.RoPEAttention
|
53 |
-
rope_theta: 10000.0
|
54 |
-
feat_sizes: [32, 32]
|
55 |
-
rope_k_repeat: True
|
56 |
-
embedding_dim: 256
|
57 |
-
num_heads: 1
|
58 |
-
downsample_rate: 1
|
59 |
-
dropout: 0.1
|
60 |
-
kv_in_dim: 64
|
61 |
-
num_layers: 4
|
62 |
-
|
63 |
-
memory_encoder:
|
64 |
-
_target_: sam2.modeling.memory_encoder.MemoryEncoder
|
65 |
-
out_dim: 64
|
66 |
-
position_encoding:
|
67 |
-
_target_: sam2.modeling.position_encoding.PositionEmbeddingSine
|
68 |
-
num_pos_feats: 64
|
69 |
-
normalize: true
|
70 |
-
scale: null
|
71 |
-
temperature: 10000
|
72 |
-
mask_downsampler:
|
73 |
-
_target_: sam2.modeling.memory_encoder.MaskDownSampler
|
74 |
-
kernel_size: 3
|
75 |
-
stride: 2
|
76 |
-
padding: 1
|
77 |
-
fuser:
|
78 |
-
_target_: sam2.modeling.memory_encoder.Fuser
|
79 |
-
layer:
|
80 |
-
_target_: sam2.modeling.memory_encoder.CXBlock
|
81 |
-
dim: 256
|
82 |
-
kernel_size: 7
|
83 |
-
padding: 3
|
84 |
-
layer_scale_init_value: 1e-6
|
85 |
-
use_dwconv: True # depth-wise convs
|
86 |
-
num_layers: 2
|
87 |
-
|
88 |
-
num_maskmem: 7
|
89 |
-
image_size: 1024
|
90 |
-
# apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
|
91 |
-
sigmoid_scale_for_mem_enc: 20.0
|
92 |
-
sigmoid_bias_for_mem_enc: -10.0
|
93 |
-
use_mask_input_as_output_without_sam: true
|
94 |
-
# Memory
|
95 |
-
directly_add_no_mem_embed: true
|
96 |
-
# use high-resolution feature map in the SAM mask decoder
|
97 |
-
use_high_res_features_in_sam: true
|
98 |
-
# output 3 masks on the first click on initial conditioning frames
|
99 |
-
multimask_output_in_sam: true
|
100 |
-
# SAM heads
|
101 |
-
iou_prediction_use_sigmoid: True
|
102 |
-
# cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
|
103 |
-
use_obj_ptrs_in_encoder: true
|
104 |
-
add_tpos_enc_to_obj_ptrs: false
|
105 |
-
only_obj_ptrs_in_the_past_for_eval: true
|
106 |
-
# object occlusion prediction
|
107 |
-
pred_obj_scores: true
|
108 |
-
pred_obj_scores_mlp: true
|
109 |
-
fixed_no_obj_ptr: true
|
110 |
-
# multimask tracking settings
|
111 |
-
multimask_output_for_tracking: true
|
112 |
-
use_multimask_token_for_obj_ptr: true
|
113 |
-
multimask_min_pt_num: 0
|
114 |
-
multimask_max_pt_num: 1
|
115 |
-
use_mlp_for_obj_ptr_proj: true
|
116 |
-
# Compilation flag
|
117 |
-
compile_image_encoder: False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|