Add config from convert_rt_detr_original_pytorch_checkpoint_to_pytorch.py
Browse files- config.json +26 -35
config.json
CHANGED
@@ -10,6 +10,32 @@
|
|
10 |
],
|
11 |
"attention_dropout": 0.0,
|
12 |
"auxiliary_loss": true,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
"batch_norm_eps": 1e-05,
|
14 |
"box_noise_scale": 1.0,
|
15 |
"d_model": 256,
|
@@ -23,17 +49,8 @@
|
|
23 |
],
|
24 |
"decoder_layers": 3,
|
25 |
"decoder_n_points": 4,
|
26 |
-
"depths": [
|
27 |
-
2,
|
28 |
-
2,
|
29 |
-
2,
|
30 |
-
2
|
31 |
-
],
|
32 |
"disable_custom_kernels": true,
|
33 |
-
"downsample_in_bottleneck": false,
|
34 |
-
"downsample_in_first_stage": false,
|
35 |
"dropout": 0.0,
|
36 |
-
"embedding_size": 64,
|
37 |
"encode_proj_layers": [
|
38 |
2
|
39 |
],
|
@@ -56,14 +73,7 @@
|
|
56 |
],
|
57 |
"focal_loss_alpha": 0.75,
|
58 |
"focal_loss_gamma": 2.0,
|
59 |
-
"hidden_act": "relu",
|
60 |
"hidden_expansion": 0.5,
|
61 |
-
"hidden_sizes": [
|
62 |
-
64,
|
63 |
-
128,
|
64 |
-
256,
|
65 |
-
512
|
66 |
-
],
|
67 |
"id2label": {
|
68 |
"0": "person",
|
69 |
"1": "bicycle",
|
@@ -232,7 +242,6 @@
|
|
232 |
},
|
233 |
"label_noise_ratio": 0.5,
|
234 |
"layer_norm_eps": 1e-05,
|
235 |
-
"layer_type": "basic",
|
236 |
"learn_initial_query": false,
|
237 |
"matcher_alpha": 0.25,
|
238 |
"matcher_bbox_cost": 5.0,
|
@@ -241,28 +250,10 @@
|
|
241 |
"matcher_giou_cost": 2.0,
|
242 |
"model_type": "rt_detr",
|
243 |
"normalize_before": false,
|
244 |
-
"num_channels": 3,
|
245 |
"num_denoising": 100,
|
246 |
"num_feature_levels": 3,
|
247 |
"num_queries": 300,
|
248 |
-
"out_features": [
|
249 |
-
"stage2",
|
250 |
-
"stage3",
|
251 |
-
"stage4"
|
252 |
-
],
|
253 |
-
"out_indices": [
|
254 |
-
2,
|
255 |
-
3,
|
256 |
-
4
|
257 |
-
],
|
258 |
"positional_encoding_temperature": 10000,
|
259 |
-
"stage_names": [
|
260 |
-
"stem",
|
261 |
-
"stage1",
|
262 |
-
"stage2",
|
263 |
-
"stage3",
|
264 |
-
"stage4"
|
265 |
-
],
|
266 |
"torch_dtype": "float32",
|
267 |
"transformers_version": "4.42.0.dev0",
|
268 |
"use_focal_loss": true,
|
|
|
10 |
],
|
11 |
"attention_dropout": 0.0,
|
12 |
"auxiliary_loss": true,
|
13 |
+
"backbone_config": {
|
14 |
+
"depths": [
|
15 |
+
2,
|
16 |
+
2,
|
17 |
+
2,
|
18 |
+
2
|
19 |
+
],
|
20 |
+
"hidden_sizes": [
|
21 |
+
64,
|
22 |
+
128,
|
23 |
+
256,
|
24 |
+
512
|
25 |
+
],
|
26 |
+
"layer_type": "basic",
|
27 |
+
"model_type": "rtdetr-resnet",
|
28 |
+
"out_features": [
|
29 |
+
"stage2",
|
30 |
+
"stage3",
|
31 |
+
"stage4"
|
32 |
+
],
|
33 |
+
"out_indices": [
|
34 |
+
2,
|
35 |
+
3,
|
36 |
+
4
|
37 |
+
]
|
38 |
+
},
|
39 |
"batch_norm_eps": 1e-05,
|
40 |
"box_noise_scale": 1.0,
|
41 |
"d_model": 256,
|
|
|
49 |
],
|
50 |
"decoder_layers": 3,
|
51 |
"decoder_n_points": 4,
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
"disable_custom_kernels": true,
|
|
|
|
|
53 |
"dropout": 0.0,
|
|
|
54 |
"encode_proj_layers": [
|
55 |
2
|
56 |
],
|
|
|
73 |
],
|
74 |
"focal_loss_alpha": 0.75,
|
75 |
"focal_loss_gamma": 2.0,
|
|
|
76 |
"hidden_expansion": 0.5,
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
"id2label": {
|
78 |
"0": "person",
|
79 |
"1": "bicycle",
|
|
|
242 |
},
|
243 |
"label_noise_ratio": 0.5,
|
244 |
"layer_norm_eps": 1e-05,
|
|
|
245 |
"learn_initial_query": false,
|
246 |
"matcher_alpha": 0.25,
|
247 |
"matcher_bbox_cost": 5.0,
|
|
|
250 |
"matcher_giou_cost": 2.0,
|
251 |
"model_type": "rt_detr",
|
252 |
"normalize_before": false,
|
|
|
253 |
"num_denoising": 100,
|
254 |
"num_feature_levels": 3,
|
255 |
"num_queries": 300,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
"positional_encoding_temperature": 10000,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
257 |
"torch_dtype": "float32",
|
258 |
"transformers_version": "4.42.0.dev0",
|
259 |
"use_focal_loss": true,
|