PekingU
/

rtdetr_r18vd

@@ -10,6 +10,32 @@
   ],
   "attention_dropout": 0.0,
   "auxiliary_loss": true,
   "batch_norm_eps": 1e-05,
   "box_noise_scale": 1.0,
   "d_model": 256,
@@ -23,17 +49,8 @@
   ],
   "decoder_layers": 3,
   "decoder_n_points": 4,
-  "depths": [
-    2,
-    2,
-    2,
-    2
-  ],
   "disable_custom_kernels": true,
-  "downsample_in_bottleneck": false,
-  "downsample_in_first_stage": false,
   "dropout": 0.0,
-  "embedding_size": 64,
   "encode_proj_layers": [
     2
   ],
@@ -56,14 +73,7 @@
   ],
   "focal_loss_alpha": 0.75,
   "focal_loss_gamma": 2.0,
-  "hidden_act": "relu",
   "hidden_expansion": 0.5,
-  "hidden_sizes": [
-    64,
-    128,
-    256,
-    512
-  ],
   "id2label": {
     "0": "person",
     "1": "bicycle",
@@ -232,7 +242,6 @@
   },
   "label_noise_ratio": 0.5,
   "layer_norm_eps": 1e-05,
-  "layer_type": "basic",
   "learn_initial_query": false,
   "matcher_alpha": 0.25,
   "matcher_bbox_cost": 5.0,
@@ -241,28 +250,10 @@
   "matcher_giou_cost": 2.0,
   "model_type": "rt_detr",
   "normalize_before": false,
-  "num_channels": 3,
   "num_denoising": 100,
   "num_feature_levels": 3,
   "num_queries": 300,
-  "out_features": [
-    "stage2",
-    "stage3",
-    "stage4"
-  ],
-  "out_indices": [
-    2,
-    3,
-    4
-  ],
   "positional_encoding_temperature": 10000,
-  "stage_names": [
-    "stem",
-    "stage1",
-    "stage2",
-    "stage3",
-    "stage4"
-  ],
   "torch_dtype": "float32",
   "transformers_version": "4.42.0.dev0",
   "use_focal_loss": true,

   ],
   "attention_dropout": 0.0,
   "auxiliary_loss": true,
+  "backbone_config": {
+    "depths": [
+      2,
+      2,
+      2,
+      2
+    ],
+    "hidden_sizes": [
+      64,
+      128,
+      256,
+      512
+    ],
+    "layer_type": "basic",
+    "model_type": "rtdetr-resnet",
+    "out_features": [
+      "stage2",
+      "stage3",
+      "stage4"
+    ],
+    "out_indices": [
+      2,
+      3,
+      4
+    ]
+  },
   "batch_norm_eps": 1e-05,
   "box_noise_scale": 1.0,
   "d_model": 256,
   ],
   "decoder_layers": 3,
   "decoder_n_points": 4,
   "disable_custom_kernels": true,
   "dropout": 0.0,
   "encode_proj_layers": [
     2
   ],
   ],
   "focal_loss_alpha": 0.75,
   "focal_loss_gamma": 2.0,
   "hidden_expansion": 0.5,
   "id2label": {
     "0": "person",
     "1": "bicycle",
   },
   "label_noise_ratio": 0.5,
   "layer_norm_eps": 1e-05,
   "learn_initial_query": false,
   "matcher_alpha": 0.25,
   "matcher_bbox_cost": 5.0,
   "matcher_giou_cost": 2.0,
   "model_type": "rt_detr",
   "normalize_before": false,
   "num_denoising": 100,
   "num_feature_levels": 3,
   "num_queries": 300,
   "positional_encoding_temperature": 10000,
   "torch_dtype": "float32",
   "transformers_version": "4.42.0.dev0",
   "use_focal_loss": true,