{
"architectures": [
"OneFormerForUniversalSegmentation"
],
"backbone_config": {
"attention_probs_dropout_prob": 0.0,
"depths": [
3,
4,
18,
5
],
"dilations": [
[
1,
18,
1
],
[
1,
5,
1,
9
],
[
1,
2,
1,
3,
1,
4,
1,
2,
1,
3,
1,
4,
1,
2,
1,
3,
1,
4
],
[
1,
2,
1,
2,
1
]
],
"drop_path_rate": 0.3,
"embed_dim": 192,
"encoder_stride": 32,
"feature_channels": [
192,
384,
768,
1536
],
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"kernel_size": 7,
"layer_scale_init_value": 0.0,
"mlp_ratio": 2.0,
"num_channels": 3,
"num_heads": [
6,
12,
24,
48
],
"patch_size": 4,
"qkv_bias": true,
"strides": [
4,
8,
16,
32
]
},
"decoder_config": {
"common_stride": 4,
"conv_dim": 256,
"decoder_layers": 10,
"dim_feedforward": 2048,
"dropout": 0.1,
"encoder_feedforward_dim": 1024,
"encoder_layers": 6,
"enforce_input_proj": false,
"hidden_dim": 256,
"mask_dim": 256,
"norm": "GN",
"num_heads": 8,
"pre_norm": false,
"query_dec_layers": 2,
"use_task_norm": true
},
"general_config": {
"backbone_type": "dinat",
"class_weight": 2.0,
"contrastive_temperature": 0.07,
"contrastive_weight": 0.5,
"deep_supervision": true,
"dice_weight": 5.0,
"ignore_value": 255,
"importance_sample_ratio": 0.75,
"init_std": 0.02,
"init_xavier_std": 1.0,
"is_train": false,
"layer_norm_eps": 1e-05,
"mask_weight": 5.0,
"no_object_weight": 0.1,
"num_classes": 19,
"num_queries": 250,
"output_auxiliary_logits": true,
"oversample_ratio": 3.0,
"train_num_points": 12544,
"use_auxiliary_loss": true
},
"hidden_size": 256,
"id2label": {
"0": "road",
"1": "sidewalk",
"2": "building",
"3": "wall",
"4": "fence",
"5": "pole",
"6": "traffic light",
"7": "traffic sign",
"8": "vegetation",
"9": "terrain",
"10": "sky",
"11": "person",
"12": "rider",
"13": "car",
"14": "truck",
"15": "bus",
"16": "train",
"17": "motorcycle",
"18": "bicycle"
},
"init_std": 0.02,
"init_xavier_std": 1.0,
"label2id": {
"bicycle": 18,
"building": 2,
"bus": 15,
"car": 13,
"fence": 4,
"motorcycle": 17,
"person": 11,
"pole": 5,
"rider": 12,
"road": 0,
"sidewalk": 1,
"sky": 10,
"terrain": 9,
"traffic light": 6,
"traffic sign": 7,
"train": 16,
"truck": 14,
"vegetation": 8,
"wall": 3
},
"model_type": "oneformer",
"num_attention_heads": 8,
"num_hidden_layers": 10,
"output_attentions": true,
"output_hidden_states": true,
"text_encoder_config": {
"max_seq_len": 77,
"task_seq_len": 77,
"text_encoder_context_length": 77,
"text_encoder_n_ctx": 16,
"text_encoder_num_layers": 6,
"text_encoder_proj_layers": 2,
"text_encoder_vocab_size": 49408,
"text_encoder_width": 256
},
"torch_dtype": "float32",
"transformers_version": "4.25.0.dev0"
}