{ "_name_or_path": "facebook/detr-resnet-50", "activation_dropout": 0.0, "activation_function": "relu", "architectures": [ "DetrForObjectDetection" ], "attention_dropout": 0.0, "auxiliary_loss": false, "backbone": "resnet50", "backbone_config": null, "backbone_kwargs": { "in_chans": 3, "out_indices": [ 1, 2, 3, 4 ] }, "bbox_cost": 5, "bbox_loss_coefficient": 5, "class_cost": 1, "classifier_dropout": 0.0, "d_model": 256, "decoder_attention_heads": 8, "decoder_ffn_dim": 2048, "decoder_layerdrop": 0.0, "decoder_layers": 6, "dice_loss_coefficient": 1, "dilation": false, "dropout": 0.1, "encoder_attention_heads": 8, "encoder_ffn_dim": 2048, "encoder_layerdrop": 0.0, "encoder_layers": 6, "eos_coefficient": 0.1, "giou_cost": 2, "giou_loss_coefficient": 2, "id2label": { "1": 53, "2": 98, "3": 99, "4": 47, "5": 48, "6": 28, "7": 29, "8": 51, "9": 97, "10": 98, "11": 99, "12": 14, "13": 16, "14": 50, "15": 76, "16": 77, "17": 32, "18": 33, "19": 74, "20": 83, "21": 67, "22": 101, "23": 100, "24": 102, "25": 31, "26": 4, "27": 87, "28": 89, "29": 15, "30": 12, "31": 7, "32": 92, "33": 3, "34": 52, "35": 20, "36": 19, "37": 57, "38": 58, "39": 35, "40": 34, "41": 39, "42": 40, "43": 49, "44": 20, "45": 19, "46": 85, "47": 86, "48": 74, "49": 75, "50": 61, "51": 62, "52": 18, "53": 14, "54": 35, "55": 34, "56": 1, "57": 28, "58": 30, "59": 10, "60": 53, "61": 22, "62": 20, "63": 19, "64": 74, "65": 101, "66": 100, "67": 102, "68": 72, "69": 73, "70": 21, "71": 57, "72": 58, "73": 36, "74": 37, "75": 38, "76": 63, "77": 53, "78": 90, "79": 91, "80": 7, "81": 9, "82": 97, "83": 98, "84": 99, "85": 26, "86": 27, "87": 92, "88": 93, "89": 66, "90": 61, "91": 62, "92": 94, "93": 92, "94": 87, "95": 81, "96": 80, "97": 78, "98": 79, "99": 51, "100": 68, "101": 54, "102": 55, "103": 56, "104": 69, "105": 8, "106": 103, "107": 104, "108": 105, "109": 25, "110": 23, "111": 24, "112": 72, "113": 95, "114": 96, "115": 41, "116": 42, "117": 6, "118": 70, "119": 71, "120": 44, "121": 45, "122": 46, "123": 15, "124": 12, "125": 87, "126": 88, "127": 28, "128": 29, "129": 82, "130": 83, "131": 106, "132": 107, "133": 2, "134": 42, "135": 43, "136": 81, "137": 80, "138": 78, "139": 67, "140": 8, "141": 26, "142": 32, "143": 11, "144": 13, "145": 17, "146": 23, "147": 24, "148": 63, "149": 63, "150": 70, "151": 84, "152": 106, "153": 107, "154": 2, "155": 59, "156": 60, "157": 10, "158": 63, "159": 64 }, "init_std": 0.02, "init_xavier_std": 1.0, "is_encoder_decoder": true, "label2id": { "1": 56, "2": 154, "3": 33, "4": 26, "6": 117, "7": 80, "8": 140, "9": 81, "10": 157, "11": 143, "12": 124, "13": 144, "14": 53, "15": 123, "16": 13, "17": 145, "18": 52, "19": 63, "20": 62, "21": 70, "22": 61, "23": 146, "24": 147, "25": 109, "26": 141, "27": 86, "28": 127, "29": 128, "30": 58, "31": 25, "32": 142, "33": 18, "34": 55, "35": 54, "36": 73, "37": 74, "38": 75, "39": 41, "40": 42, "41": 115, "42": 134, "43": 135, "44": 120, "45": 121, "46": 122, "47": 4, "48": 5, "49": 43, "50": 14, "51": 99, "52": 34, "53": 77, "54": 101, "55": 102, "56": 103, "57": 71, "58": 72, "59": 155, "60": 156, "61": 90, "62": 91, "63": 158, "64": 159, "66": 89, "67": 139, "68": 100, "69": 104, "70": 150, "71": 119, "72": 112, "73": 69, "74": 64, "75": 49, "76": 15, "77": 16, "78": 138, "79": 98, "80": 137, "81": 136, "82": 129, "83": 130, "84": 151, "85": 46, "86": 47, "87": 125, "88": 126, "89": 28, "90": 78, "91": 79, "92": 93, "93": 88, "94": 92, "95": 113, "96": 114, "97": 82, "98": 83, "99": 84, "100": 66, "101": 65, "102": 67, "103": 106, "104": 107, "105": 108, "106": 152, "107": 153 }, "mask_loss_coefficient": 1, "max_position_embeddings": 1024, "model_type": "detr", "num_channels": 3, "num_hidden_layers": 6, "num_queries": 100, "position_embedding_type": "sine", "scale_embedding": false, "torch_dtype": "float32", "transformers_version": "4.41.1", "use_pretrained_backbone": true, "use_timm_backbone": true }