{ "_name_or_path": "dandelin/vilt-b32-mlm", "architectures": [ "ViltForQuestionAnswering" ], "attention_probs_dropout_prob": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "england", "1": "refrigerator", "2": "sun", "3": "cows", "4": "bus", "5": "woods", "6": "car", "7": "home", "8": "hallway", "9": "windows", "10": "america", "11": "smile", "12": "fridge", "13": "yes", "14": "room", "15": "on sidewalk", "16": "hotel", "17": "us", "18": "nothing", "19": "australia", "20": "usa", "21": "freezer", "22": "buildings", "23": "living room", "24": "park", "25": "uk", "26": "indoors", "27": "dog", "28": "airport", "29": "ground", "30": "inside", "31": "in car", "32": "nowhere", "33": "united states", "34": "british", "35": "mouse" }, "image_size": 384, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "airport": 28, "america": 10, "australia": 19, "british": 34, "buildings": 22, "bus": 4, "car": 6, "cows": 3, "dog": 27, "england": 0, "freezer": 21, "fridge": 12, "ground": 29, "hallway": 8, "home": 7, "hotel": 16, "in car": 31, "indoors": 26, "inside": 30, "living room": 23, "mouse": 35, "nothing": 18, "nowhere": 32, "on sidewalk": 15, "park": 24, "refrigerator": 1, "room": 14, "smile": 11, "sun": 2, "uk": 25, "united states": 33, "us": 17, "usa": 20, "windows": 9, "woods": 5, "yes": 13 }, "layer_norm_eps": 1e-12, "max_image_length": -1, "max_position_embeddings": 40, "modality_type_vocab_size": 2, "model_type": "vilt", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_images": -1, "patch_size": 32, "qkv_bias": true, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.40.2", "type_vocab_size": 2, "vocab_size": 30522 }