{ "_name_or_path": "dandelin/vilt-b32-mlm", "architectures": [ "ViltForQuestionAnswering" ], "attention_probs_dropout_prob": 0.0, "hidden_act": "gelu", "hidden_dropout_prob": 0.0, "hidden_size": 768, "id2label": { "0": "7:35", "1": "in car", "2": "natural", "3": "clock tower", "4": "man", "5": "black", "6": "birthday", "7": "human", "8": "stripes", "9": "beagle", "10": "can't tell", "11": "talking", "12": "woods", "13": "sleeping", "14": "tv", "15": "no", "16": "camera", "17": "plain", "18": "curtains", "19": "roof", "20": "blue", "21": "dirt", "22": "double", "23": "crown", "24": "green", "25": "many", "26": "giraffes", "27": "brown", "28": "lying down", "29": "person", "30": "door", "31": "large", "32": "don't know", "33": "necklace", "34": "shrimp", "35": "brick", "36": "screen", "37": "africa", "38": "not sure", "39": "plate", "40": "snowboard", "41": "fashion", "42": "9:35", "43": "snow", "44": "orange", "45": "rack", "46": "women", "47": "red", "48": "air", "49": "clear", "50": "picnic table", "51": "beige", "52": "shelter", "53": "not there", "54": "soccer ball", "55": "tent", "56": "name tag", "57": "station", "58": "at table", "59": "forest", "60": "clock", "61": "7", "62": "ground", "63": "lanyard", "64": "talking on phone", "65": "laying down", "66": "jeep", "67": "skiing", "68": "girl", "69": "street", "70": "they aren't", "71": "trees", "72": "net", "73": "chopsticks", "74": "curtain", "75": "lady", "76": "plastic", "77": "unknown", "78": "photographer", "79": "red and blue", "80": "10", "81": "pink", "82": "snowboarder", "83": "lg", "84": "windows", "85": "hawaii", "86": "blonde", "87": "shadow", "88": "woman", "89": "smile", "90": "bricks", "91": "big ben", "92": "fence", "93": "2013", "94": "bikes", "95": "yes", "96": "down", "97": "wine tasting", "98": "wine", "99": "purple", "100": "style", "101": "shadows", "102": "skateboard", "103": "ball", "104": "blue and white", "105": "chair", "106": "shade", "107": "tan", "108": "soccer", "109": "5", "110": "window", "111": "car", "112": "calico", "113": "bike rack", "114": "boy", "115": "cloudy", "116": "suv", "117": "8", "118": "skier", "119": "canopy", "120": "cage", "121": "skateboarding", "122": "white and black", "123": "security", "124": "right", "125": "monitor", "126": "giraffe", "127": "on street", "128": "yellow", "129": "stand", "130": "1", "131": "resting", "132": "table", "133": "4", "134": "tabby", "135": "zoo", "136": "red and yellow", "137": "ice cream", "138": "desert", "139": "0", "140": "sidewalk", "141": "cat", "142": "white and blue", "143": "little girl", "144": "nothing", "145": "snowboarding", "146": "on road", "147": "2", "148": "doughnut", "149": "wedding", "150": "bicycles", "151": "8:35", "152": "arrow", "153": "leather", "154": "bedroom", "155": "hair", "156": "out", "157": "walking", "158": "6", "159": "white", "160": "bicycle", "161": "gray", "162": "outside", "163": "solid", "164": "king", "165": "sky", "166": "donut", "167": "french", "168": "watching", "169": "happy", "170": "queen", "171": "full", "172": "tower", "173": "exit", "174": "smiling", "175": "2010", "176": "dog", "177": "hat", "178": "7:45", "179": "park", "180": "church", "181": "low", "182": "3", "183": "black and white", "184": "crossing", "185": "cross", "186": "sun", "187": "tired", "188": "2000", "189": "neon", "190": "protection", "191": "backpack", "192": "platform", "193": "bus", "194": "gray and black", "195": "wall", "196": "train", "197": "cup", "198": "small" }, "image_size": 384, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": 139, "1": 130, "10": 80, "2": 147, "2000": 188, "2010": 175, "2013": 93, "3": 182, "4": 133, "5": 109, "6": 158, "7": 61, "7:35": 0, "7:45": 178, "8": 117, "8:35": 151, "9:35": 42, "africa": 37, "air": 48, "arrow": 152, "at table": 58, "backpack": 191, "ball": 103, "beagle": 9, "bedroom": 154, "beige": 51, "bicycle": 160, "bicycles": 150, "big ben": 91, "bike rack": 113, "bikes": 94, "birthday": 6, "black": 5, "black and white": 183, "blonde": 86, "blue": 20, "blue and white": 104, "boy": 114, "brick": 35, "bricks": 90, "brown": 27, "bus": 193, "cage": 120, "calico": 112, "camera": 16, "can't tell": 10, "canopy": 119, "car": 111, "cat": 141, "chair": 105, "chopsticks": 73, "church": 180, "clear": 49, "clock": 60, "clock tower": 3, "cloudy": 115, "cross": 185, "crossing": 184, "crown": 23, "cup": 197, "curtain": 74, "curtains": 18, "desert": 138, "dirt": 21, "dog": 176, "don't know": 32, "donut": 166, "door": 30, "double": 22, "doughnut": 148, "down": 96, "exit": 173, "fashion": 41, "fence": 92, "forest": 59, "french": 167, "full": 171, "giraffe": 126, "giraffes": 26, "girl": 68, "gray": 161, "gray and black": 194, "green": 24, "ground": 62, "hair": 155, "happy": 169, "hat": 177, "hawaii": 85, "human": 7, "ice cream": 137, "in car": 1, "jeep": 66, "king": 164, "lady": 75, "lanyard": 63, "large": 31, "laying down": 65, "leather": 153, "lg": 83, "little girl": 143, "low": 181, "lying down": 28, "man": 4, "many": 25, "monitor": 125, "name tag": 56, "natural": 2, "necklace": 33, "neon": 189, "net": 72, "no": 15, "not sure": 38, "not there": 53, "nothing": 144, "on road": 146, "on street": 127, "orange": 44, "out": 156, "outside": 162, "park": 179, "person": 29, "photographer": 78, "picnic table": 50, "pink": 81, "plain": 17, "plastic": 76, "plate": 39, "platform": 192, "protection": 190, "purple": 99, "queen": 170, "rack": 45, "red": 47, "red and blue": 79, "red and yellow": 136, "resting": 131, "right": 124, "roof": 19, "screen": 36, "security": 123, "shade": 106, "shadow": 87, "shadows": 101, "shelter": 52, "shrimp": 34, "sidewalk": 140, "skateboard": 102, "skateboarding": 121, "skier": 118, "skiing": 67, "sky": 165, "sleeping": 13, "small": 198, "smile": 89, "smiling": 174, "snow": 43, "snowboard": 40, "snowboarder": 82, "snowboarding": 145, "soccer": 108, "soccer ball": 54, "solid": 163, "stand": 129, "station": 57, "street": 69, "stripes": 8, "style": 100, "sun": 186, "suv": 116, "tabby": 134, "table": 132, "talking": 11, "talking on phone": 64, "tan": 107, "tent": 55, "they aren't": 70, "tired": 187, "tower": 172, "train": 196, "trees": 71, "tv": 14, "unknown": 77, "walking": 157, "wall": 195, "watching": 168, "wedding": 149, "white": 159, "white and black": 122, "white and blue": 142, "window": 110, "windows": 84, "wine": 98, "wine tasting": 97, "woman": 88, "women": 46, "woods": 12, "yellow": 128, "yes": 95, "zoo": 135 }, "layer_norm_eps": 1e-12, "max_image_length": -1, "max_position_embeddings": 40, "modality_type_vocab_size": 2, "model_type": "vilt", "num_attention_heads": 12, "num_channels": 3, "num_hidden_layers": 12, "num_images": -1, "patch_size": 32, "qkv_bias": true, "tie_word_embeddings": false, "torch_dtype": "float32", "transformers_version": "4.38.1", "type_vocab_size": 2, "vocab_size": 30522 }