{ "_name_or_path": "Salesforce/blip-vqa-base", "architectures": [ "BlipForQuestionAnswering" ], "id2label": { "0": "c\u00e1i th\u00eca", "1": "g\u00e0 t\u00e2y", "2": "l\u1ed3ng", "3": "ngo\u00e0i", "4": "gia s\u00fac", "5": "c\u00e1i gh\u1ebf", "6": "ch\u00e9n \u0111\u0129a", "7": "m\u00e1y xay", "8": "rau qu\u1ea3", "9": "chung c\u01b0", "10": "\u0111i\u00eau kh\u1eafc", "11": "c\u1eeda h\u00e0ng", "12": "c\u00e1i b\u00e0n", "13": "n\u01b0\u1edbc", "14": "lon", "15": "s\u00e2n v\u01b0\u1eddn", "16": "\u00e1o cho\u00e0ng", "17": "tr\u1ea1m", "18": "d\u1ea5u hi\u1ec7u", "19": "s\u00e2n kh\u1ea5u", "20": "\u0111\u1ed3ng h\u1ed3", "21": "xe l\u0103n", "22": "g\u1ea5u tr\u00fac", "23": "t\u00e1m", "24": "c\u00e1i k\u1ec7", "25": "con b\u00f2", "26": "th\u00e0nh ph\u1ea7n", "27": "ph\u00f2ng ng\u1ee7", "28": "m\u1eaft", "29": "gi\u1ecf", "30": "xe c\u1ed9", "31": "b\u1ea7u tr\u1eddi", "32": "b\u1ed9 \u0111\u1ed3", "33": "t\u1ee7 l\u1ea1nh", "34": "rau x\u00e0 l\u00e1ch", "35": "xe \u0111i\u1ec7n ng\u1ea7m", "36": "\u0111\u00e8n \u0111\u1ec3 b\u00e0n", "37": "m\u00e0u t\u00eda", "38": "balo", "39": "gi\u00e1 \u0111\u1ee1", "40": "s\u00e2n bay", "41": "nh\u00e0", "42": "t\u00e1ch", "43": "d\u0129a", "44": "d\u00f2ng s\u00f4ng", "45": "bia", "46": "v\u00f4 tuy\u1ebfn", "47": "m\u00e1y bay", "48": "t\u01b0\u1eddng", "49": "n\u00f3n", "50": "m\u00e0n", "51": "b\u1ea3o t\u00e0ng", "52": "m\u1ed9t", "53": "nhi\u1ec1u c\u00e1i gh\u1ebf", "54": "k\u00ednh \u0111eo", "55": "m\u00e1y \u1ea3nh", "56": "\u00f4 c\u1eeda", "57": "\u00e1o ba l\u1ed7", "58": "\u0111\u01b0\u1eddng \u0111ua", "59": "sandwich", "60": "b\u1ee9c t\u01b0\u1ee3ng", "61": "chu\u1ed9t", "62": "qu\u1ea7n \u00e1o", "63": "m\u00f3n tr\u00e1ng mi\u1ec7ng", "64": "c\u00e1i t\u00fai", "65": "ph\u01b0\u01a1ng ti\u1ec7n giao th\u00f4ng", "66": "qu\u1ea3 cam", "67": "con v\u1eb9t", "68": "\u0111\u1ed3 n\u1ed9i th\u1ea5t", "69": "m\u00e1y t\u00ednh", "70": "v\u00f2i", "71": "ng\u0103n", "72": "theo d\u00f5i", "73": "\u0111\u1ea1i d\u01b0\u01a1ng", "74": "s\u00f4 c\u00f4 la", "75": "chu\u1ed3ng", "76": "c\u00e1i l\u1ec1u", "77": "xa l\u1ed9", "78": "hotdog", "79": "r\u01b0\u1ee3u", "80": "b\u00e1nh", "81": "hoa qu\u1ea3", "82": "qu\u00e1n \u0103n", "83": "c\u1eeda", "84": "b\u1ee9c tranh", "85": "m\u00e0u \u0111\u1ecf", "86": "ga-ra", "87": "chi\u1ebfc \u00f4", "88": "ng\u1ef1a r\u1eb1n", "89": "ch\u00edn", "90": "s\u00e2n v\u1eadn \u0111\u1ed9ng", "91": "h\u1ed9p s\u1ed1", "92": "chu\u1ed3ng tr\u1ea1i", "93": "trang thi\u1ebft b\u1ecb", "94": "\u0111\u01b0\u1eddng ph\u1ed1", "95": "l\u00e1t g\u1ea1ch", "96": "b\u1ed3n t\u1eafm", "97": "xe m\u00e1y", "98": "b\u1ee9c m\u00e0n", "99": "con ng\u1ef1a", "100": "cupcake", "101": "g\u0103ng tay", "102": "b\u00ean", "103": "ng\u1ef1a v\u1eb1n", "104": "c\u00e2y g\u1eady", "105": "tr\u01b0\u1eddng h\u1ee3p", "106": "con heo", "107": "m\u00e0u v\u00e0ng", "108": "qu\u1ea7y t\u00ednh ti\u1ec1n", "109": "hoa h\u1ed3ng", "110": "d\u0129a nh\u1ef1a", "111": "t\u1ee7 \u0111\u00f4ng", "112": "xe tay ga", "113": "hay", "114": "\u0111\u0129a", "115": "b\u00e1t", "116": "ph\u00f2ng t\u1eafm", "117": "g\u01b0\u01a1ng", "118": "con m\u00e8o", "119": "nh\u1eefng qu\u1ea3 cam", "120": "m\u0169 l\u01b0\u1ee1i trai", "121": "b\u0103ng gh\u1ebf", "122": "m\u00f3n \u0103n", "123": "chim b\u1ed3 c\u00e2u", "124": "c\u00e1i l\u1ecd", "125": "h\u1ed3 b\u01a1i", "126": "t\u1ee7 \u0111\u00e1", "127": "t\u00f2a th\u00e1p", "128": "c\u1eeda ra v\u00e0o", "129": "con v\u1ecbt", "130": "nh\u00e0 \u1edf", "131": "m\u00e0u n\u00e2u", "132": "m\u00e0u tr\u1eafng", "133": "k\u00ednh r\u00e2m", "134": "thuy\u1ec1n bu\u1ed3m", "135": "ly", "136": "c\u00f4ng c\u1ee5", "137": "b\u1eefa tr\u01b0a", "138": "t\u00e0u h\u1ecfa", "139": "xe \u0111\u1ea9y", "140": "m\u01b0\u1eddi", "141": "\u0111o\u1ea1n phim gi\u1edbi thi\u1ec7u", "142": "b\u1ed1n", "143": "b\u1ed3n ti\u1ec3u", "144": "con ch\u00f3", "145": "di\u1ec1u", "146": "b\u1ee9c \u1ea3nh", "147": "m\u00e0u \u0111en", "148": "l\u00e1t c\u1eaft", "149": "r\u00e0o ch\u1eafn", "150": "l\u0103n tr\u00f2n", "151": "b\u1eafp ch\u00e2n", "152": "v\u1ea1ch k\u1ebb \u0111\u01b0\u1eddng", "153": "t\u1ea1p d\u1ec1", "154": "con d\u00ea", "155": "ng\u01b0\u1eddi gi\u1eef", "156": "c\u00e1i m\u00e2m", "157": "ngo\u00e0i tr\u1eddi", "158": "h\u1ed3", "159": "th\u00f9ng", "160": "v\u00e1n l\u01b0\u1edbt s\u00f3ng", "161": "v\u01b0\u1eddn b\u00e1ch th\u00fa", "162": "g\u00e0", "163": "ch\u00ecm xu\u1ed1ng", "164": "pug", "165": "b\u00f2", "166": "b\u1ea3ng", "167": "con thuy\u1ec1n", "168": "t\u00f2a nh\u00e0", "169": "m\u00e1y bay tr\u1ef1c th\u0103ng", "170": "tr\u01b0\u1ee3t tuy\u1ebft", "171": "\u0111\u01b0\u1eddng ray", "172": "con thoi", "173": "\u0111\u0129a \u0103n", "174": "\u0111\u1ea7m", "175": "m\u00e1y s\u1ea5y kh\u00f4", "176": "ba lan", "177": "ca n\u00f4", "178": "m\u0169 b\u1ea3o hi\u1ec3m", "179": "g\u1ea7u m\u00fac", "180": "c\u1ecf", "181": "g\u1ea5u", "182": "b\u00f4ng hoa", "183": "c\u00e1i n\u0129a", "184": "c\u1eeda s\u1ed5", "185": "d\u0129a ", "186": "qu\u1ea3 b\u00f3ng", "187": "ch\u1ea3o", "188": "gh\u1ebf s\u00f4 pha", "189": "th\u00f9ng ch\u1ee9a", "190": "chim \u01b0ng", "191": "c\u00e1i v\u00ed", "192": "l\u1edbp h\u1ecdc", "193": "c\u1ecf kh\u00f4", "194": "b\u1ea3ng hi\u1ec7u", "195": "\u0111\u01b0\u1eddng \u1ed1ng", "196": "c\u00e2y s\u00e0o", "197": "b\u1ebfn t\u00e0u", "198": "gi\u1ea5y b\u1ea1c", "199": "\u0111i v\u0103ng", "200": "b\u1ebfn du thuy\u1ec1n", "201": "th\u1ecbt", "202": "b\u1ea3y", "203": "g\u1ea1ch", "204": "c\u00e2y k\u00e9o", "205": "l\u1ed1i \u0111i", "206": "nh\u00e0 t\u1eafm", "207": "l\u1ecd c\u1eafm hoa", "208": "ng\u0103n k\u00e9o", "209": "kho", "210": "h\u00e0nh l\u00fd", "211": "c\u00e2y", "212": "ph\u00f2ng b\u1ebfp", "213": "\u0111\u01b0\u1eddng \u0111i b\u1ed9", "214": "\u00e1o kho\u00e1c", "215": "t\u00e0u ho\u1ea3", "216": "\u0111\u1ed3ng ph\u1ee5c", "217": "c\u1ea7u", "218": "\u0111\u1ed9ng c\u01a1", "219": "qu\u1ea3 cam ", "220": "vali", "221": "b\u00e1nh ng\u1ecdt", "222": "v\u0103n ph\u00f2ng", "223": "m\u00e8o con", "224": "v\u00e1n tr\u01b0\u1ee3t tuy\u1ebft", "225": "pizza", "226": "donut", "227": "c\u1ed7 m\u00e1y", "228": "v\u1ee3t", "229": "thi\u1ebft b\u1ecb", "230": "k\u00fd t\u00ean", "231": "qu\u1ea3 t\u00e1o", "232": "t\u00e1o", "233": "m\u1eb7t tr\u1eddi", "234": "kh\u0103n t\u1eafm", "235": "c\u00e1", "236": "laptop", "237": "h\u00e0ng h\u00f3a", "238": "m\u00e1y vi t\u00ednh", "239": "ph\u00f4 mai", "240": "n\u01b0\u1edbc ti\u1ec3u", "241": "c\u00fan y\u00eau", "242": "n\u00fai", "243": "ph\u00f2ng", "244": "h\u00e0ng ho\u00e1", "245": "con voi", "246": "con d\u1ed1c", "247": "m\u00e0u xanh l\u00e1", "248": "kh\u0103n", "249": "b\u00e1nh xe", "250": "m\u00f3ng vu\u1ed1t", "251": "t\u00e0u", "252": "\u0111\u01b0\u1eddng b\u1ed9", "253": "b\u00e0n ch\u1ea3i", "254": "\u0111\u1ed3i", "255": "v\u1ebd tranh l\u00ean t\u01b0\u1eddng", "256": "l\u00f2 s\u01b0\u1edfi", "257": "\u0111\u01b0\u1eddng", "258": "ch\u1eadu", "259": "b\u00e0n ch\u1ea3i \u0111\u00e1nh r\u0103ng.", "260": "xe t\u1ea3i", "261": "h\u00e0nh lang", "262": "b\u00e0n ph\u00edm", "263": "b\u00e3i c\u1ecf", "264": "xe \u0111\u1ea1p", "265": "c\u00e1i r\u1ed5", "266": "con c\u1eebu", "267": "xe \u0111i\u1ec7n", "268": "b\u00f4ng c\u1ea3i xanh", "269": "m\u1eb7t", "270": "\u0111\u0129a n\u00e9m", "271": "nh\u00e0 v\u1ec7 sinh", "272": "chai", "273": "b\u00ecnh", "274": "n\u0103m", "275": "b\u00fai t\u00f3c", "276": "bao b\u00ec", "277": "truy\u1ec1n h\u00ecnh", "278": "c\u1eeda ti\u1ec7m", "279": "s\u00e2n", "280": "hai", "281": "\u0111i\u1ec7n tho\u1ea1i", "282": "b\u1eefa \u0103n", "283": "\u00e1o s\u01a1 mi", "284": "xe \u00f4 t\u00f4", "285": "xe bu\u00fdt", "286": "khung", "287": "b\u1edd bi\u1ec3n", "288": "b\u00ecnh hoa", "289": "th\u00e2n c\u00e2y", "290": "xe", "291": "h\u1ed9p \u0111\u1ef1ng", "292": "qu\u1ea7n short", "293": "\u0111\u01b0\u1eddng s\u1eaft", "294": "\u00e1o vest", "295": "m\u1eb7t n\u1ea1", "296": "cam", "297": "\u0111\u1ed3 u\u1ed1ng", "298": "b\u1eefa \u0103n t\u1ed1i", "299": "m\u0169", "300": "ba", "301": "t\u01b0\u1ee3ng \u0111\u00e0i", "302": "c\u00e1i n\u1ed3i", "303": "gi\u01b0\u1eddng", "304": "s\u00e0n nh\u00e0", "305": "bu\u1ed3ng", "306": "l\u00e1", "307": "c\u00e1c t\u00f2a nh\u00e0", "308": "t\u00fai", "309": "c\u1edd", "310": "rau", "311": "\u0111i\u1ec3m t\u00e2m", "312": "tr\u00e1i c\u00e2y", "313": "l\u00f2 vi s\u00f3ng", "314": "m\u00e0u xanh d\u01b0\u01a1ng", "315": "con chim", "316": "gian h\u00e0ng", "317": "m\u00e0u cam", "318": "h\u1ea3i \u00e2u", "319": "c\u00e0 v\u1ea1t", "320": "n\u1ebfn", "321": "b\u1ebfp", "322": "v\u00f2i n\u01b0\u1edbc", "323": "\u0111\u1ed3 ch\u01a1i", "324": "m\u00e1i ch\u00e8o", "325": "chu\u1ed1i", "326": "v\u00f2i hoa sen", "327": "thu\u1ed1c l\u00e1", "328": "th\u01b0 vi\u1ec7n", "329": "l\u00e1 c\u1edd", "330": "m\u00e0u s\u1eafc", "331": "con kh\u1ec9", "332": "\u0111\u1ea7u m\u00e1y", "333": "g\u1eady", "334": "v\u00e1n tr\u01b0\u1ee3t", "335": "ng\u00e2n h\u00e0ng", "336": "qu\u00e1n bar", "337": "\u1ea3nh ch\u1ee5p", "338": "nh\u1eefng b\u1ee9c \u1ea3nh", "339": "h\u1ed9p", "340": "dao", "341": "toa xe", "342": "h\u01b0\u01a1u cao c\u1ed5", "343": "xe l\u1eeda", "344": "b\u00e1nh hamburger", "345": "m\u00e0u x\u00e1m", "346": "\u0111\u01b0a \u0111\u00f3n", "347": "b\u00f2 \u0111\u1ef1c", "348": "c\u00e0 ph\u00ea", "349": "s\u00e1u", "350": "s\u1eefa", "351": "c\u1eebu", "352": "c\u00e0 r\u1ed1t" }, "image_text_hidden_size": 256, "initializer_factor": 1.0, "initializer_range": 0.02, "label2id": { "ba": 300, "ba lan": 176, "balo": 38, "bao b\u00ec": 276, "bia": 45, "bu\u1ed3ng": 305, "b\u00e0n ch\u1ea3i": 253, "b\u00e0n ch\u1ea3i \u0111\u00e1nh r\u0103ng.": 259, "b\u00e0n ph\u00edm": 262, "b\u00e1nh": 80, "b\u00e1nh hamburger": 344, "b\u00e1nh ng\u1ecdt": 221, "b\u00e1nh xe": 249, "b\u00e1t": 115, "b\u00e3i c\u1ecf": 263, "b\u00ean": 102, "b\u00ecnh": 273, "b\u00ecnh hoa": 288, "b\u00f2": 165, "b\u00f2 \u0111\u1ef1c": 347, "b\u00f4ng c\u1ea3i xanh": 268, "b\u00f4ng hoa": 182, "b\u00fai t\u00f3c": 275, "b\u0103ng gh\u1ebf": 121, "b\u1ea3ng": 166, "b\u1ea3ng hi\u1ec7u": 194, "b\u1ea3o t\u00e0ng": 51, "b\u1ea3y": 202, "b\u1ea7u tr\u1eddi": 31, "b\u1eafp ch\u00e2n": 151, "b\u1ebfn du thuy\u1ec1n": 200, "b\u1ebfn t\u00e0u": 197, "b\u1ebfp": 321, "b\u1ed1n": 142, "b\u1ed3n ti\u1ec3u": 143, "b\u1ed3n t\u1eafm": 96, "b\u1ed9 \u0111\u1ed3": 32, "b\u1edd bi\u1ec3n": 287, "b\u1ee9c m\u00e0n": 98, "b\u1ee9c tranh": 84, "b\u1ee9c t\u01b0\u1ee3ng": 60, "b\u1ee9c \u1ea3nh": 146, "b\u1eefa tr\u01b0a": 137, "b\u1eefa \u0103n": 282, "b\u1eefa \u0103n t\u1ed1i": 298, "ca n\u00f4": 177, "cam": 296, "chai": 272, "chim b\u1ed3 c\u00e2u": 123, "chim \u01b0ng": 190, "chi\u1ebfc \u00f4": 87, "chung c\u01b0": 9, "chu\u1ed1i": 325, "chu\u1ed3ng": 75, "chu\u1ed3ng tr\u1ea1i": 92, "chu\u1ed9t": 61, "ch\u00e9n \u0111\u0129a": 6, "ch\u00ecm xu\u1ed1ng": 163, "ch\u00edn": 89, "ch\u1ea3o": 187, "ch\u1eadu": 258, "con b\u00f2": 25, "con chim": 315, "con ch\u00f3": 144, "con c\u1eebu": 266, "con d\u00ea": 154, "con d\u1ed1c": 246, "con heo": 106, "con kh\u1ec9": 331, "con m\u00e8o": 118, "con ng\u1ef1a": 99, "con thoi": 172, "con thuy\u1ec1n": 167, "con voi": 245, "con v\u1eb9t": 67, "con v\u1ecbt": 129, "cupcake": 100, "c\u00e0 ph\u00ea": 348, "c\u00e0 r\u1ed1t": 352, "c\u00e0 v\u1ea1t": 319, "c\u00e1": 235, "c\u00e1c t\u00f2a nh\u00e0": 307, "c\u00e1i b\u00e0n": 12, "c\u00e1i gh\u1ebf": 5, "c\u00e1i k\u1ec7": 24, "c\u00e1i l\u1ec1u": 76, "c\u00e1i l\u1ecd": 124, "c\u00e1i m\u00e2m": 156, "c\u00e1i n\u0129a": 183, "c\u00e1i n\u1ed3i": 302, "c\u00e1i r\u1ed5": 265, "c\u00e1i th\u00eca": 0, "c\u00e1i t\u00fai": 64, "c\u00e1i v\u00ed": 191, "c\u00e2y": 211, "c\u00e2y g\u1eady": 104, "c\u00e2y k\u00e9o": 204, "c\u00e2y s\u00e0o": 196, "c\u00f4ng c\u1ee5": 136, "c\u00fan y\u00eau": 241, "c\u1ea7u": 217, "c\u1ecf": 180, "c\u1ecf kh\u00f4": 193, "c\u1ed7 m\u00e1y": 227, "c\u1edd": 309, "c\u1eebu": 351, "c\u1eeda": 83, "c\u1eeda h\u00e0ng": 11, "c\u1eeda ra v\u00e0o": 128, "c\u1eeda s\u1ed5": 184, "c\u1eeda ti\u1ec7m": 278, "dao": 340, "di\u1ec1u": 145, "donut": 226, "d\u00f2ng s\u00f4ng": 44, "d\u0129a": 43, "d\u0129a ": 185, "d\u0129a nh\u1ef1a": 110, "d\u1ea5u hi\u1ec7u": 18, "ga-ra": 86, "gh\u1ebf s\u00f4 pha": 188, "gia s\u00fac": 4, "gian h\u00e0ng": 316, "gi\u00e1 \u0111\u1ee1": 39, "gi\u01b0\u1eddng": 303, "gi\u1ea5y b\u1ea1c": 198, "gi\u1ecf": 29, "g\u00e0": 162, "g\u00e0 t\u00e2y": 1, "g\u0103ng tay": 101, "g\u01b0\u01a1ng": 117, "g\u1ea1ch": 203, "g\u1ea5u": 181, "g\u1ea5u tr\u00fac": 22, "g\u1ea7u m\u00fac": 179, "g\u1eady": 333, "hai": 280, "hay": 113, "hoa h\u1ed3ng": 109, "hoa qu\u1ea3": 81, "hotdog": 78, "h\u00e0ng ho\u00e1": 244, "h\u00e0ng h\u00f3a": 237, "h\u00e0nh lang": 261, "h\u00e0nh l\u00fd": 210, "h\u01b0\u01a1u cao c\u1ed5": 342, "h\u1ea3i \u00e2u": 318, "h\u1ed3": 158, "h\u1ed3 b\u01a1i": 125, "h\u1ed9p": 339, "h\u1ed9p s\u1ed1": 91, "h\u1ed9p \u0111\u1ef1ng": 291, "kho": 209, "khung": 286, "kh\u0103n": 248, "kh\u0103n t\u1eafm": 234, "k\u00ednh r\u00e2m": 133, "k\u00ednh \u0111eo": 54, "k\u00fd t\u00ean": 230, "laptop": 236, "lon": 14, "ly": 135, "l\u00e1": 306, "l\u00e1 c\u1edd": 329, "l\u00e1t c\u1eaft": 148, "l\u00e1t g\u1ea1ch": 95, "l\u00f2 s\u01b0\u1edfi": 256, "l\u00f2 vi s\u00f3ng": 313, "l\u0103n tr\u00f2n": 150, "l\u1ecd c\u1eafm hoa": 207, "l\u1ed1i \u0111i": 205, "l\u1ed3ng": 2, "l\u1edbp h\u1ecdc": 192, "m\u00e0n": 50, "m\u00e0u cam": 317, "m\u00e0u n\u00e2u": 131, "m\u00e0u s\u1eafc": 330, "m\u00e0u tr\u1eafng": 132, "m\u00e0u t\u00eda": 37, "m\u00e0u v\u00e0ng": 107, "m\u00e0u xanh d\u01b0\u01a1ng": 314, "m\u00e0u xanh l\u00e1": 247, "m\u00e0u x\u00e1m": 345, "m\u00e0u \u0111en": 147, "m\u00e0u \u0111\u1ecf": 85, "m\u00e1i ch\u00e8o": 324, "m\u00e1y bay": 47, "m\u00e1y bay tr\u1ef1c th\u0103ng": 169, "m\u00e1y s\u1ea5y kh\u00f4": 175, "m\u00e1y t\u00ednh": 69, "m\u00e1y vi t\u00ednh": 238, "m\u00e1y xay": 7, "m\u00e1y \u1ea3nh": 55, "m\u00e8o con": 223, "m\u00f3n tr\u00e1ng mi\u1ec7ng": 63, "m\u00f3n \u0103n": 122, "m\u00f3ng vu\u1ed1t": 250, "m\u0169": 299, "m\u0169 b\u1ea3o hi\u1ec3m": 178, "m\u0169 l\u01b0\u1ee1i trai": 120, "m\u01b0\u1eddi": 140, "m\u1eaft": 28, "m\u1eb7t": 269, "m\u1eb7t n\u1ea1": 295, "m\u1eb7t tr\u1eddi": 233, "m\u1ed9t": 52, "ngo\u00e0i": 3, "ngo\u00e0i tr\u1eddi": 157, "ng\u00e2n h\u00e0ng": 335, "ng\u0103n": 71, "ng\u0103n k\u00e9o": 208, "ng\u01b0\u1eddi gi\u1eef": 155, "ng\u1ef1a r\u1eb1n": 88, "ng\u1ef1a v\u1eb1n": 103, "nhi\u1ec1u c\u00e1i gh\u1ebf": 53, "nh\u00e0": 41, "nh\u00e0 t\u1eafm": 206, "nh\u00e0 v\u1ec7 sinh": 271, "nh\u00e0 \u1edf": 130, "nh\u1eefng b\u1ee9c \u1ea3nh": 338, "nh\u1eefng qu\u1ea3 cam": 119, "n\u00f3n": 49, "n\u00fai": 242, "n\u0103m": 274, "n\u01b0\u1edbc": 13, "n\u01b0\u1edbc ti\u1ec3u": 240, "n\u1ebfn": 320, "ph\u00f2ng": 243, "ph\u00f2ng b\u1ebfp": 212, "ph\u00f2ng ng\u1ee7": 27, "ph\u00f2ng t\u1eafm": 116, "ph\u00f4 mai": 239, "ph\u01b0\u01a1ng ti\u1ec7n giao th\u00f4ng": 65, "pizza": 225, "pug": 164, "qu\u00e1n bar": 336, "qu\u00e1n \u0103n": 82, "qu\u1ea3 b\u00f3ng": 186, "qu\u1ea3 cam": 66, "qu\u1ea3 cam ": 219, "qu\u1ea3 t\u00e1o": 231, "qu\u1ea7n short": 292, "qu\u1ea7n \u00e1o": 62, "qu\u1ea7y t\u00ednh ti\u1ec1n": 108, "rau": 310, "rau qu\u1ea3": 8, "rau x\u00e0 l\u00e1ch": 34, "r\u00e0o ch\u1eafn": 149, "r\u01b0\u1ee3u": 79, "sandwich": 59, "s\u00e0n nh\u00e0": 304, "s\u00e1u": 349, "s\u00e2n": 279, "s\u00e2n bay": 40, "s\u00e2n kh\u1ea5u": 19, "s\u00e2n v\u01b0\u1eddn": 15, "s\u00e2n v\u1eadn \u0111\u1ed9ng": 90, "s\u00f4 c\u00f4 la": 74, "s\u1eefa": 350, "theo d\u00f5i": 72, "thi\u1ebft b\u1ecb": 229, "thuy\u1ec1n bu\u1ed3m": 134, "thu\u1ed1c l\u00e1": 327, "th\u00e0nh ph\u1ea7n": 26, "th\u00e2n c\u00e2y": 289, "th\u00f9ng": 159, "th\u00f9ng ch\u1ee9a": 189, "th\u01b0 vi\u1ec7n": 328, "th\u1ecbt": 201, "toa xe": 341, "trang thi\u1ebft b\u1ecb": 93, "truy\u1ec1n h\u00ecnh": 277, "tr\u00e1i c\u00e2y": 312, "tr\u01b0\u1eddng h\u1ee3p": 105, "tr\u01b0\u1ee3t tuy\u1ebft": 170, "tr\u1ea1m": 17, "t\u00e0u": 251, "t\u00e0u ho\u1ea3": 215, "t\u00e0u h\u1ecfa": 138, "t\u00e1ch": 42, "t\u00e1m": 23, "t\u00e1o": 232, "t\u00f2a nh\u00e0": 168, "t\u00f2a th\u00e1p": 127, "t\u00fai": 308, "t\u01b0\u1eddng": 48, "t\u01b0\u1ee3ng \u0111\u00e0i": 301, "t\u1ea1p d\u1ec1": 153, "t\u1ee7 l\u1ea1nh": 33, "t\u1ee7 \u0111\u00e1": 126, "t\u1ee7 \u0111\u00f4ng": 111, "vali": 220, "v\u00e1n l\u01b0\u1edbt s\u00f3ng": 160, "v\u00e1n tr\u01b0\u1ee3t": 334, "v\u00e1n tr\u01b0\u1ee3t tuy\u1ebft": 224, "v\u00f2i": 70, "v\u00f2i hoa sen": 326, "v\u00f2i n\u01b0\u1edbc": 322, "v\u00f4 tuy\u1ebfn": 46, "v\u0103n ph\u00f2ng": 222, "v\u01b0\u1eddn b\u00e1ch th\u00fa": 161, "v\u1ea1ch k\u1ebb \u0111\u01b0\u1eddng": 152, "v\u1ebd tranh l\u00ean t\u01b0\u1eddng": 255, "v\u1ee3t": 228, "xa l\u1ed9": 77, "xe": 290, "xe bu\u00fdt": 285, "xe c\u1ed9": 30, "xe l\u0103n": 21, "xe l\u1eeda": 343, "xe m\u00e1y": 97, "xe tay ga": 112, "xe t\u1ea3i": 260, "xe \u00f4 t\u00f4": 284, "xe \u0111i\u1ec7n": 267, "xe \u0111i\u1ec7n ng\u1ea7m": 35, "xe \u0111\u1ea1p": 264, "xe \u0111\u1ea9y": 139, "\u00e1o ba l\u1ed7": 57, "\u00e1o cho\u00e0ng": 16, "\u00e1o kho\u00e1c": 214, "\u00e1o s\u01a1 mi": 283, "\u00e1o vest": 294, "\u00f4 c\u1eeda": 56, "\u0111i v\u0103ng": 199, "\u0111i\u00eau kh\u1eafc": 10, "\u0111i\u1ec3m t\u00e2m": 311, "\u0111i\u1ec7n tho\u1ea1i": 281, "\u0111o\u1ea1n phim gi\u1edbi thi\u1ec7u": 141, "\u0111\u00e8n \u0111\u1ec3 b\u00e0n": 36, "\u0111\u0129a": 114, "\u0111\u0129a n\u00e9m": 270, "\u0111\u0129a \u0103n": 173, "\u0111\u01b0a \u0111\u00f3n": 346, "\u0111\u01b0\u1eddng": 257, "\u0111\u01b0\u1eddng b\u1ed9": 252, "\u0111\u01b0\u1eddng ph\u1ed1": 94, "\u0111\u01b0\u1eddng ray": 171, "\u0111\u01b0\u1eddng s\u1eaft": 293, "\u0111\u01b0\u1eddng \u0111i b\u1ed9": 213, "\u0111\u01b0\u1eddng \u0111ua": 58, "\u0111\u01b0\u1eddng \u1ed1ng": 195, "\u0111\u1ea1i d\u01b0\u01a1ng": 73, "\u0111\u1ea7m": 174, "\u0111\u1ea7u m\u00e1y": 332, "\u0111\u1ed3 ch\u01a1i": 323, "\u0111\u1ed3 n\u1ed9i th\u1ea5t": 68, "\u0111\u1ed3 u\u1ed1ng": 297, "\u0111\u1ed3i": 254, "\u0111\u1ed3ng h\u1ed3": 20, "\u0111\u1ed3ng ph\u1ee5c": 216, "\u0111\u1ed9ng c\u01a1": 218, "\u1ea3nh ch\u1ee5p": 337 }, "label_smoothing": 0.0, "logit_scale_init_value": 2.6592, "model_type": "blip", "projection_dim": 512, "text_config": { "initializer_factor": 1.0, "model_type": "blip_text_model", "num_attention_heads": 12 }, "torch_dtype": "float32", "transformers_version": "4.39.3", "use_cache": false, "vision_config": { "dropout": 0.0, "initializer_factor": 1.0, "initializer_range": 0.02, "model_type": "blip_vision_model", "num_channels": 3 } }