microchar_moe / tokenizer.json
Corianas's picture
Upload folder using huggingface_hub
cb6a088 verified
raw
history blame contribute delete
No virus
9.04 kB
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "<s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "</s>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Sequence",
"normalizers": [
{
"type": "Prepend",
"prepend": "▁"
},
{
"type": "Replace",
"pattern": {
"String": " "
},
"content": "▁"
}
]
},
"pre_tokenizer": null,
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "<s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "<s>",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "<s>",
"type_id": 1
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"<s>": {
"id": "<s>",
"ids": [
1
],
"tokens": [
"<s>"
]
}
}
},
"decoder": {
"type": "Sequence",
"decoders": [
{
"type": "Replace",
"pattern": {
"String": "▁"
},
"content": " "
},
{
"type": "ByteFallback"
},
{
"type": "Fuse"
},
{
"type": "Strip",
"content": " ",
"start": 1,
"stop": 0
}
]
},
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "<unk>",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": true,
"byte_fallback": true,
"vocab": {
"<unk>": 0,
"<s>": 1,
"</s>": 2,
"<0x00>": 3,
"<0x01>": 4,
"<0x02>": 5,
"<0x03>": 6,
"<0x04>": 7,
"<0x05>": 8,
"<0x06>": 9,
"<0x07>": 10,
"<0x08>": 11,
"<0x09>": 12,
"<0x0A>": 13,
"<0x0B>": 14,
"<0x0C>": 15,
"<0x0D>": 16,
"<0x0E>": 17,
"<0x0F>": 18,
"<0x10>": 19,
"<0x11>": 20,
"<0x12>": 21,
"<0x13>": 22,
"<0x14>": 23,
"<0x15>": 24,
"<0x16>": 25,
"<0x17>": 26,
"<0x18>": 27,
"<0x19>": 28,
"<0x1A>": 29,
"<0x1B>": 30,
"<0x1C>": 31,
"<0x1D>": 32,
"<0x1E>": 33,
"<0x1F>": 34,
"<0x20>": 35,
"<0x21>": 36,
"<0x22>": 37,
"<0x23>": 38,
"<0x24>": 39,
"<0x25>": 40,
"<0x26>": 41,
"<0x27>": 42,
"<0x28>": 43,
"<0x29>": 44,
"<0x2A>": 45,
"<0x2B>": 46,
"<0x2C>": 47,
"<0x2D>": 48,
"<0x2E>": 49,
"<0x2F>": 50,
"<0x30>": 51,
"<0x31>": 52,
"<0x32>": 53,
"<0x33>": 54,
"<0x34>": 55,
"<0x35>": 56,
"<0x36>": 57,
"<0x37>": 58,
"<0x38>": 59,
"<0x39>": 60,
"<0x3A>": 61,
"<0x3B>": 62,
"<0x3C>": 63,
"<0x3D>": 64,
"<0x3E>": 65,
"<0x3F>": 66,
"<0x40>": 67,
"<0x41>": 68,
"<0x42>": 69,
"<0x43>": 70,
"<0x44>": 71,
"<0x45>": 72,
"<0x46>": 73,
"<0x47>": 74,
"<0x48>": 75,
"<0x49>": 76,
"<0x4A>": 77,
"<0x4B>": 78,
"<0x4C>": 79,
"<0x4D>": 80,
"<0x4E>": 81,
"<0x4F>": 82,
"<0x50>": 83,
"<0x51>": 84,
"<0x52>": 85,
"<0x53>": 86,
"<0x54>": 87,
"<0x55>": 88,
"<0x56>": 89,
"<0x57>": 90,
"<0x58>": 91,
"<0x59>": 92,
"<0x5A>": 93,
"<0x5B>": 94,
"<0x5C>": 95,
"<0x5D>": 96,
"<0x5E>": 97,
"<0x5F>": 98,
"<0x60>": 99,
"<0x61>": 100,
"<0x62>": 101,
"<0x63>": 102,
"<0x64>": 103,
"<0x65>": 104,
"<0x66>": 105,
"<0x67>": 106,
"<0x68>": 107,
"<0x69>": 108,
"<0x6A>": 109,
"<0x6B>": 110,
"<0x6C>": 111,
"<0x6D>": 112,
"<0x6E>": 113,
"<0x6F>": 114,
"<0x70>": 115,
"<0x71>": 116,
"<0x72>": 117,
"<0x73>": 118,
"<0x74>": 119,
"<0x75>": 120,
"<0x76>": 121,
"<0x77>": 122,
"<0x78>": 123,
"<0x79>": 124,
"<0x7A>": 125,
"<0x7B>": 126,
"<0x7C>": 127,
"<0x7D>": 128,
"<0x7E>": 129,
"<0x7F>": 130,
"<0x80>": 131,
"<0x81>": 132,
"<0x82>": 133,
"<0x83>": 134,
"<0x84>": 135,
"<0x85>": 136,
"<0x86>": 137,
"<0x87>": 138,
"<0x88>": 139,
"<0x89>": 140,
"<0x8A>": 141,
"<0x8B>": 142,
"<0x8C>": 143,
"<0x8D>": 144,
"<0x8E>": 145,
"<0x8F>": 146,
"<0x90>": 147,
"<0x91>": 148,
"<0x92>": 149,
"<0x93>": 150,
"<0x94>": 151,
"<0x95>": 152,
"<0x96>": 153,
"<0x97>": 154,
"<0x98>": 155,
"<0x99>": 156,
"<0x9A>": 157,
"<0x9B>": 158,
"<0x9C>": 159,
"<0x9D>": 160,
"<0x9E>": 161,
"<0x9F>": 162,
"<0xA0>": 163,
"<0xA1>": 164,
"<0xA2>": 165,
"<0xA3>": 166,
"<0xA4>": 167,
"<0xA5>": 168,
"<0xA6>": 169,
"<0xA7>": 170,
"<0xA8>": 171,
"<0xA9>": 172,
"<0xAA>": 173,
"<0xAB>": 174,
"<0xAC>": 175,
"<0xAD>": 176,
"<0xAE>": 177,
"<0xAF>": 178,
"<0xB0>": 179,
"<0xB1>": 180,
"<0xB2>": 181,
"<0xB3>": 182,
"<0xB4>": 183,
"<0xB5>": 184,
"<0xB6>": 185,
"<0xB7>": 186,
"<0xB8>": 187,
"<0xB9>": 188,
"<0xBA>": 189,
"<0xBB>": 190,
"<0xBC>": 191,
"<0xBD>": 192,
"<0xBE>": 193,
"<0xBF>": 194,
"<0xC0>": 195,
"<0xC1>": 196,
"<0xC2>": 197,
"<0xC3>": 198,
"<0xC4>": 199,
"<0xC5>": 200,
"<0xC6>": 201,
"<0xC7>": 202,
"<0xC8>": 203,
"<0xC9>": 204,
"<0xCA>": 205,
"<0xCB>": 206,
"<0xCC>": 207,
"<0xCD>": 208,
"<0xCE>": 209,
"<0xCF>": 210,
"<0xD0>": 211,
"<0xD1>": 212,
"<0xD2>": 213,
"<0xD3>": 214,
"<0xD4>": 215,
"<0xD5>": 216,
"<0xD6>": 217,
"<0xD7>": 218,
"<0xD8>": 219,
"<0xD9>": 220,
"<0xDA>": 221,
"<0xDB>": 222,
"<0xDC>": 223,
"<0xDD>": 224,
"<0xDE>": 225,
"<0xDF>": 226,
"<0xE0>": 227,
"<0xE1>": 228,
"<0xE2>": 229,
"<0xE3>": 230,
"<0xE4>": 231,
"<0xE5>": 232,
"<0xE6>": 233,
"<0xE7>": 234,
"<0xE8>": 235,
"<0xE9>": 236,
"<0xEA>": 237,
"<0xEB>": 238,
"<0xEC>": 239,
"<0xED>": 240,
"<0xEE>": 241,
"<0xEF>": 242,
"<0xF0>": 243,
"<0xF1>": 244,
"<0xF2>": 245,
"<0xF3>": 246,
"<0xF4>": 247,
"<0xF5>": 248,
"<0xF6>": 249,
"<0xF7>": 250,
"<0xF8>": 251,
"<0xF9>": 252,
"<0xFA>": 253,
"<0xFB>": 254,
"<0xFC>": 255,
"<0xFD>": 256,
"<0xFE>": 257,
"<0xFF>": 258,
"▁": 259,
"e": 260,
"t": 261,
"a": 262,
"o": 263,
"h": 264,
"n": 265,
"s": 266,
"i": 267,
"r": 268,
"d": 269,
"l": 270,
"u": 271,
"w": 272,
"m": 273,
"↨": 274,
"g": 275,
"c": 276,
"f": 277,
"y": 278,
".": 279,
"p": 280,
",": 281,
"b": 282,
"\r": 283,
"k": 284,
"v": 285,
"\"": 286,
"'": 287,
"j": 288,
"x": 289,
"z": 290,
"q": 291,
"§": 292,
"?": 293,
"-": 294,
"!": 295,
":": 296,
"1": 297,
"0": 298,
"2": 299,
"5": 300,
"3": 301,
";": 302,
"4": 303,
"9": 304,
"8": 305,
"$": 306,
"6": 307,
"7": 308,
"+": 309,
"=": 310,
")": 311,
"(": 312,
"&": 313,
"/": 314,
"%": 315,
"^": 316,
"*": 317,
"`": 318,
"_": 319,
"]": 320,
"<": 321,
"[": 322,
"\\": 323,
">": 324,
"{": 325,
"}": 326,
"|": 327,
"¼": 328,
"½": 329,
"π": 330,
"←": 331,
"↑": 332,
"→": 333,
"↓": 334,
"▲": 335,
"►": 336,
"▼": 337,
"◄": 338,
"♪": 339,
"♫": 340
},
"merges": []
}
}