llm-numbers / bpe_tokenizer.json
ChavyvAkvar's picture
Upload folder using huggingface_hub
7c6fd76 verified
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[PAD]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[EOS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Whitespace"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "BPE",
"dropout": null,
"unk_token": "[UNK]",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"ignore_merges": false,
"vocab": {
"[UNK]": 0,
"[PAD]": 1,
"[EOS]": 2,
"a": 3,
"d": 4,
"e": 5,
"f": 6,
"g": 7,
"h": 8,
"i": 9,
"l": 10,
"n": 11,
"o": 12,
"r": 13,
"s": 14,
"t": 15,
"u": 16,
"v": 17,
"w": 18,
"x": 19,
"y": 20,
"nd": 21,
"and": 22,
"ty": 23,
"th": 24,
"ou": 25,
"re": 26,
"sand": 27,
"thou": 28,
"thousand": 29,
"hu": 30,
"ndre": 31,
"hundre": 32,
"hundred": 33,
"en": 34,
"ne": 35,
"ev": 36,
"even": 37,
"ei": 38,
"gh": 39,
"eigh": 40,
"tw": 41,
"seven": 42,
"fi": 43,
"ix": 44,
"six": 45,
"ine": 46,
"nine": 47,
"rty": 48,
"eight": 49,
"ve": 50,
"fou": 51,
"four": 52,
"two": 53,
"one": 54,
"five": 55,
"thre": 56,
"three": 57,
"thi": 58,
"fif": 59,
"sixty": 60,
"fo": 61,
"forty": 62,
"enty": 63,
"twenty": 64,
"seventy": 65,
"thirty": 66,
"fifty": 67,
"eighty": 68,
"ninety": 69,
"een": 70,
"teen": 71,
"el": 72,
"twel": 73,
"twelve": 74,
"rteen": 75,
"thirteen": 76,
"fifteen": 77,
"nineteen": 78,
"seventeen": 79,
"eleven": 80,
"fourteen": 81,
"eighteen": 82,
"ten": 83,
"sixteen": 84
},
"merges": [
[
"n",
"d"
],
[
"a",
"nd"
],
[
"t",
"y"
],
[
"t",
"h"
],
[
"o",
"u"
],
[
"r",
"e"
],
[
"s",
"and"
],
[
"th",
"ou"
],
[
"thou",
"sand"
],
[
"h",
"u"
],
[
"nd",
"re"
],
[
"hu",
"ndre"
],
[
"hundre",
"d"
],
[
"e",
"n"
],
[
"n",
"e"
],
[
"e",
"v"
],
[
"ev",
"en"
],
[
"e",
"i"
],
[
"g",
"h"
],
[
"ei",
"gh"
],
[
"t",
"w"
],
[
"s",
"even"
],
[
"f",
"i"
],
[
"i",
"x"
],
[
"s",
"ix"
],
[
"i",
"ne"
],
[
"n",
"ine"
],
[
"r",
"ty"
],
[
"eigh",
"t"
],
[
"v",
"e"
],
[
"f",
"ou"
],
[
"fou",
"r"
],
[
"tw",
"o"
],
[
"o",
"ne"
],
[
"fi",
"ve"
],
[
"th",
"re"
],
[
"thre",
"e"
],
[
"th",
"i"
],
[
"fi",
"f"
],
[
"six",
"ty"
],
[
"f",
"o"
],
[
"fo",
"rty"
],
[
"en",
"ty"
],
[
"tw",
"enty"
],
[
"seven",
"ty"
],
[
"thi",
"rty"
],
[
"fif",
"ty"
],
[
"eigh",
"ty"
],
[
"nine",
"ty"
],
[
"e",
"en"
],
[
"t",
"een"
],
[
"e",
"l"
],
[
"tw",
"el"
],
[
"twel",
"ve"
],
[
"r",
"teen"
],
[
"thi",
"rteen"
],
[
"fif",
"teen"
],
[
"nine",
"teen"
],
[
"seven",
"teen"
],
[
"el",
"even"
],
[
"four",
"teen"
],
[
"eight",
"een"
],
[
"t",
"en"
],
[
"six",
"teen"
]
]
}
}