yzimmermann
/

REGEX-1B

Inference Endpoints

Model card Files Files and versions Community

REGEX-1B / tokenizer.json

yzimmermann's picture

Upload tokenizer

465e89b verified about 1 month ago

history blame contribute delete

4.21 kB

	{
	"version": "1.0",
	"truncation": null,
	"padding": null,
	"added_tokens": [
	{
	"id": 0,
	"content": "[BOS]",
	"single_word": false,
	"lstrip": false,
	"rstrip": false,
	"normalized": false,
	"special": true
	},
	{
	"id": 1,
	"content": "[EOS]",
	"single_word": false,
	"lstrip": false,
	"rstrip": false,
	"normalized": false,
	"special": true
	},
	{
	"id": 2,
	"content": "[PAD]",
	"single_word": false,
	"lstrip": false,
	"rstrip": false,
	"normalized": false,
	"special": true
	},
	{
	"id": 3,
	"content": "[UNK]",
	"single_word": false,
	"lstrip": false,
	"rstrip": false,
	"normalized": false,
	"special": true
	},
	{
	"id": 4,
	"content": "[MASK]",
	"single_word": false,
	"lstrip": false,
	"rstrip": false,
	"normalized": false,
	"special": true
	},
	{
	"id": 5,
	"content": "[CLS]",
	"single_word": false,
	"lstrip": false,
	"rstrip": false,
	"normalized": false,
	"special": true
	},
	{
	"id": 6,
	"content": "[SEP]",
	"single_word": false,
	"lstrip": false,
	"rstrip": false,
	"normalized": false,
	"special": true
	}
	],
	"normalizer": null,
	"pre_tokenizer": {
	"type": "Split",
	"pattern": {
	"Regex": "(\\[[^\\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\\(|\\)|\\.|=|-|\\+|\\\\|\\/|:|~|@|\\?|>>?|\\*|\\$|\\%[0-9]{2}|[0-9])"
	},
	"behavior": "Isolated",
	"invert": false
	},
	"post_processor": null,
	"decoder": null,
	"model": {
	"type": "WordLevel",
	"vocab": {
	"[BOS]": 0,
	"[EOS]": 1,
	"[PAD]": 2,
	"[UNK]": 3,
	"[MASK]": 4,
	"[CLS]": 5,
	"[SEP]": 6,
	"C": 7,
	"c": 8,
	"(": 9,
	")": 10,
	"O": 11,
	"1": 12,
	"2": 13,
	"=": 14,
	"N": 15,
	"3": 16,
	"n": 17,
	"[C@H]": 18,
	"[C@@H]": 19,
	"4": 20,
	"F": 21,
	"[NH+]": 22,
	"S": 23,
	"o": 24,
	"Cl": 25,
	"s": 26,
	"[nH]": 27,
	"5": 28,
	"[NH2+]": 29,
	"#": 30,
	"/": 31,
	"Br": 32,
	"[C@@]": 33,
	"[C@]": 34,
	"[O-]": 35,
	"\\": 36,
	"[nH+]": 37,
	"[NH3+]": 38,
	"[n-]": 39,
	"6": 40,
	"I": 41,
	"[N-]": 42,
	"-": 43,
	"7": 44,
	"[H]": 45,
	"[Si]": 46,
	"P": 47,
	"[n+]": 48,
	"[S-]": 49,
	"[S@]": 50,
	"[S@@]": 51,
	"[N+]": 52,
	"8": 53,
	"B": 54,
	"[CH]": 55,
	"[C]": 56,
	"9": 57,
	"[P@@]": 58,
	"[P@]": 59,
	"[S+]": 60,
	"[N@@+]": 61,
	"[N@+]": 62,
	"[CH2]": 63,
	"[O]": 64,
	"[s+]": 65,
	"[Sn]": 66,
	"[P+]": 67,
	"[B-]": 68,
	"[S@@+]": 69,
	"[S@+]": 70,
	"p": 71,
	"[N]": 72,
	"%10": 73,
	"[C+]": 74,
	"[o+]": 75,
	"%11": 76,
	"[N@]": 77,
	"[P@@H]": 78,
	"[n@]": 79,
	"[C-]": 80,
	"[c+]": 81,
	"[IH2]": 82,
	"%13": 83,
	"[Si@@]": 84,
	"%12": 85,
	"[Si@]": 86,
	"[N@@]": 87,
	"[BH3-]": 88,
	"[P@H]": 89,
	"[CH-]": 90,
	"[Sn@]": 91,
	"[s@@]": 92,
	"[s@]": 93,
	"[P@+]": 94,
	"[P@@+]": 95,
	"[Sn@@]": 96,
	"[c-]": 97,
	"[17O]": 98,
	"[BH-]": 99,
	"[SnH4+2]": 100,
	"[B@-]": 101,
	"[B@@-]": 102,
	"[cH-]": 103,
	"[O+]": 104,
	"[SnH2+]": 105,
	"[SnH]": 106,
	"%14": 107,
	"[Sn+2]": 108,
	"[I+]": 109,
	"[P@@H+]": 110,
	"%15": 111,
	"%16": 112,
	"%18": 113,
	"[Br+]": 114,
	"[NH]": 115,
	"[Sn+]": 116,
	"[n@@]": 117,
	"%17": 118,
	"%19": 119,
	"%20": 120,
	"%21": 121,
	"%22": 122,
	"[18OH]": 123,
	"[BH2-]": 124,
	"[S@@-]": 125,
	"[S@@H]": 126,
	"[Sn+3]": 127,
	"[SnH2]": 128,
	"[SnH6+3]": 129,
	"[pH]": 130,
	"[S@H]": 131,
	"[SH3]": 132,
	"[SiH2]": 133,
	"[SiH3]": 134,
	"[Sn-]": 135,
	"[p+]": 136
	},
	"unk_token": "[UNK]"
	}
	}