tortoise_tts_dutch / dutch_vl.json
arrivederci19's picture
added tokenizer
b8d92b6
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[STOP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[SPACE]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "Whitespace"
},
"post_processor": null,
"decoder": null,
"model": {
"type": "BPE",
"language": "nl",
"dropout": null,
"unk_token": "[UNK]",
"continuing_subword_prefix": null,
"end_of_word_suffix": null,
"fuse_unk": false,
"byte_fallback": false,
"vocab": {
"[STOP]": 0,
"[UNK]": 1,
"[SPACE]": 2,
"!": 3,
"\"": 4,
"$": 5,
"&": 6,
"'": 7,
"(": 8,
")": 9,
"*": 10,
",": 11,
"-": 12,
".": 13,
"/": 14,
"0": 15,
"1": 16,
"2": 17,
"3": 18,
"4": 19,
"5": 20,
"6": 21,
"7": 22,
"8": 23,
"9": 24,
":": 25,
";": 26,
"<": 27,
"=": 28,
">": 29,
"?": 30,
"A": 31,
"B": 32,
"C": 33,
"D": 34,
"E": 35,
"F": 36,
"G": 37,
"H": 38,
"I": 39,
"J": 40,
"K": 41,
"L": 42,
"M": 43,
"N": 44,
"O": 45,
"P": 46,
"Q": 47,
"R": 48,
"S": 49,
"T": 50,
"U": 51,
"V": 52,
"W": 53,
"X": 54,
"Y": 55,
"Z": 56,
"a": 57,
"b": 58,
"c": 59,
"d": 60,
"e": 61,
"f": 62,
"g": 63,
"h": 64,
"i": 65,
"j": 66,
"k": 67,
"l": 68,
"m": 69,
"n": 70,
"o": 71,
"p": 72,
"q": 73,
"r": 74,
"s": 75,
"t": 76,
"u": 77,
"v": 78,
"w": 79,
"x": 80,
"y": 81,
"z": 82,
"©": 83,
"«": 84,
"°": 85,
"»": 86,
"¿": 87,
"Ó": 88,
"Ö": 89,
"Ü": 90,
"ß": 91,
"à": 92,
"á": 93,
"ä": 94,
"ç": 95,
"è": 96,
"é": 97,
"ê": 98,
"ë": 99,
"í": 100,
"î": 101,
"ï": 102,
"ñ": 103,
"ò": 104,
"ó": 105,
"ô": 106,
"ö": 107,
"ú": 108,
"û": 109,
"ü": 110,
"č": 111,
"ę": 112,
"ł": 113,
"œ": 114,
"ř": 115,
"ś": 116,
"ƒ": 117,
"α": 118,
"π": 119,
"–": 120,
"‘": 121,
"’": 122,
"“": 123,
"”": 124,
"•": 125,
"…": 126,
"Ω": 127,
"ℵ": 128,
"en": 129,
"er": 130,
"ij": 131,
"de": 132,
"et": 133,
"aa": 134,
"an": 135,
"el": 136,
"in": 137,
"st": 138,
"ch": 139,
"aar": 140,
"oo": 141,
"at": 142,
"een": 143,
"ge": 144,
"on": 145,
"ie": 146,
"te": 147,
"het": 148,
"al": 149,
"ver": 150,
"op": 151,
"ijn": 152,
"van": 153,
"ze": 154,
"gen": 155,
"oe": 156,
"wa": 157,
"ee": 158,
"it": 159,
"den": 160,
"oor": 161,
"hij": 162,
"dat": 163,
"cht": 164,
"der": 165,
"is": 166,
"iet": 167,
"zijn": 168,
"he": 169,
"om": 170,
"be": 171,
"aan": 172,
"je": 173,
"ou": 174,
"ken": 175,
"niet": 176,
"ik": 177,
"ar": 178,
"eer": 179,
"or": 180,
"sch": 181,
"was": 182,
"le": 183,
"die": 184,
"met": 185,
"ad": 186,
"ijk": 187,
"zi": 188,
"ing": 189,
"re": 190,
"ur": 191,
"uit": 192,
"we": 193,
"had": 194,
"il": 195,
"to": 196,
"ig": 197,
"ven": 198,
"voor": 199,
"zei": 200,
"ol": 201,
"no": 202,
"acht": 203,
"am": 204,
"maar": 205,
"ten": 206,
"als": 207,
"naar": 208,
"us": 209,
"ien": 210,
"gr": 211,
"hem": 212,
"gel": 213,
"un": 214,
"af": 215,
"vr": 216,
"over": 217,
"id": 218,
"haar": 219,
"of": 220,
"zo": 221,
"ste": 222,
"and": 223,
"Hij": 224,
"men": 225,
"sp": 226,
"dr": 227,
"la": 228,
"waar": 229,
"arr": 230,
"Harr": 231,
"lijk": 232,
"Harry": 233,
"zich": 234,
"ter": 235,
"ond": 236,
".’": 237,
"aal": 238,
"ui": 239,
"wer": 240,
"ier": 241,
"nog": 242,
"door": 243,
"Ik": 244,
"dan": 245,
"ro": 246,
"ook": 247,
"aat": 248,
"heb": 249,
"ben": 250,
"bl": 251,
"ag": 252,
"bij": 253,
"ak": 254
},
"merges": [
"e n",
"e r",
"i j",
"d e",
"e t",
"a a",
"a n",
"e l",
"i n",
"s t",
"c h",
"aa r",
"o o",
"a t",
"e en",
"g e",
"o n",
"i e",
"t e",
"h et",
"a l",
"v er",
"o p",
"ij n",
"v an",
"z e",
"g en",
"o e",
"w a",
"e e",
"i t",
"d en",
"oo r",
"h ij",
"d at",
"ch t",
"d er",
"i s",
"i et",
"z ijn",
"h e",
"o m",
"b e",
"aa n",
"j e",
"o u",
"k en",
"n iet",
"i k",
"a r",
"e er",
"o r",
"s ch",
"wa s",
"l e",
"d ie",
"m et",
"a d",
"ij k",
"z i",
"in g",
"r e",
"u r",
"u it",
"w e",
"h ad",
"i l",
"t o",
"i g",
"v en",
"v oor",
"ze i",
"o l",
"n o",
"a cht",
"a m",
"m aar",
"t en",
"al s",
"n aar",
"u s",
"i en",
"g r",
"he m",
"g el",
"u n",
"a f",
"v r",
"o ver",
"i d",
"h aar",
"o f",
"z o",
"st e",
"an d",
"H ij",
"m en",
"s p",
"d r",
"l a",
"w aar",
"ar r",
"H arr",
"l ijk",
"Harr y",
"zi ch",
"t er",
"on d",
". ’",
"aa l",
"u i",
"w er",
"i er",
"no g",
"d oor",
"I k",
"d an",
"r o",
"oo k",
"aa t",
"he b",
"b en",
"b l",
"a g",
"b ij",
"a k"
]
}
}