add tokenizer
Browse files- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
|
vocab.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"д": 0, "đ": 1, "ō": 2, "â": 3, "ת": 4, "蝦": 5, "‘": 6, "ة": 7, "”": 8, "J": 9, "≪": 10, "―": 11, "ਮ": 12, "ө": 13, "ð": 14, "î": 15, "真": 16, "н": 17, "Î": 18, "背": 19, "ț": 20, "ź": 21, "…": 22, "ю": 23, "ä": 24, "网": 25, "л": 26, "ö": 27, "p": 28, "r": 29, "ł": 30, "°": 31, "鮓": 32, "Ϙ": 33, "е": 34, "ồ": 35, "Ó": 36, "¡": 37, "ú": 38, "В": 39, "肌": 40, "n": 41, "ā": 42, "K": 43, "с": 44, "}": 45, "ı": 46, "ñ": 47, "山": 48, "ő": 49, "´": 50, "À": 51, "ʻ": 52, "戌": 53, "т": 54, "ذ": 55, "肋": 56, "ਸ": 57, "ý": 58, "מ": 59, "С": 60, "é": 61, "ó": 62, "L": 63, "›": 64, "ṁ": 65, "®": 66, "~": 67, "à": 68, "ਨ": 69, "ү": 70, "ਤ": 71, "y": 72, "ヒ": 73, "ô": 74, "&": 75, "Е": 76, "`": 77, "ы": 78, "ì": 79, "„": 80, "ч": 81, "j": 82, "ש": 83, "¿": 84, "Ú": 85, "ğ": 86, "ù": 87, "‐": 88, "ֹ": 89, "נ": 90, "к": 91, "B": 92, "箱": 93, "م": 94, "ć": 95, "ṇ": 96, "q": 97, "{": 98, "ё": 99, "罓": 100, "ª": 101, "ş": 102, "ひ": 103, "Š": 104, "Þ": 105, "ï": 106, "р": 107, "申": 108, "ü": 109, "ṃ": 110, "ゴ": 111, "本": 112, "ا": 113, "Ò": 114, "ּ": 115, "T": 116, "Ö": 117, "ê": 118, "í": 119, "Q": 120, "g": 121, "e": 122, "á": 123, "Б": 124, "Ä": 125, "H": 126, "‑": 127, "ل": 128, "Ō": 129, "ū": 130, "罒": 131, "š": 132, ",": 133, "́": 134, "Г": 135, ";": 136, "'": 137, "А": 138, "ֵ": 139, "|": 237, "D": 141, "É": 142, "ੰ": 143, "ب": 144, "ラ": 145, "ִ": 146, "比": 147, "_": 148, "Ø": 149, "f": 150, "я": 151, "ְ": 152, "ם": 153, "Y": 154, "و": 155, "I": 156, "k": 157, "→": 158, "R": 159, "o": 160, "Ł": 161, ".": 162, "·": 163, "п": 164, "Ñ": 165, "Œ": 166, "и": 167, "W": 168, "ø": 169, "h": 170, "ə": 171, "ر": 172, "כ": 173, "A": 174, "Ż": 175, "ק": 176, "י": 177, "ě": 178, "消": 179, "ਘ": 180, "å": 181, "-": 182, "X": 183, "l": 184, "\"": 185, "—": 186, "æ": 187, "日": 188, "m": 189, "ʿ": 190, "ń": 191, "周": 192, "ç": 193, "ę": 194, "v": 195, "≫": 196, "a": 197, "о": 198, "S": 199, "г": 200, "Æ": 201, "E": 202, "良": 203, "生": 204, "F": 205, "毵": 206, "=": 207, "Z": 208, "Ś": 209, "ב": 
210, "t": 211, "й": 212, "ß": 213, "ш": 214, "ه": 215, "!": 216, "Á": 217, ":": 218, "ら": 219, "د": 220, "N": 221, "の": 222, "Ш": 223, "Í": 224, "ь": 225, "в": 226, "ś": 227, "Ð": 228, "w": 229, "õ": 230, "U": 231, "ミ": 232, "口": 233, "O": 234, "$": 235, "ː": 236, "ר": 238, "fi": 239, "ס": 240, "б": 241, "«": 242, "夷": 243, "s": 244, "Ч": 245, "d": 246, "»": 247, "ਆ": 248, "Č": 249, "а": 250, "ă": 251, "G": 252, "": 253, "ي": 254, "ム": 255, "㓁": 256, "C": 257, "x": 258, "ʷ": 259, "V": 260, "č": 261, "M": 262, "–": 263, "ŏ": 264, "z": 265, "u": 266, "Ş": 267, "ș": 268, "c": 269, "İ": 270, "ž": 271, "鮨": 272, "“": 273, "ਾ": 274, "ָ": 275, "ř": 276, "’": 277, "‹": 278, "œ": 279, "ī": 280, "P": 281, "ė": 282, "b": 283, "?": 284, "¨": 285, "ʽ": 286, "ה": 287, "û": 288, "し": 289, "ò": 290, "Ž": 291, "ו": 292, "ë": 293, "ਿ": 294, "‧": 295, "ל": 296, "i": 297, "ã": 298, "[UNK]": 140, "[PAD]": 299}
|