slplab's picture
add tokenizer
ef9de15
raw
history blame
1.57 kB
{
"-": 1,
"0": 2,
"1": 3,
"2": 4,
"3": 5,
"4": 6,
"5": 7,
"6": 8,
"7": 9,
"8": 10,
"9": 11,
"[PAD]": 121,
"[UNK]": 120,
"a": 12,
"c": 13,
"f": 14,
"h": 15,
"o": 16,
"p": 17,
"u": 18,
"|": 0,
"々": 19,
"〇": 20,
"ぁ": 21,
"あ": 22,
"ぃ": 23,
"い": 24,
"ぅ": 25,
"う": 26,
"ぇ": 27,
"え": 28,
"ぉ": 29,
"お": 30,
"か": 31,
"が": 32,
"き": 33,
"ぎ": 34,
"く": 35,
"ぐ": 36,
"け": 37,
"げ": 38,
"こ": 39,
"ご": 40,
"さ": 41,
"ざ": 42,
"し": 43,
"じ": 44,
"す": 45,
"ず": 46,
"せ": 47,
"ぜ": 48,
"そ": 49,
"ぞ": 50,
"た": 51,
"だ": 52,
"ち": 53,
"ぢ": 54,
"っ": 55,
"つ": 56,
"づ": 57,
"て": 58,
"で": 59,
"と": 60,
"ど": 61,
"な": 62,
"に": 63,
"ぬ": 64,
"ね": 65,
"の": 66,
"は": 67,
"ば": 68,
"ぱ": 69,
"ひ": 70,
"び": 71,
"ぴ": 72,
"ふ": 73,
"ぶ": 74,
"ぷ": 75,
"へ": 76,
"べ": 77,
"ぺ": 78,
"ほ": 79,
"ぼ": 80,
"ぽ": 81,
"ま": 82,
"み": 83,
"む": 84,
"め": 85,
"も": 86,
"ゃ": 87,
"や": 88,
"ゅ": 89,
"ゆ": 90,
"ょ": 91,
"よ": 92,
"ら": 93,
"り": 94,
"る": 95,
"れ": 96,
"ろ": 97,
"わ": 98,
"を": 99,
"ん": 100,
"ゔ": 101,
"ゖ": 102,
"ー": 103,
"％": 104,
"０": 105,
"１": 106,
"２": 107,
"３": 108,
"４": 109,
"５": 110,
"６": 111,
"７": 112,
"８": 113,
"９": 114,
"Ｄ": 115,
"Ｇ": 116,
"Ｐ": 117,
"ｉ": 118,
"ｘ": 119
}