add tokenizer
ef9de15
|
{ |
|
"-": 1, |
|
"0": 2, |
|
"1": 3, |
|
"2": 4, |
|
"3": 5, |
|
"4": 6, |
|
"5": 7, |
|
"6": 8, |
|
"7": 9, |
|
"8": 10, |
|
"9": 11, |
|
"[PAD]": 121, |
|
"[UNK]": 120, |
|
"a": 12, |
|
"c": 13, |
|
"f": 14, |
|
"h": 15, |
|
"o": 16, |
|
"p": 17, |
|
"u": 18, |
|
"|": 0, |
|
"々": 19, |
|
"〇": 20, |
|
"ぁ": 21, |
|
"あ": 22, |
|
"ぃ": 23, |
|
"い": 24, |
|
"ぅ": 25, |
|
"う": 26, |
|
"ぇ": 27, |
|
"え": 28, |
|
"ぉ": 29, |
|
"お": 30, |
|
"か": 31, |
|
"が": 32, |
|
"き": 33, |
|
"ぎ": 34, |
|
"く": 35, |
|
"ぐ": 36, |
|
"け": 37, |
|
"げ": 38, |
|
"こ": 39, |
|
"ご": 40, |
|
"さ": 41, |
|
"ざ": 42, |
|
"し": 43, |
|
"じ": 44, |
|
"す": 45, |
|
"ず": 46, |
|
"せ": 47, |
|
"ぜ": 48, |
|
"そ": 49, |
|
"ぞ": 50, |
|
"た": 51, |
|
"だ": 52, |
|
"ち": 53, |
|
"ぢ": 54, |
|
"っ": 55, |
|
"つ": 56, |
|
"づ": 57, |
|
"て": 58, |
|
"で": 59, |
|
"と": 60, |
|
"ど": 61, |
|
"な": 62, |
|
"に": 63, |
|
"ぬ": 64, |
|
"ね": 65, |
|
"の": 66, |
|
"は": 67, |
|
"ば": 68, |
|
"ぱ": 69, |
|
"ひ": 70, |
|
"び": 71, |
|
"ぴ": 72, |
|
"ふ": 73, |
|
"ぶ": 74, |
|
"ぷ": 75, |
|
"へ": 76, |
|
"べ": 77, |
|
"ぺ": 78, |
|
"ほ": 79, |
|
"ぼ": 80, |
|
"ぽ": 81, |
|
"ま": 82, |
|
"み": 83, |
|
"む": 84, |
|
"め": 85, |
|
"も": 86, |
|
"ゃ": 87, |
|
"や": 88, |
|
"ゅ": 89, |
|
"ゆ": 90, |
|
"ょ": 91, |
|
"よ": 92, |
|
"ら": 93, |
|
"り": 94, |
|
"る": 95, |
|
"れ": 96, |
|
"ろ": 97, |
|
"わ": 98, |
|
"を": 99, |
|
"ん": 100, |
|
"ゔ": 101, |
|
"ゖ": 102, |
|
"ー": 103, |
|
"%": 104, |
|
"0": 105, |
|
"1": 106, |
|
"2": 107, |
|
"3": 108, |
|
"4": 109, |
|
"5": 110, |
|
"6": 111, |
|
"7": 112, |
|
"8": 113, |
|
"9": 114, |
|
"D": 115, |
|
"G": 116, |
|
"P": 117, |
|
"i": 118, |
|
"x": 119 |
|
} |
|
|