add tokenizer
368a8e5
|
{ |
|
"!": 1, |
|
"'": 2, |
|
",": 3, |
|
"-": 4, |
|
".": 5, |
|
":": 6, |
|
";": 7, |
|
"=": 8, |
|
"?": 9, |
|
"[PAD]": 84, |
|
"[UNK]": 83, |
|
"|": 0, |
|
"।": 10, |
|
"ঁ": 11, |
|
"ং": 12, |
|
"ঃ": 13, |
|
"অ": 14, |
|
"আ": 15, |
|
"ই": 16, |
|
"ঈ": 17, |
|
"উ": 18, |
|
"ঊ": 19, |
|
"ঋ": 20, |
|
"এ": 21, |
|
"ঐ": 22, |
|
"ও": 23, |
|
"ঔ": 24, |
|
"ক": 25, |
|
"খ": 26, |
|
"গ": 27, |
|
"ঘ": 28, |
|
"ঙ": 29, |
|
"চ": 30, |
|
"ছ": 31, |
|
"জ": 32, |
|
"ঝ": 33, |
|
"ঞ": 34, |
|
"ট": 35, |
|
"ঠ": 36, |
|
"ড": 37, |
|
"ঢ": 38, |
|
"ণ": 39, |
|
"ত": 40, |
|
"থ": 41, |
|
"দ": 42, |
|
"ধ": 43, |
|
"ন": 44, |
|
"প": 45, |
|
"ফ": 46, |
|
"ব": 47, |
|
"ভ": 48, |
|
"ম": 49, |
|
"য": 50, |
|
"র": 51, |
|
"ল": 52, |
|
"শ": 53, |
|
"ষ": 54, |
|
"স": 55, |
|
"হ": 56, |
|
"া": 57, |
|
"ি": 58, |
|
"ী": 59, |
|
"ু": 60, |
|
"ূ": 61, |
|
"ৃ": 62, |
|
"ে": 63, |
|
"ৈ": 64, |
|
"ো": 65, |
|
"ৌ": 66, |
|
"্": 67, |
|
"ৎ": 68, |
|
"ড়": 69, |
|
"ঢ়": 70, |
|
"য়": 71, |
|
"০": 72, |
|
"১": 73, |
|
"২": 74, |
|
"৩": 75, |
|
"৪": 76, |
|
"৫": 77, |
|
"৬": 78, |
|
"৭": 79, |
|
"৮": 80, |
|
"৯": 81, |
|
"": 82 |
|
} |
|
|