low-resource-language-model-adaptation
/
adapted_tokenizers
/added-opt-amh
/opt_100-add_full-amh-opt
/added_tokens.json
{ | |
" ለ": 50313, | |
" ላይ": 50358, | |
" መ": 50287, | |
" ማ": 50343, | |
" ም": 50344, | |
" ሰ": 50349, | |
" ሲ": 50354, | |
" ስ": 50340, | |
" በ": 50271, | |
" ባ": 50359, | |
" ብ": 50351, | |
" ተ": 50301, | |
" ነ": 50308, | |
" ነው": 50346, | |
" አ": 50274, | |
" እ": 50280, | |
" እና": 50361, | |
" እን": 50304, | |
" እንደ": 50350, | |
" ከ": 50299, | |
" ወ": 50324, | |
" ው": 50353, | |
" የ": 50267, | |
" የሚ": 50332, | |
" የተ": 50342, | |
" ያ": 50327, | |
" ይ": 50319, | |
" ግ": 50355, | |
"ሁ": 50363, | |
"ህ": 50318, | |
"ሆ": 50328, | |
"ለ": 50275, | |
"ሉ": 50315, | |
"ላ": 50290, | |
"ል": 50270, | |
"መ": 50282, | |
"ሚ": 50298, | |
"ማ": 50296, | |
"ም": 50272, | |
"ሞ": 50360, | |
"ረ": 50285, | |
"ሩ": 50337, | |
"ሪ": 50302, | |
"ራ": 50288, | |
"ር": 50269, | |
"ሮ": 50335, | |
"ሰ": 50300, | |
"ሳ": 50321, | |
"ስ": 50273, | |
"ቀ": 50309, | |
"ቃ": 50347, | |
"ቅ": 50325, | |
"በ": 50292, | |
"ባ": 50297, | |
"ብ": 50295, | |
"ተ": 50277, | |
"ቱ": 50330, | |
"ታ": 50293, | |
"ት": 50266, | |
"ቶ": 50336, | |
"ቸ": 50306, | |
"ቸው": 50310, | |
"ች": 50276, | |
"ነ": 50294, | |
"ነት": 50364, | |
"ኑ": 50356, | |
"ና": 50278, | |
"ን": 50265, | |
"ኛ": 50338, | |
"አ": 50305, | |
"ከ": 50317, | |
"ካ": 50316, | |
"ክ": 50303, | |
"ወ": 50323, | |
"ዊ": 50345, | |
"ዋ": 50314, | |
"ዋል": 50348, | |
"ው": 50268, | |
"ዎች": 50333, | |
"ዚ": 50362, | |
"ዝ": 50357, | |
"የ": 50329, | |
"ያ": 50279, | |
"ይ": 50281, | |
"ዮ": 50352, | |
"ደ": 50284, | |
"ዲ": 50341, | |
"ዳ": 50307, | |
"ድ": 50289, | |
"ገ": 50286, | |
"ጋ": 50331, | |
"ግ": 50291, | |
"ጠ": 50320, | |
"ጣ": 50334, | |
"ጥ": 50311, | |
"ፈ": 50326, | |
"ፍ": 50312, | |
"፡": 50322, | |
"።": 50283, | |
"፣": 50339 | |
} | |