low-resource-language-model-adaptation
/
adapted_tokenizers
/added-opt-yor
/opt_100-add_full-yor-opt
/added_tokens.json
{ | |
" Awọn": 50326, | |
" ala": 50350, | |
" ara": 50339, | |
" ati": 50275, | |
" awọn": 50271, | |
" gb": 50274, | |
" gba": 50318, | |
" gbo": 50322, | |
" gbogbo": 50331, | |
" ile": 50319, | |
" iṣẹ": 50345, | |
" jẹ": 50290, | |
" kọ": 50355, | |
" lati": 50282, | |
" lori": 50321, | |
" lá": 50332, | |
" ló": 50335, | |
" lẹ": 50316, | |
" lọ": 50296, | |
" máa": 50344, | |
" mẹ": 50343, | |
" mọ": 50334, | |
" naa": 50297, | |
" ninu": 50320, | |
" nipa": 50333, | |
" ní": 50289, | |
" ohun": 50311, | |
" pé": 50317, | |
" pẹ": 50308, | |
" pẹlu": 50315, | |
" rẹ": 50292, | |
" sí": 50337, | |
" sọ": 50304, | |
" tabi": 50360, | |
" tí": 50307, | |
" tó": 50305, | |
" wọn": 50281, | |
" yii": 50341, | |
" yoo": 50327, | |
" Ì": 50361, | |
" àwọn": 50309, | |
" è": 50325, | |
" ì": 50287, | |
" ò": 50313, | |
" ó": 50348, | |
" ń": 50312, | |
" ṣ": 50273, | |
" ṣe": 50278, | |
" ẹ": 50279, | |
" Ọ": 50301, | |
" ọ": 50272, | |
" ọmọ": 50330, | |
" “": 50351, | |
"bẹ": 50340, | |
"dun": 50338, | |
"gba": 50283, | |
"gbe": 50294, | |
"gbo": 50295, | |
"gbà": 50356, | |
"gbẹ": 50336, | |
"iṣẹ": 50353, | |
"jọ": 50288, | |
"kọ": 50286, | |
"lẹ": 50277, | |
"lọ": 50293, | |
"mọ": 50284, | |
"pọ": 50298, | |
"rí": 50310, | |
"rẹ": 50314, | |
"rọ": 50300, | |
"sẹ": 50323, | |
"tẹ": 50358, | |
"tọ": 50324, | |
"wọ": 50299, | |
"wọn": 50268, | |
"yin": 50328, | |
"yẹ": 50349, | |
"àn": 50303, | |
"áa": 50329, | |
"áà": 50347, | |
"ìn": 50352, | |
"ùn": 50357, | |
"ún": 50302, | |
"̀": 50280, | |
"́": 50276, | |
"̣": 50270, | |
"Ṣ": 50354, | |
"ṣ": 50269, | |
"ṣe": 50306, | |
"ṣẹ": 50285, | |
"Ẹ": 50346, | |
"ẹ": 50266, | |
"Ọ": 50291, | |
"ọ": 50265, | |
"ọn": 50267, | |
"ọna": 50359, | |
"”": 50342 | |
} | |