low-resource-language-model-adaptation
/
adapted_tokenizers
/added-opt-hau
/opt_500-add_full-hau-opt
/added_tokens.json
{ | |
" APC": 50469, | |
" Abdu": 50610, | |
" Abubakar": 50742, | |
" Abuja": 50629, | |
" Afirka": 50701, | |
" Alha": 50726, | |
" Amurka": 50484, | |
" Arewa": 50652, | |
" Bayan": 50698, | |
" Buhari": 50392, | |
" DUBA": 50764, | |
" Daga": 50693, | |
" Gwamnan": 50750, | |
" Gwamnat": 50680, | |
" Hausa": 50447, | |
" Huk": 50569, | |
" Hukumar": 50627, | |
" Idan": 50623, | |
" Jami": 50757, | |
" Jihar": 50474, | |
" KAR": 50607, | |
" KARANTA": 50633, | |
" KU": 50582, | |
" Kaduna": 50630, | |
" Kano": 50451, | |
" Kuma": 50687, | |
" Legit": 50450, | |
" Majalisar": 50699, | |
" Mala": 50639, | |
" Muh": 50487, | |
" Muhamma": 50504, | |
" Muhammadu": 50691, | |
" Musul": 50708, | |
" Najeriya": 50318, | |
" PDP": 50525, | |
" Shugaban": 50442, | |
" Wannan": 50578, | |
" abin": 50337, | |
" abo": 50673, | |
" abu": 50411, | |
" abubu": 50572, | |
" ada": 50521, | |
" addin": 50718, | |
" ai": 50730, | |
" aika": 50601, | |
" aiki": 50384, | |
" aikin": 50420, | |
" akan": 50489, | |
" ake": 50341, | |
" akwai": 50483, | |
" ala": 50554, | |
" amf": 50359, | |
" amfani": 50369, | |
" amin": 50548, | |
" amince": 50670, | |
" amma": 50376, | |
" arz": 50675, | |
" ayy": 50559, | |
" ayyukan": 50672, | |
" babban": 50468, | |
" babu": 50495, | |
" bada": 50697, | |
" bai": 50400, | |
" baki": 50743, | |
" bakin": 50717, | |
" bangar": 50711, | |
" bara": 50722, | |
" bata": 50744, | |
" baya": 50422, | |
" bayan": 50300, | |
" bayar": 50625, | |
" bayyana": 50343, | |
" binc": 50513, | |
" bincike": 50707, | |
" bindi": 50573, | |
" birnin": 50545, | |
" bisa": 50590, | |
" biy": 50349, | |
" biyu": 50372, | |
" buka": 50403, | |
" bukatar": 50587, | |
" cewa": 50281, | |
" cewar": 50517, | |
" ci": 50299, | |
" cika": 50600, | |
" ciki": 50460, | |
" cikin": 50278, | |
" cin": 50477, | |
" cutar": 50544, | |
" daban": 50602, | |
" dace": 50700, | |
" daga": 50282, | |
" dai": 50312, | |
" daidai": 50728, | |
" dake": 50455, | |
" dali": 50512, | |
" dama": 50496, | |
" damar": 50686, | |
" dauk": 50409, | |
" dauki": 50713, | |
" dawo": 50685, | |
" daya": 50379, | |
" duk": 50314, | |
" dukkan": 50752, | |
" duniya": 50435, | |
" fada": 50657, | |
" fara": 50365, | |
" farko": 50480, | |
" fitar": 50640, | |
" fito": 50577, | |
" fus": 50608, | |
" gaba": 50311, | |
" gaban": 50620, | |
" gabata": 50703, | |
" gabatar": 50724, | |
" gan": 50688, | |
" ganin": 50458, | |
" gano": 50732, | |
" garin": 50568, | |
" gi": 50320, | |
" gida": 50463, | |
" gidan": 50448, | |
" girma": 50729, | |
" guda": 50539, | |
" gudan": 50438, | |
" gudanar": 50473, | |
" gwa": 50694, | |
" gwamnan": 50655, | |
" gwamnat": 50353, | |
" gwamnati": 50541, | |
" gwamnatin": 50443, | |
" hada": 50490, | |
" hai": 50540, | |
" haka": 50336, | |
" hakan": 50444, | |
" halin": 50660, | |
" han": 50289, | |
" hana": 50658, | |
" hanka": 50533, | |
" hankali": 50651, | |
" hann": 50406, | |
" hannu": 50552, | |
" hannun": 50659, | |
" hanyar": 50439, | |
" harkokin": 50761, | |
" hu": 50551, | |
" huk": 50342, | |
" hukum": 50751, | |
" hukumar": 50433, | |
" idan": 50389, | |
" ili": 50741, | |
" ina": 50598, | |
" inda": 50348, | |
" irin": 50413, | |
" ita": 50429, | |
" iya": 50307, | |
" jagor": 50734, | |
" jama": 50505, | |
" jami": 50430, | |
" jari": 50594, | |
" ji": 50287, | |
" jihar": 50319, | |
" jin": 50478, | |
" jir": 50676, | |
" kafa": 50494, | |
" kafin": 50580, | |
" kai": 50316, | |
" kal": 50756, | |
" kam": 50401, | |
" kama": 50382, | |
" kamar": 50378, | |
" kamata": 50589, | |
" kamf": 50570, | |
" kana": 50740, | |
" kar": 50279, | |
" kara": 50331, | |
" karanta": 50727, | |
" kare": 50662, | |
" karin": 50654, | |
" karka": 50584, | |
" karkashin": 50644, | |
" kasa": 50296, | |
" kasan": 50414, | |
" kasance": 50431, | |
" kasar": 50297, | |
" kasashen": 50536, | |
" kashe": 50476, | |
" kasu": 50509, | |
" kawai": 50563, | |
" kawo": 50502, | |
" kaya": 50747, | |
" kayan": 50571, | |
" kir": 50611, | |
" kira": 50499, | |
" kokar": 50592, | |
" kokarin": 50653, | |
" koma": 50531, | |
" kowa": 50566, | |
" kowan": 50696, | |
" ku": 50265, | |
" kudi": 50558, | |
" kudin": 50579, | |
" kula": 50616, | |
" kuma": 50272, | |
" kun": 50518, | |
" kungiyar": 50432, | |
" kuwa": 50619, | |
" kwa": 50347, | |
" kwam": 50588, | |
" kwan": 50427, | |
" kwana": 50647, | |
" kya": 50364, | |
" kyau": 50609, | |
" kyauta": 50763, | |
" labar": 50418, | |
" labarai": 50532, | |
" labaran": 50628, | |
" labarin": 50650, | |
" lafiya": 50510, | |
" lai": 50664, | |
" lamarin": 50716, | |
" loka": 50324, | |
" lokaci": 50557, | |
" lokacin": 50356, | |
" mafi": 50528, | |
" magana": 50646, | |
" mai": 50286, | |
" majalisar": 50553, | |
" maka": 50522, | |
" makar": 50576, | |
" mala": 50690, | |
" mama": 50738, | |
" manyan": 50604, | |
" mara": 50731, | |
" masa": 50434, | |
" masu": 50304, | |
" mata": 50308, | |
" matsa": 50333, | |
" matsayin": 50410, | |
" maza": 50631, | |
" miliyan": 50746, | |
" muka": 50549, | |
" mulki": 50733, | |
" muna": 50762, | |
" musa": 50491, | |
" musamman": 50535, | |
" musu": 50486, | |
" mutan": 50332, | |
" mutane": 50360, | |
" mutanen": 50596, | |
" mutu": 50677, | |
" mutum": 50421, | |
" nau": 50739, | |
" neman": 50500, | |
" nuna": 50397, | |
" raho": 50575, | |
" ranar": 50344, | |
" rashin": 50527, | |
" ri": 50523, | |
" riki": 50754, | |
" rubu": 50583, | |
" ruwa": 50479, | |
" saboda": 50404, | |
" sabon": 50681, | |
" sai": 50317, | |
" saka": 50424, | |
" sakamakon": 50624, | |
" sama": 50538, | |
" samar": 50520, | |
" samu": 50345, | |
" samun": 50395, | |
" sana": 50649, | |
" sanar": 50472, | |
" sanda": 50618, | |
" sannan": 50550, | |
" sanya": 50560, | |
" sau": 50534, | |
" sauran": 50457, | |
" sha": 50292, | |
" shafin": 50666, | |
" shai": 50695, | |
" shawara": 50759, | |
" shekar": 50352, | |
" shekara": 50445, | |
" shekarar": 50493, | |
" shekaru": 50511, | |
" shi": 50275, | |
" shiga": 50426, | |
" shir": 50501, | |
" shirin": 50586, | |
" shirya": 50702, | |
" shugaban": 50322, | |
" siya": 50530, | |
" siyasa": 50667, | |
" sojo": 50671, | |
" sosai": 50684, | |
" suka": 50283, | |
" suke": 50387, | |
" suna": 50329, | |
" taba": 50758, | |
" tabba": 50399, | |
" tabbatar": 50459, | |
" taima": 50492, | |
" taimaka": 50614, | |
" takar": 50454, | |
" takarar": 50599, | |
" tana": 50449, | |
" tara": 50526, | |
" tarayya": 50720, | |
" tare": 50321, | |
" taron": 50564, | |
" tatta": 50497, | |
" tattauna": 50736, | |
" tsa": 50303, | |
" tsakanin": 50461, | |
" tsar": 50338, | |
" tsarin": 50581, | |
" tsaro": 50508, | |
" tso": 50475, | |
" tsohon": 50632, | |
" uku": 50562, | |
" wadan": 50350, | |
" wadanda": 50419, | |
" wadannan": 50648, | |
" wajen": 50357, | |
" waki": 50645, | |
" wan": 50268, | |
" wanda": 50302, | |
" wani": 50291, | |
" wannan": 50288, | |
" wasa": 50723, | |
" wasan": 50428, | |
" wasu": 50323, | |
" wata": 50339, | |
" watan": 50373, | |
" wato": 50755, | |
" wayar": 50689, | |
" waɗ": 50705, | |
" wu": 50556, | |
" wurin": 50617, | |
" yace": 50547, | |
" yadda": 50328, | |
" yake": 50366, | |
" yan": 50274, | |
" yana": 50310, | |
" yankin": 50485, | |
" yanzu": 50374, | |
" yar": 50407, | |
" yau": 50466, | |
" yawa": 50519, | |
" yawan": 50506, | |
" yayi": 50524, | |
" yayin": 50425, | |
" yi": 50271, | |
" yin": 50363, | |
" za": 50266, | |
" zabe": 50626, | |
" zaben": 50456, | |
" zai": 50315, | |
" zama": 50388, | |
" zaman": 50417, | |
" zan": 50481, | |
" zar": 50423, | |
" zargin": 50612, | |
" zu": 50622, | |
" zuwa": 50313, | |
" Ƙ": 50712, | |
" ƙ": 50298, | |
" ƙa": 50415, | |
" ƙar": 50661, | |
" ƙasa": 50692, | |
" ɗ": 50380, | |
" ɗa": 50507, | |
" ‘": 50325, | |
" ’": 50488, | |
" “": 50334, | |
".”": 50514, | |
"ANTA": 50606, | |
"Ya": 50543, | |
"aika": 50452, | |
"amma": 50440, | |
"anar": 50305, | |
"ancin": 50725, | |
"anin": 50327, | |
"annan": 50391, | |
"ansa": 50656, | |
"antar": 50635, | |
"araba": 50636, | |
"arai": 50446, | |
"bakar": 50715, | |
"bba": 50354, | |
"bban": 50381, | |
"bbar": 50748, | |
"boda": 50375, | |
"bun": 50714, | |
"buwar": 50704, | |
"cce": 50678, | |
"cebook": 50561, | |
"cen": 50621, | |
"dai": 50470, | |
"dda": 50290, | |
"dun": 50396, | |
"duna": 50585, | |
"fiya": 50398, | |
"fiyar": 50605, | |
"gaba": 50760, | |
"ganta": 50668, | |
"hausa": 50441, | |
"himman": 50641, | |
"ikin": 50273, | |
"inu": 50679, | |
"irka": 50615, | |
"itin": 50436, | |
"iyar": 50284, | |
"iye": 50546, | |
"iyo": 50453, | |
"iyoyin": 50749, | |
"iyya": 50465, | |
"iyyar": 50358, | |
"jali": 50370, | |
"jalisar": 50416, | |
"jeriya": 50309, | |
"jiya": 50642, | |
"kanin": 50412, | |
"kara": 50361, | |
"kiya": 50482, | |
"kokin": 50471, | |
"kon": 50355, | |
"kwa": 50335, | |
"kwai": 50393, | |
"lata": 50735, | |
"legit": 50542, | |
"liyan": 50537, | |
"llafa": 50683, | |
"llon": 50593, | |
"mai": 50408, | |
"makon": 50503, | |
"mala": 50719, | |
"marin": 50706, | |
"mba": 50462, | |
"mman": 50377, | |
"onin": 50753, | |
"rewa": 50634, | |
"sai": 50595, | |
"sali": 50515, | |
"sana": 50709, | |
"sar": 50269, | |
"sara": 50437, | |
"sarar": 50710, | |
"shen": 50351, | |
"shin": 50294, | |
"sib": 50737, | |
"sino": 50597, | |
"ssa": 50721, | |
"sul": 50669, | |
"tsa": 50306, | |
"tum": 50394, | |
"ugaba": 50516, | |
"ugaban": 50295, | |
"uhari": 50386, | |
"ukan": 50368, | |
"ukar": 50591, | |
"umma": 50643, | |
"unan": 50529, | |
"undun": 50663, | |
"undunar": 50674, | |
"ungiyar": 50362, | |
"uniya": 50371, | |
"urar": 50567, | |
"urin": 50383, | |
"urka": 50467, | |
"utar": 50385, | |
"uwa": 50276, | |
"uwar": 50390, | |
"wai": 50498, | |
"wam": 50285, | |
"wamn": 50301, | |
"wamna": 50574, | |
"wamnan": 50464, | |
"wamnat": 50326, | |
"wara": 50603, | |
"warar": 50682, | |
"witter": 50613, | |
"yar": 50267, | |
"yen": 50402, | |
"yin": 50277, | |
"yoyi": 50745, | |
"yoyin": 50565, | |
"yya": 50367, | |
"yyan": 50330, | |
"yyana": 50340, | |
"ƙ": 50280, | |
"ƙa": 50555, | |
"ɓ": 50405, | |
"ɔ": 50665, | |
"ɗ": 50293, | |
"ɗa": 50637, | |
"ɛ": 50638, | |
"’": 50270, | |
"”": 50346 | |
} | |