low-resource-language-model-adaptation
/
adapted_tokenizers
/added-opt-fra
/opt_500-add_full-fra-opt
/added_tokens.json
{ | |
"\n\n\n": 50425, | |
"\n\n\n\n": 50375, | |
" \n": 50316, | |
" Après": 50702, | |
" Cette": 50525, | |
" Dans": 50577, | |
" Elle": 50337, | |
" Ils": 50680, | |
" Mais": 50741, | |
" Une": 50629, | |
" afin": 50663, | |
" ainsi": 50352, | |
" alors": 50358, | |
" améric": 50484, | |
" année": 50659, | |
" années": 50403, | |
" appel": 50583, | |
" après": 50347, | |
" aussi": 50349, | |
" autre": 50623, | |
" autres": 50458, | |
" avait": 50457, | |
" avant": 50418, | |
" avec": 50271, | |
" avoir": 50467, | |
" ayant": 50692, | |
" bien": 50450, | |
" bâ": 50683, | |
" bâti": 50752, | |
" carac": 50632, | |
" carrière": 50730, | |
" celle": 50590, | |
" celui": 50585, | |
" ces": 50372, | |
" cette": 50308, | |
" chaque": 50738, | |
" chez": 50670, | |
" cin": 50445, | |
" cinq": 50711, | |
" classe": 50685, | |
" comme": 50288, | |
" commune": 50340, | |
" communes": 50734, | |
" compte": 50608, | |
" conse": 50648, | |
" consid": 50603, | |
" constru": 50459, | |
" contre": 50364, | |
" cours": 50476, | |
" cri": 50647, | |
" cré": 50363, | |
" cô": 50516, | |
" dans": 50267, | |
" depuis": 50400, | |
" derni": 50385, | |
" dernier": 50582, | |
" deux": 50284, | |
" deuxième": 50620, | |
" deven": 50622, | |
" devient": 50515, | |
" différ": 50472, | |
" doit": 50720, | |
" donc": 50570, | |
" durant": 50708, | |
" déb": 50376, | |
" début": 50471, | |
" déc": 50295, | |
" déf": 50462, | |
" dém": 50482, | |
" dép": 50434, | |
" dépar": 50429, | |
" département": 50505, | |
" dés": 50684, | |
" développ": 50481, | |
" effet": 50740, | |
" elle": 50305, | |
" emp": 50543, | |
" encore": 50490, | |
" enf": 50594, | |
" ense": 50751, | |
" ensuite": 50613, | |
" entre": 50296, | |
" environ": 50706, | |
" espè": 50587, | |
" europé": 50689, | |
" exemp": 50679, | |
" fait": 50299, | |
" famille": 50473, | |
" fils": 50635, | |
" fois": 50432, | |
" fonction": 50549, | |
" forme": 50584, | |
" fran": 50317, | |
" français": 50346, | |
" française": 50596, | |
" gouver": 50478, | |
" gouvernement": 50586, | |
" grande": 50545, | |
" groupe": 50419, | |
" guerre": 50534, | |
" géné": 50355, | |
" général": 50529, | |
" hab": 50520, | |
" ils": 50540, | |
" indi": 50547, | |
" internation": 50556, | |
" jeu": 50673, | |
" jou": 50359, | |
" joue": 50599, | |
" journ": 50759, | |
" jours": 50756, | |
" jus": 50354, | |
" jusqu": 50409, | |
" leur": 50326, | |
" leurs": 50474, | |
" ligne": 50564, | |
" lors": 50328, | |
" lui": 50324, | |
" lég": 50546, | |
" mais": 50292, | |
" meille": 50662, | |
" membre": 50742, | |
" membres": 50700, | |
" milli": 50640, | |
" moins": 50513, | |
" mois": 50609, | |
" monde": 50451, | |
" mondi": 50575, | |
" mondiale": 50758, | |
" moy": 50565, | |
" muni": 50715, | |
" méd": 50643, | |
" médi": 50723, | |
" mét": 50568, | |
" même": 50339, | |
" nationale": 50631, | |
" niv": 50571, | |
" niveau": 50638, | |
" nombre": 50353, | |
" nombreux": 50646, | |
" nommé": 50749, | |
" nord": 50494, | |
" notamment": 50452, | |
" nouv": 50319, | |
" nouveau": 50503, | |
" nouvelle": 50589, | |
" né": 50367, | |
" oc": 50598, | |
" offici": 50633, | |
" ou": 50278, | |
" ouv": 50576, | |
" où": 50338, | |
" parc": 50750, | |
" parti": 50275, | |
" particu": 50579, | |
" partie": 50361, | |
" partir": 50499, | |
" pendant": 50519, | |
" permet": 50430, | |
" personn": 50406, | |
" personnes": 50710, | |
" peti": 50650, | |
" peu": 50433, | |
" peut": 50389, | |
" peuvent": 50727, | |
" plusieurs": 50416, | |
" poin": 50460, | |
" poli": 50391, | |
" politique": 50517, | |
" porte": 50760, | |
" pouv": 50528, | |
" pouvoir": 50743, | |
" premi": 50285, | |
" première": 50368, | |
" prend": 50707, | |
" pris": 50762, | |
" projet": 50567, | |
" provin": 50728, | |
" près": 50661, | |
" pré": 50274, | |
" préc": 50532, | |
" présent": 50401, | |
" présid": 50437, | |
" président": 50554, | |
" publi": 50350, | |
" puis": 50332, | |
" père": 50701, | |
" péri": 50487, | |
" période": 50639, | |
" quat": 50442, | |
" quatre": 50521, | |
" quel": 50485, | |
" quelques": 50604, | |
" recher": 50716, | |
" remp": 50404, | |
" rencont": 50580, | |
" représent": 50468, | |
" reste": 50691, | |
" retour": 50682, | |
" retr": 50694, | |
" reç": 50637, | |
" ri": 50480, | |
" roi": 50709, | |
" réalis": 50477, | |
" réc": 50693, | |
" réf": 50618, | |
" rég": 50315, | |
" région": 50393, | |
" rép": 50479, | |
" rés": 50502, | |
" rév": 50672, | |
" rôle": 50748, | |
" saison": 50483, | |
" selon": 50559, | |
" sera": 50688, | |
" servi": 50475, | |
" ses": 50289, | |
" seul": 50746, | |
" situé": 50657, | |
" située": 50578, | |
" siè": 50588, | |
" soci": 50396, | |
" société": 50636, | |
" soit": 50498, | |
" sont": 50273, | |
" sorti": 50625, | |
" sous": 50318, | |
" souvent": 50736, | |
" spéci": 50560, | |
" sud": 50493, | |
" suiv": 50411, | |
" syst": 50656, | |
" sé": 50440, | |
" série": 50533, | |
" tard": 50601, | |
" temps": 50465, | |
" territoire": 50724, | |
" thé": 50593, | |
" titre": 50507, | |
" toujours": 50722, | |
" tous": 50500, | |
" tout": 50360, | |
" toute": 50605, | |
" trav": 50322, | |
" trois": 50343, | |
" trouve": 50614, | |
" très": 50422, | |
" télé": 50658, | |
" utilis": 50441, | |
" vie": 50512, | |
" ville": 50387, | |
" »,": 50548, | |
" ».": 50501, | |
" À": 50511, | |
" États": 50522, | |
" éc": 50334, | |
" éch": 50671, | |
" écrit": 50753, | |
" ég": 50313, | |
" également": 50331, | |
" éle": 50518, | |
" ép": 50456, | |
" équi": 50761, | |
" ét": 50279, | |
" étaient": 50652, | |
" était": 50341, | |
" étant": 50703, | |
" étu": 50466, | |
" été": 50282, | |
" év": 50402, | |
" ê": 50362, | |
" être": 50369, | |
" œuv": 50705, | |
"Au": 50655, | |
"Biographie": 50510, | |
"Dans": 50669, | |
"Elle": 50678, | |
"Histoire": 50642, | |
"Unis": 50539, | |
"aient": 50323, | |
"aill": 50497, | |
"aille": 50408, | |
"aines": 50436, | |
"aise": 50597, | |
"aison": 50384, | |
"aiss": 50562, | |
"alement": 50294, | |
"alif": 50739, | |
"aliste": 50747, | |
"alité": 50351, | |
"amment": 50426, | |
"anci": 50704, | |
"anis": 50386, | |
"année": 50651, | |
"atique": 50644, | |
"aume": 50690, | |
"autres": 50514, | |
"aî": 50380, | |
"aît": 50610, | |
"aï": 50572, | |
"ché": 50504, | |
"cid": 50524, | |
"cident": 50755, | |
"cien": 50390, | |
"cip": 50302, | |
"cipe": 50726, | |
"cole": 50551, | |
"cou": 50573, | |
"cri": 50495, | |
"cteur": 50602, | |
"dic": 50731, | |
"dition": 50553, | |
"dre": 50624, | |
"eau": 50301, | |
"eaux": 50737, | |
"ellement": 50461, | |
"elles": 50330, | |
"elon": 50395, | |
"enant": 50581, | |
"endre": 50449, | |
"ennent": 50732, | |
"entes": 50566, | |
"enti": 50394, | |
"entre": 50280, | |
"erre": 50300, | |
"fic": 50506, | |
"fois": 50558, | |
"ganis": 50455, | |
"histoire": 50718, | |
"hui": 50649, | |
"hé": 50398, | |
"ieurs": 50378, | |
"ieux": 50469, | |
"igine": 50668, | |
"ilité": 50687, | |
"insi": 50333, | |
"iographie": 50491, | |
"isme": 50626, | |
"istes": 50489, | |
"istoire": 50410, | |
"istre": 50537, | |
"ités": 50399, | |
"iversité": 50544, | |
"iè": 50298, | |
"ième": 50374, | |
"ière": 50681, | |
"jourd": 50714, | |
"jours": 50654, | |
"lac": 50735, | |
"lise": 50563, | |
"lle": 50277, | |
"lles": 50591, | |
"lè": 50357, | |
"mande": 50695, | |
"mbl": 50665, | |
"mbre": 50388, | |
"mbres": 50664, | |
"mé": 50379, | |
"mée": 50464, | |
"nement": 50342, | |
"né": 50270, | |
"née": 50336, | |
"nées": 50321, | |
"ographie": 50381, | |
"oig": 50744, | |
"oins": 50421, | |
"oires": 50595, | |
"oix": 50719, | |
"ologie": 50486, | |
"olu": 50424, | |
"omme": 50439, | |
"ommes": 50729, | |
"onc": 50373, | |
"onction": 50508, | |
"onn": 50281, | |
"opul": 50428, | |
"origine": 50725, | |
"orte": 50366, | |
"orti": 50538, | |
"ouest": 50677, | |
"ouis": 50675, | |
"ourd": 50600, | |
"ourg": 50555, | |
"ouv": 50268, | |
"ouve": 50392, | |
"ouvent": 50542, | |
"pendant": 50619, | |
"poque": 50617, | |
"pris": 50550, | |
"près": 50290, | |
"puis": 50344, | |
"qui": 50320, | |
"quipe": 50427, | |
"qué": 50607, | |
"rale": 50569, | |
"rande": 50448, | |
"remi": 50283, | |
"ribu": 50530, | |
"rière": 50423, | |
"rois": 50310, | |
"roupe": 50377, | |
"rée": 50492, | |
"rés": 50536, | |
"résent": 50438, | |
"rê": 50526, | |
"seau": 50676, | |
"sig": 50674, | |
"sion": 50303, | |
"sionn": 50630, | |
"sions": 50616, | |
"sition": 50454, | |
"sse": 50557, | |
"suite": 50552, | |
"tant": 50535, | |
"tat": 50306, | |
"tats": 50417, | |
"tement": 50356, | |
"tent": 50733, | |
"teur": 50287, | |
"teurs": 50435, | |
"tient": 50541, | |
"tifs": 50712, | |
"tique": 50291, | |
"tiques": 50382, | |
"tit": 50443, | |
"titu": 50365, | |
"tiv": 50371, | |
"tive": 50446, | |
"tres": 50293, | |
"ture": 50307, | |
"té": 50266, | |
"tég": 50561, | |
"téri": 50612, | |
"térieur": 50757, | |
"téris": 50698, | |
"tés": 50447, | |
"ubli": 50311, | |
"uerre": 50420, | |
"uire": 50745, | |
"uis": 50276, | |
"urant": 50488, | |
"urent": 50611, | |
"urop": 50628, | |
"urope": 50666, | |
"uropé": 50653, | |
"ussi": 50329, | |
"utilis": 50397, | |
"uvent": 50699, | |
"velopp": 50470, | |
"vient": 50415, | |
"yant": 50621, | |
"État": 50606, | |
"âce": 50754, | |
"âte": 50641, | |
"âteau": 50717, | |
"çais": 50335, | |
"ède": 50627, | |
"èg": 50697, | |
"èle": 50721, | |
"ème": 50414, | |
"ène": 50453, | |
"èrement": 50634, | |
"èrent": 50696, | |
"ères": 50345, | |
"ès": 50269, | |
"ètres": 50713, | |
"éb": 50615, | |
"éc": 50327, | |
"éci": 50527, | |
"éd": 50297, | |
"édi": 50431, | |
"ées": 50286, | |
"ég": 50304, | |
"él": 50574, | |
"éli": 50667, | |
"élé": 50463, | |
"ém": 50325, | |
"éné": 50312, | |
"ép": 50383, | |
"équipe": 50592, | |
"éra": 50314, | |
"ération": 50496, | |
"éri": 50272, | |
"éric": 50413, | |
"érie": 50407, | |
"éro": 50645, | |
"éré": 50412, | |
"éti": 50660, | |
"été": 50348, | |
"év": 50405, | |
"ême": 50309, | |
"ête": 50444, | |
"être": 50531, | |
"ôle": 50509, | |
"ôt": 50686, | |
"œ": 50370, | |
"œuv": 50523, | |
"’": 50265 | |
} | |