low-resource-language-model-adaptation
/
adapted_tokenizers
/added-opt-eng
/opt_100-add_full-eng-opt
/added_tokens.json
{ | |
" Academ": 50331, | |
" Biography": 50362, | |
" Emp": 50360, | |
" Geor": 50300, | |
" Histor": 50353, | |
" Legit": 50351, | |
" Oly": 50310, | |
" Pla": 50350, | |
" Profess": 50358, | |
" anc": 50363, | |
" appoin": 50313, | |
" becom": 50335, | |
" colla": 50346, | |
" consid": 50303, | |
" constru": 50308, | |
" daugh": 50325, | |
" dif": 50287, | |
" earl": 50336, | |
" emp": 50320, | |
" examp": 50323, | |
" gradu": 50324, | |
" nomin": 50355, | |
" nov": 50327, | |
" performan": 50322, | |
" pla": 50265, | |
" plann": 50359, | |
" poin": 50296, | |
" produc": 50330, | |
" publ": 50271, | |
" repla": 50314, | |
" sele": 50339, | |
" tourn": 50334, | |
" villa": 50289, | |
" β": 50306, | |
"adio": 50311, | |
"ampionship": 50291, | |
"areer": 50276, | |
"arliam": 50326, | |
"arliament": 50328, | |
"arly": 50270, | |
"arri": 50279, | |
"arried": 50301, | |
"cted": 50269, | |
"ctober": 50286, | |
"duced": 50292, | |
"ebru": 50293, | |
"ebruary": 50294, | |
"ecess": 50364, | |
"eck": 50338, | |
"ecut": 50312, | |
"ecutive": 50333, | |
"eep": 50340, | |
"eleb": 50349, | |
"ely": 50267, | |
"embers": 50295, | |
"embly": 50337, | |
"emor": 50344, | |
"eneral": 50309, | |
"eptember": 50285, | |
"etwork": 50321, | |
"fess": 50281, | |
"fic": 50342, | |
"forman": 50316, | |
"gether": 50329, | |
"iforn": 50317, | |
"ifornia": 50318, | |
"ilm": 50268, | |
"incip": 50347, | |
"istory": 50275, | |
"ivision": 50297, | |
"lack": 50298, | |
"lant": 50356, | |
"loy": 50305, | |
"ociety": 50315, | |
"ollowing": 50348, | |
"ootball": 50274, | |
"ounc": 50299, | |
"ouncil": 50302, | |
"overn": 50272, | |
"overnment": 50278, | |
"overnor": 50332, | |
"oviet": 50352, | |
"owever": 50280, | |
"pril": 50288, | |
"reland": 50343, | |
"retary": 50354, | |
"stit": 50277, | |
"stitut": 50361, | |
"stitute": 50319, | |
"tend": 50284, | |
"tended": 50304, | |
"ternational": 50283, | |
"thlet": 50341, | |
"ubl": 50266, | |
"ugust": 50282, | |
"ularly": 50345, | |
"usic": 50273, | |
"zil": 50357, | |
"β": 50290, | |
"β": 50307 | |
} | |