goldfish-models committed on
Commit
df1cb94
·
1 Parent(s): 7dcb9ab

Upload tzo_latn_5mb tokenizer.

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"[XXXXX52]": 32285, "[XXXXX330]": 32563, "[XXXXX202]": 32435, "[XXXXX390]": 32623, "[XXXXX383]": 32616, "[XXXXX248]": 32481, "[XXXXX132]": 32365, "[XXXXX491]": 32724, "[XXXXX385]": 32618, "[XXXXX304]": 32537, "[XXXXX0]": 32233, "[XXXXX258]": 32491, "[XXXXX217]": 32450, "[XXXXX396]": 32629, "[XXXXX250]": 32483, "[XXXXX140]": 32373, "[XXXXX333]": 32566, "[XXXXX104]": 32337, "[XXXXX416]": 32649, "[XXXXX136]": 32369, "[XXXXX408]": 32641, "[XXXXX329]": 32562, "[XXXXX175]": 32408, "[XXXXX225]": 32458, "[XXXXX290]": 32523, "[XXXXX410]": 32643, "[XXXXX310]": 32543, "[XXXXX151]": 32384, "[XXXXX260]": 32493, "[XXXXX432]": 32665, "[XXXXX124]": 32357, "[XXXXX521]": 32754, "[XXXXX357]": 32590, "[XXXXX165]": 32398, "[XXXXX247]": 32480, "[XXXXX298]": 32531, "[XXXXX276]": 32509, "[XXXXX368]": 32601, "[XXXXX93]": 32326, "[XXXXX194]": 32427, "[XXXXX72]": 32305, "[XXXXX160]": 32393, "[XXXXX238]": 32471, "[XXXXX352]": 32585, "[XXXXX341]": 32574, "[XXXXX90]": 32323, "[XXXXX455]": 32688, "[XXXXX313]": 32546, "[XXXXX120]": 32353, "[XXXXX215]": 32448, "[XXXXX41]": 32274, "[XXXXX386]": 32619, "[XXXXX201]": 32434, "[XXXXX518]": 32751, "[XXXXX77]": 32310, "[XXXXX37]": 32270, "[XXXXX95]": 32328, "[XXXXX44]": 32277, "[XXXXX474]": 32707, "[XXXXX348]": 32581, "[XXXXX45]": 32278, "[XXXXX527]": 32760, "[XXXXX464]": 32697, "[XXXXX150]": 32383, "[XXXXX167]": 32400, "[XXXXX235]": 32468, "[XXXXX22]": 32255, "[XXXXX213]": 32446, "[XXXXX289]": 32522, "[XXXXX372]": 32605, "[XXXXX306]": 32539, "[XXXXX130]": 32363, "[XXXXX152]": 32385, "[XXXXX223]": 32456, "[XXXXX436]": 32669, "[XXXXX515]": 32748, "[XXXXX307]": 32540, "[XXXXX2]": 32235, "[XXXXX275]": 32508, "[XXXXX287]": 32520, "[XXXXX362]": 32595, "[XXXXX486]": 32719, "[XXXXX112]": 32345, "[XXXXX505]": 32738, "[XXXXX113]": 32346, "[XXXXX89]": 32322, "[XXXXX129]": 32362, "[XXXXX27]": 32260, "[XXXXX40]": 32273, "[XXXXX10]": 32243, "[XXXXX134]": 32367, "[XXXXX65]": 32298, "[XXXXX305]": 32538, "[XXXXX378]": 32611, "[XXXXX264]": 32497, "[XXXXX128]": 32361, 
"[XXXXX85]": 32318, "[XXXXX471]": 32704, "[XXXXX249]": 32482, "[XXXXX523]": 32756, "[XXXXX481]": 32714, "[XXXXX252]": 32485, "[XXXXX111]": 32344, "[XXXXX291]": 32524, "[XXXXX336]": 32569, "[XXXXX135]": 32368, "[XXXXX343]": 32576, "[XXXXX70]": 32303, "<pad>": 32231, "[XXXXX183]": 32416, "[XXXXX206]": 32439, "[XXXXX394]": 32627, "[XXXXX78]": 32311, "[XXXXX209]": 32442, "[XXXXX6]": 32239, "[XXXXX266]": 32499, "[XXXXX25]": 32258, "[XXXXX17]": 32250, "[XXXXX55]": 32288, "[XXXXX308]": 32541, "[XXXXX418]": 32651, "[XXXXX466]": 32699, "[XXXXX375]": 32608, "[XXXXX48]": 32281, "[XXXXX105]": 32338, "[XXXXX277]": 32510, "[XXXXX212]": 32445, "[XXXXX434]": 32667, "[XXXXX114]": 32347, "[XXXXX23]": 32256, "[XXXXX381]": 32614, "[XXXXX283]": 32516, "[XXXXX61]": 32294, "[XXXXX192]": 32425, "[XXXXX224]": 32457, "[XXXXX230]": 32463, "[XXXXX265]": 32498, "[XXXXX512]": 32745, "[XXXXX246]": 32479, "[XXXXX244]": 32477, "[XXXXX125]": 32358, "[XXXXX5]": 32238, "[XXXXX115]": 32348, "[XXXXX228]": 32461, "[XXXXX106]": 32339, "[XXXXX484]": 32717, "[XXXXX427]": 32660, "[XXXXX32]": 32265, "[XXXXX204]": 32437, "[XXXXX281]": 32514, "[XXXXX451]": 32684, "[XXXXX483]": 32716, "[XXXXX63]": 32296, "[XXXXX161]": 32394, "[XXXXX482]": 32715, "[XXXXX123]": 32356, "[XXXXX448]": 32681, "[XXXXX163]": 32396, "[XXXXX452]": 32685, "[XXXXX214]": 32447, "[XXXXX145]": 32378, "[XXXXX401]": 32634, "[XXXXX229]": 32462, "[XXXXX379]": 32612, "[XXXXX460]": 32693, "[XXXXX170]": 32403, "[XXXXX462]": 32695, "[XXXXX263]": 32496, "[XXXXX420]": 32653, "[XXXXX86]": 32319, "[XXXXX391]": 32624, "[XXXXX364]": 32597, "[XXXXX69]": 32302, "[XXXXX164]": 32397, "[XXXXX327]": 32560, "[XXXXX425]": 32658, "[XXXXX493]": 32726, "[XXXXX498]": 32731, "[XXXXX517]": 32750, "[XXXXX447]": 32680, "[XXXXX393]": 32626, "[XXXXX457]": 32690, "[XXXXX107]": 32340, "[XXXXX232]": 32465, "[XXXXX399]": 32632, "[XXXXX426]": 32659, "[XXXXX36]": 32269, "[XXXXX159]": 32392, "[XXXXX24]": 32257, "[XXXXX16]": 32249, "[XXXXX443]": 32676, "[XXXXX389]": 32622, 
"[XXXXX210]": 32443, "[XXXXX279]": 32512, "[XXXXX199]": 32432, "[XXXXX530]": 32763, "[XXXXX433]": 32666, "[XXXXX269]": 32502, "[XXXXX533]": 32766, "[XXXXX359]": 32592, "[XXXXX349]": 32582, "[XXXXX226]": 32459, "[XXXXX12]": 32245, "[XXXXX99]": 32332, "[XXXXX56]": 32289, "[XXXXX169]": 32402, "[XXXXX424]": 32657, "[XXXXX171]": 32404, "[XXXXX340]": 32573, "[XXXXX255]": 32488, "[XXXXX257]": 32490, "[XXXXX301]": 32534, "[XXXXX193]": 32426, "[XXXXX178]": 32411, "[XXXXX407]": 32640, "[XXXXX267]": 32500, "[XXXXX240]": 32473, "[XXXXX101]": 32334, "[XXXXX188]": 32421, "[XXXXX81]": 32314, "[XXXXX83]": 32316, "[XXXXX146]": 32379, "[XXXXX531]": 32764, "[XXXXX480]": 32713, "[XXXXX121]": 32354, "[XXXXX384]": 32617, "[XXXXX479]": 32712, "[XXXXX102]": 32335, "[XXXXX236]": 32469, "[XXXXX142]": 32375, "[XXXXX98]": 32331, "[XXXXX300]": 32533, "[XXXXX34]": 32267, "[XXXXX205]": 32438, "[XXXXX174]": 32407, "[XXXXX62]": 32295, "[XXXXX221]": 32454, "[XXXXX449]": 32682, "[XXXXX477]": 32710, "[XXXXX299]": 32532, "[XXXXX487]": 32720, "[XXXXX87]": 32320, "[XXXXX79]": 32312, "[XXXXX354]": 32587, "[XXXXX504]": 32737, "[XXXXX529]": 32762, "[XXXXX413]": 32646, "[XXXXX110]": 32343, "[XXXXX179]": 32412, "[XXXXX440]": 32673, "[XXXXX422]": 32655, "[XXXXX139]": 32372, "[XXXXX26]": 32259, "[XXXXX524]": 32757, "[XXXXX203]": 32436, "[XXXXX350]": 32583, "[XXXXX516]": 32749, "[XXXXX64]": 32297, "[XXXXX387]": 32620, "[XXXXX435]": 32668, "[XXXXX3]": 32236, "[XXXXX149]": 32382, "[XXXXX431]": 32664, "[XXXXX268]": 32501, "[XXXXX117]": 32350, "[XXXXX133]": 32366, "[XXXXX118]": 32351, "[XXXXX360]": 32593, "[XXXXX501]": 32734, "[XXXXX497]": 32730, "[XXXXX33]": 32266, "[XXXXX67]": 32300, "[XXXXX80]": 32313, "[XXXXX377]": 32610, "[XXXXX158]": 32391, "[XXXXX469]": 32702, "[XXXXX367]": 32600, "[XXXXX220]": 32453, "[XXXXX450]": 32683, "[XXXXX297]": 32530, "[XXXXX325]": 32558, "[XXXXX382]": 32615, "[XXXXX271]": 32504, "[XXXXX406]": 32639, "[XXXXX430]": 32663, "[XXXXX463]": 32696, "[XXXXX197]": 32430, "[XXXXX346]": 32579, 
"[XXXXX286]": 32519, "[XXXXX335]": 32568, "[XXXXX282]": 32515, "[XXXXX321]": 32554, "[XXXXX185]": 32418, "[XXXXX441]": 32674, "[XXXXX138]": 32371, "[XXXXX19]": 32252, "[XXXXX417]": 32650, "[XXXXX351]": 32584, "[XXXXX239]": 32472, "[XXXXX510]": 32743, "[XXXXX143]": 32376, "[XXXXX496]": 32729, "[XXXXX13]": 32246, "[XXXXX284]": 32517, "[XXXXX92]": 32325, "[XXXXX14]": 32247, "[XXXXX388]": 32621, "[XXXXX526]": 32759, "[XXXXX315]": 32548, "[XXXXX317]": 32550, "[XXXXX190]": 32423, "[XXXXX369]": 32602, "[XXXXX444]": 32677, "[XXXXX356]": 32589, "[XXXXX326]": 32559, "[XXXXX97]": 32330, "[XXXXX126]": 32359, "[XXXXX189]": 32422, "[XXXXX39]": 32272, "[XXXXX273]": 32506, "[XXXXX84]": 32317, "[XXXXX520]": 32753, "[XXXXX47]": 32280, "[XXXXX421]": 32654, "[XXXXX8]": 32241, "[XXXXX519]": 32752, "[XXXXX500]": 32733, "[XXXXX88]": 32321, "[XXXXX68]": 32301, "[XXXXX154]": 32387, "[XXXXX173]": 32406, "[XXXXX11]": 32244, "[XXXXX454]": 32687, "[XXXXX75]": 32308, "[XXXXX334]": 32567, "[MASK]": 32232, "[XXXXX453]": 32686, "[XXXXX119]": 32352, "[XXXXX513]": 32746, "[XXXXX20]": 32253, "[XXXXX207]": 32440, "[XXXXX18]": 32251, "[XXXXX507]": 32740, "[XXXXX198]": 32431, "[XXXXX475]": 32708, "[XXXXX294]": 32527, "[XXXXX366]": 32599, "[XXXXX412]": 32645, "[XXXXX534]": 32767, "[XXXXX58]": 32291, "[XXXXX337]": 32570, "[XXXXX280]": 32513, "[XXXXX278]": 32511, "[XXXXX131]": 32364, "[XXXXX445]": 32678, "[XXXXX465]": 32698, "[XXXXX438]": 32671, "[XXXXX488]": 32721, "[XXXXX423]": 32656, "[XXXXX473]": 32706, "[XXXXX485]": 32718, "[XXXXX176]": 32409, "[XXXXX261]": 32494, "[XXXXX338]": 32571, "[XXXXX218]": 32451, "[XXXXX182]": 32415, "[XXXXX344]": 32577, "[XXXXX186]": 32419, "[XXXXX309]": 32542, "[XXXXX274]": 32507, "[XXXXX415]": 32648, "[XXXXX373]": 32606, "[XXXXX253]": 32486, "[XXXXX490]": 32723, "[XXXXX439]": 32672, "[XXXXX177]": 32410, "[XXXXX144]": 32377, "[XXXXX180]": 32413, "[XXXXX29]": 32262, "[XXXXX195]": 32428, "[XXXXX153]": 32386, "[XXXXX28]": 32261, "[XXXXX318]": 32551, "[XXXXX380]": 32613, 
"[XXXXX38]": 32271, "[XXXXX162]": 32395, "[XXXXX241]": 32474, "[XXXXX303]": 32536, "[XXXXX46]": 32279, "[XXXXX494]": 32727, "[XXXXX472]": 32705, "[XXXXX296]": 32529, "[XXXXX314]": 32547, "[XXXXX503]": 32736, "[XXXXX127]": 32360, "[XXXXX66]": 32299, "[XXXXX227]": 32460, "[XXXXX285]": 32518, "[XXXXX100]": 32333, "[XXXXX259]": 32492, "[XXXXX489]": 32722, "[XXXXX222]": 32455, "[XXXXX331]": 32564, "[XXXXX181]": 32414, "[XXXXX414]": 32647, "[XXXXX148]": 32381, "[XXXXX233]": 32466, "[XXXXX54]": 32287, "[XXXXX429]": 32662, "[XXXXX108]": 32341, "[XXXXX511]": 32744, "[XXXXX53]": 32286, "[XXXXX365]": 32598, "[XXXXX94]": 32327, "[XXXXX532]": 32765, "[XXXXX342]": 32575, "[XXXXX1]": 32234, "[XXXXX328]": 32561, "[XXXXX397]": 32630, "[XXXXX514]": 32747, "[XXXXX254]": 32487, "[XXXXX196]": 32429, "[XXXXX74]": 32307, "[XXXXX392]": 32625, "[XXXXX31]": 32264, "[XXXXX293]": 32526, "[SEP]": 32230, "[XXXXX363]": 32596, "[XXXXX262]": 32495, "[XXXXX141]": 32374, "[XXXXX76]": 32309, "[XXXXX43]": 32276, "[XXXXX96]": 32329, "[XXXXX245]": 32478, "[XXXXX361]": 32594, "[XXXXX411]": 32644, "[XXXXX509]": 32742, "[XXXXX15]": 32248, "[XXXXX446]": 32679, "[XXXXX476]": 32709, "[XXXXX456]": 32689, "[XXXXX404]": 32637, "[XXXXX345]": 32578, "[XXXXX187]": 32420, "[CLS]": 32229, "[XXXXX30]": 32263, "[XXXXX428]": 32661, "[XXXXX419]": 32652, "[XXXXX355]": 32588, "[XXXXX50]": 32283, "[XXXXX184]": 32417, "[XXXXX57]": 32290, "[XXXXX147]": 32380, "[XXXXX324]": 32557, "[XXXXX288]": 32521, "[XXXXX9]": 32242, "[XXXXX339]": 32572, "[XXXXX398]": 32631, "[XXXXX458]": 32691, "[XXXXX405]": 32638, "[XXXXX103]": 32336, "[XXXXX442]": 32675, "[XXXXX332]": 32565, "[XXXXX109]": 32342, "[XXXXX347]": 32580, "[XXXXX400]": 32633, "[XXXXX7]": 32240, "[XXXXX216]": 32449, "[XXXXX157]": 32390, "[XXXXX242]": 32475, "[XXXXX316]": 32549, "[XXXXX467]": 32700, "[XXXXX468]": 32701, "[XXXXX358]": 32591, "[XXXXX21]": 32254, "[XXXXX231]": 32464, "[XXXXX470]": 32703, "[XXXXX71]": 32304, "[XXXXX155]": 32388, "[XXXXX495]": 32728, "[XXXXX395]": 
32628, "[XXXXX371]": 32604, "[XXXXX243]": 32476, "[XXXXX256]": 32489, "[XXXXX499]": 32732, "[XXXXX234]": 32467, "[XXXXX200]": 32433, "[XXXXX323]": 32556, "[XXXXX525]": 32758, "[XXXXX528]": 32761, "[XXXXX82]": 32315, "[XXXXX319]": 32552, "[XXXXX219]": 32452, "[XXXXX122]": 32355, "[XXXXX35]": 32268, "[XXXXX59]": 32292, "[XXXXX478]": 32711, "[XXXXX295]": 32528, "[XXXXX166]": 32399, "[XXXXX459]": 32692, "[XXXXX168]": 32401, "[XXXXX211]": 32444, "[XXXXX91]": 32324, "[XXXXX292]": 32525, "[XXXXX208]": 32441, "[XXXXX42]": 32275, "[XXXXX461]": 32694, "[XXXXX191]": 32424, "[XXXXX403]": 32636, "[XXXXX370]": 32603, "[XXXXX312]": 32545, "[XXXXX376]": 32609, "[XXXXX374]": 32607, "[XXXXX4]": 32237, "[XXXXX522]": 32755, "[XXXXX492]": 32725, "[XXXXX353]": 32586, "[XXXXX60]": 32293, "[XXXXX302]": 32535, "[XXXXX311]": 32544, "[XXXXX49]": 32282, "[XXXXX502]": 32735, "[XXXXX409]": 32642, "[XXXXX251]": 32484, "[XXXXX272]": 32505, "[XXXXX116]": 32349, "[XXXXX506]": 32739, "[XXXXX320]": 32553, "[XXXXX237]": 32470, "[XXXXX270]": 32503, "[XXXXX156]": 32389, "[XXXXX508]": 32741, "[XXXXX402]": 32635, "[XXXXX437]": 32670, "[XXXXX137]": 32370, "[XXXXX172]": 32405, "[XXXXX322]": 32555, "[XXXXX51]": 32284, "[XXXXX73]": 32306}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["[XXXXX0]", "[XXXXX1]", "[XXXXX2]", "[XXXXX3]", "[XXXXX4]", "[XXXXX5]", "[XXXXX6]", "[XXXXX7]", "[XXXXX8]", "[XXXXX9]", "[XXXXX10]", "[XXXXX11]", "[XXXXX12]", "[XXXXX13]", "[XXXXX14]", "[XXXXX15]", "[XXXXX16]", "[XXXXX17]", "[XXXXX18]", "[XXXXX19]", "[XXXXX20]", "[XXXXX21]", "[XXXXX22]", "[XXXXX23]", "[XXXXX24]", "[XXXXX25]", "[XXXXX26]", "[XXXXX27]", "[XXXXX28]", "[XXXXX29]", "[XXXXX30]", "[XXXXX31]", "[XXXXX32]", "[XXXXX33]", "[XXXXX34]", "[XXXXX35]", "[XXXXX36]", "[XXXXX37]", "[XXXXX38]", "[XXXXX39]", "[XXXXX40]", "[XXXXX41]", "[XXXXX42]", "[XXXXX43]", "[XXXXX44]", "[XXXXX45]", "[XXXXX46]", "[XXXXX47]", "[XXXXX48]", "[XXXXX49]", "[XXXXX50]", "[XXXXX51]", "[XXXXX52]", "[XXXXX53]", "[XXXXX54]", "[XXXXX55]", "[XXXXX56]", "[XXXXX57]", "[XXXXX58]", "[XXXXX59]", "[XXXXX60]", "[XXXXX61]", "[XXXXX62]", "[XXXXX63]", "[XXXXX64]", "[XXXXX65]", "[XXXXX66]", "[XXXXX67]", "[XXXXX68]", "[XXXXX69]", "[XXXXX70]", "[XXXXX71]", "[XXXXX72]", "[XXXXX73]", "[XXXXX74]", "[XXXXX75]", "[XXXXX76]", "[XXXXX77]", "[XXXXX78]", "[XXXXX79]", "[XXXXX80]", "[XXXXX81]", "[XXXXX82]", "[XXXXX83]", "[XXXXX84]", "[XXXXX85]", "[XXXXX86]", "[XXXXX87]", "[XXXXX88]", "[XXXXX89]", "[XXXXX90]", "[XXXXX91]", "[XXXXX92]", "[XXXXX93]", "[XXXXX94]", "[XXXXX95]", "[XXXXX96]", "[XXXXX97]", "[XXXXX98]", "[XXXXX99]", "[XXXXX100]", "[XXXXX101]", "[XXXXX102]", "[XXXXX103]", "[XXXXX104]", "[XXXXX105]", "[XXXXX106]", "[XXXXX107]", "[XXXXX108]", "[XXXXX109]", "[XXXXX110]", "[XXXXX111]", "[XXXXX112]", "[XXXXX113]", "[XXXXX114]", "[XXXXX115]", "[XXXXX116]", "[XXXXX117]", "[XXXXX118]", "[XXXXX119]", "[XXXXX120]", "[XXXXX121]", "[XXXXX122]", "[XXXXX123]", "[XXXXX124]", "[XXXXX125]", "[XXXXX126]", "[XXXXX127]", "[XXXXX128]", "[XXXXX129]", 
"[XXXXX130]", "[XXXXX131]", "[XXXXX132]", "[XXXXX133]", "[XXXXX134]", "[XXXXX135]", "[XXXXX136]", "[XXXXX137]", "[XXXXX138]", "[XXXXX139]", "[XXXXX140]", "[XXXXX141]", "[XXXXX142]", "[XXXXX143]", "[XXXXX144]", "[XXXXX145]", "[XXXXX146]", "[XXXXX147]", "[XXXXX148]", "[XXXXX149]", "[XXXXX150]", "[XXXXX151]", "[XXXXX152]", "[XXXXX153]", "[XXXXX154]", "[XXXXX155]", "[XXXXX156]", "[XXXXX157]", "[XXXXX158]", "[XXXXX159]", "[XXXXX160]", "[XXXXX161]", "[XXXXX162]", "[XXXXX163]", "[XXXXX164]", "[XXXXX165]", "[XXXXX166]", "[XXXXX167]", "[XXXXX168]", "[XXXXX169]", "[XXXXX170]", "[XXXXX171]", "[XXXXX172]", "[XXXXX173]", "[XXXXX174]", "[XXXXX175]", "[XXXXX176]", "[XXXXX177]", "[XXXXX178]", "[XXXXX179]", "[XXXXX180]", "[XXXXX181]", "[XXXXX182]", "[XXXXX183]", "[XXXXX184]", "[XXXXX185]", "[XXXXX186]", "[XXXXX187]", "[XXXXX188]", "[XXXXX189]", "[XXXXX190]", "[XXXXX191]", "[XXXXX192]", "[XXXXX193]", "[XXXXX194]", "[XXXXX195]", "[XXXXX196]", "[XXXXX197]", "[XXXXX198]", "[XXXXX199]", "[XXXXX200]", "[XXXXX201]", "[XXXXX202]", "[XXXXX203]", "[XXXXX204]", "[XXXXX205]", "[XXXXX206]", "[XXXXX207]", "[XXXXX208]", "[XXXXX209]", "[XXXXX210]", "[XXXXX211]", "[XXXXX212]", "[XXXXX213]", "[XXXXX214]", "[XXXXX215]", "[XXXXX216]", "[XXXXX217]", "[XXXXX218]", "[XXXXX219]", "[XXXXX220]", "[XXXXX221]", "[XXXXX222]", "[XXXXX223]", "[XXXXX224]", "[XXXXX225]", "[XXXXX226]", "[XXXXX227]", "[XXXXX228]", "[XXXXX229]", "[XXXXX230]", "[XXXXX231]", "[XXXXX232]", "[XXXXX233]", "[XXXXX234]", "[XXXXX235]", "[XXXXX236]", "[XXXXX237]", "[XXXXX238]", "[XXXXX239]", "[XXXXX240]", "[XXXXX241]", "[XXXXX242]", "[XXXXX243]", "[XXXXX244]", "[XXXXX245]", "[XXXXX246]", "[XXXXX247]", "[XXXXX248]", "[XXXXX249]", "[XXXXX250]", "[XXXXX251]", "[XXXXX252]", "[XXXXX253]", "[XXXXX254]", "[XXXXX255]", "[XXXXX256]", "[XXXXX257]", "[XXXXX258]", "[XXXXX259]", "[XXXXX260]", "[XXXXX261]", "[XXXXX262]", "[XXXXX263]", "[XXXXX264]", "[XXXXX265]", "[XXXXX266]", "[XXXXX267]", "[XXXXX268]", "[XXXXX269]", "[XXXXX270]", "[XXXXX271]", 
"[XXXXX272]", "[XXXXX273]", "[XXXXX274]", "[XXXXX275]", "[XXXXX276]", "[XXXXX277]", "[XXXXX278]", "[XXXXX279]", "[XXXXX280]", "[XXXXX281]", "[XXXXX282]", "[XXXXX283]", "[XXXXX284]", "[XXXXX285]", "[XXXXX286]", "[XXXXX287]", "[XXXXX288]", "[XXXXX289]", "[XXXXX290]", "[XXXXX291]", "[XXXXX292]", "[XXXXX293]", "[XXXXX294]", "[XXXXX295]", "[XXXXX296]", "[XXXXX297]", "[XXXXX298]", "[XXXXX299]", "[XXXXX300]", "[XXXXX301]", "[XXXXX302]", "[XXXXX303]", "[XXXXX304]", "[XXXXX305]", "[XXXXX306]", "[XXXXX307]", "[XXXXX308]", "[XXXXX309]", "[XXXXX310]", "[XXXXX311]", "[XXXXX312]", "[XXXXX313]", "[XXXXX314]", "[XXXXX315]", "[XXXXX316]", "[XXXXX317]", "[XXXXX318]", "[XXXXX319]", "[XXXXX320]", "[XXXXX321]", "[XXXXX322]", "[XXXXX323]", "[XXXXX324]", "[XXXXX325]", "[XXXXX326]", "[XXXXX327]", "[XXXXX328]", "[XXXXX329]", "[XXXXX330]", "[XXXXX331]", "[XXXXX332]", "[XXXXX333]", "[XXXXX334]", "[XXXXX335]", "[XXXXX336]", "[XXXXX337]", "[XXXXX338]", "[XXXXX339]", "[XXXXX340]", "[XXXXX341]", "[XXXXX342]", "[XXXXX343]", "[XXXXX344]", "[XXXXX345]", "[XXXXX346]", "[XXXXX347]", "[XXXXX348]", "[XXXXX349]", "[XXXXX350]", "[XXXXX351]", "[XXXXX352]", "[XXXXX353]", "[XXXXX354]", "[XXXXX355]", "[XXXXX356]", "[XXXXX357]", "[XXXXX358]", "[XXXXX359]", "[XXXXX360]", "[XXXXX361]", "[XXXXX362]", "[XXXXX363]", "[XXXXX364]", "[XXXXX365]", "[XXXXX366]", "[XXXXX367]", "[XXXXX368]", "[XXXXX369]", "[XXXXX370]", "[XXXXX371]", "[XXXXX372]", "[XXXXX373]", "[XXXXX374]", "[XXXXX375]", "[XXXXX376]", "[XXXXX377]", "[XXXXX378]", "[XXXXX379]", "[XXXXX380]", "[XXXXX381]", "[XXXXX382]", "[XXXXX383]", "[XXXXX384]", "[XXXXX385]", "[XXXXX386]", "[XXXXX387]", "[XXXXX388]", "[XXXXX389]", "[XXXXX390]", "[XXXXX391]", "[XXXXX392]", "[XXXXX393]", "[XXXXX394]", "[XXXXX395]", "[XXXXX396]", "[XXXXX397]", "[XXXXX398]", "[XXXXX399]", "[XXXXX400]", "[XXXXX401]", "[XXXXX402]", "[XXXXX403]", "[XXXXX404]", "[XXXXX405]", "[XXXXX406]", "[XXXXX407]", "[XXXXX408]", "[XXXXX409]", "[XXXXX410]", "[XXXXX411]", "[XXXXX412]", "[XXXXX413]", 
"[XXXXX414]", "[XXXXX415]", "[XXXXX416]", "[XXXXX417]", "[XXXXX418]", "[XXXXX419]", "[XXXXX420]", "[XXXXX421]", "[XXXXX422]", "[XXXXX423]", "[XXXXX424]", "[XXXXX425]", "[XXXXX426]", "[XXXXX427]", "[XXXXX428]", "[XXXXX429]", "[XXXXX430]", "[XXXXX431]", "[XXXXX432]", "[XXXXX433]", "[XXXXX434]", "[XXXXX435]", "[XXXXX436]", "[XXXXX437]", "[XXXXX438]", "[XXXXX439]", "[XXXXX440]", "[XXXXX441]", "[XXXXX442]", "[XXXXX443]", "[XXXXX444]", "[XXXXX445]", "[XXXXX446]", "[XXXXX447]", "[XXXXX448]", "[XXXXX449]", "[XXXXX450]", "[XXXXX451]", "[XXXXX452]", "[XXXXX453]", "[XXXXX454]", "[XXXXX455]", "[XXXXX456]", "[XXXXX457]", "[XXXXX458]", "[XXXXX459]", "[XXXXX460]", "[XXXXX461]", "[XXXXX462]", "[XXXXX463]", "[XXXXX464]", "[XXXXX465]", "[XXXXX466]", "[XXXXX467]", "[XXXXX468]", "[XXXXX469]", "[XXXXX470]", "[XXXXX471]", "[XXXXX472]", "[XXXXX473]", "[XXXXX474]", "[XXXXX475]", "[XXXXX476]", "[XXXXX477]", "[XXXXX478]", "[XXXXX479]", "[XXXXX480]", "[XXXXX481]", "[XXXXX482]", "[XXXXX483]", "[XXXXX484]", "[XXXXX485]", "[XXXXX486]", "[XXXXX487]", "[XXXXX488]", "[XXXXX489]", "[XXXXX490]", "[XXXXX491]", "[XXXXX492]", "[XXXXX493]", "[XXXXX494]", "[XXXXX495]", "[XXXXX496]", "[XXXXX497]", "[XXXXX498]", "[XXXXX499]", "[XXXXX500]", "[XXXXX501]", "[XXXXX502]", "[XXXXX503]", "[XXXXX504]", "[XXXXX505]", "[XXXXX506]", "[XXXXX507]", "[XXXXX508]", "[XXXXX509]", "[XXXXX510]", "[XXXXX511]", "[XXXXX512]", "[XXXXX513]", "[XXXXX514]", "[XXXXX515]", "[XXXXX516]", "[XXXXX517]", "[XXXXX518]", "[XXXXX519]", "[XXXXX520]", "[XXXXX521]", "[XXXXX522]", "[XXXXX523]", "[XXXXX524]", "[XXXXX525]", "[XXXXX526]", "[XXXXX527]", "[XXXXX528]", "[XXXXX529]", "[XXXXX530]", "[XXXXX531]", "[XXXXX532]", "[XXXXX533]", "[XXXXX534]"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ac396cf57831af1c45382bb736245f8848d1ae9db43f932f3cc2528172d1cae
3
+ size 804525
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": false, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "__type": "AddedToken"}, "sp_model_kwargs": {}, "name_or_path": "models/5mb/tzo_latn_5mb", "model_input_names": ["input_ids", "attention_mask"], "special_tokens_map_file": "models/5mb/tzo_latn_5mb/special_tokens_map.json", "tokenizer_class": "AlbertTokenizer"}