goldfish-models committed on
Commit
0ac7fb2
1 Parent(s): 6a7e621

Upload eng_latn_5mb tokenizer.

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"[XXXXX87]": 44757, "[XXXXX381]": 45051, "[XXXXX245]": 44915, "[XXXXX137]": 44807, "[XXXXX217]": 44887, "[XXXXX211]": 44881, "[XXXXX139]": 44809, "[XXXXX29]": 44699, "[XXXXX368]": 45038, "[XXXXX46]": 44716, "[XXXXX265]": 44935, "[XXXXX271]": 44941, "[XXXXX227]": 44897, "[XXXXX270]": 44940, "[XXXXX172]": 44842, "[XXXXX262]": 44932, "[XXXXX242]": 44912, "[XXXXX181]": 44851, "[XXXXX353]": 45023, "[XXXXX44]": 44714, "[XXXXX201]": 44871, "[XXXXX215]": 44885, "[XXXXX94]": 44764, "[XXXXX197]": 44867, "[XXXXX236]": 44906, "[XXXXX21]": 44691, "[XXXXX361]": 45031, "[XXXXX67]": 44737, "[XXXXX183]": 44853, "[XXXXX8]": 44678, "[XXXXX320]": 44990, "[XXXXX321]": 44991, "[XXXXX48]": 44718, "[XXXXX32]": 44702, "[XXXXX251]": 44921, "[XXXXX357]": 45027, "[XXXXX34]": 44704, "[XXXXX41]": 44711, "[XXXXX186]": 44856, "[XXXXX138]": 44808, "[XXXXX1]": 44671, "[XXXXX339]": 45009, "[XXXXX182]": 44852, "[XXXXX52]": 44722, "[XXXXX288]": 44958, "[XXXXX250]": 44920, "[XXXXX93]": 44763, "[XXXXX376]": 45046, "[XXXXX344]": 45014, "[XXXXX153]": 44823, "[XXXXX163]": 44833, "[XXXXX188]": 44858, "[XXXXX55]": 44725, "[XXXXX173]": 44843, "[XXXXX202]": 44872, "[XXXXX184]": 44854, "[XXXXX281]": 44951, "[XXXXX135]": 44805, "[XXXXX148]": 44818, "[XXXXX19]": 44689, "[XXXXX6]": 44676, "[XXXXX7]": 44677, "[XXXXX334]": 45004, "[XXXXX103]": 44773, "[XXXXX330]": 45000, "[XXXXX214]": 44884, "[XXXXX310]": 44980, "[XXXXX20]": 44690, "[XXXXX348]": 45018, "[XXXXX49]": 44719, "[XXXXX316]": 44986, "[XXXXX35]": 44705, "[XXXXX266]": 44936, "[XXXXX158]": 44828, "[XXXXX125]": 44795, "[XXXXX2]": 44672, "[XXXXX324]": 44994, "[XXXXX241]": 44911, "[XXXXX350]": 45020, "[XXXXX168]": 44838, "[XXXXX119]": 44789, "[XXXXX170]": 44840, "[XXXXX23]": 44693, "[XXXXX366]": 45036, "[XXXXX149]": 44819, "[XXXXX54]": 44724, "[XXXXX280]": 44950, "[XXXXX343]": 45013, "[XXXXX31]": 44701, "[XXXXX16]": 44686, "[XXXXX253]": 44923, "[XXXXX126]": 44796, "[XXXXX301]": 44971, "[XXXXX74]": 44744, "[XXXXX0]": 44670, "[XXXXX275]": 44945, "[XXXXX40]": 
44710, "[SEP]": 44667, "[XXXXX286]": 44956, "[XXXXX349]": 45019, "[XXXXX65]": 44735, "[XXXXX83]": 44753, "[XXXXX102]": 44772, "[XXXXX375]": 45045, "[XXXXX273]": 44943, "[XXXXX39]": 44709, "[XXXXX192]": 44862, "[XXXXX10]": 44680, "[XXXXX369]": 45039, "[XXXXX64]": 44734, "[XXXXX185]": 44855, "[XXXXX277]": 44947, "<pad>": 44668, "[XXXXX384]": 45054, "[XXXXX287]": 44957, "[XXXXX13]": 44683, "[XXXXX18]": 44688, "[XXXXX112]": 44782, "[XXXXX333]": 45003, "[XXXXX66]": 44736, "[XXXXX187]": 44857, "[XXXXX146]": 44816, "[XXXXX47]": 44717, "[XXXXX171]": 44841, "[XXXXX264]": 44934, "[XXXXX340]": 45010, "[XXXXX239]": 44909, "[XXXXX77]": 44747, "[XXXXX69]": 44739, "[XXXXX89]": 44759, "[XXXXX319]": 44989, "[XXXXX248]": 44918, "[XXXXX79]": 44749, "[XXXXX150]": 44820, "[XXXXX272]": 44942, "[XXXXX383]": 45053, "[XXXXX327]": 44997, "[XXXXX152]": 44822, "[XXXXX43]": 44713, "[XXXXX130]": 44800, "[XXXXX12]": 44682, "[XXXXX374]": 45044, "[MASK]": 44669, "[XXXXX335]": 45005, "[XXXXX37]": 44707, "[XXXXX80]": 44750, "[XXXXX326]": 44996, "[XXXXX88]": 44758, "[XXXXX151]": 44821, "[XXXXX92]": 44762, "[XXXXX317]": 44987, "[XXXXX282]": 44952, "[XXXXX284]": 44954, "[XXXXX91]": 44761, "[XXXXX78]": 44748, "[XXXXX36]": 44706, "[XXXXX155]": 44825, "[XXXXX45]": 44715, "[XXXXX72]": 44742, "[XXXXX243]": 44913, "[XXXXX60]": 44730, "[XXXXX27]": 44697, "[XXXXX169]": 44839, "[XXXXX209]": 44879, "[XXXXX189]": 44859, "[XXXXX143]": 44813, "[XXXXX107]": 44777, "[XXXXX165]": 44835, "[XXXXX208]": 44878, "[XXXXX225]": 44895, "[XXXXX68]": 44738, "[XXXXX290]": 44960, "[XXXXX252]": 44922, "[XXXXX279]": 44949, "[XXXXX15]": 44685, "[XXXXX161]": 44831, "[XXXXX297]": 44967, "[XXXXX206]": 44876, "[XXXXX256]": 44926, "[XXXXX133]": 44803, "[XXXXX3]": 44673, "[XXXXX51]": 44721, "[XXXXX33]": 44703, "[XXXXX331]": 45001, "[XXXXX244]": 44914, "[XXXXX230]": 44900, "[XXXXX128]": 44798, "[XXXXX346]": 45016, "[XXXXX132]": 44802, "[XXXXX322]": 44992, "[XXXXX342]": 45012, "[XXXXX296]": 44966, "[XXXXX120]": 44790, "[XXXXX258]": 44928, 
"[XXXXX207]": 44877, "[XXXXX63]": 44733, "[XXXXX278]": 44948, "[XXXXX362]": 45032, "[XXXXX249]": 44919, "[XXXXX159]": 44829, "[XXXXX314]": 44984, "[XXXXX194]": 44864, "[XXXXX303]": 44973, "[XXXXX84]": 44754, "[XXXXX274]": 44944, "[XXXXX354]": 45024, "[XXXXX358]": 45028, "[XXXXX26]": 44696, "[XXXXX226]": 44896, "[XXXXX82]": 44752, "[XXXXX307]": 44977, "[XXXXX59]": 44729, "[XXXXX283]": 44953, "[CLS]": 44666, "[XXXXX9]": 44679, "[XXXXX116]": 44786, "[XXXXX177]": 44847, "[XXXXX338]": 45008, "[XXXXX123]": 44793, "[XXXXX106]": 44776, "[XXXXX190]": 44860, "[XXXXX142]": 44812, "[XXXXX238]": 44908, "[XXXXX164]": 44834, "[XXXXX233]": 44903, "[XXXXX178]": 44848, "[XXXXX276]": 44946, "[XXXXX193]": 44863, "[XXXXX50]": 44720, "[XXXXX203]": 44873, "[XXXXX246]": 44916, "[XXXXX121]": 44791, "[XXXXX154]": 44824, "[XXXXX144]": 44814, "[XXXXX267]": 44937, "[XXXXX302]": 44972, "[XXXXX11]": 44681, "[XXXXX380]": 45050, "[XXXXX76]": 44746, "[XXXXX198]": 44868, "[XXXXX289]": 44959, "[XXXXX359]": 45029, "[XXXXX212]": 44882, "[XXXXX17]": 44687, "[XXXXX129]": 44799, "[XXXXX268]": 44938, "[XXXXX223]": 44893, "[XXXXX156]": 44826, "[XXXXX298]": 44968, "[XXXXX57]": 44727, "[XXXXX311]": 44981, "[XXXXX42]": 44712, "[XXXXX325]": 44995, "[XXXXX105]": 44775, "[XXXXX22]": 44692, "[XXXXX237]": 44907, "[XXXXX347]": 45017, "[XXXXX176]": 44846, "[XXXXX53]": 44723, "[XXXXX122]": 44792, "[XXXXX263]": 44933, "[XXXXX351]": 45021, "[XXXXX134]": 44804, "[XXXXX385]": 45055, "[XXXXX99]": 44769, "[XXXXX210]": 44880, "[XXXXX118]": 44788, "[XXXXX71]": 44741, "[XXXXX269]": 44939, "[XXXXX213]": 44883, "[XXXXX259]": 44929, "[XXXXX300]": 44970, "[XXXXX363]": 45033, "[XXXXX372]": 45042, "[XXXXX56]": 44726, "[XXXXX309]": 44979, "[XXXXX114]": 44784, "[XXXXX306]": 44976, "[XXXXX180]": 44850, "[XXXXX291]": 44961, "[XXXXX166]": 44836, "[XXXXX221]": 44891, "[XXXXX96]": 44766, "[XXXXX255]": 44925, "[XXXXX204]": 44874, "[XXXXX160]": 44830, "[XXXXX28]": 44698, "[XXXXX304]": 44974, "[XXXXX294]": 44964, "[XXXXX200]": 44870, 
"[XXXXX5]": 44675, "[XXXXX145]": 44815, "[XXXXX97]": 44767, "[XXXXX24]": 44694, "[XXXXX292]": 44962, "[XXXXX285]": 44955, "[XXXXX195]": 44865, "[XXXXX355]": 45025, "[XXXXX174]": 44844, "[XXXXX235]": 44905, "[XXXXX371]": 45041, "[XXXXX115]": 44785, "[XXXXX231]": 44901, "[XXXXX336]": 45006, "[XXXXX147]": 44817, "[XXXXX315]": 44985, "[XXXXX337]": 45007, "[XXXXX254]": 44924, "[XXXXX257]": 44927, "[XXXXX191]": 44861, "[XXXXX377]": 45047, "[XXXXX162]": 44832, "[XXXXX247]": 44917, "[XXXXX196]": 44866, "[XXXXX356]": 45026, "[XXXXX260]": 44930, "[XXXXX179]": 44849, "[XXXXX312]": 44982, "[XXXXX175]": 44845, "[XXXXX367]": 45037, "[XXXXX295]": 44965, "[XXXXX218]": 44888, "[XXXXX30]": 44700, "[XXXXX110]": 44780, "[XXXXX38]": 44708, "[XXXXX62]": 44732, "[XXXXX323]": 44993, "[XXXXX4]": 44674, "[XXXXX364]": 45034, "[XXXXX141]": 44811, "[XXXXX61]": 44731, "[XXXXX73]": 44743, "[XXXXX100]": 44770, "[XXXXX220]": 44890, "[XXXXX293]": 44963, "[XXXXX81]": 44751, "[XXXXX140]": 44810, "[XXXXX329]": 44999, "[XXXXX228]": 44898, "[XXXXX136]": 44806, "[XXXXX205]": 44875, "[XXXXX299]": 44969, "[XXXXX360]": 45030, "[XXXXX25]": 44695, "[XXXXX308]": 44978, "[XXXXX14]": 44684, "[XXXXX240]": 44910, "[XXXXX382]": 45052, "[XXXXX232]": 44902, "[XXXXX219]": 44889, "[XXXXX58]": 44728, "[XXXXX229]": 44899, "[XXXXX157]": 44827, "[XXXXX124]": 44794, "[XXXXX113]": 44783, "[XXXXX111]": 44781, "[XXXXX199]": 44869, "[XXXXX234]": 44904, "[XXXXX85]": 44755, "[XXXXX379]": 45049, "[XXXXX86]": 44756, "[XXXXX365]": 45035, "[XXXXX104]": 44774, "[XXXXX373]": 45043, "[XXXXX370]": 45040, "[XXXXX332]": 45002, "[XXXXX131]": 44801, "[XXXXX95]": 44765, "[XXXXX109]": 44779, "[XXXXX313]": 44983, "[XXXXX108]": 44778, "[XXXXX70]": 44740, "[XXXXX216]": 44886, "[XXXXX352]": 45022, "[XXXXX341]": 45011, "[XXXXX222]": 44892, "[XXXXX167]": 44837, "[XXXXX75]": 44745, "[XXXXX318]": 44988, "[XXXXX378]": 45048, "[XXXXX345]": 45015, "[XXXXX101]": 44771, "[XXXXX305]": 44975, "[XXXXX117]": 44787, "[XXXXX224]": 44894, "[XXXXX98]": 44768, 
"[XXXXX328]": 44998, "[XXXXX261]": 44931, "[XXXXX127]": 44797, "[XXXXX90]": 44760}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["[XXXXX0]", "[XXXXX1]", "[XXXXX2]", "[XXXXX3]", "[XXXXX4]", "[XXXXX5]", "[XXXXX6]", "[XXXXX7]", "[XXXXX8]", "[XXXXX9]", "[XXXXX10]", "[XXXXX11]", "[XXXXX12]", "[XXXXX13]", "[XXXXX14]", "[XXXXX15]", "[XXXXX16]", "[XXXXX17]", "[XXXXX18]", "[XXXXX19]", "[XXXXX20]", "[XXXXX21]", "[XXXXX22]", "[XXXXX23]", "[XXXXX24]", "[XXXXX25]", "[XXXXX26]", "[XXXXX27]", "[XXXXX28]", "[XXXXX29]", "[XXXXX30]", "[XXXXX31]", "[XXXXX32]", "[XXXXX33]", "[XXXXX34]", "[XXXXX35]", "[XXXXX36]", "[XXXXX37]", "[XXXXX38]", "[XXXXX39]", "[XXXXX40]", "[XXXXX41]", "[XXXXX42]", "[XXXXX43]", "[XXXXX44]", "[XXXXX45]", "[XXXXX46]", "[XXXXX47]", "[XXXXX48]", "[XXXXX49]", "[XXXXX50]", "[XXXXX51]", "[XXXXX52]", "[XXXXX53]", "[XXXXX54]", "[XXXXX55]", "[XXXXX56]", "[XXXXX57]", "[XXXXX58]", "[XXXXX59]", "[XXXXX60]", "[XXXXX61]", "[XXXXX62]", "[XXXXX63]", "[XXXXX64]", "[XXXXX65]", "[XXXXX66]", "[XXXXX67]", "[XXXXX68]", "[XXXXX69]", "[XXXXX70]", "[XXXXX71]", "[XXXXX72]", "[XXXXX73]", "[XXXXX74]", "[XXXXX75]", "[XXXXX76]", "[XXXXX77]", "[XXXXX78]", "[XXXXX79]", "[XXXXX80]", "[XXXXX81]", "[XXXXX82]", "[XXXXX83]", "[XXXXX84]", "[XXXXX85]", "[XXXXX86]", "[XXXXX87]", "[XXXXX88]", "[XXXXX89]", "[XXXXX90]", "[XXXXX91]", "[XXXXX92]", "[XXXXX93]", "[XXXXX94]", "[XXXXX95]", "[XXXXX96]", "[XXXXX97]", "[XXXXX98]", "[XXXXX99]", "[XXXXX100]", "[XXXXX101]", "[XXXXX102]", "[XXXXX103]", "[XXXXX104]", "[XXXXX105]", "[XXXXX106]", "[XXXXX107]", "[XXXXX108]", "[XXXXX109]", "[XXXXX110]", "[XXXXX111]", "[XXXXX112]", "[XXXXX113]", "[XXXXX114]", "[XXXXX115]", "[XXXXX116]", "[XXXXX117]", "[XXXXX118]", "[XXXXX119]", "[XXXXX120]", "[XXXXX121]", "[XXXXX122]", "[XXXXX123]", "[XXXXX124]", "[XXXXX125]", "[XXXXX126]", "[XXXXX127]", "[XXXXX128]", "[XXXXX129]", 
"[XXXXX130]", "[XXXXX131]", "[XXXXX132]", "[XXXXX133]", "[XXXXX134]", "[XXXXX135]", "[XXXXX136]", "[XXXXX137]", "[XXXXX138]", "[XXXXX139]", "[XXXXX140]", "[XXXXX141]", "[XXXXX142]", "[XXXXX143]", "[XXXXX144]", "[XXXXX145]", "[XXXXX146]", "[XXXXX147]", "[XXXXX148]", "[XXXXX149]", "[XXXXX150]", "[XXXXX151]", "[XXXXX152]", "[XXXXX153]", "[XXXXX154]", "[XXXXX155]", "[XXXXX156]", "[XXXXX157]", "[XXXXX158]", "[XXXXX159]", "[XXXXX160]", "[XXXXX161]", "[XXXXX162]", "[XXXXX163]", "[XXXXX164]", "[XXXXX165]", "[XXXXX166]", "[XXXXX167]", "[XXXXX168]", "[XXXXX169]", "[XXXXX170]", "[XXXXX171]", "[XXXXX172]", "[XXXXX173]", "[XXXXX174]", "[XXXXX175]", "[XXXXX176]", "[XXXXX177]", "[XXXXX178]", "[XXXXX179]", "[XXXXX180]", "[XXXXX181]", "[XXXXX182]", "[XXXXX183]", "[XXXXX184]", "[XXXXX185]", "[XXXXX186]", "[XXXXX187]", "[XXXXX188]", "[XXXXX189]", "[XXXXX190]", "[XXXXX191]", "[XXXXX192]", "[XXXXX193]", "[XXXXX194]", "[XXXXX195]", "[XXXXX196]", "[XXXXX197]", "[XXXXX198]", "[XXXXX199]", "[XXXXX200]", "[XXXXX201]", "[XXXXX202]", "[XXXXX203]", "[XXXXX204]", "[XXXXX205]", "[XXXXX206]", "[XXXXX207]", "[XXXXX208]", "[XXXXX209]", "[XXXXX210]", "[XXXXX211]", "[XXXXX212]", "[XXXXX213]", "[XXXXX214]", "[XXXXX215]", "[XXXXX216]", "[XXXXX217]", "[XXXXX218]", "[XXXXX219]", "[XXXXX220]", "[XXXXX221]", "[XXXXX222]", "[XXXXX223]", "[XXXXX224]", "[XXXXX225]", "[XXXXX226]", "[XXXXX227]", "[XXXXX228]", "[XXXXX229]", "[XXXXX230]", "[XXXXX231]", "[XXXXX232]", "[XXXXX233]", "[XXXXX234]", "[XXXXX235]", "[XXXXX236]", "[XXXXX237]", "[XXXXX238]", "[XXXXX239]", "[XXXXX240]", "[XXXXX241]", "[XXXXX242]", "[XXXXX243]", "[XXXXX244]", "[XXXXX245]", "[XXXXX246]", "[XXXXX247]", "[XXXXX248]", "[XXXXX249]", "[XXXXX250]", "[XXXXX251]", "[XXXXX252]", "[XXXXX253]", "[XXXXX254]", "[XXXXX255]", "[XXXXX256]", "[XXXXX257]", "[XXXXX258]", "[XXXXX259]", "[XXXXX260]", "[XXXXX261]", "[XXXXX262]", "[XXXXX263]", "[XXXXX264]", "[XXXXX265]", "[XXXXX266]", "[XXXXX267]", "[XXXXX268]", "[XXXXX269]", "[XXXXX270]", "[XXXXX271]", 
"[XXXXX272]", "[XXXXX273]", "[XXXXX274]", "[XXXXX275]", "[XXXXX276]", "[XXXXX277]", "[XXXXX278]", "[XXXXX279]", "[XXXXX280]", "[XXXXX281]", "[XXXXX282]", "[XXXXX283]", "[XXXXX284]", "[XXXXX285]", "[XXXXX286]", "[XXXXX287]", "[XXXXX288]", "[XXXXX289]", "[XXXXX290]", "[XXXXX291]", "[XXXXX292]", "[XXXXX293]", "[XXXXX294]", "[XXXXX295]", "[XXXXX296]", "[XXXXX297]", "[XXXXX298]", "[XXXXX299]", "[XXXXX300]", "[XXXXX301]", "[XXXXX302]", "[XXXXX303]", "[XXXXX304]", "[XXXXX305]", "[XXXXX306]", "[XXXXX307]", "[XXXXX308]", "[XXXXX309]", "[XXXXX310]", "[XXXXX311]", "[XXXXX312]", "[XXXXX313]", "[XXXXX314]", "[XXXXX315]", "[XXXXX316]", "[XXXXX317]", "[XXXXX318]", "[XXXXX319]", "[XXXXX320]", "[XXXXX321]", "[XXXXX322]", "[XXXXX323]", "[XXXXX324]", "[XXXXX325]", "[XXXXX326]", "[XXXXX327]", "[XXXXX328]", "[XXXXX329]", "[XXXXX330]", "[XXXXX331]", "[XXXXX332]", "[XXXXX333]", "[XXXXX334]", "[XXXXX335]", "[XXXXX336]", "[XXXXX337]", "[XXXXX338]", "[XXXXX339]", "[XXXXX340]", "[XXXXX341]", "[XXXXX342]", "[XXXXX343]", "[XXXXX344]", "[XXXXX345]", "[XXXXX346]", "[XXXXX347]", "[XXXXX348]", "[XXXXX349]", "[XXXXX350]", "[XXXXX351]", "[XXXXX352]", "[XXXXX353]", "[XXXXX354]", "[XXXXX355]", "[XXXXX356]", "[XXXXX357]", "[XXXXX358]", "[XXXXX359]", "[XXXXX360]", "[XXXXX361]", "[XXXXX362]", "[XXXXX363]", "[XXXXX364]", "[XXXXX365]", "[XXXXX366]", "[XXXXX367]", "[XXXXX368]", "[XXXXX369]", "[XXXXX370]", "[XXXXX371]", "[XXXXX372]", "[XXXXX373]", "[XXXXX374]", "[XXXXX375]", "[XXXXX376]", "[XXXXX377]", "[XXXXX378]", "[XXXXX379]", "[XXXXX380]", "[XXXXX381]", "[XXXXX382]", "[XXXXX383]", "[XXXXX384]", "[XXXXX385]"]}
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0287d13c7349325e73245df5fc994a8c5c28dc7f5b574a5c53933802db47769a
3
+ size 979664
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": false, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "__type": "AddedToken"}, "sp_model_kwargs": {}, "name_or_path": "models/5mb/eng_latn_5mb", "model_input_names": ["input_ids", "attention_mask"], "special_tokens_map_file": "models/5mb/eng_latn_5mb/special_tokens_map.json", "tokenizer_class": "AlbertTokenizer"}