goldfish-models committed
Commit • b842726 • 1 Parent(s): e061432
Upload snd_arab_5mb tokenizer.
- added_tokens.json +1 -0
- special_tokens_map.json +1 -0
- spiece.model +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
added_tokens.json
ADDED
@@ -0,0 +1 @@
{"[XXXXX77]": 47096, "[XXXXX74]": 47093, "[XXXXX37]": 47056, "[XXXXX35]": 47054, "[XXXXX12]": 47031, "[XXXXX52]": 47071, "[XXXXX70]": 47089, "[XXXXX57]": 47076, "[XXXXX72]": 47091, "[XXXXX32]": 47051, "[XXXXX84]": 47103, "[XXXXX5]": 47024, "[XXXXX34]": 47053, "[XXXXX83]": 47102, "[XXXXX4]": 47023, "[XXXXX43]": 47062, "[XXXXX11]": 47030, "[XXXXX61]": 47080, "[XXXXX78]": 47097, "[XXXXX18]": 47037, "[XXXXX75]": 47094, "<pad>": 47017, "[XXXXX62]": 47081, "[MASK]": 47018, "[XXXXX51]": 47070, "[XXXXX29]": 47048, "[XXXXX67]": 47086, "[XXXXX45]": 47064, "[XXXXX82]": 47101, "[XXXXX30]": 47049, "[XXXXX56]": 47075, "[XXXXX3]": 47022, "[XXXXX44]": 47063, "[XXXXX66]": 47085, "[XXXXX9]": 47028, "[XXXXX27]": 47046, "[XXXXX54]": 47073, "[XXXXX49]": 47068, "[XXXXX23]": 47042, "[XXXXX42]": 47061, "[XXXXX31]": 47050, "[XXXXX65]": 47084, "[XXXXX36]": 47055, "[XXXXX38]": 47057, "[XXXXX2]": 47021, "[XXXXX25]": 47044, "[XXXXX63]": 47082, "[XXXXX39]": 47058, "[XXXXX53]": 47072, "[XXXXX46]": 47065, "[XXXXX16]": 47035, "[XXXXX15]": 47034, "[XXXXX21]": 47040, "[XXXXX68]": 47087, "[XXXXX69]": 47088, "[XXXXX81]": 47100, "[SEP]": 47016, "[XXXXX80]": 47099, "[XXXXX55]": 47074, "[XXXXX20]": 47039, "[XXXXX24]": 47043, "[XXXXX13]": 47032, "[XXXXX26]": 47045, "[XXXXX50]": 47069, "[XXXXX41]": 47060, "[XXXXX58]": 47077, "[XXXXX0]": 47019, "[CLS]": 47015, "[XXXXX19]": 47038, "[XXXXX73]": 47092, "[XXXXX28]": 47047, "[XXXXX7]": 47026, "[XXXXX22]": 47041, "[XXXXX33]": 47052, "[XXXXX17]": 47036, "[XXXXX14]": 47033, "[XXXXX79]": 47098, "[XXXXX71]": 47090, "[XXXXX40]": 47059, "[XXXXX1]": 47020, "[XXXXX48]": 47067, "[XXXXX60]": 47079, "[XXXXX76]": 47095, "[XXXXX64]": 47083, "[XXXXX10]": 47029, "[XXXXX8]": 47027, "[XXXXX47]": 47066, "[XXXXX6]": 47025, "[XXXXX59]": 47078}
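A quick way to sanity-check these fixed token-to-id assignments after the upload (a minimal sketch; the Hub repo id `goldfish-models/snd_arab_5mb` is assumed from the commit context, not stated in the diff):

```python
from transformers import AutoTokenizer

# Assumed repo id, inferred from the commit context.
tok = AutoTokenizer.from_pretrained("goldfish-models/snd_arab_5mb")

# added_tokens.json pins each special token to a fixed vocabulary id:
# ids 47015-47103 sit on top of the base SentencePiece vocabulary.
assert tok.convert_tokens_to_ids("[CLS]") == 47015
assert tok.convert_tokens_to_ids("[SEP]") == 47016
assert tok.convert_tokens_to_ids("<pad>") == 47017
assert tok.convert_tokens_to_ids("[MASK]") == 47018
```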
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
{"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["[XXXXX0]", "[XXXXX1]", "[XXXXX2]", "[XXXXX3]", "[XXXXX4]", "[XXXXX5]", "[XXXXX6]", "[XXXXX7]", "[XXXXX8]", "[XXXXX9]", "[XXXXX10]", "[XXXXX11]", "[XXXXX12]", "[XXXXX13]", "[XXXXX14]", "[XXXXX15]", "[XXXXX16]", "[XXXXX17]", "[XXXXX18]", "[XXXXX19]", "[XXXXX20]", "[XXXXX21]", "[XXXXX22]", "[XXXXX23]", "[XXXXX24]", "[XXXXX25]", "[XXXXX26]", "[XXXXX27]", "[XXXXX28]", "[XXXXX29]", "[XXXXX30]", "[XXXXX31]", "[XXXXX32]", "[XXXXX33]", "[XXXXX34]", "[XXXXX35]", "[XXXXX36]", "[XXXXX37]", "[XXXXX38]", "[XXXXX39]", "[XXXXX40]", "[XXXXX41]", "[XXXXX42]", "[XXXXX43]", "[XXXXX44]", "[XXXXX45]", "[XXXXX46]", "[XXXXX47]", "[XXXXX48]", "[XXXXX49]", "[XXXXX50]", "[XXXXX51]", "[XXXXX52]", "[XXXXX53]", "[XXXXX54]", "[XXXXX55]", "[XXXXX56]", "[XXXXX57]", "[XXXXX58]", "[XXXXX59]", "[XXXXX60]", "[XXXXX61]", "[XXXXX62]", "[XXXXX63]", "[XXXXX64]", "[XXXXX65]", "[XXXXX66]", "[XXXXX67]", "[XXXXX68]", "[XXXXX69]", "[XXXXX70]", "[XXXXX71]", "[XXXXX72]", "[XXXXX73]", "[XXXXX74]", "[XXXXX75]", "[XXXXX76]", "[XXXXX77]", "[XXXXX78]", "[XXXXX79]", "[XXXXX80]", "[XXXXX81]", "[XXXXX82]", "[XXXXX83]", "[XXXXX84]"]}
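The map wires the named special-token slots that `transformers` exposes as attributes. A minimal check of how it loads (again assuming the repo id from the commit context):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("goldfish-models/snd_arab_5mb")  # assumed repo id

# [CLS] doubles as BOS and [SEP] doubles as EOS (ALBERT-style framing);
# the mask token is declared with lstrip, so it absorbs a leading space.
assert tok.bos_token == tok.cls_token == "[CLS]"
assert tok.eos_token == tok.sep_token == "[SEP]"
assert tok.mask_token == "[MASK]"

# 85 additional special tokens ([XXXXX0] .. [XXXXX84]) that are never split.
assert len(tok.additional_special_tokens) == 85
```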
spiece.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f913a09acdd1582ea8336371ed29a6c1629b7a5aee4cb5e695f959033b23083c
size 1085740
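What the commit stores here is a Git LFS pointer, not the model itself; after `git lfs pull`, the working-tree file should match the recorded size and sha256 oid. A sketch that verifies the pointer and then loads the file as a SentencePiece model:

```python
import hashlib
import sentencepiece as spm

# After `git lfs pull`, spiece.model is the real SentencePiece model and
# should match the oid/size recorded in the LFS pointer above.
data = open("spiece.model", "rb").read()
assert len(data) == 1085740
assert hashlib.sha256(data).hexdigest() == (
    "f913a09acdd1582ea8336371ed29a6c1629b7a5aee4cb5e695f959033b23083c"
)

sp = spm.SentencePieceProcessor(model_file="spiece.model")
print(sp.vocab_size())  # base subword vocabulary, before the added special tokens
```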
tokenizer.json
ADDED
The diff for this file is too large to render.
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
{"do_lower_case": false, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "__type": "AddedToken"}, "sp_model_kwargs": {}, "name_or_path": "models/5mb/snd_arab_5mb", "model_input_names": ["input_ids", "attention_mask"], "special_tokens_map_file": "models/5mb/snd_arab_5mb/special_tokens_map.json", "tokenizer_class": "AlbertTokenizer"}
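The config names `AlbertTokenizer` as the tokenizer class, preserves casing and accents (`do_lower_case: false`, `keep_accents: true`), and restricts model inputs to `input_ids` and `attention_mask`. A minimal usage sketch (repo id assumed from the commit context; the sample string is arbitrary):

```python
from transformers import AlbertTokenizer

# Loads spiece.model plus the three JSON files committed above.
tok = AlbertTokenizer.from_pretrained("goldfish-models/snd_arab_5mb")  # assumed repo id

enc = tok("سنڌي text sample")  # Sindhi (Arabic script) mixed with Latin text
# model_input_names limits the encoding to exactly these two keys:
print(sorted(enc.keys()))  # ['attention_mask', 'input_ids']
```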