Upload tokenizer
Browse files- added_tokens.json +5 -1
- tokenizer_config.json +32 -0
added_tokens.json
CHANGED
@@ -1538,6 +1538,8 @@
|
|
1538 |
"<|hu|>": 50286,
|
1539 |
"<|hy|>": 50312,
|
1540 |
"<|id|>": 50275,
|
|
|
|
|
1541 |
"<|is|>": 50311,
|
1542 |
"<|it|>": 50274,
|
1543 |
"<|ja|>": 50266,
|
@@ -1607,5 +1609,7 @@
|
|
1607 |
"<|yi|>": 50335,
|
1608 |
"<|yo|>": 50325,
|
1609 |
"<|yue|>": 50358,
|
1610 |
-
"<|zh|>": 50260
|
|
|
|
|
1611 |
}
|
|
|
1538 |
"<|hu|>": 50286,
|
1539 |
"<|hy|>": 50312,
|
1540 |
"<|id|>": 50275,
|
1541 |
+
"<|im_end|>": 51869,
|
1542 |
+
"<|im_start|>": 51868,
|
1543 |
"<|is|>": 50311,
|
1544 |
"<|it|>": 50274,
|
1545 |
"<|ja|>": 50266,
|
|
|
1609 |
"<|yi|>": 50335,
|
1610 |
"<|yo|>": 50325,
|
1611 |
"<|yue|>": 50358,
|
1612 |
+
"<|zh|>": 50260,
|
1613 |
+
"[END_PAD]": 51867,
|
1614 |
+
"[PAD]": 51866
|
1615 |
}
|
tokenizer_config.json
CHANGED
@@ -12872,6 +12872,38 @@
|
|
12872 |
"rstrip": false,
|
12873 |
"single_word": false,
|
12874 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12875 |
}
|
12876 |
},
|
12877 |
"additional_special_tokens": [
|
|
|
12872 |
"rstrip": false,
|
12873 |
"single_word": false,
|
12874 |
"special": false
|
12875 |
+
},
|
12876 |
+
"51866": {
|
12877 |
+
"content": "[PAD]",
|
12878 |
+
"lstrip": false,
|
12879 |
+
"normalized": true,
|
12880 |
+
"rstrip": false,
|
12881 |
+
"single_word": false,
|
12882 |
+
"special": false
|
12883 |
+
},
|
12884 |
+
"51867": {
|
12885 |
+
"content": "[END_PAD]",
|
12886 |
+
"lstrip": false,
|
12887 |
+
"normalized": true,
|
12888 |
+
"rstrip": false,
|
12889 |
+
"single_word": false,
|
12890 |
+
"special": false
|
12891 |
+
},
|
12892 |
+
"51868": {
|
12893 |
+
"content": "<|im_start|>",
|
12894 |
+
"lstrip": false,
|
12895 |
+
"normalized": true,
|
12896 |
+
"rstrip": false,
|
12897 |
+
"single_word": false,
|
12898 |
+
"special": false
|
12899 |
+
},
|
12900 |
+
"51869": {
|
12901 |
+
"content": "<|im_end|>",
|
12902 |
+
"lstrip": false,
|
12903 |
+
"normalized": true,
|
12904 |
+
"rstrip": false,
|
12905 |
+
"single_word": false,
|
12906 |
+
"special": false
|
12907 |
}
|
12908 |
},
|
12909 |
"additional_special_tokens": [
|