Upload tokenizer
Browse files- added_tokens.json +3 -0
- tokenizer_config.json +24 -0
added_tokens.json
CHANGED
@@ -1506,6 +1506,7 @@
|
|
1506 |
"<|as|>": 50350,
|
1507 |
"<|az|>": 50304,
|
1508 |
"<|ba|>": 50355,
|
|
|
1509 |
"<|be|>": 50330,
|
1510 |
"<|bg|>": 50292,
|
1511 |
"<|bn|>": 50302,
|
@@ -1518,12 +1519,14 @@
|
|
1518 |
"<|da|>": 50285,
|
1519 |
"<|de|>": 50261,
|
1520 |
"<|el|>": 50281,
|
|
|
1521 |
"<|endoftext|>": 50257,
|
1522 |
"<|en|>": 50259,
|
1523 |
"<|es|>": 50262,
|
1524 |
"<|et|>": 50307,
|
1525 |
"<|eu|>": 50310,
|
1526 |
"<|fa|>": 50300,
|
|
|
1527 |
"<|fi|>": 50277,
|
1528 |
"<|fo|>": 50338,
|
1529 |
"<|fr|>": 50265,
|
|
|
1506 |
"<|as|>": 50350,
|
1507 |
"<|az|>": 50304,
|
1508 |
"<|ba|>": 50355,
|
1509 |
+
"<|begin_of_text|>": 51868,
|
1510 |
"<|be|>": 50330,
|
1511 |
"<|bg|>": 50292,
|
1512 |
"<|bn|>": 50302,
|
|
|
1519 |
"<|da|>": 50285,
|
1520 |
"<|de|>": 50261,
|
1521 |
"<|el|>": 50281,
|
1522 |
+
"<|end_of_text|>": 51869,
|
1523 |
"<|endoftext|>": 50257,
|
1524 |
"<|en|>": 50259,
|
1525 |
"<|es|>": 50262,
|
1526 |
"<|et|>": 50307,
|
1527 |
"<|eu|>": 50310,
|
1528 |
"<|fa|>": 50300,
|
1529 |
+
"<|finetune_right_pad_id|>": 51870,
|
1530 |
"<|fi|>": 50277,
|
1531 |
"<|fo|>": 50338,
|
1532 |
"<|fr|>": 50265,
|
tokenizer_config.json
CHANGED
@@ -12888,6 +12888,30 @@
|
|
12888 |
"rstrip": false,
|
12889 |
"single_word": false,
|
12890 |
"special": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12891 |
}
|
12892 |
},
|
12893 |
"additional_special_tokens": [
|
|
|
12888 |
"rstrip": false,
|
12889 |
"single_word": false,
|
12890 |
"special": false
|
12891 |
+
},
|
12892 |
+
"51868": {
|
12893 |
+
"content": "<|begin_of_text|>",
|
12894 |
+
"lstrip": false,
|
12895 |
+
"normalized": true,
|
12896 |
+
"rstrip": false,
|
12897 |
+
"single_word": false,
|
12898 |
+
"special": false
|
12899 |
+
},
|
12900 |
+
"51869": {
|
12901 |
+
"content": "<|end_of_text|>",
|
12902 |
+
"lstrip": false,
|
12903 |
+
"normalized": true,
|
12904 |
+
"rstrip": false,
|
12905 |
+
"single_word": false,
|
12906 |
+
"special": false
|
12907 |
+
},
|
12908 |
+
"51870": {
|
12909 |
+
"content": "<|finetune_right_pad_id|>",
|
12910 |
+
"lstrip": false,
|
12911 |
+
"normalized": true,
|
12912 |
+
"rstrip": false,
|
12913 |
+
"single_word": false,
|
12914 |
+
"special": false
|
12915 |
}
|
12916 |
},
|
12917 |
"additional_special_tokens": [
|