voidful committed on
Commit
2418413
1 Parent(s): ce836e6

Upload tokenizer

Browse files
Files changed (2) hide show
  1. added_tokens.json +3 -0
  2. tokenizer_config.json +24 -0
added_tokens.json CHANGED
@@ -1506,6 +1506,7 @@
1506
  "<|as|>": 50350,
1507
  "<|az|>": 50304,
1508
  "<|ba|>": 50355,
 
1509
  "<|be|>": 50330,
1510
  "<|bg|>": 50292,
1511
  "<|bn|>": 50302,
@@ -1518,12 +1519,14 @@
1518
  "<|da|>": 50285,
1519
  "<|de|>": 50261,
1520
  "<|el|>": 50281,
 
1521
  "<|endoftext|>": 50257,
1522
  "<|en|>": 50259,
1523
  "<|es|>": 50262,
1524
  "<|et|>": 50307,
1525
  "<|eu|>": 50310,
1526
  "<|fa|>": 50300,
 
1527
  "<|fi|>": 50277,
1528
  "<|fo|>": 50338,
1529
  "<|fr|>": 50265,
 
1506
  "<|as|>": 50350,
1507
  "<|az|>": 50304,
1508
  "<|ba|>": 50355,
1509
+ "<|begin_of_text|>": 51868,
1510
  "<|be|>": 50330,
1511
  "<|bg|>": 50292,
1512
  "<|bn|>": 50302,
 
1519
  "<|da|>": 50285,
1520
  "<|de|>": 50261,
1521
  "<|el|>": 50281,
1522
+ "<|end_of_text|>": 51869,
1523
  "<|endoftext|>": 50257,
1524
  "<|en|>": 50259,
1525
  "<|es|>": 50262,
1526
  "<|et|>": 50307,
1527
  "<|eu|>": 50310,
1528
  "<|fa|>": 50300,
1529
+ "<|finetune_right_pad_id|>": 51870,
1530
  "<|fi|>": 50277,
1531
  "<|fo|>": 50338,
1532
  "<|fr|>": 50265,
tokenizer_config.json CHANGED
@@ -12888,6 +12888,30 @@
12888
  "rstrip": false,
12889
  "single_word": false,
12890
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12891
  }
12892
  },
12893
  "additional_special_tokens": [
 
12888
  "rstrip": false,
12889
  "single_word": false,
12890
  "special": false
12891
+ },
12892
+ "51868": {
12893
+ "content": "<|begin_of_text|>",
12894
+ "lstrip": false,
12895
+ "normalized": true,
12896
+ "rstrip": false,
12897
+ "single_word": false,
12898
+ "special": false
12899
+ },
12900
+ "51869": {
12901
+ "content": "<|end_of_text|>",
12902
+ "lstrip": false,
12903
+ "normalized": true,
12904
+ "rstrip": false,
12905
+ "single_word": false,
12906
+ "special": false
12907
+ },
12908
+ "51870": {
12909
+ "content": "<|finetune_right_pad_id|>",
12910
+ "lstrip": false,
12911
+ "normalized": true,
12912
+ "rstrip": false,
12913
+ "single_word": false,
12914
+ "special": false
12915
  }
12916
  },
12917
  "additional_special_tokens": [