sanchit42 committed on
Commit
ae06033
1 Parent(s): f90a401

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.json +7 -16
tokenizer.json CHANGED
@@ -6943,23 +6943,13 @@
6943
  "special": true
6944
  }
6945
  ],
6946
- "normalizer": {
6947
- "type": "Sequence",
6948
- "normalizers": [
6949
- {
6950
- "type": "Prepend",
6951
- "prepend": "▁"
6952
- },
6953
- {
6954
- "type": "Replace",
6955
- "pattern": {
6956
- "String": " "
6957
- },
6958
- "content": "▁"
6959
- }
6960
- ]
6961
  },
6962
- "pre_tokenizer": null,
6963
  "post_processor": {
6964
  "type": "TemplateProcessing",
6965
  "single": [
@@ -7046,6 +7036,7 @@
7046
  "end_of_word_suffix": null,
7047
  "fuse_unk": true,
7048
  "byte_fallback": true,
 
7049
  "vocab": {
7050
  "<unk>": 0,
7051
  "<s>": 1,
 
6943
  "special": true
6944
  }
6945
  ],
6946
+ "normalizer": null,
6947
+ "pre_tokenizer": {
6948
+ "type": "Metaspace",
6949
+ "replacement": "▁",
6950
+ "prepend_scheme": "first",
6951
+ "split": false
 
 
 
 
 
 
 
 
 
6952
  },
 
6953
  "post_processor": {
6954
  "type": "TemplateProcessing",
6955
  "single": [
 
7036
  "end_of_word_suffix": null,
7037
  "fuse_unk": true,
7038
  "byte_fallback": true,
7039
+ "ignore_merges": false,
7040
  "vocab": {
7041
  "<unk>": 0,
7042
  "<s>": 1,