MariaFjodorowa committed on
Commit
acf17d3
·
verified ·
1 Parent(s): 828b3fe

Fix tokenizer decoders

Browse files
Files changed (1) hide show
  1. tokenizer.json +18 -10
tokenizer.json CHANGED
@@ -1103,16 +1103,11 @@
1103
  "use_regex": true
1104
  },
1105
  {
1106
- "type": "Metaspace",
1107
- "replacement": "▁",
1108
- "prepend_scheme": "always",
1109
- "split": true
1110
- },
1111
- {
1112
- "type": "Strip",
1113
- "content": " ",
1114
- "start": 1,
1115
- "stop": 0
1116
  },
1117
  {
1118
  "type": "Replace",
@@ -1127,6 +1122,19 @@
1127
  "Regex": "█"
1128
  },
1129
  "content": "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
1130
  }
1131
  ]
1132
  },
 
1103
  "use_regex": true
1104
  },
1105
  {
1106
+ "type": "Replace",
1107
+ "pattern": {
1108
+ "String": "âĸģ"
1109
+ },
1110
+ "content": " "
 
 
 
 
 
1111
  },
1112
  {
1113
  "type": "Replace",
 
1122
  "Regex": "█"
1123
  },
1124
  "content": "\n"
1125
+ },
1126
+ {
1127
+ "type": "Replace",
1128
+ "pattern": {
1129
+ "String": "▁"
1130
+ },
1131
+ "content": " "
1132
+ },
1133
+ {
1134
+ "type": "Strip",
1135
+ "content": " ",
1136
+ "start": 1,
1137
+ "stop": 0
1138
  }
1139
  ]
1140
  },