Xenova HF staff committed on
Commit
35c9bdf
1 Parent(s): 9fdc0ab

Update tokenizer.json

Browse files
Files changed (1) hide show
  1. tokenizer.json +53 -3
tokenizer.json CHANGED
@@ -2,9 +2,59 @@
2
  "version": "1.0",
3
  "truncation": null,
4
  "padding": null,
5
- "added_tokens": [],
6
- "normalizer": null,
7
- "pre_tokenizer": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  "post_processor": null,
9
  "decoder": {
10
  "type": "CTC",
 
2
  "version": "1.0",
3
  "truncation": null,
4
  "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<pad>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "<s>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "</s>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "<unk>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ }
42
+ ],
43
+ "normalizer": {
44
+ "type": "Replace",
45
+ "pattern": {
46
+ "String": " "
47
+ },
48
+ "content": "|"
49
+ },
50
+ "pre_tokenizer": {
51
+ "type": "Split",
52
+ "pattern": {
53
+ "Regex": ""
54
+ },
55
+ "behavior": "Isolated",
56
+ "invert": false
57
+ },
58
  "post_processor": null,
59
  "decoder": {
60
  "type": "CTC",