atgarcia committed on
Commit
700003c
1 Parent(s): 98e2f4a

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +2 -2
  2. tokenizer_config.json +4 -4
  3. vocab.json +45 -52
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "</s>": 54,
3
- "<s>": 53
4
  }
 
1
  {
2
+ "</s>": 47,
3
+ "<s>": 46
4
  }
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "added_tokens_decoder": {
3
- "51": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
@@ -8,7 +8,7 @@
8
  "single_word": false,
9
  "special": false
10
  },
11
- "52": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
@@ -16,7 +16,7 @@
16
  "single_word": false,
17
  "special": false
18
  },
19
- "53": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
@@ -24,7 +24,7 @@
24
  "single_word": false,
25
  "special": true
26
  },
27
- "54": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
 
1
  {
2
  "added_tokens_decoder": {
3
+ "44": {
4
  "content": "[UNK]",
5
  "lstrip": true,
6
  "normalized": false,
 
8
  "single_word": false,
9
  "special": false
10
  },
11
+ "45": {
12
  "content": "[PAD]",
13
  "lstrip": true,
14
  "normalized": false,
 
16
  "single_word": false,
17
  "special": false
18
  },
19
+ "46": {
20
  "content": "<s>",
21
  "lstrip": false,
22
  "normalized": false,
 
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "47": {
28
  "content": "</s>",
29
  "lstrip": false,
30
  "normalized": false,
vocab.json CHANGED
@@ -1,55 +1,48 @@
1
  {
2
- "&": 24,
3
- "'": 21,
4
- "(": 16,
5
- ")": 0,
6
- "*": 45,
7
- "/": 26,
8
- "0": 18,
9
- "1": 19,
10
- "2": 4,
11
- "3": 14,
12
- "4": 41,
13
- "5": 23,
14
- "6": 42,
15
- "7": 49,
16
- "8": 33,
17
- "9": 13,
18
- "[": 47,
19
- "[PAD]": 52,
20
- "[UNK]": 51,
21
- "]": 34,
22
- "_": 29,
23
- "a": 37,
24
- "b": 22,
25
- "c": 2,
26
- "d": 12,
27
- "e": 15,
28
  "f": 20,
29
- "g": 3,
30
- "h": 35,
31
- "i": 27,
32
- "j": 10,
33
- "k": 8,
34
- "l": 30,
35
- "m": 11,
36
- "n": 50,
37
- "o": 43,
38
- "p": 28,
39
- "q": 9,
40
- "r": 39,
41
- "s": 31,
42
- "t": 5,
43
- "u": 46,
44
- "v": 7,
45
- "w": 36,
46
- "x": 17,
47
- "y": 1,
48
- "z": 44,
49
- "|": 25,
50
- "£": 48,
51
- "æ": 32,
52
- "è": 6,
53
- "é": 40,
54
- "œ": 38
55
  }
 
1
  {
2
+ "&": 25,
3
+ "'": 10,
4
+ "0": 41,
5
+ "1": 8,
6
+ "2": 33,
7
+ "3": 13,
8
+ "4": 23,
9
+ "5": 40,
10
+ "6": 24,
11
+ "7": 12,
12
+ "8": 38,
13
+ "9": 1,
14
+ "[PAD]": 45,
15
+ "[UNK]": 44,
16
+ "a": 15,
17
+ "b": 16,
18
+ "c": 30,
19
+ "d": 28,
20
+ "e": 36,
 
 
 
 
 
 
 
21
  "f": 20,
22
+ "g": 0,
23
+ "h": 11,
24
+ "i": 37,
25
+ "j": 39,
26
+ "k": 19,
27
+ "l": 2,
28
+ "m": 5,
29
+ "n": 14,
30
+ "o": 32,
31
+ "p": 29,
32
+ "q": 43,
33
+ "r": 21,
34
+ "s": 3,
35
+ "t": 27,
36
+ "u": 26,
37
+ "v": 18,
38
+ "w": 35,
39
+ "x": 4,
40
+ "y": 7,
41
+ "z": 42,
42
+ "|": 9,
43
+ "£": 6,
44
+ "æ": 34,
45
+ "è": 22,
46
+ "é": 31,
47
+ "œ": 17
48
  }