MRNH committed on
Commit
ebd60af
1 Parent(s): 082722b

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +0 -51
  2. tokenizer.json +0 -63
  3. tokenizer_config.json +0 -65
special_tokens_map.json CHANGED
@@ -1,55 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- {
4
- "content": "<question>",
5
- "lstrip": true,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "<stopTMP>",
12
- "lstrip": true,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- {
18
- "content": "</question>",
19
- "lstrip": true,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
- {
25
- "content": "<answer>",
26
- "lstrip": true,
27
- "normalized": false,
28
- "rstrip": false,
29
- "single_word": false
30
- },
31
- {
32
- "content": "</answer>",
33
- "lstrip": true,
34
- "normalized": false,
35
- "rstrip": false,
36
- "single_word": false
37
- },
38
- {
39
- "content": "</context>",
40
- "lstrip": true,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false
44
- },
45
- {
46
- "content": "<context>",
47
- "lstrip": true,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false
51
- }
52
- ],
53
  "bos_token": {
54
  "content": "<s>",
55
  "lstrip": false,
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
tokenizer.json CHANGED
@@ -50,69 +50,6 @@
50
  "rstrip": false,
51
  "normalized": true,
52
  "special": false
53
- },
54
- {
55
- "id": 32001,
56
- "content": "<question>",
57
- "single_word": false,
58
- "lstrip": true,
59
- "rstrip": false,
60
- "normalized": false,
61
- "special": true
62
- },
63
- {
64
- "id": 32002,
65
- "content": "<stopTMP>",
66
- "single_word": false,
67
- "lstrip": true,
68
- "rstrip": false,
69
- "normalized": false,
70
- "special": true
71
- },
72
- {
73
- "id": 32003,
74
- "content": "</question>",
75
- "single_word": false,
76
- "lstrip": true,
77
- "rstrip": false,
78
- "normalized": false,
79
- "special": true
80
- },
81
- {
82
- "id": 32004,
83
- "content": "<answer>",
84
- "single_word": false,
85
- "lstrip": true,
86
- "rstrip": false,
87
- "normalized": false,
88
- "special": true
89
- },
90
- {
91
- "id": 32005,
92
- "content": "</answer>",
93
- "single_word": false,
94
- "lstrip": true,
95
- "rstrip": false,
96
- "normalized": false,
97
- "special": true
98
- },
99
- {
100
- "id": 32006,
101
- "content": "</context>",
102
- "single_word": false,
103
- "lstrip": true,
104
- "rstrip": false,
105
- "normalized": false,
106
- "special": true
107
- },
108
- {
109
- "id": 32007,
110
- "content": "<context>",
111
- "single_word": false,
112
- "lstrip": true,
113
- "rstrip": false,
114
- "normalized": false,
115
- "special": true
116
  }
117
  ],
118
  "normalizer": {
 
50
  "rstrip": false,
51
  "normalized": true,
52
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  }
54
  ],
55
  "normalizer": {
tokenizer_config.json CHANGED
@@ -31,73 +31,8 @@
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": false
34
- },
35
- "32001": {
36
- "content": "<question>",
37
- "lstrip": true,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- },
43
- "32002": {
44
- "content": "<stopTMP>",
45
- "lstrip": true,
46
- "normalized": false,
47
- "rstrip": false,
48
- "single_word": false,
49
- "special": true
50
- },
51
- "32003": {
52
- "content": "</question>",
53
- "lstrip": true,
54
- "normalized": false,
55
- "rstrip": false,
56
- "single_word": false,
57
- "special": true
58
- },
59
- "32004": {
60
- "content": "<answer>",
61
- "lstrip": true,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false,
65
- "special": true
66
- },
67
- "32005": {
68
- "content": "</answer>",
69
- "lstrip": true,
70
- "normalized": false,
71
- "rstrip": false,
72
- "single_word": false,
73
- "special": true
74
- },
75
- "32006": {
76
- "content": "</context>",
77
- "lstrip": true,
78
- "normalized": false,
79
- "rstrip": false,
80
- "single_word": false,
81
- "special": true
82
- },
83
- "32007": {
84
- "content": "<context>",
85
- "lstrip": true,
86
- "normalized": false,
87
- "rstrip": false,
88
- "single_word": false,
89
- "special": true
90
  }
91
  },
92
- "additional_special_tokens": [
93
- "<question>",
94
- "<stopTMP>",
95
- "</question>",
96
- "<answer>",
97
- "</answer>",
98
- "</context>",
99
- "<context>"
100
- ],
101
  "bos_token": "<s>",
102
  "clean_up_tokenization_spaces": false,
103
  "eos_token": "</s>",
 
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  }
35
  },
 
 
 
 
 
 
 
 
 
36
  "bos_token": "<s>",
37
  "clean_up_tokenization_spaces": false,
38
  "eos_token": "</s>",