Gunulhona committed on
Commit
1c33f7b
1 Parent(s): 596f7d8

add tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +1 -1
  2. tokenizer.json +81 -0
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>"}
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "additional_special_tokens": ["<P01>", "<P02>", "<P03>", "<P04>", "<P05>", "<P06>", "<P07>", "<P08>", "<P09>"]}
tokenizer.json CHANGED
@@ -2063,6 +2063,87 @@
2063
  "rstrip": false,
2064
  "normalized": false,
2065
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2066
  }
2067
  ],
2068
  "normalizer": {
 
2063
  "rstrip": false,
2064
  "normalized": false,
2065
  "special": true
2066
+ },
2067
+ {
2068
+ "id": 30000,
2069
+ "content": "<P01>",
2070
+ "single_word": false,
2071
+ "lstrip": false,
2072
+ "rstrip": false,
2073
+ "normalized": false,
2074
+ "special": true
2075
+ },
2076
+ {
2077
+ "id": 30001,
2078
+ "content": "<P02>",
2079
+ "single_word": false,
2080
+ "lstrip": false,
2081
+ "rstrip": false,
2082
+ "normalized": false,
2083
+ "special": true
2084
+ },
2085
+ {
2086
+ "id": 30002,
2087
+ "content": "<P03>",
2088
+ "single_word": false,
2089
+ "lstrip": false,
2090
+ "rstrip": false,
2091
+ "normalized": false,
2092
+ "special": true
2093
+ },
2094
+ {
2095
+ "id": 30003,
2096
+ "content": "<P04>",
2097
+ "single_word": false,
2098
+ "lstrip": false,
2099
+ "rstrip": false,
2100
+ "normalized": false,
2101
+ "special": true
2102
+ },
2103
+ {
2104
+ "id": 30004,
2105
+ "content": "<P05>",
2106
+ "single_word": false,
2107
+ "lstrip": false,
2108
+ "rstrip": false,
2109
+ "normalized": false,
2110
+ "special": true
2111
+ },
2112
+ {
2113
+ "id": 30005,
2114
+ "content": "<P06>",
2115
+ "single_word": false,
2116
+ "lstrip": false,
2117
+ "rstrip": false,
2118
+ "normalized": false,
2119
+ "special": true
2120
+ },
2121
+ {
2122
+ "id": 30006,
2123
+ "content": "<P07>",
2124
+ "single_word": false,
2125
+ "lstrip": false,
2126
+ "rstrip": false,
2127
+ "normalized": false,
2128
+ "special": true
2129
+ },
2130
+ {
2131
+ "id": 30007,
2132
+ "content": "<P08>",
2133
+ "single_word": false,
2134
+ "lstrip": false,
2135
+ "rstrip": false,
2136
+ "normalized": false,
2137
+ "special": true
2138
+ },
2139
+ {
2140
+ "id": 30008,
2141
+ "content": "<P09>",
2142
+ "single_word": false,
2143
+ "lstrip": false,
2144
+ "rstrip": false,
2145
+ "normalized": false,
2146
+ "special": true
2147
  }
2148
  ],
2149
  "normalizer": {