Gunulhona committed on
Commit
39073da
1 Parent(s): cbca73a

add tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +1 -1
  2. tokenizer.json +0 -81
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "additional_special_tokens": ["<P01>", "<P02>", "<P03>", "<P04>", "<P05>", "<P06>", "<P07>", "<P08>", "<P09>"]}
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>"}
tokenizer.json CHANGED
@@ -2063,87 +2063,6 @@
2063
  "rstrip": false,
2064
  "normalized": false,
2065
  "special": true
2066
- },
2067
- {
2068
- "id": 30000,
2069
- "content": "<P01>",
2070
- "single_word": false,
2071
- "lstrip": false,
2072
- "rstrip": false,
2073
- "normalized": false,
2074
- "special": true
2075
- },
2076
- {
2077
- "id": 30001,
2078
- "content": "<P02>",
2079
- "single_word": false,
2080
- "lstrip": false,
2081
- "rstrip": false,
2082
- "normalized": false,
2083
- "special": true
2084
- },
2085
- {
2086
- "id": 30002,
2087
- "content": "<P03>",
2088
- "single_word": false,
2089
- "lstrip": false,
2090
- "rstrip": false,
2091
- "normalized": false,
2092
- "special": true
2093
- },
2094
- {
2095
- "id": 30003,
2096
- "content": "<P04>",
2097
- "single_word": false,
2098
- "lstrip": false,
2099
- "rstrip": false,
2100
- "normalized": false,
2101
- "special": true
2102
- },
2103
- {
2104
- "id": 30004,
2105
- "content": "<P05>",
2106
- "single_word": false,
2107
- "lstrip": false,
2108
- "rstrip": false,
2109
- "normalized": false,
2110
- "special": true
2111
- },
2112
- {
2113
- "id": 30005,
2114
- "content": "<P06>",
2115
- "single_word": false,
2116
- "lstrip": false,
2117
- "rstrip": false,
2118
- "normalized": false,
2119
- "special": true
2120
- },
2121
- {
2122
- "id": 30006,
2123
- "content": "<P07>",
2124
- "single_word": false,
2125
- "lstrip": false,
2126
- "rstrip": false,
2127
- "normalized": false,
2128
- "special": true
2129
- },
2130
- {
2131
- "id": 30007,
2132
- "content": "<P08>",
2133
- "single_word": false,
2134
- "lstrip": false,
2135
- "rstrip": false,
2136
- "normalized": false,
2137
- "special": true
2138
- },
2139
- {
2140
- "id": 30008,
2141
- "content": "<P09>",
2142
- "single_word": false,
2143
- "lstrip": false,
2144
- "rstrip": false,
2145
- "normalized": false,
2146
- "special": true
2147
  }
2148
  ],
2149
  "normalizer": {
 
2063
  "rstrip": false,
2064
  "normalized": false,
2065
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2066
  }
2067
  ],
2068
  "normalizer": {