Update tokenization_bart_japanese_news.py
Browse files
tokenization_bart_japanese_news.py
CHANGED
@@ -142,6 +142,14 @@ class BartJapaneseNewsTokenizer(BartTokenizer):
|
|
142 |
|
143 |
self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
|
144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
super(BartTokenizer, self).__init__(
|
146 |
do_lower_case=do_lower_case,
|
147 |
remove_space=remove_space,
|
@@ -157,14 +165,6 @@ class BartJapaneseNewsTokenizer(BartTokenizer):
|
|
157 |
**kwargs,
|
158 |
)
|
159 |
|
160 |
-
self.do_lower_case = do_lower_case
|
161 |
-
self.remove_space = remove_space
|
162 |
-
self.clean_text = clean_text
|
163 |
-
self.vocab_file = vocab_file
|
164 |
-
|
165 |
-
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
|
166 |
-
self.sp_model.Load(vocab_file)
|
167 |
-
|
168 |
@property
|
169 |
def vocab_size(self):
|
170 |
return len(self.sp_model)
|
|
|
142 |
|
143 |
self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
|
144 |
|
145 |
+
self.do_lower_case = do_lower_case
|
146 |
+
self.remove_space = remove_space
|
147 |
+
self.clean_text = clean_text
|
148 |
+
self.vocab_file = vocab_file
|
149 |
+
|
150 |
+
self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
|
151 |
+
self.sp_model.Load(vocab_file)
|
152 |
+
|
153 |
super(BartTokenizer, self).__init__(
|
154 |
do_lower_case=do_lower_case,
|
155 |
remove_space=remove_space,
|
|
|
165 |
**kwargs,
|
166 |
)
|
167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
@property
|
169 |
def vocab_size(self):
|
170 |
return len(self.sp_model)
|