Update tokenization_chatglm.py
Browse files- tokenization_chatglm.py +18 -0
tokenization_chatglm.py
CHANGED
@@ -103,6 +103,24 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
|
103 |
def eos_token_id(self):
    """Return whatever ``self.get_command`` yields for the ``"<eos>"`` command."""
    eos_command = "<eos>"
    return self.get_command(eos_command)
|
105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
@property
def vocab_size(self):
    """Report the ``n_words`` attribute of the wrapped ``self.tokenizer``."""
    wrapped = self.tokenizer
    return wrapped.n_words
|
|
|
103 |
def eos_token_id(self):
    """Return whatever ``self.get_command`` yields for the ``"<eos>"`` command."""
    eos_command = "<eos>"
    return self.get_command(eos_command)
|
105 |
|
106 |
+
@eos_token.setter
def eos_token(self, value):
    """Store ``value`` in ``self._eos_token`` after a type check.

    ``None``, ``str`` and ``AddedToken`` values are accepted.

    Raises:
        ValueError: if ``value`` is of any other type.
    """
    acceptable = isinstance(value, (str, AddedToken)) or value is None
    if not acceptable:
        raise ValueError("Cannot set a non-string value as the EOS token")
    self._eos_token = value
|
111 |
+
|
112 |
+
@unk_token.setter
def unk_token(self, value):
    """Store ``value`` in ``self._unk_token`` after a type check.

    ``None``, ``str`` and ``AddedToken`` values are accepted.

    Raises:
        ValueError: if ``value`` is of any other type.
    """
    acceptable = isinstance(value, (str, AddedToken)) or value is None
    if not acceptable:
        raise ValueError("Cannot set a non-string value as the UNK token")
    self._unk_token = value
|
117 |
+
|
118 |
+
@pad_token.setter
def pad_token(self, value):
    """Store ``value`` in ``self._pad_token`` after a type check.

    ``None``, ``str`` and ``AddedToken`` values are accepted.

    Raises:
        ValueError: if ``value`` is of any other type.
    """
    acceptable = isinstance(value, (str, AddedToken)) or value is None
    if not acceptable:
        raise ValueError("Cannot set a non-string value as the PAD token")
    self._pad_token = value
|
123 |
+
|
124 |
@property
def vocab_size(self):
    """Report the ``n_words`` attribute of the wrapped ``self.tokenizer``."""
    wrapped = self.tokenizer
    return wrapped.n_words
|