tianxie-sf
committed on
Commit
•
aa0c006
1
Parent(s):
f12f979
_convert_id_to_token
Browse files - tokenization_xgen.py +2 -2
tokenization_xgen.py
CHANGED
@@ -155,7 +155,7 @@ class XgenTokenizer(PreTrainedTokenizer):
|
|
155 |
|
156 |
def _convert_id_to_token(self, index):
|
157 |
"""Converts an index (integer) in a token (str) using the vocab."""
|
158 |
-
return self.encoder.decode_single_token_bytes(index)
|
159 |
|
160 |
def _decode(self, token_ids: List[int], skip_special_tokens: bool = False, **kwargs):
|
161 |
if skip_special_tokens:
|
@@ -231,4 +231,4 @@ class XgenTokenizer(PreTrainedTokenizer):
|
|
231 |
|
232 |
# has no vocab file
|
233 |
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None):
|
234 |
-
return ()
|
|
|
155 |
|
156 |
def _convert_id_to_token(self, index):
|
157 |
"""Converts an index (integer) in a token (str) using the vocab."""
|
158 |
+
return self.encoder.decode_single_token_bytes(index).decode("utf-8")
|
159 |
|
160 |
def _decode(self, token_ids: List[int], skip_special_tokens: bool = False, **kwargs):
|
161 |
if skip_special_tokens:
|
|
|
231 |
|
232 |
# has no vocab file
|
233 |
def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None):
|
234 |
+
return ()
|