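# Quick probe of the gpt-3.5-turbo tokenizer wrapper: encode a Chinese
# sentence, then decode two ids to compare an ordinary in-vocabulary token
# with an id from the reserved/special range of the cl100k_base vocabulary.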
from vocab.gpt_35_turbo import tokenizer
text = "你好,请告诉我聚乙烯是什么"  # "Hello, please tell me what polyethylene is"
encoding = tokenizer.encode(text)
print(encoding)  # token ids produced for the sentence
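# id 6744 is an ordinary in-vocabulary id: compare the decoded text with
# the raw token string the wrapper reports for it.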
print(tokenizer.decode([6744]))
print(tokenizer.convert_ids_to_tokens([6744]))
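# id 100256 appears to fall past the regular cl100k_base merge table
# (regular ids run 0..100255; specials such as <|endoftext|> start at
# 100257), so this checks how the wrapper handles a reserved/unused id.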
print(tokenizer.decode([100256]))
print(tokenizer.convert_ids_to_tokens([100256]))