tokenizer.decode output '??'

#2
by SamSha1971 - opened

# Decode a single token id on its own and show both its text and UTF-8 bytes.
# The loop walks every id below hparams["vocab_size"] but reports only 46134.
for i in range(hparams["vocab_size"]):
    if i == 46134:
        text = tokenizer.decode([i])
        print(f"{i}: {text}")
        # Printing the raw bytes makes replacement characters visible:
        # U+FFFD encodes to b'\xef\xbf\xbd' in UTF-8.
        print(text.encode('utf-8'))

output:
46134: ��
b'\xef\xbf\xbd\xef\xbf\xbd'

Sign up or log in to comment