Annorita's picture
add process function
35996ec
raw
history blame
641 Bytes
from transformers import AutoTokenizer
import itertools
def get_color():
    """Return an endless iterator over the ANSI SGR color codes 41..47.

    The codes are yielded in ascending order and wrap back to 41 after 47.
    Each call returns a fresh, independent iterator.
    """
    # range() is already iterable; cycle() caches the items itself, so no
    # intermediate list is needed.
    return itertools.cycle(range(41, 48))
def get_res(model_name, input_sentence, single_print=True):
    """Tokenize a sentence and render every token in its own ANSI color.

    The tokenizer for ``model_name`` is loaded on every call, the sentence is
    encoded without special tokens, and each token id is decoded on its own
    and wrapped in an ANSI escape sequence whose color code comes from
    ``get_color()``. The token count is appended after the last token.

    Args:
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained``.
        input_sentence: Text to tokenize.
        single_print: When true (the default) the rendered string is printed
            and ``None`` is returned; otherwise the string is returned and
            nothing is printed.

    Returns:
        The rendered string when ``single_print`` is false, else ``None``.
    """
    # SECURITY NOTE(review): trust_remote_code=True allows the model
    # repository to run its own Python code while loading. Only call this
    # with model names you trust -- confirm this is intended for user input.
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    token_ids = tokenizer.encode(input_sentence, add_special_tokens=False)

    # zip() stops as soon as token_ids is exhausted, so pairing with the
    # endless color cycle is safe.
    colored_tokens = [
        f'\033[1;{color}m {tokenizer.decode(token_id)}\033[m'
        for token_id, color in zip(token_ids, get_color())
    ]
    res = ''.join(colored_tokens) + f' {len(token_ids)}'

    if single_print:
        print(res)
        return None
    return res