Spaces:
Sleeping
Sleeping
File size: 1,470 Bytes
35996ec 8c31c63 ab98424 35996ec bfc00c8 62a5026 a56d29b aa0404d a56d29b aa0404d 62a5026 a56d29b aa0404d a56d29b aa0404d a56d29b 35996ec 224f5e0 35996ec 224f5e0 35996ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
from transformers import AutoTokenizer
import itertools
def get_color():
    """Return an endless round-robin iterator over the token highlight palette."""
    palette = (
        '#df7b55',
        '#2c7482',
        '#2c8234',
        '#5581df',
        '#822c63',
        '#b355df',
    )
    return itertools.cycle(palette)
def get_res(model_name, input_sentence, single_print=True):
    """Tokenize *input_sentence* and render each token as a colored HTML span.

    Parameters
    ----------
    model_name : str
        Hugging Face model id passed to ``AutoTokenizer.from_pretrained``
        (loaded with ``trust_remote_code=True`` — only use trusted model ids).
    input_sentence : str
        Text to tokenize (special tokens are not added).
    single_print : bool, optional
        When True (default), print the HTML followed by the token count and
        return None; otherwise return ``(html, token_count)``.

    Returns
    -------
    tuple[str, int] | None
        ``(html, token_count)`` when ``single_print`` is False, else None.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    color_iterator = get_color()
    token_ids = tokenizer.encode(input_sentence, add_special_tokens=False)
    token_num = len(token_ids)

    # Workaround: some tokenizers (e.g. byte-level BPE) don't decode cleanly
    # one token at a time — a multi-byte character may be split across tokens.
    # Decode the growing prefix and keep only the newly-produced suffix, so
    # each piece is the text contributed by exactly one token.
    pieces = []
    prev_text = ""
    for i in range(token_num):
        text = tokenizer.decode(token_ids[:i + 1])
        pieces.append(text[len(prev_text):])
        prev_text = text

    # NOTE(review): the previous version had a dead `else` branch (guarded by a
    # hard-coded `work_around = True`) that compared integer token ids to '\n';
    # it could never match and has been removed.
    res = []
    for piece in pieces:
        if piece == '\n':
            res.append(' \n')
        else:
            res.append(f'<span style="font-size:1.25em;background-color:{next(color_iterator)}">{piece.replace(" ", " ")}</span>')
    res = ''.join(res)
    if single_print:
        print(res + str(token_num))
    else:
        return res, token_num
|