File size: 2,011 Bytes
3d8b295
 
814e23a
3d8b295
 
 
814e23a
3d8b295
 
814e23a
3d8b295
814e23a
3d8b295
 
814e23a
3d8b295
69460b6
fcf6d89
69460b6
 
fcf6d89
69460b6
 
3d8b295
 
fcf6d89
 
 
3d8b295
9512d4d
76841eb
71364cd
19568e8
565c08f
3396f08
3d8b295
19568e8
69460b6
 
814e23a
 
fcf6d89
814e23a
 
 
4041585
3d8b295
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import gradio as gr
from transformers import AutoTokenizer
import json

# Shared tokenizer loaded once at import time. apply_chat_template() below
# overwrites its `chat_template` attribute on every call, so the checkpoint
# mainly serves as a host for rendering user-supplied templates.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

# Sample conversations are kept as JSON *text* (not Python lists) because they
# are used as the initial contents of editable text areas and are parsed with
# json.loads() when the template is applied.

# Sample 1: a user turn followed by an assistant reply.
demo_conversation1 = """[
    {"role": "user", "content": "Hi there!"},
    {"role": "assistant", "content": "Hello, human!"}
]"""

# Sample 2: a system message followed by a user turn, with no assistant reply.
demo_conversation2 = """[
    {"role": "system", "content": "You are a helpful chatbot."},
    {"role": "user", "content": "Hi there!"}
]"""

# Default template shown in the template editor. It wraps every message in
# <|im_start|> / <|im_end|> markers and, when `add_generation_prompt` is set,
# appends an opening assistant marker.
# The doubled backslashes keep a literal backslash-n in the template text, so
# the newline escape is presumably resolved by the template engine inside its
# own string literals rather than by Python.
default_template = """{% for message in messages %}
    {{ "<|im_start|>" + message["role"] + "\\n" + message["content"] + "<|im_end|>\\n" }}
{% endfor %}
{% if add_generation_prompt %}
    {{ "<|im_start|>assistant\\n" }}
{% endif %}"""

# Both samples gathered in one list (not referenced by the rest of the visible
# file).
conversations = [demo_conversation1, demo_conversation2]

def apply_chat_template(template, cleanup_whitespace, test_conversation1, test_conversation2):
    """Render both test conversations with a user-supplied chat template.

    Args:
        template: Chat template source to install on the module-level
            ``tokenizer``.
        cleanup_whitespace: When truthy, strip leading/trailing whitespace
            from every template line and join the lines with no separator
            before the template is installed.
        test_conversation1: JSON text of the first conversation.
        test_conversation2: JSON text of the second conversation.

    Returns:
        A 4-tuple of rendered strings. For each conversation, in order, it
        holds the rendering without the generation prompt followed by the
        rendering with ``add_generation_prompt=True``.

    Raises:
        json.JSONDecodeError: If either conversation is not valid JSON.
    """
    if cleanup_whitespace:
        # Collapse the indented, multi-line template into a single line so
        # layout whitespace does not leak into the rendered output.
        template = "".join([line.strip() for line in template.split("\n")])
    # The tokenizer is shared module state; every call replaces its template.
    tokenizer.chat_template = template
    outputs = []
    for conversation_str in (test_conversation1, test_conversation2):
        conversation = json.loads(conversation_str)
        outputs.append(tokenizer.apply_chat_template(conversation, tokenize=False))
        outputs.append(tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True))
    return tuple(outputs)

# Labels for the four output boxes, listed in the same order as the tuple
# returned by apply_chat_template (conversation 1 without/with the generation
# prompt, then conversation 2 without/with it).
output_names = [
    "Conversation 1 without generation prompt",
    "Conversation 1 with generation prompt",
    "Conversation 2 without generation prompt",
    "Conversation 2 with generation prompt",
]
iface = gr.Interface(
    fn=apply_chat_template,
    # Input order must match apply_chat_template's positional parameters.
    inputs=[
        gr.TextArea(value=default_template, lines=10, max_lines=30, label="Chat Template"),
        # The Gradio component class is spelled `Checkbox` (lowercase "b").
        gr.Checkbox(value=True, label="Cleanup template whitespace"),
        gr.TextArea(value=demo_conversation1, lines=5, label="Conversation 1"),
        gr.TextArea(value=demo_conversation2, lines=5, label="Conversation 2"),
    ],
    outputs=[gr.TextArea(label=output_name) for output_name in output_names],
)
iface.launch()