import gradio as gr
from transformers import AutoTokenizer
import json
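
# The Zephyr tokenizer is loaded only for its apply_chat_template() method;
# its built-in chat template is overwritten with the user-supplied one below.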
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
demo_conversation1 = """[
    {"role": "user", "content": "Hi there!"},
    {"role": "assistant", "content": "Hello, human!"}
]"""

demo_conversation2 = """[
    {"role": "system", "content": "You are a helpful chatbot."},
    {"role": "user", "content": "Hi there!"}
]"""
default_template = """{% for message in messages %}
{{ "<|im_start|>" + message["role"] + "\\n" + message["content"] + "<|im_end|>\\n" }}
{% endfor %}
{% if add_generation_prompt %}
{{ "<|im_start|>assistant\\n" }}
{% endif %}"""
conversations = [demo_conversation1, demo_conversation2]
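
# Render both demo conversations with the user-supplied template, once without
# and once with the generation prompt, giving the four text outputs below.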
def apply_chat_template(template, cleanup_whitespace, test_conversation1, test_conversation2):
    if cleanup_whitespace:
        # Strip each line of the template and join them, so indentation and
        # newlines added for readability don't leak into the rendered chat.
        template = "".join([line.strip() for line in template.split('\n')])
    tokenizer.chat_template = template
    outputs = []
    for conversation_str in (test_conversation1, test_conversation2):
        conversation = json.loads(conversation_str)
        outputs.append(tokenizer.apply_chat_template(conversation, tokenize=False))
        outputs.append(tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True))
    return tuple(outputs)

output_names = [
    "Conversation 1 without generation prompt",
    "Conversation 1 with generation prompt",
    "Conversation 2 without generation prompt",
    "Conversation 2 with generation prompt",
]
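
# Inputs: the editable chat template, a whitespace-cleanup toggle, and the two
# demo conversations (as JSON); outputs: one text box per rendered variant.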
iface = gr.Interface(
    fn=apply_chat_template,
    inputs=[
        gr.TextArea(value=default_template, lines=10, max_lines=30, label="Chat Template"),
        gr.Checkbox(value=True, label="Cleanup template whitespace"),
        gr.TextArea(value=demo_conversation1, lines=5, label="Conversation 1"),
        gr.TextArea(value=demo_conversation2, lines=5, label="Conversation 2"),
    ],
    outputs=[gr.TextArea(label=output_name) for output_name in output_names],
)
iface.launch()