llava-hf/llava-interleave-qwen-0.5b-hf · Update chat_template.json to incorporate `generation` tag

This change adds the `generation` tag to `chat_template.json` so that `return_assistant_tokens_mask` works correctly with the tokenizer (see https://github.com/huggingface/transformers/pull/30650/files). A self-contained test script is pasted below.

from transformers import AutoProcessor

# Hub checkpoint whose processor is loaded for this test.
MODEL_ID = "llava-hf/llava-interleave-qwen-0.5b-hf"
processor = AutoProcessor.from_pretrained(MODEL_ID)

# Chat history that is later rendered with `apply_chat_template`.
# Every "content" value is a list of dicts whose "type" is "text" or "image".
def _text_part(value):
    """Return a text content item wrapping *value*."""
    return {"type": "text", "text": value}


def _message(role, *parts):
    """Return one chat message for *role* holding *parts* as its content list."""
    return {"role": role, "content": list(parts)}


conversation = [
    _message("system", _text_part("You are a helpful assistant.")),
    _message(
        "user",
        _text_part("What is shown in this image?"),
        {"type": "image"},
    ),
    _message("assistant", _text_part("This is a picture of a cat.")),
    _message("user", _text_part("What is the cat doing?")),
    _message(
        "assistant",
        _text_part("The cat is sleeping on a sofa. It looks very comfortable."),
    ),
]

# Jinja chat template, assembled from fragments so each step can be annotated.
# The '\n' escapes are ordinary Python escapes, so the Jinja string literals
# end up containing real newline characters.
template = "".join(
    [
        # Each message opens with a role header.
        "{% for message in messages %}",
        "{{'<|im_start|>' + message['role'] + '\n'}}",
        # Image placeholders come before any text of the same message.
        "{# Render all images first #}",
        "{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}",
        "{{ '<image>' }}",
        "{% endfor %}",
        "{# Render all text next #}",
        # Text of non-assistant messages is emitted as-is.
        "{% if message['role'] != 'assistant' %}",
        "{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}",
        "{{ '\n' + content['text'] }}",
        "{% endfor %}",
        # Text of assistant messages is wrapped in generation/endgeneration.
        # NOTE(review): the span covers only the text, not the trailing
        # '<|im_end|>' emitted below -- confirm that is the intended mask.
        "{% else %}",
        "{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}",
        "{% generation %}",
        "{{ '\n' + content['text'] }}",
        "{% endgeneration %}",
        "{% endfor %}",
        "{% endif %}",
        # Close the turn.
        "{{'<|im_end|>' + '\n'}}",
        "{% endfor %}",
        # Optionally open a fresh assistant turn.
        "{% if add_generation_prompt %}",
        "{{ '<|im_start|>assistant\n' }}",
        "{% endif %}",
    ]
)

# Print the escaped one-line form of the template, surrounded by blank lines.
for chunk in ("\n", repr(template), "\n"):
    print(chunk)

# Keyword arguments common to both renderings below.
_render_kwargs = {"chat_template": template, "add_generation_prompt": False}

# First pass: render the conversation to text only and show it.
prompt = processor.apply_chat_template(conversation, tokenize=False, **_render_kwargs)
print(prompt)

# Second pass: tokenize and request the assistant-token mask in a dict result.
inputs = processor.apply_chat_template(
    conversation,
    tokenize=True,
    return_assistant_tokens_mask=True,
    return_dict=True,
    **_render_kwargs,
)
print(inputs["assistant_masks"])