from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("MarsupialAI/Monstral-123B-v2")

chat = [
    {"role": "system", "content": "3525265246346?"},
    {"role": "user", "content": "Hello, how are you?"},
    {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
    {"role": "user", "content": "I'd like to show off how chat templating works!"},
]

print(tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True))


def apply_chat_template_with_length_limit(tokenizer, conversations, max_length, chat_template=None):
    """
    Apply a chat template while enforcing a token-length limit.

    Parameters:
    - tokenizer: The tokenizer object that provides the apply_chat_template method.
    - conversations: List of messages to include in the chat.
    - max_length: Maximum number of tokens allowed in the rendered prompt.
    - chat_template: Optional custom chat template.

    Returns:
    - A string containing the chat template filled with as many messages as fit.
    """
    # Make sure there is at least one message.
    if not conversations:
        return ""

    # Try to keep the first message (typically the system prompt).
    first_msg = conversations[0]
    remaining_msgs = conversations[1:]
    valid_conv = []

    # Count the tokens needed for the template plus the first message.
    # Measure with add_generation_prompt=True so the check matches the
    # string returned at the end, which also includes the generation prompt.
    template_tokens = len(tokenizer.apply_chat_template(
        [first_msg], chat_template=chat_template, add_generation_prompt=True))
    if template_tokens <= max_length:
        valid_conv.append(first_msg)
    # Otherwise the first message alone exceeds the limit, so skip it.

    # Add the remaining messages one by one, oldest first.
    for message in remaining_msgs:
        # Tentatively append the current message.
        temp_conv = valid_conv + [message]
        tokens = tokenizer.apply_chat_template(
            temp_conv, chat_template=chat_template, add_generation_prompt=True)
        # Keep the message only if the whole conversation still fits.
        if len(tokens) <= max_length:
            valid_conv = temp_conv
        else:
            break

    return tokenizer.apply_chat_template(valid_conv, tokenize=False,
                                         add_generation_prompt=True,
                                         chat_template=chat_template)


# re = apply_chat_template_with_length_limit(tokenizer, chat, 100)
# print(re)
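
# A minimal usage sketch (the 64-token budget below is an arbitrary
# illustration, not a value from the original script): truncate the demo
# conversation above and compare token counts before and after.
limited = apply_chat_template_with_length_limit(tokenizer, chat, max_length=64)
full_ids = tokenizer.apply_chat_template(chat, add_generation_prompt=True)
# add_special_tokens=False avoids double-counting the BOS token that the
# rendered template string already contains.
limited_ids = tokenizer(limited, add_special_tokens=False)["input_ids"]
print(f"full prompt: {len(full_ids)} tokens, limited prompt: {len(limited_ids)} tokens")
print(limited)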