File size: 973 Bytes
5071898 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
from langchain.llms.base import LLM
from langchain.memory import ConversationBufferWindowMemory
from transformers import GPT2TokenizerFast
from langchain.schema.messages import get_buffer_string
def get_num_tokens(text):
    """Return the number of GPT-2 tokens in *text*.

    The tokenizer is expensive to construct (loads vocab files), so it is
    built once on first use and cached on the function object rather than
    being re-created on every call.
    """
    tokenizer = getattr(get_num_tokens, "_tokenizer", None)
    if tokenizer is None:
        tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
        get_num_tokens._tokenizer = tokenizer
    return len(tokenizer.tokenize(text))
def get_memory_num_tokens(memory):
    """Return the total GPT-2 token count over all messages in *memory*.

    Each message is rendered to its buffer-string form individually and
    tokenized via ``get_num_tokens``. Uses a generator expression so no
    intermediate list is materialized.
    """
    messages = memory.chat_memory.messages
    return sum(get_num_tokens(get_buffer_string([m])) for m in messages)
def validate_memory_len(memory, max_token_limit=2000):
    """Trim the oldest messages from *memory* until it fits the token budget.

    Messages are dropped FIFO (oldest first) while the total token count
    exceeds ``max_token_limit``. The memory object is mutated in place and
    also returned for convenience.

    Note: the running total is decremented by the dropped message's own
    token count instead of re-tokenizing the whole remaining buffer on
    every iteration (the original was O(n^2) in tokenization work).
    """
    buffer = memory.chat_memory.messages
    curr_buffer_length = get_memory_num_tokens(memory)
    # Guard on `buffer` so an already-empty memory can never raise on pop.
    while curr_buffer_length > max_token_limit and buffer:
        dropped = buffer.pop(0)
        curr_buffer_length -= get_num_tokens(get_buffer_string([dropped]))
    return memory
if __name__ == '__main__':
    # Smoke test: count the tokens in a tiny string, reusing the module's
    # own helper instead of constructing a second tokenizer by hand.
    text = '''Hi'''
    print(get_num_tokens(text))