tokenizer-arena / config.py
xu-song's picture
add compress rate
814ee6b
raw
history blame
304 Bytes
# Module-level configuration flags for the tokenizer-arena app.
# NOTE(review): consumers of these flags are outside this file — the notes
# below restate the original author's comments; confirm against callers.

USE_REMOTE = False # use remote tokenizer or local tokenizer
# load_vocab_with_SPECIAL_TOKEN = True # If special tokens are excluded, the computed
# vocab size is wrong and overlap_token counts become inconsistent.

# encoding config
ADD_SPECIAL_TOKEN = False  # whether to add special tokens when encoding — TODO confirm in encode call site
#
LAZY_IMPORT = True  # presumably defers heavy tokenizer imports until first use; verify in loader
# DEBUG: set environment variable RUST_BACKTRACE=full (for full backtraces from the Rust tokenizers backend)
#