File size: 688 Bytes
d76004b 2c3adeb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
{
"tokenizer_name": "CSUMLM Tokenizer",
"model_name": "CSUMLM",
"description": "Tokenizer for the CognoSphere Unified Multimodal Language Model",
"author": "Or4cl3 AI Solutions",
"language": "Multimodal (Text, Image, Audio)",
"vocab_size": 32000,
"max_sequence_length": 512,
"special_tokens": {
"bos_token": "<BOS>",
"eos_token": "<EOS>",
"pad_token": "<PAD>",
"unk_token": "<UNK>",
"mask_token": "<MASK>"
},
"tokenization_method": "Byte Pair Encoding (BPE)",
"training_data": "Custom 1500 Example Dataset",
"chat_template": "<BOS> {context} {user_input} {response} <EOS>",
"pad_to_max_length": true,
"truncation_strategy": "only_second"
}
|