Transformers
PyTorch
code
custom_code
Inference Endpoints
Dejiao Z committed on
Commit
2228afc
1 Parent(s): a53aec6

fixed tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +6 -4
tokenizer_config.json CHANGED
@@ -23,10 +23,12 @@
23
  ],
24
  "bos_token": "<|endoftext|>",
25
  "eos_token": "<|endoftext|>",
 
26
  "model_max_length": 1000000000000000019884624838656,
27
- "name_or_path": "/mnt/efs/people/dejiaoz/universal_embedding/codesage_v3/tokenizer/starcoder/",
28
- "special_tokens_map_file": "/mnt/efs/people/dejiaoz/universal_embedding/codesage_v3/tokenizer/starcoder/special_tokens_map.json",
29
- "tokenizer_class": "GPT2Tokenizer",
30
  "unk_token": "<|endoftext|>",
31
- "vocab_size": 49152
 
 
 
 
32
  }
 
23
  ],
24
  "bos_token": "<|endoftext|>",
25
  "eos_token": "<|endoftext|>",
26
+ "add_eos_token": true,
27
  "model_max_length": 1000000000000000019884624838656,
 
 
 
28
  "unk_token": "<|endoftext|>",
29
+ "vocab_size": 49152,
30
+ "tokenizer_class": "CodeSageTokenizer",
31
+ "auto_map": {
32
+ "AutoTokenizer": ["tokenization_codesage.CodeSageTokenizer", null]
33
+ }
34
  }