Locutusque committed on
Commit
976904c
1 Parent(s): b723019

Upload tokenizer

Browse files
Files changed (2) hide show
  1. README.md +3 -3
  2. tokenizer_config.json +7 -0
README.md CHANGED
@@ -1,14 +1,14 @@
1
  ---
 
 
 
2
  library_name: transformers
3
  tags:
4
  - code
5
  - chemistry
6
  - medical
7
- license: other
8
  datasets:
9
  - Locutusque/hyperion-v3.0
10
- language:
11
- - en
12
  ---
13
  # Locutusque/Hyperion-3.0-Yi-34B
14
  ## Model Details
 
1
  ---
2
+ language:
3
+ - en
4
+ license: other
5
  library_name: transformers
6
  tags:
7
  - code
8
  - chemistry
9
  - medical
 
10
  datasets:
11
  - Locutusque/hyperion-v3.0
 
 
12
  ---
13
  # Locutusque/Hyperion-3.0-Yi-34B
14
  ## Model Details
tokenizer_config.json CHANGED
@@ -31,10 +31,17 @@
31
  "clean_up_tokenization_spaces": false,
32
  "eos_token": "<|endoftext|>",
33
  "legacy": true,
 
34
  "model_max_length": 4096,
 
35
  "pad_token": "<|endoftext|>",
 
 
36
  "sp_model_kwargs": {},
 
37
  "tokenizer_class": "LlamaTokenizer",
 
 
38
  "unk_token": "<unk>",
39
  "use_default_system_prompt": false
40
  }
 
31
  "clean_up_tokenization_spaces": false,
32
  "eos_token": "<|endoftext|>",
33
  "legacy": true,
34
+ "max_length": 512,
35
  "model_max_length": 4096,
36
+ "pad_to_multiple_of": null,
37
  "pad_token": "<|endoftext|>",
38
+ "pad_token_type_id": 0,
39
+ "padding_side": "left",
40
  "sp_model_kwargs": {},
41
+ "stride": 0,
42
  "tokenizer_class": "LlamaTokenizer",
43
+ "truncation_side": "right",
44
+ "truncation_strategy": "longest_first",
45
  "unk_token": "<unk>",
46
  "use_default_system_prompt": false
47
  }