sikoraaxd committed on
Commit
f1fa563
1 Parent(s): 96243cc

Upload tokenizer

Browse files
Files changed (3) hide show
  1. README.md +4 -4
  2. special_tokens_map.json +7 -1
  3. tokenizer_config.json +2 -2
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
- license: apache-2.0
 
 
3
  library_name: peft
 
4
  tags:
5
  - trl
6
  - sft
7
  - generated_from_trainer
8
- base_model: Qwen/Qwen2-0.5B-Instruct
9
- datasets:
10
- - generator
11
  model-index:
12
  - name: qwen2
13
  results: []
 
1
  ---
2
+ base_model: Qwen/Qwen2-0.5B-Instruct
3
+ datasets:
4
+ - generator
5
  library_name: peft
6
+ license: apache-2.0
7
  tags:
8
  - trl
9
  - sft
10
  - generated_from_trainer
 
 
 
11
  model-index:
12
  - name: qwen2
13
  results: []
special_tokens_map.json CHANGED
@@ -10,5 +10,11 @@
10
  "rstrip": false,
11
  "single_word": false
12
  },
13
- "pad_token": "<|im_end|>"
 
 
 
 
 
 
14
  }
 
10
  "rstrip": false,
11
  "single_word": false
12
  },
13
+ "pad_token": {
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ }
20
  }
tokenizer_config.json CHANGED
@@ -35,8 +35,8 @@
35
  "clean_up_tokenization_spaces": false,
36
  "eos_token": "<|im_end|>",
37
  "errors": "replace",
38
- "model_max_length": 2048,
39
- "pad_token": "<|im_end|>",
40
  "split_special_tokens": false,
41
  "tokenizer_class": "Qwen2Tokenizer",
42
  "unk_token": null
 
35
  "clean_up_tokenization_spaces": false,
36
  "eos_token": "<|im_end|>",
37
  "errors": "replace",
38
+ "model_max_length": 32768,
39
+ "pad_token": "<|endoftext|>",
40
  "split_special_tokens": false,
41
  "tokenizer_class": "Qwen2Tokenizer",
42
  "unk_token": null