pauhidalgoo commited on
Commit
d9445f6
1 Parent(s): a9f067f

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +17 -23
  2. tokenizer.json +6 -1
special_tokens_map.json CHANGED
@@ -1,29 +1,23 @@
1
  {
2
  "additional_special_tokens": [
3
- "<|im_start|>",
4
- "<|im_end|>"
 
 
 
 
 
 
 
 
 
 
 
 
5
  ],
6
- "bos_token": {
7
- "content": "<|im_start|>",
8
- "lstrip": false,
9
- "normalized": false,
10
- "rstrip": false,
11
- "single_word": false
12
- },
13
- "eos_token": {
14
- "content": "<|im_end|>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false
19
- },
20
- "pad_token": {
21
- "content": "<pad>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false
26
- },
27
  "unk_token": {
28
  "content": "<unk>",
29
  "lstrip": false,
 
1
  {
2
  "additional_special_tokens": [
3
+ {
4
+ "content": "<|im_start|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|im_end|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
  ],
18
+ "bos_token": "<|im_start|>",
19
+ "eos_token": "<|im_end|>",
20
+ "pad_token": "<pad>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  "unk_token": {
22
  "content": "<unk>",
23
  "lstrip": false,
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 2048,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {