mcavus committed on
Commit
8be6739
·
verified ·
1 Parent(s): 075d77e

Create tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +135 -0
tokenizer_config.json ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ json
2
+ {
3
+ "auto_map": {
4
+ "AutoTokenizer": [
5
+ "tokenization_chatglm.ChatGLM4Tokenizer",
6
+ null
7
+ ]
8
+ },
9
+ "added_tokens_decoder": {
10
+ "151329": {
11
+ "content": "<|endoftext|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false,
16
+ "special": true
17
+ },
18
+ "151330": {
19
+ "content": "[MASK]",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false,
24
+ "special": true
25
+ },
26
+ "151331": {
27
+ "content": "[gMASK]",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false,
32
+ "special": true
33
+ },
34
+ "151332": {
35
+ "content": "[sMASK]",
36
+ "lstrip": false,
37
+ "normalized": false,
38
+ "rstrip": false,
39
+ "single_word": false,
40
+ "special": true
41
+ },
42
+ "151333": {
43
+ "content": "<sop>",
44
+ "lstrip": false,
45
+ "normalized": false,
46
+ "rstrip": false,
47
+ "single_word": false,
48
+ "special": true
49
+ },
50
+ "151334": {
51
+ "content": "<eop>",
52
+ "lstrip": false,
53
+ "normalized": false,
54
+ "rstrip": false,
55
+ "single_word": false,
56
+ "special": true
57
+ },
58
+ "151335": {
59
+ "content": "<|system|>",
60
+ "lstrip": false,
61
+ "normalized": false,
62
+ "rstrip": false,
63
+ "single_word": false,
64
+ "special": true
65
+ },
66
+ "151336": {
67
+ "content": "<|user|>",
68
+ "lstrip": false,
69
+ "normalized": false,
70
+ "rstrip": false,
71
+ "single_word": false,
72
+ "special": true
73
+ },
74
+ "151337": {
75
+ "content": "<|assistant|>",
76
+ "lstrip": false,
77
+ "normalized": false,
78
+ "rstrip": false,
79
+ "single_word": false,
80
+ "special": true
81
+ },
82
+ "151338": {
83
+ "content": "<|observation|>",
84
+ "lstrip": false,
85
+ "normalized": false,
86
+ "rstrip": false,
87
+ "single_word": false,
88
+ "special": true
89
+ },
90
+ "151339": {
91
+ "content": "<|begin_of_image|>",
92
+ "lstrip": false,
93
+ "normalized": false,
94
+ "rstrip": false,
95
+ "single_word": false,
96
+ "special": true
97
+ },
98
+ "151340": {
99
+ "content": "<|end_of_image|>",
100
+ "lstrip": false,
101
+ "normalized": false,
102
+ "rstrip": false,
103
+ "single_word": false,
104
+ "special": true
105
+ },
106
+ "151341": {
107
+ "content": "<|begin_of_video|>",
108
+ "lstrip": false,
109
+ "normalized": false,
110
+ "rstrip": false,
111
+ "single_word": false,
112
+ "special": true
113
+ },
114
+ "151342": {
115
+ "content": "<|end_of_video|>",
116
+ "lstrip": false,
117
+ "normalized": false,
118
+ "rstrip": false,
119
+ "single_word": false,
120
+ "special": true
121
+ }
122
+ },
123
+ "additional_special_tokens": ["<|endoftext|>", "[MASK]", "[gMASK]", "[sMASK]", "<sop>", "<eop>", "<|system|>",
124
+ "<|user|>", "<|assistant|>", "<|observation|>", "<|begin_of_image|>", "<|end_of_image|>",
125
+ "<|begin_of_video|>", "<|end_of_video|>"],
126
+ "clean_up_tokenization_spaces": false,
127
+ "do_lower_case": false,
128
+ "eos_token": "<|endoftext|>",
129
+ "pad_token": "<|endoftext|>",
130
+ "model_max_length": 8192,
131
+ "padding_side": "left",
132
+ "remove_space": false,
133
+ "tokenizer_class": "ChatGLM4Tokenizer",
134
+ "image_size": 1120
135
+ }