Upload folder using huggingface_hub
Browse files- README.md +182 -0
- added_tokens.json +7 -0
- config.json +29 -0
- mergekit_config.yml +137 -0
- model-00001-of-00001.safetensors +3 -0
- model.safetensors.index.json +1 -0
- special_tokens_map.json +37 -0
- tokenizer.model +3 -0
- tokenizer_config.json +95 -0
README.md
ADDED
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
base_model:
|
3 |
+
- DewEfresh/neo_7b
|
4 |
+
- DewEfresh/neo_7b
|
5 |
+
tags:
|
6 |
+
- merge
|
7 |
+
- mergekit
|
8 |
+
- lazymergekit
|
9 |
+
- DewEfresh/neo_7b
|
10 |
+
---
|
11 |
+
|
12 |
+
# Neo_7b-merge16
|
13 |
+
|
14 |
+
Neo_7b-merge16 is a merge of the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
|
15 |
+
* [DewEfresh/neo_7b](https://huggingface.co/DewEfresh/neo_7b)
|
16 |
+
* [DewEfresh/neo_7b](https://huggingface.co/DewEfresh/neo_7b)
|
17 |
+
|
18 |
+
## 🧩 Configuration
|
19 |
+
|
20 |
+
```yaml
|
21 |
+
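# Define the slices for the model merging process
# NOTE(review): mergekit's layer_range appears to be half-open ([start, end)), so a range like [3, 3] selects zero layers; the uploaded config.json reports num_hidden_layers: 0. Confirm whether ranges such as [3, 4] and [0, 1] were intended.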
|
22 |
+
slices:
|
23 |
+
- sources:
|
24 |
+
# Merge layer 3 with layer 0
|
25 |
+
- model: DewEfresh/neo_7b
|
26 |
+
layer_range: [3, 3]
|
27 |
+
- model: DewEfresh/neo_7b
|
28 |
+
layer_range: [0, 0]
|
29 |
+
- sources:
|
30 |
+
# Merge layer 3 with layer 1
|
31 |
+
- model: DewEfresh/neo_7b
|
32 |
+
layer_range: [3, 3]
|
33 |
+
- model: DewEfresh/neo_7b
|
34 |
+
layer_range: [1, 1]
|
35 |
+
- sources:
|
36 |
+
# Merge layer 3 with layer 2
|
37 |
+
- model: DewEfresh/neo_7b
|
38 |
+
layer_range: [3, 3]
|
39 |
+
- model: DewEfresh/neo_7b
|
40 |
+
layer_range: [2, 2]
|
41 |
+
- sources:
|
42 |
+
# Merge layer 7 with layer 4
|
43 |
+
- model: DewEfresh/neo_7b
|
44 |
+
layer_range: [7, 7]
|
45 |
+
- model: DewEfresh/neo_7b
|
46 |
+
layer_range: [4, 4]
|
47 |
+
- sources:
|
48 |
+
# Merge layer 7 with layer 5
|
49 |
+
- model: DewEfresh/neo_7b
|
50 |
+
layer_range: [7, 7]
|
51 |
+
- model: DewEfresh/neo_7b
|
52 |
+
layer_range: [5, 5]
|
53 |
+
- sources:
|
54 |
+
# Merge layer 7 with layer 6
|
55 |
+
- model: DewEfresh/neo_7b
|
56 |
+
layer_range: [7, 7]
|
57 |
+
- model: DewEfresh/neo_7b
|
58 |
+
layer_range: [6, 6]
|
59 |
+
- sources:
|
60 |
+
# Merge layer 11 with layer 8
|
61 |
+
- model: DewEfresh/neo_7b
|
62 |
+
layer_range: [11, 11]
|
63 |
+
- model: DewEfresh/neo_7b
|
64 |
+
layer_range: [8, 8]
|
65 |
+
- sources:
|
66 |
+
# Merge layer 11 with layer 9
|
67 |
+
- model: DewEfresh/neo_7b
|
68 |
+
layer_range: [11, 11]
|
69 |
+
- model: DewEfresh/neo_7b
|
70 |
+
layer_range: [9, 9]
|
71 |
+
- sources:
|
72 |
+
# Merge layer 11 with layer 10
|
73 |
+
- model: DewEfresh/neo_7b
|
74 |
+
layer_range: [11, 11]
|
75 |
+
- model: DewEfresh/neo_7b
|
76 |
+
layer_range: [10, 10]
|
77 |
+
- sources:
|
78 |
+
# Merge layer 15 with layer 12
|
79 |
+
- model: DewEfresh/neo_7b
|
80 |
+
layer_range: [15, 15]
|
81 |
+
- model: DewEfresh/neo_7b
|
82 |
+
layer_range: [12, 12]
|
83 |
+
- sources:
|
84 |
+
# Merge layer 15 with layer 13
|
85 |
+
- model: DewEfresh/neo_7b
|
86 |
+
layer_range: [15, 15]
|
87 |
+
- model: DewEfresh/neo_7b
|
88 |
+
layer_range: [13, 13]
|
89 |
+
- sources:
|
90 |
+
# Merge layer 15 with layer 14
|
91 |
+
- model: DewEfresh/neo_7b
|
92 |
+
layer_range: [15, 15]
|
93 |
+
- model: DewEfresh/neo_7b
|
94 |
+
layer_range: [14, 14]
|
95 |
+
- sources:
|
96 |
+
# Merge layer 19 with layer 16
|
97 |
+
- model: DewEfresh/neo_7b
|
98 |
+
layer_range: [19, 19]
|
99 |
+
- model: DewEfresh/neo_7b
|
100 |
+
layer_range: [16, 16]
|
101 |
+
- sources:
|
102 |
+
# Merge layer 19 with layer 17
|
103 |
+
- model: DewEfresh/neo_7b
|
104 |
+
layer_range: [19, 19]
|
105 |
+
- model: DewEfresh/neo_7b
|
106 |
+
layer_range: [17, 17]
|
107 |
+
- sources:
|
108 |
+
# Merge layer 19 with layer 18
|
109 |
+
- model: DewEfresh/neo_7b
|
110 |
+
layer_range: [19, 19]
|
111 |
+
- model: DewEfresh/neo_7b
|
112 |
+
layer_range: [18, 18]
|
113 |
+
- sources:
|
114 |
+
# Merge layer 23 with layer 20
|
115 |
+
- model: DewEfresh/neo_7b
|
116 |
+
layer_range: [23, 23]
|
117 |
+
- model: DewEfresh/neo_7b
|
118 |
+
layer_range: [20, 20]
|
119 |
+
- sources:
|
120 |
+
# Merge layer 23 with layer 21
|
121 |
+
- model: DewEfresh/neo_7b
|
122 |
+
layer_range: [23, 23]
|
123 |
+
- model: DewEfresh/neo_7b
|
124 |
+
layer_range: [21, 21]
|
125 |
+
- sources:
|
126 |
+
# Merge layer 23 with layer 22
|
127 |
+
- model: DewEfresh/neo_7b
|
128 |
+
layer_range: [23, 23]
|
129 |
+
- model: DewEfresh/neo_7b
|
130 |
+
layer_range: [22, 22]
|
131 |
+
- sources:
|
132 |
+
# Merge layer 27 with layer 24
|
133 |
+
- model: DewEfresh/neo_7b
|
134 |
+
layer_range: [27, 27]
|
135 |
+
- model: DewEfresh/neo_7b
|
136 |
+
layer_range: [24, 24]
|
137 |
+
- sources:
|
138 |
+
# Merge layer 27 with layer 25
|
139 |
+
- model: DewEfresh/neo_7b
|
140 |
+
layer_range: [27, 27]
|
141 |
+
- model: DewEfresh/neo_7b
|
142 |
+
layer_range: [25, 25]
|
143 |
+
- sources:
|
144 |
+
# Merge layer 27 with layer 26
|
145 |
+
- model: DewEfresh/neo_7b
|
146 |
+
layer_range: [27, 27]
|
147 |
+
- model: DewEfresh/neo_7b
|
148 |
+
layer_range: [26, 26]
|
149 |
+
# Specify the merging method for the slices
|
150 |
+
merge_method: slerp
|
151 |
+
base_model: DewEfresh/neo_7b
|
152 |
+
parameters:
|
153 |
+
t: 0.3333 # Set global interpolation value to 33.33%
|
154 |
+
dtype: bfloat16
|
155 |
+
|
156 |
+
|
157 |
+
```
|
158 |
+
|
159 |
+
## 💻 Usage
|
160 |
+
|
161 |
+
```python
|
162 |
+
!pip install -qU transformers accelerate
|
163 |
+
|
164 |
+
from transformers import AutoTokenizer
|
165 |
+
import transformers
|
166 |
+
import torch
|
167 |
+
|
168 |
+
model = "DewEfresh/Neo_7b-merge16"
|
169 |
+
messages = [{"role": "user", "content": "What is a large language model?"}]
|
170 |
+
|
171 |
+
tokenizer = AutoTokenizer.from_pretrained(model)
|
172 |
+
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
173 |
+
pipeline = transformers.pipeline(
|
174 |
+
"text-generation",
|
175 |
+
model=model,
|
176 |
+
torch_dtype=torch.float16,
|
177 |
+
device_map="auto",
|
178 |
+
)
|
179 |
+
|
180 |
+
outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
|
181 |
+
print(outputs[0]["generated_text"])
|
182 |
+
```
|
added_tokens.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<|CLS|>": 64000,
|
3 |
+
"<|EOD|>": 64002,
|
4 |
+
"<|MASK|>": 64003,
|
5 |
+
"<|PAD|>": 64004,
|
6 |
+
"<|SEP|>": 64001
|
7 |
+
}
|
config.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "DewEfresh/neo_7b",
|
3 |
+
"architectures": [
|
4 |
+
"LlamaForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 1,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 3072,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 24576,
|
14 |
+
"max_position_embeddings": 8192,
|
15 |
+
"mlp_bias": false,
|
16 |
+
"model_type": "llama",
|
17 |
+
"num_attention_heads": 16,
|
18 |
+
"num_hidden_layers": 0,
|
19 |
+
"num_key_value_heads": 16,
|
20 |
+
"pretraining_tp": 1,
|
21 |
+
"rms_norm_eps": 1e-05,
|
22 |
+
"rope_scaling": null,
|
23 |
+
"rope_theta": 10000.0,
|
24 |
+
"tie_word_embeddings": false,
|
25 |
+
"torch_dtype": "bfloat16",
|
26 |
+
"transformers_version": "4.42.3",
|
27 |
+
"use_cache": true,
|
28 |
+
"vocab_size": 64256
|
29 |
+
}
|
mergekit_config.yml
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
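# Define the slices for the model merging process
# NOTE(review): mergekit's layer_range appears to be half-open ([start, end)), so a range like [3, 3] selects zero layers; the merged output contains no transformer layers (num_hidden_layers: 0). Confirm whether ranges such as [3, 4] and [0, 1] were intended.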
|
3 |
+
slices:
|
4 |
+
- sources:
|
5 |
+
# Merge layer 3 with layer 0
|
6 |
+
- model: DewEfresh/neo_7b
|
7 |
+
layer_range: [3, 3]
|
8 |
+
- model: DewEfresh/neo_7b
|
9 |
+
layer_range: [0, 0]
|
10 |
+
- sources:
|
11 |
+
# Merge layer 3 with layer 1
|
12 |
+
- model: DewEfresh/neo_7b
|
13 |
+
layer_range: [3, 3]
|
14 |
+
- model: DewEfresh/neo_7b
|
15 |
+
layer_range: [1, 1]
|
16 |
+
- sources:
|
17 |
+
# Merge layer 3 with layer 2
|
18 |
+
- model: DewEfresh/neo_7b
|
19 |
+
layer_range: [3, 3]
|
20 |
+
- model: DewEfresh/neo_7b
|
21 |
+
layer_range: [2, 2]
|
22 |
+
- sources:
|
23 |
+
# Merge layer 7 with layer 4
|
24 |
+
- model: DewEfresh/neo_7b
|
25 |
+
layer_range: [7, 7]
|
26 |
+
- model: DewEfresh/neo_7b
|
27 |
+
layer_range: [4, 4]
|
28 |
+
- sources:
|
29 |
+
# Merge layer 7 with layer 5
|
30 |
+
- model: DewEfresh/neo_7b
|
31 |
+
layer_range: [7, 7]
|
32 |
+
- model: DewEfresh/neo_7b
|
33 |
+
layer_range: [5, 5]
|
34 |
+
- sources:
|
35 |
+
# Merge layer 7 with layer 6
|
36 |
+
- model: DewEfresh/neo_7b
|
37 |
+
layer_range: [7, 7]
|
38 |
+
- model: DewEfresh/neo_7b
|
39 |
+
layer_range: [6, 6]
|
40 |
+
- sources:
|
41 |
+
# Merge layer 11 with layer 8
|
42 |
+
- model: DewEfresh/neo_7b
|
43 |
+
layer_range: [11, 11]
|
44 |
+
- model: DewEfresh/neo_7b
|
45 |
+
layer_range: [8, 8]
|
46 |
+
- sources:
|
47 |
+
# Merge layer 11 with layer 9
|
48 |
+
- model: DewEfresh/neo_7b
|
49 |
+
layer_range: [11, 11]
|
50 |
+
- model: DewEfresh/neo_7b
|
51 |
+
layer_range: [9, 9]
|
52 |
+
- sources:
|
53 |
+
# Merge layer 11 with layer 10
|
54 |
+
- model: DewEfresh/neo_7b
|
55 |
+
layer_range: [11, 11]
|
56 |
+
- model: DewEfresh/neo_7b
|
57 |
+
layer_range: [10, 10]
|
58 |
+
- sources:
|
59 |
+
# Merge layer 15 with layer 12
|
60 |
+
- model: DewEfresh/neo_7b
|
61 |
+
layer_range: [15, 15]
|
62 |
+
- model: DewEfresh/neo_7b
|
63 |
+
layer_range: [12, 12]
|
64 |
+
- sources:
|
65 |
+
# Merge layer 15 with layer 13
|
66 |
+
- model: DewEfresh/neo_7b
|
67 |
+
layer_range: [15, 15]
|
68 |
+
- model: DewEfresh/neo_7b
|
69 |
+
layer_range: [13, 13]
|
70 |
+
- sources:
|
71 |
+
# Merge layer 15 with layer 14
|
72 |
+
- model: DewEfresh/neo_7b
|
73 |
+
layer_range: [15, 15]
|
74 |
+
- model: DewEfresh/neo_7b
|
75 |
+
layer_range: [14, 14]
|
76 |
+
- sources:
|
77 |
+
# Merge layer 19 with layer 16
|
78 |
+
- model: DewEfresh/neo_7b
|
79 |
+
layer_range: [19, 19]
|
80 |
+
- model: DewEfresh/neo_7b
|
81 |
+
layer_range: [16, 16]
|
82 |
+
- sources:
|
83 |
+
# Merge layer 19 with layer 17
|
84 |
+
- model: DewEfresh/neo_7b
|
85 |
+
layer_range: [19, 19]
|
86 |
+
- model: DewEfresh/neo_7b
|
87 |
+
layer_range: [17, 17]
|
88 |
+
- sources:
|
89 |
+
# Merge layer 19 with layer 18
|
90 |
+
- model: DewEfresh/neo_7b
|
91 |
+
layer_range: [19, 19]
|
92 |
+
- model: DewEfresh/neo_7b
|
93 |
+
layer_range: [18, 18]
|
94 |
+
- sources:
|
95 |
+
# Merge layer 23 with layer 20
|
96 |
+
- model: DewEfresh/neo_7b
|
97 |
+
layer_range: [23, 23]
|
98 |
+
- model: DewEfresh/neo_7b
|
99 |
+
layer_range: [20, 20]
|
100 |
+
- sources:
|
101 |
+
# Merge layer 23 with layer 21
|
102 |
+
- model: DewEfresh/neo_7b
|
103 |
+
layer_range: [23, 23]
|
104 |
+
- model: DewEfresh/neo_7b
|
105 |
+
layer_range: [21, 21]
|
106 |
+
- sources:
|
107 |
+
# Merge layer 23 with layer 22
|
108 |
+
- model: DewEfresh/neo_7b
|
109 |
+
layer_range: [23, 23]
|
110 |
+
- model: DewEfresh/neo_7b
|
111 |
+
layer_range: [22, 22]
|
112 |
+
- sources:
|
113 |
+
# Merge layer 27 with layer 24
|
114 |
+
- model: DewEfresh/neo_7b
|
115 |
+
layer_range: [27, 27]
|
116 |
+
- model: DewEfresh/neo_7b
|
117 |
+
layer_range: [24, 24]
|
118 |
+
- sources:
|
119 |
+
# Merge layer 27 with layer 25
|
120 |
+
- model: DewEfresh/neo_7b
|
121 |
+
layer_range: [27, 27]
|
122 |
+
- model: DewEfresh/neo_7b
|
123 |
+
layer_range: [25, 25]
|
124 |
+
- sources:
|
125 |
+
# Merge layer 27 with layer 26
|
126 |
+
- model: DewEfresh/neo_7b
|
127 |
+
layer_range: [27, 27]
|
128 |
+
- model: DewEfresh/neo_7b
|
129 |
+
layer_range: [26, 26]
|
130 |
+
# Specify the merging method for the slices
|
131 |
+
merge_method: slerp
|
132 |
+
base_model: DewEfresh/neo_7b
|
133 |
+
parameters:
|
134 |
+
t: 0.3333 # Set global interpolation value to 33.33%
|
135 |
+
dtype: bfloat16
|
136 |
+
|
137 |
+
|
model-00001-of-00001.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:41209159383a6438fa1e73146df470aae582c40bf510ec2495a3f8780477ce87
|
3 |
+
size 789584192
|
model.safetensors.index.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metadata": {"mergekit_version": "0.0.4.4", "total_size": 789583872}, "weight_map": {"lm_head.weight": "model-00001-of-00001.safetensors", "model.embed_tokens.weight": "model-00001-of-00001.safetensors", "model.norm.weight": "model-00001-of-00001.safetensors"}}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
"<|CLS|>",
|
4 |
+
"<|SEP|>",
|
5 |
+
"<|EOD|>",
|
6 |
+
"<|MASK|>",
|
7 |
+
"<|PAD|>"
|
8 |
+
],
|
9 |
+
"bos_token": {
|
10 |
+
"content": "<s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": true,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"eos_token": {
|
17 |
+
"content": "</s>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": true,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": true
|
22 |
+
},
|
23 |
+
"pad_token": {
|
24 |
+
"content": "<unk>",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": true,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": true
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "<unk>",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": true,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": true
|
36 |
+
}
|
37 |
+
}
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6a2447b0e5664cabb2481587597102d82f42f0ccb7ef22e1c2d95494a8b03c5
|
3 |
+
size 1002561
|
tokenizer_config.json
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<unk>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": true,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": true,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<s>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": true,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "</s>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": true,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": true,
|
27 |
+
"special": true
|
28 |
+
},
|
29 |
+
"64000": {
|
30 |
+
"content": "<|CLS|>",
|
31 |
+
"lstrip": false,
|
32 |
+
"normalized": false,
|
33 |
+
"rstrip": false,
|
34 |
+
"single_word": false,
|
35 |
+
"special": true
|
36 |
+
},
|
37 |
+
"64001": {
|
38 |
+
"content": "<|SEP|>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": false,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false,
|
43 |
+
"special": true
|
44 |
+
},
|
45 |
+
"64002": {
|
46 |
+
"content": "<|EOD|>",
|
47 |
+
"lstrip": false,
|
48 |
+
"normalized": false,
|
49 |
+
"rstrip": false,
|
50 |
+
"single_word": false,
|
51 |
+
"special": true
|
52 |
+
},
|
53 |
+
"64003": {
|
54 |
+
"content": "<|MASK|>",
|
55 |
+
"lstrip": false,
|
56 |
+
"normalized": false,
|
57 |
+
"rstrip": false,
|
58 |
+
"single_word": false,
|
59 |
+
"special": true
|
60 |
+
},
|
61 |
+
"64004": {
|
62 |
+
"content": "<|PAD|>",
|
63 |
+
"lstrip": false,
|
64 |
+
"normalized": false,
|
65 |
+
"rstrip": false,
|
66 |
+
"single_word": false,
|
67 |
+
"special": true
|
68 |
+
}
|
69 |
+
},
|
70 |
+
"additional_special_tokens": [
|
71 |
+
"<|CLS|>",
|
72 |
+
"<|SEP|>",
|
73 |
+
"<|EOD|>",
|
74 |
+
"<|MASK|>",
|
75 |
+
"<|PAD|>"
|
76 |
+
],
|
77 |
+
"auto_map": {
|
78 |
+
"AutoTokenizer": [
|
79 |
+
"DewEfresh/neo_7b--tokenization_neo.NEOTokenizer",
|
80 |
+
null
|
81 |
+
]
|
82 |
+
},
|
83 |
+
"bos_token": "<s>",
|
84 |
+
"chat_template": "{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if loop.index0 == 0 and system_message is defined %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>' + '[INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
|
85 |
+
"clean_up_tokenization_spaces": false,
|
86 |
+
"eos_token": "</s>",
|
87 |
+
"model_max_length": 4096,
|
88 |
+
"pad_token": "<unk>",
|
89 |
+
"padding_side": "right",
|
90 |
+
"sp_model_kwargs": {},
|
91 |
+
"split_special_tokens": false,
|
92 |
+
"tokenizer_class": "NEOTokenizer",
|
93 |
+
"unk_token": "<unk>",
|
94 |
+
"use_fast": false
|
95 |
+
}
|