Upload folder using huggingface_hub
- README.md +182 -0
- added_tokens.json +7 -0
- config.json +29 -0
- mergekit_config.yml +136 -0
- model-00001-of-00001.safetensors +3 -0
- model-00001-of-00003.safetensors +3 -0
- model-00002-of-00003.safetensors +3 -0
- model-00003-of-00003.safetensors +3 -0
- model.safetensors.index.json +1 -0
- special_tokens_map.json +37 -0
- tokenizer.model +3 -0
- tokenizer_config.json +95 -0
README.md
ADDED
@@ -0,0 +1,182 @@
---
base_model:
- DewEfresh/neo_7b
- m-a-p/neo_7b
tags:
- merge
- mergekit
- lazymergekit
- DewEfresh/neo_7b
- m-a-p/neo_7b
---

# Neo_7b-merge15

Neo_7b-merge15 is a merge of the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
* [DewEfresh/neo_7b](https://huggingface.co/DewEfresh/neo_7b)
* [m-a-p/neo_7b](https://huggingface.co/m-a-p/neo_7b)

## 🧩 Configuration

```yaml
# Define the slices for the model merging process
slices:
  - sources:
      # First part: merge layer 0 with layer 3
      - model: DewEfresh/neo_7b
        layer_range: [0, 0]
      - model: m-a-p/neo_7b
        layer_range: [3, 3]
  - sources:
      # Second part: merge layer 1 with layer 3
      - model: DewEfresh/neo_7b
        layer_range: [1, 1]
      - model: m-a-p/neo_7b
        layer_range: [3, 3]
  - sources:
      # Third part: merge layer 2 with layer 3
      - model: DewEfresh/neo_7b
        layer_range: [2, 2]
      - model: m-a-p/neo_7b
        layer_range: [3, 3]
  - sources:
      # Fourth part: merge layer 4 with layer 7
      - model: DewEfresh/neo_7b
        layer_range: [4, 4]
      - model: m-a-p/neo_7b
        layer_range: [7, 7]
  - sources:
      # Fifth part: merge layer 5 with layer 7
      - model: DewEfresh/neo_7b
        layer_range: [5, 5]
      - model: m-a-p/neo_7b
        layer_range: [7, 7]
  - sources:
      # Sixth part: merge layer 6 with layer 7
      - model: DewEfresh/neo_7b
        layer_range: [6, 6]
      - model: m-a-p/neo_7b
        layer_range: [7, 7]
  - sources:
      # Seventh part: merge layer 8 with layer 11
      - model: DewEfresh/neo_7b
        layer_range: [8, 8]
      - model: m-a-p/neo_7b
        layer_range: [11, 11]
  - sources:
      # Eighth part: merge layer 9 with layer 11
      - model: DewEfresh/neo_7b
        layer_range: [9, 9]
      - model: m-a-p/neo_7b
        layer_range: [11, 11]
  - sources:
      # Ninth part: merge layer 10 with layer 11
      - model: DewEfresh/neo_7b
        layer_range: [10, 10]
      - model: m-a-p/neo_7b
        layer_range: [11, 11]
  - sources:
      # Tenth part: merge layer 12 with layer 15
      - model: DewEfresh/neo_7b
        layer_range: [12, 12]
      - model: m-a-p/neo_7b
        layer_range: [15, 15]
  - sources:
      # Eleventh part: merge layer 13 with layer 15
      - model: DewEfresh/neo_7b
        layer_range: [13, 13]
      - model: m-a-p/neo_7b
        layer_range: [15, 15]
  - sources:
      # Twelfth part: merge layer 14 with layer 15
      - model: DewEfresh/neo_7b
        layer_range: [14, 14]
      - model: m-a-p/neo_7b
        layer_range: [15, 15]
  - sources:
      # Thirteenth part: merge layer 16 with layer 19
      - model: DewEfresh/neo_7b
        layer_range: [16, 16]
      - model: m-a-p/neo_7b
        layer_range: [19, 19]
  - sources:
      # Fourteenth part: merge layer 17 with layer 19
      - model: DewEfresh/neo_7b
        layer_range: [17, 17]
      - model: m-a-p/neo_7b
        layer_range: [19, 19]
  - sources:
      # Fifteenth part: merge layer 18 with layer 19
      - model: DewEfresh/neo_7b
        layer_range: [18, 18]
      - model: m-a-p/neo_7b
        layer_range: [19, 19]
  - sources:
      # Sixteenth part: merge layer 20 with layer 23
      - model: DewEfresh/neo_7b
        layer_range: [20, 20]
      - model: m-a-p/neo_7b
        layer_range: [23, 23]
  - sources:
      # Seventeenth part: merge layer 21 with layer 23
      - model: DewEfresh/neo_7b
        layer_range: [21, 21]
      - model: m-a-p/neo_7b
        layer_range: [23, 23]
  - sources:
      # Eighteenth part: merge layer 22 with layer 23
      - model: DewEfresh/neo_7b
        layer_range: [22, 22]
      - model: m-a-p/neo_7b
        layer_range: [23, 23]
  - sources:
      # Nineteenth part: merge layer 24 with layer 27
      - model: DewEfresh/neo_7b
        layer_range: [24, 24]
      - model: m-a-p/neo_7b
        layer_range: [27, 27]
  - sources:
      # Twentieth part: merge layer 25 with layer 27
      - model: DewEfresh/neo_7b
        layer_range: [25, 25]
      - model: m-a-p/neo_7b
        layer_range: [27, 27]
  - sources:
      # Twenty-first part: merge layer 26 with layer 27
      - model: DewEfresh/neo_7b
        layer_range: [26, 26]
      - model: m-a-p/neo_7b
        layer_range: [27, 27]
# Specify the merging method for the slices
merge_method: slerp
base_model: DewEfresh/neo_7b
parameters:
  t: 0.3333 # Set global interpolation value to 33.33%
dtype: bfloat16

```

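To reproduce the merge locally, the configuration above can be passed to mergekit directly. The snippet below is a minimal sketch using mergekit's Python API; it assumes `mergekit` is installed and the YAML above is saved as `config.yaml`, and option names may differ slightly between mergekit versions.

```python
# Minimal reproduction sketch (assumptions: `pip install mergekit`; the merge
# configuration above saved as config.yaml). API names follow mergekit's
# documented Python interface and may vary by version.
import yaml
import torch

from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

# Parse the YAML merge configuration into mergekit's config object
with open("config.yaml", "r", encoding="utf-8") as fp:
    merge_config = MergeConfiguration.model_validate(yaml.safe_load(fp))

# Run the merge and write the merged weights to ./Neo_7b-merge15
run_merge(
    merge_config,
    out_path="./Neo_7b-merge15",
    options=MergeOptions(
        cuda=torch.cuda.is_available(),  # use a GPU if one is available
        copy_tokenizer=True,             # copy tokenizer files into the output
        lazy_unpickle=False,
        low_cpu_memory=False,
    ),
)
```
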
## 💻 Usage

```python
!pip install -qU transformers accelerate

from transformers import AutoTokenizer
import transformers
import torch

model = "DewEfresh/Neo_7b-merge15"
messages = [{"role": "user", "content": "What is a large language model?"}]

tokenizer = AutoTokenizer.from_pretrained(model)
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)

outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
print(outputs[0]["generated_text"])
```
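Note that `tokenizer_config.json` in this repository maps `AutoTokenizer` to a custom `NEOTokenizer` class via `auto_map`, and the bundled chat template renders Llama-2-style `[INST] ... [/INST]` prompts. Loading the tokenizer may therefore require `trust_remote_code=True`; a hedged variant of the loading call above:

```python
# Assumption: because tokenizer_config.json points AutoTokenizer at a custom
# NEOTokenizer class, transformers will ask for trust_remote_code when loading.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "DewEfresh/Neo_7b-merge15",
    trust_remote_code=True,
)
```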
added_tokens.json
ADDED
@@ -0,0 +1,7 @@
{
  "<|CLS|>": 64000,
  "<|EOD|>": 64002,
  "<|MASK|>": 64003,
  "<|PAD|>": 64004,
  "<|SEP|>": 64001
}
config.json
ADDED
@@ -0,0 +1,29 @@
{
  "_name_or_path": "DewEfresh/neo_7b",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 24576,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 16,
  "num_hidden_layers": 0,
  "num_key_value_heads": 16,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.42.3",
  "use_cache": true,
  "vocab_size": 64256
}
mergekit_config.yml
ADDED
@@ -0,0 +1,136 @@
# Define the slices for the model merging process
slices:
  - sources:
      # First part: merge layer 0 with layer 3
      - model: DewEfresh/neo_7b
        layer_range: [0, 0]
      - model: m-a-p/neo_7b
        layer_range: [3, 3]
  - sources:
      # Second part: merge layer 1 with layer 3
      - model: DewEfresh/neo_7b
        layer_range: [1, 1]
      - model: m-a-p/neo_7b
        layer_range: [3, 3]
  - sources:
      # Third part: merge layer 2 with layer 3
      - model: DewEfresh/neo_7b
        layer_range: [2, 2]
      - model: m-a-p/neo_7b
        layer_range: [3, 3]
  - sources:
      # Fourth part: merge layer 4 with layer 7
      - model: DewEfresh/neo_7b
        layer_range: [4, 4]
      - model: m-a-p/neo_7b
        layer_range: [7, 7]
  - sources:
      # Fifth part: merge layer 5 with layer 7
      - model: DewEfresh/neo_7b
        layer_range: [5, 5]
      - model: m-a-p/neo_7b
        layer_range: [7, 7]
  - sources:
      # Sixth part: merge layer 6 with layer 7
      - model: DewEfresh/neo_7b
        layer_range: [6, 6]
      - model: m-a-p/neo_7b
        layer_range: [7, 7]
  - sources:
      # Seventh part: merge layer 8 with layer 11
      - model: DewEfresh/neo_7b
        layer_range: [8, 8]
      - model: m-a-p/neo_7b
        layer_range: [11, 11]
  - sources:
      # Eighth part: merge layer 9 with layer 11
      - model: DewEfresh/neo_7b
        layer_range: [9, 9]
      - model: m-a-p/neo_7b
        layer_range: [11, 11]
  - sources:
      # Ninth part: merge layer 10 with layer 11
      - model: DewEfresh/neo_7b
        layer_range: [10, 10]
      - model: m-a-p/neo_7b
        layer_range: [11, 11]
  - sources:
      # Tenth part: merge layer 12 with layer 15
      - model: DewEfresh/neo_7b
        layer_range: [12, 12]
      - model: m-a-p/neo_7b
        layer_range: [15, 15]
  - sources:
      # Eleventh part: merge layer 13 with layer 15
      - model: DewEfresh/neo_7b
        layer_range: [13, 13]
      - model: m-a-p/neo_7b
        layer_range: [15, 15]
  - sources:
      # Twelfth part: merge layer 14 with layer 15
      - model: DewEfresh/neo_7b
        layer_range: [14, 14]
      - model: m-a-p/neo_7b
        layer_range: [15, 15]
  - sources:
      # Thirteenth part: merge layer 16 with layer 19
      - model: DewEfresh/neo_7b
        layer_range: [16, 16]
      - model: m-a-p/neo_7b
        layer_range: [19, 19]
  - sources:
      # Fourteenth part: merge layer 17 with layer 19
      - model: DewEfresh/neo_7b
        layer_range: [17, 17]
      - model: m-a-p/neo_7b
        layer_range: [19, 19]
  - sources:
      # Fifteenth part: merge layer 18 with layer 19
      - model: DewEfresh/neo_7b
        layer_range: [18, 18]
      - model: m-a-p/neo_7b
        layer_range: [19, 19]
  - sources:
      # Sixteenth part: merge layer 20 with layer 23
      - model: DewEfresh/neo_7b
        layer_range: [20, 20]
      - model: m-a-p/neo_7b
        layer_range: [23, 23]
  - sources:
      # Seventeenth part: merge layer 21 with layer 23
      - model: DewEfresh/neo_7b
        layer_range: [21, 21]
      - model: m-a-p/neo_7b
        layer_range: [23, 23]
  - sources:
      # Eighteenth part: merge layer 22 with layer 23
      - model: DewEfresh/neo_7b
        layer_range: [22, 22]
      - model: m-a-p/neo_7b
        layer_range: [23, 23]
  - sources:
      # Nineteenth part: merge layer 24 with layer 27
      - model: DewEfresh/neo_7b
        layer_range: [24, 24]
      - model: m-a-p/neo_7b
        layer_range: [27, 27]
  - sources:
      # Twentieth part: merge layer 25 with layer 27
      - model: DewEfresh/neo_7b
        layer_range: [25, 25]
      - model: m-a-p/neo_7b
        layer_range: [27, 27]
  - sources:
      # Twenty-first part: merge layer 26 with layer 27
      - model: DewEfresh/neo_7b
        layer_range: [26, 26]
      - model: m-a-p/neo_7b
        layer_range: [27, 27]
# Specify the merging method for the slices
merge_method: slerp
base_model: DewEfresh/neo_7b
parameters:
  t: 0.3333 # Set global interpolation value to 33.33%
dtype: bfloat16
model-00001-of-00001.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:41209159383a6438fa1e73146df470aae582c40bf510ec2495a3f8780477ce87
size 789584192
model-00001-of-00003.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d4235f15b69871ff7026193156d555c33c61671569b44c1eaf0bc53445bd2014
size 4998668576
model-00002-of-00003.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4cb0f120f24ce52d9f1f4b81b42b45adcf23386e2776a092c26fcfa71612c608
size 4926336584
model-00003-of-00003.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:44e521e63a165fd616257f81ca7f33e2bba57601fccd6fe71d4da4a989d5ab8b
size 3019978728
model.safetensors.index.json
ADDED
@@ -0,0 +1 @@
{"metadata": {"mergekit_version": "0.0.4.4", "total_size": 789583872}, "weight_map": {"lm_head.weight": "model-00001-of-00001.safetensors", "model.embed_tokens.weight": "model-00001-of-00001.safetensors", "model.norm.weight": "model-00001-of-00001.safetensors"}}
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
{
  "additional_special_tokens": [
    "<|CLS|>",
    "<|SEP|>",
    "<|EOD|>",
    "<|MASK|>",
    "<|PAD|>"
  ],
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "pad_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  }
}
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f6a2447b0e5664cabb2481587597102d82f42f0ccb7ef22e1c2d95494a8b03c5
size 1002561
tokenizer_config.json
ADDED
@@ -0,0 +1,95 @@
{
  "add_bos_token": false,
  "add_eos_token": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": true,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": true,
      "special": true
    },
    "64000": {
      "content": "<|CLS|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "64001": {
      "content": "<|SEP|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "64002": {
      "content": "<|EOD|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "64003": {
      "content": "<|MASK|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "64004": {
      "content": "<|PAD|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<|CLS|>",
    "<|SEP|>",
    "<|EOD|>",
    "<|MASK|>",
    "<|PAD|>"
  ],
  "auto_map": {
    "AutoTokenizer": [
      "DewEfresh/neo_7b--tokenization_neo.NEOTokenizer",
      null
    ]
  },
  "bos_token": "<s>",
  "chat_template": "{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if loop.index0 == 0 and system_message is defined %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>' + '[INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "model_max_length": 4096,
  "pad_token": "<unk>",
  "padding_side": "right",
  "sp_model_kwargs": {},
  "split_special_tokens": false,
  "tokenizer_class": "NEOTokenizer",
  "unk_token": "<unk>",
  "use_fast": false
}