DewEfresh committed on
Commit
f22a3e1
1 Parent(s): d686793

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model:
3
+ - DewEfresh/neo_7b
4
+ - DewEfresh/neo_7b
5
+ tags:
6
+ - merge
7
+ - mergekit
8
+ - lazymergekit
9
+ - DewEfresh/neo_7b
10
+ ---
11
+
12
+ # Neo_7b-merge14
13
+
14
+ Neo_7b-merge14 is a merge of the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
15
+ * [DewEfresh/neo_7b](https://huggingface.co/DewEfresh/neo_7b)
16
+ * [DewEfresh/neo_7b](https://huggingface.co/DewEfresh/neo_7b)
17
+
18
+ ## 🧩 Configuration
19
+
20
+ ```yaml
21
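+ # Define the slices for the model merging process. NOTE(review): mergekit's layer_range appears to be half-open [start, end), so a range like [0, 0] selects zero layers; use [0, 1] to select only layer 0 — confirm against the mergekit docs.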
22
+ slices:
23
+ - sources:
24
+ # First part: merge layer 0 with layer 3
25
+ - model: DewEfresh/neo_7b
26
+ layer_range: [0, 0]
27
+ - model: DewEfresh/neo_7b
28
+ layer_range: [3, 3]
29
+ - sources:
30
+ # Second part: merge layer 1 with layer 3
31
+ - model: DewEfresh/neo_7b
32
+ layer_range: [1, 1]
33
+ - model: DewEfresh/neo_7b
34
+ layer_range: [3, 3]
35
+ - sources:
36
+ # Third part: merge layer 2 with layer 3
37
+ - model: DewEfresh/neo_7b
38
+ layer_range: [2, 2]
39
+ - model: DewEfresh/neo_7b
40
+ layer_range: [3, 3]
41
+ - sources:
42
+ # Fourth part: merge layer 4 with layer 7
43
+ - model: DewEfresh/neo_7b
44
+ layer_range: [4, 4]
45
+ - model: DewEfresh/neo_7b
46
+ layer_range: [7, 7]
47
+ - sources:
48
+ # Fifth part: merge layer 5 with layer 7
49
+ - model: DewEfresh/neo_7b
50
+ layer_range: [5, 5]
51
+ - model: DewEfresh/neo_7b
52
+ layer_range: [7, 7]
53
+ - sources:
54
+ # Sixth part: merge layer 6 with layer 7
55
+ - model: DewEfresh/neo_7b
56
+ layer_range: [6, 6]
57
+ - model: DewEfresh/neo_7b
58
+ layer_range: [7, 7]
59
+ - sources:
60
+ # Seventh part: merge layer 8 with layer 11
61
+ - model: DewEfresh/neo_7b
62
+ layer_range: [8, 8]
63
+ - model: DewEfresh/neo_7b
64
+ layer_range: [11, 11]
65
+ - sources:
66
+ # Eighth part: merge layer 9 with layer 11
67
+ - model: DewEfresh/neo_7b
68
+ layer_range: [9, 9]
69
+ - model: DewEfresh/neo_7b
70
+ layer_range: [11, 11]
71
+ - sources:
72
+ # Ninth part: merge layer 10 with layer 11
73
+ - model: DewEfresh/neo_7b
74
+ layer_range: [10, 10]
75
+ - model: DewEfresh/neo_7b
76
+ layer_range: [11, 11]
77
+ - sources:
78
+ # Tenth part: merge layer 12 with layer 15
79
+ - model: DewEfresh/neo_7b
80
+ layer_range: [12, 12]
81
+ - model: DewEfresh/neo_7b
82
+ layer_range: [15, 15]
83
+ - sources:
84
+ # Eleventh part: merge layer 13 with layer 15
85
+ - model: DewEfresh/neo_7b
86
+ layer_range: [13, 13]
87
+ - model: DewEfresh/neo_7b
88
+ layer_range: [15, 15]
89
+ - sources:
90
+ # Twelfth part: merge layer 14 with layer 15
91
+ - model: DewEfresh/neo_7b
92
+ layer_range: [14, 14]
93
+ - model: DewEfresh/neo_7b
94
+ layer_range: [15, 15]
95
+ - sources:
96
+ # Thirteenth part: merge layer 16 with layer 19
97
+ - model: DewEfresh/neo_7b
98
+ layer_range: [16, 16]
99
+ - model: DewEfresh/neo_7b
100
+ layer_range: [19, 19]
101
+ - sources:
102
+ # Fourteenth part: merge layer 17 with layer 19
103
+ - model: DewEfresh/neo_7b
104
+ layer_range: [17, 17]
105
+ - model: DewEfresh/neo_7b
106
+ layer_range: [19, 19]
107
+ - sources:
108
+ # Fifteenth part: merge layer 18 with layer 19
109
+ - model: DewEfresh/neo_7b
110
+ layer_range: [18, 18]
111
+ - model: DewEfresh/neo_7b
112
+ layer_range: [19, 19]
113
+ - sources:
114
+ # Sixteenth part: merge layer 20 with layer 23
115
+ - model: DewEfresh/neo_7b
116
+ layer_range: [20, 20]
117
+ - model: DewEfresh/neo_7b
118
+ layer_range: [23, 23]
119
+ - sources:
120
+ # Seventeenth part: merge layer 21 with layer 23
121
+ - model: DewEfresh/neo_7b
122
+ layer_range: [21, 21]
123
+ - model: DewEfresh/neo_7b
124
+ layer_range: [23, 23]
125
+ - sources:
126
+ # Eighteenth part: merge layer 22 with layer 23
127
+ - model: DewEfresh/neo_7b
128
+ layer_range: [22, 22]
129
+ - model: DewEfresh/neo_7b
130
+ layer_range: [23, 23]
131
+ - sources:
132
+ # Nineteenth part: merge layer 24 with layer 27
133
+ - model: DewEfresh/neo_7b
134
+ layer_range: [24, 24]
135
+ - model: DewEfresh/neo_7b
136
+ layer_range: [27, 27]
137
+ - sources:
138
+ # Twentieth part: merge layer 25 with layer 27
139
+ - model: DewEfresh/neo_7b
140
+ layer_range: [25, 25]
141
+ - model: DewEfresh/neo_7b
142
+ layer_range: [27, 27]
143
+ - sources:
144
+ # Twenty-first part: merge layer 26 with layer 27
145
+ - model: DewEfresh/neo_7b
146
+ layer_range: [26, 26]
147
+ - model: DewEfresh/neo_7b
148
+ layer_range: [27, 27]
149
+ # Specify the merging method for the slices
150
+ merge_method: slerp
151
+ base_model: DewEfresh/neo_7b
152
+ parameters:
153
+ t: 0.3333 # Set global interpolation value to 33.33%
154
+ dtype: bfloat16
155
+
156
+ ```
157
+
158
+ ## 💻 Usage
159
+
160
+ ```python
161
+ !pip install -qU transformers accelerate
162
+
163
+ from transformers import AutoTokenizer
164
+ import transformers
165
+ import torch
166
+
167
+ model = "DewEfresh/Neo_7b-merge14"
168
+ messages = [{"role": "user", "content": "What is a large language model?"}]
169
+
170
+ tokenizer = AutoTokenizer.from_pretrained(model)
171
+ prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
172
+ pipeline = transformers.pipeline(
173
+ "text-generation",
174
+ model=model,
175
+ torch_dtype=torch.float16,
176
+ device_map="auto",
177
+ )
178
+
179
+ outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
180
+ print(outputs[0]["generated_text"])
181
+ ```
added_tokens.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "<|CLS|>": 64000,
3
+ "<|EOD|>": 64002,
4
+ "<|MASK|>": 64003,
5
+ "<|PAD|>": 64004,
6
+ "<|SEP|>": 64001
7
+ }
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "DewEfresh/neo_7b",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 3072,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 24576,
14
+ "max_position_embeddings": 8192,
15
+ "mlp_bias": false,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 0,
19
+ "num_key_value_heads": 16,
20
+ "pretraining_tp": 1,
21
+ "rms_norm_eps": 1e-05,
22
+ "rope_scaling": null,
23
+ "rope_theta": 10000.0,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.42.3",
27
+ "use_cache": true,
28
+ "vocab_size": 64256
29
+ }
mergekit_config.yml ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
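+ # Define the slices for the model merging process. NOTE(review): mergekit's layer_range appears to be half-open [start, end), so a range like [0, 0] selects zero layers; use [0, 1] to select only layer 0 — confirm against the mergekit docs.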
3
+ slices:
4
+ - sources:
5
+ # First part: merge layer 0 with layer 3
6
+ - model: DewEfresh/neo_7b
7
+ layer_range: [0, 0]
8
+ - model: DewEfresh/neo_7b
9
+ layer_range: [3, 3]
10
+ - sources:
11
+ # Second part: merge layer 1 with layer 3
12
+ - model: DewEfresh/neo_7b
13
+ layer_range: [1, 1]
14
+ - model: DewEfresh/neo_7b
15
+ layer_range: [3, 3]
16
+ - sources:
17
+ # Third part: merge layer 2 with layer 3
18
+ - model: DewEfresh/neo_7b
19
+ layer_range: [2, 2]
20
+ - model: DewEfresh/neo_7b
21
+ layer_range: [3, 3]
22
+ - sources:
23
+ # Fourth part: merge layer 4 with layer 7
24
+ - model: DewEfresh/neo_7b
25
+ layer_range: [4, 4]
26
+ - model: DewEfresh/neo_7b
27
+ layer_range: [7, 7]
28
+ - sources:
29
+ # Fifth part: merge layer 5 with layer 7
30
+ - model: DewEfresh/neo_7b
31
+ layer_range: [5, 5]
32
+ - model: DewEfresh/neo_7b
33
+ layer_range: [7, 7]
34
+ - sources:
35
+ # Sixth part: merge layer 6 with layer 7
36
+ - model: DewEfresh/neo_7b
37
+ layer_range: [6, 6]
38
+ - model: DewEfresh/neo_7b
39
+ layer_range: [7, 7]
40
+ - sources:
41
+ # Seventh part: merge layer 8 with layer 11
42
+ - model: DewEfresh/neo_7b
43
+ layer_range: [8, 8]
44
+ - model: DewEfresh/neo_7b
45
+ layer_range: [11, 11]
46
+ - sources:
47
+ # Eighth part: merge layer 9 with layer 11
48
+ - model: DewEfresh/neo_7b
49
+ layer_range: [9, 9]
50
+ - model: DewEfresh/neo_7b
51
+ layer_range: [11, 11]
52
+ - sources:
53
+ # Ninth part: merge layer 10 with layer 11
54
+ - model: DewEfresh/neo_7b
55
+ layer_range: [10, 10]
56
+ - model: DewEfresh/neo_7b
57
+ layer_range: [11, 11]
58
+ - sources:
59
+ # Tenth part: merge layer 12 with layer 15
60
+ - model: DewEfresh/neo_7b
61
+ layer_range: [12, 12]
62
+ - model: DewEfresh/neo_7b
63
+ layer_range: [15, 15]
64
+ - sources:
65
+ # Eleventh part: merge layer 13 with layer 15
66
+ - model: DewEfresh/neo_7b
67
+ layer_range: [13, 13]
68
+ - model: DewEfresh/neo_7b
69
+ layer_range: [15, 15]
70
+ - sources:
71
+ # Twelfth part: merge layer 14 with layer 15
72
+ - model: DewEfresh/neo_7b
73
+ layer_range: [14, 14]
74
+ - model: DewEfresh/neo_7b
75
+ layer_range: [15, 15]
76
+ - sources:
77
+ # Thirteenth part: merge layer 16 with layer 19
78
+ - model: DewEfresh/neo_7b
79
+ layer_range: [16, 16]
80
+ - model: DewEfresh/neo_7b
81
+ layer_range: [19, 19]
82
+ - sources:
83
+ # Fourteenth part: merge layer 17 with layer 19
84
+ - model: DewEfresh/neo_7b
85
+ layer_range: [17, 17]
86
+ - model: DewEfresh/neo_7b
87
+ layer_range: [19, 19]
88
+ - sources:
89
+ # Fifteenth part: merge layer 18 with layer 19
90
+ - model: DewEfresh/neo_7b
91
+ layer_range: [18, 18]
92
+ - model: DewEfresh/neo_7b
93
+ layer_range: [19, 19]
94
+ - sources:
95
+ # Sixteenth part: merge layer 20 with layer 23
96
+ - model: DewEfresh/neo_7b
97
+ layer_range: [20, 20]
98
+ - model: DewEfresh/neo_7b
99
+ layer_range: [23, 23]
100
+ - sources:
101
+ # Seventeenth part: merge layer 21 with layer 23
102
+ - model: DewEfresh/neo_7b
103
+ layer_range: [21, 21]
104
+ - model: DewEfresh/neo_7b
105
+ layer_range: [23, 23]
106
+ - sources:
107
+ # Eighteenth part: merge layer 22 with layer 23
108
+ - model: DewEfresh/neo_7b
109
+ layer_range: [22, 22]
110
+ - model: DewEfresh/neo_7b
111
+ layer_range: [23, 23]
112
+ - sources:
113
+ # Nineteenth part: merge layer 24 with layer 27
114
+ - model: DewEfresh/neo_7b
115
+ layer_range: [24, 24]
116
+ - model: DewEfresh/neo_7b
117
+ layer_range: [27, 27]
118
+ - sources:
119
+ # Twentieth part: merge layer 25 with layer 27
120
+ - model: DewEfresh/neo_7b
121
+ layer_range: [25, 25]
122
+ - model: DewEfresh/neo_7b
123
+ layer_range: [27, 27]
124
+ - sources:
125
+ # Twenty-first part: merge layer 26 with layer 27
126
+ - model: DewEfresh/neo_7b
127
+ layer_range: [26, 26]
128
+ - model: DewEfresh/neo_7b
129
+ layer_range: [27, 27]
130
+ # Specify the merging method for the slices
131
+ merge_method: slerp
132
+ base_model: DewEfresh/neo_7b
133
+ parameters:
134
+ t: 0.3333 # Set global interpolation value to 33.33%
135
+ dtype: bfloat16
136
+
model-00001-of-00001.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41209159383a6438fa1e73146df470aae582c40bf510ec2495a3f8780477ce87
3
+ size 789584192
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4235f15b69871ff7026193156d555c33c61671569b44c1eaf0bc53445bd2014
3
+ size 4998668576
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cb0f120f24ce52d9f1f4b81b42b45adcf23386e2776a092c26fcfa71612c608
3
+ size 4926336584
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44e521e63a165fd616257f81ca7f33e2bba57601fccd6fe71d4da4a989d5ab8b
3
+ size 3019978728
model.safetensors.index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata": {"mergekit_version": "0.0.4.4", "total_size": 789583872}, "weight_map": {"lm_head.weight": "model-00001-of-00001.safetensors", "model.embed_tokens.weight": "model-00001-of-00001.safetensors", "model.norm.weight": "model-00001-of-00001.safetensors"}}
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|CLS|>",
4
+ "<|SEP|>",
5
+ "<|EOD|>",
6
+ "<|MASK|>",
7
+ "<|PAD|>"
8
+ ],
9
+ "bos_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": true
22
+ },
23
+ "pad_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": true
29
+ },
30
+ "unk_token": {
31
+ "content": "<unk>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": true
36
+ }
37
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6a2447b0e5664cabb2481587597102d82f42f0ccb7ef22e1c2d95494a8b03c5
3
+ size 1002561
tokenizer_config.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": true,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": true,
25
+ "rstrip": false,
26
+ "single_word": true,
27
+ "special": true
28
+ },
29
+ "64000": {
30
+ "content": "<|CLS|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "64001": {
38
+ "content": "<|SEP|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "64002": {
46
+ "content": "<|EOD|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "64003": {
54
+ "content": "<|MASK|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "64004": {
62
+ "content": "<|PAD|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ }
69
+ },
70
+ "additional_special_tokens": [
71
+ "<|CLS|>",
72
+ "<|SEP|>",
73
+ "<|EOD|>",
74
+ "<|MASK|>",
75
+ "<|PAD|>"
76
+ ],
77
+ "auto_map": {
78
+ "AutoTokenizer": [
79
+ "DewEfresh/neo_7b--tokenization_neo.NEOTokenizer",
80
+ null
81
+ ]
82
+ },
83
+ "bos_token": "<s>",
84
+ "chat_template": "{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if loop.index0 == 0 and system_message is defined %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>' + '[INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
85
+ "clean_up_tokenization_spaces": false,
86
+ "eos_token": "</s>",
87
+ "model_max_length": 4096,
88
+ "pad_token": "<unk>",
89
+ "padding_side": "right",
90
+ "sp_model_kwargs": {},
91
+ "split_special_tokens": false,
92
+ "tokenizer_class": "NEOTokenizer",
93
+ "unk_token": "<unk>",
94
+ "use_fast": false
95
+ }