Upload folder using huggingface_hub
- README.md +18 -17
- config.json +4 -8
- mergekit_config.yml +12 -11
- model-00001-of-00015.safetensors +1 -1
- model-00002-of-00015.safetensors +1 -1
- model-00003-of-00015.safetensors +1 -1
- model-00004-of-00015.safetensors +1 -1
- model-00005-of-00015.safetensors +1 -1
- model-00006-of-00015.safetensors +1 -1
- model-00007-of-00015.safetensors +1 -1
- model-00008-of-00015.safetensors +1 -1
- model-00009-of-00015.safetensors +1 -1
- model-00010-of-00015.safetensors +1 -1
- model-00011-of-00015.safetensors +1 -1
- model-00012-of-00015.safetensors +1 -1
- model-00013-of-00015.safetensors +1 -1
- model-00014-of-00015.safetensors +1 -1
- model-00015-of-00015.safetensors +1 -1
- special_tokens_map.json +7 -0
- tokenizer_config.json +3 -1
README.md
CHANGED
@@ -1,8 +1,8 @@
 ---
 base_model:
-- kromcomp/L3.1-
-- kromcomp/L3.1-
-- kromcomp/L3.1-
+- kromcomp/L3.1-Rays-10B
+- kromcomp/L3.1-Bloat-10B
+- kromcomp/L3.1-Extend-10B
 library_name: transformers
 tags:
 - mergekit
@@ -16,20 +16,21 @@ This is a merge of pre-trained language models created using [mergekit](https://
 ## Merge Details
 ### Merge Method
 
-This model was merged using the
+This model was merged using the [DELLA](https://arxiv.org/abs/2406.11617) merge method using [kromcomp/L3.1-Extend-10B](https://huggingface.co/kromcomp/L3.1-Extend-10B) as a base.
 
 ### Models Merged
 
 The following models were included in the merge:
-* [kromcomp/L3.1-
-* [kromcomp/L3.1-
+* [kromcomp/L3.1-Rays-10B](https://huggingface.co/kromcomp/L3.1-Rays-10B)
+* [kromcomp/L3.1-Bloat-10B](https://huggingface.co/kromcomp/L3.1-Bloat-10B)
 
 ### Configuration
 
 The following YAML configuration was used to produce this model:
 
 ```yaml
-base_model: kromcomp/L3.1-
+base_model: kromcomp/L3.1-Extend-10B
+chat_template: llama3
 dtype: float32
 merge_method: della
 parameters:
@@ -38,22 +39,22 @@ parameters:
 slices:
 - sources:
   - layer_range: [0, 42]
-    model: kromcomp/L3.1-
+    model: kromcomp/L3.1-Rays-10B
     parameters:
       density: 0.7
       epsilon: 0.05
-      weight: 0.
+      weight: [0.0, 0.2, 0.6]
   - layer_range: [0, 42]
-    model: kromcomp/L3.1-
+    model: kromcomp/L3.1-Bloat-10B
     parameters:
-      density: 0.
+      density: 0.7
       epsilon: 0.05
-      weight: 0.3
+      weight: [0.9, 0.3, 0.3]
   - layer_range: [0, 42]
-    model: kromcomp/L3.1-
+    model: kromcomp/L3.1-Extend-10B
     parameters:
-      density: 0.
-      epsilon: 0.
-      weight: 0.
-
+      density: 0.65
+      epsilon: 0.05
+      weight: [0.1, 0.5, 0.1]
+tokenizer: {}
 ```
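The three-element `weight` lists in the new config are mergekit gradient values: the anchors are spread across the 42 merged layers, so each donor's influence varies with depth. A minimal sketch of that expansion, assuming mergekit's usual linear interpolation of gradient lists (the `layer_weights` helper and the `numpy` dependency are illustrative, not part of this repo):

```python
# Sketch: expand a mergekit gradient list into per-layer weights.
# Assumes the anchors are placed evenly over the layer range and
# interpolated linearly between them.
import numpy as np

def layer_weights(anchors, num_layers=42):
    positions = np.linspace(0, len(anchors) - 1, num_layers)
    return np.interp(positions, np.arange(len(anchors)), anchors)

rays   = layer_weights([0.0, 0.2, 0.6])  # Rays-10B: grows toward the final layers
bloat  = layer_weights([0.9, 0.3, 0.3])  # Bloat-10B: dominates the early layers
extend = layer_weights([0.1, 0.5, 0.1])  # Extend-10B (base): peaks mid-stack
```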
config.json
CHANGED
@@ -1,16 +1,12 @@
 {
-  "_name_or_path": "kromcomp/L3.1-
+  "_name_or_path": "kromcomp/L3.1-Extend-10B",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
-  "eos_token_id": [
-    128001,
-    128008,
-    128009
-  ],
+  "eos_token_id": 128009,
   "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 4096,
@@ -34,7 +30,7 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.
-  "use_cache":
+  "transformers_version": "4.47.0",
+  "use_cache": false,
   "vocab_size": 128256
 }
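To sanity-check the updated config, a short sketch with `transformers` (the local path is hypothetical; substitute the actual repo id):

```python
from transformers import AutoConfig, AutoModelForCausalLM

repo = "./L3.1-merged-10B"  # hypothetical local checkout of this repo

config = AutoConfig.from_pretrained(repo)
# This commit collapses eos_token_id from [128001, 128008, 128009] to a single id.
assert config.eos_token_id == 128009

# torch_dtype="auto" honors the "float32" recorded in config.json.
model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype="auto")
```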
mergekit_config.yml
CHANGED
@@ -1,4 +1,5 @@
-base_model: kromcomp/L3.1-
+base_model: kromcomp/L3.1-Extend-10B
+chat_template: llama3
 dtype: float32
 merge_method: della
 parameters:
@@ -7,21 +8,21 @@ parameters:
 slices:
 - sources:
   - layer_range: [0, 42]
-    model: kromcomp/L3.1-
+    model: kromcomp/L3.1-Rays-10B
     parameters:
       density: 0.7
       epsilon: 0.05
-      weight: 0.
+      weight: [0.0, 0.2, 0.6]
   - layer_range: [0, 42]
-    model: kromcomp/L3.1-
+    model: kromcomp/L3.1-Bloat-10B
     parameters:
-      density: 0.
+      density: 0.7
       epsilon: 0.05
-      weight: 0.3
+      weight: [0.9, 0.3, 0.3]
   - layer_range: [0, 42]
-    model: kromcomp/L3.1-
+    model: kromcomp/L3.1-Extend-10B
     parameters:
-      density: 0.
-      epsilon: 0.
-      weight: 0.
-
+      density: 0.65
+      epsilon: 0.05
+      weight: [0.1, 0.5, 0.1]
+tokenizer: {}
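The merge can be re-run from this file with the mergekit CLI; a sketch, assuming `pip install mergekit` provides the `mergekit-yaml` entry point (the output directory name is arbitrary):

```python
# Sketch: re-run the merge that produced this repo from mergekit_config.yml.
import subprocess

subprocess.run(
    ["mergekit-yaml", "mergekit_config.yml", "./L3.1-merged-10B"],  # output dir is arbitrary
    check=True,
)
```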
model-00001-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e8d29c8f2aeb3140e9e890ef57e8168adc7780372e895ef5aed1c909b4a24ddf
 size 2101346432
model-00002-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b3814600395597f0cfc96a402d522f4fb7ebfd4d18e4f872fc35724ef537e74c
 size 2973811968
model-00003-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:83cf478bfc3539a6a38099f9f9164727b44ce643dc5396ebb0cec3d7a65f906a
 size 2852228272
model-00004-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:aff07065f3f7ce95548dce8e57fd6876ffc169195142f92130352c778262d3d5
 size 2852228280
model-00005-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f9dca2b2bd10bb442136f9d87ec681debd1e192a1e70db28d94495f6a9ed2eef
 size 2936131096
model-00006-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8415562f84c1e1db4a2395e1e7bc2520b4f003b916232b3395c43d9166a27afe
 size 2936131072
model-00007-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cd76bf0ffdb79aea6ab1bc418d8373154e44fa925bf5b5208fe0d982a155dbf3
 size 2852228280
model-00008-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3fad8211d6a842d267c5b68d087f04e03155c5b5310608a60f80972f25a2149b
 size 2936131096
model-00009-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c1604aef44348df28904b31e1601c365e7a2034fc548699bbe0a63c8769860c3
 size 2936131072
model-00010-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cc6d61c73a872eec6922d3dca2ea674a419ecefec9f60913213eb8072b39e36d
 size 2852228280
model-00011-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a5289042677fdf966875ee07c5526052e1d63fac827ed527ec10d902a0f5d591
 size 2936131096
model-00012-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8539922f0371ae204b1ff38898e68c173d12ff00ba5f71a6d591bd9eee427082
 size 2936131080
model-00013-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d9fb388bc32e97a288ec9f686a670ec018d3e83127d8b6194cbd6601db5d0dc6
 size 2852228272
model-00014-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e8961b528499497cd875269edd244d42976a3e10d7d7fce58112ee51bf5b2af6
 size 2936131064
model-00015-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0e6d3ec5d559d9f4a6d38b8f1c913c5b95c70f4a369f367b24093a51ad059a86
 size 956351824
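Each shard above is a Git LFS pointer recording a sha256 digest and a byte size; a downloaded shard can be checked against the pointer with a few lines of stdlib Python (the file name and sample hash are taken from this commit):

```python
# Sketch: verify a downloaded shard against its LFS pointer.
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()

# oid copied from the model-00001 pointer file above.
assert sha256_of("model-00001-of-00015.safetensors") == (
    "e8d29c8f2aeb3140e9e890ef57e8168adc7780372e895ef5aed1c909b4a24ddf"
)
```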
special_tokens_map.json
CHANGED
@@ -12,5 +12,12 @@
     "normalized": false,
     "rstrip": false,
     "single_word": false
+  },
+  "pad_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
   }
 }
tokenizer_config.json
CHANGED
@@ -2050,13 +2050,15 @@
     }
   },
   "bos_token": "<|begin_of_text|>",
-  "chat_template": "{
+  "chat_template": "{% set loop_messages = messages %}\n{% for message in loop_messages %}\n{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' %}\n{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}\n{{ content }}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}{% endif %}\n",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|eot_id|>",
+  "extra_special_tokens": {},
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 131072,
+  "pad_token": "<|end_of_text|>",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }
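The added `chat_template` is the llama3-style Jinja template, matching the `chat_template: llama3` line in the merge config. A quick way to inspect the rendered prompt (the local path is hypothetical):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./L3.1-merged-10B")  # hypothetical local path

messages = [{"role": "user", "content": "Hello!"}]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# The template wraps each turn in <|start_header_id|>role<|end_header_id|> ... <|eot_id|>
# and, with add_generation_prompt=True, ends with the assistant header.
print(prompt)
```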