kromeurus committed
Commit 4d34bc1 · verified · Parent(s): 45a7010

Upload folder using huggingface_hub
README.md CHANGED
@@ -1,8 +1,8 @@
 ---
 base_model:
-- kromcomp/L3.1-Fatboy-10B
-- kromcomp/L3.1-Npos-10B
-- kromcomp/L3.1-Adlib-10B
+- kromcomp/L3.1-Rays-10B
+- kromcomp/L3.1-Bloat-10B
+- kromcomp/L3.1-Extend-10B
 library_name: transformers
 tags:
 - mergekit
@@ -16,20 +16,21 @@ This is a merge of pre-trained language models created using [mergekit](https://
 ## Merge Details
 ### Merge Method
 
-This model was merged using the della merge method using [kromcomp/L3.1-Fatboy-10B](https://huggingface.co/kromcomp/L3.1-Fatboy-10B) as a base.
+This model was merged using the [DELLA](https://arxiv.org/abs/2406.11617) merge method using [kromcomp/L3.1-Extend-10B](https://huggingface.co/kromcomp/L3.1-Extend-10B) as a base.
 
 ### Models Merged
 
 The following models were included in the merge:
-* [kromcomp/L3.1-Npos-10B](https://huggingface.co/kromcomp/L3.1-Npos-10B)
-* [kromcomp/L3.1-Adlib-10B](https://huggingface.co/kromcomp/L3.1-Adlib-10B)
+* [kromcomp/L3.1-Rays-10B](https://huggingface.co/kromcomp/L3.1-Rays-10B)
+* [kromcomp/L3.1-Bloat-10B](https://huggingface.co/kromcomp/L3.1-Bloat-10B)
 
 ### Configuration
 
 The following YAML configuration was used to produce this model:
 
 ```yaml
-base_model: kromcomp/L3.1-Fatboy-10B
+base_model: kromcomp/L3.1-Extend-10B
+chat_template: llama3
 dtype: float32
 merge_method: della
 parameters:
@@ -38,22 +39,22 @@ parameters:
 slices:
 - sources:
   - layer_range: [0, 42]
-    model: kromcomp/L3.1-Npos-10B
+    model: kromcomp/L3.1-Rays-10B
     parameters:
       density: 0.7
       epsilon: 0.05
-      weight: 0.4
+      weight: [0.0, 0.2, 0.6]
   - layer_range: [0, 42]
-    model: kromcomp/L3.1-Adlib-10B
+    model: kromcomp/L3.1-Bloat-10B
     parameters:
-      density: 0.5
+      density: 0.7
       epsilon: 0.05
-      weight: 0.3
+      weight: [0.9, 0.3, 0.3]
   - layer_range: [0, 42]
-    model: kromcomp/L3.1-Fatboy-10B
+    model: kromcomp/L3.1-Extend-10B
     parameters:
-      density: 0.7
-      epsilon: 0.1
-      weight: 0.4
-tokenizer_source: union
+      density: 0.65
+      epsilon: 0.05
+      weight: [0.1, 0.5, 0.1]
+tokenizer: {}
 ```
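The list-valued `weight` entries above (for example `[0.0, 0.2, 0.6]`) are mergekit parameter gradients: the anchor values are spread evenly across the 42-layer range and interpolated linearly, so each layer receives its own scalar weight. A minimal sketch of that expansion, assuming mergekit's documented linear-gradient behavior (the function name is illustrative, not mergekit's API):

```python
def expand_gradient(anchors: list[float], num_layers: int) -> list[float]:
    """Linearly interpolate anchor values into one weight per layer.

    Illustrative only: mirrors how mergekit expands a list-valued
    parameter such as `weight: [0.0, 0.2, 0.6]` across a layer range,
    with anchors assumed to be evenly spaced.
    """
    if len(anchors) == 1 or num_layers == 1:
        return [anchors[0]] * num_layers
    weights = []
    for layer in range(num_layers):
        # Map the layer index onto the anchor axis [0, len(anchors) - 1].
        pos = layer / (num_layers - 1) * (len(anchors) - 1)
        lo = min(int(pos), len(anchors) - 2)
        frac = pos - lo
        weights.append(anchors[lo] * (1 - frac) + anchors[lo + 1] * frac)
    return weights

# Early layers of L3.1-Rays-10B contribute almost nothing to the merge,
# while its late layers carry the largest weight.
ramp = expand_gradient([0.0, 0.2, 0.6], 42)
print(round(ramp[0], 3), round(ramp[20], 3), round(ramp[41], 3))
```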
config.json CHANGED
@@ -1,16 +1,12 @@
 {
-  "_name_or_path": "kromcomp/L3.1-Fatboy-10B",
+  "_name_or_path": "kromcomp/L3.1-Extend-10B",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
-  "eos_token_id": [
-    128001,
-    128008,
-    128009
-  ],
+  "eos_token_id": 128009,
   "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 4096,
@@ -34,7 +30,7 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.46.3",
-  "use_cache": true,
+  "transformers_version": "4.47.0",
+  "use_cache": false,
   "vocab_size": 128256
 }
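Two of the config changes matter at load time: `eos_token_id` is narrowed from the three Llama 3.1 stop ids to the single id 128009 (`<|eot_id|>`), and `use_cache` is serialized as `false`. A brief sketch of checking and overriding these when loading the merged checkpoint; the local path is a placeholder:

```python
from transformers import AutoConfig, AutoModelForCausalLM

path = "./merged-model"  # placeholder for wherever the checkpoint lives

config = AutoConfig.from_pretrained(path)
print(config.eos_token_id)  # 128009, i.e. <|eot_id|>

model = AutoModelForCausalLM.from_pretrained(path, torch_dtype="auto")
# This config ships with use_cache: false; re-enable the KV cache for
# ordinary autoregressive generation.
model.config.use_cache = True
```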
mergekit_config.yml CHANGED
@@ -1,4 +1,5 @@
-base_model: kromcomp/L3.1-Fatboy-10B
+base_model: kromcomp/L3.1-Extend-10B
+chat_template: llama3
 dtype: float32
 merge_method: della
 parameters:
@@ -7,21 +8,21 @@ parameters:
 slices:
 - sources:
   - layer_range: [0, 42]
-    model: kromcomp/L3.1-Npos-10B
+    model: kromcomp/L3.1-Rays-10B
     parameters:
       density: 0.7
       epsilon: 0.05
-      weight: 0.4
+      weight: [0.0, 0.2, 0.6]
   - layer_range: [0, 42]
-    model: kromcomp/L3.1-Adlib-10B
+    model: kromcomp/L3.1-Bloat-10B
     parameters:
-      density: 0.5
+      density: 0.7
       epsilon: 0.05
-      weight: 0.3
+      weight: [0.9, 0.3, 0.3]
   - layer_range: [0, 42]
-    model: kromcomp/L3.1-Fatboy-10B
+    model: kromcomp/L3.1-Extend-10B
     parameters:
-      density: 0.7
-      epsilon: 0.1
-      weight: 0.4
-tokenizer_source: union
+      density: 0.65
+      epsilon: 0.05
+      weight: [0.1, 0.5, 0.1]
+tokenizer: {}
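The stored config can be replayed through mergekit itself. A sketch using the Python entry point shown in mergekit's README (`MergeConfiguration.model_validate` plus `run_merge`); the output path and option values here are placeholders:

```python
import yaml

from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

# Parse the committed YAML into mergekit's config model.
with open("mergekit_config.yml", encoding="utf-8") as f:
    merge_config = MergeConfiguration.model_validate(yaml.safe_load(f))

run_merge(
    merge_config,
    "./merged-model",         # output directory (placeholder)
    options=MergeOptions(
        cuda=True,            # set False to run the merge on CPU
        copy_tokenizer=True,  # write tokenizer files into the output
    ),
)
```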
model-00001-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c4040d796795b41536d5f4c5de8979e985265189d110974a5e0c6136c06fa9f
+oid sha256:e8d29c8f2aeb3140e9e890ef57e8168adc7780372e895ef5aed1c909b4a24ddf
 size 2101346432
model-00002-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:687695ecca93afa394410508a8f95fef23046dcd88c00f40a9eedff91f2de093
+oid sha256:b3814600395597f0cfc96a402d522f4fb7ebfd4d18e4f872fc35724ef537e74c
 size 2973811968
model-00003-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8d80a0404bfbc0be583120ac7d9ed598650cb466eb3607ea73088a96bd0e13ba
+oid sha256:83cf478bfc3539a6a38099f9f9164727b44ce643dc5396ebb0cec3d7a65f906a
 size 2852228272
model-00004-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2587e9daeeecc073c47ae942a3ddedbe1598159df12c82b0812555784d82afc
+oid sha256:aff07065f3f7ce95548dce8e57fd6876ffc169195142f92130352c778262d3d5
 size 2852228280
model-00005-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e35e5a8a94f1b4496088471c2b73d50280e168c1c70a5ee79a97fed8b0f6577f
+oid sha256:f9dca2b2bd10bb442136f9d87ec681debd1e192a1e70db28d94495f6a9ed2eef
 size 2936131096
model-00006-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:021380e7effe9854c2ea4c1fc2b55de556d6b5ab7722d879fac3a62c80e0bb65
+oid sha256:8415562f84c1e1db4a2395e1e7bc2520b4f003b916232b3395c43d9166a27afe
 size 2936131072
model-00007-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c440072ebb0b814d966fa40bf117adc19e8ab13872fa8810020c68aad2979a02
+oid sha256:cd76bf0ffdb79aea6ab1bc418d8373154e44fa925bf5b5208fe0d982a155dbf3
 size 2852228280
model-00008-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0c3372a4af7b335e36f30d253187507d2eea2c145d0ef054452ea8cb9453389
+oid sha256:3fad8211d6a842d267c5b68d087f04e03155c5b5310608a60f80972f25a2149b
 size 2936131096
model-00009-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17231f2cdf1895e97051df766f35d651d6f4e8b44c569cede0d174d6d6bd13b5
+oid sha256:c1604aef44348df28904b31e1601c365e7a2034fc548699bbe0a63c8769860c3
 size 2936131072
model-00010-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2ae43537b651da70c02cffa28699782076cc85cbc9934e7b3c8eab2b618d4f5
+oid sha256:cc6d61c73a872eec6922d3dca2ea674a419ecefec9f60913213eb8072b39e36d
 size 2852228280
model-00011-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:779c8dc7fb771324b89534ed26e156e6acd3d6faa33129a3f57de3dedc1370c9
+oid sha256:a5289042677fdf966875ee07c5526052e1d63fac827ed527ec10d902a0f5d591
 size 2936131096
model-00012-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a876d02d2409da48a86979ee9be308ecd7958a69a9be78575b1a3752dc7347e
+oid sha256:8539922f0371ae204b1ff38898e68c173d12ff00ba5f71a6d591bd9eee427082
 size 2936131080
model-00013-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0edc1efe2cbdd45952f7132f7d6f435474c6c4178d8fa75d29e2223983dc952e
+oid sha256:d9fb388bc32e97a288ec9f686a670ec018d3e83127d8b6194cbd6601db5d0dc6
 size 2852228272
model-00014-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da704bd0800ed01bda6423e9da386839a20ffaee797337eac6054a508ed83b4b
+oid sha256:e8961b528499497cd875269edd244d42976a3e10d7d7fce58112ee51bf5b2af6
 size 2936131064
model-00015-of-00015.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:192b811fc7fb1eadc4d87d9c3f808249de05d1bee43217ad50dbdd19d388fbd4
+oid sha256:0e6d3ec5d559d9f4a6d38b8f1c913c5b95c70f4a369f367b24093a51ad059a86
 size 956351824
special_tokens_map.json CHANGED
@@ -12,5 +12,12 @@
     "normalized": false,
     "rstrip": false,
     "single_word": false
+  },
+  "pad_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
   }
 }
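The new `pad_token` maps padding to `<|end_of_text|>` instead of reusing the EOS token, which keeps `<|eot_id|>` unambiguous as a stop signal during batched generation. A small usage sketch; the path is a placeholder:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./merged-model")  # placeholder path
assert tok.pad_token == "<|end_of_text|>"

# Batched encoding pads the shorter sequence with <|end_of_text|>,
# and the attention mask zeroes those positions out.
batch = tok(
    ["short prompt", "a somewhat longer prompt"],
    padding=True,
    return_tensors="pt",
)
print(batch["attention_mask"])
```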
tokenizer_config.json CHANGED
@@ -2050,13 +2050,15 @@
     }
   },
   "bos_token": "<|begin_of_text|>",
-  "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n    {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n    {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n    {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n    {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n    {%- set system_message = messages[0]['content']|trim %}\n    {%- set messages = messages[1:] %}\n{%- else %}\n    {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n    {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n    {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n    {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n    {#- Extract the first user message so we can plug it in here #}\n    {%- if messages | length != 0 %}\n        {%- set first_user_message = messages[0]['content']|trim %}\n        {%- set messages = messages[1:] %}\n    {%- else %}\n        {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n    {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n    {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n    {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n    {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n    {{- \"Do not use variables.\\n\\n\" }}\n    {%- for t in tools %}\n        {{- t | tojson(indent=4) }}\n        {{- \"\\n\\n\" }}\n    {%- endfor %}\n    {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n    {%- elif 'tool_calls' in message %}\n        {%- if not message.tool_calls|length == 1 %}\n            {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n        {%- endif %}\n        {%- set tool_call = message.tool_calls[0].function %}\n        {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n            {%- for arg_name, arg_val in tool_call.arguments | items %}\n                {{- arg_name + '=\"' + arg_val + '\"' }}\n                {%- if not loop.last %}\n                    {{- \", \" }}\n                {%- endif %}\n            {%- endfor %}\n            {{- \")\" }}\n        {%- else %}\n            {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n            {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n            {{- '\"parameters\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- \"}\" }}\n        {%- endif %}\n        {%- if builtin_tools is defined %}\n            {#- This means we're in ipython mode #}\n            {{- \"<|eom_id|>\" }}\n        {%- else %}\n            {{- \"<|eot_id|>\" }}\n        {%- endif %}\n    {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n        {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n        {%- if message.content is mapping or message.content is iterable %}\n            {{- message.content | tojson }}\n        {%- else %}\n            {{- message.content }}\n        {%- endif %}\n        {{- \"<|eot_id|>\" }}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n",
+  "chat_template": "{% set loop_messages = messages %}\n{% for message in loop_messages %}\n{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' %}\n{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}\n{{ content }}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}{% endif %}\n",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|eot_id|>",
+  "extra_special_tokens": {},
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 131072,
+  "pad_token": "<|end_of_text|>",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }
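The replacement `chat_template` drops the Llama 3.1 tool-calling scaffolding in favor of the plain llama3 header/EOT format that the mergekit config requested via `chat_template: llama3`. Rendering a conversation through it is a one-liner with `apply_chat_template`; the path is a placeholder:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./merged-model")  # placeholder path

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
prompt = tok.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
# <|begin_of_text|><|start_header_id|>system<|end_header_id|>...<|eot_id|>
# <|start_header_id|>user<|end_header_id|>...<|eot_id|>
# ends with <|start_header_id|>assistant<|end_header_id|>
```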