aipib committed on
Commit
afdfa70
1 Parent(s): faeabb2

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -3,26 +3,26 @@ tags:
3
  - merge
4
  - mergekit
5
  - lazymergekit
6
- - llm-jp-1.3b
7
  base_model:
8
- - llm-jp-1.3b
9
- - llm-jp-1.3b
10
  ---
11
 
12
  # llmjp-linear
13
 
14
  llmjp-linear is a merge of the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
15
- * [llm-jp-1.3b](https://huggingface.co/llm-jp-1.3b)
16
- * [llm-jp-1.3b](https://huggingface.co/llm-jp-1.3b)
17
 
18
  ## 🧩 Configuration
19
 
20
  ```yaml
21
  models:
22
- - model: llm-jp-1.3b
23
  parameters:
24
  weight: 0.5
25
- - model: llm-jp-1.3b
26
  parameters:
27
  weight: 0.5
28
  merge_method: linear
 
3
  - merge
4
  - mergekit
5
  - lazymergekit
6
+ - llm-jp/llm-jp-1.3b-v1.0
7
  base_model:
8
+ - llm-jp/llm-jp-1.3b-v1.0
9
+ - llm-jp/llm-jp-1.3b-v1.0
10
  ---
11
 
12
  # llmjp-linear
13
 
14
  llmjp-linear is a merge of the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
15
+ * [llm-jp/llm-jp-1.3b-v1.0](https://huggingface.co/llm-jp/llm-jp-1.3b-v1.0)
16
+ * [llm-jp/llm-jp-1.3b-v1.0](https://huggingface.co/llm-jp/llm-jp-1.3b-v1.0)
17
 
18
  ## 🧩 Configuration
19
 
20
  ```yaml
21
  models:
22
+ - model: llm-jp/llm-jp-1.3b-v1.0
23
  parameters:
24
  weight: 0.5
25
+ - model: llm-jp/llm-jp-1.3b-v1.0
26
  parameters:
27
  weight: 0.5
28
  merge_method: linear
config.json CHANGED
@@ -16,7 +16,7 @@
16
  "n_embd": 2048,
17
  "n_head": 16,
18
  "n_inner": 8192,
19
- "n_layer": 22,
20
  "n_positions": 2048,
21
  "reorder_and_upcast_attn": false,
22
  "resid_pdrop": 0.1,
@@ -27,7 +27,7 @@
27
  "summary_proj_to_labels": true,
28
  "summary_type": "cls_index",
29
  "summary_use_proj": true,
30
- "torch_dtype": "bfloat16",
31
  "transformers_version": "4.41.2",
32
  "use_cache": true,
33
  "vocab_size": 50688
 
16
  "n_embd": 2048,
17
  "n_head": 16,
18
  "n_inner": 8192,
19
+ "n_layer": 24,
20
  "n_positions": 2048,
21
  "reorder_and_upcast_attn": false,
22
  "resid_pdrop": 0.1,
 
27
  "summary_proj_to_labels": true,
28
  "summary_type": "cls_index",
29
  "summary_use_proj": true,
30
+ "torch_dtype": "float16",
31
  "transformers_version": "4.41.2",
32
  "use_cache": true,
33
  "vocab_size": 50688
mergekit_config.yml CHANGED
@@ -1,17 +1,10 @@
1
 
2
- slices:
3
- - sources:
4
- - model: llm-jp/llm-jp-1.3b-v1.0
5
- layer_range: [0, 22]
6
- - model: llm-jp/llm-jp-1.3b-v1.0
7
- layer_range: [0, 22]
8
- merge_method: slerp
9
- base_model: llm-jp/llm-jp-1.3b-v1.0
10
- parameters:
11
- t:
12
- - filter: self_attn
13
- value: [0, 0.5, 0.3, 0.7, 1]
14
- - filter: mlp
15
- value: [1, 0.5, 0.7, 0.3, 0]
16
- - value: 0.5
17
- dtype: bfloat16
 
1
 
2
+ models:
3
+ - model: llm-jp/llm-jp-1.3b-v1.0
4
+ parameters:
5
+ weight: 0.5
6
+ - model: llm-jp/llm-jp-1.3b-v1.0
7
+ parameters:
8
+ weight: 0.5
9
+ merge_method: linear
10
+ dtype: float16
 
 
 
 
 
 
 
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d43b2cddea2c8404cd734113d21927f673ae84eae584304b1521a8fb05cfd4e1
3
- size 973623440
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03388c67f4332f079a7dda8beb61739340414ac249cfbc7cb3903b63f6837cdd
3
+ size 973623320
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29f48914f0f2bf6ccc95d053bbbe7924cd8aca5b841abafd9070cd2ebcc147db
3
- size 973619208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:126a50fd40f9f0029d965abebd01582597d002ee3ed1eb3f08b09b01fd9e5478
3
+ size 973619112
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c555038c07f4bcb87eaaa17836ec330b861382898a1aaa889ec09386a521c9f
3
- size 484564112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:915ae68df4d16804e481e02ac58c8c81136fa42f055802f498bd7f02c42cf399
3
+ size 685999640
model.safetensors.index.json CHANGED
@@ -1 +1 @@
1
- {"metadata": {"mergekit_version": "0.0.4.2", "total_size": 2431778816}, "weight_map": {"transformer.h.0.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.0.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.0.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.0.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.0.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.0.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.0.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.0.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.1.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.1.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.1.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.1.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.10.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.10.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.10.attn.c_proj.bias": "model-00001-of-00003.safetensors", 
"transformer.h.10.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.10.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.10.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.10.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.10.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.11.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.11.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.11.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.11.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.12.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.12.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.12.ln_2.bias": "model-00001-of-00003.safetensors", 
"transformer.h.12.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.13.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.13.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.13.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.13.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.14.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.14.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.14.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.14.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.c_proj.bias": "model-00001-of-00003.safetensors", 
"transformer.h.14.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.15.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.15.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.15.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.15.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.16.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.16.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.16.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.16.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.17.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.17.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.17.attn.c_proj.bias": "model-00001-of-00003.safetensors", 
"transformer.h.17.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.17.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.17.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.17.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.17.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.18.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.18.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.18.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.18.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.19.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.19.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.19.ln_2.bias": "model-00002-of-00003.safetensors", 
"transformer.h.19.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.2.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.2.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.2.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.2.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.20.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.20.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.20.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.20.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.20.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.20.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.20.mlp.c_proj.bias": "model-00002-of-00003.safetensors", 
"transformer.h.20.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.21.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.21.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.21.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.21.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.3.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.3.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.3.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.3.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.4.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.4.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.4.attn.c_proj.bias": "model-00002-of-00003.safetensors", 
"transformer.h.4.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.4.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.4.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.4.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.4.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.5.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.5.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.5.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.5.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.5.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.5.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.5.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.5.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.6.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.6.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.6.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.6.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.6.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.6.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.6.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.6.ln_2.weight": 
"model-00002-of-00003.safetensors", "transformer.h.6.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.6.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.6.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.6.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.7.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.7.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.7.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.7.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.7.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.7.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.7.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.7.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.7.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.7.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.7.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.7.mlp.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_attn.bias": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_attn.weight": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.8.ln_1.bias": "model-00003-of-00003.safetensors", "transformer.h.8.ln_1.weight": "model-00003-of-00003.safetensors", "transformer.h.8.ln_2.bias": "model-00003-of-00003.safetensors", "transformer.h.8.ln_2.weight": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_fc.bias": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_proj.weight": "model-00003-of-00003.safetensors", 
"transformer.h.9.attn.c_attn.bias": "model-00003-of-00003.safetensors", "transformer.h.9.attn.c_attn.weight": "model-00003-of-00003.safetensors", "transformer.h.9.attn.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.9.attn.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.9.ln_1.bias": "model-00003-of-00003.safetensors", "transformer.h.9.ln_1.weight": "model-00003-of-00003.safetensors", "transformer.h.9.ln_2.bias": "model-00003-of-00003.safetensors", "transformer.h.9.ln_2.weight": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_fc.bias": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.ln_f.bias": "model-00003-of-00003.safetensors", "transformer.ln_f.weight": "model-00003-of-00003.safetensors", "transformer.wpe.weight": "model-00003-of-00003.safetensors", "transformer.wte.weight": "model-00003-of-00003.safetensors"}}
 
1
+ {"metadata": {"mergekit_version": "0.0.4.2", "total_size": 2633211904}, "weight_map": {"transformer.h.0.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.0.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.0.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.0.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.0.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.0.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.0.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.0.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.0.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.1.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.1.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.1.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.1.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.1.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.1.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.10.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.10.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.10.attn.c_proj.bias": "model-00001-of-00003.safetensors", 
"transformer.h.10.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.10.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.10.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.10.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.10.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.10.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.11.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.11.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.11.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.11.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.11.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.11.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.12.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.12.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.12.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.12.ln_2.bias": "model-00001-of-00003.safetensors", 
"transformer.h.12.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.12.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.13.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.13.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.13.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.13.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.13.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.13.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.14.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.14.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.14.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.14.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.14.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.14.mlp.c_proj.bias": "model-00001-of-00003.safetensors", 
"transformer.h.14.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.15.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.15.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.15.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.15.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.15.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.15.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.16.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.16.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.16.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.16.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.16.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.16.mlp.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.17.attn.c_attn.bias": "model-00001-of-00003.safetensors", "transformer.h.17.attn.c_attn.weight": "model-00001-of-00003.safetensors", "transformer.h.17.attn.c_proj.bias": "model-00001-of-00003.safetensors", 
"transformer.h.17.attn.c_proj.weight": "model-00001-of-00003.safetensors", "transformer.h.17.ln_1.bias": "model-00001-of-00003.safetensors", "transformer.h.17.ln_1.weight": "model-00001-of-00003.safetensors", "transformer.h.17.ln_2.bias": "model-00001-of-00003.safetensors", "transformer.h.17.ln_2.weight": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_fc.bias": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_fc.weight": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_proj.bias": "model-00001-of-00003.safetensors", "transformer.h.17.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.18.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.18.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.18.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.18.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.18.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.18.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.19.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.19.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.19.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.19.ln_2.bias": "model-00002-of-00003.safetensors", 
"transformer.h.19.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.19.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.2.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.2.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.2.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.2.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.2.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.2.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.20.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.20.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.20.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.20.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.20.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.20.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.20.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.20.mlp.c_proj.bias": "model-00002-of-00003.safetensors", 
"transformer.h.20.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.21.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.21.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.21.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.21.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.21.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.21.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.22.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.22.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.22.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.22.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.22.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.22.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.22.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.22.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.22.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.22.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.22.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.22.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.23.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.23.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.23.attn.c_proj.bias": "model-00002-of-00003.safetensors", 
"transformer.h.23.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.23.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.23.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.23.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.23.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.23.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.23.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.23.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.23.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.3.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.3.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.3.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.3.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.3.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.3.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.4.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.4.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.4.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.4.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.4.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.4.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.4.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.4.ln_2.weight": 
"model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_fc.weight": "model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.4.mlp.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_attn.bias": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_attn.weight": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_proj.bias": "model-00002-of-00003.safetensors", "transformer.h.5.attn.c_proj.weight": "model-00002-of-00003.safetensors", "transformer.h.5.ln_1.bias": "model-00002-of-00003.safetensors", "transformer.h.5.ln_1.weight": "model-00002-of-00003.safetensors", "transformer.h.5.ln_2.bias": "model-00002-of-00003.safetensors", "transformer.h.5.ln_2.weight": "model-00002-of-00003.safetensors", "transformer.h.5.mlp.c_fc.bias": "model-00002-of-00003.safetensors", "transformer.h.5.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.5.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.5.mlp.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.6.attn.c_attn.bias": "model-00003-of-00003.safetensors", "transformer.h.6.attn.c_attn.weight": "model-00003-of-00003.safetensors", "transformer.h.6.attn.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.6.attn.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.6.ln_1.bias": "model-00003-of-00003.safetensors", "transformer.h.6.ln_1.weight": "model-00003-of-00003.safetensors", "transformer.h.6.ln_2.bias": "model-00003-of-00003.safetensors", "transformer.h.6.ln_2.weight": "model-00003-of-00003.safetensors", "transformer.h.6.mlp.c_fc.bias": "model-00003-of-00003.safetensors", "transformer.h.6.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.6.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.6.mlp.c_proj.weight": "model-00003-of-00003.safetensors", 
"transformer.h.7.attn.c_attn.bias": "model-00003-of-00003.safetensors", "transformer.h.7.attn.c_attn.weight": "model-00003-of-00003.safetensors", "transformer.h.7.attn.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.7.attn.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.7.ln_1.bias": "model-00003-of-00003.safetensors", "transformer.h.7.ln_1.weight": "model-00003-of-00003.safetensors", "transformer.h.7.ln_2.bias": "model-00003-of-00003.safetensors", "transformer.h.7.ln_2.weight": "model-00003-of-00003.safetensors", "transformer.h.7.mlp.c_fc.bias": "model-00003-of-00003.safetensors", "transformer.h.7.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.7.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.7.mlp.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_attn.bias": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_attn.weight": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.8.attn.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.8.ln_1.bias": "model-00003-of-00003.safetensors", "transformer.h.8.ln_1.weight": "model-00003-of-00003.safetensors", "transformer.h.8.ln_2.bias": "model-00003-of-00003.safetensors", "transformer.h.8.ln_2.weight": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_fc.bias": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.8.mlp.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.h.9.attn.c_attn.bias": "model-00003-of-00003.safetensors", "transformer.h.9.attn.c_attn.weight": "model-00003-of-00003.safetensors", "transformer.h.9.attn.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.9.attn.c_proj.weight": "model-00003-of-00003.safetensors", 
"transformer.h.9.ln_1.bias": "model-00003-of-00003.safetensors", "transformer.h.9.ln_1.weight": "model-00003-of-00003.safetensors", "transformer.h.9.ln_2.bias": "model-00003-of-00003.safetensors", "transformer.h.9.ln_2.weight": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_fc.bias": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_fc.weight": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_proj.bias": "model-00003-of-00003.safetensors", "transformer.h.9.mlp.c_proj.weight": "model-00003-of-00003.safetensors", "transformer.ln_f.bias": "model-00003-of-00003.safetensors", "transformer.ln_f.weight": "model-00003-of-00003.safetensors", "transformer.wpe.weight": "model-00003-of-00003.safetensors", "transformer.wte.weight": "model-00003-of-00003.safetensors"}}