voidful committed on
Commit ce836e6
1 Parent(s): 58a8c96

Upload MllamaForCausalLM

config.json CHANGED
@@ -38,5 +38,5 @@
  "torch_dtype": "float32",
  "transformers_version": "4.45.2",
  "use_cache": true,
- "vocab_size": 128257
+ "vocab_size": 51868
  }
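
The only functional change in config.json is the vocabulary resize from 128257 to 51868. A minimal sketch (not part of the commit) for checking that the resized embedding in the shards matches the new config value; file paths assume a local download of this repo, and `lm_head.weight` is located in the last shard per the index below:

```python
# Sketch only: verify the new vocab_size against the checkpoint, assuming the
# shards from this commit have been downloaded into the current directory.
import json

from safetensors import safe_open  # pip install safetensors

with open("config.json") as f:
    vocab_size = json.load(f)["vocab_size"]  # 51868 after this commit

# Per model.safetensors.index.json, lm_head.weight lives in the last shard.
with safe_open("model-00008-of-00008.safetensors", framework="pt") as f:
    rows = f.get_slice("lm_head.weight").get_shape()[0]

assert rows == vocab_size, f"lm_head has {rows} rows, config says {vocab_size}"
print("vocab_size consistent:", vocab_size)
```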
model-00001-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bde7afd4ba15082421019362e80997bae4d20150b0a6196cfd82cd475a1f2cc9
- size 4886500512
+ oid sha256:5f6a627fdd24cd25838cf054ae392a4734696bd582a63073e22580f2ae5e8f60
+ size 4977137944
model-00002-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:03a19a2573e7c827d01e42e814ab8b91c19f12b0661969cc926038afb09d8f22
- size 4832008928
+ oid sha256:2e192b8eb2c04f2b649a6911d9cb948d98e1367198098f93dc7d5a177dccd25f
+ size 4999814552
model-00003-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bae5faddb113d7637c7a6adb70da821554fda3f359f52067db7d103986f3b5c3
- size 4999798088
+ oid sha256:708d4413c6a4487e7ef0e869ab2f705714ba440dd6359ee3c46eb27c909550e1
+ size 4999814600
model-00004-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:112df2b7cebda00a941e9ea288cfd65b181d4cd4cd809ac7594c8b404fa9a160
- size 4999814600
+ oid sha256:42b2160d24b8c9aef3e5ee63fb9d3ca9473980ed691cb263e62ebc2a7b862332
+ size 4832008968
model-00005-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:15b0f48c3c03cdf2b94959383ccf6513e69020230243065f036110ed9bd138eb
- size 4832008968
+ oid sha256:80b3eeb954da9f52b7e1e13511b6d1cadf559eb51c088041181d5e71e758da4c
+ size 4999814592
model-00006-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:15fb95f1fd07d3ab2799ef64b019aff71c9f2f558f18c4a0d0c2bbcb7e458cd1
- size 4999814592
+ oid sha256:358b440e787e6c58174c7a8e21d6903d9a3c1e74371314f94a90e871519fc8f8
+ size 4999832552
model-00007-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:35316e26ce47cf3248e70bf79f6f644ee478463d793a8ac6557602b765958ac9
- size 4999814600
+ oid sha256:a774aff2e5c4df794b60d34c02394017475e2c872e8ccb1f7f28d92e1ed0d084
+ size 4832008976
model-00008-of-00008.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:df4db01e76b394d9fa3bb4c91555aa8eed077b50997a30053678c0ea00f4a77d
- size 4550955256
+ oid sha256:163864c5c1c8e804ceb8f0d86cc74a694ada89e2d5e511b1f2ef6937f3b8beeb
+ size 1957168600
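
Each pointer above follows the git-lfs spec (`oid sha256:` plus `size` in bytes), so a downloaded shard can be verified locally. A hedged sketch using only the standard library; the tuple list would be filled in from the `+` pointer lines of the diffs above (only the first shard is shown):

```python
# Sketch only: verify downloaded shards against their git-lfs pointers.
import hashlib
import os

# (file name, sha256 oid, size in bytes), taken from the "+" lines above.
shards = [
    ("model-00001-of-00008.safetensors",
     "5f6a627fdd24cd25838cf054ae392a4734696bd582a63073e22580f2ae5e8f60",
     4977137944),
]

for path, oid, size in shards:
    assert os.path.getsize(path) == size, f"{path}: size mismatch"
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks to avoid loading multi-GB files into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    assert digest.hexdigest() == oid, f"{path}: checksum mismatch"
    print(path, "OK")
```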
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
  {
  "metadata": {
- "total_size": 39100670016
+ "total_size": 36597555264
  },
  "weight_map": {
  "lm_head.weight": "model-00008-of-00008.safetensors",
@@ -25,13 +25,13 @@
  "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
  "model.layers.10.input_layernorm.weight": "model-00003-of-00008.safetensors",
  "model.layers.10.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
  "model.layers.10.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
  "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
- "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
  "model.layers.11.input_layernorm.weight": "model-00003-of-00008.safetensors",
  "model.layers.11.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
  "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
@@ -63,33 +63,33 @@
  "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
  "model.layers.13.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
  "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
- "model.layers.14.input_layernorm.weight": "model-00004-of-00008.safetensors",
- "model.layers.14.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00003-of-00008.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
  "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.14.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
  "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
  "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
  "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
  "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.15.input_layernorm.weight": "model-00004-of-00008.safetensors",
- "model.layers.15.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.15.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
- "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00003-of-00008.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
  "model.layers.16.input_layernorm.weight": "model-00004-of-00008.safetensors",
  "model.layers.16.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
  "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
  "model.layers.16.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
  "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
- "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
- "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
  "model.layers.17.input_layernorm.weight": "model-00004-of-00008.safetensors",
  "model.layers.17.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
  "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
@@ -130,24 +130,24 @@
  "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
  "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
  "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
- "model.layers.20.input_layernorm.weight": "model-00005-of-00008.safetensors",
- "model.layers.20.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.20.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00004-of-00008.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00004-of-00008.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
+ "model.layers.20.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00004-of-00008.safetensors",
  "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
  "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
  "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
  "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
  "model.layers.21.input_layernorm.weight": "model-00005-of-00008.safetensors",
  "model.layers.21.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.21.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
+ "model.layers.21.mlp.gate_proj.weight": "model-00004-of-00008.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00004-of-00008.safetensors",
  "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
- "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00004-of-00008.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00004-of-00008.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00004-of-00008.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00004-of-00008.safetensors",
  "model.layers.22.input_layernorm.weight": "model-00005-of-00008.safetensors",
  "model.layers.22.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
@@ -179,33 +179,33 @@
  "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.25.input_layernorm.weight": "model-00006-of-00008.safetensors",
- "model.layers.25.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00005-of-00008.safetensors",
+ "model.layers.25.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.25.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
  "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
- "model.layers.26.input_layernorm.weight": "model-00006-of-00008.safetensors",
- "model.layers.26.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.26.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.26.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
- "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00005-of-00008.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00005-of-00008.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00005-of-00008.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00005-of-00008.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.27.input_layernorm.weight": "model-00006-of-00008.safetensors",
  "model.layers.27.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.27.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
  "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
- "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00005-of-00008.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00005-of-00008.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00005-of-00008.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00005-of-00008.safetensors",
  "model.layers.28.cross_attn.k_norm.weight": "model-00006-of-00008.safetensors",
  "model.layers.28.cross_attn.k_proj.weight": "model-00006-of-00008.safetensors",
  "model.layers.28.cross_attn.o_proj.weight": "model-00006-of-00008.safetensors",
@@ -237,10 +237,10 @@
  "model.layers.3.cross_attn_attn_gate": "model-00001-of-00008.safetensors",
  "model.layers.3.cross_attn_mlp_gate": "model-00001-of-00008.safetensors",
  "model.layers.3.input_layernorm.weight": "model-00001-of-00008.safetensors",
- "model.layers.3.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.3.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00008.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00008.safetensors",
  "model.layers.30.input_layernorm.weight": "model-00006-of-00008.safetensors",
  "model.layers.30.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
  "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
@@ -250,33 +250,33 @@
  "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
  "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
  "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.31.input_layernorm.weight": "model-00007-of-00008.safetensors",
- "model.layers.31.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00006-of-00008.safetensors",
+ "model.layers.31.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
  "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.31.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.31.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
+ "model.layers.31.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
  "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
  "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
  "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
  "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
- "model.layers.32.input_layernorm.weight": "model-00007-of-00008.safetensors",
- "model.layers.32.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.32.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.32.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.32.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
- "model.layers.32.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.32.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.32.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.32.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.33.cross_attn.k_norm.weight": "model-00007-of-00008.safetensors",
- "model.layers.33.cross_attn.k_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.33.cross_attn.o_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.33.cross_attn.q_norm.weight": "model-00007-of-00008.safetensors",
- "model.layers.33.cross_attn.q_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.33.cross_attn.v_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.33.cross_attn_attn_gate": "model-00007-of-00008.safetensors",
- "model.layers.33.cross_attn_mlp_gate": "model-00007-of-00008.safetensors",
- "model.layers.33.input_layernorm.weight": "model-00007-of-00008.safetensors",
+ "model.layers.32.input_layernorm.weight": "model-00006-of-00008.safetensors",
+ "model.layers.32.mlp.down_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.32.mlp.gate_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.32.mlp.up_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.32.post_attention_layernorm.weight": "model-00006-of-00008.safetensors",
+ "model.layers.32.self_attn.k_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.32.self_attn.o_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.32.self_attn.q_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.32.self_attn.v_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.33.cross_attn.k_norm.weight": "model-00006-of-00008.safetensors",
+ "model.layers.33.cross_attn.k_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.33.cross_attn.o_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.33.cross_attn.q_norm.weight": "model-00006-of-00008.safetensors",
+ "model.layers.33.cross_attn.q_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.33.cross_attn.v_proj.weight": "model-00006-of-00008.safetensors",
+ "model.layers.33.cross_attn_attn_gate": "model-00006-of-00008.safetensors",
+ "model.layers.33.cross_attn_mlp_gate": "model-00006-of-00008.safetensors",
+ "model.layers.33.input_layernorm.weight": "model-00006-of-00008.safetensors",
  "model.layers.33.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
  "model.layers.33.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
  "model.layers.33.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
@@ -308,27 +308,27 @@
  "model.layers.36.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
  "model.layers.36.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
  "model.layers.36.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.37.input_layernorm.weight": "model-00008-of-00008.safetensors",
- "model.layers.37.mlp.down_proj.weight": "model-00008-of-00008.safetensors",
- "model.layers.37.mlp.gate_proj.weight": "model-00008-of-00008.safetensors",
- "model.layers.37.mlp.up_proj.weight": "model-00008-of-00008.safetensors",
- "model.layers.37.post_attention_layernorm.weight": "model-00008-of-00008.safetensors",
+ "model.layers.37.input_layernorm.weight": "model-00007-of-00008.safetensors",
+ "model.layers.37.mlp.down_proj.weight": "model-00007-of-00008.safetensors",
+ "model.layers.37.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
+ "model.layers.37.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
+ "model.layers.37.post_attention_layernorm.weight": "model-00007-of-00008.safetensors",
  "model.layers.37.self_attn.k_proj.weight": "model-00007-of-00008.safetensors",
  "model.layers.37.self_attn.o_proj.weight": "model-00007-of-00008.safetensors",
  "model.layers.37.self_attn.q_proj.weight": "model-00007-of-00008.safetensors",
  "model.layers.37.self_attn.v_proj.weight": "model-00007-of-00008.safetensors",
- "model.layers.38.cross_attn.k_norm.weight": "model-00008-of-00008.safetensors",
- "model.layers.38.cross_attn.k_proj.weight": "model-00008-of-00008.safetensors",
- "model.layers.38.cross_attn.o_proj.weight": "model-00008-of-00008.safetensors",
- "model.layers.38.cross_attn.q_norm.weight": "model-00008-of-00008.safetensors",
- "model.layers.38.cross_attn.q_proj.weight": "model-00008-of-00008.safetensors",
- "model.layers.38.cross_attn.v_proj.weight": "model-00008-of-00008.safetensors",
- "model.layers.38.cross_attn_attn_gate": "model-00008-of-00008.safetensors",
- "model.layers.38.cross_attn_mlp_gate": "model-00008-of-00008.safetensors",
- "model.layers.38.input_layernorm.weight": "model-00008-of-00008.safetensors",
+ "model.layers.38.cross_attn.k_norm.weight": "model-00007-of-00008.safetensors",
+ "model.layers.38.cross_attn.k_proj.weight": "model-00007-of-00008.safetensors",
+ "model.layers.38.cross_attn.o_proj.weight": "model-00007-of-00008.safetensors",
+ "model.layers.38.cross_attn.q_norm.weight": "model-00007-of-00008.safetensors",
+ "model.layers.38.cross_attn.q_proj.weight": "model-00007-of-00008.safetensors",
+ "model.layers.38.cross_attn.v_proj.weight": "model-00007-of-00008.safetensors",
+ "model.layers.38.cross_attn_attn_gate": "model-00007-of-00008.safetensors",
+ "model.layers.38.cross_attn_mlp_gate": "model-00007-of-00008.safetensors",
+ "model.layers.38.input_layernorm.weight": "model-00007-of-00008.safetensors",
  "model.layers.38.mlp.down_proj.weight": "model-00008-of-00008.safetensors",
- "model.layers.38.mlp.gate_proj.weight": "model-00008-of-00008.safetensors",
- "model.layers.38.mlp.up_proj.weight": "model-00008-of-00008.safetensors",
+ "model.layers.38.mlp.gate_proj.weight": "model-00007-of-00008.safetensors",
+ "model.layers.38.mlp.up_proj.weight": "model-00007-of-00008.safetensors",
  "model.layers.38.post_attention_layernorm.weight": "model-00008-of-00008.safetensors",
  "model.layers.39.input_layernorm.weight": "model-00008-of-00008.safetensors",
  "model.layers.39.mlp.down_proj.weight": "model-00008-of-00008.safetensors",
@@ -341,13 +341,13 @@
  "model.layers.39.self_attn.v_proj.weight": "model-00008-of-00008.safetensors",
  "model.layers.4.input_layernorm.weight": "model-00002-of-00008.safetensors",
  "model.layers.4.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.4.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00008.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00008.safetensors",
  "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
- "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00008.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00008.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00008.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00008.safetensors",
  "model.layers.5.input_layernorm.weight": "model-00002-of-00008.safetensors",
  "model.layers.5.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
  "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
@@ -384,19 +384,19 @@
  "model.layers.8.cross_attn_attn_gate": "model-00002-of-00008.safetensors",
  "model.layers.8.cross_attn_mlp_gate": "model-00002-of-00008.safetensors",
  "model.layers.8.input_layernorm.weight": "model-00002-of-00008.safetensors",
- "model.layers.8.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
  "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
  "model.layers.8.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
- "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
- "model.layers.9.input_layernorm.weight": "model-00003-of-00008.safetensors",
- "model.layers.9.mlp.down_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.9.mlp.up_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00008.safetensors",
- "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00008.safetensors",
- "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00008.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00008.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00008.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00008.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00008.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00008.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00008.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00008.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00008.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00008.safetensors",
  "model.norm.weight": "model-00008-of-00008.safetensors"
  }
  }
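
The drop in `metadata.total_size` (39,100,670,016 to 36,597,555,264 bytes) is consistent with the smaller vocabulary and the re-balanced shards. A sketch (assuming a local download) that cross-checks the index against the files on disk; note that `total_size` counts tensor bytes only, so the sum of file sizes comes out slightly larger because each safetensors file also carries a JSON header:

```python
# Sketch only: sanity-check model.safetensors.index.json against local shards.
import json
import os

with open("model.safetensors.index.json") as f:
    index = json.load(f)

# Every shard referenced by the weight_map should exist on disk.
shards = sorted(set(index["weight_map"].values()))
for shard in shards:
    assert os.path.exists(shard), f"missing shard: {shard}"

on_disk = sum(os.path.getsize(s) for s in shards)
print("metadata total_size:", index["metadata"]["total_size"])  # 36597555264
# Slightly larger than total_size: file sizes include safetensors headers.
print("sum of shard file sizes:", on_disk)
```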