davisrbr committed
Commit 7855692
1 Parent(s): 47d095e

Upload folder using huggingface_hub

config.json CHANGED
@@ -20,7 +20,7 @@
   "rope_theta": 1000000.0,
   "sliding_window": null,
   "tie_word_embeddings": true,
-  "torch_dtype": "float32",
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.44.0",
   "use_cache": false,
   "use_sliding_window": false,
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34eb00fa0644fb1e53827256ff290d1e0f220d894d74bb33fbd4e883766ced5a
-size 4996670464
+oid sha256:861c2ec5ce8f32ebb9be60a0d113a088b8bcc2ddbea4cd94f84d3826c9931b1c
+size 2377258952
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:efbcfe665bf0fbc742beb0bf7b7bb878ed0eb561a5bda0af0281f96728aa66e5
-size 1178224960
+oid sha256:f2473ba39dcd5aa702746fc6fde5d532e3a7c59319c9f511d0e70701883219fd
+size 710208072
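
Both shard diffs above are Git LFS pointer files: the repository tracks only a sha256 oid and a byte size, while the tensor data itself lives in LFS storage. The new shards are markedly smaller (2377258952 and 710208072 bytes, down from 4996670464 and 1178224960), consistent with the dtype change and the resharding recorded in the index below. A sketch for verifying a downloaded shard against its pointer (the local path is an assumption about where the file was fetched):

    import hashlib

    path = "model-00001-of-00002.safetensors"  # assumed local download path
    expected_oid = "861c2ec5ce8f32ebb9be60a0d113a088b8bcc2ddbea4cd94f84d3826c9931b1c"
    expected_size = 2377258952

    h = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            h.update(chunk)
            size += len(chunk)

    assert size == expected_size, f"size mismatch: {size}"
    assert h.hexdigest() == expected_oid, "sha256 mismatch"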
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 6174857216
+    "total_size": 5707995136
   },
   "weight_map": {
     "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
@@ -160,11 +160,11 @@
     "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
     "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
-    "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
-    "model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
-    "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
-    "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
     "model.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
     "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
@@ -174,87 +174,87 @@
     "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
     "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
     "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
-    "model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
-    "model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
     "model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
-    "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
-    "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
-    "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
-    "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
     "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
-    "model.layers.22.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
-    "model.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
-    "model.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
     "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
-    "model.layers.23.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
-    "model.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
-    "model.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
     "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
-    "model.layers.24.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
-    "model.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
-    "model.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
     "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
-    "model.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
     "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
-    "model.layers.26.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
-    "model.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
-    "model.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
     "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
-    "model.layers.27.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
-    "model.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
-    "model.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
     "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
     "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
     "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 512,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
+  "truncation": null,
   "padding": null,
   "added_tokens": [
     {
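
The tokenizer previously shipped with a baked-in truncation policy (right-side truncation at 512 tokens); this commit clears it to "truncation": null, leaving truncation up to the caller. A sketch of the equivalent edit with the tokenizers library:

    from tokenizers import Tokenizer

    tok = Tokenizer.from_file("tokenizer.json")
    tok.no_truncation()         # drops the saved truncation policy
    tok.save("tokenizer.json")  # serializes with "truncation": null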