llm-slayer commited on
Commit
4db248d
1 Parent(s): d401ed5

add weights and chat config

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
mlc-chat-config.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "llama",
3
+ "quantization": "q0f16",
4
+ "model_config": {
5
+ "hidden_size": 2048,
6
+ "intermediate_size": 5504,
7
+ "num_attention_heads": 16,
8
+ "num_hidden_layers": 24,
9
+ "rms_norm_eps": 1e-05,
10
+ "vocab_size": 32002,
11
+ "position_embedding_base": 10000.0,
12
+ "context_window_size": 2048,
13
+ "prefill_chunk_size": 2048,
14
+ "num_key_value_heads": 16,
15
+ "head_dim": 128,
16
+ "tensor_parallel_shards": 1,
17
+ "max_batch_size": 80
18
+ },
19
+ "vocab_size": 32002,
20
+ "context_window_size": 2048,
21
+ "sliding_window_size": -1,
22
+ "prefill_chunk_size": 2048,
23
+ "attention_sink_size": -1,
24
+ "tensor_parallel_shards": 1,
25
+ "mean_gen_len": 128,
26
+ "max_gen_len": 512,
27
+ "shift_fill_factor": 0.3,
28
+ "temperature": 0.3,
29
+ "presence_penalty": 0.0,
30
+ "frequency_penalty": 0.0,
31
+ "repetition_penalty": 1.05,
32
+ "top_p": 0.9,
33
+ "conv_template": {
34
+ "name": "croissantllm",
35
+ "system_template": "<s>",
36
+ "system_message": "",
37
+ "add_role_after_system_message": true,
38
+ "roles": {
39
+ "user": "<|im_start|>user",
40
+ "assistant": "<|im_start|>assistant"
41
+ },
42
+ "role_templates": {
43
+ "user": "{user_message}",
44
+ "assistant": "{assistant_message}",
45
+ "tool": "{tool_message}"
46
+ },
47
+ "messages": [],
48
+ "seps": [
49
+ "<|im_end|>\n"
50
+ ],
51
+ "role_content_sep": "\n",
52
+ "role_empty_sep": "\n",
53
+ "stop_str": [
54
+ "<|im_end|>",
55
+ "</s>"
56
+ ],
57
+ "stop_token_ids": [
58
+ 32000,
59
+ 2
60
+ ],
61
+ "function_string": "",
62
+ "use_function_calling": false
63
+ },
64
+ "pad_token_id": 2,
65
+ "bos_token_id": 1,
66
+ "eos_token_id": 2,
67
+ "tokenizer_files": [
68
+ "tokenizer.json",
69
+ "tokenizer_config.json"
70
+ ],
71
+ "version": "0.1.0"
72
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,2185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 147,
4
+ "ParamBytes": 2690863104.0,
5
+ "BitsPerParam": 16.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 131080192,
12
+ "records": [
13
+ {
14
+ "name": "lm_head.weight",
15
+ "shape": [
16
+ 32002,
17
+ 2048
18
+ ],
19
+ "dtype": "float16",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 131080192,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "9f9e289791e66b5f020ed3adfd9733c5"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 45088768,
31
+ "records": [
32
+ {
33
+ "name": "model.layers.23.mlp.gate_up_proj.weight",
34
+ "shape": [
35
+ 11008,
36
+ 2048
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 45088768,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "4d8a064245fdf9d5527440d3758c80fa"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 131080192,
50
+ "records": [
51
+ {
52
+ "name": "model.embed_tokens.weight",
53
+ "shape": [
54
+ 32002,
55
+ 2048
56
+ ],
57
+ "dtype": "float16",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 131080192,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "b1d54ac38b97b992d841f2d125696f6c"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 22544384,
69
+ "records": [
70
+ {
71
+ "name": "model.layers.0.mlp.down_proj.weight",
72
+ "shape": [
73
+ 2048,
74
+ 5504
75
+ ],
76
+ "dtype": "float16",
77
+ "format": "f32-to-bf16",
78
+ "nbytes": 22544384,
79
+ "byteOffset": 0
80
+ }
81
+ ],
82
+ "md5sum": "0c9b5631c8d0782487272c1c05821ea0"
83
+ },
84
+ {
85
+ "dataPath": "params_shard_4.bin",
86
+ "format": "raw-shard",
87
+ "nbytes": 45088768,
88
+ "records": [
89
+ {
90
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
91
+ "shape": [
92
+ 11008,
93
+ 2048
94
+ ],
95
+ "dtype": "float16",
96
+ "format": "f32-to-bf16",
97
+ "nbytes": 45088768,
98
+ "byteOffset": 0
99
+ }
100
+ ],
101
+ "md5sum": "68ffe5cdc7b36404f6d9c117b005502f"
102
+ },
103
+ {
104
+ "dataPath": "params_shard_5.bin",
105
+ "format": "raw-shard",
106
+ "nbytes": 25165824,
107
+ "records": [
108
+ {
109
+ "name": "model.layers.0.self_attn.qkv_proj.weight",
110
+ "shape": [
111
+ 6144,
112
+ 2048
113
+ ],
114
+ "dtype": "float16",
115
+ "format": "f32-to-bf16",
116
+ "nbytes": 25165824,
117
+ "byteOffset": 0
118
+ }
119
+ ],
120
+ "md5sum": "a1ee14cff03dde5b33e30af7d575a190"
121
+ },
122
+ {
123
+ "dataPath": "params_shard_6.bin",
124
+ "format": "raw-shard",
125
+ "nbytes": 22544384,
126
+ "records": [
127
+ {
128
+ "name": "model.layers.1.mlp.down_proj.weight",
129
+ "shape": [
130
+ 2048,
131
+ 5504
132
+ ],
133
+ "dtype": "float16",
134
+ "format": "f32-to-bf16",
135
+ "nbytes": 22544384,
136
+ "byteOffset": 0
137
+ }
138
+ ],
139
+ "md5sum": "f17d8f11d5eb78bcd28a2ab102bd35df"
140
+ },
141
+ {
142
+ "dataPath": "params_shard_7.bin",
143
+ "format": "raw-shard",
144
+ "nbytes": 45088768,
145
+ "records": [
146
+ {
147
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
148
+ "shape": [
149
+ 11008,
150
+ 2048
151
+ ],
152
+ "dtype": "float16",
153
+ "format": "f32-to-bf16",
154
+ "nbytes": 45088768,
155
+ "byteOffset": 0
156
+ }
157
+ ],
158
+ "md5sum": "808fc0d79c4eb2667490bbb583798d04"
159
+ },
160
+ {
161
+ "dataPath": "params_shard_8.bin",
162
+ "format": "raw-shard",
163
+ "nbytes": 25165824,
164
+ "records": [
165
+ {
166
+ "name": "model.layers.1.self_attn.qkv_proj.weight",
167
+ "shape": [
168
+ 6144,
169
+ 2048
170
+ ],
171
+ "dtype": "float16",
172
+ "format": "f32-to-bf16",
173
+ "nbytes": 25165824,
174
+ "byteOffset": 0
175
+ }
176
+ ],
177
+ "md5sum": "16485b4ac2d9d5a30eca613c1714bb28"
178
+ },
179
+ {
180
+ "dataPath": "params_shard_9.bin",
181
+ "format": "raw-shard",
182
+ "nbytes": 30961664,
183
+ "records": [
184
+ {
185
+ "name": "model.layers.23.input_layernorm.weight",
186
+ "shape": [
187
+ 2048
188
+ ],
189
+ "dtype": "float16",
190
+ "format": "f32-to-bf16",
191
+ "nbytes": 4096,
192
+ "byteOffset": 0
193
+ },
194
+ {
195
+ "name": "model.layers.23.mlp.down_proj.weight",
196
+ "shape": [
197
+ 2048,
198
+ 5504
199
+ ],
200
+ "dtype": "float16",
201
+ "format": "f32-to-bf16",
202
+ "nbytes": 22544384,
203
+ "byteOffset": 4096
204
+ },
205
+ {
206
+ "name": "model.layers.23.post_attention_layernorm.weight",
207
+ "shape": [
208
+ 2048
209
+ ],
210
+ "dtype": "float16",
211
+ "format": "f32-to-bf16",
212
+ "nbytes": 4096,
213
+ "byteOffset": 22548480
214
+ },
215
+ {
216
+ "name": "model.norm.weight",
217
+ "shape": [
218
+ 2048
219
+ ],
220
+ "dtype": "float16",
221
+ "format": "f32-to-bf16",
222
+ "nbytes": 4096,
223
+ "byteOffset": 22552576
224
+ },
225
+ {
226
+ "name": "model.layers.0.input_layernorm.weight",
227
+ "shape": [
228
+ 2048
229
+ ],
230
+ "dtype": "float16",
231
+ "format": "f32-to-bf16",
232
+ "nbytes": 4096,
233
+ "byteOffset": 22556672
234
+ },
235
+ {
236
+ "name": "model.layers.0.post_attention_layernorm.weight",
237
+ "shape": [
238
+ 2048
239
+ ],
240
+ "dtype": "float16",
241
+ "format": "f32-to-bf16",
242
+ "nbytes": 4096,
243
+ "byteOffset": 22560768
244
+ },
245
+ {
246
+ "name": "model.layers.0.self_attn.o_proj.weight",
247
+ "shape": [
248
+ 2048,
249
+ 2048
250
+ ],
251
+ "dtype": "float16",
252
+ "format": "f32-to-bf16",
253
+ "nbytes": 8388608,
254
+ "byteOffset": 22564864
255
+ },
256
+ {
257
+ "name": "model.layers.1.input_layernorm.weight",
258
+ "shape": [
259
+ 2048
260
+ ],
261
+ "dtype": "float16",
262
+ "format": "f32-to-bf16",
263
+ "nbytes": 4096,
264
+ "byteOffset": 30953472
265
+ },
266
+ {
267
+ "name": "model.layers.1.post_attention_layernorm.weight",
268
+ "shape": [
269
+ 2048
270
+ ],
271
+ "dtype": "float16",
272
+ "format": "f32-to-bf16",
273
+ "nbytes": 4096,
274
+ "byteOffset": 30957568
275
+ }
276
+ ],
277
+ "md5sum": "6c67534699b30cdf34f3b290f6585246"
278
+ },
279
+ {
280
+ "dataPath": "params_shard_10.bin",
281
+ "format": "raw-shard",
282
+ "nbytes": 45088768,
283
+ "records": [
284
+ {
285
+ "name": "model.layers.10.mlp.gate_up_proj.weight",
286
+ "shape": [
287
+ 11008,
288
+ 2048
289
+ ],
290
+ "dtype": "float16",
291
+ "format": "f32-to-bf16",
292
+ "nbytes": 45088768,
293
+ "byteOffset": 0
294
+ }
295
+ ],
296
+ "md5sum": "0d4c78a526d638be4cda8f99eda54c52"
297
+ },
298
+ {
299
+ "dataPath": "params_shard_11.bin",
300
+ "format": "raw-shard",
301
+ "nbytes": 25165824,
302
+ "records": [
303
+ {
304
+ "name": "model.layers.10.self_attn.qkv_proj.weight",
305
+ "shape": [
306
+ 6144,
307
+ 2048
308
+ ],
309
+ "dtype": "float16",
310
+ "format": "f32-to-bf16",
311
+ "nbytes": 25165824,
312
+ "byteOffset": 0
313
+ }
314
+ ],
315
+ "md5sum": "2e96d783ac5815edb36271486b860b91"
316
+ },
317
+ {
318
+ "dataPath": "params_shard_12.bin",
319
+ "format": "raw-shard",
320
+ "nbytes": 30941184,
321
+ "records": [
322
+ {
323
+ "name": "model.layers.1.self_attn.o_proj.weight",
324
+ "shape": [
325
+ 2048,
326
+ 2048
327
+ ],
328
+ "dtype": "float16",
329
+ "format": "f32-to-bf16",
330
+ "nbytes": 8388608,
331
+ "byteOffset": 0
332
+ },
333
+ {
334
+ "name": "model.layers.10.input_layernorm.weight",
335
+ "shape": [
336
+ 2048
337
+ ],
338
+ "dtype": "float16",
339
+ "format": "f32-to-bf16",
340
+ "nbytes": 4096,
341
+ "byteOffset": 8388608
342
+ },
343
+ {
344
+ "name": "model.layers.10.mlp.down_proj.weight",
345
+ "shape": [
346
+ 2048,
347
+ 5504
348
+ ],
349
+ "dtype": "float16",
350
+ "format": "f32-to-bf16",
351
+ "nbytes": 22544384,
352
+ "byteOffset": 8392704
353
+ },
354
+ {
355
+ "name": "model.layers.10.post_attention_layernorm.weight",
356
+ "shape": [
357
+ 2048
358
+ ],
359
+ "dtype": "float16",
360
+ "format": "f32-to-bf16",
361
+ "nbytes": 4096,
362
+ "byteOffset": 30937088
363
+ }
364
+ ],
365
+ "md5sum": "debfe6d51b8b897fb7bc6d96711c3ed3"
366
+ },
367
+ {
368
+ "dataPath": "params_shard_13.bin",
369
+ "format": "raw-shard",
370
+ "nbytes": 45088768,
371
+ "records": [
372
+ {
373
+ "name": "model.layers.11.mlp.gate_up_proj.weight",
374
+ "shape": [
375
+ 11008,
376
+ 2048
377
+ ],
378
+ "dtype": "float16",
379
+ "format": "f32-to-bf16",
380
+ "nbytes": 45088768,
381
+ "byteOffset": 0
382
+ }
383
+ ],
384
+ "md5sum": "50ebc40a05f4aa4ce9859316ff83d9f8"
385
+ },
386
+ {
387
+ "dataPath": "params_shard_14.bin",
388
+ "format": "raw-shard",
389
+ "nbytes": 25165824,
390
+ "records": [
391
+ {
392
+ "name": "model.layers.11.self_attn.qkv_proj.weight",
393
+ "shape": [
394
+ 6144,
395
+ 2048
396
+ ],
397
+ "dtype": "float16",
398
+ "format": "f32-to-bf16",
399
+ "nbytes": 25165824,
400
+ "byteOffset": 0
401
+ }
402
+ ],
403
+ "md5sum": "88a35ca8dbbee98cb90ffe2d61b9cde7"
404
+ },
405
+ {
406
+ "dataPath": "params_shard_15.bin",
407
+ "format": "raw-shard",
408
+ "nbytes": 30941184,
409
+ "records": [
410
+ {
411
+ "name": "model.layers.10.self_attn.o_proj.weight",
412
+ "shape": [
413
+ 2048,
414
+ 2048
415
+ ],
416
+ "dtype": "float16",
417
+ "format": "f32-to-bf16",
418
+ "nbytes": 8388608,
419
+ "byteOffset": 0
420
+ },
421
+ {
422
+ "name": "model.layers.11.input_layernorm.weight",
423
+ "shape": [
424
+ 2048
425
+ ],
426
+ "dtype": "float16",
427
+ "format": "f32-to-bf16",
428
+ "nbytes": 4096,
429
+ "byteOffset": 8388608
430
+ },
431
+ {
432
+ "name": "model.layers.11.mlp.down_proj.weight",
433
+ "shape": [
434
+ 2048,
435
+ 5504
436
+ ],
437
+ "dtype": "float16",
438
+ "format": "f32-to-bf16",
439
+ "nbytes": 22544384,
440
+ "byteOffset": 8392704
441
+ },
442
+ {
443
+ "name": "model.layers.11.post_attention_layernorm.weight",
444
+ "shape": [
445
+ 2048
446
+ ],
447
+ "dtype": "float16",
448
+ "format": "f32-to-bf16",
449
+ "nbytes": 4096,
450
+ "byteOffset": 30937088
451
+ }
452
+ ],
453
+ "md5sum": "fabe4ecbf48146653f53514f88857355"
454
+ },
455
+ {
456
+ "dataPath": "params_shard_16.bin",
457
+ "format": "raw-shard",
458
+ "nbytes": 45088768,
459
+ "records": [
460
+ {
461
+ "name": "model.layers.12.mlp.gate_up_proj.weight",
462
+ "shape": [
463
+ 11008,
464
+ 2048
465
+ ],
466
+ "dtype": "float16",
467
+ "format": "f32-to-bf16",
468
+ "nbytes": 45088768,
469
+ "byteOffset": 0
470
+ }
471
+ ],
472
+ "md5sum": "b904d3951fbdc1591eef73a5c5efbbcd"
473
+ },
474
+ {
475
+ "dataPath": "params_shard_17.bin",
476
+ "format": "raw-shard",
477
+ "nbytes": 25165824,
478
+ "records": [
479
+ {
480
+ "name": "model.layers.12.self_attn.qkv_proj.weight",
481
+ "shape": [
482
+ 6144,
483
+ 2048
484
+ ],
485
+ "dtype": "float16",
486
+ "format": "f32-to-bf16",
487
+ "nbytes": 25165824,
488
+ "byteOffset": 0
489
+ }
490
+ ],
491
+ "md5sum": "e84ed8d0bd39c0c4ea3a7bcf00b63225"
492
+ },
493
+ {
494
+ "dataPath": "params_shard_18.bin",
495
+ "format": "raw-shard",
496
+ "nbytes": 30941184,
497
+ "records": [
498
+ {
499
+ "name": "model.layers.11.self_attn.o_proj.weight",
500
+ "shape": [
501
+ 2048,
502
+ 2048
503
+ ],
504
+ "dtype": "float16",
505
+ "format": "f32-to-bf16",
506
+ "nbytes": 8388608,
507
+ "byteOffset": 0
508
+ },
509
+ {
510
+ "name": "model.layers.12.input_layernorm.weight",
511
+ "shape": [
512
+ 2048
513
+ ],
514
+ "dtype": "float16",
515
+ "format": "f32-to-bf16",
516
+ "nbytes": 4096,
517
+ "byteOffset": 8388608
518
+ },
519
+ {
520
+ "name": "model.layers.12.mlp.down_proj.weight",
521
+ "shape": [
522
+ 2048,
523
+ 5504
524
+ ],
525
+ "dtype": "float16",
526
+ "format": "f32-to-bf16",
527
+ "nbytes": 22544384,
528
+ "byteOffset": 8392704
529
+ },
530
+ {
531
+ "name": "model.layers.12.post_attention_layernorm.weight",
532
+ "shape": [
533
+ 2048
534
+ ],
535
+ "dtype": "float16",
536
+ "format": "f32-to-bf16",
537
+ "nbytes": 4096,
538
+ "byteOffset": 30937088
539
+ }
540
+ ],
541
+ "md5sum": "96663ca6bb7ee0f2e61c6d04f642bc0f"
542
+ },
543
+ {
544
+ "dataPath": "params_shard_19.bin",
545
+ "format": "raw-shard",
546
+ "nbytes": 45088768,
547
+ "records": [
548
+ {
549
+ "name": "model.layers.13.mlp.gate_up_proj.weight",
550
+ "shape": [
551
+ 11008,
552
+ 2048
553
+ ],
554
+ "dtype": "float16",
555
+ "format": "f32-to-bf16",
556
+ "nbytes": 45088768,
557
+ "byteOffset": 0
558
+ }
559
+ ],
560
+ "md5sum": "151fc8b33d617d1ffbe07e75627f125b"
561
+ },
562
+ {
563
+ "dataPath": "params_shard_20.bin",
564
+ "format": "raw-shard",
565
+ "nbytes": 25165824,
566
+ "records": [
567
+ {
568
+ "name": "model.layers.13.self_attn.qkv_proj.weight",
569
+ "shape": [
570
+ 6144,
571
+ 2048
572
+ ],
573
+ "dtype": "float16",
574
+ "format": "f32-to-bf16",
575
+ "nbytes": 25165824,
576
+ "byteOffset": 0
577
+ }
578
+ ],
579
+ "md5sum": "bcd9e7cf587b92c83f0d1e975aa17239"
580
+ },
581
+ {
582
+ "dataPath": "params_shard_21.bin",
583
+ "format": "raw-shard",
584
+ "nbytes": 30941184,
585
+ "records": [
586
+ {
587
+ "name": "model.layers.12.self_attn.o_proj.weight",
588
+ "shape": [
589
+ 2048,
590
+ 2048
591
+ ],
592
+ "dtype": "float16",
593
+ "format": "f32-to-bf16",
594
+ "nbytes": 8388608,
595
+ "byteOffset": 0
596
+ },
597
+ {
598
+ "name": "model.layers.13.input_layernorm.weight",
599
+ "shape": [
600
+ 2048
601
+ ],
602
+ "dtype": "float16",
603
+ "format": "f32-to-bf16",
604
+ "nbytes": 4096,
605
+ "byteOffset": 8388608
606
+ },
607
+ {
608
+ "name": "model.layers.13.mlp.down_proj.weight",
609
+ "shape": [
610
+ 2048,
611
+ 5504
612
+ ],
613
+ "dtype": "float16",
614
+ "format": "f32-to-bf16",
615
+ "nbytes": 22544384,
616
+ "byteOffset": 8392704
617
+ },
618
+ {
619
+ "name": "model.layers.13.post_attention_layernorm.weight",
620
+ "shape": [
621
+ 2048
622
+ ],
623
+ "dtype": "float16",
624
+ "format": "f32-to-bf16",
625
+ "nbytes": 4096,
626
+ "byteOffset": 30937088
627
+ }
628
+ ],
629
+ "md5sum": "c0f8c65a6ce70d0c1032459932d18d67"
630
+ },
631
+ {
632
+ "dataPath": "params_shard_22.bin",
633
+ "format": "raw-shard",
634
+ "nbytes": 45088768,
635
+ "records": [
636
+ {
637
+ "name": "model.layers.14.mlp.gate_up_proj.weight",
638
+ "shape": [
639
+ 11008,
640
+ 2048
641
+ ],
642
+ "dtype": "float16",
643
+ "format": "f32-to-bf16",
644
+ "nbytes": 45088768,
645
+ "byteOffset": 0
646
+ }
647
+ ],
648
+ "md5sum": "01e1115542ac1e8e8f2aae519d2218d8"
649
+ },
650
+ {
651
+ "dataPath": "params_shard_23.bin",
652
+ "format": "raw-shard",
653
+ "nbytes": 25165824,
654
+ "records": [
655
+ {
656
+ "name": "model.layers.14.self_attn.qkv_proj.weight",
657
+ "shape": [
658
+ 6144,
659
+ 2048
660
+ ],
661
+ "dtype": "float16",
662
+ "format": "f32-to-bf16",
663
+ "nbytes": 25165824,
664
+ "byteOffset": 0
665
+ }
666
+ ],
667
+ "md5sum": "51cc23816c40b0947aeeacf5ba47c108"
668
+ },
669
+ {
670
+ "dataPath": "params_shard_24.bin",
671
+ "format": "raw-shard",
672
+ "nbytes": 30941184,
673
+ "records": [
674
+ {
675
+ "name": "model.layers.13.self_attn.o_proj.weight",
676
+ "shape": [
677
+ 2048,
678
+ 2048
679
+ ],
680
+ "dtype": "float16",
681
+ "format": "f32-to-bf16",
682
+ "nbytes": 8388608,
683
+ "byteOffset": 0
684
+ },
685
+ {
686
+ "name": "model.layers.14.input_layernorm.weight",
687
+ "shape": [
688
+ 2048
689
+ ],
690
+ "dtype": "float16",
691
+ "format": "f32-to-bf16",
692
+ "nbytes": 4096,
693
+ "byteOffset": 8388608
694
+ },
695
+ {
696
+ "name": "model.layers.14.mlp.down_proj.weight",
697
+ "shape": [
698
+ 2048,
699
+ 5504
700
+ ],
701
+ "dtype": "float16",
702
+ "format": "f32-to-bf16",
703
+ "nbytes": 22544384,
704
+ "byteOffset": 8392704
705
+ },
706
+ {
707
+ "name": "model.layers.14.post_attention_layernorm.weight",
708
+ "shape": [
709
+ 2048
710
+ ],
711
+ "dtype": "float16",
712
+ "format": "f32-to-bf16",
713
+ "nbytes": 4096,
714
+ "byteOffset": 30937088
715
+ }
716
+ ],
717
+ "md5sum": "0083f49fda74b8cf84e5f1d07a900e16"
718
+ },
719
+ {
720
+ "dataPath": "params_shard_25.bin",
721
+ "format": "raw-shard",
722
+ "nbytes": 45088768,
723
+ "records": [
724
+ {
725
+ "name": "model.layers.15.mlp.gate_up_proj.weight",
726
+ "shape": [
727
+ 11008,
728
+ 2048
729
+ ],
730
+ "dtype": "float16",
731
+ "format": "f32-to-bf16",
732
+ "nbytes": 45088768,
733
+ "byteOffset": 0
734
+ }
735
+ ],
736
+ "md5sum": "f360dce9c6b14b96cd26abb58e826863"
737
+ },
738
+ {
739
+ "dataPath": "params_shard_26.bin",
740
+ "format": "raw-shard",
741
+ "nbytes": 25165824,
742
+ "records": [
743
+ {
744
+ "name": "model.layers.15.self_attn.qkv_proj.weight",
745
+ "shape": [
746
+ 6144,
747
+ 2048
748
+ ],
749
+ "dtype": "float16",
750
+ "format": "f32-to-bf16",
751
+ "nbytes": 25165824,
752
+ "byteOffset": 0
753
+ }
754
+ ],
755
+ "md5sum": "f4028906532c1a77c81fea68325f7e37"
756
+ },
757
+ {
758
+ "dataPath": "params_shard_27.bin",
759
+ "format": "raw-shard",
760
+ "nbytes": 30941184,
761
+ "records": [
762
+ {
763
+ "name": "model.layers.14.self_attn.o_proj.weight",
764
+ "shape": [
765
+ 2048,
766
+ 2048
767
+ ],
768
+ "dtype": "float16",
769
+ "format": "f32-to-bf16",
770
+ "nbytes": 8388608,
771
+ "byteOffset": 0
772
+ },
773
+ {
774
+ "name": "model.layers.15.input_layernorm.weight",
775
+ "shape": [
776
+ 2048
777
+ ],
778
+ "dtype": "float16",
779
+ "format": "f32-to-bf16",
780
+ "nbytes": 4096,
781
+ "byteOffset": 8388608
782
+ },
783
+ {
784
+ "name": "model.layers.15.mlp.down_proj.weight",
785
+ "shape": [
786
+ 2048,
787
+ 5504
788
+ ],
789
+ "dtype": "float16",
790
+ "format": "f32-to-bf16",
791
+ "nbytes": 22544384,
792
+ "byteOffset": 8392704
793
+ },
794
+ {
795
+ "name": "model.layers.15.post_attention_layernorm.weight",
796
+ "shape": [
797
+ 2048
798
+ ],
799
+ "dtype": "float16",
800
+ "format": "f32-to-bf16",
801
+ "nbytes": 4096,
802
+ "byteOffset": 30937088
803
+ }
804
+ ],
805
+ "md5sum": "3385aa8af474da3ecab88532ab76d44c"
806
+ },
807
+ {
808
+ "dataPath": "params_shard_28.bin",
809
+ "format": "raw-shard",
810
+ "nbytes": 45088768,
811
+ "records": [
812
+ {
813
+ "name": "model.layers.16.mlp.gate_up_proj.weight",
814
+ "shape": [
815
+ 11008,
816
+ 2048
817
+ ],
818
+ "dtype": "float16",
819
+ "format": "f32-to-bf16",
820
+ "nbytes": 45088768,
821
+ "byteOffset": 0
822
+ }
823
+ ],
824
+ "md5sum": "9fd3b31de601dd1ea655d707fcb2b878"
825
+ },
826
+ {
827
+ "dataPath": "params_shard_29.bin",
828
+ "format": "raw-shard",
829
+ "nbytes": 25165824,
830
+ "records": [
831
+ {
832
+ "name": "model.layers.16.self_attn.qkv_proj.weight",
833
+ "shape": [
834
+ 6144,
835
+ 2048
836
+ ],
837
+ "dtype": "float16",
838
+ "format": "f32-to-bf16",
839
+ "nbytes": 25165824,
840
+ "byteOffset": 0
841
+ }
842
+ ],
843
+ "md5sum": "ad0a0681984c1e8443e83da0aa5042ba"
844
+ },
845
+ {
846
+ "dataPath": "params_shard_30.bin",
847
+ "format": "raw-shard",
848
+ "nbytes": 30941184,
849
+ "records": [
850
+ {
851
+ "name": "model.layers.15.self_attn.o_proj.weight",
852
+ "shape": [
853
+ 2048,
854
+ 2048
855
+ ],
856
+ "dtype": "float16",
857
+ "format": "f32-to-bf16",
858
+ "nbytes": 8388608,
859
+ "byteOffset": 0
860
+ },
861
+ {
862
+ "name": "model.layers.16.input_layernorm.weight",
863
+ "shape": [
864
+ 2048
865
+ ],
866
+ "dtype": "float16",
867
+ "format": "f32-to-bf16",
868
+ "nbytes": 4096,
869
+ "byteOffset": 8388608
870
+ },
871
+ {
872
+ "name": "model.layers.16.mlp.down_proj.weight",
873
+ "shape": [
874
+ 2048,
875
+ 5504
876
+ ],
877
+ "dtype": "float16",
878
+ "format": "f32-to-bf16",
879
+ "nbytes": 22544384,
880
+ "byteOffset": 8392704
881
+ },
882
+ {
883
+ "name": "model.layers.16.post_attention_layernorm.weight",
884
+ "shape": [
885
+ 2048
886
+ ],
887
+ "dtype": "float16",
888
+ "format": "f32-to-bf16",
889
+ "nbytes": 4096,
890
+ "byteOffset": 30937088
891
+ }
892
+ ],
893
+ "md5sum": "d870f7ff9671024da16435a5c3d2e0ae"
894
+ },
895
+ {
896
+ "dataPath": "params_shard_31.bin",
897
+ "format": "raw-shard",
898
+ "nbytes": 45088768,
899
+ "records": [
900
+ {
901
+ "name": "model.layers.17.mlp.gate_up_proj.weight",
902
+ "shape": [
903
+ 11008,
904
+ 2048
905
+ ],
906
+ "dtype": "float16",
907
+ "format": "f32-to-bf16",
908
+ "nbytes": 45088768,
909
+ "byteOffset": 0
910
+ }
911
+ ],
912
+ "md5sum": "364070562dba92fe20668f0dd6c4def1"
913
+ },
914
+ {
915
+ "dataPath": "params_shard_32.bin",
916
+ "format": "raw-shard",
917
+ "nbytes": 25165824,
918
+ "records": [
919
+ {
920
+ "name": "model.layers.17.self_attn.qkv_proj.weight",
921
+ "shape": [
922
+ 6144,
923
+ 2048
924
+ ],
925
+ "dtype": "float16",
926
+ "format": "f32-to-bf16",
927
+ "nbytes": 25165824,
928
+ "byteOffset": 0
929
+ }
930
+ ],
931
+ "md5sum": "21ae9345d50c77cd3e07e29d93b9efdf"
932
+ },
933
+ {
934
+ "dataPath": "params_shard_33.bin",
935
+ "format": "raw-shard",
936
+ "nbytes": 30941184,
937
+ "records": [
938
+ {
939
+ "name": "model.layers.16.self_attn.o_proj.weight",
940
+ "shape": [
941
+ 2048,
942
+ 2048
943
+ ],
944
+ "dtype": "float16",
945
+ "format": "f32-to-bf16",
946
+ "nbytes": 8388608,
947
+ "byteOffset": 0
948
+ },
949
+ {
950
+ "name": "model.layers.17.input_layernorm.weight",
951
+ "shape": [
952
+ 2048
953
+ ],
954
+ "dtype": "float16",
955
+ "format": "f32-to-bf16",
956
+ "nbytes": 4096,
957
+ "byteOffset": 8388608
958
+ },
959
+ {
960
+ "name": "model.layers.17.mlp.down_proj.weight",
961
+ "shape": [
962
+ 2048,
963
+ 5504
964
+ ],
965
+ "dtype": "float16",
966
+ "format": "f32-to-bf16",
967
+ "nbytes": 22544384,
968
+ "byteOffset": 8392704
969
+ },
970
+ {
971
+ "name": "model.layers.17.post_attention_layernorm.weight",
972
+ "shape": [
973
+ 2048
974
+ ],
975
+ "dtype": "float16",
976
+ "format": "f32-to-bf16",
977
+ "nbytes": 4096,
978
+ "byteOffset": 30937088
979
+ }
980
+ ],
981
+ "md5sum": "7aeca74e93851c74a624201dd5ba5359"
982
+ },
983
+ {
984
+ "dataPath": "params_shard_34.bin",
985
+ "format": "raw-shard",
986
+ "nbytes": 45088768,
987
+ "records": [
988
+ {
989
+ "name": "model.layers.18.mlp.gate_up_proj.weight",
990
+ "shape": [
991
+ 11008,
992
+ 2048
993
+ ],
994
+ "dtype": "float16",
995
+ "format": "f32-to-bf16",
996
+ "nbytes": 45088768,
997
+ "byteOffset": 0
998
+ }
999
+ ],
1000
+ "md5sum": "65bb02353f04662f5ab3224e8eba760b"
1001
+ },
1002
+ {
1003
+ "dataPath": "params_shard_35.bin",
1004
+ "format": "raw-shard",
1005
+ "nbytes": 25165824,
1006
+ "records": [
1007
+ {
1008
+ "name": "model.layers.18.self_attn.qkv_proj.weight",
1009
+ "shape": [
1010
+ 6144,
1011
+ 2048
1012
+ ],
1013
+ "dtype": "float16",
1014
+ "format": "f32-to-bf16",
1015
+ "nbytes": 25165824,
1016
+ "byteOffset": 0
1017
+ }
1018
+ ],
1019
+ "md5sum": "da313fb40a2faba7e1903cb6754d2372"
1020
+ },
1021
+ {
1022
+ "dataPath": "params_shard_36.bin",
1023
+ "format": "raw-shard",
1024
+ "nbytes": 30941184,
1025
+ "records": [
1026
+ {
1027
+ "name": "model.layers.17.self_attn.o_proj.weight",
1028
+ "shape": [
1029
+ 2048,
1030
+ 2048
1031
+ ],
1032
+ "dtype": "float16",
1033
+ "format": "f32-to-bf16",
1034
+ "nbytes": 8388608,
1035
+ "byteOffset": 0
1036
+ },
1037
+ {
1038
+ "name": "model.layers.18.input_layernorm.weight",
1039
+ "shape": [
1040
+ 2048
1041
+ ],
1042
+ "dtype": "float16",
1043
+ "format": "f32-to-bf16",
1044
+ "nbytes": 4096,
1045
+ "byteOffset": 8388608
1046
+ },
1047
+ {
1048
+ "name": "model.layers.18.mlp.down_proj.weight",
1049
+ "shape": [
1050
+ 2048,
1051
+ 5504
1052
+ ],
1053
+ "dtype": "float16",
1054
+ "format": "f32-to-bf16",
1055
+ "nbytes": 22544384,
1056
+ "byteOffset": 8392704
1057
+ },
1058
+ {
1059
+ "name": "model.layers.18.post_attention_layernorm.weight",
1060
+ "shape": [
1061
+ 2048
1062
+ ],
1063
+ "dtype": "float16",
1064
+ "format": "f32-to-bf16",
1065
+ "nbytes": 4096,
1066
+ "byteOffset": 30937088
1067
+ }
1068
+ ],
1069
+ "md5sum": "602f91df1048aa4d59f5780c5a150e4e"
1070
+ },
1071
+ {
1072
+ "dataPath": "params_shard_37.bin",
1073
+ "format": "raw-shard",
1074
+ "nbytes": 45088768,
1075
+ "records": [
1076
+ {
1077
+ "name": "model.layers.19.mlp.gate_up_proj.weight",
1078
+ "shape": [
1079
+ 11008,
1080
+ 2048
1081
+ ],
1082
+ "dtype": "float16",
1083
+ "format": "f32-to-bf16",
1084
+ "nbytes": 45088768,
1085
+ "byteOffset": 0
1086
+ }
1087
+ ],
1088
+ "md5sum": "cde127fed4c562ffc9dfdd94298759f4"
1089
+ },
1090
+ {
1091
+ "dataPath": "params_shard_38.bin",
1092
+ "format": "raw-shard",
1093
+ "nbytes": 25165824,
1094
+ "records": [
1095
+ {
1096
+ "name": "model.layers.19.self_attn.qkv_proj.weight",
1097
+ "shape": [
1098
+ 6144,
1099
+ 2048
1100
+ ],
1101
+ "dtype": "float16",
1102
+ "format": "f32-to-bf16",
1103
+ "nbytes": 25165824,
1104
+ "byteOffset": 0
1105
+ }
1106
+ ],
1107
+ "md5sum": "5ecb5726da144dd9250eae22ce08d827"
1108
+ },
1109
+ {
1110
+ "dataPath": "params_shard_39.bin",
1111
+ "format": "raw-shard",
1112
+ "nbytes": 30941184,
1113
+ "records": [
1114
+ {
1115
+ "name": "model.layers.18.self_attn.o_proj.weight",
1116
+ "shape": [
1117
+ 2048,
1118
+ 2048
1119
+ ],
1120
+ "dtype": "float16",
1121
+ "format": "f32-to-bf16",
1122
+ "nbytes": 8388608,
1123
+ "byteOffset": 0
1124
+ },
1125
+ {
1126
+ "name": "model.layers.19.input_layernorm.weight",
1127
+ "shape": [
1128
+ 2048
1129
+ ],
1130
+ "dtype": "float16",
1131
+ "format": "f32-to-bf16",
1132
+ "nbytes": 4096,
1133
+ "byteOffset": 8388608
1134
+ },
1135
+ {
1136
+ "name": "model.layers.19.mlp.down_proj.weight",
1137
+ "shape": [
1138
+ 2048,
1139
+ 5504
1140
+ ],
1141
+ "dtype": "float16",
1142
+ "format": "f32-to-bf16",
1143
+ "nbytes": 22544384,
1144
+ "byteOffset": 8392704
1145
+ },
1146
+ {
1147
+ "name": "model.layers.19.post_attention_layernorm.weight",
1148
+ "shape": [
1149
+ 2048
1150
+ ],
1151
+ "dtype": "float16",
1152
+ "format": "f32-to-bf16",
1153
+ "nbytes": 4096,
1154
+ "byteOffset": 30937088
1155
+ }
1156
+ ],
1157
+ "md5sum": "df1e9f697e210b8f3d94e01c7f7023f3"
1158
+ },
1159
+ {
1160
+ "dataPath": "params_shard_40.bin",
1161
+ "format": "raw-shard",
1162
+ "nbytes": 45088768,
1163
+ "records": [
1164
+ {
1165
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
1166
+ "shape": [
1167
+ 11008,
1168
+ 2048
1169
+ ],
1170
+ "dtype": "float16",
1171
+ "format": "f32-to-bf16",
1172
+ "nbytes": 45088768,
1173
+ "byteOffset": 0
1174
+ }
1175
+ ],
1176
+ "md5sum": "fed9db092ed650ce0d1154f0b128988c"
1177
+ },
1178
+ {
1179
+ "dataPath": "params_shard_41.bin",
1180
+ "format": "raw-shard",
1181
+ "nbytes": 25165824,
1182
+ "records": [
1183
+ {
1184
+ "name": "model.layers.2.self_attn.qkv_proj.weight",
1185
+ "shape": [
1186
+ 6144,
1187
+ 2048
1188
+ ],
1189
+ "dtype": "float16",
1190
+ "format": "f32-to-bf16",
1191
+ "nbytes": 25165824,
1192
+ "byteOffset": 0
1193
+ }
1194
+ ],
1195
+ "md5sum": "59a4ecebda8af22d714dc8a6b8075628"
1196
+ },
1197
+ {
1198
+ "dataPath": "params_shard_42.bin",
1199
+ "format": "raw-shard",
1200
+ "nbytes": 30941184,
1201
+ "records": [
1202
+ {
1203
+ "name": "model.layers.19.self_attn.o_proj.weight",
1204
+ "shape": [
1205
+ 2048,
1206
+ 2048
1207
+ ],
1208
+ "dtype": "float16",
1209
+ "format": "f32-to-bf16",
1210
+ "nbytes": 8388608,
1211
+ "byteOffset": 0
1212
+ },
1213
+ {
1214
+ "name": "model.layers.2.input_layernorm.weight",
1215
+ "shape": [
1216
+ 2048
1217
+ ],
1218
+ "dtype": "float16",
1219
+ "format": "f32-to-bf16",
1220
+ "nbytes": 4096,
1221
+ "byteOffset": 8388608
1222
+ },
1223
+ {
1224
+ "name": "model.layers.2.mlp.down_proj.weight",
1225
+ "shape": [
1226
+ 2048,
1227
+ 5504
1228
+ ],
1229
+ "dtype": "float16",
1230
+ "format": "f32-to-bf16",
1231
+ "nbytes": 22544384,
1232
+ "byteOffset": 8392704
1233
+ },
1234
+ {
1235
+ "name": "model.layers.2.post_attention_layernorm.weight",
1236
+ "shape": [
1237
+ 2048
1238
+ ],
1239
+ "dtype": "float16",
1240
+ "format": "f32-to-bf16",
1241
+ "nbytes": 4096,
1242
+ "byteOffset": 30937088
1243
+ }
1244
+ ],
1245
+ "md5sum": "852a934145f89a5017127dd06a380a6f"
1246
+ },
1247
+ {
1248
+ "dataPath": "params_shard_43.bin",
1249
+ "format": "raw-shard",
1250
+ "nbytes": 45088768,
1251
+ "records": [
1252
+ {
1253
+ "name": "model.layers.20.mlp.gate_up_proj.weight",
1254
+ "shape": [
1255
+ 11008,
1256
+ 2048
1257
+ ],
1258
+ "dtype": "float16",
1259
+ "format": "f32-to-bf16",
1260
+ "nbytes": 45088768,
1261
+ "byteOffset": 0
1262
+ }
1263
+ ],
1264
+ "md5sum": "1f05fcb53169a1d8be2dc20e5354d611"
1265
+ },
1266
+ {
1267
+ "dataPath": "params_shard_44.bin",
1268
+ "format": "raw-shard",
1269
+ "nbytes": 25165824,
1270
+ "records": [
1271
+ {
1272
+ "name": "model.layers.20.self_attn.qkv_proj.weight",
1273
+ "shape": [
1274
+ 6144,
1275
+ 2048
1276
+ ],
1277
+ "dtype": "float16",
1278
+ "format": "f32-to-bf16",
1279
+ "nbytes": 25165824,
1280
+ "byteOffset": 0
1281
+ }
1282
+ ],
1283
+ "md5sum": "3bf75fc9f377cfc71e0eb90c3b10daab"
1284
+ },
1285
+ {
1286
+ "dataPath": "params_shard_45.bin",
1287
+ "format": "raw-shard",
1288
+ "nbytes": 30941184,
1289
+ "records": [
1290
+ {
1291
+ "name": "model.layers.2.self_attn.o_proj.weight",
1292
+ "shape": [
1293
+ 2048,
1294
+ 2048
1295
+ ],
1296
+ "dtype": "float16",
1297
+ "format": "f32-to-bf16",
1298
+ "nbytes": 8388608,
1299
+ "byteOffset": 0
1300
+ },
1301
+ {
1302
+ "name": "model.layers.20.input_layernorm.weight",
1303
+ "shape": [
1304
+ 2048
1305
+ ],
1306
+ "dtype": "float16",
1307
+ "format": "f32-to-bf16",
1308
+ "nbytes": 4096,
1309
+ "byteOffset": 8388608
1310
+ },
1311
+ {
1312
+ "name": "model.layers.20.mlp.down_proj.weight",
1313
+ "shape": [
1314
+ 2048,
1315
+ 5504
1316
+ ],
1317
+ "dtype": "float16",
1318
+ "format": "f32-to-bf16",
1319
+ "nbytes": 22544384,
1320
+ "byteOffset": 8392704
1321
+ },
1322
+ {
1323
+ "name": "model.layers.20.post_attention_layernorm.weight",
1324
+ "shape": [
1325
+ 2048
1326
+ ],
1327
+ "dtype": "float16",
1328
+ "format": "f32-to-bf16",
1329
+ "nbytes": 4096,
1330
+ "byteOffset": 30937088
1331
+ }
1332
+ ],
1333
+ "md5sum": "b3723705c01ac8e5e1dbc899d70698b8"
1334
+ },
1335
+ {
1336
+ "dataPath": "params_shard_46.bin",
1337
+ "format": "raw-shard",
1338
+ "nbytes": 45088768,
1339
+ "records": [
1340
+ {
1341
+ "name": "model.layers.21.mlp.gate_up_proj.weight",
1342
+ "shape": [
1343
+ 11008,
1344
+ 2048
1345
+ ],
1346
+ "dtype": "float16",
1347
+ "format": "f32-to-bf16",
1348
+ "nbytes": 45088768,
1349
+ "byteOffset": 0
1350
+ }
1351
+ ],
1352
+ "md5sum": "6abbc9e86dd1e4f2013ff489952f9de7"
1353
+ },
1354
+ {
1355
+ "dataPath": "params_shard_47.bin",
1356
+ "format": "raw-shard",
1357
+ "nbytes": 25165824,
1358
+ "records": [
1359
+ {
1360
+ "name": "model.layers.21.self_attn.qkv_proj.weight",
1361
+ "shape": [
1362
+ 6144,
1363
+ 2048
1364
+ ],
1365
+ "dtype": "float16",
1366
+ "format": "f32-to-bf16",
1367
+ "nbytes": 25165824,
1368
+ "byteOffset": 0
1369
+ }
1370
+ ],
1371
+ "md5sum": "99e93cb34c970339bf5efb6875e38cb8"
1372
+ },
1373
+ {
1374
+ "dataPath": "params_shard_48.bin",
1375
+ "format": "raw-shard",
1376
+ "nbytes": 30941184,
1377
+ "records": [
1378
+ {
1379
+ "name": "model.layers.20.self_attn.o_proj.weight",
1380
+ "shape": [
1381
+ 2048,
1382
+ 2048
1383
+ ],
1384
+ "dtype": "float16",
1385
+ "format": "f32-to-bf16",
1386
+ "nbytes": 8388608,
1387
+ "byteOffset": 0
1388
+ },
1389
+ {
1390
+ "name": "model.layers.21.input_layernorm.weight",
1391
+ "shape": [
1392
+ 2048
1393
+ ],
1394
+ "dtype": "float16",
1395
+ "format": "f32-to-bf16",
1396
+ "nbytes": 4096,
1397
+ "byteOffset": 8388608
1398
+ },
1399
+ {
1400
+ "name": "model.layers.21.mlp.down_proj.weight",
1401
+ "shape": [
1402
+ 2048,
1403
+ 5504
1404
+ ],
1405
+ "dtype": "float16",
1406
+ "format": "f32-to-bf16",
1407
+ "nbytes": 22544384,
1408
+ "byteOffset": 8392704
1409
+ },
1410
+ {
1411
+ "name": "model.layers.21.post_attention_layernorm.weight",
1412
+ "shape": [
1413
+ 2048
1414
+ ],
1415
+ "dtype": "float16",
1416
+ "format": "f32-to-bf16",
1417
+ "nbytes": 4096,
1418
+ "byteOffset": 30937088
1419
+ }
1420
+ ],
1421
+ "md5sum": "78b764fd070d1615ba558d131f9e25dd"
1422
+ },
1423
+ {
1424
+ "dataPath": "params_shard_49.bin",
1425
+ "format": "raw-shard",
1426
+ "nbytes": 45088768,
1427
+ "records": [
1428
+ {
1429
+ "name": "model.layers.22.mlp.gate_up_proj.weight",
1430
+ "shape": [
1431
+ 11008,
1432
+ 2048
1433
+ ],
1434
+ "dtype": "float16",
1435
+ "format": "f32-to-bf16",
1436
+ "nbytes": 45088768,
1437
+ "byteOffset": 0
1438
+ }
1439
+ ],
1440
+ "md5sum": "8b647020224bda42aa46b0f4fd0b46f7"
1441
+ },
1442
+ {
1443
+ "dataPath": "params_shard_50.bin",
1444
+ "format": "raw-shard",
1445
+ "nbytes": 25165824,
1446
+ "records": [
1447
+ {
1448
+ "name": "model.layers.22.self_attn.qkv_proj.weight",
1449
+ "shape": [
1450
+ 6144,
1451
+ 2048
1452
+ ],
1453
+ "dtype": "float16",
1454
+ "format": "f32-to-bf16",
1455
+ "nbytes": 25165824,
1456
+ "byteOffset": 0
1457
+ }
1458
+ ],
1459
+ "md5sum": "4ed27a77728826cb5511b9171463c217"
1460
+ },
1461
+ {
1462
+ "dataPath": "params_shard_51.bin",
1463
+ "format": "raw-shard",
1464
+ "nbytes": 30941184,
1465
+ "records": [
1466
+ {
1467
+ "name": "model.layers.21.self_attn.o_proj.weight",
1468
+ "shape": [
1469
+ 2048,
1470
+ 2048
1471
+ ],
1472
+ "dtype": "float16",
1473
+ "format": "f32-to-bf16",
1474
+ "nbytes": 8388608,
1475
+ "byteOffset": 0
1476
+ },
1477
+ {
1478
+ "name": "model.layers.22.input_layernorm.weight",
1479
+ "shape": [
1480
+ 2048
1481
+ ],
1482
+ "dtype": "float16",
1483
+ "format": "f32-to-bf16",
1484
+ "nbytes": 4096,
1485
+ "byteOffset": 8388608
1486
+ },
1487
+ {
1488
+ "name": "model.layers.22.mlp.down_proj.weight",
1489
+ "shape": [
1490
+ 2048,
1491
+ 5504
1492
+ ],
1493
+ "dtype": "float16",
1494
+ "format": "f32-to-bf16",
1495
+ "nbytes": 22544384,
1496
+ "byteOffset": 8392704
1497
+ },
1498
+ {
1499
+ "name": "model.layers.22.post_attention_layernorm.weight",
1500
+ "shape": [
1501
+ 2048
1502
+ ],
1503
+ "dtype": "float16",
1504
+ "format": "f32-to-bf16",
1505
+ "nbytes": 4096,
1506
+ "byteOffset": 30937088
1507
+ }
1508
+ ],
1509
+ "md5sum": "e251ff26cdcff77317f4b118b02c1497"
1510
+ },
1511
+ {
1512
+ "dataPath": "params_shard_52.bin",
1513
+ "format": "raw-shard",
1514
+ "nbytes": 25165824,
1515
+ "records": [
1516
+ {
1517
+ "name": "model.layers.23.self_attn.qkv_proj.weight",
1518
+ "shape": [
1519
+ 6144,
1520
+ 2048
1521
+ ],
1522
+ "dtype": "float16",
1523
+ "format": "f32-to-bf16",
1524
+ "nbytes": 25165824,
1525
+ "byteOffset": 0
1526
+ }
1527
+ ],
1528
+ "md5sum": "36d8ea7c07c8bfcb2f88fa1fefe8ed79"
1529
+ },
1530
+ {
1531
+ "dataPath": "params_shard_53.bin",
1532
+ "format": "raw-shard",
1533
+ "nbytes": 22544384,
1534
+ "records": [
1535
+ {
1536
+ "name": "model.layers.3.mlp.down_proj.weight",
1537
+ "shape": [
1538
+ 2048,
1539
+ 5504
1540
+ ],
1541
+ "dtype": "float16",
1542
+ "format": "f32-to-bf16",
1543
+ "nbytes": 22544384,
1544
+ "byteOffset": 0
1545
+ }
1546
+ ],
1547
+ "md5sum": "517a9d2d31433214bdf434c6aa8a37e3"
1548
+ },
1549
+ {
1550
+ "dataPath": "params_shard_54.bin",
1551
+ "format": "raw-shard",
1552
+ "nbytes": 45088768,
1553
+ "records": [
1554
+ {
1555
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
1556
+ "shape": [
1557
+ 11008,
1558
+ 2048
1559
+ ],
1560
+ "dtype": "float16",
1561
+ "format": "f32-to-bf16",
1562
+ "nbytes": 45088768,
1563
+ "byteOffset": 0
1564
+ }
1565
+ ],
1566
+ "md5sum": "aea80b9e667db9e3a0807c6debe68f0c"
1567
+ },
1568
+ {
1569
+ "dataPath": "params_shard_55.bin",
1570
+ "format": "raw-shard",
1571
+ "nbytes": 25165824,
1572
+ "records": [
1573
+ {
1574
+ "name": "model.layers.3.self_attn.qkv_proj.weight",
1575
+ "shape": [
1576
+ 6144,
1577
+ 2048
1578
+ ],
1579
+ "dtype": "float16",
1580
+ "format": "f32-to-bf16",
1581
+ "nbytes": 25165824,
1582
+ "byteOffset": 0
1583
+ }
1584
+ ],
1585
+ "md5sum": "cf9f8d29467431927a7bca62874a159b"
1586
+ },
1587
+ {
1588
+ "dataPath": "params_shard_56.bin",
1589
+ "format": "raw-shard",
1590
+ "nbytes": 22544384,
1591
+ "records": [
1592
+ {
1593
+ "name": "model.layers.4.mlp.down_proj.weight",
1594
+ "shape": [
1595
+ 2048,
1596
+ 5504
1597
+ ],
1598
+ "dtype": "float16",
1599
+ "format": "f32-to-bf16",
1600
+ "nbytes": 22544384,
1601
+ "byteOffset": 0
1602
+ }
1603
+ ],
1604
+ "md5sum": "4137120515b1fa42514d52e38434234d"
1605
+ },
1606
+ {
1607
+ "dataPath": "params_shard_57.bin",
1608
+ "format": "raw-shard",
1609
+ "nbytes": 45088768,
1610
+ "records": [
1611
+ {
1612
+ "name": "model.layers.4.mlp.gate_up_proj.weight",
1613
+ "shape": [
1614
+ 11008,
1615
+ 2048
1616
+ ],
1617
+ "dtype": "float16",
1618
+ "format": "f32-to-bf16",
1619
+ "nbytes": 45088768,
1620
+ "byteOffset": 0
1621
+ }
1622
+ ],
1623
+ "md5sum": "a6f9f971dc59ea038c22f9b3ccff60d4"
1624
+ },
1625
+ {
1626
+ "dataPath": "params_shard_58.bin",
1627
+ "format": "raw-shard",
1628
+ "nbytes": 25165824,
1629
+ "records": [
1630
+ {
1631
+ "name": "model.layers.4.self_attn.qkv_proj.weight",
1632
+ "shape": [
1633
+ 6144,
1634
+ 2048
1635
+ ],
1636
+ "dtype": "float16",
1637
+ "format": "f32-to-bf16",
1638
+ "nbytes": 25165824,
1639
+ "byteOffset": 0
1640
+ }
1641
+ ],
1642
+ "md5sum": "7a312b77164fd642e73b51ce7324b18f"
1643
+ },
1644
+ {
1645
+ "dataPath": "params_shard_59.bin",
1646
+ "format": "raw-shard",
1647
+ "nbytes": 25182208,
1648
+ "records": [
1649
+ {
1650
+ "name": "model.layers.22.self_attn.o_proj.weight",
1651
+ "shape": [
1652
+ 2048,
1653
+ 2048
1654
+ ],
1655
+ "dtype": "float16",
1656
+ "format": "f32-to-bf16",
1657
+ "nbytes": 8388608,
1658
+ "byteOffset": 0
1659
+ },
1660
+ {
1661
+ "name": "model.layers.23.self_attn.o_proj.weight",
1662
+ "shape": [
1663
+ 2048,
1664
+ 2048
1665
+ ],
1666
+ "dtype": "float16",
1667
+ "format": "f32-to-bf16",
1668
+ "nbytes": 8388608,
1669
+ "byteOffset": 8388608
1670
+ },
1671
+ {
1672
+ "name": "model.layers.3.input_layernorm.weight",
1673
+ "shape": [
1674
+ 2048
1675
+ ],
1676
+ "dtype": "float16",
1677
+ "format": "f32-to-bf16",
1678
+ "nbytes": 4096,
1679
+ "byteOffset": 16777216
1680
+ },
1681
+ {
1682
+ "name": "model.layers.3.post_attention_layernorm.weight",
1683
+ "shape": [
1684
+ 2048
1685
+ ],
1686
+ "dtype": "float16",
1687
+ "format": "f32-to-bf16",
1688
+ "nbytes": 4096,
1689
+ "byteOffset": 16781312
1690
+ },
1691
+ {
1692
+ "name": "model.layers.3.self_attn.o_proj.weight",
1693
+ "shape": [
1694
+ 2048,
1695
+ 2048
1696
+ ],
1697
+ "dtype": "float16",
1698
+ "format": "f32-to-bf16",
1699
+ "nbytes": 8388608,
1700
+ "byteOffset": 16785408
1701
+ },
1702
+ {
1703
+ "name": "model.layers.4.input_layernorm.weight",
1704
+ "shape": [
1705
+ 2048
1706
+ ],
1707
+ "dtype": "float16",
1708
+ "format": "f32-to-bf16",
1709
+ "nbytes": 4096,
1710
+ "byteOffset": 25174016
1711
+ },
1712
+ {
1713
+ "name": "model.layers.4.post_attention_layernorm.weight",
1714
+ "shape": [
1715
+ 2048
1716
+ ],
1717
+ "dtype": "float16",
1718
+ "format": "f32-to-bf16",
1719
+ "nbytes": 4096,
1720
+ "byteOffset": 25178112
1721
+ }
1722
+ ],
1723
+ "md5sum": "79413c6425e3979dc2d54af1f5f2acec"
1724
+ },
1725
+ {
1726
+ "dataPath": "params_shard_60.bin",
1727
+ "format": "raw-shard",
1728
+ "nbytes": 45088768,
1729
+ "records": [
1730
+ {
1731
+ "name": "model.layers.5.mlp.gate_up_proj.weight",
1732
+ "shape": [
1733
+ 11008,
1734
+ 2048
1735
+ ],
1736
+ "dtype": "float16",
1737
+ "format": "f32-to-bf16",
1738
+ "nbytes": 45088768,
1739
+ "byteOffset": 0
1740
+ }
1741
+ ],
1742
+ "md5sum": "07385011bf5cca986261e5627cb01477"
1743
+ },
1744
+ {
1745
+ "dataPath": "params_shard_61.bin",
1746
+ "format": "raw-shard",
1747
+ "nbytes": 25165824,
1748
+ "records": [
1749
+ {
1750
+ "name": "model.layers.5.self_attn.qkv_proj.weight",
1751
+ "shape": [
1752
+ 6144,
1753
+ 2048
1754
+ ],
1755
+ "dtype": "float16",
1756
+ "format": "f32-to-bf16",
1757
+ "nbytes": 25165824,
1758
+ "byteOffset": 0
1759
+ }
1760
+ ],
1761
+ "md5sum": "c163a8740f3a2634c4270b92bb3e27b3"
1762
+ },
1763
+ {
1764
+ "dataPath": "params_shard_62.bin",
1765
+ "format": "raw-shard",
1766
+ "nbytes": 30941184,
1767
+ "records": [
1768
+ {
1769
+ "name": "model.layers.4.self_attn.o_proj.weight",
1770
+ "shape": [
1771
+ 2048,
1772
+ 2048
1773
+ ],
1774
+ "dtype": "float16",
1775
+ "format": "f32-to-bf16",
1776
+ "nbytes": 8388608,
1777
+ "byteOffset": 0
1778
+ },
1779
+ {
1780
+ "name": "model.layers.5.input_layernorm.weight",
1781
+ "shape": [
1782
+ 2048
1783
+ ],
1784
+ "dtype": "float16",
1785
+ "format": "f32-to-bf16",
1786
+ "nbytes": 4096,
1787
+ "byteOffset": 8388608
1788
+ },
1789
+ {
1790
+ "name": "model.layers.5.mlp.down_proj.weight",
1791
+ "shape": [
1792
+ 2048,
1793
+ 5504
1794
+ ],
1795
+ "dtype": "float16",
1796
+ "format": "f32-to-bf16",
1797
+ "nbytes": 22544384,
1798
+ "byteOffset": 8392704
1799
+ },
1800
+ {
1801
+ "name": "model.layers.5.post_attention_layernorm.weight",
1802
+ "shape": [
1803
+ 2048
1804
+ ],
1805
+ "dtype": "float16",
1806
+ "format": "f32-to-bf16",
1807
+ "nbytes": 4096,
1808
+ "byteOffset": 30937088
1809
+ }
1810
+ ],
1811
+ "md5sum": "98999f0e1836f1bb330be0b9988ac1d1"
1812
+ },
1813
+ {
1814
+ "dataPath": "params_shard_63.bin",
1815
+ "format": "raw-shard",
1816
+ "nbytes": 45088768,
1817
+ "records": [
1818
+ {
1819
+ "name": "model.layers.6.mlp.gate_up_proj.weight",
1820
+ "shape": [
1821
+ 11008,
1822
+ 2048
1823
+ ],
1824
+ "dtype": "float16",
1825
+ "format": "f32-to-bf16",
1826
+ "nbytes": 45088768,
1827
+ "byteOffset": 0
1828
+ }
1829
+ ],
1830
+ "md5sum": "1b6d7427c4de377e95d02c74e95b51a8"
1831
+ },
1832
+ {
1833
+ "dataPath": "params_shard_64.bin",
1834
+ "format": "raw-shard",
1835
+ "nbytes": 25165824,
1836
+ "records": [
1837
+ {
1838
+ "name": "model.layers.6.self_attn.qkv_proj.weight",
1839
+ "shape": [
1840
+ 6144,
1841
+ 2048
1842
+ ],
1843
+ "dtype": "float16",
1844
+ "format": "f32-to-bf16",
1845
+ "nbytes": 25165824,
1846
+ "byteOffset": 0
1847
+ }
1848
+ ],
1849
+ "md5sum": "a64e834919358eafcf4437d4564603d1"
1850
+ },
1851
+ {
1852
+ "dataPath": "params_shard_65.bin",
1853
+ "format": "raw-shard",
1854
+ "nbytes": 30941184,
1855
+ "records": [
1856
+ {
1857
+ "name": "model.layers.5.self_attn.o_proj.weight",
1858
+ "shape": [
1859
+ 2048,
1860
+ 2048
1861
+ ],
1862
+ "dtype": "float16",
1863
+ "format": "f32-to-bf16",
1864
+ "nbytes": 8388608,
1865
+ "byteOffset": 0
1866
+ },
1867
+ {
1868
+ "name": "model.layers.6.input_layernorm.weight",
1869
+ "shape": [
1870
+ 2048
1871
+ ],
1872
+ "dtype": "float16",
1873
+ "format": "f32-to-bf16",
1874
+ "nbytes": 4096,
1875
+ "byteOffset": 8388608
1876
+ },
1877
+ {
1878
+ "name": "model.layers.6.mlp.down_proj.weight",
1879
+ "shape": [
1880
+ 2048,
1881
+ 5504
1882
+ ],
1883
+ "dtype": "float16",
1884
+ "format": "f32-to-bf16",
1885
+ "nbytes": 22544384,
1886
+ "byteOffset": 8392704
1887
+ },
1888
+ {
1889
+ "name": "model.layers.6.post_attention_layernorm.weight",
1890
+ "shape": [
1891
+ 2048
1892
+ ],
1893
+ "dtype": "float16",
1894
+ "format": "f32-to-bf16",
1895
+ "nbytes": 4096,
1896
+ "byteOffset": 30937088
1897
+ }
1898
+ ],
1899
+ "md5sum": "c530bf3867b0c3bae88cca854b8dbffd"
1900
+ },
1901
+ {
1902
+ "dataPath": "params_shard_66.bin",
1903
+ "format": "raw-shard",
1904
+ "nbytes": 45088768,
1905
+ "records": [
1906
+ {
1907
+ "name": "model.layers.7.mlp.gate_up_proj.weight",
1908
+ "shape": [
1909
+ 11008,
1910
+ 2048
1911
+ ],
1912
+ "dtype": "float16",
1913
+ "format": "f32-to-bf16",
1914
+ "nbytes": 45088768,
1915
+ "byteOffset": 0
1916
+ }
1917
+ ],
1918
+ "md5sum": "15273c470f4d0bff79539b2ec3e6ea65"
1919
+ },
1920
+ {
1921
+ "dataPath": "params_shard_67.bin",
1922
+ "format": "raw-shard",
1923
+ "nbytes": 25165824,
1924
+ "records": [
1925
+ {
1926
+ "name": "model.layers.7.self_attn.qkv_proj.weight",
1927
+ "shape": [
1928
+ 6144,
1929
+ 2048
1930
+ ],
1931
+ "dtype": "float16",
1932
+ "format": "f32-to-bf16",
1933
+ "nbytes": 25165824,
1934
+ "byteOffset": 0
1935
+ }
1936
+ ],
1937
+ "md5sum": "80328d7651bec73dbc301cf4fce5aa48"
1938
+ },
1939
+ {
1940
+ "dataPath": "params_shard_68.bin",
1941
+ "format": "raw-shard",
1942
+ "nbytes": 30941184,
1943
+ "records": [
1944
+ {
1945
+ "name": "model.layers.6.self_attn.o_proj.weight",
1946
+ "shape": [
1947
+ 2048,
1948
+ 2048
1949
+ ],
1950
+ "dtype": "float16",
1951
+ "format": "f32-to-bf16",
1952
+ "nbytes": 8388608,
1953
+ "byteOffset": 0
1954
+ },
1955
+ {
1956
+ "name": "model.layers.7.input_layernorm.weight",
1957
+ "shape": [
1958
+ 2048
1959
+ ],
1960
+ "dtype": "float16",
1961
+ "format": "f32-to-bf16",
1962
+ "nbytes": 4096,
1963
+ "byteOffset": 8388608
1964
+ },
1965
+ {
1966
+ "name": "model.layers.7.mlp.down_proj.weight",
1967
+ "shape": [
1968
+ 2048,
1969
+ 5504
1970
+ ],
1971
+ "dtype": "float16",
1972
+ "format": "f32-to-bf16",
1973
+ "nbytes": 22544384,
1974
+ "byteOffset": 8392704
1975
+ },
1976
+ {
1977
+ "name": "model.layers.7.post_attention_layernorm.weight",
1978
+ "shape": [
1979
+ 2048
1980
+ ],
1981
+ "dtype": "float16",
1982
+ "format": "f32-to-bf16",
1983
+ "nbytes": 4096,
1984
+ "byteOffset": 30937088
1985
+ }
1986
+ ],
1987
+ "md5sum": "cd936b0ee314daf92bc39651145ab724"
1988
+ },
1989
+ {
1990
+ "dataPath": "params_shard_69.bin",
1991
+ "format": "raw-shard",
1992
+ "nbytes": 45088768,
1993
+ "records": [
1994
+ {
1995
+ "name": "model.layers.8.mlp.gate_up_proj.weight",
1996
+ "shape": [
1997
+ 11008,
1998
+ 2048
1999
+ ],
2000
+ "dtype": "float16",
2001
+ "format": "f32-to-bf16",
2002
+ "nbytes": 45088768,
2003
+ "byteOffset": 0
2004
+ }
2005
+ ],
2006
+ "md5sum": "cdf81fb402a8a9f124c3603af04f43ac"
2007
+ },
2008
+ {
2009
+ "dataPath": "params_shard_70.bin",
2010
+ "format": "raw-shard",
2011
+ "nbytes": 25165824,
2012
+ "records": [
2013
+ {
2014
+ "name": "model.layers.8.self_attn.qkv_proj.weight",
2015
+ "shape": [
2016
+ 6144,
2017
+ 2048
2018
+ ],
2019
+ "dtype": "float16",
2020
+ "format": "f32-to-bf16",
2021
+ "nbytes": 25165824,
2022
+ "byteOffset": 0
2023
+ }
2024
+ ],
2025
+ "md5sum": "e5c5b7875ae5b54e3db24128edc79228"
2026
+ },
2027
+ {
2028
+ "dataPath": "params_shard_71.bin",
2029
+ "format": "raw-shard",
2030
+ "nbytes": 30941184,
2031
+ "records": [
2032
+ {
2033
+ "name": "model.layers.7.self_attn.o_proj.weight",
2034
+ "shape": [
2035
+ 2048,
2036
+ 2048
2037
+ ],
2038
+ "dtype": "float16",
2039
+ "format": "f32-to-bf16",
2040
+ "nbytes": 8388608,
2041
+ "byteOffset": 0
2042
+ },
2043
+ {
2044
+ "name": "model.layers.8.input_layernorm.weight",
2045
+ "shape": [
2046
+ 2048
2047
+ ],
2048
+ "dtype": "float16",
2049
+ "format": "f32-to-bf16",
2050
+ "nbytes": 4096,
2051
+ "byteOffset": 8388608
2052
+ },
2053
+ {
2054
+ "name": "model.layers.8.mlp.down_proj.weight",
2055
+ "shape": [
2056
+ 2048,
2057
+ 5504
2058
+ ],
2059
+ "dtype": "float16",
2060
+ "format": "f32-to-bf16",
2061
+ "nbytes": 22544384,
2062
+ "byteOffset": 8392704
2063
+ },
2064
+ {
2065
+ "name": "model.layers.8.post_attention_layernorm.weight",
2066
+ "shape": [
2067
+ 2048
2068
+ ],
2069
+ "dtype": "float16",
2070
+ "format": "f32-to-bf16",
2071
+ "nbytes": 4096,
2072
+ "byteOffset": 30937088
2073
+ }
2074
+ ],
2075
+ "md5sum": "423e7b1d04b274a992b8cc537daf1b61"
2076
+ },
2077
+ {
2078
+ "dataPath": "params_shard_72.bin",
2079
+ "format": "raw-shard",
2080
+ "nbytes": 45088768,
2081
+ "records": [
2082
+ {
2083
+ "name": "model.layers.9.mlp.gate_up_proj.weight",
2084
+ "shape": [
2085
+ 11008,
2086
+ 2048
2087
+ ],
2088
+ "dtype": "float16",
2089
+ "format": "f32-to-bf16",
2090
+ "nbytes": 45088768,
2091
+ "byteOffset": 0
2092
+ }
2093
+ ],
2094
+ "md5sum": "ff294c8adb898fb366b14302216e2101"
2095
+ },
2096
+ {
2097
+ "dataPath": "params_shard_73.bin",
2098
+ "format": "raw-shard",
2099
+ "nbytes": 25165824,
2100
+ "records": [
2101
+ {
2102
+ "name": "model.layers.9.self_attn.qkv_proj.weight",
2103
+ "shape": [
2104
+ 6144,
2105
+ 2048
2106
+ ],
2107
+ "dtype": "float16",
2108
+ "format": "f32-to-bf16",
2109
+ "nbytes": 25165824,
2110
+ "byteOffset": 0
2111
+ }
2112
+ ],
2113
+ "md5sum": "43a3490bd4d0e3426886c924f9e692b7"
2114
+ },
2115
+ {
2116
+ "dataPath": "params_shard_74.bin",
2117
+ "format": "raw-shard",
2118
+ "nbytes": 30941184,
2119
+ "records": [
2120
+ {
2121
+ "name": "model.layers.8.self_attn.o_proj.weight",
2122
+ "shape": [
2123
+ 2048,
2124
+ 2048
2125
+ ],
2126
+ "dtype": "float16",
2127
+ "format": "f32-to-bf16",
2128
+ "nbytes": 8388608,
2129
+ "byteOffset": 0
2130
+ },
2131
+ {
2132
+ "name": "model.layers.9.input_layernorm.weight",
2133
+ "shape": [
2134
+ 2048
2135
+ ],
2136
+ "dtype": "float16",
2137
+ "format": "f32-to-bf16",
2138
+ "nbytes": 4096,
2139
+ "byteOffset": 8388608
2140
+ },
2141
+ {
2142
+ "name": "model.layers.9.mlp.down_proj.weight",
2143
+ "shape": [
2144
+ 2048,
2145
+ 5504
2146
+ ],
2147
+ "dtype": "float16",
2148
+ "format": "f32-to-bf16",
2149
+ "nbytes": 22544384,
2150
+ "byteOffset": 8392704
2151
+ },
2152
+ {
2153
+ "name": "model.layers.9.post_attention_layernorm.weight",
2154
+ "shape": [
2155
+ 2048
2156
+ ],
2157
+ "dtype": "float16",
2158
+ "format": "f32-to-bf16",
2159
+ "nbytes": 4096,
2160
+ "byteOffset": 30937088
2161
+ }
2162
+ ],
2163
+ "md5sum": "a233477ab303a8d9c73d40c8a3424cba"
2164
+ },
2165
+ {
2166
+ "dataPath": "params_shard_75.bin",
2167
+ "format": "raw-shard",
2168
+ "nbytes": 8388608,
2169
+ "records": [
2170
+ {
2171
+ "name": "model.layers.9.self_attn.o_proj.weight",
2172
+ "shape": [
2173
+ 2048,
2174
+ 2048
2175
+ ],
2176
+ "dtype": "float16",
2177
+ "format": "f32-to-bf16",
2178
+ "nbytes": 8388608,
2179
+ "byteOffset": 0
2180
+ }
2181
+ ],
2182
+ "md5sum": "dd6db153a5d469094783da0810151802"
2183
+ }
2184
+ ]
2185
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e6c314a2b6a8cbe6029ba8426259007989372dd7bd1b0cbfb83d8202cd4f9f4
3
+ size 131080192
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c6ac84a8d3e980ffb1f816f5b55e65fbafedfab5d1e81f692565ab1ae12837b
3
+ size 45088768
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c606d96ce5766a2e0f86ee5a30bade3c1a4f2236239ed041bd26f4c2b93f8de
3
+ size 45088768
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e70960680b70019572bec72a5e190a11ef2df2e5240720951982010d8d64c092
3
+ size 25165824
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b626fd25c6cbde14a667da45a074a3b4818dc420ab06a2f9c6b7a2059624a9bb
3
+ size 30941184
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59bee0946966acd1463ea09e7655d01a90636637e2f291b097d64a5325f446f6
3
+ size 45088768
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:675083e6f4265eed79e160a8f438b2bb385325aff7d5833fd1b868bcb32b6825
3
+ size 25165824
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e246b7ec81cf222b4db53a727034ef4e60baa8f39934ec4abb01688a1984712
3
+ size 30941184
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8961053a2d7d97c489bff79e376610b2726f69eac0c5eb574fe288a11fd64a4
3
+ size 45088768
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4fb7a11bec36a9539babf251bfb2d505f3ce84d39b45910986b5593e108c281
3
+ size 25165824
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7ac268d11334f0b14dfd3e57a044c1799adbe99bbb5fef525c3b1001043416e
3
+ size 30941184
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d59fe6123e68779f4919e8f7d04e58b31406cd92abfd07e54f72b14b3f6d534
3
+ size 45088768
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60f0333d6db23825a73ad8489efc320b0acd61eacce244b6e6164a919e5d8d8a
3
+ size 131080192
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883fc868ac5a8214fd33e10931c4ca515925bc30d071cf5e50db977c2a9463d7
3
+ size 25165824
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b3703cf81bb10e05affdeb73ce2f8107386c7bccd0442342afcd6b5d33ef1f0
3
+ size 30941184
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7efa19b8eb6f5a77a1cc0f84822d44bca94c59146ab61ccfc19b07dba91d07e9
3
+ size 45088768
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1374b24d19e435361810d79b22ef6f208a85e950eadbec0fb75d98ec6b1341fd
3
+ size 25165824
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a7ded293fcebb74b0a7f7a73a8ad90cc42b56f137424a01c5c6bbf96cd090a6
3
+ size 30941184
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a1cb6b14bae2e1b53aa79fd587383a4331488869123046e678e992f1cb7ebc3
3
+ size 45088768
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3a343d34be33cad7bcf4323c16830d8b709fca1fa49d8f17f47b139f6c5fea7
3
+ size 25165824
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc797bfa5e70b95ef3c3407fa23fb1e2afa270e0c5eeaccd712a8e82b80748fa
3
+ size 30941184
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f3a9d614d791a1263bd7e8482c4a6752c793a9ba24467d4882b23e46dc8253d
3
+ size 45088768
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62c55a0dc6e5d1c163f62d0e8a2c0843c36352883b7b45673bf32ad8b29f49a4
3
+ size 25165824
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e0ff60f4e8afdc4ca1fcd30f93d2ec4ab5fb83c98a7a655aa1967a3e5296317
3
+ size 22544384
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48f77bb7ab0a8c07d232ab3dca4de17dc61bb97900780d2f42b277c591ff5348
3
+ size 30941184
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83e57616b6e3e0798ec88c6980aff890e0a51ae9b22d8b99996c3dd8278a7ccf
3
+ size 45088768
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71faaccda8165f40f13f04ca26faf0a3cd3ca90db49f7f7148dc0d393c459306
3
+ size 25165824
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71645a25a845bb449a29c3ad3137df30c6bece0be47340b7b841a0f08c66f89f
3
+ size 30941184
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:337eabe2e4646cd4d80871f2bf617eaed648efc869efa31417e7133566bf62ca
3
+ size 45088768
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bad23ebcfa960806698da2097bacff29bc022cafebf6a622e876fbbf9d8e3639
3
+ size 25165824
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e2a4a901ae576ac0d514f7984bb9780eea01f01ee8814e40da5aed59eaa6009
3
+ size 30941184
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e5b82eeee4d470400c457a5170a1330afda17ba7b109efbf4ed5bb7f59ded2e
3
+ size 45088768
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3d6d77eb982f97ed02354891d39399423c75a2846450f493db81785ae5a8e87
3
+ size 25165824
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46a27002e2f4c4ad8f4cc68f2408f2581eb88bedd267182bf769f05975907882
3
+ size 30941184
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cba31edf216162db1007a519544b4678585ec872758f3f1c106bb399734b562
3
+ size 45088768
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf8ce242676b39b77c0142816baa9d927e859cff99ba8ebcafd4c733d347b95b
3
+ size 45088768
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ea66e18d37eb534e0ab361c4c2dcfe8e1ef0d034570390868943209dbcaaa3f
3
+ size 25165824
params_shard_42.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63ebfc2453b31403a3a654ee4581e2572283a3f18ac2732d99dc0c45bf471307
3
+ size 30941184
params_shard_43.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b64a46bbc700aee05185ad7990e77cead465745a4c66e941270fa5f80015b0e6
3
+ size 45088768
params_shard_44.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:804f4c06bf35a3e359f86fd9e347bbe2d11ebf327d4cc62042bf5d4aefcabceb
3
+ size 25165824
params_shard_45.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98504f8b129075961e85dbc998b3214bd70c225b9e6a863cd6c4ff430cf46bae
3
+ size 30941184
params_shard_46.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:117f00b1066ad0b2e7e85aa25b9cc1c32db80db096fc85635888c993bd945604
3
+ size 45088768
params_shard_47.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eae56bed3347cf95305261fce803ecf400059a228f985a9769212ef591a17de7
3
+ size 25165824
params_shard_48.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:047fdf4990b832b294ed4487e8dcca17441b6c251984630b949b65a1d7257a30
3
+ size 30941184
params_shard_49.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7f11e2b9d99df6a240a76993d164ff940564632ff557c5b94cdb868341a04f2
3
+ size 45088768
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4391dc8298df8e5bc7244f5adc0ed73f165d416d25495a19cfdc6eddea85ea87
3
+ size 25165824
params_shard_50.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6f3f9d76f7d67ba0d05d0412bcc0ee82e393c48a0d94dd5e87f2f8b34e63ce3
3
+ size 25165824
params_shard_51.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8a9da4b3ffbbd0cddb84b0676c76cffc94d09d494877982510885f80f09a665
3
+ size 30941184