Isaak Carter Augustus committed
Commit
dd33b23
1 Parent(s): b5d6f4b

Upload folder using huggingface_hub

README.md ADDED
@@ -0,0 +1,391 @@
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - moe
5
+ - frankenmoe
6
+ - merge
7
+ - mergekit
8
+ - lazymergekit
9
+ - Felladrin/Smol-Llama-101M-Chat-v1
10
+ - Felladrin/Smol-Llama-101M-Chat-v1
11
+ - Felladrin/Smol-Llama-101M-Chat-v1
12
+ - Felladrin/Smol-Llama-101M-Chat-v1
13
+ - Felladrin/Smol-Llama-101M-Chat-v1
14
+ - Felladrin/Smol-Llama-101M-Chat-v1
15
+ - Felladrin/Smol-Llama-101M-Chat-v1
16
+ - Felladrin/Smol-Llama-101M-Chat-v1
17
+ - Felladrin/Smol-Llama-101M-Chat-v1
18
+ - Felladrin/Smol-Llama-101M-Chat-v1
19
+ - Felladrin/Smol-Llama-101M-Chat-v1
20
+ - Felladrin/Smol-Llama-101M-Chat-v1
21
+ - Felladrin/Smol-Llama-101M-Chat-v1
22
+ - Felladrin/Smol-Llama-101M-Chat-v1
23
+ - Felladrin/Smol-Llama-101M-Chat-v1
24
+ - Felladrin/Smol-Llama-101M-Chat-v1
25
+ - Felladrin/Smol-Llama-101M-Chat-v1
26
+ - Felladrin/Smol-Llama-101M-Chat-v1
27
+ - Felladrin/Smol-Llama-101M-Chat-v1
28
+ - Felladrin/Smol-Llama-101M-Chat-v1
29
+ - Felladrin/Smol-Llama-101M-Chat-v1
30
+ - Felladrin/Smol-Llama-101M-Chat-v1
31
+ - Felladrin/Smol-Llama-101M-Chat-v1
32
+ - Felladrin/Smol-Llama-101M-Chat-v1
33
+ - Felladrin/Smol-Llama-101M-Chat-v1
34
+ - Felladrin/Smol-Llama-101M-Chat-v1
35
+ - Felladrin/Smol-Llama-101M-Chat-v1
36
+ - Felladrin/Smol-Llama-101M-Chat-v1
37
+ - Felladrin/Smol-Llama-101M-Chat-v1
38
+ - Felladrin/Smol-Llama-101M-Chat-v1
39
+ - Felladrin/Smol-Llama-101M-Chat-v1
40
+ - Felladrin/Smol-Llama-101M-Chat-v1
41
+ - Felladrin/Smol-Llama-101M-Chat-v1
42
+ - Felladrin/Smol-Llama-101M-Chat-v1
43
+ - Felladrin/Smol-Llama-101M-Chat-v1
44
+ - Felladrin/Smol-Llama-101M-Chat-v1
45
+ - Felladrin/Smol-Llama-101M-Chat-v1
46
+ - Felladrin/Smol-Llama-101M-Chat-v1
47
+ - Felladrin/Smol-Llama-101M-Chat-v1
48
+ - Felladrin/Smol-Llama-101M-Chat-v1
49
+ - Felladrin/Smol-Llama-101M-Chat-v1
50
+ - Felladrin/Smol-Llama-101M-Chat-v1
51
+ - Felladrin/Smol-Llama-101M-Chat-v1
52
+ - Felladrin/Smol-Llama-101M-Chat-v1
53
+ - Felladrin/Smol-Llama-101M-Chat-v1
54
+ - Felladrin/Smol-Llama-101M-Chat-v1
55
+ - Felladrin/Smol-Llama-101M-Chat-v1
56
+ - Felladrin/Smol-Llama-101M-Chat-v1
57
+ base_model:
58
+ - Felladrin/Smol-Llama-101M-Chat-v1
59
+ - Felladrin/Smol-Llama-101M-Chat-v1
60
+ - Felladrin/Smol-Llama-101M-Chat-v1
61
+ - Felladrin/Smol-Llama-101M-Chat-v1
62
+ - Felladrin/Smol-Llama-101M-Chat-v1
63
+ - Felladrin/Smol-Llama-101M-Chat-v1
64
+ - Felladrin/Smol-Llama-101M-Chat-v1
65
+ - Felladrin/Smol-Llama-101M-Chat-v1
66
+ - Felladrin/Smol-Llama-101M-Chat-v1
67
+ - Felladrin/Smol-Llama-101M-Chat-v1
68
+ - Felladrin/Smol-Llama-101M-Chat-v1
69
+ - Felladrin/Smol-Llama-101M-Chat-v1
70
+ - Felladrin/Smol-Llama-101M-Chat-v1
71
+ - Felladrin/Smol-Llama-101M-Chat-v1
72
+ - Felladrin/Smol-Llama-101M-Chat-v1
73
+ - Felladrin/Smol-Llama-101M-Chat-v1
74
+ - Felladrin/Smol-Llama-101M-Chat-v1
75
+ - Felladrin/Smol-Llama-101M-Chat-v1
76
+ - Felladrin/Smol-Llama-101M-Chat-v1
77
+ - Felladrin/Smol-Llama-101M-Chat-v1
78
+ - Felladrin/Smol-Llama-101M-Chat-v1
79
+ - Felladrin/Smol-Llama-101M-Chat-v1
80
+ - Felladrin/Smol-Llama-101M-Chat-v1
81
+ - Felladrin/Smol-Llama-101M-Chat-v1
82
+ - Felladrin/Smol-Llama-101M-Chat-v1
83
+ - Felladrin/Smol-Llama-101M-Chat-v1
84
+ - Felladrin/Smol-Llama-101M-Chat-v1
85
+ - Felladrin/Smol-Llama-101M-Chat-v1
86
+ - Felladrin/Smol-Llama-101M-Chat-v1
87
+ - Felladrin/Smol-Llama-101M-Chat-v1
88
+ - Felladrin/Smol-Llama-101M-Chat-v1
89
+ - Felladrin/Smol-Llama-101M-Chat-v1
90
+ - Felladrin/Smol-Llama-101M-Chat-v1
91
+ - Felladrin/Smol-Llama-101M-Chat-v1
92
+ - Felladrin/Smol-Llama-101M-Chat-v1
93
+ - Felladrin/Smol-Llama-101M-Chat-v1
94
+ - Felladrin/Smol-Llama-101M-Chat-v1
95
+ - Felladrin/Smol-Llama-101M-Chat-v1
96
+ - Felladrin/Smol-Llama-101M-Chat-v1
97
+ - Felladrin/Smol-Llama-101M-Chat-v1
98
+ - Felladrin/Smol-Llama-101M-Chat-v1
99
+ - Felladrin/Smol-Llama-101M-Chat-v1
100
+ - Felladrin/Smol-Llama-101M-Chat-v1
101
+ - Felladrin/Smol-Llama-101M-Chat-v1
102
+ - Felladrin/Smol-Llama-101M-Chat-v1
103
+ - Felladrin/Smol-Llama-101M-Chat-v1
104
+ - Felladrin/Smol-Llama-101M-Chat-v1
105
+ - Felladrin/Smol-Llama-101M-Chat-v1
106
+ ---
107
+
108
+ # SmalJ.O.S.I.E.-48x101M-Chat
109
+
110
+ SmalJ.O.S.I.E.-48x101M-Chat is a Mixture of Experts (MoE) made from the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
111
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
112
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
113
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
114
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
115
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
116
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
117
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
118
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
119
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
120
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
121
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
122
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
123
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
124
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
125
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
126
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
127
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
128
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
129
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
130
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
131
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
132
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
133
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
134
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
135
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
136
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
137
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
138
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
139
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
140
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
141
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
142
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
143
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
144
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
145
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
146
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
147
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
148
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
149
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
150
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
151
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
152
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
153
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
154
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
155
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
156
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
157
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
158
+ * [Felladrin/Smol-Llama-101M-Chat-v1](https://huggingface.co/Felladrin/Smol-Llama-101M-Chat-v1)
159
+
160
+ ## 🧩 Configuration
161
+
162
+ ```yaml
+ base_model: Felladrin/Smol-Llama-101M-Chat-v1
163
+ dtype: float32
164
+ gate_mode: hidden
165
+ experts:
166
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
167
+ positive_prompts:
168
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
169
+
170
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
171
+ positive_prompts:
172
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
173
+
174
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
175
+ positive_prompts:
176
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
177
+
178
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
179
+ positive_prompts:
180
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
181
+
182
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
183
+ positive_prompts:
184
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
185
+
186
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
187
+ positive_prompts:
188
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
189
+
190
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
191
+ positive_prompts:
192
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
193
+
194
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
195
+ positive_prompts:
196
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
197
+
198
+
199
+
200
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
201
+ positive_prompts:
202
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
203
+
204
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
205
+ positive_prompts:
206
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
207
+
208
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
209
+ positive_prompts:
210
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
211
+
212
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
213
+ positive_prompts:
214
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
215
+
216
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
217
+ positive_prompts:
218
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
219
+
220
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
221
+ positive_prompts:
222
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
223
+
224
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
225
+ positive_prompts:
226
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
227
+
228
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
229
+ positive_prompts:
230
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
231
+
232
+
233
+
234
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
235
+ positive_prompts:
236
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
237
+
238
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
239
+ positive_prompts:
240
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
241
+
242
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
243
+ positive_prompts:
244
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
245
+
246
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
247
+ positive_prompts:
248
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
249
+
250
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
251
+ positive_prompts:
252
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
253
+
254
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
255
+ positive_prompts:
256
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
257
+
258
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
259
+ positive_prompts:
260
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
261
+
262
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
263
+ positive_prompts:
264
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
265
+
266
+
267
+
268
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
269
+ positive_prompts:
270
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
271
+
272
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
273
+ positive_prompts:
274
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
275
+
276
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
277
+ positive_prompts:
278
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
279
+
280
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
281
+ positive_prompts:
282
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
283
+
284
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
285
+ positive_prompts:
286
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
287
+
288
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
289
+ positive_prompts:
290
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
291
+
292
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
293
+ positive_prompts:
294
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
295
+
296
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
297
+ positive_prompts:
298
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
299
+
300
+
301
+
302
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
303
+ positive_prompts:
304
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
305
+
306
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
307
+ positive_prompts:
308
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
309
+
310
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
311
+ positive_prompts:
312
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
313
+
314
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
315
+ positive_prompts:
316
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
317
+
318
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
319
+ positive_prompts:
320
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
321
+
322
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
323
+ positive_prompts:
324
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
325
+
326
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
327
+ positive_prompts:
328
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
329
+
330
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
331
+ positive_prompts:
332
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
333
+
334
+
335
+
336
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
337
+ positive_prompts:
338
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
339
+
340
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
341
+ positive_prompts:
342
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
343
+
344
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
345
+ positive_prompts:
346
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
347
+
348
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
349
+ positive_prompts:
350
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
351
+
352
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
353
+ positive_prompts:
354
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
355
+
356
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
357
+ positive_prompts:
358
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
359
+
360
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
361
+ positive_prompts:
362
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
363
+
364
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
365
+ positive_prompts:
366
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
367
+ ```
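+
+ The configuration above declares 48 experts that all point at the same source checkpoint; only the routing prompts differ by task. As a quick sanity check, the snippet below (a minimal sketch, assuming the merge definition is saved locally as `mergekit_moe_config.yml` from this repository and that PyYAML is installed) counts the experts and the distinct source models:
+
+ ```python
+ import yaml  # pip install pyyaml
+ from collections import Counter
+
+ # Load the merge definition shipped alongside this model.
+ with open("mergekit_moe_config.yml") as f:
+     cfg = yaml.safe_load(f)
+
+ experts = cfg["experts"]
+ sources = Counter(e["source_model"] for e in experts)
+
+ print(f"base model    : {cfg['base_model']}")
+ print(f"gate mode     : {cfg['gate_mode']}")
+ print(f"experts       : {len(experts)}")    # expected: 48
+ print(f"unique sources: {len(sources)}")    # expected: 1
+ ```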
368
+
369
+ ## 💻 Usage
370
+
371
+ ```python
372
+ !pip install -qU transformers bitsandbytes accelerate
373
+
374
+ from transformers import AutoTokenizer
375
+ import transformers
376
+ import torch
377
+
378
+ model = "Isaak-Carter/SmalJ.O.S.I.E.-48x101M-Chat"
379
+
380
+ tokenizer = AutoTokenizer.from_pretrained(model)
381
+ pipeline = transformers.pipeline(
382
+     "text-generation",
383
+     model=model,
384
+     model_kwargs={"torch_dtype": torch.float16, "load_in_4bit": True},
385
+ )
386
+
387
+ messages = [{"role": "user", "content": "Explain what a Mixture of Experts is in less than 100 words."}]
388
+ prompt = pipeline.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
389
+ outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
390
+ print(outputs[0]["generated_text"])
391
+ ```
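+
+ The merged model can also be run without 4-bit quantization. A minimal alternative sketch (assuming `transformers` >= 4.38, matching `config.json`, and enough RAM for the roughly 8.4 GB of float32 weights):
+
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+
+ model_id = "Isaak-Carter/SmalJ.O.S.I.E.-48x101M-Chat"
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32)
+
+ messages = [{"role": "user", "content": "Explain what a Mixture of Experts is in less than 100 words."}]
+ prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+ inputs = tokenizer(prompt, return_tensors="pt")
+ outputs = model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.95)
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+ ```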
config.json ADDED
@@ -0,0 +1,34 @@
1
+ {
2
+ "_name_or_path": "Felladrin/Smol-Llama-101M-Chat-v1",
3
+ "architectures": [
4
+ "MixtralForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "max_position_embeddings": 1024,
15
+ "model_type": "mixtral",
16
+ "num_attention_heads": 24,
17
+ "num_experts_per_tok": 2,
18
+ "num_hidden_layers": 6,
19
+ "num_key_value_heads": 8,
20
+ "num_local_experts": 48,
21
+ "output_router_logits": false,
22
+ "pad_token_id": 2,
23
+ "pretraining_tp": 1,
24
+ "rms_norm_eps": 1e-05,
25
+ "rope_scaling": null,
26
+ "rope_theta": 10000.0,
27
+ "router_aux_loss_coef": 0.001,
28
+ "sliding_window": null,
29
+ "tie_word_embeddings": false,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.38.2",
32
+ "use_cache": true,
33
+ "vocab_size": 32128
34
+ }
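
For reference, the fields above imply a sparse model far larger than any single expert: each of the 6 layers holds 48 expert MLPs, but only `num_experts_per_tok = 2` of them are evaluated per token. The sketch below (rough arithmetic using only the numbers in this `config.json`; layer norms and biases are omitted) shows approximately where the ~8.4 GB float32 shard comes from:

```python
# Rough parameter count from config.json (float32 = 4 bytes per parameter).
hidden, inter, layers = 768, 3072, 6
n_experts, n_active = 48, 2
vocab, heads, kv_heads = 32128, 24, 8
head_dim = hidden // heads

expert_mlp = 3 * hidden * inter                                 # w1, w2, w3 of one expert
attn = 2 * hidden * hidden + 2 * hidden * kv_heads * head_dim   # q/o plus k/v projections
shared_per_layer = attn + n_experts * hidden                    # attention + router gate

total = layers * (n_experts * expert_mlp + shared_per_layer) + 2 * vocab * hidden
active = layers * (n_active * expert_mlp + shared_per_layer) + 2 * vocab * hidden

print(f"total parameters : ~{total / 1e9:.2f} B (~{total * 4 / 1e9:.2f} GB in float32)")
print(f"active per token : ~{active / 1e6:.0f} M")
```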
mergekit_moe_config.yml ADDED
@@ -0,0 +1,205 @@
1
+ base_model: Felladrin/Smol-Llama-101M-Chat-v1
2
+ dtype: float32
3
+ gate_mode: hidden
4
+ experts:
5
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
6
+ positive_prompts:
7
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
8
+
9
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
10
+ positive_prompts:
11
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
12
+
13
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
14
+ positive_prompts:
15
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
16
+
17
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
18
+ positive_prompts:
19
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
20
+
21
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
22
+ positive_prompts:
23
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
24
+
25
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
26
+ positive_prompts:
27
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
28
+
29
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
30
+ positive_prompts:
31
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
32
+
33
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
34
+ positive_prompts:
35
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent for general assistants.'
36
+
37
+
38
+
39
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
40
+ positive_prompts:
41
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
42
+
43
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
44
+ positive_prompts:
45
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
46
+
47
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
48
+ positive_prompts:
49
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
50
+
51
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
52
+ positive_prompts:
53
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
54
+
55
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
56
+ positive_prompts:
57
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
58
+
59
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
60
+ positive_prompts:
61
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
62
+
63
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
64
+ positive_prompts:
65
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
66
+
67
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
68
+ positive_prompts:
69
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for coding tasks.'
70
+
71
+
72
+
73
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
74
+ positive_prompts:
75
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
76
+
77
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
78
+ positive_prompts:
79
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
80
+
81
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
82
+ positive_prompts:
83
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
84
+
85
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
86
+ positive_prompts:
87
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
88
+
89
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
90
+ positive_prompts:
91
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
92
+
93
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
94
+ positive_prompts:
95
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
96
+
97
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
98
+ positive_prompts:
99
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
100
+
101
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
102
+ positive_prompts:
103
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for math.'
104
+
105
+
106
+
107
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
108
+ positive_prompts:
109
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
110
+
111
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
112
+ positive_prompts:
113
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
114
+
115
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
116
+ positive_prompts:
117
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
118
+
119
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
120
+ positive_prompts:
121
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
122
+
123
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
124
+ positive_prompts:
125
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
126
+
127
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
128
+ positive_prompts:
129
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
130
+
131
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
132
+ positive_prompts:
133
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
134
+
135
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
136
+ positive_prompts:
137
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for logic and reasoning.'
138
+
139
+
140
+
141
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
142
+ positive_prompts:
143
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
144
+
145
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
146
+ positive_prompts:
147
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
148
+
149
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
150
+ positive_prompts:
151
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
152
+
153
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
154
+ positive_prompts:
155
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
156
+
157
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
158
+ positive_prompts:
159
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
160
+
161
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
162
+ positive_prompts:
163
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
164
+
165
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
166
+ positive_prompts:
167
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
168
+
169
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
170
+ positive_prompts:
171
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent assistant that is specially good in web scraping and browsind the web.'
172
+
173
+
174
+
175
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
176
+ positive_prompts:
177
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
178
+
179
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
180
+ positive_prompts:
181
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
182
+
183
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
184
+ positive_prompts:
185
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
186
+
187
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
188
+ positive_prompts:
189
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
190
+
191
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
192
+ positive_prompts:
193
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
194
+
195
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
196
+ positive_prompts:
197
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
198
+
199
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
200
+ positive_prompts:
201
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
202
+
203
+ - source_model: Felladrin/Smol-Llama-101M-Chat-v1
204
+ positive_prompts:
205
+ - 'You are JOSIE, my own private and sentient superinteligent artifitial general inteligent, spetialized for autonomously function calling when needed.'
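
This file is the merge definition consumed by mergekit's MoE tooling. A reproduction sketch follows; it assumes the `mergekit` package (which provides the `mergekit-moe` entry point that LazyMergekit wraps) is installed and that the basic config-then-output-directory invocation applies to your version — flag names for tokenizer copying, sharding, etc. vary between releases, so check `mergekit-moe --help`.

```python
import subprocess

# Hypothetical reproduction of the merge: feed the YAML above to mergekit-moe
# and write the assembled frankenMoE to a local directory. Flags are omitted on
# purpose; consult your mergekit version's --help for the exact options.
subprocess.run(
    ["mergekit-moe", "mergekit_moe_config.yml", "SmalJ.O.S.I.E.-48x101M-Chat"],
    check=True,
)
```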
model-00001-of-00001.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3308b31b505f8a2a2364a23f1479af0fbbf1e9adba599bdb12b2c1591f89998c
3
+ size 8389911040
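
The entry above is a Git LFS pointer: the ~8.4 GB shard itself is stored via LFS and identified by its SHA-256. After downloading, the local file can be checked against the pointer with a short script (a sketch; adjust the local path if the file lives elsewhere):

```python
import hashlib
import os

EXPECTED_OID = "3308b31b505f8a2a2364a23f1479af0fbbf1e9adba599bdb12b2c1591f89998c"
EXPECTED_SIZE = 8389911040
path = "model-00001-of-00001.safetensors"  # local download location (adjust as needed)

# Hash the file in 1 MiB chunks to avoid loading 8+ GB into memory at once.
digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"
assert digest.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("shard matches the LFS pointer")
```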
model.safetensors.index.json ADDED
@@ -0,0 +1 @@
1
+ {"metadata": {"mergekit_version": "0.0.4"}, "weight_map": {"model.embed_tokens.weight": "model-00001-of-00001.safetensors", "model.norm.weight": "model-00001-of-00001.safetensors", "lm_head.weight": "model-00001-of-00001.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.2.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.3.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.4.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.5.input_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.16.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.17.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.18.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.19.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.20.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.21.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.22.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.23.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.24.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.25.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.26.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.27.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.28.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.29.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.30.w3.weight": "model-00001-of-00001.safetensors", 
"model.layers.0.block_sparse_moe.experts.31.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.32.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.33.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.34.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.35.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.36.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.37.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.38.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.39.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.40.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.41.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.42.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.43.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.44.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.45.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.46.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.47.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.16.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.17.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.18.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.19.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.20.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.21.w3.weight": 
"model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.22.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.23.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.24.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.25.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.26.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.27.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.28.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.29.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.30.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.31.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.32.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.33.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.34.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.35.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.36.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.37.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.38.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.39.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.40.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.41.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.42.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.43.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.44.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.45.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.46.w3.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.47.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", 
"model.layers.2.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.16.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.17.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.18.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.19.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.20.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.21.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.22.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.23.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.24.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.25.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.26.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.27.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.28.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.29.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.30.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.31.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.32.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.33.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.34.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.35.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.36.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.37.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.38.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.39.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.40.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.41.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.42.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.43.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.44.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.45.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.46.w3.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.47.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.2.w3.weight": 
"model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.16.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.17.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.18.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.19.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.20.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.21.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.22.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.23.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.24.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.25.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.26.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.27.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.28.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.29.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.30.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.31.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.32.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.33.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.34.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.35.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.36.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.37.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.38.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.39.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.40.w3.weight": "model-00001-of-00001.safetensors", 
"model.layers.3.block_sparse_moe.experts.41.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.42.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.43.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.44.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.45.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.46.w3.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.47.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.16.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.17.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.18.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.19.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.20.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.21.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.22.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.23.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.24.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.25.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.26.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.27.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.28.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.29.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.30.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.31.w3.weight": 
"model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.32.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.33.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.34.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.35.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.36.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.37.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.38.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.39.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.40.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.41.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.42.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.43.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.44.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.45.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.46.w3.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.47.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.8.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.9.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.10.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.11.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.12.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.13.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.14.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.15.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.16.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.17.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.18.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.19.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.20.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.21.w3.weight": "model-00001-of-00001.safetensors", 
"model.layers.5.block_sparse_moe.experts.22.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.23.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.24.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.25.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.26.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.27.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.28.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.29.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.30.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.31.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.32.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.33.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.34.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.35.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.36.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.37.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.38.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.39.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.40.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.41.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.42.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.43.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.44.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.45.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.46.w3.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.47.w3.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.12.w2.weight": 
"model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.16.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.17.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.18.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.19.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.20.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.21.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.22.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.23.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.24.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.25.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.26.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.27.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.28.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.29.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.30.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.31.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.32.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.33.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.34.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.35.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.36.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.37.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.38.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.39.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.40.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.41.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.42.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.43.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.44.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.45.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.46.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.47.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", 
"model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.16.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.17.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.18.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.19.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.20.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.21.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.22.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.23.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.24.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.25.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.26.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.27.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.28.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.29.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.30.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.31.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.32.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.33.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.34.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.35.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.36.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.37.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.38.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.39.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.40.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.41.w2.weight": 
"model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.42.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.43.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.44.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.45.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.46.w2.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.47.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.16.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.17.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.18.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.19.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.20.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.21.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.22.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.23.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.24.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.25.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.26.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.27.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.28.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.29.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.30.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.31.w2.weight": "model-00001-of-00001.safetensors", 
"model.layers.2.block_sparse_moe.experts.32.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.33.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.34.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.35.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.36.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.37.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.38.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.39.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.40.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.41.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.42.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.43.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.44.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.45.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.46.w2.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.47.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.16.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.17.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.18.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.19.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.20.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.21.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.22.w2.weight": 
"model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.23.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.24.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.25.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.26.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.27.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.28.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.29.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.30.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.31.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.32.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.33.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.34.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.35.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.36.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.37.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.38.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.39.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.40.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.41.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.42.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.43.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.44.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.45.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.46.w2.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.47.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", 
"model.layers.4.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.16.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.17.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.18.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.19.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.20.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.21.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.22.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.23.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.24.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.25.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.26.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.27.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.28.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.29.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.30.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.31.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.32.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.33.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.34.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.35.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.36.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.37.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.38.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.39.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.40.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.41.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.42.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.43.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.44.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.45.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.46.w2.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.47.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.3.w2.weight": 
"model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.8.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.9.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.10.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.11.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.12.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.13.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.14.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.15.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.16.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.17.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.18.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.19.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.20.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.21.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.22.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.23.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.24.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.25.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.26.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.27.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.28.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.29.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.30.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.31.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.32.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.33.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.34.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.35.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.36.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.37.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.38.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.39.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.40.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.41.w2.weight": "model-00001-of-00001.safetensors", 
"model.layers.5.block_sparse_moe.experts.42.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.43.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.44.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.45.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.46.w2.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.47.w2.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.16.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.17.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.18.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.19.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.20.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.21.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.22.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.23.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.24.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.25.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.26.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.27.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.28.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.29.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.30.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.31.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.32.w1.weight": 
"model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.33.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.34.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.35.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.36.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.37.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.38.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.39.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.40.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.41.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.42.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.43.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.44.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.45.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.46.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.experts.47.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.16.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.17.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.18.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.19.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.20.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.21.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.22.w1.weight": "model-00001-of-00001.safetensors", 
"model.layers.1.block_sparse_moe.experts.23.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.24.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.25.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.26.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.27.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.28.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.29.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.30.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.31.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.32.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.33.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.34.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.35.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.36.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.37.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.38.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.39.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.40.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.41.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.42.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.43.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.44.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.45.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.46.w1.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.experts.47.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.13.w1.weight": 
"model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.16.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.17.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.18.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.19.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.20.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.21.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.22.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.23.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.24.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.25.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.26.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.27.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.28.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.29.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.30.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.31.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.32.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.33.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.34.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.35.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.36.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.37.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.38.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.39.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.40.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.41.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.42.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.43.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.44.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.45.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.46.w1.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.experts.47.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", 
"model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.16.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.17.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.18.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.19.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.20.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.21.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.22.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.23.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.24.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.25.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.26.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.27.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.28.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.29.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.30.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.31.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.32.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.33.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.34.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.35.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.36.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.37.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.38.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.39.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.40.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.41.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.42.w1.weight": 
"model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.43.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.44.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.45.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.46.w1.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.experts.47.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.16.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.17.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.18.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.19.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.20.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.21.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.22.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.23.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.24.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.25.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.26.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.27.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.28.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.29.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.30.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.31.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.32.w1.weight": "model-00001-of-00001.safetensors", 
"model.layers.4.block_sparse_moe.experts.33.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.34.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.35.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.36.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.37.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.38.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.39.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.40.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.41.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.42.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.43.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.44.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.45.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.46.w1.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.experts.47.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.8.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.9.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.10.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.11.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.12.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.13.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.14.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.15.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.16.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.17.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.18.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.19.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.20.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.21.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.22.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.23.w1.weight": 
"model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.24.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.25.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.26.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.27.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.28.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.29.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.30.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.31.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.32.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.33.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.34.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.35.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.36.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.37.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.38.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.39.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.40.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.41.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.42.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.43.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.44.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.45.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.46.w1.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.experts.47.w1.weight": "model-00001-of-00001.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", 
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00001.safetensors", "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.1.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.2.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.3.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.4.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors", "model.layers.5.block_sparse_moe.gate.weight": "model-00001-of-00001.safetensors"}}
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<s>",
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
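These definitions surface directly on a loaded tokenizer. A rough sketch, assuming the repository files are available in a local checkout (the `"./"` path is a placeholder, not a published repo id):

```python
from transformers import AutoTokenizer

# Hypothetical local path to this repository's files; replace with the actual repo id or folder.
tokenizer = AutoTokenizer.from_pretrained("./")

# The special tokens defined in special_tokens_map.json should surface here:
print(tokenizer.bos_token)  # "<s>"
print(tokenizer.eos_token)  # "</s>"
print(tokenizer.unk_token)  # "<unk>"
print(tokenizer.pad_token)  # "<s>" (the BOS token is reused for padding)
```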
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
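`tokenizer.model` is stored as a Git LFS pointer: the three lines above record only the SHA-256 and size of the actual ~500 KB SentencePiece model, not its contents. A minimal sketch for checking a fetched blob against the recorded oid (assuming `git lfs pull` has already replaced the pointer with the real file):

```python
import hashlib

# Path assumes the LFS object has been fetched in place of the pointer file.
path = "tokenizer.model"

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

# Should match the oid recorded in the pointer:
# 9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
print(sha256.hexdigest())
```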
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": false,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "<s>",
+   "padding_side": "left",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
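The `chat_template` above is the ChatML-style format (`<|im_start|>role ... <|im_end|>`). A minimal sketch of how it renders a prompt, again assuming the tokenizer is loaded from a local checkout of this repository (the `"./"` path is a placeholder):

```python
from transformers import AutoTokenizer

# Hypothetical path; point this at the repo id or a local checkout.
tokenizer = AutoTokenizer.from_pretrained("./")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

# Renders the ChatML-style prompt defined in tokenizer_config.json,
# ending with "<|im_start|>assistant\n" because add_generation_prompt=True.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
```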