Jon committed on
Commit
eb03016
1 Parent(s): be7ebfc

Upload 8 files

Browse files
README.md CHANGED
@@ -1,3 +1,34 @@
1
  ---
2
- license: mit
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ library_name: peft
3
  ---
4
+ ## Training procedure
5
+
6
+
7
+ The following `bitsandbytes` quantization config was used during training:
8
+ - quant_method: QuantizationMethod.BITS_AND_BYTES
9
+ - load_in_8bit: False
10
+ - load_in_4bit: True
11
+ - llm_int8_threshold: 6.0
12
+ - llm_int8_skip_modules: None
13
+ - llm_int8_enable_fp32_cpu_offload: False
14
+ - llm_int8_has_fp16_weight: False
15
+ - bnb_4bit_quant_type: nf4
16
+ - bnb_4bit_use_double_quant: True
17
+ - bnb_4bit_compute_dtype: float16
18
+
19
+ The following `bitsandbytes` quantization config was used during training:
20
+ - quant_method: QuantizationMethod.BITS_AND_BYTES
21
+ - load_in_8bit: False
22
+ - load_in_4bit: True
23
+ - llm_int8_threshold: 6.0
24
+ - llm_int8_skip_modules: None
25
+ - llm_int8_enable_fp32_cpu_offload: False
26
+ - llm_int8_has_fp16_weight: False
27
+ - bnb_4bit_quant_type: nf4
28
+ - bnb_4bit_use_double_quant: True
29
+ - bnb_4bit_compute_dtype: float16
30
+ ### Framework versions
31
+
32
+ - PEFT 0.5.0
33
+
34
+ - PEFT 0.5.0
adapter_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "models\\HuggingFaceH4_zephyr-7b-alpha",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 64,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 32,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "v_proj"
19
+ ],
20
+ "task_type": "CAUSAL_LM"
21
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53e37f76541a681d7adc309759a6fec09199cefa4e57e670fb85c99c07e72fba
3
+ size 54572362
training_graph.json ADDED
@@ -0,0 +1,1268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "current_steps": 0,
4
+ "loss": 3.6926,
5
+ "learning_rate": 0.0,
6
+ "epoch": 0.01
7
+ },
8
+ {
9
+ "current_steps": 1,
10
+ "loss": 3.0107,
11
+ "learning_rate": 0.0,
12
+ "epoch": 0.03
13
+ },
14
+ {
15
+ "current_steps": 2,
16
+ "loss": 3.0688,
17
+ "learning_rate": 0.0,
18
+ "epoch": 0.04
19
+ },
20
+ {
21
+ "current_steps": 3,
22
+ "loss": 4.0176,
23
+ "learning_rate": 0.0,
24
+ "epoch": 0.06
25
+ },
26
+ {
27
+ "current_steps": 4,
28
+ "loss": 4.0609,
29
+ "learning_rate": 0.0,
30
+ "epoch": 0.07
31
+ },
32
+ {
33
+ "current_steps": 5,
34
+ "loss": 3.754,
35
+ "learning_rate": 0.0,
36
+ "epoch": 0.09
37
+ },
38
+ {
39
+ "current_steps": 6,
40
+ "loss": 3.1981,
41
+ "learning_rate": 0.0,
42
+ "epoch": 0.1
43
+ },
44
+ {
45
+ "current_steps": 7,
46
+ "loss": 2.7138,
47
+ "learning_rate": 0.0,
48
+ "epoch": 0.11
49
+ },
50
+ {
51
+ "current_steps": 8,
52
+ "loss": 3.8803,
53
+ "learning_rate": 0.0,
54
+ "epoch": 0.13
55
+ },
56
+ {
57
+ "current_steps": 9,
58
+ "loss": 3.5793,
59
+ "learning_rate": 2.9999999999999997e-06,
60
+ "epoch": 0.14
61
+ },
62
+ {
63
+ "current_steps": 10,
64
+ "loss": 2.8589,
65
+ "learning_rate": 5.999999999999999e-06,
66
+ "epoch": 0.16
67
+ },
68
+ {
69
+ "current_steps": 11,
70
+ "loss": 2.6536,
71
+ "learning_rate": 8.999999999999999e-06,
72
+ "epoch": 0.17
73
+ },
74
+ {
75
+ "current_steps": 12,
76
+ "loss": 4.1845,
77
+ "learning_rate": 1.1999999999999999e-05,
78
+ "epoch": 0.19
79
+ },
80
+ {
81
+ "current_steps": 13,
82
+ "loss": 3.7447,
83
+ "learning_rate": 1.4999999999999999e-05,
84
+ "epoch": 0.2
85
+ },
86
+ {
87
+ "current_steps": 14,
88
+ "loss": 2.9558,
89
+ "learning_rate": 1.7999999999999997e-05,
90
+ "epoch": 0.21
91
+ },
92
+ {
93
+ "current_steps": 15,
94
+ "loss": 3.7552,
95
+ "learning_rate": 2.1e-05,
96
+ "epoch": 0.23
97
+ },
98
+ {
99
+ "current_steps": 16,
100
+ "loss": 3.7336,
101
+ "learning_rate": 2.3999999999999997e-05,
102
+ "epoch": 0.24
103
+ },
104
+ {
105
+ "current_steps": 17,
106
+ "loss": 2.484,
107
+ "learning_rate": 2.6999999999999996e-05,
108
+ "epoch": 0.26
109
+ },
110
+ {
111
+ "current_steps": 18,
112
+ "loss": 3.7203,
113
+ "learning_rate": 2.9999999999999997e-05,
114
+ "epoch": 0.27
115
+ },
116
+ {
117
+ "current_steps": 19,
118
+ "loss": 3.6129,
119
+ "learning_rate": 2.9999999999999997e-05,
120
+ "epoch": 0.29
121
+ },
122
+ {
123
+ "current_steps": 20,
124
+ "loss": 2.2497,
125
+ "learning_rate": 3.2999999999999996e-05,
126
+ "epoch": 0.3
127
+ },
128
+ {
129
+ "current_steps": 21,
130
+ "loss": 3.2732,
131
+ "learning_rate": 3.5999999999999994e-05,
132
+ "epoch": 0.31
133
+ },
134
+ {
135
+ "current_steps": 22,
136
+ "loss": 2.4979,
137
+ "learning_rate": 3.9e-05,
138
+ "epoch": 0.33
139
+ },
140
+ {
141
+ "current_steps": 23,
142
+ "loss": 3.043,
143
+ "learning_rate": 4.2e-05,
144
+ "epoch": 0.34
145
+ },
146
+ {
147
+ "current_steps": 24,
148
+ "loss": 3.2435,
149
+ "learning_rate": 4.4999999999999996e-05,
150
+ "epoch": 0.36
151
+ },
152
+ {
153
+ "current_steps": 25,
154
+ "loss": 3.3493,
155
+ "learning_rate": 4.7999999999999994e-05,
156
+ "epoch": 0.37
157
+ },
158
+ {
159
+ "current_steps": 26,
160
+ "loss": 2.6339,
161
+ "learning_rate": 5.1e-05,
162
+ "epoch": 0.39
163
+ },
164
+ {
165
+ "current_steps": 27,
166
+ "loss": 2.9486,
167
+ "learning_rate": 5.399999999999999e-05,
168
+ "epoch": 0.4
169
+ },
170
+ {
171
+ "current_steps": 28,
172
+ "loss": 2.3865,
173
+ "learning_rate": 5.6999999999999996e-05,
174
+ "epoch": 0.41
175
+ },
176
+ {
177
+ "current_steps": 29,
178
+ "loss": 2.3687,
179
+ "learning_rate": 5.9999999999999995e-05,
180
+ "epoch": 0.43
181
+ },
182
+ {
183
+ "current_steps": 30,
184
+ "loss": 2.3417,
185
+ "learning_rate": 6.299999999999999e-05,
186
+ "epoch": 0.44
187
+ },
188
+ {
189
+ "current_steps": 31,
190
+ "loss": 2.5443,
191
+ "learning_rate": 6.599999999999999e-05,
192
+ "epoch": 0.46
193
+ },
194
+ {
195
+ "current_steps": 32,
196
+ "loss": 2.4717,
197
+ "learning_rate": 6.9e-05,
198
+ "epoch": 0.47
199
+ },
200
+ {
201
+ "current_steps": 33,
202
+ "loss": 2.2291,
203
+ "learning_rate": 7.199999999999999e-05,
204
+ "epoch": 0.49
205
+ },
206
+ {
207
+ "current_steps": 34,
208
+ "loss": 2.4815,
209
+ "learning_rate": 7.5e-05,
210
+ "epoch": 0.5
211
+ },
212
+ {
213
+ "current_steps": 35,
214
+ "loss": 2.4468,
215
+ "learning_rate": 7.8e-05,
216
+ "epoch": 0.51
217
+ },
218
+ {
219
+ "current_steps": 36,
220
+ "loss": 2.4246,
221
+ "learning_rate": 8.1e-05,
222
+ "epoch": 0.53
223
+ },
224
+ {
225
+ "current_steps": 37,
226
+ "loss": 2.3154,
227
+ "learning_rate": 8.4e-05,
228
+ "epoch": 0.54
229
+ },
230
+ {
231
+ "current_steps": 38,
232
+ "loss": 1.9939,
233
+ "learning_rate": 8.699999999999999e-05,
234
+ "epoch": 0.56
235
+ },
236
+ {
237
+ "current_steps": 39,
238
+ "loss": 1.8099,
239
+ "learning_rate": 8.999999999999999e-05,
240
+ "epoch": 0.57
241
+ },
242
+ {
243
+ "current_steps": 40,
244
+ "loss": 1.865,
245
+ "learning_rate": 9.3e-05,
246
+ "epoch": 0.59
247
+ },
248
+ {
249
+ "current_steps": 41,
250
+ "loss": 1.4322,
251
+ "learning_rate": 9.599999999999999e-05,
252
+ "epoch": 0.6
253
+ },
254
+ {
255
+ "current_steps": 42,
256
+ "loss": 2.3252,
257
+ "learning_rate": 9.9e-05,
258
+ "epoch": 0.61
259
+ },
260
+ {
261
+ "current_steps": 43,
262
+ "loss": 1.7521,
263
+ "learning_rate": 0.000102,
264
+ "epoch": 0.63
265
+ },
266
+ {
267
+ "current_steps": 44,
268
+ "loss": 2.145,
269
+ "learning_rate": 0.00010499999999999999,
270
+ "epoch": 0.64
271
+ },
272
+ {
273
+ "current_steps": 45,
274
+ "loss": 1.6641,
275
+ "learning_rate": 0.00010799999999999998,
276
+ "epoch": 0.66
277
+ },
278
+ {
279
+ "current_steps": 46,
280
+ "loss": 1.8938,
281
+ "learning_rate": 0.00011099999999999999,
282
+ "epoch": 0.67
283
+ },
284
+ {
285
+ "current_steps": 47,
286
+ "loss": 1.5194,
287
+ "learning_rate": 0.00011399999999999999,
288
+ "epoch": 0.69
289
+ },
290
+ {
291
+ "current_steps": 48,
292
+ "loss": 1.4478,
293
+ "learning_rate": 0.000117,
294
+ "epoch": 0.7
295
+ },
296
+ {
297
+ "current_steps": 49,
298
+ "loss": 1.9414,
299
+ "learning_rate": 0.00011999999999999999,
300
+ "epoch": 0.71
301
+ },
302
+ {
303
+ "current_steps": 50,
304
+ "loss": 1.5601,
305
+ "learning_rate": 0.00012299999999999998,
306
+ "epoch": 0.73
307
+ },
308
+ {
309
+ "current_steps": 51,
310
+ "loss": 1.9015,
311
+ "learning_rate": 0.00012599999999999997,
312
+ "epoch": 0.74
313
+ },
314
+ {
315
+ "current_steps": 52,
316
+ "loss": 1.7384,
317
+ "learning_rate": 0.000129,
318
+ "epoch": 0.76
319
+ },
320
+ {
321
+ "current_steps": 53,
322
+ "loss": 1.7282,
323
+ "learning_rate": 0.00013199999999999998,
324
+ "epoch": 0.77
325
+ },
326
+ {
327
+ "current_steps": 54,
328
+ "loss": 1.6877,
329
+ "learning_rate": 0.000135,
330
+ "epoch": 0.79
331
+ },
332
+ {
333
+ "current_steps": 55,
334
+ "loss": 1.0647,
335
+ "learning_rate": 0.000138,
336
+ "epoch": 0.8
337
+ },
338
+ {
339
+ "current_steps": 56,
340
+ "loss": 1.755,
341
+ "learning_rate": 0.00014099999999999998,
342
+ "epoch": 0.81
343
+ },
344
+ {
345
+ "current_steps": 57,
346
+ "loss": 1.516,
347
+ "learning_rate": 0.00014399999999999998,
348
+ "epoch": 0.83
349
+ },
350
+ {
351
+ "current_steps": 58,
352
+ "loss": 1.3498,
353
+ "learning_rate": 0.000147,
354
+ "epoch": 0.84
355
+ },
356
+ {
357
+ "current_steps": 59,
358
+ "loss": 1.6789,
359
+ "learning_rate": 0.00015,
360
+ "epoch": 0.86
361
+ },
362
+ {
363
+ "current_steps": 60,
364
+ "loss": 1.3651,
365
+ "learning_rate": 0.00015299999999999998,
366
+ "epoch": 0.87
367
+ },
368
+ {
369
+ "current_steps": 61,
370
+ "loss": 1.2967,
371
+ "learning_rate": 0.000156,
372
+ "epoch": 0.89
373
+ },
374
+ {
375
+ "current_steps": 62,
376
+ "loss": 1.3998,
377
+ "learning_rate": 0.000159,
378
+ "epoch": 0.9
379
+ },
380
+ {
381
+ "current_steps": 63,
382
+ "loss": 1.1989,
383
+ "learning_rate": 0.000162,
384
+ "epoch": 0.91
385
+ },
386
+ {
387
+ "current_steps": 64,
388
+ "loss": 1.029,
389
+ "learning_rate": 0.000165,
390
+ "epoch": 0.93
391
+ },
392
+ {
393
+ "current_steps": 65,
394
+ "loss": 1.4441,
395
+ "learning_rate": 0.000168,
396
+ "epoch": 0.94
397
+ },
398
+ {
399
+ "current_steps": 66,
400
+ "loss": 1.1804,
401
+ "learning_rate": 0.00017099999999999998,
402
+ "epoch": 0.96
403
+ },
404
+ {
405
+ "current_steps": 67,
406
+ "loss": 1.2868,
407
+ "learning_rate": 0.00017399999999999997,
408
+ "epoch": 0.97
409
+ },
410
+ {
411
+ "current_steps": 68,
412
+ "loss": 1.1724,
413
+ "learning_rate": 0.00017699999999999997,
414
+ "epoch": 0.99
415
+ },
416
+ {
417
+ "current_steps": 69,
418
+ "loss": 1.4346,
419
+ "learning_rate": 0.00017999999999999998,
420
+ "epoch": 1.0
421
+ },
422
+ {
423
+ "current_steps": 70,
424
+ "loss": 1.0183,
425
+ "learning_rate": 0.00018299999999999998,
426
+ "epoch": 1.01
427
+ },
428
+ {
429
+ "current_steps": 71,
430
+ "loss": 1.1259,
431
+ "learning_rate": 0.000186,
432
+ "epoch": 1.03
433
+ },
434
+ {
435
+ "current_steps": 72,
436
+ "loss": 1.1713,
437
+ "learning_rate": 0.00018899999999999999,
438
+ "epoch": 1.04
439
+ },
440
+ {
441
+ "current_steps": 73,
442
+ "loss": 1.0773,
443
+ "learning_rate": 0.00019199999999999998,
444
+ "epoch": 1.06
445
+ },
446
+ {
447
+ "current_steps": 74,
448
+ "loss": 1.2956,
449
+ "learning_rate": 0.000195,
450
+ "epoch": 1.07
451
+ },
452
+ {
453
+ "current_steps": 75,
454
+ "loss": 0.9359,
455
+ "learning_rate": 0.000198,
456
+ "epoch": 1.09
457
+ },
458
+ {
459
+ "current_steps": 76,
460
+ "loss": 0.9838,
461
+ "learning_rate": 0.000201,
462
+ "epoch": 1.1
463
+ },
464
+ {
465
+ "current_steps": 77,
466
+ "loss": 0.9488,
467
+ "learning_rate": 0.000204,
468
+ "epoch": 1.11
469
+ },
470
+ {
471
+ "current_steps": 78,
472
+ "loss": 1.0748,
473
+ "learning_rate": 0.00020699999999999996,
474
+ "epoch": 1.13
475
+ },
476
+ {
477
+ "current_steps": 79,
478
+ "loss": 1.0914,
479
+ "learning_rate": 0.00020999999999999998,
480
+ "epoch": 1.14
481
+ },
482
+ {
483
+ "current_steps": 80,
484
+ "loss": 0.5364,
485
+ "learning_rate": 0.00021299999999999997,
486
+ "epoch": 1.16
487
+ },
488
+ {
489
+ "current_steps": 81,
490
+ "loss": 0.4909,
491
+ "learning_rate": 0.00021599999999999996,
492
+ "epoch": 1.17
493
+ },
494
+ {
495
+ "current_steps": 82,
496
+ "loss": 1.0176,
497
+ "learning_rate": 0.00021899999999999998,
498
+ "epoch": 1.19
499
+ },
500
+ {
501
+ "current_steps": 83,
502
+ "loss": 1.0543,
503
+ "learning_rate": 0.00022199999999999998,
504
+ "epoch": 1.2
505
+ },
506
+ {
507
+ "current_steps": 84,
508
+ "loss": 0.9092,
509
+ "learning_rate": 0.000225,
510
+ "epoch": 1.21
511
+ },
512
+ {
513
+ "current_steps": 85,
514
+ "loss": 0.5678,
515
+ "learning_rate": 0.00022799999999999999,
516
+ "epoch": 1.23
517
+ },
518
+ {
519
+ "current_steps": 86,
520
+ "loss": 0.8385,
521
+ "learning_rate": 0.00023099999999999998,
522
+ "epoch": 1.24
523
+ },
524
+ {
525
+ "current_steps": 87,
526
+ "loss": 1.6241,
527
+ "learning_rate": 0.000234,
528
+ "epoch": 1.26
529
+ },
530
+ {
531
+ "current_steps": 88,
532
+ "loss": 0.8294,
533
+ "learning_rate": 0.000237,
534
+ "epoch": 1.27
535
+ },
536
+ {
537
+ "current_steps": 89,
538
+ "loss": 1.2953,
539
+ "learning_rate": 0.00023999999999999998,
540
+ "epoch": 1.29
541
+ },
542
+ {
543
+ "current_steps": 90,
544
+ "loss": 0.5297,
545
+ "learning_rate": 0.000243,
546
+ "epoch": 1.3
547
+ },
548
+ {
549
+ "current_steps": 91,
550
+ "loss": 1.1592,
551
+ "learning_rate": 0.00024599999999999996,
552
+ "epoch": 1.31
553
+ },
554
+ {
555
+ "current_steps": 92,
556
+ "loss": 1.1055,
557
+ "learning_rate": 0.000249,
558
+ "epoch": 1.33
559
+ },
560
+ {
561
+ "current_steps": 93,
562
+ "loss": 0.9804,
563
+ "learning_rate": 0.00025199999999999995,
564
+ "epoch": 1.34
565
+ },
566
+ {
567
+ "current_steps": 94,
568
+ "loss": 0.8683,
569
+ "learning_rate": 0.00025499999999999996,
570
+ "epoch": 1.36
571
+ },
572
+ {
573
+ "current_steps": 95,
574
+ "loss": 0.9698,
575
+ "learning_rate": 0.000258,
576
+ "epoch": 1.37
577
+ },
578
+ {
579
+ "current_steps": 96,
580
+ "loss": 1.037,
581
+ "learning_rate": 0.000261,
582
+ "epoch": 1.39
583
+ },
584
+ {
585
+ "current_steps": 97,
586
+ "loss": 0.5479,
587
+ "learning_rate": 0.00026399999999999997,
588
+ "epoch": 1.4
589
+ },
590
+ {
591
+ "current_steps": 98,
592
+ "loss": 0.598,
593
+ "learning_rate": 0.000267,
594
+ "epoch": 1.41
595
+ },
596
+ {
597
+ "current_steps": 99,
598
+ "loss": 0.3627,
599
+ "learning_rate": 0.00027,
600
+ "epoch": 1.43
601
+ },
602
+ {
603
+ "current_steps": 100,
604
+ "loss": 0.8042,
605
+ "learning_rate": 0.00027299999999999997,
606
+ "epoch": 1.44
607
+ },
608
+ {
609
+ "current_steps": 101,
610
+ "loss": 1.0378,
611
+ "learning_rate": 0.000276,
612
+ "epoch": 1.46
613
+ },
614
+ {
615
+ "current_steps": 102,
616
+ "loss": 1.0192,
617
+ "learning_rate": 0.000279,
618
+ "epoch": 1.47
619
+ },
620
+ {
621
+ "current_steps": 103,
622
+ "loss": 1.4654,
623
+ "learning_rate": 0.00028199999999999997,
624
+ "epoch": 1.49
625
+ },
626
+ {
627
+ "current_steps": 104,
628
+ "loss": 1.0614,
629
+ "learning_rate": 0.000285,
630
+ "epoch": 1.5
631
+ },
632
+ {
633
+ "current_steps": 105,
634
+ "loss": 0.779,
635
+ "learning_rate": 0.00028799999999999995,
636
+ "epoch": 1.51
637
+ },
638
+ {
639
+ "current_steps": 106,
640
+ "loss": 1.0255,
641
+ "learning_rate": 0.00029099999999999997,
642
+ "epoch": 1.53
643
+ },
644
+ {
645
+ "current_steps": 107,
646
+ "loss": 1.0924,
647
+ "learning_rate": 0.000294,
648
+ "epoch": 1.54
649
+ },
650
+ {
651
+ "current_steps": 108,
652
+ "loss": 1.1016,
653
+ "learning_rate": 0.00029699999999999996,
654
+ "epoch": 1.56
655
+ },
656
+ {
657
+ "current_steps": 109,
658
+ "loss": 0.8748,
659
+ "learning_rate": 0.0003,
660
+ "epoch": 1.57
661
+ },
662
+ {
663
+ "current_steps": 110,
664
+ "loss": 0.8048,
665
+ "learning_rate": 0.00029727272727272724,
666
+ "epoch": 1.59
667
+ },
668
+ {
669
+ "current_steps": 111,
670
+ "loss": 1.0229,
671
+ "learning_rate": 0.0002945454545454545,
672
+ "epoch": 1.6
673
+ },
674
+ {
675
+ "current_steps": 112,
676
+ "loss": 0.9677,
677
+ "learning_rate": 0.0002918181818181818,
678
+ "epoch": 1.61
679
+ },
680
+ {
681
+ "current_steps": 113,
682
+ "loss": 0.5605,
683
+ "learning_rate": 0.00028909090909090904,
684
+ "epoch": 1.63
685
+ },
686
+ {
687
+ "current_steps": 114,
688
+ "loss": 0.9392,
689
+ "learning_rate": 0.00028636363636363636,
690
+ "epoch": 1.64
691
+ },
692
+ {
693
+ "current_steps": 115,
694
+ "loss": 1.0068,
695
+ "learning_rate": 0.0002836363636363636,
696
+ "epoch": 1.66
697
+ },
698
+ {
699
+ "current_steps": 116,
700
+ "loss": 0.898,
701
+ "learning_rate": 0.0002809090909090909,
702
+ "epoch": 1.67
703
+ },
704
+ {
705
+ "current_steps": 117,
706
+ "loss": 1.1297,
707
+ "learning_rate": 0.00027818181818181815,
708
+ "epoch": 1.69
709
+ },
710
+ {
711
+ "current_steps": 118,
712
+ "loss": 0.8696,
713
+ "learning_rate": 0.0002754545454545454,
714
+ "epoch": 1.7
715
+ },
716
+ {
717
+ "current_steps": 119,
718
+ "loss": 0.975,
719
+ "learning_rate": 0.0002727272727272727,
720
+ "epoch": 1.71
721
+ },
722
+ {
723
+ "current_steps": 120,
724
+ "loss": 1.107,
725
+ "learning_rate": 0.00027,
726
+ "epoch": 1.73
727
+ },
728
+ {
729
+ "current_steps": 121,
730
+ "loss": 1.1696,
731
+ "learning_rate": 0.0002672727272727272,
732
+ "epoch": 1.74
733
+ },
734
+ {
735
+ "current_steps": 122,
736
+ "loss": 0.7181,
737
+ "learning_rate": 0.00026454545454545453,
738
+ "epoch": 1.76
739
+ },
740
+ {
741
+ "current_steps": 123,
742
+ "loss": 0.4008,
743
+ "learning_rate": 0.0002618181818181818,
744
+ "epoch": 1.77
745
+ },
746
+ {
747
+ "current_steps": 124,
748
+ "loss": 0.6227,
749
+ "learning_rate": 0.00025909090909090907,
750
+ "epoch": 1.79
751
+ },
752
+ {
753
+ "current_steps": 125,
754
+ "loss": 1.4091,
755
+ "learning_rate": 0.00025636363636363633,
756
+ "epoch": 1.8
757
+ },
758
+ {
759
+ "current_steps": 126,
760
+ "loss": 0.7953,
761
+ "learning_rate": 0.0002536363636363636,
762
+ "epoch": 1.81
763
+ },
764
+ {
765
+ "current_steps": 127,
766
+ "loss": 1.1635,
767
+ "learning_rate": 0.00025090909090909086,
768
+ "epoch": 1.83
769
+ },
770
+ {
771
+ "current_steps": 128,
772
+ "loss": 0.6109,
773
+ "learning_rate": 0.0002481818181818182,
774
+ "epoch": 1.84
775
+ },
776
+ {
777
+ "current_steps": 129,
778
+ "loss": 0.7244,
779
+ "learning_rate": 0.00024545454545454545,
780
+ "epoch": 1.86
781
+ },
782
+ {
783
+ "current_steps": 130,
784
+ "loss": 1.0868,
785
+ "learning_rate": 0.0002427272727272727,
786
+ "epoch": 1.87
787
+ },
788
+ {
789
+ "current_steps": 131,
790
+ "loss": 1.1968,
791
+ "learning_rate": 0.00023999999999999998,
792
+ "epoch": 1.89
793
+ },
794
+ {
795
+ "current_steps": 132,
796
+ "loss": 1.109,
797
+ "learning_rate": 0.00023727272727272724,
798
+ "epoch": 1.9
799
+ },
800
+ {
801
+ "current_steps": 133,
802
+ "loss": 0.8744,
803
+ "learning_rate": 0.00023454545454545454,
804
+ "epoch": 1.91
805
+ },
806
+ {
807
+ "current_steps": 134,
808
+ "loss": 0.6971,
809
+ "learning_rate": 0.0002318181818181818,
810
+ "epoch": 1.93
811
+ },
812
+ {
813
+ "current_steps": 135,
814
+ "loss": 0.827,
815
+ "learning_rate": 0.00022909090909090907,
816
+ "epoch": 1.94
817
+ },
818
+ {
819
+ "current_steps": 136,
820
+ "loss": 0.8474,
821
+ "learning_rate": 0.00022636363636363633,
822
+ "epoch": 1.96
823
+ },
824
+ {
825
+ "current_steps": 137,
826
+ "loss": 0.8051,
827
+ "learning_rate": 0.00022363636363636363,
828
+ "epoch": 1.97
829
+ },
830
+ {
831
+ "current_steps": 138,
832
+ "loss": 0.8532,
833
+ "learning_rate": 0.0002209090909090909,
834
+ "epoch": 1.99
835
+ },
836
+ {
837
+ "current_steps": 139,
838
+ "loss": 0.8277,
839
+ "learning_rate": 0.00021818181818181816,
840
+ "epoch": 2.0
841
+ },
842
+ {
843
+ "current_steps": 140,
844
+ "loss": 0.3926,
845
+ "learning_rate": 0.00021545454545454542,
846
+ "epoch": 2.01
847
+ },
848
+ {
849
+ "current_steps": 141,
850
+ "loss": 0.5717,
851
+ "learning_rate": 0.00021272727272727272,
852
+ "epoch": 2.03
853
+ },
854
+ {
855
+ "current_steps": 142,
856
+ "loss": 0.6956,
857
+ "learning_rate": 0.00020999999999999998,
858
+ "epoch": 2.04
859
+ },
860
+ {
861
+ "current_steps": 143,
862
+ "loss": 0.6353,
863
+ "learning_rate": 0.00020727272727272725,
864
+ "epoch": 2.06
865
+ },
866
+ {
867
+ "current_steps": 144,
868
+ "loss": 0.4248,
869
+ "learning_rate": 0.0002045454545454545,
870
+ "epoch": 2.07
871
+ },
872
+ {
873
+ "current_steps": 145,
874
+ "loss": 0.6299,
875
+ "learning_rate": 0.0002018181818181818,
876
+ "epoch": 2.09
877
+ },
878
+ {
879
+ "current_steps": 146,
880
+ "loss": 0.3415,
881
+ "learning_rate": 0.0001990909090909091,
882
+ "epoch": 2.1
883
+ },
884
+ {
885
+ "current_steps": 147,
886
+ "loss": 0.4788,
887
+ "learning_rate": 0.00019636363636363634,
888
+ "epoch": 2.11
889
+ },
890
+ {
891
+ "current_steps": 148,
892
+ "loss": 0.6898,
893
+ "learning_rate": 0.00019363636363636363,
894
+ "epoch": 2.13
895
+ },
896
+ {
897
+ "current_steps": 149,
898
+ "loss": 0.4212,
899
+ "learning_rate": 0.0001909090909090909,
900
+ "epoch": 2.14
901
+ },
902
+ {
903
+ "current_steps": 150,
904
+ "loss": 0.6855,
905
+ "learning_rate": 0.0001881818181818182,
906
+ "epoch": 2.16
907
+ },
908
+ {
909
+ "current_steps": 151,
910
+ "loss": 0.4411,
911
+ "learning_rate": 0.00018545454545454543,
912
+ "epoch": 2.17
913
+ },
914
+ {
915
+ "current_steps": 152,
916
+ "loss": 0.4706,
917
+ "learning_rate": 0.00018272727272727272,
918
+ "epoch": 2.19
919
+ },
920
+ {
921
+ "current_steps": 153,
922
+ "loss": 0.6222,
923
+ "learning_rate": 0.00017999999999999998,
924
+ "epoch": 2.2
925
+ },
926
+ {
927
+ "current_steps": 154,
928
+ "loss": 0.6584,
929
+ "learning_rate": 0.00017727272727272728,
930
+ "epoch": 2.21
931
+ },
932
+ {
933
+ "current_steps": 155,
934
+ "loss": 0.6037,
935
+ "learning_rate": 0.00017454545454545452,
936
+ "epoch": 2.23
937
+ },
938
+ {
939
+ "current_steps": 156,
940
+ "loss": 0.6478,
941
+ "learning_rate": 0.0001718181818181818,
942
+ "epoch": 2.24
943
+ },
944
+ {
945
+ "current_steps": 157,
946
+ "loss": 0.4591,
947
+ "learning_rate": 0.00016909090909090907,
948
+ "epoch": 2.26
949
+ },
950
+ {
951
+ "current_steps": 158,
952
+ "loss": 0.4085,
953
+ "learning_rate": 0.00016636363636363637,
954
+ "epoch": 2.27
955
+ },
956
+ {
957
+ "current_steps": 159,
958
+ "loss": 0.6747,
959
+ "learning_rate": 0.0001636363636363636,
960
+ "epoch": 2.29
961
+ },
962
+ {
963
+ "current_steps": 160,
964
+ "loss": 0.4008,
965
+ "learning_rate": 0.0001609090909090909,
966
+ "epoch": 2.3
967
+ },
968
+ {
969
+ "current_steps": 161,
970
+ "loss": 0.671,
971
+ "learning_rate": 0.00015818181818181816,
972
+ "epoch": 2.31
973
+ },
974
+ {
975
+ "current_steps": 162,
976
+ "loss": 0.593,
977
+ "learning_rate": 0.00015545454545454546,
978
+ "epoch": 2.33
979
+ },
980
+ {
981
+ "current_steps": 163,
982
+ "loss": 0.4881,
983
+ "learning_rate": 0.0001527272727272727,
984
+ "epoch": 2.34
985
+ },
986
+ {
987
+ "current_steps": 164,
988
+ "loss": 0.6749,
989
+ "learning_rate": 0.00015,
990
+ "epoch": 2.36
991
+ },
992
+ {
993
+ "current_steps": 165,
994
+ "loss": 0.7904,
995
+ "learning_rate": 0.00014727272727272725,
996
+ "epoch": 2.37
997
+ },
998
+ {
999
+ "current_steps": 166,
1000
+ "loss": 0.7036,
1001
+ "learning_rate": 0.00014454545454545452,
1002
+ "epoch": 2.39
1003
+ },
1004
+ {
1005
+ "current_steps": 167,
1006
+ "loss": 0.5043,
1007
+ "learning_rate": 0.0001418181818181818,
1008
+ "epoch": 2.4
1009
+ },
1010
+ {
1011
+ "current_steps": 168,
1012
+ "loss": 0.7245,
1013
+ "learning_rate": 0.00013909090909090908,
1014
+ "epoch": 2.41
1015
+ },
1016
+ {
1017
+ "current_steps": 169,
1018
+ "loss": 0.7101,
1019
+ "learning_rate": 0.00013636363636363634,
1020
+ "epoch": 2.43
1021
+ },
1022
+ {
1023
+ "current_steps": 170,
1024
+ "loss": 0.3898,
1025
+ "learning_rate": 0.0001336363636363636,
1026
+ "epoch": 2.44
1027
+ },
1028
+ {
1029
+ "current_steps": 171,
1030
+ "loss": 0.4639,
1031
+ "learning_rate": 0.0001309090909090909,
1032
+ "epoch": 2.46
1033
+ },
1034
+ {
1035
+ "current_steps": 172,
1036
+ "loss": 0.686,
1037
+ "learning_rate": 0.00012818181818181817,
1038
+ "epoch": 2.47
1039
+ },
1040
+ {
1041
+ "current_steps": 173,
1042
+ "loss": 0.4952,
1043
+ "learning_rate": 0.00012545454545454543,
1044
+ "epoch": 2.49
1045
+ },
1046
+ {
1047
+ "current_steps": 174,
1048
+ "loss": 0.2727,
1049
+ "learning_rate": 0.00012272727272727272,
1050
+ "epoch": 2.5
1051
+ },
1052
+ {
1053
+ "current_steps": 175,
1054
+ "loss": 0.3428,
1055
+ "learning_rate": 0.00011999999999999999,
1056
+ "epoch": 2.51
1057
+ },
1058
+ {
1059
+ "current_steps": 176,
1060
+ "loss": 0.253,
1061
+ "learning_rate": 0.00011727272727272727,
1062
+ "epoch": 2.53
1063
+ },
1064
+ {
1065
+ "current_steps": 177,
1066
+ "loss": 0.5778,
1067
+ "learning_rate": 0.00011454545454545453,
1068
+ "epoch": 2.54
1069
+ },
1070
+ {
1071
+ "current_steps": 178,
1072
+ "loss": 0.639,
1073
+ "learning_rate": 0.00011181818181818181,
1074
+ "epoch": 2.56
1075
+ },
1076
+ {
1077
+ "current_steps": 179,
1078
+ "loss": 0.7327,
1079
+ "learning_rate": 0.00010909090909090908,
1080
+ "epoch": 2.57
1081
+ },
1082
+ {
1083
+ "current_steps": 180,
1084
+ "loss": 0.4956,
1085
+ "learning_rate": 0.00010636363636363636,
1086
+ "epoch": 2.59
1087
+ },
1088
+ {
1089
+ "current_steps": 181,
1090
+ "loss": 0.364,
1091
+ "learning_rate": 0.00010363636363636362,
1092
+ "epoch": 2.6
1093
+ },
1094
+ {
1095
+ "current_steps": 182,
1096
+ "loss": 0.4497,
1097
+ "learning_rate": 0.0001009090909090909,
1098
+ "epoch": 2.61
1099
+ },
1100
+ {
1101
+ "current_steps": 183,
1102
+ "loss": 0.7653,
1103
+ "learning_rate": 9.818181818181817e-05,
1104
+ "epoch": 2.63
1105
+ },
1106
+ {
1107
+ "current_steps": 184,
1108
+ "loss": 0.6586,
1109
+ "learning_rate": 9.545454545454545e-05,
1110
+ "epoch": 2.64
1111
+ },
1112
+ {
1113
+ "current_steps": 185,
1114
+ "loss": 0.4404,
1115
+ "learning_rate": 9.272727272727271e-05,
1116
+ "epoch": 2.66
1117
+ },
1118
+ {
1119
+ "current_steps": 186,
1120
+ "loss": 0.7484,
1121
+ "learning_rate": 8.999999999999999e-05,
1122
+ "epoch": 2.67
1123
+ },
1124
+ {
1125
+ "current_steps": 187,
1126
+ "loss": 0.6176,
1127
+ "learning_rate": 8.727272727272726e-05,
1128
+ "epoch": 2.69
1129
+ },
1130
+ {
1131
+ "current_steps": 188,
1132
+ "loss": 0.7404,
1133
+ "learning_rate": 8.454545454545454e-05,
1134
+ "epoch": 2.7
1135
+ },
1136
+ {
1137
+ "current_steps": 189,
1138
+ "loss": 0.8124,
1139
+ "learning_rate": 8.18181818181818e-05,
1140
+ "epoch": 2.71
1141
+ },
1142
+ {
1143
+ "current_steps": 190,
1144
+ "loss": 0.6525,
1145
+ "learning_rate": 7.909090909090908e-05,
1146
+ "epoch": 2.73
1147
+ },
1148
+ {
1149
+ "current_steps": 191,
1150
+ "loss": 0.5986,
1151
+ "learning_rate": 7.636363636363635e-05,
1152
+ "epoch": 2.74
1153
+ },
1154
+ {
1155
+ "current_steps": 192,
1156
+ "loss": 0.294,
1157
+ "learning_rate": 7.363636363636363e-05,
1158
+ "epoch": 2.76
1159
+ },
1160
+ {
1161
+ "current_steps": 193,
1162
+ "loss": 0.2973,
1163
+ "learning_rate": 7.09090909090909e-05,
1164
+ "epoch": 2.77
1165
+ },
1166
+ {
1167
+ "current_steps": 194,
1168
+ "loss": 0.3589,
1169
+ "learning_rate": 6.818181818181817e-05,
1170
+ "epoch": 2.79
1171
+ },
1172
+ {
1173
+ "current_steps": 195,
1174
+ "loss": 0.78,
1175
+ "learning_rate": 6.545454545454545e-05,
1176
+ "epoch": 2.8
1177
+ },
1178
+ {
1179
+ "current_steps": 196,
1180
+ "loss": 0.4056,
1181
+ "learning_rate": 6.272727272727272e-05,
1182
+ "epoch": 2.81
1183
+ },
1184
+ {
1185
+ "current_steps": 197,
1186
+ "loss": 0.5843,
1187
+ "learning_rate": 5.9999999999999995e-05,
1188
+ "epoch": 2.83
1189
+ },
1190
+ {
1191
+ "current_steps": 198,
1192
+ "loss": 0.5745,
1193
+ "learning_rate": 5.727272727272727e-05,
1194
+ "epoch": 2.84
1195
+ },
1196
+ {
1197
+ "current_steps": 199,
1198
+ "loss": 0.2763,
1199
+ "learning_rate": 5.454545454545454e-05,
1200
+ "epoch": 2.86
1201
+ },
1202
+ {
1203
+ "current_steps": 200,
1204
+ "loss": 0.5494,
1205
+ "learning_rate": 5.181818181818181e-05,
1206
+ "epoch": 2.87
1207
+ },
1208
+ {
1209
+ "current_steps": 201,
1210
+ "loss": 0.4962,
1211
+ "learning_rate": 4.9090909090909084e-05,
1212
+ "epoch": 2.89
1213
+ },
1214
+ {
1215
+ "current_steps": 202,
1216
+ "loss": 0.5864,
1217
+ "learning_rate": 4.6363636363636356e-05,
1218
+ "epoch": 2.9
1219
+ },
1220
+ {
1221
+ "current_steps": 203,
1222
+ "loss": 0.3165,
1223
+ "learning_rate": 4.363636363636363e-05,
1224
+ "epoch": 2.91
1225
+ },
1226
+ {
1227
+ "current_steps": 204,
1228
+ "loss": 0.5969,
1229
+ "learning_rate": 4.09090909090909e-05,
1230
+ "epoch": 2.93
1231
+ },
1232
+ {
1233
+ "current_steps": 205,
1234
+ "loss": 0.3003,
1235
+ "learning_rate": 3.8181818181818174e-05,
1236
+ "epoch": 2.94
1237
+ },
1238
+ {
1239
+ "current_steps": 206,
1240
+ "loss": 0.6398,
1241
+ "learning_rate": 3.545454545454545e-05,
1242
+ "epoch": 2.96
1243
+ },
1244
+ {
1245
+ "current_steps": 207,
1246
+ "loss": 0.604,
1247
+ "learning_rate": 3.2727272727272725e-05,
1248
+ "epoch": 2.97
1249
+ },
1250
+ {
1251
+ "current_steps": 208,
1252
+ "loss": 0.6389,
1253
+ "learning_rate": 2.9999999999999997e-05,
1254
+ "epoch": 2.99
1255
+ },
1256
+ {
1257
+ "current_steps": 209,
1258
+ "loss": 0.5865,
1259
+ "learning_rate": 2.727272727272727e-05,
1260
+ "epoch": 3.0
1261
+ },
1262
+ {
1263
+ "current_steps": 209,
1264
+ "loss": 0.5865,
1265
+ "learning_rate": 2.727272727272727e-05,
1266
+ "epoch": 3.0
1267
+ }
1268
+ ]
training_graph.png ADDED
training_log.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name": "HuggingFaceH4_zephyr-7b-alpha",
3
+ "base_model_class": "MistralForCausalLM",
4
+ "base_loaded_in_4bit": true,
5
+ "base_loaded_in_8bit": false,
6
+ "projections": "q, v",
7
+ "loss": 0.5865,
8
+ "learning_rate": 2.727272727272727e-05,
9
+ "epoch": 3.0,
10
+ "current_steps": 209,
11
+ "current_steps_adjusted": 209,
12
+ "epoch_adjusted": 3.0,
13
+ "train_runtime": 152.7978,
14
+ "train_samples_per_second": 5.478,
15
+ "train_steps_per_second": 1.374,
16
+ "total_flos": 9159217237721088.0,
17
+ "train_loss": 1.2863307027589708
18
+ }
training_parameters.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lora_name": "zephyr7b_prompts",
3
+ "always_override": false,
4
+ "save_steps": 0.0,
5
+ "micro_batch_size": 4,
6
+ "batch_size": 0,
7
+ "epochs": 3.0,
8
+ "learning_rate": "3e-4",
9
+ "lr_scheduler_type": "linear",
10
+ "lora_rank": 32,
11
+ "lora_alpha": 64,
12
+ "lora_dropout": 0.05,
13
+ "cutoff_len": 256,
14
+ "dataset": "None",
15
+ "eval_dataset": "None",
16
+ "format": "None",
17
+ "eval_steps": 100.0,
18
+ "raw_text_file": "singles",
19
+ "higher_rank_limit": false,
20
+ "warmup_steps": 100.0,
21
+ "optimizer": "adamw_torch",
22
+ "hard_cut_string": "end;\\n",
23
+ "train_only_after": "",
24
+ "stop_at_loss": 0,
25
+ "add_eos_token": false,
26
+ "min_chars": 0.0,
27
+ "report_to": "None",
28
+ "precize_slicing_overlap": true,
29
+ "add_eos_token_type": "Every Block",
30
+ "save_steps_under_loss": 1.8,
31
+ "add_bos_token": true,
32
+ "training_projection": "q-v",
33
+ "sliding_window": false,
34
+ "warmup_ratio": 0,
35
+ "grad_accumulation": 1,
36
+ "neft_noise_alpha": 0
37
+ }
training_prompt.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "template_type": "raw_text"
3
+ }