alxxtexxr committed on
Commit
fd5fb6b
1 Parent(s): 00378b4

Upload folder using huggingface_hub

Browse files
checkpoint-204/adapter_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "codellama/CodeLlama-7b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 8,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "v_proj"
17
+ ],
18
+ "task_type": "CAUSAL_LM"
19
+ }
checkpoint-204/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5db6c6101e3916d64df355d7b92036adbb14f712a70dfbcdf08a4bffa0768644
3
+ size 16822989
checkpoint-204/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f15f95bd1858b53f75104113ed497be1f95cd20710598ca1bad6eb8af3e656bc
3
+ size 33661637
checkpoint-204/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32322ff7ebd033080c5cd396128d5a4b5a7657285728dfd5012d1aeddfa9d290
3
+ size 14575
checkpoint-204/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98ce55bb186dd3cba292bbd155ce9da7dfdbb9a5826e0c6f3c6a70af0fad921f
3
+ size 627
checkpoint-204/trainer_state.json ADDED
@@ -0,0 +1,549 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.13464748859405518,
3
+ "best_model_checkpoint": "./lora-out/checkpoint-204",
4
+ "epoch": 14.442477876106194,
5
+ "eval_steps": 4,
6
+ "global_step": 204,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.28,
13
+ "eval_loss": 0.3006412982940674,
14
+ "eval_runtime": 54.7333,
15
+ "eval_samples_per_second": 0.914,
16
+ "eval_steps_per_second": 0.238,
17
+ "step": 4
18
+ },
19
+ {
20
+ "epoch": 0.57,
21
+ "eval_loss": 0.300335168838501,
22
+ "eval_runtime": 54.8113,
23
+ "eval_samples_per_second": 0.912,
24
+ "eval_steps_per_second": 0.237,
25
+ "step": 8
26
+ },
27
+ {
28
+ "epoch": 0.71,
29
+ "learning_rate": 1e-05,
30
+ "loss": 0.3024,
31
+ "step": 10
32
+ },
33
+ {
34
+ "epoch": 0.85,
35
+ "eval_loss": 0.2993900179862976,
36
+ "eval_runtime": 54.782,
37
+ "eval_samples_per_second": 0.913,
38
+ "eval_steps_per_second": 0.237,
39
+ "step": 12
40
+ },
41
+ {
42
+ "epoch": 1.13,
43
+ "eval_loss": 0.29816487431526184,
44
+ "eval_runtime": 54.8049,
45
+ "eval_samples_per_second": 0.912,
46
+ "eval_steps_per_second": 0.237,
47
+ "step": 16
48
+ },
49
+ {
50
+ "epoch": 1.42,
51
+ "learning_rate": 2e-05,
52
+ "loss": 0.3035,
53
+ "step": 20
54
+ },
55
+ {
56
+ "epoch": 1.42,
57
+ "eval_loss": 0.29595255851745605,
58
+ "eval_runtime": 54.7885,
59
+ "eval_samples_per_second": 0.913,
60
+ "eval_steps_per_second": 0.237,
61
+ "step": 20
62
+ },
63
+ {
64
+ "epoch": 1.7,
65
+ "eval_loss": 0.2939557135105133,
66
+ "eval_runtime": 54.7999,
67
+ "eval_samples_per_second": 0.912,
68
+ "eval_steps_per_second": 0.237,
69
+ "step": 24
70
+ },
71
+ {
72
+ "epoch": 1.98,
73
+ "eval_loss": 0.29013773798942566,
74
+ "eval_runtime": 54.7805,
75
+ "eval_samples_per_second": 0.913,
76
+ "eval_steps_per_second": 0.237,
77
+ "step": 28
78
+ },
79
+ {
80
+ "epoch": 2.12,
81
+ "learning_rate": 3e-05,
82
+ "loss": 0.2959,
83
+ "step": 30
84
+ },
85
+ {
86
+ "epoch": 2.27,
87
+ "eval_loss": 0.28251081705093384,
88
+ "eval_runtime": 54.7706,
89
+ "eval_samples_per_second": 0.913,
90
+ "eval_steps_per_second": 0.237,
91
+ "step": 32
92
+ },
93
+ {
94
+ "epoch": 2.55,
95
+ "eval_loss": 0.2771329879760742,
96
+ "eval_runtime": 54.7818,
97
+ "eval_samples_per_second": 0.913,
98
+ "eval_steps_per_second": 0.237,
99
+ "step": 36
100
+ },
101
+ {
102
+ "epoch": 2.83,
103
+ "learning_rate": 4e-05,
104
+ "loss": 0.284,
105
+ "step": 40
106
+ },
107
+ {
108
+ "epoch": 2.83,
109
+ "eval_loss": 0.27146488428115845,
110
+ "eval_runtime": 54.803,
111
+ "eval_samples_per_second": 0.912,
112
+ "eval_steps_per_second": 0.237,
113
+ "step": 40
114
+ },
115
+ {
116
+ "epoch": 3.12,
117
+ "eval_loss": 0.26464152336120605,
118
+ "eval_runtime": 54.8467,
119
+ "eval_samples_per_second": 0.912,
120
+ "eval_steps_per_second": 0.237,
121
+ "step": 44
122
+ },
123
+ {
124
+ "epoch": 3.4,
125
+ "eval_loss": 0.25653430819511414,
126
+ "eval_runtime": 54.8327,
127
+ "eval_samples_per_second": 0.912,
128
+ "eval_steps_per_second": 0.237,
129
+ "step": 48
130
+ },
131
+ {
132
+ "epoch": 3.54,
133
+ "learning_rate": 5e-05,
134
+ "loss": 0.263,
135
+ "step": 50
136
+ },
137
+ {
138
+ "epoch": 3.68,
139
+ "eval_loss": 0.24627122282981873,
140
+ "eval_runtime": 54.813,
141
+ "eval_samples_per_second": 0.912,
142
+ "eval_steps_per_second": 0.237,
143
+ "step": 52
144
+ },
145
+ {
146
+ "epoch": 3.96,
147
+ "eval_loss": 0.23474617302417755,
148
+ "eval_runtime": 54.7901,
149
+ "eval_samples_per_second": 0.913,
150
+ "eval_steps_per_second": 0.237,
151
+ "step": 56
152
+ },
153
+ {
154
+ "epoch": 4.25,
155
+ "learning_rate": 6e-05,
156
+ "loss": 0.241,
157
+ "step": 60
158
+ },
159
+ {
160
+ "epoch": 4.25,
161
+ "eval_loss": 0.2220366895198822,
162
+ "eval_runtime": 54.7983,
163
+ "eval_samples_per_second": 0.912,
164
+ "eval_steps_per_second": 0.237,
165
+ "step": 60
166
+ },
167
+ {
168
+ "epoch": 4.53,
169
+ "eval_loss": 0.20926769077777863,
170
+ "eval_runtime": 54.7403,
171
+ "eval_samples_per_second": 0.913,
172
+ "eval_steps_per_second": 0.237,
173
+ "step": 64
174
+ },
175
+ {
176
+ "epoch": 4.81,
177
+ "eval_loss": 0.19629451632499695,
178
+ "eval_runtime": 54.7525,
179
+ "eval_samples_per_second": 0.913,
180
+ "eval_steps_per_second": 0.237,
181
+ "step": 68
182
+ },
183
+ {
184
+ "epoch": 4.96,
185
+ "learning_rate": 7e-05,
186
+ "loss": 0.2101,
187
+ "step": 70
188
+ },
189
+ {
190
+ "epoch": 5.1,
191
+ "eval_loss": 0.18524658679962158,
192
+ "eval_runtime": 54.7303,
193
+ "eval_samples_per_second": 0.914,
194
+ "eval_steps_per_second": 0.238,
195
+ "step": 72
196
+ },
197
+ {
198
+ "epoch": 5.38,
199
+ "eval_loss": 0.17731742560863495,
200
+ "eval_runtime": 54.7552,
201
+ "eval_samples_per_second": 0.913,
202
+ "eval_steps_per_second": 0.237,
203
+ "step": 76
204
+ },
205
+ {
206
+ "epoch": 5.66,
207
+ "learning_rate": 8e-05,
208
+ "loss": 0.1788,
209
+ "step": 80
210
+ },
211
+ {
212
+ "epoch": 5.66,
213
+ "eval_loss": 0.16993452608585358,
214
+ "eval_runtime": 54.729,
215
+ "eval_samples_per_second": 0.914,
216
+ "eval_steps_per_second": 0.238,
217
+ "step": 80
218
+ },
219
+ {
220
+ "epoch": 5.95,
221
+ "eval_loss": 0.164781853556633,
222
+ "eval_runtime": 54.741,
223
+ "eval_samples_per_second": 0.913,
224
+ "eval_steps_per_second": 0.237,
225
+ "step": 84
226
+ },
227
+ {
228
+ "epoch": 6.23,
229
+ "eval_loss": 0.16103117167949677,
230
+ "eval_runtime": 54.7837,
231
+ "eval_samples_per_second": 0.913,
232
+ "eval_steps_per_second": 0.237,
233
+ "step": 88
234
+ },
235
+ {
236
+ "epoch": 6.37,
237
+ "learning_rate": 9e-05,
238
+ "loss": 0.1615,
239
+ "step": 90
240
+ },
241
+ {
242
+ "epoch": 6.51,
243
+ "eval_loss": 0.15781742334365845,
244
+ "eval_runtime": 54.7138,
245
+ "eval_samples_per_second": 0.914,
246
+ "eval_steps_per_second": 0.238,
247
+ "step": 92
248
+ },
249
+ {
250
+ "epoch": 6.8,
251
+ "eval_loss": 0.15516981482505798,
252
+ "eval_runtime": 54.7516,
253
+ "eval_samples_per_second": 0.913,
254
+ "eval_steps_per_second": 0.237,
255
+ "step": 96
256
+ },
257
+ {
258
+ "epoch": 7.08,
259
+ "learning_rate": 0.0001,
260
+ "loss": 0.1533,
261
+ "step": 100
262
+ },
263
+ {
264
+ "epoch": 7.08,
265
+ "eval_loss": 0.15261690318584442,
266
+ "eval_runtime": 54.6891,
267
+ "eval_samples_per_second": 0.914,
268
+ "eval_steps_per_second": 0.238,
269
+ "step": 100
270
+ },
271
+ {
272
+ "epoch": 7.36,
273
+ "eval_loss": 0.15066812932491302,
274
+ "eval_runtime": 54.6884,
275
+ "eval_samples_per_second": 0.914,
276
+ "eval_steps_per_second": 0.238,
277
+ "step": 104
278
+ },
279
+ {
280
+ "epoch": 7.65,
281
+ "eval_loss": 0.14893724024295807,
282
+ "eval_runtime": 54.6275,
283
+ "eval_samples_per_second": 0.915,
284
+ "eval_steps_per_second": 0.238,
285
+ "step": 108
286
+ },
287
+ {
288
+ "epoch": 7.79,
289
+ "learning_rate": 9.090909090909092e-05,
290
+ "loss": 0.1463,
291
+ "step": 110
292
+ },
293
+ {
294
+ "epoch": 7.93,
295
+ "eval_loss": 0.14742153882980347,
296
+ "eval_runtime": 54.6174,
297
+ "eval_samples_per_second": 0.915,
298
+ "eval_steps_per_second": 0.238,
299
+ "step": 112
300
+ },
301
+ {
302
+ "epoch": 8.21,
303
+ "eval_loss": 0.14575307071208954,
304
+ "eval_runtime": 54.6366,
305
+ "eval_samples_per_second": 0.915,
306
+ "eval_steps_per_second": 0.238,
307
+ "step": 116
308
+ },
309
+ {
310
+ "epoch": 8.5,
311
+ "learning_rate": 8.181818181818183e-05,
312
+ "loss": 0.1399,
313
+ "step": 120
314
+ },
315
+ {
316
+ "epoch": 8.5,
317
+ "eval_loss": 0.14450186491012573,
318
+ "eval_runtime": 54.6303,
319
+ "eval_samples_per_second": 0.915,
320
+ "eval_steps_per_second": 0.238,
321
+ "step": 120
322
+ },
323
+ {
324
+ "epoch": 8.78,
325
+ "eval_loss": 0.1431863009929657,
326
+ "eval_runtime": 54.6358,
327
+ "eval_samples_per_second": 0.915,
328
+ "eval_steps_per_second": 0.238,
329
+ "step": 124
330
+ },
331
+ {
332
+ "epoch": 9.06,
333
+ "eval_loss": 0.1424635797739029,
334
+ "eval_runtime": 54.6449,
335
+ "eval_samples_per_second": 0.915,
336
+ "eval_steps_per_second": 0.238,
337
+ "step": 128
338
+ },
339
+ {
340
+ "epoch": 9.2,
341
+ "learning_rate": 7.272727272727273e-05,
342
+ "loss": 0.1357,
343
+ "step": 130
344
+ },
345
+ {
346
+ "epoch": 9.35,
347
+ "eval_loss": 0.14175941050052643,
348
+ "eval_runtime": 54.6307,
349
+ "eval_samples_per_second": 0.915,
350
+ "eval_steps_per_second": 0.238,
351
+ "step": 132
352
+ },
353
+ {
354
+ "epoch": 9.63,
355
+ "eval_loss": 0.14105737209320068,
356
+ "eval_runtime": 54.6121,
357
+ "eval_samples_per_second": 0.916,
358
+ "eval_steps_per_second": 0.238,
359
+ "step": 136
360
+ },
361
+ {
362
+ "epoch": 9.91,
363
+ "learning_rate": 6.363636363636364e-05,
364
+ "loss": 0.1322,
365
+ "step": 140
366
+ },
367
+ {
368
+ "epoch": 9.91,
369
+ "eval_loss": 0.14027251303195953,
370
+ "eval_runtime": 54.6594,
371
+ "eval_samples_per_second": 0.915,
372
+ "eval_steps_per_second": 0.238,
373
+ "step": 140
374
+ },
375
+ {
376
+ "epoch": 10.19,
377
+ "eval_loss": 0.13963991403579712,
378
+ "eval_runtime": 54.6378,
379
+ "eval_samples_per_second": 0.915,
380
+ "eval_steps_per_second": 0.238,
381
+ "step": 144
382
+ },
383
+ {
384
+ "epoch": 10.48,
385
+ "eval_loss": 0.138994500041008,
386
+ "eval_runtime": 54.6115,
387
+ "eval_samples_per_second": 0.916,
388
+ "eval_steps_per_second": 0.238,
389
+ "step": 148
390
+ },
391
+ {
392
+ "epoch": 10.62,
393
+ "learning_rate": 5.4545454545454546e-05,
394
+ "loss": 0.1355,
395
+ "step": 150
396
+ },
397
+ {
398
+ "epoch": 10.76,
399
+ "eval_loss": 0.13857363164424896,
400
+ "eval_runtime": 54.6622,
401
+ "eval_samples_per_second": 0.915,
402
+ "eval_steps_per_second": 0.238,
403
+ "step": 152
404
+ },
405
+ {
406
+ "epoch": 11.04,
407
+ "eval_loss": 0.13809233903884888,
408
+ "eval_runtime": 54.7824,
409
+ "eval_samples_per_second": 0.913,
410
+ "eval_steps_per_second": 0.237,
411
+ "step": 156
412
+ },
413
+ {
414
+ "epoch": 11.33,
415
+ "learning_rate": 4.545454545454546e-05,
416
+ "loss": 0.1216,
417
+ "step": 160
418
+ },
419
+ {
420
+ "epoch": 11.33,
421
+ "eval_loss": 0.137764573097229,
422
+ "eval_runtime": 54.6049,
423
+ "eval_samples_per_second": 0.916,
424
+ "eval_steps_per_second": 0.238,
425
+ "step": 160
426
+ },
427
+ {
428
+ "epoch": 11.61,
429
+ "eval_loss": 0.1369408369064331,
430
+ "eval_runtime": 54.6188,
431
+ "eval_samples_per_second": 0.915,
432
+ "eval_steps_per_second": 0.238,
433
+ "step": 164
434
+ },
435
+ {
436
+ "epoch": 11.89,
437
+ "eval_loss": 0.13684938848018646,
438
+ "eval_runtime": 54.5949,
439
+ "eval_samples_per_second": 0.916,
440
+ "eval_steps_per_second": 0.238,
441
+ "step": 168
442
+ },
443
+ {
444
+ "epoch": 12.04,
445
+ "learning_rate": 3.6363636363636364e-05,
446
+ "loss": 0.1265,
447
+ "step": 170
448
+ },
449
+ {
450
+ "epoch": 12.18,
451
+ "eval_loss": 0.1366124004125595,
452
+ "eval_runtime": 54.5928,
453
+ "eval_samples_per_second": 0.916,
454
+ "eval_steps_per_second": 0.238,
455
+ "step": 172
456
+ },
457
+ {
458
+ "epoch": 12.46,
459
+ "eval_loss": 0.1361435353755951,
460
+ "eval_runtime": 54.6703,
461
+ "eval_samples_per_second": 0.915,
462
+ "eval_steps_per_second": 0.238,
463
+ "step": 176
464
+ },
465
+ {
466
+ "epoch": 12.74,
467
+ "learning_rate": 2.7272727272727273e-05,
468
+ "loss": 0.127,
469
+ "step": 180
470
+ },
471
+ {
472
+ "epoch": 12.74,
473
+ "eval_loss": 0.13553684949874878,
474
+ "eval_runtime": 54.6232,
475
+ "eval_samples_per_second": 0.915,
476
+ "eval_steps_per_second": 0.238,
477
+ "step": 180
478
+ },
479
+ {
480
+ "epoch": 13.03,
481
+ "eval_loss": 0.13531364500522614,
482
+ "eval_runtime": 54.7712,
483
+ "eval_samples_per_second": 0.913,
484
+ "eval_steps_per_second": 0.237,
485
+ "step": 184
486
+ },
487
+ {
488
+ "epoch": 13.31,
489
+ "eval_loss": 0.1353050172328949,
490
+ "eval_runtime": 54.8052,
491
+ "eval_samples_per_second": 0.912,
492
+ "eval_steps_per_second": 0.237,
493
+ "step": 188
494
+ },
495
+ {
496
+ "epoch": 13.45,
497
+ "learning_rate": 1.8181818181818182e-05,
498
+ "loss": 0.1233,
499
+ "step": 190
500
+ },
501
+ {
502
+ "epoch": 13.59,
503
+ "eval_loss": 0.1348796784877777,
504
+ "eval_runtime": 54.5607,
505
+ "eval_samples_per_second": 0.916,
506
+ "eval_steps_per_second": 0.238,
507
+ "step": 192
508
+ },
509
+ {
510
+ "epoch": 13.88,
511
+ "eval_loss": 0.13486500084400177,
512
+ "eval_runtime": 54.5618,
513
+ "eval_samples_per_second": 0.916,
514
+ "eval_steps_per_second": 0.238,
515
+ "step": 196
516
+ },
517
+ {
518
+ "epoch": 14.16,
519
+ "learning_rate": 9.090909090909091e-06,
520
+ "loss": 0.1189,
521
+ "step": 200
522
+ },
523
+ {
524
+ "epoch": 14.16,
525
+ "eval_loss": 0.13471217453479767,
526
+ "eval_runtime": 54.6065,
527
+ "eval_samples_per_second": 0.916,
528
+ "eval_steps_per_second": 0.238,
529
+ "step": 200
530
+ },
531
+ {
532
+ "epoch": 14.44,
533
+ "eval_loss": 0.13464748859405518,
534
+ "eval_runtime": 54.5803,
535
+ "eval_samples_per_second": 0.916,
536
+ "eval_steps_per_second": 0.238,
537
+ "step": 204
538
+ }
539
+ ],
540
+ "logging_steps": 10,
541
+ "max_steps": 210,
542
+ "num_input_tokens_seen": 0,
543
+ "num_train_epochs": 15,
544
+ "save_steps": 12,
545
+ "total_flos": 6.711268353280573e+17,
546
+ "train_batch_size": 4,
547
+ "trial_name": null,
548
+ "trial_params": null
549
+ }
checkpoint-204/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85310c54a4f279d40e8badbc8f6f7406b57e15fc0c79500de525827feedf5072
3
+ size 4219