AlekseyKorshuk commited on
Commit
79978f0
·
1 Parent(s): a091c3b

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/eminem")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3v5pn9j9/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Eminem's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2ap4xtdx) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2ap4xtdx/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/eminem")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1qrb0dkt/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Eminem's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3htsm9rh) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3htsm9rh/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -35,7 +35,7 @@
35
  }
36
  },
37
  "torch_dtype": "float32",
38
- "transformers_version": "4.9.2",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
 
35
  }
36
  },
37
  "torch_dtype": "float32",
38
+ "transformers_version": "4.10.0",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 3.107706069946289, "eval_runtime": 7.9801, "eval_samples_per_second": 76.941, "eval_steps_per_second": 9.649, "epoch": 4.0}
 
1
+ {"eval_loss": 3.2645678520202637, "eval_runtime": 27.753, "eval_samples_per_second": 22.16, "eval_steps_per_second": 2.774, "epoch": 4.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e1500e0c5d09f9119242eadb9231548efc64206fa75b843f4d6d55245d3a75c
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1378031fdf9482b5db27b2bc00af093181f9681f90ee767c0cf9b58c15f34e2d
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf629e600273d439719124086f2ca792b1da3378aa7bc6b89eb361f4fb8de37c
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71f6347398b9b70ec58ef5870588fabc343892795992a9cc55b88838578f9cb0
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12a33dfd99fed39956abb56e218118d2269b823d67e4fee20fa2964aa2e50ef5
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2634e3dc54e0fcf5b59c873d6c31c539bd50f6a8ee764ed20d3596bee9a7a6b
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59cffeee4aac7aed4e7c264e0a4a30bc1fdbbb06cda6c0f412900955a6345ab5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83f1ad5e80a1e13954ef55b306eeb22095173b0c8f2dc6ea02fadde78742155f
3
  size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c1c8a23997246a48d829f3cb1c862573344f689d6521be45104a8c4d9404f2e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d351be0cf1fb195e4b3d5eaeb5b8bdf81c2295b6997969fad1395e04446b38a1
3
  size 623
trainer_state.json CHANGED
@@ -1,1700 +1,570 @@
1
  {
2
- "best_metric": 3.107706069946289,
3
- "best_model_checkpoint": "output/eminem/checkpoint-1374",
4
- "epoch": 3.0,
5
- "global_step": 1374,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.01,
12
- "learning_rate": 0.00013715876234566868,
13
- "loss": 4.4386,
14
  "step": 5
15
  },
16
  {
17
  "epoch": 0.02,
18
- "learning_rate": 0.00013703509896122095,
19
- "loss": 4.1948,
20
  "step": 10
21
  },
22
  {
23
  "epoch": 0.03,
24
- "learning_rate": 0.00013682915852268886,
25
- "loss": 4.0957,
26
  "step": 15
27
  },
28
  {
29
  "epoch": 0.04,
30
- "learning_rate": 0.00013654118862484264,
31
- "loss": 3.9438,
32
  "step": 20
33
  },
34
  {
35
- "epoch": 0.06,
36
- "learning_rate": 0.00013617153548351626,
37
- "loss": 4.058,
38
  "step": 25
39
  },
40
  {
41
  "epoch": 0.07,
42
- "learning_rate": 0.00013572064351936462,
43
- "loss": 4.0441,
44
  "step": 30
45
  },
46
  {
47
  "epoch": 0.08,
48
- "learning_rate": 0.00013518905482355273,
49
- "loss": 3.872,
50
  "step": 35
51
  },
52
  {
53
  "epoch": 0.09,
54
- "learning_rate": 0.00013457740850601892,
55
- "loss": 4.2427,
56
  "step": 40
57
  },
58
  {
59
  "epoch": 0.1,
60
- "learning_rate": 0.00013388643992709594,
61
- "loss": 4.0466,
62
  "step": 45
63
  },
64
  {
65
  "epoch": 0.11,
66
- "learning_rate": 0.0001331169798134139,
67
- "loss": 3.9197,
68
  "step": 50
69
  },
70
  {
71
  "epoch": 0.12,
72
- "learning_rate": 0.00013226995325914744,
73
- "loss": 4.0498,
74
  "step": 55
75
  },
76
  {
77
  "epoch": 0.13,
78
- "learning_rate": 0.00013134637861380834,
79
- "loss": 3.9624,
80
  "step": 60
81
  },
82
  {
83
  "epoch": 0.14,
84
- "learning_rate": 0.0001303473662579206,
85
- "loss": 3.8491,
86
  "step": 65
87
  },
88
  {
89
  "epoch": 0.15,
90
- "learning_rate": 0.00012927411726804995,
91
- "loss": 3.8877,
92
  "step": 70
93
  },
94
  {
95
- "epoch": 0.17,
96
- "learning_rate": 0.00012812792197279278,
97
- "loss": 3.9778,
98
  "step": 75
99
  },
100
  {
101
- "epoch": 0.18,
102
- "learning_rate": 0.00012691015840146053,
103
- "loss": 3.9312,
104
  "step": 80
105
  },
106
  {
107
  "epoch": 0.19,
108
- "learning_rate": 0.00012562229062732468,
109
- "loss": 3.8452,
110
  "step": 85
111
  },
112
  {
113
  "epoch": 0.2,
114
- "learning_rate": 0.00012426586700741422,
115
- "loss": 3.9473,
116
  "step": 90
117
  },
118
  {
119
  "epoch": 0.21,
120
- "learning_rate": 0.00012284251832098172,
121
- "loss": 4.0293,
122
  "step": 95
123
  },
124
  {
125
  "epoch": 0.22,
126
- "learning_rate": 0.00012135395580887633,
127
- "loss": 3.8128,
128
  "step": 100
129
  },
130
  {
131
  "epoch": 0.23,
132
- "learning_rate": 0.00011980196911618039,
133
- "loss": 4.0833,
134
  "step": 105
135
  },
136
  {
137
  "epoch": 0.24,
138
- "learning_rate": 0.0001181884241405837,
139
- "loss": 3.7779,
140
  "step": 110
141
  },
142
  {
143
  "epoch": 0.25,
144
- "learning_rate": 0.00011651526078908192,
145
- "loss": 3.8247,
146
  "step": 115
147
  },
148
  {
149
  "epoch": 0.26,
150
- "learning_rate": 0.00011478449064569633,
151
- "loss": 3.9986,
152
  "step": 120
153
  },
154
  {
155
- "epoch": 0.28,
156
- "learning_rate": 0.00011299819455301873,
157
- "loss": 4.0396,
158
  "step": 125
159
  },
160
  {
161
- "epoch": 0.29,
162
- "learning_rate": 0.0001111585201104895,
163
- "loss": 3.8924,
164
  "step": 130
165
  },
166
  {
167
- "epoch": 0.3,
168
- "learning_rate": 0.000109267679092416,
169
- "loss": 3.8117,
170
  "step": 135
171
  },
172
  {
173
  "epoch": 0.31,
174
- "learning_rate": 0.00010732794478883606,
175
- "loss": 3.9015,
176
  "step": 140
177
  },
178
  {
179
  "epoch": 0.32,
180
- "learning_rate": 0.00010534164927242335,
181
- "loss": 3.7786,
182
  "step": 145
183
  },
184
  {
185
  "epoch": 0.33,
186
- "learning_rate": 0.0001033111805947203,
187
- "loss": 4.0211,
188
  "step": 150
189
  },
190
  {
191
  "epoch": 0.34,
192
- "learning_rate": 0.00010123897991506982,
193
- "loss": 3.8728,
194
  "step": 155
195
  },
196
  {
197
  "epoch": 0.35,
198
- "learning_rate": 9.912753856569734e-05,
199
- "loss": 3.9266,
200
  "step": 160
201
  },
202
  {
203
  "epoch": 0.36,
204
- "learning_rate": 9.697939505647188e-05,
205
- "loss": 3.9463,
206
  "step": 165
207
  },
208
  {
209
- "epoch": 0.38,
210
- "learning_rate": 9.479713202294696e-05,
211
- "loss": 3.7152,
212
  "step": 170
213
  },
214
  {
215
- "epoch": 0.39,
216
- "learning_rate": 9.258337312135107e-05,
217
- "loss": 3.9496,
218
  "step": 175
219
  },
220
  {
221
- "epoch": 0.4,
222
- "learning_rate": 9.034077987426021e-05,
223
- "loss": 3.7439,
224
  "step": 180
225
  },
226
  {
227
- "epoch": 0.41,
228
- "learning_rate": 8.807204847074523e-05,
229
- "loss": 3.7879,
230
  "step": 185
231
  },
232
  {
233
- "epoch": 0.42,
234
- "learning_rate": 8.577990652484077e-05,
235
- "loss": 3.7768,
236
  "step": 190
237
  },
238
  {
239
  "epoch": 0.43,
240
- "learning_rate": 8.34671097962332e-05,
241
- "loss": 3.6851,
242
  "step": 195
243
  },
244
  {
245
  "epoch": 0.44,
246
- "learning_rate": 8.113643887711011e-05,
247
- "loss": 3.7911,
248
  "step": 200
249
  },
250
  {
251
  "epoch": 0.45,
252
- "learning_rate": 7.879069584915438e-05,
253
- "loss": 3.6839,
254
  "step": 205
255
  },
256
  {
257
  "epoch": 0.46,
258
- "learning_rate": 7.643270091470234e-05,
259
- "loss": 3.7319,
260
  "step": 210
261
  },
262
  {
263
  "epoch": 0.47,
264
- "learning_rate": 7.406528900611617e-05,
265
- "loss": 3.6703,
266
  "step": 215
267
  },
268
  {
269
- "epoch": 0.49,
270
- "learning_rate": 7.169130637744674e-05,
271
- "loss": 3.7467,
272
  "step": 220
273
  },
274
  {
275
- "epoch": 0.5,
276
- "learning_rate": 6.931360718248504e-05,
277
- "loss": 3.8521,
278
  "step": 225
279
  },
280
  {
281
- "epoch": 0.51,
282
- "learning_rate": 6.693505004331577e-05,
283
- "loss": 3.6709,
284
  "step": 230
285
  },
286
  {
287
- "epoch": 0.52,
288
- "learning_rate": 6.455849461349907e-05,
289
- "loss": 3.6242,
290
  "step": 235
291
  },
292
  {
293
- "epoch": 0.53,
294
- "learning_rate": 6.218679814001198e-05,
295
- "loss": 3.6225,
296
  "step": 240
297
  },
298
  {
299
- "epoch": 0.54,
300
- "learning_rate": 5.9822812028083505e-05,
301
- "loss": 3.8016,
302
  "step": 245
303
  },
304
  {
305
  "epoch": 0.55,
306
- "learning_rate": 5.746937841305257e-05,
307
- "loss": 3.6923,
308
  "step": 250
309
  },
310
  {
311
  "epoch": 0.56,
312
- "learning_rate": 5.512932674337138e-05,
313
- "loss": 3.894,
314
  "step": 255
315
  },
316
  {
317
  "epoch": 0.57,
318
- "learning_rate": 5.280547037886122e-05,
319
- "loss": 3.7461,
320
  "step": 260
321
  },
322
  {
323
  "epoch": 0.58,
324
- "learning_rate": 5.050060320831149e-05,
325
- "loss": 3.5966,
326
  "step": 265
327
  },
328
  {
329
- "epoch": 0.6,
330
- "learning_rate": 4.821749629048772e-05,
331
- "loss": 3.7551,
332
  "step": 270
333
  },
334
  {
335
- "epoch": 0.61,
336
- "learning_rate": 4.595889452258756e-05,
337
- "loss": 3.8694,
338
  "step": 275
339
  },
340
  {
341
- "epoch": 0.62,
342
- "learning_rate": 4.372751334014969e-05,
343
- "loss": 3.7224,
344
  "step": 280
345
  },
346
  {
347
- "epoch": 0.63,
348
- "learning_rate": 4.1526035452383523e-05,
349
- "loss": 3.915,
350
  "step": 285
351
  },
352
  {
353
- "epoch": 0.64,
354
- "learning_rate": 3.935710761684453e-05,
355
- "loss": 3.8484,
356
  "step": 290
357
  },
358
  {
359
- "epoch": 0.65,
360
- "learning_rate": 3.722333745733311e-05,
361
- "loss": 3.7062,
362
  "step": 295
363
  },
364
  {
365
  "epoch": 0.66,
366
- "learning_rate": 3.512729032884219e-05,
367
- "loss": 3.676,
368
  "step": 300
369
  },
370
  {
371
  "epoch": 0.67,
372
- "learning_rate": 3.3071486233323674e-05,
373
- "loss": 3.6448,
374
  "step": 305
375
  },
376
  {
377
  "epoch": 0.68,
378
- "learning_rate": 3.105839678998049e-05,
379
- "loss": 3.9731,
380
  "step": 310
381
  },
382
  {
383
- "epoch": 0.7,
384
- "learning_rate": 2.9090442263728265e-05,
385
- "loss": 3.6278,
386
  "step": 315
387
  },
388
  {
389
- "epoch": 0.71,
390
- "learning_rate": 2.716998865539764e-05,
391
- "loss": 3.6389,
392
  "step": 320
393
  },
394
  {
395
- "epoch": 0.72,
396
- "learning_rate": 2.5299344857176957e-05,
397
- "loss": 3.6411,
398
  "step": 325
399
  },
400
  {
401
- "epoch": 0.73,
402
- "learning_rate": 2.3480759876714295e-05,
403
- "loss": 3.7764,
404
  "step": 330
405
  },
406
  {
407
- "epoch": 0.74,
408
- "learning_rate": 2.1716420133216482e-05,
409
- "loss": 3.7769,
410
  "step": 335
411
  },
412
  {
413
- "epoch": 0.75,
414
- "learning_rate": 2.0008446828796293e-05,
415
- "loss": 3.528,
416
  "step": 340
417
  },
418
  {
419
- "epoch": 0.76,
420
- "learning_rate": 1.8358893398227267e-05,
421
- "loss": 3.819,
422
  "step": 345
423
  },
424
  {
425
- "epoch": 0.77,
426
- "learning_rate": 1.6769743040173313e-05,
427
- "loss": 3.6014,
428
  "step": 350
429
  },
430
  {
431
  "epoch": 0.78,
432
- "learning_rate": 1.5242906332860249e-05,
433
- "loss": 3.6993,
434
  "step": 355
435
  },
436
  {
437
  "epoch": 0.79,
438
- "learning_rate": 1.3780218937056495e-05,
439
- "loss": 3.7919,
440
  "step": 360
441
  },
442
  {
443
- "epoch": 0.81,
444
- "learning_rate": 1.2383439389124231e-05,
445
- "loss": 3.6903,
446
  "step": 365
447
  },
448
  {
449
- "epoch": 0.82,
450
- "learning_rate": 1.105424698679451e-05,
451
- "loss": 3.4355,
452
  "step": 370
453
  },
454
  {
455
- "epoch": 0.83,
456
- "learning_rate": 9.794239770208025e-06,
457
- "loss": 3.7661,
458
  "step": 375
459
  },
460
  {
461
- "epoch": 0.84,
462
- "learning_rate": 8.604932600649156e-06,
463
- "loss": 4.0533,
464
  "step": 380
465
  },
466
  {
467
- "epoch": 0.85,
468
- "learning_rate": 7.487755339282637e-06,
469
- "loss": 3.7796,
470
  "step": 385
471
  },
472
  {
473
- "epoch": 0.86,
474
- "learning_rate": 6.444051128083183e-06,
475
- "loss": 3.7312,
476
  "step": 390
477
  },
478
  {
479
- "epoch": 0.87,
480
- "learning_rate": 5.4750747750241e-06,
481
- "loss": 3.7231,
482
  "step": 395
483
  },
484
  {
485
- "epoch": 0.88,
486
- "learning_rate": 4.581991245466992e-06,
487
- "loss": 3.7788,
488
  "step": 400
489
  },
490
  {
491
- "epoch": 0.89,
492
- "learning_rate": 3.7658742615658213e-06,
493
- "loss": 3.7838,
494
  "step": 405
495
  },
496
  {
497
- "epoch": 0.91,
498
- "learning_rate": 3.027705011369445e-06,
499
- "loss": 3.7598,
500
  "step": 410
501
  },
502
  {
503
- "epoch": 0.92,
504
- "learning_rate": 2.3683709691745994e-06,
505
- "loss": 3.954,
506
  "step": 415
507
  },
508
  {
509
- "epoch": 0.93,
510
- "learning_rate": 1.7886648285474887e-06,
511
- "loss": 3.7673,
512
  "step": 420
513
  },
514
  {
515
- "epoch": 0.94,
516
- "learning_rate": 1.289283549296875e-06,
517
- "loss": 3.8811,
518
  "step": 425
519
  },
520
  {
521
- "epoch": 0.95,
522
- "learning_rate": 8.708275195444353e-07,
523
- "loss": 3.6489,
524
  "step": 430
525
  },
526
  {
527
- "epoch": 0.96,
528
- "learning_rate": 5.337998338997259e-07,
529
- "loss": 3.6089,
530
  "step": 435
531
  },
532
  {
533
- "epoch": 0.97,
534
- "learning_rate": 2.786056886076668e-07,
535
- "loss": 3.5491,
536
  "step": 440
537
  },
538
  {
539
- "epoch": 0.98,
540
- "learning_rate": 1.0555189439568316e-07,
541
- "loss": 3.7324,
542
  "step": 445
543
  },
544
  {
545
- "epoch": 0.99,
546
- "learning_rate": 1.484650760624615e-08,
547
- "loss": 3.7782,
548
  "step": 450
549
  },
550
- {
551
- "epoch": 1.0,
552
- "eval_loss": 3.7048158645629883,
553
- "eval_runtime": 29.3019,
554
- "eval_samples_per_second": 22.388,
555
- "eval_steps_per_second": 2.798,
556
- "step": 453
557
- },
558
  {
559
  "epoch": 0.99,
560
- "learning_rate": 3.9992265680461966e-08,
561
- "loss": 3.6362,
562
  "step": 455
563
  },
564
  {
565
  "epoch": 1.0,
566
- "learning_rate": 0.0,
567
- "loss": 3.3994,
568
- "step": 460
569
- },
570
- {
571
- "epoch": 1.0,
572
- "eval_loss": 3.5555710792541504,
573
- "eval_runtime": 13.5198,
574
- "eval_samples_per_second": 44.675,
575
- "eval_steps_per_second": 5.621,
576
- "step": 460
577
- },
578
- {
579
- "epoch": 1.01,
580
- "learning_rate": 3.9992265680461966e-08,
581
- "loss": 3.5614,
582
- "step": 465
583
- },
584
- {
585
- "epoch": 1.02,
586
- "learning_rate": 1.5992243352901425e-07,
587
- "loss": 3.7249,
588
- "step": 470
589
- },
590
- {
591
- "epoch": 1.03,
592
- "learning_rate": 3.5965067033469397e-07,
593
- "loss": 3.4991,
594
- "step": 475
595
- },
596
- {
597
- "epoch": 1.04,
598
- "learning_rate": 6.389441019077102e-07,
599
- "loss": 3.439,
600
- "step": 480
601
- },
602
- {
603
- "epoch": 1.05,
604
- "learning_rate": 9.97477084600295e-07,
605
- "loss": 3.6568,
606
- "step": 485
607
- },
608
- {
609
- "epoch": 1.07,
610
- "learning_rate": 1.4348315849926483e-06,
611
- "loss": 3.5489,
612
- "step": 490
613
- },
614
- {
615
- "epoch": 1.08,
616
- "learning_rate": 1.9504976673012086e-06,
617
- "loss": 3.6268,
618
- "step": 495
619
- },
620
- {
621
- "epoch": 1.09,
622
- "learning_rate": 2.5438740879409643e-06,
623
- "loss": 3.8246,
624
- "step": 500
625
- },
626
- {
627
- "epoch": 1.1,
628
- "learning_rate": 3.2142689965485674e-06,
629
- "loss": 3.5789,
630
- "step": 505
631
- },
632
- {
633
- "epoch": 1.11,
634
- "learning_rate": 3.960900742648913e-06,
635
- "loss": 3.618,
636
- "step": 510
637
- },
638
- {
639
- "epoch": 1.12,
640
- "learning_rate": 4.782898787024646e-06,
641
- "loss": 3.6802,
642
- "step": 515
643
- },
644
- {
645
- "epoch": 1.13,
646
- "learning_rate": 5.679304716725914e-06,
647
- "loss": 3.6302,
648
- "step": 520
649
- },
650
- {
651
- "epoch": 1.14,
652
- "learning_rate": 6.649073362537048e-06,
653
- "loss": 3.5926,
654
- "step": 525
655
- },
656
- {
657
- "epoch": 1.15,
658
- "learning_rate": 7.691074017597052e-06,
659
- "loss": 3.5005,
660
- "step": 530
661
- },
662
- {
663
- "epoch": 1.16,
664
- "learning_rate": 8.804091755753263e-06,
665
- "loss": 3.6247,
666
- "step": 535
667
- },
668
- {
669
- "epoch": 1.17,
670
- "learning_rate": 9.986828848110884e-06,
671
- "loss": 3.5699,
672
- "step": 540
673
- },
674
- {
675
- "epoch": 1.18,
676
- "learning_rate": 1.1237906276126821e-05,
677
- "loss": 3.5435,
678
- "step": 545
679
- },
680
- {
681
- "epoch": 1.2,
682
- "learning_rate": 1.2555865339483672e-05,
683
- "loss": 3.5681,
684
- "step": 550
685
- },
686
- {
687
- "epoch": 1.21,
688
- "learning_rate": 1.3939169356868945e-05,
689
- "loss": 3.5438,
690
- "step": 555
691
- },
692
- {
693
- "epoch": 1.22,
694
- "learning_rate": 1.5386205457676803e-05,
695
- "loss": 3.6615,
696
- "step": 560
697
- },
698
- {
699
- "epoch": 1.23,
700
- "learning_rate": 1.6895286462543014e-05,
701
- "loss": 3.5334,
702
- "step": 565
703
- },
704
- {
705
- "epoch": 1.24,
706
- "learning_rate": 1.8464652850520678e-05,
707
- "loss": 3.5256,
708
- "step": 570
709
- },
710
- {
711
- "epoch": 1.25,
712
- "learning_rate": 2.009247481060283e-05,
713
- "loss": 3.5908,
714
- "step": 575
715
- },
716
- {
717
- "epoch": 1.26,
718
- "learning_rate": 2.1776854375200328e-05,
719
- "loss": 3.6817,
720
- "step": 580
721
- },
722
- {
723
- "epoch": 1.27,
724
- "learning_rate": 2.351582763308709e-05,
725
- "loss": 3.6254,
726
- "step": 585
727
- },
728
- {
729
- "epoch": 1.28,
730
- "learning_rate": 2.5307367019232758e-05,
731
- "loss": 3.5371,
732
- "step": 590
733
- },
734
- {
735
- "epoch": 1.29,
736
- "learning_rate": 2.714938367885288e-05,
737
- "loss": 3.6406,
738
- "step": 595
739
- },
740
- {
741
- "epoch": 1.3,
742
- "learning_rate": 2.9039729902920112e-05,
743
- "loss": 3.5787,
744
- "step": 600
745
- },
746
- {
747
- "epoch": 1.32,
748
- "learning_rate": 3.097620163229676e-05,
749
- "loss": 3.5732,
750
- "step": 605
751
- },
752
- {
753
- "epoch": 1.33,
754
- "learning_rate": 3.295654102756921e-05,
755
- "loss": 3.5091,
756
- "step": 610
757
- },
758
- {
759
- "epoch": 1.34,
760
- "learning_rate": 3.4978439101588006e-05,
761
- "loss": 3.617,
762
- "step": 615
763
- },
764
- {
765
- "epoch": 1.35,
766
- "learning_rate": 3.703953841164292e-05,
767
- "loss": 3.582,
768
- "step": 620
769
- },
770
- {
771
- "epoch": 1.36,
772
- "learning_rate": 3.913743580813637e-05,
773
- "loss": 3.6594,
774
- "step": 625
775
- },
776
- {
777
- "epoch": 1.37,
778
- "learning_rate": 4.126968523654786e-05,
779
- "loss": 3.6063,
780
- "step": 630
781
- },
782
- {
783
- "epoch": 1.38,
784
- "learning_rate": 4.343380058942427e-05,
785
- "loss": 3.6137,
786
- "step": 635
787
- },
788
- {
789
- "epoch": 1.39,
790
- "learning_rate": 4.562725860507034e-05,
791
- "loss": 3.5565,
792
- "step": 640
793
- },
794
- {
795
- "epoch": 1.4,
796
- "learning_rate": 4.784750180955822e-05,
797
- "loss": 3.6099,
798
- "step": 645
799
- },
800
- {
801
- "epoch": 1.41,
802
- "learning_rate": 5.009194149862813e-05,
803
- "loss": 3.6417,
804
- "step": 650
805
- },
806
- {
807
- "epoch": 1.42,
808
- "learning_rate": 5.235796075600178e-05,
809
- "loss": 3.7049,
810
- "step": 655
811
- },
812
- {
813
- "epoch": 1.43,
814
- "learning_rate": 5.4642917504589275e-05,
815
- "loss": 3.7319,
816
- "step": 660
817
- },
818
- {
819
- "epoch": 1.45,
820
- "learning_rate": 5.694414758703346e-05,
821
- "loss": 3.554,
822
- "step": 665
823
- },
824
- {
825
- "epoch": 1.46,
826
- "learning_rate": 5.92589678719975e-05,
827
- "loss": 3.5522,
828
- "step": 670
829
- },
830
- {
831
- "epoch": 1.47,
832
- "learning_rate": 6.158467938257645e-05,
833
- "loss": 3.6115,
834
- "step": 675
835
- },
836
- {
837
- "epoch": 1.48,
838
- "learning_rate": 6.391857044318355e-05,
839
- "loss": 3.6475,
840
- "step": 680
841
- },
842
- {
843
- "epoch": 1.49,
844
- "learning_rate": 6.625791984124255e-05,
845
- "loss": 3.5034,
846
- "step": 685
847
- },
848
- {
849
- "epoch": 1.5,
850
- "learning_rate": 6.859999999999999e-05,
851
- "loss": 3.5767,
852
- "step": 690
853
- },
854
- {
855
- "epoch": 1.51,
856
- "learning_rate": 7.094208015875743e-05,
857
- "loss": 3.6471,
858
- "step": 695
859
- },
860
- {
861
- "epoch": 1.52,
862
- "learning_rate": 7.328142955681643e-05,
863
- "loss": 3.6515,
864
- "step": 700
865
- },
866
- {
867
- "epoch": 1.53,
868
- "learning_rate": 7.561532061742353e-05,
869
- "loss": 3.6649,
870
- "step": 705
871
- },
872
- {
873
- "epoch": 1.54,
874
- "learning_rate": 7.794103212800247e-05,
875
- "loss": 3.4103,
876
- "step": 710
877
- },
878
- {
879
- "epoch": 1.55,
880
- "learning_rate": 8.025585241296653e-05,
881
- "loss": 3.5488,
882
- "step": 715
883
- },
884
- {
885
- "epoch": 1.57,
886
- "learning_rate": 8.25570824954107e-05,
887
- "loss": 3.5076,
888
- "step": 720
889
- },
890
- {
891
- "epoch": 1.58,
892
- "learning_rate": 8.484203924399819e-05,
893
- "loss": 3.5708,
894
- "step": 725
895
- },
896
- {
897
- "epoch": 1.59,
898
- "learning_rate": 8.710805850137184e-05,
899
- "loss": 3.7272,
900
- "step": 730
901
- },
902
- {
903
- "epoch": 1.6,
904
- "learning_rate": 8.935249819044176e-05,
905
- "loss": 3.6548,
906
- "step": 735
907
- },
908
- {
909
- "epoch": 1.61,
910
- "learning_rate": 9.157274139492964e-05,
911
- "loss": 3.6463,
912
- "step": 740
913
- },
914
- {
915
- "epoch": 1.62,
916
- "learning_rate": 9.376619941057571e-05,
917
- "loss": 3.484,
918
- "step": 745
919
- },
920
- {
921
- "epoch": 1.63,
922
- "learning_rate": 9.593031476345212e-05,
923
- "loss": 3.5106,
924
- "step": 750
925
- },
926
- {
927
- "epoch": 1.64,
928
- "learning_rate": 9.80625641918636e-05,
929
- "loss": 3.6845,
930
- "step": 755
931
- },
932
- {
933
- "epoch": 1.65,
934
- "learning_rate": 0.00010016046158835706,
935
- "loss": 3.5078,
936
- "step": 760
937
- },
938
- {
939
- "epoch": 1.66,
940
- "learning_rate": 0.00010222156089841198,
941
- "loss": 3.62,
942
- "step": 765
943
- },
944
- {
945
- "epoch": 1.67,
946
- "learning_rate": 0.00010424345897243078,
947
- "loss": 3.644,
948
- "step": 770
949
- },
950
- {
951
- "epoch": 1.68,
952
- "learning_rate": 0.00010622379836770322,
953
- "loss": 3.7684,
954
- "step": 775
955
- },
956
- {
957
- "epoch": 1.7,
958
- "learning_rate": 0.00010816027009707987,
959
- "loss": 3.5978,
960
- "step": 780
961
- },
962
- {
963
- "epoch": 1.71,
964
- "learning_rate": 0.0001100506163211471,
965
- "loss": 3.5937,
966
- "step": 785
967
- },
968
- {
969
- "epoch": 1.72,
970
- "learning_rate": 0.00011189263298076723,
971
- "loss": 3.6434,
972
- "step": 790
973
- },
974
- {
975
- "epoch": 1.73,
976
- "learning_rate": 0.00011368417236691289,
977
- "loss": 3.6893,
978
- "step": 795
979
- },
980
- {
981
- "epoch": 1.74,
982
- "learning_rate": 0.00011542314562479964,
983
- "loss": 3.6739,
984
- "step": 800
985
- },
986
- {
987
- "epoch": 1.75,
988
- "learning_rate": 0.00011710752518939715,
989
- "loss": 3.5122,
990
- "step": 805
991
- },
992
- {
993
- "epoch": 1.76,
994
- "learning_rate": 0.00011873534714947934,
995
- "loss": 3.6633,
996
- "step": 810
997
- },
998
- {
999
- "epoch": 1.77,
1000
- "learning_rate": 0.00012030471353745696,
1001
- "loss": 3.5101,
1002
- "step": 815
1003
- },
1004
- {
1005
- "epoch": 1.78,
1006
- "learning_rate": 0.00012181379454232318,
1007
- "loss": 3.6736,
1008
- "step": 820
1009
- },
1010
- {
1011
- "epoch": 1.79,
1012
- "learning_rate": 0.00012326083064313103,
1013
- "loss": 3.4271,
1014
- "step": 825
1015
- },
1016
- {
1017
- "epoch": 1.8,
1018
- "learning_rate": 0.0001246441346605163,
1019
- "loss": 3.5088,
1020
- "step": 830
1021
- },
1022
- {
1023
- "epoch": 1.82,
1024
- "learning_rate": 0.00012596209372387317,
1025
- "loss": 3.5845,
1026
- "step": 835
1027
- },
1028
- {
1029
- "epoch": 1.83,
1030
- "learning_rate": 0.0001272131711518891,
1031
- "loss": 3.6172,
1032
- "step": 840
1033
- },
1034
- {
1035
- "epoch": 1.84,
1036
- "learning_rate": 0.00012839590824424672,
1037
- "loss": 3.5649,
1038
- "step": 845
1039
- },
1040
- {
1041
- "epoch": 1.85,
1042
- "learning_rate": 0.00012950892598240292,
1043
- "loss": 3.5506,
1044
- "step": 850
1045
- },
1046
- {
1047
- "epoch": 1.86,
1048
- "learning_rate": 0.00013055092663746294,
1049
- "loss": 3.4779,
1050
- "step": 855
1051
- },
1052
- {
1053
- "epoch": 1.87,
1054
- "learning_rate": 0.00013152069528327408,
1055
- "loss": 3.5707,
1056
- "step": 860
1057
- },
1058
- {
1059
- "epoch": 1.88,
1060
- "learning_rate": 0.00013241710121297533,
1061
- "loss": 3.497,
1062
- "step": 865
1063
- },
1064
- {
1065
- "epoch": 1.89,
1066
- "learning_rate": 0.0001332390992573511,
1067
- "loss": 3.6343,
1068
- "step": 870
1069
- },
1070
- {
1071
- "epoch": 1.9,
1072
- "learning_rate": 0.00013398573100345144,
1073
- "loss": 3.5251,
1074
- "step": 875
1075
- },
1076
- {
1077
- "epoch": 1.91,
1078
- "learning_rate": 0.00013465612591205902,
1079
- "loss": 3.7421,
1080
- "step": 880
1081
- },
1082
- {
1083
- "epoch": 1.92,
1084
- "learning_rate": 0.00013524950233269879,
1085
- "loss": 3.5967,
1086
- "step": 885
1087
- },
1088
- {
1089
- "epoch": 1.93,
1090
- "learning_rate": 0.00013576516841500732,
1091
- "loss": 3.5853,
1092
- "step": 890
1093
- },
1094
- {
1095
- "epoch": 1.95,
1096
- "learning_rate": 0.0001362025229153997,
1097
- "loss": 3.7016,
1098
- "step": 895
1099
- },
1100
- {
1101
- "epoch": 1.96,
1102
- "learning_rate": 0.00013656105589809228,
1103
- "loss": 3.7649,
1104
- "step": 900
1105
- },
1106
- {
1107
- "epoch": 1.97,
1108
- "learning_rate": 0.0001368403493296653,
1109
- "loss": 3.6488,
1110
- "step": 905
1111
- },
1112
- {
1113
- "epoch": 1.98,
1114
- "learning_rate": 0.000137040077566471,
1115
- "loss": 3.5113,
1116
- "step": 910
1117
- },
1118
- {
1119
- "epoch": 1.99,
1120
- "learning_rate": 0.00013716000773431953,
1121
- "loss": 3.4688,
1122
- "step": 915
1123
- },
1124
- {
1125
- "epoch": 2.0,
1126
- "learning_rate": 0.0001372,
1127
- "loss": 3.5363,
1128
- "step": 920
1129
- },
1130
- {
1131
- "epoch": 2.0,
1132
- "eval_loss": 3.550342321395874,
1133
- "eval_runtime": 15.5302,
1134
- "eval_samples_per_second": 38.892,
1135
- "eval_steps_per_second": 4.894,
1136
- "step": 920
1137
- },
1138
- {
1139
- "epoch": 2.04,
1140
- "learning_rate": 0.0001366053297599336,
1141
- "loss": 3.3872,
1142
- "step": 925
1143
- },
1144
- {
1145
- "epoch": 2.05,
1146
- "learning_rate": 0.00013625198084925744,
1147
- "loss": 3.3661,
1148
- "step": 930
1149
- },
1150
- {
1151
- "epoch": 2.06,
1152
- "learning_rate": 0.00013581729639919083,
1153
- "loss": 3.3705,
1154
- "step": 935
1155
- },
1156
- {
1157
- "epoch": 2.08,
1158
- "learning_rate": 0.00013530179901518854,
1159
- "loss": 3.3747,
1160
- "step": 940
1161
- },
1162
- {
1163
- "epoch": 2.09,
1164
- "learning_rate": 0.00013470610846118432,
1165
- "loss": 3.3343,
1166
- "step": 945
1167
- },
1168
- {
1169
- "epoch": 2.1,
1170
- "learning_rate": 0.00013403094091447133,
1171
- "loss": 3.4331,
1172
- "step": 950
1173
- },
1174
- {
1175
- "epoch": 2.11,
1176
- "learning_rate": 0.00013327710810466788,
1177
- "loss": 3.3831,
1178
- "step": 955
1179
- },
1180
- {
1181
- "epoch": 2.12,
1182
- "learning_rate": 0.00013244551633780405,
1183
- "loss": 3.4936,
1184
- "step": 960
1185
- },
1186
- {
1187
- "epoch": 2.13,
1188
- "learning_rate": 0.00013153716540670286,
1189
- "loss": 3.5632,
1190
- "step": 965
1191
- },
1192
- {
1193
- "epoch": 2.14,
1194
- "learning_rate": 0.0001305531473889651,
1195
- "loss": 3.5966,
1196
- "step": 970
1197
- },
1198
- {
1199
- "epoch": 2.15,
1200
- "learning_rate": 0.00012949464533400386,
1201
- "loss": 3.5279,
1202
- "step": 975
1203
- },
1204
- {
1205
- "epoch": 2.16,
1206
- "learning_rate": 0.00012836293184070708,
1207
- "loss": 3.3265,
1208
- "step": 980
1209
- },
1210
- {
1211
- "epoch": 2.17,
1212
- "learning_rate": 0.00012715936752743734,
1213
- "loss": 3.3291,
1214
- "step": 985
1215
- },
1216
- {
1217
- "epoch": 2.19,
1218
- "learning_rate": 0.00012588539939620955,
1219
- "loss": 3.4152,
1220
- "step": 990
1221
- },
1222
- {
1223
- "epoch": 2.2,
1224
- "learning_rate": 0.000124542559093013,
1225
- "loss": 3.3681,
1226
- "step": 995
1227
- },
1228
- {
1229
- "epoch": 2.21,
1230
- "learning_rate": 0.00012313246106636776,
1231
- "loss": 3.4529,
1232
- "step": 1000
1233
- },
1234
- {
1235
- "epoch": 2.22,
1236
- "learning_rate": 0.00012165680062633217,
1237
- "loss": 3.3251,
1238
- "step": 1005
1239
- },
1240
- {
1241
- "epoch": 2.23,
1242
- "learning_rate": 0.00012011735190629213,
1243
- "loss": 3.5049,
1244
- "step": 1010
1245
- },
1246
- {
1247
- "epoch": 2.24,
1248
- "learning_rate": 0.00011851596572998445,
1249
- "loss": 3.3393,
1250
- "step": 1015
1251
- },
1252
- {
1253
- "epoch": 2.25,
1254
- "learning_rate": 0.00011685456738631861,
1255
- "loss": 3.2768,
1256
- "step": 1020
1257
- },
1258
- {
1259
- "epoch": 2.26,
1260
- "learning_rate": 0.00011513515431467074,
1261
- "loss": 3.4727,
1262
- "step": 1025
1263
- },
1264
- {
1265
- "epoch": 2.27,
1266
- "learning_rate": 0.0001133597937034344,
1267
- "loss": 3.5473,
1268
- "step": 1030
1269
- },
1270
- {
1271
- "epoch": 2.28,
1272
- "learning_rate": 0.00011153062000471465,
1273
- "loss": 3.363,
1274
- "step": 1035
1275
- },
1276
- {
1277
- "epoch": 2.3,
1278
- "learning_rate": 0.00010964983236815272,
1279
- "loss": 3.5201,
1280
- "step": 1040
1281
- },
1282
- {
1283
- "epoch": 2.31,
1284
- "learning_rate": 0.00010771969199696821,
1285
- "loss": 3.3989,
1286
- "step": 1045
1287
- },
1288
- {
1289
- "epoch": 2.32,
1290
- "learning_rate": 0.00010574251942939544,
1291
- "loss": 3.4949,
1292
- "step": 1050
1293
- },
1294
- {
1295
- "epoch": 2.33,
1296
- "learning_rate": 0.00010372069174878399,
1297
- "loss": 3.5509,
1298
- "step": 1055
1299
- },
1300
- {
1301
- "epoch": 2.34,
1302
- "learning_rate": 0.00010165663972571744,
1303
- "loss": 3.3905,
1304
- "step": 1060
1305
- },
1306
- {
1307
- "epoch": 2.35,
1308
- "learning_rate": 9.955284489558486e-05,
1309
- "loss": 3.2992,
1310
- "step": 1065
1311
- },
1312
- {
1313
- "epoch": 2.36,
1314
- "learning_rate": 9.741183657512063e-05,
1315
- "loss": 3.4254,
1316
- "step": 1070
1317
- },
1318
- {
1319
- "epoch": 2.37,
1320
- "learning_rate": 9.523618882149793e-05,
1321
- "loss": 3.5512,
1322
- "step": 1075
1323
- },
1324
- {
1325
- "epoch": 2.38,
1326
- "learning_rate": 9.30285173376325e-05,
1327
- "loss": 3.5264,
1328
- "step": 1080
1329
- },
1330
- {
1331
- "epoch": 2.4,
1332
- "learning_rate": 9.079147632741743e-05,
1333
- "loss": 3.5711,
1334
- "step": 1085
1335
- },
1336
- {
1337
- "epoch": 2.41,
1338
- "learning_rate": 8.852775530466907e-05,
1339
- "loss": 3.3514,
1340
- "step": 1090
1341
- },
1342
- {
1343
- "epoch": 2.42,
1344
- "learning_rate": 8.624007585962169e-05,
1345
- "loss": 3.4614,
1346
- "step": 1095
1347
- },
1348
- {
1349
- "epoch": 2.43,
1350
- "learning_rate": 8.393118838685784e-05,
1351
- "loss": 3.4247,
1352
- "step": 1100
1353
- },
1354
- {
1355
- "epoch": 2.44,
1356
- "learning_rate": 8.160386877860755e-05,
1357
- "loss": 3.6425,
1358
- "step": 1105
1359
- },
1360
- {
1361
- "epoch": 2.45,
1362
- "learning_rate": 7.926091508739327e-05,
1363
- "loss": 3.4344,
1364
- "step": 1110
1365
- },
1366
- {
1367
- "epoch": 2.46,
1368
- "learning_rate": 7.690514416203246e-05,
1369
- "loss": 3.2589,
1370
- "step": 1115
1371
- },
1372
- {
1373
- "epoch": 2.47,
1374
- "learning_rate": 7.453938826104105e-05,
1375
- "loss": 3.4692,
1376
- "step": 1120
1377
- },
1378
- {
1379
- "epoch": 2.48,
1380
- "learning_rate": 7.216649164751185e-05,
1381
- "loss": 3.1535,
1382
- "step": 1125
1383
- },
1384
- {
1385
- "epoch": 2.49,
1386
- "learning_rate": 6.978930716955893e-05,
1387
- "loss": 3.2569,
1388
- "step": 1130
1389
- },
1390
- {
1391
- "epoch": 2.51,
1392
- "learning_rate": 6.741069283044111e-05,
1393
- "loss": 3.2807,
1394
- "step": 1135
1395
- },
1396
- {
1397
- "epoch": 2.52,
1398
- "learning_rate": 6.50335083524882e-05,
1399
- "loss": 3.618,
1400
- "step": 1140
1401
- },
1402
- {
1403
- "epoch": 2.53,
1404
- "learning_rate": 6.2660611738959e-05,
1405
- "loss": 3.2624,
1406
- "step": 1145
1407
- },
1408
- {
1409
- "epoch": 2.54,
1410
- "learning_rate": 6.029485583796758e-05,
1411
- "loss": 3.5394,
1412
- "step": 1150
1413
- },
1414
- {
1415
- "epoch": 2.55,
1416
- "learning_rate": 5.793908491260678e-05,
1417
- "loss": 3.3858,
1418
- "step": 1155
1419
- },
1420
- {
1421
- "epoch": 2.56,
1422
- "learning_rate": 5.5596131221392484e-05,
1423
- "loss": 3.553,
1424
- "step": 1160
1425
- },
1426
- {
1427
- "epoch": 2.57,
1428
- "learning_rate": 5.326881161314222e-05,
1429
- "loss": 3.533,
1430
- "step": 1165
1431
- },
1432
- {
1433
- "epoch": 2.58,
1434
- "learning_rate": 5.09599241403784e-05,
1435
- "loss": 3.4779,
1436
- "step": 1170
1437
- },
1438
- {
1439
- "epoch": 2.59,
1440
- "learning_rate": 4.8672244695331025e-05,
1441
- "loss": 3.3822,
1442
- "step": 1175
1443
- },
1444
- {
1445
- "epoch": 2.6,
1446
- "learning_rate": 4.640852367258268e-05,
1447
- "loss": 3.5514,
1448
- "step": 1180
1449
- },
1450
- {
1451
- "epoch": 2.62,
1452
- "learning_rate": 4.417148266236749e-05,
1453
- "loss": 3.3215,
1454
- "step": 1185
1455
- },
1456
- {
1457
- "epoch": 2.63,
1458
- "learning_rate": 4.1963811178502046e-05,
1459
- "loss": 3.384,
1460
- "step": 1190
1461
- },
1462
- {
1463
- "epoch": 2.64,
1464
- "learning_rate": 3.978816342487941e-05,
1465
- "loss": 3.2548,
1466
- "step": 1195
1467
- },
1468
- {
1469
- "epoch": 2.65,
1470
- "learning_rate": 3.7647155104415125e-05,
1471
- "loss": 3.5365,
1472
- "step": 1200
1473
- },
1474
- {
1475
- "epoch": 2.66,
1476
- "learning_rate": 3.5543360274282583e-05,
1477
- "loss": 3.2376,
1478
- "step": 1205
1479
- },
1480
- {
1481
- "epoch": 2.67,
1482
- "learning_rate": 3.3479308251216045e-05,
1483
- "loss": 3.3611,
1484
- "step": 1210
1485
- },
1486
- {
1487
- "epoch": 2.68,
1488
- "learning_rate": 3.14574805706046e-05,
1489
- "loss": 3.3802,
1490
- "step": 1215
1491
- },
1492
- {
1493
- "epoch": 2.69,
1494
- "learning_rate": 2.9480308003031837e-05,
1495
- "loss": 3.4986,
1496
- "step": 1220
1497
- },
1498
- {
1499
- "epoch": 2.7,
1500
- "learning_rate": 2.7550167631847363e-05,
1501
- "loss": 3.2628,
1502
- "step": 1225
1503
- },
1504
- {
1505
- "epoch": 2.72,
1506
- "learning_rate": 2.5669379995285432e-05,
1507
- "loss": 3.3669,
1508
- "step": 1230
1509
- },
1510
- {
1511
- "epoch": 2.73,
1512
- "learning_rate": 2.3840206296565597e-05,
1513
- "loss": 3.2283,
1514
- "step": 1235
1515
- },
1516
- {
1517
- "epoch": 2.74,
1518
- "learning_rate": 2.2064845685329227e-05,
1519
- "loss": 3.3849,
1520
- "step": 1240
1521
- },
1522
- {
1523
- "epoch": 2.75,
1524
- "learning_rate": 2.0345432613681382e-05,
1525
- "loss": 3.3535,
1526
- "step": 1245
1527
- },
1528
- {
1529
- "epoch": 2.76,
1530
- "learning_rate": 1.8684034270015573e-05,
1531
- "loss": 3.4427,
1532
- "step": 1250
1533
- },
1534
- {
1535
- "epoch": 2.77,
1536
- "learning_rate": 1.70826480937079e-05,
1537
- "loss": 3.4215,
1538
- "step": 1255
1539
- },
1540
- {
1541
- "epoch": 2.78,
1542
- "learning_rate": 1.554319937366785e-05,
1543
- "loss": 3.218,
1544
- "step": 1260
1545
- },
1546
- {
1547
- "epoch": 2.79,
1548
- "learning_rate": 1.4067538933632289e-05,
1549
- "loss": 3.3675,
1550
- "step": 1265
1551
- },
1552
- {
1553
- "epoch": 2.8,
1554
- "learning_rate": 1.2657440906987054e-05,
1555
- "loss": 3.3894,
1556
- "step": 1270
1557
- },
1558
- {
1559
- "epoch": 2.81,
1560
- "learning_rate": 1.1314600603790494e-05,
1561
- "loss": 3.3839,
1562
- "step": 1275
1563
- },
1564
- {
1565
- "epoch": 2.83,
1566
- "learning_rate": 1.0040632472562733e-05,
1567
- "loss": 3.6158,
1568
- "step": 1280
1569
- },
1570
- {
1571
- "epoch": 2.84,
1572
- "learning_rate": 8.837068159292907e-06,
1573
- "loss": 3.3993,
1574
- "step": 1285
1575
- },
1576
- {
1577
- "epoch": 2.85,
1578
- "learning_rate": 7.705354665996124e-06,
1579
- "loss": 3.5461,
1580
- "step": 1290
1581
- },
1582
- {
1583
- "epoch": 2.86,
1584
- "learning_rate": 6.646852611034896e-06,
1585
- "loss": 3.429,
1586
- "step": 1295
1587
- },
1588
- {
1589
- "epoch": 2.87,
1590
- "learning_rate": 5.662834593297144e-06,
1591
- "loss": 3.2661,
1592
- "step": 1300
1593
- },
1594
- {
1595
- "epoch": 2.88,
1596
- "learning_rate": 4.754483662195961e-06,
1597
- "loss": 3.4591,
1598
- "step": 1305
1599
- },
1600
- {
1601
- "epoch": 2.89,
1602
- "learning_rate": 3.922891895332133e-06,
1603
- "loss": 3.3605,
1604
- "step": 1310
1605
- },
1606
- {
1607
- "epoch": 2.9,
1608
- "learning_rate": 3.1690590855286824e-06,
1609
- "loss": 3.377,
1610
- "step": 1315
1611
- },
1612
- {
1613
- "epoch": 2.91,
1614
- "learning_rate": 2.493891538815714e-06,
1615
- "loss": 3.3229,
1616
- "step": 1320
1617
- },
1618
- {
1619
- "epoch": 2.92,
1620
- "learning_rate": 1.898200984811481e-06,
1621
- "loss": 3.3711,
1622
- "step": 1325
1623
- },
1624
- {
1625
- "epoch": 2.94,
1626
- "learning_rate": 1.3827036008091848e-06,
1627
- "loss": 3.3335,
1628
- "step": 1330
1629
- },
1630
- {
1631
- "epoch": 2.95,
1632
- "learning_rate": 9.48019150742587e-07,
1633
- "loss": 3.2538,
1634
- "step": 1335
1635
- },
1636
- {
1637
- "epoch": 2.96,
1638
- "learning_rate": 5.946702400663892e-07,
1639
- "loss": 3.2806,
1640
- "step": 1340
1641
- },
1642
- {
1643
- "epoch": 2.97,
1644
- "learning_rate": 3.2308168744640525e-07,
1645
- "loss": 3.5042,
1646
- "step": 1345
1647
- },
1648
- {
1649
- "epoch": 2.98,
1650
- "learning_rate": 1.335800140153434e-07,
1651
- "loss": 3.4461,
1652
- "step": 1350
1653
- },
1654
- {
1655
- "epoch": 2.99,
1656
- "learning_rate": 2.6393050808329943e-08,
1657
- "loss": 3.3588,
1658
- "step": 1355
1659
- },
1660
- {
1661
- "epoch": 3.0,
1662
- "eval_loss": 3.3743550777435303,
1663
- "eval_runtime": 31.7563,
1664
- "eval_samples_per_second": 20.815,
1665
- "eval_steps_per_second": 2.614,
1666
- "step": 1359
1667
- },
1668
- {
1669
- "epoch": 2.97,
1670
- "learning_rate": 3.160713999402438e-07,
1671
- "loss": 2.9957,
1672
- "step": 1360
1673
- },
1674
- {
1675
- "epoch": 2.98,
1676
- "learning_rate": 1.3068026139647436e-07,
1677
- "loss": 3.3681,
1678
- "step": 1365
1679
- },
1680
- {
1681
- "epoch": 2.99,
1682
- "learning_rate": 2.5819964843398836e-08,
1683
- "loss": 3.333,
1684
- "step": 1370
1685
- },
1686
- {
1687
- "epoch": 3.0,
1688
- "eval_loss": 3.107706069946289,
1689
- "eval_runtime": 7.9683,
1690
- "eval_samples_per_second": 77.055,
1691
- "eval_steps_per_second": 9.663,
1692
- "step": 1374
1693
  }
1694
  ],
1695
  "max_steps": 1832,
1696
  "num_train_epochs": 4,
1697
- "total_flos": 1433056149504000.0,
1698
  "trial_name": null,
1699
  "trial_params": null
1700
  }
 
1
  {
2
+ "best_metric": 3.2645678520202637,
3
+ "best_model_checkpoint": "output/eminem/checkpoint-458",
4
+ "epoch": 1.0,
5
+ "global_step": 458,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.01,
12
+ "learning_rate": 0.00013715965772858815,
13
+ "loss": 3.1417,
14
  "step": 5
15
  },
16
  {
17
  "epoch": 0.02,
18
+ "learning_rate": 0.000137038678363299,
19
+ "loss": 2.9863,
20
  "step": 10
21
  },
22
  {
23
  "epoch": 0.03,
24
+ "learning_rate": 0.00013683720419516428,
25
+ "loss": 3.1237,
26
  "step": 15
27
  },
28
  {
29
  "epoch": 0.04,
30
+ "learning_rate": 0.00013655547218994425,
31
+ "loss": 3.0636,
32
  "step": 20
33
  },
34
  {
35
+ "epoch": 0.05,
36
+ "learning_rate": 0.00013619381370941783,
37
+ "loss": 3.1631,
38
  "step": 25
39
  },
40
  {
41
  "epoch": 0.07,
42
+ "learning_rate": 0.0001357526541216486,
43
+ "loss": 3.017,
44
  "step": 30
45
  },
46
  {
47
  "epoch": 0.08,
48
+ "learning_rate": 0.000135232512300684,
49
+ "loss": 3.1919,
50
  "step": 35
51
  },
52
  {
53
  "epoch": 0.09,
54
+ "learning_rate": 0.00013463400001627657,
55
+ "loss": 3.404,
56
  "step": 40
57
  },
58
  {
59
  "epoch": 0.1,
60
+ "learning_rate": 0.0001339578212143452,
61
+ "loss": 3.2587,
62
  "step": 45
63
  },
64
  {
65
  "epoch": 0.11,
66
+ "learning_rate": 0.00013320477118902276,
67
+ "loss": 3.2812,
68
  "step": 50
69
  },
70
  {
71
  "epoch": 0.12,
72
+ "learning_rate": 0.00013237573564726276,
73
+ "loss": 2.9708,
74
  "step": 55
75
  },
76
  {
77
  "epoch": 0.13,
78
+ "learning_rate": 0.000131471689667107,
79
+ "loss": 3.2539,
80
  "step": 60
81
  },
82
  {
83
  "epoch": 0.14,
84
+ "learning_rate": 0.00013049369655083827,
85
+ "loss": 3.3207,
86
  "step": 65
87
  },
88
  {
89
  "epoch": 0.15,
90
+ "learning_rate": 0.00012944290657436735,
91
+ "loss": 3.1984,
92
  "step": 70
93
  },
94
  {
95
+ "epoch": 0.16,
96
+ "learning_rate": 0.00012832055563432473,
97
+ "loss": 3.2094,
98
  "step": 75
99
  },
100
  {
101
+ "epoch": 0.17,
102
+ "learning_rate": 0.0001271279637944492,
103
+ "loss": 3.2116,
104
  "step": 80
105
  },
106
  {
107
  "epoch": 0.19,
108
+ "learning_rate": 0.00012586653373298248,
109
+ "loss": 3.2721,
110
  "step": 85
111
  },
112
  {
113
  "epoch": 0.2,
114
+ "learning_rate": 0.00012453774909289557,
115
+ "loss": 3.2313,
116
  "step": 90
117
  },
118
  {
119
  "epoch": 0.21,
120
+ "learning_rate": 0.00012314317273688812,
121
+ "loss": 3.3427,
122
  "step": 95
123
  },
124
  {
125
  "epoch": 0.22,
126
+ "learning_rate": 0.00012168444490921243,
127
+ "loss": 3.3434,
128
  "step": 100
129
  },
130
  {
131
  "epoch": 0.23,
132
+ "learning_rate": 0.00012016328130648513,
133
+ "loss": 3.0684,
134
  "step": 105
135
  },
136
  {
137
  "epoch": 0.24,
138
+ "learning_rate": 0.00011858147105975337,
139
+ "loss": 2.9787,
140
  "step": 110
141
  },
142
  {
143
  "epoch": 0.25,
144
+ "learning_rate": 0.0001169408746301919,
145
+ "loss": 3.0232,
146
  "step": 115
147
  },
148
  {
149
  "epoch": 0.26,
150
+ "learning_rate": 0.00011524342162090394,
151
+ "loss": 3.097,
152
  "step": 120
153
  },
154
  {
155
+ "epoch": 0.27,
156
+ "learning_rate": 0.00011349110850740006,
157
+ "loss": 2.9875,
158
  "step": 125
159
  },
160
  {
161
+ "epoch": 0.28,
162
+ "learning_rate": 0.00011168599628942434,
163
+ "loss": 3.069,
164
  "step": 130
165
  },
166
  {
167
+ "epoch": 0.29,
168
+ "learning_rate": 0.00010983020806688995,
169
+ "loss": 3.2752,
170
  "step": 135
171
  },
172
  {
173
  "epoch": 0.31,
174
+ "learning_rate": 0.00010792592654277545,
175
+ "loss": 3.0414,
176
  "step": 140
177
  },
178
  {
179
  "epoch": 0.32,
180
+ "learning_rate": 0.00010597539145591715,
181
+ "loss": 3.2296,
182
  "step": 145
183
  },
184
  {
185
  "epoch": 0.33,
186
+ "learning_rate": 0.0001039808969467193,
187
+ "loss": 3.2247,
188
  "step": 150
189
  },
190
  {
191
  "epoch": 0.34,
192
+ "learning_rate": 0.00010194478885887969,
193
+ "loss": 3.0732,
194
  "step": 155
195
  },
196
  {
197
  "epoch": 0.35,
198
+ "learning_rate": 9.986946198030201e-05,
199
+ "loss": 3.0828,
200
  "step": 160
201
  },
202
  {
203
  "epoch": 0.36,
204
+ "learning_rate": 9.775735722644407e-05,
205
+ "loss": 3.0514,
206
  "step": 165
207
  },
208
  {
209
+ "epoch": 0.37,
210
+ "learning_rate": 9.561095876941162e-05,
211
+ "loss": 2.9164,
212
  "step": 170
213
  },
214
  {
215
+ "epoch": 0.38,
216
+ "learning_rate": 9.343279111617608e-05,
217
+ "loss": 3.0357,
218
  "step": 175
219
  },
220
  {
221
+ "epoch": 0.39,
222
+ "learning_rate": 9.122541613935095e-05,
223
+ "loss": 3.2488,
224
  "step": 180
225
  },
226
  {
227
+ "epoch": 0.4,
228
+ "learning_rate": 8.89914300640212e-05,
229
+ "loss": 3.017,
230
  "step": 185
231
  },
232
  {
233
+ "epoch": 0.41,
234
+ "learning_rate": 8.673346041416928e-05,
235
+ "loss": 3.1126,
236
  "step": 190
237
  },
238
  {
239
  "epoch": 0.43,
240
+ "learning_rate": 8.445416292228631e-05,
241
+ "loss": 3.2453,
242
  "step": 195
243
  },
244
  {
245
  "epoch": 0.44,
246
+ "learning_rate": 8.215621840580805e-05,
247
+ "loss": 3.0444,
248
  "step": 200
249
  },
250
  {
251
  "epoch": 0.45,
252
+ "learning_rate": 7.984232961404542e-05,
253
+ "loss": 3.1687,
254
  "step": 205
255
  },
256
  {
257
  "epoch": 0.46,
258
+ "learning_rate": 7.75152180493212e-05,
259
+ "loss": 3.225,
260
  "step": 210
261
  },
262
  {
263
  "epoch": 0.47,
264
+ "learning_rate": 7.517762076604763e-05,
265
+ "loss": 3.0953,
266
  "step": 215
267
  },
268
  {
269
+ "epoch": 0.48,
270
+ "learning_rate": 7.283228715151494e-05,
271
+ "loss": 3.25,
272
  "step": 220
273
  },
274
  {
275
+ "epoch": 0.49,
276
+ "learning_rate": 7.048197569217281e-05,
277
+ "loss": 3.1282,
278
  "step": 225
279
  },
280
  {
281
+ "epoch": 0.5,
282
+ "learning_rate": 6.812945072921086e-05,
283
+ "loss": 2.9809,
284
  "step": 230
285
  },
286
  {
287
+ "epoch": 0.51,
288
+ "learning_rate": 6.577747920725168e-05,
289
+ "loss": 3.0873,
290
  "step": 235
291
  },
292
  {
293
+ "epoch": 0.52,
294
+ "learning_rate": 6.342882741998283e-05,
295
+ "loss": 3.0845,
296
  "step": 240
297
  },
298
  {
299
+ "epoch": 0.53,
300
+ "learning_rate": 6.108625775655505e-05,
301
+ "loss": 2.9657,
302
  "step": 245
303
  },
304
  {
305
  "epoch": 0.55,
306
+ "learning_rate": 5.875252545257054e-05,
307
+ "loss": 3.0202,
308
  "step": 250
309
  },
310
  {
311
  "epoch": 0.56,
312
+ "learning_rate": 5.6430375349487304e-05,
313
+ "loss": 3.1417,
314
  "step": 255
315
  },
316
  {
317
  "epoch": 0.57,
318
+ "learning_rate": 5.412253866624731e-05,
319
+ "loss": 3.0013,
320
  "step": 260
321
  },
322
  {
323
  "epoch": 0.58,
324
+ "learning_rate": 5.183172978692829e-05,
325
+ "loss": 2.8449,
326
  "step": 265
327
  },
328
  {
329
+ "epoch": 0.59,
330
+ "learning_rate": 4.956064306819361e-05,
331
+ "loss": 3.2741,
332
  "step": 270
333
  },
334
  {
335
+ "epoch": 0.6,
336
+ "learning_rate": 4.731194967029994e-05,
337
+ "loss": 3.024,
338
  "step": 275
339
  },
340
  {
341
+ "epoch": 0.61,
342
+ "learning_rate": 4.508829441538685e-05,
343
+ "loss": 3.0632,
344
  "step": 280
345
  },
346
  {
347
+ "epoch": 0.62,
348
+ "learning_rate": 4.289229267674345e-05,
349
+ "loss": 2.8501,
350
  "step": 285
351
  },
352
  {
353
+ "epoch": 0.63,
354
+ "learning_rate": 4.072652730271264e-05,
355
+ "loss": 3.2079,
356
  "step": 290
357
  },
358
  {
359
+ "epoch": 0.64,
360
+ "learning_rate": 3.859354557884909e-05,
361
+ "loss": 3.1717,
362
  "step": 295
363
  },
364
  {
365
  "epoch": 0.66,
366
+ "learning_rate": 3.649585623190606e-05,
367
+ "loss": 3.1905,
368
  "step": 300
369
  },
370
  {
371
  "epoch": 0.67,
372
+ "learning_rate": 3.443592647917112e-05,
373
+ "loss": 3.0304,
374
  "step": 305
375
  },
376
  {
377
  "epoch": 0.68,
378
+ "learning_rate": 3.2416179126625834e-05,
379
+ "loss": 2.9144,
380
  "step": 310
381
  },
382
  {
383
+ "epoch": 0.69,
384
+ "learning_rate": 3.0438989719340004e-05,
385
+ "loss": 3.2034,
386
  "step": 315
387
  },
388
  {
389
+ "epoch": 0.7,
390
+ "learning_rate": 2.850668374745048e-05,
391
+ "loss": 2.9562,
392
  "step": 320
393
  },
394
  {
395
+ "epoch": 0.71,
396
+ "learning_rate": 2.662153391101465e-05,
397
+ "loss": 2.9282,
398
  "step": 325
399
  },
400
  {
401
+ "epoch": 0.72,
402
+ "learning_rate": 2.4785757446952362e-05,
403
+ "loss": 2.9689,
404
  "step": 330
405
  },
406
  {
407
+ "epoch": 0.73,
408
+ "learning_rate": 2.3001513521221957e-05,
409
+ "loss": 3.1242,
410
  "step": 335
411
  },
412
  {
413
+ "epoch": 0.74,
414
+ "learning_rate": 2.1270900689296127e-05,
415
+ "loss": 3.174,
416
  "step": 340
417
  },
418
  {
419
+ "epoch": 0.75,
420
+ "learning_rate": 1.959595442792614e-05,
421
+ "loss": 3.0034,
422
  "step": 345
423
  },
424
  {
425
+ "epoch": 0.76,
426
+ "learning_rate": 1.797864474109709e-05,
427
+ "loss": 3.1004,
428
  "step": 350
429
  },
430
  {
431
  "epoch": 0.78,
432
+ "learning_rate": 1.642087384298789e-05,
433
+ "loss": 2.9905,
434
  "step": 355
435
  },
436
  {
437
  "epoch": 0.79,
438
+ "learning_rate": 1.4924473920664698e-05,
439
+ "loss": 2.984,
440
  "step": 360
441
  },
442
  {
443
+ "epoch": 0.8,
444
+ "learning_rate": 1.3491204979136316e-05,
445
+ "loss": 3.1477,
446
  "step": 365
447
  },
448
  {
449
+ "epoch": 0.81,
450
+ "learning_rate": 1.2122752771308157e-05,
451
+ "loss": 3.3952,
452
  "step": 370
453
  },
454
  {
455
+ "epoch": 0.82,
456
+ "learning_rate": 1.0820726815267017e-05,
457
+ "loss": 3.2005,
458
  "step": 375
459
  },
460
  {
461
+ "epoch": 0.83,
462
+ "learning_rate": 9.586658501231604e-06,
463
+ "loss": 3.1182,
464
  "step": 380
465
  },
466
  {
467
+ "epoch": 0.84,
468
+ "learning_rate": 8.421999290393463e-06,
469
+ "loss": 3.1384,
470
  "step": 385
471
  },
472
  {
473
+ "epoch": 0.85,
474
+ "learning_rate": 7.328119007766703e-06,
475
+ "loss": 3.2064,
476
  "step": 390
477
  },
478
  {
479
+ "epoch": 0.86,
480
+ "learning_rate": 6.306304231055513e-06,
481
+ "loss": 3.1691,
482
  "step": 395
483
  },
484
  {
485
+ "epoch": 0.87,
486
+ "learning_rate": 5.357756777433396e-06,
487
+ "loss": 2.9847,
488
  "step": 400
489
  },
490
  {
491
+ "epoch": 0.88,
492
+ "learning_rate": 4.4835922900148836e-06,
493
+ "loss": 2.919,
494
  "step": 405
495
  },
496
  {
497
+ "epoch": 0.9,
498
+ "learning_rate": 3.6848389256806193e-06,
499
+ "loss": 3.0809,
500
  "step": 410
501
  },
502
  {
503
+ "epoch": 0.91,
504
+ "learning_rate": 2.9624361458012783e-06,
505
+ "loss": 3.1271,
506
  "step": 415
507
  },
508
  {
509
+ "epoch": 0.92,
510
+ "learning_rate": 2.317233611281141e-06,
511
+ "loss": 3.2683,
512
  "step": 420
513
  },
514
  {
515
+ "epoch": 0.93,
516
+ "learning_rate": 1.7499901832211207e-06,
517
+ "loss": 3.0606,
518
  "step": 425
519
  },
520
  {
521
+ "epoch": 0.94,
522
+ "learning_rate": 1.2613730303772807e-06,
523
+ "loss": 3.1985,
524
  "step": 430
525
  },
526
  {
527
+ "epoch": 0.95,
528
+ "learning_rate": 8.519568444637701e-07,
529
+ "loss": 3.0785,
530
  "step": 435
531
  },
532
  {
533
+ "epoch": 0.96,
534
+ "learning_rate": 5.222231642238413e-07,
535
+ "loss": 3.2775,
536
  "step": 440
537
  },
538
  {
539
+ "epoch": 0.97,
540
+ "learning_rate": 2.7255980906336113e-07,
541
+ "loss": 2.8361,
542
  "step": 445
543
  },
544
  {
545
+ "epoch": 0.98,
546
+ "learning_rate": 1.0326042291338553e-07,
547
+ "loss": 2.9579,
548
  "step": 450
549
  },
 
 
 
 
 
 
 
 
550
  {
551
  "epoch": 0.99,
552
+ "learning_rate": 1.4524128858071773e-08,
553
+ "loss": 3.1667,
554
  "step": 455
555
  },
556
  {
557
  "epoch": 1.0,
558
+ "eval_loss": 3.2645678520202637,
559
+ "eval_runtime": 27.8254,
560
+ "eval_samples_per_second": 22.102,
561
+ "eval_steps_per_second": 2.767,
562
+ "step": 458
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
563
  }
564
  ],
565
  "max_steps": 1832,
566
  "num_train_epochs": 4,
567
+ "total_flos": 478425710592000.0,
568
  "trial_name": null,
569
  "trial_params": null
570
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cc6b40508eb16387b324dae4260f7472a12be4c981b8e9ac6616b3a39ce1aa2
3
  size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:932ce4ce440f60f3536cbafb5b7a0426c27973f99d940115f605b96d124d42e2
3
  size 2671