cosql / trainer_state.json
tscholak's picture
add T5 model
dc0ccf7 unverified
raw
history blame
47.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 191.96969696969697,
"global_step": 1536,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12,
"learning_rate": 0,
"loss": 3.6611,
"step": 1
},
{
"epoch": 0.48,
"learning_rate": 0,
"loss": 3.6226,
"step": 4
},
{
"epoch": 0.97,
"learning_rate": 0.0001,
"loss": 2.845,
"step": 8
},
{
"epoch": 1.48,
"learning_rate": 0.0001,
"loss": 1.6871,
"step": 12
},
{
"epoch": 1.97,
"learning_rate": 0.0001,
"loss": 0.6514,
"step": 16
},
{
"epoch": 2.48,
"learning_rate": 0.0001,
"loss": 0.4597,
"step": 20
},
{
"epoch": 2.97,
"learning_rate": 0.0001,
"loss": 0.346,
"step": 24
},
{
"epoch": 3.48,
"learning_rate": 0.0001,
"loss": 0.3093,
"step": 28
},
{
"epoch": 3.97,
"learning_rate": 0.0001,
"loss": 0.2487,
"step": 32
},
{
"epoch": 4.48,
"learning_rate": 0.0001,
"loss": 0.2344,
"step": 36
},
{
"epoch": 4.97,
"learning_rate": 0.0001,
"loss": 0.2048,
"step": 40
},
{
"epoch": 5.48,
"learning_rate": 0.0001,
"loss": 0.1839,
"step": 44
},
{
"epoch": 5.97,
"learning_rate": 0.0001,
"loss": 0.1714,
"step": 48
},
{
"epoch": 6.48,
"learning_rate": 0.0001,
"loss": 0.1553,
"step": 52
},
{
"epoch": 6.97,
"learning_rate": 0.0001,
"loss": 0.1323,
"step": 56
},
{
"epoch": 7.48,
"learning_rate": 0.0001,
"loss": 0.1243,
"step": 60
},
{
"epoch": 7.97,
"learning_rate": 0.0001,
"loss": 0.113,
"step": 64
},
{
"epoch": 7.97,
"eval_exact_match": 0.5004965243296922,
"eval_exec": 0.5292949354518371,
"eval_loss": 0.184326171875,
"eval_runtime": 2467.6178,
"eval_samples_per_second": 0.527,
"step": 64
},
{
"epoch": 8.48,
"learning_rate": 0.0001,
"loss": 0.1088,
"step": 68
},
{
"epoch": 8.97,
"learning_rate": 0.0001,
"loss": 0.1037,
"step": 72
},
{
"epoch": 9.48,
"learning_rate": 0.0001,
"loss": 0.1025,
"step": 76
},
{
"epoch": 9.97,
"learning_rate": 0.0001,
"loss": 0.0885,
"step": 80
},
{
"epoch": 10.48,
"learning_rate": 0.0001,
"loss": 0.0879,
"step": 84
},
{
"epoch": 10.97,
"learning_rate": 0.0001,
"loss": 0.0847,
"step": 88
},
{
"epoch": 11.48,
"learning_rate": 0.0001,
"loss": 0.0817,
"step": 92
},
{
"epoch": 11.97,
"learning_rate": 0.0001,
"loss": 0.0731,
"step": 96
},
{
"epoch": 12.48,
"learning_rate": 0.0001,
"loss": 0.0701,
"step": 100
},
{
"epoch": 12.97,
"learning_rate": 0.0001,
"loss": 0.0697,
"step": 104
},
{
"epoch": 13.48,
"learning_rate": 0.0001,
"loss": 0.0632,
"step": 108
},
{
"epoch": 13.97,
"learning_rate": 0.0001,
"loss": 0.0581,
"step": 112
},
{
"epoch": 14.48,
"learning_rate": 0.0001,
"loss": 0.0603,
"step": 116
},
{
"epoch": 14.97,
"learning_rate": 0.0001,
"loss": 0.0575,
"step": 120
},
{
"epoch": 15.48,
"learning_rate": 0.0001,
"loss": 0.0595,
"step": 124
},
{
"epoch": 15.97,
"learning_rate": 0.0001,
"loss": 0.0513,
"step": 128
},
{
"epoch": 15.97,
"eval_exact_match": 0.5431976166832175,
"eval_exec": 0.5809334657398213,
"eval_loss": 0.19482421875,
"eval_runtime": 2620.0652,
"eval_samples_per_second": 0.496,
"step": 128
},
{
"epoch": 16.48,
"learning_rate": 0.0001,
"loss": 0.0516,
"step": 132
},
{
"epoch": 16.97,
"learning_rate": 0.0001,
"loss": 0.0515,
"step": 136
},
{
"epoch": 17.48,
"learning_rate": 0.0001,
"loss": 0.0467,
"step": 140
},
{
"epoch": 17.97,
"learning_rate": 0.0001,
"loss": 0.0443,
"step": 144
},
{
"epoch": 18.48,
"learning_rate": 0.0001,
"loss": 0.0439,
"step": 148
},
{
"epoch": 18.97,
"learning_rate": 0.0001,
"loss": 0.0393,
"step": 152
},
{
"epoch": 19.48,
"learning_rate": 0.0001,
"loss": 0.0407,
"step": 156
},
{
"epoch": 19.97,
"learning_rate": 0.0001,
"loss": 0.0396,
"step": 160
},
{
"epoch": 20.48,
"learning_rate": 0.0001,
"loss": 0.0381,
"step": 164
},
{
"epoch": 20.97,
"learning_rate": 0.0001,
"loss": 0.0358,
"step": 168
},
{
"epoch": 21.48,
"learning_rate": 0.0001,
"loss": 0.0304,
"step": 172
},
{
"epoch": 21.97,
"learning_rate": 0.0001,
"loss": 0.0318,
"step": 176
},
{
"epoch": 22.48,
"learning_rate": 0.0001,
"loss": 0.0331,
"step": 180
},
{
"epoch": 22.97,
"learning_rate": 0.0001,
"loss": 0.0305,
"step": 184
},
{
"epoch": 23.48,
"learning_rate": 0.0001,
"loss": 0.0344,
"step": 188
},
{
"epoch": 23.97,
"learning_rate": 0.0001,
"loss": 0.0302,
"step": 192
},
{
"epoch": 23.97,
"eval_exact_match": 0.5431976166832175,
"eval_exec": 0.5789473684210527,
"eval_loss": 0.2381591796875,
"eval_runtime": 2596.5804,
"eval_samples_per_second": 0.501,
"step": 192
},
{
"epoch": 24.48,
"learning_rate": 0.0001,
"loss": 0.0315,
"step": 196
},
{
"epoch": 24.97,
"learning_rate": 0.0001,
"loss": 0.0253,
"step": 200
},
{
"epoch": 25.48,
"learning_rate": 0.0001,
"loss": 0.0277,
"step": 204
},
{
"epoch": 25.97,
"learning_rate": 0.0001,
"loss": 0.0252,
"step": 208
},
{
"epoch": 26.48,
"learning_rate": 0.0001,
"loss": 0.0266,
"step": 212
},
{
"epoch": 26.97,
"learning_rate": 0.0001,
"loss": 0.0243,
"step": 216
},
{
"epoch": 27.48,
"learning_rate": 0.0001,
"loss": 0.0224,
"step": 220
},
{
"epoch": 27.97,
"learning_rate": 0.0001,
"loss": 0.024,
"step": 224
},
{
"epoch": 28.48,
"learning_rate": 0.0001,
"loss": 0.022,
"step": 228
},
{
"epoch": 28.97,
"learning_rate": 0.0001,
"loss": 0.02,
"step": 232
},
{
"epoch": 29.48,
"learning_rate": 0.0001,
"loss": 0.0191,
"step": 236
},
{
"epoch": 29.97,
"learning_rate": 0.0001,
"loss": 0.0213,
"step": 240
},
{
"epoch": 30.48,
"learning_rate": 0.0001,
"loss": 0.0195,
"step": 244
},
{
"epoch": 30.97,
"learning_rate": 0.0001,
"loss": 0.0191,
"step": 248
},
{
"epoch": 31.48,
"learning_rate": 0.0001,
"loss": 0.0185,
"step": 252
},
{
"epoch": 31.97,
"learning_rate": 0.0001,
"loss": 0.0163,
"step": 256
},
{
"epoch": 31.97,
"eval_exact_match": 0.5521350546176763,
"eval_exec": 0.5858987090367428,
"eval_loss": 0.265625,
"eval_runtime": 2579.5067,
"eval_samples_per_second": 0.504,
"step": 256
},
{
"epoch": 32.48,
"learning_rate": 0.0001,
"loss": 0.0162,
"step": 260
},
{
"epoch": 32.97,
"learning_rate": 0.0001,
"loss": 0.0164,
"step": 264
},
{
"epoch": 33.48,
"learning_rate": 0.0001,
"loss": 0.0172,
"step": 268
},
{
"epoch": 33.97,
"learning_rate": 0.0001,
"loss": 0.0157,
"step": 272
},
{
"epoch": 34.48,
"learning_rate": 0.0001,
"loss": 0.0175,
"step": 276
},
{
"epoch": 34.97,
"learning_rate": 0.0001,
"loss": 0.0142,
"step": 280
},
{
"epoch": 35.48,
"learning_rate": 0.0001,
"loss": 0.0147,
"step": 284
},
{
"epoch": 35.97,
"learning_rate": 0.0001,
"loss": 0.0141,
"step": 288
},
{
"epoch": 36.48,
"learning_rate": 0.0001,
"loss": 0.0119,
"step": 292
},
{
"epoch": 36.97,
"learning_rate": 0.0001,
"loss": 0.0136,
"step": 296
},
{
"epoch": 37.48,
"learning_rate": 0.0001,
"loss": 0.0152,
"step": 300
},
{
"epoch": 37.97,
"learning_rate": 0.0001,
"loss": 0.0111,
"step": 304
},
{
"epoch": 38.48,
"learning_rate": 0.0001,
"loss": 0.0132,
"step": 308
},
{
"epoch": 38.97,
"learning_rate": 0.0001,
"loss": 0.0121,
"step": 312
},
{
"epoch": 39.48,
"learning_rate": 0.0001,
"loss": 0.0125,
"step": 316
},
{
"epoch": 39.97,
"learning_rate": 0.0001,
"loss": 0.0125,
"step": 320
},
{
"epoch": 39.97,
"eval_exact_match": 0.5551142005958292,
"eval_exec": 0.5779543197616683,
"eval_loss": 0.29541015625,
"eval_runtime": 2642.6168,
"eval_samples_per_second": 0.492,
"step": 320
},
{
"epoch": 40.48,
"learning_rate": 0.0001,
"loss": 0.0107,
"step": 324
},
{
"epoch": 40.97,
"learning_rate": 0.0001,
"loss": 0.0109,
"step": 328
},
{
"epoch": 41.48,
"learning_rate": 0.0001,
"loss": 0.0099,
"step": 332
},
{
"epoch": 41.97,
"learning_rate": 0.0001,
"loss": 0.0103,
"step": 336
},
{
"epoch": 42.48,
"learning_rate": 0.0001,
"loss": 0.0114,
"step": 340
},
{
"epoch": 42.97,
"learning_rate": 0.0001,
"loss": 0.0094,
"step": 344
},
{
"epoch": 43.48,
"learning_rate": 0.0001,
"loss": 0.0086,
"step": 348
},
{
"epoch": 43.97,
"learning_rate": 0.0001,
"loss": 0.009,
"step": 352
},
{
"epoch": 44.48,
"learning_rate": 0.0001,
"loss": 0.0108,
"step": 356
},
{
"epoch": 44.97,
"learning_rate": 0.0001,
"loss": 0.0104,
"step": 360
},
{
"epoch": 45.48,
"learning_rate": 0.0001,
"loss": 0.0086,
"step": 364
},
{
"epoch": 45.97,
"learning_rate": 0.0001,
"loss": 0.0094,
"step": 368
},
{
"epoch": 46.48,
"learning_rate": 0.0001,
"loss": 0.0088,
"step": 372
},
{
"epoch": 46.97,
"learning_rate": 0.0001,
"loss": 0.0091,
"step": 376
},
{
"epoch": 47.48,
"learning_rate": 0.0001,
"loss": 0.0071,
"step": 380
},
{
"epoch": 47.97,
"learning_rate": 0.0001,
"loss": 0.0075,
"step": 384
},
{
"epoch": 47.97,
"eval_exact_match": 0.5680238331678252,
"eval_exec": 0.5918570009930486,
"eval_loss": 0.32373046875,
"eval_runtime": 2755.8621,
"eval_samples_per_second": 0.472,
"step": 384
},
{
"epoch": 48.48,
"learning_rate": 0.0001,
"loss": 0.0075,
"step": 388
},
{
"epoch": 48.97,
"learning_rate": 0.0001,
"loss": 0.0063,
"step": 392
},
{
"epoch": 49.48,
"learning_rate": 0.0001,
"loss": 0.0067,
"step": 396
},
{
"epoch": 49.97,
"learning_rate": 0.0001,
"loss": 0.0067,
"step": 400
},
{
"epoch": 50.48,
"learning_rate": 0.0001,
"loss": 0.007,
"step": 404
},
{
"epoch": 50.97,
"learning_rate": 0.0001,
"loss": 0.0057,
"step": 408
},
{
"epoch": 51.48,
"learning_rate": 0.0001,
"loss": 0.0074,
"step": 412
},
{
"epoch": 51.97,
"learning_rate": 0.0001,
"loss": 0.0064,
"step": 416
},
{
"epoch": 52.48,
"learning_rate": 0.0001,
"loss": 0.0059,
"step": 420
},
{
"epoch": 52.97,
"learning_rate": 0.0001,
"loss": 0.0067,
"step": 424
},
{
"epoch": 53.48,
"learning_rate": 0.0001,
"loss": 0.0057,
"step": 428
},
{
"epoch": 53.97,
"learning_rate": 0.0001,
"loss": 0.0056,
"step": 432
},
{
"epoch": 54.48,
"learning_rate": 0.0001,
"loss": 0.0058,
"step": 436
},
{
"epoch": 54.97,
"learning_rate": 0.0001,
"loss": 0.0057,
"step": 440
},
{
"epoch": 55.48,
"learning_rate": 0.0001,
"loss": 0.0055,
"step": 444
},
{
"epoch": 55.97,
"learning_rate": 0.0001,
"loss": 0.0053,
"step": 448
},
{
"epoch": 55.97,
"eval_exact_match": 0.5729890764647467,
"eval_exec": 0.5948361469712016,
"eval_loss": 0.349853515625,
"eval_runtime": 2795.6483,
"eval_samples_per_second": 0.465,
"step": 448
},
{
"epoch": 56.48,
"learning_rate": 0.0001,
"loss": 0.0055,
"step": 452
},
{
"epoch": 56.97,
"learning_rate": 0.0001,
"loss": 0.0049,
"step": 456
},
{
"epoch": 57.48,
"learning_rate": 0.0001,
"loss": 0.0055,
"step": 460
},
{
"epoch": 57.97,
"learning_rate": 0.0001,
"loss": 0.0049,
"step": 464
},
{
"epoch": 58.48,
"learning_rate": 0.0001,
"loss": 0.0056,
"step": 468
},
{
"epoch": 58.97,
"learning_rate": 0.0001,
"loss": 0.0048,
"step": 472
},
{
"epoch": 59.48,
"learning_rate": 0.0001,
"loss": 0.0052,
"step": 476
},
{
"epoch": 59.97,
"learning_rate": 0.0001,
"loss": 0.0049,
"step": 480
},
{
"epoch": 60.48,
"learning_rate": 0.0001,
"loss": 0.0053,
"step": 484
},
{
"epoch": 60.97,
"learning_rate": 0.0001,
"loss": 0.0047,
"step": 488
},
{
"epoch": 61.48,
"learning_rate": 0.0001,
"loss": 0.0056,
"step": 492
},
{
"epoch": 61.97,
"learning_rate": 0.0001,
"loss": 0.0044,
"step": 496
},
{
"epoch": 62.48,
"learning_rate": 0.0001,
"loss": 0.0039,
"step": 500
},
{
"epoch": 62.97,
"learning_rate": 0.0001,
"loss": 0.0047,
"step": 504
},
{
"epoch": 63.48,
"learning_rate": 0.0001,
"loss": 0.0048,
"step": 508
},
{
"epoch": 63.97,
"learning_rate": 0.0001,
"loss": 0.0043,
"step": 512
},
{
"epoch": 63.97,
"eval_exact_match": 0.5719960278053625,
"eval_exec": 0.5918570009930486,
"eval_loss": 0.3505859375,
"eval_runtime": 2583.794,
"eval_samples_per_second": 0.503,
"step": 512
},
{
"epoch": 64.48,
"learning_rate": 0.0001,
"loss": 0.0046,
"step": 516
},
{
"epoch": 64.97,
"learning_rate": 0.0001,
"loss": 0.0043,
"step": 520
},
{
"epoch": 65.48,
"learning_rate": 0.0001,
"loss": 0.0053,
"step": 524
},
{
"epoch": 65.97,
"learning_rate": 0.0001,
"loss": 0.0036,
"step": 528
},
{
"epoch": 66.48,
"learning_rate": 0.0001,
"loss": 0.0043,
"step": 532
},
{
"epoch": 66.97,
"learning_rate": 0.0001,
"loss": 0.0046,
"step": 536
},
{
"epoch": 67.48,
"learning_rate": 0.0001,
"loss": 0.0043,
"step": 540
},
{
"epoch": 67.97,
"learning_rate": 0.0001,
"loss": 0.0042,
"step": 544
},
{
"epoch": 68.48,
"learning_rate": 0.0001,
"loss": 0.0034,
"step": 548
},
{
"epoch": 68.97,
"learning_rate": 0.0001,
"loss": 0.0033,
"step": 552
},
{
"epoch": 69.48,
"learning_rate": 0.0001,
"loss": 0.005,
"step": 556
},
{
"epoch": 69.97,
"learning_rate": 0.0001,
"loss": 0.0032,
"step": 560
},
{
"epoch": 70.48,
"learning_rate": 0.0001,
"loss": 0.0032,
"step": 564
},
{
"epoch": 70.97,
"learning_rate": 0.0001,
"loss": 0.0041,
"step": 568
},
{
"epoch": 71.48,
"learning_rate": 0.0001,
"loss": 0.0034,
"step": 572
},
{
"epoch": 71.97,
"learning_rate": 0.0001,
"loss": 0.0044,
"step": 576
},
{
"epoch": 71.97,
"eval_exact_match": 0.5680238331678252,
"eval_exec": 0.5968222442899702,
"eval_loss": 0.38623046875,
"eval_runtime": 2715.8229,
"eval_samples_per_second": 0.479,
"step": 576
},
{
"epoch": 72.48,
"learning_rate": 0.0001,
"loss": 0.0031,
"step": 580
},
{
"epoch": 72.97,
"learning_rate": 0.0001,
"loss": 0.0037,
"step": 584
},
{
"epoch": 73.48,
"learning_rate": 0.0001,
"loss": 0.0036,
"step": 588
},
{
"epoch": 73.97,
"learning_rate": 0.0001,
"loss": 0.0037,
"step": 592
},
{
"epoch": 74.48,
"learning_rate": 0.0001,
"loss": 0.0032,
"step": 596
},
{
"epoch": 74.97,
"learning_rate": 0.0001,
"loss": 0.0037,
"step": 600
},
{
"epoch": 75.48,
"learning_rate": 0.0001,
"loss": 0.0032,
"step": 604
},
{
"epoch": 75.97,
"learning_rate": 0.0001,
"loss": 0.0033,
"step": 608
},
{
"epoch": 76.48,
"learning_rate": 0.0001,
"loss": 0.0026,
"step": 612
},
{
"epoch": 76.97,
"learning_rate": 0.0001,
"loss": 0.0032,
"step": 616
},
{
"epoch": 77.48,
"learning_rate": 0.0001,
"loss": 0.003,
"step": 620
},
{
"epoch": 77.97,
"learning_rate": 0.0001,
"loss": 0.0024,
"step": 624
},
{
"epoch": 78.48,
"learning_rate": 0.0001,
"loss": 0.0028,
"step": 628
},
{
"epoch": 78.97,
"learning_rate": 0.0001,
"loss": 0.0031,
"step": 632
},
{
"epoch": 79.48,
"learning_rate": 0.0001,
"loss": 0.0028,
"step": 636
},
{
"epoch": 79.97,
"learning_rate": 0.0001,
"loss": 0.0035,
"step": 640
},
{
"epoch": 79.97,
"eval_exact_match": 0.564051638530288,
"eval_exec": 0.5878848063555114,
"eval_loss": 0.39404296875,
"eval_runtime": 2640.1046,
"eval_samples_per_second": 0.492,
"step": 640
},
{
"epoch": 80.48,
"learning_rate": 0.0001,
"loss": 0.0027,
"step": 644
},
{
"epoch": 80.97,
"learning_rate": 0.0001,
"loss": 0.003,
"step": 648
},
{
"epoch": 81.48,
"learning_rate": 0.0001,
"loss": 0.003,
"step": 652
},
{
"epoch": 81.97,
"learning_rate": 0.0001,
"loss": 0.0022,
"step": 656
},
{
"epoch": 82.48,
"learning_rate": 0.0001,
"loss": 0.0031,
"step": 660
},
{
"epoch": 82.97,
"learning_rate": 0.0001,
"loss": 0.0031,
"step": 664
},
{
"epoch": 83.48,
"learning_rate": 0.0001,
"loss": 0.0031,
"step": 668
},
{
"epoch": 83.97,
"learning_rate": 0.0001,
"loss": 0.0025,
"step": 672
},
{
"epoch": 84.48,
"learning_rate": 0.0001,
"loss": 0.0034,
"step": 676
},
{
"epoch": 84.97,
"learning_rate": 0.0001,
"loss": 0.0026,
"step": 680
},
{
"epoch": 85.48,
"learning_rate": 0.0001,
"loss": 0.003,
"step": 684
},
{
"epoch": 85.97,
"learning_rate": 0.0001,
"loss": 0.0027,
"step": 688
},
{
"epoch": 86.48,
"learning_rate": 0.0001,
"loss": 0.0029,
"step": 692
},
{
"epoch": 86.97,
"learning_rate": 0.0001,
"loss": 0.003,
"step": 696
},
{
"epoch": 87.48,
"learning_rate": 0.0001,
"loss": 0.0031,
"step": 700
},
{
"epoch": 87.97,
"learning_rate": 0.0001,
"loss": 0.0022,
"step": 704
},
{
"epoch": 87.97,
"eval_exact_match": 0.5700099304865939,
"eval_exec": 0.5938430983118173,
"eval_loss": 0.399169921875,
"eval_runtime": 2706.8779,
"eval_samples_per_second": 0.48,
"step": 704
},
{
"epoch": 88.48,
"learning_rate": 0.0001,
"loss": 0.0033,
"step": 708
},
{
"epoch": 88.97,
"learning_rate": 0.0001,
"loss": 0.0026,
"step": 712
},
{
"epoch": 89.48,
"learning_rate": 0.0001,
"loss": 0.0023,
"step": 716
},
{
"epoch": 89.97,
"learning_rate": 0.0001,
"loss": 0.0022,
"step": 720
},
{
"epoch": 90.48,
"learning_rate": 0.0001,
"loss": 0.0023,
"step": 724
},
{
"epoch": 90.97,
"learning_rate": 0.0001,
"loss": 0.0029,
"step": 728
},
{
"epoch": 91.48,
"learning_rate": 0.0001,
"loss": 0.0029,
"step": 732
},
{
"epoch": 91.97,
"learning_rate": 0.0001,
"loss": 0.0028,
"step": 736
},
{
"epoch": 92.48,
"learning_rate": 0.0001,
"loss": 0.0025,
"step": 740
},
{
"epoch": 92.97,
"learning_rate": 0.0001,
"loss": 0.0024,
"step": 744
},
{
"epoch": 93.48,
"learning_rate": 0.0001,
"loss": 0.0035,
"step": 748
},
{
"epoch": 93.97,
"learning_rate": 0.0001,
"loss": 0.0021,
"step": 752
},
{
"epoch": 94.48,
"learning_rate": 0.0001,
"loss": 0.0021,
"step": 756
},
{
"epoch": 94.97,
"learning_rate": 0.0001,
"loss": 0.0027,
"step": 760
},
{
"epoch": 95.48,
"learning_rate": 0.0001,
"loss": 0.002,
"step": 764
},
{
"epoch": 95.97,
"learning_rate": 0.0001,
"loss": 0.002,
"step": 768
},
{
"epoch": 95.97,
"eval_exact_match": 0.5630585898709036,
"eval_exec": 0.5898709036742801,
"eval_loss": 0.38916015625,
"eval_runtime": 2439.1984,
"eval_samples_per_second": 0.533,
"step": 768
},
{
"epoch": 96.48,
"learning_rate": 0.0001,
"loss": 0.0024,
"step": 772
},
{
"epoch": 96.97,
"learning_rate": 0.0001,
"loss": 0.0019,
"step": 776
},
{
"epoch": 97.48,
"learning_rate": 0.0001,
"loss": 0.0025,
"step": 780
},
{
"epoch": 97.97,
"learning_rate": 0.0001,
"loss": 0.0018,
"step": 784
},
{
"epoch": 98.48,
"learning_rate": 0.0001,
"loss": 0.0024,
"step": 788
},
{
"epoch": 98.97,
"learning_rate": 0.0001,
"loss": 0.0026,
"step": 792
},
{
"epoch": 99.48,
"learning_rate": 0.0001,
"loss": 0.0029,
"step": 796
},
{
"epoch": 99.97,
"learning_rate": 0.0001,
"loss": 0.0028,
"step": 800
},
{
"epoch": 100.48,
"learning_rate": 0.0001,
"loss": 0.0021,
"step": 804
},
{
"epoch": 100.97,
"learning_rate": 0.0001,
"loss": 0.0027,
"step": 808
},
{
"epoch": 101.48,
"learning_rate": 0.0001,
"loss": 0.0029,
"step": 812
},
{
"epoch": 101.97,
"learning_rate": 0.0001,
"loss": 0.0021,
"step": 816
},
{
"epoch": 102.48,
"learning_rate": 0.0001,
"loss": 0.0024,
"step": 820
},
{
"epoch": 102.97,
"learning_rate": 0.0001,
"loss": 0.0033,
"step": 824
},
{
"epoch": 103.48,
"learning_rate": 0.0001,
"loss": 0.0017,
"step": 828
},
{
"epoch": 103.97,
"learning_rate": 0.0001,
"loss": 0.0024,
"step": 832
},
{
"epoch": 103.97,
"eval_exact_match": 0.5620655412115194,
"eval_exec": 0.6017874875868917,
"eval_loss": 0.4052734375,
"eval_runtime": 2462.89,
"eval_samples_per_second": 0.528,
"step": 832
},
{
"epoch": 104.48,
"learning_rate": 0.0001,
"loss": 0.0026,
"step": 836
},
{
"epoch": 104.97,
"learning_rate": 0.0001,
"loss": 0.0023,
"step": 840
},
{
"epoch": 105.48,
"learning_rate": 0.0001,
"loss": 0.0018,
"step": 844
},
{
"epoch": 105.97,
"learning_rate": 0.0001,
"loss": 0.0021,
"step": 848
},
{
"epoch": 106.48,
"learning_rate": 0.0001,
"loss": 0.0021,
"step": 852
},
{
"epoch": 106.97,
"learning_rate": 0.0001,
"loss": 0.0019,
"step": 856
},
{
"epoch": 107.48,
"learning_rate": 0.0001,
"loss": 0.0021,
"step": 860
},
{
"epoch": 107.97,
"learning_rate": 0.0001,
"loss": 0.002,
"step": 864
},
{
"epoch": 108.48,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 868
},
{
"epoch": 108.97,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 872
},
{
"epoch": 109.48,
"learning_rate": 0.0001,
"loss": 0.0017,
"step": 876
},
{
"epoch": 109.97,
"learning_rate": 0.0001,
"loss": 0.0023,
"step": 880
},
{
"epoch": 110.48,
"learning_rate": 0.0001,
"loss": 0.0025,
"step": 884
},
{
"epoch": 110.97,
"learning_rate": 0.0001,
"loss": 0.0023,
"step": 888
},
{
"epoch": 111.48,
"learning_rate": 0.0001,
"loss": 0.0019,
"step": 892
},
{
"epoch": 111.97,
"learning_rate": 0.0001,
"loss": 0.0016,
"step": 896
},
{
"epoch": 111.97,
"eval_exact_match": 0.5690168818272096,
"eval_exec": 0.5968222442899702,
"eval_loss": 0.406005859375,
"eval_runtime": 2444.0389,
"eval_samples_per_second": 0.532,
"step": 896
},
{
"epoch": 112.48,
"learning_rate": 0.0001,
"loss": 0.0018,
"step": 900
},
{
"epoch": 112.97,
"learning_rate": 0.0001,
"loss": 0.0021,
"step": 904
},
{
"epoch": 113.48,
"learning_rate": 0.0001,
"loss": 0.0025,
"step": 908
},
{
"epoch": 113.97,
"learning_rate": 0.0001,
"loss": 0.0015,
"step": 912
},
{
"epoch": 114.48,
"learning_rate": 0.0001,
"loss": 0.0021,
"step": 916
},
{
"epoch": 114.97,
"learning_rate": 0.0001,
"loss": 0.0018,
"step": 920
},
{
"epoch": 115.48,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 924
},
{
"epoch": 115.97,
"learning_rate": 0.0001,
"loss": 0.0019,
"step": 928
},
{
"epoch": 116.48,
"learning_rate": 0.0001,
"loss": 0.002,
"step": 932
},
{
"epoch": 116.97,
"learning_rate": 0.0001,
"loss": 0.0023,
"step": 936
},
{
"epoch": 117.48,
"learning_rate": 0.0001,
"loss": 0.002,
"step": 940
},
{
"epoch": 117.97,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 944
},
{
"epoch": 118.48,
"learning_rate": 0.0001,
"loss": 0.0017,
"step": 948
},
{
"epoch": 118.97,
"learning_rate": 0.0001,
"loss": 0.0012,
"step": 952
},
{
"epoch": 119.48,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 956
},
{
"epoch": 119.97,
"learning_rate": 0.0001,
"loss": 0.002,
"step": 960
},
{
"epoch": 119.97,
"eval_exact_match": 0.5680238331678252,
"eval_exec": 0.5918570009930486,
"eval_loss": 0.418701171875,
"eval_runtime": 2472.2191,
"eval_samples_per_second": 0.526,
"step": 960
},
{
"epoch": 120.48,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 964
},
{
"epoch": 120.97,
"learning_rate": 0.0001,
"loss": 0.0017,
"step": 968
},
{
"epoch": 121.48,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 972
},
{
"epoch": 121.97,
"learning_rate": 0.0001,
"loss": 0.0015,
"step": 976
},
{
"epoch": 122.48,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 980
},
{
"epoch": 122.97,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 984
},
{
"epoch": 123.48,
"learning_rate": 0.0001,
"loss": 0.0016,
"step": 988
},
{
"epoch": 123.97,
"learning_rate": 0.0001,
"loss": 0.0015,
"step": 992
},
{
"epoch": 124.48,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 996
},
{
"epoch": 124.97,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1000
},
{
"epoch": 125.48,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1004
},
{
"epoch": 125.97,
"learning_rate": 0.0001,
"loss": 0.0016,
"step": 1008
},
{
"epoch": 126.48,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1012
},
{
"epoch": 126.97,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1016
},
{
"epoch": 127.48,
"learning_rate": 0.0001,
"loss": 0.0017,
"step": 1020
},
{
"epoch": 127.97,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1024
},
{
"epoch": 127.97,
"eval_exact_match": 0.5680238331678252,
"eval_exec": 0.5938430983118173,
"eval_loss": 0.42626953125,
"eval_runtime": 2538.4241,
"eval_samples_per_second": 0.512,
"step": 1024
},
{
"epoch": 128.48,
"learning_rate": 0.0001,
"loss": 0.0016,
"step": 1028
},
{
"epoch": 128.97,
"learning_rate": 0.0001,
"loss": 0.0012,
"step": 1032
},
{
"epoch": 129.48,
"learning_rate": 0.0001,
"loss": 0.0019,
"step": 1036
},
{
"epoch": 129.97,
"learning_rate": 0.0001,
"loss": 0.0018,
"step": 1040
},
{
"epoch": 130.48,
"learning_rate": 0.0001,
"loss": 0.0012,
"step": 1044
},
{
"epoch": 130.97,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1048
},
{
"epoch": 131.48,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1052
},
{
"epoch": 131.97,
"learning_rate": 0.0001,
"loss": 0.0008,
"step": 1056
},
{
"epoch": 132.48,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1060
},
{
"epoch": 132.97,
"learning_rate": 0.0001,
"loss": 0.0016,
"step": 1064
},
{
"epoch": 133.48,
"learning_rate": 0.0001,
"loss": 0.0012,
"step": 1068
},
{
"epoch": 133.97,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1072
},
{
"epoch": 134.48,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1076
},
{
"epoch": 134.97,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1080
},
{
"epoch": 135.48,
"learning_rate": 0.0001,
"loss": 0.0018,
"step": 1084
},
{
"epoch": 135.97,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1088
},
{
"epoch": 135.97,
"eval_exact_match": 0.5620655412115194,
"eval_exec": 0.5908639523336644,
"eval_loss": 0.4482421875,
"eval_runtime": 2432.8996,
"eval_samples_per_second": 0.534,
"step": 1088
},
{
"epoch": 136.48,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1092
},
{
"epoch": 136.97,
"learning_rate": 0.0001,
"loss": 0.0019,
"step": 1096
},
{
"epoch": 137.48,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1100
},
{
"epoch": 137.97,
"learning_rate": 0.0001,
"loss": 0.0015,
"step": 1104
},
{
"epoch": 138.48,
"learning_rate": 0.0001,
"loss": 0.0016,
"step": 1108
},
{
"epoch": 138.97,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1112
},
{
"epoch": 139.48,
"learning_rate": 0.0001,
"loss": 0.0012,
"step": 1116
},
{
"epoch": 139.97,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1120
},
{
"epoch": 140.48,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1124
},
{
"epoch": 140.97,
"learning_rate": 0.0001,
"loss": 0.0015,
"step": 1128
},
{
"epoch": 141.48,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1132
},
{
"epoch": 141.97,
"learning_rate": 0.0001,
"loss": 0.0016,
"step": 1136
},
{
"epoch": 142.48,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1140
},
{
"epoch": 142.97,
"learning_rate": 0.0001,
"loss": 0.0016,
"step": 1144
},
{
"epoch": 143.48,
"learning_rate": 0.0001,
"loss": 0.0019,
"step": 1148
},
{
"epoch": 143.97,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1152
},
{
"epoch": 143.97,
"eval_exact_match": 0.5580933465739821,
"eval_exec": 0.5888778550148958,
"eval_loss": 0.448486328125,
"eval_runtime": 2855.924,
"eval_samples_per_second": 0.455,
"step": 1152
},
{
"epoch": 144.48,
"learning_rate": 0.0001,
"loss": 0.0012,
"step": 1156
},
{
"epoch": 144.97,
"learning_rate": 0.0001,
"loss": 0.0017,
"step": 1160
},
{
"epoch": 145.48,
"learning_rate": 0.0001,
"loss": 0.0015,
"step": 1164
},
{
"epoch": 145.97,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1168
},
{
"epoch": 146.48,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1172
},
{
"epoch": 146.97,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1176
},
{
"epoch": 147.48,
"learning_rate": 0.0001,
"loss": 0.0012,
"step": 1180
},
{
"epoch": 147.97,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1184
},
{
"epoch": 148.48,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1188
},
{
"epoch": 148.97,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1192
},
{
"epoch": 149.48,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 1196
},
{
"epoch": 149.97,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1200
},
{
"epoch": 150.48,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1204
},
{
"epoch": 150.97,
"learning_rate": 0.0001,
"loss": 0.0009,
"step": 1208
},
{
"epoch": 151.48,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1212
},
{
"epoch": 151.97,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1216
},
{
"epoch": 151.97,
"eval_exact_match": 0.5571002979145978,
"eval_exec": 0.5928500496524329,
"eval_loss": 0.458984375,
"eval_runtime": 2831.5441,
"eval_samples_per_second": 0.459,
"step": 1216
},
{
"epoch": 152.48,
"learning_rate": 0.0001,
"loss": 0.0008,
"step": 1220
},
{
"epoch": 152.97,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1224
},
{
"epoch": 153.48,
"learning_rate": 0.0001,
"loss": 0.0009,
"step": 1228
},
{
"epoch": 153.97,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 1232
},
{
"epoch": 154.48,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 1236
},
{
"epoch": 154.97,
"learning_rate": 0.0001,
"loss": 0.0015,
"step": 1240
},
{
"epoch": 155.48,
"learning_rate": 0.0001,
"loss": 0.0009,
"step": 1244
},
{
"epoch": 155.97,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1248
},
{
"epoch": 156.48,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1252
},
{
"epoch": 156.97,
"learning_rate": 0.0001,
"loss": 0.0012,
"step": 1256
},
{
"epoch": 157.48,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 1260
},
{
"epoch": 157.97,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 1264
},
{
"epoch": 158.48,
"learning_rate": 0.0001,
"loss": 0.0012,
"step": 1268
},
{
"epoch": 158.97,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1272
},
{
"epoch": 159.48,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1276
},
{
"epoch": 159.97,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 1280
},
{
"epoch": 159.97,
"eval_exact_match": 0.5561072492552135,
"eval_exec": 0.5888778550148958,
"eval_loss": 0.45849609375,
"eval_runtime": 2610.2922,
"eval_samples_per_second": 0.498,
"step": 1280
},
{
"epoch": 160.48,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 1284
},
{
"epoch": 160.97,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 1288
},
{
"epoch": 161.48,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1292
},
{
"epoch": 161.97,
"learning_rate": 0.0001,
"loss": 0.0012,
"step": 1296
},
{
"epoch": 162.48,
"learning_rate": 0.0001,
"loss": 0.0015,
"step": 1300
},
{
"epoch": 162.97,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1304
},
{
"epoch": 163.48,
"learning_rate": 0.0001,
"loss": 0.0012,
"step": 1308
},
{
"epoch": 163.97,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1312
},
{
"epoch": 164.48,
"learning_rate": 0.0001,
"loss": 0.0028,
"step": 1316
},
{
"epoch": 164.97,
"learning_rate": 0.0001,
"loss": 0.0027,
"step": 1320
},
{
"epoch": 165.48,
"learning_rate": 0.0001,
"loss": 0.0022,
"step": 1324
},
{
"epoch": 165.97,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1328
},
{
"epoch": 166.48,
"learning_rate": 0.0001,
"loss": 0.0016,
"step": 1332
},
{
"epoch": 166.97,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 1336
},
{
"epoch": 167.48,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1340
},
{
"epoch": 167.97,
"learning_rate": 0.0001,
"loss": 0.0008,
"step": 1344
},
{
"epoch": 167.97,
"eval_exact_match": 0.5630585898709036,
"eval_exec": 0.6037735849056604,
"eval_loss": 0.431396484375,
"eval_runtime": 2582.959,
"eval_samples_per_second": 0.503,
"step": 1344
},
{
"epoch": 168.48,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 1348
},
{
"epoch": 168.97,
"learning_rate": 0.0001,
"loss": 0.0012,
"step": 1352
},
{
"epoch": 169.48,
"learning_rate": 0.0001,
"loss": 0.0012,
"step": 1356
},
{
"epoch": 169.97,
"learning_rate": 0.0001,
"loss": 0.0009,
"step": 1360
},
{
"epoch": 170.48,
"learning_rate": 0.0001,
"loss": 0.0012,
"step": 1364
},
{
"epoch": 170.97,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1368
},
{
"epoch": 171.48,
"learning_rate": 0.0001,
"loss": 0.0006,
"step": 1372
},
{
"epoch": 171.97,
"learning_rate": 0.0001,
"loss": 0.0009,
"step": 1376
},
{
"epoch": 172.48,
"learning_rate": 0.0001,
"loss": 0.0007,
"step": 1380
},
{
"epoch": 172.97,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1384
},
{
"epoch": 173.48,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1388
},
{
"epoch": 173.97,
"learning_rate": 0.0001,
"loss": 0.0009,
"step": 1392
},
{
"epoch": 174.48,
"learning_rate": 0.0001,
"loss": 0.0009,
"step": 1396
},
{
"epoch": 174.97,
"learning_rate": 0.0001,
"loss": 0.0009,
"step": 1400
},
{
"epoch": 175.48,
"learning_rate": 0.0001,
"loss": 0.0008,
"step": 1404
},
{
"epoch": 175.97,
"learning_rate": 0.0001,
"loss": 0.0008,
"step": 1408
},
{
"epoch": 175.97,
"eval_exact_match": 0.5610724925521351,
"eval_exec": 0.5978152929493545,
"eval_loss": 0.4541015625,
"eval_runtime": 2572.262,
"eval_samples_per_second": 0.505,
"step": 1408
},
{
"epoch": 176.48,
"learning_rate": 0.0001,
"loss": 0.0008,
"step": 1412
},
{
"epoch": 176.97,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1416
},
{
"epoch": 177.48,
"learning_rate": 0.0001,
"loss": 0.0009,
"step": 1420
},
{
"epoch": 177.97,
"learning_rate": 0.0001,
"loss": 0.0008,
"step": 1424
},
{
"epoch": 178.48,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1428
},
{
"epoch": 178.97,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 1432
},
{
"epoch": 179.48,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1436
},
{
"epoch": 179.97,
"learning_rate": 0.0001,
"loss": 0.0007,
"step": 1440
},
{
"epoch": 180.48,
"learning_rate": 0.0001,
"loss": 0.0007,
"step": 1444
},
{
"epoch": 180.97,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1448
},
{
"epoch": 181.48,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1452
},
{
"epoch": 181.97,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1456
},
{
"epoch": 182.48,
"learning_rate": 0.0001,
"loss": 0.0007,
"step": 1460
},
{
"epoch": 182.97,
"learning_rate": 0.0001,
"loss": 0.0009,
"step": 1464
},
{
"epoch": 183.48,
"learning_rate": 0.0001,
"loss": 0.0009,
"step": 1468
},
{
"epoch": 183.97,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1472
},
{
"epoch": 183.97,
"eval_exact_match": 0.564051638530288,
"eval_exec": 0.5958291956305859,
"eval_loss": 0.468505859375,
"eval_runtime": 2664.1072,
"eval_samples_per_second": 0.488,
"step": 1472
},
{
"epoch": 184.48,
"learning_rate": 0.0001,
"loss": 0.0013,
"step": 1476
},
{
"epoch": 184.97,
"learning_rate": 0.0001,
"loss": 0.0008,
"step": 1480
},
{
"epoch": 185.48,
"learning_rate": 0.0001,
"loss": 0.0009,
"step": 1484
},
{
"epoch": 185.97,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 1488
},
{
"epoch": 186.48,
"learning_rate": 0.0001,
"loss": 0.0005,
"step": 1492
},
{
"epoch": 186.97,
"learning_rate": 0.0001,
"loss": 0.0016,
"step": 1496
},
{
"epoch": 187.48,
"learning_rate": 0.0001,
"loss": 0.0011,
"step": 1500
},
{
"epoch": 187.97,
"learning_rate": 0.0001,
"loss": 0.0007,
"step": 1504
},
{
"epoch": 188.48,
"learning_rate": 0.0001,
"loss": 0.0017,
"step": 1508
},
{
"epoch": 188.97,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 1512
},
{
"epoch": 189.48,
"learning_rate": 0.0001,
"loss": 0.0012,
"step": 1516
},
{
"epoch": 189.97,
"learning_rate": 0.0001,
"loss": 0.0014,
"step": 1520
},
{
"epoch": 190.48,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 1524
},
{
"epoch": 190.97,
"learning_rate": 0.0001,
"loss": 0.0008,
"step": 1528
},
{
"epoch": 191.48,
"learning_rate": 0.0001,
"loss": 0.0008,
"step": 1532
},
{
"epoch": 191.97,
"learning_rate": 0.0001,
"loss": 0.001,
"step": 1536
},
{
"epoch": 191.97,
"eval_exact_match": 0.5710029791459782,
"eval_exec": 0.5998013902681232,
"eval_loss": 0.466552734375,
"eval_runtime": 2439.0394,
"eval_samples_per_second": 0.533,
"step": 1536
}
],
"max_steps": 24576,
"num_train_epochs": 3072,
"total_flos": 6277475729408.0,
"trial_name": null,
"trial_params": null
}