ben81828's picture
Training in progress, step 450
260589e verified
raw
history blame
24.8 kB
{"current_steps": 5, "total_steps": 1200, "loss": 1.8836, "lr": 8.333333333333334e-06, "epoch": 0.0014771048744460858, "percentage": 0.42, "elapsed_time": "0:02:44", "remaining_time": "10:55:34", "throughput": 317.71, "total_tokens": 52288}
{"current_steps": 5, "total_steps": 6770, "loss": 1.8844, "lr": 1.4749262536873157e-06, "epoch": 0.0014771048744460858, "percentage": 0.07, "elapsed_time": "0:02:46", "remaining_time": "2 days, 14:44:41", "throughput": 313.2, "total_tokens": 52288}
{"current_steps": 10, "total_steps": 6770, "loss": 1.9494, "lr": 2.9498525073746313e-06, "epoch": 0.0029542097488921715, "percentage": 0.15, "elapsed_time": "0:04:15", "remaining_time": "1 day, 23:59:44", "throughput": 406.79, "total_tokens": 103976}
{"current_steps": 15, "total_steps": 6770, "loss": 1.995, "lr": 4.424778761061947e-06, "epoch": 0.004431314623338257, "percentage": 0.22, "elapsed_time": "0:05:43", "remaining_time": "1 day, 18:55:19", "throughput": 453.37, "total_tokens": 155560}
{"current_steps": 20, "total_steps": 6770, "loss": 2.0327, "lr": 5.899705014749263e-06, "epoch": 0.005908419497784343, "percentage": 0.3, "elapsed_time": "0:07:11", "remaining_time": "1 day, 16:26:17", "throughput": 478.79, "total_tokens": 206520}
{"current_steps": 25, "total_steps": 6770, "loss": 1.9153, "lr": 7.374631268436579e-06, "epoch": 0.007385524372230428, "percentage": 0.37, "elapsed_time": "0:08:38", "remaining_time": "1 day, 14:51:44", "throughput": 498.44, "total_tokens": 258464}
{"current_steps": 30, "total_steps": 6770, "loss": 1.9723, "lr": 8.849557522123894e-06, "epoch": 0.008862629246676515, "percentage": 0.44, "elapsed_time": "0:10:06", "remaining_time": "1 day, 13:51:32", "throughput": 510.68, "total_tokens": 309800}
{"current_steps": 35, "total_steps": 6770, "loss": 1.6646, "lr": 1.032448377581121e-05, "epoch": 0.0103397341211226, "percentage": 0.52, "elapsed_time": "0:11:34", "remaining_time": "1 day, 13:06:20", "throughput": 520.35, "total_tokens": 361216}
{"current_steps": 40, "total_steps": 6770, "loss": 1.7057, "lr": 1.1799410029498525e-05, "epoch": 0.011816838995568686, "percentage": 0.59, "elapsed_time": "0:13:02", "remaining_time": "1 day, 12:34:38", "throughput": 527.29, "total_tokens": 412680}
{"current_steps": 45, "total_steps": 6770, "loss": 1.4552, "lr": 1.3274336283185843e-05, "epoch": 0.013293943870014771, "percentage": 0.66, "elapsed_time": "0:14:30", "remaining_time": "1 day, 12:07:14", "throughput": 534.0, "total_tokens": 464640}
{"current_steps": 50, "total_steps": 6770, "loss": 1.3918, "lr": 1.4749262536873157e-05, "epoch": 0.014771048744460856, "percentage": 0.74, "elapsed_time": "0:15:58", "remaining_time": "1 day, 11:47:49", "throughput": 538.4, "total_tokens": 516240}
{"current_steps": 50, "total_steps": 6770, "eval_loss": 1.042170763015747, "epoch": 0.014771048744460856, "percentage": 0.74, "elapsed_time": "0:16:47", "remaining_time": "1 day, 13:37:05", "throughput": 512.33, "total_tokens": 516240}
{"current_steps": 55, "total_steps": 6770, "loss": 1.2308, "lr": 1.6224188790560475e-05, "epoch": 0.01624815361890694, "percentage": 0.81, "elapsed_time": "0:18:22", "remaining_time": "1 day, 13:24:07", "throughput": 514.61, "total_tokens": 567536}
{"current_steps": 60, "total_steps": 6770, "loss": 1.0922, "lr": 1.7699115044247787e-05, "epoch": 0.01772525849335303, "percentage": 0.89, "elapsed_time": "0:19:50", "remaining_time": "1 day, 12:58:38", "throughput": 520.35, "total_tokens": 619392}
{"current_steps": 65, "total_steps": 6770, "loss": 0.9517, "lr": 1.9174041297935107e-05, "epoch": 0.019202363367799114, "percentage": 0.96, "elapsed_time": "0:21:16", "remaining_time": "1 day, 12:34:14", "throughput": 525.87, "total_tokens": 671168}
{"current_steps": 70, "total_steps": 6770, "loss": 0.9277, "lr": 2.064896755162242e-05, "epoch": 0.0206794682422452, "percentage": 1.03, "elapsed_time": "0:22:43", "remaining_time": "1 day, 12:15:52", "throughput": 529.67, "total_tokens": 722464}
{"current_steps": 75, "total_steps": 6770, "loss": 0.8741, "lr": 2.2123893805309738e-05, "epoch": 0.022156573116691284, "percentage": 1.11, "elapsed_time": "0:24:09", "remaining_time": "1 day, 11:57:07", "throughput": 533.91, "total_tokens": 774120}
{"current_steps": 80, "total_steps": 6770, "loss": 0.8837, "lr": 2.359882005899705e-05, "epoch": 0.023633677991137372, "percentage": 1.18, "elapsed_time": "0:25:37", "remaining_time": "1 day, 11:42:56", "throughput": 537.19, "total_tokens": 825944}
{"current_steps": 85, "total_steps": 6770, "loss": 0.8658, "lr": 2.5073746312684367e-05, "epoch": 0.025110782865583457, "percentage": 1.26, "elapsed_time": "0:27:03", "remaining_time": "1 day, 11:28:18", "throughput": 540.52, "total_tokens": 877632}
{"current_steps": 90, "total_steps": 6770, "loss": 0.8626, "lr": 2.6548672566371686e-05, "epoch": 0.026587887740029542, "percentage": 1.33, "elapsed_time": "0:28:32", "remaining_time": "1 day, 11:18:12", "throughput": 542.34, "total_tokens": 928664}
{"current_steps": 95, "total_steps": 6770, "loss": 0.828, "lr": 2.8023598820059e-05, "epoch": 0.028064992614475627, "percentage": 1.4, "elapsed_time": "0:29:59", "remaining_time": "1 day, 11:07:21", "throughput": 544.65, "total_tokens": 980120}
{"current_steps": 100, "total_steps": 6770, "loss": 0.8208, "lr": 2.9498525073746314e-05, "epoch": 0.029542097488921712, "percentage": 1.48, "elapsed_time": "0:31:27", "remaining_time": "1 day, 10:58:11", "throughput": 546.08, "total_tokens": 1030696}
{"current_steps": 100, "total_steps": 6770, "eval_loss": 0.8917127847671509, "epoch": 0.029542097488921712, "percentage": 1.48, "elapsed_time": "0:31:46", "remaining_time": "1 day, 11:19:33", "throughput": 540.58, "total_tokens": 1030696}
{"current_steps": 105, "total_steps": 6770, "loss": 0.8858, "lr": 3.097345132743363e-05, "epoch": 0.0310192023633678, "percentage": 1.55, "elapsed_time": "0:33:20", "remaining_time": "1 day, 11:16:26", "throughput": 541.44, "total_tokens": 1083184}
{"current_steps": 110, "total_steps": 6770, "loss": 0.8395, "lr": 3.244837758112095e-05, "epoch": 0.03249630723781388, "percentage": 1.62, "elapsed_time": "0:34:47", "remaining_time": "1 day, 11:06:02", "throughput": 543.93, "total_tokens": 1135216}
{"current_steps": 115, "total_steps": 6770, "loss": 0.8729, "lr": 3.3923303834808265e-05, "epoch": 0.033973412112259974, "percentage": 1.7, "elapsed_time": "0:36:14", "remaining_time": "1 day, 10:57:04", "throughput": 546.2, "total_tokens": 1187592}
{"current_steps": 120, "total_steps": 6770, "loss": 0.8534, "lr": 3.5398230088495574e-05, "epoch": 0.03545051698670606, "percentage": 1.77, "elapsed_time": "0:37:39", "remaining_time": "1 day, 10:47:09", "throughput": 548.52, "total_tokens": 1239544}
{"current_steps": 125, "total_steps": 6770, "loss": 0.8621, "lr": 3.687315634218289e-05, "epoch": 0.03692762186115214, "percentage": 1.85, "elapsed_time": "0:39:07", "remaining_time": "1 day, 10:39:29", "throughput": 550.26, "total_tokens": 1291496}
{"current_steps": 130, "total_steps": 6770, "loss": 0.8548, "lr": 3.834808259587021e-05, "epoch": 0.03840472673559823, "percentage": 1.92, "elapsed_time": "0:40:32", "remaining_time": "1 day, 10:30:49", "throughput": 552.78, "total_tokens": 1344704}
{"current_steps": 135, "total_steps": 6770, "loss": 0.8555, "lr": 3.982300884955752e-05, "epoch": 0.03988183161004431, "percentage": 1.99, "elapsed_time": "0:41:59", "remaining_time": "1 day, 10:24:08", "throughput": 554.16, "total_tokens": 1396432}
{"current_steps": 140, "total_steps": 6770, "loss": 0.8503, "lr": 4.129793510324484e-05, "epoch": 0.0413589364844904, "percentage": 2.07, "elapsed_time": "0:43:25", "remaining_time": "1 day, 10:16:23", "throughput": 555.89, "total_tokens": 1448304}
{"current_steps": 145, "total_steps": 6770, "loss": 0.7974, "lr": 4.2772861356932154e-05, "epoch": 0.04283604135893648, "percentage": 2.14, "elapsed_time": "0:44:52", "remaining_time": "1 day, 10:10:28", "throughput": 557.24, "total_tokens": 1500480}
{"current_steps": 150, "total_steps": 6770, "loss": 0.8125, "lr": 4.4247787610619477e-05, "epoch": 0.04431314623338257, "percentage": 2.22, "elapsed_time": "0:46:18", "remaining_time": "1 day, 10:03:34", "throughput": 558.18, "total_tokens": 1550792}
{"current_steps": 150, "total_steps": 6770, "eval_loss": 0.9009397625923157, "epoch": 0.04431314623338257, "percentage": 2.22, "elapsed_time": "0:46:37", "remaining_time": "1 day, 10:17:38", "throughput": 554.37, "total_tokens": 1550792}
{"current_steps": 155, "total_steps": 6770, "loss": 0.8444, "lr": 4.5722713864306786e-05, "epoch": 0.04579025110782865, "percentage": 2.29, "elapsed_time": "0:48:09", "remaining_time": "1 day, 10:15:29", "throughput": 554.6, "total_tokens": 1602680}
{"current_steps": 160, "total_steps": 6770, "loss": 0.8832, "lr": 4.71976401179941e-05, "epoch": 0.047267355982274745, "percentage": 2.36, "elapsed_time": "0:49:35", "remaining_time": "1 day, 10:08:39", "throughput": 556.3, "total_tokens": 1655184}
{"current_steps": 165, "total_steps": 6770, "loss": 0.8428, "lr": 4.867256637168142e-05, "epoch": 0.04874446085672083, "percentage": 2.44, "elapsed_time": "0:51:01", "remaining_time": "1 day, 10:02:52", "throughput": 557.66, "total_tokens": 1707544}
{"current_steps": 170, "total_steps": 6770, "loss": 0.8235, "lr": 5.014749262536873e-05, "epoch": 0.050221565731166914, "percentage": 2.51, "elapsed_time": "0:52:28", "remaining_time": "1 day, 9:56:57", "throughput": 558.86, "total_tokens": 1759296}
{"current_steps": 175, "total_steps": 6770, "loss": 0.8293, "lr": 5.162241887905604e-05, "epoch": 0.051698670605613, "percentage": 2.58, "elapsed_time": "0:53:54", "remaining_time": "1 day, 9:51:42", "throughput": 560.32, "total_tokens": 1812488}
{"current_steps": 180, "total_steps": 6770, "loss": 0.8284, "lr": 5.309734513274337e-05, "epoch": 0.053175775480059084, "percentage": 2.66, "elapsed_time": "0:55:20", "remaining_time": "1 day, 9:46:19", "throughput": 561.43, "total_tokens": 1864408}
{"current_steps": 185, "total_steps": 6770, "loss": 0.8268, "lr": 5.457227138643069e-05, "epoch": 0.05465288035450517, "percentage": 2.73, "elapsed_time": "0:56:47", "remaining_time": "1 day, 9:41:18", "throughput": 562.56, "total_tokens": 1916744}
{"current_steps": 190, "total_steps": 6770, "loss": 0.8153, "lr": 5.6047197640118e-05, "epoch": 0.056129985228951254, "percentage": 2.81, "elapsed_time": "0:58:13", "remaining_time": "1 day, 9:36:32", "throughput": 563.34, "total_tokens": 1968128}
{"current_steps": 195, "total_steps": 6770, "loss": 0.8123, "lr": 5.752212389380531e-05, "epoch": 0.05760709010339734, "percentage": 2.88, "elapsed_time": "0:59:40", "remaining_time": "1 day, 9:31:54", "throughput": 564.03, "total_tokens": 2019312}
{"current_steps": 200, "total_steps": 6770, "loss": 0.7675, "lr": 5.899705014749263e-05, "epoch": 0.059084194977843424, "percentage": 2.95, "elapsed_time": "1:01:06", "remaining_time": "1 day, 9:27:19", "throughput": 564.91, "total_tokens": 2071176}
{"current_steps": 200, "total_steps": 6770, "eval_loss": 0.9007444977760315, "epoch": 0.059084194977843424, "percentage": 2.95, "elapsed_time": "1:01:25", "remaining_time": "1 day, 9:37:46", "throughput": 561.99, "total_tokens": 2071176}
{"current_steps": 205, "total_steps": 6770, "loss": 0.8075, "lr": 6.0471976401179945e-05, "epoch": 0.060561299852289516, "percentage": 3.03, "elapsed_time": "1:02:56", "remaining_time": "1 day, 9:35:48", "throughput": 561.95, "total_tokens": 2122328}
{"current_steps": 210, "total_steps": 6770, "loss": 0.8207, "lr": 6.194690265486725e-05, "epoch": 0.0620384047267356, "percentage": 3.1, "elapsed_time": "1:04:24", "remaining_time": "1 day, 9:31:45", "throughput": 562.82, "total_tokens": 2174744}
{"current_steps": 215, "total_steps": 6770, "loss": 0.7867, "lr": 6.342182890855458e-05, "epoch": 0.06351550960118169, "percentage": 3.18, "elapsed_time": "1:05:49", "remaining_time": "1 day, 9:26:57", "throughput": 563.89, "total_tokens": 2227136}
{"current_steps": 220, "total_steps": 6770, "loss": 0.8256, "lr": 6.48967551622419e-05, "epoch": 0.06499261447562776, "percentage": 3.25, "elapsed_time": "1:07:16", "remaining_time": "1 day, 9:23:02", "throughput": 564.47, "total_tokens": 2278568}
{"current_steps": 225, "total_steps": 6770, "loss": 0.7897, "lr": 6.637168141592921e-05, "epoch": 0.06646971935007386, "percentage": 3.32, "elapsed_time": "1:08:42", "remaining_time": "1 day, 9:18:28", "throughput": 565.29, "total_tokens": 2330224}
{"current_steps": 230, "total_steps": 6770, "loss": 0.792, "lr": 6.784660766961653e-05, "epoch": 0.06794682422451995, "percentage": 3.4, "elapsed_time": "1:10:08", "remaining_time": "1 day, 9:14:40", "throughput": 565.78, "total_tokens": 2381344}
{"current_steps": 235, "total_steps": 6770, "loss": 0.8309, "lr": 6.932153392330384e-05, "epoch": 0.06942392909896603, "percentage": 3.47, "elapsed_time": "1:11:34", "remaining_time": "1 day, 9:10:23", "throughput": 566.34, "total_tokens": 2432136}
{"current_steps": 240, "total_steps": 6770, "loss": 0.7974, "lr": 7.079646017699115e-05, "epoch": 0.07090103397341212, "percentage": 3.55, "elapsed_time": "1:13:01", "remaining_time": "1 day, 9:06:53", "throughput": 566.83, "total_tokens": 2483568}
{"current_steps": 245, "total_steps": 6770, "loss": 0.7739, "lr": 7.227138643067847e-05, "epoch": 0.0723781388478582, "percentage": 3.62, "elapsed_time": "1:14:27", "remaining_time": "1 day, 9:03:07", "throughput": 567.41, "total_tokens": 2535040}
{"current_steps": 250, "total_steps": 6770, "loss": 0.7558, "lr": 7.374631268436578e-05, "epoch": 0.07385524372230429, "percentage": 3.69, "elapsed_time": "1:15:54", "remaining_time": "1 day, 8:59:44", "throughput": 568.06, "total_tokens": 2587272}
{"current_steps": 250, "total_steps": 6770, "eval_loss": 0.810763955116272, "epoch": 0.07385524372230429, "percentage": 3.69, "elapsed_time": "1:16:13", "remaining_time": "1 day, 9:08:04", "throughput": 565.67, "total_tokens": 2587272}
{"current_steps": 255, "total_steps": 6770, "loss": 0.7851, "lr": 7.522123893805309e-05, "epoch": 0.07533234859675036, "percentage": 3.77, "elapsed_time": "1:17:45", "remaining_time": "1 day, 9:06:30", "throughput": 565.6, "total_tokens": 2638632}
{"current_steps": 260, "total_steps": 6770, "loss": 0.7211, "lr": 7.669616519174043e-05, "epoch": 0.07680945347119646, "percentage": 3.84, "elapsed_time": "1:19:12", "remaining_time": "1 day, 9:03:07", "throughput": 566.27, "total_tokens": 2691016}
{"current_steps": 265, "total_steps": 6770, "loss": 0.8082, "lr": 7.817109144542774e-05, "epoch": 0.07828655834564253, "percentage": 3.91, "elapsed_time": "1:20:38", "remaining_time": "1 day, 8:59:25", "throughput": 566.92, "total_tokens": 2742912}
{"current_steps": 270, "total_steps": 6770, "loss": 0.7485, "lr": 7.964601769911504e-05, "epoch": 0.07976366322008863, "percentage": 3.99, "elapsed_time": "1:22:04", "remaining_time": "1 day, 8:56:03", "throughput": 567.57, "total_tokens": 2795264}
{"current_steps": 275, "total_steps": 6770, "loss": 0.7454, "lr": 8.112094395280237e-05, "epoch": 0.08124076809453472, "percentage": 4.06, "elapsed_time": "1:23:31", "remaining_time": "1 day, 8:52:33", "throughput": 568.0, "total_tokens": 2846344}
{"current_steps": 280, "total_steps": 6770, "loss": 0.7258, "lr": 8.259587020648968e-05, "epoch": 0.0827178729689808, "percentage": 4.14, "elapsed_time": "1:24:57", "remaining_time": "1 day, 8:49:18", "throughput": 568.55, "total_tokens": 2898304}
{"current_steps": 285, "total_steps": 6770, "loss": 0.7863, "lr": 8.4070796460177e-05, "epoch": 0.08419497784342689, "percentage": 4.21, "elapsed_time": "1:26:24", "remaining_time": "1 day, 8:46:15", "throughput": 569.24, "total_tokens": 2951368}
{"current_steps": 290, "total_steps": 6770, "loss": 0.7105, "lr": 8.554572271386431e-05, "epoch": 0.08567208271787297, "percentage": 4.28, "elapsed_time": "1:27:51", "remaining_time": "1 day, 8:43:00", "throughput": 569.77, "total_tokens": 3003288}
{"current_steps": 295, "total_steps": 6770, "loss": 0.6885, "lr": 8.702064896755162e-05, "epoch": 0.08714918759231906, "percentage": 4.36, "elapsed_time": "1:29:17", "remaining_time": "1 day, 8:39:53", "throughput": 570.19, "total_tokens": 3054808}
{"current_steps": 300, "total_steps": 6770, "loss": 0.78, "lr": 8.849557522123895e-05, "epoch": 0.08862629246676514, "percentage": 4.43, "elapsed_time": "1:30:43", "remaining_time": "1 day, 8:36:32", "throughput": 570.84, "total_tokens": 3107200}
{"current_steps": 300, "total_steps": 6770, "eval_loss": 0.8194220662117004, "epoch": 0.08862629246676514, "percentage": 4.43, "elapsed_time": "1:31:02", "remaining_time": "1 day, 8:43:25", "throughput": 568.83, "total_tokens": 3107200}
{"current_steps": 305, "total_steps": 6770, "loss": 0.7394, "lr": 8.997050147492626e-05, "epoch": 0.09010339734121123, "percentage": 4.51, "elapsed_time": "1:32:35", "remaining_time": "1 day, 8:42:43", "throughput": 568.54, "total_tokens": 3158648}
{"current_steps": 310, "total_steps": 6770, "loss": 0.7371, "lr": 9.144542772861357e-05, "epoch": 0.0915805022156573, "percentage": 4.58, "elapsed_time": "1:34:01", "remaining_time": "1 day, 8:39:19", "throughput": 569.11, "total_tokens": 3210560}
{"current_steps": 315, "total_steps": 6770, "loss": 0.7622, "lr": 9.29203539823009e-05, "epoch": 0.0930576070901034, "percentage": 4.65, "elapsed_time": "1:35:29", "remaining_time": "1 day, 8:36:39", "throughput": 569.67, "total_tokens": 3263664}
{"current_steps": 320, "total_steps": 6770, "loss": 0.7214, "lr": 9.43952802359882e-05, "epoch": 0.09453471196454949, "percentage": 4.73, "elapsed_time": "1:36:54", "remaining_time": "1 day, 8:33:24", "throughput": 570.18, "total_tokens": 3315520}
{"current_steps": 325, "total_steps": 6770, "loss": 0.7078, "lr": 9.587020648967551e-05, "epoch": 0.09601181683899557, "percentage": 4.8, "elapsed_time": "1:38:22", "remaining_time": "1 day, 8:30:46", "throughput": 570.64, "total_tokens": 3368088}
{"current_steps": 330, "total_steps": 6770, "loss": 0.6852, "lr": 9.734513274336283e-05, "epoch": 0.09748892171344166, "percentage": 4.87, "elapsed_time": "1:39:48", "remaining_time": "1 day, 8:27:38", "throughput": 571.16, "total_tokens": 3420176}
{"current_steps": 335, "total_steps": 6770, "loss": 0.7557, "lr": 9.882005899705014e-05, "epoch": 0.09896602658788774, "percentage": 4.95, "elapsed_time": "1:41:15", "remaining_time": "1 day, 8:25:11", "throughput": 571.3, "total_tokens": 3471184}
{"current_steps": 340, "total_steps": 6770, "loss": 0.6709, "lr": 9.99999940340072e-05, "epoch": 0.10044313146233383, "percentage": 5.02, "elapsed_time": "1:42:41", "remaining_time": "1 day, 8:22:10", "throughput": 571.75, "total_tokens": 3523008}
{"current_steps": 345, "total_steps": 6770, "loss": 0.7252, "lr": 9.999978522440803e-05, "epoch": 0.1019202363367799, "percentage": 5.1, "elapsed_time": "1:44:08", "remaining_time": "1 day, 8:19:27", "throughput": 571.96, "total_tokens": 3573880}
{"current_steps": 350, "total_steps": 6770, "loss": 0.6602, "lr": 9.999927811659165e-05, "epoch": 0.103397341211226, "percentage": 5.17, "elapsed_time": "1:45:35", "remaining_time": "1 day, 8:16:42", "throughput": 572.33, "total_tokens": 3625752}
{"current_steps": 350, "total_steps": 6770, "eval_loss": 0.7663387656211853, "epoch": 0.103397341211226, "percentage": 5.17, "elapsed_time": "1:45:54", "remaining_time": "1 day, 8:22:34", "throughput": 570.6, "total_tokens": 3625752}
{"current_steps": 355, "total_steps": 6770, "loss": 0.7222, "lr": 9.999847271358347e-05, "epoch": 0.10487444608567208, "percentage": 5.24, "elapsed_time": "1:47:27", "remaining_time": "1 day, 8:21:42", "throughput": 570.33, "total_tokens": 3676984}
{"current_steps": 360, "total_steps": 6770, "loss": 0.6639, "lr": 9.99973690201885e-05, "epoch": 0.10635155096011817, "percentage": 5.32, "elapsed_time": "1:48:53", "remaining_time": "1 day, 8:18:51", "throughput": 570.78, "total_tokens": 3729168}
{"current_steps": 365, "total_steps": 6770, "loss": 0.6501, "lr": 9.999596704299139e-05, "epoch": 0.10782865583456426, "percentage": 5.39, "elapsed_time": "1:50:20", "remaining_time": "1 day, 8:16:15", "throughput": 571.06, "total_tokens": 3780672}
{"current_steps": 370, "total_steps": 6770, "loss": 0.6871, "lr": 9.999426679035628e-05, "epoch": 0.10930576070901034, "percentage": 5.47, "elapsed_time": "1:51:46", "remaining_time": "1 day, 8:13:25", "throughput": 571.43, "total_tokens": 3832328}
{"current_steps": 375, "total_steps": 6770, "loss": 0.6621, "lr": 9.99922682724269e-05, "epoch": 0.11078286558345643, "percentage": 5.54, "elapsed_time": "1:53:12", "remaining_time": "1 day, 8:10:41", "throughput": 571.64, "total_tokens": 3883112}
{"current_steps": 380, "total_steps": 6770, "loss": 0.7156, "lr": 9.998997150112635e-05, "epoch": 0.11225997045790251, "percentage": 5.61, "elapsed_time": "1:54:39", "remaining_time": "1 day, 8:08:06", "throughput": 571.98, "total_tokens": 3934976}
{"current_steps": 385, "total_steps": 6770, "loss": 0.6662, "lr": 9.998737649015718e-05, "epoch": 0.1137370753323486, "percentage": 5.69, "elapsed_time": "1:56:05", "remaining_time": "1 day, 8:05:13", "throughput": 572.3, "total_tokens": 3986192}
{"current_steps": 390, "total_steps": 6770, "loss": 0.682, "lr": 9.998448325500118e-05, "epoch": 0.11521418020679468, "percentage": 5.76, "elapsed_time": "1:57:32", "remaining_time": "1 day, 8:02:48", "throughput": 572.54, "total_tokens": 4037760}
{"current_steps": 395, "total_steps": 6770, "loss": 0.6137, "lr": 9.998129181291936e-05, "epoch": 0.11669128508124077, "percentage": 5.83, "elapsed_time": "1:58:57", "remaining_time": "1 day, 7:59:58", "throughput": 573.13, "total_tokens": 4090872}
{"current_steps": 400, "total_steps": 6770, "loss": 0.6739, "lr": 9.997780218295185e-05, "epoch": 0.11816838995568685, "percentage": 5.91, "elapsed_time": "2:00:25", "remaining_time": "1 day, 7:57:39", "throughput": 573.36, "total_tokens": 4142592}
{"current_steps": 400, "total_steps": 6770, "eval_loss": 0.7038857936859131, "epoch": 0.11816838995568685, "percentage": 5.91, "elapsed_time": "2:00:44", "remaining_time": "1 day, 8:02:42", "throughput": 571.85, "total_tokens": 4142592}
{"current_steps": 405, "total_steps": 6770, "loss": 0.6209, "lr": 9.997401438591772e-05, "epoch": 0.11964549483013294, "percentage": 5.98, "elapsed_time": "2:02:16", "remaining_time": "1 day, 8:01:36", "throughput": 571.81, "total_tokens": 4194920}
{"current_steps": 410, "total_steps": 6770, "loss": 0.6576, "lr": 9.996992844441495e-05, "epoch": 0.12112259970457903, "percentage": 6.06, "elapsed_time": "2:03:44", "remaining_time": "1 day, 7:59:30", "throughput": 572.03, "total_tokens": 4247048}
{"current_steps": 415, "total_steps": 6770, "loss": 0.6851, "lr": 9.996554438282022e-05, "epoch": 0.12259970457902511, "percentage": 6.13, "elapsed_time": "2:05:11", "remaining_time": "1 day, 7:57:04", "throughput": 572.42, "total_tokens": 4299728}
{"current_steps": 420, "total_steps": 6770, "loss": 0.6288, "lr": 9.996086222728879e-05, "epoch": 0.1240768094534712, "percentage": 6.2, "elapsed_time": "2:06:39", "remaining_time": "1 day, 7:55:03", "throughput": 572.52, "total_tokens": 4351088}
{"current_steps": 425, "total_steps": 6770, "loss": 0.667, "lr": 9.995588200575439e-05, "epoch": 0.1255539143279173, "percentage": 6.28, "elapsed_time": "2:08:05", "remaining_time": "1 day, 7:52:16", "throughput": 572.92, "total_tokens": 4403016}
{"current_steps": 430, "total_steps": 6770, "loss": 0.6747, "lr": 9.995060374792892e-05, "epoch": 0.12703101920236337, "percentage": 6.35, "elapsed_time": "2:09:31", "remaining_time": "1 day, 7:49:50", "throughput": 573.07, "total_tokens": 4453880}
{"current_steps": 435, "total_steps": 6770, "loss": 0.6594, "lr": 9.994502748530244e-05, "epoch": 0.12850812407680945, "percentage": 6.43, "elapsed_time": "2:10:57", "remaining_time": "1 day, 7:47:14", "throughput": 573.4, "total_tokens": 4505616}
{"current_steps": 440, "total_steps": 6770, "loss": 0.6727, "lr": 9.993915325114288e-05, "epoch": 0.12998522895125553, "percentage": 6.5, "elapsed_time": "2:12:24", "remaining_time": "1 day, 7:44:55", "throughput": 573.77, "total_tokens": 4558384}
{"current_steps": 445, "total_steps": 6770, "loss": 0.6526, "lr": 9.993298108049582e-05, "epoch": 0.13146233382570163, "percentage": 6.57, "elapsed_time": "2:13:50", "remaining_time": "1 day, 7:42:20", "throughput": 574.21, "total_tokens": 4611184}
{"current_steps": 450, "total_steps": 6770, "loss": 0.5661, "lr": 9.992651101018445e-05, "epoch": 0.1329394387001477, "percentage": 6.65, "elapsed_time": "2:15:17", "remaining_time": "1 day, 7:40:00", "throughput": 574.5, "total_tokens": 4663320}
{"current_steps": 450, "total_steps": 6770, "eval_loss": 0.7132604718208313, "epoch": 0.1329394387001477, "percentage": 6.65, "elapsed_time": "2:15:36", "remaining_time": "1 day, 7:44:27", "throughput": 573.16, "total_tokens": 4663320}