ben81828's picture
Training in progress, step 3400
d5799e2 verified
{"current_steps": 5, "total_steps": 3400, "loss": 2.8889, "lr": 2.9411764705882355e-06, "epoch": 0.0012876641771825909, "percentage": 0.15, "elapsed_time": "0:02:46", "remaining_time": "1 day, 7:20:36", "throughput": 317.97, "total_tokens": 52840}
{"current_steps": 10, "total_steps": 3400, "loss": 2.8165, "lr": 5.882352941176471e-06, "epoch": 0.0025753283543651817, "percentage": 0.29, "elapsed_time": "0:04:14", "remaining_time": "23:57:41", "throughput": 414.71, "total_tokens": 105528}
{"current_steps": 15, "total_steps": 3400, "loss": 2.8363, "lr": 8.823529411764707e-06, "epoch": 0.0038629925315477724, "percentage": 0.44, "elapsed_time": "0:05:42", "remaining_time": "21:27:36", "throughput": 463.76, "total_tokens": 158768}
{"current_steps": 20, "total_steps": 3400, "loss": 2.6853, "lr": 1.1764705882352942e-05, "epoch": 0.0051506567087303634, "percentage": 0.59, "elapsed_time": "0:07:11", "remaining_time": "20:14:07", "throughput": 489.08, "total_tokens": 210816}
{"current_steps": 25, "total_steps": 3400, "loss": 2.2992, "lr": 1.4705882352941177e-05, "epoch": 0.006438320885912954, "percentage": 0.74, "elapsed_time": "0:08:38", "remaining_time": "19:26:57", "throughput": 506.96, "total_tokens": 262936}
{"current_steps": 30, "total_steps": 3400, "loss": 1.8923, "lr": 1.7647058823529414e-05, "epoch": 0.007725985063095545, "percentage": 0.88, "elapsed_time": "0:10:08", "remaining_time": "18:58:31", "throughput": 518.43, "total_tokens": 315264}
{"current_steps": 35, "total_steps": 3400, "loss": 1.6984, "lr": 2.058823529411765e-05, "epoch": 0.009013649240278136, "percentage": 1.03, "elapsed_time": "0:11:36", "remaining_time": "18:36:02", "throughput": 528.14, "total_tokens": 367840}
{"current_steps": 40, "total_steps": 3400, "loss": 1.6434, "lr": 2.3529411764705884e-05, "epoch": 0.010301313417460727, "percentage": 1.18, "elapsed_time": "0:13:06", "remaining_time": "18:20:53", "throughput": 534.26, "total_tokens": 420112}
{"current_steps": 45, "total_steps": 3400, "loss": 1.4659, "lr": 2.647058823529412e-05, "epoch": 0.011588977594643318, "percentage": 1.32, "elapsed_time": "0:14:34", "remaining_time": "18:06:24", "throughput": 540.69, "total_tokens": 472728}
{"current_steps": 50, "total_steps": 3400, "loss": 1.3506, "lr": 2.9411764705882354e-05, "epoch": 0.012876641771825908, "percentage": 1.47, "elapsed_time": "0:16:03", "remaining_time": "17:56:24", "throughput": 544.27, "total_tokens": 524648}
{"current_steps": 50, "total_steps": 3400, "eval_loss": 1.1727452278137207, "epoch": 0.012876641771825908, "percentage": 1.47, "elapsed_time": "0:17:10", "remaining_time": "19:10:27", "throughput": 509.24, "total_tokens": 524648}
{"current_steps": 55, "total_steps": 3400, "loss": 1.1455, "lr": 3.235294117647059e-05, "epoch": 0.014164305949008499, "percentage": 1.62, "elapsed_time": "0:18:46", "remaining_time": "19:01:52", "throughput": 511.73, "total_tokens": 576472}
{"current_steps": 60, "total_steps": 3400, "loss": 0.9971, "lr": 3.529411764705883e-05, "epoch": 0.01545197012619109, "percentage": 1.76, "elapsed_time": "0:20:15", "remaining_time": "18:48:02", "throughput": 516.56, "total_tokens": 628056}
{"current_steps": 65, "total_steps": 3400, "loss": 0.9073, "lr": 3.8235294117647055e-05, "epoch": 0.01673963430337368, "percentage": 1.91, "elapsed_time": "0:21:44", "remaining_time": "18:35:51", "throughput": 521.45, "total_tokens": 680448}
{"current_steps": 70, "total_steps": 3400, "loss": 0.8386, "lr": 4.11764705882353e-05, "epoch": 0.018027298480556272, "percentage": 2.06, "elapsed_time": "0:23:19", "remaining_time": "18:29:19", "throughput": 524.37, "total_tokens": 733664}
{"current_steps": 75, "total_steps": 3400, "loss": 0.7827, "lr": 4.411764705882353e-05, "epoch": 0.01931496265773886, "percentage": 2.21, "elapsed_time": "0:24:51", "remaining_time": "18:22:18", "throughput": 526.93, "total_tokens": 786096}
{"current_steps": 80, "total_steps": 3400, "loss": 0.7814, "lr": 4.705882352941177e-05, "epoch": 0.020602626834921454, "percentage": 2.35, "elapsed_time": "0:26:25", "remaining_time": "18:16:38", "throughput": 528.66, "total_tokens": 838192}
{"current_steps": 85, "total_steps": 3400, "loss": 0.7297, "lr": 5e-05, "epoch": 0.021890291012104043, "percentage": 2.5, "elapsed_time": "0:27:54", "remaining_time": "18:08:20", "throughput": 531.61, "total_tokens": 890112}
{"current_steps": 90, "total_steps": 3400, "loss": 0.7894, "lr": 5.294117647058824e-05, "epoch": 0.023177955189286635, "percentage": 2.65, "elapsed_time": "0:29:24", "remaining_time": "18:01:48", "throughput": 534.58, "total_tokens": 943472}
{"current_steps": 95, "total_steps": 3400, "loss": 0.7758, "lr": 5.588235294117647e-05, "epoch": 0.024465619366469224, "percentage": 2.79, "elapsed_time": "0:30:52", "remaining_time": "17:54:11", "throughput": 538.09, "total_tokens": 996872}
{"current_steps": 100, "total_steps": 3400, "loss": 0.7577, "lr": 5.882352941176471e-05, "epoch": 0.025753283543651816, "percentage": 2.94, "elapsed_time": "0:32:21", "remaining_time": "17:47:53", "throughput": 540.69, "total_tokens": 1049816}
{"current_steps": 100, "total_steps": 3400, "eval_loss": 0.7517351508140564, "epoch": 0.025753283543651816, "percentage": 2.94, "elapsed_time": "0:33:00", "remaining_time": "18:09:13", "throughput": 530.1, "total_tokens": 1049816}
{"current_steps": 105, "total_steps": 3400, "loss": 0.7579, "lr": 6.176470588235295e-05, "epoch": 0.027040947720834405, "percentage": 3.09, "elapsed_time": "0:34:36", "remaining_time": "18:05:49", "throughput": 531.09, "total_tokens": 1102584}
{"current_steps": 110, "total_steps": 3400, "loss": 0.7659, "lr": 6.470588235294118e-05, "epoch": 0.028328611898016998, "percentage": 3.24, "elapsed_time": "0:36:03", "remaining_time": "17:58:31", "throughput": 534.07, "total_tokens": 1155512}
{"current_steps": 115, "total_steps": 3400, "loss": 0.7469, "lr": 6.764705882352942e-05, "epoch": 0.029616276075199587, "percentage": 3.38, "elapsed_time": "0:37:32", "remaining_time": "17:52:22", "throughput": 536.29, "total_tokens": 1207976}
{"current_steps": 120, "total_steps": 3400, "loss": 0.7353, "lr": 7.058823529411765e-05, "epoch": 0.03090394025238218, "percentage": 3.53, "elapsed_time": "0:39:00", "remaining_time": "17:46:11", "throughput": 538.27, "total_tokens": 1259776}
{"current_steps": 125, "total_steps": 3400, "loss": 0.7537, "lr": 7.352941176470589e-05, "epoch": 0.03219160442956477, "percentage": 3.68, "elapsed_time": "0:40:29", "remaining_time": "17:40:51", "throughput": 540.35, "total_tokens": 1312760}
{"current_steps": 130, "total_steps": 3400, "loss": 0.7669, "lr": 7.647058823529411e-05, "epoch": 0.03347926860674736, "percentage": 3.82, "elapsed_time": "0:41:57", "remaining_time": "17:35:14", "throughput": 542.54, "total_tokens": 1365616}
{"current_steps": 135, "total_steps": 3400, "loss": 0.722, "lr": 7.941176470588235e-05, "epoch": 0.03476693278392995, "percentage": 3.97, "elapsed_time": "0:43:26", "remaining_time": "17:30:31", "throughput": 543.91, "total_tokens": 1417544}
{"current_steps": 140, "total_steps": 3400, "loss": 0.7502, "lr": 8.23529411764706e-05, "epoch": 0.036054596961112545, "percentage": 4.12, "elapsed_time": "0:44:52", "remaining_time": "17:24:54", "throughput": 545.93, "total_tokens": 1469856}
{"current_steps": 145, "total_steps": 3400, "loss": 0.7174, "lr": 8.529411764705883e-05, "epoch": 0.037342261138295134, "percentage": 4.26, "elapsed_time": "0:46:20", "remaining_time": "17:20:11", "throughput": 547.25, "total_tokens": 1521496}
{"current_steps": 150, "total_steps": 3400, "loss": 0.7018, "lr": 8.823529411764706e-05, "epoch": 0.03862992531547772, "percentage": 4.41, "elapsed_time": "0:47:46", "remaining_time": "17:15:14", "throughput": 548.82, "total_tokens": 1573376}
{"current_steps": 150, "total_steps": 3400, "eval_loss": 0.7309949994087219, "epoch": 0.03862992531547772, "percentage": 4.41, "elapsed_time": "0:48:25", "remaining_time": "17:29:02", "throughput": 541.61, "total_tokens": 1573376}
{"current_steps": 155, "total_steps": 3400, "loss": 0.738, "lr": 9.11764705882353e-05, "epoch": 0.03991758949266031, "percentage": 4.56, "elapsed_time": "0:49:58", "remaining_time": "17:26:05", "throughput": 542.4, "total_tokens": 1626136}
{"current_steps": 160, "total_steps": 3400, "loss": 0.7579, "lr": 9.411764705882353e-05, "epoch": 0.04120525366984291, "percentage": 4.71, "elapsed_time": "0:51:24", "remaining_time": "17:20:51", "throughput": 544.34, "total_tokens": 1678760}
{"current_steps": 165, "total_steps": 3400, "loss": 0.7502, "lr": 9.705882352941177e-05, "epoch": 0.042492917847025496, "percentage": 4.85, "elapsed_time": "0:52:51", "remaining_time": "17:16:21", "throughput": 545.87, "total_tokens": 1731240}
{"current_steps": 170, "total_steps": 3400, "loss": 0.7448, "lr": 0.0001, "epoch": 0.043780582024208085, "percentage": 5.0, "elapsed_time": "0:54:17", "remaining_time": "17:11:35", "throughput": 547.57, "total_tokens": 1783816}
{"current_steps": 175, "total_steps": 3400, "loss": 0.6648, "lr": 9.999940874631277e-05, "epoch": 0.045068246201390674, "percentage": 5.15, "elapsed_time": "0:55:45", "remaining_time": "17:07:24", "throughput": 548.45, "total_tokens": 1834592}
{"current_steps": 180, "total_steps": 3400, "loss": 0.7759, "lr": 9.999763499923432e-05, "epoch": 0.04635591037857327, "percentage": 5.29, "elapsed_time": "0:57:11", "remaining_time": "17:02:59", "throughput": 550.3, "total_tokens": 1888176}
{"current_steps": 185, "total_steps": 3400, "loss": 0.7167, "lr": 9.999467880071402e-05, "epoch": 0.04764357455575586, "percentage": 5.44, "elapsed_time": "0:58:39", "remaining_time": "16:59:16", "throughput": 551.35, "total_tokens": 1940280}
{"current_steps": 190, "total_steps": 3400, "loss": 0.7483, "lr": 9.999054022066641e-05, "epoch": 0.04893123873293845, "percentage": 5.59, "elapsed_time": "1:00:04", "remaining_time": "16:55:04", "throughput": 552.88, "total_tokens": 1993096}
{"current_steps": 195, "total_steps": 3400, "loss": 0.7464, "lr": 9.998521935696953e-05, "epoch": 0.050218902910121044, "percentage": 5.74, "elapsed_time": "1:01:31", "remaining_time": "16:51:14", "throughput": 554.14, "total_tokens": 2045648}
{"current_steps": 200, "total_steps": 3400, "loss": 0.7594, "lr": 9.997871633546257e-05, "epoch": 0.05150656708730363, "percentage": 5.88, "elapsed_time": "1:02:57", "remaining_time": "16:47:25", "throughput": 555.61, "total_tokens": 2099008}
{"current_steps": 200, "total_steps": 3400, "eval_loss": 0.7274295687675476, "epoch": 0.05150656708730363, "percentage": 5.88, "elapsed_time": "1:03:35", "remaining_time": "16:57:35", "throughput": 550.06, "total_tokens": 2099008}
{"current_steps": 205, "total_steps": 3400, "loss": 0.706, "lr": 9.997103130994296e-05, "epoch": 0.05279423126448622, "percentage": 6.03, "elapsed_time": "1:05:08", "remaining_time": "16:55:12", "throughput": 550.54, "total_tokens": 2151680}
{"current_steps": 210, "total_steps": 3400, "loss": 0.7186, "lr": 9.996216446216267e-05, "epoch": 0.05408189544166881, "percentage": 6.18, "elapsed_time": "1:06:34", "remaining_time": "16:51:24", "throughput": 551.65, "total_tokens": 2203784}
{"current_steps": 215, "total_steps": 3400, "loss": 0.7009, "lr": 9.995211600182397e-05, "epoch": 0.055369559618851406, "percentage": 6.32, "elapsed_time": "1:08:00", "remaining_time": "16:47:34", "throughput": 552.73, "total_tokens": 2255632}
{"current_steps": 220, "total_steps": 3400, "loss": 0.6801, "lr": 9.994088616657444e-05, "epoch": 0.056657223796033995, "percentage": 6.47, "elapsed_time": "1:09:28", "remaining_time": "16:44:15", "throughput": 553.68, "total_tokens": 2308096}
{"current_steps": 225, "total_steps": 3400, "loss": 0.7569, "lr": 9.992847522200133e-05, "epoch": 0.057944887973216584, "percentage": 6.62, "elapsed_time": "1:10:55", "remaining_time": "16:40:46", "throughput": 554.88, "total_tokens": 2361168}
{"current_steps": 230, "total_steps": 3400, "loss": 0.7402, "lr": 9.99148834616253e-05, "epoch": 0.05923255215039917, "percentage": 6.76, "elapsed_time": "1:12:23", "remaining_time": "16:37:48", "throughput": 555.71, "total_tokens": 2413896}
{"current_steps": 235, "total_steps": 3400, "loss": 0.7191, "lr": 9.990011120689351e-05, "epoch": 0.06052021632758177, "percentage": 6.91, "elapsed_time": "1:13:51", "remaining_time": "16:34:47", "throughput": 556.47, "total_tokens": 2466136}
{"current_steps": 240, "total_steps": 3400, "loss": 0.7274, "lr": 9.988415880717194e-05, "epoch": 0.06180788050476436, "percentage": 7.06, "elapsed_time": "1:15:20", "remaining_time": "16:31:59", "throughput": 557.21, "total_tokens": 2518848}
{"current_steps": 245, "total_steps": 3400, "loss": 0.7704, "lr": 9.986702663973722e-05, "epoch": 0.06309554468194695, "percentage": 7.21, "elapsed_time": "1:16:48", "remaining_time": "16:29:01", "throughput": 558.22, "total_tokens": 2572384}
{"current_steps": 250, "total_steps": 3400, "loss": 0.7346, "lr": 9.98487151097676e-05, "epoch": 0.06438320885912954, "percentage": 7.35, "elapsed_time": "1:18:17", "remaining_time": "16:26:29", "throughput": 558.88, "total_tokens": 2625352}
{"current_steps": 250, "total_steps": 3400, "eval_loss": 0.7181503176689148, "epoch": 0.06438320885912954, "percentage": 7.35, "elapsed_time": "1:18:55", "remaining_time": "16:34:29", "throughput": 554.38, "total_tokens": 2625352}
{"current_steps": 255, "total_steps": 3400, "loss": 0.7408, "lr": 9.98292246503335e-05, "epoch": 0.06567087303631212, "percentage": 7.5, "elapsed_time": "1:20:28", "remaining_time": "16:32:30", "throughput": 554.67, "total_tokens": 2678216}
{"current_steps": 260, "total_steps": 3400, "loss": 0.7044, "lr": 9.980855572238714e-05, "epoch": 0.06695853721349472, "percentage": 7.65, "elapsed_time": "1:21:57", "remaining_time": "16:29:49", "throughput": 555.29, "total_tokens": 2730664}
{"current_steps": 265, "total_steps": 3400, "loss": 0.7334, "lr": 9.978670881475172e-05, "epoch": 0.06824620139067732, "percentage": 7.79, "elapsed_time": "1:23:25", "remaining_time": "16:26:51", "throughput": 556.14, "total_tokens": 2783584}
{"current_steps": 270, "total_steps": 3400, "loss": 0.7075, "lr": 9.976368444410985e-05, "epoch": 0.0695338655678599, "percentage": 7.94, "elapsed_time": "1:24:53", "remaining_time": "16:24:09", "throughput": 556.79, "total_tokens": 2836152}
{"current_steps": 275, "total_steps": 3400, "loss": 0.7039, "lr": 9.973948315499126e-05, "epoch": 0.0708215297450425, "percentage": 8.09, "elapsed_time": "1:26:21", "remaining_time": "16:21:18", "throughput": 557.35, "total_tokens": 2887808}
{"current_steps": 280, "total_steps": 3400, "loss": 0.6953, "lr": 9.971410551976002e-05, "epoch": 0.07210919392222509, "percentage": 8.24, "elapsed_time": "1:27:50", "remaining_time": "16:18:43", "throughput": 557.81, "total_tokens": 2939656}
{"current_steps": 285, "total_steps": 3400, "loss": 0.7022, "lr": 9.968755213860094e-05, "epoch": 0.07339685809940767, "percentage": 8.38, "elapsed_time": "1:29:17", "remaining_time": "16:15:56", "throughput": 558.4, "total_tokens": 2991632}
{"current_steps": 290, "total_steps": 3400, "loss": 0.6796, "lr": 9.96598236395054e-05, "epoch": 0.07468452227659027, "percentage": 8.53, "elapsed_time": "1:30:45", "remaining_time": "16:13:18", "throughput": 558.92, "total_tokens": 3043616}
{"current_steps": 295, "total_steps": 3400, "loss": 0.7346, "lr": 9.96309206782565e-05, "epoch": 0.07597218645377285, "percentage": 8.68, "elapsed_time": "1:32:13", "remaining_time": "16:10:42", "throughput": 559.66, "total_tokens": 3096920}
{"current_steps": 300, "total_steps": 3400, "loss": 0.6815, "lr": 9.960084393841355e-05, "epoch": 0.07725985063095545, "percentage": 8.82, "elapsed_time": "1:33:40", "remaining_time": "16:07:58", "throughput": 560.28, "total_tokens": 3149032}
{"current_steps": 300, "total_steps": 3400, "eval_loss": 0.7073924541473389, "epoch": 0.07725985063095545, "percentage": 8.82, "elapsed_time": "1:34:18", "remaining_time": "16:14:32", "throughput": 556.5, "total_tokens": 3149032}
{"current_steps": 305, "total_steps": 3400, "loss": 0.7208, "lr": 9.956959413129585e-05, "epoch": 0.07854751480813804, "percentage": 8.97, "elapsed_time": "1:35:53", "remaining_time": "16:13:04", "throughput": 556.45, "total_tokens": 3201560}
{"current_steps": 310, "total_steps": 3400, "loss": 0.7144, "lr": 9.953717199596598e-05, "epoch": 0.07983517898532062, "percentage": 9.12, "elapsed_time": "1:37:21", "remaining_time": "16:10:27", "throughput": 557.14, "total_tokens": 3254632}
{"current_steps": 315, "total_steps": 3400, "loss": 0.6861, "lr": 9.95035782992122e-05, "epoch": 0.08112284316250322, "percentage": 9.26, "elapsed_time": "1:38:52", "remaining_time": "16:08:22", "throughput": 557.33, "total_tokens": 3306432}
{"current_steps": 320, "total_steps": 3400, "loss": 0.6836, "lr": 9.94688138355304e-05, "epoch": 0.08241050733968582, "percentage": 9.41, "elapsed_time": "1:40:21", "remaining_time": "16:05:55", "throughput": 557.75, "total_tokens": 3358392}
{"current_steps": 325, "total_steps": 3400, "loss": 0.7353, "lr": 9.943287942710527e-05, "epoch": 0.0836981715168684, "percentage": 9.56, "elapsed_time": "1:41:50", "remaining_time": "16:03:39", "throughput": 558.24, "total_tokens": 3411424}
{"current_steps": 330, "total_steps": 3400, "loss": 0.6774, "lr": 9.939577592379088e-05, "epoch": 0.08498583569405099, "percentage": 9.71, "elapsed_time": "1:43:18", "remaining_time": "16:01:07", "throughput": 558.66, "total_tokens": 3462992}
{"current_steps": 335, "total_steps": 3400, "loss": 0.7331, "lr": 9.935750420309055e-05, "epoch": 0.08627349987123359, "percentage": 9.85, "elapsed_time": "1:44:49", "remaining_time": "15:59:00", "throughput": 559.08, "total_tokens": 3516136}
{"current_steps": 340, "total_steps": 3400, "loss": 0.6939, "lr": 9.931806517013612e-05, "epoch": 0.08756116404841617, "percentage": 10.0, "elapsed_time": "1:46:17", "remaining_time": "15:56:39", "throughput": 559.51, "total_tokens": 3568360}
{"current_steps": 345, "total_steps": 3400, "loss": 0.7158, "lr": 9.927745975766654e-05, "epoch": 0.08884882822559877, "percentage": 10.15, "elapsed_time": "1:47:46", "remaining_time": "15:54:25", "throughput": 559.88, "total_tokens": 3620696}
{"current_steps": 350, "total_steps": 3400, "loss": 0.6932, "lr": 9.923568892600578e-05, "epoch": 0.09013649240278135, "percentage": 10.29, "elapsed_time": "1:49:14", "remaining_time": "15:51:57", "throughput": 560.41, "total_tokens": 3673152}
{"current_steps": 350, "total_steps": 3400, "eval_loss": 0.7044599056243896, "epoch": 0.09013649240278135, "percentage": 10.29, "elapsed_time": "1:49:52", "remaining_time": "15:57:30", "throughput": 557.15, "total_tokens": 3673152}
{"current_steps": 355, "total_steps": 3400, "loss": 0.6778, "lr": 9.91927536630402e-05, "epoch": 0.09142415657996394, "percentage": 10.44, "elapsed_time": "1:51:26", "remaining_time": "15:55:53", "throughput": 557.13, "total_tokens": 3725296}
{"current_steps": 360, "total_steps": 3400, "loss": 0.6857, "lr": 9.91486549841951e-05, "epoch": 0.09271182075714654, "percentage": 10.59, "elapsed_time": "1:52:52", "remaining_time": "15:53:13", "throughput": 557.75, "total_tokens": 3777552}
{"current_steps": 365, "total_steps": 3400, "loss": 0.7184, "lr": 9.91033939324107e-05, "epoch": 0.09399948493432912, "percentage": 10.74, "elapsed_time": "1:54:21", "remaining_time": "15:50:50", "throughput": 558.25, "total_tokens": 3830200}
{"current_steps": 370, "total_steps": 3400, "loss": 0.7196, "lr": 9.905697157811761e-05, "epoch": 0.09528714911151172, "percentage": 10.88, "elapsed_time": "1:55:46", "remaining_time": "15:48:09", "throughput": 558.99, "total_tokens": 3883200}
{"current_steps": 375, "total_steps": 3400, "loss": 0.6914, "lr": 9.900938901921131e-05, "epoch": 0.09657481328869431, "percentage": 11.03, "elapsed_time": "1:57:14", "remaining_time": "15:45:41", "throughput": 559.5, "total_tokens": 3935576}
{"current_steps": 380, "total_steps": 3400, "loss": 0.6681, "lr": 9.896064738102635e-05, "epoch": 0.0978624774658769, "percentage": 11.18, "elapsed_time": "1:58:40", "remaining_time": "15:43:07", "throughput": 560.04, "total_tokens": 3987624}
{"current_steps": 385, "total_steps": 3400, "loss": 0.6723, "lr": 9.891074781630966e-05, "epoch": 0.09915014164305949, "percentage": 11.32, "elapsed_time": "2:00:06", "remaining_time": "15:40:32", "throughput": 560.58, "total_tokens": 4039680}
{"current_steps": 390, "total_steps": 3400, "loss": 0.6498, "lr": 9.885969150519331e-05, "epoch": 0.10043780582024209, "percentage": 11.47, "elapsed_time": "2:01:33", "remaining_time": "15:38:13", "throughput": 560.91, "total_tokens": 4091216}
{"current_steps": 395, "total_steps": 3400, "loss": 0.7311, "lr": 9.88074796551666e-05, "epoch": 0.10172546999742467, "percentage": 11.62, "elapsed_time": "2:03:01", "remaining_time": "15:35:58", "throughput": 561.41, "total_tokens": 4144264}
{"current_steps": 400, "total_steps": 3400, "loss": 0.7089, "lr": 9.875411350104744e-05, "epoch": 0.10301313417460727, "percentage": 11.76, "elapsed_time": "2:04:29", "remaining_time": "15:33:37", "throughput": 561.93, "total_tokens": 4197072}
{"current_steps": 400, "total_steps": 3400, "eval_loss": 0.6847750544548035, "epoch": 0.10301313417460727, "percentage": 11.76, "elapsed_time": "2:05:06", "remaining_time": "15:38:22", "throughput": 559.09, "total_tokens": 4197072}
{"current_steps": 405, "total_steps": 3400, "loss": 0.7021, "lr": 9.86995943049533e-05, "epoch": 0.10430079835178985, "percentage": 11.91, "elapsed_time": "2:06:38", "remaining_time": "15:36:29", "throughput": 559.3, "total_tokens": 4249656}
{"current_steps": 410, "total_steps": 3400, "loss": 0.6943, "lr": 9.864392335627117e-05, "epoch": 0.10558846252897244, "percentage": 12.06, "elapsed_time": "2:08:05", "remaining_time": "15:34:08", "throughput": 559.87, "total_tokens": 4302944}
{"current_steps": 415, "total_steps": 3400, "loss": 0.7146, "lr": 9.858710197162721e-05, "epoch": 0.10687612670615504, "percentage": 12.21, "elapsed_time": "2:09:31", "remaining_time": "15:31:37", "throughput": 560.45, "total_tokens": 4355480}
{"current_steps": 420, "total_steps": 3400, "loss": 0.6312, "lr": 9.852913149485556e-05, "epoch": 0.10816379088333762, "percentage": 12.35, "elapsed_time": "2:10:58", "remaining_time": "15:29:18", "throughput": 560.88, "total_tokens": 4407688}
{"current_steps": 425, "total_steps": 3400, "loss": 0.6877, "lr": 9.847001329696653e-05, "epoch": 0.10945145506052022, "percentage": 12.5, "elapsed_time": "2:12:24", "remaining_time": "15:26:51", "throughput": 561.37, "total_tokens": 4459736}
{"current_steps": 430, "total_steps": 3400, "loss": 0.6975, "lr": 9.840974877611422e-05, "epoch": 0.11073911923770281, "percentage": 12.65, "elapsed_time": "2:13:51", "remaining_time": "15:24:35", "throughput": 561.88, "total_tokens": 4512928}
{"current_steps": 435, "total_steps": 3400, "loss": 0.651, "lr": 9.834833935756344e-05, "epoch": 0.1120267834148854, "percentage": 12.79, "elapsed_time": "2:15:18", "remaining_time": "15:22:19", "throughput": 562.37, "total_tokens": 4565840}
{"current_steps": 440, "total_steps": 3400, "loss": 0.685, "lr": 9.828578649365601e-05, "epoch": 0.11331444759206799, "percentage": 12.94, "elapsed_time": "2:16:48", "remaining_time": "15:20:20", "throughput": 562.61, "total_tokens": 4618168}
{"current_steps": 445, "total_steps": 3400, "loss": 0.6258, "lr": 9.822209166377635e-05, "epoch": 0.11460211176925057, "percentage": 13.09, "elapsed_time": "2:18:17", "remaining_time": "15:18:17", "throughput": 562.81, "total_tokens": 4669784}
{"current_steps": 450, "total_steps": 3400, "loss": 0.6732, "lr": 9.815725637431662e-05, "epoch": 0.11588977594643317, "percentage": 13.24, "elapsed_time": "2:19:48", "remaining_time": "15:16:28", "throughput": 563.01, "total_tokens": 4722528}
{"current_steps": 450, "total_steps": 3400, "eval_loss": 0.6526497006416321, "epoch": 0.11588977594643317, "percentage": 13.24, "elapsed_time": "2:20:27", "remaining_time": "15:20:44", "throughput": 560.4, "total_tokens": 4722528}
{"current_steps": 455, "total_steps": 3400, "loss": 0.6544, "lr": 9.809128215864097e-05, "epoch": 0.11717744012361576, "percentage": 13.38, "elapsed_time": "2:22:00", "remaining_time": "15:19:06", "throughput": 560.37, "total_tokens": 4774400}
{"current_steps": 460, "total_steps": 3400, "loss": 0.652, "lr": 9.802417057704931e-05, "epoch": 0.11846510430079835, "percentage": 13.53, "elapsed_time": "2:23:28", "remaining_time": "15:17:02", "throughput": 560.67, "total_tokens": 4826704}
{"current_steps": 465, "total_steps": 3400, "loss": 0.6582, "lr": 9.795592321674045e-05, "epoch": 0.11975276847798094, "percentage": 13.68, "elapsed_time": "2:24:56", "remaining_time": "15:14:49", "throughput": 561.17, "total_tokens": 4880072}
{"current_steps": 470, "total_steps": 3400, "loss": 0.6506, "lr": 9.788654169177453e-05, "epoch": 0.12104043265516354, "percentage": 13.82, "elapsed_time": "2:26:24", "remaining_time": "15:12:43", "throughput": 561.43, "total_tokens": 4931968}
{"current_steps": 475, "total_steps": 3400, "loss": 0.6551, "lr": 9.781602764303487e-05, "epoch": 0.12232809683234612, "percentage": 13.97, "elapsed_time": "2:27:52", "remaining_time": "15:10:36", "throughput": 561.69, "total_tokens": 4983656}
{"current_steps": 480, "total_steps": 3400, "loss": 0.6978, "lr": 9.774438273818911e-05, "epoch": 0.12361576100952872, "percentage": 14.12, "elapsed_time": "2:29:20", "remaining_time": "15:08:32", "throughput": 562.05, "total_tokens": 5036528}
{"current_steps": 485, "total_steps": 3400, "loss": 0.6407, "lr": 9.767160867164979e-05, "epoch": 0.12490342518671131, "percentage": 14.26, "elapsed_time": "2:30:49", "remaining_time": "15:06:32", "throughput": 562.3, "total_tokens": 5088768}
{"current_steps": 490, "total_steps": 3400, "loss": 0.6641, "lr": 9.759770716453436e-05, "epoch": 0.1261910893638939, "percentage": 14.41, "elapsed_time": "2:32:17", "remaining_time": "15:04:26", "throughput": 562.73, "total_tokens": 5142080}
{"current_steps": 495, "total_steps": 3400, "loss": 0.6588, "lr": 9.752267996462434e-05, "epoch": 0.1274787535410765, "percentage": 14.56, "elapsed_time": "2:33:47", "remaining_time": "15:02:30", "throughput": 562.95, "total_tokens": 5194432}
{"current_steps": 500, "total_steps": 3400, "loss": 0.6304, "lr": 9.744652884632406e-05, "epoch": 0.12876641771825909, "percentage": 14.71, "elapsed_time": "2:35:15", "remaining_time": "15:00:27", "throughput": 563.24, "total_tokens": 5246640}
{"current_steps": 500, "total_steps": 3400, "eval_loss": 0.6272165775299072, "epoch": 0.12876641771825909, "percentage": 14.71, "elapsed_time": "2:35:54", "remaining_time": "15:04:16", "throughput": 560.87, "total_tokens": 5246640}
{"current_steps": 505, "total_steps": 3400, "loss": 0.5741, "lr": 9.736925561061871e-05, "epoch": 0.13005408189544168, "percentage": 14.85, "elapsed_time": "2:37:30", "remaining_time": "15:02:55", "throughput": 560.73, "total_tokens": 5299024}
{"current_steps": 510, "total_steps": 3400, "loss": 0.6535, "lr": 9.729086208503174e-05, "epoch": 0.13134174607262425, "percentage": 15.0, "elapsed_time": "2:38:58", "remaining_time": "15:00:50", "throughput": 561.18, "total_tokens": 5352664}
{"current_steps": 515, "total_steps": 3400, "loss": 0.6081, "lr": 9.721135012358156e-05, "epoch": 0.13262941024980685, "percentage": 15.15, "elapsed_time": "2:40:27", "remaining_time": "14:58:55", "throughput": 561.49, "total_tokens": 5406008}
{"current_steps": 520, "total_steps": 3400, "loss": 0.6792, "lr": 9.713072160673777e-05, "epoch": 0.13391707442698944, "percentage": 15.29, "elapsed_time": "2:41:55", "remaining_time": "14:56:51", "throughput": 561.9, "total_tokens": 5459368}
{"current_steps": 525, "total_steps": 3400, "loss": 0.6821, "lr": 9.704897844137673e-05, "epoch": 0.13520473860417204, "percentage": 15.44, "elapsed_time": "2:43:25", "remaining_time": "14:54:56", "throughput": 562.24, "total_tokens": 5512960}
{"current_steps": 530, "total_steps": 3400, "loss": 0.5835, "lr": 9.696612256073633e-05, "epoch": 0.13649240278135463, "percentage": 15.59, "elapsed_time": "2:44:52", "remaining_time": "14:52:51", "throughput": 562.56, "total_tokens": 5565368}
{"current_steps": 535, "total_steps": 3400, "loss": 0.6129, "lr": 9.688215592437039e-05, "epoch": 0.1377800669585372, "percentage": 15.74, "elapsed_time": "2:46:21", "remaining_time": "14:50:50", "throughput": 562.86, "total_tokens": 5618008}
{"current_steps": 540, "total_steps": 3400, "loss": 0.5765, "lr": 9.679708051810221e-05, "epoch": 0.1390677311357198, "percentage": 15.88, "elapsed_time": "2:47:47", "remaining_time": "14:48:38", "throughput": 563.22, "total_tokens": 5670072}
{"current_steps": 545, "total_steps": 3400, "loss": 0.6325, "lr": 9.67108983539777e-05, "epoch": 0.1403553953129024, "percentage": 16.03, "elapsed_time": "2:49:14", "remaining_time": "14:46:35", "throughput": 563.58, "total_tokens": 5722936}
{"current_steps": 550, "total_steps": 3400, "loss": 0.5596, "lr": 9.662361147021779e-05, "epoch": 0.141643059490085, "percentage": 16.18, "elapsed_time": "2:50:41", "remaining_time": "14:44:27", "throughput": 563.9, "total_tokens": 5774880}
{"current_steps": 550, "total_steps": 3400, "eval_loss": 0.5832681059837341, "epoch": 0.141643059490085, "percentage": 16.18, "elapsed_time": "2:51:19", "remaining_time": "14:47:45", "throughput": 561.8, "total_tokens": 5774880}
{"current_steps": 555, "total_steps": 3400, "loss": 0.5073, "lr": 9.653522193117013e-05, "epoch": 0.14293072366726758, "percentage": 16.32, "elapsed_time": "2:52:51", "remaining_time": "14:46:05", "throughput": 561.79, "total_tokens": 5826608}
{"current_steps": 560, "total_steps": 3400, "loss": 0.5652, "lr": 9.644573182726035e-05, "epoch": 0.14421838784445018, "percentage": 16.47, "elapsed_time": "2:54:18", "remaining_time": "14:43:58", "throughput": 562.21, "total_tokens": 5879776}
{"current_steps": 565, "total_steps": 3400, "loss": 0.5727, "lr": 9.63551432749426e-05, "epoch": 0.14550605202163275, "percentage": 16.62, "elapsed_time": "2:55:44", "remaining_time": "14:41:50", "throughput": 562.64, "total_tokens": 5932888}
{"current_steps": 570, "total_steps": 3400, "loss": 0.6251, "lr": 9.626345841664953e-05, "epoch": 0.14679371619881534, "percentage": 16.76, "elapsed_time": "2:57:12", "remaining_time": "14:39:49", "throughput": 562.87, "total_tokens": 5984648}
{"current_steps": 575, "total_steps": 3400, "loss": 0.6508, "lr": 9.617067942074153e-05, "epoch": 0.14808138037599794, "percentage": 16.91, "elapsed_time": "2:58:38", "remaining_time": "14:37:39", "throughput": 563.24, "total_tokens": 6037000}
{"current_steps": 580, "total_steps": 3400, "loss": 0.6686, "lr": 9.607680848145558e-05, "epoch": 0.14936904455318054, "percentage": 17.06, "elapsed_time": "3:00:06", "remaining_time": "14:35:39", "throughput": 563.62, "total_tokens": 6090512}
{"current_steps": 585, "total_steps": 3400, "loss": 0.5793, "lr": 9.598184781885318e-05, "epoch": 0.15065670873036313, "percentage": 17.21, "elapsed_time": "3:01:32", "remaining_time": "14:33:32", "throughput": 564.01, "total_tokens": 6143320}
{"current_steps": 590, "total_steps": 3400, "loss": 0.5954, "lr": 9.588579967876806e-05, "epoch": 0.1519443729075457, "percentage": 17.35, "elapsed_time": "3:02:59", "remaining_time": "14:31:33", "throughput": 564.28, "total_tokens": 6195720}
{"current_steps": 595, "total_steps": 3400, "loss": 0.5644, "lr": 9.578866633275288e-05, "epoch": 0.1532320370847283, "percentage": 17.5, "elapsed_time": "3:04:25", "remaining_time": "14:29:26", "throughput": 564.6, "total_tokens": 6247592}
{"current_steps": 600, "total_steps": 3400, "loss": 0.5794, "lr": 9.569045007802559e-05, "epoch": 0.1545197012619109, "percentage": 17.65, "elapsed_time": "3:05:53", "remaining_time": "14:27:29", "throughput": 564.81, "total_tokens": 6299656}
{"current_steps": 600, "total_steps": 3400, "eval_loss": 0.6039358973503113, "epoch": 0.1545197012619109, "percentage": 17.65, "elapsed_time": "3:06:31", "remaining_time": "14:30:28", "throughput": 562.88, "total_tokens": 6299656}
{"current_steps": 605, "total_steps": 3400, "loss": 0.6032, "lr": 9.55911532374151e-05, "epoch": 0.1558073654390935, "percentage": 17.79, "elapsed_time": "3:08:04", "remaining_time": "14:28:50", "throughput": 562.89, "total_tokens": 6351664}
{"current_steps": 610, "total_steps": 3400, "loss": 0.5942, "lr": 9.549077815930636e-05, "epoch": 0.15709502961627608, "percentage": 17.94, "elapsed_time": "3:09:32", "remaining_time": "14:26:53", "throughput": 563.1, "total_tokens": 6403688}
{"current_steps": 615, "total_steps": 3400, "loss": 0.5643, "lr": 9.538932721758474e-05, "epoch": 0.15838269379345868, "percentage": 18.09, "elapsed_time": "3:10:58", "remaining_time": "14:24:50", "throughput": 563.44, "total_tokens": 6456344}
{"current_steps": 620, "total_steps": 3400, "loss": 0.5914, "lr": 9.528680281157999e-05, "epoch": 0.15967035797064125, "percentage": 18.24, "elapsed_time": "3:12:26", "remaining_time": "14:22:51", "throughput": 563.74, "total_tokens": 6509000}
{"current_steps": 605, "total_steps": 3400, "loss": 0.6106, "lr": 9.55911532374151e-05, "epoch": 0.1558073654390935, "percentage": 17.79, "elapsed_time": "0:02:45", "remaining_time": "0:12:42", "throughput": 38471.59, "total_tokens": 6351680}
{"current_steps": 610, "total_steps": 3400, "loss": 0.5812, "lr": 9.549077815930636e-05, "epoch": 0.15709502961627608, "percentage": 17.94, "elapsed_time": "0:04:14", "remaining_time": "0:19:21", "throughput": 25207.5, "total_tokens": 6403648}
{"current_steps": 615, "total_steps": 3400, "loss": 0.5992, "lr": 9.538932721758474e-05, "epoch": 0.15838269379345868, "percentage": 18.09, "elapsed_time": "0:05:41", "remaining_time": "0:25:47", "throughput": 18896.22, "total_tokens": 6456328}
{"current_steps": 620, "total_steps": 3400, "loss": 0.587, "lr": 9.528680281157999e-05, "epoch": 0.15967035797064125, "percentage": 18.24, "elapsed_time": "0:07:10", "remaining_time": "0:32:11", "throughput": 15113.22, "total_tokens": 6509024}
{"current_steps": 625, "total_steps": 3400, "loss": 0.5836, "lr": 9.518320736600943e-05, "epoch": 0.16095802214782384, "percentage": 18.38, "elapsed_time": "0:08:38", "remaining_time": "0:38:21", "throughput": 12658.21, "total_tokens": 6561336}
{"current_steps": 630, "total_steps": 3400, "loss": 0.5913, "lr": 9.507854333092063e-05, "epoch": 0.16224568632500644, "percentage": 18.53, "elapsed_time": "0:10:07", "remaining_time": "0:44:29", "throughput": 10895.64, "total_tokens": 6614024}
{"current_steps": 635, "total_steps": 3400, "loss": 0.5693, "lr": 9.497281318163346e-05, "epoch": 0.16353335050218903, "percentage": 18.68, "elapsed_time": "0:11:34", "remaining_time": "0:50:24", "throughput": 9595.98, "total_tokens": 6666416}
{"current_steps": 640, "total_steps": 3400, "loss": 0.572, "lr": 9.486601941868154e-05, "epoch": 0.16482101467937163, "percentage": 18.82, "elapsed_time": "0:13:04", "remaining_time": "0:56:24", "throughput": 8560.2, "total_tokens": 6718200}
{"current_steps": 645, "total_steps": 3400, "loss": 0.6111, "lr": 9.475816456775313e-05, "epoch": 0.1661086788565542, "percentage": 18.97, "elapsed_time": "0:14:35", "remaining_time": "1:02:19", "throughput": 7734.51, "total_tokens": 6771256}
{"current_steps": 650, "total_steps": 3400, "loss": 0.5959, "lr": 9.464925117963133e-05, "epoch": 0.1673963430337368, "percentage": 19.12, "elapsed_time": "0:16:07", "remaining_time": "1:08:11", "throughput": 7056.83, "total_tokens": 6824008}
{"current_steps": 650, "total_steps": 3400, "eval_loss": 0.5542036890983582, "epoch": 0.1673963430337368, "percentage": 19.12, "elapsed_time": "0:17:15", "remaining_time": "1:13:02", "throughput": 6587.43, "total_tokens": 6824008}
{"current_steps": 655, "total_steps": 3400, "loss": 0.5344, "lr": 9.453928183013385e-05, "epoch": 0.1686840072109194, "percentage": 19.26, "elapsed_time": "0:18:55", "remaining_time": "1:19:19", "throughput": 6053.33, "total_tokens": 6875432}
{"current_steps": 660, "total_steps": 3400, "loss": 0.56, "lr": 9.442825912005202e-05, "epoch": 0.16997167138810199, "percentage": 19.41, "elapsed_time": "0:20:27", "remaining_time": "1:24:57", "throughput": 5642.47, "total_tokens": 6927768}
{"current_steps": 665, "total_steps": 3400, "loss": 0.5701, "lr": 9.431618567508933e-05, "epoch": 0.17125933556528458, "percentage": 19.56, "elapsed_time": "0:21:58", "remaining_time": "1:30:24", "throughput": 5292.74, "total_tokens": 6980544}
{"current_steps": 670, "total_steps": 3400, "loss": 0.5604, "lr": 9.420306414579925e-05, "epoch": 0.17254699974246718, "percentage": 19.71, "elapsed_time": "0:23:32", "remaining_time": "1:35:53", "throughput": 4980.28, "total_tokens": 7032584}
{"current_steps": 675, "total_steps": 3400, "loss": 0.5763, "lr": 9.408889720752266e-05, "epoch": 0.17383466391964975, "percentage": 19.85, "elapsed_time": "0:25:03", "remaining_time": "1:41:09", "throughput": 4712.79, "total_tokens": 7085048}
{"current_steps": 680, "total_steps": 3400, "loss": 0.5962, "lr": 9.397368756032445e-05, "epoch": 0.17512232809683234, "percentage": 20.0, "elapsed_time": "0:26:35", "remaining_time": "1:46:21", "throughput": 4474.29, "total_tokens": 7137952}
{"current_steps": 685, "total_steps": 3400, "loss": 0.5935, "lr": 9.385743792892982e-05, "epoch": 0.17640999227401494, "percentage": 20.15, "elapsed_time": "0:28:03", "remaining_time": "1:51:13", "throughput": 4270.71, "total_tokens": 7190584}
{"current_steps": 690, "total_steps": 3400, "loss": 0.5267, "lr": 9.374015106265968e-05, "epoch": 0.17769765645119753, "percentage": 20.29, "elapsed_time": "0:29:33", "remaining_time": "1:56:04", "throughput": 4084.88, "total_tokens": 7243440}
{"current_steps": 695, "total_steps": 3400, "loss": 0.5351, "lr": 9.362182973536569e-05, "epoch": 0.17898532062838013, "percentage": 20.44, "elapsed_time": "0:31:00", "remaining_time": "2:00:42", "throughput": 3920.82, "total_tokens": 7295568}
{"current_steps": 700, "total_steps": 3400, "loss": 0.5014, "lr": 9.35024767453647e-05, "epoch": 0.1802729848055627, "percentage": 20.59, "elapsed_time": "0:32:28", "remaining_time": "2:05:16", "throughput": 3770.1, "total_tokens": 7347040}
{"current_steps": 700, "total_steps": 3400, "eval_loss": 0.5440100431442261, "epoch": 0.1802729848055627, "percentage": 20.59, "elapsed_time": "0:33:07", "remaining_time": "2:07:47", "throughput": 3695.91, "total_tokens": 7347040}
{"current_steps": 705, "total_steps": 3400, "loss": 0.543, "lr": 9.338209491537257e-05, "epoch": 0.1815606489827453, "percentage": 20.74, "elapsed_time": "0:34:43", "remaining_time": "2:12:44", "throughput": 3551.6, "total_tokens": 7399584}
{"current_steps": 710, "total_steps": 3400, "loss": 0.4995, "lr": 9.326068709243727e-05, "epoch": 0.1828483131599279, "percentage": 20.88, "elapsed_time": "0:36:11", "remaining_time": "2:17:07", "throughput": 3432.24, "total_tokens": 7452928}
{"current_steps": 715, "total_steps": 3400, "loss": 0.5109, "lr": 9.313825614787177e-05, "epoch": 0.18413597733711048, "percentage": 21.03, "elapsed_time": "0:37:39", "remaining_time": "2:21:26", "throughput": 3321.05, "total_tokens": 7505112}
{"current_steps": 720, "total_steps": 3400, "loss": 0.4932, "lr": 9.301480497718593e-05, "epoch": 0.18542364151429308, "percentage": 21.18, "elapsed_time": "0:39:07", "remaining_time": "2:25:37", "throughput": 3219.44, "total_tokens": 7557608}
{"current_steps": 725, "total_steps": 3400, "loss": 0.5573, "lr": 9.289033650001817e-05, "epoch": 0.18671130569147568, "percentage": 21.32, "elapsed_time": "0:40:36", "remaining_time": "2:29:51", "throughput": 3122.82, "total_tokens": 7610048}
{"current_steps": 730, "total_steps": 3400, "loss": 0.5305, "lr": 9.276485366006634e-05, "epoch": 0.18799896986865824, "percentage": 21.47, "elapsed_time": "0:42:04", "remaining_time": "2:33:54", "throughput": 3034.8, "total_tokens": 7662056}
{"current_steps": 735, "total_steps": 3400, "loss": 0.5369, "lr": 9.263835942501807e-05, "epoch": 0.18928663404584084, "percentage": 21.62, "elapsed_time": "0:43:34", "remaining_time": "2:37:58", "throughput": 2950.71, "total_tokens": 7713656}
{"current_steps": 740, "total_steps": 3400, "loss": 0.5397, "lr": 9.251085678648072e-05, "epoch": 0.19057429822302344, "percentage": 21.76, "elapsed_time": "0:45:02", "remaining_time": "2:41:54", "throughput": 2873.57, "total_tokens": 7765992}
{"current_steps": 745, "total_steps": 3400, "loss": 0.5116, "lr": 9.238234875991046e-05, "epoch": 0.19186196240020603, "percentage": 21.91, "elapsed_time": "0:46:31", "remaining_time": "2:45:49", "throughput": 2800.48, "total_tokens": 7818448}
{"current_steps": 750, "total_steps": 3400, "loss": 0.541, "lr": 9.225283838454111e-05, "epoch": 0.19314962657738863, "percentage": 22.06, "elapsed_time": "0:47:59", "remaining_time": "2:49:34", "throughput": 2733.14, "total_tokens": 7870520}
{"current_steps": 750, "total_steps": 3400, "eval_loss": 0.5273815989494324, "epoch": 0.19314962657738863, "percentage": 22.06, "elapsed_time": "0:48:38", "remaining_time": "2:51:53", "throughput": 2696.45, "total_tokens": 7870520}
{"current_steps": 755, "total_steps": 3400, "loss": 0.4961, "lr": 9.21223287233121e-05, "epoch": 0.1944372907545712, "percentage": 22.21, "elapsed_time": "0:50:14", "remaining_time": "2:55:59", "throughput": 2628.5, "total_tokens": 7922736}
{"current_steps": 760, "total_steps": 3400, "loss": 0.4956, "lr": 9.199082286279622e-05, "epoch": 0.1957249549317538, "percentage": 22.35, "elapsed_time": "0:51:41", "remaining_time": "2:59:32", "throughput": 2571.82, "total_tokens": 7975304}
{"current_steps": 765, "total_steps": 3400, "loss": 0.4997, "lr": 9.185832391312644e-05, "epoch": 0.1970126191089364, "percentage": 22.5, "elapsed_time": "0:53:10", "remaining_time": "3:03:09", "throughput": 2515.96, "total_tokens": 8027448}
{"current_steps": 770, "total_steps": 3400, "loss": 0.5214, "lr": 9.172483500792244e-05, "epoch": 0.19830028328611898, "percentage": 22.65, "elapsed_time": "0:54:38", "remaining_time": "3:06:38", "throughput": 2464.71, "total_tokens": 8080944}
{"current_steps": 775, "total_steps": 3400, "loss": 0.6098, "lr": 9.159035930421658e-05, "epoch": 0.19958794746330158, "percentage": 22.79, "elapsed_time": "0:56:07", "remaining_time": "3:10:07", "throughput": 2414.98, "total_tokens": 8133392}
{"current_steps": 780, "total_steps": 3400, "loss": 0.5046, "lr": 9.145489998237902e-05, "epoch": 0.20087561164048418, "percentage": 22.94, "elapsed_time": "0:57:34", "remaining_time": "3:13:25", "throughput": 2369.16, "total_tokens": 8185360}
{"current_steps": 785, "total_steps": 3400, "loss": 0.5803, "lr": 9.131846024604274e-05, "epoch": 0.20216327581766674, "percentage": 23.09, "elapsed_time": "0:59:06", "remaining_time": "3:16:53", "throughput": 2322.96, "total_tokens": 8237672}
{"current_steps": 790, "total_steps": 3400, "loss": 0.5365, "lr": 9.11810433220276e-05, "epoch": 0.20345093999484934, "percentage": 23.24, "elapsed_time": "1:00:36", "remaining_time": "3:20:14", "throughput": 2279.6, "total_tokens": 8289688}
{"current_steps": 795, "total_steps": 3400, "loss": 0.5259, "lr": 9.104265246026415e-05, "epoch": 0.20473860417203193, "percentage": 23.38, "elapsed_time": "1:02:06", "remaining_time": "3:23:30", "throughput": 2238.56, "total_tokens": 8341624}
{"current_steps": 800, "total_steps": 3400, "loss": 0.5291, "lr": 9.090329093371666e-05, "epoch": 0.20602626834921453, "percentage": 23.53, "elapsed_time": "1:03:35", "remaining_time": "3:26:41", "throughput": 2199.61, "total_tokens": 8393696}
{"current_steps": 800, "total_steps": 3400, "eval_loss": 0.5219093561172485, "epoch": 0.20602626834921453, "percentage": 23.53, "elapsed_time": "1:04:15", "remaining_time": "3:28:51", "throughput": 2176.94, "total_tokens": 8393696}
{"current_steps": 805, "total_steps": 3400, "loss": 0.5449, "lr": 9.076296203830579e-05, "epoch": 0.20731393252639713, "percentage": 23.68, "elapsed_time": "1:05:50", "remaining_time": "3:32:16", "throughput": 2137.83, "total_tokens": 8446496}
{"current_steps": 810, "total_steps": 3400, "loss": 0.5625, "lr": 9.062166909283062e-05, "epoch": 0.2086015967035797, "percentage": 23.82, "elapsed_time": "1:07:19", "remaining_time": "3:35:17", "throughput": 2103.98, "total_tokens": 8499544}
{"current_steps": 815, "total_steps": 3400, "loss": 0.5564, "lr": 9.047941543889014e-05, "epoch": 0.2098892608807623, "percentage": 23.97, "elapsed_time": "1:08:48", "remaining_time": "3:38:13", "throughput": 2071.83, "total_tokens": 8552568}
{"current_steps": 820, "total_steps": 3400, "loss": 0.5487, "lr": 9.033620444080428e-05, "epoch": 0.2111769250579449, "percentage": 24.12, "elapsed_time": "1:10:19", "remaining_time": "3:41:15", "throughput": 2039.59, "total_tokens": 8605560}
{"current_steps": 825, "total_steps": 3400, "loss": 0.5719, "lr": 9.019203948553422e-05, "epoch": 0.21246458923512748, "percentage": 24.26, "elapsed_time": "1:11:48", "remaining_time": "3:44:07", "throughput": 2009.42, "total_tokens": 8657704}
{"current_steps": 830, "total_steps": 3400, "loss": 0.5235, "lr": 9.004692398260244e-05, "epoch": 0.21375225341231008, "percentage": 24.41, "elapsed_time": "1:13:19", "remaining_time": "3:47:03", "throughput": 1979.82, "total_tokens": 8711088}
{"current_steps": 835, "total_steps": 3400, "loss": 0.5566, "lr": 8.9900861364012e-05, "epoch": 0.21503991758949267, "percentage": 24.56, "elapsed_time": "1:14:51", "remaining_time": "3:49:58", "throughput": 1950.99, "total_tokens": 8763712}
{"current_steps": 840, "total_steps": 3400, "loss": 0.482, "lr": 8.975385508416532e-05, "epoch": 0.21632758176667524, "percentage": 24.71, "elapsed_time": "1:16:22", "remaining_time": "3:52:45", "throughput": 1923.79, "total_tokens": 8815760}
{"current_steps": 845, "total_steps": 3400, "loss": 0.5046, "lr": 8.960590861978265e-05, "epoch": 0.21761524594385784, "percentage": 24.85, "elapsed_time": "1:17:51", "remaining_time": "3:55:24", "throughput": 1898.38, "total_tokens": 8867720}
{"current_steps": 850, "total_steps": 3400, "loss": 0.5063, "lr": 8.945702546981969e-05, "epoch": 0.21890291012104043, "percentage": 25.0, "elapsed_time": "1:19:21", "remaining_time": "3:58:04", "throughput": 1873.33, "total_tokens": 8919608}
{"current_steps": 850, "total_steps": 3400, "eval_loss": 0.5525640249252319, "epoch": 0.21890291012104043, "percentage": 25.0, "elapsed_time": "1:20:00", "remaining_time": "4:00:01", "throughput": 1858.09, "total_tokens": 8919608}
{"current_steps": 855, "total_steps": 3400, "loss": 0.5853, "lr": 8.930720915538487e-05, "epoch": 0.22019057429822303, "percentage": 25.15, "elapsed_time": "1:21:34", "remaining_time": "4:02:50", "throughput": 1832.75, "total_tokens": 8971048}
{"current_steps": 860, "total_steps": 3400, "loss": 0.5534, "lr": 8.915646321965614e-05, "epoch": 0.22147823847540563, "percentage": 25.29, "elapsed_time": "1:23:04", "remaining_time": "4:05:22", "throughput": 1810.11, "total_tokens": 9022936}
{"current_steps": 865, "total_steps": 3400, "loss": 0.5623, "lr": 8.900479122779712e-05, "epoch": 0.2227659026525882, "percentage": 25.44, "elapsed_time": "1:24:32", "remaining_time": "4:07:47", "throughput": 1788.95, "total_tokens": 9075336}
{"current_steps": 870, "total_steps": 3400, "loss": 0.5561, "lr": 8.885219676687277e-05, "epoch": 0.2240535668297708, "percentage": 25.59, "elapsed_time": "1:26:03", "remaining_time": "4:10:14", "throughput": 1767.89, "total_tokens": 9127688}
{"current_steps": 875, "total_steps": 3400, "loss": 0.5449, "lr": 8.869868344576459e-05, "epoch": 0.22534123100695339, "percentage": 25.74, "elapsed_time": "1:27:31", "remaining_time": "4:12:34", "throughput": 1748.15, "total_tokens": 9180624}
{"current_steps": 880, "total_steps": 3400, "loss": 0.5062, "lr": 8.854425489508532e-05, "epoch": 0.22662889518413598, "percentage": 25.88, "elapsed_time": "1:29:02", "remaining_time": "4:14:57", "throughput": 1728.37, "total_tokens": 9233176}
{"current_steps": 885, "total_steps": 3400, "loss": 0.5033, "lr": 8.838891476709288e-05, "epoch": 0.22791655936131858, "percentage": 26.03, "elapsed_time": "1:30:30", "remaining_time": "4:17:12", "throughput": 1710.09, "total_tokens": 9286688}
{"current_steps": 890, "total_steps": 3400, "loss": 0.4845, "lr": 8.823266673560426e-05, "epoch": 0.22920422353850115, "percentage": 26.18, "elapsed_time": "1:31:59", "remaining_time": "4:19:26", "throughput": 1692.06, "total_tokens": 9339600}
{"current_steps": 895, "total_steps": 3400, "loss": 0.5595, "lr": 8.807551449590846e-05, "epoch": 0.23049188771568374, "percentage": 26.32, "elapsed_time": "1:33:27", "remaining_time": "4:21:34", "throughput": 1674.82, "total_tokens": 9391536}
{"current_steps": 900, "total_steps": 3400, "loss": 0.5251, "lr": 8.791746176467907e-05, "epoch": 0.23177955189286634, "percentage": 26.47, "elapsed_time": "1:34:57", "remaining_time": "4:23:46", "throughput": 1657.48, "total_tokens": 9443616}
{"current_steps": 900, "total_steps": 3400, "eval_loss": 0.49604204297065735, "epoch": 0.23177955189286634, "percentage": 26.47, "elapsed_time": "1:35:37", "remaining_time": "4:25:36", "throughput": 1646.06, "total_tokens": 9443616}
{"current_steps": 905, "total_steps": 3400, "loss": 0.5774, "lr": 8.775851227988656e-05, "epoch": 0.23306721607004893, "percentage": 26.62, "elapsed_time": "1:37:14", "remaining_time": "4:28:05", "throughput": 1627.77, "total_tokens": 9497304}
{"current_steps": 910, "total_steps": 3400, "loss": 0.5441, "lr": 8.759866980070963e-05, "epoch": 0.23435488024723153, "percentage": 26.76, "elapsed_time": "1:38:44", "remaining_time": "4:30:10", "throughput": 1611.88, "total_tokens": 9549416}
{"current_steps": 915, "total_steps": 3400, "loss": 0.4898, "lr": 8.743793810744654e-05, "epoch": 0.23564254442441412, "percentage": 26.91, "elapsed_time": "1:40:15", "remaining_time": "4:32:16", "throughput": 1596.21, "total_tokens": 9601800}
{"current_steps": 920, "total_steps": 3400, "loss": 0.4681, "lr": 8.727632100142551e-05, "epoch": 0.2369302086015967, "percentage": 27.06, "elapsed_time": "1:41:44", "remaining_time": "4:34:16", "throughput": 1581.28, "total_tokens": 9653600}
{"current_steps": 925, "total_steps": 3400, "loss": 0.4946, "lr": 8.711382230491493e-05, "epoch": 0.2382178727787793, "percentage": 27.21, "elapsed_time": "1:43:15", "remaining_time": "4:36:18", "throughput": 1566.7, "total_tokens": 9707224}
{"current_steps": 930, "total_steps": 3400, "loss": 0.5517, "lr": 8.695044586103296e-05, "epoch": 0.23950553695596188, "percentage": 27.35, "elapsed_time": "1:44:45", "remaining_time": "4:38:13", "throughput": 1552.86, "total_tokens": 9760096}
{"current_steps": 935, "total_steps": 3400, "loss": 0.6064, "lr": 8.678619553365659e-05, "epoch": 0.24079320113314448, "percentage": 27.5, "elapsed_time": "1:46:16", "remaining_time": "4:40:10", "throughput": 1538.94, "total_tokens": 9812672}
{"current_steps": 940, "total_steps": 3400, "loss": 0.5398, "lr": 8.662107520733027e-05, "epoch": 0.24208086531032708, "percentage": 27.65, "elapsed_time": "1:47:45", "remaining_time": "4:41:59", "throughput": 1526.05, "total_tokens": 9866200}
{"current_steps": 945, "total_steps": 3400, "loss": 0.5068, "lr": 8.64550887871741e-05, "epoch": 0.24336852948750964, "percentage": 27.79, "elapsed_time": "1:49:15", "remaining_time": "4:43:50", "throughput": 1512.93, "total_tokens": 9918160}
{"current_steps": 950, "total_steps": 3400, "loss": 0.5862, "lr": 8.628824019879137e-05, "epoch": 0.24465619366469224, "percentage": 27.94, "elapsed_time": "1:50:44", "remaining_time": "4:45:35", "throughput": 1500.64, "total_tokens": 9970600}
{"current_steps": 950, "total_steps": 3400, "eval_loss": 0.5085262656211853, "epoch": 0.24465619366469224, "percentage": 27.94, "elapsed_time": "1:51:23", "remaining_time": "4:47:15", "throughput": 1491.87, "total_tokens": 9970600}
{"current_steps": 955, "total_steps": 3400, "loss": 0.4549, "lr": 8.612053338817581e-05, "epoch": 0.24594385784187484, "percentage": 28.09, "elapsed_time": "1:53:00", "remaining_time": "4:49:20", "throughput": 1478.02, "total_tokens": 10022248}
{"current_steps": 960, "total_steps": 3400, "loss": 0.4791, "lr": 8.595197232161824e-05, "epoch": 0.24723152201905743, "percentage": 28.24, "elapsed_time": "1:54:30", "remaining_time": "4:51:02", "throughput": 1466.45, "total_tokens": 10075280}
{"current_steps": 965, "total_steps": 3400, "loss": 0.4833, "lr": 8.578256098561275e-05, "epoch": 0.24851918619624003, "percentage": 28.38, "elapsed_time": "1:56:02", "remaining_time": "4:52:47", "throughput": 1454.77, "total_tokens": 10128392}
{"current_steps": 970, "total_steps": 3400, "loss": 0.4672, "lr": 8.561230338676239e-05, "epoch": 0.24980685037342262, "percentage": 28.53, "elapsed_time": "1:57:32", "remaining_time": "4:54:27", "throughput": 1443.57, "total_tokens": 10180720}
{"current_steps": 975, "total_steps": 3400, "loss": 0.5205, "lr": 8.544120355168451e-05, "epoch": 0.2510945145506052, "percentage": 28.68, "elapsed_time": "1:59:03", "remaining_time": "4:56:07", "throughput": 1432.48, "total_tokens": 10233256}
{"current_steps": 980, "total_steps": 3400, "loss": 0.5124, "lr": 8.526926552691544e-05, "epoch": 0.2523821787277878, "percentage": 28.82, "elapsed_time": "2:00:33", "remaining_time": "4:57:42", "throughput": 1421.81, "total_tokens": 10284928}
{"current_steps": 985, "total_steps": 3400, "loss": 0.5034, "lr": 8.509649337881483e-05, "epoch": 0.2536698429049704, "percentage": 28.97, "elapsed_time": "2:02:03", "remaining_time": "4:59:15", "throughput": 1411.63, "total_tokens": 10338208}
{"current_steps": 990, "total_steps": 3400, "loss": 0.5226, "lr": 8.492289119346943e-05, "epoch": 0.254957507082153, "percentage": 29.12, "elapsed_time": "2:03:33", "remaining_time": "5:00:46", "throughput": 1401.61, "total_tokens": 10390224}
{"current_steps": 995, "total_steps": 3400, "loss": 0.5399, "lr": 8.474846307659658e-05, "epoch": 0.25624517125933555, "percentage": 29.26, "elapsed_time": "2:05:01", "remaining_time": "5:02:11", "throughput": 1392.18, "total_tokens": 10443080}
{"current_steps": 1000, "total_steps": 3400, "loss": 0.483, "lr": 8.457321315344694e-05, "epoch": 0.25753283543651817, "percentage": 29.41, "elapsed_time": "2:06:30", "remaining_time": "5:03:37", "throughput": 1382.7, "total_tokens": 10495592}
{"current_steps": 1000, "total_steps": 3400, "eval_loss": 0.5305114388465881, "epoch": 0.25753283543651817, "percentage": 29.41, "elapsed_time": "2:07:09", "remaining_time": "5:05:10", "throughput": 1375.65, "total_tokens": 10495592}
{"current_steps": 1005, "total_steps": 3400, "loss": 0.568, "lr": 8.439714556870704e-05, "epoch": 0.25882049961370074, "percentage": 29.56, "elapsed_time": "2:08:43", "remaining_time": "5:06:44", "throughput": 1365.8, "total_tokens": 10548136}
{"current_steps": 1010, "total_steps": 3400, "loss": 0.4335, "lr": 8.422026448640124e-05, "epoch": 0.26010816379088336, "percentage": 29.71, "elapsed_time": "2:10:12", "remaining_time": "5:08:07", "throughput": 1356.78, "total_tokens": 10600048}
{"current_steps": 1015, "total_steps": 3400, "loss": 0.5385, "lr": 8.40425740897932e-05, "epoch": 0.26139582796806593, "percentage": 29.85, "elapsed_time": "2:11:40", "remaining_time": "5:09:25", "throughput": 1348.21, "total_tokens": 10652160}
{"current_steps": 1020, "total_steps": 3400, "loss": 0.5171, "lr": 8.386407858128706e-05, "epoch": 0.2626834921452485, "percentage": 30.0, "elapsed_time": "2:13:11", "remaining_time": "5:10:46", "throughput": 1339.57, "total_tokens": 10705208}
{"current_steps": 1025, "total_steps": 3400, "loss": 0.5201, "lr": 8.368478218232787e-05, "epoch": 0.2639711563224311, "percentage": 30.15, "elapsed_time": "2:14:39", "remaining_time": "5:12:00", "throughput": 1331.61, "total_tokens": 10758688}
{"current_steps": 1030, "total_steps": 3400, "loss": 0.5521, "lr": 8.350468913330192e-05, "epoch": 0.2652588204996137, "percentage": 30.29, "elapsed_time": "2:16:08", "remaining_time": "5:13:16", "throughput": 1323.48, "total_tokens": 10811408}
{"current_steps": 1035, "total_steps": 3400, "loss": 0.4938, "lr": 8.33238036934364e-05, "epoch": 0.2665464846767963, "percentage": 30.44, "elapsed_time": "2:17:37", "remaining_time": "5:14:28", "throughput": 1315.65, "total_tokens": 10864144}
{"current_steps": 1040, "total_steps": 3400, "loss": 0.4828, "lr": 8.31421301406986e-05, "epoch": 0.2678341488539789, "percentage": 30.59, "elapsed_time": "2:19:08", "remaining_time": "5:15:44", "throughput": 1307.65, "total_tokens": 10916952}
{"current_steps": 1045, "total_steps": 3400, "loss": 0.5491, "lr": 8.29596727716949e-05, "epoch": 0.26912181303116145, "percentage": 30.74, "elapsed_time": "2:20:37", "remaining_time": "5:16:53", "throughput": 1300.06, "total_tokens": 10968824}
{"current_steps": 1050, "total_steps": 3400, "loss": 0.4628, "lr": 8.277643590156894e-05, "epoch": 0.2704094772083441, "percentage": 30.88, "elapsed_time": "2:22:08", "remaining_time": "5:18:07", "throughput": 1292.35, "total_tokens": 11021656}
{"current_steps": 1050, "total_steps": 3400, "eval_loss": 0.5039986371994019, "epoch": 0.2704094772083441, "percentage": 30.88, "elapsed_time": "2:22:48", "remaining_time": "5:19:37", "throughput": 1286.28, "total_tokens": 11021656}
{"current_steps": 1055, "total_steps": 3400, "loss": 0.4586, "lr": 8.259242386389973e-05, "epoch": 0.27169714138552664, "percentage": 31.03, "elapsed_time": "2:24:23", "remaining_time": "5:20:57", "throughput": 1278.23, "total_tokens": 11074336}
{"current_steps": 1060, "total_steps": 3400, "loss": 0.4939, "lr": 8.240764101059912e-05, "epoch": 0.27298480556270927, "percentage": 31.18, "elapsed_time": "2:25:55", "remaining_time": "5:22:07", "throughput": 1270.9, "total_tokens": 11126776}
{"current_steps": 1065, "total_steps": 3400, "loss": 0.4978, "lr": 8.222209171180883e-05, "epoch": 0.27427246973989183, "percentage": 31.32, "elapsed_time": "2:27:23", "remaining_time": "5:23:09", "throughput": 1264.18, "total_tokens": 11179680}
{"current_steps": 1070, "total_steps": 3400, "loss": 0.5695, "lr": 8.203578035579715e-05, "epoch": 0.2755601339170744, "percentage": 31.47, "elapsed_time": "2:28:53", "remaining_time": "5:24:12", "throughput": 1257.29, "total_tokens": 11231616}
{"current_steps": 1075, "total_steps": 3400, "loss": 0.4635, "lr": 8.184871134885513e-05, "epoch": 0.276847798094257, "percentage": 31.62, "elapsed_time": "2:30:21", "remaining_time": "5:25:11", "throughput": 1250.77, "total_tokens": 11283720}
{"current_steps": 1080, "total_steps": 3400, "loss": 0.4974, "lr": 8.166088911519235e-05, "epoch": 0.2781354622714396, "percentage": 31.76, "elapsed_time": "2:31:50", "remaining_time": "5:26:10", "throughput": 1244.32, "total_tokens": 11336144}
{"current_steps": 1085, "total_steps": 3400, "loss": 0.4439, "lr": 8.147231809683236e-05, "epoch": 0.2794231264486222, "percentage": 31.91, "elapsed_time": "2:33:19", "remaining_time": "5:27:07", "throughput": 1238.08, "total_tokens": 11389128}
{"current_steps": 1090, "total_steps": 3400, "loss": 0.4368, "lr": 8.128300275350756e-05, "epoch": 0.2807107906258048, "percentage": 32.06, "elapsed_time": "2:34:47", "remaining_time": "5:28:03", "throughput": 1231.92, "total_tokens": 11441864}
{"current_steps": 1095, "total_steps": 3400, "loss": 0.4895, "lr": 8.109294756255375e-05, "epoch": 0.2819984548029874, "percentage": 32.21, "elapsed_time": "2:36:17", "remaining_time": "5:28:59", "throughput": 1225.81, "total_tokens": 11494880}
{"current_steps": 1100, "total_steps": 3400, "loss": 0.4825, "lr": 8.090215701880419e-05, "epoch": 0.28328611898017, "percentage": 32.35, "elapsed_time": "2:37:45", "remaining_time": "5:29:51", "throughput": 1219.93, "total_tokens": 11547008}
{"current_steps": 1100, "total_steps": 3400, "eval_loss": 0.4798590838909149, "epoch": 0.28328611898017, "percentage": 32.35, "elapsed_time": "2:38:25", "remaining_time": "5:31:16", "throughput": 1214.71, "total_tokens": 11547008}
{"current_steps": 1105, "total_steps": 3400, "loss": 0.4927, "lr": 8.07106356344834e-05, "epoch": 0.28457378315735254, "percentage": 32.5, "elapsed_time": "2:40:02", "remaining_time": "5:32:23", "throughput": 1208.03, "total_tokens": 11600032}
{"current_steps": 1110, "total_steps": 3400, "loss": 0.4353, "lr": 8.051838793910038e-05, "epoch": 0.28586144733453517, "percentage": 32.65, "elapsed_time": "2:41:30", "remaining_time": "5:33:11", "throughput": 1202.47, "total_tokens": 11652120}
{"current_steps": 1115, "total_steps": 3400, "loss": 0.4891, "lr": 8.032541847934146e-05, "epoch": 0.28714911151171774, "percentage": 32.79, "elapsed_time": "2:42:59", "remaining_time": "5:34:01", "throughput": 1196.88, "total_tokens": 11705184}
{"current_steps": 1120, "total_steps": 3400, "loss": 0.4497, "lr": 8.013173181896283e-05, "epoch": 0.28843677568890036, "percentage": 32.94, "elapsed_time": "2:44:27", "remaining_time": "5:34:48", "throughput": 1191.54, "total_tokens": 11758032}
{"current_steps": 1125, "total_steps": 3400, "loss": 0.4927, "lr": 7.993733253868256e-05, "epoch": 0.28972443986608293, "percentage": 33.09, "elapsed_time": "2:45:57", "remaining_time": "5:35:36", "throughput": 1186.11, "total_tokens": 11810736}
{"current_steps": 1130, "total_steps": 3400, "loss": 0.4853, "lr": 7.974222523607236e-05, "epoch": 0.2910121040432655, "percentage": 33.24, "elapsed_time": "2:47:24", "remaining_time": "5:36:17", "throughput": 1181.07, "total_tokens": 11863152}
{"current_steps": 1135, "total_steps": 3400, "loss": 0.4458, "lr": 7.954641452544865e-05, "epoch": 0.2922997682204481, "percentage": 33.38, "elapsed_time": "2:48:52", "remaining_time": "5:37:00", "throughput": 1175.84, "total_tokens": 11914536}
{"current_steps": 1140, "total_steps": 3400, "loss": 0.3976, "lr": 7.934990503776363e-05, "epoch": 0.2935874323976307, "percentage": 33.53, "elapsed_time": "2:50:19", "remaining_time": "5:37:40", "throughput": 1170.89, "total_tokens": 11966064}
{"current_steps": 1145, "total_steps": 3400, "loss": 0.508, "lr": 7.915270142049566e-05, "epoch": 0.2948750965748133, "percentage": 33.68, "elapsed_time": "2:51:47", "remaining_time": "5:38:20", "throughput": 1166.0, "total_tokens": 12018928}
{"current_steps": 1150, "total_steps": 3400, "loss": 0.4553, "lr": 7.89548083375394e-05, "epoch": 0.2961627607519959, "percentage": 33.82, "elapsed_time": "2:53:14", "remaining_time": "5:38:57", "throughput": 1161.26, "total_tokens": 12071088}
{"current_steps": 1150, "total_steps": 3400, "eval_loss": 0.45381438732147217, "epoch": 0.2961627607519959, "percentage": 33.82, "elapsed_time": "2:53:53", "remaining_time": "5:40:12", "throughput": 1156.99, "total_tokens": 12071088}
{"current_steps": 1155, "total_steps": 3400, "loss": 0.4192, "lr": 7.875623046909544e-05, "epoch": 0.29745042492917845, "percentage": 33.97, "elapsed_time": "2:55:26", "remaining_time": "5:41:00", "throughput": 1151.57, "total_tokens": 12122128}
{"current_steps": 1160, "total_steps": 3400, "loss": 0.433, "lr": 7.855697251155967e-05, "epoch": 0.29873808910636107, "percentage": 34.12, "elapsed_time": "2:56:53", "remaining_time": "5:41:34", "throughput": 1147.09, "total_tokens": 12174288}
{"current_steps": 1165, "total_steps": 3400, "loss": 0.4817, "lr": 7.835703917741212e-05, "epoch": 0.30002575328354364, "percentage": 34.26, "elapsed_time": "2:58:21", "remaining_time": "5:42:09", "throughput": 1142.59, "total_tokens": 12227008}
{"current_steps": 1170, "total_steps": 3400, "loss": 0.485, "lr": 7.81564351951057e-05, "epoch": 0.30131341746072626, "percentage": 34.41, "elapsed_time": "2:59:48", "remaining_time": "5:42:41", "throughput": 1138.31, "total_tokens": 12280168}
{"current_steps": 1175, "total_steps": 3400, "loss": 0.4532, "lr": 7.795516530895414e-05, "epoch": 0.30260108163790883, "percentage": 34.56, "elapsed_time": "3:01:15", "remaining_time": "5:43:13", "throughput": 1134.03, "total_tokens": 12333072}
{"current_steps": 1180, "total_steps": 3400, "loss": 0.4643, "lr": 7.775323427901993e-05, "epoch": 0.3038887458150914, "percentage": 34.71, "elapsed_time": "3:02:43", "remaining_time": "5:43:46", "throughput": 1129.73, "total_tokens": 12386208}
{"current_steps": 1185, "total_steps": 3400, "loss": 0.4577, "lr": 7.755064688100171e-05, "epoch": 0.305176409992274, "percentage": 34.85, "elapsed_time": "3:04:11", "remaining_time": "5:44:16", "throughput": 1125.62, "total_tokens": 12439304}
{"current_steps": 1190, "total_steps": 3400, "loss": 0.4666, "lr": 7.734740790612136e-05, "epoch": 0.3064640741694566, "percentage": 35.0, "elapsed_time": "3:05:39", "remaining_time": "5:44:48", "throughput": 1121.31, "total_tokens": 12491360}
{"current_steps": 1195, "total_steps": 3400, "loss": 0.407, "lr": 7.714352216101055e-05, "epoch": 0.3077517383466392, "percentage": 35.15, "elapsed_time": "3:07:07", "remaining_time": "5:45:17", "throughput": 1117.27, "total_tokens": 12544264}
{"current_steps": 1200, "total_steps": 3400, "loss": 0.454, "lr": 7.693899446759727e-05, "epoch": 0.3090394025238218, "percentage": 35.29, "elapsed_time": "3:08:36", "remaining_time": "5:45:47", "throughput": 1113.05, "total_tokens": 12596208}
{"current_steps": 1200, "total_steps": 3400, "eval_loss": 0.49250805377960205, "epoch": 0.3090394025238218, "percentage": 35.29, "elapsed_time": "3:09:15", "remaining_time": "5:46:58", "throughput": 1109.25, "total_tokens": 12596208}
{"current_steps": 1205, "total_steps": 3400, "loss": 0.5226, "lr": 7.673382966299163e-05, "epoch": 0.31032706670100435, "percentage": 35.44, "elapsed_time": "3:10:48", "remaining_time": "5:47:33", "throughput": 1104.87, "total_tokens": 12648936}
{"current_steps": 1210, "total_steps": 3400, "loss": 0.4757, "lr": 7.65280325993715e-05, "epoch": 0.311614730878187, "percentage": 35.59, "elapsed_time": "3:12:16", "remaining_time": "5:48:00", "throughput": 1101.05, "total_tokens": 12702432}
{"current_steps": 1215, "total_steps": 3400, "loss": 0.451, "lr": 7.63216081438678e-05, "epoch": 0.31290239505536954, "percentage": 35.74, "elapsed_time": "3:13:43", "remaining_time": "5:48:23", "throughput": 1097.34, "total_tokens": 12755128}
{"current_steps": 1220, "total_steps": 3400, "loss": 0.4155, "lr": 7.611456117844934e-05, "epoch": 0.31419005923255217, "percentage": 35.88, "elapsed_time": "3:15:12", "remaining_time": "5:48:49", "throughput": 1093.54, "total_tokens": 12808152}
{"current_steps": 1225, "total_steps": 3400, "loss": 0.4094, "lr": 7.59068965998074e-05, "epoch": 0.31547772340973473, "percentage": 36.03, "elapsed_time": "3:16:39", "remaining_time": "5:49:10", "throughput": 1090.0, "total_tokens": 12861592}
{"current_steps": 1230, "total_steps": 3400, "loss": 0.4663, "lr": 7.569861931923989e-05, "epoch": 0.31676538758691736, "percentage": 36.18, "elapsed_time": "3:18:08", "remaining_time": "5:49:33", "throughput": 1086.31, "total_tokens": 12914240}
{"current_steps": 1235, "total_steps": 3400, "loss": 0.468, "lr": 7.548973426253521e-05, "epoch": 0.3180530517640999, "percentage": 36.32, "elapsed_time": "3:19:35", "remaining_time": "5:49:52", "throughput": 1082.86, "total_tokens": 12967472}
{"current_steps": 1240, "total_steps": 3400, "loss": 0.4744, "lr": 7.528024636985575e-05, "epoch": 0.3193407159412825, "percentage": 36.47, "elapsed_time": "3:21:04", "remaining_time": "5:50:15", "throughput": 1079.25, "total_tokens": 13020232}
{"current_steps": 1245, "total_steps": 3400, "loss": 0.4269, "lr": 7.507016059562107e-05, "epoch": 0.3206283801184651, "percentage": 36.62, "elapsed_time": "3:22:31", "remaining_time": "5:50:33", "throughput": 1075.84, "total_tokens": 13073032}
{"current_steps": 1250, "total_steps": 3400, "loss": 0.4725, "lr": 7.485948190839077e-05, "epoch": 0.3219160442956477, "percentage": 36.76, "elapsed_time": "3:24:00", "remaining_time": "5:50:52", "throughput": 1072.35, "total_tokens": 13125624}
{"current_steps": 1250, "total_steps": 3400, "eval_loss": 0.4339977502822876, "epoch": 0.3219160442956477, "percentage": 36.76, "elapsed_time": "3:24:39", "remaining_time": "5:52:00", "throughput": 1068.93, "total_tokens": 13125624}
{"current_steps": 1255, "total_steps": 3400, "loss": 0.4196, "lr": 7.464821529074679e-05, "epoch": 0.3232037084728303, "percentage": 36.91, "elapsed_time": "3:26:12", "remaining_time": "5:52:25", "throughput": 1065.2, "total_tokens": 13178656}
{"current_steps": 1260, "total_steps": 3400, "loss": 0.4349, "lr": 7.443636573917585e-05, "epoch": 0.3244913726500129, "percentage": 37.06, "elapsed_time": "3:27:40", "remaining_time": "5:52:42", "throughput": 1061.89, "total_tokens": 13231224}
{"current_steps": 1265, "total_steps": 3400, "loss": 0.4726, "lr": 7.422393826395108e-05, "epoch": 0.32577903682719545, "percentage": 37.21, "elapsed_time": "3:29:07", "remaining_time": "5:52:57", "throughput": 1058.62, "total_tokens": 13283208}
{"current_steps": 1270, "total_steps": 3400, "loss": 0.4604, "lr": 7.40109378890136e-05, "epoch": 0.32706670100437807, "percentage": 37.35, "elapsed_time": "3:30:35", "remaining_time": "5:53:11", "throughput": 1055.43, "total_tokens": 13335808}
{"current_steps": 1275, "total_steps": 3400, "loss": 0.4606, "lr": 7.379736965185368e-05, "epoch": 0.32835436518156064, "percentage": 37.5, "elapsed_time": "3:32:03", "remaining_time": "5:53:26", "throughput": 1052.3, "total_tokens": 13389112}
{"current_steps": 1280, "total_steps": 3400, "loss": 0.4487, "lr": 7.358323860339165e-05, "epoch": 0.32964202935874326, "percentage": 37.65, "elapsed_time": "3:33:30", "remaining_time": "5:53:36", "throughput": 1049.31, "total_tokens": 13441816}
{"current_steps": 1285, "total_steps": 3400, "loss": 0.422, "lr": 7.336854980785839e-05, "epoch": 0.33092969353592583, "percentage": 37.79, "elapsed_time": "3:34:58", "remaining_time": "5:53:49", "throughput": 1046.13, "total_tokens": 13493592}
{"current_steps": 1290, "total_steps": 3400, "loss": 0.5397, "lr": 7.315330834267553e-05, "epoch": 0.3322173577131084, "percentage": 37.94, "elapsed_time": "3:36:25", "remaining_time": "5:53:59", "throughput": 1043.14, "total_tokens": 13545696}
{"current_steps": 1295, "total_steps": 3400, "loss": 0.5022, "lr": 7.293751929833553e-05, "epoch": 0.333505021890291, "percentage": 38.09, "elapsed_time": "3:37:53", "remaining_time": "5:54:11", "throughput": 1040.05, "total_tokens": 13597560}
{"current_steps": 1300, "total_steps": 3400, "loss": 0.4794, "lr": 7.272118777828108e-05, "epoch": 0.3347926860674736, "percentage": 38.24, "elapsed_time": "3:39:20", "remaining_time": "5:54:19", "throughput": 1037.23, "total_tokens": 13650264}
{"current_steps": 1300, "total_steps": 3400, "eval_loss": 0.4991846978664398, "epoch": 0.3347926860674736, "percentage": 38.24, "elapsed_time": "3:39:58", "remaining_time": "5:55:20", "throughput": 1034.22, "total_tokens": 13650264}
{"current_steps": 1305, "total_steps": 3400, "loss": 0.4971, "lr": 7.250431889878455e-05, "epoch": 0.3360803502446562, "percentage": 38.38, "elapsed_time": "3:41:32", "remaining_time": "5:55:39", "throughput": 1030.84, "total_tokens": 13702584}
{"current_steps": 1310, "total_steps": 3400, "loss": 0.4574, "lr": 7.228691778882693e-05, "epoch": 0.3373680144218388, "percentage": 38.53, "elapsed_time": "3:42:59", "remaining_time": "5:55:45", "throughput": 1028.08, "total_tokens": 13755024}
{"current_steps": 1315, "total_steps": 3400, "loss": 0.4463, "lr": 7.20689895899765e-05, "epoch": 0.33865567859902135, "percentage": 38.68, "elapsed_time": "3:44:28", "remaining_time": "5:55:54", "throughput": 1025.21, "total_tokens": 13807528}
{"current_steps": 1320, "total_steps": 3400, "loss": 0.4549, "lr": 7.185053945626733e-05, "epoch": 0.33994334277620397, "percentage": 38.82, "elapsed_time": "3:45:54", "remaining_time": "5:55:59", "throughput": 1022.48, "total_tokens": 13859760}
{"current_steps": 1325, "total_steps": 3400, "loss": 0.4073, "lr": 7.163157255407732e-05, "epoch": 0.34123100695338654, "percentage": 38.97, "elapsed_time": "3:47:23", "remaining_time": "5:56:06", "throughput": 1019.66, "total_tokens": 13911656}
{"current_steps": 1330, "total_steps": 3400, "loss": 0.433, "lr": 7.141209406200599e-05, "epoch": 0.34251867113056916, "percentage": 39.12, "elapsed_time": "3:48:50", "remaining_time": "5:56:10", "throughput": 1016.99, "total_tokens": 13963816}
{"current_steps": 1335, "total_steps": 3400, "loss": 0.4244, "lr": 7.1192109170752e-05, "epoch": 0.34380633530775173, "percentage": 39.26, "elapsed_time": "3:50:19", "remaining_time": "5:56:15", "throughput": 1014.25, "total_tokens": 14016256}
{"current_steps": 1340, "total_steps": 3400, "loss": 0.4448, "lr": 7.097162308299054e-05, "epoch": 0.34509399948493436, "percentage": 39.41, "elapsed_time": "3:51:46", "remaining_time": "5:56:18", "throughput": 1011.67, "total_tokens": 14068768}
{"current_steps": 1345, "total_steps": 3400, "loss": 0.4608, "lr": 7.07506410132501e-05, "epoch": 0.3463816636621169, "percentage": 39.56, "elapsed_time": "3:53:14", "remaining_time": "5:56:22", "throughput": 1009.05, "total_tokens": 14121272}
{"current_steps": 1350, "total_steps": 3400, "loss": 0.3994, "lr": 7.052916818778918e-05, "epoch": 0.3476693278392995, "percentage": 39.71, "elapsed_time": "3:54:42", "remaining_time": "5:56:24", "throughput": 1006.46, "total_tokens": 14173240}
{"current_steps": 1350, "total_steps": 3400, "eval_loss": 0.460835725069046, "epoch": 0.3476693278392995, "percentage": 39.71, "elapsed_time": "3:55:20", "remaining_time": "5:57:22", "throughput": 1003.72, "total_tokens": 14173240}
{"current_steps": 1355, "total_steps": 3400, "loss": 0.41, "lr": 7.030720984447279e-05, "epoch": 0.3489569920164821, "percentage": 39.85, "elapsed_time": "3:56:54", "remaining_time": "5:57:33", "throughput": 1000.79, "total_tokens": 14226032}
{"current_steps": 1360, "total_steps": 3400, "loss": 0.3751, "lr": 7.008477123264848e-05, "epoch": 0.3502446561936647, "percentage": 40.0, "elapsed_time": "3:58:22", "remaining_time": "5:57:33", "throughput": 998.33, "total_tokens": 14278128}
{"current_steps": 1365, "total_steps": 3400, "loss": 0.4814, "lr": 6.986185761302224e-05, "epoch": 0.3515323203708473, "percentage": 40.15, "elapsed_time": "3:59:49", "remaining_time": "5:57:32", "throughput": 995.92, "total_tokens": 14330624}
{"current_steps": 1370, "total_steps": 3400, "loss": 0.5007, "lr": 6.963847425753403e-05, "epoch": 0.3528199845480299, "percentage": 40.29, "elapsed_time": "4:01:16", "remaining_time": "5:57:31", "throughput": 993.47, "total_tokens": 14382416}
{"current_steps": 1375, "total_steps": 3400, "loss": 0.4335, "lr": 6.941462644923318e-05, "epoch": 0.35410764872521244, "percentage": 40.44, "elapsed_time": "4:02:44", "remaining_time": "5:57:29", "throughput": 991.11, "total_tokens": 14434896}
{"current_steps": 1380, "total_steps": 3400, "loss": 0.4427, "lr": 6.919031948215335e-05, "epoch": 0.35539531290239507, "percentage": 40.59, "elapsed_time": "4:04:12", "remaining_time": "5:57:28", "throughput": 988.7, "total_tokens": 14487152}
{"current_steps": 1385, "total_steps": 3400, "loss": 0.42, "lr": 6.896555866118741e-05, "epoch": 0.35668297707957763, "percentage": 40.74, "elapsed_time": "4:05:39", "remaining_time": "5:57:24", "throughput": 986.43, "total_tokens": 14539608}
{"current_steps": 1390, "total_steps": 3400, "loss": 0.4573, "lr": 6.87403493019619e-05, "epoch": 0.35797064125676026, "percentage": 40.88, "elapsed_time": "4:07:08", "remaining_time": "5:57:22", "throughput": 984.08, "total_tokens": 14592168}
{"current_steps": 1395, "total_steps": 3400, "loss": 0.4341, "lr": 6.851469673071143e-05, "epoch": 0.3592583054339428, "percentage": 41.03, "elapsed_time": "4:08:35", "remaining_time": "5:57:17", "throughput": 981.81, "total_tokens": 14643920}
{"current_steps": 1400, "total_steps": 3400, "loss": 0.437, "lr": 6.828860628415253e-05, "epoch": 0.3605459696111254, "percentage": 41.18, "elapsed_time": "4:10:03", "remaining_time": "5:57:13", "throughput": 979.58, "total_tokens": 14697136}
{"current_steps": 1400, "total_steps": 3400, "eval_loss": 0.46620962023735046, "epoch": 0.3605459696111254, "percentage": 41.18, "elapsed_time": "4:10:41", "remaining_time": "5:58:08", "throughput": 977.08, "total_tokens": 14697136}
{"current_steps": 1405, "total_steps": 3400, "loss": 0.4377, "lr": 6.806208330935766e-05, "epoch": 0.361833633788308, "percentage": 41.32, "elapsed_time": "4:12:14", "remaining_time": "5:58:09", "throughput": 974.56, "total_tokens": 14749168}
{"current_steps": 1410, "total_steps": 3400, "loss": 0.412, "lr": 6.783513316362855e-05, "epoch": 0.3631212979654906, "percentage": 41.47, "elapsed_time": "4:13:42", "remaining_time": "5:58:04", "throughput": 972.35, "total_tokens": 14801568}
{"current_steps": 1415, "total_steps": 3400, "loss": 0.4441, "lr": 6.760776121436962e-05, "epoch": 0.3644089621426732, "percentage": 41.62, "elapsed_time": "4:15:09", "remaining_time": "5:57:56", "throughput": 970.22, "total_tokens": 14853384}
{"current_steps": 1420, "total_steps": 3400, "loss": 0.4576, "lr": 6.737997283896103e-05, "epoch": 0.3656966263198558, "percentage": 41.76, "elapsed_time": "4:16:37", "remaining_time": "5:57:50", "throughput": 968.1, "total_tokens": 14906632}
{"current_steps": 1425, "total_steps": 3400, "loss": 0.3853, "lr": 6.715177342463145e-05, "epoch": 0.36698429049703835, "percentage": 41.91, "elapsed_time": "4:18:04", "remaining_time": "5:57:41", "throughput": 966.07, "total_tokens": 14959240}
{"current_steps": 1430, "total_steps": 3400, "loss": 0.3755, "lr": 6.692316836833065e-05, "epoch": 0.36827195467422097, "percentage": 42.06, "elapsed_time": "4:19:32", "remaining_time": "5:57:33", "throughput": 964.01, "total_tokens": 15012256}
{"current_steps": 1435, "total_steps": 3400, "loss": 0.5384, "lr": 6.6694163076602e-05, "epoch": 0.36955961885140354, "percentage": 42.21, "elapsed_time": "4:21:00", "remaining_time": "5:57:24", "throughput": 961.97, "total_tokens": 15064664}
{"current_steps": 1440, "total_steps": 3400, "loss": 0.4377, "lr": 6.646476296545434e-05, "epoch": 0.37084728302858616, "percentage": 42.35, "elapsed_time": "4:22:28", "remaining_time": "5:57:15", "throughput": 959.94, "total_tokens": 15117384}
{"current_steps": 1445, "total_steps": 3400, "loss": 0.3876, "lr": 6.623497346023418e-05, "epoch": 0.37213494720576873, "percentage": 42.5, "elapsed_time": "4:23:55", "remaining_time": "5:57:05", "throughput": 957.94, "total_tokens": 15169880}
{"current_steps": 1450, "total_steps": 3400, "loss": 0.4065, "lr": 6.60047999954972e-05, "epoch": 0.37342261138295135, "percentage": 42.65, "elapsed_time": "4:25:23", "remaining_time": "5:56:54", "throughput": 955.96, "total_tokens": 15222568}
{"current_steps": 1450, "total_steps": 3400, "eval_loss": 0.4395444095134735, "epoch": 0.37342261138295135, "percentage": 42.65, "elapsed_time": "4:26:02", "remaining_time": "5:57:46", "throughput": 953.66, "total_tokens": 15222568}
{"current_steps": 1455, "total_steps": 3400, "loss": 0.4231, "lr": 6.57742480148798e-05, "epoch": 0.3747102755601339, "percentage": 42.79, "elapsed_time": "4:27:34", "remaining_time": "5:57:41", "throughput": 951.44, "total_tokens": 15275288}
{"current_steps": 1460, "total_steps": 3400, "loss": 0.4301, "lr": 6.554332297097031e-05, "epoch": 0.3759979397373165, "percentage": 42.94, "elapsed_time": "4:29:02", "remaining_time": "5:57:29", "throughput": 949.54, "total_tokens": 15328072}
{"current_steps": 1465, "total_steps": 3400, "loss": 0.446, "lr": 6.53120303251801e-05, "epoch": 0.3772856039144991, "percentage": 43.09, "elapsed_time": "4:30:30", "remaining_time": "5:57:17", "throughput": 947.56, "total_tokens": 15379120}
{"current_steps": 1470, "total_steps": 3400, "loss": 0.3764, "lr": 6.508037554761432e-05, "epoch": 0.3785732680916817, "percentage": 43.24, "elapsed_time": "4:31:57", "remaining_time": "5:57:04", "throughput": 945.66, "total_tokens": 15431104}
{"current_steps": 1475, "total_steps": 3400, "loss": 0.4423, "lr": 6.484836411694267e-05, "epoch": 0.3798609322688643, "percentage": 43.38, "elapsed_time": "4:33:25", "remaining_time": "5:56:50", "throughput": 943.75, "total_tokens": 15482816}
{"current_steps": 1480, "total_steps": 3400, "loss": 0.4439, "lr": 6.461600152026965e-05, "epoch": 0.3811485964460469, "percentage": 43.53, "elapsed_time": "4:34:52", "remaining_time": "5:56:35", "throughput": 941.94, "total_tokens": 15534896}
{"current_steps": 1485, "total_steps": 3400, "loss": 0.4408, "lr": 6.438329325300499e-05, "epoch": 0.38243626062322944, "percentage": 43.68, "elapsed_time": "4:36:20", "remaining_time": "5:56:22", "throughput": 940.09, "total_tokens": 15587496}
{"current_steps": 1490, "total_steps": 3400, "loss": 0.4086, "lr": 6.415024481873352e-05, "epoch": 0.38372392480041206, "percentage": 43.82, "elapsed_time": "4:37:47", "remaining_time": "5:56:05", "throughput": 938.33, "total_tokens": 15639672}
{"current_steps": 1495, "total_steps": 3400, "loss": 0.4489, "lr": 6.391686172908506e-05, "epoch": 0.38501158897759463, "percentage": 43.97, "elapsed_time": "4:39:16", "remaining_time": "5:55:51", "throughput": 936.55, "total_tokens": 15693120}
{"current_steps": 1500, "total_steps": 3400, "loss": 0.4338, "lr": 6.368314950360415e-05, "epoch": 0.38629925315477726, "percentage": 44.12, "elapsed_time": "4:40:42", "remaining_time": "5:55:34", "throughput": 934.81, "total_tokens": 15744848}
{"current_steps": 1500, "total_steps": 3400, "eval_loss": 0.45475366711616516, "epoch": 0.38629925315477726, "percentage": 44.12, "elapsed_time": "4:41:21", "remaining_time": "5:56:22", "throughput": 932.68, "total_tokens": 15744848}
{"current_steps": 1505, "total_steps": 3400, "loss": 0.4558, "lr": 6.344911366961934e-05, "epoch": 0.3875869173319598, "percentage": 44.26, "elapsed_time": "4:42:55", "remaining_time": "5:56:14", "throughput": 930.61, "total_tokens": 15797632}
{"current_steps": 1510, "total_steps": 3400, "loss": 0.4518, "lr": 6.321475976211266e-05, "epoch": 0.3888745815091424, "percentage": 44.41, "elapsed_time": "4:44:22", "remaining_time": "5:55:56", "throughput": 928.95, "total_tokens": 15850040}
{"current_steps": 1515, "total_steps": 3400, "loss": 0.4092, "lr": 6.298009332358856e-05, "epoch": 0.390162245686325, "percentage": 44.56, "elapsed_time": "4:45:50", "remaining_time": "5:55:39", "throughput": 927.21, "total_tokens": 15902496}
{"current_steps": 1520, "total_steps": 3400, "loss": 0.478, "lr": 6.274511990394294e-05, "epoch": 0.3914499098635076, "percentage": 44.71, "elapsed_time": "4:47:17", "remaining_time": "5:55:20", "throughput": 925.59, "total_tokens": 15954936}
{"current_steps": 1525, "total_steps": 3400, "loss": 0.4294, "lr": 6.250984506033183e-05, "epoch": 0.3927375740406902, "percentage": 44.85, "elapsed_time": "4:48:45", "remaining_time": "5:55:01", "throughput": 923.94, "total_tokens": 16007624}
{"current_steps": 1530, "total_steps": 3400, "loss": 0.3846, "lr": 6.227427435703997e-05, "epoch": 0.3940252382178728, "percentage": 45.0, "elapsed_time": "4:50:14", "remaining_time": "5:54:44", "throughput": 922.17, "total_tokens": 16059440}
{"current_steps": 1535, "total_steps": 3400, "loss": 0.4372, "lr": 6.203841336534924e-05, "epoch": 0.39531290239505534, "percentage": 45.15, "elapsed_time": "4:51:44", "remaining_time": "5:54:27", "throughput": 920.42, "total_tokens": 16111136}
{"current_steps": 1540, "total_steps": 3400, "loss": 0.484, "lr": 6.180226766340688e-05, "epoch": 0.39660056657223797, "percentage": 45.29, "elapsed_time": "4:53:13", "remaining_time": "5:54:09", "throughput": 918.74, "total_tokens": 16163976}
{"current_steps": 1545, "total_steps": 3400, "loss": 0.3965, "lr": 6.156584283609359e-05, "epoch": 0.39788823074942054, "percentage": 45.44, "elapsed_time": "4:54:43", "remaining_time": "5:53:51", "throughput": 917.09, "total_tokens": 16217192}
{"current_steps": 1550, "total_steps": 3400, "loss": 0.3872, "lr": 6.132914447489137e-05, "epoch": 0.39917589492660316, "percentage": 45.59, "elapsed_time": "4:56:11", "remaining_time": "5:53:30", "throughput": 915.52, "total_tokens": 16269896}
{"current_steps": 1550, "total_steps": 3400, "eval_loss": 0.4416767656803131, "epoch": 0.39917589492660316, "percentage": 45.59, "elapsed_time": "4:56:49", "remaining_time": "5:54:16", "throughput": 913.54, "total_tokens": 16269896}
{"current_steps": 1555, "total_steps": 3400, "loss": 0.4593, "lr": 6.109217817775139e-05, "epoch": 0.4004635591037857, "percentage": 45.74, "elapsed_time": "4:58:22", "remaining_time": "5:54:01", "throughput": 911.73, "total_tokens": 16322496}
{"current_steps": 1560, "total_steps": 3400, "loss": 0.4865, "lr": 6.085494954896156e-05, "epoch": 0.40175122328096835, "percentage": 45.88, "elapsed_time": "4:59:50", "remaining_time": "5:53:39", "throughput": 910.22, "total_tokens": 16375320}
{"current_steps": 1565, "total_steps": 3400, "loss": 0.4422, "lr": 6.061746419901388e-05, "epoch": 0.4030388874581509, "percentage": 46.03, "elapsed_time": "5:01:17", "remaining_time": "5:53:16", "throughput": 908.76, "total_tokens": 16428096}
{"current_steps": 1570, "total_steps": 3400, "loss": 0.3538, "lr": 6.0379727744471936e-05, "epoch": 0.4043265516353335, "percentage": 46.18, "elapsed_time": "5:02:45", "remaining_time": "5:52:53", "throughput": 907.26, "total_tokens": 16480832}
{"current_steps": 1575, "total_steps": 3400, "loss": 0.3923, "lr": 6.014174580783794e-05, "epoch": 0.4056142158125161, "percentage": 46.32, "elapsed_time": "5:04:12", "remaining_time": "5:52:29", "throughput": 905.86, "total_tokens": 16534016}
{"current_steps": 1580, "total_steps": 3400, "loss": 0.3967, "lr": 5.990352401741981e-05, "epoch": 0.4069018799896987, "percentage": 46.47, "elapsed_time": "5:05:40", "remaining_time": "5:52:06", "throughput": 904.34, "total_tokens": 16586216}
{"current_steps": 1585, "total_steps": 3400, "loss": 0.4212, "lr": 5.9665068007197976e-05, "epoch": 0.4081895441668813, "percentage": 46.62, "elapsed_time": "5:07:07", "remaining_time": "5:51:41", "throughput": 902.97, "total_tokens": 16639312}
{"current_steps": 1590, "total_steps": 3400, "loss": 0.3489, "lr": 5.94263834166923e-05, "epoch": 0.40947720834406387, "percentage": 46.76, "elapsed_time": "5:08:35", "remaining_time": "5:51:17", "throughput": 901.53, "total_tokens": 16692328}
{"current_steps": 1595, "total_steps": 3400, "loss": 0.4105, "lr": 5.918747589082853e-05, "epoch": 0.41076487252124644, "percentage": 46.91, "elapsed_time": "5:10:02", "remaining_time": "5:50:51", "throughput": 900.15, "total_tokens": 16745088}
{"current_steps": 1600, "total_steps": 3400, "loss": 0.3914, "lr": 5.8948351079804875e-05, "epoch": 0.41205253669842906, "percentage": 47.06, "elapsed_time": "5:11:30", "remaining_time": "5:50:26", "throughput": 898.78, "total_tokens": 16798768}
{"current_steps": 1600, "total_steps": 3400, "eval_loss": 0.4657597243785858, "epoch": 0.41205253669842906, "percentage": 47.06, "elapsed_time": "5:12:08", "remaining_time": "5:51:10", "throughput": 896.94, "total_tokens": 16798768}
{"current_steps": 1605, "total_steps": 3400, "loss": 0.3731, "lr": 5.8709014638958404e-05, "epoch": 0.41334020087561163, "percentage": 47.21, "elapsed_time": "5:13:40", "remaining_time": "5:50:48", "throughput": 895.35, "total_tokens": 16851408}
{"current_steps": 1610, "total_steps": 3400, "loss": 0.4099, "lr": 5.846947222863123e-05, "epoch": 0.41462786505279425, "percentage": 47.35, "elapsed_time": "5:15:09", "remaining_time": "5:50:23", "throughput": 893.91, "total_tokens": 16903136}
{"current_steps": 1615, "total_steps": 3400, "loss": 0.4136, "lr": 5.8229729514036705e-05, "epoch": 0.4159155292299768, "percentage": 47.5, "elapsed_time": "5:16:35", "remaining_time": "5:49:55", "throughput": 892.59, "total_tokens": 16955528}
{"current_steps": 1620, "total_steps": 3400, "loss": 0.3818, "lr": 5.7989792165125356e-05, "epoch": 0.4172031934071594, "percentage": 47.65, "elapsed_time": "5:18:03", "remaining_time": "5:49:28", "throughput": 891.24, "total_tokens": 17008032}
{"current_steps": 1625, "total_steps": 3400, "loss": 0.4303, "lr": 5.774966585645092e-05, "epoch": 0.418490857584342, "percentage": 47.79, "elapsed_time": "5:19:30", "remaining_time": "5:49:00", "throughput": 889.94, "total_tokens": 17060488}
{"current_steps": 1630, "total_steps": 3400, "loss": 0.3673, "lr": 5.7509356267035975e-05, "epoch": 0.4197785217615246, "percentage": 47.94, "elapsed_time": "5:20:58", "remaining_time": "5:48:32", "throughput": 888.59, "total_tokens": 17112408}
{"current_steps": 1635, "total_steps": 3400, "loss": 0.4149, "lr": 5.726886908023776e-05, "epoch": 0.4210661859387072, "percentage": 48.09, "elapsed_time": "5:22:24", "remaining_time": "5:48:02", "throughput": 887.3, "total_tokens": 17164664}
{"current_steps": 1640, "total_steps": 3400, "loss": 0.4613, "lr": 5.702820998361373e-05, "epoch": 0.4223538501158898, "percentage": 48.24, "elapsed_time": "5:23:52", "remaining_time": "5:47:34", "throughput": 886.0, "total_tokens": 17217232}
{"current_steps": 1645, "total_steps": 3400, "loss": 0.372, "lr": 5.6787384668786994e-05, "epoch": 0.42364151429307234, "percentage": 48.38, "elapsed_time": "5:25:19", "remaining_time": "5:47:04", "throughput": 884.74, "total_tokens": 17269344}
{"current_steps": 1650, "total_steps": 3400, "loss": 0.3755, "lr": 5.654639883131178e-05, "epoch": 0.42492917847025496, "percentage": 48.53, "elapsed_time": "5:26:46", "remaining_time": "5:46:34", "throughput": 883.55, "total_tokens": 17323232}
{"current_steps": 1650, "total_steps": 3400, "eval_loss": 0.4726848006248474, "epoch": 0.42492917847025496, "percentage": 48.53, "elapsed_time": "5:27:24", "remaining_time": "5:47:15", "throughput": 881.82, "total_tokens": 17323232}
{"current_steps": 1655, "total_steps": 3400, "loss": 0.3972, "lr": 5.6305258170538676e-05, "epoch": 0.42621684264743753, "percentage": 48.68, "elapsed_time": "5:28:56", "remaining_time": "5:46:50", "throughput": 880.35, "total_tokens": 17375432}
{"current_steps": 1660, "total_steps": 3400, "loss": 0.3988, "lr": 5.606396838947988e-05, "epoch": 0.42750450682462016, "percentage": 48.82, "elapsed_time": "5:30:23", "remaining_time": "5:46:19", "throughput": 879.13, "total_tokens": 17427832}
{"current_steps": 1665, "total_steps": 3400, "loss": 0.4247, "lr": 5.582253519467432e-05, "epoch": 0.4287921710018027, "percentage": 48.97, "elapsed_time": "5:31:51", "remaining_time": "5:45:48", "throughput": 877.87, "total_tokens": 17480056}
{"current_steps": 1670, "total_steps": 3400, "loss": 0.386, "lr": 5.558096429605263e-05, "epoch": 0.43007983517898535, "percentage": 49.12, "elapsed_time": "5:33:18", "remaining_time": "5:45:16", "throughput": 876.74, "total_tokens": 17533192}
{"current_steps": 1675, "total_steps": 3400, "loss": 0.4487, "lr": 5.533926140680221e-05, "epoch": 0.4313674993561679, "percentage": 49.26, "elapsed_time": "5:34:46", "remaining_time": "5:44:45", "throughput": 875.48, "total_tokens": 17585000}
{"current_steps": 1680, "total_steps": 3400, "loss": 0.3878, "lr": 5.509743224323203e-05, "epoch": 0.4326551635333505, "percentage": 49.41, "elapsed_time": "5:36:14", "remaining_time": "5:44:15", "throughput": 874.26, "total_tokens": 17638152}
{"current_steps": 1685, "total_steps": 3400, "loss": 0.3333, "lr": 5.485548252463749e-05, "epoch": 0.4339428277105331, "percentage": 49.56, "elapsed_time": "5:37:47", "remaining_time": "5:43:48", "throughput": 872.86, "total_tokens": 17690656}
{"current_steps": 1690, "total_steps": 3400, "loss": 0.464, "lr": 5.4613417973165106e-05, "epoch": 0.4352304918877157, "percentage": 49.71, "elapsed_time": "5:39:19", "remaining_time": "5:43:20", "throughput": 871.45, "total_tokens": 17742112}
{"current_steps": 1695, "total_steps": 3400, "loss": 0.4374, "lr": 5.4371244313677225e-05, "epoch": 0.4365181560648983, "percentage": 49.85, "elapsed_time": "5:40:50", "remaining_time": "5:42:50", "throughput": 870.12, "total_tokens": 17793968}
{"current_steps": 1700, "total_steps": 3400, "loss": 0.3796, "lr": 5.4128967273616625e-05, "epoch": 0.43780582024208087, "percentage": 50.0, "elapsed_time": "5:42:16", "remaining_time": "5:42:16", "throughput": 868.97, "total_tokens": 17845600}
{"current_steps": 1700, "total_steps": 3400, "eval_loss": 0.4555380642414093, "epoch": 0.43780582024208087, "percentage": 50.0, "elapsed_time": "5:42:54", "remaining_time": "5:42:54", "throughput": 867.36, "total_tokens": 17845600}
{"current_steps": 1705, "total_steps": 3400, "loss": 0.4066, "lr": 5.388659258287102e-05, "epoch": 0.43909348441926344, "percentage": 50.15, "elapsed_time": "5:44:27", "remaining_time": "5:42:26", "throughput": 865.98, "total_tokens": 17897920}
{"current_steps": 1710, "total_steps": 3400, "loss": 0.3599, "lr": 5.364412597363759e-05, "epoch": 0.44038114859644606, "percentage": 50.29, "elapsed_time": "5:45:59", "remaining_time": "5:41:57", "throughput": 864.69, "total_tokens": 17950920}
{"current_steps": 1715, "total_steps": 3400, "loss": 0.3681, "lr": 5.3401573180287426e-05, "epoch": 0.4416688127736286, "percentage": 50.44, "elapsed_time": "5:47:32", "remaining_time": "5:41:27", "throughput": 863.38, "total_tokens": 18003280}
{"current_steps": 1720, "total_steps": 3400, "loss": 0.4005, "lr": 5.315893993922986e-05, "epoch": 0.44295647695081125, "percentage": 50.59, "elapsed_time": "5:49:01", "remaining_time": "5:40:54", "throughput": 862.21, "total_tokens": 18056296}
{"current_steps": 1725, "total_steps": 3400, "loss": 0.3513, "lr": 5.29162319887768e-05, "epoch": 0.4442441411279938, "percentage": 50.74, "elapsed_time": "5:50:30", "remaining_time": "5:40:21", "throughput": 861.06, "total_tokens": 18108904}
{"current_steps": 1730, "total_steps": 3400, "loss": 0.373, "lr": 5.26734550690071e-05, "epoch": 0.4455318053051764, "percentage": 50.88, "elapsed_time": "5:51:59", "remaining_time": "5:39:46", "throughput": 859.91, "total_tokens": 18160696}
{"current_steps": 1735, "total_steps": 3400, "loss": 0.4246, "lr": 5.243061492163073e-05, "epoch": 0.446819469482359, "percentage": 51.03, "elapsed_time": "5:53:29", "remaining_time": "5:39:13", "throughput": 858.77, "total_tokens": 18213760}
{"current_steps": 1740, "total_steps": 3400, "loss": 0.3703, "lr": 5.2187717289852955e-05, "epoch": 0.4481071336595416, "percentage": 51.18, "elapsed_time": "5:54:57", "remaining_time": "5:38:37", "throughput": 857.7, "total_tokens": 18266424}
{"current_steps": 1745, "total_steps": 3400, "loss": 0.3763, "lr": 5.1944767918238624e-05, "epoch": 0.4493947978367242, "percentage": 51.32, "elapsed_time": "5:56:24", "remaining_time": "5:38:01", "throughput": 856.63, "total_tokens": 18318984}
{"current_steps": 1750, "total_steps": 3400, "loss": 0.3767, "lr": 5.170177255257618e-05, "epoch": 0.45068246201390677, "percentage": 51.47, "elapsed_time": "5:57:52", "remaining_time": "5:37:25", "throughput": 855.59, "total_tokens": 18371928}
{"current_steps": 1750, "total_steps": 3400, "eval_loss": 0.4234265685081482, "epoch": 0.45068246201390677, "percentage": 51.47, "elapsed_time": "5:58:31", "remaining_time": "5:38:01", "throughput": 854.07, "total_tokens": 18371928}
{"current_steps": 1755, "total_steps": 3400, "loss": 0.4059, "lr": 5.145873693974188e-05, "epoch": 0.45197012619108934, "percentage": 51.62, "elapsed_time": "6:00:03", "remaining_time": "5:37:29", "throughput": 852.85, "total_tokens": 18424432}
{"current_steps": 1760, "total_steps": 3400, "loss": 0.3709, "lr": 5.12156668275638e-05, "epoch": 0.45325779036827196, "percentage": 51.76, "elapsed_time": "6:01:31", "remaining_time": "5:36:52", "throughput": 851.8, "total_tokens": 18476736}
{"current_steps": 1765, "total_steps": 3400, "loss": 0.4075, "lr": 5.097256796468598e-05, "epoch": 0.45454545454545453, "percentage": 51.91, "elapsed_time": "6:02:59", "remaining_time": "5:36:15", "throughput": 850.79, "total_tokens": 18529552}
{"current_steps": 1770, "total_steps": 3400, "loss": 0.3101, "lr": 5.072944610043232e-05, "epoch": 0.45583311872263715, "percentage": 52.06, "elapsed_time": "6:04:28", "remaining_time": "5:35:38", "throughput": 849.77, "total_tokens": 18583232}
{"current_steps": 1775, "total_steps": 3400, "loss": 0.33, "lr": 5.048630698467081e-05, "epoch": 0.4571207828998197, "percentage": 52.21, "elapsed_time": "6:05:55", "remaining_time": "5:35:00", "throughput": 848.8, "total_tokens": 18636296}
{"current_steps": 1780, "total_steps": 3400, "loss": 0.4204, "lr": 5.024315636767738e-05, "epoch": 0.4584084470770023, "percentage": 52.35, "elapsed_time": "6:07:24", "remaining_time": "5:34:22", "throughput": 847.76, "total_tokens": 18688376}
{"current_steps": 1785, "total_steps": 3400, "loss": 0.4855, "lr": 5e-05, "epoch": 0.4596961112541849, "percentage": 52.5, "elapsed_time": "6:08:51", "remaining_time": "5:33:43", "throughput": 846.83, "total_tokens": 18741192}
{"current_steps": 1790, "total_steps": 3400, "loss": 0.3344, "lr": 4.9756843632322626e-05, "epoch": 0.4609837754313675, "percentage": 52.65, "elapsed_time": "6:10:19", "remaining_time": "5:33:05", "throughput": 845.85, "total_tokens": 18794320}
{"current_steps": 1795, "total_steps": 3400, "loss": 0.3836, "lr": 4.9513693015329197e-05, "epoch": 0.4622714396085501, "percentage": 52.79, "elapsed_time": "6:11:45", "remaining_time": "5:32:24", "throughput": 844.9, "total_tokens": 18846368}
{"current_steps": 1800, "total_steps": 3400, "loss": 0.4484, "lr": 4.9270553899567686e-05, "epoch": 0.4635591037857327, "percentage": 52.94, "elapsed_time": "6:13:14", "remaining_time": "5:31:46", "throughput": 843.91, "total_tokens": 18898888}
{"current_steps": 1800, "total_steps": 3400, "eval_loss": 0.4194311797618866, "epoch": 0.4635591037857327, "percentage": 52.94, "elapsed_time": "6:13:52", "remaining_time": "5:32:20", "throughput": 842.48, "total_tokens": 18898888}
{"current_steps": 1805, "total_steps": 3400, "loss": 0.3301, "lr": 4.902743203531405e-05, "epoch": 0.4648467679629153, "percentage": 53.09, "elapsed_time": "6:15:24", "remaining_time": "5:31:43", "throughput": 841.39, "total_tokens": 18951672}
{"current_steps": 1810, "total_steps": 3400, "loss": 0.3861, "lr": 4.8784333172436206e-05, "epoch": 0.46613443214009787, "percentage": 53.24, "elapsed_time": "6:16:51", "remaining_time": "5:31:03", "throughput": 840.48, "total_tokens": 19005008}
{"current_steps": 1815, "total_steps": 3400, "loss": 0.459, "lr": 4.854126306025812e-05, "epoch": 0.46742209631728043, "percentage": 53.38, "elapsed_time": "6:18:18", "remaining_time": "5:30:22", "throughput": 839.6, "total_tokens": 19057856}
{"current_steps": 1820, "total_steps": 3400, "loss": 0.3944, "lr": 4.829822744742383e-05, "epoch": 0.46870976049446306, "percentage": 53.53, "elapsed_time": "6:19:46", "remaining_time": "5:29:41", "throughput": 838.71, "total_tokens": 19110992}
{"current_steps": 1825, "total_steps": 3400, "loss": 0.3447, "lr": 4.8055232081761395e-05, "epoch": 0.4699974246716456, "percentage": 53.68, "elapsed_time": "6:21:13", "remaining_time": "5:29:00", "throughput": 837.78, "total_tokens": 19162816}
{"current_steps": 1830, "total_steps": 3400, "loss": 0.3954, "lr": 4.781228271014704e-05, "epoch": 0.47128508884882825, "percentage": 53.82, "elapsed_time": "6:22:40", "remaining_time": "5:28:18", "throughput": 836.9, "total_tokens": 19215752}
{"current_steps": 1835, "total_steps": 3400, "loss": 0.415, "lr": 4.756938507836929e-05, "epoch": 0.4725727530260108, "percentage": 53.97, "elapsed_time": "6:24:07", "remaining_time": "5:27:36", "throughput": 836.02, "total_tokens": 19268392}
{"current_steps": 1840, "total_steps": 3400, "loss": 0.2794, "lr": 4.732654493099291e-05, "epoch": 0.4738604172031934, "percentage": 54.12, "elapsed_time": "6:25:34", "remaining_time": "5:26:54", "throughput": 835.18, "total_tokens": 19321696}
{"current_steps": 1845, "total_steps": 3400, "loss": 0.3699, "lr": 4.708376801122321e-05, "epoch": 0.475148081380376, "percentage": 54.26, "elapsed_time": "6:27:02", "remaining_time": "5:26:12", "throughput": 834.27, "total_tokens": 19373584}
{"current_steps": 1850, "total_steps": 3400, "loss": 0.3941, "lr": 4.6841060060770154e-05, "epoch": 0.4764357455575586, "percentage": 54.41, "elapsed_time": "6:28:29", "remaining_time": "5:25:29", "throughput": 833.35, "total_tokens": 19424688}
{"current_steps": 1850, "total_steps": 3400, "eval_loss": 0.45103010535240173, "epoch": 0.4764357455575586, "percentage": 54.41, "elapsed_time": "6:29:07", "remaining_time": "5:26:01", "throughput": 831.99, "total_tokens": 19424688}
{"current_steps": 1855, "total_steps": 3400, "loss": 0.4362, "lr": 4.659842681971258e-05, "epoch": 0.4777234097347412, "percentage": 54.56, "elapsed_time": "6:30:40", "remaining_time": "5:25:23", "throughput": 830.93, "total_tokens": 19477320}
{"current_steps": 1860, "total_steps": 3400, "loss": 0.4027, "lr": 4.635587402636241e-05, "epoch": 0.47901107391192377, "percentage": 54.71, "elapsed_time": "6:32:06", "remaining_time": "5:24:39", "throughput": 830.08, "total_tokens": 19529000}
{"current_steps": 1865, "total_steps": 3400, "loss": 0.3981, "lr": 4.611340741712901e-05, "epoch": 0.48029873808910634, "percentage": 54.85, "elapsed_time": "6:33:34", "remaining_time": "5:23:56", "throughput": 829.22, "total_tokens": 19581736}
{"current_steps": 1870, "total_steps": 3400, "loss": 0.3545, "lr": 4.5871032726383386e-05, "epoch": 0.48158640226628896, "percentage": 55.0, "elapsed_time": "6:35:00", "remaining_time": "5:23:11", "throughput": 828.45, "total_tokens": 19634744}
{"current_steps": 1875, "total_steps": 3400, "loss": 0.34, "lr": 4.562875568632278e-05, "epoch": 0.48287406644347153, "percentage": 55.15, "elapsed_time": "6:36:28", "remaining_time": "5:22:28", "throughput": 827.57, "total_tokens": 19686792}
{"current_steps": 1880, "total_steps": 3400, "loss": 0.3041, "lr": 4.5386582026834906e-05, "epoch": 0.48416173062065415, "percentage": 55.29, "elapsed_time": "6:37:54", "remaining_time": "5:21:42", "throughput": 826.81, "total_tokens": 19739784}
{"current_steps": 1885, "total_steps": 3400, "loss": 0.445, "lr": 4.5144517475362514e-05, "epoch": 0.4854493947978367, "percentage": 55.44, "elapsed_time": "6:39:23", "remaining_time": "5:20:59", "throughput": 825.92, "total_tokens": 19792024}
{"current_steps": 1890, "total_steps": 3400, "loss": 0.3532, "lr": 4.490256775676797e-05, "epoch": 0.4867370589750193, "percentage": 55.59, "elapsed_time": "6:40:50", "remaining_time": "5:20:14", "throughput": 825.12, "total_tokens": 19844568}
{"current_steps": 1895, "total_steps": 3400, "loss": 0.4356, "lr": 4.466073859319781e-05, "epoch": 0.4880247231522019, "percentage": 55.74, "elapsed_time": "6:42:18", "remaining_time": "5:19:30", "throughput": 824.3, "total_tokens": 19897464}
{"current_steps": 1900, "total_steps": 3400, "loss": 0.2877, "lr": 4.441903570394739e-05, "epoch": 0.4893123873293845, "percentage": 55.88, "elapsed_time": "6:43:44", "remaining_time": "5:18:44", "throughput": 823.55, "total_tokens": 19950480}
{"current_steps": 1900, "total_steps": 3400, "eval_loss": 0.4511750042438507, "epoch": 0.4893123873293845, "percentage": 55.88, "elapsed_time": "6:44:22", "remaining_time": "5:19:14", "throughput": 822.26, "total_tokens": 19950480}
{"current_steps": 1905, "total_steps": 3400, "loss": 0.3542, "lr": 4.41774648053257e-05, "epoch": 0.4906000515065671, "percentage": 56.03, "elapsed_time": "6:45:56", "remaining_time": "5:18:34", "throughput": 821.27, "total_tokens": 20002968}
{"current_steps": 1910, "total_steps": 3400, "loss": 0.4095, "lr": 4.3936031610520124e-05, "epoch": 0.49188771568374967, "percentage": 56.18, "elapsed_time": "6:47:22", "remaining_time": "5:17:47", "throughput": 820.53, "total_tokens": 20055560}
{"current_steps": 1915, "total_steps": 3400, "loss": 0.3887, "lr": 4.3694741829461336e-05, "epoch": 0.4931753798609323, "percentage": 56.32, "elapsed_time": "6:48:49", "remaining_time": "5:17:01", "throughput": 819.74, "total_tokens": 20108016}
{"current_steps": 1920, "total_steps": 3400, "loss": 0.3485, "lr": 4.345360116868823e-05, "epoch": 0.49446304403811486, "percentage": 56.47, "elapsed_time": "6:50:16", "remaining_time": "5:16:15", "throughput": 818.97, "total_tokens": 20160480}
{"current_steps": 1925, "total_steps": 3400, "loss": 0.4348, "lr": 4.321261533121303e-05, "epoch": 0.49575070821529743, "percentage": 56.62, "elapsed_time": "6:51:43", "remaining_time": "5:15:28", "throughput": 818.22, "total_tokens": 20213312}
{"current_steps": 1930, "total_steps": 3400, "loss": 0.4073, "lr": 4.2971790016386286e-05, "epoch": 0.49703837239248005, "percentage": 56.76, "elapsed_time": "6:53:11", "remaining_time": "5:14:42", "throughput": 817.48, "total_tokens": 20266288}
{"current_steps": 1935, "total_steps": 3400, "loss": 0.3297, "lr": 4.273113091976225e-05, "epoch": 0.4983260365696626, "percentage": 56.91, "elapsed_time": "6:54:38", "remaining_time": "5:13:55", "throughput": 816.72, "total_tokens": 20318576}
{"current_steps": 1940, "total_steps": 3400, "loss": 0.3352, "lr": 4.249064373296403e-05, "epoch": 0.49961370074684525, "percentage": 57.06, "elapsed_time": "6:56:05", "remaining_time": "5:13:08", "throughput": 815.96, "total_tokens": 20370696}
{"current_steps": 1945, "total_steps": 3400, "loss": 0.3195, "lr": 4.225033414354908e-05, "epoch": 0.5009013649240278, "percentage": 57.21, "elapsed_time": "6:57:32", "remaining_time": "5:12:20", "throughput": 815.23, "total_tokens": 20423480}
{"current_steps": 1950, "total_steps": 3400, "loss": 0.365, "lr": 4.201020783487464e-05, "epoch": 0.5021890291012104, "percentage": 57.35, "elapsed_time": "6:58:59", "remaining_time": "5:11:33", "throughput": 814.49, "total_tokens": 20476176}
{"current_steps": 1950, "total_steps": 3400, "eval_loss": 0.4763557016849518, "epoch": 0.5021890291012104, "percentage": 57.35, "elapsed_time": "6:59:37", "remaining_time": "5:12:02", "throughput": 813.26, "total_tokens": 20476176}
{"current_steps": 1955, "total_steps": 3400, "loss": 0.3723, "lr": 4.17702704859633e-05, "epoch": 0.503476693278393, "percentage": 57.5, "elapsed_time": "7:01:10", "remaining_time": "5:11:18", "throughput": 812.38, "total_tokens": 20529160}
{"current_steps": 1960, "total_steps": 3400, "loss": 0.3637, "lr": 4.153052777136879e-05, "epoch": 0.5047643574555756, "percentage": 57.65, "elapsed_time": "7:02:38", "remaining_time": "5:10:30", "throughput": 811.61, "total_tokens": 20580864}
{"current_steps": 1965, "total_steps": 3400, "loss": 0.3513, "lr": 4.1290985361041614e-05, "epoch": 0.5060520216327582, "percentage": 57.79, "elapsed_time": "7:04:04", "remaining_time": "5:09:41", "throughput": 810.93, "total_tokens": 20633720}
{"current_steps": 1970, "total_steps": 3400, "loss": 0.3569, "lr": 4.105164892019514e-05, "epoch": 0.5073396858099408, "percentage": 57.94, "elapsed_time": "7:05:32", "remaining_time": "5:08:53", "throughput": 810.19, "total_tokens": 20685832}
{"current_steps": 1975, "total_steps": 3400, "loss": 0.3072, "lr": 4.0812524109171476e-05, "epoch": 0.5086273499871233, "percentage": 58.09, "elapsed_time": "7:06:58", "remaining_time": "5:08:04", "throughput": 809.49, "total_tokens": 20737832}
{"current_steps": 1980, "total_steps": 3400, "loss": 0.3884, "lr": 4.0573616583307705e-05, "epoch": 0.509915014164306, "percentage": 58.24, "elapsed_time": "7:08:26", "remaining_time": "5:07:15", "throughput": 808.8, "total_tokens": 20791184}
{"current_steps": 1985, "total_steps": 3400, "loss": 0.3414, "lr": 4.033493199280202e-05, "epoch": 0.5112026783414886, "percentage": 58.38, "elapsed_time": "7:09:52", "remaining_time": "5:06:26", "throughput": 808.12, "total_tokens": 20843672}
{"current_steps": 1990, "total_steps": 3400, "loss": 0.3545, "lr": 4.009647598258022e-05, "epoch": 0.5124903425186711, "percentage": 58.53, "elapsed_time": "7:11:20", "remaining_time": "5:05:37", "throughput": 807.39, "total_tokens": 20895760}
{"current_steps": 1995, "total_steps": 3400, "loss": 0.3406, "lr": 3.985825419216207e-05, "epoch": 0.5137780066958537, "percentage": 58.68, "elapsed_time": "7:12:47", "remaining_time": "5:04:48", "throughput": 806.71, "total_tokens": 20948448}
{"current_steps": 2000, "total_steps": 3400, "loss": 0.3814, "lr": 3.962027225552807e-05, "epoch": 0.5150656708730363, "percentage": 58.82, "elapsed_time": "7:14:15", "remaining_time": "5:03:58", "throughput": 806.05, "total_tokens": 21002032}
{"current_steps": 2000, "total_steps": 3400, "eval_loss": 0.5097677707672119, "epoch": 0.5150656708730363, "percentage": 58.82, "elapsed_time": "7:14:53", "remaining_time": "5:04:25", "throughput": 804.87, "total_tokens": 21002032}
{"current_steps": 2005, "total_steps": 3400, "loss": 0.3312, "lr": 3.938253580098613e-05, "epoch": 0.5163533350502189, "percentage": 58.97, "elapsed_time": "7:16:26", "remaining_time": "5:03:39", "throughput": 804.02, "total_tokens": 21054264}
{"current_steps": 2010, "total_steps": 3400, "loss": 0.2914, "lr": 3.914505045103845e-05, "epoch": 0.5176409992274015, "percentage": 59.12, "elapsed_time": "7:17:53", "remaining_time": "5:02:49", "throughput": 803.36, "total_tokens": 21106872}
{"current_steps": 2015, "total_steps": 3400, "loss": 0.3414, "lr": 3.8907821822248605e-05, "epoch": 0.5189286634045841, "percentage": 59.26, "elapsed_time": "7:19:20", "remaining_time": "5:01:58", "throughput": 802.71, "total_tokens": 21159976}
{"current_steps": 2020, "total_steps": 3400, "loss": 0.3701, "lr": 3.867085552510864e-05, "epoch": 0.5202163275817667, "percentage": 59.41, "elapsed_time": "7:20:48", "remaining_time": "5:01:08", "throughput": 802.02, "total_tokens": 21211920}
{"current_steps": 2025, "total_steps": 3400, "loss": 0.3867, "lr": 3.843415716390644e-05, "epoch": 0.5215039917589492, "percentage": 59.56, "elapsed_time": "7:22:15", "remaining_time": "5:00:18", "throughput": 801.37, "total_tokens": 21265128}
{"current_steps": 2030, "total_steps": 3400, "loss": 0.3515, "lr": 3.819773233659314e-05, "epoch": 0.5227916559361319, "percentage": 59.71, "elapsed_time": "7:23:43", "remaining_time": "4:59:27", "throughput": 800.71, "total_tokens": 21317592}
{"current_steps": 2035, "total_steps": 3400, "loss": 0.3359, "lr": 3.7961586634650767e-05, "epoch": 0.5240793201133145, "percentage": 59.85, "elapsed_time": "7:25:10", "remaining_time": "4:58:36", "throughput": 800.09, "total_tokens": 21370976}
{"current_steps": 2040, "total_steps": 3400, "loss": 0.3265, "lr": 3.772572564296005e-05, "epoch": 0.525366984290497, "percentage": 60.0, "elapsed_time": "7:26:37", "remaining_time": "4:57:45", "throughput": 799.47, "total_tokens": 21424056}
{"current_steps": 2045, "total_steps": 3400, "loss": 0.3738, "lr": 3.749015493966817e-05, "epoch": 0.5266546484676796, "percentage": 60.15, "elapsed_time": "7:28:05", "remaining_time": "4:56:53", "throughput": 798.81, "total_tokens": 21476248}
{"current_steps": 2050, "total_steps": 3400, "loss": 0.3389, "lr": 3.7254880096057073e-05, "epoch": 0.5279423126448622, "percentage": 60.29, "elapsed_time": "7:29:31", "remaining_time": "4:56:01", "throughput": 798.15, "total_tokens": 21527496}
{"current_steps": 2050, "total_steps": 3400, "eval_loss": 0.4327767789363861, "epoch": 0.5279423126448622, "percentage": 60.29, "elapsed_time": "7:30:09", "remaining_time": "4:56:27", "throughput": 797.02, "total_tokens": 21527496}
{"current_steps": 2055, "total_steps": 3400, "loss": 0.3937, "lr": 3.7019906676411446e-05, "epoch": 0.5292299768220448, "percentage": 60.44, "elapsed_time": "7:31:43", "remaining_time": "4:55:39", "throughput": 796.21, "total_tokens": 21579816}
{"current_steps": 2060, "total_steps": 3400, "loss": 0.4039, "lr": 3.678524023788735e-05, "epoch": 0.5305176409992274, "percentage": 60.59, "elapsed_time": "7:33:09", "remaining_time": "4:54:46", "throughput": 795.58, "total_tokens": 21631776}
{"current_steps": 2065, "total_steps": 3400, "loss": 0.3501, "lr": 3.6550886330380665e-05, "epoch": 0.53180530517641, "percentage": 60.74, "elapsed_time": "7:34:38", "remaining_time": "4:53:54", "throughput": 794.91, "total_tokens": 21683608}
{"current_steps": 2070, "total_steps": 3400, "loss": 0.3334, "lr": 3.631685049639586e-05, "epoch": 0.5330929693535926, "percentage": 60.88, "elapsed_time": "7:36:04", "remaining_time": "4:53:02", "throughput": 794.3, "total_tokens": 21735672}
{"current_steps": 2075, "total_steps": 3400, "loss": 0.3292, "lr": 3.608313827091493e-05, "epoch": 0.5343806335307751, "percentage": 61.03, "elapsed_time": "7:37:32", "remaining_time": "4:52:09", "throughput": 793.65, "total_tokens": 21787592}
{"current_steps": 2080, "total_steps": 3400, "loss": 0.3616, "lr": 3.5849755181266474e-05, "epoch": 0.5356682977079578, "percentage": 61.18, "elapsed_time": "7:38:58", "remaining_time": "4:51:16", "throughput": 793.08, "total_tokens": 21840448}
{"current_steps": 2085, "total_steps": 3400, "loss": 0.3082, "lr": 3.5616706746995026e-05, "epoch": 0.5369559618851404, "percentage": 61.32, "elapsed_time": "7:40:26", "remaining_time": "4:50:23", "throughput": 792.47, "total_tokens": 21893096}
{"current_steps": 2090, "total_steps": 3400, "loss": 0.293, "lr": 3.538399847973036e-05, "epoch": 0.5382436260623229, "percentage": 61.47, "elapsed_time": "7:41:53", "remaining_time": "4:49:30", "throughput": 791.87, "total_tokens": 21945184}
{"current_steps": 2095, "total_steps": 3400, "loss": 0.3835, "lr": 3.515163588305735e-05, "epoch": 0.5395312902395055, "percentage": 61.62, "elapsed_time": "7:43:20", "remaining_time": "4:48:37", "throughput": 791.29, "total_tokens": 21998016}
{"current_steps": 2100, "total_steps": 3400, "loss": 0.3983, "lr": 3.491962445238569e-05, "epoch": 0.5408189544166881, "percentage": 61.76, "elapsed_time": "7:44:47", "remaining_time": "4:47:43", "throughput": 790.68, "total_tokens": 22050376}
{"current_steps": 2100, "total_steps": 3400, "eval_loss": 0.481829971075058, "epoch": 0.5408189544166881, "percentage": 61.76, "elapsed_time": "7:45:25", "remaining_time": "4:48:07", "throughput": 789.61, "total_tokens": 22050376}
{"current_steps": 2105, "total_steps": 3400, "loss": 0.4067, "lr": 3.4687969674819906e-05, "epoch": 0.5421066185938708, "percentage": 61.91, "elapsed_time": "7:46:58", "remaining_time": "4:47:17", "throughput": 788.86, "total_tokens": 22102848}
{"current_steps": 2110, "total_steps": 3400, "loss": 0.36, "lr": 3.445667702902969e-05, "epoch": 0.5433942827710533, "percentage": 62.06, "elapsed_time": "7:48:26", "remaining_time": "4:46:23", "throughput": 788.28, "total_tokens": 22155432}
{"current_steps": 2115, "total_steps": 3400, "loss": 0.3569, "lr": 3.4225751985120215e-05, "epoch": 0.5446819469482359, "percentage": 62.21, "elapsed_time": "7:49:53", "remaining_time": "4:45:29", "throughput": 787.67, "total_tokens": 22207528}
{"current_steps": 2120, "total_steps": 3400, "loss": 0.3503, "lr": 3.3995200004502816e-05, "epoch": 0.5459696111254185, "percentage": 62.35, "elapsed_time": "7:51:21", "remaining_time": "4:44:35", "throughput": 787.09, "total_tokens": 22260016}
{"current_steps": 2125, "total_steps": 3400, "loss": 0.342, "lr": 3.3765026539765834e-05, "epoch": 0.547257275302601, "percentage": 62.5, "elapsed_time": "7:52:49", "remaining_time": "4:43:41", "throughput": 786.51, "total_tokens": 22312616}
{"current_steps": 2130, "total_steps": 3400, "loss": 0.4113, "lr": 3.3535237034545675e-05, "epoch": 0.5485449394797837, "percentage": 62.65, "elapsed_time": "7:54:16", "remaining_time": "4:42:47", "throughput": 785.92, "total_tokens": 22364776}
{"current_steps": 2135, "total_steps": 3400, "loss": 0.311, "lr": 3.330583692339802e-05, "epoch": 0.5498326036569663, "percentage": 62.79, "elapsed_time": "7:55:43", "remaining_time": "4:41:52", "throughput": 785.37, "total_tokens": 22416944}
{"current_steps": 2140, "total_steps": 3400, "loss": 0.358, "lr": 3.307683163166934e-05, "epoch": 0.5511202678341488, "percentage": 62.94, "elapsed_time": "7:57:11", "remaining_time": "4:40:57", "throughput": 784.77, "total_tokens": 22468960}
{"current_steps": 2145, "total_steps": 3400, "loss": 0.378, "lr": 3.284822657536856e-05, "epoch": 0.5524079320113314, "percentage": 63.09, "elapsed_time": "7:58:37", "remaining_time": "4:40:02", "throughput": 784.24, "total_tokens": 22521624}
{"current_steps": 2150, "total_steps": 3400, "loss": 0.3687, "lr": 3.262002716103897e-05, "epoch": 0.553695596188514, "percentage": 63.24, "elapsed_time": "8:00:05", "remaining_time": "4:39:07", "throughput": 783.66, "total_tokens": 22574104}
{"current_steps": 2150, "total_steps": 3400, "eval_loss": 0.4504742920398712, "epoch": 0.553695596188514, "percentage": 63.24, "elapsed_time": "8:00:44", "remaining_time": "4:39:29", "throughput": 782.63, "total_tokens": 22574104}
{"current_steps": 2155, "total_steps": 3400, "loss": 0.3188, "lr": 3.2392238785630386e-05, "epoch": 0.5549832603656967, "percentage": 63.38, "elapsed_time": "8:02:16", "remaining_time": "4:38:37", "throughput": 781.95, "total_tokens": 22626752}
{"current_steps": 2160, "total_steps": 3400, "loss": 0.3724, "lr": 3.216486683637146e-05, "epoch": 0.5562709245428792, "percentage": 63.53, "elapsed_time": "8:03:44", "remaining_time": "4:37:42", "throughput": 781.38, "total_tokens": 22679152}
{"current_steps": 2165, "total_steps": 3400, "loss": 0.3341, "lr": 3.1937916690642356e-05, "epoch": 0.5575585887200618, "percentage": 63.68, "elapsed_time": "8:05:10", "remaining_time": "4:36:46", "throughput": 780.89, "total_tokens": 22732576}
{"current_steps": 2170, "total_steps": 3400, "loss": 0.3741, "lr": 3.1711393715847476e-05, "epoch": 0.5588462528972444, "percentage": 63.82, "elapsed_time": "8:06:39", "remaining_time": "4:35:50", "throughput": 780.35, "total_tokens": 22785536}
{"current_steps": 2175, "total_steps": 3400, "loss": 0.4109, "lr": 3.14853032692886e-05, "epoch": 0.560133917074427, "percentage": 63.97, "elapsed_time": "8:08:05", "remaining_time": "4:34:54", "throughput": 779.85, "total_tokens": 22838448}
{"current_steps": 2180, "total_steps": 3400, "loss": 0.3548, "lr": 3.125965069803811e-05, "epoch": 0.5614215812516096, "percentage": 64.12, "elapsed_time": "8:09:33", "remaining_time": "4:33:58", "throughput": 779.31, "total_tokens": 22891176}
{"current_steps": 2185, "total_steps": 3400, "loss": 0.339, "lr": 3.103444133881261e-05, "epoch": 0.5627092454287922, "percentage": 64.26, "elapsed_time": "8:11:00", "remaining_time": "4:33:01", "throughput": 778.77, "total_tokens": 22942832}
{"current_steps": 2190, "total_steps": 3400, "loss": 0.4406, "lr": 3.080968051784666e-05, "epoch": 0.5639969096059748, "percentage": 64.41, "elapsed_time": "8:12:27", "remaining_time": "4:32:05", "throughput": 778.27, "total_tokens": 22995928}
{"current_steps": 2195, "total_steps": 3400, "loss": 0.3615, "lr": 3.058537355076683e-05, "epoch": 0.5652845737831573, "percentage": 64.56, "elapsed_time": "8:13:54", "remaining_time": "4:31:08", "throughput": 777.76, "total_tokens": 23048848}
{"current_steps": 2200, "total_steps": 3400, "loss": 0.3232, "lr": 3.0361525742465973e-05, "epoch": 0.56657223796034, "percentage": 64.71, "elapsed_time": "8:15:22", "remaining_time": "4:30:12", "throughput": 777.25, "total_tokens": 23101488}
{"current_steps": 2200, "total_steps": 3400, "eval_loss": 0.45173853635787964, "epoch": 0.56657223796034, "percentage": 64.71, "elapsed_time": "8:16:00", "remaining_time": "4:30:32", "throughput": 776.25, "total_tokens": 23101488}
{"current_steps": 2205, "total_steps": 3400, "loss": 0.3453, "lr": 3.0138142386977787e-05, "epoch": 0.5678599021375226, "percentage": 64.85, "elapsed_time": "8:17:33", "remaining_time": "4:29:38", "throughput": 775.61, "total_tokens": 23154320}
{"current_steps": 2210, "total_steps": 3400, "loss": 0.3277, "lr": 2.991522876735154e-05, "epoch": 0.5691475663147051, "percentage": 65.0, "elapsed_time": "8:19:00", "remaining_time": "4:28:41", "throughput": 775.09, "total_tokens": 23206640}
{"current_steps": 2215, "total_steps": 3400, "loss": 0.3368, "lr": 2.9692790155527227e-05, "epoch": 0.5704352304918877, "percentage": 65.15, "elapsed_time": "8:20:27", "remaining_time": "4:27:44", "throughput": 774.58, "total_tokens": 23258992}
{"current_steps": 2220, "total_steps": 3400, "loss": 0.3518, "lr": 2.9470831812210837e-05, "epoch": 0.5717228946690703, "percentage": 65.29, "elapsed_time": "8:21:55", "remaining_time": "4:26:47", "throughput": 774.08, "total_tokens": 23311640}
{"current_steps": 2225, "total_steps": 3400, "loss": 0.3142, "lr": 2.924935898674992e-05, "epoch": 0.5730105588462528, "percentage": 65.44, "elapsed_time": "8:23:22", "remaining_time": "4:25:49", "throughput": 773.58, "total_tokens": 23364048}
{"current_steps": 2230, "total_steps": 3400, "loss": 0.347, "lr": 2.902837691700945e-05, "epoch": 0.5742982230234355, "percentage": 65.59, "elapsed_time": "8:24:49", "remaining_time": "4:24:51", "throughput": 773.1, "total_tokens": 23416632}
{"current_steps": 2235, "total_steps": 3400, "loss": 0.3807, "lr": 2.880789082924798e-05, "epoch": 0.5755858872006181, "percentage": 65.74, "elapsed_time": "8:26:16", "remaining_time": "4:23:54", "throughput": 772.58, "total_tokens": 23468608}
{"current_steps": 2240, "total_steps": 3400, "loss": 0.3271, "lr": 2.858790593799405e-05, "epoch": 0.5768735513778007, "percentage": 65.88, "elapsed_time": "8:27:43", "remaining_time": "4:22:55", "throughput": 772.12, "total_tokens": 23521312}
{"current_steps": 2245, "total_steps": 3400, "loss": 0.296, "lr": 2.8368427445922696e-05, "epoch": 0.5781612155549832, "percentage": 66.03, "elapsed_time": "8:29:11", "remaining_time": "4:21:57", "throughput": 771.62, "total_tokens": 23574104}
{"current_steps": 2250, "total_steps": 3400, "loss": 0.325, "lr": 2.8149460543732664e-05, "epoch": 0.5794488797321659, "percentage": 66.18, "elapsed_time": "8:30:37", "remaining_time": "4:20:59", "throughput": 771.17, "total_tokens": 23626952}
{"current_steps": 2250, "total_steps": 3400, "eval_loss": 0.4990580379962921, "epoch": 0.5794488797321659, "percentage": 66.18, "elapsed_time": "8:31:15", "remaining_time": "4:21:18", "throughput": 770.21, "total_tokens": 23626952}
{"current_steps": 2255, "total_steps": 3400, "loss": 0.3544, "lr": 2.7931010410023518e-05, "epoch": 0.5807365439093485, "percentage": 66.32, "elapsed_time": "8:32:49", "remaining_time": "4:20:23", "throughput": 769.6, "total_tokens": 23680112}
{"current_steps": 2260, "total_steps": 3400, "loss": 0.3467, "lr": 2.771308221117309e-05, "epoch": 0.582024208086531, "percentage": 66.47, "elapsed_time": "8:34:15", "remaining_time": "4:19:24", "throughput": 769.13, "total_tokens": 23731896}
{"current_steps": 2265, "total_steps": 3400, "loss": 0.3374, "lr": 2.749568110121545e-05, "epoch": 0.5833118722637136, "percentage": 66.62, "elapsed_time": "8:35:43", "remaining_time": "4:18:26", "throughput": 768.64, "total_tokens": 23784616}
{"current_steps": 2270, "total_steps": 3400, "loss": 0.3509, "lr": 2.7278812221718924e-05, "epoch": 0.5845995364408962, "percentage": 66.76, "elapsed_time": "8:37:09", "remaining_time": "4:17:26", "throughput": 768.19, "total_tokens": 23836920}
{"current_steps": 2275, "total_steps": 3400, "loss": 0.3014, "lr": 2.7062480701664488e-05, "epoch": 0.5858872006180788, "percentage": 66.91, "elapsed_time": "8:38:37", "remaining_time": "4:16:27", "throughput": 767.75, "total_tokens": 23890792}
{"current_steps": 2280, "total_steps": 3400, "loss": 0.4672, "lr": 2.6846691657324473e-05, "epoch": 0.5871748647952614, "percentage": 67.06, "elapsed_time": "8:40:05", "remaining_time": "4:15:28", "throughput": 767.29, "total_tokens": 23943264}
{"current_steps": 2285, "total_steps": 3400, "loss": 0.2817, "lr": 2.663145019214163e-05, "epoch": 0.588462528972444, "percentage": 67.21, "elapsed_time": "8:41:32", "remaining_time": "4:14:29", "throughput": 766.81, "total_tokens": 23995760}
{"current_steps": 2290, "total_steps": 3400, "loss": 0.3538, "lr": 2.6416761396608362e-05, "epoch": 0.5897501931496266, "percentage": 67.35, "elapsed_time": "8:43:00", "remaining_time": "4:13:30", "throughput": 766.36, "total_tokens": 24048696}
{"current_steps": 2295, "total_steps": 3400, "loss": 0.3018, "lr": 2.6202630348146324e-05, "epoch": 0.5910378573268091, "percentage": 67.5, "elapsed_time": "8:44:27", "remaining_time": "4:12:31", "throughput": 765.93, "total_tokens": 24102248}
{"current_steps": 2300, "total_steps": 3400, "loss": 0.3322, "lr": 2.598906211098643e-05, "epoch": 0.5923255215039918, "percentage": 67.65, "elapsed_time": "8:45:55", "remaining_time": "4:11:31", "throughput": 765.47, "total_tokens": 24154624}
{"current_steps": 2300, "total_steps": 3400, "eval_loss": 0.4960116744041443, "epoch": 0.5923255215039918, "percentage": 67.65, "elapsed_time": "8:46:33", "remaining_time": "4:11:49", "throughput": 764.54, "total_tokens": 24154624}
{"current_steps": 2305, "total_steps": 3400, "loss": 0.3806, "lr": 2.577606173604894e-05, "epoch": 0.5936131856811744, "percentage": 67.79, "elapsed_time": "8:48:06", "remaining_time": "4:10:52", "throughput": 763.94, "total_tokens": 24206536}
{"current_steps": 2310, "total_steps": 3400, "loss": 0.4, "lr": 2.5563634260824175e-05, "epoch": 0.5949008498583569, "percentage": 67.94, "elapsed_time": "8:49:34", "remaining_time": "4:09:53", "throughput": 763.49, "total_tokens": 24259448}
{"current_steps": 2315, "total_steps": 3400, "loss": 0.3565, "lr": 2.535178470925323e-05, "epoch": 0.5961885140355395, "percentage": 68.09, "elapsed_time": "8:51:02", "remaining_time": "4:08:53", "throughput": 763.05, "total_tokens": 24312520}
{"current_steps": 2320, "total_steps": 3400, "loss": 0.2725, "lr": 2.5140518091609256e-05, "epoch": 0.5974761782127221, "percentage": 68.24, "elapsed_time": "8:52:30", "remaining_time": "4:07:53", "throughput": 762.57, "total_tokens": 24364600}
{"current_steps": 2325, "total_steps": 3400, "loss": 0.3154, "lr": 2.4929839404378936e-05, "epoch": 0.5987638423899048, "percentage": 68.38, "elapsed_time": "8:53:58", "remaining_time": "4:06:53", "throughput": 762.14, "total_tokens": 24417624}
{"current_steps": 2330, "total_steps": 3400, "loss": 0.3016, "lr": 2.471975363014428e-05, "epoch": 0.6000515065670873, "percentage": 68.53, "elapsed_time": "8:55:26", "remaining_time": "4:05:53", "throughput": 761.66, "total_tokens": 24469680}
{"current_steps": 2335, "total_steps": 3400, "loss": 0.3363, "lr": 2.451026573746482e-05, "epoch": 0.6013391707442699, "percentage": 68.68, "elapsed_time": "8:56:53", "remaining_time": "4:04:52", "throughput": 761.22, "total_tokens": 24521784}
{"current_steps": 2340, "total_steps": 3400, "loss": 0.3393, "lr": 2.430138068076013e-05, "epoch": 0.6026268349214525, "percentage": 68.82, "elapsed_time": "8:58:22", "remaining_time": "4:03:52", "throughput": 760.74, "total_tokens": 24573824}
{"current_steps": 2345, "total_steps": 3400, "loss": 0.3243, "lr": 2.4093103400192625e-05, "epoch": 0.603914499098635, "percentage": 68.97, "elapsed_time": "8:59:49", "remaining_time": "4:02:51", "throughput": 760.3, "total_tokens": 24625824}
{"current_steps": 2350, "total_steps": 3400, "loss": 0.3651, "lr": 2.388543882155067e-05, "epoch": 0.6052021632758177, "percentage": 69.12, "elapsed_time": "9:01:18", "remaining_time": "4:01:51", "throughput": 759.85, "total_tokens": 24678768}
{"current_steps": 2350, "total_steps": 3400, "eval_loss": 0.4145541489124298, "epoch": 0.6052021632758177, "percentage": 69.12, "elapsed_time": "9:01:56", "remaining_time": "4:02:08", "throughput": 758.96, "total_tokens": 24678768}
{"current_steps": 2355, "total_steps": 3400, "loss": 0.3229, "lr": 2.3678391856132204e-05, "epoch": 0.6064898274530003, "percentage": 69.26, "elapsed_time": "9:03:29", "remaining_time": "4:01:09", "throughput": 758.39, "total_tokens": 24730528}
{"current_steps": 2360, "total_steps": 3400, "loss": 0.3308, "lr": 2.3471967400628513e-05, "epoch": 0.6077774916301828, "percentage": 69.41, "elapsed_time": "9:04:57", "remaining_time": "4:00:09", "throughput": 757.98, "total_tokens": 24784472}
{"current_steps": 2365, "total_steps": 3400, "loss": 0.356, "lr": 2.3266170337008398e-05, "epoch": 0.6090651558073654, "percentage": 69.56, "elapsed_time": "9:06:24", "remaining_time": "3:59:07", "throughput": 757.61, "total_tokens": 24838168}
{"current_steps": 2370, "total_steps": 3400, "loss": 0.2784, "lr": 2.306100553240274e-05, "epoch": 0.610352819984548, "percentage": 69.71, "elapsed_time": "9:07:53", "remaining_time": "3:58:06", "throughput": 757.16, "total_tokens": 24890552}
{"current_steps": 2375, "total_steps": 3400, "loss": 0.2859, "lr": 2.2856477838989456e-05, "epoch": 0.6116404841617307, "percentage": 69.85, "elapsed_time": "9:09:20", "remaining_time": "3:57:05", "throughput": 756.74, "total_tokens": 24942904}
{"current_steps": 2380, "total_steps": 3400, "loss": 0.3107, "lr": 2.2652592093878666e-05, "epoch": 0.6129281483389132, "percentage": 70.0, "elapsed_time": "9:10:48", "remaining_time": "3:56:03", "throughput": 756.33, "total_tokens": 24995776}
{"current_steps": 2385, "total_steps": 3400, "loss": 0.3131, "lr": 2.244935311899829e-05, "epoch": 0.6142158125160958, "percentage": 70.15, "elapsed_time": "9:12:16", "remaining_time": "3:55:02", "throughput": 755.89, "total_tokens": 25047848}
{"current_steps": 2390, "total_steps": 3400, "loss": 0.3175, "lr": 2.224676572098007e-05, "epoch": 0.6155034766932784, "percentage": 70.29, "elapsed_time": "9:13:44", "remaining_time": "3:54:00", "throughput": 755.49, "total_tokens": 25100896}
{"current_steps": 2395, "total_steps": 3400, "loss": 0.3482, "lr": 2.2044834691045873e-05, "epoch": 0.6167911408704609, "percentage": 70.44, "elapsed_time": "9:15:12", "remaining_time": "3:52:58", "throughput": 755.08, "total_tokens": 25153912}
{"current_steps": 2400, "total_steps": 3400, "loss": 0.3445, "lr": 2.184356480489432e-05, "epoch": 0.6180788050476436, "percentage": 70.59, "elapsed_time": "9:16:40", "remaining_time": "3:51:56", "throughput": 754.67, "total_tokens": 25206168}
{"current_steps": 2400, "total_steps": 3400, "eval_loss": 0.42807063460350037, "epoch": 0.6180788050476436, "percentage": 70.59, "elapsed_time": "9:17:18", "remaining_time": "3:52:12", "throughput": 753.81, "total_tokens": 25206168}
{"current_steps": 2405, "total_steps": 3400, "loss": 0.3147, "lr": 2.1642960822587878e-05, "epoch": 0.6193664692248262, "percentage": 70.74, "elapsed_time": "9:18:51", "remaining_time": "3:51:12", "throughput": 753.28, "total_tokens": 25258880}
{"current_steps": 2410, "total_steps": 3400, "loss": 0.3467, "lr": 2.1443027488440338e-05, "epoch": 0.6206541334020087, "percentage": 70.88, "elapsed_time": "9:20:19", "remaining_time": "3:50:10", "throughput": 752.87, "total_tokens": 25310976}
{"current_steps": 2415, "total_steps": 3400, "loss": 0.3085, "lr": 2.124376953090456e-05, "epoch": 0.6219417975791913, "percentage": 71.03, "elapsed_time": "9:21:47", "remaining_time": "3:49:08", "throughput": 752.46, "total_tokens": 25363520}
{"current_steps": 2420, "total_steps": 3400, "loss": 0.3376, "lr": 2.104519166246059e-05, "epoch": 0.623229461756374, "percentage": 71.18, "elapsed_time": "9:23:14", "remaining_time": "3:48:05", "throughput": 752.05, "total_tokens": 25415400}
{"current_steps": 2425, "total_steps": 3400, "loss": 0.3312, "lr": 2.0847298579504344e-05, "epoch": 0.6245171259335566, "percentage": 71.32, "elapsed_time": "9:24:43", "remaining_time": "3:47:03", "throughput": 751.65, "total_tokens": 25468296}
{"current_steps": 2430, "total_steps": 3400, "loss": 0.3282, "lr": 2.065009496223638e-05, "epoch": 0.6258047901107391, "percentage": 71.47, "elapsed_time": "9:26:10", "remaining_time": "3:46:00", "throughput": 751.27, "total_tokens": 25520816}
{"current_steps": 2435, "total_steps": 3400, "loss": 0.321, "lr": 2.045358547455138e-05, "epoch": 0.6270924542879217, "percentage": 71.62, "elapsed_time": "9:27:38", "remaining_time": "3:44:57", "throughput": 750.87, "total_tokens": 25573416}
{"current_steps": 2440, "total_steps": 3400, "loss": 0.33, "lr": 2.0257774763927655e-05, "epoch": 0.6283801184651043, "percentage": 71.76, "elapsed_time": "9:29:06", "remaining_time": "3:43:54", "throughput": 750.5, "total_tokens": 25626536}
{"current_steps": 2445, "total_steps": 3400, "loss": 0.2833, "lr": 2.0062667461317426e-05, "epoch": 0.6296677826422868, "percentage": 71.91, "elapsed_time": "9:30:34", "remaining_time": "3:42:51", "throughput": 750.1, "total_tokens": 25679208}
{"current_steps": 2450, "total_steps": 3400, "loss": 0.3413, "lr": 1.9868268181037185e-05, "epoch": 0.6309554468194695, "percentage": 72.06, "elapsed_time": "9:32:01", "remaining_time": "3:41:48", "throughput": 749.68, "total_tokens": 25730432}
{"current_steps": 2450, "total_steps": 3400, "eval_loss": 0.46914541721343994, "epoch": 0.6309554468194695, "percentage": 72.06, "elapsed_time": "9:32:40", "remaining_time": "3:42:03", "throughput": 748.85, "total_tokens": 25730432}
{"current_steps": 2455, "total_steps": 3400, "loss": 0.3132, "lr": 1.967458152065857e-05, "epoch": 0.6322431109966521, "percentage": 72.21, "elapsed_time": "9:34:13", "remaining_time": "3:41:02", "throughput": 748.34, "total_tokens": 25782992}
{"current_steps": 2460, "total_steps": 3400, "loss": 0.2995, "lr": 1.9481612060899646e-05, "epoch": 0.6335307751738347, "percentage": 72.35, "elapsed_time": "9:35:41", "remaining_time": "3:39:58", "throughput": 747.96, "total_tokens": 25835576}
{"current_steps": 2465, "total_steps": 3400, "loss": 0.3104, "lr": 1.928936436551661e-05, "epoch": 0.6348184393510172, "percentage": 72.5, "elapsed_time": "9:37:09", "remaining_time": "3:38:55", "throughput": 747.54, "total_tokens": 25886784}
{"current_steps": 2470, "total_steps": 3400, "loss": 0.2866, "lr": 1.9097842981195834e-05, "epoch": 0.6361061035281999, "percentage": 72.65, "elapsed_time": "9:38:37", "remaining_time": "3:37:51", "throughput": 747.16, "total_tokens": 25939408}
{"current_steps": 2475, "total_steps": 3400, "loss": 0.2886, "lr": 1.8907052437446272e-05, "epoch": 0.6373937677053825, "percentage": 72.79, "elapsed_time": "9:40:05", "remaining_time": "3:36:48", "throughput": 746.78, "total_tokens": 25992048}
{"current_steps": 2480, "total_steps": 3400, "loss": 0.3752, "lr": 1.871699724649244e-05, "epoch": 0.638681431882565, "percentage": 72.94, "elapsed_time": "9:41:33", "remaining_time": "3:35:44", "throughput": 746.42, "total_tokens": 26045216}
{"current_steps": 2485, "total_steps": 3400, "loss": 0.3039, "lr": 1.8527681903167644e-05, "epoch": 0.6399690960597476, "percentage": 73.09, "elapsed_time": "9:43:01", "remaining_time": "3:34:40", "throughput": 746.04, "total_tokens": 26097424}
{"current_steps": 2490, "total_steps": 3400, "loss": 0.3142, "lr": 1.833911088480767e-05, "epoch": 0.6412567602369302, "percentage": 73.24, "elapsed_time": "9:44:29", "remaining_time": "3:33:36", "throughput": 745.65, "total_tokens": 26149616}
{"current_steps": 2495, "total_steps": 3400, "loss": 0.3576, "lr": 1.8151288651144893e-05, "epoch": 0.6425444244141127, "percentage": 73.38, "elapsed_time": "9:45:57", "remaining_time": "3:32:32", "throughput": 745.24, "total_tokens": 26200744}
{"current_steps": 2500, "total_steps": 3400, "loss": 0.363, "lr": 1.796421964420285e-05, "epoch": 0.6438320885912954, "percentage": 73.53, "elapsed_time": "9:47:25", "remaining_time": "3:31:28", "throughput": 744.86, "total_tokens": 26252584}
{"current_steps": 2500, "total_steps": 3400, "eval_loss": 0.44705262780189514, "epoch": 0.6438320885912954, "percentage": 73.53, "elapsed_time": "9:48:03", "remaining_time": "3:31:42", "throughput": 744.05, "total_tokens": 26252584}
{"current_steps": 2505, "total_steps": 3400, "loss": 0.3113, "lr": 1.7777908288191176e-05, "epoch": 0.645119752768478, "percentage": 73.68, "elapsed_time": "9:49:35", "remaining_time": "3:30:39", "throughput": 743.58, "total_tokens": 26304800}
{"current_steps": 2510, "total_steps": 3400, "loss": 0.3581, "lr": 1.7592358989400883e-05, "epoch": 0.6464074169456606, "percentage": 73.82, "elapsed_time": "9:51:03", "remaining_time": "3:29:34", "throughput": 743.23, "total_tokens": 26357680}
{"current_steps": 2515, "total_steps": 3400, "loss": 0.3353, "lr": 1.740757613610028e-05, "epoch": 0.6476950811228431, "percentage": 73.97, "elapsed_time": "9:52:31", "remaining_time": "3:28:30", "throughput": 742.89, "total_tokens": 26410432}
{"current_steps": 2520, "total_steps": 3400, "loss": 0.2796, "lr": 1.7223564098431067e-05, "epoch": 0.6489827453000258, "percentage": 74.12, "elapsed_time": "9:54:00", "remaining_time": "3:27:26", "throughput": 742.49, "total_tokens": 26463016}
{"current_steps": 2525, "total_steps": 3400, "loss": 0.3197, "lr": 1.704032722830512e-05, "epoch": 0.6502704094772084, "percentage": 74.26, "elapsed_time": "9:55:31", "remaining_time": "3:26:22", "throughput": 742.07, "total_tokens": 26515408}
{"current_steps": 2530, "total_steps": 3400, "loss": 0.3182, "lr": 1.68578698593014e-05, "epoch": 0.6515580736543909, "percentage": 74.41, "elapsed_time": "9:57:00", "remaining_time": "3:25:17", "throughput": 741.67, "total_tokens": 26567024}
{"current_steps": 2535, "total_steps": 3400, "loss": 0.3822, "lr": 1.6676196306563613e-05, "epoch": 0.6528457378315735, "percentage": 74.56, "elapsed_time": "9:58:28", "remaining_time": "3:24:12", "throughput": 741.31, "total_tokens": 26619744}
{"current_steps": 2540, "total_steps": 3400, "loss": 0.2853, "lr": 1.6495310866698093e-05, "epoch": 0.6541334020087561, "percentage": 74.71, "elapsed_time": "9:59:57", "remaining_time": "3:23:08", "throughput": 740.94, "total_tokens": 26672408}
{"current_steps": 2545, "total_steps": 3400, "loss": 0.3622, "lr": 1.631521781767214e-05, "epoch": 0.6554210661859388, "percentage": 74.85, "elapsed_time": "10:01:25", "remaining_time": "3:22:03", "throughput": 740.58, "total_tokens": 26724488}
{"current_steps": 2550, "total_steps": 3400, "loss": 0.3195, "lr": 1.6135921418712956e-05, "epoch": 0.6567087303631213, "percentage": 75.0, "elapsed_time": "10:02:54", "remaining_time": "3:20:58", "throughput": 740.22, "total_tokens": 26776816}
{"current_steps": 2550, "total_steps": 3400, "eval_loss": 0.43731561303138733, "epoch": 0.6567087303631213, "percentage": 75.0, "elapsed_time": "10:03:32", "remaining_time": "3:21:10", "throughput": 739.43, "total_tokens": 26776816}
{"current_steps": 2560, "total_steps": 3400, "loss": 0.281, "lr": 1.577973551359877e-05, "epoch": 0.6592840587174865, "percentage": 75.29, "elapsed_time": "0:04:12", "remaining_time": "0:01:22", "throughput": 106495.52, "total_tokens": 26881272}
{"current_steps": 2565, "total_steps": 3400, "loss": 0.2814, "lr": 1.560285443129296e-05, "epoch": 0.660571722894669, "percentage": 75.44, "elapsed_time": "0:05:39", "remaining_time": "0:01:50", "throughput": 79391.53, "total_tokens": 26934104}
{"current_steps": 2570, "total_steps": 3400, "loss": 0.2602, "lr": 1.542678684655306e-05, "epoch": 0.6618593870718517, "percentage": 75.59, "elapsed_time": "0:07:06", "remaining_time": "0:02:17", "throughput": 63238.19, "total_tokens": 26986248}
{"current_steps": 2575, "total_steps": 3400, "loss": 0.336, "lr": 1.5251536923403426e-05, "epoch": 0.6631470512490343, "percentage": 75.74, "elapsed_time": "0:08:33", "remaining_time": "0:02:44", "throughput": 52682.1, "total_tokens": 27038528}
{"current_steps": 2580, "total_steps": 3400, "loss": 0.2867, "lr": 1.5077108806530581e-05, "epoch": 0.6644347154262168, "percentage": 75.88, "elapsed_time": "0:10:01", "remaining_time": "0:03:11", "throughput": 45068.81, "total_tokens": 27090792}
{"current_steps": 2585, "total_steps": 3400, "loss": 0.2898, "lr": 1.4903506621185192e-05, "epoch": 0.6657223796033994, "percentage": 76.03, "elapsed_time": "0:11:28", "remaining_time": "0:03:36", "throughput": 39447.58, "total_tokens": 27143544}
{"current_steps": 2590, "total_steps": 3400, "loss": 0.2955, "lr": 1.4730734473084568e-05, "epoch": 0.667010043780582, "percentage": 76.18, "elapsed_time": "0:12:56", "remaining_time": "0:04:02", "throughput": 35030.09, "total_tokens": 27195632}
{"current_steps": 2595, "total_steps": 3400, "loss": 0.281, "lr": 1.4558796448315504e-05, "epoch": 0.6682977079577647, "percentage": 76.32, "elapsed_time": "0:14:22", "remaining_time": "0:04:27", "throughput": 31584.55, "total_tokens": 27248472}
{"current_steps": 2600, "total_steps": 3400, "loss": 0.3075, "lr": 1.4387696613237612e-05, "epoch": 0.6695853721349472, "percentage": 76.47, "elapsed_time": "0:15:50", "remaining_time": "0:04:52", "throughput": 28726.52, "total_tokens": 27301776}
{"current_steps": 2600, "total_steps": 3400, "eval_loss": 0.4504788815975189, "epoch": 0.6695853721349472, "percentage": 76.47, "elapsed_time": "0:16:55", "remaining_time": "0:05:12", "throughput": 26882.73, "total_tokens": 27301776}
{"current_steps": 2605, "total_steps": 3400, "loss": 0.3403, "lr": 1.4217439014387251e-05, "epoch": 0.6708730363121298, "percentage": 76.62, "elapsed_time": "0:18:30", "remaining_time": "0:05:38", "throughput": 24631.72, "total_tokens": 27354136}
{"current_steps": 2610, "total_steps": 3400, "loss": 0.286, "lr": 1.404802767838176e-05, "epoch": 0.6721607004893124, "percentage": 76.76, "elapsed_time": "0:19:57", "remaining_time": "0:06:02", "throughput": 22877.64, "total_tokens": 27405792}
{"current_steps": 2615, "total_steps": 3400, "loss": 0.2851, "lr": 1.3879466611824199e-05, "epoch": 0.6734483646664949, "percentage": 76.91, "elapsed_time": "0:21:24", "remaining_time": "0:06:25", "throughput": 21381.45, "total_tokens": 27457864}
{"current_steps": 2620, "total_steps": 3400, "loss": 0.3605, "lr": 1.371175980120864e-05, "epoch": 0.6747360288436776, "percentage": 77.06, "elapsed_time": "0:22:51", "remaining_time": "0:06:48", "throughput": 20055.94, "total_tokens": 27511520}
{"current_steps": 2625, "total_steps": 3400, "loss": 0.2961, "lr": 1.3544911212825906e-05, "epoch": 0.6760236930208602, "percentage": 77.21, "elapsed_time": "0:24:17", "remaining_time": "0:07:10", "throughput": 18907.21, "total_tokens": 27564200}
{"current_steps": 2630, "total_steps": 3400, "loss": 0.3004, "lr": 1.337892479266974e-05, "epoch": 0.6773113571980427, "percentage": 77.35, "elapsed_time": "0:25:45", "remaining_time": "0:07:32", "throughput": 17864.85, "total_tokens": 27616704}
{"current_steps": 2635, "total_steps": 3400, "loss": 0.2878, "lr": 1.3213804466343421e-05, "epoch": 0.6785990213752253, "percentage": 77.5, "elapsed_time": "0:27:11", "remaining_time": "0:07:53", "throughput": 16954.93, "total_tokens": 27668944}
{"current_steps": 2640, "total_steps": 3400, "loss": 0.2863, "lr": 1.3049554138967051e-05, "epoch": 0.6798866855524079, "percentage": 77.65, "elapsed_time": "0:28:38", "remaining_time": "0:08:14", "throughput": 16130.41, "total_tokens": 27722304}
{"current_steps": 2645, "total_steps": 3400, "loss": 0.3084, "lr": 1.2886177695085078e-05, "epoch": 0.6811743497295906, "percentage": 77.79, "elapsed_time": "0:30:03", "remaining_time": "0:08:34", "throughput": 15399.14, "total_tokens": 27775400}
{"current_steps": 2650, "total_steps": 3400, "loss": 0.324, "lr": 1.2723678998574512e-05, "epoch": 0.6824620139067731, "percentage": 77.94, "elapsed_time": "0:31:29", "remaining_time": "0:08:54", "throughput": 14730.09, "total_tokens": 27827480}
{"current_steps": 2650, "total_steps": 3400, "eval_loss": 0.5079630613327026, "epoch": 0.6824620139067731, "percentage": 77.94, "elapsed_time": "0:32:06", "remaining_time": "0:09:05", "throughput": 14441.68, "total_tokens": 27827480}
{"current_steps": 2655, "total_steps": 3400, "loss": 0.3189, "lr": 1.2562061892553473e-05, "epoch": 0.6837496780839557, "percentage": 78.09, "elapsed_time": "0:33:40", "remaining_time": "0:09:27", "throughput": 13795.27, "total_tokens": 27879064}
{"current_steps": 2660, "total_steps": 3400, "loss": 0.2458, "lr": 1.2401330199290367e-05, "epoch": 0.6850373422611383, "percentage": 78.24, "elapsed_time": "0:35:07", "remaining_time": "0:09:46", "throughput": 13254.02, "total_tokens": 27931864}
{"current_steps": 2665, "total_steps": 3400, "loss": 0.3055, "lr": 1.224148772011346e-05, "epoch": 0.6863250064383208, "percentage": 78.38, "elapsed_time": "0:36:34", "remaining_time": "0:10:05", "throughput": 12749.47, "total_tokens": 27984408}
{"current_steps": 2670, "total_steps": 3400, "loss": 0.2993, "lr": 1.2082538235320929e-05, "epoch": 0.6876126706155035, "percentage": 78.53, "elapsed_time": "0:38:01", "remaining_time": "0:10:23", "throughput": 12289.04, "total_tokens": 28037368}
{"current_steps": 2675, "total_steps": 3400, "loss": 0.3572, "lr": 1.1924485504091565e-05, "epoch": 0.6889003347926861, "percentage": 78.68, "elapsed_time": "0:39:28", "remaining_time": "0:10:42", "throughput": 11858.72, "total_tokens": 28090768}
{"current_steps": 2680, "total_steps": 3400, "loss": 0.4043, "lr": 1.1767333264395736e-05, "epoch": 0.6901879989698687, "percentage": 78.82, "elapsed_time": "0:40:54", "remaining_time": "0:10:59", "throughput": 11464.2, "total_tokens": 28142432}
{"current_steps": 2685, "total_steps": 3400, "loss": 0.3288, "lr": 1.1611085232907132e-05, "epoch": 0.6914756631470512, "percentage": 78.97, "elapsed_time": "0:42:22", "remaining_time": "0:11:17", "throughput": 11087.75, "total_tokens": 28194896}
{"current_steps": 2690, "total_steps": 3400, "loss": 0.3491, "lr": 1.14557451049147e-05, "epoch": 0.6927633273242338, "percentage": 79.12, "elapsed_time": "0:43:49", "remaining_time": "0:11:33", "throughput": 10744.36, "total_tokens": 28247264}
{"current_steps": 2695, "total_steps": 3400, "loss": 0.2881, "lr": 1.1301316554235397e-05, "epoch": 0.6940509915014165, "percentage": 79.26, "elapsed_time": "0:45:16", "remaining_time": "0:11:50", "throughput": 10416.26, "total_tokens": 28299864}
{"current_steps": 2700, "total_steps": 3400, "loss": 0.3076, "lr": 1.114780323312724e-05, "epoch": 0.695338655678599, "percentage": 79.41, "elapsed_time": "0:46:42", "remaining_time": "0:12:06", "throughput": 10115.15, "total_tokens": 28352368}
{"current_steps": 2700, "total_steps": 3400, "eval_loss": 0.4338160753250122, "epoch": 0.695338655678599, "percentage": 79.41, "elapsed_time": "0:47:21", "remaining_time": "0:12:16", "throughput": 9977.7, "total_tokens": 28352368}
{"current_steps": 2705, "total_steps": 3400, "loss": 0.3024, "lr": 1.0995208772202897e-05, "epoch": 0.6966263198557816, "percentage": 79.56, "elapsed_time": "0:48:55", "remaining_time": "0:12:34", "throughput": 9675.61, "total_tokens": 28404360}
{"current_steps": 2710, "total_steps": 3400, "loss": 0.2668, "lr": 1.0843536780343865e-05, "epoch": 0.6979139840329642, "percentage": 79.71, "elapsed_time": "0:50:21", "remaining_time": "0:12:49", "throughput": 9417.24, "total_tokens": 28456960}
{"current_steps": 2715, "total_steps": 3400, "loss": 0.3344, "lr": 1.069279084461513e-05, "epoch": 0.6992016482101467, "percentage": 79.85, "elapsed_time": "0:51:49", "remaining_time": "0:13:04", "throughput": 9167.45, "total_tokens": 28509448}
{"current_steps": 2720, "total_steps": 3400, "loss": 0.2942, "lr": 1.0542974530180327e-05, "epoch": 0.7004893123873294, "percentage": 80.0, "elapsed_time": "0:53:15", "remaining_time": "0:13:18", "throughput": 8937.08, "total_tokens": 28561496}
{"current_steps": 2725, "total_steps": 3400, "loss": 0.3209, "lr": 1.0394091380217352e-05, "epoch": 0.701776976564512, "percentage": 80.15, "elapsed_time": "0:54:43", "remaining_time": "0:13:33", "throughput": 8714.68, "total_tokens": 28613224}
{"current_steps": 2730, "total_steps": 3400, "loss": 0.3021, "lr": 1.0246144915834683e-05, "epoch": 0.7030646407416946, "percentage": 80.29, "elapsed_time": "0:56:08", "remaining_time": "0:13:46", "throughput": 8509.05, "total_tokens": 28665360}
{"current_steps": 2735, "total_steps": 3400, "loss": 0.211, "lr": 1.0099138635988026e-05, "epoch": 0.7043523049188771, "percentage": 80.44, "elapsed_time": "0:57:35", "remaining_time": "0:14:00", "throughput": 8311.14, "total_tokens": 28719488}
{"current_steps": 2740, "total_steps": 3400, "loss": 0.3017, "lr": 9.953076017397578e-06, "epoch": 0.7056399690960597, "percentage": 80.59, "elapsed_time": "0:59:02", "remaining_time": "0:14:13", "throughput": 8122.35, "total_tokens": 28771880}
{"current_steps": 2745, "total_steps": 3400, "loss": 0.3022, "lr": 9.807960514465792e-06, "epoch": 0.7069276332732424, "percentage": 80.74, "elapsed_time": "1:00:29", "remaining_time": "0:14:25", "throughput": 7942.62, "total_tokens": 28825096}
{"current_steps": 2750, "total_steps": 3400, "loss": 0.2817, "lr": 9.663795559195733e-06, "epoch": 0.7082152974504249, "percentage": 80.88, "elapsed_time": "1:01:54", "remaining_time": "0:14:38", "throughput": 7773.58, "total_tokens": 28877960}
{"current_steps": 2750, "total_steps": 3400, "eval_loss": 0.4439634680747986, "epoch": 0.7082152974504249, "percentage": 80.88, "elapsed_time": "1:02:33", "remaining_time": "0:14:47", "throughput": 7693.75, "total_tokens": 28877960}
{"current_steps": 2755, "total_steps": 3400, "loss": 0.2854, "lr": 9.520584561109864e-06, "epoch": 0.7095029616276075, "percentage": 81.03, "elapsed_time": "1:04:05", "remaining_time": "0:15:00", "throughput": 7523.08, "total_tokens": 28930512}
{"current_steps": 2760, "total_steps": 3400, "loss": 0.3635, "lr": 9.378330907169386e-06, "epoch": 0.7107906258047901, "percentage": 81.18, "elapsed_time": "1:05:32", "remaining_time": "0:15:11", "throughput": 7369.7, "total_tokens": 28984048}
{"current_steps": 2765, "total_steps": 3400, "loss": 0.3276, "lr": 9.237037961694223e-06, "epoch": 0.7120782899819728, "percentage": 81.32, "elapsed_time": "1:06:59", "remaining_time": "0:15:23", "throughput": 7223.95, "total_tokens": 29034368}
{"current_steps": 2770, "total_steps": 3400, "loss": 0.2939, "lr": 9.096709066283354e-06, "epoch": 0.7133659541591553, "percentage": 81.47, "elapsed_time": "1:08:26", "remaining_time": "0:15:33", "throughput": 7082.87, "total_tokens": 29086720}
{"current_steps": 2775, "total_steps": 3400, "loss": 0.2814, "lr": 8.957347539735872e-06, "epoch": 0.7146536183363379, "percentage": 81.62, "elapsed_time": "1:09:52", "remaining_time": "0:15:44", "throughput": 6950.08, "total_tokens": 29139744}
{"current_steps": 2780, "total_steps": 3400, "loss": 0.3773, "lr": 8.818956677972406e-06, "epoch": 0.7159412825135205, "percentage": 81.76, "elapsed_time": "1:11:20", "remaining_time": "0:15:54", "throughput": 6819.86, "total_tokens": 29192168}
{"current_steps": 2785, "total_steps": 3400, "loss": 0.3126, "lr": 8.681539753957269e-06, "epoch": 0.717228946690703, "percentage": 81.91, "elapsed_time": "1:12:46", "remaining_time": "0:16:04", "throughput": 6697.07, "total_tokens": 29244896}
{"current_steps": 2790, "total_steps": 3400, "loss": 0.3038, "lr": 8.545100017620988e-06, "epoch": 0.7185166108678857, "percentage": 82.06, "elapsed_time": "1:14:14", "remaining_time": "0:16:13", "throughput": 6577.22, "total_tokens": 29297424}
{"current_steps": 2795, "total_steps": 3400, "loss": 0.308, "lr": 8.409640695783443e-06, "epoch": 0.7198042750450683, "percentage": 82.21, "elapsed_time": "1:15:40", "remaining_time": "0:16:22", "throughput": 6464.36, "total_tokens": 29349664}
{"current_steps": 2800, "total_steps": 3400, "loss": 0.3567, "lr": 8.275164992077556e-06, "epoch": 0.7210919392222508, "percentage": 82.35, "elapsed_time": "1:17:08", "remaining_time": "0:16:31", "throughput": 6353.06, "total_tokens": 29402040}
{"current_steps": 2800, "total_steps": 3400, "eval_loss": 0.4282406270503998, "epoch": 0.7210919392222508, "percentage": 82.35, "elapsed_time": "1:17:46", "remaining_time": "0:16:39", "throughput": 6301.28, "total_tokens": 29402040}
{"current_steps": 2805, "total_steps": 3400, "loss": 0.2538, "lr": 8.141676086873572e-06, "epoch": 0.7223796033994334, "percentage": 82.5, "elapsed_time": "1:19:16", "remaining_time": "0:16:49", "throughput": 6192.3, "total_tokens": 29455456}
{"current_steps": 2810, "total_steps": 3400, "loss": 0.3374, "lr": 8.009177137203794e-06, "epoch": 0.723667267576616, "percentage": 82.65, "elapsed_time": "1:20:42", "remaining_time": "0:16:56", "throughput": 6093.0, "total_tokens": 29507136}
{"current_steps": 2815, "total_steps": 3400, "loss": 0.3303, "lr": 7.877671276687898e-06, "epoch": 0.7249549317537987, "percentage": 82.79, "elapsed_time": "1:22:07", "remaining_time": "0:17:03", "throughput": 5998.8, "total_tokens": 29558760}
{"current_steps": 2820, "total_steps": 3400, "loss": 0.2834, "lr": 7.747161615458902e-06, "epoch": 0.7262425959309812, "percentage": 82.94, "elapsed_time": "1:23:34", "remaining_time": "0:17:11", "throughput": 5905.78, "total_tokens": 29612000}
{"current_steps": 2825, "total_steps": 3400, "loss": 0.2746, "lr": 7.617651240089546e-06, "epoch": 0.7275302601081638, "percentage": 83.09, "elapsed_time": "1:24:58", "remaining_time": "0:17:17", "throughput": 5817.99, "total_tokens": 29664472}
{"current_steps": 2830, "total_steps": 3400, "loss": 0.315, "lr": 7.489143213519301e-06, "epoch": 0.7288179242853464, "percentage": 83.24, "elapsed_time": "1:26:24", "remaining_time": "0:17:24", "throughput": 5731.65, "total_tokens": 29716440}
{"current_steps": 2835, "total_steps": 3400, "loss": 0.2877, "lr": 7.361640574981937e-06, "epoch": 0.7301055884625289, "percentage": 83.38, "elapsed_time": "1:27:49", "remaining_time": "0:17:30", "throughput": 5648.99, "total_tokens": 29769248}
{"current_steps": 2840, "total_steps": 3400, "loss": 0.2953, "lr": 7.2351463399336735e-06, "epoch": 0.7313932526397116, "percentage": 83.53, "elapsed_time": "1:29:14", "remaining_time": "0:17:35", "throughput": 5569.15, "total_tokens": 29821968}
{"current_steps": 2845, "total_steps": 3400, "loss": 0.2709, "lr": 7.109663499981834e-06, "epoch": 0.7326809168168942, "percentage": 83.68, "elapsed_time": "1:30:40", "remaining_time": "0:17:41", "throughput": 5490.9, "total_tokens": 29875104}
{"current_steps": 2850, "total_steps": 3400, "loss": 0.3024, "lr": 6.985195022814067e-06, "epoch": 0.7339685809940767, "percentage": 83.82, "elapsed_time": "1:32:05", "remaining_time": "0:17:46", "throughput": 5415.97, "total_tokens": 29928032}
{"current_steps": 2850, "total_steps": 3400, "eval_loss": 0.47043517231941223, "epoch": 0.7339685809940767, "percentage": 83.82, "elapsed_time": "1:32:43", "remaining_time": "0:17:53", "throughput": 5379.51, "total_tokens": 29928032}
{"current_steps": 2855, "total_steps": 3400, "loss": 0.3425, "lr": 6.861743852128233e-06, "epoch": 0.7352562451712593, "percentage": 83.97, "elapsed_time": "1:34:14", "remaining_time": "0:17:59", "throughput": 5301.85, "total_tokens": 29980608}
{"current_steps": 2860, "total_steps": 3400, "loss": 0.3095, "lr": 6.7393129075627335e-06, "epoch": 0.7365439093484419, "percentage": 84.12, "elapsed_time": "1:35:40", "remaining_time": "0:18:03", "throughput": 5232.21, "total_tokens": 30033680}
{"current_steps": 2865, "total_steps": 3400, "loss": 0.2894, "lr": 6.6179050846274515e-06, "epoch": 0.7378315735256246, "percentage": 84.26, "elapsed_time": "1:37:06", "remaining_time": "0:18:07", "throughput": 5163.75, "total_tokens": 30086016}
{"current_steps": 2870, "total_steps": 3400, "loss": 0.3044, "lr": 6.497523254635296e-06, "epoch": 0.7391192377028071, "percentage": 84.41, "elapsed_time": "1:38:31", "remaining_time": "0:18:11", "throughput": 5098.52, "total_tokens": 30139216}
{"current_steps": 2875, "total_steps": 3400, "loss": 0.3116, "lr": 6.37817026463432e-06, "epoch": 0.7404069018799897, "percentage": 84.56, "elapsed_time": "1:39:58", "remaining_time": "0:18:15", "throughput": 5033.54, "total_tokens": 30191240}
{"current_steps": 2880, "total_steps": 3400, "loss": 0.3229, "lr": 6.25984893734034e-06, "epoch": 0.7416945660571723, "percentage": 84.71, "elapsed_time": "1:41:22", "remaining_time": "0:18:18", "throughput": 4972.02, "total_tokens": 30243680}
{"current_steps": 2885, "total_steps": 3400, "loss": 0.2495, "lr": 6.142562071070179e-06, "epoch": 0.7429822302343548, "percentage": 84.85, "elapsed_time": "1:42:49", "remaining_time": "0:18:21", "throughput": 4911.03, "total_tokens": 30296376}
{"current_steps": 2890, "total_steps": 3400, "loss": 0.3083, "lr": 6.026312439675552e-06, "epoch": 0.7442698944115375, "percentage": 85.0, "elapsed_time": "1:44:14", "remaining_time": "0:18:23", "throughput": 4852.82, "total_tokens": 30349864}
{"current_steps": 2895, "total_steps": 3400, "loss": 0.3252, "lr": 5.911102792477357e-06, "epoch": 0.7455575585887201, "percentage": 85.15, "elapsed_time": "1:45:40", "remaining_time": "0:18:26", "throughput": 4794.9, "total_tokens": 30402248}
{"current_steps": 2900, "total_steps": 3400, "loss": 0.3167, "lr": 5.796935854200763e-06, "epoch": 0.7468452227659027, "percentage": 85.29, "elapsed_time": "1:47:05", "remaining_time": "0:18:27", "throughput": 4739.89, "total_tokens": 30455480}
{"current_steps": 2900, "total_steps": 3400, "eval_loss": 0.46323254704475403, "epoch": 0.7468452227659027, "percentage": 85.29, "elapsed_time": "1:47:43", "remaining_time": "0:18:34", "throughput": 4711.68, "total_tokens": 30455480}
{"current_steps": 2905, "total_steps": 3400, "loss": 0.3063, "lr": 5.683814324910685e-06, "epoch": 0.7481328869430852, "percentage": 85.44, "elapsed_time": "1:49:16", "remaining_time": "0:18:37", "throughput": 4652.91, "total_tokens": 30507096}
{"current_steps": 2910, "total_steps": 3400, "loss": 0.2694, "lr": 5.571740879947979e-06, "epoch": 0.7494205511202678, "percentage": 85.59, "elapsed_time": "1:50:42", "remaining_time": "0:18:38", "throughput": 4600.55, "total_tokens": 30558760}
{"current_steps": 2915, "total_steps": 3400, "loss": 0.2578, "lr": 5.4607181698661634e-06, "epoch": 0.7507082152974505, "percentage": 85.74, "elapsed_time": "1:52:09", "remaining_time": "0:18:39", "throughput": 4549.02, "total_tokens": 30612024}
{"current_steps": 2920, "total_steps": 3400, "loss": 0.3526, "lr": 5.35074882036869e-06, "epoch": 0.751995879474633, "percentage": 85.88, "elapsed_time": "1:53:35", "remaining_time": "0:18:40", "throughput": 4499.42, "total_tokens": 30665272}
{"current_steps": 2925, "total_steps": 3400, "loss": 0.2965, "lr": 5.241835432246889e-06, "epoch": 0.7532835436518156, "percentage": 86.03, "elapsed_time": "1:55:02", "remaining_time": "0:18:40", "throughput": 4450.28, "total_tokens": 30717104}
{"current_steps": 2930, "total_steps": 3400, "loss": 0.3122, "lr": 5.133980581318459e-06, "epoch": 0.7545712078289982, "percentage": 86.18, "elapsed_time": "1:56:28", "remaining_time": "0:18:41", "throughput": 4402.63, "total_tokens": 30769656}
{"current_steps": 2935, "total_steps": 3400, "loss": 0.2968, "lr": 5.027186818366542e-06, "epoch": 0.7558588720061807, "percentage": 86.32, "elapsed_time": "1:57:55", "remaining_time": "0:18:41", "throughput": 4356.08, "total_tokens": 30822016}
{"current_steps": 2940, "total_steps": 3400, "loss": 0.3536, "lr": 4.921456669079366e-06, "epoch": 0.7571465361833634, "percentage": 86.47, "elapsed_time": "1:59:22", "remaining_time": "0:18:40", "throughput": 4310.46, "total_tokens": 30873336}
{"current_steps": 2945, "total_steps": 3400, "loss": 0.2721, "lr": 4.816792633990569e-06, "epoch": 0.758434200360546, "percentage": 86.62, "elapsed_time": "2:00:48", "remaining_time": "0:18:39", "throughput": 4266.31, "total_tokens": 30926104}
{"current_steps": 2950, "total_steps": 3400, "loss": 0.2899, "lr": 4.713197188420026e-06, "epoch": 0.7597218645377286, "percentage": 86.76, "elapsed_time": "2:02:15", "remaining_time": "0:18:39", "throughput": 4222.93, "total_tokens": 30979312}
{"current_steps": 2950, "total_steps": 3400, "eval_loss": 0.4720001518726349, "epoch": 0.7597218645377286, "percentage": 86.76, "elapsed_time": "2:02:54", "remaining_time": "0:18:44", "throughput": 4200.85, "total_tokens": 30979312}
{"current_steps": 2955, "total_steps": 3400, "loss": 0.262, "lr": 4.610672782415276e-06, "epoch": 0.7610095287149111, "percentage": 86.91, "elapsed_time": "2:04:26", "remaining_time": "0:18:44", "throughput": 4156.52, "total_tokens": 31032752}
{"current_steps": 2960, "total_steps": 3400, "loss": 0.3094, "lr": 4.509221840693656e-06, "epoch": 0.7622971928920937, "percentage": 87.06, "elapsed_time": "2:05:53", "remaining_time": "0:18:42", "throughput": 4115.18, "total_tokens": 31085208}
{"current_steps": 2965, "total_steps": 3400, "loss": 0.2995, "lr": 4.408846762584901e-06, "epoch": 0.7635848570692764, "percentage": 87.21, "elapsed_time": "2:07:19", "remaining_time": "0:18:40", "throughput": 4075.78, "total_tokens": 31137584}
{"current_steps": 2970, "total_steps": 3400, "loss": 0.312, "lr": 4.309549921974421e-06, "epoch": 0.7648725212464589, "percentage": 87.35, "elapsed_time": "2:08:47", "remaining_time": "0:18:38", "throughput": 4036.28, "total_tokens": 31190160}
{"current_steps": 2975, "total_steps": 3400, "loss": 0.2961, "lr": 4.2113336672471245e-06, "epoch": 0.7661601854236415, "percentage": 87.5, "elapsed_time": "2:10:13", "remaining_time": "0:18:36", "throughput": 3998.45, "total_tokens": 31242024}
{"current_steps": 2980, "total_steps": 3400, "loss": 0.3188, "lr": 4.114200321231937e-06, "epoch": 0.7674478496008241, "percentage": 87.65, "elapsed_time": "2:11:41", "remaining_time": "0:18:33", "throughput": 3960.75, "total_tokens": 31294272}
{"current_steps": 2985, "total_steps": 3400, "loss": 0.2721, "lr": 4.018152181146823e-06, "epoch": 0.7687355137780066, "percentage": 87.79, "elapsed_time": "2:13:07", "remaining_time": "0:18:30", "throughput": 3924.67, "total_tokens": 31347128}
{"current_steps": 2990, "total_steps": 3400, "loss": 0.2993, "lr": 3.923191518544434e-06, "epoch": 0.7700231779551893, "percentage": 87.94, "elapsed_time": "2:14:34", "remaining_time": "0:18:27", "throughput": 3888.6, "total_tokens": 31399576}
{"current_steps": 2995, "total_steps": 3400, "loss": 0.3327, "lr": 3.829320579258466e-06, "epoch": 0.7713108421323719, "percentage": 88.09, "elapsed_time": "2:15:59", "remaining_time": "0:18:23", "throughput": 3854.49, "total_tokens": 31451704}
{"current_steps": 3000, "total_steps": 3400, "loss": 0.3522, "lr": 3.7365415833504725e-06, "epoch": 0.7725985063095545, "percentage": 88.24, "elapsed_time": "2:17:26", "remaining_time": "0:18:19", "throughput": 3820.26, "total_tokens": 31503344}
{"current_steps": 3000, "total_steps": 3400, "eval_loss": 0.4726044833660126, "epoch": 0.7725985063095545, "percentage": 88.24, "elapsed_time": "2:18:04", "remaining_time": "0:18:24", "throughput": 3802.82, "total_tokens": 31503344}
{"current_steps": 3005, "total_steps": 3400, "loss": 0.3076, "lr": 3.644856725057405e-06, "epoch": 0.773886170486737, "percentage": 88.38, "elapsed_time": "2:19:34", "remaining_time": "0:18:20", "throughput": 3768.05, "total_tokens": 31555896}
{"current_steps": 3010, "total_steps": 3400, "loss": 0.2934, "lr": 3.554268172739661e-06, "epoch": 0.7751738346639196, "percentage": 88.53, "elapsed_time": "2:21:00", "remaining_time": "0:18:16", "throughput": 3735.81, "total_tokens": 31608208}
{"current_steps": 3015, "total_steps": 3400, "loss": 0.3333, "lr": 3.4647780688298826e-06, "epoch": 0.7764614988411023, "percentage": 88.68, "elapsed_time": "2:22:26", "remaining_time": "0:18:11", "throughput": 3704.6, "total_tokens": 31659576}
{"current_steps": 3020, "total_steps": 3400, "loss": 0.2666, "lr": 3.376388529782215e-06, "epoch": 0.7777491630182848, "percentage": 88.82, "elapsed_time": "2:23:51", "remaining_time": "0:18:06", "throughput": 3673.99, "total_tokens": 31712176}
{"current_steps": 3025, "total_steps": 3400, "loss": 0.2454, "lr": 3.2891016460222967e-06, "epoch": 0.7790368271954674, "percentage": 88.97, "elapsed_time": "2:25:17", "remaining_time": "0:18:00", "throughput": 3644.09, "total_tokens": 31765672}
{"current_steps": 3030, "total_steps": 3400, "loss": 0.3242, "lr": 3.2029194818977983e-06, "epoch": 0.78032449137265, "percentage": 89.12, "elapsed_time": "2:26:42", "remaining_time": "0:17:54", "throughput": 3614.68, "total_tokens": 31818456}
{"current_steps": 3035, "total_steps": 3400, "loss": 0.3378, "lr": 3.117844075629617e-06, "epoch": 0.7816121555498327, "percentage": 89.26, "elapsed_time": "2:28:08", "remaining_time": "0:17:48", "throughput": 3585.87, "total_tokens": 31871648}
{"current_steps": 3040, "total_steps": 3400, "loss": 0.2981, "lr": 3.033877439263666e-06, "epoch": 0.7828998197270152, "percentage": 89.41, "elapsed_time": "2:29:33", "remaining_time": "0:17:42", "throughput": 3557.62, "total_tokens": 31924688}
{"current_steps": 3045, "total_steps": 3400, "loss": 0.2909, "lr": 2.951021558623274e-06, "epoch": 0.7841874839041978, "percentage": 89.56, "elapsed_time": "2:30:59", "remaining_time": "0:17:36", "throughput": 3529.88, "total_tokens": 31977752}
{"current_steps": 3050, "total_steps": 3400, "loss": 0.3137, "lr": 2.869278393262226e-06, "epoch": 0.7854751480813804, "percentage": 89.71, "elapsed_time": "2:32:23", "remaining_time": "0:17:29", "throughput": 3502.95, "total_tokens": 32030016}
{"current_steps": 3050, "total_steps": 3400, "eval_loss": 0.4746885299682617, "epoch": 0.7854751480813804, "percentage": 89.71, "elapsed_time": "2:33:01", "remaining_time": "0:17:33", "throughput": 3488.43, "total_tokens": 32030016}
{"current_steps": 3055, "total_steps": 3400, "loss": 0.3247, "lr": 2.7886498764184588e-06, "epoch": 0.7867628122585629, "percentage": 89.85, "elapsed_time": "2:34:33", "remaining_time": "0:17:27", "throughput": 3459.54, "total_tokens": 32082256}
{"current_steps": 3060, "total_steps": 3400, "loss": 0.2895, "lr": 2.7091379149682685e-06, "epoch": 0.7880504764357456, "percentage": 90.0, "elapsed_time": "2:35:58", "remaining_time": "0:17:19", "throughput": 3433.78, "total_tokens": 32134592}
{"current_steps": 3065, "total_steps": 3400, "loss": 0.294, "lr": 2.6307443893812843e-06, "epoch": 0.7893381406129282, "percentage": 90.15, "elapsed_time": "2:37:24", "remaining_time": "0:17:12", "throughput": 3407.95, "total_tokens": 32187064}
{"current_steps": 3070, "total_steps": 3400, "loss": 0.3205, "lr": 2.5534711536759404e-06, "epoch": 0.7906258047901107, "percentage": 90.29, "elapsed_time": "2:38:49", "remaining_time": "0:17:04", "throughput": 3383.13, "total_tokens": 32238944}
{"current_steps": 3075, "total_steps": 3400, "loss": 0.2726, "lr": 2.4773200353756798e-06, "epoch": 0.7919134689672933, "percentage": 90.44, "elapsed_time": "2:40:15", "remaining_time": "0:16:56", "throughput": 3358.2, "total_tokens": 32291528}
{"current_steps": 3080, "total_steps": 3400, "loss": 0.3012, "lr": 2.4022928354656473e-06, "epoch": 0.7932011331444759, "percentage": 90.59, "elapsed_time": "2:41:40", "remaining_time": "0:16:47", "throughput": 3334.27, "total_tokens": 32343600}
{"current_steps": 3085, "total_steps": 3400, "loss": 0.2712, "lr": 2.3283913283502044e-06, "epoch": 0.7944887973216586, "percentage": 90.74, "elapsed_time": "2:43:06", "remaining_time": "0:16:39", "throughput": 3310.24, "total_tokens": 32396128}
{"current_steps": 3090, "total_steps": 3400, "loss": 0.3342, "lr": 2.2556172618108997e-06, "epoch": 0.7957764614988411, "percentage": 90.88, "elapsed_time": "2:44:31", "remaining_time": "0:16:30", "throughput": 3287.24, "total_tokens": 32448624}
{"current_steps": 3095, "total_steps": 3400, "loss": 0.3132, "lr": 2.183972356965125e-06, "epoch": 0.7970641256760237, "percentage": 91.03, "elapsed_time": "2:45:57", "remaining_time": "0:16:21", "throughput": 3264.09, "total_tokens": 32500664}
{"current_steps": 3100, "total_steps": 3400, "loss": 0.2856, "lr": 2.113458308225458e-06, "epoch": 0.7983517898532063, "percentage": 91.18, "elapsed_time": "2:47:21", "remaining_time": "0:16:11", "throughput": 3241.74, "total_tokens": 32553288}
{"current_steps": 3100, "total_steps": 3400, "eval_loss": 0.4740166962146759, "epoch": 0.7983517898532063, "percentage": 91.18, "elapsed_time": "2:47:59", "remaining_time": "0:16:15", "throughput": 3229.66, "total_tokens": 32553288}
{"current_steps": 3105, "total_steps": 3400, "loss": 0.3052, "lr": 2.0440767832595574e-06, "epoch": 0.7996394540303888, "percentage": 91.32, "elapsed_time": "2:49:30", "remaining_time": "0:16:06", "throughput": 3205.89, "total_tokens": 32606096}
{"current_steps": 3110, "total_steps": 3400, "loss": 0.2125, "lr": 1.975829422950709e-06, "epoch": 0.8009271182075715, "percentage": 91.47, "elapsed_time": "2:50:55", "remaining_time": "0:15:56", "throughput": 3184.46, "total_tokens": 32659376}
{"current_steps": 3115, "total_steps": 3400, "loss": 0.3122, "lr": 1.908717841359048e-06, "epoch": 0.8022147823847541, "percentage": 91.62, "elapsed_time": "2:52:21", "remaining_time": "0:15:46", "throughput": 3163.29, "total_tokens": 32712168}
{"current_steps": 3120, "total_steps": 3400, "loss": 0.3006, "lr": 1.8427436256833852e-06, "epoch": 0.8035024465619367, "percentage": 91.76, "elapsed_time": "2:53:46", "remaining_time": "0:15:35", "throughput": 3142.38, "total_tokens": 32764296}
{"current_steps": 3125, "total_steps": 3400, "loss": 0.3077, "lr": 1.7779083362236547e-06, "epoch": 0.8047901107391192, "percentage": 91.91, "elapsed_time": "2:55:11", "remaining_time": "0:15:25", "throughput": 3121.71, "total_tokens": 32815296}
{"current_steps": 3130, "total_steps": 3400, "loss": 0.29, "lr": 1.7142135063440035e-06, "epoch": 0.8060777749163018, "percentage": 92.06, "elapsed_time": "2:56:37", "remaining_time": "0:15:14", "throughput": 3101.47, "total_tokens": 32867288}
{"current_steps": 3135, "total_steps": 3400, "loss": 0.3574, "lr": 1.6516606424365643e-06, "epoch": 0.8073654390934845, "percentage": 92.21, "elapsed_time": "2:58:02", "remaining_time": "0:15:02", "throughput": 3081.68, "total_tokens": 32919584}
{"current_steps": 3140, "total_steps": 3400, "loss": 0.2414, "lr": 1.5902512238857858e-06, "epoch": 0.808653103270667, "percentage": 92.35, "elapsed_time": "2:59:28", "remaining_time": "0:14:51", "throughput": 3062.07, "total_tokens": 32972736}
{"current_steps": 3145, "total_steps": 3400, "loss": 0.2521, "lr": 1.5299867030334814e-06, "epoch": 0.8099407674478496, "percentage": 92.5, "elapsed_time": "3:00:53", "remaining_time": "0:14:39", "throughput": 3043.05, "total_tokens": 33026320}
{"current_steps": 3150, "total_steps": 3400, "loss": 0.2669, "lr": 1.4708685051444515e-06, "epoch": 0.8112284316250322, "percentage": 92.65, "elapsed_time": "3:02:18", "remaining_time": "0:14:28", "throughput": 3023.98, "total_tokens": 33078960}
{"current_steps": 3150, "total_steps": 3400, "eval_loss": 0.4687062203884125, "epoch": 0.8112284316250322, "percentage": 92.65, "elapsed_time": "3:02:56", "remaining_time": "0:14:31", "throughput": 3013.66, "total_tokens": 33078960}
{"current_steps": 3155, "total_steps": 3400, "loss": 0.2622, "lr": 1.4128980283727943e-06, "epoch": 0.8125160958022147, "percentage": 92.79, "elapsed_time": "3:04:27", "remaining_time": "0:14:19", "throughput": 2993.58, "total_tokens": 33131352}
{"current_steps": 3160, "total_steps": 3400, "loss": 0.2776, "lr": 1.356076643728843e-06, "epoch": 0.8138037599793974, "percentage": 92.94, "elapsed_time": "3:05:54", "remaining_time": "0:14:07", "throughput": 2974.74, "total_tokens": 33183032}
{"current_steps": 3165, "total_steps": 3400, "loss": 0.247, "lr": 1.3004056950467135e-06, "epoch": 0.81509142415658, "percentage": 93.09, "elapsed_time": "3:07:21", "remaining_time": "0:13:54", "throughput": 2956.6, "total_tokens": 33235992}
{"current_steps": 3170, "total_steps": 3400, "loss": 0.2917, "lr": 1.2458864989525698e-06, "epoch": 0.8163790883337626, "percentage": 93.24, "elapsed_time": "3:08:48", "remaining_time": "0:13:41", "throughput": 2938.55, "total_tokens": 33288696}
{"current_steps": 3175, "total_steps": 3400, "loss": 0.257, "lr": 1.19252034483342e-06, "epoch": 0.8176667525109451, "percentage": 93.38, "elapsed_time": "3:10:13", "remaining_time": "0:13:28", "throughput": 2921.35, "total_tokens": 33341472}
{"current_steps": 3180, "total_steps": 3400, "loss": 0.2836, "lr": 1.1403084948067021e-06, "epoch": 0.8189544166881277, "percentage": 93.53, "elapsed_time": "3:11:39", "remaining_time": "0:13:15", "throughput": 2904.15, "total_tokens": 33394856}
{"current_steps": 3185, "total_steps": 3400, "loss": 0.3201, "lr": 1.089252183690348e-06, "epoch": 0.8202420808653104, "percentage": 93.68, "elapsed_time": "3:13:04", "remaining_time": "0:13:01", "throughput": 2887.33, "total_tokens": 33447208}
{"current_steps": 3190, "total_steps": 3400, "loss": 0.2751, "lr": 1.0393526189736602e-06, "epoch": 0.8215297450424929, "percentage": 93.82, "elapsed_time": "3:14:30", "remaining_time": "0:12:48", "throughput": 2870.61, "total_tokens": 33500288}
{"current_steps": 3195, "total_steps": 3400, "loss": 0.3231, "lr": 9.906109807887032e-07, "epoch": 0.8228174092196755, "percentage": 93.97, "elapsed_time": "3:15:55", "remaining_time": "0:12:34", "throughput": 2854.25, "total_tokens": 33552400}
{"current_steps": 3200, "total_steps": 3400, "loss": 0.3322, "lr": 9.430284218824026e-07, "epoch": 0.8241050733968581, "percentage": 94.12, "elapsed_time": "3:17:21", "remaining_time": "0:12:20", "throughput": 2837.93, "total_tokens": 33604328}
{"current_steps": 3200, "total_steps": 3400, "eval_loss": 0.47025421261787415, "epoch": 0.8241050733968581, "percentage": 94.12, "elapsed_time": "3:17:58", "remaining_time": "0:12:22", "throughput": 2828.96, "total_tokens": 33604328}
{"current_steps": 3205, "total_steps": 3400, "loss": 0.2841, "lr": 8.966060675892951e-07, "epoch": 0.8253927375740406, "percentage": 94.26, "elapsed_time": "3:19:28", "remaining_time": "0:12:08", "throughput": 2812.01, "total_tokens": 33656768}
{"current_steps": 3210, "total_steps": 3400, "loss": 0.3064, "lr": 8.513450158049108e-07, "epoch": 0.8266804017512233, "percentage": 94.41, "elapsed_time": "3:20:54", "remaining_time": "0:11:53", "throughput": 2796.49, "total_tokens": 33709960}
{"current_steps": 3215, "total_steps": 3400, "loss": 0.3126, "lr": 8.072463369597993e-07, "epoch": 0.8279680659284059, "percentage": 94.56, "elapsed_time": "3:22:19", "remaining_time": "0:11:38", "throughput": 2781.09, "total_tokens": 33762336}
{"current_steps": 3220, "total_steps": 3400, "loss": 0.2758, "lr": 7.643110739942172e-07, "epoch": 0.8292557301055885, "percentage": 94.71, "elapsed_time": "3:23:45", "remaining_time": "0:11:23", "throughput": 2765.89, "total_tokens": 33814544}
{"current_steps": 3225, "total_steps": 3400, "loss": 0.3154, "lr": 7.225402423334693e-07, "epoch": 0.830543394282771, "percentage": 94.85, "elapsed_time": "3:25:11", "remaining_time": "0:11:08", "throughput": 2750.96, "total_tokens": 33867184}
{"current_steps": 3230, "total_steps": 3400, "loss": 0.2894, "lr": 6.819348298638839e-07, "epoch": 0.8318310584599536, "percentage": 95.0, "elapsed_time": "3:26:36", "remaining_time": "0:10:52", "throughput": 2736.37, "total_tokens": 33920120}
{"current_steps": 3235, "total_steps": 3400, "loss": 0.2521, "lr": 6.424957969094536e-07, "epoch": 0.8331187226371363, "percentage": 95.15, "elapsed_time": "3:28:02", "remaining_time": "0:10:36", "throughput": 2721.63, "total_tokens": 33971928}
{"current_steps": 3240, "total_steps": 3400, "loss": 0.3532, "lr": 6.0422407620912e-07, "epoch": 0.8344063868143188, "percentage": 95.29, "elapsed_time": "3:29:27", "remaining_time": "0:10:20", "throughput": 2707.38, "total_tokens": 34024272}
{"current_steps": 3245, "total_steps": 3400, "loss": 0.2519, "lr": 5.671205728947305e-07, "epoch": 0.8356940509915014, "percentage": 95.44, "elapsed_time": "3:30:53", "remaining_time": "0:10:04", "throughput": 2693.25, "total_tokens": 34077920}
{"current_steps": 3250, "total_steps": 3400, "loss": 0.2836, "lr": 5.311861644696048e-07, "epoch": 0.836981715168684, "percentage": 95.59, "elapsed_time": "3:32:17", "remaining_time": "0:09:47", "throughput": 2679.43, "total_tokens": 34129832}
{"current_steps": 3250, "total_steps": 3400, "eval_loss": 0.46573224663734436, "epoch": 0.836981715168684, "percentage": 95.59, "elapsed_time": "3:32:55", "remaining_time": "0:09:49", "throughput": 2671.58, "total_tokens": 34129832}
{"current_steps": 3255, "total_steps": 3400, "loss": 0.3243, "lr": 4.964217007878081e-07, "epoch": 0.8382693793458666, "percentage": 95.74, "elapsed_time": "3:34:26", "remaining_time": "0:09:33", "throughput": 2656.65, "total_tokens": 34182360}
{"current_steps": 3260, "total_steps": 3400, "loss": 0.295, "lr": 4.6282800403402715e-07, "epoch": 0.8395570435230492, "percentage": 95.88, "elapsed_time": "3:35:51", "remaining_time": "0:09:16", "throughput": 2643.19, "total_tokens": 34234176}
{"current_steps": 3265, "total_steps": 3400, "loss": 0.3189, "lr": 4.3040586870415346e-07, "epoch": 0.8408447077002318, "percentage": 96.03, "elapsed_time": "3:37:18", "remaining_time": "0:08:59", "throughput": 2629.71, "total_tokens": 34287472}
{"current_steps": 3270, "total_steps": 3400, "loss": 0.2927, "lr": 3.991560615864587e-07, "epoch": 0.8421323718774144, "percentage": 96.18, "elapsed_time": "3:38:43", "remaining_time": "0:08:41", "throughput": 2616.63, "total_tokens": 34339496}
{"current_steps": 3275, "total_steps": 3400, "loss": 0.299, "lr": 3.6907932174349846e-07, "epoch": 0.8434200360545969, "percentage": 96.32, "elapsed_time": "3:40:10", "remaining_time": "0:08:24", "throughput": 2603.45, "total_tokens": 34391688}
{"current_steps": 3280, "total_steps": 3400, "loss": 0.3218, "lr": 3.40176360494604e-07, "epoch": 0.8447077002317795, "percentage": 96.47, "elapsed_time": "3:41:34", "remaining_time": "0:08:06", "throughput": 2590.8, "total_tokens": 34443720}
{"current_steps": 3285, "total_steps": 3400, "loss": 0.3295, "lr": 3.124478613990733e-07, "epoch": 0.8459953644089622, "percentage": 96.62, "elapsed_time": "3:43:01", "remaining_time": "0:07:48", "throughput": 2577.92, "total_tokens": 34495512}
{"current_steps": 3290, "total_steps": 3400, "loss": 0.2889, "lr": 2.8589448023998987e-07, "epoch": 0.8472830285861447, "percentage": 96.76, "elapsed_time": "3:44:27", "remaining_time": "0:07:30", "throughput": 2565.24, "total_tokens": 34547936}
{"current_steps": 3295, "total_steps": 3400, "loss": 0.2697, "lr": 2.605168450087514e-07, "epoch": 0.8485706927633273, "percentage": 96.91, "elapsed_time": "3:45:54", "remaining_time": "0:07:11", "throughput": 2552.74, "total_tokens": 34601320}
{"current_steps": 3300, "total_steps": 3400, "loss": 0.3135, "lr": 2.363155558901542e-07, "epoch": 0.8498583569405099, "percentage": 97.06, "elapsed_time": "3:47:21", "remaining_time": "0:06:53", "throughput": 2540.34, "total_tokens": 34654480}
{"current_steps": 3300, "total_steps": 3400, "eval_loss": 0.4714098274707794, "epoch": 0.8498583569405099, "percentage": 97.06, "elapsed_time": "3:47:59", "remaining_time": "0:06:54", "throughput": 2533.36, "total_tokens": 34654480}
{"current_steps": 3305, "total_steps": 3400, "loss": 0.3195, "lr": 2.1329118524827662e-07, "epoch": 0.8511460211176926, "percentage": 97.21, "elapsed_time": "3:49:32", "remaining_time": "0:06:35", "throughput": 2519.96, "total_tokens": 34706600}
{"current_steps": 3310, "total_steps": 3400, "loss": 0.2817, "lr": 1.9144427761286222e-07, "epoch": 0.8524336852948751, "percentage": 97.35, "elapsed_time": "3:50:59", "remaining_time": "0:06:16", "throughput": 2507.93, "total_tokens": 34759528}
{"current_steps": 3315, "total_steps": 3400, "loss": 0.3131, "lr": 1.7077534966650766e-07, "epoch": 0.8537213494720577, "percentage": 97.5, "elapsed_time": "3:52:26", "remaining_time": "0:05:57", "throughput": 2496.05, "total_tokens": 34811832}
{"current_steps": 3320, "total_steps": 3400, "loss": 0.2926, "lr": 1.51284890232406e-07, "epoch": 0.8550090136492403, "percentage": 97.65, "elapsed_time": "3:53:53", "remaining_time": "0:05:38", "throughput": 2484.34, "total_tokens": 34864696}
{"current_steps": 3325, "total_steps": 3400, "loss": 0.2606, "lr": 1.3297336026280027e-07, "epoch": 0.8562966778264228, "percentage": 97.79, "elapsed_time": "3:55:20", "remaining_time": "0:05:18", "throughput": 2472.84, "total_tokens": 34917584}
{"current_steps": 3330, "total_steps": 3400, "loss": 0.3203, "lr": 1.158411928280645e-07, "epoch": 0.8575843420036054, "percentage": 97.94, "elapsed_time": "3:56:47", "remaining_time": "0:04:58", "throughput": 2461.38, "total_tokens": 34969720}
{"current_steps": 3335, "total_steps": 3400, "loss": 0.3211, "lr": 9.988879310649513e-08, "epoch": 0.8588720061807881, "percentage": 98.09, "elapsed_time": "3:58:13", "remaining_time": "0:04:38", "throughput": 2450.16, "total_tokens": 35021296}
{"current_steps": 3340, "total_steps": 3400, "loss": 0.2923, "lr": 8.511653837470212e-08, "epoch": 0.8601596703579707, "percentage": 98.24, "elapsed_time": "3:59:41", "remaining_time": "0:04:18", "throughput": 2438.82, "total_tokens": 35073120}
{"current_steps": 3345, "total_steps": 3400, "loss": 0.289, "lr": 7.152477799867719e-08, "epoch": 0.8614473345351532, "percentage": 98.38, "elapsed_time": "4:01:07", "remaining_time": "0:03:57", "throughput": 2427.92, "total_tokens": 35126296}
{"current_steps": 3350, "total_steps": 3400, "loss": 0.3253, "lr": 5.911383342556143e-08, "epoch": 0.8627349987123358, "percentage": 98.53, "elapsed_time": "4:02:35", "remaining_time": "0:03:37", "throughput": 2416.91, "total_tokens": 35179104}
{"current_steps": 3350, "total_steps": 3400, "eval_loss": 0.4714648127555847, "epoch": 0.8627349987123358, "percentage": 98.53, "elapsed_time": "4:03:13", "remaining_time": "0:03:37", "throughput": 2410.68, "total_tokens": 35179104}
{"current_steps": 3355, "total_steps": 3400, "loss": 0.3179, "lr": 4.788399817602929e-08, "epoch": 0.8640226628895185, "percentage": 98.68, "elapsed_time": "4:04:45", "remaining_time": "0:03:16", "throughput": 2399.06, "total_tokens": 35231608}
{"current_steps": 3360, "total_steps": 3400, "loss": 0.2829, "lr": 3.7835537837338506e-08, "epoch": 0.865310327066701, "percentage": 98.82, "elapsed_time": "4:06:13", "remaining_time": "0:02:55", "throughput": 2388.31, "total_tokens": 35284448}
{"current_steps": 3365, "total_steps": 3400, "loss": 0.2579, "lr": 2.8968690057051828e-08, "epoch": 0.8665979912438836, "percentage": 98.97, "elapsed_time": "4:07:40", "remaining_time": "0:02:34", "throughput": 2377.94, "total_tokens": 35336520}
{"current_steps": 3370, "total_steps": 3400, "loss": 0.2862, "lr": 2.128366453743591e-08, "epoch": 0.8678856554210662, "percentage": 99.12, "elapsed_time": "4:09:07", "remaining_time": "0:02:13", "throughput": 2367.46, "total_tokens": 35388728}
{"current_steps": 3375, "total_steps": 3400, "loss": 0.2812, "lr": 1.4780643030476438e-08, "epoch": 0.8691733195982487, "percentage": 99.26, "elapsed_time": "4:10:34", "remaining_time": "0:01:51", "throughput": 2357.36, "total_tokens": 35441824}
{"current_steps": 3380, "total_steps": 3400, "loss": 0.3174, "lr": 9.459779333587104e-09, "epoch": 0.8704609837754314, "percentage": 99.41, "elapsed_time": "4:12:02", "remaining_time": "0:01:29", "throughput": 2347.22, "total_tokens": 35495128}
{"current_steps": 3385, "total_steps": 3400, "loss": 0.3049, "lr": 5.3211992859791835e-09, "epoch": 0.871748647952614, "percentage": 99.56, "elapsed_time": "4:13:28", "remaining_time": "0:01:07", "throughput": 2337.34, "total_tokens": 35548144}
{"current_steps": 3390, "total_steps": 3400, "loss": 0.2882, "lr": 2.3650007656805806e-09, "epoch": 0.8730363121297966, "percentage": 99.71, "elapsed_time": "4:14:56", "remaining_time": "0:00:45", "throughput": 2327.46, "total_tokens": 35600936}
{"current_steps": 3395, "total_steps": 3400, "loss": 0.2789, "lr": 5.912536872321184e-10, "epoch": 0.8743239763069791, "percentage": 99.85, "elapsed_time": "4:16:23", "remaining_time": "0:00:22", "throughput": 2317.73, "total_tokens": 35653896}
{"current_steps": 3400, "total_steps": 3400, "loss": 0.3187, "lr": 0.0, "epoch": 0.8756116404841617, "percentage": 100.0, "elapsed_time": "4:17:50", "remaining_time": "0:00:00", "throughput": 2308.08, "total_tokens": 35706848}
{"current_steps": 3400, "total_steps": 3400, "eval_loss": 0.4701705873012543, "epoch": 0.8756116404841617, "percentage": 100.0, "elapsed_time": "4:18:27", "remaining_time": "0:00:00", "throughput": 2302.5, "total_tokens": 35706848}
{"current_steps": 3400, "total_steps": 3400, "epoch": 0.8756116404841617, "percentage": 100.0, "elapsed_time": "4:18:33", "remaining_time": "0:00:00", "throughput": 2301.72, "total_tokens": 35706848}