ben81828's picture
Training in progress, step 1100
1898f4a verified
raw
history blame
59.4 kB
{"current_steps": 5, "total_steps": 3400, "loss": 3.0444, "lr": 2.9411764705882355e-06, "epoch": 0.0025753283543651817, "percentage": 0.15, "elapsed_time": "0:02:03", "remaining_time": "23:19:52", "throughput": 472.88, "total_tokens": 58496}
{"current_steps": 10, "total_steps": 3400, "loss": 2.9824, "lr": 5.882352941176471e-06, "epoch": 0.0051506567087303634, "percentage": 0.29, "elapsed_time": "0:03:11", "remaining_time": "18:03:18", "throughput": 610.01, "total_tokens": 116960}
{"current_steps": 15, "total_steps": 3400, "loss": 2.8371, "lr": 8.823529411764707e-06, "epoch": 0.007725985063095545, "percentage": 0.44, "elapsed_time": "0:04:20", "remaining_time": "16:19:06", "throughput": 673.96, "total_tokens": 175448}
{"current_steps": 20, "total_steps": 3400, "loss": 2.5198, "lr": 1.1764705882352942e-05, "epoch": 0.010301313417460727, "percentage": 0.59, "elapsed_time": "0:05:28", "remaining_time": "15:24:32", "throughput": 712.72, "total_tokens": 233944}
{"current_steps": 5, "total_steps": 3400, "loss": 3.0444, "lr": 2.9411764705882355e-06, "epoch": 0.0025753283543651817, "percentage": 0.15, "elapsed_time": "0:02:03", "remaining_time": "23:21:43", "throughput": 472.26, "total_tokens": 58496}
{"current_steps": 10, "total_steps": 3400, "loss": 2.9824, "lr": 5.882352941176471e-06, "epoch": 0.0051506567087303634, "percentage": 0.29, "elapsed_time": "0:03:12", "remaining_time": "18:05:24", "throughput": 608.83, "total_tokens": 116960}
{"current_steps": 15, "total_steps": 3400, "loss": 2.8371, "lr": 8.823529411764707e-06, "epoch": 0.007725985063095545, "percentage": 0.44, "elapsed_time": "0:04:20", "remaining_time": "16:19:01", "throughput": 674.02, "total_tokens": 175448}
{"current_steps": 20, "total_steps": 3400, "loss": 2.5198, "lr": 1.1764705882352942e-05, "epoch": 0.010301313417460727, "percentage": 0.59, "elapsed_time": "0:05:28", "remaining_time": "15:25:09", "throughput": 712.24, "total_tokens": 233944}
{"current_steps": 25, "total_steps": 3400, "loss": 1.772, "lr": 1.4705882352941177e-05, "epoch": 0.012876641771825908, "percentage": 0.74, "elapsed_time": "0:06:35", "remaining_time": "14:50:39", "throughput": 738.71, "total_tokens": 292416}
{"current_steps": 30, "total_steps": 3400, "loss": 1.2263, "lr": 1.7647058823529414e-05, "epoch": 0.01545197012619109, "percentage": 0.88, "elapsed_time": "0:07:43", "remaining_time": "14:27:35", "throughput": 757.24, "total_tokens": 350904}
{"current_steps": 35, "total_steps": 3400, "loss": 1.0102, "lr": 2.058823529411765e-05, "epoch": 0.018027298480556272, "percentage": 1.03, "elapsed_time": "0:08:51", "remaining_time": "14:11:22", "throughput": 770.5, "total_tokens": 409384}
{"current_steps": 40, "total_steps": 3400, "loss": 0.9378, "lr": 2.3529411764705884e-05, "epoch": 0.020602626834921454, "percentage": 1.18, "elapsed_time": "0:09:59", "remaining_time": "13:59:28", "throughput": 780.26, "total_tokens": 467864}
{"current_steps": 45, "total_steps": 3400, "loss": 0.9265, "lr": 2.647058823529412e-05, "epoch": 0.023177955189286635, "percentage": 1.32, "elapsed_time": "0:11:07", "remaining_time": "13:49:36", "throughput": 788.43, "total_tokens": 526384}
{"current_steps": 50, "total_steps": 3400, "loss": 0.9157, "lr": 2.9411764705882354e-05, "epoch": 0.025753283543651816, "percentage": 1.47, "elapsed_time": "0:12:15", "remaining_time": "13:41:29", "throughput": 795.0, "total_tokens": 584856}
{"current_steps": 50, "total_steps": 3400, "eval_loss": 0.9191630482673645, "epoch": 0.025753283543651816, "percentage": 1.47, "elapsed_time": "0:12:52", "remaining_time": "14:22:23", "throughput": 757.31, "total_tokens": 584856}
{"current_steps": 55, "total_steps": 3400, "loss": 0.9009, "lr": 3.235294117647059e-05, "epoch": 0.028328611898016998, "percentage": 1.62, "elapsed_time": "0:14:08", "remaining_time": "14:19:34", "throughput": 758.66, "total_tokens": 643344}
{"current_steps": 60, "total_steps": 3400, "loss": 0.9063, "lr": 3.529411764705883e-05, "epoch": 0.03090394025238218, "percentage": 1.76, "elapsed_time": "0:15:15", "remaining_time": "14:09:45", "throughput": 766.24, "total_tokens": 701808}
{"current_steps": 65, "total_steps": 3400, "loss": 0.9031, "lr": 3.8235294117647055e-05, "epoch": 0.03347926860674736, "percentage": 1.91, "elapsed_time": "0:16:23", "remaining_time": "14:01:11", "throughput": 772.9, "total_tokens": 760304}
{"current_steps": 70, "total_steps": 3400, "loss": 0.8991, "lr": 4.11764705882353e-05, "epoch": 0.036054596961112545, "percentage": 2.06, "elapsed_time": "0:17:31", "remaining_time": "13:53:40", "throughput": 778.67, "total_tokens": 818760}
{"current_steps": 75, "total_steps": 3400, "loss": 0.9055, "lr": 4.411764705882353e-05, "epoch": 0.03862992531547772, "percentage": 2.21, "elapsed_time": "0:18:38", "remaining_time": "13:46:47", "throughput": 783.98, "total_tokens": 877256}
{"current_steps": 80, "total_steps": 3400, "loss": 0.9092, "lr": 4.705882352941177e-05, "epoch": 0.04120525366984291, "percentage": 2.35, "elapsed_time": "0:19:46", "remaining_time": "13:40:53", "throughput": 788.45, "total_tokens": 935752}
{"current_steps": 85, "total_steps": 3400, "loss": 0.9069, "lr": 5e-05, "epoch": 0.043780582024208085, "percentage": 2.5, "elapsed_time": "0:20:53", "remaining_time": "13:35:01", "throughput": 792.91, "total_tokens": 994216}
{"current_steps": 90, "total_steps": 3400, "loss": 0.8924, "lr": 5.294117647058824e-05, "epoch": 0.04635591037857327, "percentage": 2.65, "elapsed_time": "0:22:02", "remaining_time": "13:30:22", "throughput": 796.26, "total_tokens": 1052704}
{"current_steps": 95, "total_steps": 3400, "loss": 0.9059, "lr": 5.588235294117647e-05, "epoch": 0.04893123873293845, "percentage": 2.79, "elapsed_time": "0:23:10", "remaining_time": "13:26:06", "throughput": 799.26, "total_tokens": 1111176}
{"current_steps": 100, "total_steps": 3400, "loss": 0.901, "lr": 5.882352941176471e-05, "epoch": 0.05150656708730363, "percentage": 2.94, "elapsed_time": "0:24:20", "remaining_time": "13:23:15", "throughput": 800.88, "total_tokens": 1169664}
{"current_steps": 100, "total_steps": 3400, "eval_loss": 0.9077914953231812, "epoch": 0.05150656708730363, "percentage": 2.94, "elapsed_time": "0:24:37", "remaining_time": "13:32:32", "throughput": 791.73, "total_tokens": 1169664}
{"current_steps": 105, "total_steps": 3400, "loss": 0.9159, "lr": 6.176470588235295e-05, "epoch": 0.05408189544166881, "percentage": 3.09, "elapsed_time": "0:25:51", "remaining_time": "13:31:17", "throughput": 791.74, "total_tokens": 1228112}
{"current_steps": 110, "total_steps": 3400, "loss": 0.91, "lr": 6.470588235294118e-05, "epoch": 0.056657223796033995, "percentage": 3.24, "elapsed_time": "0:27:00", "remaining_time": "13:27:33", "throughput": 794.19, "total_tokens": 1286608}
{"current_steps": 115, "total_steps": 3400, "loss": 0.9047, "lr": 6.764705882352942e-05, "epoch": 0.05923255215039917, "percentage": 3.38, "elapsed_time": "0:28:08", "remaining_time": "13:23:45", "throughput": 796.72, "total_tokens": 1345072}
{"current_steps": 120, "total_steps": 3400, "loss": 0.9022, "lr": 7.058823529411765e-05, "epoch": 0.06180788050476436, "percentage": 3.53, "elapsed_time": "0:29:16", "remaining_time": "13:20:18", "throughput": 798.94, "total_tokens": 1403544}
{"current_steps": 125, "total_steps": 3400, "loss": 0.9081, "lr": 7.352941176470589e-05, "epoch": 0.06438320885912954, "percentage": 3.68, "elapsed_time": "0:30:25", "remaining_time": "13:17:01", "throughput": 801.0, "total_tokens": 1462024}
{"current_steps": 130, "total_steps": 3400, "loss": 0.8939, "lr": 7.647058823529411e-05, "epoch": 0.06695853721349472, "percentage": 3.82, "elapsed_time": "0:31:33", "remaining_time": "13:13:51", "throughput": 802.98, "total_tokens": 1520528}
{"current_steps": 135, "total_steps": 3400, "loss": 0.9029, "lr": 7.941176470588235e-05, "epoch": 0.0695338655678599, "percentage": 3.97, "elapsed_time": "0:32:42", "remaining_time": "13:10:59", "throughput": 804.66, "total_tokens": 1579024}
{"current_steps": 140, "total_steps": 3400, "loss": 0.9014, "lr": 8.23529411764706e-05, "epoch": 0.07210919392222509, "percentage": 4.12, "elapsed_time": "0:33:50", "remaining_time": "13:07:57", "throughput": 806.53, "total_tokens": 1637504}
{"current_steps": 145, "total_steps": 3400, "loss": 0.9053, "lr": 8.529411764705883e-05, "epoch": 0.07468452227659027, "percentage": 4.26, "elapsed_time": "0:34:58", "remaining_time": "13:05:04", "throughput": 808.26, "total_tokens": 1696024}
{"current_steps": 150, "total_steps": 3400, "loss": 0.9032, "lr": 8.823529411764706e-05, "epoch": 0.07725985063095545, "percentage": 4.41, "elapsed_time": "0:36:06", "remaining_time": "13:02:20", "throughput": 809.84, "total_tokens": 1754512}
{"current_steps": 150, "total_steps": 3400, "eval_loss": 0.8962129950523376, "epoch": 0.07725985063095545, "percentage": 4.41, "elapsed_time": "0:36:23", "remaining_time": "13:08:30", "throughput": 803.51, "total_tokens": 1754512}
{"current_steps": 155, "total_steps": 3400, "loss": 0.8985, "lr": 9.11764705882353e-05, "epoch": 0.07983517898532062, "percentage": 4.56, "elapsed_time": "0:37:40", "remaining_time": "13:08:35", "throughput": 802.19, "total_tokens": 1812976}
{"current_steps": 160, "total_steps": 3400, "loss": 0.8949, "lr": 9.411764705882353e-05, "epoch": 0.08241050733968582, "percentage": 4.71, "elapsed_time": "0:38:48", "remaining_time": "13:05:42", "throughput": 803.89, "total_tokens": 1871464}
{"current_steps": 165, "total_steps": 3400, "loss": 0.9069, "lr": 9.705882352941177e-05, "epoch": 0.08498583569405099, "percentage": 4.85, "elapsed_time": "0:39:55", "remaining_time": "13:02:45", "throughput": 805.66, "total_tokens": 1929928}
{"current_steps": 170, "total_steps": 3400, "loss": 0.9049, "lr": 0.0001, "epoch": 0.08756116404841617, "percentage": 5.0, "elapsed_time": "0:41:03", "remaining_time": "13:00:00", "throughput": 807.27, "total_tokens": 1988432}
{"current_steps": 175, "total_steps": 3400, "loss": 0.9026, "lr": 9.999940874631277e-05, "epoch": 0.09013649240278135, "percentage": 5.15, "elapsed_time": "0:42:10", "remaining_time": "12:57:05", "throughput": 809.04, "total_tokens": 2046920}
{"current_steps": 180, "total_steps": 3400, "loss": 0.8984, "lr": 9.999763499923432e-05, "epoch": 0.09271182075714654, "percentage": 5.29, "elapsed_time": "0:43:16", "remaining_time": "12:54:16", "throughput": 810.72, "total_tokens": 2105392}
{"current_steps": 185, "total_steps": 3400, "loss": 0.9057, "lr": 9.999467880071402e-05, "epoch": 0.09528714911151172, "percentage": 5.44, "elapsed_time": "0:44:23", "remaining_time": "12:51:33", "throughput": 812.3, "total_tokens": 2163872}
{"current_steps": 190, "total_steps": 3400, "loss": 0.9078, "lr": 9.999054022066641e-05, "epoch": 0.0978624774658769, "percentage": 5.59, "elapsed_time": "0:45:31", "remaining_time": "12:49:08", "throughput": 813.59, "total_tokens": 2222352}
{"current_steps": 195, "total_steps": 3400, "loss": 0.9028, "lr": 9.998521935696953e-05, "epoch": 0.10043780582024209, "percentage": 5.74, "elapsed_time": "0:46:38", "remaining_time": "12:46:33", "throughput": 815.05, "total_tokens": 2280800}
{"current_steps": 200, "total_steps": 3400, "loss": 0.9053, "lr": 9.997871633546257e-05, "epoch": 0.10301313417460727, "percentage": 5.88, "elapsed_time": "0:47:45", "remaining_time": "12:44:03", "throughput": 816.44, "total_tokens": 2339304}
{"current_steps": 200, "total_steps": 3400, "eval_loss": 0.8982028961181641, "epoch": 0.10301313417460727, "percentage": 5.88, "elapsed_time": "0:48:02", "remaining_time": "12:48:34", "throughput": 811.65, "total_tokens": 2339304}
{"current_steps": 205, "total_steps": 3400, "loss": 0.9003, "lr": 9.997103130994296e-05, "epoch": 0.10558846252897244, "percentage": 6.03, "elapsed_time": "0:49:13", "remaining_time": "12:47:17", "throughput": 811.74, "total_tokens": 2397808}
{"current_steps": 210, "total_steps": 3400, "loss": 0.8969, "lr": 9.996216446216267e-05, "epoch": 0.10816379088333762, "percentage": 6.18, "elapsed_time": "0:50:20", "remaining_time": "12:44:49", "throughput": 813.08, "total_tokens": 2456288}
{"current_steps": 215, "total_steps": 3400, "loss": 0.9114, "lr": 9.995211600182397e-05, "epoch": 0.11073911923770281, "percentage": 6.32, "elapsed_time": "0:51:28", "remaining_time": "12:42:28", "throughput": 814.32, "total_tokens": 2514784}
{"current_steps": 220, "total_steps": 3400, "loss": 0.899, "lr": 9.994088616657444e-05, "epoch": 0.11331444759206799, "percentage": 6.47, "elapsed_time": "0:52:35", "remaining_time": "12:40:13", "throughput": 815.44, "total_tokens": 2573240}
{"current_steps": 225, "total_steps": 3400, "loss": 0.898, "lr": 9.992847522200133e-05, "epoch": 0.11588977594643317, "percentage": 6.62, "elapsed_time": "0:53:42", "remaining_time": "12:37:54", "throughput": 816.63, "total_tokens": 2631672}
{"current_steps": 230, "total_steps": 3400, "loss": 0.9006, "lr": 9.99148834616253e-05, "epoch": 0.11846510430079835, "percentage": 6.76, "elapsed_time": "0:54:49", "remaining_time": "12:35:36", "throughput": 817.82, "total_tokens": 2690112}
{"current_steps": 235, "total_steps": 3400, "loss": 0.8973, "lr": 9.990011120689351e-05, "epoch": 0.12104043265516354, "percentage": 6.91, "elapsed_time": "0:55:56", "remaining_time": "12:33:23", "throughput": 818.92, "total_tokens": 2748608}
{"current_steps": 240, "total_steps": 3400, "loss": 0.8885, "lr": 9.988415880717194e-05, "epoch": 0.12361576100952872, "percentage": 7.06, "elapsed_time": "0:57:03", "remaining_time": "12:31:13", "throughput": 820.0, "total_tokens": 2807080}
{"current_steps": 245, "total_steps": 3400, "loss": 0.9066, "lr": 9.986702663973722e-05, "epoch": 0.1261910893638939, "percentage": 7.21, "elapsed_time": "0:58:10", "remaining_time": "12:29:07", "throughput": 820.98, "total_tokens": 2865520}
{"current_steps": 250, "total_steps": 3400, "loss": 0.9098, "lr": 9.98487151097676e-05, "epoch": 0.12876641771825909, "percentage": 7.35, "elapsed_time": "0:59:17", "remaining_time": "12:26:59", "throughput": 822.02, "total_tokens": 2924016}
{"current_steps": 250, "total_steps": 3400, "eval_loss": 0.8956434726715088, "epoch": 0.12876641771825909, "percentage": 7.35, "elapsed_time": "0:59:34", "remaining_time": "12:30:40", "throughput": 818.0, "total_tokens": 2924016}
{"current_steps": 255, "total_steps": 3400, "loss": 0.8987, "lr": 9.98292246503335e-05, "epoch": 0.13134174607262425, "percentage": 7.5, "elapsed_time": "1:00:46", "remaining_time": "12:29:36", "throughput": 817.86, "total_tokens": 2982520}
{"current_steps": 260, "total_steps": 3400, "loss": 0.9036, "lr": 9.980855572238714e-05, "epoch": 0.13391707442698944, "percentage": 7.65, "elapsed_time": "1:01:53", "remaining_time": "12:27:29", "throughput": 818.88, "total_tokens": 3041008}
{"current_steps": 265, "total_steps": 3400, "loss": 0.8961, "lr": 9.978670881475172e-05, "epoch": 0.13649240278135463, "percentage": 7.79, "elapsed_time": "1:03:00", "remaining_time": "12:25:24", "throughput": 819.86, "total_tokens": 3099464}
{"current_steps": 270, "total_steps": 3400, "loss": 0.9012, "lr": 9.976368444410985e-05, "epoch": 0.1390677311357198, "percentage": 7.94, "elapsed_time": "1:04:07", "remaining_time": "12:23:27", "throughput": 820.69, "total_tokens": 3157944}
{"current_steps": 275, "total_steps": 3400, "loss": 0.8985, "lr": 9.973948315499126e-05, "epoch": 0.141643059490085, "percentage": 8.09, "elapsed_time": "1:05:15", "remaining_time": "12:21:28", "throughput": 821.56, "total_tokens": 3216448}
{"current_steps": 280, "total_steps": 3400, "loss": 0.9114, "lr": 9.971410551976002e-05, "epoch": 0.14421838784445018, "percentage": 8.24, "elapsed_time": "1:06:21", "remaining_time": "12:19:27", "throughput": 822.49, "total_tokens": 3274928}
{"current_steps": 285, "total_steps": 3400, "loss": 0.8886, "lr": 9.968755213860094e-05, "epoch": 0.14679371619881534, "percentage": 8.38, "elapsed_time": "1:07:28", "remaining_time": "12:17:29", "throughput": 823.36, "total_tokens": 3333408}
{"current_steps": 290, "total_steps": 3400, "loss": 0.8929, "lr": 9.96598236395054e-05, "epoch": 0.14936904455318054, "percentage": 8.53, "elapsed_time": "1:08:35", "remaining_time": "12:15:35", "throughput": 824.18, "total_tokens": 3391896}
{"current_steps": 295, "total_steps": 3400, "loss": 0.9091, "lr": 9.96309206782565e-05, "epoch": 0.1519443729075457, "percentage": 8.68, "elapsed_time": "1:09:42", "remaining_time": "12:13:46", "throughput": 824.88, "total_tokens": 3450392}
{"current_steps": 300, "total_steps": 3400, "loss": 0.8893, "lr": 9.960084393841355e-05, "epoch": 0.1545197012619109, "percentage": 8.82, "elapsed_time": "1:10:49", "remaining_time": "12:11:54", "throughput": 825.67, "total_tokens": 3508888}
{"current_steps": 300, "total_steps": 3400, "eval_loss": 0.8908902406692505, "epoch": 0.1545197012619109, "percentage": 8.82, "elapsed_time": "1:11:06", "remaining_time": "12:14:49", "throughput": 822.39, "total_tokens": 3508888}
{"current_steps": 305, "total_steps": 3400, "loss": 0.9056, "lr": 9.956959413129585e-05, "epoch": 0.15709502961627608, "percentage": 8.97, "elapsed_time": "1:12:19", "remaining_time": "12:13:52", "throughput": 822.12, "total_tokens": 3567368}
{"current_steps": 310, "total_steps": 3400, "loss": 0.8982, "lr": 9.953717199596598e-05, "epoch": 0.15967035797064125, "percentage": 9.12, "elapsed_time": "1:13:26", "remaining_time": "12:12:05", "throughput": 822.79, "total_tokens": 3625848}
{"current_steps": 315, "total_steps": 3400, "loss": 0.8968, "lr": 9.95035782992122e-05, "epoch": 0.16224568632500644, "percentage": 9.26, "elapsed_time": "1:14:34", "remaining_time": "12:10:19", "throughput": 823.44, "total_tokens": 3684336}
{"current_steps": 320, "total_steps": 3400, "loss": 0.8975, "lr": 9.94688138355304e-05, "epoch": 0.16482101467937163, "percentage": 9.41, "elapsed_time": "1:15:42", "remaining_time": "12:08:37", "throughput": 824.02, "total_tokens": 3742800}
{"current_steps": 325, "total_steps": 3400, "loss": 0.9061, "lr": 9.943287942710527e-05, "epoch": 0.1673963430337368, "percentage": 9.56, "elapsed_time": "1:16:50", "remaining_time": "12:07:00", "throughput": 824.51, "total_tokens": 3801280}
{"current_steps": 330, "total_steps": 3400, "loss": 0.8948, "lr": 9.939577592379088e-05, "epoch": 0.16997167138810199, "percentage": 9.71, "elapsed_time": "1:17:58", "remaining_time": "12:05:19", "throughput": 825.09, "total_tokens": 3859792}
{"current_steps": 335, "total_steps": 3400, "loss": 0.9063, "lr": 9.935750420309055e-05, "epoch": 0.17254699974246718, "percentage": 9.85, "elapsed_time": "1:19:05", "remaining_time": "12:03:41", "throughput": 825.61, "total_tokens": 3918272}
{"current_steps": 340, "total_steps": 3400, "loss": 0.8952, "lr": 9.931806517013612e-05, "epoch": 0.17512232809683234, "percentage": 10.0, "elapsed_time": "1:20:14", "remaining_time": "12:02:06", "throughput": 826.07, "total_tokens": 3976760}
{"current_steps": 345, "total_steps": 3400, "loss": 0.9136, "lr": 9.927745975766654e-05, "epoch": 0.17769765645119753, "percentage": 10.15, "elapsed_time": "1:21:22", "remaining_time": "12:00:30", "throughput": 826.55, "total_tokens": 4035240}
{"current_steps": 350, "total_steps": 3400, "loss": 0.9075, "lr": 9.923568892600578e-05, "epoch": 0.1802729848055627, "percentage": 10.29, "elapsed_time": "1:22:30", "remaining_time": "11:59:02", "throughput": 826.88, "total_tokens": 4093688}
{"current_steps": 350, "total_steps": 3400, "eval_loss": 0.89204341173172, "epoch": 0.1802729848055627, "percentage": 10.29, "elapsed_time": "1:22:47", "remaining_time": "12:01:26", "throughput": 824.12, "total_tokens": 4093688}
{"current_steps": 355, "total_steps": 3400, "loss": 0.8812, "lr": 9.91927536630402e-05, "epoch": 0.1828483131599279, "percentage": 10.44, "elapsed_time": "1:24:00", "remaining_time": "12:00:35", "throughput": 823.75, "total_tokens": 4152160}
{"current_steps": 360, "total_steps": 3400, "loss": 0.9109, "lr": 9.91486549841951e-05, "epoch": 0.18542364151429308, "percentage": 10.59, "elapsed_time": "1:25:08", "remaining_time": "11:59:02", "throughput": 824.17, "total_tokens": 4210648}
{"current_steps": 365, "total_steps": 3400, "loss": 0.9176, "lr": 9.91033939324107e-05, "epoch": 0.18799896986865824, "percentage": 10.74, "elapsed_time": "1:26:17", "remaining_time": "11:57:30", "throughput": 824.56, "total_tokens": 4269136}
{"current_steps": 370, "total_steps": 3400, "loss": 0.9077, "lr": 9.905697157811761e-05, "epoch": 0.19057429822302344, "percentage": 10.88, "elapsed_time": "1:27:26", "remaining_time": "11:56:02", "throughput": 824.91, "total_tokens": 4327664}
{"current_steps": 375, "total_steps": 3400, "loss": 0.893, "lr": 9.900938901921131e-05, "epoch": 0.19314962657738863, "percentage": 11.03, "elapsed_time": "1:28:34", "remaining_time": "11:54:32", "throughput": 825.27, "total_tokens": 4386120}
{"current_steps": 380, "total_steps": 3400, "loss": 0.9094, "lr": 9.896064738102635e-05, "epoch": 0.1957249549317538, "percentage": 11.18, "elapsed_time": "1:29:43", "remaining_time": "11:53:03", "throughput": 825.62, "total_tokens": 4444560}
{"current_steps": 385, "total_steps": 3400, "loss": 0.9091, "lr": 9.891074781630966e-05, "epoch": 0.19830028328611898, "percentage": 11.32, "elapsed_time": "1:30:51", "remaining_time": "11:51:29", "throughput": 826.06, "total_tokens": 4503016}
{"current_steps": 390, "total_steps": 3400, "loss": 0.9033, "lr": 9.885969150519331e-05, "epoch": 0.20087561164048418, "percentage": 11.47, "elapsed_time": "1:31:58", "remaining_time": "11:49:54", "throughput": 826.52, "total_tokens": 4561496}
{"current_steps": 395, "total_steps": 3400, "loss": 0.8851, "lr": 9.88074796551666e-05, "epoch": 0.20345093999484934, "percentage": 11.62, "elapsed_time": "1:33:05", "remaining_time": "11:48:14", "throughput": 827.08, "total_tokens": 4619944}
{"current_steps": 400, "total_steps": 3400, "loss": 0.9004, "lr": 9.875411350104744e-05, "epoch": 0.20602626834921453, "percentage": 11.76, "elapsed_time": "1:34:12", "remaining_time": "11:46:36", "throughput": 827.62, "total_tokens": 4678384}
{"current_steps": 400, "total_steps": 3400, "eval_loss": 0.9086406826972961, "epoch": 0.20602626834921453, "percentage": 11.76, "elapsed_time": "1:34:29", "remaining_time": "11:48:42", "throughput": 825.17, "total_tokens": 4678384}
{"current_steps": 405, "total_steps": 3400, "loss": 0.8976, "lr": 9.86995943049533e-05, "epoch": 0.2086015967035797, "percentage": 11.91, "elapsed_time": "1:35:41", "remaining_time": "11:47:40", "throughput": 824.99, "total_tokens": 4736904}
{"current_steps": 410, "total_steps": 3400, "loss": 0.9134, "lr": 9.864392335627117e-05, "epoch": 0.2111769250579449, "percentage": 12.06, "elapsed_time": "1:36:49", "remaining_time": "11:46:04", "throughput": 825.49, "total_tokens": 4795376}
{"current_steps": 415, "total_steps": 3400, "loss": 0.8955, "lr": 9.858710197162721e-05, "epoch": 0.21375225341231008, "percentage": 12.21, "elapsed_time": "1:37:56", "remaining_time": "11:44:26", "throughput": 826.02, "total_tokens": 4853880}
{"current_steps": 420, "total_steps": 3400, "loss": 0.9014, "lr": 9.852913149485556e-05, "epoch": 0.21632758176667524, "percentage": 12.35, "elapsed_time": "1:39:02", "remaining_time": "11:42:46", "throughput": 826.59, "total_tokens": 4912360}
{"current_steps": 425, "total_steps": 3400, "loss": 0.9065, "lr": 9.847001329696653e-05, "epoch": 0.21890291012104043, "percentage": 12.5, "elapsed_time": "1:40:09", "remaining_time": "11:41:08", "throughput": 827.12, "total_tokens": 4970872}
{"current_steps": 430, "total_steps": 3400, "loss": 0.8952, "lr": 9.840974877611422e-05, "epoch": 0.22147823847540563, "percentage": 12.65, "elapsed_time": "1:41:17", "remaining_time": "11:39:33", "throughput": 827.59, "total_tokens": 5029304}
{"current_steps": 435, "total_steps": 3400, "loss": 0.9106, "lr": 9.834833935756344e-05, "epoch": 0.2240535668297708, "percentage": 12.79, "elapsed_time": "1:42:25", "remaining_time": "11:38:11", "throughput": 827.83, "total_tokens": 5087800}
{"current_steps": 440, "total_steps": 3400, "loss": 0.8996, "lr": 9.828578649365601e-05, "epoch": 0.22662889518413598, "percentage": 12.94, "elapsed_time": "1:43:34", "remaining_time": "11:36:48", "throughput": 828.08, "total_tokens": 5146312}
{"current_steps": 445, "total_steps": 3400, "loss": 0.8999, "lr": 9.822209166377635e-05, "epoch": 0.22920422353850115, "percentage": 13.09, "elapsed_time": "1:44:43", "remaining_time": "11:35:24", "throughput": 828.35, "total_tokens": 5204800}
{"current_steps": 450, "total_steps": 3400, "loss": 0.9076, "lr": 9.815725637431662e-05, "epoch": 0.23177955189286634, "percentage": 13.24, "elapsed_time": "1:45:53", "remaining_time": "11:34:08", "throughput": 828.45, "total_tokens": 5263304}
{"current_steps": 450, "total_steps": 3400, "eval_loss": 0.8962157368659973, "epoch": 0.23177955189286634, "percentage": 13.24, "elapsed_time": "1:46:10", "remaining_time": "11:36:01", "throughput": 826.21, "total_tokens": 5263304}
{"current_steps": 455, "total_steps": 3400, "loss": 0.8942, "lr": 9.809128215864097e-05, "epoch": 0.23435488024723153, "percentage": 13.38, "elapsed_time": "1:47:26", "remaining_time": "11:35:24", "throughput": 825.54, "total_tokens": 5321760}
{"current_steps": 460, "total_steps": 3400, "loss": 0.9099, "lr": 9.802417057704931e-05, "epoch": 0.2369302086015967, "percentage": 13.53, "elapsed_time": "1:48:35", "remaining_time": "11:34:01", "throughput": 825.79, "total_tokens": 5380224}
{"current_steps": 465, "total_steps": 3400, "loss": 0.8981, "lr": 9.795592321674045e-05, "epoch": 0.23950553695596188, "percentage": 13.68, "elapsed_time": "1:49:44", "remaining_time": "11:32:37", "throughput": 826.03, "total_tokens": 5438704}
{"current_steps": 470, "total_steps": 3400, "loss": 0.8952, "lr": 9.788654169177453e-05, "epoch": 0.24208086531032708, "percentage": 13.82, "elapsed_time": "1:50:53", "remaining_time": "11:31:17", "throughput": 826.23, "total_tokens": 5497208}
{"current_steps": 475, "total_steps": 3400, "loss": 0.8959, "lr": 9.781602764303487e-05, "epoch": 0.24465619366469224, "percentage": 13.97, "elapsed_time": "1:52:01", "remaining_time": "11:29:52", "throughput": 826.52, "total_tokens": 5555704}
{"current_steps": 480, "total_steps": 3400, "loss": 0.901, "lr": 9.774438273818911e-05, "epoch": 0.24723152201905743, "percentage": 14.12, "elapsed_time": "1:53:09", "remaining_time": "11:28:25", "throughput": 826.83, "total_tokens": 5614160}
{"current_steps": 485, "total_steps": 3400, "loss": 0.9008, "lr": 9.767160867164979e-05, "epoch": 0.24980685037342262, "percentage": 14.26, "elapsed_time": "1:54:18", "remaining_time": "11:27:00", "throughput": 827.13, "total_tokens": 5672640}
{"current_steps": 490, "total_steps": 3400, "loss": 0.9016, "lr": 9.759770716453436e-05, "epoch": 0.2523821787277878, "percentage": 14.41, "elapsed_time": "1:55:26", "remaining_time": "11:25:37", "throughput": 827.37, "total_tokens": 5731072}
{"current_steps": 495, "total_steps": 3400, "loss": 0.9132, "lr": 9.752267996462434e-05, "epoch": 0.254957507082153, "percentage": 14.56, "elapsed_time": "1:56:35", "remaining_time": "11:24:12", "throughput": 827.64, "total_tokens": 5789544}
{"current_steps": 500, "total_steps": 3400, "loss": 0.8962, "lr": 9.744652884632406e-05, "epoch": 0.25753283543651817, "percentage": 14.71, "elapsed_time": "1:57:43", "remaining_time": "11:22:47", "throughput": 827.95, "total_tokens": 5848048}
{"current_steps": 500, "total_steps": 3400, "eval_loss": 0.8987945914268494, "epoch": 0.25753283543651817, "percentage": 14.71, "elapsed_time": "1:58:00", "remaining_time": "11:24:26", "throughput": 825.94, "total_tokens": 5848048}
{"current_steps": 505, "total_steps": 3400, "loss": 0.8954, "lr": 9.736925561061871e-05, "epoch": 0.26010816379088336, "percentage": 14.85, "elapsed_time": "1:59:12", "remaining_time": "11:23:24", "throughput": 825.76, "total_tokens": 5906512}
{"current_steps": 510, "total_steps": 3400, "loss": 0.8927, "lr": 9.729086208503174e-05, "epoch": 0.2626834921452485, "percentage": 15.0, "elapsed_time": "2:00:20", "remaining_time": "11:21:55", "throughput": 826.14, "total_tokens": 5965024}
{"current_steps": 515, "total_steps": 3400, "loss": 0.898, "lr": 9.721135012358156e-05, "epoch": 0.2652588204996137, "percentage": 15.15, "elapsed_time": "2:01:28", "remaining_time": "11:20:27", "throughput": 826.48, "total_tokens": 6023496}
{"current_steps": 520, "total_steps": 3400, "loss": 0.9016, "lr": 9.713072160673777e-05, "epoch": 0.2678341488539789, "percentage": 15.29, "elapsed_time": "2:02:35", "remaining_time": "11:19:00", "throughput": 826.81, "total_tokens": 6082000}
{"current_steps": 525, "total_steps": 3400, "loss": 0.8842, "lr": 9.704897844137673e-05, "epoch": 0.2704094772083441, "percentage": 15.44, "elapsed_time": "2:03:43", "remaining_time": "11:17:34", "throughput": 827.13, "total_tokens": 6140480}
{"current_steps": 530, "total_steps": 3400, "loss": 0.8921, "lr": 9.696612256073633e-05, "epoch": 0.27298480556270927, "percentage": 15.59, "elapsed_time": "2:04:51", "remaining_time": "11:16:05", "throughput": 827.5, "total_tokens": 6198968}
{"current_steps": 535, "total_steps": 3400, "loss": 0.8979, "lr": 9.688215592437039e-05, "epoch": 0.2755601339170744, "percentage": 15.74, "elapsed_time": "2:05:58", "remaining_time": "11:14:37", "throughput": 827.85, "total_tokens": 6257464}
{"current_steps": 540, "total_steps": 3400, "loss": 0.8951, "lr": 9.679708051810221e-05, "epoch": 0.2781354622714396, "percentage": 15.88, "elapsed_time": "2:07:06", "remaining_time": "11:13:12", "throughput": 828.16, "total_tokens": 6315944}
{"current_steps": 545, "total_steps": 3400, "loss": 0.9149, "lr": 9.67108983539777e-05, "epoch": 0.2807107906258048, "percentage": 16.03, "elapsed_time": "2:08:15", "remaining_time": "11:11:51", "throughput": 828.37, "total_tokens": 6374408}
{"current_steps": 550, "total_steps": 3400, "loss": 0.9013, "lr": 9.662361147021779e-05, "epoch": 0.28328611898017, "percentage": 16.18, "elapsed_time": "2:09:23", "remaining_time": "11:10:29", "throughput": 828.6, "total_tokens": 6432936}
{"current_steps": 550, "total_steps": 3400, "eval_loss": 0.9001271724700928, "epoch": 0.28328611898017, "percentage": 16.18, "elapsed_time": "2:09:40", "remaining_time": "11:11:57", "throughput": 826.79, "total_tokens": 6432936}
{"current_steps": 555, "total_steps": 3400, "loss": 0.8981, "lr": 9.653522193117013e-05, "epoch": 0.28586144733453517, "percentage": 16.32, "elapsed_time": "2:10:53", "remaining_time": "11:10:57", "throughput": 826.56, "total_tokens": 6491400}
{"current_steps": 560, "total_steps": 3400, "loss": 0.9041, "lr": 9.644573182726035e-05, "epoch": 0.28843677568890036, "percentage": 16.47, "elapsed_time": "2:12:01", "remaining_time": "11:09:31", "throughput": 826.89, "total_tokens": 6549872}
{"current_steps": 565, "total_steps": 3400, "loss": 0.9024, "lr": 9.63551432749426e-05, "epoch": 0.2910121040432655, "percentage": 16.62, "elapsed_time": "2:13:08", "remaining_time": "11:08:04", "throughput": 827.21, "total_tokens": 6608296}
{"current_steps": 570, "total_steps": 3400, "loss": 0.9002, "lr": 9.626345841664953e-05, "epoch": 0.2935874323976307, "percentage": 16.76, "elapsed_time": "2:14:16", "remaining_time": "11:06:41", "throughput": 827.46, "total_tokens": 6666768}
{"current_steps": 575, "total_steps": 3400, "loss": 0.9035, "lr": 9.617067942074153e-05, "epoch": 0.2961627607519959, "percentage": 16.91, "elapsed_time": "2:15:24", "remaining_time": "11:05:16", "throughput": 827.76, "total_tokens": 6725248}
{"current_steps": 580, "total_steps": 3400, "loss": 0.9019, "lr": 9.607680848145558e-05, "epoch": 0.29873808910636107, "percentage": 17.06, "elapsed_time": "2:16:32", "remaining_time": "11:03:54", "throughput": 827.99, "total_tokens": 6783680}
{"current_steps": 585, "total_steps": 3400, "loss": 0.9001, "lr": 9.598184781885318e-05, "epoch": 0.30131341746072626, "percentage": 17.21, "elapsed_time": "2:17:40", "remaining_time": "11:02:30", "throughput": 828.27, "total_tokens": 6842144}
{"current_steps": 590, "total_steps": 3400, "loss": 0.8961, "lr": 9.588579967876806e-05, "epoch": 0.3038887458150914, "percentage": 17.35, "elapsed_time": "2:18:48", "remaining_time": "11:01:05", "throughput": 828.58, "total_tokens": 6900656}
{"current_steps": 595, "total_steps": 3400, "loss": 0.9, "lr": 9.578866633275288e-05, "epoch": 0.3064640741694566, "percentage": 17.5, "elapsed_time": "2:19:56", "remaining_time": "10:59:41", "throughput": 828.86, "total_tokens": 6959128}
{"current_steps": 600, "total_steps": 3400, "loss": 0.9046, "lr": 9.569045007802559e-05, "epoch": 0.3090394025238218, "percentage": 17.65, "elapsed_time": "2:21:03", "remaining_time": "10:58:14", "throughput": 829.2, "total_tokens": 7017576}
{"current_steps": 600, "total_steps": 3400, "eval_loss": 0.9053278565406799, "epoch": 0.3090394025238218, "percentage": 17.65, "elapsed_time": "2:21:20", "remaining_time": "10:59:34", "throughput": 827.52, "total_tokens": 7017576}
{"current_steps": 605, "total_steps": 3400, "loss": 0.9019, "lr": 9.55911532374151e-05, "epoch": 0.311614730878187, "percentage": 17.79, "elapsed_time": "2:22:31", "remaining_time": "10:58:28", "throughput": 827.42, "total_tokens": 7076032}
{"current_steps": 610, "total_steps": 3400, "loss": 0.8956, "lr": 9.549077815930636e-05, "epoch": 0.31419005923255217, "percentage": 17.94, "elapsed_time": "2:23:39", "remaining_time": "10:57:03", "throughput": 827.73, "total_tokens": 7134536}
{"current_steps": 615, "total_steps": 3400, "loss": 0.898, "lr": 9.538932721758474e-05, "epoch": 0.31676538758691736, "percentage": 18.09, "elapsed_time": "2:24:47", "remaining_time": "10:55:38", "throughput": 828.02, "total_tokens": 7193032}
{"current_steps": 620, "total_steps": 3400, "loss": 0.8991, "lr": 9.528680281157999e-05, "epoch": 0.3193407159412825, "percentage": 18.24, "elapsed_time": "2:25:54", "remaining_time": "10:54:14", "throughput": 828.31, "total_tokens": 7251568}
{"current_steps": 625, "total_steps": 3400, "loss": 0.8961, "lr": 9.518320736600943e-05, "epoch": 0.3219160442956477, "percentage": 18.38, "elapsed_time": "2:27:02", "remaining_time": "10:52:50", "throughput": 828.6, "total_tokens": 7310072}
{"current_steps": 630, "total_steps": 3400, "loss": 0.8994, "lr": 9.507854333092063e-05, "epoch": 0.3244913726500129, "percentage": 18.53, "elapsed_time": "2:28:09", "remaining_time": "10:51:27", "throughput": 828.86, "total_tokens": 7368560}
{"current_steps": 635, "total_steps": 3400, "loss": 0.8925, "lr": 9.497281318163346e-05, "epoch": 0.32706670100437807, "percentage": 18.68, "elapsed_time": "2:29:18", "remaining_time": "10:50:06", "throughput": 829.09, "total_tokens": 7427040}
{"current_steps": 640, "total_steps": 3400, "loss": 0.9087, "lr": 9.486601941868154e-05, "epoch": 0.32964202935874326, "percentage": 18.82, "elapsed_time": "2:30:24", "remaining_time": "10:48:39", "throughput": 829.44, "total_tokens": 7485552}
{"current_steps": 645, "total_steps": 3400, "loss": 0.8924, "lr": 9.475816456775313e-05, "epoch": 0.3322173577131084, "percentage": 18.97, "elapsed_time": "2:31:29", "remaining_time": "10:47:03", "throughput": 829.98, "total_tokens": 7544040}
{"current_steps": 650, "total_steps": 3400, "loss": 0.904, "lr": 9.464925117963133e-05, "epoch": 0.3347926860674736, "percentage": 19.12, "elapsed_time": "2:32:34", "remaining_time": "10:45:29", "throughput": 830.49, "total_tokens": 7602512}
{"current_steps": 650, "total_steps": 3400, "eval_loss": 0.90328449010849, "epoch": 0.3347926860674736, "percentage": 19.12, "elapsed_time": "2:32:50", "remaining_time": "10:46:37", "throughput": 829.03, "total_tokens": 7602512}
{"current_steps": 655, "total_steps": 3400, "loss": 0.8929, "lr": 9.453928183013385e-05, "epoch": 0.3373680144218388, "percentage": 19.26, "elapsed_time": "2:33:59", "remaining_time": "10:45:22", "throughput": 829.14, "total_tokens": 7660968}
{"current_steps": 660, "total_steps": 3400, "loss": 0.9078, "lr": 9.442825912005202e-05, "epoch": 0.33994334277620397, "percentage": 19.41, "elapsed_time": "2:35:03", "remaining_time": "10:43:45", "throughput": 829.7, "total_tokens": 7719448}
{"current_steps": 665, "total_steps": 3400, "loss": 0.8963, "lr": 9.431618567508933e-05, "epoch": 0.34251867113056916, "percentage": 19.56, "elapsed_time": "2:36:07", "remaining_time": "10:42:07", "throughput": 830.29, "total_tokens": 7777928}
{"current_steps": 670, "total_steps": 3400, "loss": 0.9134, "lr": 9.420306414579925e-05, "epoch": 0.34509399948493436, "percentage": 19.71, "elapsed_time": "2:37:11", "remaining_time": "10:40:30", "throughput": 830.86, "total_tokens": 7836424}
{"current_steps": 675, "total_steps": 3400, "loss": 0.8984, "lr": 9.408889720752266e-05, "epoch": 0.3476693278392995, "percentage": 19.85, "elapsed_time": "2:38:16", "remaining_time": "10:38:56", "throughput": 831.38, "total_tokens": 7894904}
{"current_steps": 680, "total_steps": 3400, "loss": 0.8997, "lr": 9.397368756032445e-05, "epoch": 0.3502446561936647, "percentage": 20.0, "elapsed_time": "2:39:20", "remaining_time": "10:37:20", "throughput": 831.93, "total_tokens": 7953432}
{"current_steps": 685, "total_steps": 3400, "loss": 0.8926, "lr": 9.385743792892982e-05, "epoch": 0.3528199845480299, "percentage": 20.15, "elapsed_time": "2:40:24", "remaining_time": "10:35:46", "throughput": 832.45, "total_tokens": 8011888}
{"current_steps": 690, "total_steps": 3400, "loss": 0.9008, "lr": 9.374015106265968e-05, "epoch": 0.35539531290239507, "percentage": 20.29, "elapsed_time": "2:41:28", "remaining_time": "10:34:11", "throughput": 832.99, "total_tokens": 8070344}
{"current_steps": 695, "total_steps": 3400, "loss": 0.8986, "lr": 9.362182973536569e-05, "epoch": 0.35797064125676026, "percentage": 20.44, "elapsed_time": "2:42:32", "remaining_time": "10:32:38", "throughput": 833.49, "total_tokens": 8128816}
{"current_steps": 700, "total_steps": 3400, "loss": 0.8972, "lr": 9.35024767453647e-05, "epoch": 0.3605459696111254, "percentage": 20.59, "elapsed_time": "2:43:36", "remaining_time": "10:31:04", "throughput": 834.03, "total_tokens": 8187320}
{"current_steps": 700, "total_steps": 3400, "eval_loss": 0.9028835892677307, "epoch": 0.3605459696111254, "percentage": 20.59, "elapsed_time": "2:43:52", "remaining_time": "10:32:06", "throughput": 832.66, "total_tokens": 8187320}
{"current_steps": 705, "total_steps": 3400, "loss": 0.8998, "lr": 9.338209491537257e-05, "epoch": 0.3631212979654906, "percentage": 20.74, "elapsed_time": "2:45:01", "remaining_time": "10:30:51", "throughput": 832.76, "total_tokens": 8245776}
{"current_steps": 710, "total_steps": 3400, "loss": 0.8999, "lr": 9.326068709243727e-05, "epoch": 0.3656966263198558, "percentage": 20.88, "elapsed_time": "2:46:05", "remaining_time": "10:29:16", "throughput": 833.31, "total_tokens": 8304280}
{"current_steps": 715, "total_steps": 3400, "loss": 0.8983, "lr": 9.313825614787177e-05, "epoch": 0.36827195467422097, "percentage": 21.03, "elapsed_time": "2:47:09", "remaining_time": "10:27:41", "throughput": 833.84, "total_tokens": 8362728}
{"current_steps": 720, "total_steps": 3400, "loss": 0.892, "lr": 9.301480497718593e-05, "epoch": 0.37084728302858616, "percentage": 21.18, "elapsed_time": "2:48:12", "remaining_time": "10:26:08", "throughput": 834.37, "total_tokens": 8421224}
{"current_steps": 725, "total_steps": 3400, "loss": 0.9034, "lr": 9.289033650001817e-05, "epoch": 0.37342261138295135, "percentage": 21.32, "elapsed_time": "2:49:16", "remaining_time": "10:24:35", "throughput": 834.87, "total_tokens": 8479720}
{"current_steps": 730, "total_steps": 3400, "loss": 0.895, "lr": 9.276485366006634e-05, "epoch": 0.3759979397373165, "percentage": 21.47, "elapsed_time": "2:50:20", "remaining_time": "10:23:02", "throughput": 835.38, "total_tokens": 8538192}
{"current_steps": 735, "total_steps": 3400, "loss": 0.8973, "lr": 9.263835942501807e-05, "epoch": 0.3785732680916817, "percentage": 21.62, "elapsed_time": "2:51:24", "remaining_time": "10:21:29", "throughput": 835.9, "total_tokens": 8596664}
{"current_steps": 740, "total_steps": 3400, "loss": 0.8972, "lr": 9.251085678648072e-05, "epoch": 0.3811485964460469, "percentage": 21.76, "elapsed_time": "2:52:28", "remaining_time": "10:19:58", "throughput": 836.38, "total_tokens": 8655128}
{"current_steps": 745, "total_steps": 3400, "loss": 0.8987, "lr": 9.238234875991046e-05, "epoch": 0.38372392480041206, "percentage": 21.91, "elapsed_time": "2:53:31", "remaining_time": "10:18:25", "throughput": 836.89, "total_tokens": 8713624}
{"current_steps": 750, "total_steps": 3400, "loss": 0.9005, "lr": 9.225283838454111e-05, "epoch": 0.38629925315477726, "percentage": 22.06, "elapsed_time": "2:54:36", "remaining_time": "10:16:57", "throughput": 837.3, "total_tokens": 8772104}
{"current_steps": 750, "total_steps": 3400, "eval_loss": 0.8981761336326599, "epoch": 0.38629925315477726, "percentage": 22.06, "elapsed_time": "2:54:52", "remaining_time": "10:17:54", "throughput": 836.02, "total_tokens": 8772104}
{"current_steps": 755, "total_steps": 3400, "loss": 0.8973, "lr": 9.21223287233121e-05, "epoch": 0.3888745815091424, "percentage": 22.21, "elapsed_time": "2:56:01", "remaining_time": "10:16:40", "throughput": 836.09, "total_tokens": 8830568}
{"current_steps": 760, "total_steps": 3400, "loss": 0.8974, "lr": 9.199082286279622e-05, "epoch": 0.3914499098635076, "percentage": 22.35, "elapsed_time": "2:57:05", "remaining_time": "10:15:10", "throughput": 836.56, "total_tokens": 8889072}
{"current_steps": 765, "total_steps": 3400, "loss": 0.8985, "lr": 9.185832391312644e-05, "epoch": 0.3940252382178728, "percentage": 22.5, "elapsed_time": "2:58:09", "remaining_time": "10:13:39", "throughput": 837.04, "total_tokens": 8947568}
{"current_steps": 770, "total_steps": 3400, "loss": 0.8935, "lr": 9.172483500792244e-05, "epoch": 0.39660056657223797, "percentage": 22.65, "elapsed_time": "2:59:13", "remaining_time": "10:12:08", "throughput": 837.52, "total_tokens": 9006056}
{"current_steps": 775, "total_steps": 3400, "loss": 0.8985, "lr": 9.159035930421658e-05, "epoch": 0.39917589492660316, "percentage": 22.79, "elapsed_time": "3:00:17", "remaining_time": "10:10:39", "throughput": 837.96, "total_tokens": 9064592}
{"current_steps": 780, "total_steps": 3400, "loss": 0.9105, "lr": 9.145489998237902e-05, "epoch": 0.40175122328096835, "percentage": 22.94, "elapsed_time": "3:01:21", "remaining_time": "10:09:10", "throughput": 838.42, "total_tokens": 9123096}
{"current_steps": 785, "total_steps": 3400, "loss": 0.8925, "lr": 9.131846024604274e-05, "epoch": 0.4043265516353335, "percentage": 23.09, "elapsed_time": "3:02:25", "remaining_time": "10:07:41", "throughput": 838.85, "total_tokens": 9181576}
{"current_steps": 790, "total_steps": 3400, "loss": 0.8955, "lr": 9.11810433220276e-05, "epoch": 0.4069018799896987, "percentage": 23.24, "elapsed_time": "3:03:29", "remaining_time": "10:06:12", "throughput": 839.29, "total_tokens": 9240048}
{"current_steps": 795, "total_steps": 3400, "loss": 0.8986, "lr": 9.104265246026415e-05, "epoch": 0.40947720834406387, "percentage": 23.38, "elapsed_time": "3:04:33", "remaining_time": "10:04:45", "throughput": 839.7, "total_tokens": 9298528}
{"current_steps": 800, "total_steps": 3400, "loss": 0.8881, "lr": 9.090329093371666e-05, "epoch": 0.41205253669842906, "percentage": 23.53, "elapsed_time": "3:05:37", "remaining_time": "10:03:16", "throughput": 840.14, "total_tokens": 9357016}
{"current_steps": 800, "total_steps": 3400, "eval_loss": 0.8973079919815063, "epoch": 0.41205253669842906, "percentage": 23.53, "elapsed_time": "3:05:53", "remaining_time": "10:04:09", "throughput": 838.92, "total_tokens": 9357016}
{"current_steps": 805, "total_steps": 3400, "loss": 0.8798, "lr": 9.076296203830579e-05, "epoch": 0.41462786505279425, "percentage": 23.68, "elapsed_time": "3:07:02", "remaining_time": "10:02:55", "throughput": 839.0, "total_tokens": 9415480}
{"current_steps": 810, "total_steps": 3400, "loss": 0.9104, "lr": 9.062166909283062e-05, "epoch": 0.4172031934071594, "percentage": 23.82, "elapsed_time": "3:08:05", "remaining_time": "10:01:26", "throughput": 839.45, "total_tokens": 9473928}
{"current_steps": 815, "total_steps": 3400, "loss": 0.9007, "lr": 9.047941543889014e-05, "epoch": 0.4197785217615246, "percentage": 23.97, "elapsed_time": "3:09:09", "remaining_time": "9:59:58", "throughput": 839.88, "total_tokens": 9532408}
{"current_steps": 820, "total_steps": 3400, "loss": 0.8974, "lr": 9.033620444080428e-05, "epoch": 0.4223538501158898, "percentage": 24.12, "elapsed_time": "3:10:13", "remaining_time": "9:58:30", "throughput": 840.31, "total_tokens": 9590920}
{"current_steps": 825, "total_steps": 3400, "loss": 0.8992, "lr": 9.019203948553422e-05, "epoch": 0.42492917847025496, "percentage": 24.26, "elapsed_time": "3:11:17", "remaining_time": "9:57:02", "throughput": 840.75, "total_tokens": 9649400}
{"current_steps": 830, "total_steps": 3400, "loss": 0.8991, "lr": 9.004692398260244e-05, "epoch": 0.42750450682462016, "percentage": 24.41, "elapsed_time": "3:12:20", "remaining_time": "9:55:35", "throughput": 841.17, "total_tokens": 9707888}
{"current_steps": 835, "total_steps": 3400, "loss": 0.8964, "lr": 8.9900861364012e-05, "epoch": 0.43007983517898535, "percentage": 24.56, "elapsed_time": "3:13:24", "remaining_time": "9:54:07", "throughput": 841.59, "total_tokens": 9766384}
{"current_steps": 840, "total_steps": 3400, "loss": 0.8723, "lr": 8.975385508416532e-05, "epoch": 0.4326551635333505, "percentage": 24.71, "elapsed_time": "3:14:28", "remaining_time": "9:52:41", "throughput": 841.99, "total_tokens": 9824896}
{"current_steps": 845, "total_steps": 3400, "loss": 0.874, "lr": 8.960590861978265e-05, "epoch": 0.4352304918877157, "percentage": 24.85, "elapsed_time": "3:15:32", "remaining_time": "9:51:15", "throughput": 842.39, "total_tokens": 9883408}
{"current_steps": 850, "total_steps": 3400, "loss": 0.9035, "lr": 8.945702546981969e-05, "epoch": 0.43780582024208087, "percentage": 25.0, "elapsed_time": "3:16:36", "remaining_time": "9:49:49", "throughput": 842.79, "total_tokens": 9941896}
{"current_steps": 850, "total_steps": 3400, "eval_loss": 0.8779178261756897, "epoch": 0.43780582024208087, "percentage": 25.0, "elapsed_time": "3:16:52", "remaining_time": "9:50:37", "throughput": 841.64, "total_tokens": 9941896}
{"current_steps": 855, "total_steps": 3400, "loss": 0.8516, "lr": 8.930720915538487e-05, "epoch": 0.44038114859644606, "percentage": 25.15, "elapsed_time": "3:18:00", "remaining_time": "9:49:23", "throughput": 841.75, "total_tokens": 10000336}
{"current_steps": 860, "total_steps": 3400, "loss": 0.9206, "lr": 8.915646321965614e-05, "epoch": 0.44295647695081125, "percentage": 25.29, "elapsed_time": "3:19:03", "remaining_time": "9:47:54", "throughput": 842.2, "total_tokens": 10058816}
{"current_steps": 865, "total_steps": 3400, "loss": 0.9028, "lr": 8.900479122779712e-05, "epoch": 0.4455318053051764, "percentage": 25.44, "elapsed_time": "3:20:06", "remaining_time": "9:46:26", "throughput": 842.66, "total_tokens": 10117320}
{"current_steps": 870, "total_steps": 3400, "loss": 0.8991, "lr": 8.885219676687277e-05, "epoch": 0.4481071336595416, "percentage": 25.59, "elapsed_time": "3:21:09", "remaining_time": "9:45:00", "throughput": 843.07, "total_tokens": 10175824}
{"current_steps": 875, "total_steps": 3400, "loss": 0.8934, "lr": 8.869868344576459e-05, "epoch": 0.45068246201390677, "percentage": 25.74, "elapsed_time": "3:22:12", "remaining_time": "9:43:32", "throughput": 843.51, "total_tokens": 10234288}
{"current_steps": 880, "total_steps": 3400, "loss": 0.8908, "lr": 8.854425489508532e-05, "epoch": 0.45325779036827196, "percentage": 25.88, "elapsed_time": "3:23:15", "remaining_time": "9:42:04", "throughput": 843.95, "total_tokens": 10292736}
{"current_steps": 885, "total_steps": 3400, "loss": 0.8988, "lr": 8.838891476709288e-05, "epoch": 0.45583311872263715, "percentage": 26.03, "elapsed_time": "3:24:18", "remaining_time": "9:40:36", "throughput": 844.4, "total_tokens": 10351224}
{"current_steps": 890, "total_steps": 3400, "loss": 0.8965, "lr": 8.823266673560426e-05, "epoch": 0.4584084470770023, "percentage": 26.18, "elapsed_time": "3:25:21", "remaining_time": "9:39:10", "throughput": 844.81, "total_tokens": 10409736}
{"current_steps": 895, "total_steps": 3400, "loss": 0.8989, "lr": 8.807551449590846e-05, "epoch": 0.4609837754313675, "percentage": 26.32, "elapsed_time": "3:26:25", "remaining_time": "9:37:44", "throughput": 845.22, "total_tokens": 10468240}
{"current_steps": 900, "total_steps": 3400, "loss": 0.8961, "lr": 8.791746176467907e-05, "epoch": 0.4635591037857327, "percentage": 26.47, "elapsed_time": "3:27:28", "remaining_time": "9:36:18", "throughput": 845.63, "total_tokens": 10526712}
{"current_steps": 900, "total_steps": 3400, "eval_loss": 0.891426146030426, "epoch": 0.4635591037857327, "percentage": 26.47, "elapsed_time": "3:27:44", "remaining_time": "9:37:03", "throughput": 844.54, "total_tokens": 10526712}
{"current_steps": 905, "total_steps": 3400, "loss": 0.8955, "lr": 8.775851227988656e-05, "epoch": 0.46613443214009787, "percentage": 26.62, "elapsed_time": "3:28:52", "remaining_time": "9:35:50", "throughput": 844.63, "total_tokens": 10585232}
{"current_steps": 910, "total_steps": 3400, "loss": 0.8951, "lr": 8.759866980070963e-05, "epoch": 0.46870976049446306, "percentage": 26.76, "elapsed_time": "3:29:55", "remaining_time": "9:34:23", "throughput": 845.06, "total_tokens": 10643728}
{"current_steps": 915, "total_steps": 3400, "loss": 0.8951, "lr": 8.743793810744654e-05, "epoch": 0.47128508884882825, "percentage": 26.91, "elapsed_time": "3:30:58", "remaining_time": "9:32:58", "throughput": 845.46, "total_tokens": 10702240}
{"current_steps": 920, "total_steps": 3400, "loss": 0.9066, "lr": 8.727632100142551e-05, "epoch": 0.4738604172031934, "percentage": 27.06, "elapsed_time": "3:32:01", "remaining_time": "9:31:32", "throughput": 845.87, "total_tokens": 10760656}
{"current_steps": 925, "total_steps": 3400, "loss": 0.8953, "lr": 8.711382230491493e-05, "epoch": 0.4764357455575586, "percentage": 27.21, "elapsed_time": "3:33:04", "remaining_time": "9:30:06", "throughput": 846.28, "total_tokens": 10819128}
{"current_steps": 930, "total_steps": 3400, "loss": 0.8961, "lr": 8.695044586103296e-05, "epoch": 0.47901107391192377, "percentage": 27.35, "elapsed_time": "3:34:06", "remaining_time": "9:28:40", "throughput": 846.71, "total_tokens": 10877600}
{"current_steps": 935, "total_steps": 3400, "loss": 0.8965, "lr": 8.678619553365659e-05, "epoch": 0.48158640226628896, "percentage": 27.5, "elapsed_time": "3:35:10", "remaining_time": "9:27:15", "throughput": 847.09, "total_tokens": 10936088}
{"current_steps": 940, "total_steps": 3400, "loss": 0.9018, "lr": 8.662107520733027e-05, "epoch": 0.48416173062065415, "percentage": 27.65, "elapsed_time": "3:36:12", "remaining_time": "9:25:50", "throughput": 847.5, "total_tokens": 10994560}
{"current_steps": 945, "total_steps": 3400, "loss": 0.8944, "lr": 8.64550887871741e-05, "epoch": 0.4867370589750193, "percentage": 27.79, "elapsed_time": "3:37:16", "remaining_time": "9:24:26", "throughput": 847.88, "total_tokens": 11053016}
{"current_steps": 950, "total_steps": 3400, "loss": 0.8852, "lr": 8.628824019879137e-05, "epoch": 0.4893123873293845, "percentage": 27.94, "elapsed_time": "3:38:18", "remaining_time": "9:23:01", "throughput": 848.29, "total_tokens": 11111520}
{"current_steps": 950, "total_steps": 3400, "eval_loss": 0.8915690183639526, "epoch": 0.4893123873293845, "percentage": 27.94, "elapsed_time": "3:38:35", "remaining_time": "9:23:43", "throughput": 847.23, "total_tokens": 11111520}
{"current_steps": 955, "total_steps": 3400, "loss": 0.9087, "lr": 8.612053338817581e-05, "epoch": 0.49188771568374967, "percentage": 28.09, "elapsed_time": "3:39:44", "remaining_time": "9:22:36", "throughput": 847.18, "total_tokens": 11170016}
{"current_steps": 960, "total_steps": 3400, "loss": 0.8915, "lr": 8.595197232161824e-05, "epoch": 0.49446304403811486, "percentage": 28.24, "elapsed_time": "3:40:48", "remaining_time": "9:21:13", "throughput": 847.52, "total_tokens": 11228496}
{"current_steps": 965, "total_steps": 3400, "loss": 0.8836, "lr": 8.578256098561275e-05, "epoch": 0.49703837239248005, "percentage": 28.38, "elapsed_time": "3:41:51", "remaining_time": "9:19:49", "throughput": 847.89, "total_tokens": 11286928}
{"current_steps": 970, "total_steps": 3400, "loss": 0.9116, "lr": 8.561230338676239e-05, "epoch": 0.49961370074684525, "percentage": 28.53, "elapsed_time": "3:42:55", "remaining_time": "9:18:27", "throughput": 848.23, "total_tokens": 11345400}
{"current_steps": 975, "total_steps": 3400, "loss": 0.8809, "lr": 8.544120355168451e-05, "epoch": 0.5021890291012104, "percentage": 28.68, "elapsed_time": "3:43:58", "remaining_time": "9:17:03", "throughput": 848.61, "total_tokens": 11403912}
{"current_steps": 980, "total_steps": 3400, "loss": 0.8895, "lr": 8.526926552691544e-05, "epoch": 0.5047643574555756, "percentage": 28.82, "elapsed_time": "3:45:01", "remaining_time": "9:15:40", "throughput": 848.96, "total_tokens": 11462344}
{"current_steps": 985, "total_steps": 3400, "loss": 0.8674, "lr": 8.509649337881483e-05, "epoch": 0.5073396858099408, "percentage": 28.97, "elapsed_time": "3:46:04", "remaining_time": "9:14:17", "throughput": 849.32, "total_tokens": 11520808}
{"current_steps": 990, "total_steps": 3400, "loss": 0.8832, "lr": 8.492289119346943e-05, "epoch": 0.509915014164306, "percentage": 29.12, "elapsed_time": "3:47:08", "remaining_time": "9:12:56", "throughput": 849.64, "total_tokens": 11579248}
{"current_steps": 995, "total_steps": 3400, "loss": 0.8581, "lr": 8.474846307659658e-05, "epoch": 0.5124903425186711, "percentage": 29.26, "elapsed_time": "3:48:11", "remaining_time": "9:11:33", "throughput": 850.0, "total_tokens": 11637712}
{"current_steps": 1000, "total_steps": 3400, "loss": 0.8635, "lr": 8.457321315344694e-05, "epoch": 0.5150656708730363, "percentage": 29.41, "elapsed_time": "3:49:14", "remaining_time": "9:10:11", "throughput": 850.33, "total_tokens": 11696200}
{"current_steps": 1000, "total_steps": 3400, "eval_loss": 0.860200047492981, "epoch": 0.5150656708730363, "percentage": 29.41, "elapsed_time": "3:49:31", "remaining_time": "9:10:50", "throughput": 849.33, "total_tokens": 11696200}
{"current_steps": 1005, "total_steps": 3400, "loss": 0.8499, "lr": 8.439714556870704e-05, "epoch": 0.5176409992274015, "percentage": 29.56, "elapsed_time": "3:50:39", "remaining_time": "9:09:40", "throughput": 849.38, "total_tokens": 11754720}
{"current_steps": 1010, "total_steps": 3400, "loss": 0.8556, "lr": 8.422026448640124e-05, "epoch": 0.5202163275817667, "percentage": 29.71, "elapsed_time": "3:51:43", "remaining_time": "9:08:19", "throughput": 849.68, "total_tokens": 11813216}
{"current_steps": 1015, "total_steps": 3400, "loss": 0.8533, "lr": 8.40425740897932e-05, "epoch": 0.5227916559361319, "percentage": 29.85, "elapsed_time": "3:52:46", "remaining_time": "9:06:57", "throughput": 850.02, "total_tokens": 11871712}
{"current_steps": 1020, "total_steps": 3400, "loss": 0.8921, "lr": 8.386407858128706e-05, "epoch": 0.525366984290497, "percentage": 30.0, "elapsed_time": "3:53:49", "remaining_time": "9:05:35", "throughput": 850.36, "total_tokens": 11930200}
{"current_steps": 1025, "total_steps": 3400, "loss": 0.8815, "lr": 8.368478218232787e-05, "epoch": 0.5279423126448622, "percentage": 30.15, "elapsed_time": "3:54:52", "remaining_time": "9:04:14", "throughput": 850.69, "total_tokens": 11988704}
{"current_steps": 1030, "total_steps": 3400, "loss": 0.854, "lr": 8.350468913330192e-05, "epoch": 0.5305176409992274, "percentage": 30.29, "elapsed_time": "3:55:55", "remaining_time": "9:02:52", "throughput": 851.03, "total_tokens": 12047176}
{"current_steps": 1035, "total_steps": 3400, "loss": 0.8642, "lr": 8.33238036934364e-05, "epoch": 0.5330929693535926, "percentage": 30.44, "elapsed_time": "3:56:59", "remaining_time": "9:01:31", "throughput": 851.36, "total_tokens": 12105680}
{"current_steps": 1040, "total_steps": 3400, "loss": 0.8072, "lr": 8.31421301406986e-05, "epoch": 0.5356682977079578, "percentage": 30.59, "elapsed_time": "3:58:02", "remaining_time": "9:00:09", "throughput": 851.71, "total_tokens": 12164208}
{"current_steps": 1045, "total_steps": 3400, "loss": 0.8532, "lr": 8.29596727716949e-05, "epoch": 0.5382436260623229, "percentage": 30.74, "elapsed_time": "3:59:05", "remaining_time": "8:58:48", "throughput": 852.04, "total_tokens": 12222672}
{"current_steps": 1050, "total_steps": 3400, "loss": 0.8844, "lr": 8.277643590156894e-05, "epoch": 0.5408189544166881, "percentage": 30.88, "elapsed_time": "4:00:08", "remaining_time": "8:57:28", "throughput": 852.34, "total_tokens": 12281072}
{"current_steps": 1050, "total_steps": 3400, "eval_loss": 0.8446129560470581, "epoch": 0.5408189544166881, "percentage": 30.88, "elapsed_time": "4:00:24", "remaining_time": "8:58:03", "throughput": 851.39, "total_tokens": 12281072}
{"current_steps": 1055, "total_steps": 3400, "loss": 0.8602, "lr": 8.259242386389973e-05, "epoch": 0.5433942827710533, "percentage": 31.03, "elapsed_time": "4:01:33", "remaining_time": "8:56:54", "throughput": 851.41, "total_tokens": 12339544}
{"current_steps": 1060, "total_steps": 3400, "loss": 0.8615, "lr": 8.240764101059912e-05, "epoch": 0.5459696111254185, "percentage": 31.18, "elapsed_time": "4:02:36", "remaining_time": "8:55:34", "throughput": 851.71, "total_tokens": 12397992}
{"current_steps": 1065, "total_steps": 3400, "loss": 0.8732, "lr": 8.222209171180883e-05, "epoch": 0.5485449394797837, "percentage": 31.32, "elapsed_time": "4:03:39", "remaining_time": "8:54:13", "throughput": 852.04, "total_tokens": 12456480}
{"current_steps": 1070, "total_steps": 3400, "loss": 0.8691, "lr": 8.203578035579715e-05, "epoch": 0.5511202678341488, "percentage": 31.47, "elapsed_time": "4:04:42", "remaining_time": "8:52:52", "throughput": 852.36, "total_tokens": 12515000}
{"current_steps": 1075, "total_steps": 3400, "loss": 0.8544, "lr": 8.184871134885513e-05, "epoch": 0.553695596188514, "percentage": 31.62, "elapsed_time": "4:05:46", "remaining_time": "8:51:33", "throughput": 852.65, "total_tokens": 12573504}
{"current_steps": 1080, "total_steps": 3400, "loss": 0.8501, "lr": 8.166088911519235e-05, "epoch": 0.5562709245428792, "percentage": 31.76, "elapsed_time": "4:06:49", "remaining_time": "8:50:13", "throughput": 852.96, "total_tokens": 12632008}
{"current_steps": 1085, "total_steps": 3400, "loss": 0.8646, "lr": 8.147231809683236e-05, "epoch": 0.5588462528972444, "percentage": 31.91, "elapsed_time": "4:07:52", "remaining_time": "8:48:52", "throughput": 853.28, "total_tokens": 12690520}
{"current_steps": 1090, "total_steps": 3400, "loss": 0.8327, "lr": 8.128300275350756e-05, "epoch": 0.5614215812516096, "percentage": 32.06, "elapsed_time": "4:08:55", "remaining_time": "8:47:32", "throughput": 853.6, "total_tokens": 12749032}
{"current_steps": 1095, "total_steps": 3400, "loss": 0.8218, "lr": 8.109294756255375e-05, "epoch": 0.5639969096059748, "percentage": 32.21, "elapsed_time": "4:09:58", "remaining_time": "8:46:12", "throughput": 853.9, "total_tokens": 12807504}
{"current_steps": 1100, "total_steps": 3400, "loss": 0.8427, "lr": 8.090215701880419e-05, "epoch": 0.56657223796034, "percentage": 32.35, "elapsed_time": "4:11:01", "remaining_time": "8:44:52", "throughput": 854.21, "total_tokens": 12865992}
{"current_steps": 1100, "total_steps": 3400, "eval_loss": 0.7743102312088013, "epoch": 0.56657223796034, "percentage": 32.35, "elapsed_time": "4:11:17", "remaining_time": "8:45:26", "throughput": 853.3, "total_tokens": 12865992}