ben81828's picture
Training in progress, step 300
4da001b verified
raw
history blame
15.9 kB
{"current_steps": 5, "total_steps": 3400, "loss": 1.0954, "lr": 2.9411764705882355e-06, "epoch": 0.0025753283543651817, "percentage": 0.15, "elapsed_time": "0:02:44", "remaining_time": "1 day, 6:59:16", "throughput": 303.84, "total_tokens": 49920}
{"current_steps": 10, "total_steps": 3400, "loss": 0.9793, "lr": 5.882352941176471e-06, "epoch": 0.0051506567087303634, "percentage": 0.29, "elapsed_time": "0:04:11", "remaining_time": "23:42:34", "throughput": 396.53, "total_tokens": 99840}
{"current_steps": 15, "total_steps": 3400, "loss": 1.0964, "lr": 8.823529411764707e-06, "epoch": 0.007725985063095545, "percentage": 0.44, "elapsed_time": "0:05:38", "remaining_time": "21:11:48", "throughput": 442.89, "total_tokens": 149760}
{"current_steps": 20, "total_steps": 3400, "loss": 0.7079, "lr": 1.1764705882352942e-05, "epoch": 0.010301313417460727, "percentage": 0.59, "elapsed_time": "0:07:04", "remaining_time": "19:56:13", "throughput": 470.17, "total_tokens": 199680}
{"current_steps": 25, "total_steps": 3400, "loss": 0.4213, "lr": 1.4705882352941177e-05, "epoch": 0.012876641771825908, "percentage": 0.74, "elapsed_time": "0:08:30", "remaining_time": "19:08:45", "throughput": 488.88, "total_tokens": 249600}
{"current_steps": 30, "total_steps": 3400, "loss": 0.3359, "lr": 1.7647058823529414e-05, "epoch": 0.01545197012619109, "percentage": 0.88, "elapsed_time": "0:09:58", "remaining_time": "18:39:49", "throughput": 500.76, "total_tokens": 299520}
{"current_steps": 35, "total_steps": 3400, "loss": 0.3463, "lr": 2.058823529411765e-05, "epoch": 0.018027298480556272, "percentage": 1.03, "elapsed_time": "0:11:24", "remaining_time": "18:16:36", "throughput": 510.61, "total_tokens": 349440}
{"current_steps": 40, "total_steps": 3400, "loss": 0.28, "lr": 2.3529411764705884e-05, "epoch": 0.020602626834921454, "percentage": 1.18, "elapsed_time": "0:12:51", "remaining_time": "18:00:30", "throughput": 517.44, "total_tokens": 399360}
{"current_steps": 45, "total_steps": 3400, "loss": 0.3442, "lr": 2.647058823529412e-05, "epoch": 0.023177955189286635, "percentage": 1.32, "elapsed_time": "0:14:18", "remaining_time": "17:46:12", "throughput": 523.61, "total_tokens": 449280}
{"current_steps": 50, "total_steps": 3400, "loss": 0.3441, "lr": 2.9411764705882354e-05, "epoch": 0.025753283543651816, "percentage": 1.47, "elapsed_time": "0:15:45", "remaining_time": "17:36:13", "throughput": 527.77, "total_tokens": 499200}
{"current_steps": 50, "total_steps": 3400, "eval_loss": 0.33834776282310486, "epoch": 0.025753283543651816, "percentage": 1.47, "elapsed_time": "0:16:33", "remaining_time": "18:29:12", "throughput": 502.56, "total_tokens": 499200}
{"current_steps": 55, "total_steps": 3400, "loss": 0.3182, "lr": 3.235294117647059e-05, "epoch": 0.028328611898016998, "percentage": 1.62, "elapsed_time": "0:18:08", "remaining_time": "18:22:58", "throughput": 504.64, "total_tokens": 549120}
{"current_steps": 60, "total_steps": 3400, "loss": 0.3294, "lr": 3.529411764705883e-05, "epoch": 0.03090394025238218, "percentage": 1.76, "elapsed_time": "0:19:36", "remaining_time": "18:11:07", "throughput": 509.36, "total_tokens": 599040}
{"current_steps": 65, "total_steps": 3400, "loss": 0.3097, "lr": 3.8235294117647055e-05, "epoch": 0.03347926860674736, "percentage": 1.91, "elapsed_time": "0:21:02", "remaining_time": "17:59:13", "throughput": 514.21, "total_tokens": 648960}
{"current_steps": 70, "total_steps": 3400, "loss": 0.3008, "lr": 4.11764705882353e-05, "epoch": 0.036054596961112545, "percentage": 2.06, "elapsed_time": "0:22:28", "remaining_time": "17:49:24", "throughput": 518.15, "total_tokens": 698880}
{"current_steps": 75, "total_steps": 3400, "loss": 0.3278, "lr": 4.411764705882353e-05, "epoch": 0.03862992531547772, "percentage": 2.21, "elapsed_time": "0:23:55", "remaining_time": "17:40:18", "throughput": 521.81, "total_tokens": 748800}
{"current_steps": 80, "total_steps": 3400, "loss": 0.3074, "lr": 4.705882352941177e-05, "epoch": 0.04120525366984291, "percentage": 2.35, "elapsed_time": "0:25:22", "remaining_time": "17:32:47", "throughput": 524.74, "total_tokens": 798720}
{"current_steps": 85, "total_steps": 3400, "loss": 0.3423, "lr": 5e-05, "epoch": 0.043780582024208085, "percentage": 2.5, "elapsed_time": "0:26:47", "remaining_time": "17:25:06", "throughput": 527.8, "total_tokens": 848640}
{"current_steps": 90, "total_steps": 3400, "loss": 0.3008, "lr": 5.294117647058824e-05, "epoch": 0.04635591037857327, "percentage": 2.65, "elapsed_time": "0:28:13", "remaining_time": "17:17:59", "throughput": 530.62, "total_tokens": 898560}
{"current_steps": 95, "total_steps": 3400, "loss": 0.2815, "lr": 5.588235294117647e-05, "epoch": 0.04893123873293845, "percentage": 2.79, "elapsed_time": "0:29:38", "remaining_time": "17:11:24", "throughput": 533.21, "total_tokens": 948480}
{"current_steps": 100, "total_steps": 3400, "loss": 0.2274, "lr": 5.882352941176471e-05, "epoch": 0.05150656708730363, "percentage": 2.94, "elapsed_time": "0:31:05", "remaining_time": "17:06:03", "throughput": 535.17, "total_tokens": 998400}
{"current_steps": 100, "total_steps": 3400, "eval_loss": 0.18663176894187927, "epoch": 0.05150656708730363, "percentage": 2.94, "elapsed_time": "0:31:24", "remaining_time": "17:16:28", "throughput": 529.8, "total_tokens": 998400}
{"current_steps": 105, "total_steps": 3400, "loss": 0.168, "lr": 6.176470588235295e-05, "epoch": 0.05408189544166881, "percentage": 3.09, "elapsed_time": "0:32:56", "remaining_time": "17:13:45", "throughput": 530.38, "total_tokens": 1048320}
{"current_steps": 110, "total_steps": 3400, "loss": 0.0709, "lr": 6.470588235294118e-05, "epoch": 0.056657223796033995, "percentage": 3.24, "elapsed_time": "0:34:22", "remaining_time": "17:08:05", "throughput": 532.5, "total_tokens": 1098240}
{"current_steps": 115, "total_steps": 3400, "loss": 0.1609, "lr": 6.764705882352942e-05, "epoch": 0.05923255215039917, "percentage": 3.38, "elapsed_time": "0:35:49", "remaining_time": "17:03:19", "throughput": 534.16, "total_tokens": 1148160}
{"current_steps": 120, "total_steps": 3400, "loss": 0.0354, "lr": 7.058823529411765e-05, "epoch": 0.06180788050476436, "percentage": 3.53, "elapsed_time": "0:37:15", "remaining_time": "16:58:19", "throughput": 535.97, "total_tokens": 1198080}
{"current_steps": 125, "total_steps": 3400, "loss": 0.1133, "lr": 7.352941176470589e-05, "epoch": 0.06438320885912954, "percentage": 3.68, "elapsed_time": "0:38:42", "remaining_time": "16:54:05", "throughput": 537.39, "total_tokens": 1248000}
{"current_steps": 130, "total_steps": 3400, "loss": 0.0867, "lr": 7.647058823529411e-05, "epoch": 0.06695853721349472, "percentage": 3.82, "elapsed_time": "0:40:08", "remaining_time": "16:49:31", "throughput": 539.0, "total_tokens": 1297920}
{"current_steps": 135, "total_steps": 3400, "loss": 0.1352, "lr": 7.941176470588235e-05, "epoch": 0.0695338655678599, "percentage": 3.97, "elapsed_time": "0:41:35", "remaining_time": "16:45:43", "throughput": 540.2, "total_tokens": 1347840}
{"current_steps": 140, "total_steps": 3400, "loss": 0.101, "lr": 8.23529411764706e-05, "epoch": 0.07210919392222509, "percentage": 4.12, "elapsed_time": "0:43:00", "remaining_time": "16:41:36", "throughput": 541.59, "total_tokens": 1397760}
{"current_steps": 145, "total_steps": 3400, "loss": 0.0334, "lr": 8.529411764705883e-05, "epoch": 0.07468452227659027, "percentage": 4.26, "elapsed_time": "0:44:28", "remaining_time": "16:38:14", "throughput": 542.59, "total_tokens": 1447680}
{"current_steps": 150, "total_steps": 3400, "loss": 0.0667, "lr": 8.823529411764706e-05, "epoch": 0.07725985063095545, "percentage": 4.41, "elapsed_time": "0:45:53", "remaining_time": "16:34:25", "throughput": 543.83, "total_tokens": 1497600}
{"current_steps": 150, "total_steps": 3400, "eval_loss": 0.09665286540985107, "epoch": 0.07725985063095545, "percentage": 4.41, "elapsed_time": "0:46:13", "remaining_time": "16:41:23", "throughput": 540.05, "total_tokens": 1497600}
{"current_steps": 155, "total_steps": 3400, "loss": 0.06, "lr": 9.11764705882353e-05, "epoch": 0.07983517898532062, "percentage": 4.56, "elapsed_time": "0:47:46", "remaining_time": "16:40:10", "throughput": 539.88, "total_tokens": 1547520}
{"current_steps": 160, "total_steps": 3400, "loss": 0.0265, "lr": 9.411764705882353e-05, "epoch": 0.08241050733968582, "percentage": 4.71, "elapsed_time": "0:49:12", "remaining_time": "16:36:21", "throughput": 541.11, "total_tokens": 1597440}
{"current_steps": 165, "total_steps": 3400, "loss": 0.1743, "lr": 9.705882352941177e-05, "epoch": 0.08498583569405099, "percentage": 4.85, "elapsed_time": "0:50:39", "remaining_time": "16:33:07", "throughput": 542.03, "total_tokens": 1647360}
{"current_steps": 170, "total_steps": 3400, "loss": 0.051, "lr": 0.0001, "epoch": 0.08756116404841617, "percentage": 5.0, "elapsed_time": "0:52:04", "remaining_time": "16:29:34", "throughput": 543.14, "total_tokens": 1697280}
{"current_steps": 175, "total_steps": 3400, "loss": 0.0584, "lr": 9.999940874631277e-05, "epoch": 0.09013649240278135, "percentage": 5.15, "elapsed_time": "0:53:31", "remaining_time": "16:26:31", "throughput": 543.97, "total_tokens": 1747200}
{"current_steps": 180, "total_steps": 3400, "loss": 0.0704, "lr": 9.999763499923432e-05, "epoch": 0.09271182075714654, "percentage": 5.29, "elapsed_time": "0:54:57", "remaining_time": "16:23:09", "throughput": 544.99, "total_tokens": 1797120}
{"current_steps": 185, "total_steps": 3400, "loss": 0.0278, "lr": 9.999467880071402e-05, "epoch": 0.09528714911151172, "percentage": 5.44, "elapsed_time": "0:56:23", "remaining_time": "16:20:03", "throughput": 545.86, "total_tokens": 1847040}
{"current_steps": 190, "total_steps": 3400, "loss": 0.0862, "lr": 9.999054022066641e-05, "epoch": 0.0978624774658769, "percentage": 5.59, "elapsed_time": "0:57:49", "remaining_time": "16:16:57", "throughput": 546.74, "total_tokens": 1896960}
{"current_steps": 195, "total_steps": 3400, "loss": 0.0565, "lr": 9.998521935696953e-05, "epoch": 0.10043780582024209, "percentage": 5.74, "elapsed_time": "0:59:16", "remaining_time": "16:14:07", "throughput": 547.48, "total_tokens": 1946880}
{"current_steps": 200, "total_steps": 3400, "loss": 0.0459, "lr": 9.997871633546257e-05, "epoch": 0.10301313417460727, "percentage": 5.88, "elapsed_time": "1:00:42", "remaining_time": "16:11:15", "throughput": 548.24, "total_tokens": 1996800}
{"current_steps": 200, "total_steps": 3400, "eval_loss": 0.09957947582006454, "epoch": 0.10301313417460727, "percentage": 5.88, "elapsed_time": "1:01:01", "remaining_time": "16:16:23", "throughput": 545.35, "total_tokens": 1996800}
{"current_steps": 205, "total_steps": 3400, "loss": 0.0539, "lr": 9.997103130994296e-05, "epoch": 0.10558846252897244, "percentage": 6.03, "elapsed_time": "1:02:33", "remaining_time": "16:14:54", "throughput": 545.33, "total_tokens": 2046720}
{"current_steps": 210, "total_steps": 3400, "loss": 0.0654, "lr": 9.996216446216267e-05, "epoch": 0.10816379088333762, "percentage": 6.18, "elapsed_time": "1:03:59", "remaining_time": "16:12:04", "throughput": 546.06, "total_tokens": 2096640}
{"current_steps": 215, "total_steps": 3400, "loss": 0.0316, "lr": 9.995211600182397e-05, "epoch": 0.11073911923770281, "percentage": 6.32, "elapsed_time": "1:05:25", "remaining_time": "16:09:07", "throughput": 546.87, "total_tokens": 2146560}
{"current_steps": 220, "total_steps": 3400, "loss": 0.1169, "lr": 9.994088616657444e-05, "epoch": 0.11331444759206799, "percentage": 6.47, "elapsed_time": "1:06:52", "remaining_time": "16:06:35", "throughput": 547.44, "total_tokens": 2196480}
{"current_steps": 225, "total_steps": 3400, "loss": 0.0382, "lr": 9.992847522200133e-05, "epoch": 0.11588977594643317, "percentage": 6.62, "elapsed_time": "1:08:18", "remaining_time": "16:03:50", "throughput": 548.14, "total_tokens": 2246400}
{"current_steps": 230, "total_steps": 3400, "loss": 0.0406, "lr": 9.99148834616253e-05, "epoch": 0.11846510430079835, "percentage": 6.76, "elapsed_time": "1:09:45", "remaining_time": "16:01:25", "throughput": 548.65, "total_tokens": 2296320}
{"current_steps": 235, "total_steps": 3400, "loss": 0.0182, "lr": 9.990011120689351e-05, "epoch": 0.12104043265516354, "percentage": 6.91, "elapsed_time": "1:11:10", "remaining_time": "15:58:33", "throughput": 549.42, "total_tokens": 2346240}
{"current_steps": 240, "total_steps": 3400, "loss": 0.0881, "lr": 9.988415880717194e-05, "epoch": 0.12361576100952872, "percentage": 7.06, "elapsed_time": "1:12:36", "remaining_time": "15:56:06", "throughput": 549.96, "total_tokens": 2396160}
{"current_steps": 245, "total_steps": 3400, "loss": 0.0565, "lr": 9.986702663973722e-05, "epoch": 0.1261910893638939, "percentage": 7.21, "elapsed_time": "1:14:02", "remaining_time": "15:53:33", "throughput": 550.56, "total_tokens": 2446080}
{"current_steps": 250, "total_steps": 3400, "loss": 0.0805, "lr": 9.98487151097676e-05, "epoch": 0.12876641771825909, "percentage": 7.35, "elapsed_time": "1:15:30", "remaining_time": "15:51:18", "throughput": 550.98, "total_tokens": 2496000}
{"current_steps": 250, "total_steps": 3400, "eval_loss": 0.055875860154628754, "epoch": 0.12876641771825909, "percentage": 7.35, "elapsed_time": "1:15:49", "remaining_time": "15:55:24", "throughput": 548.62, "total_tokens": 2496000}
{"current_steps": 255, "total_steps": 3400, "loss": 0.0235, "lr": 9.98292246503335e-05, "epoch": 0.13134174607262425, "percentage": 7.5, "elapsed_time": "1:17:21", "remaining_time": "15:54:02", "throughput": 548.54, "total_tokens": 2545920}
{"current_steps": 260, "total_steps": 3400, "loss": 0.0137, "lr": 9.980855572238714e-05, "epoch": 0.13391707442698944, "percentage": 7.65, "elapsed_time": "1:18:48", "remaining_time": "15:51:46", "throughput": 548.97, "total_tokens": 2595840}
{"current_steps": 265, "total_steps": 3400, "loss": 0.0605, "lr": 9.978670881475172e-05, "epoch": 0.13649240278135463, "percentage": 7.79, "elapsed_time": "1:20:15", "remaining_time": "15:49:23", "throughput": 549.47, "total_tokens": 2645760}
{"current_steps": 270, "total_steps": 3400, "loss": 0.0585, "lr": 9.976368444410985e-05, "epoch": 0.1390677311357198, "percentage": 7.94, "elapsed_time": "1:21:42", "remaining_time": "15:47:09", "throughput": 549.89, "total_tokens": 2695680}
{"current_steps": 275, "total_steps": 3400, "loss": 0.0096, "lr": 9.973948315499126e-05, "epoch": 0.141643059490085, "percentage": 8.09, "elapsed_time": "1:23:08", "remaining_time": "15:44:48", "throughput": 550.37, "total_tokens": 2745600}
{"current_steps": 280, "total_steps": 3400, "loss": 0.1383, "lr": 9.971410551976002e-05, "epoch": 0.14421838784445018, "percentage": 8.24, "elapsed_time": "1:24:35", "remaining_time": "15:42:37", "throughput": 550.77, "total_tokens": 2795520}
{"current_steps": 285, "total_steps": 3400, "loss": 0.049, "lr": 9.968755213860094e-05, "epoch": 0.14679371619881534, "percentage": 8.38, "elapsed_time": "1:26:02", "remaining_time": "15:40:23", "throughput": 551.19, "total_tokens": 2845440}
{"current_steps": 290, "total_steps": 3400, "loss": 0.0336, "lr": 9.96598236395054e-05, "epoch": 0.14936904455318054, "percentage": 8.53, "elapsed_time": "1:27:29", "remaining_time": "15:38:15", "throughput": 551.56, "total_tokens": 2895360}
{"current_steps": 295, "total_steps": 3400, "loss": 0.019, "lr": 9.96309206782565e-05, "epoch": 0.1519443729075457, "percentage": 8.68, "elapsed_time": "1:28:55", "remaining_time": "15:36:01", "throughput": 551.98, "total_tokens": 2945280}
{"current_steps": 300, "total_steps": 3400, "loss": 0.0381, "lr": 9.960084393841355e-05, "epoch": 0.1545197012619109, "percentage": 8.82, "elapsed_time": "1:30:22", "remaining_time": "15:33:51", "throughput": 552.37, "total_tokens": 2995200}
{"current_steps": 300, "total_steps": 3400, "eval_loss": 0.030908752232789993, "epoch": 0.1545197012619109, "percentage": 8.82, "elapsed_time": "1:30:42", "remaining_time": "15:37:17", "throughput": 550.35, "total_tokens": 2995200}