ben81828's picture
Training in progress, step 200
bbd8539 verified
raw
history blame
10.6 kB
{"current_steps": 5, "total_steps": 3400, "loss": 1.0954, "lr": 2.9411764705882355e-06, "epoch": 0.0025753283543651817, "percentage": 0.15, "elapsed_time": "0:02:44", "remaining_time": "1 day, 6:59:16", "throughput": 303.84, "total_tokens": 49920}
{"current_steps": 10, "total_steps": 3400, "loss": 0.9793, "lr": 5.882352941176471e-06, "epoch": 0.0051506567087303634, "percentage": 0.29, "elapsed_time": "0:04:11", "remaining_time": "23:42:34", "throughput": 396.53, "total_tokens": 99840}
{"current_steps": 15, "total_steps": 3400, "loss": 1.0964, "lr": 8.823529411764707e-06, "epoch": 0.007725985063095545, "percentage": 0.44, "elapsed_time": "0:05:38", "remaining_time": "21:11:48", "throughput": 442.89, "total_tokens": 149760}
{"current_steps": 20, "total_steps": 3400, "loss": 0.7079, "lr": 1.1764705882352942e-05, "epoch": 0.010301313417460727, "percentage": 0.59, "elapsed_time": "0:07:04", "remaining_time": "19:56:13", "throughput": 470.17, "total_tokens": 199680}
{"current_steps": 25, "total_steps": 3400, "loss": 0.4213, "lr": 1.4705882352941177e-05, "epoch": 0.012876641771825908, "percentage": 0.74, "elapsed_time": "0:08:30", "remaining_time": "19:08:45", "throughput": 488.88, "total_tokens": 249600}
{"current_steps": 30, "total_steps": 3400, "loss": 0.3359, "lr": 1.7647058823529414e-05, "epoch": 0.01545197012619109, "percentage": 0.88, "elapsed_time": "0:09:58", "remaining_time": "18:39:49", "throughput": 500.76, "total_tokens": 299520}
{"current_steps": 35, "total_steps": 3400, "loss": 0.3463, "lr": 2.058823529411765e-05, "epoch": 0.018027298480556272, "percentage": 1.03, "elapsed_time": "0:11:24", "remaining_time": "18:16:36", "throughput": 510.61, "total_tokens": 349440}
{"current_steps": 40, "total_steps": 3400, "loss": 0.28, "lr": 2.3529411764705884e-05, "epoch": 0.020602626834921454, "percentage": 1.18, "elapsed_time": "0:12:51", "remaining_time": "18:00:30", "throughput": 517.44, "total_tokens": 399360}
{"current_steps": 45, "total_steps": 3400, "loss": 0.3442, "lr": 2.647058823529412e-05, "epoch": 0.023177955189286635, "percentage": 1.32, "elapsed_time": "0:14:18", "remaining_time": "17:46:12", "throughput": 523.61, "total_tokens": 449280}
{"current_steps": 50, "total_steps": 3400, "loss": 0.3441, "lr": 2.9411764705882354e-05, "epoch": 0.025753283543651816, "percentage": 1.47, "elapsed_time": "0:15:45", "remaining_time": "17:36:13", "throughput": 527.77, "total_tokens": 499200}
{"current_steps": 50, "total_steps": 3400, "eval_loss": 0.33834776282310486, "epoch": 0.025753283543651816, "percentage": 1.47, "elapsed_time": "0:16:33", "remaining_time": "18:29:12", "throughput": 502.56, "total_tokens": 499200}
{"current_steps": 55, "total_steps": 3400, "loss": 0.3182, "lr": 3.235294117647059e-05, "epoch": 0.028328611898016998, "percentage": 1.62, "elapsed_time": "0:18:08", "remaining_time": "18:22:58", "throughput": 504.64, "total_tokens": 549120}
{"current_steps": 60, "total_steps": 3400, "loss": 0.3294, "lr": 3.529411764705883e-05, "epoch": 0.03090394025238218, "percentage": 1.76, "elapsed_time": "0:19:36", "remaining_time": "18:11:07", "throughput": 509.36, "total_tokens": 599040}
{"current_steps": 65, "total_steps": 3400, "loss": 0.3097, "lr": 3.8235294117647055e-05, "epoch": 0.03347926860674736, "percentage": 1.91, "elapsed_time": "0:21:02", "remaining_time": "17:59:13", "throughput": 514.21, "total_tokens": 648960}
{"current_steps": 70, "total_steps": 3400, "loss": 0.3008, "lr": 4.11764705882353e-05, "epoch": 0.036054596961112545, "percentage": 2.06, "elapsed_time": "0:22:28", "remaining_time": "17:49:24", "throughput": 518.15, "total_tokens": 698880}
{"current_steps": 75, "total_steps": 3400, "loss": 0.3278, "lr": 4.411764705882353e-05, "epoch": 0.03862992531547772, "percentage": 2.21, "elapsed_time": "0:23:55", "remaining_time": "17:40:18", "throughput": 521.81, "total_tokens": 748800}
{"current_steps": 80, "total_steps": 3400, "loss": 0.3074, "lr": 4.705882352941177e-05, "epoch": 0.04120525366984291, "percentage": 2.35, "elapsed_time": "0:25:22", "remaining_time": "17:32:47", "throughput": 524.74, "total_tokens": 798720}
{"current_steps": 85, "total_steps": 3400, "loss": 0.3423, "lr": 5e-05, "epoch": 0.043780582024208085, "percentage": 2.5, "elapsed_time": "0:26:47", "remaining_time": "17:25:06", "throughput": 527.8, "total_tokens": 848640}
{"current_steps": 90, "total_steps": 3400, "loss": 0.3008, "lr": 5.294117647058824e-05, "epoch": 0.04635591037857327, "percentage": 2.65, "elapsed_time": "0:28:13", "remaining_time": "17:17:59", "throughput": 530.62, "total_tokens": 898560}
{"current_steps": 95, "total_steps": 3400, "loss": 0.2815, "lr": 5.588235294117647e-05, "epoch": 0.04893123873293845, "percentage": 2.79, "elapsed_time": "0:29:38", "remaining_time": "17:11:24", "throughput": 533.21, "total_tokens": 948480}
{"current_steps": 100, "total_steps": 3400, "loss": 0.2274, "lr": 5.882352941176471e-05, "epoch": 0.05150656708730363, "percentage": 2.94, "elapsed_time": "0:31:05", "remaining_time": "17:06:03", "throughput": 535.17, "total_tokens": 998400}
{"current_steps": 100, "total_steps": 3400, "eval_loss": 0.18663176894187927, "epoch": 0.05150656708730363, "percentage": 2.94, "elapsed_time": "0:31:24", "remaining_time": "17:16:28", "throughput": 529.8, "total_tokens": 998400}
{"current_steps": 105, "total_steps": 3400, "loss": 0.168, "lr": 6.176470588235295e-05, "epoch": 0.05408189544166881, "percentage": 3.09, "elapsed_time": "0:32:56", "remaining_time": "17:13:45", "throughput": 530.38, "total_tokens": 1048320}
{"current_steps": 110, "total_steps": 3400, "loss": 0.0709, "lr": 6.470588235294118e-05, "epoch": 0.056657223796033995, "percentage": 3.24, "elapsed_time": "0:34:22", "remaining_time": "17:08:05", "throughput": 532.5, "total_tokens": 1098240}
{"current_steps": 115, "total_steps": 3400, "loss": 0.1609, "lr": 6.764705882352942e-05, "epoch": 0.05923255215039917, "percentage": 3.38, "elapsed_time": "0:35:49", "remaining_time": "17:03:19", "throughput": 534.16, "total_tokens": 1148160}
{"current_steps": 120, "total_steps": 3400, "loss": 0.0354, "lr": 7.058823529411765e-05, "epoch": 0.06180788050476436, "percentage": 3.53, "elapsed_time": "0:37:15", "remaining_time": "16:58:19", "throughput": 535.97, "total_tokens": 1198080}
{"current_steps": 125, "total_steps": 3400, "loss": 0.1133, "lr": 7.352941176470589e-05, "epoch": 0.06438320885912954, "percentage": 3.68, "elapsed_time": "0:38:42", "remaining_time": "16:54:05", "throughput": 537.39, "total_tokens": 1248000}
{"current_steps": 130, "total_steps": 3400, "loss": 0.0867, "lr": 7.647058823529411e-05, "epoch": 0.06695853721349472, "percentage": 3.82, "elapsed_time": "0:40:08", "remaining_time": "16:49:31", "throughput": 539.0, "total_tokens": 1297920}
{"current_steps": 135, "total_steps": 3400, "loss": 0.1352, "lr": 7.941176470588235e-05, "epoch": 0.0695338655678599, "percentage": 3.97, "elapsed_time": "0:41:35", "remaining_time": "16:45:43", "throughput": 540.2, "total_tokens": 1347840}
{"current_steps": 140, "total_steps": 3400, "loss": 0.101, "lr": 8.23529411764706e-05, "epoch": 0.07210919392222509, "percentage": 4.12, "elapsed_time": "0:43:00", "remaining_time": "16:41:36", "throughput": 541.59, "total_tokens": 1397760}
{"current_steps": 145, "total_steps": 3400, "loss": 0.0334, "lr": 8.529411764705883e-05, "epoch": 0.07468452227659027, "percentage": 4.26, "elapsed_time": "0:44:28", "remaining_time": "16:38:14", "throughput": 542.59, "total_tokens": 1447680}
{"current_steps": 150, "total_steps": 3400, "loss": 0.0667, "lr": 8.823529411764706e-05, "epoch": 0.07725985063095545, "percentage": 4.41, "elapsed_time": "0:45:53", "remaining_time": "16:34:25", "throughput": 543.83, "total_tokens": 1497600}
{"current_steps": 150, "total_steps": 3400, "eval_loss": 0.09665286540985107, "epoch": 0.07725985063095545, "percentage": 4.41, "elapsed_time": "0:46:13", "remaining_time": "16:41:23", "throughput": 540.05, "total_tokens": 1497600}
{"current_steps": 155, "total_steps": 3400, "loss": 0.06, "lr": 9.11764705882353e-05, "epoch": 0.07983517898532062, "percentage": 4.56, "elapsed_time": "0:47:46", "remaining_time": "16:40:10", "throughput": 539.88, "total_tokens": 1547520}
{"current_steps": 160, "total_steps": 3400, "loss": 0.0265, "lr": 9.411764705882353e-05, "epoch": 0.08241050733968582, "percentage": 4.71, "elapsed_time": "0:49:12", "remaining_time": "16:36:21", "throughput": 541.11, "total_tokens": 1597440}
{"current_steps": 165, "total_steps": 3400, "loss": 0.1743, "lr": 9.705882352941177e-05, "epoch": 0.08498583569405099, "percentage": 4.85, "elapsed_time": "0:50:39", "remaining_time": "16:33:07", "throughput": 542.03, "total_tokens": 1647360}
{"current_steps": 170, "total_steps": 3400, "loss": 0.051, "lr": 0.0001, "epoch": 0.08756116404841617, "percentage": 5.0, "elapsed_time": "0:52:04", "remaining_time": "16:29:34", "throughput": 543.14, "total_tokens": 1697280}
{"current_steps": 175, "total_steps": 3400, "loss": 0.0584, "lr": 9.999940874631277e-05, "epoch": 0.09013649240278135, "percentage": 5.15, "elapsed_time": "0:53:31", "remaining_time": "16:26:31", "throughput": 543.97, "total_tokens": 1747200}
{"current_steps": 180, "total_steps": 3400, "loss": 0.0704, "lr": 9.999763499923432e-05, "epoch": 0.09271182075714654, "percentage": 5.29, "elapsed_time": "0:54:57", "remaining_time": "16:23:09", "throughput": 544.99, "total_tokens": 1797120}
{"current_steps": 185, "total_steps": 3400, "loss": 0.0278, "lr": 9.999467880071402e-05, "epoch": 0.09528714911151172, "percentage": 5.44, "elapsed_time": "0:56:23", "remaining_time": "16:20:03", "throughput": 545.86, "total_tokens": 1847040}
{"current_steps": 190, "total_steps": 3400, "loss": 0.0862, "lr": 9.999054022066641e-05, "epoch": 0.0978624774658769, "percentage": 5.59, "elapsed_time": "0:57:49", "remaining_time": "16:16:57", "throughput": 546.74, "total_tokens": 1896960}
{"current_steps": 195, "total_steps": 3400, "loss": 0.0565, "lr": 9.998521935696953e-05, "epoch": 0.10043780582024209, "percentage": 5.74, "elapsed_time": "0:59:16", "remaining_time": "16:14:07", "throughput": 547.48, "total_tokens": 1946880}
{"current_steps": 200, "total_steps": 3400, "loss": 0.0459, "lr": 9.997871633546257e-05, "epoch": 0.10301313417460727, "percentage": 5.88, "elapsed_time": "1:00:42", "remaining_time": "16:11:15", "throughput": 548.24, "total_tokens": 1996800}
{"current_steps": 200, "total_steps": 3400, "eval_loss": 0.09957947582006454, "epoch": 0.10301313417460727, "percentage": 5.88, "elapsed_time": "1:01:01", "remaining_time": "16:16:23", "throughput": 545.35, "total_tokens": 1996800}