inflaton's picture
finished finetuning of phi-3.5-mini
3d8ab56
raw
history blame
11.8 kB
{"current_steps": 5, "total_steps": 210, "loss": 4.6057, "learning_rate": 2.380952380952381e-05, "epoch": 0.1423487544483986, "percentage": 2.38, "elapsed_time": "0:01:07", "remaining_time": "0:46:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 10, "total_steps": 210, "loss": 4.3259, "learning_rate": 4.761904761904762e-05, "epoch": 0.2846975088967972, "percentage": 4.76, "elapsed_time": "0:02:18", "remaining_time": "0:46:14", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 15, "total_steps": 210, "loss": 3.5782, "learning_rate": 7.142857142857143e-05, "epoch": 0.42704626334519574, "percentage": 7.14, "elapsed_time": "0:03:22", "remaining_time": "0:43:46", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 20, "total_steps": 210, "loss": 2.542, "learning_rate": 9.523809523809524e-05, "epoch": 0.5693950177935944, "percentage": 9.52, "elapsed_time": "0:04:26", "remaining_time": "0:42:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 25, "total_steps": 210, "loss": 2.0288, "learning_rate": 9.988952191691925e-05, "epoch": 0.7117437722419929, "percentage": 11.9, "elapsed_time": "0:05:39", "remaining_time": "0:41:54", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 30, "total_steps": 210, "loss": 1.9283, "learning_rate": 9.944154131125642e-05, "epoch": 0.8540925266903915, "percentage": 14.29, "elapsed_time": "0:06:46", "remaining_time": "0:40:37", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 35, "total_steps": 210, "loss": 1.9221, "learning_rate": 9.865224352899119e-05, "epoch": 0.99644128113879, "percentage": 16.67, "elapsed_time": "0:07:58", "remaining_time": "0:39:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 35, "total_steps": 210, "eval_loss": 1.9122635126113892, "epoch": 0.99644128113879, "percentage": 16.67, "elapsed_time": "0:08:00", "remaining_time": "0:40:03", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 40, "total_steps": 210, "loss": 1.8485, "learning_rate": 9.752707744739145e-05, "epoch": 1.1387900355871885, "percentage": 19.05, "elapsed_time": "0:09:09", "remaining_time": "0:38:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 45, "total_steps": 210, "loss": 1.8223, "learning_rate": 9.607381059352038e-05, "epoch": 1.281138790035587, "percentage": 21.43, "elapsed_time": "0:10:21", "remaining_time": "0:37:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 50, "total_steps": 210, "loss": 1.7991, "learning_rate": 9.430247552150673e-05, "epoch": 1.4234875444839858, "percentage": 23.81, "elapsed_time": "0:11:31", "remaining_time": "0:36:53", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 55, "total_steps": 210, "loss": 1.7761, "learning_rate": 9.22253005533154e-05, "epoch": 1.5658362989323842, "percentage": 26.19, "elapsed_time": "0:12:39", "remaining_time": "0:35:40", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 60, "total_steps": 210, "loss": 1.7781, "learning_rate": 8.985662536114613e-05, "epoch": 1.708185053380783, "percentage": 28.57, "elapsed_time": "0:13:43", "remaining_time": "0:34:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 65, "total_steps": 210, "loss": 1.7457, "learning_rate": 8.721280197423258e-05, "epoch": 1.8505338078291815, "percentage": 30.95, "elapsed_time": "0:14:52", "remaining_time": "0:33:10", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 70, "total_steps": 210, "loss": 1.7521, "learning_rate": 8.43120818934367e-05, "epoch": 1.99288256227758, "percentage": 33.33, "elapsed_time": "0:15:59", "remaining_time": "0:31:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 70, "total_steps": 210, "eval_loss": 1.7631458044052124, "epoch": 1.99288256227758, "percentage": 33.33, "elapsed_time": "0:16:02", "remaining_time": "0:32:04", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 75, "total_steps": 210, "loss": 1.6471, "learning_rate": 8.117449009293668e-05, "epoch": 2.135231316725979, "percentage": 35.71, "elapsed_time": "0:17:11", "remaining_time": "0:30:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 80, "total_steps": 210, "loss": 1.7, "learning_rate": 7.782168677883206e-05, "epoch": 2.277580071174377, "percentage": 38.1, "elapsed_time": "0:18:16", "remaining_time": "0:29:41", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 85, "total_steps": 210, "loss": 1.7183, "learning_rate": 7.427681785900761e-05, "epoch": 2.419928825622776, "percentage": 40.48, "elapsed_time": "0:19:23", "remaining_time": "0:28:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 90, "total_steps": 210, "loss": 1.6825, "learning_rate": 7.056435515653059e-05, "epoch": 2.562277580071174, "percentage": 42.86, "elapsed_time": "0:20:30", "remaining_time": "0:27:21", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 95, "total_steps": 210, "loss": 1.664, "learning_rate": 6.670992746965938e-05, "epoch": 2.704626334519573, "percentage": 45.24, "elapsed_time": "0:21:42", "remaining_time": "0:26:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 100, "total_steps": 210, "loss": 1.6662, "learning_rate": 6.274014364473274e-05, "epoch": 2.8469750889679717, "percentage": 47.62, "elapsed_time": "0:22:48", "remaining_time": "0:25:05", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 105, "total_steps": 210, "loss": 1.6831, "learning_rate": 5.868240888334653e-05, "epoch": 2.9893238434163703, "percentage": 50.0, "elapsed_time": "0:23:56", "remaining_time": "0:23:56", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 105, "total_steps": 210, "eval_loss": 1.691434621810913, "epoch": 2.9893238434163703, "percentage": 50.0, "elapsed_time": "0:23:59", "remaining_time": "0:23:59", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 110, "total_steps": 210, "loss": 1.6085, "learning_rate": 5.456473555193242e-05, "epoch": 3.131672597864769, "percentage": 52.38, "elapsed_time": "0:25:05", "remaining_time": "0:22:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 115, "total_steps": 210, "loss": 1.6105, "learning_rate": 5.041554979980486e-05, "epoch": 3.2740213523131674, "percentage": 54.76, "elapsed_time": "0:26:12", "remaining_time": "0:21:39", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 120, "total_steps": 210, "loss": 1.6227, "learning_rate": 4.626349532067879e-05, "epoch": 3.416370106761566, "percentage": 57.14, "elapsed_time": "0:27:20", "remaining_time": "0:20:30", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 125, "total_steps": 210, "loss": 1.6202, "learning_rate": 4.213723561238074e-05, "epoch": 3.5587188612099645, "percentage": 59.52, "elapsed_time": "0:28:31", "remaining_time": "0:19:23", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 130, "total_steps": 210, "loss": 1.6178, "learning_rate": 3.806525609984312e-05, "epoch": 3.701067615658363, "percentage": 61.9, "elapsed_time": "0:29:45", "remaining_time": "0:18:18", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 135, "total_steps": 210, "loss": 1.6023, "learning_rate": 3.4075667487415785e-05, "epoch": 3.8434163701067616, "percentage": 64.29, "elapsed_time": "0:30:51", "remaining_time": "0:17:08", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 140, "total_steps": 210, "loss": 1.5566, "learning_rate": 3.019601169804216e-05, "epoch": 3.98576512455516, "percentage": 66.67, "elapsed_time": "0:31:55", "remaining_time": "0:15:57", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 140, "total_steps": 210, "eval_loss": 1.6648945808410645, "epoch": 3.98576512455516, "percentage": 66.67, "elapsed_time": "0:31:58", "remaining_time": "0:15:59", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 145, "total_steps": 210, "loss": 1.5459, "learning_rate": 2.645307173898901e-05, "epoch": 4.128113879003559, "percentage": 69.05, "elapsed_time": "0:33:02", "remaining_time": "0:14:48", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 150, "total_steps": 210, "loss": 1.5934, "learning_rate": 2.2872686806712035e-05, "epoch": 4.270462633451958, "percentage": 71.43, "elapsed_time": "0:34:11", "remaining_time": "0:13:40", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 155, "total_steps": 210, "loss": 1.543, "learning_rate": 1.947957390727185e-05, "epoch": 4.412811387900356, "percentage": 73.81, "elapsed_time": "0:35:20", "remaining_time": "0:12:32", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 160, "total_steps": 210, "loss": 1.568, "learning_rate": 1.629715722373423e-05, "epoch": 4.555160142348754, "percentage": 76.19, "elapsed_time": "0:36:29", "remaining_time": "0:11:24", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 165, "total_steps": 210, "loss": 1.5337, "learning_rate": 1.3347406408508695e-05, "epoch": 4.697508896797153, "percentage": 78.57, "elapsed_time": "0:37:36", "remaining_time": "0:10:15", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 170, "total_steps": 210, "loss": 1.5442, "learning_rate": 1.0650684916965559e-05, "epoch": 4.839857651245552, "percentage": 80.95, "elapsed_time": "0:38:41", "remaining_time": "0:09:06", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 175, "total_steps": 210, "loss": 1.562, "learning_rate": 8.225609429353187e-06, "epoch": 4.98220640569395, "percentage": 83.33, "elapsed_time": "0:39:53", "remaining_time": "0:07:58", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 175, "total_steps": 210, "eval_loss": 1.6638323068618774, "epoch": 4.98220640569395, "percentage": 83.33, "elapsed_time": "0:39:56", "remaining_time": "0:07:59", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 180, "total_steps": 210, "loss": 1.525, "learning_rate": 6.088921331488568e-06, "epoch": 5.124555160142349, "percentage": 85.71, "elapsed_time": "0:41:05", "remaining_time": "0:06:50", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 185, "total_steps": 210, "loss": 1.5231, "learning_rate": 4.255371141448272e-06, "epoch": 5.266903914590747, "percentage": 88.1, "elapsed_time": "0:42:12", "remaining_time": "0:05:42", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 190, "total_steps": 210, "loss": 1.5374, "learning_rate": 2.737616680113758e-06, "epoch": 5.409252669039146, "percentage": 90.48, "elapsed_time": "0:43:16", "remaining_time": "0:04:33", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 195, "total_steps": 210, "loss": 1.5275, "learning_rate": 1.5461356885461075e-06, "epoch": 5.551601423487544, "percentage": 92.86, "elapsed_time": "0:44:29", "remaining_time": "0:03:25", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 200, "total_steps": 210, "loss": 1.5365, "learning_rate": 6.891534954310885e-07, "epoch": 5.693950177935943, "percentage": 95.24, "elapsed_time": "0:45:38", "remaining_time": "0:02:16", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 205, "total_steps": 210, "loss": 1.5085, "learning_rate": 1.725862339392259e-07, "epoch": 5.8362989323843415, "percentage": 97.62, "elapsed_time": "0:46:43", "remaining_time": "0:01:08", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 210, "total_steps": 210, "loss": 1.5573, "learning_rate": 0.0, "epoch": 5.9786476868327405, "percentage": 100.0, "elapsed_time": "0:47:51", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 210, "total_steps": 210, "eval_loss": 1.6644203662872314, "epoch": 5.9786476868327405, "percentage": 100.0, "elapsed_time": "0:47:54", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}
{"current_steps": 210, "total_steps": 210, "epoch": 5.9786476868327405, "percentage": 100.0, "elapsed_time": "0:47:55", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0}