OH_original_wo_null_sources / trainer_log.jsonl
sedrickkeh's picture
Training in progress, epoch 3
3110750 verified
{"current_steps": 10, "total_steps": 909, "loss": 0.8023, "learning_rate": 5e-06, "epoch": 0.033003300330033, "percentage": 1.1, "elapsed_time": "0:01:37", "remaining_time": "2:26:12"}
{"current_steps": 20, "total_steps": 909, "loss": 0.7329, "learning_rate": 5e-06, "epoch": 0.066006600660066, "percentage": 2.2, "elapsed_time": "0:03:06", "remaining_time": "2:18:30"}
{"current_steps": 30, "total_steps": 909, "loss": 0.6935, "learning_rate": 5e-06, "epoch": 0.09900990099009901, "percentage": 3.3, "elapsed_time": "0:04:37", "remaining_time": "2:15:20"}
{"current_steps": 40, "total_steps": 909, "loss": 0.6825, "learning_rate": 5e-06, "epoch": 0.132013201320132, "percentage": 4.4, "elapsed_time": "0:06:06", "remaining_time": "2:12:45"}
{"current_steps": 50, "total_steps": 909, "loss": 0.6616, "learning_rate": 5e-06, "epoch": 0.16501650165016502, "percentage": 5.5, "elapsed_time": "0:07:36", "remaining_time": "2:10:36"}
{"current_steps": 60, "total_steps": 909, "loss": 0.6641, "learning_rate": 5e-06, "epoch": 0.19801980198019803, "percentage": 6.6, "elapsed_time": "0:09:06", "remaining_time": "2:08:45"}
{"current_steps": 70, "total_steps": 909, "loss": 0.6469, "learning_rate": 5e-06, "epoch": 0.23102310231023102, "percentage": 7.7, "elapsed_time": "0:10:35", "remaining_time": "2:06:52"}
{"current_steps": 80, "total_steps": 909, "loss": 0.6401, "learning_rate": 5e-06, "epoch": 0.264026402640264, "percentage": 8.8, "elapsed_time": "0:12:04", "remaining_time": "2:05:06"}
{"current_steps": 90, "total_steps": 909, "loss": 0.631, "learning_rate": 5e-06, "epoch": 0.297029702970297, "percentage": 9.9, "elapsed_time": "0:13:33", "remaining_time": "2:03:25"}
{"current_steps": 100, "total_steps": 909, "loss": 0.6376, "learning_rate": 5e-06, "epoch": 0.33003300330033003, "percentage": 11.0, "elapsed_time": "0:15:03", "remaining_time": "2:01:53"}
{"current_steps": 110, "total_steps": 909, "loss": 0.6336, "learning_rate": 5e-06, "epoch": 0.36303630363036304, "percentage": 12.1, "elapsed_time": "0:16:33", "remaining_time": "2:00:17"}
{"current_steps": 120, "total_steps": 909, "loss": 0.6253, "learning_rate": 5e-06, "epoch": 0.39603960396039606, "percentage": 13.2, "elapsed_time": "0:18:03", "remaining_time": "1:58:41"}
{"current_steps": 130, "total_steps": 909, "loss": 0.6231, "learning_rate": 5e-06, "epoch": 0.429042904290429, "percentage": 14.3, "elapsed_time": "0:19:32", "remaining_time": "1:57:07"}
{"current_steps": 140, "total_steps": 909, "loss": 0.6208, "learning_rate": 5e-06, "epoch": 0.46204620462046203, "percentage": 15.4, "elapsed_time": "0:21:02", "remaining_time": "1:55:37"}
{"current_steps": 150, "total_steps": 909, "loss": 0.6178, "learning_rate": 5e-06, "epoch": 0.49504950495049505, "percentage": 16.5, "elapsed_time": "0:22:32", "remaining_time": "1:54:04"}
{"current_steps": 160, "total_steps": 909, "loss": 0.6228, "learning_rate": 5e-06, "epoch": 0.528052805280528, "percentage": 17.6, "elapsed_time": "0:24:02", "remaining_time": "1:52:32"}
{"current_steps": 170, "total_steps": 909, "loss": 0.6161, "learning_rate": 5e-06, "epoch": 0.5610561056105611, "percentage": 18.7, "elapsed_time": "0:25:32", "remaining_time": "1:50:59"}
{"current_steps": 180, "total_steps": 909, "loss": 0.6224, "learning_rate": 5e-06, "epoch": 0.594059405940594, "percentage": 19.8, "elapsed_time": "0:27:01", "remaining_time": "1:49:27"}
{"current_steps": 190, "total_steps": 909, "loss": 0.6123, "learning_rate": 5e-06, "epoch": 0.6270627062706271, "percentage": 20.9, "elapsed_time": "0:28:31", "remaining_time": "1:47:57"}
{"current_steps": 200, "total_steps": 909, "loss": 0.6188, "learning_rate": 5e-06, "epoch": 0.6600660066006601, "percentage": 22.0, "elapsed_time": "0:30:02", "remaining_time": "1:46:29"}
{"current_steps": 210, "total_steps": 909, "loss": 0.6148, "learning_rate": 5e-06, "epoch": 0.693069306930693, "percentage": 23.1, "elapsed_time": "0:31:32", "remaining_time": "1:44:58"}
{"current_steps": 220, "total_steps": 909, "loss": 0.609, "learning_rate": 5e-06, "epoch": 0.7260726072607261, "percentage": 24.2, "elapsed_time": "0:33:02", "remaining_time": "1:43:28"}
{"current_steps": 230, "total_steps": 909, "loss": 0.6065, "learning_rate": 5e-06, "epoch": 0.759075907590759, "percentage": 25.3, "elapsed_time": "0:34:31", "remaining_time": "1:41:56"}
{"current_steps": 240, "total_steps": 909, "loss": 0.6076, "learning_rate": 5e-06, "epoch": 0.7920792079207921, "percentage": 26.4, "elapsed_time": "0:36:00", "remaining_time": "1:40:22"}
{"current_steps": 250, "total_steps": 909, "loss": 0.6007, "learning_rate": 5e-06, "epoch": 0.8250825082508251, "percentage": 27.5, "elapsed_time": "0:37:29", "remaining_time": "1:38:50"}
{"current_steps": 260, "total_steps": 909, "loss": 0.6096, "learning_rate": 5e-06, "epoch": 0.858085808580858, "percentage": 28.6, "elapsed_time": "0:38:59", "remaining_time": "1:37:20"}
{"current_steps": 270, "total_steps": 909, "loss": 0.6133, "learning_rate": 5e-06, "epoch": 0.8910891089108911, "percentage": 29.7, "elapsed_time": "0:40:28", "remaining_time": "1:35:48"}
{"current_steps": 280, "total_steps": 909, "loss": 0.5977, "learning_rate": 5e-06, "epoch": 0.9240924092409241, "percentage": 30.8, "elapsed_time": "0:41:59", "remaining_time": "1:34:18"}
{"current_steps": 290, "total_steps": 909, "loss": 0.605, "learning_rate": 5e-06, "epoch": 0.9570957095709571, "percentage": 31.9, "elapsed_time": "0:43:28", "remaining_time": "1:32:48"}
{"current_steps": 300, "total_steps": 909, "loss": 0.6001, "learning_rate": 5e-06, "epoch": 0.9900990099009901, "percentage": 33.0, "elapsed_time": "0:44:59", "remaining_time": "1:31:19"}
{"current_steps": 303, "total_steps": 909, "eval_loss": 0.6034399271011353, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:46:20", "remaining_time": "1:32:40"}
{"current_steps": 310, "total_steps": 909, "loss": 0.564, "learning_rate": 5e-06, "epoch": 1.023102310231023, "percentage": 34.1, "elapsed_time": "0:48:13", "remaining_time": "1:33:11"}
{"current_steps": 320, "total_steps": 909, "loss": 0.5624, "learning_rate": 5e-06, "epoch": 1.056105610561056, "percentage": 35.2, "elapsed_time": "0:49:43", "remaining_time": "1:31:31"}
{"current_steps": 330, "total_steps": 909, "loss": 0.5531, "learning_rate": 5e-06, "epoch": 1.0891089108910892, "percentage": 36.3, "elapsed_time": "0:51:13", "remaining_time": "1:29:51"}
{"current_steps": 340, "total_steps": 909, "loss": 0.559, "learning_rate": 5e-06, "epoch": 1.1221122112211221, "percentage": 37.4, "elapsed_time": "0:52:42", "remaining_time": "1:28:12"}
{"current_steps": 350, "total_steps": 909, "loss": 0.5573, "learning_rate": 5e-06, "epoch": 1.155115511551155, "percentage": 38.5, "elapsed_time": "0:54:11", "remaining_time": "1:26:32"}
{"current_steps": 360, "total_steps": 909, "loss": 0.5547, "learning_rate": 5e-06, "epoch": 1.188118811881188, "percentage": 39.6, "elapsed_time": "0:55:40", "remaining_time": "1:24:54"}
{"current_steps": 370, "total_steps": 909, "loss": 0.561, "learning_rate": 5e-06, "epoch": 1.221122112211221, "percentage": 40.7, "elapsed_time": "0:57:10", "remaining_time": "1:23:16"}
{"current_steps": 380, "total_steps": 909, "loss": 0.5494, "learning_rate": 5e-06, "epoch": 1.2541254125412542, "percentage": 41.8, "elapsed_time": "0:58:39", "remaining_time": "1:21:39"}
{"current_steps": 390, "total_steps": 909, "loss": 0.5548, "learning_rate": 5e-06, "epoch": 1.2871287128712872, "percentage": 42.9, "elapsed_time": "1:00:08", "remaining_time": "1:20:02"}
{"current_steps": 400, "total_steps": 909, "loss": 0.5514, "learning_rate": 5e-06, "epoch": 1.3201320132013201, "percentage": 44.0, "elapsed_time": "1:01:38", "remaining_time": "1:18:26"}
{"current_steps": 410, "total_steps": 909, "loss": 0.5521, "learning_rate": 5e-06, "epoch": 1.353135313531353, "percentage": 45.1, "elapsed_time": "1:03:07", "remaining_time": "1:16:49"}
{"current_steps": 420, "total_steps": 909, "loss": 0.5554, "learning_rate": 5e-06, "epoch": 1.386138613861386, "percentage": 46.2, "elapsed_time": "1:04:36", "remaining_time": "1:15:13"}
{"current_steps": 430, "total_steps": 909, "loss": 0.5505, "learning_rate": 5e-06, "epoch": 1.4191419141914192, "percentage": 47.3, "elapsed_time": "1:06:05", "remaining_time": "1:13:37"}
{"current_steps": 440, "total_steps": 909, "loss": 0.5564, "learning_rate": 5e-06, "epoch": 1.4521452145214522, "percentage": 48.4, "elapsed_time": "1:07:35", "remaining_time": "1:12:02"}
{"current_steps": 450, "total_steps": 909, "loss": 0.5507, "learning_rate": 5e-06, "epoch": 1.4851485148514851, "percentage": 49.5, "elapsed_time": "1:09:04", "remaining_time": "1:10:27"}
{"current_steps": 460, "total_steps": 909, "loss": 0.5537, "learning_rate": 5e-06, "epoch": 1.5181518151815183, "percentage": 50.61, "elapsed_time": "1:10:33", "remaining_time": "1:08:52"}
{"current_steps": 470, "total_steps": 909, "loss": 0.5457, "learning_rate": 5e-06, "epoch": 1.551155115511551, "percentage": 51.71, "elapsed_time": "1:12:03", "remaining_time": "1:07:18"}
{"current_steps": 480, "total_steps": 909, "loss": 0.5574, "learning_rate": 5e-06, "epoch": 1.5841584158415842, "percentage": 52.81, "elapsed_time": "1:13:32", "remaining_time": "1:05:43"}
{"current_steps": 490, "total_steps": 909, "loss": 0.5554, "learning_rate": 5e-06, "epoch": 1.6171617161716172, "percentage": 53.91, "elapsed_time": "1:15:02", "remaining_time": "1:04:10"}
{"current_steps": 500, "total_steps": 909, "loss": 0.5463, "learning_rate": 5e-06, "epoch": 1.6501650165016502, "percentage": 55.01, "elapsed_time": "1:16:32", "remaining_time": "1:02:36"}
{"current_steps": 510, "total_steps": 909, "loss": 0.5514, "learning_rate": 5e-06, "epoch": 1.6831683168316833, "percentage": 56.11, "elapsed_time": "1:18:02", "remaining_time": "1:01:03"}
{"current_steps": 520, "total_steps": 909, "loss": 0.55, "learning_rate": 5e-06, "epoch": 1.716171617161716, "percentage": 57.21, "elapsed_time": "1:19:32", "remaining_time": "0:59:30"}
{"current_steps": 530, "total_steps": 909, "loss": 0.5541, "learning_rate": 5e-06, "epoch": 1.7491749174917492, "percentage": 58.31, "elapsed_time": "1:21:02", "remaining_time": "0:57:57"}
{"current_steps": 540, "total_steps": 909, "loss": 0.5516, "learning_rate": 5e-06, "epoch": 1.7821782178217822, "percentage": 59.41, "elapsed_time": "1:22:31", "remaining_time": "0:56:23"}
{"current_steps": 550, "total_steps": 909, "loss": 0.5479, "learning_rate": 5e-06, "epoch": 1.8151815181518152, "percentage": 60.51, "elapsed_time": "1:24:01", "remaining_time": "0:54:50"}
{"current_steps": 560, "total_steps": 909, "loss": 0.5526, "learning_rate": 5e-06, "epoch": 1.8481848184818483, "percentage": 61.61, "elapsed_time": "1:25:31", "remaining_time": "0:53:17"}
{"current_steps": 570, "total_steps": 909, "loss": 0.5409, "learning_rate": 5e-06, "epoch": 1.881188118811881, "percentage": 62.71, "elapsed_time": "1:27:01", "remaining_time": "0:51:45"}
{"current_steps": 580, "total_steps": 909, "loss": 0.5463, "learning_rate": 5e-06, "epoch": 1.9141914191419143, "percentage": 63.81, "elapsed_time": "1:28:32", "remaining_time": "0:50:13"}
{"current_steps": 590, "total_steps": 909, "loss": 0.5504, "learning_rate": 5e-06, "epoch": 1.9471947194719472, "percentage": 64.91, "elapsed_time": "1:30:01", "remaining_time": "0:48:40"}
{"current_steps": 600, "total_steps": 909, "loss": 0.553, "learning_rate": 5e-06, "epoch": 1.9801980198019802, "percentage": 66.01, "elapsed_time": "1:31:30", "remaining_time": "0:47:07"}
{"current_steps": 606, "total_steps": 909, "eval_loss": 0.5952492952346802, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "1:33:19", "remaining_time": "0:46:39"}
{"current_steps": 610, "total_steps": 909, "loss": 0.5267, "learning_rate": 5e-06, "epoch": 2.0132013201320134, "percentage": 67.11, "elapsed_time": "1:34:53", "remaining_time": "0:46:30"}
{"current_steps": 620, "total_steps": 909, "loss": 0.4976, "learning_rate": 5e-06, "epoch": 2.046204620462046, "percentage": 68.21, "elapsed_time": "1:36:21", "remaining_time": "0:44:55"}
{"current_steps": 630, "total_steps": 909, "loss": 0.4895, "learning_rate": 5e-06, "epoch": 2.0792079207920793, "percentage": 69.31, "elapsed_time": "1:37:49", "remaining_time": "0:43:19"}
{"current_steps": 640, "total_steps": 909, "loss": 0.4975, "learning_rate": 5e-06, "epoch": 2.112211221122112, "percentage": 70.41, "elapsed_time": "1:39:19", "remaining_time": "0:41:44"}
{"current_steps": 650, "total_steps": 909, "loss": 0.4973, "learning_rate": 5e-06, "epoch": 2.145214521452145, "percentage": 71.51, "elapsed_time": "1:40:48", "remaining_time": "0:40:10"}
{"current_steps": 660, "total_steps": 909, "loss": 0.4961, "learning_rate": 5e-06, "epoch": 2.1782178217821784, "percentage": 72.61, "elapsed_time": "1:42:17", "remaining_time": "0:38:35"}
{"current_steps": 670, "total_steps": 909, "loss": 0.5023, "learning_rate": 5e-06, "epoch": 2.211221122112211, "percentage": 73.71, "elapsed_time": "1:43:47", "remaining_time": "0:37:01"}
{"current_steps": 680, "total_steps": 909, "loss": 0.5005, "learning_rate": 5e-06, "epoch": 2.2442244224422443, "percentage": 74.81, "elapsed_time": "1:45:18", "remaining_time": "0:35:27"}
{"current_steps": 690, "total_steps": 909, "loss": 0.4993, "learning_rate": 5e-06, "epoch": 2.2772277227722775, "percentage": 75.91, "elapsed_time": "1:46:48", "remaining_time": "0:33:53"}
{"current_steps": 700, "total_steps": 909, "loss": 0.5041, "learning_rate": 5e-06, "epoch": 2.31023102310231, "percentage": 77.01, "elapsed_time": "1:48:17", "remaining_time": "0:32:19"}
{"current_steps": 710, "total_steps": 909, "loss": 0.497, "learning_rate": 5e-06, "epoch": 2.3432343234323434, "percentage": 78.11, "elapsed_time": "1:49:47", "remaining_time": "0:30:46"}
{"current_steps": 720, "total_steps": 909, "loss": 0.5032, "learning_rate": 5e-06, "epoch": 2.376237623762376, "percentage": 79.21, "elapsed_time": "1:51:16", "remaining_time": "0:29:12"}
{"current_steps": 730, "total_steps": 909, "loss": 0.5035, "learning_rate": 5e-06, "epoch": 2.4092409240924093, "percentage": 80.31, "elapsed_time": "1:52:45", "remaining_time": "0:27:39"}
{"current_steps": 740, "total_steps": 909, "loss": 0.5104, "learning_rate": 5e-06, "epoch": 2.442244224422442, "percentage": 81.41, "elapsed_time": "1:54:15", "remaining_time": "0:26:05"}
{"current_steps": 750, "total_steps": 909, "loss": 0.4927, "learning_rate": 5e-06, "epoch": 2.4752475247524752, "percentage": 82.51, "elapsed_time": "1:55:44", "remaining_time": "0:24:32"}
{"current_steps": 760, "total_steps": 909, "loss": 0.4997, "learning_rate": 5e-06, "epoch": 2.5082508250825084, "percentage": 83.61, "elapsed_time": "1:57:13", "remaining_time": "0:22:59"}
{"current_steps": 770, "total_steps": 909, "loss": 0.5052, "learning_rate": 5e-06, "epoch": 2.541254125412541, "percentage": 84.71, "elapsed_time": "1:58:43", "remaining_time": "0:21:25"}
{"current_steps": 780, "total_steps": 909, "loss": 0.5058, "learning_rate": 5e-06, "epoch": 2.5742574257425743, "percentage": 85.81, "elapsed_time": "2:00:13", "remaining_time": "0:19:52"}
{"current_steps": 790, "total_steps": 909, "loss": 0.5035, "learning_rate": 5e-06, "epoch": 2.6072607260726075, "percentage": 86.91, "elapsed_time": "2:01:42", "remaining_time": "0:18:20"}
{"current_steps": 800, "total_steps": 909, "loss": 0.5055, "learning_rate": 5e-06, "epoch": 2.6402640264026402, "percentage": 88.01, "elapsed_time": "2:03:11", "remaining_time": "0:16:47"}
{"current_steps": 810, "total_steps": 909, "loss": 0.4997, "learning_rate": 5e-06, "epoch": 2.6732673267326734, "percentage": 89.11, "elapsed_time": "2:04:41", "remaining_time": "0:15:14"}
{"current_steps": 820, "total_steps": 909, "loss": 0.5031, "learning_rate": 5e-06, "epoch": 2.706270627062706, "percentage": 90.21, "elapsed_time": "2:06:11", "remaining_time": "0:13:41"}
{"current_steps": 830, "total_steps": 909, "loss": 0.5016, "learning_rate": 5e-06, "epoch": 2.7392739273927393, "percentage": 91.31, "elapsed_time": "2:07:41", "remaining_time": "0:12:09"}
{"current_steps": 840, "total_steps": 909, "loss": 0.4993, "learning_rate": 5e-06, "epoch": 2.772277227722772, "percentage": 92.41, "elapsed_time": "2:09:12", "remaining_time": "0:10:36"}
{"current_steps": 850, "total_steps": 909, "loss": 0.509, "learning_rate": 5e-06, "epoch": 2.8052805280528053, "percentage": 93.51, "elapsed_time": "2:10:41", "remaining_time": "0:09:04"}
{"current_steps": 860, "total_steps": 909, "loss": 0.5022, "learning_rate": 5e-06, "epoch": 2.8382838283828384, "percentage": 94.61, "elapsed_time": "2:12:11", "remaining_time": "0:07:31"}
{"current_steps": 870, "total_steps": 909, "loss": 0.5124, "learning_rate": 5e-06, "epoch": 2.871287128712871, "percentage": 95.71, "elapsed_time": "2:13:40", "remaining_time": "0:05:59"}
{"current_steps": 880, "total_steps": 909, "loss": 0.5039, "learning_rate": 5e-06, "epoch": 2.9042904290429044, "percentage": 96.81, "elapsed_time": "2:15:09", "remaining_time": "0:04:27"}
{"current_steps": 890, "total_steps": 909, "loss": 0.4979, "learning_rate": 5e-06, "epoch": 2.9372937293729375, "percentage": 97.91, "elapsed_time": "2:16:40", "remaining_time": "0:02:55"}
{"current_steps": 900, "total_steps": 909, "loss": 0.5045, "learning_rate": 5e-06, "epoch": 2.9702970297029703, "percentage": 99.01, "elapsed_time": "2:18:10", "remaining_time": "0:01:22"}
{"current_steps": 909, "total_steps": 909, "eval_loss": 0.6012608408927917, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "2:21:22", "remaining_time": "0:00:00"}
{"current_steps": 909, "total_steps": 909, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "2:22:09", "remaining_time": "0:00:00"}