|
{ |
|
"best_metric": 1.3240652084350586, |
|
"best_model_checkpoint": "output/taylor-swift/checkpoint-1413", |
|
"epoch": 9.0, |
|
"global_step": 1413, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00013686126352583852, |
|
"loss": 3.3494, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00013584839936279803, |
|
"loss": 3.1755, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00013417141025243517, |
|
"loss": 3.1276, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00013184685763435497, |
|
"loss": 3.1339, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00012889769809041896, |
|
"loss": 3.1666, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00012535305663245375, |
|
"loss": 2.8811, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00012124793907240209, |
|
"loss": 2.9824, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00011662288631546257, |
|
"loss": 2.9442, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00011152357399031441, |
|
"loss": 2.961, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.000106000361370359, |
|
"loss": 2.7762, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00010010779404069628, |
|
"loss": 2.8665, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.390406522234693e-05, |
|
"loss": 2.9223, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.745044107352033e-05, |
|
"loss": 2.8602, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.081065564347997e-05, |
|
"loss": 2.8085, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 7.405028145425961e-05, |
|
"loss": 2.7651, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 6.723608192617619e-05, |
|
"loss": 2.7224, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 6.043535204238934e-05, |
|
"loss": 2.6702, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 5.371525376390658e-05, |
|
"loss": 2.8382, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.714215275827523e-05, |
|
"loss": 2.5947, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.078096299222862e-05, |
|
"loss": 2.7596, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.4694505660892814e-05, |
|
"loss": 2.8001, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.894288878457539e-05, |
|
"loss": 2.7797, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.358291360005225e-05, |
|
"loss": 2.6613, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.8667513608652705e-05, |
|
"loss": 2.6169, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.424523182093601e-05, |
|
"loss": 2.5101, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0359741360532426e-05, |
|
"loss": 2.6278, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.049414161522035e-06, |
|
"loss": 2.8319, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.346942018765275e-06, |
|
"loss": 2.7047, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.2790137335786513e-06, |
|
"loss": 2.7375, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 8.660515431675314e-07, |
|
"loss": 2.9128, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.2200943675939755e-07, |
|
"loss": 2.6662, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.5653560161590576, |
|
"eval_runtime": 2.784, |
|
"eval_samples_per_second": 76.868, |
|
"eval_steps_per_second": 9.698, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5.4235350207093555e-08, |
|
"loss": 2.594, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 6.633985999863723e-07, |
|
"loss": 2.722, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.94348327322787e-06, |
|
"loss": 2.6618, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.881847639061565e-06, |
|
"loss": 2.5415, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 6.459348994545111e-06, |
|
"loss": 2.5807, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 9.650532712235251e-06, |
|
"loss": 2.6543, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.3423883622425528e-05, |
|
"loss": 2.5415, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.7742137247474782e-05, |
|
"loss": 2.435, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.256264781457008e-05, |
|
"loss": 2.629, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.783780941254068e-05, |
|
"loss": 2.4759, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.3515526133505226e-05, |
|
"loss": 2.5917, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.953972655637492e-05, |
|
"loss": 2.6353, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.585091749132818e-05, |
|
"loss": 2.6671, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.238677151664272e-05, |
|
"loss": 2.4302, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.908274250554988e-05, |
|
"loss": 2.65, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 6.587270306435089e-05, |
|
"loss": 2.6803, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 7.268959758664963e-05, |
|
"loss": 2.7479, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 7.946610447433497e-05, |
|
"loss": 2.5183, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 8.613530098541655e-05, |
|
"loss": 2.4577, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.263132414287828e-05, |
|
"loss": 2.6144, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.889002117761157e-05, |
|
"loss": 2.3613, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0001048495830818497, |
|
"loss": 2.3438, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.000110451155016322, |
|
"loss": 2.5456, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00011563941754292792, |
|
"loss": 2.4388, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012036313294285455, |
|
"loss": 2.337, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012457565122486245, |
|
"loss": 2.4888, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00012823537082655537, |
|
"loss": 2.5507, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00013130614945888676, |
|
"loss": 2.4084, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0001337576610365259, |
|
"loss": 2.5188, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0001355656951691469, |
|
"loss": 2.4279, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00013671239625595536, |
|
"loss": 2.4089, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0001371864398222265, |
|
"loss": 2.3992, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.427677631378174, |
|
"eval_runtime": 2.7859, |
|
"eval_samples_per_second": 76.814, |
|
"eval_steps_per_second": 9.691, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00013698314435640815, |
|
"loss": 2.4731, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00013610451754331968, |
|
"loss": 2.5107, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00013455923643686105, |
|
"loss": 2.2137, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00013236256176804074, |
|
"loss": 2.433, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.00012953618723459046, |
|
"loss": 2.2366, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.00012610802526053698, |
|
"loss": 2.2305, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.0001221119313415056, |
|
"loss": 2.0334, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.00011758736969803705, |
|
"loss": 2.2182, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.00011257902353882629, |
|
"loss": 2.4565, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00010713635378280492, |
|
"loss": 2.3876, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.00010131311059799552, |
|
"loss": 2.3244, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 9.516680258103397e-05, |
|
"loss": 2.1224, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 8.875812881958139e-05, |
|
"loss": 2.1287, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 8.215037944640755e-05, |
|
"loss": 2.3755, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 7.540881060509307e-05, |
|
"loss": 2.1973, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 6.860000000000001e-05, |
|
"loss": 2.3247, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 6.179118939490698e-05, |
|
"loss": 2.0219, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 5.504962055359254e-05, |
|
"loss": 2.2227, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.844187118041858e-05, |
|
"loss": 2.0213, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 4.203319741896607e-05, |
|
"loss": 2.4335, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.5886889402004514e-05, |
|
"loss": 2.1705, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.006364621719512e-05, |
|
"loss": 2.0096, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.462097646117378e-05, |
|
"loss": 2.233, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.9612630301962937e-05, |
|
"loss": 2.2407, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.508806865849441e-05, |
|
"loss": 2.1925, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.1091974739463048e-05, |
|
"loss": 2.0939, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 7.66381276540956e-06, |
|
"loss": 2.1105, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.8374382319592885e-06, |
|
"loss": 2.1558, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.6407635631389823e-06, |
|
"loss": 2.3444, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0954824566803356e-06, |
|
"loss": 2.0945, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.168556435918659e-07, |
|
"loss": 1.9806, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.2620296478271484, |
|
"eval_runtime": 2.784, |
|
"eval_samples_per_second": 76.867, |
|
"eval_steps_per_second": 9.698, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.3560177773502823e-08, |
|
"loss": 1.9753, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 4.87603744044624e-07, |
|
"loss": 2.0561, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.6343048308530896e-06, |
|
"loss": 2.0054, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.4423389634741234e-06, |
|
"loss": 2.0684, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 5.893850541113203e-06, |
|
"loss": 1.8837, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 8.964629173444626e-06, |
|
"loss": 1.9202, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.2624348775137553e-05, |
|
"loss": 1.9576, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.6836867057145347e-05, |
|
"loss": 2.1642, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 2.156058245707209e-05, |
|
"loss": 2.2199, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 2.6748844983678075e-05, |
|
"loss": 2.24, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 3.235041691815028e-05, |
|
"loss": 2.2476, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.830997882238844e-05, |
|
"loss": 1.9431, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 4.456867585712155e-05, |
|
"loss": 1.9221, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 5.1064699014583464e-05, |
|
"loss": 1.8139, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 5.773389552566511e-05, |
|
"loss": 2.0033, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 6.451040241335031e-05, |
|
"loss": 2.0651, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 7.132729693564907e-05, |
|
"loss": 2.034, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 7.811725749444997e-05, |
|
"loss": 1.8939, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 8.481322848335718e-05, |
|
"loss": 2.2029, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 9.134908250867189e-05, |
|
"loss": 1.9256, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 9.766027344362499e-05, |
|
"loss": 1.9943, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.00010368447386649474, |
|
"loss": 1.9552, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 0.00010936219058745934, |
|
"loss": 1.9243, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 0.00011463735218542983, |
|
"loss": 1.8552, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.00011945786275252523, |
|
"loss": 2.067, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 0.00012377611637757444, |
|
"loss": 2.0837, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.0001275494672877647, |
|
"loss": 2.0373, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 0.0001307406510054549, |
|
"loss": 1.8866, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 0.0001333181523609384, |
|
"loss": 2.045, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 0.00013525651672677215, |
|
"loss": 2.0499, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 0.00013653660140001365, |
|
"loss": 1.9574, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.0001371457646497929, |
|
"loss": 2.1066, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.209007978439331, |
|
"eval_runtime": 2.7857, |
|
"eval_samples_per_second": 76.822, |
|
"eval_steps_per_second": 9.692, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.0001370779905632406, |
|
"loss": 1.8936, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.00013633394845683248, |
|
"loss": 1.9616, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 0.00013492098626642133, |
|
"loss": 1.7865, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 0.00013285305798123475, |
|
"loss": 1.8786, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.00013015058583847798, |
|
"loss": 1.9233, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.00012684025863946756, |
|
"loss": 1.8903, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 0.000122954768179064, |
|
"loss": 1.895, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.00011853248639134728, |
|
"loss": 1.8842, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 0.00011361708639994783, |
|
"loss": 1.7602, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.00010825711121542465, |
|
"loss": 1.7436, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 0.00010250549433910721, |
|
"loss": 1.8129, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 9.64190370077714e-05, |
|
"loss": 1.7891, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 9.005784724172477e-05, |
|
"loss": 1.8476, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 8.348474623609351e-05, |
|
"loss": 2.0649, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 7.676464795761073e-05, |
|
"loss": 1.9673, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 6.996391807382385e-05, |
|
"loss": 2.0106, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 6.314971854574042e-05, |
|
"loss": 1.8144, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 5.638934435652005e-05, |
|
"loss": 1.8997, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 4.974955892647975e-05, |
|
"loss": 1.6943, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 4.329593477765313e-05, |
|
"loss": 1.784, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 3.709220595930377e-05, |
|
"loss": 1.7719, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 3.119963862964102e-05, |
|
"loss": 1.6203, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 2.5676426009685593e-05, |
|
"loss": 1.7432, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 2.0577113684537407e-05, |
|
"loss": 1.6835, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 1.5952060927597964e-05, |
|
"loss": 1.8013, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.1846943367546308e-05, |
|
"loss": 1.6429, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 8.302301909581063e-06, |
|
"loss": 1.8303, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 5.353142365645029e-06, |
|
"loss": 1.6484, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 3.0285897475648074e-06, |
|
"loss": 1.7701, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 1.3516006372019799e-06, |
|
"loss": 1.7971, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 3.3873647416147947e-07, |
|
"loss": 1.8325, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.7255, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.1195662021636963, |
|
"eval_runtime": 2.7825, |
|
"eval_samples_per_second": 76.91, |
|
"eval_steps_per_second": 9.704, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 3.303277502872983e-07, |
|
"loss": 1.9264, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.9314, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.466734766960144, |
|
"eval_runtime": 9.3674, |
|
"eval_samples_per_second": 21.351, |
|
"eval_steps_per_second": 2.669, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 3.303277502872907e-07, |
|
"loss": 1.908, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 1.3181297643383773e-06, |
|
"loss": 1.8545, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 2.9538929687704367e-06, |
|
"loss": 2.0338, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 5.221864069725677e-06, |
|
"loss": 1.7718, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 8.100201265702767e-06, |
|
"loss": 1.8307, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 1.1561184596045435e-05, |
|
"loss": 1.8088, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 1.5571482899316272e-05, |
|
"loss": 1.6236, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 2.0092474810602853e-05, |
|
"loss": 2.1289, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 2.5080620706373914e-05, |
|
"loss": 1.4544, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 3.048788201485526e-05, |
|
"loss": 2.0407, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 3.6262183853736515e-05, |
|
"loss": 1.8147, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 4.2347916539754777e-05, |
|
"loss": 1.8843, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 4.868647114034379e-05, |
|
"loss": 1.8188, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 5.521680390969348e-05, |
|
"loss": 1.8368, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 6.1876024173392e-05, |
|
"loss": 1.8814, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 6.859999999999984e-05, |
|
"loss": 1.9456, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 7.532397582660791e-05, |
|
"loss": 1.6534, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 8.198319609030643e-05, |
|
"loss": 1.8587, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 8.851352885965613e-05, |
|
"loss": 2.0249, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 9.485208346024515e-05, |
|
"loss": 1.9597, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.00010093781614626339, |
|
"loss": 1.499, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 0.00010671211798514466, |
|
"loss": 1.6634, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 0.00011211937929362601, |
|
"loss": 1.789, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 0.00011710752518939709, |
|
"loss": 1.6422, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 0.00012162851710068368, |
|
"loss": 1.83, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 0.00012563881540395453, |
|
"loss": 1.8398, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.0001290997987342972, |
|
"loss": 1.8865, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 0.0001319781359302743, |
|
"loss": 2.0384, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 0.00013424610703122953, |
|
"loss": 1.8054, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.0001358818702356616, |
|
"loss": 1.6484, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 0.0001368696722497127, |
|
"loss": 2.0007, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0001372, |
|
"loss": 1.8572, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.5031535625457764, |
|
"eval_runtime": 9.4961, |
|
"eval_samples_per_second": 21.061, |
|
"eval_steps_per_second": 2.633, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 0.00013686967224971273, |
|
"loss": 1.4942, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.00013588187023566163, |
|
"loss": 1.782, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 0.00013424610703122958, |
|
"loss": 1.8084, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.00013197813593027432, |
|
"loss": 1.7091, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.00012909979873429724, |
|
"loss": 1.8472, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 0.00012563881540395458, |
|
"loss": 1.7417, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.00012162851710068373, |
|
"loss": 1.6717, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.00011710752518939715, |
|
"loss": 1.6891, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.00011211937929362609, |
|
"loss": 1.8756, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 0.00010671211798514474, |
|
"loss": 1.6079, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 0.00010093781614626349, |
|
"loss": 1.7571, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 9.485208346024524e-05, |
|
"loss": 1.3666, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 8.851352885965622e-05, |
|
"loss": 1.7333, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 8.198319609030653e-05, |
|
"loss": 1.4846, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 7.532397582660802e-05, |
|
"loss": 1.4929, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 6.859999999999993e-05, |
|
"loss": 1.7269, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 6.18760241733921e-05, |
|
"loss": 1.5586, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 5.5216803909693576e-05, |
|
"loss": 1.7124, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 4.868647114034389e-05, |
|
"loss": 1.7847, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 4.2347916539754865e-05, |
|
"loss": 1.5791, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 3.62621838537366e-05, |
|
"loss": 1.71, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 3.0487882014855342e-05, |
|
"loss": 1.6788, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 2.5080620706373995e-05, |
|
"loss": 1.6755, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 2.009247481060292e-05, |
|
"loss": 1.4464, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 1.5571482899316333e-05, |
|
"loss": 1.7238, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 1.1561184596045489e-05, |
|
"loss": 1.4748, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 8.100201265702821e-06, |
|
"loss": 1.488, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 5.221864069725715e-06, |
|
"loss": 1.545, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 2.9538929687704672e-06, |
|
"loss": 1.5979, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 1.3181297643383925e-06, |
|
"loss": 1.7046, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 3.303277502872983e-07, |
|
"loss": 1.5848, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.6901, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.4377235174179077, |
|
"eval_runtime": 9.5035, |
|
"eval_samples_per_second": 21.045, |
|
"eval_steps_per_second": 2.631, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 9.076596574074994e-06, |
|
"loss": 1.4269, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 1.2780475216607764e-05, |
|
"loss": 1.559, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 1.7042649548372873e-05, |
|
"loss": 1.529, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 2.182049015671568e-05, |
|
"loss": 1.6687, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 2.7066210038327817e-05, |
|
"loss": 1.5638, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 3.2727342555268683e-05, |
|
"loss": 1.5554, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 3.874726619575346e-05, |
|
"loss": 1.317, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 4.5065770891153554e-05, |
|
"loss": 1.679, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 5.161966022502662e-05, |
|
"loss": 1.6683, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 5.834338351099537e-05, |
|
"loss": 1.4034, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 6.516969141756308e-05, |
|
"loss": 1.6013, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 7.20303085824368e-05, |
|
"loss": 1.5516, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 7.885661648900452e-05, |
|
"loss": 1.5248, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 8.558033977497326e-05, |
|
"loss": 1.3379, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 9.213422910884634e-05, |
|
"loss": 1.3994, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 9.845273380424641e-05, |
|
"loss": 1.4434, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 0.00010447265744473122, |
|
"loss": 1.6187, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 0.00011013378996167208, |
|
"loss": 1.5693, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 0.00011537950984328424, |
|
"loss": 1.4006, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 0.00012015735045162704, |
|
"loss": 1.7153, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 0.0001244195247833922, |
|
"loss": 1.5277, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.00012812340342592494, |
|
"loss": 1.5052, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 0.0001312319409280581, |
|
"loss": 1.548, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 0.00013371404632128166, |
|
"loss": 1.4896, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 0.0001355448940853745, |
|
"loss": 1.5506, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 0.00013670617244827653, |
|
"loss": 1.6902, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.0001371862665367597, |
|
"loss": 1.5112, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.352002739906311, |
|
"eval_runtime": 5.5761, |
|
"eval_samples_per_second": 40.889, |
|
"eval_steps_per_second": 5.201, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 0.00013698037454606005, |
|
"loss": 1.3097, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 0.0001360905557665658, |
|
"loss": 1.5119, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 0.00013452570998720767, |
|
"loss": 1.2738, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 0.00013230148848155559, |
|
"loss": 1.2764, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 0.00012944013746692, |
|
"loss": 1.2347, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 0.00012597027560214946, |
|
"loss": 1.4534, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 0.00012192660774954517, |
|
"loss": 1.4193, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 0.00011734957786379066, |
|
"loss": 1.5185, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 0.00011228496447963, |
|
"loss": 1.5225, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 0.0001067834228441478, |
|
"loss": 1.8013, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 0.00010089997827314661, |
|
"loss": 1.4035, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 9.469347579898059e-05, |
|
"loss": 1.4431, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 8.82259916143434e-05, |
|
"loss": 1.3074, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 8.1562212198643e-05, |
|
"loss": 1.3587, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 7.476878733681043e-05, |
|
"loss": 1.5297, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 6.791366350152217e-05, |
|
"loss": 1.2815, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 6.106540426620946e-05, |
|
"loss": 1.5607, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 5.4292504545952105e-05, |
|
"loss": 1.1765, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 4.766270552507704e-05, |
|
"loss": 1.5363, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 4.124231712342338e-05, |
|
"loss": 1.4107, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 3.509555477782507e-05, |
|
"loss": 1.4608, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 2.928389717219465e-05, |
|
"loss": 1.3783, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 2.386547134005838e-05, |
|
"loss": 1.2348, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 1.889447128962836e-05, |
|
"loss": 1.5068, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 1.4420615966203568e-05, |
|
"loss": 1.3345, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 1.0488651973253671e-05, |
|
"loss": 1.2429, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 7.13790602586542e-06, |
|
"loss": 1.3129, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 4.401891612830206e-06, |
|
"loss": 1.2724, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 2.307973801450113e-06, |
|
"loss": 1.3608, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 8.770955376250992e-07, |
|
"loss": 1.4328, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 1.2356817870921626e-07, |
|
"loss": 1.3714, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.3240652084350586, |
|
"eval_runtime": 5.3842, |
|
"eval_samples_per_second": 42.346, |
|
"eval_steps_per_second": 5.386, |
|
"step": 1413 |
|
} |
|
], |
|
"max_steps": 1884, |
|
"num_train_epochs": 12, |
|
"total_flos": 1473164476416000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|