|
{ |
|
"best_metric": 0.6991409248766222, |
|
"best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-hongrui_mammogram_v_1/checkpoint-1710", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 1710, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05847953216374269, |
|
"grad_norm": 1.5846091508865356, |
|
"learning_rate": 2.9239766081871343e-06, |
|
"loss": 1.3844, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.11695906432748537, |
|
"grad_norm": 1.26529061794281, |
|
"learning_rate": 5.8479532163742686e-06, |
|
"loss": 1.3401, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.17543859649122806, |
|
"grad_norm": 1.0668294429779053, |
|
"learning_rate": 8.771929824561403e-06, |
|
"loss": 1.2478, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.23391812865497075, |
|
"grad_norm": 0.7071924805641174, |
|
"learning_rate": 1.1695906432748537e-05, |
|
"loss": 1.1693, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.29239766081871343, |
|
"grad_norm": 0.7040536403656006, |
|
"learning_rate": 1.4619883040935673e-05, |
|
"loss": 1.0874, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3508771929824561, |
|
"grad_norm": 0.5056448578834534, |
|
"learning_rate": 1.7543859649122806e-05, |
|
"loss": 1.0551, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4093567251461988, |
|
"grad_norm": 0.6976526975631714, |
|
"learning_rate": 2.046783625730994e-05, |
|
"loss": 1.0349, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4678362573099415, |
|
"grad_norm": 0.6909885406494141, |
|
"learning_rate": 2.3391812865497074e-05, |
|
"loss": 0.9862, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 1.2905045747756958, |
|
"learning_rate": 2.6315789473684212e-05, |
|
"loss": 0.9897, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5847953216374269, |
|
"grad_norm": 1.0667847394943237, |
|
"learning_rate": 2.9239766081871346e-05, |
|
"loss": 0.9316, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6432748538011696, |
|
"grad_norm": 0.7333235144615173, |
|
"learning_rate": 3.216374269005848e-05, |
|
"loss": 0.9224, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 0.8830112814903259, |
|
"learning_rate": 3.508771929824561e-05, |
|
"loss": 0.9127, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7602339181286549, |
|
"grad_norm": 0.7408013343811035, |
|
"learning_rate": 3.8011695906432746e-05, |
|
"loss": 0.9349, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8187134502923976, |
|
"grad_norm": 0.6464580297470093, |
|
"learning_rate": 4.093567251461988e-05, |
|
"loss": 0.9046, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8771929824561403, |
|
"grad_norm": 0.9568632245063782, |
|
"learning_rate": 4.3859649122807014e-05, |
|
"loss": 0.8981, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.935672514619883, |
|
"grad_norm": 0.755339503288269, |
|
"learning_rate": 4.678362573099415e-05, |
|
"loss": 0.9012, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9941520467836257, |
|
"grad_norm": 0.8921021819114685, |
|
"learning_rate": 4.970760233918128e-05, |
|
"loss": 0.8576, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6677938219703893, |
|
"eval_f1": 0.6066566978834349, |
|
"eval_loss": 0.8430724143981934, |
|
"eval_precision": 0.7751300124185229, |
|
"eval_recall": 0.6677938219703893, |
|
"eval_runtime": 78.2201, |
|
"eval_samples_per_second": 139.887, |
|
"eval_steps_per_second": 2.186, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.0526315789473684, |
|
"grad_norm": 0.7332887053489685, |
|
"learning_rate": 4.970760233918128e-05, |
|
"loss": 0.8527, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 0.776443362236023, |
|
"learning_rate": 4.938271604938271e-05, |
|
"loss": 0.8561, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.1695906432748537, |
|
"grad_norm": 0.7149679660797119, |
|
"learning_rate": 4.9057829759584143e-05, |
|
"loss": 0.8435, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2280701754385965, |
|
"grad_norm": 0.8708255290985107, |
|
"learning_rate": 4.8732943469785574e-05, |
|
"loss": 0.8332, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.286549707602339, |
|
"grad_norm": 0.8141400814056396, |
|
"learning_rate": 4.8408057179987004e-05, |
|
"loss": 0.8377, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.345029239766082, |
|
"grad_norm": 1.188214659690857, |
|
"learning_rate": 4.8083170890188434e-05, |
|
"loss": 0.8456, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.4035087719298245, |
|
"grad_norm": 0.6589232087135315, |
|
"learning_rate": 4.7758284600389865e-05, |
|
"loss": 0.8387, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.4619883040935673, |
|
"grad_norm": 0.9304301142692566, |
|
"learning_rate": 4.7433398310591295e-05, |
|
"loss": 0.8521, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.52046783625731, |
|
"grad_norm": 0.8288267254829407, |
|
"learning_rate": 4.7108512020792725e-05, |
|
"loss": 0.8453, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.5789473684210527, |
|
"grad_norm": 0.8100181221961975, |
|
"learning_rate": 4.678362573099415e-05, |
|
"loss": 0.8398, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.6374269005847952, |
|
"grad_norm": 1.2519994974136353, |
|
"learning_rate": 4.645873944119558e-05, |
|
"loss": 0.835, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.695906432748538, |
|
"grad_norm": 0.868988573551178, |
|
"learning_rate": 4.613385315139701e-05, |
|
"loss": 0.8, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.7543859649122808, |
|
"grad_norm": 0.7534909844398499, |
|
"learning_rate": 4.580896686159844e-05, |
|
"loss": 0.8103, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.8128654970760234, |
|
"grad_norm": 0.9954193234443665, |
|
"learning_rate": 4.548408057179987e-05, |
|
"loss": 0.8065, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.871345029239766, |
|
"grad_norm": 0.6459550857543945, |
|
"learning_rate": 4.51591942820013e-05, |
|
"loss": 0.8196, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.9298245614035088, |
|
"grad_norm": 0.9698415398597717, |
|
"learning_rate": 4.483430799220273e-05, |
|
"loss": 0.8464, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.9883040935672516, |
|
"grad_norm": 0.9862537980079651, |
|
"learning_rate": 4.450942170240416e-05, |
|
"loss": 0.8297, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6791263023213306, |
|
"eval_f1": 0.6182249859592962, |
|
"eval_loss": 0.796485424041748, |
|
"eval_precision": 0.6757921489303887, |
|
"eval_recall": 0.6791263023213306, |
|
"eval_runtime": 78.3952, |
|
"eval_samples_per_second": 139.575, |
|
"eval_steps_per_second": 2.181, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 2.046783625730994, |
|
"grad_norm": 0.7553840279579163, |
|
"learning_rate": 4.418453541260559e-05, |
|
"loss": 0.8262, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 0.8275452852249146, |
|
"learning_rate": 4.3859649122807014e-05, |
|
"loss": 0.809, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.1637426900584797, |
|
"grad_norm": 0.6450644731521606, |
|
"learning_rate": 4.3534762833008445e-05, |
|
"loss": 0.7939, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 0.6809207201004028, |
|
"learning_rate": 4.3209876543209875e-05, |
|
"loss": 0.8157, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.280701754385965, |
|
"grad_norm": 0.9741197228431702, |
|
"learning_rate": 4.2884990253411305e-05, |
|
"loss": 0.8126, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.3391812865497075, |
|
"grad_norm": 0.6929029822349548, |
|
"learning_rate": 4.2560103963612735e-05, |
|
"loss": 0.8155, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.39766081871345, |
|
"grad_norm": 0.836373507976532, |
|
"learning_rate": 4.2235217673814166e-05, |
|
"loss": 0.8145, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.456140350877193, |
|
"grad_norm": 0.7719972729682922, |
|
"learning_rate": 4.1910331384015596e-05, |
|
"loss": 0.7963, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.5146198830409356, |
|
"grad_norm": 0.5631088018417358, |
|
"learning_rate": 4.1585445094217026e-05, |
|
"loss": 0.7839, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.573099415204678, |
|
"grad_norm": 0.7167489528656006, |
|
"learning_rate": 4.1260558804418457e-05, |
|
"loss": 0.7837, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.6315789473684212, |
|
"grad_norm": 0.8713414072990417, |
|
"learning_rate": 4.093567251461988e-05, |
|
"loss": 0.8046, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.690058479532164, |
|
"grad_norm": 1.0731910467147827, |
|
"learning_rate": 4.061078622482131e-05, |
|
"loss": 0.7813, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.7485380116959064, |
|
"grad_norm": 0.6702953577041626, |
|
"learning_rate": 4.028589993502274e-05, |
|
"loss": 0.8076, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.807017543859649, |
|
"grad_norm": 0.6061651706695557, |
|
"learning_rate": 3.996101364522417e-05, |
|
"loss": 0.786, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.8654970760233915, |
|
"grad_norm": 0.9429291486740112, |
|
"learning_rate": 3.96361273554256e-05, |
|
"loss": 0.817, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.9239766081871346, |
|
"grad_norm": 0.7229118347167969, |
|
"learning_rate": 3.931124106562703e-05, |
|
"loss": 0.8003, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.982456140350877, |
|
"grad_norm": 0.702900230884552, |
|
"learning_rate": 3.898635477582846e-05, |
|
"loss": 0.8303, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6842441966733687, |
|
"eval_f1": 0.6360238176585905, |
|
"eval_loss": 0.7872248888015747, |
|
"eval_precision": 0.6704334160874683, |
|
"eval_recall": 0.6842441966733687, |
|
"eval_runtime": 77.468, |
|
"eval_samples_per_second": 141.245, |
|
"eval_steps_per_second": 2.207, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 3.0409356725146197, |
|
"grad_norm": 0.6098015308380127, |
|
"learning_rate": 3.866146848602989e-05, |
|
"loss": 0.7717, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.0994152046783627, |
|
"grad_norm": 0.7494928240776062, |
|
"learning_rate": 3.8336582196231315e-05, |
|
"loss": 0.7807, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 3.1578947368421053, |
|
"grad_norm": 0.8039376735687256, |
|
"learning_rate": 3.8011695906432746e-05, |
|
"loss": 0.79, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.216374269005848, |
|
"grad_norm": 0.687044084072113, |
|
"learning_rate": 3.7686809616634176e-05, |
|
"loss": 0.8004, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.2748538011695905, |
|
"grad_norm": 0.9368821978569031, |
|
"learning_rate": 3.7361923326835606e-05, |
|
"loss": 0.8002, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 1.0392132997512817, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.8042, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.391812865497076, |
|
"grad_norm": 0.5632928013801575, |
|
"learning_rate": 3.671215074723847e-05, |
|
"loss": 0.7746, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.4502923976608186, |
|
"grad_norm": 0.6281052231788635, |
|
"learning_rate": 3.63872644574399e-05, |
|
"loss": 0.7691, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.5087719298245617, |
|
"grad_norm": 0.6072255969047546, |
|
"learning_rate": 3.606237816764133e-05, |
|
"loss": 0.7552, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.5672514619883042, |
|
"grad_norm": 0.6243124604225159, |
|
"learning_rate": 3.573749187784276e-05, |
|
"loss": 0.7905, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.625730994152047, |
|
"grad_norm": 0.8322011828422546, |
|
"learning_rate": 3.541260558804418e-05, |
|
"loss": 0.7772, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.6842105263157894, |
|
"grad_norm": 0.9370966553688049, |
|
"learning_rate": 3.508771929824561e-05, |
|
"loss": 0.7683, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.742690058479532, |
|
"grad_norm": 0.6632276177406311, |
|
"learning_rate": 3.476283300844704e-05, |
|
"loss": 0.7853, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.801169590643275, |
|
"grad_norm": 0.678115963935852, |
|
"learning_rate": 3.443794671864847e-05, |
|
"loss": 0.7691, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.8596491228070176, |
|
"grad_norm": 0.6135697960853577, |
|
"learning_rate": 3.41130604288499e-05, |
|
"loss": 0.7778, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.91812865497076, |
|
"grad_norm": 0.6042221188545227, |
|
"learning_rate": 3.378817413905133e-05, |
|
"loss": 0.7983, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.976608187134503, |
|
"grad_norm": 0.8058825135231018, |
|
"learning_rate": 3.346328784925276e-05, |
|
"loss": 0.7814, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6843355876439408, |
|
"eval_f1": 0.659727957333552, |
|
"eval_loss": 0.7717081308364868, |
|
"eval_precision": 0.6601378633948723, |
|
"eval_recall": 0.6843355876439408, |
|
"eval_runtime": 78.1449, |
|
"eval_samples_per_second": 140.022, |
|
"eval_steps_per_second": 2.188, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 4.035087719298246, |
|
"grad_norm": 0.966820478439331, |
|
"learning_rate": 3.313840155945419e-05, |
|
"loss": 0.7736, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 4.093567251461988, |
|
"grad_norm": 1.1176632642745972, |
|
"learning_rate": 3.281351526965562e-05, |
|
"loss": 0.7529, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.152046783625731, |
|
"grad_norm": 0.6780201196670532, |
|
"learning_rate": 3.248862897985705e-05, |
|
"loss": 0.7722, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 4.2105263157894735, |
|
"grad_norm": 0.8257865905761719, |
|
"learning_rate": 3.216374269005848e-05, |
|
"loss": 0.771, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 4.269005847953216, |
|
"grad_norm": 0.6105393767356873, |
|
"learning_rate": 3.183885640025991e-05, |
|
"loss": 0.7883, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 4.3274853801169595, |
|
"grad_norm": 0.8537980914115906, |
|
"learning_rate": 3.151397011046134e-05, |
|
"loss": 0.7698, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 4.385964912280702, |
|
"grad_norm": 0.8124959468841553, |
|
"learning_rate": 3.118908382066277e-05, |
|
"loss": 0.7737, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 0.8992810845375061, |
|
"learning_rate": 3.08641975308642e-05, |
|
"loss": 0.7809, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 4.502923976608187, |
|
"grad_norm": 0.6706241965293884, |
|
"learning_rate": 3.053931124106563e-05, |
|
"loss": 0.7629, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 4.56140350877193, |
|
"grad_norm": 0.8107186555862427, |
|
"learning_rate": 3.0214424951267055e-05, |
|
"loss": 0.7683, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 4.619883040935672, |
|
"grad_norm": 0.7054151296615601, |
|
"learning_rate": 2.9889538661468486e-05, |
|
"loss": 0.7367, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 4.678362573099415, |
|
"grad_norm": 0.7284806966781616, |
|
"learning_rate": 2.9564652371669916e-05, |
|
"loss": 0.7476, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.7368421052631575, |
|
"grad_norm": 1.1839812994003296, |
|
"learning_rate": 2.9239766081871346e-05, |
|
"loss": 0.7565, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 4.7953216374269, |
|
"grad_norm": 0.7781530618667603, |
|
"learning_rate": 2.8914879792072773e-05, |
|
"loss": 0.7737, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 4.853801169590644, |
|
"grad_norm": 0.7338679432868958, |
|
"learning_rate": 2.8589993502274203e-05, |
|
"loss": 0.7519, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 4.912280701754386, |
|
"grad_norm": 1.015286922454834, |
|
"learning_rate": 2.8265107212475634e-05, |
|
"loss": 0.8023, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 4.970760233918129, |
|
"grad_norm": 0.8456217050552368, |
|
"learning_rate": 2.7940220922677064e-05, |
|
"loss": 0.7768, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6905501736428441, |
|
"eval_f1": 0.6543647035652124, |
|
"eval_loss": 0.7693981528282166, |
|
"eval_precision": 0.6774590515406133, |
|
"eval_recall": 0.6905501736428441, |
|
"eval_runtime": 77.1215, |
|
"eval_samples_per_second": 141.88, |
|
"eval_steps_per_second": 2.217, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 5.029239766081871, |
|
"grad_norm": 0.624717116355896, |
|
"learning_rate": 2.761533463287849e-05, |
|
"loss": 0.7482, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 5.087719298245614, |
|
"grad_norm": 0.752734363079071, |
|
"learning_rate": 2.729044834307992e-05, |
|
"loss": 0.7259, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 5.146198830409356, |
|
"grad_norm": 0.6503344178199768, |
|
"learning_rate": 2.696556205328135e-05, |
|
"loss": 0.7488, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 5.204678362573099, |
|
"grad_norm": 0.8620956540107727, |
|
"learning_rate": 2.664067576348278e-05, |
|
"loss": 0.7704, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 5.2631578947368425, |
|
"grad_norm": 0.6938666701316833, |
|
"learning_rate": 2.6315789473684212e-05, |
|
"loss": 0.76, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.321637426900585, |
|
"grad_norm": 0.8206263184547424, |
|
"learning_rate": 2.599090318388564e-05, |
|
"loss": 0.7685, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 5.380116959064328, |
|
"grad_norm": 0.8919401168823242, |
|
"learning_rate": 2.566601689408707e-05, |
|
"loss": 0.7673, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 5.43859649122807, |
|
"grad_norm": 0.9412862062454224, |
|
"learning_rate": 2.53411306042885e-05, |
|
"loss": 0.7403, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 5.497076023391813, |
|
"grad_norm": 1.1093353033065796, |
|
"learning_rate": 2.501624431448993e-05, |
|
"loss": 0.743, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"grad_norm": 0.6838064193725586, |
|
"learning_rate": 2.4691358024691357e-05, |
|
"loss": 0.7641, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 5.614035087719298, |
|
"grad_norm": 0.7546567320823669, |
|
"learning_rate": 2.4366471734892787e-05, |
|
"loss": 0.7648, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 5.6725146198830405, |
|
"grad_norm": 0.7909204363822937, |
|
"learning_rate": 2.4041585445094217e-05, |
|
"loss": 0.7872, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 5.730994152046784, |
|
"grad_norm": 0.6969336867332458, |
|
"learning_rate": 2.3716699155295647e-05, |
|
"loss": 0.7638, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 5.7894736842105265, |
|
"grad_norm": 0.7838913202285767, |
|
"learning_rate": 2.3391812865497074e-05, |
|
"loss": 0.747, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 5.847953216374269, |
|
"grad_norm": 0.8347417712211609, |
|
"learning_rate": 2.3066926575698505e-05, |
|
"loss": 0.7749, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.906432748538012, |
|
"grad_norm": 0.9646545052528381, |
|
"learning_rate": 2.2742040285899935e-05, |
|
"loss": 0.737, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 5.964912280701754, |
|
"grad_norm": 0.6134990453720093, |
|
"learning_rate": 2.2417153996101365e-05, |
|
"loss": 0.7415, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6962164138183148, |
|
"eval_f1": 0.671796652878038, |
|
"eval_loss": 0.7572136521339417, |
|
"eval_precision": 0.6763761787994358, |
|
"eval_recall": 0.6962164138183148, |
|
"eval_runtime": 77.9563, |
|
"eval_samples_per_second": 140.361, |
|
"eval_steps_per_second": 2.194, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 6.023391812865497, |
|
"grad_norm": 0.7049497961997986, |
|
"learning_rate": 2.2092267706302795e-05, |
|
"loss": 0.7598, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 6.081871345029239, |
|
"grad_norm": 0.9780289530754089, |
|
"learning_rate": 2.1767381416504222e-05, |
|
"loss": 0.7472, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 6.140350877192983, |
|
"grad_norm": 0.7058891654014587, |
|
"learning_rate": 2.1442495126705653e-05, |
|
"loss": 0.742, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 6.1988304093567255, |
|
"grad_norm": 0.8734349012374878, |
|
"learning_rate": 2.1117608836907083e-05, |
|
"loss": 0.7581, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 6.257309941520468, |
|
"grad_norm": 0.8839743733406067, |
|
"learning_rate": 2.0792722547108513e-05, |
|
"loss": 0.7516, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 6.315789473684211, |
|
"grad_norm": 0.6963735222816467, |
|
"learning_rate": 2.046783625730994e-05, |
|
"loss": 0.7412, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 6.374269005847953, |
|
"grad_norm": 0.9337784647941589, |
|
"learning_rate": 2.014294996751137e-05, |
|
"loss": 0.7402, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 6.432748538011696, |
|
"grad_norm": 0.6648013591766357, |
|
"learning_rate": 1.98180636777128e-05, |
|
"loss": 0.7513, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.491228070175438, |
|
"grad_norm": 1.072342872619629, |
|
"learning_rate": 1.949317738791423e-05, |
|
"loss": 0.7406, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 6.549707602339181, |
|
"grad_norm": 1.0100135803222656, |
|
"learning_rate": 1.9168291098115658e-05, |
|
"loss": 0.7643, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 6.6081871345029235, |
|
"grad_norm": 0.7687884569168091, |
|
"learning_rate": 1.8843404808317088e-05, |
|
"loss": 0.7404, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 0.750688374042511, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.7391, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 6.7251461988304095, |
|
"grad_norm": 0.7010438442230225, |
|
"learning_rate": 1.819363222871995e-05, |
|
"loss": 0.7534, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 6.783625730994152, |
|
"grad_norm": 1.0012060403823853, |
|
"learning_rate": 1.786874593892138e-05, |
|
"loss": 0.7485, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 6.842105263157895, |
|
"grad_norm": 0.8860548734664917, |
|
"learning_rate": 1.7543859649122806e-05, |
|
"loss": 0.7606, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 6.900584795321637, |
|
"grad_norm": 0.969633936882019, |
|
"learning_rate": 1.7218973359324236e-05, |
|
"loss": 0.7592, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 6.95906432748538, |
|
"grad_norm": 0.8473331928253174, |
|
"learning_rate": 1.6894087069525666e-05, |
|
"loss": 0.7351, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.692195211113142, |
|
"eval_f1": 0.6568623393542826, |
|
"eval_loss": 0.754936695098877, |
|
"eval_precision": 0.6648205901494669, |
|
"eval_recall": 0.692195211113142, |
|
"eval_runtime": 77.9138, |
|
"eval_samples_per_second": 140.437, |
|
"eval_steps_per_second": 2.195, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 7.017543859649122, |
|
"grad_norm": 0.7504809498786926, |
|
"learning_rate": 1.6569200779727097e-05, |
|
"loss": 0.7102, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 7.076023391812866, |
|
"grad_norm": 1.306260585784912, |
|
"learning_rate": 1.6244314489928523e-05, |
|
"loss": 0.7574, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 7.1345029239766085, |
|
"grad_norm": 0.7214799523353577, |
|
"learning_rate": 1.5919428200129954e-05, |
|
"loss": 0.7355, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 7.192982456140351, |
|
"grad_norm": 0.8254335522651672, |
|
"learning_rate": 1.5594541910331384e-05, |
|
"loss": 0.758, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 7.251461988304094, |
|
"grad_norm": 0.8644353747367859, |
|
"learning_rate": 1.5269655620532814e-05, |
|
"loss": 0.7131, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 7.309941520467836, |
|
"grad_norm": 0.7876085638999939, |
|
"learning_rate": 1.4944769330734243e-05, |
|
"loss": 0.7437, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 7.368421052631579, |
|
"grad_norm": 0.904586136341095, |
|
"learning_rate": 1.4619883040935673e-05, |
|
"loss": 0.7464, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 7.426900584795321, |
|
"grad_norm": 1.033260464668274, |
|
"learning_rate": 1.4294996751137102e-05, |
|
"loss": 0.7226, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 7.485380116959064, |
|
"grad_norm": 0.9057112336158752, |
|
"learning_rate": 1.3970110461338532e-05, |
|
"loss": 0.7425, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 7.543859649122807, |
|
"grad_norm": 0.8631776571273804, |
|
"learning_rate": 1.364522417153996e-05, |
|
"loss": 0.7178, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 7.60233918128655, |
|
"grad_norm": 0.8566320538520813, |
|
"learning_rate": 1.332033788174139e-05, |
|
"loss": 0.7151, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 7.6608187134502925, |
|
"grad_norm": 1.056127905845642, |
|
"learning_rate": 1.299545159194282e-05, |
|
"loss": 0.7574, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 7.719298245614035, |
|
"grad_norm": 1.0582066774368286, |
|
"learning_rate": 1.267056530214425e-05, |
|
"loss": 0.7462, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 7.777777777777778, |
|
"grad_norm": 1.0808275938034058, |
|
"learning_rate": 1.2345679012345678e-05, |
|
"loss": 0.7181, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 7.83625730994152, |
|
"grad_norm": 0.8452061414718628, |
|
"learning_rate": 1.2020792722547109e-05, |
|
"loss": 0.7686, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 7.894736842105263, |
|
"grad_norm": 0.7253689765930176, |
|
"learning_rate": 1.1695906432748537e-05, |
|
"loss": 0.7174, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 7.953216374269006, |
|
"grad_norm": 0.9176128506660461, |
|
"learning_rate": 1.1371020142949967e-05, |
|
"loss": 0.7197, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6985925790531895, |
|
"eval_f1": 0.6855055863067254, |
|
"eval_loss": 0.7478834390640259, |
|
"eval_precision": 0.6925926647987316, |
|
"eval_recall": 0.6985925790531895, |
|
"eval_runtime": 77.8555, |
|
"eval_samples_per_second": 140.542, |
|
"eval_steps_per_second": 2.196, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 8.011695906432749, |
|
"grad_norm": 0.7897553443908691, |
|
"learning_rate": 1.1046133853151398e-05, |
|
"loss": 0.755, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 8.070175438596491, |
|
"grad_norm": 0.7324469685554504, |
|
"learning_rate": 1.0721247563352826e-05, |
|
"loss": 0.7243, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 8.128654970760234, |
|
"grad_norm": 0.7983306646347046, |
|
"learning_rate": 1.0396361273554257e-05, |
|
"loss": 0.7294, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 8.187134502923977, |
|
"grad_norm": 0.9110460877418518, |
|
"learning_rate": 1.0071474983755685e-05, |
|
"loss": 0.7027, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.24561403508772, |
|
"grad_norm": 0.9574342966079712, |
|
"learning_rate": 9.746588693957115e-06, |
|
"loss": 0.7131, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 8.304093567251462, |
|
"grad_norm": 0.7169631719589233, |
|
"learning_rate": 9.421702404158544e-06, |
|
"loss": 0.7365, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 8.362573099415204, |
|
"grad_norm": 0.9551491737365723, |
|
"learning_rate": 9.096816114359974e-06, |
|
"loss": 0.7313, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 8.421052631578947, |
|
"grad_norm": 1.159575343132019, |
|
"learning_rate": 8.771929824561403e-06, |
|
"loss": 0.7438, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 8.47953216374269, |
|
"grad_norm": 0.8166360259056091, |
|
"learning_rate": 8.447043534762833e-06, |
|
"loss": 0.7355, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 8.538011695906432, |
|
"grad_norm": 0.8369165062904358, |
|
"learning_rate": 8.122157244964262e-06, |
|
"loss": 0.7183, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 8.596491228070175, |
|
"grad_norm": 0.7923627495765686, |
|
"learning_rate": 7.797270955165692e-06, |
|
"loss": 0.711, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 8.654970760233919, |
|
"grad_norm": 0.7623910903930664, |
|
"learning_rate": 7.4723846653671214e-06, |
|
"loss": 0.7377, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 8.713450292397662, |
|
"grad_norm": 1.088745355606079, |
|
"learning_rate": 7.147498375568551e-06, |
|
"loss": 0.7199, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 8.771929824561404, |
|
"grad_norm": 0.8672430515289307, |
|
"learning_rate": 6.82261208576998e-06, |
|
"loss": 0.7234, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.830409356725147, |
|
"grad_norm": 0.77957683801651, |
|
"learning_rate": 6.49772579597141e-06, |
|
"loss": 0.7348, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"grad_norm": 0.982523500919342, |
|
"learning_rate": 6.172839506172839e-06, |
|
"loss": 0.7348, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 8.947368421052632, |
|
"grad_norm": 0.8758224844932556, |
|
"learning_rate": 5.8479532163742686e-06, |
|
"loss": 0.7087, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6978614512886127, |
|
"eval_f1": 0.6697434161663234, |
|
"eval_loss": 0.744518518447876, |
|
"eval_precision": 0.6792260519903555, |
|
"eval_recall": 0.6978614512886127, |
|
"eval_runtime": 77.9794, |
|
"eval_samples_per_second": 140.319, |
|
"eval_steps_per_second": 2.193, |
|
"step": 1539 |
|
}, |
|
{ |
|
"epoch": 9.005847953216374, |
|
"grad_norm": 0.7864174246788025, |
|
"learning_rate": 5.523066926575699e-06, |
|
"loss": 0.713, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 9.064327485380117, |
|
"grad_norm": 1.0881294012069702, |
|
"learning_rate": 5.198180636777128e-06, |
|
"loss": 0.7092, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 9.12280701754386, |
|
"grad_norm": 1.0221022367477417, |
|
"learning_rate": 4.873294346978558e-06, |
|
"loss": 0.7463, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 9.181286549707602, |
|
"grad_norm": 0.8976357579231262, |
|
"learning_rate": 4.548408057179987e-06, |
|
"loss": 0.7392, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 9.239766081871345, |
|
"grad_norm": 0.8547308444976807, |
|
"learning_rate": 4.2235217673814166e-06, |
|
"loss": 0.7275, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 9.298245614035087, |
|
"grad_norm": 0.9341883063316345, |
|
"learning_rate": 3.898635477582846e-06, |
|
"loss": 0.7183, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 9.35672514619883, |
|
"grad_norm": 0.9447769522666931, |
|
"learning_rate": 3.5737491877842754e-06, |
|
"loss": 0.6966, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 9.415204678362572, |
|
"grad_norm": 1.0918306112289429, |
|
"learning_rate": 3.248862897985705e-06, |
|
"loss": 0.7161, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 9.473684210526315, |
|
"grad_norm": 0.9330850839614868, |
|
"learning_rate": 2.9239766081871343e-06, |
|
"loss": 0.7093, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 9.53216374269006, |
|
"grad_norm": 0.8420782685279846, |
|
"learning_rate": 2.599090318388564e-06, |
|
"loss": 0.7259, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 9.590643274853802, |
|
"grad_norm": 0.9159696698188782, |
|
"learning_rate": 2.2742040285899936e-06, |
|
"loss": 0.7265, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 9.649122807017545, |
|
"grad_norm": 1.0164194107055664, |
|
"learning_rate": 1.949317738791423e-06, |
|
"loss": 0.7458, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 9.707602339181287, |
|
"grad_norm": 0.8425694704055786, |
|
"learning_rate": 1.6244314489928524e-06, |
|
"loss": 0.7149, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 9.76608187134503, |
|
"grad_norm": 1.1017402410507202, |
|
"learning_rate": 1.299545159194282e-06, |
|
"loss": 0.7101, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 9.824561403508772, |
|
"grad_norm": 0.8220164179801941, |
|
"learning_rate": 9.746588693957115e-07, |
|
"loss": 0.7174, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 9.883040935672515, |
|
"grad_norm": 0.7968518733978271, |
|
"learning_rate": 6.49772579597141e-07, |
|
"loss": 0.7236, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 9.941520467836257, |
|
"grad_norm": 0.7491603493690491, |
|
"learning_rate": 3.248862897985705e-07, |
|
"loss": 0.6978, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.8862149119377136, |
|
"learning_rate": 0.0, |
|
"loss": 0.6977, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6991409248766222, |
|
"eval_f1": 0.6766571542539821, |
|
"eval_loss": 0.7419390082359314, |
|
"eval_precision": 0.6830062110035815, |
|
"eval_recall": 0.6991409248766222, |
|
"eval_runtime": 78.4178, |
|
"eval_samples_per_second": 139.535, |
|
"eval_steps_per_second": 2.181, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 1710, |
|
"total_flos": 3.3914202248568177e+19, |
|
"train_loss": 0.790359598014787, |
|
"train_runtime": 7828.8109, |
|
"train_samples_per_second": 55.901, |
|
"train_steps_per_second": 0.218 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1710, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.3914202248568177e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|