|
{ |
|
"best_metric": 0.9824038982133189, |
|
"best_model_checkpoint": "/p/project/trustllm-eu/stenlund1/LLMSegm_iu/out/glot500-iu-morph-unamb-sup-6/checkpoint-1500", |
|
"epoch": 4.132231404958677, |
|
"eval_steps": 50, |
|
"global_step": 1500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06887052341597796, |
|
"grad_norm": 0.25278759002685547, |
|
"learning_rate": 1.9944289693593316e-05, |
|
"loss": 0.6929, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.13774104683195593, |
|
"grad_norm": 3.6986641883850098, |
|
"learning_rate": 1.966573816155989e-05, |
|
"loss": 0.6275, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13774104683195593, |
|
"eval_accuracy": 0.7771941343838914, |
|
"eval_f1": 0.7581564776686728, |
|
"eval_loss": 0.49594685435295105, |
|
"eval_precision": 0.6763687742847051, |
|
"eval_recall": 0.8624448247905594, |
|
"eval_runtime": 4.0112, |
|
"eval_samples_per_second": 6834.373, |
|
"eval_steps_per_second": 6.731, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2066115702479339, |
|
"grad_norm": 3.397982120513916, |
|
"learning_rate": 1.9387186629526465e-05, |
|
"loss": 0.4712, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.27548209366391185, |
|
"grad_norm": 3.2428324222564697, |
|
"learning_rate": 1.910863509749304e-05, |
|
"loss": 0.3489, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.27548209366391185, |
|
"eval_accuracy": 0.9177062814620267, |
|
"eval_f1": 0.898533777098138, |
|
"eval_loss": 0.24391423165798187, |
|
"eval_precision": 0.8972424324081559, |
|
"eval_recall": 0.8998288442482659, |
|
"eval_runtime": 3.5257, |
|
"eval_samples_per_second": 7775.49, |
|
"eval_steps_per_second": 7.658, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3443526170798898, |
|
"grad_norm": 5.30737829208374, |
|
"learning_rate": 1.883008356545961e-05, |
|
"loss": 0.2683, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.4132231404958678, |
|
"grad_norm": 3.5402088165283203, |
|
"learning_rate": 1.8551532033426184e-05, |
|
"loss": 0.2206, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4132231404958678, |
|
"eval_accuracy": 0.9466695848836361, |
|
"eval_f1": 0.9355492858402398, |
|
"eval_loss": 0.15823155641555786, |
|
"eval_precision": 0.916083916083916, |
|
"eval_recall": 0.9558598324475273, |
|
"eval_runtime": 3.8577, |
|
"eval_samples_per_second": 7106.225, |
|
"eval_steps_per_second": 6.999, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4820936639118457, |
|
"grad_norm": 3.316082239151001, |
|
"learning_rate": 1.827298050139276e-05, |
|
"loss": 0.1807, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.5509641873278237, |
|
"grad_norm": 2.592745065689087, |
|
"learning_rate": 1.7994428969359333e-05, |
|
"loss": 0.1541, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5509641873278237, |
|
"eval_accuracy": 0.9585977967461881, |
|
"eval_f1": 0.9495398568443516, |
|
"eval_loss": 0.12743310630321503, |
|
"eval_precision": 0.9374122191011236, |
|
"eval_recall": 0.9619854067201153, |
|
"eval_runtime": 3.4893, |
|
"eval_samples_per_second": 7856.68, |
|
"eval_steps_per_second": 7.738, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6198347107438017, |
|
"grad_norm": 2.1011128425598145, |
|
"learning_rate": 1.7715877437325907e-05, |
|
"loss": 0.1369, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.6887052341597796, |
|
"grad_norm": 1.8149715662002563, |
|
"learning_rate": 1.743732590529248e-05, |
|
"loss": 0.1258, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6887052341597796, |
|
"eval_accuracy": 0.9647990078062304, |
|
"eval_f1": 0.9563289134271621, |
|
"eval_loss": 0.10787822306156158, |
|
"eval_precision": 0.9608948708621317, |
|
"eval_recall": 0.9518061435906675, |
|
"eval_runtime": 3.4856, |
|
"eval_samples_per_second": 7864.85, |
|
"eval_steps_per_second": 7.746, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7575757575757576, |
|
"grad_norm": 2.8485476970672607, |
|
"learning_rate": 1.7158774373259056e-05, |
|
"loss": 0.1107, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.8264462809917356, |
|
"grad_norm": 1.723015546798706, |
|
"learning_rate": 1.688022284122563e-05, |
|
"loss": 0.1085, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8264462809917356, |
|
"eval_accuracy": 0.9711826074268621, |
|
"eval_f1": 0.9645421903052065, |
|
"eval_loss": 0.09259311854839325, |
|
"eval_precision": 0.9611772072636193, |
|
"eval_recall": 0.9679308170435096, |
|
"eval_runtime": 3.4737, |
|
"eval_samples_per_second": 7891.819, |
|
"eval_steps_per_second": 7.773, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8953168044077136, |
|
"grad_norm": 1.7039345502853394, |
|
"learning_rate": 1.66016713091922e-05, |
|
"loss": 0.1007, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.9641873278236914, |
|
"grad_norm": 2.643916368484497, |
|
"learning_rate": 1.6323119777158775e-05, |
|
"loss": 0.1001, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9641873278236914, |
|
"eval_accuracy": 0.9737725249872328, |
|
"eval_f1": 0.9676897496966701, |
|
"eval_loss": 0.08496326208114624, |
|
"eval_precision": 0.9654770444763271, |
|
"eval_recall": 0.969912620484641, |
|
"eval_runtime": 3.877, |
|
"eval_samples_per_second": 7071.012, |
|
"eval_steps_per_second": 6.964, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.0330578512396693, |
|
"grad_norm": 2.7803874015808105, |
|
"learning_rate": 1.604456824512535e-05, |
|
"loss": 0.0872, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.1019283746556474, |
|
"grad_norm": 1.6734216213226318, |
|
"learning_rate": 1.5766016713091924e-05, |
|
"loss": 0.0803, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1019283746556474, |
|
"eval_accuracy": 0.9746479900780624, |
|
"eval_f1": 0.968868980963046, |
|
"eval_loss": 0.07618524879217148, |
|
"eval_precision": 0.9635602280826799, |
|
"eval_recall": 0.9742365552652914, |
|
"eval_runtime": 3.4865, |
|
"eval_samples_per_second": 7862.946, |
|
"eval_steps_per_second": 7.744, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1707988980716253, |
|
"grad_norm": 1.4769816398620605, |
|
"learning_rate": 1.5487465181058498e-05, |
|
"loss": 0.0794, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.2396694214876034, |
|
"grad_norm": 1.9178351163864136, |
|
"learning_rate": 1.520891364902507e-05, |
|
"loss": 0.0746, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.2396694214876034, |
|
"eval_accuracy": 0.976143576274896, |
|
"eval_f1": 0.9705113175218685, |
|
"eval_loss": 0.07501044124364853, |
|
"eval_precision": 0.9715626974812674, |
|
"eval_recall": 0.9694622106116566, |
|
"eval_runtime": 3.483, |
|
"eval_samples_per_second": 7870.76, |
|
"eval_steps_per_second": 7.752, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.3085399449035813, |
|
"grad_norm": 1.5861995220184326, |
|
"learning_rate": 1.4930362116991646e-05, |
|
"loss": 0.0712, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.3774104683195592, |
|
"grad_norm": 1.4486507177352905, |
|
"learning_rate": 1.4651810584958219e-05, |
|
"loss": 0.0628, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3774104683195592, |
|
"eval_accuracy": 0.9772379076384329, |
|
"eval_f1": 0.9720254640007173, |
|
"eval_loss": 0.07069610804319382, |
|
"eval_precision": 0.9675145024542615, |
|
"eval_recall": 0.9765786866048104, |
|
"eval_runtime": 3.478, |
|
"eval_samples_per_second": 7882.089, |
|
"eval_steps_per_second": 7.763, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.4462809917355373, |
|
"grad_norm": 1.6411774158477783, |
|
"learning_rate": 1.4373259052924793e-05, |
|
"loss": 0.0687, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.5151515151515151, |
|
"grad_norm": 1.1685402393341064, |
|
"learning_rate": 1.4094707520891366e-05, |
|
"loss": 0.0649, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.5151515151515151, |
|
"eval_accuracy": 0.9787334938352666, |
|
"eval_f1": 0.9736473353523483, |
|
"eval_loss": 0.06786245107650757, |
|
"eval_precision": 0.9771366358192706, |
|
"eval_recall": 0.9701828664084317, |
|
"eval_runtime": 3.8335, |
|
"eval_samples_per_second": 7151.144, |
|
"eval_steps_per_second": 7.043, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.5840220385674932, |
|
"grad_norm": 1.5705974102020264, |
|
"learning_rate": 1.381615598885794e-05, |
|
"loss": 0.0602, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.6528925619834711, |
|
"grad_norm": 1.430296778678894, |
|
"learning_rate": 1.3537604456824513e-05, |
|
"loss": 0.0598, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.6528925619834711, |
|
"eval_accuracy": 0.9796089589260961, |
|
"eval_f1": 0.9748390871854886, |
|
"eval_loss": 0.06365057826042175, |
|
"eval_precision": 0.9741813602015114, |
|
"eval_recall": 0.9754977029096478, |
|
"eval_runtime": 3.4734, |
|
"eval_samples_per_second": 7892.464, |
|
"eval_steps_per_second": 7.773, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.721763085399449, |
|
"grad_norm": 1.6765629053115845, |
|
"learning_rate": 1.3259052924791087e-05, |
|
"loss": 0.0588, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.790633608815427, |
|
"grad_norm": 2.7560369968414307, |
|
"learning_rate": 1.2980501392757661e-05, |
|
"loss": 0.0588, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.790633608815427, |
|
"eval_accuracy": 0.9799737360472751, |
|
"eval_f1": 0.9752423900789177, |
|
"eval_loss": 0.06402380764484406, |
|
"eval_precision": 0.9764312804767925, |
|
"eval_recall": 0.9740563913160977, |
|
"eval_runtime": 3.4664, |
|
"eval_samples_per_second": 7908.573, |
|
"eval_steps_per_second": 7.789, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.859504132231405, |
|
"grad_norm": 1.4049510955810547, |
|
"learning_rate": 1.2701949860724234e-05, |
|
"loss": 0.0587, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.9283746556473829, |
|
"grad_norm": 1.4854605197906494, |
|
"learning_rate": 1.2423398328690808e-05, |
|
"loss": 0.0549, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.9283746556473829, |
|
"eval_accuracy": 0.9801926023199825, |
|
"eval_f1": 0.975607564799425, |
|
"eval_loss": 0.06324595212936401, |
|
"eval_precision": 0.9730286738351255, |
|
"eval_recall": 0.9782001621475542, |
|
"eval_runtime": 3.831, |
|
"eval_samples_per_second": 7155.784, |
|
"eval_steps_per_second": 7.048, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.997245179063361, |
|
"grad_norm": 1.7832911014556885, |
|
"learning_rate": 1.2144846796657384e-05, |
|
"loss": 0.0598, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.0661157024793386, |
|
"grad_norm": 1.3163201808929443, |
|
"learning_rate": 1.1866295264623957e-05, |
|
"loss": 0.049, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.0661157024793386, |
|
"eval_accuracy": 0.9817611439410521, |
|
"eval_f1": 0.9773939777556742, |
|
"eval_loss": 0.06054531782865524, |
|
"eval_precision": 0.9811200871380593, |
|
"eval_recall": 0.9736960634177101, |
|
"eval_runtime": 3.4659, |
|
"eval_samples_per_second": 7909.629, |
|
"eval_steps_per_second": 7.79, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.1349862258953167, |
|
"grad_norm": 2.2273190021514893, |
|
"learning_rate": 1.1587743732590531e-05, |
|
"loss": 0.0452, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.203856749311295, |
|
"grad_norm": 0.6999920010566711, |
|
"learning_rate": 1.1309192200557103e-05, |
|
"loss": 0.0443, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.203856749311295, |
|
"eval_accuracy": 0.982818997592471, |
|
"eval_f1": 0.9788533201634265, |
|
"eval_loss": 0.05351677164435387, |
|
"eval_precision": 0.9757429287504475, |
|
"eval_recall": 0.9819836050806233, |
|
"eval_runtime": 3.4789, |
|
"eval_samples_per_second": 7880.134, |
|
"eval_steps_per_second": 7.761, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.2727272727272725, |
|
"grad_norm": 2.3686063289642334, |
|
"learning_rate": 1.1030640668523678e-05, |
|
"loss": 0.0414, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.3415977961432506, |
|
"grad_norm": 4.006628036499023, |
|
"learning_rate": 1.0752089136490252e-05, |
|
"loss": 0.0456, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.3415977961432506, |
|
"eval_accuracy": 0.9815057999562268, |
|
"eval_f1": 0.9769765224104264, |
|
"eval_loss": 0.0683741644024849, |
|
"eval_precision": 0.9850732600732601, |
|
"eval_recall": 0.9690118007386722, |
|
"eval_runtime": 3.4666, |
|
"eval_samples_per_second": 7908.086, |
|
"eval_steps_per_second": 7.789, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.4104683195592287, |
|
"grad_norm": 1.687317967414856, |
|
"learning_rate": 1.0473537604456825e-05, |
|
"loss": 0.0459, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.479338842975207, |
|
"grad_norm": 1.9865649938583374, |
|
"learning_rate": 1.0194986072423399e-05, |
|
"loss": 0.0437, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.479338842975207, |
|
"eval_accuracy": 0.9822718319107026, |
|
"eval_f1": 0.9782121402313279, |
|
"eval_loss": 0.055423617362976074, |
|
"eval_precision": 0.9736724676483712, |
|
"eval_recall": 0.9827943428519953, |
|
"eval_runtime": 3.8118, |
|
"eval_samples_per_second": 7191.836, |
|
"eval_steps_per_second": 7.083, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.5482093663911844, |
|
"grad_norm": 1.7087410688400269, |
|
"learning_rate": 9.916434540389973e-06, |
|
"loss": 0.0434, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.6170798898071626, |
|
"grad_norm": 1.4715299606323242, |
|
"learning_rate": 9.637883008356547e-06, |
|
"loss": 0.041, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.6170798898071626, |
|
"eval_accuracy": 0.9830378638651783, |
|
"eval_f1": 0.9790983053894907, |
|
"eval_loss": 0.05513562262058258, |
|
"eval_precision": 0.9771218374304683, |
|
"eval_recall": 0.9810827853346545, |
|
"eval_runtime": 3.4833, |
|
"eval_samples_per_second": 7870.073, |
|
"eval_steps_per_second": 7.751, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.6859504132231407, |
|
"grad_norm": 1.1084128618240356, |
|
"learning_rate": 9.35933147632312e-06, |
|
"loss": 0.0421, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.7548209366391183, |
|
"grad_norm": 1.778534173965454, |
|
"learning_rate": 9.080779944289694e-06, |
|
"loss": 0.0403, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.7548209366391183, |
|
"eval_accuracy": 0.9833296855621215, |
|
"eval_f1": 0.9794283142021157, |
|
"eval_loss": 0.05323425307869911, |
|
"eval_precision": 0.9788554975706316, |
|
"eval_recall": 0.9800018016394919, |
|
"eval_runtime": 3.4664, |
|
"eval_samples_per_second": 7908.452, |
|
"eval_steps_per_second": 7.789, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.8236914600550964, |
|
"grad_norm": 1.2113419771194458, |
|
"learning_rate": 8.802228412256268e-06, |
|
"loss": 0.0373, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.8925619834710745, |
|
"grad_norm": 1.100354552268982, |
|
"learning_rate": 8.523676880222843e-06, |
|
"loss": 0.0408, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.8925619834710745, |
|
"eval_accuracy": 0.9839133289560079, |
|
"eval_f1": 0.980182447310475, |
|
"eval_loss": 0.050565555691719055, |
|
"eval_precision": 0.9779411764705882, |
|
"eval_recall": 0.9824340149536078, |
|
"eval_runtime": 3.4729, |
|
"eval_samples_per_second": 7893.777, |
|
"eval_steps_per_second": 7.775, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.9614325068870526, |
|
"grad_norm": 1.6811611652374268, |
|
"learning_rate": 8.245125348189415e-06, |
|
"loss": 0.0404, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 3.0303030303030303, |
|
"grad_norm": 1.8884915113449097, |
|
"learning_rate": 7.96657381615599e-06, |
|
"loss": 0.0322, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.0303030303030303, |
|
"eval_accuracy": 0.9840592398044795, |
|
"eval_f1": 0.9802949001217478, |
|
"eval_loss": 0.05778279900550842, |
|
"eval_precision": 0.9814012278801011, |
|
"eval_recall": 0.9791910638681199, |
|
"eval_runtime": 3.8894, |
|
"eval_samples_per_second": 7048.308, |
|
"eval_steps_per_second": 6.942, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.0991735537190084, |
|
"grad_norm": 1.4037514925003052, |
|
"learning_rate": 7.688022284122564e-06, |
|
"loss": 0.0367, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 3.168044077134986, |
|
"grad_norm": 1.3870735168457031, |
|
"learning_rate": 7.409470752089137e-06, |
|
"loss": 0.036, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.168044077134986, |
|
"eval_accuracy": 0.98387685124389, |
|
"eval_f1": 0.9801115910727142, |
|
"eval_loss": 0.05209185555577278, |
|
"eval_precision": 0.9791423177200396, |
|
"eval_recall": 0.9810827853346545, |
|
"eval_runtime": 3.4792, |
|
"eval_samples_per_second": 7879.356, |
|
"eval_steps_per_second": 7.76, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.236914600550964, |
|
"grad_norm": 1.1460018157958984, |
|
"learning_rate": 7.130919220055711e-06, |
|
"loss": 0.0329, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 3.3057851239669422, |
|
"grad_norm": 1.3611100912094116, |
|
"learning_rate": 6.852367688022284e-06, |
|
"loss": 0.033, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.3057851239669422, |
|
"eval_accuracy": 0.9842781060771868, |
|
"eval_f1": 0.980535609447681, |
|
"eval_loss": 0.05169374495744705, |
|
"eval_precision": 0.9831552255026264, |
|
"eval_recall": 0.9779299162237636, |
|
"eval_runtime": 3.4677, |
|
"eval_samples_per_second": 7905.479, |
|
"eval_steps_per_second": 7.786, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.3746556473829203, |
|
"grad_norm": 1.9900141954421997, |
|
"learning_rate": 6.573816155988858e-06, |
|
"loss": 0.0346, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 3.443526170798898, |
|
"grad_norm": 1.3785734176635742, |
|
"learning_rate": 6.295264623955433e-06, |
|
"loss": 0.0308, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.443526170798898, |
|
"eval_accuracy": 0.9836944626833005, |
|
"eval_f1": 0.9797508493771234, |
|
"eval_loss": 0.0601566806435585, |
|
"eval_precision": 0.985420083834518, |
|
"eval_recall": 0.9741464732906945, |
|
"eval_runtime": 3.4659, |
|
"eval_samples_per_second": 7909.55, |
|
"eval_steps_per_second": 7.79, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.512396694214876, |
|
"grad_norm": 1.4920400381088257, |
|
"learning_rate": 6.016713091922006e-06, |
|
"loss": 0.0284, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 3.581267217630854, |
|
"grad_norm": 2.0963149070739746, |
|
"learning_rate": 5.7381615598885795e-06, |
|
"loss": 0.0324, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.581267217630854, |
|
"eval_accuracy": 0.9845334500620121, |
|
"eval_f1": 0.9808957375867352, |
|
"eval_loss": 0.05010749772191048, |
|
"eval_precision": 0.981249436581628, |
|
"eval_recall": 0.9805422934870732, |
|
"eval_runtime": 4.0132, |
|
"eval_samples_per_second": 6831.004, |
|
"eval_steps_per_second": 6.728, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.650137741046832, |
|
"grad_norm": 1.2143880128860474, |
|
"learning_rate": 5.459610027855154e-06, |
|
"loss": 0.0295, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 3.71900826446281, |
|
"grad_norm": 1.3561748266220093, |
|
"learning_rate": 5.181058495821727e-06, |
|
"loss": 0.0348, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.71900826446281, |
|
"eval_accuracy": 0.9849711826074269, |
|
"eval_f1": 0.9814062641032584, |
|
"eval_loss": 0.05094814673066139, |
|
"eval_precision": 0.983358958126074, |
|
"eval_recall": 0.9794613097919106, |
|
"eval_runtime": 3.469, |
|
"eval_samples_per_second": 7902.599, |
|
"eval_steps_per_second": 7.783, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.787878787878788, |
|
"grad_norm": 1.4375017881393433, |
|
"learning_rate": 4.902506963788301e-06, |
|
"loss": 0.0296, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 3.8567493112947657, |
|
"grad_norm": 1.0931159257888794, |
|
"learning_rate": 4.623955431754875e-06, |
|
"loss": 0.0338, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.8567493112947657, |
|
"eval_accuracy": 0.9852994820164879, |
|
"eval_f1": 0.9818329351305053, |
|
"eval_loss": 0.047653596848249435, |
|
"eval_precision": 0.9826746074715755, |
|
"eval_recall": 0.9809927033600576, |
|
"eval_runtime": 3.4683, |
|
"eval_samples_per_second": 7904.087, |
|
"eval_steps_per_second": 7.785, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.925619834710744, |
|
"grad_norm": 1.2919989824295044, |
|
"learning_rate": 4.345403899721449e-06, |
|
"loss": 0.0316, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 3.994490358126722, |
|
"grad_norm": 1.893475890159607, |
|
"learning_rate": 4.0668523676880225e-06, |
|
"loss": 0.0285, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.994490358126722, |
|
"eval_accuracy": 0.9851900488801343, |
|
"eval_f1": 0.9816439099376074, |
|
"eval_loss": 0.05093059316277504, |
|
"eval_precision": 0.9853862212943633, |
|
"eval_recall": 0.9779299162237636, |
|
"eval_runtime": 3.8831, |
|
"eval_samples_per_second": 7059.86, |
|
"eval_steps_per_second": 6.953, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 4.0633608815427, |
|
"grad_norm": 1.606433629989624, |
|
"learning_rate": 3.7883008356545963e-06, |
|
"loss": 0.0398, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 4.132231404958677, |
|
"grad_norm": 1.2981059551239014, |
|
"learning_rate": 3.5097493036211698e-06, |
|
"loss": 0.026, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.132231404958677, |
|
"eval_accuracy": 0.9857736922740206, |
|
"eval_f1": 0.9824038982133189, |
|
"eval_loss": 0.05328037962317467, |
|
"eval_precision": 0.9840911145258971, |
|
"eval_recall": 0.980722457436267, |
|
"eval_runtime": 3.4891, |
|
"eval_samples_per_second": 7856.935, |
|
"eval_steps_per_second": 7.738, |
|
"step": 1500 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 1815, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.140915438023948e+16, |
|
"train_batch_size": 1024, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|