diff --git "a/checkpoint-5000/trainer_state.json" "b/checkpoint-5000/trainer_state.json" deleted file mode 100644--- "a/checkpoint-5000/trainer_state.json" +++ /dev/null @@ -1,35483 +0,0 @@ -{ - "best_metric": 0.1314370483160019, - "best_model_checkpoint": "/scratch/csg337/pmf-grn-3dc/train_prior_network/hp_sweep/finetune_nt_lr0.00001_ga32_cw0.4_1.0_dr0.6/checkpoint-5000", - "epoch": 0.852492207688414, - "eval_steps": 100, - "global_step": 5000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 0.00017049844153768282, - "grad_norm": 8.869589805603027, - "learning_rate": 9.998294970161979e-06, - "loss": 0.7814, - "step": 1 - }, - { - "epoch": 0.00034099688307536564, - "grad_norm": 7.021997451782227, - "learning_rate": 9.996589940323957e-06, - "loss": 0.6035, - "step": 2 - }, - { - "epoch": 0.0005114953246130485, - "grad_norm": 5.577943801879883, - "learning_rate": 9.994884910485935e-06, - "loss": 0.4712, - "step": 3 - }, - { - "epoch": 0.0006819937661507313, - "grad_norm": 4.31350564956665, - "learning_rate": 9.993179880647913e-06, - "loss": 0.3819, - "step": 4 - }, - { - "epoch": 0.0008524922076884142, - "grad_norm": 3.362149477005005, - "learning_rate": 9.99147485080989e-06, - "loss": 0.3102, - "step": 5 - }, - { - "epoch": 0.001022990649226097, - "grad_norm": 1.3885729312896729, - "learning_rate": 9.989769820971867e-06, - "loss": 0.3531, - "step": 6 - }, - { - "epoch": 0.0011934890907637798, - "grad_norm": 1.580288052558899, - "learning_rate": 9.988064791133846e-06, - "loss": 0.2337, - "step": 7 - }, - { - "epoch": 0.0013639875323014626, - "grad_norm": 0.9532984495162964, - "learning_rate": 9.986359761295823e-06, - "loss": 0.2231, - "step": 8 - }, - { - "epoch": 0.0015344859738391453, - "grad_norm": 0.7379500865936279, - "learning_rate": 9.9846547314578e-06, - "loss": 0.1995, - "step": 9 - }, - { - "epoch": 0.0017049844153768283, - "grad_norm": 0.45577266812324524, - "learning_rate": 9.98294970161978e-06, - "loss": 0.2418, - "step": 10 - }, - { - "epoch": 0.001875482856914511, - "grad_norm": 0.64174884557724, - "learning_rate": 9.981244671781757e-06, - "loss": 0.122, - "step": 11 - }, - { - "epoch": 0.002045981298452194, - "grad_norm": 1.0696319341659546, - "learning_rate": 9.979539641943734e-06, - "loss": 0.3283, - "step": 12 - }, - { - "epoch": 0.0022164797399898766, - "grad_norm": 0.6181367635726929, - "learning_rate": 9.977834612105712e-06, - "loss": 0.1975, - "step": 13 - }, - { - "epoch": 0.0023869781815275596, - "grad_norm": 0.6715260148048401, - "learning_rate": 9.97612958226769e-06, - "loss": 0.096, - "step": 14 - }, - { - "epoch": 0.002557476623065242, - "grad_norm": 0.6611419320106506, - "learning_rate": 9.974424552429668e-06, - "loss": 0.1216, - "step": 15 - }, - { - "epoch": 0.002727975064602925, - "grad_norm": 0.5713757276535034, - "learning_rate": 9.972719522591646e-06, - "loss": 0.199, - "step": 16 - }, - { - "epoch": 0.002898473506140608, - "grad_norm": 0.9805899262428284, - "learning_rate": 9.971014492753624e-06, - "loss": 0.2685, - "step": 17 - }, - { - "epoch": 0.0030689719476782906, - "grad_norm": 0.5129013061523438, - "learning_rate": 9.969309462915602e-06, - "loss": 0.2221, - "step": 18 - }, - { - "epoch": 0.0032394703892159736, - "grad_norm": 0.503084659576416, - "learning_rate": 9.96760443307758e-06, - "loss": 0.1479, - "step": 19 - }, - { - "epoch": 0.0034099688307536566, - "grad_norm": 0.70774906873703, - "learning_rate": 9.965899403239558e-06, - "loss": 0.1203, - "step": 20 - }, - { - "epoch": 0.003580467272291339, - "grad_norm": 0.5927731394767761, - "learning_rate": 9.964194373401536e-06, - "loss": 0.1222, - "step": 21 - }, - { - "epoch": 0.003750965713829022, - "grad_norm": 0.5986411571502686, - "learning_rate": 9.962489343563512e-06, - "loss": 0.1092, - "step": 22 - }, - { - "epoch": 0.003921464155366705, - "grad_norm": 0.3205660879611969, - "learning_rate": 9.960784313725492e-06, - "loss": 0.1677, - "step": 23 - }, - { - "epoch": 0.004091962596904388, - "grad_norm": 0.4281774163246155, - "learning_rate": 9.959079283887468e-06, - "loss": 0.1171, - "step": 24 - }, - { - "epoch": 0.00426246103844207, - "grad_norm": 0.5072917938232422, - "learning_rate": 9.957374254049446e-06, - "loss": 0.195, - "step": 25 - }, - { - "epoch": 0.004432959479979753, - "grad_norm": 0.45099368691444397, - "learning_rate": 9.955669224211426e-06, - "loss": 0.1907, - "step": 26 - }, - { - "epoch": 0.004603457921517436, - "grad_norm": 0.2903910279273987, - "learning_rate": 9.953964194373402e-06, - "loss": 0.1712, - "step": 27 - }, - { - "epoch": 0.004773956363055119, - "grad_norm": 0.39423874020576477, - "learning_rate": 9.95225916453538e-06, - "loss": 0.0873, - "step": 28 - }, - { - "epoch": 0.004944454804592802, - "grad_norm": 1.3082679510116577, - "learning_rate": 9.950554134697358e-06, - "loss": 0.3284, - "step": 29 - }, - { - "epoch": 0.005114953246130484, - "grad_norm": 0.22596949338912964, - "learning_rate": 9.948849104859336e-06, - "loss": 0.136, - "step": 30 - }, - { - "epoch": 0.005285451687668167, - "grad_norm": 0.32791393995285034, - "learning_rate": 9.947144075021314e-06, - "loss": 0.1208, - "step": 31 - }, - { - "epoch": 0.00545595012920585, - "grad_norm": 0.9837888479232788, - "learning_rate": 9.945439045183292e-06, - "loss": 0.2933, - "step": 32 - }, - { - "epoch": 0.005626448570743533, - "grad_norm": 0.26505422592163086, - "learning_rate": 9.943734015345268e-06, - "loss": 0.1744, - "step": 33 - }, - { - "epoch": 0.005796947012281216, - "grad_norm": 0.3285515308380127, - "learning_rate": 9.942028985507248e-06, - "loss": 0.1913, - "step": 34 - }, - { - "epoch": 0.005967445453818898, - "grad_norm": 0.4692244231700897, - "learning_rate": 9.940323955669226e-06, - "loss": 0.1721, - "step": 35 - }, - { - "epoch": 0.006137943895356581, - "grad_norm": 0.7220498323440552, - "learning_rate": 9.938618925831202e-06, - "loss": 0.1271, - "step": 36 - }, - { - "epoch": 0.006308442336894264, - "grad_norm": 0.5188308358192444, - "learning_rate": 9.936913895993181e-06, - "loss": 0.1699, - "step": 37 - }, - { - "epoch": 0.006478940778431947, - "grad_norm": 0.29906973242759705, - "learning_rate": 9.935208866155158e-06, - "loss": 0.1882, - "step": 38 - }, - { - "epoch": 0.00664943921996963, - "grad_norm": 0.858116626739502, - "learning_rate": 9.933503836317137e-06, - "loss": 0.2837, - "step": 39 - }, - { - "epoch": 0.006819937661507313, - "grad_norm": 0.38193100690841675, - "learning_rate": 9.931798806479114e-06, - "loss": 0.221, - "step": 40 - }, - { - "epoch": 0.006990436103044995, - "grad_norm": 0.3163479268550873, - "learning_rate": 9.930093776641092e-06, - "loss": 0.1911, - "step": 41 - }, - { - "epoch": 0.007160934544582678, - "grad_norm": 0.6785820722579956, - "learning_rate": 9.928388746803071e-06, - "loss": 0.1457, - "step": 42 - }, - { - "epoch": 0.007331432986120361, - "grad_norm": 0.4343841075897217, - "learning_rate": 9.926683716965047e-06, - "loss": 0.2055, - "step": 43 - }, - { - "epoch": 0.007501931427658044, - "grad_norm": 0.5253484845161438, - "learning_rate": 9.924978687127025e-06, - "loss": 0.2558, - "step": 44 - }, - { - "epoch": 0.007672429869195727, - "grad_norm": 0.4470311105251312, - "learning_rate": 9.923273657289003e-06, - "loss": 0.1753, - "step": 45 - }, - { - "epoch": 0.00784292831073341, - "grad_norm": 0.7119751572608948, - "learning_rate": 9.921568627450981e-06, - "loss": 0.2984, - "step": 46 - }, - { - "epoch": 0.008013426752271093, - "grad_norm": 0.634812593460083, - "learning_rate": 9.91986359761296e-06, - "loss": 0.1811, - "step": 47 - }, - { - "epoch": 0.008183925193808776, - "grad_norm": 0.6000546216964722, - "learning_rate": 9.918158567774937e-06, - "loss": 0.2744, - "step": 48 - }, - { - "epoch": 0.008354423635346457, - "grad_norm": 0.7269101142883301, - "learning_rate": 9.916453537936913e-06, - "loss": 0.1516, - "step": 49 - }, - { - "epoch": 0.00852492207688414, - "grad_norm": 0.5989276766777039, - "learning_rate": 9.914748508098893e-06, - "loss": 0.189, - "step": 50 - }, - { - "epoch": 0.008695420518421823, - "grad_norm": 0.5950725078582764, - "learning_rate": 9.913043478260871e-06, - "loss": 0.1791, - "step": 51 - }, - { - "epoch": 0.008865918959959506, - "grad_norm": 0.4925420582294464, - "learning_rate": 9.911338448422847e-06, - "loss": 0.1503, - "step": 52 - }, - { - "epoch": 0.00903641740149719, - "grad_norm": 0.8844504356384277, - "learning_rate": 9.909633418584827e-06, - "loss": 0.2852, - "step": 53 - }, - { - "epoch": 0.009206915843034872, - "grad_norm": 0.32095837593078613, - "learning_rate": 9.907928388746803e-06, - "loss": 0.167, - "step": 54 - }, - { - "epoch": 0.009377414284572555, - "grad_norm": 0.42962777614593506, - "learning_rate": 9.906223358908781e-06, - "loss": 0.1481, - "step": 55 - }, - { - "epoch": 0.009547912726110238, - "grad_norm": 1.1052173376083374, - "learning_rate": 9.904518329070759e-06, - "loss": 0.306, - "step": 56 - }, - { - "epoch": 0.009718411167647921, - "grad_norm": 0.5791013836860657, - "learning_rate": 9.902813299232737e-06, - "loss": 0.2187, - "step": 57 - }, - { - "epoch": 0.009888909609185604, - "grad_norm": 0.5379682183265686, - "learning_rate": 9.901108269394715e-06, - "loss": 0.1645, - "step": 58 - }, - { - "epoch": 0.010059408050723286, - "grad_norm": 0.6275455355644226, - "learning_rate": 9.899403239556693e-06, - "loss": 0.1489, - "step": 59 - }, - { - "epoch": 0.010229906492260969, - "grad_norm": 0.6161112785339355, - "learning_rate": 9.897698209718671e-06, - "loss": 0.1487, - "step": 60 - }, - { - "epoch": 0.010400404933798651, - "grad_norm": 0.5155672430992126, - "learning_rate": 9.895993179880649e-06, - "loss": 0.1816, - "step": 61 - }, - { - "epoch": 0.010570903375336334, - "grad_norm": 1.4397295713424683, - "learning_rate": 9.894288150042627e-06, - "loss": 0.3635, - "step": 62 - }, - { - "epoch": 0.010741401816874017, - "grad_norm": 0.4438450038433075, - "learning_rate": 9.892583120204605e-06, - "loss": 0.1475, - "step": 63 - }, - { - "epoch": 0.0109119002584117, - "grad_norm": 0.8751521110534668, - "learning_rate": 9.890878090366583e-06, - "loss": 0.2665, - "step": 64 - }, - { - "epoch": 0.011082398699949383, - "grad_norm": 0.4153095483779907, - "learning_rate": 9.889173060528559e-06, - "loss": 0.1866, - "step": 65 - }, - { - "epoch": 0.011252897141487066, - "grad_norm": 0.35557761788368225, - "learning_rate": 9.887468030690539e-06, - "loss": 0.1678, - "step": 66 - }, - { - "epoch": 0.01142339558302475, - "grad_norm": 0.7395035624504089, - "learning_rate": 9.885763000852517e-06, - "loss": 0.1002, - "step": 67 - }, - { - "epoch": 0.011593894024562432, - "grad_norm": 0.3700765073299408, - "learning_rate": 9.884057971014493e-06, - "loss": 0.2083, - "step": 68 - }, - { - "epoch": 0.011764392466100115, - "grad_norm": 0.8187807202339172, - "learning_rate": 9.882352941176472e-06, - "loss": 0.072, - "step": 69 - }, - { - "epoch": 0.011934890907637797, - "grad_norm": 0.35573920607566833, - "learning_rate": 9.880647911338449e-06, - "loss": 0.2068, - "step": 70 - }, - { - "epoch": 0.01210538934917548, - "grad_norm": 0.4666770100593567, - "learning_rate": 9.878942881500427e-06, - "loss": 0.2112, - "step": 71 - }, - { - "epoch": 0.012275887790713163, - "grad_norm": 0.23817773163318634, - "learning_rate": 9.877237851662405e-06, - "loss": 0.1685, - "step": 72 - }, - { - "epoch": 0.012446386232250846, - "grad_norm": 0.4650826156139374, - "learning_rate": 9.875532821824383e-06, - "loss": 0.2222, - "step": 73 - }, - { - "epoch": 0.012616884673788529, - "grad_norm": 0.2902311384677887, - "learning_rate": 9.87382779198636e-06, - "loss": 0.1436, - "step": 74 - }, - { - "epoch": 0.012787383115326212, - "grad_norm": 0.38183656334877014, - "learning_rate": 9.872122762148338e-06, - "loss": 0.1163, - "step": 75 - }, - { - "epoch": 0.012957881556863894, - "grad_norm": 0.3778226375579834, - "learning_rate": 9.870417732310316e-06, - "loss": 0.1371, - "step": 76 - }, - { - "epoch": 0.013128379998401577, - "grad_norm": 1.2414989471435547, - "learning_rate": 9.868712702472294e-06, - "loss": 0.3363, - "step": 77 - }, - { - "epoch": 0.01329887843993926, - "grad_norm": 0.1740960031747818, - "learning_rate": 9.867007672634272e-06, - "loss": 0.1663, - "step": 78 - }, - { - "epoch": 0.013469376881476943, - "grad_norm": 0.8906394839286804, - "learning_rate": 9.86530264279625e-06, - "loss": 0.2848, - "step": 79 - }, - { - "epoch": 0.013639875323014626, - "grad_norm": 0.41351833939552307, - "learning_rate": 9.863597612958228e-06, - "loss": 0.2146, - "step": 80 - }, - { - "epoch": 0.013810373764552308, - "grad_norm": 0.4237087070941925, - "learning_rate": 9.861892583120204e-06, - "loss": 0.2408, - "step": 81 - }, - { - "epoch": 0.01398087220608999, - "grad_norm": 0.2752889394760132, - "learning_rate": 9.860187553282184e-06, - "loss": 0.2159, - "step": 82 - }, - { - "epoch": 0.014151370647627674, - "grad_norm": 0.3237917423248291, - "learning_rate": 9.85848252344416e-06, - "loss": 0.2191, - "step": 83 - }, - { - "epoch": 0.014321869089165357, - "grad_norm": 0.41194331645965576, - "learning_rate": 9.856777493606138e-06, - "loss": 0.2122, - "step": 84 - }, - { - "epoch": 0.01449236753070304, - "grad_norm": 0.7924314141273499, - "learning_rate": 9.855072463768118e-06, - "loss": 0.1722, - "step": 85 - }, - { - "epoch": 0.014662865972240723, - "grad_norm": 1.0692120790481567, - "learning_rate": 9.853367433930094e-06, - "loss": 0.137, - "step": 86 - }, - { - "epoch": 0.014833364413778406, - "grad_norm": 0.615675687789917, - "learning_rate": 9.851662404092072e-06, - "loss": 0.1637, - "step": 87 - }, - { - "epoch": 0.015003862855316089, - "grad_norm": 0.23946630954742432, - "learning_rate": 9.84995737425405e-06, - "loss": 0.2113, - "step": 88 - }, - { - "epoch": 0.015174361296853772, - "grad_norm": 0.40212199091911316, - "learning_rate": 9.848252344416028e-06, - "loss": 0.2339, - "step": 89 - }, - { - "epoch": 0.015344859738391455, - "grad_norm": 0.3420916497707367, - "learning_rate": 9.846547314578006e-06, - "loss": 0.145, - "step": 90 - }, - { - "epoch": 0.015515358179929136, - "grad_norm": 0.19429825246334076, - "learning_rate": 9.844842284739984e-06, - "loss": 0.1709, - "step": 91 - }, - { - "epoch": 0.01568585662146682, - "grad_norm": 0.3276514708995819, - "learning_rate": 9.843137254901962e-06, - "loss": 0.1178, - "step": 92 - }, - { - "epoch": 0.015856355063004503, - "grad_norm": 0.7621896862983704, - "learning_rate": 9.84143222506394e-06, - "loss": 0.2462, - "step": 93 - }, - { - "epoch": 0.016026853504542186, - "grad_norm": 0.18447129428386688, - "learning_rate": 9.839727195225918e-06, - "loss": 0.1408, - "step": 94 - }, - { - "epoch": 0.01619735194607987, - "grad_norm": 0.42043575644493103, - "learning_rate": 9.838022165387896e-06, - "loss": 0.1872, - "step": 95 - }, - { - "epoch": 0.016367850387617552, - "grad_norm": 0.23067864775657654, - "learning_rate": 9.836317135549874e-06, - "loss": 0.1475, - "step": 96 - }, - { - "epoch": 0.016538348829155232, - "grad_norm": 0.22350552678108215, - "learning_rate": 9.83461210571185e-06, - "loss": 0.1387, - "step": 97 - }, - { - "epoch": 0.016708847270692915, - "grad_norm": 0.18144337832927704, - "learning_rate": 9.83290707587383e-06, - "loss": 0.1316, - "step": 98 - }, - { - "epoch": 0.016879345712230598, - "grad_norm": 0.4881460666656494, - "learning_rate": 9.831202046035806e-06, - "loss": 0.1929, - "step": 99 - }, - { - "epoch": 0.01704984415376828, - "grad_norm": 0.6317399740219116, - "learning_rate": 9.829497016197784e-06, - "loss": 0.2182, - "step": 100 - }, - { - "epoch": 0.01704984415376828, - "eval_f1_score": 0.0, - "eval_loss": 0.22505781054496765, - "eval_runtime": 183.3482, - "eval_samples_per_second": 54.541, - "eval_steps_per_second": 3.409, - "step": 100 - }, - { - "epoch": 0.017220342595305964, - "grad_norm": 0.2174391895532608, - "learning_rate": 9.827791986359763e-06, - "loss": 0.1484, - "step": 101 - }, - { - "epoch": 0.017390841036843647, - "grad_norm": 0.21559378504753113, - "learning_rate": 9.82608695652174e-06, - "loss": 0.1462, - "step": 102 - }, - { - "epoch": 0.01756133947838133, - "grad_norm": 0.38197803497314453, - "learning_rate": 9.824381926683718e-06, - "loss": 0.1137, - "step": 103 - }, - { - "epoch": 0.017731837919919013, - "grad_norm": 0.609444260597229, - "learning_rate": 9.822676896845696e-06, - "loss": 0.2368, - "step": 104 - }, - { - "epoch": 0.017902336361456696, - "grad_norm": 0.2939724922180176, - "learning_rate": 9.820971867007673e-06, - "loss": 0.1617, - "step": 105 - }, - { - "epoch": 0.01807283480299438, - "grad_norm": 0.5116530656814575, - "learning_rate": 9.819266837169651e-06, - "loss": 0.1289, - "step": 106 - }, - { - "epoch": 0.01824333324453206, - "grad_norm": 0.6672798991203308, - "learning_rate": 9.81756180733163e-06, - "loss": 0.2506, - "step": 107 - }, - { - "epoch": 0.018413831686069745, - "grad_norm": 0.6284186840057373, - "learning_rate": 9.815856777493606e-06, - "loss": 0.0931, - "step": 108 - }, - { - "epoch": 0.018584330127607428, - "grad_norm": 0.38140490651130676, - "learning_rate": 9.814151747655585e-06, - "loss": 0.1199, - "step": 109 - }, - { - "epoch": 0.01875482856914511, - "grad_norm": 0.2449476420879364, - "learning_rate": 9.812446717817563e-06, - "loss": 0.1697, - "step": 110 - }, - { - "epoch": 0.018925327010682794, - "grad_norm": 0.4863952100276947, - "learning_rate": 9.81074168797954e-06, - "loss": 0.2203, - "step": 111 - }, - { - "epoch": 0.019095825452220477, - "grad_norm": 1.2018239498138428, - "learning_rate": 9.809036658141519e-06, - "loss": 0.3048, - "step": 112 - }, - { - "epoch": 0.01926632389375816, - "grad_norm": 0.3844000995159149, - "learning_rate": 9.807331628303495e-06, - "loss": 0.1784, - "step": 113 - }, - { - "epoch": 0.019436822335295843, - "grad_norm": 0.2755303680896759, - "learning_rate": 9.805626598465475e-06, - "loss": 0.1875, - "step": 114 - }, - { - "epoch": 0.019607320776833526, - "grad_norm": 0.3493352234363556, - "learning_rate": 9.803921568627451e-06, - "loss": 0.1688, - "step": 115 - }, - { - "epoch": 0.01977781921837121, - "grad_norm": 0.6989247798919678, - "learning_rate": 9.80221653878943e-06, - "loss": 0.1311, - "step": 116 - }, - { - "epoch": 0.01994831765990889, - "grad_norm": 0.3620999753475189, - "learning_rate": 9.800511508951407e-06, - "loss": 0.1595, - "step": 117 - }, - { - "epoch": 0.02011881610144657, - "grad_norm": 0.36481228470802307, - "learning_rate": 9.798806479113385e-06, - "loss": 0.1641, - "step": 118 - }, - { - "epoch": 0.020289314542984254, - "grad_norm": 0.7398555278778076, - "learning_rate": 9.797101449275363e-06, - "loss": 0.1037, - "step": 119 - }, - { - "epoch": 0.020459812984521937, - "grad_norm": 0.5512336492538452, - "learning_rate": 9.795396419437341e-06, - "loss": 0.2342, - "step": 120 - }, - { - "epoch": 0.02063031142605962, - "grad_norm": 0.25768429040908813, - "learning_rate": 9.793691389599319e-06, - "loss": 0.1686, - "step": 121 - }, - { - "epoch": 0.020800809867597303, - "grad_norm": 0.36216527223587036, - "learning_rate": 9.791986359761297e-06, - "loss": 0.1923, - "step": 122 - }, - { - "epoch": 0.020971308309134986, - "grad_norm": 0.26241618394851685, - "learning_rate": 9.790281329923275e-06, - "loss": 0.1424, - "step": 123 - }, - { - "epoch": 0.02114180675067267, - "grad_norm": 0.25239989161491394, - "learning_rate": 9.788576300085251e-06, - "loss": 0.1437, - "step": 124 - }, - { - "epoch": 0.021312305192210352, - "grad_norm": 0.3067416548728943, - "learning_rate": 9.78687127024723e-06, - "loss": 0.1787, - "step": 125 - }, - { - "epoch": 0.021482803633748035, - "grad_norm": 0.7511736750602722, - "learning_rate": 9.785166240409209e-06, - "loss": 0.2259, - "step": 126 - }, - { - "epoch": 0.021653302075285718, - "grad_norm": 0.5204250812530518, - "learning_rate": 9.783461210571185e-06, - "loss": 0.1876, - "step": 127 - }, - { - "epoch": 0.0218238005168234, - "grad_norm": 0.5185781121253967, - "learning_rate": 9.781756180733165e-06, - "loss": 0.0949, - "step": 128 - }, - { - "epoch": 0.021994298958361084, - "grad_norm": 0.5859097838401794, - "learning_rate": 9.78005115089514e-06, - "loss": 0.1002, - "step": 129 - }, - { - "epoch": 0.022164797399898767, - "grad_norm": 0.3053465187549591, - "learning_rate": 9.778346121057119e-06, - "loss": 0.168, - "step": 130 - }, - { - "epoch": 0.02233529584143645, - "grad_norm": 0.2791380286216736, - "learning_rate": 9.776641091219097e-06, - "loss": 0.1916, - "step": 131 - }, - { - "epoch": 0.022505794282974133, - "grad_norm": 0.5178912878036499, - "learning_rate": 9.774936061381075e-06, - "loss": 0.2206, - "step": 132 - }, - { - "epoch": 0.022676292724511816, - "grad_norm": 0.594906210899353, - "learning_rate": 9.773231031543053e-06, - "loss": 0.08, - "step": 133 - }, - { - "epoch": 0.0228467911660495, - "grad_norm": 0.789562463760376, - "learning_rate": 9.77152600170503e-06, - "loss": 0.2564, - "step": 134 - }, - { - "epoch": 0.023017289607587182, - "grad_norm": 0.8185597062110901, - "learning_rate": 9.769820971867009e-06, - "loss": 0.2694, - "step": 135 - }, - { - "epoch": 0.023187788049124865, - "grad_norm": 0.5645036101341248, - "learning_rate": 9.768115942028986e-06, - "loss": 0.2195, - "step": 136 - }, - { - "epoch": 0.023358286490662548, - "grad_norm": 0.3041114807128906, - "learning_rate": 9.766410912190964e-06, - "loss": 0.1951, - "step": 137 - }, - { - "epoch": 0.02352878493220023, - "grad_norm": 0.6470907926559448, - "learning_rate": 9.764705882352942e-06, - "loss": 0.2616, - "step": 138 - }, - { - "epoch": 0.02369928337373791, - "grad_norm": 0.5063560009002686, - "learning_rate": 9.76300085251492e-06, - "loss": 0.257, - "step": 139 - }, - { - "epoch": 0.023869781815275593, - "grad_norm": 0.5629501342773438, - "learning_rate": 9.761295822676897e-06, - "loss": 0.201, - "step": 140 - }, - { - "epoch": 0.024040280256813276, - "grad_norm": 1.2421547174453735, - "learning_rate": 9.759590792838876e-06, - "loss": 0.124, - "step": 141 - }, - { - "epoch": 0.02421077869835096, - "grad_norm": 0.9891913533210754, - "learning_rate": 9.757885763000852e-06, - "loss": 0.1581, - "step": 142 - }, - { - "epoch": 0.024381277139888642, - "grad_norm": 0.8758956789970398, - "learning_rate": 9.75618073316283e-06, - "loss": 0.1515, - "step": 143 - }, - { - "epoch": 0.024551775581426325, - "grad_norm": 0.6748665571212769, - "learning_rate": 9.75447570332481e-06, - "loss": 0.1611, - "step": 144 - }, - { - "epoch": 0.024722274022964008, - "grad_norm": 0.4170930087566376, - "learning_rate": 9.752770673486786e-06, - "loss": 0.2401, - "step": 145 - }, - { - "epoch": 0.02489277246450169, - "grad_norm": 0.30556097626686096, - "learning_rate": 9.751065643648764e-06, - "loss": 0.1555, - "step": 146 - }, - { - "epoch": 0.025063270906039374, - "grad_norm": 0.3912142813205719, - "learning_rate": 9.749360613810742e-06, - "loss": 0.2025, - "step": 147 - }, - { - "epoch": 0.025233769347577057, - "grad_norm": 0.5749161839485168, - "learning_rate": 9.74765558397272e-06, - "loss": 0.2275, - "step": 148 - }, - { - "epoch": 0.02540426778911474, - "grad_norm": 0.6735514402389526, - "learning_rate": 9.745950554134698e-06, - "loss": 0.2427, - "step": 149 - }, - { - "epoch": 0.025574766230652423, - "grad_norm": 0.33073481917381287, - "learning_rate": 9.744245524296676e-06, - "loss": 0.1338, - "step": 150 - }, - { - "epoch": 0.025745264672190106, - "grad_norm": 0.2953212261199951, - "learning_rate": 9.742540494458654e-06, - "loss": 0.1467, - "step": 151 - }, - { - "epoch": 0.02591576311372779, - "grad_norm": 0.5662497282028198, - "learning_rate": 9.740835464620632e-06, - "loss": 0.2264, - "step": 152 - }, - { - "epoch": 0.026086261555265472, - "grad_norm": 0.5139706134796143, - "learning_rate": 9.73913043478261e-06, - "loss": 0.2136, - "step": 153 - }, - { - "epoch": 0.026256759996803155, - "grad_norm": 0.35498949885368347, - "learning_rate": 9.737425404944588e-06, - "loss": 0.1618, - "step": 154 - }, - { - "epoch": 0.026427258438340838, - "grad_norm": 0.5710799694061279, - "learning_rate": 9.735720375106566e-06, - "loss": 0.2465, - "step": 155 - }, - { - "epoch": 0.02659775687987852, - "grad_norm": 0.4561382830142975, - "learning_rate": 9.734015345268542e-06, - "loss": 0.1765, - "step": 156 - }, - { - "epoch": 0.026768255321416204, - "grad_norm": 0.7372007369995117, - "learning_rate": 9.732310315430522e-06, - "loss": 0.0938, - "step": 157 - }, - { - "epoch": 0.026938753762953887, - "grad_norm": 0.45218053460121155, - "learning_rate": 9.730605285592498e-06, - "loss": 0.2387, - "step": 158 - }, - { - "epoch": 0.02710925220449157, - "grad_norm": 0.6522502899169922, - "learning_rate": 9.728900255754476e-06, - "loss": 0.1163, - "step": 159 - }, - { - "epoch": 0.027279750646029253, - "grad_norm": 0.4411844313144684, - "learning_rate": 9.727195225916456e-06, - "loss": 0.138, - "step": 160 - }, - { - "epoch": 0.027450249087566932, - "grad_norm": 0.3477385938167572, - "learning_rate": 9.725490196078432e-06, - "loss": 0.1822, - "step": 161 - }, - { - "epoch": 0.027620747529104615, - "grad_norm": 0.300945907831192, - "learning_rate": 9.72378516624041e-06, - "loss": 0.1916, - "step": 162 - }, - { - "epoch": 0.0277912459706423, - "grad_norm": 0.37067076563835144, - "learning_rate": 9.722080136402388e-06, - "loss": 0.1904, - "step": 163 - }, - { - "epoch": 0.02796174441217998, - "grad_norm": 0.5103992223739624, - "learning_rate": 9.720375106564366e-06, - "loss": 0.2164, - "step": 164 - }, - { - "epoch": 0.028132242853717664, - "grad_norm": 0.7241944074630737, - "learning_rate": 9.718670076726344e-06, - "loss": 0.2236, - "step": 165 - }, - { - "epoch": 0.028302741295255347, - "grad_norm": 0.6501290202140808, - "learning_rate": 9.716965046888322e-06, - "loss": 0.0946, - "step": 166 - }, - { - "epoch": 0.02847323973679303, - "grad_norm": 0.3597775101661682, - "learning_rate": 9.7152600170503e-06, - "loss": 0.1611, - "step": 167 - }, - { - "epoch": 0.028643738178330713, - "grad_norm": 0.563456654548645, - "learning_rate": 9.713554987212277e-06, - "loss": 0.2554, - "step": 168 - }, - { - "epoch": 0.028814236619868396, - "grad_norm": 0.3869915008544922, - "learning_rate": 9.711849957374255e-06, - "loss": 0.2054, - "step": 169 - }, - { - "epoch": 0.02898473506140608, - "grad_norm": 0.45588812232017517, - "learning_rate": 9.710144927536233e-06, - "loss": 0.2157, - "step": 170 - }, - { - "epoch": 0.029155233502943762, - "grad_norm": 0.32176175713539124, - "learning_rate": 9.708439897698211e-06, - "loss": 0.1984, - "step": 171 - }, - { - "epoch": 0.029325731944481445, - "grad_norm": 0.5632541179656982, - "learning_rate": 9.706734867860187e-06, - "loss": 0.1409, - "step": 172 - }, - { - "epoch": 0.029496230386019128, - "grad_norm": 0.3443940281867981, - "learning_rate": 9.705029838022167e-06, - "loss": 0.1957, - "step": 173 - }, - { - "epoch": 0.02966672882755681, - "grad_norm": 0.5562809109687805, - "learning_rate": 9.703324808184143e-06, - "loss": 0.2574, - "step": 174 - }, - { - "epoch": 0.029837227269094494, - "grad_norm": 0.35625970363616943, - "learning_rate": 9.701619778346121e-06, - "loss": 0.2414, - "step": 175 - }, - { - "epoch": 0.030007725710632177, - "grad_norm": 0.32852473855018616, - "learning_rate": 9.699914748508101e-06, - "loss": 0.2019, - "step": 176 - }, - { - "epoch": 0.03017822415216986, - "grad_norm": 0.6662753224372864, - "learning_rate": 9.698209718670077e-06, - "loss": 0.2822, - "step": 177 - }, - { - "epoch": 0.030348722593707543, - "grad_norm": 0.5043244361877441, - "learning_rate": 9.696504688832055e-06, - "loss": 0.208, - "step": 178 - }, - { - "epoch": 0.030519221035245226, - "grad_norm": 0.542964518070221, - "learning_rate": 9.694799658994033e-06, - "loss": 0.2004, - "step": 179 - }, - { - "epoch": 0.03068971947678291, - "grad_norm": 0.36917969584465027, - "learning_rate": 9.693094629156011e-06, - "loss": 0.22, - "step": 180 - }, - { - "epoch": 0.030860217918320592, - "grad_norm": 0.5634118914604187, - "learning_rate": 9.691389599317989e-06, - "loss": 0.2737, - "step": 181 - }, - { - "epoch": 0.03103071635985827, - "grad_norm": 0.3653755187988281, - "learning_rate": 9.689684569479967e-06, - "loss": 0.2528, - "step": 182 - }, - { - "epoch": 0.031201214801395954, - "grad_norm": 0.5282630920410156, - "learning_rate": 9.687979539641943e-06, - "loss": 0.1684, - "step": 183 - }, - { - "epoch": 0.03137171324293364, - "grad_norm": 0.5614996552467346, - "learning_rate": 9.686274509803923e-06, - "loss": 0.1646, - "step": 184 - }, - { - "epoch": 0.03154221168447132, - "grad_norm": 0.6981064677238464, - "learning_rate": 9.6845694799659e-06, - "loss": 0.15, - "step": 185 - }, - { - "epoch": 0.03171271012600901, - "grad_norm": 0.5235413312911987, - "learning_rate": 9.682864450127877e-06, - "loss": 0.2261, - "step": 186 - }, - { - "epoch": 0.031883208567546686, - "grad_norm": 0.4519073963165283, - "learning_rate": 9.681159420289857e-06, - "loss": 0.215, - "step": 187 - }, - { - "epoch": 0.03205370700908437, - "grad_norm": 0.34490248560905457, - "learning_rate": 9.679454390451833e-06, - "loss": 0.1827, - "step": 188 - }, - { - "epoch": 0.03222420545062205, - "grad_norm": 0.5453905463218689, - "learning_rate": 9.677749360613813e-06, - "loss": 0.1213, - "step": 189 - }, - { - "epoch": 0.03239470389215974, - "grad_norm": 0.42571887373924255, - "learning_rate": 9.676044330775789e-06, - "loss": 0.1913, - "step": 190 - }, - { - "epoch": 0.03256520233369742, - "grad_norm": 0.6918254494667053, - "learning_rate": 9.674339300937767e-06, - "loss": 0.2399, - "step": 191 - }, - { - "epoch": 0.032735700775235105, - "grad_norm": 0.3514208495616913, - "learning_rate": 9.672634271099745e-06, - "loss": 0.171, - "step": 192 - }, - { - "epoch": 0.032906199216772784, - "grad_norm": 0.3752657473087311, - "learning_rate": 9.670929241261723e-06, - "loss": 0.1189, - "step": 193 - }, - { - "epoch": 0.033076697658310464, - "grad_norm": 0.7846695780754089, - "learning_rate": 9.6692242114237e-06, - "loss": 0.2253, - "step": 194 - }, - { - "epoch": 0.03324719609984815, - "grad_norm": 0.5222557187080383, - "learning_rate": 9.667519181585679e-06, - "loss": 0.1143, - "step": 195 - }, - { - "epoch": 0.03341769454138583, - "grad_norm": 0.5478008389472961, - "learning_rate": 9.665814151747657e-06, - "loss": 0.1919, - "step": 196 - }, - { - "epoch": 0.033588192982923516, - "grad_norm": 0.8064622282981873, - "learning_rate": 9.664109121909634e-06, - "loss": 0.2858, - "step": 197 - }, - { - "epoch": 0.033758691424461196, - "grad_norm": 0.33207300305366516, - "learning_rate": 9.662404092071612e-06, - "loss": 0.182, - "step": 198 - }, - { - "epoch": 0.03392918986599888, - "grad_norm": 0.5906654596328735, - "learning_rate": 9.660699062233589e-06, - "loss": 0.1758, - "step": 199 - }, - { - "epoch": 0.03409968830753656, - "grad_norm": 0.48277583718299866, - "learning_rate": 9.658994032395568e-06, - "loss": 0.2225, - "step": 200 - }, - { - "epoch": 0.03409968830753656, - "eval_f1_score": 0.0, - "eval_loss": 0.21619169414043427, - "eval_runtime": 183.3012, - "eval_samples_per_second": 54.555, - "eval_steps_per_second": 3.41, - "step": 200 - }, - { - "epoch": 0.03427018674907425, - "grad_norm": 0.425251305103302, - "learning_rate": 9.657289002557546e-06, - "loss": 0.2334, - "step": 201 - }, - { - "epoch": 0.03444068519061193, - "grad_norm": 0.6187649965286255, - "learning_rate": 9.655583972719523e-06, - "loss": 0.2057, - "step": 202 - }, - { - "epoch": 0.034611183632149614, - "grad_norm": 0.582363486289978, - "learning_rate": 9.653878942881502e-06, - "loss": 0.1884, - "step": 203 - }, - { - "epoch": 0.034781682073687294, - "grad_norm": 0.5536232590675354, - "learning_rate": 9.652173913043478e-06, - "loss": 0.1723, - "step": 204 - }, - { - "epoch": 0.03495218051522498, - "grad_norm": 0.43162399530410767, - "learning_rate": 9.650468883205456e-06, - "loss": 0.2154, - "step": 205 - }, - { - "epoch": 0.03512267895676266, - "grad_norm": 0.887392520904541, - "learning_rate": 9.648763853367434e-06, - "loss": 0.2948, - "step": 206 - }, - { - "epoch": 0.035293177398300346, - "grad_norm": 0.3913290500640869, - "learning_rate": 9.647058823529412e-06, - "loss": 0.1446, - "step": 207 - }, - { - "epoch": 0.035463675839838026, - "grad_norm": 0.32068392634391785, - "learning_rate": 9.64535379369139e-06, - "loss": 0.1863, - "step": 208 - }, - { - "epoch": 0.03563417428137571, - "grad_norm": 0.4926113784313202, - "learning_rate": 9.643648763853368e-06, - "loss": 0.2029, - "step": 209 - }, - { - "epoch": 0.03580467272291339, - "grad_norm": 0.6515610218048096, - "learning_rate": 9.641943734015346e-06, - "loss": 0.2609, - "step": 210 - }, - { - "epoch": 0.03597517116445108, - "grad_norm": 0.3915189802646637, - "learning_rate": 9.640238704177324e-06, - "loss": 0.1486, - "step": 211 - }, - { - "epoch": 0.03614566960598876, - "grad_norm": 0.8154141306877136, - "learning_rate": 9.638533674339302e-06, - "loss": 0.2535, - "step": 212 - }, - { - "epoch": 0.036316168047526444, - "grad_norm": 0.4246624708175659, - "learning_rate": 9.63682864450128e-06, - "loss": 0.1773, - "step": 213 - }, - { - "epoch": 0.03648666648906412, - "grad_norm": 0.37893757224082947, - "learning_rate": 9.635123614663258e-06, - "loss": 0.2304, - "step": 214 - }, - { - "epoch": 0.0366571649306018, - "grad_norm": 0.4338793158531189, - "learning_rate": 9.633418584825234e-06, - "loss": 0.2138, - "step": 215 - }, - { - "epoch": 0.03682766337213949, - "grad_norm": 1.151077151298523, - "learning_rate": 9.631713554987214e-06, - "loss": 0.1226, - "step": 216 - }, - { - "epoch": 0.03699816181367717, - "grad_norm": 0.7071660161018372, - "learning_rate": 9.63000852514919e-06, - "loss": 0.2902, - "step": 217 - }, - { - "epoch": 0.037168660255214855, - "grad_norm": 0.8551703095436096, - "learning_rate": 9.628303495311168e-06, - "loss": 0.1265, - "step": 218 - }, - { - "epoch": 0.037339158696752535, - "grad_norm": 0.6096398234367371, - "learning_rate": 9.626598465473148e-06, - "loss": 0.1407, - "step": 219 - }, - { - "epoch": 0.03750965713829022, - "grad_norm": 0.6498945355415344, - "learning_rate": 9.624893435635124e-06, - "loss": 0.2396, - "step": 220 - }, - { - "epoch": 0.0376801555798279, - "grad_norm": 0.8286121487617493, - "learning_rate": 9.623188405797102e-06, - "loss": 0.2918, - "step": 221 - }, - { - "epoch": 0.03785065402136559, - "grad_norm": 1.0903311967849731, - "learning_rate": 9.62148337595908e-06, - "loss": 0.2785, - "step": 222 - }, - { - "epoch": 0.03802115246290327, - "grad_norm": 0.5139489769935608, - "learning_rate": 9.619778346121058e-06, - "loss": 0.1452, - "step": 223 - }, - { - "epoch": 0.03819165090444095, - "grad_norm": 0.5093590617179871, - "learning_rate": 9.618073316283036e-06, - "loss": 0.1903, - "step": 224 - }, - { - "epoch": 0.03836214934597863, - "grad_norm": 0.548340916633606, - "learning_rate": 9.616368286445014e-06, - "loss": 0.1516, - "step": 225 - }, - { - "epoch": 0.03853264778751632, - "grad_norm": 0.46318405866622925, - "learning_rate": 9.614663256606992e-06, - "loss": 0.202, - "step": 226 - }, - { - "epoch": 0.038703146229054, - "grad_norm": 0.5825653076171875, - "learning_rate": 9.61295822676897e-06, - "loss": 0.2477, - "step": 227 - }, - { - "epoch": 0.038873644670591685, - "grad_norm": 0.9491086602210999, - "learning_rate": 9.611253196930947e-06, - "loss": 0.1244, - "step": 228 - }, - { - "epoch": 0.039044143112129365, - "grad_norm": 0.6441144943237305, - "learning_rate": 9.609548167092925e-06, - "loss": 0.1457, - "step": 229 - }, - { - "epoch": 0.03921464155366705, - "grad_norm": 0.8043190836906433, - "learning_rate": 9.607843137254903e-06, - "loss": 0.1355, - "step": 230 - }, - { - "epoch": 0.03938513999520473, - "grad_norm": 0.5751570463180542, - "learning_rate": 9.60613810741688e-06, - "loss": 0.2567, - "step": 231 - }, - { - "epoch": 0.03955563843674242, - "grad_norm": 0.4224618375301361, - "learning_rate": 9.60443307757886e-06, - "loss": 0.187, - "step": 232 - }, - { - "epoch": 0.0397261368782801, - "grad_norm": 0.3785933256149292, - "learning_rate": 9.602728047740836e-06, - "loss": 0.2013, - "step": 233 - }, - { - "epoch": 0.03989663531981778, - "grad_norm": 0.4648973345756531, - "learning_rate": 9.601023017902813e-06, - "loss": 0.1626, - "step": 234 - }, - { - "epoch": 0.04006713376135546, - "grad_norm": 0.5512021780014038, - "learning_rate": 9.599317988064793e-06, - "loss": 0.214, - "step": 235 - }, - { - "epoch": 0.04023763220289314, - "grad_norm": 0.7625359296798706, - "learning_rate": 9.59761295822677e-06, - "loss": 0.1994, - "step": 236 - }, - { - "epoch": 0.04040813064443083, - "grad_norm": 0.34150898456573486, - "learning_rate": 9.595907928388747e-06, - "loss": 0.1718, - "step": 237 - }, - { - "epoch": 0.04057862908596851, - "grad_norm": 0.3384284973144531, - "learning_rate": 9.594202898550725e-06, - "loss": 0.144, - "step": 238 - }, - { - "epoch": 0.040749127527506195, - "grad_norm": 0.29428425431251526, - "learning_rate": 9.592497868712703e-06, - "loss": 0.167, - "step": 239 - }, - { - "epoch": 0.040919625969043874, - "grad_norm": 1.1551722288131714, - "learning_rate": 9.590792838874681e-06, - "loss": 0.338, - "step": 240 - }, - { - "epoch": 0.04109012441058156, - "grad_norm": 0.37451595067977905, - "learning_rate": 9.589087809036659e-06, - "loss": 0.2053, - "step": 241 - }, - { - "epoch": 0.04126062285211924, - "grad_norm": 0.3323061168193817, - "learning_rate": 9.587382779198637e-06, - "loss": 0.2096, - "step": 242 - }, - { - "epoch": 0.041431121293656926, - "grad_norm": 0.36669957637786865, - "learning_rate": 9.585677749360615e-06, - "loss": 0.1777, - "step": 243 - }, - { - "epoch": 0.041601619735194606, - "grad_norm": 0.5516036152839661, - "learning_rate": 9.583972719522593e-06, - "loss": 0.1608, - "step": 244 - }, - { - "epoch": 0.04177211817673229, - "grad_norm": 0.47251853346824646, - "learning_rate": 9.582267689684571e-06, - "loss": 0.1808, - "step": 245 - }, - { - "epoch": 0.04194261661826997, - "grad_norm": 0.49306562542915344, - "learning_rate": 9.580562659846549e-06, - "loss": 0.2067, - "step": 246 - }, - { - "epoch": 0.04211311505980766, - "grad_norm": 0.5047063231468201, - "learning_rate": 9.578857630008525e-06, - "loss": 0.2037, - "step": 247 - }, - { - "epoch": 0.04228361350134534, - "grad_norm": 0.4270981550216675, - "learning_rate": 9.577152600170505e-06, - "loss": 0.1763, - "step": 248 - }, - { - "epoch": 0.042454111942883024, - "grad_norm": 0.7507210969924927, - "learning_rate": 9.575447570332481e-06, - "loss": 0.3042, - "step": 249 - }, - { - "epoch": 0.042624610384420704, - "grad_norm": 0.39905881881713867, - "learning_rate": 9.573742540494459e-06, - "loss": 0.226, - "step": 250 - }, - { - "epoch": 0.04279510882595839, - "grad_norm": 0.29342296719551086, - "learning_rate": 9.572037510656439e-06, - "loss": 0.2141, - "step": 251 - }, - { - "epoch": 0.04296560726749607, - "grad_norm": 0.31684985756874084, - "learning_rate": 9.570332480818415e-06, - "loss": 0.2133, - "step": 252 - }, - { - "epoch": 0.043136105709033756, - "grad_norm": 0.4727517366409302, - "learning_rate": 9.568627450980393e-06, - "loss": 0.1621, - "step": 253 - }, - { - "epoch": 0.043306604150571436, - "grad_norm": 0.35550886392593384, - "learning_rate": 9.56692242114237e-06, - "loss": 0.196, - "step": 254 - }, - { - "epoch": 0.04347710259210912, - "grad_norm": 0.31078484654426575, - "learning_rate": 9.565217391304349e-06, - "loss": 0.1974, - "step": 255 - }, - { - "epoch": 0.0436476010336468, - "grad_norm": 0.31537219882011414, - "learning_rate": 9.563512361466327e-06, - "loss": 0.1756, - "step": 256 - }, - { - "epoch": 0.04381809947518448, - "grad_norm": 0.44731876254081726, - "learning_rate": 9.561807331628305e-06, - "loss": 0.1415, - "step": 257 - }, - { - "epoch": 0.04398859791672217, - "grad_norm": 0.2439146786928177, - "learning_rate": 9.56010230179028e-06, - "loss": 0.1922, - "step": 258 - }, - { - "epoch": 0.04415909635825985, - "grad_norm": 0.3228803277015686, - "learning_rate": 9.55839727195226e-06, - "loss": 0.167, - "step": 259 - }, - { - "epoch": 0.044329594799797534, - "grad_norm": 0.34834024310112, - "learning_rate": 9.556692242114238e-06, - "loss": 0.1194, - "step": 260 - }, - { - "epoch": 0.04450009324133521, - "grad_norm": 0.49005141854286194, - "learning_rate": 9.554987212276215e-06, - "loss": 0.0857, - "step": 261 - }, - { - "epoch": 0.0446705916828729, - "grad_norm": 0.5683344602584839, - "learning_rate": 9.553282182438194e-06, - "loss": 0.2118, - "step": 262 - }, - { - "epoch": 0.04484109012441058, - "grad_norm": 0.3251049220561981, - "learning_rate": 9.55157715260017e-06, - "loss": 0.1616, - "step": 263 - }, - { - "epoch": 0.045011588565948266, - "grad_norm": 0.220848947763443, - "learning_rate": 9.54987212276215e-06, - "loss": 0.149, - "step": 264 - }, - { - "epoch": 0.045182087007485945, - "grad_norm": 0.4781118929386139, - "learning_rate": 9.548167092924126e-06, - "loss": 0.175, - "step": 265 - }, - { - "epoch": 0.04535258544902363, - "grad_norm": 0.8802744150161743, - "learning_rate": 9.546462063086104e-06, - "loss": 0.2548, - "step": 266 - }, - { - "epoch": 0.04552308389056131, - "grad_norm": 1.0413671731948853, - "learning_rate": 9.544757033248082e-06, - "loss": 0.2834, - "step": 267 - }, - { - "epoch": 0.045693582332099, - "grad_norm": 0.687307596206665, - "learning_rate": 9.54305200341006e-06, - "loss": 0.2089, - "step": 268 - }, - { - "epoch": 0.04586408077363668, - "grad_norm": 0.4433498978614807, - "learning_rate": 9.541346973572038e-06, - "loss": 0.195, - "step": 269 - }, - { - "epoch": 0.046034579215174364, - "grad_norm": 0.470577210187912, - "learning_rate": 9.539641943734016e-06, - "loss": 0.1615, - "step": 270 - }, - { - "epoch": 0.04620507765671204, - "grad_norm": 0.3836967945098877, - "learning_rate": 9.537936913895994e-06, - "loss": 0.1634, - "step": 271 - }, - { - "epoch": 0.04637557609824973, - "grad_norm": 0.5679737329483032, - "learning_rate": 9.536231884057972e-06, - "loss": 0.2454, - "step": 272 - }, - { - "epoch": 0.04654607453978741, - "grad_norm": 0.4014706015586853, - "learning_rate": 9.53452685421995e-06, - "loss": 0.2199, - "step": 273 - }, - { - "epoch": 0.046716572981325095, - "grad_norm": 0.8329876065254211, - "learning_rate": 9.532821824381926e-06, - "loss": 0.1728, - "step": 274 - }, - { - "epoch": 0.046887071422862775, - "grad_norm": 1.1290709972381592, - "learning_rate": 9.531116794543906e-06, - "loss": 0.1316, - "step": 275 - }, - { - "epoch": 0.04705756986440046, - "grad_norm": 0.7160772085189819, - "learning_rate": 9.529411764705882e-06, - "loss": 0.1479, - "step": 276 - }, - { - "epoch": 0.04722806830593814, - "grad_norm": 0.5050806403160095, - "learning_rate": 9.52770673486786e-06, - "loss": 0.1707, - "step": 277 - }, - { - "epoch": 0.04739856674747582, - "grad_norm": 0.4004954993724823, - "learning_rate": 9.52600170502984e-06, - "loss": 0.1797, - "step": 278 - }, - { - "epoch": 0.04756906518901351, - "grad_norm": 0.5790531039237976, - "learning_rate": 9.524296675191816e-06, - "loss": 0.2352, - "step": 279 - }, - { - "epoch": 0.047739563630551186, - "grad_norm": 1.0683724880218506, - "learning_rate": 9.522591645353794e-06, - "loss": 0.2977, - "step": 280 - }, - { - "epoch": 0.04791006207208887, - "grad_norm": 0.44837862253189087, - "learning_rate": 9.520886615515772e-06, - "loss": 0.1652, - "step": 281 - }, - { - "epoch": 0.04808056051362655, - "grad_norm": 0.3902139961719513, - "learning_rate": 9.51918158567775e-06, - "loss": 0.1839, - "step": 282 - }, - { - "epoch": 0.04825105895516424, - "grad_norm": 0.8578200936317444, - "learning_rate": 9.517476555839728e-06, - "loss": 0.2493, - "step": 283 - }, - { - "epoch": 0.04842155739670192, - "grad_norm": 0.8971724510192871, - "learning_rate": 9.515771526001706e-06, - "loss": 0.2395, - "step": 284 - }, - { - "epoch": 0.048592055838239605, - "grad_norm": 0.3035443127155304, - "learning_rate": 9.514066496163684e-06, - "loss": 0.1992, - "step": 285 - }, - { - "epoch": 0.048762554279777284, - "grad_norm": 1.0432074069976807, - "learning_rate": 9.512361466325662e-06, - "loss": 0.2962, - "step": 286 - }, - { - "epoch": 0.04893305272131497, - "grad_norm": 0.7007878422737122, - "learning_rate": 9.51065643648764e-06, - "loss": 0.2185, - "step": 287 - }, - { - "epoch": 0.04910355116285265, - "grad_norm": 0.8017603754997253, - "learning_rate": 9.508951406649618e-06, - "loss": 0.2113, - "step": 288 - }, - { - "epoch": 0.04927404960439034, - "grad_norm": 1.4467297792434692, - "learning_rate": 9.507246376811596e-06, - "loss": 0.1355, - "step": 289 - }, - { - "epoch": 0.049444548045928016, - "grad_norm": 1.106168270111084, - "learning_rate": 9.505541346973572e-06, - "loss": 0.1692, - "step": 290 - }, - { - "epoch": 0.0496150464874657, - "grad_norm": 0.5016661882400513, - "learning_rate": 9.503836317135551e-06, - "loss": 0.2279, - "step": 291 - }, - { - "epoch": 0.04978554492900338, - "grad_norm": 0.644110381603241, - "learning_rate": 9.502131287297528e-06, - "loss": 0.1934, - "step": 292 - }, - { - "epoch": 0.04995604337054107, - "grad_norm": 0.49578043818473816, - "learning_rate": 9.500426257459506e-06, - "loss": 0.1911, - "step": 293 - }, - { - "epoch": 0.05012654181207875, - "grad_norm": 0.42051151394844055, - "learning_rate": 9.498721227621485e-06, - "loss": 0.1954, - "step": 294 - }, - { - "epoch": 0.050297040253616435, - "grad_norm": 0.42945224046707153, - "learning_rate": 9.497016197783462e-06, - "loss": 0.1173, - "step": 295 - }, - { - "epoch": 0.050467538695154114, - "grad_norm": 0.6755464673042297, - "learning_rate": 9.49531116794544e-06, - "loss": 0.2033, - "step": 296 - }, - { - "epoch": 0.0506380371366918, - "grad_norm": 0.294484943151474, - "learning_rate": 9.493606138107417e-06, - "loss": 0.1475, - "step": 297 - }, - { - "epoch": 0.05080853557822948, - "grad_norm": 1.1652215719223022, - "learning_rate": 9.491901108269395e-06, - "loss": 0.2815, - "step": 298 - }, - { - "epoch": 0.050979034019767167, - "grad_norm": 0.4456086754798889, - "learning_rate": 9.490196078431373e-06, - "loss": 0.1949, - "step": 299 - }, - { - "epoch": 0.051149532461304846, - "grad_norm": 0.8742743730545044, - "learning_rate": 9.488491048593351e-06, - "loss": 0.2193, - "step": 300 - }, - { - "epoch": 0.051149532461304846, - "eval_f1_score": 0.0, - "eval_loss": 0.21666215360164642, - "eval_runtime": 183.2895, - "eval_samples_per_second": 54.559, - "eval_steps_per_second": 3.41, - "step": 300 - }, - { - "epoch": 0.051320030902842526, - "grad_norm": 0.7776352763175964, - "learning_rate": 9.48678601875533e-06, - "loss": 0.1994, - "step": 301 - }, - { - "epoch": 0.05149052934438021, - "grad_norm": 0.6161231398582458, - "learning_rate": 9.485080988917307e-06, - "loss": 0.1971, - "step": 302 - }, - { - "epoch": 0.05166102778591789, - "grad_norm": 0.4451903700828552, - "learning_rate": 9.483375959079285e-06, - "loss": 0.1922, - "step": 303 - }, - { - "epoch": 0.05183152622745558, - "grad_norm": 0.5707091093063354, - "learning_rate": 9.481670929241263e-06, - "loss": 0.203, - "step": 304 - }, - { - "epoch": 0.05200202466899326, - "grad_norm": 0.5663946866989136, - "learning_rate": 9.479965899403241e-06, - "loss": 0.1883, - "step": 305 - }, - { - "epoch": 0.052172523110530944, - "grad_norm": 0.7128487229347229, - "learning_rate": 9.478260869565217e-06, - "loss": 0.2129, - "step": 306 - }, - { - "epoch": 0.05234302155206862, - "grad_norm": 0.6801848411560059, - "learning_rate": 9.476555839727197e-06, - "loss": 0.226, - "step": 307 - }, - { - "epoch": 0.05251351999360631, - "grad_norm": 0.8383636474609375, - "learning_rate": 9.474850809889173e-06, - "loss": 0.1514, - "step": 308 - }, - { - "epoch": 0.05268401843514399, - "grad_norm": 0.9758368730545044, - "learning_rate": 9.473145780051151e-06, - "loss": 0.0985, - "step": 309 - }, - { - "epoch": 0.052854516876681676, - "grad_norm": 0.9041672945022583, - "learning_rate": 9.47144075021313e-06, - "loss": 0.2796, - "step": 310 - }, - { - "epoch": 0.053025015318219355, - "grad_norm": 0.721714198589325, - "learning_rate": 9.469735720375107e-06, - "loss": 0.2241, - "step": 311 - }, - { - "epoch": 0.05319551375975704, - "grad_norm": 0.6851015686988831, - "learning_rate": 9.468030690537085e-06, - "loss": 0.1626, - "step": 312 - }, - { - "epoch": 0.05336601220129472, - "grad_norm": 0.914460301399231, - "learning_rate": 9.466325660699063e-06, - "loss": 0.2942, - "step": 313 - }, - { - "epoch": 0.05353651064283241, - "grad_norm": 0.42635348439216614, - "learning_rate": 9.46462063086104e-06, - "loss": 0.1516, - "step": 314 - }, - { - "epoch": 0.05370700908437009, - "grad_norm": 0.4017740786075592, - "learning_rate": 9.462915601023019e-06, - "loss": 0.1743, - "step": 315 - }, - { - "epoch": 0.053877507525907774, - "grad_norm": 0.3991027772426605, - "learning_rate": 9.461210571184997e-06, - "loss": 0.1942, - "step": 316 - }, - { - "epoch": 0.05404800596744545, - "grad_norm": 0.3740632236003876, - "learning_rate": 9.459505541346975e-06, - "loss": 0.1794, - "step": 317 - }, - { - "epoch": 0.05421850440898314, - "grad_norm": 0.8277780413627625, - "learning_rate": 9.457800511508953e-06, - "loss": 0.207, - "step": 318 - }, - { - "epoch": 0.05438900285052082, - "grad_norm": 0.4441271722316742, - "learning_rate": 9.45609548167093e-06, - "loss": 0.1879, - "step": 319 - }, - { - "epoch": 0.054559501292058506, - "grad_norm": 0.4899628460407257, - "learning_rate": 9.454390451832909e-06, - "loss": 0.195, - "step": 320 - }, - { - "epoch": 0.054729999733596185, - "grad_norm": 0.6418739557266235, - "learning_rate": 9.452685421994886e-06, - "loss": 0.1267, - "step": 321 - }, - { - "epoch": 0.054900498175133865, - "grad_norm": 0.9745388031005859, - "learning_rate": 9.450980392156863e-06, - "loss": 0.2859, - "step": 322 - }, - { - "epoch": 0.05507099661667155, - "grad_norm": 0.8999006748199463, - "learning_rate": 9.449275362318842e-06, - "loss": 0.0801, - "step": 323 - }, - { - "epoch": 0.05524149505820923, - "grad_norm": 0.5319007635116577, - "learning_rate": 9.447570332480819e-06, - "loss": 0.2203, - "step": 324 - }, - { - "epoch": 0.05541199349974692, - "grad_norm": 0.7109606266021729, - "learning_rate": 9.445865302642797e-06, - "loss": 0.2016, - "step": 325 - }, - { - "epoch": 0.0555824919412846, - "grad_norm": 0.3947546184062958, - "learning_rate": 9.444160272804774e-06, - "loss": 0.1562, - "step": 326 - }, - { - "epoch": 0.05575299038282228, - "grad_norm": 0.5612919330596924, - "learning_rate": 9.442455242966752e-06, - "loss": 0.1078, - "step": 327 - }, - { - "epoch": 0.05592348882435996, - "grad_norm": 0.4919300079345703, - "learning_rate": 9.44075021312873e-06, - "loss": 0.1453, - "step": 328 - }, - { - "epoch": 0.05609398726589765, - "grad_norm": 1.1095222234725952, - "learning_rate": 9.439045183290708e-06, - "loss": 0.2045, - "step": 329 - }, - { - "epoch": 0.05626448570743533, - "grad_norm": 0.6746119856834412, - "learning_rate": 9.437340153452686e-06, - "loss": 0.2171, - "step": 330 - }, - { - "epoch": 0.056434984148973015, - "grad_norm": 0.6914483308792114, - "learning_rate": 9.435635123614664e-06, - "loss": 0.208, - "step": 331 - }, - { - "epoch": 0.056605482590510695, - "grad_norm": 0.9006068110466003, - "learning_rate": 9.433930093776642e-06, - "loss": 0.2473, - "step": 332 - }, - { - "epoch": 0.05677598103204838, - "grad_norm": 0.5845337510108948, - "learning_rate": 9.432225063938618e-06, - "loss": 0.153, - "step": 333 - }, - { - "epoch": 0.05694647947358606, - "grad_norm": 1.3863515853881836, - "learning_rate": 9.430520034100598e-06, - "loss": 0.1166, - "step": 334 - }, - { - "epoch": 0.05711697791512375, - "grad_norm": 0.9808059334754944, - "learning_rate": 9.428815004262576e-06, - "loss": 0.1342, - "step": 335 - }, - { - "epoch": 0.057287476356661426, - "grad_norm": 0.6818749904632568, - "learning_rate": 9.427109974424552e-06, - "loss": 0.2624, - "step": 336 - }, - { - "epoch": 0.05745797479819911, - "grad_norm": 1.034446120262146, - "learning_rate": 9.425404944586532e-06, - "loss": 0.2494, - "step": 337 - }, - { - "epoch": 0.05762847323973679, - "grad_norm": 0.5185638666152954, - "learning_rate": 9.423699914748508e-06, - "loss": 0.2122, - "step": 338 - }, - { - "epoch": 0.05779897168127448, - "grad_norm": 0.5386376976966858, - "learning_rate": 9.421994884910488e-06, - "loss": 0.1788, - "step": 339 - }, - { - "epoch": 0.05796947012281216, - "grad_norm": 0.6817571520805359, - "learning_rate": 9.420289855072464e-06, - "loss": 0.2466, - "step": 340 - }, - { - "epoch": 0.058139968564349845, - "grad_norm": 0.5243096947669983, - "learning_rate": 9.418584825234442e-06, - "loss": 0.1567, - "step": 341 - }, - { - "epoch": 0.058310467005887524, - "grad_norm": 0.9190734624862671, - "learning_rate": 9.41687979539642e-06, - "loss": 0.2006, - "step": 342 - }, - { - "epoch": 0.058480965447425204, - "grad_norm": 0.659359872341156, - "learning_rate": 9.415174765558398e-06, - "loss": 0.1342, - "step": 343 - }, - { - "epoch": 0.05865146388896289, - "grad_norm": 0.637925922870636, - "learning_rate": 9.413469735720376e-06, - "loss": 0.1694, - "step": 344 - }, - { - "epoch": 0.05882196233050057, - "grad_norm": 1.2218626737594604, - "learning_rate": 9.411764705882354e-06, - "loss": 0.2691, - "step": 345 - }, - { - "epoch": 0.058992460772038256, - "grad_norm": 0.6028181314468384, - "learning_rate": 9.410059676044332e-06, - "loss": 0.1151, - "step": 346 - }, - { - "epoch": 0.059162959213575936, - "grad_norm": 1.4712268114089966, - "learning_rate": 9.40835464620631e-06, - "loss": 0.2712, - "step": 347 - }, - { - "epoch": 0.05933345765511362, - "grad_norm": 0.5645212531089783, - "learning_rate": 9.406649616368288e-06, - "loss": 0.1584, - "step": 348 - }, - { - "epoch": 0.0595039560966513, - "grad_norm": 0.7886714339256287, - "learning_rate": 9.404944586530264e-06, - "loss": 0.1549, - "step": 349 - }, - { - "epoch": 0.05967445453818899, - "grad_norm": 0.8502355813980103, - "learning_rate": 9.403239556692244e-06, - "loss": 0.095, - "step": 350 - }, - { - "epoch": 0.05984495297972667, - "grad_norm": 0.5809105634689331, - "learning_rate": 9.40153452685422e-06, - "loss": 0.1412, - "step": 351 - }, - { - "epoch": 0.060015451421264354, - "grad_norm": 0.7457136511802673, - "learning_rate": 9.399829497016198e-06, - "loss": 0.2125, - "step": 352 - }, - { - "epoch": 0.060185949862802034, - "grad_norm": 0.8455162048339844, - "learning_rate": 9.398124467178177e-06, - "loss": 0.1107, - "step": 353 - }, - { - "epoch": 0.06035644830433972, - "grad_norm": 0.5305351614952087, - "learning_rate": 9.396419437340154e-06, - "loss": 0.1568, - "step": 354 - }, - { - "epoch": 0.0605269467458774, - "grad_norm": 0.6263880729675293, - "learning_rate": 9.394714407502132e-06, - "loss": 0.1768, - "step": 355 - }, - { - "epoch": 0.060697445187415086, - "grad_norm": 0.6323095560073853, - "learning_rate": 9.39300937766411e-06, - "loss": 0.1563, - "step": 356 - }, - { - "epoch": 0.060867943628952766, - "grad_norm": 0.9921282529830933, - "learning_rate": 9.391304347826087e-06, - "loss": 0.2141, - "step": 357 - }, - { - "epoch": 0.06103844207049045, - "grad_norm": 1.3965635299682617, - "learning_rate": 9.389599317988065e-06, - "loss": 0.1306, - "step": 358 - }, - { - "epoch": 0.06120894051202813, - "grad_norm": 0.6225385069847107, - "learning_rate": 9.387894288150043e-06, - "loss": 0.1945, - "step": 359 - }, - { - "epoch": 0.06137943895356582, - "grad_norm": 1.5763331651687622, - "learning_rate": 9.386189258312021e-06, - "loss": 0.3171, - "step": 360 - }, - { - "epoch": 0.0615499373951035, - "grad_norm": 0.6186936497688293, - "learning_rate": 9.384484228474e-06, - "loss": 0.1883, - "step": 361 - }, - { - "epoch": 0.061720435836641184, - "grad_norm": 0.9674474596977234, - "learning_rate": 9.382779198635977e-06, - "loss": 0.2046, - "step": 362 - }, - { - "epoch": 0.061890934278178864, - "grad_norm": 1.3343751430511475, - "learning_rate": 9.381074168797955e-06, - "loss": 0.1711, - "step": 363 - }, - { - "epoch": 0.06206143271971654, - "grad_norm": 0.9126800298690796, - "learning_rate": 9.379369138959933e-06, - "loss": 0.2874, - "step": 364 - }, - { - "epoch": 0.06223193116125423, - "grad_norm": 0.7982877492904663, - "learning_rate": 9.37766410912191e-06, - "loss": 0.2453, - "step": 365 - }, - { - "epoch": 0.06240242960279191, - "grad_norm": 0.8203127384185791, - "learning_rate": 9.375959079283889e-06, - "loss": 0.1784, - "step": 366 - }, - { - "epoch": 0.0625729280443296, - "grad_norm": 0.7186422944068909, - "learning_rate": 9.374254049445865e-06, - "loss": 0.1632, - "step": 367 - }, - { - "epoch": 0.06274342648586728, - "grad_norm": 0.7624729871749878, - "learning_rate": 9.372549019607843e-06, - "loss": 0.1037, - "step": 368 - }, - { - "epoch": 0.06291392492740495, - "grad_norm": 0.6697052121162415, - "learning_rate": 9.370843989769823e-06, - "loss": 0.16, - "step": 369 - }, - { - "epoch": 0.06308442336894264, - "grad_norm": 1.1724293231964111, - "learning_rate": 9.369138959931799e-06, - "loss": 0.2248, - "step": 370 - }, - { - "epoch": 0.06325492181048033, - "grad_norm": 1.5148735046386719, - "learning_rate": 9.367433930093777e-06, - "loss": 0.2683, - "step": 371 - }, - { - "epoch": 0.06342542025201801, - "grad_norm": 1.0104899406433105, - "learning_rate": 9.365728900255755e-06, - "loss": 0.2333, - "step": 372 - }, - { - "epoch": 0.06359591869355569, - "grad_norm": 0.5941121578216553, - "learning_rate": 9.364023870417733e-06, - "loss": 0.1206, - "step": 373 - }, - { - "epoch": 0.06376641713509337, - "grad_norm": 0.39642173051834106, - "learning_rate": 9.362318840579711e-06, - "loss": 0.1378, - "step": 374 - }, - { - "epoch": 0.06393691557663106, - "grad_norm": 0.6411215662956238, - "learning_rate": 9.360613810741689e-06, - "loss": 0.0772, - "step": 375 - }, - { - "epoch": 0.06410741401816875, - "grad_norm": 1.834947109222412, - "learning_rate": 9.358908780903667e-06, - "loss": 0.3382, - "step": 376 - }, - { - "epoch": 0.06427791245970642, - "grad_norm": 1.322509765625, - "learning_rate": 9.357203751065645e-06, - "loss": 0.2573, - "step": 377 - }, - { - "epoch": 0.0644484109012441, - "grad_norm": 1.019263505935669, - "learning_rate": 9.355498721227623e-06, - "loss": 0.1991, - "step": 378 - }, - { - "epoch": 0.06461890934278179, - "grad_norm": 1.1220760345458984, - "learning_rate": 9.3537936913896e-06, - "loss": 0.2133, - "step": 379 - }, - { - "epoch": 0.06478940778431948, - "grad_norm": 1.7320735454559326, - "learning_rate": 9.352088661551579e-06, - "loss": 0.1466, - "step": 380 - }, - { - "epoch": 0.06495990622585715, - "grad_norm": 1.1254284381866455, - "learning_rate": 9.350383631713555e-06, - "loss": 0.2786, - "step": 381 - }, - { - "epoch": 0.06513040466739484, - "grad_norm": 0.7988210320472717, - "learning_rate": 9.348678601875534e-06, - "loss": 0.2236, - "step": 382 - }, - { - "epoch": 0.06530090310893252, - "grad_norm": 1.4079415798187256, - "learning_rate": 9.34697357203751e-06, - "loss": 0.1681, - "step": 383 - }, - { - "epoch": 0.06547140155047021, - "grad_norm": 0.8565404415130615, - "learning_rate": 9.345268542199489e-06, - "loss": 0.2203, - "step": 384 - }, - { - "epoch": 0.06564189999200788, - "grad_norm": 0.8782585859298706, - "learning_rate": 9.343563512361468e-06, - "loss": 0.1947, - "step": 385 - }, - { - "epoch": 0.06581239843354557, - "grad_norm": 0.6504030227661133, - "learning_rate": 9.341858482523445e-06, - "loss": 0.1231, - "step": 386 - }, - { - "epoch": 0.06598289687508326, - "grad_norm": 0.7962436079978943, - "learning_rate": 9.340153452685423e-06, - "loss": 0.1644, - "step": 387 - }, - { - "epoch": 0.06615339531662093, - "grad_norm": 1.63428795337677, - "learning_rate": 9.3384484228474e-06, - "loss": 0.2766, - "step": 388 - }, - { - "epoch": 0.06632389375815861, - "grad_norm": 0.98063725233078, - "learning_rate": 9.336743393009378e-06, - "loss": 0.1638, - "step": 389 - }, - { - "epoch": 0.0664943921996963, - "grad_norm": 1.217576503753662, - "learning_rate": 9.335038363171356e-06, - "loss": 0.2317, - "step": 390 - }, - { - "epoch": 0.06666489064123399, - "grad_norm": 0.7941992878913879, - "learning_rate": 9.333333333333334e-06, - "loss": 0.1589, - "step": 391 - }, - { - "epoch": 0.06683538908277166, - "grad_norm": 0.8984582424163818, - "learning_rate": 9.331628303495312e-06, - "loss": 0.2159, - "step": 392 - }, - { - "epoch": 0.06700588752430935, - "grad_norm": 0.7985866665840149, - "learning_rate": 9.32992327365729e-06, - "loss": 0.1914, - "step": 393 - }, - { - "epoch": 0.06717638596584703, - "grad_norm": 0.7738232016563416, - "learning_rate": 9.328218243819268e-06, - "loss": 0.2207, - "step": 394 - }, - { - "epoch": 0.06734688440738472, - "grad_norm": 1.0320310592651367, - "learning_rate": 9.326513213981246e-06, - "loss": 0.2657, - "step": 395 - }, - { - "epoch": 0.06751738284892239, - "grad_norm": 0.8637803196907043, - "learning_rate": 9.324808184143224e-06, - "loss": 0.2897, - "step": 396 - }, - { - "epoch": 0.06768788129046008, - "grad_norm": 1.9716813564300537, - "learning_rate": 9.3231031543052e-06, - "loss": 0.1362, - "step": 397 - }, - { - "epoch": 0.06785837973199776, - "grad_norm": 1.3121943473815918, - "learning_rate": 9.32139812446718e-06, - "loss": 0.1508, - "step": 398 - }, - { - "epoch": 0.06802887817353545, - "grad_norm": 1.190568447113037, - "learning_rate": 9.319693094629156e-06, - "loss": 0.1805, - "step": 399 - }, - { - "epoch": 0.06819937661507312, - "grad_norm": 0.6798188090324402, - "learning_rate": 9.317988064791134e-06, - "loss": 0.238, - "step": 400 - }, - { - "epoch": 0.06819937661507312, - "eval_f1_score": 0.007434944237918215, - "eval_loss": 0.19798505306243896, - "eval_runtime": 183.2576, - "eval_samples_per_second": 54.568, - "eval_steps_per_second": 3.411, - "step": 400 - }, - { - "epoch": 0.06836987505661081, - "grad_norm": 0.6206487417221069, - "learning_rate": 9.316283034953112e-06, - "loss": 0.1663, - "step": 401 - }, - { - "epoch": 0.0685403734981485, - "grad_norm": 0.9610775113105774, - "learning_rate": 9.31457800511509e-06, - "loss": 0.2153, - "step": 402 - }, - { - "epoch": 0.06871087193968618, - "grad_norm": 0.4641443192958832, - "learning_rate": 9.312872975277068e-06, - "loss": 0.1053, - "step": 403 - }, - { - "epoch": 0.06888137038122386, - "grad_norm": 0.6299108862876892, - "learning_rate": 9.311167945439046e-06, - "loss": 0.1492, - "step": 404 - }, - { - "epoch": 0.06905186882276154, - "grad_norm": 0.42379632592201233, - "learning_rate": 9.309462915601024e-06, - "loss": 0.0972, - "step": 405 - }, - { - "epoch": 0.06922236726429923, - "grad_norm": 1.3610317707061768, - "learning_rate": 9.307757885763002e-06, - "loss": 0.2578, - "step": 406 - }, - { - "epoch": 0.0693928657058369, - "grad_norm": 0.5917848944664001, - "learning_rate": 9.30605285592498e-06, - "loss": 0.1444, - "step": 407 - }, - { - "epoch": 0.06956336414737459, - "grad_norm": 0.9677045345306396, - "learning_rate": 9.304347826086956e-06, - "loss": 0.2294, - "step": 408 - }, - { - "epoch": 0.06973386258891227, - "grad_norm": 0.594524621963501, - "learning_rate": 9.302642796248936e-06, - "loss": 0.1817, - "step": 409 - }, - { - "epoch": 0.06990436103044996, - "grad_norm": 0.571447491645813, - "learning_rate": 9.300937766410914e-06, - "loss": 0.2015, - "step": 410 - }, - { - "epoch": 0.07007485947198763, - "grad_norm": 0.557389497756958, - "learning_rate": 9.29923273657289e-06, - "loss": 0.1314, - "step": 411 - }, - { - "epoch": 0.07024535791352532, - "grad_norm": 0.853410005569458, - "learning_rate": 9.29752770673487e-06, - "loss": 0.1809, - "step": 412 - }, - { - "epoch": 0.070415856355063, - "grad_norm": 1.0953272581100464, - "learning_rate": 9.295822676896846e-06, - "loss": 0.1144, - "step": 413 - }, - { - "epoch": 0.07058635479660069, - "grad_norm": 0.9774150848388672, - "learning_rate": 9.294117647058824e-06, - "loss": 0.1385, - "step": 414 - }, - { - "epoch": 0.07075685323813836, - "grad_norm": 0.7353179454803467, - "learning_rate": 9.292412617220802e-06, - "loss": 0.243, - "step": 415 - }, - { - "epoch": 0.07092735167967605, - "grad_norm": 0.8262443542480469, - "learning_rate": 9.29070758738278e-06, - "loss": 0.2111, - "step": 416 - }, - { - "epoch": 0.07109785012121374, - "grad_norm": 0.6665271520614624, - "learning_rate": 9.289002557544758e-06, - "loss": 0.1799, - "step": 417 - }, - { - "epoch": 0.07126834856275142, - "grad_norm": 0.6875427961349487, - "learning_rate": 9.287297527706736e-06, - "loss": 0.2445, - "step": 418 - }, - { - "epoch": 0.0714388470042891, - "grad_norm": 0.948556661605835, - "learning_rate": 9.285592497868713e-06, - "loss": 0.1675, - "step": 419 - }, - { - "epoch": 0.07160934544582678, - "grad_norm": 0.89045250415802, - "learning_rate": 9.283887468030691e-06, - "loss": 0.2529, - "step": 420 - }, - { - "epoch": 0.07177984388736447, - "grad_norm": 1.0594686269760132, - "learning_rate": 9.28218243819267e-06, - "loss": 0.1746, - "step": 421 - }, - { - "epoch": 0.07195034232890216, - "grad_norm": 0.8712635040283203, - "learning_rate": 9.280477408354647e-06, - "loss": 0.2074, - "step": 422 - }, - { - "epoch": 0.07212084077043983, - "grad_norm": 0.877993643283844, - "learning_rate": 9.278772378516625e-06, - "loss": 0.1457, - "step": 423 - }, - { - "epoch": 0.07229133921197752, - "grad_norm": 1.2433476448059082, - "learning_rate": 9.277067348678602e-06, - "loss": 0.2237, - "step": 424 - }, - { - "epoch": 0.0724618376535152, - "grad_norm": 1.1479034423828125, - "learning_rate": 9.275362318840581e-06, - "loss": 0.2184, - "step": 425 - }, - { - "epoch": 0.07263233609505289, - "grad_norm": 1.4168163537979126, - "learning_rate": 9.273657289002557e-06, - "loss": 0.1419, - "step": 426 - }, - { - "epoch": 0.07280283453659056, - "grad_norm": 0.6461614966392517, - "learning_rate": 9.271952259164535e-06, - "loss": 0.1649, - "step": 427 - }, - { - "epoch": 0.07297333297812825, - "grad_norm": 1.4802743196487427, - "learning_rate": 9.270247229326515e-06, - "loss": 0.2451, - "step": 428 - }, - { - "epoch": 0.07314383141966593, - "grad_norm": 1.2890205383300781, - "learning_rate": 9.268542199488491e-06, - "loss": 0.1541, - "step": 429 - }, - { - "epoch": 0.0733143298612036, - "grad_norm": 0.863760232925415, - "learning_rate": 9.26683716965047e-06, - "loss": 0.1404, - "step": 430 - }, - { - "epoch": 0.07348482830274129, - "grad_norm": 0.9659500122070312, - "learning_rate": 9.265132139812447e-06, - "loss": 0.2369, - "step": 431 - }, - { - "epoch": 0.07365532674427898, - "grad_norm": 1.5475353002548218, - "learning_rate": 9.263427109974425e-06, - "loss": 0.2093, - "step": 432 - }, - { - "epoch": 0.07382582518581667, - "grad_norm": 1.2611944675445557, - "learning_rate": 9.261722080136403e-06, - "loss": 0.0915, - "step": 433 - }, - { - "epoch": 0.07399632362735434, - "grad_norm": 0.6547388434410095, - "learning_rate": 9.260017050298381e-06, - "loss": 0.1208, - "step": 434 - }, - { - "epoch": 0.07416682206889202, - "grad_norm": 2.6680428981781006, - "learning_rate": 9.258312020460359e-06, - "loss": 0.1942, - "step": 435 - }, - { - "epoch": 0.07433732051042971, - "grad_norm": 0.6818041205406189, - "learning_rate": 9.256606990622337e-06, - "loss": 0.1639, - "step": 436 - }, - { - "epoch": 0.0745078189519674, - "grad_norm": 0.8517784476280212, - "learning_rate": 9.254901960784315e-06, - "loss": 0.1031, - "step": 437 - }, - { - "epoch": 0.07467831739350507, - "grad_norm": 0.6099587082862854, - "learning_rate": 9.253196930946293e-06, - "loss": 0.1316, - "step": 438 - }, - { - "epoch": 0.07484881583504276, - "grad_norm": 1.131976842880249, - "learning_rate": 9.25149190110827e-06, - "loss": 0.1404, - "step": 439 - }, - { - "epoch": 0.07501931427658044, - "grad_norm": 0.7698296308517456, - "learning_rate": 9.249786871270247e-06, - "loss": 0.1313, - "step": 440 - }, - { - "epoch": 0.07518981271811813, - "grad_norm": 1.271114468574524, - "learning_rate": 9.248081841432227e-06, - "loss": 0.1686, - "step": 441 - }, - { - "epoch": 0.0753603111596558, - "grad_norm": 0.8797255754470825, - "learning_rate": 9.246376811594203e-06, - "loss": 0.1667, - "step": 442 - }, - { - "epoch": 0.07553080960119349, - "grad_norm": 0.9120625257492065, - "learning_rate": 9.24467178175618e-06, - "loss": 0.1248, - "step": 443 - }, - { - "epoch": 0.07570130804273117, - "grad_norm": 0.8970147967338562, - "learning_rate": 9.24296675191816e-06, - "loss": 0.1601, - "step": 444 - }, - { - "epoch": 0.07587180648426886, - "grad_norm": 1.5787622928619385, - "learning_rate": 9.241261722080137e-06, - "loss": 0.1132, - "step": 445 - }, - { - "epoch": 0.07604230492580653, - "grad_norm": 0.7926347255706787, - "learning_rate": 9.239556692242115e-06, - "loss": 0.1673, - "step": 446 - }, - { - "epoch": 0.07621280336734422, - "grad_norm": 1.0942426919937134, - "learning_rate": 9.237851662404093e-06, - "loss": 0.1726, - "step": 447 - }, - { - "epoch": 0.0763833018088819, - "grad_norm": 0.7824402451515198, - "learning_rate": 9.23614663256607e-06, - "loss": 0.1421, - "step": 448 - }, - { - "epoch": 0.07655380025041958, - "grad_norm": 1.3141944408416748, - "learning_rate": 9.234441602728049e-06, - "loss": 0.2309, - "step": 449 - }, - { - "epoch": 0.07672429869195727, - "grad_norm": 0.86362624168396, - "learning_rate": 9.232736572890026e-06, - "loss": 0.1154, - "step": 450 - }, - { - "epoch": 0.07689479713349495, - "grad_norm": 0.9413621425628662, - "learning_rate": 9.231031543052004e-06, - "loss": 0.1966, - "step": 451 - }, - { - "epoch": 0.07706529557503264, - "grad_norm": 0.6306756734848022, - "learning_rate": 9.229326513213982e-06, - "loss": 0.1538, - "step": 452 - }, - { - "epoch": 0.07723579401657031, - "grad_norm": 1.079569697380066, - "learning_rate": 9.22762148337596e-06, - "loss": 0.2293, - "step": 453 - }, - { - "epoch": 0.077406292458108, - "grad_norm": 1.0969973802566528, - "learning_rate": 9.225916453537938e-06, - "loss": 0.2179, - "step": 454 - }, - { - "epoch": 0.07757679089964568, - "grad_norm": 1.7774721384048462, - "learning_rate": 9.224211423699916e-06, - "loss": 0.233, - "step": 455 - }, - { - "epoch": 0.07774728934118337, - "grad_norm": 0.6311856508255005, - "learning_rate": 9.222506393861892e-06, - "loss": 0.1904, - "step": 456 - }, - { - "epoch": 0.07791778778272104, - "grad_norm": 1.2663410902023315, - "learning_rate": 9.220801364023872e-06, - "loss": 0.0949, - "step": 457 - }, - { - "epoch": 0.07808828622425873, - "grad_norm": 1.3534469604492188, - "learning_rate": 9.219096334185848e-06, - "loss": 0.1809, - "step": 458 - }, - { - "epoch": 0.07825878466579642, - "grad_norm": 0.8532509803771973, - "learning_rate": 9.217391304347826e-06, - "loss": 0.1482, - "step": 459 - }, - { - "epoch": 0.0784292831073341, - "grad_norm": 0.9541242122650146, - "learning_rate": 9.215686274509804e-06, - "loss": 0.1872, - "step": 460 - }, - { - "epoch": 0.07859978154887177, - "grad_norm": 0.6848520040512085, - "learning_rate": 9.213981244671782e-06, - "loss": 0.1767, - "step": 461 - }, - { - "epoch": 0.07877027999040946, - "grad_norm": 0.6618411540985107, - "learning_rate": 9.21227621483376e-06, - "loss": 0.1802, - "step": 462 - }, - { - "epoch": 0.07894077843194715, - "grad_norm": 0.9855371117591858, - "learning_rate": 9.210571184995738e-06, - "loss": 0.113, - "step": 463 - }, - { - "epoch": 0.07911127687348483, - "grad_norm": 0.9223768711090088, - "learning_rate": 9.208866155157716e-06, - "loss": 0.195, - "step": 464 - }, - { - "epoch": 0.0792817753150225, - "grad_norm": 1.2770650386810303, - "learning_rate": 9.207161125319694e-06, - "loss": 0.2939, - "step": 465 - }, - { - "epoch": 0.0794522737565602, - "grad_norm": 0.7938947081565857, - "learning_rate": 9.205456095481672e-06, - "loss": 0.1819, - "step": 466 - }, - { - "epoch": 0.07962277219809788, - "grad_norm": 0.5131012797355652, - "learning_rate": 9.20375106564365e-06, - "loss": 0.1734, - "step": 467 - }, - { - "epoch": 0.07979327063963557, - "grad_norm": 0.7656362652778625, - "learning_rate": 9.202046035805628e-06, - "loss": 0.2224, - "step": 468 - }, - { - "epoch": 0.07996376908117324, - "grad_norm": 0.7925986051559448, - "learning_rate": 9.200341005967606e-06, - "loss": 0.1276, - "step": 469 - }, - { - "epoch": 0.08013426752271093, - "grad_norm": 0.4855455756187439, - "learning_rate": 9.198635976129584e-06, - "loss": 0.1497, - "step": 470 - }, - { - "epoch": 0.08030476596424861, - "grad_norm": 0.7099283337593079, - "learning_rate": 9.196930946291562e-06, - "loss": 0.2033, - "step": 471 - }, - { - "epoch": 0.08047526440578628, - "grad_norm": 0.6167730093002319, - "learning_rate": 9.195225916453538e-06, - "loss": 0.18, - "step": 472 - }, - { - "epoch": 0.08064576284732397, - "grad_norm": 0.8107618689537048, - "learning_rate": 9.193520886615518e-06, - "loss": 0.2034, - "step": 473 - }, - { - "epoch": 0.08081626128886166, - "grad_norm": 0.7333641052246094, - "learning_rate": 9.191815856777494e-06, - "loss": 0.2379, - "step": 474 - }, - { - "epoch": 0.08098675973039934, - "grad_norm": 0.7127386331558228, - "learning_rate": 9.190110826939472e-06, - "loss": 0.1248, - "step": 475 - }, - { - "epoch": 0.08115725817193702, - "grad_norm": 0.801481306552887, - "learning_rate": 9.18840579710145e-06, - "loss": 0.206, - "step": 476 - }, - { - "epoch": 0.0813277566134747, - "grad_norm": 1.2994575500488281, - "learning_rate": 9.186700767263428e-06, - "loss": 0.0752, - "step": 477 - }, - { - "epoch": 0.08149825505501239, - "grad_norm": 0.909691572189331, - "learning_rate": 9.184995737425406e-06, - "loss": 0.1901, - "step": 478 - }, - { - "epoch": 0.08166875349655008, - "grad_norm": 0.850429356098175, - "learning_rate": 9.183290707587384e-06, - "loss": 0.1715, - "step": 479 - }, - { - "epoch": 0.08183925193808775, - "grad_norm": 1.3290419578552246, - "learning_rate": 9.181585677749362e-06, - "loss": 0.267, - "step": 480 - }, - { - "epoch": 0.08200975037962543, - "grad_norm": 0.7993286848068237, - "learning_rate": 9.17988064791134e-06, - "loss": 0.125, - "step": 481 - }, - { - "epoch": 0.08218024882116312, - "grad_norm": 1.093023419380188, - "learning_rate": 9.178175618073317e-06, - "loss": 0.1835, - "step": 482 - }, - { - "epoch": 0.08235074726270081, - "grad_norm": 0.7502027750015259, - "learning_rate": 9.176470588235294e-06, - "loss": 0.146, - "step": 483 - }, - { - "epoch": 0.08252124570423848, - "grad_norm": 0.8021153807640076, - "learning_rate": 9.174765558397273e-06, - "loss": 0.2086, - "step": 484 - }, - { - "epoch": 0.08269174414577617, - "grad_norm": 0.7832606434822083, - "learning_rate": 9.17306052855925e-06, - "loss": 0.1941, - "step": 485 - }, - { - "epoch": 0.08286224258731385, - "grad_norm": 0.7455692887306213, - "learning_rate": 9.171355498721227e-06, - "loss": 0.119, - "step": 486 - }, - { - "epoch": 0.08303274102885154, - "grad_norm": 1.0250868797302246, - "learning_rate": 9.169650468883207e-06, - "loss": 0.2183, - "step": 487 - }, - { - "epoch": 0.08320323947038921, - "grad_norm": 0.5591428875923157, - "learning_rate": 9.167945439045183e-06, - "loss": 0.1261, - "step": 488 - }, - { - "epoch": 0.0833737379119269, - "grad_norm": 0.5469002723693848, - "learning_rate": 9.166240409207161e-06, - "loss": 0.1285, - "step": 489 - }, - { - "epoch": 0.08354423635346458, - "grad_norm": 0.6611772775650024, - "learning_rate": 9.16453537936914e-06, - "loss": 0.2116, - "step": 490 - }, - { - "epoch": 0.08371473479500227, - "grad_norm": 0.9039101004600525, - "learning_rate": 9.162830349531117e-06, - "loss": 0.1971, - "step": 491 - }, - { - "epoch": 0.08388523323653994, - "grad_norm": 1.6236768960952759, - "learning_rate": 9.161125319693095e-06, - "loss": 0.2645, - "step": 492 - }, - { - "epoch": 0.08405573167807763, - "grad_norm": 0.7892815470695496, - "learning_rate": 9.159420289855073e-06, - "loss": 0.1583, - "step": 493 - }, - { - "epoch": 0.08422623011961532, - "grad_norm": 0.7078331708908081, - "learning_rate": 9.157715260017051e-06, - "loss": 0.1316, - "step": 494 - }, - { - "epoch": 0.08439672856115299, - "grad_norm": 0.9892073273658752, - "learning_rate": 9.156010230179029e-06, - "loss": 0.2271, - "step": 495 - }, - { - "epoch": 0.08456722700269068, - "grad_norm": 1.0123225450515747, - "learning_rate": 9.154305200341007e-06, - "loss": 0.1299, - "step": 496 - }, - { - "epoch": 0.08473772544422836, - "grad_norm": 0.7926766276359558, - "learning_rate": 9.152600170502985e-06, - "loss": 0.1479, - "step": 497 - }, - { - "epoch": 0.08490822388576605, - "grad_norm": 1.0032105445861816, - "learning_rate": 9.150895140664963e-06, - "loss": 0.1157, - "step": 498 - }, - { - "epoch": 0.08507872232730372, - "grad_norm": 1.401563286781311, - "learning_rate": 9.149190110826939e-06, - "loss": 0.2905, - "step": 499 - }, - { - "epoch": 0.08524922076884141, - "grad_norm": 0.6407831311225891, - "learning_rate": 9.147485080988919e-06, - "loss": 0.1407, - "step": 500 - }, - { - "epoch": 0.08524922076884141, - "eval_f1_score": 0.028985507246376812, - "eval_loss": 0.18852059543132782, - "eval_runtime": 183.2722, - "eval_samples_per_second": 54.564, - "eval_steps_per_second": 3.41, - "step": 500 - }, - { - "epoch": 0.0854197192103791, - "grad_norm": 0.6139245629310608, - "learning_rate": 9.145780051150895e-06, - "loss": 0.1003, - "step": 501 - }, - { - "epoch": 0.08559021765191678, - "grad_norm": 0.7585770487785339, - "learning_rate": 9.144075021312873e-06, - "loss": 0.1214, - "step": 502 - }, - { - "epoch": 0.08576071609345445, - "grad_norm": 0.732570469379425, - "learning_rate": 9.142369991474853e-06, - "loss": 0.1182, - "step": 503 - }, - { - "epoch": 0.08593121453499214, - "grad_norm": 0.5371119379997253, - "learning_rate": 9.140664961636829e-06, - "loss": 0.1725, - "step": 504 - }, - { - "epoch": 0.08610171297652983, - "grad_norm": 0.9066203236579895, - "learning_rate": 9.138959931798807e-06, - "loss": 0.2375, - "step": 505 - }, - { - "epoch": 0.08627221141806751, - "grad_norm": 0.5497814416885376, - "learning_rate": 9.137254901960785e-06, - "loss": 0.1478, - "step": 506 - }, - { - "epoch": 0.08644270985960519, - "grad_norm": 1.1947273015975952, - "learning_rate": 9.135549872122763e-06, - "loss": 0.1718, - "step": 507 - }, - { - "epoch": 0.08661320830114287, - "grad_norm": 0.8404238224029541, - "learning_rate": 9.13384484228474e-06, - "loss": 0.1625, - "step": 508 - }, - { - "epoch": 0.08678370674268056, - "grad_norm": 1.0687267780303955, - "learning_rate": 9.132139812446719e-06, - "loss": 0.2658, - "step": 509 - }, - { - "epoch": 0.08695420518421824, - "grad_norm": 0.7937365174293518, - "learning_rate": 9.130434782608697e-06, - "loss": 0.1944, - "step": 510 - }, - { - "epoch": 0.08712470362575592, - "grad_norm": 4.469568252563477, - "learning_rate": 9.128729752770674e-06, - "loss": 0.1463, - "step": 511 - }, - { - "epoch": 0.0872952020672936, - "grad_norm": 1.0447407960891724, - "learning_rate": 9.127024722932652e-06, - "loss": 0.2103, - "step": 512 - }, - { - "epoch": 0.08746570050883129, - "grad_norm": 1.9300107955932617, - "learning_rate": 9.12531969309463e-06, - "loss": 0.1259, - "step": 513 - }, - { - "epoch": 0.08763619895036896, - "grad_norm": 1.7074291706085205, - "learning_rate": 9.123614663256608e-06, - "loss": 0.1118, - "step": 514 - }, - { - "epoch": 0.08780669739190665, - "grad_norm": 1.4637730121612549, - "learning_rate": 9.121909633418585e-06, - "loss": 0.1045, - "step": 515 - }, - { - "epoch": 0.08797719583344434, - "grad_norm": 0.7066337466239929, - "learning_rate": 9.120204603580564e-06, - "loss": 0.1469, - "step": 516 - }, - { - "epoch": 0.08814769427498202, - "grad_norm": 1.2253496646881104, - "learning_rate": 9.11849957374254e-06, - "loss": 0.2541, - "step": 517 - }, - { - "epoch": 0.0883181927165197, - "grad_norm": 0.9337403774261475, - "learning_rate": 9.116794543904518e-06, - "loss": 0.1464, - "step": 518 - }, - { - "epoch": 0.08848869115805738, - "grad_norm": 1.1742297410964966, - "learning_rate": 9.115089514066498e-06, - "loss": 0.2045, - "step": 519 - }, - { - "epoch": 0.08865918959959507, - "grad_norm": 0.9703015089035034, - "learning_rate": 9.113384484228474e-06, - "loss": 0.0902, - "step": 520 - }, - { - "epoch": 0.08882968804113275, - "grad_norm": 0.9036072492599487, - "learning_rate": 9.111679454390452e-06, - "loss": 0.2117, - "step": 521 - }, - { - "epoch": 0.08900018648267043, - "grad_norm": 1.4741570949554443, - "learning_rate": 9.10997442455243e-06, - "loss": 0.2575, - "step": 522 - }, - { - "epoch": 0.08917068492420811, - "grad_norm": 0.7809064388275146, - "learning_rate": 9.108269394714408e-06, - "loss": 0.1914, - "step": 523 - }, - { - "epoch": 0.0893411833657458, - "grad_norm": 0.5947693586349487, - "learning_rate": 9.106564364876386e-06, - "loss": 0.1615, - "step": 524 - }, - { - "epoch": 0.08951168180728349, - "grad_norm": 0.701625406742096, - "learning_rate": 9.104859335038364e-06, - "loss": 0.1318, - "step": 525 - }, - { - "epoch": 0.08968218024882116, - "grad_norm": 0.60597825050354, - "learning_rate": 9.103154305200342e-06, - "loss": 0.1605, - "step": 526 - }, - { - "epoch": 0.08985267869035884, - "grad_norm": 0.8819074034690857, - "learning_rate": 9.10144927536232e-06, - "loss": 0.2386, - "step": 527 - }, - { - "epoch": 0.09002317713189653, - "grad_norm": 0.7760066390037537, - "learning_rate": 9.099744245524298e-06, - "loss": 0.2037, - "step": 528 - }, - { - "epoch": 0.09019367557343422, - "grad_norm": 0.8319275379180908, - "learning_rate": 9.098039215686276e-06, - "loss": 0.1922, - "step": 529 - }, - { - "epoch": 0.09036417401497189, - "grad_norm": 1.5015146732330322, - "learning_rate": 9.096334185848254e-06, - "loss": 0.2665, - "step": 530 - }, - { - "epoch": 0.09053467245650958, - "grad_norm": 1.2790735960006714, - "learning_rate": 9.09462915601023e-06, - "loss": 0.1451, - "step": 531 - }, - { - "epoch": 0.09070517089804726, - "grad_norm": 0.9179703593254089, - "learning_rate": 9.09292412617221e-06, - "loss": 0.1774, - "step": 532 - }, - { - "epoch": 0.09087566933958495, - "grad_norm": 0.9182787537574768, - "learning_rate": 9.091219096334186e-06, - "loss": 0.1703, - "step": 533 - }, - { - "epoch": 0.09104616778112262, - "grad_norm": 0.5663113594055176, - "learning_rate": 9.089514066496164e-06, - "loss": 0.1502, - "step": 534 - }, - { - "epoch": 0.09121666622266031, - "grad_norm": 0.6369578838348389, - "learning_rate": 9.087809036658142e-06, - "loss": 0.1443, - "step": 535 - }, - { - "epoch": 0.091387164664198, - "grad_norm": 0.6523211598396301, - "learning_rate": 9.08610400682012e-06, - "loss": 0.1666, - "step": 536 - }, - { - "epoch": 0.09155766310573567, - "grad_norm": 0.7603857517242432, - "learning_rate": 9.084398976982098e-06, - "loss": 0.086, - "step": 537 - }, - { - "epoch": 0.09172816154727335, - "grad_norm": 1.1395196914672852, - "learning_rate": 9.082693947144076e-06, - "loss": 0.2465, - "step": 538 - }, - { - "epoch": 0.09189865998881104, - "grad_norm": 0.682396650314331, - "learning_rate": 9.080988917306054e-06, - "loss": 0.0308, - "step": 539 - }, - { - "epoch": 0.09206915843034873, - "grad_norm": 1.1837222576141357, - "learning_rate": 9.079283887468032e-06, - "loss": 0.2423, - "step": 540 - }, - { - "epoch": 0.0922396568718864, - "grad_norm": 1.1183197498321533, - "learning_rate": 9.07757885763001e-06, - "loss": 0.2646, - "step": 541 - }, - { - "epoch": 0.09241015531342409, - "grad_norm": 0.6793837547302246, - "learning_rate": 9.075873827791987e-06, - "loss": 0.1761, - "step": 542 - }, - { - "epoch": 0.09258065375496177, - "grad_norm": 0.3091171681880951, - "learning_rate": 9.074168797953965e-06, - "loss": 0.1189, - "step": 543 - }, - { - "epoch": 0.09275115219649946, - "grad_norm": 0.7060979008674622, - "learning_rate": 9.072463768115943e-06, - "loss": 0.1776, - "step": 544 - }, - { - "epoch": 0.09292165063803713, - "grad_norm": 0.3717476427555084, - "learning_rate": 9.070758738277921e-06, - "loss": 0.1104, - "step": 545 - }, - { - "epoch": 0.09309214907957482, - "grad_norm": 0.44111332297325134, - "learning_rate": 9.0690537084399e-06, - "loss": 0.1127, - "step": 546 - }, - { - "epoch": 0.0932626475211125, - "grad_norm": 1.1776901483535767, - "learning_rate": 9.067348678601876e-06, - "loss": 0.2734, - "step": 547 - }, - { - "epoch": 0.09343314596265019, - "grad_norm": 0.5457471609115601, - "learning_rate": 9.065643648763855e-06, - "loss": 0.1419, - "step": 548 - }, - { - "epoch": 0.09360364440418786, - "grad_norm": 0.584895670413971, - "learning_rate": 9.063938618925831e-06, - "loss": 0.1478, - "step": 549 - }, - { - "epoch": 0.09377414284572555, - "grad_norm": 1.4400527477264404, - "learning_rate": 9.06223358908781e-06, - "loss": 0.1638, - "step": 550 - }, - { - "epoch": 0.09394464128726324, - "grad_norm": 0.7518956661224365, - "learning_rate": 9.060528559249787e-06, - "loss": 0.1243, - "step": 551 - }, - { - "epoch": 0.09411513972880092, - "grad_norm": 0.5842875838279724, - "learning_rate": 9.058823529411765e-06, - "loss": 0.2061, - "step": 552 - }, - { - "epoch": 0.0942856381703386, - "grad_norm": 0.8577499985694885, - "learning_rate": 9.057118499573743e-06, - "loss": 0.25, - "step": 553 - }, - { - "epoch": 0.09445613661187628, - "grad_norm": 0.5927447080612183, - "learning_rate": 9.055413469735721e-06, - "loss": 0.168, - "step": 554 - }, - { - "epoch": 0.09462663505341397, - "grad_norm": 1.0270954370498657, - "learning_rate": 9.053708439897699e-06, - "loss": 0.251, - "step": 555 - }, - { - "epoch": 0.09479713349495164, - "grad_norm": 0.89007967710495, - "learning_rate": 9.052003410059677e-06, - "loss": 0.1797, - "step": 556 - }, - { - "epoch": 0.09496763193648933, - "grad_norm": 0.8992642760276794, - "learning_rate": 9.050298380221655e-06, - "loss": 0.15, - "step": 557 - }, - { - "epoch": 0.09513813037802701, - "grad_norm": 0.7758667469024658, - "learning_rate": 9.048593350383631e-06, - "loss": 0.1533, - "step": 558 - }, - { - "epoch": 0.0953086288195647, - "grad_norm": 0.9712894558906555, - "learning_rate": 9.046888320545611e-06, - "loss": 0.139, - "step": 559 - }, - { - "epoch": 0.09547912726110237, - "grad_norm": 0.6283227205276489, - "learning_rate": 9.045183290707587e-06, - "loss": 0.131, - "step": 560 - }, - { - "epoch": 0.09564962570264006, - "grad_norm": 0.8274819254875183, - "learning_rate": 9.043478260869565e-06, - "loss": 0.1464, - "step": 561 - }, - { - "epoch": 0.09582012414417775, - "grad_norm": 0.7347449064254761, - "learning_rate": 9.041773231031545e-06, - "loss": 0.1941, - "step": 562 - }, - { - "epoch": 0.09599062258571543, - "grad_norm": 1.8517053127288818, - "learning_rate": 9.040068201193521e-06, - "loss": 0.2219, - "step": 563 - }, - { - "epoch": 0.0961611210272531, - "grad_norm": 0.6514312624931335, - "learning_rate": 9.038363171355499e-06, - "loss": 0.1564, - "step": 564 - }, - { - "epoch": 0.09633161946879079, - "grad_norm": 1.3228540420532227, - "learning_rate": 9.036658141517477e-06, - "loss": 0.2409, - "step": 565 - }, - { - "epoch": 0.09650211791032848, - "grad_norm": 1.0575270652770996, - "learning_rate": 9.034953111679455e-06, - "loss": 0.2279, - "step": 566 - }, - { - "epoch": 0.09667261635186616, - "grad_norm": 0.650841474533081, - "learning_rate": 9.033248081841433e-06, - "loss": 0.131, - "step": 567 - }, - { - "epoch": 0.09684311479340384, - "grad_norm": 0.8555294871330261, - "learning_rate": 9.03154305200341e-06, - "loss": 0.1119, - "step": 568 - }, - { - "epoch": 0.09701361323494152, - "grad_norm": 0.6819137930870056, - "learning_rate": 9.029838022165389e-06, - "loss": 0.1066, - "step": 569 - }, - { - "epoch": 0.09718411167647921, - "grad_norm": 0.65989750623703, - "learning_rate": 9.028132992327367e-06, - "loss": 0.1784, - "step": 570 - }, - { - "epoch": 0.0973546101180169, - "grad_norm": 0.7284032106399536, - "learning_rate": 9.026427962489345e-06, - "loss": 0.1243, - "step": 571 - }, - { - "epoch": 0.09752510855955457, - "grad_norm": 0.9932384490966797, - "learning_rate": 9.024722932651323e-06, - "loss": 0.1838, - "step": 572 - }, - { - "epoch": 0.09769560700109226, - "grad_norm": 0.9998315572738647, - "learning_rate": 9.0230179028133e-06, - "loss": 0.2078, - "step": 573 - }, - { - "epoch": 0.09786610544262994, - "grad_norm": 0.6617645025253296, - "learning_rate": 9.021312872975277e-06, - "loss": 0.1751, - "step": 574 - }, - { - "epoch": 0.09803660388416763, - "grad_norm": 0.6724914312362671, - "learning_rate": 9.019607843137256e-06, - "loss": 0.1654, - "step": 575 - }, - { - "epoch": 0.0982071023257053, - "grad_norm": 0.7818968296051025, - "learning_rate": 9.017902813299233e-06, - "loss": 0.1118, - "step": 576 - }, - { - "epoch": 0.09837760076724299, - "grad_norm": 1.0563021898269653, - "learning_rate": 9.01619778346121e-06, - "loss": 0.176, - "step": 577 - }, - { - "epoch": 0.09854809920878067, - "grad_norm": 0.7812329530715942, - "learning_rate": 9.01449275362319e-06, - "loss": 0.1757, - "step": 578 - }, - { - "epoch": 0.09871859765031835, - "grad_norm": 0.6268443465232849, - "learning_rate": 9.012787723785166e-06, - "loss": 0.1592, - "step": 579 - }, - { - "epoch": 0.09888909609185603, - "grad_norm": 0.744136393070221, - "learning_rate": 9.011082693947144e-06, - "loss": 0.2224, - "step": 580 - }, - { - "epoch": 0.09905959453339372, - "grad_norm": 0.7699022889137268, - "learning_rate": 9.009377664109122e-06, - "loss": 0.1461, - "step": 581 - }, - { - "epoch": 0.0992300929749314, - "grad_norm": 1.6789628267288208, - "learning_rate": 9.0076726342711e-06, - "loss": 0.2659, - "step": 582 - }, - { - "epoch": 0.09940059141646908, - "grad_norm": 0.9754775762557983, - "learning_rate": 9.005967604433078e-06, - "loss": 0.0963, - "step": 583 - }, - { - "epoch": 0.09957108985800676, - "grad_norm": 0.6113495826721191, - "learning_rate": 9.004262574595056e-06, - "loss": 0.1764, - "step": 584 - }, - { - "epoch": 0.09974158829954445, - "grad_norm": 1.0460819005966187, - "learning_rate": 9.002557544757034e-06, - "loss": 0.2345, - "step": 585 - }, - { - "epoch": 0.09991208674108214, - "grad_norm": 0.9724820256233215, - "learning_rate": 9.000852514919012e-06, - "loss": 0.1813, - "step": 586 - }, - { - "epoch": 0.10008258518261981, - "grad_norm": 0.9336734414100647, - "learning_rate": 8.99914748508099e-06, - "loss": 0.2385, - "step": 587 - }, - { - "epoch": 0.1002530836241575, - "grad_norm": 1.145740270614624, - "learning_rate": 8.997442455242968e-06, - "loss": 0.1205, - "step": 588 - }, - { - "epoch": 0.10042358206569518, - "grad_norm": 1.0796016454696655, - "learning_rate": 8.995737425404946e-06, - "loss": 0.1873, - "step": 589 - }, - { - "epoch": 0.10059408050723287, - "grad_norm": 0.8692519068717957, - "learning_rate": 8.994032395566922e-06, - "loss": 0.1789, - "step": 590 - }, - { - "epoch": 0.10076457894877054, - "grad_norm": 0.9212008118629456, - "learning_rate": 8.992327365728902e-06, - "loss": 0.2034, - "step": 591 - }, - { - "epoch": 0.10093507739030823, - "grad_norm": 0.8814923763275146, - "learning_rate": 8.990622335890878e-06, - "loss": 0.1145, - "step": 592 - }, - { - "epoch": 0.10110557583184591, - "grad_norm": 0.8948725461959839, - "learning_rate": 8.988917306052856e-06, - "loss": 0.1507, - "step": 593 - }, - { - "epoch": 0.1012760742733836, - "grad_norm": 1.134162425994873, - "learning_rate": 8.987212276214834e-06, - "loss": 0.1286, - "step": 594 - }, - { - "epoch": 0.10144657271492127, - "grad_norm": 0.455159068107605, - "learning_rate": 8.985507246376812e-06, - "loss": 0.1394, - "step": 595 - }, - { - "epoch": 0.10161707115645896, - "grad_norm": 1.6202359199523926, - "learning_rate": 8.98380221653879e-06, - "loss": 0.3456, - "step": 596 - }, - { - "epoch": 0.10178756959799665, - "grad_norm": 1.1022000312805176, - "learning_rate": 8.982097186700768e-06, - "loss": 0.1169, - "step": 597 - }, - { - "epoch": 0.10195806803953433, - "grad_norm": 0.6847971677780151, - "learning_rate": 8.980392156862746e-06, - "loss": 0.0918, - "step": 598 - }, - { - "epoch": 0.102128566481072, - "grad_norm": 1.6231807470321655, - "learning_rate": 8.978687127024724e-06, - "loss": 0.2878, - "step": 599 - }, - { - "epoch": 0.10229906492260969, - "grad_norm": 0.6163637042045593, - "learning_rate": 8.976982097186702e-06, - "loss": 0.1903, - "step": 600 - }, - { - "epoch": 0.10229906492260969, - "eval_f1_score": 0.0, - "eval_loss": 0.19083337485790253, - "eval_runtime": 183.2063, - "eval_samples_per_second": 54.583, - "eval_steps_per_second": 3.411, - "step": 600 - }, - { - "epoch": 0.10246956336414738, - "grad_norm": 0.401183158159256, - "learning_rate": 8.97527706734868e-06, - "loss": 0.1146, - "step": 601 - }, - { - "epoch": 0.10264006180568505, - "grad_norm": 0.8258224725723267, - "learning_rate": 8.973572037510658e-06, - "loss": 0.2058, - "step": 602 - }, - { - "epoch": 0.10281056024722274, - "grad_norm": 0.7540138363838196, - "learning_rate": 8.971867007672636e-06, - "loss": 0.1825, - "step": 603 - }, - { - "epoch": 0.10298105868876042, - "grad_norm": 0.6636288166046143, - "learning_rate": 8.970161977834613e-06, - "loss": 0.1508, - "step": 604 - }, - { - "epoch": 0.10315155713029811, - "grad_norm": 0.6270397305488586, - "learning_rate": 8.968456947996591e-06, - "loss": 0.1327, - "step": 605 - }, - { - "epoch": 0.10332205557183578, - "grad_norm": 0.7056589722633362, - "learning_rate": 8.966751918158568e-06, - "loss": 0.1897, - "step": 606 - }, - { - "epoch": 0.10349255401337347, - "grad_norm": 0.622838020324707, - "learning_rate": 8.965046888320547e-06, - "loss": 0.1206, - "step": 607 - }, - { - "epoch": 0.10366305245491116, - "grad_norm": 0.6201973557472229, - "learning_rate": 8.963341858482524e-06, - "loss": 0.141, - "step": 608 - }, - { - "epoch": 0.10383355089644884, - "grad_norm": 0.6052491068840027, - "learning_rate": 8.961636828644502e-06, - "loss": 0.1471, - "step": 609 - }, - { - "epoch": 0.10400404933798651, - "grad_norm": 0.8596045970916748, - "learning_rate": 8.95993179880648e-06, - "loss": 0.1085, - "step": 610 - }, - { - "epoch": 0.1041745477795242, - "grad_norm": 1.2298681735992432, - "learning_rate": 8.958226768968457e-06, - "loss": 0.1583, - "step": 611 - }, - { - "epoch": 0.10434504622106189, - "grad_norm": 0.83328777551651, - "learning_rate": 8.956521739130435e-06, - "loss": 0.227, - "step": 612 - }, - { - "epoch": 0.10451554466259957, - "grad_norm": 1.138857364654541, - "learning_rate": 8.954816709292413e-06, - "loss": 0.1638, - "step": 613 - }, - { - "epoch": 0.10468604310413725, - "grad_norm": 0.9594923257827759, - "learning_rate": 8.953111679454391e-06, - "loss": 0.2186, - "step": 614 - }, - { - "epoch": 0.10485654154567493, - "grad_norm": 1.0194865465164185, - "learning_rate": 8.95140664961637e-06, - "loss": 0.1977, - "step": 615 - }, - { - "epoch": 0.10502703998721262, - "grad_norm": 0.7128332853317261, - "learning_rate": 8.949701619778347e-06, - "loss": 0.1246, - "step": 616 - }, - { - "epoch": 0.1051975384287503, - "grad_norm": 0.6208592653274536, - "learning_rate": 8.947996589940325e-06, - "loss": 0.1104, - "step": 617 - }, - { - "epoch": 0.10536803687028798, - "grad_norm": 0.8177977204322815, - "learning_rate": 8.946291560102303e-06, - "loss": 0.1107, - "step": 618 - }, - { - "epoch": 0.10553853531182567, - "grad_norm": 0.9365327954292297, - "learning_rate": 8.94458653026428e-06, - "loss": 0.2365, - "step": 619 - }, - { - "epoch": 0.10570903375336335, - "grad_norm": 1.0838879346847534, - "learning_rate": 8.942881500426259e-06, - "loss": 0.2582, - "step": 620 - }, - { - "epoch": 0.10587953219490102, - "grad_norm": 0.9658844470977783, - "learning_rate": 8.941176470588237e-06, - "loss": 0.0917, - "step": 621 - }, - { - "epoch": 0.10605003063643871, - "grad_norm": 1.2021678686141968, - "learning_rate": 8.939471440750213e-06, - "loss": 0.2124, - "step": 622 - }, - { - "epoch": 0.1062205290779764, - "grad_norm": 0.6625456809997559, - "learning_rate": 8.937766410912193e-06, - "loss": 0.19, - "step": 623 - }, - { - "epoch": 0.10639102751951408, - "grad_norm": 0.8603885173797607, - "learning_rate": 8.936061381074169e-06, - "loss": 0.2054, - "step": 624 - }, - { - "epoch": 0.10656152596105176, - "grad_norm": 0.9610936045646667, - "learning_rate": 8.934356351236147e-06, - "loss": 0.0493, - "step": 625 - }, - { - "epoch": 0.10673202440258944, - "grad_norm": 0.7804873585700989, - "learning_rate": 8.932651321398125e-06, - "loss": 0.1209, - "step": 626 - }, - { - "epoch": 0.10690252284412713, - "grad_norm": 0.6967254877090454, - "learning_rate": 8.930946291560103e-06, - "loss": 0.1907, - "step": 627 - }, - { - "epoch": 0.10707302128566482, - "grad_norm": 0.6492006182670593, - "learning_rate": 8.92924126172208e-06, - "loss": 0.1195, - "step": 628 - }, - { - "epoch": 0.10724351972720249, - "grad_norm": 0.7812399864196777, - "learning_rate": 8.927536231884059e-06, - "loss": 0.0563, - "step": 629 - }, - { - "epoch": 0.10741401816874017, - "grad_norm": 0.5185630321502686, - "learning_rate": 8.925831202046037e-06, - "loss": 0.1077, - "step": 630 - }, - { - "epoch": 0.10758451661027786, - "grad_norm": 0.4689163267612457, - "learning_rate": 8.924126172208015e-06, - "loss": 0.1661, - "step": 631 - }, - { - "epoch": 0.10775501505181555, - "grad_norm": 1.0575098991394043, - "learning_rate": 8.922421142369993e-06, - "loss": 0.2516, - "step": 632 - }, - { - "epoch": 0.10792551349335322, - "grad_norm": 0.9641260504722595, - "learning_rate": 8.920716112531969e-06, - "loss": 0.1721, - "step": 633 - }, - { - "epoch": 0.1080960119348909, - "grad_norm": 0.4975630044937134, - "learning_rate": 8.919011082693949e-06, - "loss": 0.1141, - "step": 634 - }, - { - "epoch": 0.10826651037642859, - "grad_norm": 1.1529088020324707, - "learning_rate": 8.917306052855925e-06, - "loss": 0.1385, - "step": 635 - }, - { - "epoch": 0.10843700881796628, - "grad_norm": 0.9603254795074463, - "learning_rate": 8.915601023017903e-06, - "loss": 0.1957, - "step": 636 - }, - { - "epoch": 0.10860750725950395, - "grad_norm": 1.3553906679153442, - "learning_rate": 8.913895993179882e-06, - "loss": 0.2296, - "step": 637 - }, - { - "epoch": 0.10877800570104164, - "grad_norm": 0.6116076707839966, - "learning_rate": 8.912190963341859e-06, - "loss": 0.0853, - "step": 638 - }, - { - "epoch": 0.10894850414257932, - "grad_norm": 0.7998124957084656, - "learning_rate": 8.910485933503837e-06, - "loss": 0.1961, - "step": 639 - }, - { - "epoch": 0.10911900258411701, - "grad_norm": 0.6033914089202881, - "learning_rate": 8.908780903665815e-06, - "loss": 0.1019, - "step": 640 - }, - { - "epoch": 0.10928950102565468, - "grad_norm": 0.8142050504684448, - "learning_rate": 8.907075873827792e-06, - "loss": 0.0954, - "step": 641 - }, - { - "epoch": 0.10945999946719237, - "grad_norm": 1.1877548694610596, - "learning_rate": 8.90537084398977e-06, - "loss": 0.1673, - "step": 642 - }, - { - "epoch": 0.10963049790873006, - "grad_norm": 1.1034671068191528, - "learning_rate": 8.903665814151748e-06, - "loss": 0.1147, - "step": 643 - }, - { - "epoch": 0.10980099635026773, - "grad_norm": 0.755710780620575, - "learning_rate": 8.901960784313726e-06, - "loss": 0.1529, - "step": 644 - }, - { - "epoch": 0.10997149479180542, - "grad_norm": 0.6754464507102966, - "learning_rate": 8.900255754475704e-06, - "loss": 0.1961, - "step": 645 - }, - { - "epoch": 0.1101419932333431, - "grad_norm": 0.7231892943382263, - "learning_rate": 8.898550724637682e-06, - "loss": 0.1064, - "step": 646 - }, - { - "epoch": 0.11031249167488079, - "grad_norm": 1.5384999513626099, - "learning_rate": 8.89684569479966e-06, - "loss": 0.1895, - "step": 647 - }, - { - "epoch": 0.11048299011641846, - "grad_norm": 0.9464960694313049, - "learning_rate": 8.895140664961638e-06, - "loss": 0.1706, - "step": 648 - }, - { - "epoch": 0.11065348855795615, - "grad_norm": 1.2258471250534058, - "learning_rate": 8.893435635123614e-06, - "loss": 0.2841, - "step": 649 - }, - { - "epoch": 0.11082398699949383, - "grad_norm": 0.6537081003189087, - "learning_rate": 8.891730605285594e-06, - "loss": 0.1588, - "step": 650 - }, - { - "epoch": 0.11099448544103152, - "grad_norm": 0.8599593639373779, - "learning_rate": 8.89002557544757e-06, - "loss": 0.2128, - "step": 651 - }, - { - "epoch": 0.1111649838825692, - "grad_norm": 1.066692590713501, - "learning_rate": 8.888320545609548e-06, - "loss": 0.1172, - "step": 652 - }, - { - "epoch": 0.11133548232410688, - "grad_norm": 1.1547163724899292, - "learning_rate": 8.886615515771528e-06, - "loss": 0.0957, - "step": 653 - }, - { - "epoch": 0.11150598076564457, - "grad_norm": 0.9395566582679749, - "learning_rate": 8.884910485933504e-06, - "loss": 0.1694, - "step": 654 - }, - { - "epoch": 0.11167647920718225, - "grad_norm": 0.8956859707832336, - "learning_rate": 8.883205456095482e-06, - "loss": 0.2008, - "step": 655 - }, - { - "epoch": 0.11184697764871993, - "grad_norm": 2.565577507019043, - "learning_rate": 8.88150042625746e-06, - "loss": 0.159, - "step": 656 - }, - { - "epoch": 0.11201747609025761, - "grad_norm": 0.733633279800415, - "learning_rate": 8.879795396419438e-06, - "loss": 0.1392, - "step": 657 - }, - { - "epoch": 0.1121879745317953, - "grad_norm": 1.230839490890503, - "learning_rate": 8.878090366581416e-06, - "loss": 0.1753, - "step": 658 - }, - { - "epoch": 0.11235847297333298, - "grad_norm": 0.8218492269515991, - "learning_rate": 8.876385336743394e-06, - "loss": 0.1408, - "step": 659 - }, - { - "epoch": 0.11252897141487066, - "grad_norm": 0.7273249626159668, - "learning_rate": 8.874680306905372e-06, - "loss": 0.097, - "step": 660 - }, - { - "epoch": 0.11269946985640834, - "grad_norm": 0.7804353833198547, - "learning_rate": 8.87297527706735e-06, - "loss": 0.1028, - "step": 661 - }, - { - "epoch": 0.11286996829794603, - "grad_norm": 1.1182870864868164, - "learning_rate": 8.871270247229328e-06, - "loss": 0.1596, - "step": 662 - }, - { - "epoch": 0.1130404667394837, - "grad_norm": 0.745988130569458, - "learning_rate": 8.869565217391306e-06, - "loss": 0.1094, - "step": 663 - }, - { - "epoch": 0.11321096518102139, - "grad_norm": 0.8780370354652405, - "learning_rate": 8.867860187553284e-06, - "loss": 0.1809, - "step": 664 - }, - { - "epoch": 0.11338146362255908, - "grad_norm": 1.2652925252914429, - "learning_rate": 8.86615515771526e-06, - "loss": 0.2359, - "step": 665 - }, - { - "epoch": 0.11355196206409676, - "grad_norm": 0.6840066313743591, - "learning_rate": 8.86445012787724e-06, - "loss": 0.1784, - "step": 666 - }, - { - "epoch": 0.11372246050563443, - "grad_norm": 0.7085036039352417, - "learning_rate": 8.862745098039216e-06, - "loss": 0.1476, - "step": 667 - }, - { - "epoch": 0.11389295894717212, - "grad_norm": 0.7280018925666809, - "learning_rate": 8.861040068201194e-06, - "loss": 0.1467, - "step": 668 - }, - { - "epoch": 0.11406345738870981, - "grad_norm": 0.8055374026298523, - "learning_rate": 8.859335038363172e-06, - "loss": 0.1618, - "step": 669 - }, - { - "epoch": 0.1142339558302475, - "grad_norm": 0.7427027225494385, - "learning_rate": 8.85763000852515e-06, - "loss": 0.1322, - "step": 670 - }, - { - "epoch": 0.11440445427178517, - "grad_norm": 1.0781701803207397, - "learning_rate": 8.855924978687127e-06, - "loss": 0.1994, - "step": 671 - }, - { - "epoch": 0.11457495271332285, - "grad_norm": 0.5790367126464844, - "learning_rate": 8.854219948849105e-06, - "loss": 0.1724, - "step": 672 - }, - { - "epoch": 0.11474545115486054, - "grad_norm": 1.1667585372924805, - "learning_rate": 8.852514919011083e-06, - "loss": 0.1996, - "step": 673 - }, - { - "epoch": 0.11491594959639823, - "grad_norm": 1.069802165031433, - "learning_rate": 8.850809889173061e-06, - "loss": 0.2446, - "step": 674 - }, - { - "epoch": 0.1150864480379359, - "grad_norm": 0.8994653820991516, - "learning_rate": 8.84910485933504e-06, - "loss": 0.1097, - "step": 675 - }, - { - "epoch": 0.11525694647947358, - "grad_norm": 0.7296671271324158, - "learning_rate": 8.847399829497017e-06, - "loss": 0.1465, - "step": 676 - }, - { - "epoch": 0.11542744492101127, - "grad_norm": 0.7813960909843445, - "learning_rate": 8.845694799658995e-06, - "loss": 0.1931, - "step": 677 - }, - { - "epoch": 0.11559794336254896, - "grad_norm": 0.8685258030891418, - "learning_rate": 8.843989769820973e-06, - "loss": 0.1205, - "step": 678 - }, - { - "epoch": 0.11576844180408663, - "grad_norm": 0.8280768990516663, - "learning_rate": 8.842284739982951e-06, - "loss": 0.1036, - "step": 679 - }, - { - "epoch": 0.11593894024562432, - "grad_norm": 1.205714464187622, - "learning_rate": 8.840579710144929e-06, - "loss": 0.1832, - "step": 680 - }, - { - "epoch": 0.116109438687162, - "grad_norm": 0.6061711311340332, - "learning_rate": 8.838874680306905e-06, - "loss": 0.1249, - "step": 681 - }, - { - "epoch": 0.11627993712869969, - "grad_norm": 0.6693856716156006, - "learning_rate": 8.837169650468885e-06, - "loss": 0.1709, - "step": 682 - }, - { - "epoch": 0.11645043557023736, - "grad_norm": 0.7403165102005005, - "learning_rate": 8.835464620630861e-06, - "loss": 0.1826, - "step": 683 - }, - { - "epoch": 0.11662093401177505, - "grad_norm": 1.0672601461410522, - "learning_rate": 8.833759590792839e-06, - "loss": 0.187, - "step": 684 - }, - { - "epoch": 0.11679143245331274, - "grad_norm": 0.6454370021820068, - "learning_rate": 8.832054560954817e-06, - "loss": 0.1536, - "step": 685 - }, - { - "epoch": 0.11696193089485041, - "grad_norm": 0.7109602689743042, - "learning_rate": 8.830349531116795e-06, - "loss": 0.1025, - "step": 686 - }, - { - "epoch": 0.1171324293363881, - "grad_norm": 1.0405216217041016, - "learning_rate": 8.828644501278773e-06, - "loss": 0.051, - "step": 687 - }, - { - "epoch": 0.11730292777792578, - "grad_norm": 1.011011004447937, - "learning_rate": 8.826939471440751e-06, - "loss": 0.2072, - "step": 688 - }, - { - "epoch": 0.11747342621946347, - "grad_norm": 0.8015874028205872, - "learning_rate": 8.825234441602729e-06, - "loss": 0.1706, - "step": 689 - }, - { - "epoch": 0.11764392466100114, - "grad_norm": 0.603288471698761, - "learning_rate": 8.823529411764707e-06, - "loss": 0.1464, - "step": 690 - }, - { - "epoch": 0.11781442310253883, - "grad_norm": 0.9477997422218323, - "learning_rate": 8.821824381926685e-06, - "loss": 0.1726, - "step": 691 - }, - { - "epoch": 0.11798492154407651, - "grad_norm": 0.5780686736106873, - "learning_rate": 8.820119352088663e-06, - "loss": 0.1528, - "step": 692 - }, - { - "epoch": 0.1181554199856142, - "grad_norm": 0.5830322504043579, - "learning_rate": 8.81841432225064e-06, - "loss": 0.1505, - "step": 693 - }, - { - "epoch": 0.11832591842715187, - "grad_norm": 1.1404770612716675, - "learning_rate": 8.816709292412617e-06, - "loss": 0.1729, - "step": 694 - }, - { - "epoch": 0.11849641686868956, - "grad_norm": 0.8645011782646179, - "learning_rate": 8.815004262574597e-06, - "loss": 0.1541, - "step": 695 - }, - { - "epoch": 0.11866691531022724, - "grad_norm": 0.5606808662414551, - "learning_rate": 8.813299232736574e-06, - "loss": 0.1299, - "step": 696 - }, - { - "epoch": 0.11883741375176493, - "grad_norm": 1.530091643333435, - "learning_rate": 8.81159420289855e-06, - "loss": 0.1978, - "step": 697 - }, - { - "epoch": 0.1190079121933026, - "grad_norm": 0.5594825744628906, - "learning_rate": 8.80988917306053e-06, - "loss": 0.161, - "step": 698 - }, - { - "epoch": 0.11917841063484029, - "grad_norm": 0.5259520411491394, - "learning_rate": 8.808184143222507e-06, - "loss": 0.083, - "step": 699 - }, - { - "epoch": 0.11934890907637798, - "grad_norm": 0.604471743106842, - "learning_rate": 8.806479113384485e-06, - "loss": 0.0862, - "step": 700 - }, - { - "epoch": 0.11934890907637798, - "eval_f1_score": 0.05693950177935943, - "eval_loss": 0.1808873862028122, - "eval_runtime": 182.561, - "eval_samples_per_second": 54.776, - "eval_steps_per_second": 3.424, - "step": 700 - }, - { - "epoch": 0.11951940751791566, - "grad_norm": 0.7942699193954468, - "learning_rate": 8.804774083546463e-06, - "loss": 0.149, - "step": 701 - }, - { - "epoch": 0.11968990595945334, - "grad_norm": 0.9464733600616455, - "learning_rate": 8.80306905370844e-06, - "loss": 0.1971, - "step": 702 - }, - { - "epoch": 0.11986040440099102, - "grad_norm": 0.8487085103988647, - "learning_rate": 8.801364023870418e-06, - "loss": 0.0749, - "step": 703 - }, - { - "epoch": 0.12003090284252871, - "grad_norm": 0.6099476218223572, - "learning_rate": 8.799658994032396e-06, - "loss": 0.165, - "step": 704 - }, - { - "epoch": 0.12020140128406638, - "grad_norm": 0.7376194000244141, - "learning_rate": 8.797953964194374e-06, - "loss": 0.1395, - "step": 705 - }, - { - "epoch": 0.12037189972560407, - "grad_norm": 0.8289808630943298, - "learning_rate": 8.796248934356352e-06, - "loss": 0.2172, - "step": 706 - }, - { - "epoch": 0.12054239816714175, - "grad_norm": 0.4552036225795746, - "learning_rate": 8.79454390451833e-06, - "loss": 0.1506, - "step": 707 - }, - { - "epoch": 0.12071289660867944, - "grad_norm": 0.5763758420944214, - "learning_rate": 8.792838874680306e-06, - "loss": 0.1564, - "step": 708 - }, - { - "epoch": 0.12088339505021711, - "grad_norm": 0.9908771514892578, - "learning_rate": 8.791133844842286e-06, - "loss": 0.1016, - "step": 709 - }, - { - "epoch": 0.1210538934917548, - "grad_norm": 0.8617611527442932, - "learning_rate": 8.789428815004262e-06, - "loss": 0.1663, - "step": 710 - }, - { - "epoch": 0.12122439193329249, - "grad_norm": 0.6195103526115417, - "learning_rate": 8.78772378516624e-06, - "loss": 0.0743, - "step": 711 - }, - { - "epoch": 0.12139489037483017, - "grad_norm": 0.8117532730102539, - "learning_rate": 8.78601875532822e-06, - "loss": 0.1828, - "step": 712 - }, - { - "epoch": 0.12156538881636784, - "grad_norm": 0.5788756608963013, - "learning_rate": 8.784313725490196e-06, - "loss": 0.1435, - "step": 713 - }, - { - "epoch": 0.12173588725790553, - "grad_norm": 0.6565443277359009, - "learning_rate": 8.782608695652174e-06, - "loss": 0.1332, - "step": 714 - }, - { - "epoch": 0.12190638569944322, - "grad_norm": 0.7393292784690857, - "learning_rate": 8.780903665814152e-06, - "loss": 0.1103, - "step": 715 - }, - { - "epoch": 0.1220768841409809, - "grad_norm": 0.500567615032196, - "learning_rate": 8.77919863597613e-06, - "loss": 0.0809, - "step": 716 - }, - { - "epoch": 0.12224738258251858, - "grad_norm": 1.1821036338806152, - "learning_rate": 8.777493606138108e-06, - "loss": 0.1491, - "step": 717 - }, - { - "epoch": 0.12241788102405626, - "grad_norm": 1.593512773513794, - "learning_rate": 8.775788576300086e-06, - "loss": 0.1833, - "step": 718 - }, - { - "epoch": 0.12258837946559395, - "grad_norm": 0.5709978938102722, - "learning_rate": 8.774083546462064e-06, - "loss": 0.0911, - "step": 719 - }, - { - "epoch": 0.12275887790713164, - "grad_norm": 1.388977289199829, - "learning_rate": 8.772378516624042e-06, - "loss": 0.2518, - "step": 720 - }, - { - "epoch": 0.12292937634866931, - "grad_norm": 0.7217902541160583, - "learning_rate": 8.77067348678602e-06, - "loss": 0.1836, - "step": 721 - }, - { - "epoch": 0.123099874790207, - "grad_norm": 0.7408892512321472, - "learning_rate": 8.768968456947998e-06, - "loss": 0.208, - "step": 722 - }, - { - "epoch": 0.12327037323174468, - "grad_norm": 1.1729018688201904, - "learning_rate": 8.767263427109976e-06, - "loss": 0.0926, - "step": 723 - }, - { - "epoch": 0.12344087167328237, - "grad_norm": 1.1482958793640137, - "learning_rate": 8.765558397271952e-06, - "loss": 0.2234, - "step": 724 - }, - { - "epoch": 0.12361137011482004, - "grad_norm": 1.9159002304077148, - "learning_rate": 8.763853367433932e-06, - "loss": 0.2116, - "step": 725 - }, - { - "epoch": 0.12378186855635773, - "grad_norm": 0.9399540424346924, - "learning_rate": 8.762148337595908e-06, - "loss": 0.1596, - "step": 726 - }, - { - "epoch": 0.12395236699789541, - "grad_norm": 0.8376556038856506, - "learning_rate": 8.760443307757886e-06, - "loss": 0.2133, - "step": 727 - }, - { - "epoch": 0.12412286543943309, - "grad_norm": 0.8435215950012207, - "learning_rate": 8.758738277919865e-06, - "loss": 0.1411, - "step": 728 - }, - { - "epoch": 0.12429336388097077, - "grad_norm": 0.7841005325317383, - "learning_rate": 8.757033248081842e-06, - "loss": 0.1757, - "step": 729 - }, - { - "epoch": 0.12446386232250846, - "grad_norm": 1.5041134357452393, - "learning_rate": 8.75532821824382e-06, - "loss": 0.2328, - "step": 730 - }, - { - "epoch": 0.12463436076404615, - "grad_norm": 1.0415327548980713, - "learning_rate": 8.753623188405798e-06, - "loss": 0.1609, - "step": 731 - }, - { - "epoch": 0.12480485920558382, - "grad_norm": 0.6443787217140198, - "learning_rate": 8.751918158567776e-06, - "loss": 0.1106, - "step": 732 - }, - { - "epoch": 0.1249753576471215, - "grad_norm": 0.5113686919212341, - "learning_rate": 8.750213128729753e-06, - "loss": 0.1231, - "step": 733 - }, - { - "epoch": 0.1251458560886592, - "grad_norm": 0.686532199382782, - "learning_rate": 8.748508098891731e-06, - "loss": 0.1048, - "step": 734 - }, - { - "epoch": 0.12531635453019688, - "grad_norm": 0.7583132982254028, - "learning_rate": 8.74680306905371e-06, - "loss": 0.1473, - "step": 735 - }, - { - "epoch": 0.12548685297173456, - "grad_norm": 0.9985403418540955, - "learning_rate": 8.745098039215687e-06, - "loss": 0.1695, - "step": 736 - }, - { - "epoch": 0.12565735141327225, - "grad_norm": 1.0437244176864624, - "learning_rate": 8.743393009377665e-06, - "loss": 0.1984, - "step": 737 - }, - { - "epoch": 0.1258278498548099, - "grad_norm": 1.27836275100708, - "learning_rate": 8.741687979539643e-06, - "loss": 0.2065, - "step": 738 - }, - { - "epoch": 0.1259983482963476, - "grad_norm": 0.649793267250061, - "learning_rate": 8.739982949701621e-06, - "loss": 0.0989, - "step": 739 - }, - { - "epoch": 0.12616884673788528, - "grad_norm": 0.5728198289871216, - "learning_rate": 8.738277919863597e-06, - "loss": 0.1628, - "step": 740 - }, - { - "epoch": 0.12633934517942297, - "grad_norm": 0.7949087619781494, - "learning_rate": 8.736572890025577e-06, - "loss": 0.1628, - "step": 741 - }, - { - "epoch": 0.12650984362096065, - "grad_norm": 1.0230469703674316, - "learning_rate": 8.734867860187553e-06, - "loss": 0.1416, - "step": 742 - }, - { - "epoch": 0.12668034206249834, - "grad_norm": 0.6708950996398926, - "learning_rate": 8.733162830349531e-06, - "loss": 0.1691, - "step": 743 - }, - { - "epoch": 0.12685084050403603, - "grad_norm": 0.7631368041038513, - "learning_rate": 8.73145780051151e-06, - "loss": 0.1223, - "step": 744 - }, - { - "epoch": 0.12702133894557371, - "grad_norm": 1.418788194656372, - "learning_rate": 8.729752770673487e-06, - "loss": 0.1503, - "step": 745 - }, - { - "epoch": 0.12719183738711137, - "grad_norm": 1.5092041492462158, - "learning_rate": 8.728047740835465e-06, - "loss": 0.2056, - "step": 746 - }, - { - "epoch": 0.12736233582864906, - "grad_norm": 1.0009962320327759, - "learning_rate": 8.726342710997443e-06, - "loss": 0.1508, - "step": 747 - }, - { - "epoch": 0.12753283427018675, - "grad_norm": 1.2288755178451538, - "learning_rate": 8.724637681159421e-06, - "loss": 0.2004, - "step": 748 - }, - { - "epoch": 0.12770333271172443, - "grad_norm": 0.6066832542419434, - "learning_rate": 8.722932651321399e-06, - "loss": 0.1084, - "step": 749 - }, - { - "epoch": 0.12787383115326212, - "grad_norm": 0.8936707973480225, - "learning_rate": 8.721227621483377e-06, - "loss": 0.1569, - "step": 750 - }, - { - "epoch": 0.1280443295947998, - "grad_norm": 1.5922664403915405, - "learning_rate": 8.719522591645355e-06, - "loss": 0.1859, - "step": 751 - }, - { - "epoch": 0.1282148280363375, - "grad_norm": 0.8145460486412048, - "learning_rate": 8.717817561807333e-06, - "loss": 0.1468, - "step": 752 - }, - { - "epoch": 0.12838532647787515, - "grad_norm": 0.8177946209907532, - "learning_rate": 8.71611253196931e-06, - "loss": 0.1329, - "step": 753 - }, - { - "epoch": 0.12855582491941284, - "grad_norm": 0.6831057667732239, - "learning_rate": 8.714407502131289e-06, - "loss": 0.1655, - "step": 754 - }, - { - "epoch": 0.12872632336095052, - "grad_norm": 1.3590229749679565, - "learning_rate": 8.712702472293267e-06, - "loss": 0.1688, - "step": 755 - }, - { - "epoch": 0.1288968218024882, - "grad_norm": 0.8421473503112793, - "learning_rate": 8.710997442455243e-06, - "loss": 0.1545, - "step": 756 - }, - { - "epoch": 0.1290673202440259, - "grad_norm": 0.8665107488632202, - "learning_rate": 8.709292412617223e-06, - "loss": 0.1541, - "step": 757 - }, - { - "epoch": 0.12923781868556358, - "grad_norm": 0.9077842235565186, - "learning_rate": 8.707587382779199e-06, - "loss": 0.1747, - "step": 758 - }, - { - "epoch": 0.12940831712710127, - "grad_norm": 0.773943305015564, - "learning_rate": 8.705882352941177e-06, - "loss": 0.1386, - "step": 759 - }, - { - "epoch": 0.12957881556863896, - "grad_norm": 1.8866589069366455, - "learning_rate": 8.704177323103155e-06, - "loss": 0.1541, - "step": 760 - }, - { - "epoch": 0.12974931401017661, - "grad_norm": 0.8734083771705627, - "learning_rate": 8.702472293265133e-06, - "loss": 0.0674, - "step": 761 - }, - { - "epoch": 0.1299198124517143, - "grad_norm": 0.9331802725791931, - "learning_rate": 8.70076726342711e-06, - "loss": 0.1726, - "step": 762 - }, - { - "epoch": 0.130090310893252, - "grad_norm": 0.7994747757911682, - "learning_rate": 8.699062233589089e-06, - "loss": 0.1104, - "step": 763 - }, - { - "epoch": 0.13026080933478967, - "grad_norm": 0.6318725347518921, - "learning_rate": 8.697357203751066e-06, - "loss": 0.0885, - "step": 764 - }, - { - "epoch": 0.13043130777632736, - "grad_norm": 0.8578609228134155, - "learning_rate": 8.695652173913044e-06, - "loss": 0.1932, - "step": 765 - }, - { - "epoch": 0.13060180621786505, - "grad_norm": 0.5934591293334961, - "learning_rate": 8.693947144075022e-06, - "loss": 0.1409, - "step": 766 - }, - { - "epoch": 0.13077230465940273, - "grad_norm": 0.9246014952659607, - "learning_rate": 8.692242114237e-06, - "loss": 0.1927, - "step": 767 - }, - { - "epoch": 0.13094280310094042, - "grad_norm": 0.7142505049705505, - "learning_rate": 8.690537084398978e-06, - "loss": 0.1031, - "step": 768 - }, - { - "epoch": 0.13111330154247808, - "grad_norm": 1.084960699081421, - "learning_rate": 8.688832054560955e-06, - "loss": 0.1565, - "step": 769 - }, - { - "epoch": 0.13128379998401576, - "grad_norm": 0.9150413274765015, - "learning_rate": 8.687127024722934e-06, - "loss": 0.183, - "step": 770 - }, - { - "epoch": 0.13145429842555345, - "grad_norm": 2.815584421157837, - "learning_rate": 8.685421994884912e-06, - "loss": 0.3257, - "step": 771 - }, - { - "epoch": 0.13162479686709114, - "grad_norm": 0.6796224117279053, - "learning_rate": 8.683716965046888e-06, - "loss": 0.1414, - "step": 772 - }, - { - "epoch": 0.13179529530862882, - "grad_norm": 0.7766322493553162, - "learning_rate": 8.682011935208868e-06, - "loss": 0.1271, - "step": 773 - }, - { - "epoch": 0.1319657937501665, - "grad_norm": 1.089959740638733, - "learning_rate": 8.680306905370844e-06, - "loss": 0.2125, - "step": 774 - }, - { - "epoch": 0.1321362921917042, - "grad_norm": 0.8805111646652222, - "learning_rate": 8.678601875532822e-06, - "loss": 0.271, - "step": 775 - }, - { - "epoch": 0.13230679063324186, - "grad_norm": 0.9524927139282227, - "learning_rate": 8.6768968456948e-06, - "loss": 0.1699, - "step": 776 - }, - { - "epoch": 0.13247728907477954, - "grad_norm": 0.9875715374946594, - "learning_rate": 8.675191815856778e-06, - "loss": 0.1119, - "step": 777 - }, - { - "epoch": 0.13264778751631723, - "grad_norm": 1.037915587425232, - "learning_rate": 8.673486786018756e-06, - "loss": 0.1976, - "step": 778 - }, - { - "epoch": 0.13281828595785491, - "grad_norm": 0.7319202423095703, - "learning_rate": 8.671781756180734e-06, - "loss": 0.1588, - "step": 779 - }, - { - "epoch": 0.1329887843993926, - "grad_norm": 0.7272329330444336, - "learning_rate": 8.670076726342712e-06, - "loss": 0.1671, - "step": 780 - }, - { - "epoch": 0.1331592828409303, - "grad_norm": 2.4887404441833496, - "learning_rate": 8.66837169650469e-06, - "loss": 0.1451, - "step": 781 - }, - { - "epoch": 0.13332978128246797, - "grad_norm": 2.0834434032440186, - "learning_rate": 8.666666666666668e-06, - "loss": 0.1788, - "step": 782 - }, - { - "epoch": 0.13350027972400566, - "grad_norm": 2.073859453201294, - "learning_rate": 8.664961636828644e-06, - "loss": 0.314, - "step": 783 - }, - { - "epoch": 0.13367077816554332, - "grad_norm": 1.1813137531280518, - "learning_rate": 8.663256606990624e-06, - "loss": 0.1565, - "step": 784 - }, - { - "epoch": 0.133841276607081, - "grad_norm": 0.9659483432769775, - "learning_rate": 8.6615515771526e-06, - "loss": 0.1986, - "step": 785 - }, - { - "epoch": 0.1340117750486187, - "grad_norm": 0.7730090618133545, - "learning_rate": 8.659846547314578e-06, - "loss": 0.1395, - "step": 786 - }, - { - "epoch": 0.13418227349015638, - "grad_norm": 1.5010566711425781, - "learning_rate": 8.658141517476558e-06, - "loss": 0.2804, - "step": 787 - }, - { - "epoch": 0.13435277193169406, - "grad_norm": 0.6788565516471863, - "learning_rate": 8.656436487638534e-06, - "loss": 0.1596, - "step": 788 - }, - { - "epoch": 0.13452327037323175, - "grad_norm": 0.6773743629455566, - "learning_rate": 8.654731457800512e-06, - "loss": 0.1682, - "step": 789 - }, - { - "epoch": 0.13469376881476944, - "grad_norm": 0.8464353680610657, - "learning_rate": 8.65302642796249e-06, - "loss": 0.1261, - "step": 790 - }, - { - "epoch": 0.1348642672563071, - "grad_norm": 1.5723978281021118, - "learning_rate": 8.651321398124468e-06, - "loss": 0.1886, - "step": 791 - }, - { - "epoch": 0.13503476569784478, - "grad_norm": 1.281877875328064, - "learning_rate": 8.649616368286446e-06, - "loss": 0.2739, - "step": 792 - }, - { - "epoch": 0.13520526413938247, - "grad_norm": 0.9185252785682678, - "learning_rate": 8.647911338448424e-06, - "loss": 0.1475, - "step": 793 - }, - { - "epoch": 0.13537576258092016, - "grad_norm": 0.8964889049530029, - "learning_rate": 8.646206308610402e-06, - "loss": 0.2107, - "step": 794 - }, - { - "epoch": 0.13554626102245784, - "grad_norm": 0.8502851128578186, - "learning_rate": 8.64450127877238e-06, - "loss": 0.1214, - "step": 795 - }, - { - "epoch": 0.13571675946399553, - "grad_norm": 0.7894145846366882, - "learning_rate": 8.642796248934357e-06, - "loss": 0.1776, - "step": 796 - }, - { - "epoch": 0.13588725790553322, - "grad_norm": 0.8448327779769897, - "learning_rate": 8.641091219096335e-06, - "loss": 0.1178, - "step": 797 - }, - { - "epoch": 0.1360577563470709, - "grad_norm": 0.682449996471405, - "learning_rate": 8.639386189258313e-06, - "loss": 0.1421, - "step": 798 - }, - { - "epoch": 0.13622825478860856, - "grad_norm": 0.7914981842041016, - "learning_rate": 8.63768115942029e-06, - "loss": 0.1547, - "step": 799 - }, - { - "epoch": 0.13639875323014625, - "grad_norm": 1.071673035621643, - "learning_rate": 8.63597612958227e-06, - "loss": 0.146, - "step": 800 - }, - { - "epoch": 0.13639875323014625, - "eval_f1_score": 0.1461794019933555, - "eval_loss": 0.17923100292682648, - "eval_runtime": 182.6021, - "eval_samples_per_second": 54.764, - "eval_steps_per_second": 3.423, - "step": 800 - }, - { - "epoch": 0.13656925167168393, - "grad_norm": 0.9378244876861572, - "learning_rate": 8.634271099744245e-06, - "loss": 0.1741, - "step": 801 - }, - { - "epoch": 0.13673975011322162, - "grad_norm": 0.5133070945739746, - "learning_rate": 8.632566069906223e-06, - "loss": 0.098, - "step": 802 - }, - { - "epoch": 0.1369102485547593, - "grad_norm": 0.6201533079147339, - "learning_rate": 8.630861040068201e-06, - "loss": 0.1666, - "step": 803 - }, - { - "epoch": 0.137080746996297, - "grad_norm": 0.6914802193641663, - "learning_rate": 8.62915601023018e-06, - "loss": 0.121, - "step": 804 - }, - { - "epoch": 0.13725124543783468, - "grad_norm": 1.078437328338623, - "learning_rate": 8.627450980392157e-06, - "loss": 0.1503, - "step": 805 - }, - { - "epoch": 0.13742174387937237, - "grad_norm": 0.5333152413368225, - "learning_rate": 8.625745950554135e-06, - "loss": 0.0863, - "step": 806 - }, - { - "epoch": 0.13759224232091002, - "grad_norm": 0.8483152389526367, - "learning_rate": 8.624040920716113e-06, - "loss": 0.1782, - "step": 807 - }, - { - "epoch": 0.1377627407624477, - "grad_norm": 1.230548620223999, - "learning_rate": 8.622335890878091e-06, - "loss": 0.1795, - "step": 808 - }, - { - "epoch": 0.1379332392039854, - "grad_norm": 1.096155047416687, - "learning_rate": 8.620630861040069e-06, - "loss": 0.15, - "step": 809 - }, - { - "epoch": 0.13810373764552308, - "grad_norm": 1.0700522661209106, - "learning_rate": 8.618925831202047e-06, - "loss": 0.1256, - "step": 810 - }, - { - "epoch": 0.13827423608706077, - "grad_norm": 0.9942691326141357, - "learning_rate": 8.617220801364025e-06, - "loss": 0.0621, - "step": 811 - }, - { - "epoch": 0.13844473452859846, - "grad_norm": 1.1681605577468872, - "learning_rate": 8.615515771526003e-06, - "loss": 0.2706, - "step": 812 - }, - { - "epoch": 0.13861523297013614, - "grad_norm": 0.6961143612861633, - "learning_rate": 8.61381074168798e-06, - "loss": 0.1214, - "step": 813 - }, - { - "epoch": 0.1387857314116738, - "grad_norm": 1.5237188339233398, - "learning_rate": 8.612105711849959e-06, - "loss": 0.1471, - "step": 814 - }, - { - "epoch": 0.1389562298532115, - "grad_norm": 2.2736763954162598, - "learning_rate": 8.610400682011935e-06, - "loss": 0.314, - "step": 815 - }, - { - "epoch": 0.13912672829474917, - "grad_norm": 0.842560887336731, - "learning_rate": 8.608695652173915e-06, - "loss": 0.1069, - "step": 816 - }, - { - "epoch": 0.13929722673628686, - "grad_norm": 0.838711142539978, - "learning_rate": 8.606990622335891e-06, - "loss": 0.1392, - "step": 817 - }, - { - "epoch": 0.13946772517782455, - "grad_norm": 1.5051636695861816, - "learning_rate": 8.605285592497869e-06, - "loss": 0.2429, - "step": 818 - }, - { - "epoch": 0.13963822361936223, - "grad_norm": 0.9790658950805664, - "learning_rate": 8.603580562659847e-06, - "loss": 0.1904, - "step": 819 - }, - { - "epoch": 0.13980872206089992, - "grad_norm": 1.0247228145599365, - "learning_rate": 8.601875532821825e-06, - "loss": 0.1407, - "step": 820 - }, - { - "epoch": 0.1399792205024376, - "grad_norm": 1.1538825035095215, - "learning_rate": 8.600170502983803e-06, - "loss": 0.1281, - "step": 821 - }, - { - "epoch": 0.14014971894397527, - "grad_norm": 1.488327980041504, - "learning_rate": 8.59846547314578e-06, - "loss": 0.1372, - "step": 822 - }, - { - "epoch": 0.14032021738551295, - "grad_norm": 1.2420367002487183, - "learning_rate": 8.596760443307759e-06, - "loss": 0.151, - "step": 823 - }, - { - "epoch": 0.14049071582705064, - "grad_norm": 1.1011592149734497, - "learning_rate": 8.595055413469737e-06, - "loss": 0.2513, - "step": 824 - }, - { - "epoch": 0.14066121426858832, - "grad_norm": 0.8864753246307373, - "learning_rate": 8.593350383631714e-06, - "loss": 0.1389, - "step": 825 - }, - { - "epoch": 0.140831712710126, - "grad_norm": 1.1462125778198242, - "learning_rate": 8.591645353793692e-06, - "loss": 0.167, - "step": 826 - }, - { - "epoch": 0.1410022111516637, - "grad_norm": 0.8221803903579712, - "learning_rate": 8.58994032395567e-06, - "loss": 0.1552, - "step": 827 - }, - { - "epoch": 0.14117270959320138, - "grad_norm": 0.8550702333450317, - "learning_rate": 8.588235294117647e-06, - "loss": 0.1512, - "step": 828 - }, - { - "epoch": 0.14134320803473907, - "grad_norm": 0.9521147012710571, - "learning_rate": 8.586530264279626e-06, - "loss": 0.186, - "step": 829 - }, - { - "epoch": 0.14151370647627673, - "grad_norm": 1.5711631774902344, - "learning_rate": 8.584825234441604e-06, - "loss": 0.3002, - "step": 830 - }, - { - "epoch": 0.14168420491781442, - "grad_norm": 0.6364051103591919, - "learning_rate": 8.58312020460358e-06, - "loss": 0.1998, - "step": 831 - }, - { - "epoch": 0.1418547033593521, - "grad_norm": 1.021705150604248, - "learning_rate": 8.58141517476556e-06, - "loss": 0.0973, - "step": 832 - }, - { - "epoch": 0.1420252018008898, - "grad_norm": 0.7612175345420837, - "learning_rate": 8.579710144927536e-06, - "loss": 0.2029, - "step": 833 - }, - { - "epoch": 0.14219570024242748, - "grad_norm": 0.8757889270782471, - "learning_rate": 8.578005115089514e-06, - "loss": 0.2264, - "step": 834 - }, - { - "epoch": 0.14236619868396516, - "grad_norm": 0.7471174597740173, - "learning_rate": 8.576300085251492e-06, - "loss": 0.1473, - "step": 835 - }, - { - "epoch": 0.14253669712550285, - "grad_norm": 1.4478951692581177, - "learning_rate": 8.57459505541347e-06, - "loss": 0.1916, - "step": 836 - }, - { - "epoch": 0.1427071955670405, - "grad_norm": 1.1510059833526611, - "learning_rate": 8.572890025575448e-06, - "loss": 0.1959, - "step": 837 - }, - { - "epoch": 0.1428776940085782, - "grad_norm": 0.769268810749054, - "learning_rate": 8.571184995737426e-06, - "loss": 0.1295, - "step": 838 - }, - { - "epoch": 0.14304819245011588, - "grad_norm": 0.692801296710968, - "learning_rate": 8.569479965899404e-06, - "loss": 0.1456, - "step": 839 - }, - { - "epoch": 0.14321869089165357, - "grad_norm": 0.8778480291366577, - "learning_rate": 8.567774936061382e-06, - "loss": 0.2729, - "step": 840 - }, - { - "epoch": 0.14338918933319125, - "grad_norm": 0.716971218585968, - "learning_rate": 8.56606990622336e-06, - "loss": 0.1546, - "step": 841 - }, - { - "epoch": 0.14355968777472894, - "grad_norm": 2.0981221199035645, - "learning_rate": 8.564364876385338e-06, - "loss": 0.2563, - "step": 842 - }, - { - "epoch": 0.14373018621626663, - "grad_norm": 0.6106297969818115, - "learning_rate": 8.562659846547316e-06, - "loss": 0.1605, - "step": 843 - }, - { - "epoch": 0.1439006846578043, - "grad_norm": 0.7936435341835022, - "learning_rate": 8.560954816709292e-06, - "loss": 0.0751, - "step": 844 - }, - { - "epoch": 0.14407118309934197, - "grad_norm": 0.8291981816291809, - "learning_rate": 8.559249786871272e-06, - "loss": 0.166, - "step": 845 - }, - { - "epoch": 0.14424168154087966, - "grad_norm": 0.7863637804985046, - "learning_rate": 8.55754475703325e-06, - "loss": 0.1637, - "step": 846 - }, - { - "epoch": 0.14441217998241734, - "grad_norm": 0.5928487777709961, - "learning_rate": 8.555839727195226e-06, - "loss": 0.2067, - "step": 847 - }, - { - "epoch": 0.14458267842395503, - "grad_norm": 0.7105429172515869, - "learning_rate": 8.554134697357206e-06, - "loss": 0.1674, - "step": 848 - }, - { - "epoch": 0.14475317686549272, - "grad_norm": 0.5402317047119141, - "learning_rate": 8.552429667519182e-06, - "loss": 0.1455, - "step": 849 - }, - { - "epoch": 0.1449236753070304, - "grad_norm": 0.8685750961303711, - "learning_rate": 8.55072463768116e-06, - "loss": 0.1576, - "step": 850 - }, - { - "epoch": 0.1450941737485681, - "grad_norm": 0.6241790652275085, - "learning_rate": 8.549019607843138e-06, - "loss": 0.1207, - "step": 851 - }, - { - "epoch": 0.14526467219010578, - "grad_norm": 0.6889399886131287, - "learning_rate": 8.547314578005116e-06, - "loss": 0.1418, - "step": 852 - }, - { - "epoch": 0.14543517063164343, - "grad_norm": 3.354299306869507, - "learning_rate": 8.545609548167094e-06, - "loss": 0.126, - "step": 853 - }, - { - "epoch": 0.14560566907318112, - "grad_norm": 1.1726495027542114, - "learning_rate": 8.543904518329072e-06, - "loss": 0.2264, - "step": 854 - }, - { - "epoch": 0.1457761675147188, - "grad_norm": 0.8029992580413818, - "learning_rate": 8.54219948849105e-06, - "loss": 0.1017, - "step": 855 - }, - { - "epoch": 0.1459466659562565, - "grad_norm": 0.9463875889778137, - "learning_rate": 8.540494458653027e-06, - "loss": 0.1911, - "step": 856 - }, - { - "epoch": 0.14611716439779418, - "grad_norm": 0.8631307482719421, - "learning_rate": 8.538789428815005e-06, - "loss": 0.1079, - "step": 857 - }, - { - "epoch": 0.14628766283933187, - "grad_norm": 0.9463744759559631, - "learning_rate": 8.537084398976982e-06, - "loss": 0.2081, - "step": 858 - }, - { - "epoch": 0.14645816128086955, - "grad_norm": 0.7745504975318909, - "learning_rate": 8.535379369138961e-06, - "loss": 0.1143, - "step": 859 - }, - { - "epoch": 0.1466286597224072, - "grad_norm": 0.7032846808433533, - "learning_rate": 8.533674339300938e-06, - "loss": 0.1648, - "step": 860 - }, - { - "epoch": 0.1467991581639449, - "grad_norm": 1.1793239116668701, - "learning_rate": 8.531969309462916e-06, - "loss": 0.2458, - "step": 861 - }, - { - "epoch": 0.14696965660548258, - "grad_norm": 0.8669989109039307, - "learning_rate": 8.530264279624895e-06, - "loss": 0.0996, - "step": 862 - }, - { - "epoch": 0.14714015504702027, - "grad_norm": 1.4623005390167236, - "learning_rate": 8.528559249786871e-06, - "loss": 0.2536, - "step": 863 - }, - { - "epoch": 0.14731065348855796, - "grad_norm": 0.7780196070671082, - "learning_rate": 8.52685421994885e-06, - "loss": 0.1102, - "step": 864 - }, - { - "epoch": 0.14748115193009564, - "grad_norm": 0.8474672436714172, - "learning_rate": 8.525149190110827e-06, - "loss": 0.1611, - "step": 865 - }, - { - "epoch": 0.14765165037163333, - "grad_norm": 0.7766311168670654, - "learning_rate": 8.523444160272805e-06, - "loss": 0.1206, - "step": 866 - }, - { - "epoch": 0.14782214881317102, - "grad_norm": 0.6879698038101196, - "learning_rate": 8.521739130434783e-06, - "loss": 0.1676, - "step": 867 - }, - { - "epoch": 0.14799264725470868, - "grad_norm": 0.6485670208930969, - "learning_rate": 8.520034100596761e-06, - "loss": 0.1672, - "step": 868 - }, - { - "epoch": 0.14816314569624636, - "grad_norm": 0.7620902061462402, - "learning_rate": 8.518329070758739e-06, - "loss": 0.1299, - "step": 869 - }, - { - "epoch": 0.14833364413778405, - "grad_norm": 0.9798589944839478, - "learning_rate": 8.516624040920717e-06, - "loss": 0.0973, - "step": 870 - }, - { - "epoch": 0.14850414257932174, - "grad_norm": 0.8749964237213135, - "learning_rate": 8.514919011082695e-06, - "loss": 0.065, - "step": 871 - }, - { - "epoch": 0.14867464102085942, - "grad_norm": 0.6423231959342957, - "learning_rate": 8.513213981244673e-06, - "loss": 0.1317, - "step": 872 - }, - { - "epoch": 0.1488451394623971, - "grad_norm": 0.979634165763855, - "learning_rate": 8.511508951406651e-06, - "loss": 0.1331, - "step": 873 - }, - { - "epoch": 0.1490156379039348, - "grad_norm": 1.4354133605957031, - "learning_rate": 8.509803921568627e-06, - "loss": 0.1772, - "step": 874 - }, - { - "epoch": 0.14918613634547248, - "grad_norm": 0.6209403872489929, - "learning_rate": 8.508098891730607e-06, - "loss": 0.1024, - "step": 875 - }, - { - "epoch": 0.14935663478701014, - "grad_norm": 2.1505284309387207, - "learning_rate": 8.506393861892583e-06, - "loss": 0.2204, - "step": 876 - }, - { - "epoch": 0.14952713322854783, - "grad_norm": 1.8646060228347778, - "learning_rate": 8.504688832054561e-06, - "loss": 0.288, - "step": 877 - }, - { - "epoch": 0.1496976316700855, - "grad_norm": 0.7936485409736633, - "learning_rate": 8.502983802216539e-06, - "loss": 0.1333, - "step": 878 - }, - { - "epoch": 0.1498681301116232, - "grad_norm": 0.8515297174453735, - "learning_rate": 8.501278772378517e-06, - "loss": 0.1229, - "step": 879 - }, - { - "epoch": 0.15003862855316089, - "grad_norm": 0.9410006403923035, - "learning_rate": 8.499573742540495e-06, - "loss": 0.1328, - "step": 880 - }, - { - "epoch": 0.15020912699469857, - "grad_norm": 1.014402985572815, - "learning_rate": 8.497868712702473e-06, - "loss": 0.0523, - "step": 881 - }, - { - "epoch": 0.15037962543623626, - "grad_norm": 1.1425182819366455, - "learning_rate": 8.49616368286445e-06, - "loss": 0.152, - "step": 882 - }, - { - "epoch": 0.15055012387777392, - "grad_norm": 1.3995938301086426, - "learning_rate": 8.494458653026429e-06, - "loss": 0.2666, - "step": 883 - }, - { - "epoch": 0.1507206223193116, - "grad_norm": 0.7522984743118286, - "learning_rate": 8.492753623188407e-06, - "loss": 0.1231, - "step": 884 - }, - { - "epoch": 0.1508911207608493, - "grad_norm": 0.9752036929130554, - "learning_rate": 8.491048593350385e-06, - "loss": 0.1259, - "step": 885 - }, - { - "epoch": 0.15106161920238698, - "grad_norm": 1.2285127639770508, - "learning_rate": 8.489343563512363e-06, - "loss": 0.0806, - "step": 886 - }, - { - "epoch": 0.15123211764392466, - "grad_norm": 0.7990505695343018, - "learning_rate": 8.48763853367434e-06, - "loss": 0.1139, - "step": 887 - }, - { - "epoch": 0.15140261608546235, - "grad_norm": 0.7367463111877441, - "learning_rate": 8.485933503836318e-06, - "loss": 0.142, - "step": 888 - }, - { - "epoch": 0.15157311452700004, - "grad_norm": 0.958259642124176, - "learning_rate": 8.484228473998296e-06, - "loss": 0.1229, - "step": 889 - }, - { - "epoch": 0.15174361296853772, - "grad_norm": 0.8944674134254456, - "learning_rate": 8.482523444160273e-06, - "loss": 0.1641, - "step": 890 - }, - { - "epoch": 0.15191411141007538, - "grad_norm": 1.6810100078582764, - "learning_rate": 8.480818414322252e-06, - "loss": 0.2561, - "step": 891 - }, - { - "epoch": 0.15208460985161307, - "grad_norm": 0.8170380592346191, - "learning_rate": 8.479113384484229e-06, - "loss": 0.1703, - "step": 892 - }, - { - "epoch": 0.15225510829315075, - "grad_norm": 0.9030293226242065, - "learning_rate": 8.477408354646206e-06, - "loss": 0.0785, - "step": 893 - }, - { - "epoch": 0.15242560673468844, - "grad_norm": 1.2546629905700684, - "learning_rate": 8.475703324808184e-06, - "loss": 0.1747, - "step": 894 - }, - { - "epoch": 0.15259610517622613, - "grad_norm": 0.7067754864692688, - "learning_rate": 8.473998294970162e-06, - "loss": 0.1664, - "step": 895 - }, - { - "epoch": 0.1527666036177638, - "grad_norm": 1.1066006422042847, - "learning_rate": 8.47229326513214e-06, - "loss": 0.1139, - "step": 896 - }, - { - "epoch": 0.1529371020593015, - "grad_norm": 0.8568253517150879, - "learning_rate": 8.470588235294118e-06, - "loss": 0.1815, - "step": 897 - }, - { - "epoch": 0.15310760050083916, - "grad_norm": 0.5176457166671753, - "learning_rate": 8.468883205456096e-06, - "loss": 0.1411, - "step": 898 - }, - { - "epoch": 0.15327809894237684, - "grad_norm": 1.1265554428100586, - "learning_rate": 8.467178175618074e-06, - "loss": 0.1883, - "step": 899 - }, - { - "epoch": 0.15344859738391453, - "grad_norm": 1.7193188667297363, - "learning_rate": 8.465473145780052e-06, - "loss": 0.2669, - "step": 900 - }, - { - "epoch": 0.15344859738391453, - "eval_f1_score": 0.2702702702702703, - "eval_loss": 0.17168840765953064, - "eval_runtime": 182.5767, - "eval_samples_per_second": 54.772, - "eval_steps_per_second": 3.423, - "step": 900 - }, - { - "epoch": 0.15361909582545222, - "grad_norm": 0.8680484890937805, - "learning_rate": 8.46376811594203e-06, - "loss": 0.1288, - "step": 901 - }, - { - "epoch": 0.1537895942669899, - "grad_norm": 1.037379264831543, - "learning_rate": 8.462063086104008e-06, - "loss": 0.0937, - "step": 902 - }, - { - "epoch": 0.1539600927085276, - "grad_norm": 0.8336663246154785, - "learning_rate": 8.460358056265984e-06, - "loss": 0.2303, - "step": 903 - }, - { - "epoch": 0.15413059115006528, - "grad_norm": 1.141602635383606, - "learning_rate": 8.458653026427964e-06, - "loss": 0.2589, - "step": 904 - }, - { - "epoch": 0.15430108959160296, - "grad_norm": 1.1085407733917236, - "learning_rate": 8.456947996589942e-06, - "loss": 0.2392, - "step": 905 - }, - { - "epoch": 0.15447158803314062, - "grad_norm": 0.7251394987106323, - "learning_rate": 8.455242966751918e-06, - "loss": 0.1609, - "step": 906 - }, - { - "epoch": 0.1546420864746783, - "grad_norm": 0.8698781728744507, - "learning_rate": 8.453537936913898e-06, - "loss": 0.1607, - "step": 907 - }, - { - "epoch": 0.154812584916216, - "grad_norm": 0.7384206056594849, - "learning_rate": 8.451832907075874e-06, - "loss": 0.1801, - "step": 908 - }, - { - "epoch": 0.15498308335775368, - "grad_norm": 0.9124801754951477, - "learning_rate": 8.450127877237852e-06, - "loss": 0.1379, - "step": 909 - }, - { - "epoch": 0.15515358179929137, - "grad_norm": 0.6821106672286987, - "learning_rate": 8.44842284739983e-06, - "loss": 0.1811, - "step": 910 - }, - { - "epoch": 0.15532408024082905, - "grad_norm": 0.6709396839141846, - "learning_rate": 8.446717817561808e-06, - "loss": 0.1118, - "step": 911 - }, - { - "epoch": 0.15549457868236674, - "grad_norm": 1.2577794790267944, - "learning_rate": 8.445012787723786e-06, - "loss": 0.218, - "step": 912 - }, - { - "epoch": 0.15566507712390443, - "grad_norm": 0.7951264977455139, - "learning_rate": 8.443307757885764e-06, - "loss": 0.1542, - "step": 913 - }, - { - "epoch": 0.15583557556544209, - "grad_norm": 1.3476436138153076, - "learning_rate": 8.441602728047742e-06, - "loss": 0.2476, - "step": 914 - }, - { - "epoch": 0.15600607400697977, - "grad_norm": 1.033733606338501, - "learning_rate": 8.43989769820972e-06, - "loss": 0.1993, - "step": 915 - }, - { - "epoch": 0.15617657244851746, - "grad_norm": 1.7906930446624756, - "learning_rate": 8.438192668371698e-06, - "loss": 0.2485, - "step": 916 - }, - { - "epoch": 0.15634707089005515, - "grad_norm": 0.7418613433837891, - "learning_rate": 8.436487638533676e-06, - "loss": 0.1618, - "step": 917 - }, - { - "epoch": 0.15651756933159283, - "grad_norm": 1.316089153289795, - "learning_rate": 8.434782608695653e-06, - "loss": 0.23, - "step": 918 - }, - { - "epoch": 0.15668806777313052, - "grad_norm": 1.0219331979751587, - "learning_rate": 8.43307757885763e-06, - "loss": 0.2063, - "step": 919 - }, - { - "epoch": 0.1568585662146682, - "grad_norm": 1.3739581108093262, - "learning_rate": 8.43137254901961e-06, - "loss": 0.1689, - "step": 920 - }, - { - "epoch": 0.15702906465620586, - "grad_norm": 1.3870887756347656, - "learning_rate": 8.429667519181587e-06, - "loss": 0.2504, - "step": 921 - }, - { - "epoch": 0.15719956309774355, - "grad_norm": 1.140902042388916, - "learning_rate": 8.427962489343564e-06, - "loss": 0.2444, - "step": 922 - }, - { - "epoch": 0.15737006153928124, - "grad_norm": 1.5909425020217896, - "learning_rate": 8.426257459505543e-06, - "loss": 0.221, - "step": 923 - }, - { - "epoch": 0.15754055998081892, - "grad_norm": 1.0957505702972412, - "learning_rate": 8.42455242966752e-06, - "loss": 0.1642, - "step": 924 - }, - { - "epoch": 0.1577110584223566, - "grad_norm": 0.7079620361328125, - "learning_rate": 8.422847399829497e-06, - "loss": 0.1408, - "step": 925 - }, - { - "epoch": 0.1578815568638943, - "grad_norm": 0.9637887477874756, - "learning_rate": 8.421142369991475e-06, - "loss": 0.2089, - "step": 926 - }, - { - "epoch": 0.15805205530543198, - "grad_norm": 1.4444074630737305, - "learning_rate": 8.419437340153453e-06, - "loss": 0.104, - "step": 927 - }, - { - "epoch": 0.15822255374696967, - "grad_norm": 1.3934565782546997, - "learning_rate": 8.417732310315431e-06, - "loss": 0.2762, - "step": 928 - }, - { - "epoch": 0.15839305218850733, - "grad_norm": 0.7416796088218689, - "learning_rate": 8.41602728047741e-06, - "loss": 0.1508, - "step": 929 - }, - { - "epoch": 0.158563550630045, - "grad_norm": 0.5937446355819702, - "learning_rate": 8.414322250639387e-06, - "loss": 0.0793, - "step": 930 - }, - { - "epoch": 0.1587340490715827, - "grad_norm": 0.728579580783844, - "learning_rate": 8.412617220801365e-06, - "loss": 0.1153, - "step": 931 - }, - { - "epoch": 0.1589045475131204, - "grad_norm": 0.7820244431495667, - "learning_rate": 8.410912190963343e-06, - "loss": 0.066, - "step": 932 - }, - { - "epoch": 0.15907504595465807, - "grad_norm": 1.6547861099243164, - "learning_rate": 8.40920716112532e-06, - "loss": 0.2295, - "step": 933 - }, - { - "epoch": 0.15924554439619576, - "grad_norm": 1.0257244110107422, - "learning_rate": 8.407502131287299e-06, - "loss": 0.1739, - "step": 934 - }, - { - "epoch": 0.15941604283773345, - "grad_norm": 1.2456021308898926, - "learning_rate": 8.405797101449275e-06, - "loss": 0.2205, - "step": 935 - }, - { - "epoch": 0.15958654127927113, - "grad_norm": 0.9185108542442322, - "learning_rate": 8.404092071611253e-06, - "loss": 0.1653, - "step": 936 - }, - { - "epoch": 0.1597570397208088, - "grad_norm": 1.904354453086853, - "learning_rate": 8.402387041773231e-06, - "loss": 0.1273, - "step": 937 - }, - { - "epoch": 0.15992753816234648, - "grad_norm": 0.9777550101280212, - "learning_rate": 8.400682011935209e-06, - "loss": 0.254, - "step": 938 - }, - { - "epoch": 0.16009803660388416, - "grad_norm": 1.0197429656982422, - "learning_rate": 8.398976982097187e-06, - "loss": 0.1463, - "step": 939 - }, - { - "epoch": 0.16026853504542185, - "grad_norm": 0.6910821795463562, - "learning_rate": 8.397271952259165e-06, - "loss": 0.1425, - "step": 940 - }, - { - "epoch": 0.16043903348695954, - "grad_norm": 1.0888043642044067, - "learning_rate": 8.395566922421143e-06, - "loss": 0.1992, - "step": 941 - }, - { - "epoch": 0.16060953192849722, - "grad_norm": 0.6020128130912781, - "learning_rate": 8.39386189258312e-06, - "loss": 0.167, - "step": 942 - }, - { - "epoch": 0.1607800303700349, - "grad_norm": 0.7391735315322876, - "learning_rate": 8.392156862745099e-06, - "loss": 0.1455, - "step": 943 - }, - { - "epoch": 0.16095052881157257, - "grad_norm": 0.7194268703460693, - "learning_rate": 8.390451832907077e-06, - "loss": 0.1916, - "step": 944 - }, - { - "epoch": 0.16112102725311025, - "grad_norm": 1.303993582725525, - "learning_rate": 8.388746803069055e-06, - "loss": 0.0889, - "step": 945 - }, - { - "epoch": 0.16129152569464794, - "grad_norm": 0.8701795339584351, - "learning_rate": 8.387041773231033e-06, - "loss": 0.1036, - "step": 946 - }, - { - "epoch": 0.16146202413618563, - "grad_norm": 0.8625507354736328, - "learning_rate": 8.38533674339301e-06, - "loss": 0.1888, - "step": 947 - }, - { - "epoch": 0.16163252257772331, - "grad_norm": 0.8963658809661865, - "learning_rate": 8.383631713554989e-06, - "loss": 0.0969, - "step": 948 - }, - { - "epoch": 0.161803021019261, - "grad_norm": 0.7759039998054504, - "learning_rate": 8.381926683716965e-06, - "loss": 0.098, - "step": 949 - }, - { - "epoch": 0.1619735194607987, - "grad_norm": 0.7342348694801331, - "learning_rate": 8.380221653878944e-06, - "loss": 0.1258, - "step": 950 - }, - { - "epoch": 0.16214401790233637, - "grad_norm": 0.6597558856010437, - "learning_rate": 8.37851662404092e-06, - "loss": 0.1128, - "step": 951 - }, - { - "epoch": 0.16231451634387403, - "grad_norm": 1.2019399404525757, - "learning_rate": 8.376811594202899e-06, - "loss": 0.2601, - "step": 952 - }, - { - "epoch": 0.16248501478541172, - "grad_norm": 0.915368914604187, - "learning_rate": 8.375106564364877e-06, - "loss": 0.121, - "step": 953 - }, - { - "epoch": 0.1626555132269494, - "grad_norm": 0.9407950043678284, - "learning_rate": 8.373401534526855e-06, - "loss": 0.143, - "step": 954 - }, - { - "epoch": 0.1628260116684871, - "grad_norm": 0.8434804081916809, - "learning_rate": 8.371696504688832e-06, - "loss": 0.1648, - "step": 955 - }, - { - "epoch": 0.16299651011002478, - "grad_norm": 1.3952746391296387, - "learning_rate": 8.36999147485081e-06, - "loss": 0.1728, - "step": 956 - }, - { - "epoch": 0.16316700855156246, - "grad_norm": 0.6356755495071411, - "learning_rate": 8.368286445012788e-06, - "loss": 0.1485, - "step": 957 - }, - { - "epoch": 0.16333750699310015, - "grad_norm": 0.6824244260787964, - "learning_rate": 8.366581415174766e-06, - "loss": 0.1239, - "step": 958 - }, - { - "epoch": 0.16350800543463784, - "grad_norm": 0.8188114166259766, - "learning_rate": 8.364876385336744e-06, - "loss": 0.1449, - "step": 959 - }, - { - "epoch": 0.1636785038761755, - "grad_norm": 0.781184196472168, - "learning_rate": 8.363171355498722e-06, - "loss": 0.1508, - "step": 960 - }, - { - "epoch": 0.16384900231771318, - "grad_norm": 0.7655654549598694, - "learning_rate": 8.3614663256607e-06, - "loss": 0.0807, - "step": 961 - }, - { - "epoch": 0.16401950075925087, - "grad_norm": 1.431767463684082, - "learning_rate": 8.359761295822676e-06, - "loss": 0.187, - "step": 962 - }, - { - "epoch": 0.16418999920078856, - "grad_norm": 1.4487807750701904, - "learning_rate": 8.358056265984656e-06, - "loss": 0.2303, - "step": 963 - }, - { - "epoch": 0.16436049764232624, - "grad_norm": 0.8229055404663086, - "learning_rate": 8.356351236146634e-06, - "loss": 0.1904, - "step": 964 - }, - { - "epoch": 0.16453099608386393, - "grad_norm": 1.2401492595672607, - "learning_rate": 8.35464620630861e-06, - "loss": 0.0672, - "step": 965 - }, - { - "epoch": 0.16470149452540161, - "grad_norm": 0.9173046350479126, - "learning_rate": 8.35294117647059e-06, - "loss": 0.1383, - "step": 966 - }, - { - "epoch": 0.16487199296693927, - "grad_norm": 0.7897533774375916, - "learning_rate": 8.351236146632566e-06, - "loss": 0.183, - "step": 967 - }, - { - "epoch": 0.16504249140847696, - "grad_norm": 0.8701984882354736, - "learning_rate": 8.349531116794544e-06, - "loss": 0.156, - "step": 968 - }, - { - "epoch": 0.16521298985001465, - "grad_norm": 0.7386003732681274, - "learning_rate": 8.347826086956522e-06, - "loss": 0.1993, - "step": 969 - }, - { - "epoch": 0.16538348829155233, - "grad_norm": 0.9146957993507385, - "learning_rate": 8.3461210571185e-06, - "loss": 0.1883, - "step": 970 - }, - { - "epoch": 0.16555398673309002, - "grad_norm": 1.4515008926391602, - "learning_rate": 8.344416027280478e-06, - "loss": 0.2608, - "step": 971 - }, - { - "epoch": 0.1657244851746277, - "grad_norm": 0.6535465121269226, - "learning_rate": 8.342710997442456e-06, - "loss": 0.1371, - "step": 972 - }, - { - "epoch": 0.1658949836161654, - "grad_norm": 0.9747045040130615, - "learning_rate": 8.341005967604434e-06, - "loss": 0.2147, - "step": 973 - }, - { - "epoch": 0.16606548205770308, - "grad_norm": 0.5989007353782654, - "learning_rate": 8.339300937766412e-06, - "loss": 0.1839, - "step": 974 - }, - { - "epoch": 0.16623598049924074, - "grad_norm": 0.6900016069412231, - "learning_rate": 8.33759590792839e-06, - "loss": 0.1518, - "step": 975 - }, - { - "epoch": 0.16640647894077842, - "grad_norm": 0.7895710468292236, - "learning_rate": 8.335890878090368e-06, - "loss": 0.1331, - "step": 976 - }, - { - "epoch": 0.1665769773823161, - "grad_norm": 0.867023229598999, - "learning_rate": 8.334185848252346e-06, - "loss": 0.2256, - "step": 977 - }, - { - "epoch": 0.1667474758238538, - "grad_norm": 0.797188401222229, - "learning_rate": 8.332480818414322e-06, - "loss": 0.0746, - "step": 978 - }, - { - "epoch": 0.16691797426539148, - "grad_norm": 0.8969321846961975, - "learning_rate": 8.330775788576302e-06, - "loss": 0.1294, - "step": 979 - }, - { - "epoch": 0.16708847270692917, - "grad_norm": 0.5373475551605225, - "learning_rate": 8.32907075873828e-06, - "loss": 0.0832, - "step": 980 - }, - { - "epoch": 0.16725897114846686, - "grad_norm": 0.7001715302467346, - "learning_rate": 8.327365728900256e-06, - "loss": 0.0546, - "step": 981 - }, - { - "epoch": 0.16742946959000454, - "grad_norm": 0.8702039122581482, - "learning_rate": 8.325660699062235e-06, - "loss": 0.1504, - "step": 982 - }, - { - "epoch": 0.1675999680315422, - "grad_norm": 0.7910013794898987, - "learning_rate": 8.323955669224212e-06, - "loss": 0.1656, - "step": 983 - }, - { - "epoch": 0.1677704664730799, - "grad_norm": 0.6668055653572083, - "learning_rate": 8.32225063938619e-06, - "loss": 0.1416, - "step": 984 - }, - { - "epoch": 0.16794096491461757, - "grad_norm": 0.9219913482666016, - "learning_rate": 8.320545609548167e-06, - "loss": 0.1858, - "step": 985 - }, - { - "epoch": 0.16811146335615526, - "grad_norm": 1.0230827331542969, - "learning_rate": 8.318840579710145e-06, - "loss": 0.1463, - "step": 986 - }, - { - "epoch": 0.16828196179769295, - "grad_norm": 0.9198185801506042, - "learning_rate": 8.317135549872123e-06, - "loss": 0.1762, - "step": 987 - }, - { - "epoch": 0.16845246023923063, - "grad_norm": 0.6115066409111023, - "learning_rate": 8.315430520034101e-06, - "loss": 0.1259, - "step": 988 - }, - { - "epoch": 0.16862295868076832, - "grad_norm": 0.7364334464073181, - "learning_rate": 8.31372549019608e-06, - "loss": 0.2297, - "step": 989 - }, - { - "epoch": 0.16879345712230598, - "grad_norm": 0.6792196035385132, - "learning_rate": 8.312020460358057e-06, - "loss": 0.1877, - "step": 990 - }, - { - "epoch": 0.16896395556384367, - "grad_norm": 0.6307271718978882, - "learning_rate": 8.310315430520035e-06, - "loss": 0.112, - "step": 991 - }, - { - "epoch": 0.16913445400538135, - "grad_norm": 0.7789974212646484, - "learning_rate": 8.308610400682013e-06, - "loss": 0.1531, - "step": 992 - }, - { - "epoch": 0.16930495244691904, - "grad_norm": 0.7418656945228577, - "learning_rate": 8.306905370843991e-06, - "loss": 0.0761, - "step": 993 - }, - { - "epoch": 0.16947545088845672, - "grad_norm": 0.6087162494659424, - "learning_rate": 8.305200341005967e-06, - "loss": 0.1508, - "step": 994 - }, - { - "epoch": 0.1696459493299944, - "grad_norm": 0.7893383502960205, - "learning_rate": 8.303495311167947e-06, - "loss": 0.159, - "step": 995 - }, - { - "epoch": 0.1698164477715321, - "grad_norm": 0.6431482434272766, - "learning_rate": 8.301790281329925e-06, - "loss": 0.1404, - "step": 996 - }, - { - "epoch": 0.16998694621306978, - "grad_norm": 0.6812745928764343, - "learning_rate": 8.300085251491901e-06, - "loss": 0.1582, - "step": 997 - }, - { - "epoch": 0.17015744465460744, - "grad_norm": 0.7111532092094421, - "learning_rate": 8.29838022165388e-06, - "loss": 0.1712, - "step": 998 - }, - { - "epoch": 0.17032794309614513, - "grad_norm": 0.8176578283309937, - "learning_rate": 8.296675191815857e-06, - "loss": 0.169, - "step": 999 - }, - { - "epoch": 0.17049844153768282, - "grad_norm": 0.8261346220970154, - "learning_rate": 8.294970161977835e-06, - "loss": 0.1914, - "step": 1000 - }, - { - "epoch": 0.17049844153768282, - "eval_f1_score": 0.18012422360248448, - "eval_loss": 0.16470393538475037, - "eval_runtime": 182.6084, - "eval_samples_per_second": 54.762, - "eval_steps_per_second": 3.423, - "step": 1000 - }, - { - "epoch": 0.1706689399792205, - "grad_norm": 0.8544616103172302, - "learning_rate": 8.293265132139813e-06, - "loss": 0.1978, - "step": 1001 - }, - { - "epoch": 0.1708394384207582, - "grad_norm": 0.7983449101448059, - "learning_rate": 8.291560102301791e-06, - "loss": 0.1464, - "step": 1002 - }, - { - "epoch": 0.17100993686229587, - "grad_norm": 0.998521625995636, - "learning_rate": 8.289855072463769e-06, - "loss": 0.1008, - "step": 1003 - }, - { - "epoch": 0.17118043530383356, - "grad_norm": 0.9781310558319092, - "learning_rate": 8.288150042625747e-06, - "loss": 0.2678, - "step": 1004 - }, - { - "epoch": 0.17135093374537122, - "grad_norm": 0.8729251623153687, - "learning_rate": 8.286445012787725e-06, - "loss": 0.144, - "step": 1005 - }, - { - "epoch": 0.1715214321869089, - "grad_norm": 0.8118807673454285, - "learning_rate": 8.284739982949703e-06, - "loss": 0.1468, - "step": 1006 - }, - { - "epoch": 0.1716919306284466, - "grad_norm": 0.9156134128570557, - "learning_rate": 8.28303495311168e-06, - "loss": 0.2034, - "step": 1007 - }, - { - "epoch": 0.17186242906998428, - "grad_norm": 0.6133968234062195, - "learning_rate": 8.281329923273657e-06, - "loss": 0.1561, - "step": 1008 - }, - { - "epoch": 0.17203292751152197, - "grad_norm": 0.9954972863197327, - "learning_rate": 8.279624893435637e-06, - "loss": 0.1995, - "step": 1009 - }, - { - "epoch": 0.17220342595305965, - "grad_norm": 0.7595720887184143, - "learning_rate": 8.277919863597613e-06, - "loss": 0.1625, - "step": 1010 - }, - { - "epoch": 0.17237392439459734, - "grad_norm": 0.7384895086288452, - "learning_rate": 8.27621483375959e-06, - "loss": 0.1656, - "step": 1011 - }, - { - "epoch": 0.17254442283613503, - "grad_norm": 1.3423556089401245, - "learning_rate": 8.274509803921569e-06, - "loss": 0.1169, - "step": 1012 - }, - { - "epoch": 0.17271492127767268, - "grad_norm": 0.6995768547058105, - "learning_rate": 8.272804774083547e-06, - "loss": 0.1639, - "step": 1013 - }, - { - "epoch": 0.17288541971921037, - "grad_norm": 1.036745548248291, - "learning_rate": 8.271099744245525e-06, - "loss": 0.2056, - "step": 1014 - }, - { - "epoch": 0.17305591816074806, - "grad_norm": 0.8006750345230103, - "learning_rate": 8.269394714407503e-06, - "loss": 0.1981, - "step": 1015 - }, - { - "epoch": 0.17322641660228574, - "grad_norm": 1.0786309242248535, - "learning_rate": 8.26768968456948e-06, - "loss": 0.2123, - "step": 1016 - }, - { - "epoch": 0.17339691504382343, - "grad_norm": 0.6563388705253601, - "learning_rate": 8.265984654731458e-06, - "loss": 0.121, - "step": 1017 - }, - { - "epoch": 0.17356741348536112, - "grad_norm": 0.6930649876594543, - "learning_rate": 8.264279624893436e-06, - "loss": 0.1634, - "step": 1018 - }, - { - "epoch": 0.1737379119268988, - "grad_norm": 0.7806385159492493, - "learning_rate": 8.262574595055414e-06, - "loss": 0.0994, - "step": 1019 - }, - { - "epoch": 0.1739084103684365, - "grad_norm": 0.6290420889854431, - "learning_rate": 8.260869565217392e-06, - "loss": 0.1625, - "step": 1020 - }, - { - "epoch": 0.17407890880997415, - "grad_norm": 0.5905277132987976, - "learning_rate": 8.25916453537937e-06, - "loss": 0.1627, - "step": 1021 - }, - { - "epoch": 0.17424940725151183, - "grad_norm": 0.7448222637176514, - "learning_rate": 8.257459505541348e-06, - "loss": 0.1767, - "step": 1022 - }, - { - "epoch": 0.17441990569304952, - "grad_norm": 0.7371039390563965, - "learning_rate": 8.255754475703326e-06, - "loss": 0.0966, - "step": 1023 - }, - { - "epoch": 0.1745904041345872, - "grad_norm": 0.8105472326278687, - "learning_rate": 8.254049445865302e-06, - "loss": 0.1085, - "step": 1024 - }, - { - "epoch": 0.1747609025761249, - "grad_norm": 0.6953812837600708, - "learning_rate": 8.252344416027282e-06, - "loss": 0.1188, - "step": 1025 - }, - { - "epoch": 0.17493140101766258, - "grad_norm": 0.595607340335846, - "learning_rate": 8.250639386189258e-06, - "loss": 0.1055, - "step": 1026 - }, - { - "epoch": 0.17510189945920027, - "grad_norm": 1.1801515817642212, - "learning_rate": 8.248934356351236e-06, - "loss": 0.2072, - "step": 1027 - }, - { - "epoch": 0.17527239790073793, - "grad_norm": 0.7170021533966064, - "learning_rate": 8.247229326513214e-06, - "loss": 0.1377, - "step": 1028 - }, - { - "epoch": 0.1754428963422756, - "grad_norm": 0.524588406085968, - "learning_rate": 8.245524296675192e-06, - "loss": 0.0811, - "step": 1029 - }, - { - "epoch": 0.1756133947838133, - "grad_norm": 1.3629536628723145, - "learning_rate": 8.24381926683717e-06, - "loss": 0.1628, - "step": 1030 - }, - { - "epoch": 0.17578389322535098, - "grad_norm": 1.0692698955535889, - "learning_rate": 8.242114236999148e-06, - "loss": 0.1084, - "step": 1031 - }, - { - "epoch": 0.17595439166688867, - "grad_norm": 0.9809920787811279, - "learning_rate": 8.240409207161126e-06, - "loss": 0.168, - "step": 1032 - }, - { - "epoch": 0.17612489010842636, - "grad_norm": 0.5582424998283386, - "learning_rate": 8.238704177323104e-06, - "loss": 0.1203, - "step": 1033 - }, - { - "epoch": 0.17629538854996404, - "grad_norm": 0.6211906671524048, - "learning_rate": 8.236999147485082e-06, - "loss": 0.1139, - "step": 1034 - }, - { - "epoch": 0.17646588699150173, - "grad_norm": 0.8978925347328186, - "learning_rate": 8.23529411764706e-06, - "loss": 0.0401, - "step": 1035 - }, - { - "epoch": 0.1766363854330394, - "grad_norm": 0.7331124544143677, - "learning_rate": 8.233589087809038e-06, - "loss": 0.149, - "step": 1036 - }, - { - "epoch": 0.17680688387457708, - "grad_norm": 0.9506707787513733, - "learning_rate": 8.231884057971014e-06, - "loss": 0.1644, - "step": 1037 - }, - { - "epoch": 0.17697738231611476, - "grad_norm": 0.7511488199234009, - "learning_rate": 8.230179028132994e-06, - "loss": 0.1275, - "step": 1038 - }, - { - "epoch": 0.17714788075765245, - "grad_norm": 0.7483866214752197, - "learning_rate": 8.228473998294972e-06, - "loss": 0.1603, - "step": 1039 - }, - { - "epoch": 0.17731837919919013, - "grad_norm": 0.7479255795478821, - "learning_rate": 8.226768968456948e-06, - "loss": 0.1182, - "step": 1040 - }, - { - "epoch": 0.17748887764072782, - "grad_norm": 0.6701338291168213, - "learning_rate": 8.225063938618927e-06, - "loss": 0.114, - "step": 1041 - }, - { - "epoch": 0.1776593760822655, - "grad_norm": 1.5791823863983154, - "learning_rate": 8.223358908780904e-06, - "loss": 0.2531, - "step": 1042 - }, - { - "epoch": 0.1778298745238032, - "grad_norm": 0.8722712397575378, - "learning_rate": 8.221653878942882e-06, - "loss": 0.163, - "step": 1043 - }, - { - "epoch": 0.17800037296534085, - "grad_norm": 0.8049382567405701, - "learning_rate": 8.21994884910486e-06, - "loss": 0.1425, - "step": 1044 - }, - { - "epoch": 0.17817087140687854, - "grad_norm": 0.8700260519981384, - "learning_rate": 8.218243819266838e-06, - "loss": 0.1701, - "step": 1045 - }, - { - "epoch": 0.17834136984841623, - "grad_norm": 0.7408254146575928, - "learning_rate": 8.216538789428816e-06, - "loss": 0.1226, - "step": 1046 - }, - { - "epoch": 0.1785118682899539, - "grad_norm": 1.1590758562088013, - "learning_rate": 8.214833759590793e-06, - "loss": 0.2347, - "step": 1047 - }, - { - "epoch": 0.1786823667314916, - "grad_norm": 0.891322135925293, - "learning_rate": 8.213128729752771e-06, - "loss": 0.1725, - "step": 1048 - }, - { - "epoch": 0.17885286517302929, - "grad_norm": 0.9721522927284241, - "learning_rate": 8.21142369991475e-06, - "loss": 0.1578, - "step": 1049 - }, - { - "epoch": 0.17902336361456697, - "grad_norm": 1.037187933921814, - "learning_rate": 8.209718670076727e-06, - "loss": 0.1349, - "step": 1050 - }, - { - "epoch": 0.17919386205610463, - "grad_norm": 0.9142031073570251, - "learning_rate": 8.208013640238705e-06, - "loss": 0.1507, - "step": 1051 - }, - { - "epoch": 0.17936436049764232, - "grad_norm": 0.7952942848205566, - "learning_rate": 8.206308610400683e-06, - "loss": 0.0867, - "step": 1052 - }, - { - "epoch": 0.17953485893918, - "grad_norm": 1.0088366270065308, - "learning_rate": 8.20460358056266e-06, - "loss": 0.235, - "step": 1053 - }, - { - "epoch": 0.1797053573807177, - "grad_norm": 1.3762648105621338, - "learning_rate": 8.202898550724639e-06, - "loss": 0.1695, - "step": 1054 - }, - { - "epoch": 0.17987585582225538, - "grad_norm": 0.5625283122062683, - "learning_rate": 8.201193520886617e-06, - "loss": 0.0914, - "step": 1055 - }, - { - "epoch": 0.18004635426379306, - "grad_norm": 1.4387606382369995, - "learning_rate": 8.199488491048593e-06, - "loss": 0.162, - "step": 1056 - }, - { - "epoch": 0.18021685270533075, - "grad_norm": 0.8674620389938354, - "learning_rate": 8.197783461210573e-06, - "loss": 0.1298, - "step": 1057 - }, - { - "epoch": 0.18038735114686844, - "grad_norm": 0.7806848287582397, - "learning_rate": 8.19607843137255e-06, - "loss": 0.1555, - "step": 1058 - }, - { - "epoch": 0.1805578495884061, - "grad_norm": 0.9684147834777832, - "learning_rate": 8.194373401534527e-06, - "loss": 0.1036, - "step": 1059 - }, - { - "epoch": 0.18072834802994378, - "grad_norm": 0.6939026117324829, - "learning_rate": 8.192668371696505e-06, - "loss": 0.1149, - "step": 1060 - }, - { - "epoch": 0.18089884647148147, - "grad_norm": 1.6228690147399902, - "learning_rate": 8.190963341858483e-06, - "loss": 0.2124, - "step": 1061 - }, - { - "epoch": 0.18106934491301915, - "grad_norm": 0.9765108227729797, - "learning_rate": 8.189258312020461e-06, - "loss": 0.1768, - "step": 1062 - }, - { - "epoch": 0.18123984335455684, - "grad_norm": 0.7821102142333984, - "learning_rate": 8.187553282182439e-06, - "loss": 0.1132, - "step": 1063 - }, - { - "epoch": 0.18141034179609453, - "grad_norm": 1.0715186595916748, - "learning_rate": 8.185848252344417e-06, - "loss": 0.1132, - "step": 1064 - }, - { - "epoch": 0.1815808402376322, - "grad_norm": 1.13173508644104, - "learning_rate": 8.184143222506395e-06, - "loss": 0.0617, - "step": 1065 - }, - { - "epoch": 0.1817513386791699, - "grad_norm": 0.7820847630500793, - "learning_rate": 8.182438192668373e-06, - "loss": 0.1588, - "step": 1066 - }, - { - "epoch": 0.18192183712070756, - "grad_norm": 0.8238593935966492, - "learning_rate": 8.18073316283035e-06, - "loss": 0.1767, - "step": 1067 - }, - { - "epoch": 0.18209233556224524, - "grad_norm": 1.0191346406936646, - "learning_rate": 8.179028132992329e-06, - "loss": 0.2245, - "step": 1068 - }, - { - "epoch": 0.18226283400378293, - "grad_norm": 1.069532871246338, - "learning_rate": 8.177323103154305e-06, - "loss": 0.0601, - "step": 1069 - }, - { - "epoch": 0.18243333244532062, - "grad_norm": 1.0120583772659302, - "learning_rate": 8.175618073316285e-06, - "loss": 0.1654, - "step": 1070 - }, - { - "epoch": 0.1826038308868583, - "grad_norm": 2.169363021850586, - "learning_rate": 8.173913043478263e-06, - "loss": 0.2481, - "step": 1071 - }, - { - "epoch": 0.182774329328396, - "grad_norm": 1.7898081541061401, - "learning_rate": 8.172208013640239e-06, - "loss": 0.2461, - "step": 1072 - }, - { - "epoch": 0.18294482776993368, - "grad_norm": 0.9205228090286255, - "learning_rate": 8.170502983802218e-06, - "loss": 0.1533, - "step": 1073 - }, - { - "epoch": 0.18311532621147134, - "grad_norm": 0.8017996549606323, - "learning_rate": 8.168797953964195e-06, - "loss": 0.1057, - "step": 1074 - }, - { - "epoch": 0.18328582465300902, - "grad_norm": 0.9081313610076904, - "learning_rate": 8.167092924126173e-06, - "loss": 0.1907, - "step": 1075 - }, - { - "epoch": 0.1834563230945467, - "grad_norm": 1.5040651559829712, - "learning_rate": 8.16538789428815e-06, - "loss": 0.2621, - "step": 1076 - }, - { - "epoch": 0.1836268215360844, - "grad_norm": 0.8921488523483276, - "learning_rate": 8.163682864450129e-06, - "loss": 0.1452, - "step": 1077 - }, - { - "epoch": 0.18379731997762208, - "grad_norm": 1.1706310510635376, - "learning_rate": 8.161977834612106e-06, - "loss": 0.0867, - "step": 1078 - }, - { - "epoch": 0.18396781841915977, - "grad_norm": 1.15962553024292, - "learning_rate": 8.160272804774084e-06, - "loss": 0.2057, - "step": 1079 - }, - { - "epoch": 0.18413831686069745, - "grad_norm": 1.0124270915985107, - "learning_rate": 8.158567774936062e-06, - "loss": 0.1282, - "step": 1080 - }, - { - "epoch": 0.18430881530223514, - "grad_norm": 0.5859152674674988, - "learning_rate": 8.15686274509804e-06, - "loss": 0.1282, - "step": 1081 - }, - { - "epoch": 0.1844793137437728, - "grad_norm": 0.691265344619751, - "learning_rate": 8.155157715260018e-06, - "loss": 0.1481, - "step": 1082 - }, - { - "epoch": 0.18464981218531049, - "grad_norm": 0.9174695014953613, - "learning_rate": 8.153452685421995e-06, - "loss": 0.1903, - "step": 1083 - }, - { - "epoch": 0.18482031062684817, - "grad_norm": 1.0907355546951294, - "learning_rate": 8.151747655583974e-06, - "loss": 0.1909, - "step": 1084 - }, - { - "epoch": 0.18499080906838586, - "grad_norm": 0.8020131587982178, - "learning_rate": 8.15004262574595e-06, - "loss": 0.1207, - "step": 1085 - }, - { - "epoch": 0.18516130750992355, - "grad_norm": 0.6175723075866699, - "learning_rate": 8.148337595907928e-06, - "loss": 0.1203, - "step": 1086 - }, - { - "epoch": 0.18533180595146123, - "grad_norm": 1.7859039306640625, - "learning_rate": 8.146632566069906e-06, - "loss": 0.2715, - "step": 1087 - }, - { - "epoch": 0.18550230439299892, - "grad_norm": 0.9180811643600464, - "learning_rate": 8.144927536231884e-06, - "loss": 0.1733, - "step": 1088 - }, - { - "epoch": 0.1856728028345366, - "grad_norm": 0.9217977523803711, - "learning_rate": 8.143222506393862e-06, - "loss": 0.151, - "step": 1089 - }, - { - "epoch": 0.18584330127607426, - "grad_norm": 0.5744757652282715, - "learning_rate": 8.14151747655584e-06, - "loss": 0.1393, - "step": 1090 - }, - { - "epoch": 0.18601379971761195, - "grad_norm": 1.082798957824707, - "learning_rate": 8.139812446717818e-06, - "loss": 0.0939, - "step": 1091 - }, - { - "epoch": 0.18618429815914964, - "grad_norm": 0.8758176565170288, - "learning_rate": 8.138107416879796e-06, - "loss": 0.1736, - "step": 1092 - }, - { - "epoch": 0.18635479660068732, - "grad_norm": 0.8503788113594055, - "learning_rate": 8.136402387041774e-06, - "loss": 0.1174, - "step": 1093 - }, - { - "epoch": 0.186525295042225, - "grad_norm": 0.8520966172218323, - "learning_rate": 8.134697357203752e-06, - "loss": 0.1138, - "step": 1094 - }, - { - "epoch": 0.1866957934837627, - "grad_norm": 0.6943926215171814, - "learning_rate": 8.13299232736573e-06, - "loss": 0.1266, - "step": 1095 - }, - { - "epoch": 0.18686629192530038, - "grad_norm": 1.430769920349121, - "learning_rate": 8.131287297527706e-06, - "loss": 0.2663, - "step": 1096 - }, - { - "epoch": 0.18703679036683804, - "grad_norm": 0.6373170018196106, - "learning_rate": 8.129582267689686e-06, - "loss": 0.1744, - "step": 1097 - }, - { - "epoch": 0.18720728880837573, - "grad_norm": 1.0063538551330566, - "learning_rate": 8.127877237851664e-06, - "loss": 0.185, - "step": 1098 - }, - { - "epoch": 0.1873777872499134, - "grad_norm": 0.666248083114624, - "learning_rate": 8.12617220801364e-06, - "loss": 0.1649, - "step": 1099 - }, - { - "epoch": 0.1875482856914511, - "grad_norm": 0.635853111743927, - "learning_rate": 8.12446717817562e-06, - "loss": 0.1536, - "step": 1100 - }, - { - "epoch": 0.1875482856914511, - "eval_f1_score": 0.22028985507246376, - "eval_loss": 0.1652476042509079, - "eval_runtime": 182.5656, - "eval_samples_per_second": 54.775, - "eval_steps_per_second": 3.423, - "step": 1100 - }, - { - "epoch": 0.1877187841329888, - "grad_norm": 0.6525788307189941, - "learning_rate": 8.122762148337596e-06, - "loss": 0.1528, - "step": 1101 - }, - { - "epoch": 0.18788928257452647, - "grad_norm": 0.8407154083251953, - "learning_rate": 8.121057118499574e-06, - "loss": 0.107, - "step": 1102 - }, - { - "epoch": 0.18805978101606416, - "grad_norm": 1.0600398778915405, - "learning_rate": 8.119352088661552e-06, - "loss": 0.1299, - "step": 1103 - }, - { - "epoch": 0.18823027945760185, - "grad_norm": 0.7606744766235352, - "learning_rate": 8.11764705882353e-06, - "loss": 0.1808, - "step": 1104 - }, - { - "epoch": 0.1884007778991395, - "grad_norm": 0.7155107259750366, - "learning_rate": 8.115942028985508e-06, - "loss": 0.1413, - "step": 1105 - }, - { - "epoch": 0.1885712763406772, - "grad_norm": 1.1344115734100342, - "learning_rate": 8.114236999147486e-06, - "loss": 0.2271, - "step": 1106 - }, - { - "epoch": 0.18874177478221488, - "grad_norm": 0.6760721802711487, - "learning_rate": 8.112531969309464e-06, - "loss": 0.1453, - "step": 1107 - }, - { - "epoch": 0.18891227322375256, - "grad_norm": 0.6441182494163513, - "learning_rate": 8.110826939471442e-06, - "loss": 0.0994, - "step": 1108 - }, - { - "epoch": 0.18908277166529025, - "grad_norm": 0.8685035109519958, - "learning_rate": 8.10912190963342e-06, - "loss": 0.1631, - "step": 1109 - }, - { - "epoch": 0.18925327010682794, - "grad_norm": 1.2088656425476074, - "learning_rate": 8.107416879795397e-06, - "loss": 0.204, - "step": 1110 - }, - { - "epoch": 0.18942376854836562, - "grad_norm": 0.5793415307998657, - "learning_rate": 8.105711849957375e-06, - "loss": 0.0975, - "step": 1111 - }, - { - "epoch": 0.18959426698990328, - "grad_norm": 0.8224422931671143, - "learning_rate": 8.104006820119352e-06, - "loss": 0.1729, - "step": 1112 - }, - { - "epoch": 0.18976476543144097, - "grad_norm": 1.1413917541503906, - "learning_rate": 8.102301790281331e-06, - "loss": 0.2047, - "step": 1113 - }, - { - "epoch": 0.18993526387297865, - "grad_norm": 0.9919366240501404, - "learning_rate": 8.10059676044331e-06, - "loss": 0.104, - "step": 1114 - }, - { - "epoch": 0.19010576231451634, - "grad_norm": 0.8470364809036255, - "learning_rate": 8.098891730605285e-06, - "loss": 0.2055, - "step": 1115 - }, - { - "epoch": 0.19027626075605403, - "grad_norm": 0.8210625052452087, - "learning_rate": 8.097186700767265e-06, - "loss": 0.1939, - "step": 1116 - }, - { - "epoch": 0.19044675919759171, - "grad_norm": 0.8386514186859131, - "learning_rate": 8.095481670929241e-06, - "loss": 0.1323, - "step": 1117 - }, - { - "epoch": 0.1906172576391294, - "grad_norm": 0.8340263962745667, - "learning_rate": 8.09377664109122e-06, - "loss": 0.1597, - "step": 1118 - }, - { - "epoch": 0.1907877560806671, - "grad_norm": 0.7370643615722656, - "learning_rate": 8.092071611253197e-06, - "loss": 0.1838, - "step": 1119 - }, - { - "epoch": 0.19095825452220475, - "grad_norm": 0.8056231737136841, - "learning_rate": 8.090366581415175e-06, - "loss": 0.1611, - "step": 1120 - }, - { - "epoch": 0.19112875296374243, - "grad_norm": 0.7211765050888062, - "learning_rate": 8.088661551577153e-06, - "loss": 0.1507, - "step": 1121 - }, - { - "epoch": 0.19129925140528012, - "grad_norm": 1.055554986000061, - "learning_rate": 8.086956521739131e-06, - "loss": 0.1991, - "step": 1122 - }, - { - "epoch": 0.1914697498468178, - "grad_norm": 0.8282398581504822, - "learning_rate": 8.085251491901109e-06, - "loss": 0.1819, - "step": 1123 - }, - { - "epoch": 0.1916402482883555, - "grad_norm": 0.9935968518257141, - "learning_rate": 8.083546462063087e-06, - "loss": 0.1149, - "step": 1124 - }, - { - "epoch": 0.19181074672989318, - "grad_norm": 0.8828055262565613, - "learning_rate": 8.081841432225065e-06, - "loss": 0.1656, - "step": 1125 - }, - { - "epoch": 0.19198124517143086, - "grad_norm": 0.7493565678596497, - "learning_rate": 8.080136402387043e-06, - "loss": 0.1505, - "step": 1126 - }, - { - "epoch": 0.19215174361296855, - "grad_norm": 0.8418991565704346, - "learning_rate": 8.07843137254902e-06, - "loss": 0.1045, - "step": 1127 - }, - { - "epoch": 0.1923222420545062, - "grad_norm": 0.8248168230056763, - "learning_rate": 8.076726342710997e-06, - "loss": 0.2225, - "step": 1128 - }, - { - "epoch": 0.1924927404960439, - "grad_norm": 0.6494383215904236, - "learning_rate": 8.075021312872977e-06, - "loss": 0.1073, - "step": 1129 - }, - { - "epoch": 0.19266323893758158, - "grad_norm": 0.6220464110374451, - "learning_rate": 8.073316283034955e-06, - "loss": 0.089, - "step": 1130 - }, - { - "epoch": 0.19283373737911927, - "grad_norm": 0.7500420212745667, - "learning_rate": 8.071611253196931e-06, - "loss": 0.1275, - "step": 1131 - }, - { - "epoch": 0.19300423582065696, - "grad_norm": 1.1788630485534668, - "learning_rate": 8.06990622335891e-06, - "loss": 0.2489, - "step": 1132 - }, - { - "epoch": 0.19317473426219464, - "grad_norm": 0.9187591671943665, - "learning_rate": 8.068201193520887e-06, - "loss": 0.223, - "step": 1133 - }, - { - "epoch": 0.19334523270373233, - "grad_norm": 0.5593259930610657, - "learning_rate": 8.066496163682865e-06, - "loss": 0.1245, - "step": 1134 - }, - { - "epoch": 0.19351573114527, - "grad_norm": 0.9649487733840942, - "learning_rate": 8.064791133844843e-06, - "loss": 0.1953, - "step": 1135 - }, - { - "epoch": 0.19368622958680767, - "grad_norm": 0.993848443031311, - "learning_rate": 8.06308610400682e-06, - "loss": 0.2279, - "step": 1136 - }, - { - "epoch": 0.19385672802834536, - "grad_norm": 0.593940794467926, - "learning_rate": 8.061381074168799e-06, - "loss": 0.1847, - "step": 1137 - }, - { - "epoch": 0.19402722646988305, - "grad_norm": 0.6698784828186035, - "learning_rate": 8.059676044330777e-06, - "loss": 0.12, - "step": 1138 - }, - { - "epoch": 0.19419772491142073, - "grad_norm": 0.9543096423149109, - "learning_rate": 8.057971014492754e-06, - "loss": 0.1031, - "step": 1139 - }, - { - "epoch": 0.19436822335295842, - "grad_norm": 0.6808764934539795, - "learning_rate": 8.056265984654732e-06, - "loss": 0.1851, - "step": 1140 - }, - { - "epoch": 0.1945387217944961, - "grad_norm": 0.9641972780227661, - "learning_rate": 8.05456095481671e-06, - "loss": 0.2253, - "step": 1141 - }, - { - "epoch": 0.1947092202360338, - "grad_norm": 0.6276617646217346, - "learning_rate": 8.052855924978688e-06, - "loss": 0.2123, - "step": 1142 - }, - { - "epoch": 0.19487971867757145, - "grad_norm": 0.778846025466919, - "learning_rate": 8.051150895140666e-06, - "loss": 0.1573, - "step": 1143 - }, - { - "epoch": 0.19505021711910914, - "grad_norm": 0.6329131722450256, - "learning_rate": 8.049445865302643e-06, - "loss": 0.1638, - "step": 1144 - }, - { - "epoch": 0.19522071556064682, - "grad_norm": 0.7255607843399048, - "learning_rate": 8.047740835464622e-06, - "loss": 0.1964, - "step": 1145 - }, - { - "epoch": 0.1953912140021845, - "grad_norm": 0.7392814755439758, - "learning_rate": 8.046035805626598e-06, - "loss": 0.1729, - "step": 1146 - }, - { - "epoch": 0.1955617124437222, - "grad_norm": 0.5993164777755737, - "learning_rate": 8.044330775788576e-06, - "loss": 0.1532, - "step": 1147 - }, - { - "epoch": 0.19573221088525988, - "grad_norm": 0.6985999345779419, - "learning_rate": 8.042625745950556e-06, - "loss": 0.1615, - "step": 1148 - }, - { - "epoch": 0.19590270932679757, - "grad_norm": 0.7796421051025391, - "learning_rate": 8.040920716112532e-06, - "loss": 0.0907, - "step": 1149 - }, - { - "epoch": 0.19607320776833526, - "grad_norm": 0.5680537819862366, - "learning_rate": 8.03921568627451e-06, - "loss": 0.1613, - "step": 1150 - }, - { - "epoch": 0.19624370620987291, - "grad_norm": 0.5613918900489807, - "learning_rate": 8.037510656436488e-06, - "loss": 0.1104, - "step": 1151 - }, - { - "epoch": 0.1964142046514106, - "grad_norm": 0.660589873790741, - "learning_rate": 8.035805626598466e-06, - "loss": 0.1217, - "step": 1152 - }, - { - "epoch": 0.1965847030929483, - "grad_norm": 0.8582456707954407, - "learning_rate": 8.034100596760444e-06, - "loss": 0.0883, - "step": 1153 - }, - { - "epoch": 0.19675520153448597, - "grad_norm": 1.0223498344421387, - "learning_rate": 8.032395566922422e-06, - "loss": 0.2005, - "step": 1154 - }, - { - "epoch": 0.19692569997602366, - "grad_norm": 0.7808375954627991, - "learning_rate": 8.0306905370844e-06, - "loss": 0.1288, - "step": 1155 - }, - { - "epoch": 0.19709619841756135, - "grad_norm": 0.7909091711044312, - "learning_rate": 8.028985507246378e-06, - "loss": 0.1117, - "step": 1156 - }, - { - "epoch": 0.19726669685909903, - "grad_norm": 1.5872262716293335, - "learning_rate": 8.027280477408356e-06, - "loss": 0.2244, - "step": 1157 - }, - { - "epoch": 0.1974371953006367, - "grad_norm": 0.5757585763931274, - "learning_rate": 8.025575447570332e-06, - "loss": 0.1102, - "step": 1158 - }, - { - "epoch": 0.19760769374217438, - "grad_norm": 0.6899182200431824, - "learning_rate": 8.023870417732312e-06, - "loss": 0.0771, - "step": 1159 - }, - { - "epoch": 0.19777819218371206, - "grad_norm": 1.3838332891464233, - "learning_rate": 8.022165387894288e-06, - "loss": 0.2105, - "step": 1160 - }, - { - "epoch": 0.19794869062524975, - "grad_norm": 1.5885940790176392, - "learning_rate": 8.020460358056266e-06, - "loss": 0.2264, - "step": 1161 - }, - { - "epoch": 0.19811918906678744, - "grad_norm": 1.0133886337280273, - "learning_rate": 8.018755328218244e-06, - "loss": 0.193, - "step": 1162 - }, - { - "epoch": 0.19828968750832512, - "grad_norm": 0.8545687794685364, - "learning_rate": 8.017050298380222e-06, - "loss": 0.1345, - "step": 1163 - }, - { - "epoch": 0.1984601859498628, - "grad_norm": 0.8866453170776367, - "learning_rate": 8.0153452685422e-06, - "loss": 0.1932, - "step": 1164 - }, - { - "epoch": 0.1986306843914005, - "grad_norm": 1.2009869813919067, - "learning_rate": 8.013640238704178e-06, - "loss": 0.1389, - "step": 1165 - }, - { - "epoch": 0.19880118283293816, - "grad_norm": 1.0642001628875732, - "learning_rate": 8.011935208866156e-06, - "loss": 0.1595, - "step": 1166 - }, - { - "epoch": 0.19897168127447584, - "grad_norm": 0.9353564977645874, - "learning_rate": 8.010230179028134e-06, - "loss": 0.17, - "step": 1167 - }, - { - "epoch": 0.19914217971601353, - "grad_norm": 1.202636480331421, - "learning_rate": 8.008525149190112e-06, - "loss": 0.2489, - "step": 1168 - }, - { - "epoch": 0.19931267815755122, - "grad_norm": 0.769627034664154, - "learning_rate": 8.00682011935209e-06, - "loss": 0.1945, - "step": 1169 - }, - { - "epoch": 0.1994831765990889, - "grad_norm": 0.6681427955627441, - "learning_rate": 8.005115089514067e-06, - "loss": 0.1682, - "step": 1170 - }, - { - "epoch": 0.1996536750406266, - "grad_norm": 1.5832921266555786, - "learning_rate": 8.003410059676044e-06, - "loss": 0.2114, - "step": 1171 - }, - { - "epoch": 0.19982417348216427, - "grad_norm": 0.896315336227417, - "learning_rate": 8.001705029838023e-06, - "loss": 0.084, - "step": 1172 - }, - { - "epoch": 0.19999467192370196, - "grad_norm": 1.0023328065872192, - "learning_rate": 8.000000000000001e-06, - "loss": 0.2149, - "step": 1173 - }, - { - "epoch": 0.20016517036523962, - "grad_norm": 0.7651743292808533, - "learning_rate": 7.998294970161978e-06, - "loss": 0.0709, - "step": 1174 - }, - { - "epoch": 0.2003356688067773, - "grad_norm": 1.0578312873840332, - "learning_rate": 7.996589940323957e-06, - "loss": 0.1952, - "step": 1175 - }, - { - "epoch": 0.200506167248315, - "grad_norm": 1.1662102937698364, - "learning_rate": 7.994884910485933e-06, - "loss": 0.227, - "step": 1176 - }, - { - "epoch": 0.20067666568985268, - "grad_norm": 0.852552056312561, - "learning_rate": 7.993179880647911e-06, - "loss": 0.193, - "step": 1177 - }, - { - "epoch": 0.20084716413139037, - "grad_norm": 0.8441449999809265, - "learning_rate": 7.99147485080989e-06, - "loss": 0.1953, - "step": 1178 - }, - { - "epoch": 0.20101766257292805, - "grad_norm": 1.0427707433700562, - "learning_rate": 7.989769820971867e-06, - "loss": 0.0567, - "step": 1179 - }, - { - "epoch": 0.20118816101446574, - "grad_norm": 0.5938249230384827, - "learning_rate": 7.988064791133845e-06, - "loss": 0.137, - "step": 1180 - }, - { - "epoch": 0.2013586594560034, - "grad_norm": 0.6687734723091125, - "learning_rate": 7.986359761295823e-06, - "loss": 0.1103, - "step": 1181 - }, - { - "epoch": 0.20152915789754108, - "grad_norm": 0.6688919067382812, - "learning_rate": 7.984654731457801e-06, - "loss": 0.1135, - "step": 1182 - }, - { - "epoch": 0.20169965633907877, - "grad_norm": 0.7219515442848206, - "learning_rate": 7.982949701619779e-06, - "loss": 0.0858, - "step": 1183 - }, - { - "epoch": 0.20187015478061646, - "grad_norm": 0.4615936279296875, - "learning_rate": 7.981244671781757e-06, - "loss": 0.0989, - "step": 1184 - }, - { - "epoch": 0.20204065322215414, - "grad_norm": 0.6289270520210266, - "learning_rate": 7.979539641943735e-06, - "loss": 0.1416, - "step": 1185 - }, - { - "epoch": 0.20221115166369183, - "grad_norm": 0.5620813965797424, - "learning_rate": 7.977834612105713e-06, - "loss": 0.1293, - "step": 1186 - }, - { - "epoch": 0.20238165010522952, - "grad_norm": 0.8259469866752625, - "learning_rate": 7.97612958226769e-06, - "loss": 0.1645, - "step": 1187 - }, - { - "epoch": 0.2025521485467672, - "grad_norm": 0.4567446708679199, - "learning_rate": 7.974424552429669e-06, - "loss": 0.1148, - "step": 1188 - }, - { - "epoch": 0.20272264698830486, - "grad_norm": 0.7620810866355896, - "learning_rate": 7.972719522591647e-06, - "loss": 0.1775, - "step": 1189 - }, - { - "epoch": 0.20289314542984255, - "grad_norm": 1.8133633136749268, - "learning_rate": 7.971014492753623e-06, - "loss": 0.3183, - "step": 1190 - }, - { - "epoch": 0.20306364387138023, - "grad_norm": 0.7991937398910522, - "learning_rate": 7.969309462915603e-06, - "loss": 0.182, - "step": 1191 - }, - { - "epoch": 0.20323414231291792, - "grad_norm": 1.1091400384902954, - "learning_rate": 7.967604433077579e-06, - "loss": 0.2114, - "step": 1192 - }, - { - "epoch": 0.2034046407544556, - "grad_norm": 0.5558500289916992, - "learning_rate": 7.965899403239557e-06, - "loss": 0.1173, - "step": 1193 - }, - { - "epoch": 0.2035751391959933, - "grad_norm": 0.6900790929794312, - "learning_rate": 7.964194373401535e-06, - "loss": 0.1523, - "step": 1194 - }, - { - "epoch": 0.20374563763753098, - "grad_norm": 0.806940495967865, - "learning_rate": 7.962489343563513e-06, - "loss": 0.2267, - "step": 1195 - }, - { - "epoch": 0.20391613607906867, - "grad_norm": 0.8060824871063232, - "learning_rate": 7.96078431372549e-06, - "loss": 0.1432, - "step": 1196 - }, - { - "epoch": 0.20408663452060632, - "grad_norm": 0.842232882976532, - "learning_rate": 7.959079283887469e-06, - "loss": 0.1044, - "step": 1197 - }, - { - "epoch": 0.204257132962144, - "grad_norm": 0.7650852203369141, - "learning_rate": 7.957374254049447e-06, - "loss": 0.157, - "step": 1198 - }, - { - "epoch": 0.2044276314036817, - "grad_norm": 0.7800074219703674, - "learning_rate": 7.955669224211425e-06, - "loss": 0.1424, - "step": 1199 - }, - { - "epoch": 0.20459812984521938, - "grad_norm": 0.8121132850646973, - "learning_rate": 7.953964194373403e-06, - "loss": 0.1909, - "step": 1200 - }, - { - "epoch": 0.20459812984521938, - "eval_f1_score": 0.2485207100591716, - "eval_loss": 0.1650407910346985, - "eval_runtime": 182.5967, - "eval_samples_per_second": 54.765, - "eval_steps_per_second": 3.423, - "step": 1200 - }, - { - "epoch": 0.20476862828675707, - "grad_norm": 0.9365339875221252, - "learning_rate": 7.95225916453538e-06, - "loss": 0.1804, - "step": 1201 - }, - { - "epoch": 0.20493912672829476, - "grad_norm": 0.4943313002586365, - "learning_rate": 7.950554134697358e-06, - "loss": 0.1368, - "step": 1202 - }, - { - "epoch": 0.20510962516983244, - "grad_norm": 0.6111050248146057, - "learning_rate": 7.948849104859335e-06, - "loss": 0.1726, - "step": 1203 - }, - { - "epoch": 0.2052801236113701, - "grad_norm": 0.8806681036949158, - "learning_rate": 7.947144075021314e-06, - "loss": 0.1393, - "step": 1204 - }, - { - "epoch": 0.2054506220529078, - "grad_norm": 0.8702719807624817, - "learning_rate": 7.945439045183292e-06, - "loss": 0.2051, - "step": 1205 - }, - { - "epoch": 0.20562112049444548, - "grad_norm": 0.5891739130020142, - "learning_rate": 7.943734015345269e-06, - "loss": 0.1182, - "step": 1206 - }, - { - "epoch": 0.20579161893598316, - "grad_norm": 0.7840372920036316, - "learning_rate": 7.942028985507248e-06, - "loss": 0.176, - "step": 1207 - }, - { - "epoch": 0.20596211737752085, - "grad_norm": 0.7101555466651917, - "learning_rate": 7.940323955669224e-06, - "loss": 0.2072, - "step": 1208 - }, - { - "epoch": 0.20613261581905853, - "grad_norm": 0.7602161765098572, - "learning_rate": 7.938618925831202e-06, - "loss": 0.1198, - "step": 1209 - }, - { - "epoch": 0.20630311426059622, - "grad_norm": 0.7662124037742615, - "learning_rate": 7.93691389599318e-06, - "loss": 0.1544, - "step": 1210 - }, - { - "epoch": 0.2064736127021339, - "grad_norm": 0.6383134722709656, - "learning_rate": 7.935208866155158e-06, - "loss": 0.1298, - "step": 1211 - }, - { - "epoch": 0.20664411114367157, - "grad_norm": 0.9750174283981323, - "learning_rate": 7.933503836317136e-06, - "loss": 0.142, - "step": 1212 - }, - { - "epoch": 0.20681460958520925, - "grad_norm": 0.7169644832611084, - "learning_rate": 7.931798806479114e-06, - "loss": 0.1538, - "step": 1213 - }, - { - "epoch": 0.20698510802674694, - "grad_norm": 0.7920701503753662, - "learning_rate": 7.930093776641092e-06, - "loss": 0.1591, - "step": 1214 - }, - { - "epoch": 0.20715560646828463, - "grad_norm": 0.7697597742080688, - "learning_rate": 7.92838874680307e-06, - "loss": 0.1361, - "step": 1215 - }, - { - "epoch": 0.2073261049098223, - "grad_norm": 0.8267337083816528, - "learning_rate": 7.926683716965048e-06, - "loss": 0.1605, - "step": 1216 - }, - { - "epoch": 0.20749660335136, - "grad_norm": 1.0727347135543823, - "learning_rate": 7.924978687127026e-06, - "loss": 0.2054, - "step": 1217 - }, - { - "epoch": 0.20766710179289768, - "grad_norm": 0.9135003089904785, - "learning_rate": 7.923273657289004e-06, - "loss": 0.1439, - "step": 1218 - }, - { - "epoch": 0.20783760023443534, - "grad_norm": 0.9521437287330627, - "learning_rate": 7.92156862745098e-06, - "loss": 0.1991, - "step": 1219 - }, - { - "epoch": 0.20800809867597303, - "grad_norm": 0.8825364708900452, - "learning_rate": 7.91986359761296e-06, - "loss": 0.1299, - "step": 1220 - }, - { - "epoch": 0.20817859711751072, - "grad_norm": 1.088287353515625, - "learning_rate": 7.918158567774936e-06, - "loss": 0.1093, - "step": 1221 - }, - { - "epoch": 0.2083490955590484, - "grad_norm": 1.09437894821167, - "learning_rate": 7.916453537936914e-06, - "loss": 0.0805, - "step": 1222 - }, - { - "epoch": 0.2085195940005861, - "grad_norm": 0.9781740307807922, - "learning_rate": 7.914748508098894e-06, - "loss": 0.1763, - "step": 1223 - }, - { - "epoch": 0.20869009244212378, - "grad_norm": 1.0949674844741821, - "learning_rate": 7.91304347826087e-06, - "loss": 0.1578, - "step": 1224 - }, - { - "epoch": 0.20886059088366146, - "grad_norm": 0.9407842755317688, - "learning_rate": 7.911338448422848e-06, - "loss": 0.0841, - "step": 1225 - }, - { - "epoch": 0.20903108932519915, - "grad_norm": 1.0195931196212769, - "learning_rate": 7.909633418584826e-06, - "loss": 0.1611, - "step": 1226 - }, - { - "epoch": 0.2092015877667368, - "grad_norm": 0.7501747608184814, - "learning_rate": 7.907928388746804e-06, - "loss": 0.0916, - "step": 1227 - }, - { - "epoch": 0.2093720862082745, - "grad_norm": 0.7305780053138733, - "learning_rate": 7.906223358908782e-06, - "loss": 0.1143, - "step": 1228 - }, - { - "epoch": 0.20954258464981218, - "grad_norm": 0.8773810267448425, - "learning_rate": 7.90451832907076e-06, - "loss": 0.1699, - "step": 1229 - }, - { - "epoch": 0.20971308309134987, - "grad_norm": 0.929328441619873, - "learning_rate": 7.902813299232738e-06, - "loss": 0.1352, - "step": 1230 - }, - { - "epoch": 0.20988358153288755, - "grad_norm": 0.6584373712539673, - "learning_rate": 7.901108269394716e-06, - "loss": 0.057, - "step": 1231 - }, - { - "epoch": 0.21005407997442524, - "grad_norm": 0.7133747935295105, - "learning_rate": 7.899403239556693e-06, - "loss": 0.1141, - "step": 1232 - }, - { - "epoch": 0.21022457841596293, - "grad_norm": 0.6574261784553528, - "learning_rate": 7.89769820971867e-06, - "loss": 0.1312, - "step": 1233 - }, - { - "epoch": 0.2103950768575006, - "grad_norm": 0.8198479413986206, - "learning_rate": 7.89599317988065e-06, - "loss": 0.1476, - "step": 1234 - }, - { - "epoch": 0.21056557529903827, - "grad_norm": 1.679710030555725, - "learning_rate": 7.894288150042626e-06, - "loss": 0.2136, - "step": 1235 - }, - { - "epoch": 0.21073607374057596, - "grad_norm": 0.8997982144355774, - "learning_rate": 7.892583120204604e-06, - "loss": 0.1446, - "step": 1236 - }, - { - "epoch": 0.21090657218211364, - "grad_norm": 0.5984455943107605, - "learning_rate": 7.890878090366582e-06, - "loss": 0.1091, - "step": 1237 - }, - { - "epoch": 0.21107707062365133, - "grad_norm": 1.0783398151397705, - "learning_rate": 7.88917306052856e-06, - "loss": 0.1465, - "step": 1238 - }, - { - "epoch": 0.21124756906518902, - "grad_norm": 1.564841389656067, - "learning_rate": 7.887468030690537e-06, - "loss": 0.284, - "step": 1239 - }, - { - "epoch": 0.2114180675067267, - "grad_norm": 0.936279833316803, - "learning_rate": 7.885763000852515e-06, - "loss": 0.0737, - "step": 1240 - }, - { - "epoch": 0.2115885659482644, - "grad_norm": 1.0812268257141113, - "learning_rate": 7.884057971014493e-06, - "loss": 0.1423, - "step": 1241 - }, - { - "epoch": 0.21175906438980205, - "grad_norm": 0.9714178442955017, - "learning_rate": 7.882352941176471e-06, - "loss": 0.1452, - "step": 1242 - }, - { - "epoch": 0.21192956283133974, - "grad_norm": 0.7834417819976807, - "learning_rate": 7.88064791133845e-06, - "loss": 0.1637, - "step": 1243 - }, - { - "epoch": 0.21210006127287742, - "grad_norm": 0.6951526999473572, - "learning_rate": 7.878942881500427e-06, - "loss": 0.1474, - "step": 1244 - }, - { - "epoch": 0.2122705597144151, - "grad_norm": 0.8583109974861145, - "learning_rate": 7.877237851662405e-06, - "loss": 0.171, - "step": 1245 - }, - { - "epoch": 0.2124410581559528, - "grad_norm": 1.11123788356781, - "learning_rate": 7.875532821824381e-06, - "loss": 0.2114, - "step": 1246 - }, - { - "epoch": 0.21261155659749048, - "grad_norm": 0.8634263873100281, - "learning_rate": 7.873827791986361e-06, - "loss": 0.1309, - "step": 1247 - }, - { - "epoch": 0.21278205503902817, - "grad_norm": 1.0174041986465454, - "learning_rate": 7.872122762148339e-06, - "loss": 0.2006, - "step": 1248 - }, - { - "epoch": 0.21295255348056585, - "grad_norm": 0.8859142661094666, - "learning_rate": 7.870417732310315e-06, - "loss": 0.1345, - "step": 1249 - }, - { - "epoch": 0.2131230519221035, - "grad_norm": 0.9608497023582458, - "learning_rate": 7.868712702472295e-06, - "loss": 0.1496, - "step": 1250 - }, - { - "epoch": 0.2132935503636412, - "grad_norm": 0.843275785446167, - "learning_rate": 7.867007672634271e-06, - "loss": 0.1602, - "step": 1251 - }, - { - "epoch": 0.21346404880517889, - "grad_norm": 0.6963731646537781, - "learning_rate": 7.865302642796249e-06, - "loss": 0.1346, - "step": 1252 - }, - { - "epoch": 0.21363454724671657, - "grad_norm": 1.220220685005188, - "learning_rate": 7.863597612958227e-06, - "loss": 0.1669, - "step": 1253 - }, - { - "epoch": 0.21380504568825426, - "grad_norm": 0.7139214277267456, - "learning_rate": 7.861892583120205e-06, - "loss": 0.1291, - "step": 1254 - }, - { - "epoch": 0.21397554412979194, - "grad_norm": 0.7002556920051575, - "learning_rate": 7.860187553282183e-06, - "loss": 0.122, - "step": 1255 - }, - { - "epoch": 0.21414604257132963, - "grad_norm": 1.3244677782058716, - "learning_rate": 7.85848252344416e-06, - "loss": 0.2133, - "step": 1256 - }, - { - "epoch": 0.21431654101286732, - "grad_norm": 0.5991846919059753, - "learning_rate": 7.856777493606139e-06, - "loss": 0.1207, - "step": 1257 - }, - { - "epoch": 0.21448703945440498, - "grad_norm": 0.9258220195770264, - "learning_rate": 7.855072463768117e-06, - "loss": 0.1126, - "step": 1258 - }, - { - "epoch": 0.21465753789594266, - "grad_norm": 0.80934739112854, - "learning_rate": 7.853367433930095e-06, - "loss": 0.0496, - "step": 1259 - }, - { - "epoch": 0.21482803633748035, - "grad_norm": 0.6553164720535278, - "learning_rate": 7.851662404092073e-06, - "loss": 0.1773, - "step": 1260 - }, - { - "epoch": 0.21499853477901804, - "grad_norm": 1.0420881509780884, - "learning_rate": 7.84995737425405e-06, - "loss": 0.133, - "step": 1261 - }, - { - "epoch": 0.21516903322055572, - "grad_norm": 0.9948811531066895, - "learning_rate": 7.848252344416027e-06, - "loss": 0.1441, - "step": 1262 - }, - { - "epoch": 0.2153395316620934, - "grad_norm": 1.6052919626235962, - "learning_rate": 7.846547314578006e-06, - "loss": 0.2128, - "step": 1263 - }, - { - "epoch": 0.2155100301036311, - "grad_norm": 1.10100519657135, - "learning_rate": 7.844842284739984e-06, - "loss": 0.1032, - "step": 1264 - }, - { - "epoch": 0.21568052854516875, - "grad_norm": 0.7574976682662964, - "learning_rate": 7.84313725490196e-06, - "loss": 0.1129, - "step": 1265 - }, - { - "epoch": 0.21585102698670644, - "grad_norm": 1.1044847965240479, - "learning_rate": 7.84143222506394e-06, - "loss": 0.1948, - "step": 1266 - }, - { - "epoch": 0.21602152542824413, - "grad_norm": 0.8704826831817627, - "learning_rate": 7.839727195225917e-06, - "loss": 0.1208, - "step": 1267 - }, - { - "epoch": 0.2161920238697818, - "grad_norm": 1.2776570320129395, - "learning_rate": 7.838022165387895e-06, - "loss": 0.1804, - "step": 1268 - }, - { - "epoch": 0.2163625223113195, - "grad_norm": 0.8619382381439209, - "learning_rate": 7.836317135549872e-06, - "loss": 0.142, - "step": 1269 - }, - { - "epoch": 0.21653302075285719, - "grad_norm": 0.6960404515266418, - "learning_rate": 7.83461210571185e-06, - "loss": 0.0856, - "step": 1270 - }, - { - "epoch": 0.21670351919439487, - "grad_norm": 0.700543999671936, - "learning_rate": 7.832907075873828e-06, - "loss": 0.1139, - "step": 1271 - }, - { - "epoch": 0.21687401763593256, - "grad_norm": 0.5665769577026367, - "learning_rate": 7.831202046035806e-06, - "loss": 0.1006, - "step": 1272 - }, - { - "epoch": 0.21704451607747022, - "grad_norm": 0.911679208278656, - "learning_rate": 7.829497016197784e-06, - "loss": 0.166, - "step": 1273 - }, - { - "epoch": 0.2172150145190079, - "grad_norm": 0.9011550545692444, - "learning_rate": 7.827791986359762e-06, - "loss": 0.161, - "step": 1274 - }, - { - "epoch": 0.2173855129605456, - "grad_norm": 1.1256483793258667, - "learning_rate": 7.82608695652174e-06, - "loss": 0.072, - "step": 1275 - }, - { - "epoch": 0.21755601140208328, - "grad_norm": 0.7013976573944092, - "learning_rate": 7.824381926683718e-06, - "loss": 0.1177, - "step": 1276 - }, - { - "epoch": 0.21772650984362096, - "grad_norm": 0.8687089681625366, - "learning_rate": 7.822676896845696e-06, - "loss": 0.1332, - "step": 1277 - }, - { - "epoch": 0.21789700828515865, - "grad_norm": 0.7818564772605896, - "learning_rate": 7.820971867007672e-06, - "loss": 0.1016, - "step": 1278 - }, - { - "epoch": 0.21806750672669634, - "grad_norm": 1.2303472757339478, - "learning_rate": 7.819266837169652e-06, - "loss": 0.2575, - "step": 1279 - }, - { - "epoch": 0.21823800516823402, - "grad_norm": 0.6944379210472107, - "learning_rate": 7.817561807331628e-06, - "loss": 0.1291, - "step": 1280 - }, - { - "epoch": 0.21840850360977168, - "grad_norm": 0.9652915596961975, - "learning_rate": 7.815856777493606e-06, - "loss": 0.1347, - "step": 1281 - }, - { - "epoch": 0.21857900205130937, - "grad_norm": 0.9090012311935425, - "learning_rate": 7.814151747655586e-06, - "loss": 0.1583, - "step": 1282 - }, - { - "epoch": 0.21874950049284705, - "grad_norm": 0.774336576461792, - "learning_rate": 7.812446717817562e-06, - "loss": 0.0878, - "step": 1283 - }, - { - "epoch": 0.21891999893438474, - "grad_norm": 1.003588318824768, - "learning_rate": 7.81074168797954e-06, - "loss": 0.1432, - "step": 1284 - }, - { - "epoch": 0.21909049737592243, - "grad_norm": 1.4490317106246948, - "learning_rate": 7.809036658141518e-06, - "loss": 0.2245, - "step": 1285 - }, - { - "epoch": 0.2192609958174601, - "grad_norm": 1.0642396211624146, - "learning_rate": 7.807331628303496e-06, - "loss": 0.1324, - "step": 1286 - }, - { - "epoch": 0.2194314942589978, - "grad_norm": 0.7888051271438599, - "learning_rate": 7.805626598465474e-06, - "loss": 0.0722, - "step": 1287 - }, - { - "epoch": 0.21960199270053546, - "grad_norm": 1.4737591743469238, - "learning_rate": 7.803921568627452e-06, - "loss": 0.1382, - "step": 1288 - }, - { - "epoch": 0.21977249114207315, - "grad_norm": 1.012850284576416, - "learning_rate": 7.80221653878943e-06, - "loss": 0.1952, - "step": 1289 - }, - { - "epoch": 0.21994298958361083, - "grad_norm": 1.8369516134262085, - "learning_rate": 7.800511508951408e-06, - "loss": 0.2694, - "step": 1290 - }, - { - "epoch": 0.22011348802514852, - "grad_norm": 0.830058217048645, - "learning_rate": 7.798806479113386e-06, - "loss": 0.1163, - "step": 1291 - }, - { - "epoch": 0.2202839864666862, - "grad_norm": 0.9844965934753418, - "learning_rate": 7.797101449275364e-06, - "loss": 0.1419, - "step": 1292 - }, - { - "epoch": 0.2204544849082239, - "grad_norm": 1.0009138584136963, - "learning_rate": 7.795396419437342e-06, - "loss": 0.2142, - "step": 1293 - }, - { - "epoch": 0.22062498334976158, - "grad_norm": 1.0360790491104126, - "learning_rate": 7.793691389599318e-06, - "loss": 0.1299, - "step": 1294 - }, - { - "epoch": 0.22079548179129926, - "grad_norm": 0.9678265452384949, - "learning_rate": 7.791986359761297e-06, - "loss": 0.1877, - "step": 1295 - }, - { - "epoch": 0.22096598023283692, - "grad_norm": 1.2459754943847656, - "learning_rate": 7.790281329923274e-06, - "loss": 0.075, - "step": 1296 - }, - { - "epoch": 0.2211364786743746, - "grad_norm": 0.7200685143470764, - "learning_rate": 7.788576300085252e-06, - "loss": 0.1422, - "step": 1297 - }, - { - "epoch": 0.2213069771159123, - "grad_norm": 0.9024417996406555, - "learning_rate": 7.786871270247231e-06, - "loss": 0.1777, - "step": 1298 - }, - { - "epoch": 0.22147747555744998, - "grad_norm": 1.0395989418029785, - "learning_rate": 7.785166240409207e-06, - "loss": 0.1318, - "step": 1299 - }, - { - "epoch": 0.22164797399898767, - "grad_norm": 0.775626003742218, - "learning_rate": 7.783461210571185e-06, - "loss": 0.107, - "step": 1300 - }, - { - "epoch": 0.22164797399898767, - "eval_f1_score": 0.2802197802197802, - "eval_loss": 0.16321277618408203, - "eval_runtime": 182.6892, - "eval_samples_per_second": 54.738, - "eval_steps_per_second": 3.421, - "step": 1300 - }, - { - "epoch": 0.22181847244052535, - "grad_norm": 0.8368775248527527, - "learning_rate": 7.781756180733163e-06, - "loss": 0.133, - "step": 1301 - }, - { - "epoch": 0.22198897088206304, - "grad_norm": 0.7635461091995239, - "learning_rate": 7.780051150895141e-06, - "loss": 0.1335, - "step": 1302 - }, - { - "epoch": 0.2221594693236007, - "grad_norm": 1.5250236988067627, - "learning_rate": 7.77834612105712e-06, - "loss": 0.2301, - "step": 1303 - }, - { - "epoch": 0.2223299677651384, - "grad_norm": 0.7985216379165649, - "learning_rate": 7.776641091219097e-06, - "loss": 0.1333, - "step": 1304 - }, - { - "epoch": 0.22250046620667607, - "grad_norm": 0.7213661074638367, - "learning_rate": 7.774936061381073e-06, - "loss": 0.1145, - "step": 1305 - }, - { - "epoch": 0.22267096464821376, - "grad_norm": 0.763440728187561, - "learning_rate": 7.773231031543053e-06, - "loss": 0.1564, - "step": 1306 - }, - { - "epoch": 0.22284146308975145, - "grad_norm": 0.8962368369102478, - "learning_rate": 7.771526001705031e-06, - "loss": 0.1195, - "step": 1307 - }, - { - "epoch": 0.22301196153128913, - "grad_norm": 0.9387539029121399, - "learning_rate": 7.769820971867007e-06, - "loss": 0.1495, - "step": 1308 - }, - { - "epoch": 0.22318245997282682, - "grad_norm": 0.8244006037712097, - "learning_rate": 7.768115942028987e-06, - "loss": 0.1314, - "step": 1309 - }, - { - "epoch": 0.2233529584143645, - "grad_norm": 0.7632812261581421, - "learning_rate": 7.766410912190963e-06, - "loss": 0.1104, - "step": 1310 - }, - { - "epoch": 0.22352345685590216, - "grad_norm": 0.7748917937278748, - "learning_rate": 7.764705882352941e-06, - "loss": 0.1194, - "step": 1311 - }, - { - "epoch": 0.22369395529743985, - "grad_norm": 0.8611569404602051, - "learning_rate": 7.763000852514919e-06, - "loss": 0.1094, - "step": 1312 - }, - { - "epoch": 0.22386445373897754, - "grad_norm": 1.057794213294983, - "learning_rate": 7.761295822676897e-06, - "loss": 0.1883, - "step": 1313 - }, - { - "epoch": 0.22403495218051522, - "grad_norm": 0.7631834149360657, - "learning_rate": 7.759590792838875e-06, - "loss": 0.1968, - "step": 1314 - }, - { - "epoch": 0.2242054506220529, - "grad_norm": 0.7893649339675903, - "learning_rate": 7.757885763000853e-06, - "loss": 0.0726, - "step": 1315 - }, - { - "epoch": 0.2243759490635906, - "grad_norm": 0.6442000865936279, - "learning_rate": 7.756180733162831e-06, - "loss": 0.085, - "step": 1316 - }, - { - "epoch": 0.22454644750512828, - "grad_norm": 0.4879632592201233, - "learning_rate": 7.754475703324809e-06, - "loss": 0.1163, - "step": 1317 - }, - { - "epoch": 0.22471694594666597, - "grad_norm": 1.018438696861267, - "learning_rate": 7.752770673486787e-06, - "loss": 0.1591, - "step": 1318 - }, - { - "epoch": 0.22488744438820363, - "grad_norm": 1.1671644449234009, - "learning_rate": 7.751065643648765e-06, - "loss": 0.1739, - "step": 1319 - }, - { - "epoch": 0.22505794282974131, - "grad_norm": 0.8683679699897766, - "learning_rate": 7.749360613810743e-06, - "loss": 0.1497, - "step": 1320 - }, - { - "epoch": 0.225228441271279, - "grad_norm": 1.1401067972183228, - "learning_rate": 7.747655583972719e-06, - "loss": 0.1526, - "step": 1321 - }, - { - "epoch": 0.2253989397128167, - "grad_norm": 1.02974271774292, - "learning_rate": 7.745950554134699e-06, - "loss": 0.2012, - "step": 1322 - }, - { - "epoch": 0.22556943815435437, - "grad_norm": 0.8591212034225464, - "learning_rate": 7.744245524296677e-06, - "loss": 0.1126, - "step": 1323 - }, - { - "epoch": 0.22573993659589206, - "grad_norm": 1.0445603132247925, - "learning_rate": 7.742540494458653e-06, - "loss": 0.1644, - "step": 1324 - }, - { - "epoch": 0.22591043503742975, - "grad_norm": 0.6658400297164917, - "learning_rate": 7.740835464620632e-06, - "loss": 0.1212, - "step": 1325 - }, - { - "epoch": 0.2260809334789674, - "grad_norm": 0.87973552942276, - "learning_rate": 7.739130434782609e-06, - "loss": 0.1966, - "step": 1326 - }, - { - "epoch": 0.2262514319205051, - "grad_norm": 0.8204584717750549, - "learning_rate": 7.737425404944587e-06, - "loss": 0.1921, - "step": 1327 - }, - { - "epoch": 0.22642193036204278, - "grad_norm": 1.1901997327804565, - "learning_rate": 7.735720375106565e-06, - "loss": 0.2525, - "step": 1328 - }, - { - "epoch": 0.22659242880358046, - "grad_norm": 1.110565423965454, - "learning_rate": 7.734015345268543e-06, - "loss": 0.085, - "step": 1329 - }, - { - "epoch": 0.22676292724511815, - "grad_norm": 0.9930194020271301, - "learning_rate": 7.73231031543052e-06, - "loss": 0.1188, - "step": 1330 - }, - { - "epoch": 0.22693342568665584, - "grad_norm": 1.0601656436920166, - "learning_rate": 7.730605285592498e-06, - "loss": 0.0828, - "step": 1331 - }, - { - "epoch": 0.22710392412819352, - "grad_norm": 0.7120490074157715, - "learning_rate": 7.728900255754476e-06, - "loss": 0.1459, - "step": 1332 - }, - { - "epoch": 0.2272744225697312, - "grad_norm": 0.9619858264923096, - "learning_rate": 7.727195225916454e-06, - "loss": 0.1637, - "step": 1333 - }, - { - "epoch": 0.22744492101126887, - "grad_norm": 0.9100483655929565, - "learning_rate": 7.725490196078432e-06, - "loss": 0.1962, - "step": 1334 - }, - { - "epoch": 0.22761541945280656, - "grad_norm": 0.6363540291786194, - "learning_rate": 7.72378516624041e-06, - "loss": 0.1219, - "step": 1335 - }, - { - "epoch": 0.22778591789434424, - "grad_norm": 0.9817243218421936, - "learning_rate": 7.722080136402388e-06, - "loss": 0.2153, - "step": 1336 - }, - { - "epoch": 0.22795641633588193, - "grad_norm": 0.878165602684021, - "learning_rate": 7.720375106564364e-06, - "loss": 0.1994, - "step": 1337 - }, - { - "epoch": 0.22812691477741961, - "grad_norm": 0.9704774618148804, - "learning_rate": 7.718670076726344e-06, - "loss": 0.1689, - "step": 1338 - }, - { - "epoch": 0.2282974132189573, - "grad_norm": 1.0748244524002075, - "learning_rate": 7.716965046888322e-06, - "loss": 0.2116, - "step": 1339 - }, - { - "epoch": 0.228467911660495, - "grad_norm": 0.9812359809875488, - "learning_rate": 7.715260017050298e-06, - "loss": 0.0619, - "step": 1340 - }, - { - "epoch": 0.22863841010203267, - "grad_norm": 1.5158941745758057, - "learning_rate": 7.713554987212278e-06, - "loss": 0.2373, - "step": 1341 - }, - { - "epoch": 0.22880890854357033, - "grad_norm": 0.8915465474128723, - "learning_rate": 7.711849957374254e-06, - "loss": 0.1591, - "step": 1342 - }, - { - "epoch": 0.22897940698510802, - "grad_norm": 0.6390937566757202, - "learning_rate": 7.710144927536232e-06, - "loss": 0.1521, - "step": 1343 - }, - { - "epoch": 0.2291499054266457, - "grad_norm": 0.7857467532157898, - "learning_rate": 7.70843989769821e-06, - "loss": 0.1484, - "step": 1344 - }, - { - "epoch": 0.2293204038681834, - "grad_norm": 0.6358761191368103, - "learning_rate": 7.706734867860188e-06, - "loss": 0.1284, - "step": 1345 - }, - { - "epoch": 0.22949090230972108, - "grad_norm": 0.9754729270935059, - "learning_rate": 7.705029838022166e-06, - "loss": 0.1711, - "step": 1346 - }, - { - "epoch": 0.22966140075125877, - "grad_norm": 0.9040155410766602, - "learning_rate": 7.703324808184144e-06, - "loss": 0.1411, - "step": 1347 - }, - { - "epoch": 0.22983189919279645, - "grad_norm": 1.1135698556900024, - "learning_rate": 7.701619778346122e-06, - "loss": 0.2615, - "step": 1348 - }, - { - "epoch": 0.2300023976343341, - "grad_norm": 0.6657915115356445, - "learning_rate": 7.6999147485081e-06, - "loss": 0.1502, - "step": 1349 - }, - { - "epoch": 0.2301728960758718, - "grad_norm": 0.8232541084289551, - "learning_rate": 7.698209718670078e-06, - "loss": 0.1593, - "step": 1350 - }, - { - "epoch": 0.23034339451740948, - "grad_norm": 0.9148602485656738, - "learning_rate": 7.696504688832056e-06, - "loss": 0.1059, - "step": 1351 - }, - { - "epoch": 0.23051389295894717, - "grad_norm": 0.8433572053909302, - "learning_rate": 7.694799658994034e-06, - "loss": 0.1235, - "step": 1352 - }, - { - "epoch": 0.23068439140048486, - "grad_norm": 0.9784965515136719, - "learning_rate": 7.69309462915601e-06, - "loss": 0.1795, - "step": 1353 - }, - { - "epoch": 0.23085488984202254, - "grad_norm": 0.5294272899627686, - "learning_rate": 7.69138959931799e-06, - "loss": 0.1123, - "step": 1354 - }, - { - "epoch": 0.23102538828356023, - "grad_norm": 0.5891151428222656, - "learning_rate": 7.689684569479966e-06, - "loss": 0.1402, - "step": 1355 - }, - { - "epoch": 0.23119588672509792, - "grad_norm": 0.6787668466567993, - "learning_rate": 7.687979539641944e-06, - "loss": 0.1335, - "step": 1356 - }, - { - "epoch": 0.23136638516663557, - "grad_norm": 0.49819067120552063, - "learning_rate": 7.686274509803923e-06, - "loss": 0.0955, - "step": 1357 - }, - { - "epoch": 0.23153688360817326, - "grad_norm": 0.7876557111740112, - "learning_rate": 7.6845694799659e-06, - "loss": 0.0986, - "step": 1358 - }, - { - "epoch": 0.23170738204971095, - "grad_norm": 0.745729386806488, - "learning_rate": 7.682864450127878e-06, - "loss": 0.0595, - "step": 1359 - }, - { - "epoch": 0.23187788049124863, - "grad_norm": 0.979210376739502, - "learning_rate": 7.681159420289856e-06, - "loss": 0.1627, - "step": 1360 - }, - { - "epoch": 0.23204837893278632, - "grad_norm": 0.6400150060653687, - "learning_rate": 7.679454390451833e-06, - "loss": 0.1346, - "step": 1361 - }, - { - "epoch": 0.232218877374324, - "grad_norm": 1.5608742237091064, - "learning_rate": 7.677749360613811e-06, - "loss": 0.2716, - "step": 1362 - }, - { - "epoch": 0.2323893758158617, - "grad_norm": 1.263974666595459, - "learning_rate": 7.67604433077579e-06, - "loss": 0.1644, - "step": 1363 - }, - { - "epoch": 0.23255987425739938, - "grad_norm": 0.7581167221069336, - "learning_rate": 7.674339300937767e-06, - "loss": 0.1086, - "step": 1364 - }, - { - "epoch": 0.23273037269893704, - "grad_norm": 1.1444523334503174, - "learning_rate": 7.672634271099745e-06, - "loss": 0.1173, - "step": 1365 - }, - { - "epoch": 0.23290087114047472, - "grad_norm": 0.6748406291007996, - "learning_rate": 7.670929241261723e-06, - "loss": 0.1271, - "step": 1366 - }, - { - "epoch": 0.2330713695820124, - "grad_norm": 0.9369028806686401, - "learning_rate": 7.669224211423701e-06, - "loss": 0.1096, - "step": 1367 - }, - { - "epoch": 0.2332418680235501, - "grad_norm": 1.4002010822296143, - "learning_rate": 7.667519181585679e-06, - "loss": 0.192, - "step": 1368 - }, - { - "epoch": 0.23341236646508778, - "grad_norm": 1.4191194772720337, - "learning_rate": 7.665814151747655e-06, - "loss": 0.1895, - "step": 1369 - }, - { - "epoch": 0.23358286490662547, - "grad_norm": 0.6959466338157654, - "learning_rate": 7.664109121909635e-06, - "loss": 0.0942, - "step": 1370 - }, - { - "epoch": 0.23375336334816316, - "grad_norm": 0.9197830557823181, - "learning_rate": 7.662404092071611e-06, - "loss": 0.1313, - "step": 1371 - }, - { - "epoch": 0.23392386178970082, - "grad_norm": 0.7821164727210999, - "learning_rate": 7.66069906223359e-06, - "loss": 0.1471, - "step": 1372 - }, - { - "epoch": 0.2340943602312385, - "grad_norm": 0.7130285501480103, - "learning_rate": 7.658994032395569e-06, - "loss": 0.1401, - "step": 1373 - }, - { - "epoch": 0.2342648586727762, - "grad_norm": 1.06864333152771, - "learning_rate": 7.657289002557545e-06, - "loss": 0.2456, - "step": 1374 - }, - { - "epoch": 0.23443535711431387, - "grad_norm": 1.1891578435897827, - "learning_rate": 7.655583972719523e-06, - "loss": 0.1243, - "step": 1375 - }, - { - "epoch": 0.23460585555585156, - "grad_norm": 1.2399449348449707, - "learning_rate": 7.653878942881501e-06, - "loss": 0.1893, - "step": 1376 - }, - { - "epoch": 0.23477635399738925, - "grad_norm": 0.650579035282135, - "learning_rate": 7.652173913043479e-06, - "loss": 0.0773, - "step": 1377 - }, - { - "epoch": 0.23494685243892693, - "grad_norm": 0.7715778946876526, - "learning_rate": 7.650468883205457e-06, - "loss": 0.1176, - "step": 1378 - }, - { - "epoch": 0.23511735088046462, - "grad_norm": 0.9234492182731628, - "learning_rate": 7.648763853367435e-06, - "loss": 0.1242, - "step": 1379 - }, - { - "epoch": 0.23528784932200228, - "grad_norm": 0.6156173944473267, - "learning_rate": 7.647058823529411e-06, - "loss": 0.1881, - "step": 1380 - }, - { - "epoch": 0.23545834776353997, - "grad_norm": 0.7190359830856323, - "learning_rate": 7.64535379369139e-06, - "loss": 0.1079, - "step": 1381 - }, - { - "epoch": 0.23562884620507765, - "grad_norm": 0.9452102780342102, - "learning_rate": 7.643648763853369e-06, - "loss": 0.1618, - "step": 1382 - }, - { - "epoch": 0.23579934464661534, - "grad_norm": 0.6474363803863525, - "learning_rate": 7.641943734015345e-06, - "loss": 0.1629, - "step": 1383 - }, - { - "epoch": 0.23596984308815303, - "grad_norm": 1.203114628791809, - "learning_rate": 7.640238704177325e-06, - "loss": 0.1824, - "step": 1384 - }, - { - "epoch": 0.2361403415296907, - "grad_norm": 1.1076685190200806, - "learning_rate": 7.6385336743393e-06, - "loss": 0.1822, - "step": 1385 - }, - { - "epoch": 0.2363108399712284, - "grad_norm": 0.3934735655784607, - "learning_rate": 7.636828644501279e-06, - "loss": 0.0704, - "step": 1386 - }, - { - "epoch": 0.23648133841276608, - "grad_norm": 0.8384243845939636, - "learning_rate": 7.635123614663257e-06, - "loss": 0.1534, - "step": 1387 - }, - { - "epoch": 0.23665183685430374, - "grad_norm": 1.733253002166748, - "learning_rate": 7.633418584825235e-06, - "loss": 0.2937, - "step": 1388 - }, - { - "epoch": 0.23682233529584143, - "grad_norm": 0.605530321598053, - "learning_rate": 7.631713554987213e-06, - "loss": 0.0652, - "step": 1389 - }, - { - "epoch": 0.23699283373737912, - "grad_norm": 0.5830997228622437, - "learning_rate": 7.63000852514919e-06, - "loss": 0.1229, - "step": 1390 - }, - { - "epoch": 0.2371633321789168, - "grad_norm": 0.6826112866401672, - "learning_rate": 7.628303495311169e-06, - "loss": 0.1483, - "step": 1391 - }, - { - "epoch": 0.2373338306204545, - "grad_norm": 0.91994708776474, - "learning_rate": 7.6265984654731465e-06, - "loss": 0.1845, - "step": 1392 - }, - { - "epoch": 0.23750432906199218, - "grad_norm": 0.8456718921661377, - "learning_rate": 7.624893435635124e-06, - "loss": 0.1556, - "step": 1393 - }, - { - "epoch": 0.23767482750352986, - "grad_norm": 0.9300505518913269, - "learning_rate": 7.6231884057971015e-06, - "loss": 0.2081, - "step": 1394 - }, - { - "epoch": 0.23784532594506752, - "grad_norm": 0.6279098391532898, - "learning_rate": 7.62148337595908e-06, - "loss": 0.1495, - "step": 1395 - }, - { - "epoch": 0.2380158243866052, - "grad_norm": 0.5213572978973389, - "learning_rate": 7.619778346121057e-06, - "loss": 0.1036, - "step": 1396 - }, - { - "epoch": 0.2381863228281429, - "grad_norm": 0.6799693703651428, - "learning_rate": 7.618073316283035e-06, - "loss": 0.0962, - "step": 1397 - }, - { - "epoch": 0.23835682126968058, - "grad_norm": 0.8099979758262634, - "learning_rate": 7.616368286445014e-06, - "loss": 0.1187, - "step": 1398 - }, - { - "epoch": 0.23852731971121827, - "grad_norm": 0.7557656168937683, - "learning_rate": 7.614663256606991e-06, - "loss": 0.1806, - "step": 1399 - }, - { - "epoch": 0.23869781815275595, - "grad_norm": 0.753513514995575, - "learning_rate": 7.612958226768969e-06, - "loss": 0.0898, - "step": 1400 - }, - { - "epoch": 0.23869781815275595, - "eval_f1_score": 0.17363344051446947, - "eval_loss": 0.16346921026706696, - "eval_runtime": 182.6256, - "eval_samples_per_second": 54.757, - "eval_steps_per_second": 3.422, - "step": 1400 - }, - { - "epoch": 0.23886831659429364, - "grad_norm": 0.6202503442764282, - "learning_rate": 7.611253196930946e-06, - "loss": 0.1279, - "step": 1401 - }, - { - "epoch": 0.23903881503583133, - "grad_norm": 1.2577037811279297, - "learning_rate": 7.609548167092925e-06, - "loss": 0.1738, - "step": 1402 - }, - { - "epoch": 0.23920931347736898, - "grad_norm": 0.665626049041748, - "learning_rate": 7.607843137254902e-06, - "loss": 0.1537, - "step": 1403 - }, - { - "epoch": 0.23937981191890667, - "grad_norm": 0.6698447465896606, - "learning_rate": 7.60613810741688e-06, - "loss": 0.0781, - "step": 1404 - }, - { - "epoch": 0.23955031036044436, - "grad_norm": 0.7142729163169861, - "learning_rate": 7.604433077578858e-06, - "loss": 0.1481, - "step": 1405 - }, - { - "epoch": 0.23972080880198204, - "grad_norm": 0.6046807765960693, - "learning_rate": 7.602728047740836e-06, - "loss": 0.1471, - "step": 1406 - }, - { - "epoch": 0.23989130724351973, - "grad_norm": 0.7382557988166809, - "learning_rate": 7.601023017902814e-06, - "loss": 0.1355, - "step": 1407 - }, - { - "epoch": 0.24006180568505742, - "grad_norm": 0.6939229369163513, - "learning_rate": 7.599317988064792e-06, - "loss": 0.0534, - "step": 1408 - }, - { - "epoch": 0.2402323041265951, - "grad_norm": 1.3162113428115845, - "learning_rate": 7.59761295822677e-06, - "loss": 0.2284, - "step": 1409 - }, - { - "epoch": 0.24040280256813276, - "grad_norm": 0.7624379396438599, - "learning_rate": 7.595907928388747e-06, - "loss": 0.1293, - "step": 1410 - }, - { - "epoch": 0.24057330100967045, - "grad_norm": 0.676016628742218, - "learning_rate": 7.594202898550726e-06, - "loss": 0.1288, - "step": 1411 - }, - { - "epoch": 0.24074379945120813, - "grad_norm": 0.7912443280220032, - "learning_rate": 7.592497868712703e-06, - "loss": 0.0694, - "step": 1412 - }, - { - "epoch": 0.24091429789274582, - "grad_norm": 0.8490058183670044, - "learning_rate": 7.590792838874681e-06, - "loss": 0.0542, - "step": 1413 - }, - { - "epoch": 0.2410847963342835, - "grad_norm": 1.5905382633209229, - "learning_rate": 7.58908780903666e-06, - "loss": 0.2503, - "step": 1414 - }, - { - "epoch": 0.2412552947758212, - "grad_norm": 0.7132392525672913, - "learning_rate": 7.587382779198637e-06, - "loss": 0.0873, - "step": 1415 - }, - { - "epoch": 0.24142579321735888, - "grad_norm": 0.8989769220352173, - "learning_rate": 7.585677749360615e-06, - "loss": 0.1376, - "step": 1416 - }, - { - "epoch": 0.24159629165889657, - "grad_norm": 0.7588139772415161, - "learning_rate": 7.583972719522592e-06, - "loss": 0.0944, - "step": 1417 - }, - { - "epoch": 0.24176679010043423, - "grad_norm": 0.5211461186408997, - "learning_rate": 7.5822676896845706e-06, - "loss": 0.0748, - "step": 1418 - }, - { - "epoch": 0.2419372885419719, - "grad_norm": 0.6701415181159973, - "learning_rate": 7.580562659846548e-06, - "loss": 0.0785, - "step": 1419 - }, - { - "epoch": 0.2421077869835096, - "grad_norm": 1.8684077262878418, - "learning_rate": 7.578857630008526e-06, - "loss": 0.2471, - "step": 1420 - }, - { - "epoch": 0.24227828542504729, - "grad_norm": 0.7341625690460205, - "learning_rate": 7.577152600170503e-06, - "loss": 0.1161, - "step": 1421 - }, - { - "epoch": 0.24244878386658497, - "grad_norm": 0.9016053676605225, - "learning_rate": 7.5754475703324815e-06, - "loss": 0.1613, - "step": 1422 - }, - { - "epoch": 0.24261928230812266, - "grad_norm": 0.8441269993782043, - "learning_rate": 7.5737425404944595e-06, - "loss": 0.1145, - "step": 1423 - }, - { - "epoch": 0.24278978074966034, - "grad_norm": 0.6860020756721497, - "learning_rate": 7.572037510656437e-06, - "loss": 0.199, - "step": 1424 - }, - { - "epoch": 0.24296027919119803, - "grad_norm": 1.288138747215271, - "learning_rate": 7.570332480818415e-06, - "loss": 0.1958, - "step": 1425 - }, - { - "epoch": 0.2431307776327357, - "grad_norm": 1.4583251476287842, - "learning_rate": 7.5686274509803925e-06, - "loss": 0.2127, - "step": 1426 - }, - { - "epoch": 0.24330127607427338, - "grad_norm": 0.8146334886550903, - "learning_rate": 7.566922421142371e-06, - "loss": 0.1469, - "step": 1427 - }, - { - "epoch": 0.24347177451581106, - "grad_norm": 0.7682924866676331, - "learning_rate": 7.565217391304348e-06, - "loss": 0.1244, - "step": 1428 - }, - { - "epoch": 0.24364227295734875, - "grad_norm": 0.9778890609741211, - "learning_rate": 7.563512361466326e-06, - "loss": 0.0938, - "step": 1429 - }, - { - "epoch": 0.24381277139888644, - "grad_norm": 1.0876981019973755, - "learning_rate": 7.561807331628303e-06, - "loss": 0.2666, - "step": 1430 - }, - { - "epoch": 0.24398326984042412, - "grad_norm": 0.7767715454101562, - "learning_rate": 7.560102301790282e-06, - "loss": 0.1377, - "step": 1431 - }, - { - "epoch": 0.2441537682819618, - "grad_norm": 0.878875195980072, - "learning_rate": 7.55839727195226e-06, - "loss": 0.184, - "step": 1432 - }, - { - "epoch": 0.24432426672349947, - "grad_norm": 1.333276391029358, - "learning_rate": 7.556692242114237e-06, - "loss": 0.1676, - "step": 1433 - }, - { - "epoch": 0.24449476516503715, - "grad_norm": 0.685215413570404, - "learning_rate": 7.554987212276216e-06, - "loss": 0.1374, - "step": 1434 - }, - { - "epoch": 0.24466526360657484, - "grad_norm": 1.133496880531311, - "learning_rate": 7.553282182438193e-06, - "loss": 0.0851, - "step": 1435 - }, - { - "epoch": 0.24483576204811253, - "grad_norm": 0.6287438869476318, - "learning_rate": 7.551577152600171e-06, - "loss": 0.1637, - "step": 1436 - }, - { - "epoch": 0.2450062604896502, - "grad_norm": 0.9850641489028931, - "learning_rate": 7.549872122762148e-06, - "loss": 0.1658, - "step": 1437 - }, - { - "epoch": 0.2451767589311879, - "grad_norm": 0.7101774215698242, - "learning_rate": 7.548167092924127e-06, - "loss": 0.102, - "step": 1438 - }, - { - "epoch": 0.24534725737272559, - "grad_norm": 1.6058562994003296, - "learning_rate": 7.546462063086104e-06, - "loss": 0.2678, - "step": 1439 - }, - { - "epoch": 0.24551775581426327, - "grad_norm": 1.1810576915740967, - "learning_rate": 7.544757033248082e-06, - "loss": 0.2186, - "step": 1440 - }, - { - "epoch": 0.24568825425580093, - "grad_norm": 1.7627534866333008, - "learning_rate": 7.543052003410061e-06, - "loss": 0.311, - "step": 1441 - }, - { - "epoch": 0.24585875269733862, - "grad_norm": 1.5101150274276733, - "learning_rate": 7.541346973572038e-06, - "loss": 0.2104, - "step": 1442 - }, - { - "epoch": 0.2460292511388763, - "grad_norm": 0.5594928860664368, - "learning_rate": 7.539641943734016e-06, - "loss": 0.1275, - "step": 1443 - }, - { - "epoch": 0.246199749580414, - "grad_norm": 0.6583553552627563, - "learning_rate": 7.537936913895994e-06, - "loss": 0.1235, - "step": 1444 - }, - { - "epoch": 0.24637024802195168, - "grad_norm": 0.7115300893783569, - "learning_rate": 7.536231884057972e-06, - "loss": 0.1055, - "step": 1445 - }, - { - "epoch": 0.24654074646348936, - "grad_norm": 0.8568644523620605, - "learning_rate": 7.534526854219949e-06, - "loss": 0.0792, - "step": 1446 - }, - { - "epoch": 0.24671124490502705, - "grad_norm": 0.6825379133224487, - "learning_rate": 7.532821824381928e-06, - "loss": 0.1291, - "step": 1447 - }, - { - "epoch": 0.24688174334656474, - "grad_norm": 0.6188522577285767, - "learning_rate": 7.531116794543906e-06, - "loss": 0.094, - "step": 1448 - }, - { - "epoch": 0.2470522417881024, - "grad_norm": 1.4240697622299194, - "learning_rate": 7.529411764705883e-06, - "loss": 0.2407, - "step": 1449 - }, - { - "epoch": 0.24722274022964008, - "grad_norm": 0.8678609728813171, - "learning_rate": 7.5277067348678615e-06, - "loss": 0.1461, - "step": 1450 - }, - { - "epoch": 0.24739323867117777, - "grad_norm": 0.8159744143486023, - "learning_rate": 7.526001705029839e-06, - "loss": 0.1565, - "step": 1451 - }, - { - "epoch": 0.24756373711271545, - "grad_norm": 1.4507590532302856, - "learning_rate": 7.5242966751918166e-06, - "loss": 0.2225, - "step": 1452 - }, - { - "epoch": 0.24773423555425314, - "grad_norm": 0.7991291880607605, - "learning_rate": 7.522591645353794e-06, - "loss": 0.1174, - "step": 1453 - }, - { - "epoch": 0.24790473399579083, - "grad_norm": 0.8276837468147278, - "learning_rate": 7.5208866155157724e-06, - "loss": 0.0945, - "step": 1454 - }, - { - "epoch": 0.2480752324373285, - "grad_norm": 0.8337952494621277, - "learning_rate": 7.5191815856777495e-06, - "loss": 0.184, - "step": 1455 - }, - { - "epoch": 0.24824573087886617, - "grad_norm": 0.7352725863456726, - "learning_rate": 7.5174765558397275e-06, - "loss": 0.0979, - "step": 1456 - }, - { - "epoch": 0.24841622932040386, - "grad_norm": 0.8864614367485046, - "learning_rate": 7.515771526001706e-06, - "loss": 0.0645, - "step": 1457 - }, - { - "epoch": 0.24858672776194154, - "grad_norm": 0.9972084760665894, - "learning_rate": 7.514066496163683e-06, - "loss": 0.1962, - "step": 1458 - }, - { - "epoch": 0.24875722620347923, - "grad_norm": 1.0494168996810913, - "learning_rate": 7.512361466325661e-06, - "loss": 0.2017, - "step": 1459 - }, - { - "epoch": 0.24892772464501692, - "grad_norm": 0.9285470843315125, - "learning_rate": 7.510656436487639e-06, - "loss": 0.1574, - "step": 1460 - }, - { - "epoch": 0.2490982230865546, - "grad_norm": 0.5147951245307922, - "learning_rate": 7.508951406649617e-06, - "loss": 0.0829, - "step": 1461 - }, - { - "epoch": 0.2492687215280923, - "grad_norm": 0.733745813369751, - "learning_rate": 7.507246376811594e-06, - "loss": 0.0967, - "step": 1462 - }, - { - "epoch": 0.24943921996962998, - "grad_norm": 1.0238525867462158, - "learning_rate": 7.505541346973573e-06, - "loss": 0.1784, - "step": 1463 - }, - { - "epoch": 0.24960971841116764, - "grad_norm": 0.6515784859657288, - "learning_rate": 7.50383631713555e-06, - "loss": 0.1098, - "step": 1464 - }, - { - "epoch": 0.24978021685270532, - "grad_norm": 0.833389401435852, - "learning_rate": 7.502131287297528e-06, - "loss": 0.1741, - "step": 1465 - }, - { - "epoch": 0.249950715294243, - "grad_norm": 1.6986770629882812, - "learning_rate": 7.500426257459507e-06, - "loss": 0.2424, - "step": 1466 - }, - { - "epoch": 0.2501212137357807, - "grad_norm": 0.6558035016059875, - "learning_rate": 7.498721227621484e-06, - "loss": 0.0959, - "step": 1467 - }, - { - "epoch": 0.2502917121773184, - "grad_norm": 0.7692930102348328, - "learning_rate": 7.497016197783462e-06, - "loss": 0.1215, - "step": 1468 - }, - { - "epoch": 0.25046221061885604, - "grad_norm": 0.6363939046859741, - "learning_rate": 7.495311167945439e-06, - "loss": 0.1522, - "step": 1469 - }, - { - "epoch": 0.25063270906039375, - "grad_norm": 0.7761989831924438, - "learning_rate": 7.493606138107418e-06, - "loss": 0.1029, - "step": 1470 - }, - { - "epoch": 0.2508032075019314, - "grad_norm": 0.8327432870864868, - "learning_rate": 7.491901108269395e-06, - "loss": 0.1437, - "step": 1471 - }, - { - "epoch": 0.2509737059434691, - "grad_norm": 0.7670499682426453, - "learning_rate": 7.490196078431373e-06, - "loss": 0.1171, - "step": 1472 - }, - { - "epoch": 0.2511442043850068, - "grad_norm": 0.7924665808677673, - "learning_rate": 7.488491048593352e-06, - "loss": 0.1267, - "step": 1473 - }, - { - "epoch": 0.2513147028265445, - "grad_norm": 0.6221386790275574, - "learning_rate": 7.486786018755329e-06, - "loss": 0.1241, - "step": 1474 - }, - { - "epoch": 0.25148520126808216, - "grad_norm": 0.6074143648147583, - "learning_rate": 7.485080988917307e-06, - "loss": 0.0999, - "step": 1475 - }, - { - "epoch": 0.2516556997096198, - "grad_norm": 0.8128476738929749, - "learning_rate": 7.483375959079284e-06, - "loss": 0.1176, - "step": 1476 - }, - { - "epoch": 0.25182619815115753, - "grad_norm": 0.7725571393966675, - "learning_rate": 7.481670929241263e-06, - "loss": 0.156, - "step": 1477 - }, - { - "epoch": 0.2519966965926952, - "grad_norm": 0.8417410254478455, - "learning_rate": 7.47996589940324e-06, - "loss": 0.1673, - "step": 1478 - }, - { - "epoch": 0.2521671950342329, - "grad_norm": 1.1635167598724365, - "learning_rate": 7.478260869565218e-06, - "loss": 0.1718, - "step": 1479 - }, - { - "epoch": 0.25233769347577056, - "grad_norm": 1.0357716083526611, - "learning_rate": 7.476555839727196e-06, - "loss": 0.2007, - "step": 1480 - }, - { - "epoch": 0.2525081919173083, - "grad_norm": 2.6045241355895996, - "learning_rate": 7.474850809889174e-06, - "loss": 0.3113, - "step": 1481 - }, - { - "epoch": 0.25267869035884594, - "grad_norm": 0.8889693021774292, - "learning_rate": 7.473145780051152e-06, - "loss": 0.1581, - "step": 1482 - }, - { - "epoch": 0.2528491888003836, - "grad_norm": 0.6757845878601074, - "learning_rate": 7.4714407502131295e-06, - "loss": 0.1063, - "step": 1483 - }, - { - "epoch": 0.2530196872419213, - "grad_norm": 1.5937942266464233, - "learning_rate": 7.4697357203751075e-06, - "loss": 0.239, - "step": 1484 - }, - { - "epoch": 0.25319018568345897, - "grad_norm": 0.6575447916984558, - "learning_rate": 7.468030690537085e-06, - "loss": 0.126, - "step": 1485 - }, - { - "epoch": 0.2533606841249967, - "grad_norm": 0.8341320157051086, - "learning_rate": 7.466325660699063e-06, - "loss": 0.1399, - "step": 1486 - }, - { - "epoch": 0.25353118256653434, - "grad_norm": 0.6826508641242981, - "learning_rate": 7.4646206308610405e-06, - "loss": 0.1326, - "step": 1487 - }, - { - "epoch": 0.25370168100807206, - "grad_norm": 1.1577479839324951, - "learning_rate": 7.4629156010230184e-06, - "loss": 0.1091, - "step": 1488 - }, - { - "epoch": 0.2538721794496097, - "grad_norm": 0.8800094723701477, - "learning_rate": 7.4612105711849955e-06, - "loss": 0.1895, - "step": 1489 - }, - { - "epoch": 0.25404267789114743, - "grad_norm": 0.8499655723571777, - "learning_rate": 7.459505541346974e-06, - "loss": 0.1603, - "step": 1490 - }, - { - "epoch": 0.2542131763326851, - "grad_norm": 1.1757783889770508, - "learning_rate": 7.457800511508952e-06, - "loss": 0.1543, - "step": 1491 - }, - { - "epoch": 0.25438367477422275, - "grad_norm": 0.9781895279884338, - "learning_rate": 7.456095481670929e-06, - "loss": 0.1987, - "step": 1492 - }, - { - "epoch": 0.25455417321576046, - "grad_norm": 0.866536557674408, - "learning_rate": 7.454390451832908e-06, - "loss": 0.0631, - "step": 1493 - }, - { - "epoch": 0.2547246716572981, - "grad_norm": 0.7580909729003906, - "learning_rate": 7.452685421994885e-06, - "loss": 0.122, - "step": 1494 - }, - { - "epoch": 0.25489517009883583, - "grad_norm": 1.1645458936691284, - "learning_rate": 7.450980392156863e-06, - "loss": 0.2327, - "step": 1495 - }, - { - "epoch": 0.2550656685403735, - "grad_norm": 0.7042590975761414, - "learning_rate": 7.44927536231884e-06, - "loss": 0.1525, - "step": 1496 - }, - { - "epoch": 0.2552361669819112, - "grad_norm": 1.3674838542938232, - "learning_rate": 7.447570332480819e-06, - "loss": 0.2254, - "step": 1497 - }, - { - "epoch": 0.25540666542344886, - "grad_norm": 0.9225447773933411, - "learning_rate": 7.445865302642797e-06, - "loss": 0.1388, - "step": 1498 - }, - { - "epoch": 0.2555771638649865, - "grad_norm": 0.6369597911834717, - "learning_rate": 7.444160272804775e-06, - "loss": 0.129, - "step": 1499 - }, - { - "epoch": 0.25574766230652424, - "grad_norm": 0.8218846917152405, - "learning_rate": 7.442455242966753e-06, - "loss": 0.1463, - "step": 1500 - }, - { - "epoch": 0.25574766230652424, - "eval_f1_score": 0.24148606811145512, - "eval_loss": 0.1599266529083252, - "eval_runtime": 182.5904, - "eval_samples_per_second": 54.767, - "eval_steps_per_second": 3.423, - "step": 1500 - }, - { - "epoch": 0.2559181607480619, - "grad_norm": 0.6795699596405029, - "learning_rate": 7.44075021312873e-06, - "loss": 0.1336, - "step": 1501 - }, - { - "epoch": 0.2560886591895996, - "grad_norm": 1.0670921802520752, - "learning_rate": 7.439045183290709e-06, - "loss": 0.2411, - "step": 1502 - }, - { - "epoch": 0.25625915763113727, - "grad_norm": 1.0216519832611084, - "learning_rate": 7.437340153452686e-06, - "loss": 0.1929, - "step": 1503 - }, - { - "epoch": 0.256429656072675, - "grad_norm": 0.6687795519828796, - "learning_rate": 7.435635123614664e-06, - "loss": 0.1713, - "step": 1504 - }, - { - "epoch": 0.25660015451421264, - "grad_norm": 0.8873473405838013, - "learning_rate": 7.433930093776641e-06, - "loss": 0.1443, - "step": 1505 - }, - { - "epoch": 0.2567706529557503, - "grad_norm": 0.8436746001243591, - "learning_rate": 7.43222506393862e-06, - "loss": 0.102, - "step": 1506 - }, - { - "epoch": 0.256941151397288, - "grad_norm": 0.6887624263763428, - "learning_rate": 7.430520034100598e-06, - "loss": 0.1807, - "step": 1507 - }, - { - "epoch": 0.2571116498388257, - "grad_norm": 0.7564592361450195, - "learning_rate": 7.428815004262575e-06, - "loss": 0.0905, - "step": 1508 - }, - { - "epoch": 0.2572821482803634, - "grad_norm": 0.6453114151954651, - "learning_rate": 7.427109974424554e-06, - "loss": 0.0818, - "step": 1509 - }, - { - "epoch": 0.25745264672190105, - "grad_norm": 0.5713956356048584, - "learning_rate": 7.425404944586531e-06, - "loss": 0.1265, - "step": 1510 - }, - { - "epoch": 0.25762314516343876, - "grad_norm": 0.7340114712715149, - "learning_rate": 7.423699914748509e-06, - "loss": 0.1635, - "step": 1511 - }, - { - "epoch": 0.2577936436049764, - "grad_norm": 0.7583447694778442, - "learning_rate": 7.421994884910486e-06, - "loss": 0.1548, - "step": 1512 - }, - { - "epoch": 0.25796414204651413, - "grad_norm": 0.750053882598877, - "learning_rate": 7.420289855072465e-06, - "loss": 0.1527, - "step": 1513 - }, - { - "epoch": 0.2581346404880518, - "grad_norm": 1.1867377758026123, - "learning_rate": 7.418584825234442e-06, - "loss": 0.1923, - "step": 1514 - }, - { - "epoch": 0.25830513892958945, - "grad_norm": 1.0661702156066895, - "learning_rate": 7.41687979539642e-06, - "loss": 0.1561, - "step": 1515 - }, - { - "epoch": 0.25847563737112716, - "grad_norm": 0.6860876083374023, - "learning_rate": 7.4151747655583984e-06, - "loss": 0.0991, - "step": 1516 - }, - { - "epoch": 0.2586461358126648, - "grad_norm": 0.6717040538787842, - "learning_rate": 7.4134697357203755e-06, - "loss": 0.2006, - "step": 1517 - }, - { - "epoch": 0.25881663425420254, - "grad_norm": 1.3219722509384155, - "learning_rate": 7.4117647058823535e-06, - "loss": 0.2566, - "step": 1518 - }, - { - "epoch": 0.2589871326957402, - "grad_norm": 1.4991238117218018, - "learning_rate": 7.4100596760443314e-06, - "loss": 0.2157, - "step": 1519 - }, - { - "epoch": 0.2591576311372779, - "grad_norm": 0.6971268653869629, - "learning_rate": 7.408354646206309e-06, - "loss": 0.1123, - "step": 1520 - }, - { - "epoch": 0.25932812957881557, - "grad_norm": 0.9756630659103394, - "learning_rate": 7.4066496163682865e-06, - "loss": 0.1835, - "step": 1521 - }, - { - "epoch": 0.25949862802035323, - "grad_norm": 0.7816482782363892, - "learning_rate": 7.404944586530265e-06, - "loss": 0.1717, - "step": 1522 - }, - { - "epoch": 0.25966912646189094, - "grad_norm": 0.828657865524292, - "learning_rate": 7.403239556692243e-06, - "loss": 0.1442, - "step": 1523 - }, - { - "epoch": 0.2598396249034286, - "grad_norm": 0.9661919474601746, - "learning_rate": 7.40153452685422e-06, - "loss": 0.1028, - "step": 1524 - }, - { - "epoch": 0.2600101233449663, - "grad_norm": 0.9794785976409912, - "learning_rate": 7.399829497016199e-06, - "loss": 0.1285, - "step": 1525 - }, - { - "epoch": 0.260180621786504, - "grad_norm": 1.2873239517211914, - "learning_rate": 7.398124467178176e-06, - "loss": 0.0812, - "step": 1526 - }, - { - "epoch": 0.2603511202280417, - "grad_norm": 0.8300976753234863, - "learning_rate": 7.396419437340154e-06, - "loss": 0.1411, - "step": 1527 - }, - { - "epoch": 0.26052161866957935, - "grad_norm": 0.7613148093223572, - "learning_rate": 7.394714407502131e-06, - "loss": 0.1102, - "step": 1528 - }, - { - "epoch": 0.260692117111117, - "grad_norm": 0.8103572130203247, - "learning_rate": 7.39300937766411e-06, - "loss": 0.1837, - "step": 1529 - }, - { - "epoch": 0.2608626155526547, - "grad_norm": 0.6886932849884033, - "learning_rate": 7.391304347826087e-06, - "loss": 0.0954, - "step": 1530 - }, - { - "epoch": 0.2610331139941924, - "grad_norm": 1.1097701787948608, - "learning_rate": 7.389599317988065e-06, - "loss": 0.1631, - "step": 1531 - }, - { - "epoch": 0.2612036124357301, - "grad_norm": 1.2780195474624634, - "learning_rate": 7.387894288150044e-06, - "loss": 0.1674, - "step": 1532 - }, - { - "epoch": 0.26137411087726775, - "grad_norm": 0.9154688715934753, - "learning_rate": 7.386189258312021e-06, - "loss": 0.1251, - "step": 1533 - }, - { - "epoch": 0.26154460931880547, - "grad_norm": 1.2270097732543945, - "learning_rate": 7.384484228473999e-06, - "loss": 0.1837, - "step": 1534 - }, - { - "epoch": 0.2617151077603431, - "grad_norm": 0.664033055305481, - "learning_rate": 7.382779198635977e-06, - "loss": 0.0543, - "step": 1535 - }, - { - "epoch": 0.26188560620188084, - "grad_norm": 1.0188624858856201, - "learning_rate": 7.381074168797955e-06, - "loss": 0.1608, - "step": 1536 - }, - { - "epoch": 0.2620561046434185, - "grad_norm": 1.2438368797302246, - "learning_rate": 7.379369138959932e-06, - "loss": 0.1671, - "step": 1537 - }, - { - "epoch": 0.26222660308495616, - "grad_norm": 0.8407082557678223, - "learning_rate": 7.377664109121911e-06, - "loss": 0.0658, - "step": 1538 - }, - { - "epoch": 0.26239710152649387, - "grad_norm": 1.376527190208435, - "learning_rate": 7.375959079283888e-06, - "loss": 0.1673, - "step": 1539 - }, - { - "epoch": 0.26256759996803153, - "grad_norm": 0.7966179251670837, - "learning_rate": 7.374254049445866e-06, - "loss": 0.1, - "step": 1540 - }, - { - "epoch": 0.26273809840956924, - "grad_norm": 1.4758623838424683, - "learning_rate": 7.372549019607845e-06, - "loss": 0.2534, - "step": 1541 - }, - { - "epoch": 0.2629085968511069, - "grad_norm": 0.7056615352630615, - "learning_rate": 7.370843989769822e-06, - "loss": 0.1085, - "step": 1542 - }, - { - "epoch": 0.2630790952926446, - "grad_norm": 0.9349361062049866, - "learning_rate": 7.3691389599318e-06, - "loss": 0.1546, - "step": 1543 - }, - { - "epoch": 0.2632495937341823, - "grad_norm": 1.2983108758926392, - "learning_rate": 7.367433930093777e-06, - "loss": 0.086, - "step": 1544 - }, - { - "epoch": 0.26342009217571993, - "grad_norm": 0.9976209998130798, - "learning_rate": 7.3657289002557555e-06, - "loss": 0.0898, - "step": 1545 - }, - { - "epoch": 0.26359059061725765, - "grad_norm": 1.1741667985916138, - "learning_rate": 7.364023870417733e-06, - "loss": 0.1917, - "step": 1546 - }, - { - "epoch": 0.2637610890587953, - "grad_norm": 0.8395033478736877, - "learning_rate": 7.3623188405797106e-06, - "loss": 0.0972, - "step": 1547 - }, - { - "epoch": 0.263931587500333, - "grad_norm": 1.1209566593170166, - "learning_rate": 7.360613810741689e-06, - "loss": 0.1802, - "step": 1548 - }, - { - "epoch": 0.2641020859418707, - "grad_norm": 0.9700178503990173, - "learning_rate": 7.3589087809036665e-06, - "loss": 0.1734, - "step": 1549 - }, - { - "epoch": 0.2642725843834084, - "grad_norm": 0.7460111379623413, - "learning_rate": 7.357203751065644e-06, - "loss": 0.1348, - "step": 1550 - }, - { - "epoch": 0.26444308282494605, - "grad_norm": 0.4582483470439911, - "learning_rate": 7.3554987212276215e-06, - "loss": 0.0815, - "step": 1551 - }, - { - "epoch": 0.2646135812664837, - "grad_norm": 1.1477794647216797, - "learning_rate": 7.3537936913896e-06, - "loss": 0.1943, - "step": 1552 - }, - { - "epoch": 0.2647840797080214, - "grad_norm": 0.8271501660346985, - "learning_rate": 7.352088661551577e-06, - "loss": 0.1753, - "step": 1553 - }, - { - "epoch": 0.2649545781495591, - "grad_norm": 0.8639466762542725, - "learning_rate": 7.350383631713555e-06, - "loss": 0.1813, - "step": 1554 - }, - { - "epoch": 0.2651250765910968, - "grad_norm": 1.1666908264160156, - "learning_rate": 7.348678601875533e-06, - "loss": 0.1782, - "step": 1555 - }, - { - "epoch": 0.26529557503263446, - "grad_norm": 1.2111077308654785, - "learning_rate": 7.346973572037511e-06, - "loss": 0.2437, - "step": 1556 - }, - { - "epoch": 0.26546607347417217, - "grad_norm": 0.5697571635246277, - "learning_rate": 7.345268542199489e-06, - "loss": 0.1359, - "step": 1557 - }, - { - "epoch": 0.26563657191570983, - "grad_norm": 0.6316619515419006, - "learning_rate": 7.343563512361467e-06, - "loss": 0.1581, - "step": 1558 - }, - { - "epoch": 0.26580707035724754, - "grad_norm": 0.9178199768066406, - "learning_rate": 7.341858482523445e-06, - "loss": 0.1383, - "step": 1559 - }, - { - "epoch": 0.2659775687987852, - "grad_norm": 0.9397381544113159, - "learning_rate": 7.340153452685422e-06, - "loss": 0.1028, - "step": 1560 - }, - { - "epoch": 0.26614806724032286, - "grad_norm": 0.6357908248901367, - "learning_rate": 7.338448422847401e-06, - "loss": 0.1687, - "step": 1561 - }, - { - "epoch": 0.2663185656818606, - "grad_norm": 1.449703335762024, - "learning_rate": 7.336743393009378e-06, - "loss": 0.2689, - "step": 1562 - }, - { - "epoch": 0.26648906412339823, - "grad_norm": 1.1114320755004883, - "learning_rate": 7.335038363171356e-06, - "loss": 0.2566, - "step": 1563 - }, - { - "epoch": 0.26665956256493595, - "grad_norm": 0.7258211970329285, - "learning_rate": 7.333333333333333e-06, - "loss": 0.1485, - "step": 1564 - }, - { - "epoch": 0.2668300610064736, - "grad_norm": 0.7410526871681213, - "learning_rate": 7.331628303495312e-06, - "loss": 0.1826, - "step": 1565 - }, - { - "epoch": 0.2670005594480113, - "grad_norm": 0.691279411315918, - "learning_rate": 7.32992327365729e-06, - "loss": 0.174, - "step": 1566 - }, - { - "epoch": 0.267171057889549, - "grad_norm": 0.6794247031211853, - "learning_rate": 7.328218243819267e-06, - "loss": 0.1715, - "step": 1567 - }, - { - "epoch": 0.26734155633108664, - "grad_norm": 0.7400369048118591, - "learning_rate": 7.326513213981246e-06, - "loss": 0.106, - "step": 1568 - }, - { - "epoch": 0.26751205477262435, - "grad_norm": 0.5771182179450989, - "learning_rate": 7.324808184143223e-06, - "loss": 0.1487, - "step": 1569 - }, - { - "epoch": 0.267682553214162, - "grad_norm": 0.8204482197761536, - "learning_rate": 7.323103154305201e-06, - "loss": 0.1984, - "step": 1570 - }, - { - "epoch": 0.2678530516556997, - "grad_norm": 0.8229162693023682, - "learning_rate": 7.321398124467178e-06, - "loss": 0.2344, - "step": 1571 - }, - { - "epoch": 0.2680235500972374, - "grad_norm": 0.6249790191650391, - "learning_rate": 7.319693094629157e-06, - "loss": 0.1283, - "step": 1572 - }, - { - "epoch": 0.2681940485387751, - "grad_norm": 0.9039957523345947, - "learning_rate": 7.317988064791135e-06, - "loss": 0.1595, - "step": 1573 - }, - { - "epoch": 0.26836454698031276, - "grad_norm": 1.0393831729888916, - "learning_rate": 7.316283034953113e-06, - "loss": 0.2209, - "step": 1574 - }, - { - "epoch": 0.2685350454218504, - "grad_norm": 0.6935752034187317, - "learning_rate": 7.3145780051150906e-06, - "loss": 0.1117, - "step": 1575 - }, - { - "epoch": 0.26870554386338813, - "grad_norm": 0.6081529855728149, - "learning_rate": 7.312872975277068e-06, - "loss": 0.0929, - "step": 1576 - }, - { - "epoch": 0.2688760423049258, - "grad_norm": 0.6110521554946899, - "learning_rate": 7.3111679454390465e-06, - "loss": 0.1149, - "step": 1577 - }, - { - "epoch": 0.2690465407464635, - "grad_norm": 0.9071523547172546, - "learning_rate": 7.3094629156010236e-06, - "loss": 0.1966, - "step": 1578 - }, - { - "epoch": 0.26921703918800116, - "grad_norm": 0.7462790608406067, - "learning_rate": 7.3077578857630015e-06, - "loss": 0.0771, - "step": 1579 - }, - { - "epoch": 0.2693875376295389, - "grad_norm": 0.6232742071151733, - "learning_rate": 7.306052855924979e-06, - "loss": 0.1326, - "step": 1580 - }, - { - "epoch": 0.26955803607107653, - "grad_norm": 0.685059130191803, - "learning_rate": 7.304347826086957e-06, - "loss": 0.1197, - "step": 1581 - }, - { - "epoch": 0.2697285345126142, - "grad_norm": 0.9145820140838623, - "learning_rate": 7.302642796248935e-06, - "loss": 0.1311, - "step": 1582 - }, - { - "epoch": 0.2698990329541519, - "grad_norm": 1.031315803527832, - "learning_rate": 7.3009377664109125e-06, - "loss": 0.2123, - "step": 1583 - }, - { - "epoch": 0.27006953139568957, - "grad_norm": 1.0389862060546875, - "learning_rate": 7.299232736572891e-06, - "loss": 0.2036, - "step": 1584 - }, - { - "epoch": 0.2702400298372273, - "grad_norm": 1.0475060939788818, - "learning_rate": 7.297527706734868e-06, - "loss": 0.1878, - "step": 1585 - }, - { - "epoch": 0.27041052827876494, - "grad_norm": 0.5867076516151428, - "learning_rate": 7.295822676896846e-06, - "loss": 0.096, - "step": 1586 - }, - { - "epoch": 0.27058102672030265, - "grad_norm": 0.6274494528770447, - "learning_rate": 7.294117647058823e-06, - "loss": 0.1282, - "step": 1587 - }, - { - "epoch": 0.2707515251618403, - "grad_norm": 0.750312089920044, - "learning_rate": 7.292412617220802e-06, - "loss": 0.1011, - "step": 1588 - }, - { - "epoch": 0.270922023603378, - "grad_norm": 0.9816113710403442, - "learning_rate": 7.290707587382779e-06, - "loss": 0.0997, - "step": 1589 - }, - { - "epoch": 0.2710925220449157, - "grad_norm": 1.259264588356018, - "learning_rate": 7.289002557544757e-06, - "loss": 0.2381, - "step": 1590 - }, - { - "epoch": 0.27126302048645334, - "grad_norm": 0.7334678173065186, - "learning_rate": 7.287297527706736e-06, - "loss": 0.137, - "step": 1591 - }, - { - "epoch": 0.27143351892799106, - "grad_norm": 0.5797063112258911, - "learning_rate": 7.285592497868713e-06, - "loss": 0.101, - "step": 1592 - }, - { - "epoch": 0.2716040173695287, - "grad_norm": 0.707748532295227, - "learning_rate": 7.283887468030691e-06, - "loss": 0.1033, - "step": 1593 - }, - { - "epoch": 0.27177451581106643, - "grad_norm": 0.792129635810852, - "learning_rate": 7.282182438192669e-06, - "loss": 0.1575, - "step": 1594 - }, - { - "epoch": 0.2719450142526041, - "grad_norm": 0.7394453287124634, - "learning_rate": 7.280477408354647e-06, - "loss": 0.159, - "step": 1595 - }, - { - "epoch": 0.2721155126941418, - "grad_norm": 0.6427018642425537, - "learning_rate": 7.278772378516624e-06, - "loss": 0.114, - "step": 1596 - }, - { - "epoch": 0.27228601113567946, - "grad_norm": 0.8114389181137085, - "learning_rate": 7.277067348678603e-06, - "loss": 0.1574, - "step": 1597 - }, - { - "epoch": 0.2724565095772171, - "grad_norm": 0.5855715274810791, - "learning_rate": 7.27536231884058e-06, - "loss": 0.1004, - "step": 1598 - }, - { - "epoch": 0.27262700801875484, - "grad_norm": 1.1192022562026978, - "learning_rate": 7.273657289002558e-06, - "loss": 0.1619, - "step": 1599 - }, - { - "epoch": 0.2727975064602925, - "grad_norm": 0.8948620557785034, - "learning_rate": 7.271952259164537e-06, - "loss": 0.1634, - "step": 1600 - }, - { - "epoch": 0.2727975064602925, - "eval_f1_score": 0.2809917355371901, - "eval_loss": 0.16055278480052948, - "eval_runtime": 182.5791, - "eval_samples_per_second": 54.771, - "eval_steps_per_second": 3.423, - "step": 1600 - }, - { - "epoch": 0.2729680049018302, - "grad_norm": 0.995793879032135, - "learning_rate": 7.270247229326514e-06, - "loss": 0.0949, - "step": 1601 - }, - { - "epoch": 0.27313850334336787, - "grad_norm": 0.572679877281189, - "learning_rate": 7.268542199488492e-06, - "loss": 0.1324, - "step": 1602 - }, - { - "epoch": 0.2733090017849056, - "grad_norm": 1.449594497680664, - "learning_rate": 7.266837169650469e-06, - "loss": 0.1787, - "step": 1603 - }, - { - "epoch": 0.27347950022644324, - "grad_norm": 0.6399373412132263, - "learning_rate": 7.265132139812448e-06, - "loss": 0.1394, - "step": 1604 - }, - { - "epoch": 0.2736499986679809, - "grad_norm": 1.0330250263214111, - "learning_rate": 7.263427109974425e-06, - "loss": 0.111, - "step": 1605 - }, - { - "epoch": 0.2738204971095186, - "grad_norm": 0.699213981628418, - "learning_rate": 7.261722080136403e-06, - "loss": 0.1409, - "step": 1606 - }, - { - "epoch": 0.27399099555105627, - "grad_norm": 0.8001547455787659, - "learning_rate": 7.2600170502983815e-06, - "loss": 0.1732, - "step": 1607 - }, - { - "epoch": 0.274161493992594, - "grad_norm": 1.5362522602081299, - "learning_rate": 7.258312020460359e-06, - "loss": 0.1623, - "step": 1608 - }, - { - "epoch": 0.27433199243413164, - "grad_norm": 0.6935368776321411, - "learning_rate": 7.2566069906223366e-06, - "loss": 0.1215, - "step": 1609 - }, - { - "epoch": 0.27450249087566936, - "grad_norm": 0.9737477898597717, - "learning_rate": 7.2549019607843145e-06, - "loss": 0.1726, - "step": 1610 - }, - { - "epoch": 0.274672989317207, - "grad_norm": 1.0231482982635498, - "learning_rate": 7.2531969309462924e-06, - "loss": 0.1836, - "step": 1611 - }, - { - "epoch": 0.27484348775874473, - "grad_norm": 0.7062311172485352, - "learning_rate": 7.2514919011082696e-06, - "loss": 0.1083, - "step": 1612 - }, - { - "epoch": 0.2750139862002824, - "grad_norm": 0.964033842086792, - "learning_rate": 7.249786871270248e-06, - "loss": 0.2435, - "step": 1613 - }, - { - "epoch": 0.27518448464182005, - "grad_norm": 0.7868807911872864, - "learning_rate": 7.2480818414322254e-06, - "loss": 0.181, - "step": 1614 - }, - { - "epoch": 0.27535498308335776, - "grad_norm": 0.9056547284126282, - "learning_rate": 7.246376811594203e-06, - "loss": 0.1895, - "step": 1615 - }, - { - "epoch": 0.2755254815248954, - "grad_norm": 1.1099858283996582, - "learning_rate": 7.244671781756182e-06, - "loss": 0.173, - "step": 1616 - }, - { - "epoch": 0.27569597996643314, - "grad_norm": 1.1758391857147217, - "learning_rate": 7.242966751918159e-06, - "loss": 0.1648, - "step": 1617 - }, - { - "epoch": 0.2758664784079708, - "grad_norm": 0.8921638131141663, - "learning_rate": 7.241261722080137e-06, - "loss": 0.1423, - "step": 1618 - }, - { - "epoch": 0.2760369768495085, - "grad_norm": 0.8928386569023132, - "learning_rate": 7.239556692242114e-06, - "loss": 0.192, - "step": 1619 - }, - { - "epoch": 0.27620747529104617, - "grad_norm": 0.9611541628837585, - "learning_rate": 7.237851662404093e-06, - "loss": 0.1305, - "step": 1620 - }, - { - "epoch": 0.2763779737325838, - "grad_norm": 0.8896896839141846, - "learning_rate": 7.23614663256607e-06, - "loss": 0.1177, - "step": 1621 - }, - { - "epoch": 0.27654847217412154, - "grad_norm": 0.7848691940307617, - "learning_rate": 7.234441602728048e-06, - "loss": 0.1609, - "step": 1622 - }, - { - "epoch": 0.2767189706156592, - "grad_norm": 0.5970712304115295, - "learning_rate": 7.232736572890025e-06, - "loss": 0.0916, - "step": 1623 - }, - { - "epoch": 0.2768894690571969, - "grad_norm": 0.9145472645759583, - "learning_rate": 7.231031543052004e-06, - "loss": 0.1655, - "step": 1624 - }, - { - "epoch": 0.27705996749873457, - "grad_norm": 0.7860545516014099, - "learning_rate": 7.229326513213982e-06, - "loss": 0.1545, - "step": 1625 - }, - { - "epoch": 0.2772304659402723, - "grad_norm": 0.5441620349884033, - "learning_rate": 7.227621483375959e-06, - "loss": 0.0995, - "step": 1626 - }, - { - "epoch": 0.27740096438180994, - "grad_norm": 1.0052058696746826, - "learning_rate": 7.225916453537938e-06, - "loss": 0.2069, - "step": 1627 - }, - { - "epoch": 0.2775714628233476, - "grad_norm": 1.0887932777404785, - "learning_rate": 7.224211423699915e-06, - "loss": 0.1708, - "step": 1628 - }, - { - "epoch": 0.2777419612648853, - "grad_norm": 0.9987654685974121, - "learning_rate": 7.222506393861893e-06, - "loss": 0.1747, - "step": 1629 - }, - { - "epoch": 0.277912459706423, - "grad_norm": 1.6356117725372314, - "learning_rate": 7.220801364023871e-06, - "loss": 0.296, - "step": 1630 - }, - { - "epoch": 0.2780829581479607, - "grad_norm": 1.5601258277893066, - "learning_rate": 7.219096334185849e-06, - "loss": 0.2036, - "step": 1631 - }, - { - "epoch": 0.27825345658949835, - "grad_norm": 0.47777241468429565, - "learning_rate": 7.217391304347827e-06, - "loss": 0.1113, - "step": 1632 - }, - { - "epoch": 0.27842395503103606, - "grad_norm": 0.7547958493232727, - "learning_rate": 7.215686274509805e-06, - "loss": 0.1498, - "step": 1633 - }, - { - "epoch": 0.2785944534725737, - "grad_norm": 0.6811045408248901, - "learning_rate": 7.213981244671783e-06, - "loss": 0.1155, - "step": 1634 - }, - { - "epoch": 0.27876495191411144, - "grad_norm": 0.8681180477142334, - "learning_rate": 7.21227621483376e-06, - "loss": 0.1752, - "step": 1635 - }, - { - "epoch": 0.2789354503556491, - "grad_norm": 0.6963757276535034, - "learning_rate": 7.210571184995739e-06, - "loss": 0.1981, - "step": 1636 - }, - { - "epoch": 0.27910594879718675, - "grad_norm": 0.6927238702774048, - "learning_rate": 7.208866155157716e-06, - "loss": 0.1345, - "step": 1637 - }, - { - "epoch": 0.27927644723872447, - "grad_norm": 0.7065186500549316, - "learning_rate": 7.207161125319694e-06, - "loss": 0.1286, - "step": 1638 - }, - { - "epoch": 0.2794469456802621, - "grad_norm": 0.6613034605979919, - "learning_rate": 7.205456095481671e-06, - "loss": 0.1637, - "step": 1639 - }, - { - "epoch": 0.27961744412179984, - "grad_norm": 0.7331064343452454, - "learning_rate": 7.2037510656436495e-06, - "loss": 0.1949, - "step": 1640 - }, - { - "epoch": 0.2797879425633375, - "grad_norm": 0.7222153544425964, - "learning_rate": 7.2020460358056275e-06, - "loss": 0.0943, - "step": 1641 - }, - { - "epoch": 0.2799584410048752, - "grad_norm": 0.8640736937522888, - "learning_rate": 7.200341005967605e-06, - "loss": 0.1327, - "step": 1642 - }, - { - "epoch": 0.2801289394464129, - "grad_norm": 0.7286602854728699, - "learning_rate": 7.198635976129583e-06, - "loss": 0.1188, - "step": 1643 - }, - { - "epoch": 0.28029943788795053, - "grad_norm": 1.4019544124603271, - "learning_rate": 7.1969309462915605e-06, - "loss": 0.1201, - "step": 1644 - }, - { - "epoch": 0.28046993632948825, - "grad_norm": 0.6155040860176086, - "learning_rate": 7.1952259164535384e-06, - "loss": 0.1034, - "step": 1645 - }, - { - "epoch": 0.2806404347710259, - "grad_norm": 0.6862358450889587, - "learning_rate": 7.1935208866155155e-06, - "loss": 0.0757, - "step": 1646 - }, - { - "epoch": 0.2808109332125636, - "grad_norm": 0.4926810562610626, - "learning_rate": 7.191815856777494e-06, - "loss": 0.0643, - "step": 1647 - }, - { - "epoch": 0.2809814316541013, - "grad_norm": 0.5891478061676025, - "learning_rate": 7.1901108269394714e-06, - "loss": 0.0608, - "step": 1648 - }, - { - "epoch": 0.281151930095639, - "grad_norm": 0.7541805505752563, - "learning_rate": 7.18840579710145e-06, - "loss": 0.1001, - "step": 1649 - }, - { - "epoch": 0.28132242853717665, - "grad_norm": 0.7641288638114929, - "learning_rate": 7.186700767263428e-06, - "loss": 0.156, - "step": 1650 - }, - { - "epoch": 0.2814929269787143, - "grad_norm": 1.1662489175796509, - "learning_rate": 7.184995737425405e-06, - "loss": 0.1361, - "step": 1651 - }, - { - "epoch": 0.281663425420252, - "grad_norm": 1.0508551597595215, - "learning_rate": 7.183290707587384e-06, - "loss": 0.1872, - "step": 1652 - }, - { - "epoch": 0.2818339238617897, - "grad_norm": 0.7512367963790894, - "learning_rate": 7.181585677749361e-06, - "loss": 0.1021, - "step": 1653 - }, - { - "epoch": 0.2820044223033274, - "grad_norm": 0.6517552137374878, - "learning_rate": 7.179880647911339e-06, - "loss": 0.0645, - "step": 1654 - }, - { - "epoch": 0.28217492074486505, - "grad_norm": 0.8589548468589783, - "learning_rate": 7.178175618073316e-06, - "loss": 0.2049, - "step": 1655 - }, - { - "epoch": 0.28234541918640277, - "grad_norm": 0.8293384313583374, - "learning_rate": 7.176470588235295e-06, - "loss": 0.1309, - "step": 1656 - }, - { - "epoch": 0.2825159176279404, - "grad_norm": 1.2787649631500244, - "learning_rate": 7.174765558397273e-06, - "loss": 0.1629, - "step": 1657 - }, - { - "epoch": 0.28268641606947814, - "grad_norm": 0.6129804849624634, - "learning_rate": 7.17306052855925e-06, - "loss": 0.1121, - "step": 1658 - }, - { - "epoch": 0.2828569145110158, - "grad_norm": 0.8967618346214294, - "learning_rate": 7.171355498721229e-06, - "loss": 0.1106, - "step": 1659 - }, - { - "epoch": 0.28302741295255346, - "grad_norm": 0.7329991459846497, - "learning_rate": 7.169650468883206e-06, - "loss": 0.0529, - "step": 1660 - }, - { - "epoch": 0.2831979113940912, - "grad_norm": 0.9802336096763611, - "learning_rate": 7.167945439045184e-06, - "loss": 0.0964, - "step": 1661 - }, - { - "epoch": 0.28336840983562883, - "grad_norm": 2.165433168411255, - "learning_rate": 7.166240409207161e-06, - "loss": 0.1989, - "step": 1662 - }, - { - "epoch": 0.28353890827716655, - "grad_norm": 0.9615960717201233, - "learning_rate": 7.16453537936914e-06, - "loss": 0.1118, - "step": 1663 - }, - { - "epoch": 0.2837094067187042, - "grad_norm": 0.830369234085083, - "learning_rate": 7.162830349531117e-06, - "loss": 0.0781, - "step": 1664 - }, - { - "epoch": 0.2838799051602419, - "grad_norm": 1.2290384769439697, - "learning_rate": 7.161125319693095e-06, - "loss": 0.1, - "step": 1665 - }, - { - "epoch": 0.2840504036017796, - "grad_norm": 0.7927495837211609, - "learning_rate": 7.159420289855074e-06, - "loss": 0.0979, - "step": 1666 - }, - { - "epoch": 0.28422090204331724, - "grad_norm": 1.4657909870147705, - "learning_rate": 7.157715260017051e-06, - "loss": 0.1313, - "step": 1667 - }, - { - "epoch": 0.28439140048485495, - "grad_norm": 1.055486798286438, - "learning_rate": 7.156010230179029e-06, - "loss": 0.0692, - "step": 1668 - }, - { - "epoch": 0.2845618989263926, - "grad_norm": 1.2817178964614868, - "learning_rate": 7.154305200341007e-06, - "loss": 0.1194, - "step": 1669 - }, - { - "epoch": 0.2847323973679303, - "grad_norm": 1.1928097009658813, - "learning_rate": 7.152600170502985e-06, - "loss": 0.1265, - "step": 1670 - }, - { - "epoch": 0.284902895809468, - "grad_norm": 1.5519382953643799, - "learning_rate": 7.150895140664962e-06, - "loss": 0.1602, - "step": 1671 - }, - { - "epoch": 0.2850733942510057, - "grad_norm": 1.305989384651184, - "learning_rate": 7.1491901108269405e-06, - "loss": 0.2005, - "step": 1672 - }, - { - "epoch": 0.28524389269254335, - "grad_norm": 1.3667876720428467, - "learning_rate": 7.147485080988918e-06, - "loss": 0.212, - "step": 1673 - }, - { - "epoch": 0.285414391134081, - "grad_norm": 1.4790457487106323, - "learning_rate": 7.1457800511508955e-06, - "loss": 0.2204, - "step": 1674 - }, - { - "epoch": 0.28558488957561873, - "grad_norm": 1.683897852897644, - "learning_rate": 7.144075021312874e-06, - "loss": 0.1859, - "step": 1675 - }, - { - "epoch": 0.2857553880171564, - "grad_norm": 0.9878926873207092, - "learning_rate": 7.1423699914748514e-06, - "loss": 0.1382, - "step": 1676 - }, - { - "epoch": 0.2859258864586941, - "grad_norm": 1.0337659120559692, - "learning_rate": 7.140664961636829e-06, - "loss": 0.0803, - "step": 1677 - }, - { - "epoch": 0.28609638490023176, - "grad_norm": 1.0796700716018677, - "learning_rate": 7.1389599317988065e-06, - "loss": 0.0952, - "step": 1678 - }, - { - "epoch": 0.2862668833417695, - "grad_norm": 1.966192364692688, - "learning_rate": 7.137254901960785e-06, - "loss": 0.2293, - "step": 1679 - }, - { - "epoch": 0.28643738178330713, - "grad_norm": 0.7053760290145874, - "learning_rate": 7.135549872122762e-06, - "loss": 0.1088, - "step": 1680 - }, - { - "epoch": 0.28660788022484485, - "grad_norm": 1.5122227668762207, - "learning_rate": 7.13384484228474e-06, - "loss": 0.198, - "step": 1681 - }, - { - "epoch": 0.2867783786663825, - "grad_norm": 0.47033724188804626, - "learning_rate": 7.132139812446719e-06, - "loss": 0.0445, - "step": 1682 - }, - { - "epoch": 0.28694887710792016, - "grad_norm": 0.885513961315155, - "learning_rate": 7.130434782608696e-06, - "loss": 0.0858, - "step": 1683 - }, - { - "epoch": 0.2871193755494579, - "grad_norm": 1.2802006006240845, - "learning_rate": 7.128729752770674e-06, - "loss": 0.1765, - "step": 1684 - }, - { - "epoch": 0.28728987399099554, - "grad_norm": 0.9488986730575562, - "learning_rate": 7.127024722932651e-06, - "loss": 0.1583, - "step": 1685 - }, - { - "epoch": 0.28746037243253325, - "grad_norm": 1.2158293724060059, - "learning_rate": 7.12531969309463e-06, - "loss": 0.1831, - "step": 1686 - }, - { - "epoch": 0.2876308708740709, - "grad_norm": 1.6682029962539673, - "learning_rate": 7.123614663256607e-06, - "loss": 0.234, - "step": 1687 - }, - { - "epoch": 0.2878013693156086, - "grad_norm": 0.6801013350486755, - "learning_rate": 7.121909633418586e-06, - "loss": 0.1266, - "step": 1688 - }, - { - "epoch": 0.2879718677571463, - "grad_norm": 0.8549294471740723, - "learning_rate": 7.120204603580563e-06, - "loss": 0.2075, - "step": 1689 - }, - { - "epoch": 0.28814236619868394, - "grad_norm": 0.9746972918510437, - "learning_rate": 7.118499573742541e-06, - "loss": 0.136, - "step": 1690 - }, - { - "epoch": 0.28831286464022166, - "grad_norm": 0.7878130674362183, - "learning_rate": 7.11679454390452e-06, - "loss": 0.1134, - "step": 1691 - }, - { - "epoch": 0.2884833630817593, - "grad_norm": 0.9971168041229248, - "learning_rate": 7.115089514066497e-06, - "loss": 0.1872, - "step": 1692 - }, - { - "epoch": 0.28865386152329703, - "grad_norm": 0.8819569945335388, - "learning_rate": 7.113384484228475e-06, - "loss": 0.107, - "step": 1693 - }, - { - "epoch": 0.2888243599648347, - "grad_norm": 1.1124303340911865, - "learning_rate": 7.111679454390452e-06, - "loss": 0.2033, - "step": 1694 - }, - { - "epoch": 0.2889948584063724, - "grad_norm": 0.7073795199394226, - "learning_rate": 7.109974424552431e-06, - "loss": 0.0919, - "step": 1695 - }, - { - "epoch": 0.28916535684791006, - "grad_norm": 1.197323203086853, - "learning_rate": 7.108269394714408e-06, - "loss": 0.1908, - "step": 1696 - }, - { - "epoch": 0.2893358552894477, - "grad_norm": 0.7450862526893616, - "learning_rate": 7.106564364876386e-06, - "loss": 0.1421, - "step": 1697 - }, - { - "epoch": 0.28950635373098543, - "grad_norm": 1.0751428604125977, - "learning_rate": 7.104859335038363e-06, - "loss": 0.1508, - "step": 1698 - }, - { - "epoch": 0.2896768521725231, - "grad_norm": 0.6413664817810059, - "learning_rate": 7.103154305200342e-06, - "loss": 0.1564, - "step": 1699 - }, - { - "epoch": 0.2898473506140608, - "grad_norm": 0.7627037167549133, - "learning_rate": 7.10144927536232e-06, - "loss": 0.1049, - "step": 1700 - }, - { - "epoch": 0.2898473506140608, - "eval_f1_score": 0.2712765957446808, - "eval_loss": 0.16056498885154724, - "eval_runtime": 182.6358, - "eval_samples_per_second": 54.754, - "eval_steps_per_second": 3.422, - "step": 1700 - }, - { - "epoch": 0.29001784905559846, - "grad_norm": 0.7326638102531433, - "learning_rate": 7.099744245524297e-06, - "loss": 0.0934, - "step": 1701 - }, - { - "epoch": 0.2901883474971362, - "grad_norm": 0.8272231817245483, - "learning_rate": 7.0980392156862755e-06, - "loss": 0.1346, - "step": 1702 - }, - { - "epoch": 0.29035884593867384, - "grad_norm": 0.7498846054077148, - "learning_rate": 7.096334185848253e-06, - "loss": 0.1581, - "step": 1703 - }, - { - "epoch": 0.29052934438021155, - "grad_norm": 0.7257694602012634, - "learning_rate": 7.0946291560102306e-06, - "loss": 0.1084, - "step": 1704 - }, - { - "epoch": 0.2906998428217492, - "grad_norm": 0.9842420220375061, - "learning_rate": 7.0929241261722085e-06, - "loss": 0.079, - "step": 1705 - }, - { - "epoch": 0.29087034126328687, - "grad_norm": 0.729982852935791, - "learning_rate": 7.0912190963341865e-06, - "loss": 0.1044, - "step": 1706 - }, - { - "epoch": 0.2910408397048246, - "grad_norm": 1.0497548580169678, - "learning_rate": 7.089514066496164e-06, - "loss": 0.2153, - "step": 1707 - }, - { - "epoch": 0.29121133814636224, - "grad_norm": 0.7500994801521301, - "learning_rate": 7.087809036658142e-06, - "loss": 0.0815, - "step": 1708 - }, - { - "epoch": 0.29138183658789996, - "grad_norm": 0.6701683402061462, - "learning_rate": 7.08610400682012e-06, - "loss": 0.0694, - "step": 1709 - }, - { - "epoch": 0.2915523350294376, - "grad_norm": 1.0692698955535889, - "learning_rate": 7.084398976982097e-06, - "loss": 0.1516, - "step": 1710 - }, - { - "epoch": 0.29172283347097533, - "grad_norm": 1.1487675905227661, - "learning_rate": 7.082693947144076e-06, - "loss": 0.1954, - "step": 1711 - }, - { - "epoch": 0.291893331912513, - "grad_norm": 1.0295099020004272, - "learning_rate": 7.080988917306053e-06, - "loss": 0.1305, - "step": 1712 - }, - { - "epoch": 0.29206383035405065, - "grad_norm": 1.4119102954864502, - "learning_rate": 7.079283887468031e-06, - "loss": 0.1735, - "step": 1713 - }, - { - "epoch": 0.29223432879558836, - "grad_norm": 1.2772480249404907, - "learning_rate": 7.077578857630008e-06, - "loss": 0.1499, - "step": 1714 - }, - { - "epoch": 0.292404827237126, - "grad_norm": 1.1898237466812134, - "learning_rate": 7.075873827791987e-06, - "loss": 0.1384, - "step": 1715 - }, - { - "epoch": 0.29257532567866373, - "grad_norm": 1.3872730731964111, - "learning_rate": 7.074168797953965e-06, - "loss": 0.1784, - "step": 1716 - }, - { - "epoch": 0.2927458241202014, - "grad_norm": 0.9282299280166626, - "learning_rate": 7.072463768115942e-06, - "loss": 0.1353, - "step": 1717 - }, - { - "epoch": 0.2929163225617391, - "grad_norm": 1.004987359046936, - "learning_rate": 7.070758738277921e-06, - "loss": 0.0914, - "step": 1718 - }, - { - "epoch": 0.29308682100327677, - "grad_norm": 0.8638032674789429, - "learning_rate": 7.069053708439898e-06, - "loss": 0.1113, - "step": 1719 - }, - { - "epoch": 0.2932573194448144, - "grad_norm": 0.935568630695343, - "learning_rate": 7.067348678601876e-06, - "loss": 0.1775, - "step": 1720 - }, - { - "epoch": 0.29342781788635214, - "grad_norm": 0.967147171497345, - "learning_rate": 7.065643648763853e-06, - "loss": 0.2255, - "step": 1721 - }, - { - "epoch": 0.2935983163278898, - "grad_norm": 1.0089436769485474, - "learning_rate": 7.063938618925832e-06, - "loss": 0.0984, - "step": 1722 - }, - { - "epoch": 0.2937688147694275, - "grad_norm": 1.053886890411377, - "learning_rate": 7.062233589087809e-06, - "loss": 0.1922, - "step": 1723 - }, - { - "epoch": 0.29393931321096517, - "grad_norm": 1.6038528680801392, - "learning_rate": 7.060528559249788e-06, - "loss": 0.1198, - "step": 1724 - }, - { - "epoch": 0.2941098116525029, - "grad_norm": 0.8823066353797913, - "learning_rate": 7.058823529411766e-06, - "loss": 0.1262, - "step": 1725 - }, - { - "epoch": 0.29428031009404054, - "grad_norm": 1.1137727499008179, - "learning_rate": 7.057118499573743e-06, - "loss": 0.1021, - "step": 1726 - }, - { - "epoch": 0.29445080853557826, - "grad_norm": 0.8181190490722656, - "learning_rate": 7.055413469735722e-06, - "loss": 0.1207, - "step": 1727 - }, - { - "epoch": 0.2946213069771159, - "grad_norm": 1.24038565158844, - "learning_rate": 7.053708439897699e-06, - "loss": 0.1683, - "step": 1728 - }, - { - "epoch": 0.2947918054186536, - "grad_norm": 0.7535143494606018, - "learning_rate": 7.052003410059677e-06, - "loss": 0.128, - "step": 1729 - }, - { - "epoch": 0.2949623038601913, - "grad_norm": 0.8402575254440308, - "learning_rate": 7.050298380221654e-06, - "loss": 0.163, - "step": 1730 - }, - { - "epoch": 0.29513280230172895, - "grad_norm": 1.4767165184020996, - "learning_rate": 7.048593350383633e-06, - "loss": 0.2026, - "step": 1731 - }, - { - "epoch": 0.29530330074326666, - "grad_norm": 0.7595881223678589, - "learning_rate": 7.0468883205456106e-06, - "loss": 0.1165, - "step": 1732 - }, - { - "epoch": 0.2954737991848043, - "grad_norm": 0.8625977039337158, - "learning_rate": 7.045183290707588e-06, - "loss": 0.166, - "step": 1733 - }, - { - "epoch": 0.29564429762634203, - "grad_norm": 0.5962086915969849, - "learning_rate": 7.0434782608695665e-06, - "loss": 0.1195, - "step": 1734 - }, - { - "epoch": 0.2958147960678797, - "grad_norm": 0.7212527394294739, - "learning_rate": 7.0417732310315436e-06, - "loss": 0.1048, - "step": 1735 - }, - { - "epoch": 0.29598529450941735, - "grad_norm": 1.1094732284545898, - "learning_rate": 7.0400682011935215e-06, - "loss": 0.1589, - "step": 1736 - }, - { - "epoch": 0.29615579295095507, - "grad_norm": 0.724922239780426, - "learning_rate": 7.038363171355499e-06, - "loss": 0.0816, - "step": 1737 - }, - { - "epoch": 0.2963262913924927, - "grad_norm": 0.8069830536842346, - "learning_rate": 7.036658141517477e-06, - "loss": 0.0891, - "step": 1738 - }, - { - "epoch": 0.29649678983403044, - "grad_norm": 0.9070818424224854, - "learning_rate": 7.0349531116794545e-06, - "loss": 0.0875, - "step": 1739 - }, - { - "epoch": 0.2966672882755681, - "grad_norm": 0.9026073813438416, - "learning_rate": 7.0332480818414325e-06, - "loss": 0.1249, - "step": 1740 - }, - { - "epoch": 0.2968377867171058, - "grad_norm": 0.7167872190475464, - "learning_rate": 7.031543052003411e-06, - "loss": 0.1137, - "step": 1741 - }, - { - "epoch": 0.29700828515864347, - "grad_norm": 0.648733913898468, - "learning_rate": 7.029838022165388e-06, - "loss": 0.0784, - "step": 1742 - }, - { - "epoch": 0.29717878360018113, - "grad_norm": 1.9221540689468384, - "learning_rate": 7.028132992327366e-06, - "loss": 0.2363, - "step": 1743 - }, - { - "epoch": 0.29734928204171884, - "grad_norm": 1.255143404006958, - "learning_rate": 7.026427962489344e-06, - "loss": 0.1992, - "step": 1744 - }, - { - "epoch": 0.2975197804832565, - "grad_norm": 0.926605224609375, - "learning_rate": 7.024722932651322e-06, - "loss": 0.1603, - "step": 1745 - }, - { - "epoch": 0.2976902789247942, - "grad_norm": 1.1837265491485596, - "learning_rate": 7.023017902813299e-06, - "loss": 0.2193, - "step": 1746 - }, - { - "epoch": 0.2978607773663319, - "grad_norm": 0.9852341413497925, - "learning_rate": 7.021312872975278e-06, - "loss": 0.1684, - "step": 1747 - }, - { - "epoch": 0.2980312758078696, - "grad_norm": 0.7624542713165283, - "learning_rate": 7.019607843137255e-06, - "loss": 0.0737, - "step": 1748 - }, - { - "epoch": 0.29820177424940725, - "grad_norm": 0.9837128520011902, - "learning_rate": 7.017902813299233e-06, - "loss": 0.1486, - "step": 1749 - }, - { - "epoch": 0.29837227269094496, - "grad_norm": 0.7164487242698669, - "learning_rate": 7.016197783461212e-06, - "loss": 0.1192, - "step": 1750 - }, - { - "epoch": 0.2985427711324826, - "grad_norm": 1.325129747390747, - "learning_rate": 7.014492753623189e-06, - "loss": 0.1297, - "step": 1751 - }, - { - "epoch": 0.2987132695740203, - "grad_norm": 0.8222716450691223, - "learning_rate": 7.012787723785167e-06, - "loss": 0.1492, - "step": 1752 - }, - { - "epoch": 0.298883768015558, - "grad_norm": 1.1504167318344116, - "learning_rate": 7.011082693947144e-06, - "loss": 0.1816, - "step": 1753 - }, - { - "epoch": 0.29905426645709565, - "grad_norm": 0.7564553618431091, - "learning_rate": 7.009377664109123e-06, - "loss": 0.0947, - "step": 1754 - }, - { - "epoch": 0.29922476489863337, - "grad_norm": 1.0340969562530518, - "learning_rate": 7.0076726342711e-06, - "loss": 0.1252, - "step": 1755 - }, - { - "epoch": 0.299395263340171, - "grad_norm": 0.937289297580719, - "learning_rate": 7.005967604433078e-06, - "loss": 0.1858, - "step": 1756 - }, - { - "epoch": 0.29956576178170874, - "grad_norm": 1.3442150354385376, - "learning_rate": 7.004262574595057e-06, - "loss": 0.2558, - "step": 1757 - }, - { - "epoch": 0.2997362602232464, - "grad_norm": 1.5121102333068848, - "learning_rate": 7.002557544757034e-06, - "loss": 0.183, - "step": 1758 - }, - { - "epoch": 0.29990675866478406, - "grad_norm": 0.8529810905456543, - "learning_rate": 7.000852514919012e-06, - "loss": 0.1801, - "step": 1759 - }, - { - "epoch": 0.30007725710632177, - "grad_norm": 1.0886412858963013, - "learning_rate": 6.999147485080989e-06, - "loss": 0.1705, - "step": 1760 - }, - { - "epoch": 0.30024775554785943, - "grad_norm": 1.0613352060317993, - "learning_rate": 6.997442455242968e-06, - "loss": 0.1219, - "step": 1761 - }, - { - "epoch": 0.30041825398939714, - "grad_norm": 0.9400555491447449, - "learning_rate": 6.995737425404945e-06, - "loss": 0.1675, - "step": 1762 - }, - { - "epoch": 0.3005887524309348, - "grad_norm": 1.252254843711853, - "learning_rate": 6.9940323955669236e-06, - "loss": 0.1409, - "step": 1763 - }, - { - "epoch": 0.3007592508724725, - "grad_norm": 0.9437319040298462, - "learning_rate": 6.992327365728901e-06, - "loss": 0.104, - "step": 1764 - }, - { - "epoch": 0.3009297493140102, - "grad_norm": 0.9890301823616028, - "learning_rate": 6.990622335890879e-06, - "loss": 0.1972, - "step": 1765 - }, - { - "epoch": 0.30110024775554783, - "grad_norm": 0.8138511776924133, - "learning_rate": 6.988917306052857e-06, - "loss": 0.1536, - "step": 1766 - }, - { - "epoch": 0.30127074619708555, - "grad_norm": 0.9302570223808289, - "learning_rate": 6.9872122762148345e-06, - "loss": 0.0714, - "step": 1767 - }, - { - "epoch": 0.3014412446386232, - "grad_norm": 0.696306586265564, - "learning_rate": 6.9855072463768125e-06, - "loss": 0.0719, - "step": 1768 - }, - { - "epoch": 0.3016117430801609, - "grad_norm": 0.8247181177139282, - "learning_rate": 6.9838022165387896e-06, - "loss": 0.1142, - "step": 1769 - }, - { - "epoch": 0.3017822415216986, - "grad_norm": 0.5882242918014526, - "learning_rate": 6.982097186700768e-06, - "loss": 0.1016, - "step": 1770 - }, - { - "epoch": 0.3019527399632363, - "grad_norm": 0.8626717925071716, - "learning_rate": 6.9803921568627454e-06, - "loss": 0.1068, - "step": 1771 - }, - { - "epoch": 0.30212323840477395, - "grad_norm": 1.3977041244506836, - "learning_rate": 6.978687127024723e-06, - "loss": 0.1926, - "step": 1772 - }, - { - "epoch": 0.3022937368463116, - "grad_norm": 1.3635149002075195, - "learning_rate": 6.9769820971867005e-06, - "loss": 0.1599, - "step": 1773 - }, - { - "epoch": 0.3024642352878493, - "grad_norm": 0.7197402119636536, - "learning_rate": 6.975277067348679e-06, - "loss": 0.1477, - "step": 1774 - }, - { - "epoch": 0.302634733729387, - "grad_norm": 1.0707645416259766, - "learning_rate": 6.973572037510657e-06, - "loss": 0.1519, - "step": 1775 - }, - { - "epoch": 0.3028052321709247, - "grad_norm": 0.89174884557724, - "learning_rate": 6.971867007672634e-06, - "loss": 0.1014, - "step": 1776 - }, - { - "epoch": 0.30297573061246236, - "grad_norm": 0.7598327994346619, - "learning_rate": 6.970161977834613e-06, - "loss": 0.1475, - "step": 1777 - }, - { - "epoch": 0.30314622905400007, - "grad_norm": 0.6671323180198669, - "learning_rate": 6.96845694799659e-06, - "loss": 0.0418, - "step": 1778 - }, - { - "epoch": 0.30331672749553773, - "grad_norm": 0.7658970355987549, - "learning_rate": 6.966751918158568e-06, - "loss": 0.1006, - "step": 1779 - }, - { - "epoch": 0.30348722593707544, - "grad_norm": 0.9763972759246826, - "learning_rate": 6.965046888320546e-06, - "loss": 0.1675, - "step": 1780 - }, - { - "epoch": 0.3036577243786131, - "grad_norm": 2.200497627258301, - "learning_rate": 6.963341858482524e-06, - "loss": 0.2854, - "step": 1781 - }, - { - "epoch": 0.30382822282015076, - "grad_norm": 1.3654463291168213, - "learning_rate": 6.961636828644501e-06, - "loss": 0.1426, - "step": 1782 - }, - { - "epoch": 0.3039987212616885, - "grad_norm": 1.0287854671478271, - "learning_rate": 6.95993179880648e-06, - "loss": 0.193, - "step": 1783 - }, - { - "epoch": 0.30416921970322613, - "grad_norm": 0.8132631182670593, - "learning_rate": 6.958226768968458e-06, - "loss": 0.1091, - "step": 1784 - }, - { - "epoch": 0.30433971814476385, - "grad_norm": 0.9241107702255249, - "learning_rate": 6.956521739130435e-06, - "loss": 0.1505, - "step": 1785 - }, - { - "epoch": 0.3045102165863015, - "grad_norm": 1.1344634294509888, - "learning_rate": 6.954816709292414e-06, - "loss": 0.1651, - "step": 1786 - }, - { - "epoch": 0.3046807150278392, - "grad_norm": 1.2102185487747192, - "learning_rate": 6.953111679454391e-06, - "loss": 0.1135, - "step": 1787 - }, - { - "epoch": 0.3048512134693769, - "grad_norm": 0.7724695205688477, - "learning_rate": 6.951406649616369e-06, - "loss": 0.1177, - "step": 1788 - }, - { - "epoch": 0.30502171191091454, - "grad_norm": 1.012211561203003, - "learning_rate": 6.949701619778346e-06, - "loss": 0.1938, - "step": 1789 - }, - { - "epoch": 0.30519221035245225, - "grad_norm": 1.2018117904663086, - "learning_rate": 6.947996589940325e-06, - "loss": 0.1775, - "step": 1790 - }, - { - "epoch": 0.3053627087939899, - "grad_norm": 0.951012909412384, - "learning_rate": 6.946291560102303e-06, - "loss": 0.1552, - "step": 1791 - }, - { - "epoch": 0.3055332072355276, - "grad_norm": 0.7436768412590027, - "learning_rate": 6.94458653026428e-06, - "loss": 0.094, - "step": 1792 - }, - { - "epoch": 0.3057037056770653, - "grad_norm": 1.8450157642364502, - "learning_rate": 6.942881500426259e-06, - "loss": 0.2932, - "step": 1793 - }, - { - "epoch": 0.305874204118603, - "grad_norm": 0.6989912390708923, - "learning_rate": 6.941176470588236e-06, - "loss": 0.1193, - "step": 1794 - }, - { - "epoch": 0.30604470256014066, - "grad_norm": 0.9824447631835938, - "learning_rate": 6.939471440750214e-06, - "loss": 0.1727, - "step": 1795 - }, - { - "epoch": 0.3062152010016783, - "grad_norm": 1.169551968574524, - "learning_rate": 6.937766410912191e-06, - "loss": 0.0808, - "step": 1796 - }, - { - "epoch": 0.30638569944321603, - "grad_norm": 0.6870118379592896, - "learning_rate": 6.9360613810741695e-06, - "loss": 0.1052, - "step": 1797 - }, - { - "epoch": 0.3065561978847537, - "grad_norm": 1.383569598197937, - "learning_rate": 6.934356351236147e-06, - "loss": 0.2392, - "step": 1798 - }, - { - "epoch": 0.3067266963262914, - "grad_norm": 0.6766565442085266, - "learning_rate": 6.9326513213981254e-06, - "loss": 0.097, - "step": 1799 - }, - { - "epoch": 0.30689719476782906, - "grad_norm": 0.6242276430130005, - "learning_rate": 6.930946291560103e-06, - "loss": 0.1012, - "step": 1800 - }, - { - "epoch": 0.30689719476782906, - "eval_f1_score": 0.2803234501347709, - "eval_loss": 0.15971286594867706, - "eval_runtime": 182.6862, - "eval_samples_per_second": 54.739, - "eval_steps_per_second": 3.421, - "step": 1800 - }, - { - "epoch": 0.3070676932093668, - "grad_norm": 1.0924694538116455, - "learning_rate": 6.9292412617220805e-06, - "loss": 0.1936, - "step": 1801 - }, - { - "epoch": 0.30723819165090444, - "grad_norm": 0.8069597482681274, - "learning_rate": 6.927536231884059e-06, - "loss": 0.1299, - "step": 1802 - }, - { - "epoch": 0.30740869009244215, - "grad_norm": 1.007934331893921, - "learning_rate": 6.925831202046036e-06, - "loss": 0.1719, - "step": 1803 - }, - { - "epoch": 0.3075791885339798, - "grad_norm": 1.0395991802215576, - "learning_rate": 6.924126172208014e-06, - "loss": 0.1675, - "step": 1804 - }, - { - "epoch": 0.30774968697551747, - "grad_norm": 0.7160507440567017, - "learning_rate": 6.9224211423699914e-06, - "loss": 0.1178, - "step": 1805 - }, - { - "epoch": 0.3079201854170552, - "grad_norm": 1.3453173637390137, - "learning_rate": 6.92071611253197e-06, - "loss": 0.197, - "step": 1806 - }, - { - "epoch": 0.30809068385859284, - "grad_norm": 0.869848906993866, - "learning_rate": 6.919011082693947e-06, - "loss": 0.1259, - "step": 1807 - }, - { - "epoch": 0.30826118230013055, - "grad_norm": 0.8316013813018799, - "learning_rate": 6.917306052855925e-06, - "loss": 0.1555, - "step": 1808 - }, - { - "epoch": 0.3084316807416682, - "grad_norm": 0.6854284405708313, - "learning_rate": 6.915601023017904e-06, - "loss": 0.1546, - "step": 1809 - }, - { - "epoch": 0.3086021791832059, - "grad_norm": 0.9202719330787659, - "learning_rate": 6.913895993179881e-06, - "loss": 0.1263, - "step": 1810 - }, - { - "epoch": 0.3087726776247436, - "grad_norm": 0.8600966334342957, - "learning_rate": 6.912190963341859e-06, - "loss": 0.2094, - "step": 1811 - }, - { - "epoch": 0.30894317606628124, - "grad_norm": 1.3879214525222778, - "learning_rate": 6.910485933503836e-06, - "loss": 0.1503, - "step": 1812 - }, - { - "epoch": 0.30911367450781896, - "grad_norm": 0.8720242977142334, - "learning_rate": 6.908780903665815e-06, - "loss": 0.1256, - "step": 1813 - }, - { - "epoch": 0.3092841729493566, - "grad_norm": 1.0565900802612305, - "learning_rate": 6.907075873827792e-06, - "loss": 0.1797, - "step": 1814 - }, - { - "epoch": 0.30945467139089433, - "grad_norm": 1.1120818853378296, - "learning_rate": 6.90537084398977e-06, - "loss": 0.1902, - "step": 1815 - }, - { - "epoch": 0.309625169832432, - "grad_norm": 0.8991826772689819, - "learning_rate": 6.903665814151749e-06, - "loss": 0.1553, - "step": 1816 - }, - { - "epoch": 0.3097956682739697, - "grad_norm": 1.149214744567871, - "learning_rate": 6.901960784313726e-06, - "loss": 0.2201, - "step": 1817 - }, - { - "epoch": 0.30996616671550736, - "grad_norm": 0.6388124823570251, - "learning_rate": 6.900255754475704e-06, - "loss": 0.1307, - "step": 1818 - }, - { - "epoch": 0.310136665157045, - "grad_norm": 0.8748459219932556, - "learning_rate": 6.898550724637682e-06, - "loss": 0.1085, - "step": 1819 - }, - { - "epoch": 0.31030716359858274, - "grad_norm": 1.3581770658493042, - "learning_rate": 6.89684569479966e-06, - "loss": 0.2259, - "step": 1820 - }, - { - "epoch": 0.3104776620401204, - "grad_norm": 0.7230364084243774, - "learning_rate": 6.895140664961637e-06, - "loss": 0.0964, - "step": 1821 - }, - { - "epoch": 0.3106481604816581, - "grad_norm": 1.595178246498108, - "learning_rate": 6.893435635123616e-06, - "loss": 0.242, - "step": 1822 - }, - { - "epoch": 0.31081865892319577, - "grad_norm": 0.9053245782852173, - "learning_rate": 6.891730605285593e-06, - "loss": 0.2075, - "step": 1823 - }, - { - "epoch": 0.3109891573647335, - "grad_norm": 0.7452926635742188, - "learning_rate": 6.890025575447571e-06, - "loss": 0.1048, - "step": 1824 - }, - { - "epoch": 0.31115965580627114, - "grad_norm": 0.8311702013015747, - "learning_rate": 6.8883205456095495e-06, - "loss": 0.1312, - "step": 1825 - }, - { - "epoch": 0.31133015424780885, - "grad_norm": 0.9205570220947266, - "learning_rate": 6.886615515771527e-06, - "loss": 0.1585, - "step": 1826 - }, - { - "epoch": 0.3115006526893465, - "grad_norm": 0.9653121829032898, - "learning_rate": 6.884910485933505e-06, - "loss": 0.199, - "step": 1827 - }, - { - "epoch": 0.31167115113088417, - "grad_norm": 0.8807269930839539, - "learning_rate": 6.883205456095482e-06, - "loss": 0.1618, - "step": 1828 - }, - { - "epoch": 0.3118416495724219, - "grad_norm": 0.7480918169021606, - "learning_rate": 6.8815004262574605e-06, - "loss": 0.1095, - "step": 1829 - }, - { - "epoch": 0.31201214801395954, - "grad_norm": 1.1662970781326294, - "learning_rate": 6.879795396419438e-06, - "loss": 0.2175, - "step": 1830 - }, - { - "epoch": 0.31218264645549726, - "grad_norm": 0.8109918832778931, - "learning_rate": 6.8780903665814155e-06, - "loss": 0.1532, - "step": 1831 - }, - { - "epoch": 0.3123531448970349, - "grad_norm": 0.766344428062439, - "learning_rate": 6.876385336743393e-06, - "loss": 0.1499, - "step": 1832 - }, - { - "epoch": 0.31252364333857263, - "grad_norm": 1.1723487377166748, - "learning_rate": 6.8746803069053714e-06, - "loss": 0.1621, - "step": 1833 - }, - { - "epoch": 0.3126941417801103, - "grad_norm": 0.7435732483863831, - "learning_rate": 6.872975277067349e-06, - "loss": 0.0834, - "step": 1834 - }, - { - "epoch": 0.31286464022164795, - "grad_norm": 0.9418601393699646, - "learning_rate": 6.8712702472293265e-06, - "loss": 0.1322, - "step": 1835 - }, - { - "epoch": 0.31303513866318566, - "grad_norm": 0.6800971031188965, - "learning_rate": 6.869565217391305e-06, - "loss": 0.1264, - "step": 1836 - }, - { - "epoch": 0.3132056371047233, - "grad_norm": 0.7308107018470764, - "learning_rate": 6.867860187553282e-06, - "loss": 0.1478, - "step": 1837 - }, - { - "epoch": 0.31337613554626104, - "grad_norm": 0.9065027236938477, - "learning_rate": 6.866155157715261e-06, - "loss": 0.1384, - "step": 1838 - }, - { - "epoch": 0.3135466339877987, - "grad_norm": 1.243708610534668, - "learning_rate": 6.864450127877238e-06, - "loss": 0.1833, - "step": 1839 - }, - { - "epoch": 0.3137171324293364, - "grad_norm": 1.047222375869751, - "learning_rate": 6.862745098039216e-06, - "loss": 0.1548, - "step": 1840 - }, - { - "epoch": 0.31388763087087407, - "grad_norm": 0.6964305639266968, - "learning_rate": 6.861040068201195e-06, - "loss": 0.1444, - "step": 1841 - }, - { - "epoch": 0.3140581293124117, - "grad_norm": 1.2023725509643555, - "learning_rate": 6.859335038363172e-06, - "loss": 0.1943, - "step": 1842 - }, - { - "epoch": 0.31422862775394944, - "grad_norm": 1.2627588510513306, - "learning_rate": 6.85763000852515e-06, - "loss": 0.1828, - "step": 1843 - }, - { - "epoch": 0.3143991261954871, - "grad_norm": 0.8266808986663818, - "learning_rate": 6.855924978687127e-06, - "loss": 0.1081, - "step": 1844 - }, - { - "epoch": 0.3145696246370248, - "grad_norm": 0.79570472240448, - "learning_rate": 6.854219948849106e-06, - "loss": 0.1366, - "step": 1845 - }, - { - "epoch": 0.3147401230785625, - "grad_norm": 1.0187424421310425, - "learning_rate": 6.852514919011083e-06, - "loss": 0.1525, - "step": 1846 - }, - { - "epoch": 0.3149106215201002, - "grad_norm": 0.7972794771194458, - "learning_rate": 6.850809889173061e-06, - "loss": 0.1098, - "step": 1847 - }, - { - "epoch": 0.31508111996163785, - "grad_norm": 1.424127459526062, - "learning_rate": 6.849104859335038e-06, - "loss": 0.1915, - "step": 1848 - }, - { - "epoch": 0.31525161840317556, - "grad_norm": 1.0600439310073853, - "learning_rate": 6.847399829497017e-06, - "loss": 0.1273, - "step": 1849 - }, - { - "epoch": 0.3154221168447132, - "grad_norm": 0.8016909956932068, - "learning_rate": 6.845694799658995e-06, - "loss": 0.0958, - "step": 1850 - }, - { - "epoch": 0.3155926152862509, - "grad_norm": 0.7285593748092651, - "learning_rate": 6.843989769820972e-06, - "loss": 0.1351, - "step": 1851 - }, - { - "epoch": 0.3157631137277886, - "grad_norm": 1.0121415853500366, - "learning_rate": 6.842284739982951e-06, - "loss": 0.0566, - "step": 1852 - }, - { - "epoch": 0.31593361216932625, - "grad_norm": 0.782200038433075, - "learning_rate": 6.840579710144928e-06, - "loss": 0.1084, - "step": 1853 - }, - { - "epoch": 0.31610411061086396, - "grad_norm": 0.8730593323707581, - "learning_rate": 6.838874680306906e-06, - "loss": 0.1147, - "step": 1854 - }, - { - "epoch": 0.3162746090524016, - "grad_norm": 1.0359622240066528, - "learning_rate": 6.837169650468884e-06, - "loss": 0.1731, - "step": 1855 - }, - { - "epoch": 0.31644510749393934, - "grad_norm": 1.381600260734558, - "learning_rate": 6.835464620630862e-06, - "loss": 0.154, - "step": 1856 - }, - { - "epoch": 0.316615605935477, - "grad_norm": 1.3901880979537964, - "learning_rate": 6.833759590792839e-06, - "loss": 0.1854, - "step": 1857 - }, - { - "epoch": 0.31678610437701465, - "grad_norm": 2.308821678161621, - "learning_rate": 6.832054560954818e-06, - "loss": 0.1926, - "step": 1858 - }, - { - "epoch": 0.31695660281855237, - "grad_norm": 2.024123191833496, - "learning_rate": 6.8303495311167955e-06, - "loss": 0.244, - "step": 1859 - }, - { - "epoch": 0.31712710126009, - "grad_norm": 0.6854466795921326, - "learning_rate": 6.828644501278773e-06, - "loss": 0.0982, - "step": 1860 - }, - { - "epoch": 0.31729759970162774, - "grad_norm": 0.8933399319648743, - "learning_rate": 6.826939471440751e-06, - "loss": 0.0826, - "step": 1861 - }, - { - "epoch": 0.3174680981431654, - "grad_norm": 0.8886541128158569, - "learning_rate": 6.8252344416027285e-06, - "loss": 0.1203, - "step": 1862 - }, - { - "epoch": 0.3176385965847031, - "grad_norm": 0.7370439171791077, - "learning_rate": 6.8235294117647065e-06, - "loss": 0.0999, - "step": 1863 - }, - { - "epoch": 0.3178090950262408, - "grad_norm": 1.627724289894104, - "learning_rate": 6.8218243819266836e-06, - "loss": 0.239, - "step": 1864 - }, - { - "epoch": 0.31797959346777843, - "grad_norm": 0.888409435749054, - "learning_rate": 6.820119352088662e-06, - "loss": 0.1712, - "step": 1865 - }, - { - "epoch": 0.31815009190931615, - "grad_norm": 0.9039790034294128, - "learning_rate": 6.81841432225064e-06, - "loss": 0.1022, - "step": 1866 - }, - { - "epoch": 0.3183205903508538, - "grad_norm": 0.6640911102294922, - "learning_rate": 6.816709292412617e-06, - "loss": 0.1023, - "step": 1867 - }, - { - "epoch": 0.3184910887923915, - "grad_norm": 1.0628663301467896, - "learning_rate": 6.815004262574596e-06, - "loss": 0.161, - "step": 1868 - }, - { - "epoch": 0.3186615872339292, - "grad_norm": 0.6437575221061707, - "learning_rate": 6.813299232736573e-06, - "loss": 0.0882, - "step": 1869 - }, - { - "epoch": 0.3188320856754669, - "grad_norm": 0.8309326171875, - "learning_rate": 6.811594202898551e-06, - "loss": 0.1695, - "step": 1870 - }, - { - "epoch": 0.31900258411700455, - "grad_norm": 0.7946399450302124, - "learning_rate": 6.809889173060528e-06, - "loss": 0.1474, - "step": 1871 - }, - { - "epoch": 0.31917308255854226, - "grad_norm": 0.7174097895622253, - "learning_rate": 6.808184143222507e-06, - "loss": 0.0963, - "step": 1872 - }, - { - "epoch": 0.3193435810000799, - "grad_norm": 0.7954621315002441, - "learning_rate": 6.806479113384484e-06, - "loss": 0.1349, - "step": 1873 - }, - { - "epoch": 0.3195140794416176, - "grad_norm": 0.6577284336090088, - "learning_rate": 6.804774083546463e-06, - "loss": 0.1079, - "step": 1874 - }, - { - "epoch": 0.3196845778831553, - "grad_norm": 1.0862046480178833, - "learning_rate": 6.803069053708441e-06, - "loss": 0.2176, - "step": 1875 - }, - { - "epoch": 0.31985507632469296, - "grad_norm": 0.7450264096260071, - "learning_rate": 6.801364023870418e-06, - "loss": 0.0668, - "step": 1876 - }, - { - "epoch": 0.32002557476623067, - "grad_norm": 0.9908052682876587, - "learning_rate": 6.799658994032397e-06, - "loss": 0.1696, - "step": 1877 - }, - { - "epoch": 0.32019607320776833, - "grad_norm": 1.015461802482605, - "learning_rate": 6.797953964194374e-06, - "loss": 0.1509, - "step": 1878 - }, - { - "epoch": 0.32036657164930604, - "grad_norm": 0.8305402398109436, - "learning_rate": 6.796248934356352e-06, - "loss": 0.0678, - "step": 1879 - }, - { - "epoch": 0.3205370700908437, - "grad_norm": 1.0898061990737915, - "learning_rate": 6.794543904518329e-06, - "loss": 0.1734, - "step": 1880 - }, - { - "epoch": 0.32070756853238136, - "grad_norm": 0.6495380997657776, - "learning_rate": 6.792838874680308e-06, - "loss": 0.1133, - "step": 1881 - }, - { - "epoch": 0.3208780669739191, - "grad_norm": 0.9107251167297363, - "learning_rate": 6.791133844842285e-06, - "loss": 0.1741, - "step": 1882 - }, - { - "epoch": 0.32104856541545673, - "grad_norm": 0.8304046392440796, - "learning_rate": 6.789428815004263e-06, - "loss": 0.0817, - "step": 1883 - }, - { - "epoch": 0.32121906385699445, - "grad_norm": 0.6666496396064758, - "learning_rate": 6.787723785166242e-06, - "loss": 0.0445, - "step": 1884 - }, - { - "epoch": 0.3213895622985321, - "grad_norm": 1.2774522304534912, - "learning_rate": 6.786018755328219e-06, - "loss": 0.2335, - "step": 1885 - }, - { - "epoch": 0.3215600607400698, - "grad_norm": 1.08737051486969, - "learning_rate": 6.784313725490197e-06, - "loss": 0.1862, - "step": 1886 - }, - { - "epoch": 0.3217305591816075, - "grad_norm": 1.0256638526916504, - "learning_rate": 6.782608695652174e-06, - "loss": 0.1821, - "step": 1887 - }, - { - "epoch": 0.32190105762314514, - "grad_norm": 1.390364646911621, - "learning_rate": 6.780903665814153e-06, - "loss": 0.186, - "step": 1888 - }, - { - "epoch": 0.32207155606468285, - "grad_norm": 0.5867147445678711, - "learning_rate": 6.77919863597613e-06, - "loss": 0.0891, - "step": 1889 - }, - { - "epoch": 0.3222420545062205, - "grad_norm": 0.7561818957328796, - "learning_rate": 6.777493606138108e-06, - "loss": 0.1058, - "step": 1890 - }, - { - "epoch": 0.3224125529477582, - "grad_norm": 0.6854589581489563, - "learning_rate": 6.7757885763000865e-06, - "loss": 0.1171, - "step": 1891 - }, - { - "epoch": 0.3225830513892959, - "grad_norm": 1.1057910919189453, - "learning_rate": 6.7740835464620636e-06, - "loss": 0.1567, - "step": 1892 - }, - { - "epoch": 0.3227535498308336, - "grad_norm": 1.0578477382659912, - "learning_rate": 6.7723785166240415e-06, - "loss": 0.1911, - "step": 1893 - }, - { - "epoch": 0.32292404827237126, - "grad_norm": 1.0057955980300903, - "learning_rate": 6.7706734867860195e-06, - "loss": 0.0877, - "step": 1894 - }, - { - "epoch": 0.32309454671390897, - "grad_norm": 1.0721625089645386, - "learning_rate": 6.768968456947997e-06, - "loss": 0.2047, - "step": 1895 - }, - { - "epoch": 0.32326504515544663, - "grad_norm": 0.8911730647087097, - "learning_rate": 6.7672634271099745e-06, - "loss": 0.1215, - "step": 1896 - }, - { - "epoch": 0.3234355435969843, - "grad_norm": 0.9344785809516907, - "learning_rate": 6.765558397271953e-06, - "loss": 0.1038, - "step": 1897 - }, - { - "epoch": 0.323606042038522, - "grad_norm": 0.890182375907898, - "learning_rate": 6.76385336743393e-06, - "loss": 0.1262, - "step": 1898 - }, - { - "epoch": 0.32377654048005966, - "grad_norm": 1.6066832542419434, - "learning_rate": 6.762148337595908e-06, - "loss": 0.2593, - "step": 1899 - }, - { - "epoch": 0.3239470389215974, - "grad_norm": 0.9894545078277588, - "learning_rate": 6.760443307757887e-06, - "loss": 0.1459, - "step": 1900 - }, - { - "epoch": 0.3239470389215974, - "eval_f1_score": 0.3062200956937799, - "eval_loss": 0.155869260430336, - "eval_runtime": 182.6505, - "eval_samples_per_second": 54.749, - "eval_steps_per_second": 3.422, - "step": 1900 - }, - { - "epoch": 0.32411753736313503, - "grad_norm": 0.7483901381492615, - "learning_rate": 6.758738277919864e-06, - "loss": 0.1084, - "step": 1901 - }, - { - "epoch": 0.32428803580467275, - "grad_norm": 0.989708423614502, - "learning_rate": 6.757033248081842e-06, - "loss": 0.1565, - "step": 1902 - }, - { - "epoch": 0.3244585342462104, - "grad_norm": 0.9160560369491577, - "learning_rate": 6.755328218243819e-06, - "loss": 0.0911, - "step": 1903 - }, - { - "epoch": 0.32462903268774806, - "grad_norm": 1.524827241897583, - "learning_rate": 6.753623188405798e-06, - "loss": 0.137, - "step": 1904 - }, - { - "epoch": 0.3247995311292858, - "grad_norm": 0.9177931547164917, - "learning_rate": 6.751918158567775e-06, - "loss": 0.1413, - "step": 1905 - }, - { - "epoch": 0.32497002957082344, - "grad_norm": 0.9163035750389099, - "learning_rate": 6.750213128729753e-06, - "loss": 0.1107, - "step": 1906 - }, - { - "epoch": 0.32514052801236115, - "grad_norm": 1.2288998365402222, - "learning_rate": 6.74850809889173e-06, - "loss": 0.1198, - "step": 1907 - }, - { - "epoch": 0.3253110264538988, - "grad_norm": 1.2662756443023682, - "learning_rate": 6.746803069053709e-06, - "loss": 0.1756, - "step": 1908 - }, - { - "epoch": 0.3254815248954365, - "grad_norm": 1.25589919090271, - "learning_rate": 6.745098039215687e-06, - "loss": 0.1848, - "step": 1909 - }, - { - "epoch": 0.3256520233369742, - "grad_norm": 0.8272511959075928, - "learning_rate": 6.743393009377664e-06, - "loss": 0.1146, - "step": 1910 - }, - { - "epoch": 0.32582252177851184, - "grad_norm": 1.0488500595092773, - "learning_rate": 6.741687979539643e-06, - "loss": 0.1479, - "step": 1911 - }, - { - "epoch": 0.32599302022004956, - "grad_norm": 1.2019003629684448, - "learning_rate": 6.73998294970162e-06, - "loss": 0.1923, - "step": 1912 - }, - { - "epoch": 0.3261635186615872, - "grad_norm": 0.8280180096626282, - "learning_rate": 6.738277919863599e-06, - "loss": 0.1428, - "step": 1913 - }, - { - "epoch": 0.32633401710312493, - "grad_norm": 0.8211914300918579, - "learning_rate": 6.736572890025576e-06, - "loss": 0.1007, - "step": 1914 - }, - { - "epoch": 0.3265045155446626, - "grad_norm": 1.6616082191467285, - "learning_rate": 6.734867860187554e-06, - "loss": 0.1183, - "step": 1915 - }, - { - "epoch": 0.3266750139862003, - "grad_norm": 0.7762541174888611, - "learning_rate": 6.733162830349533e-06, - "loss": 0.1671, - "step": 1916 - }, - { - "epoch": 0.32684551242773796, - "grad_norm": 1.5211981534957886, - "learning_rate": 6.73145780051151e-06, - "loss": 0.1602, - "step": 1917 - }, - { - "epoch": 0.3270160108692757, - "grad_norm": 1.5074344873428345, - "learning_rate": 6.729752770673488e-06, - "loss": 0.1641, - "step": 1918 - }, - { - "epoch": 0.32718650931081333, - "grad_norm": 0.8448349833488464, - "learning_rate": 6.728047740835465e-06, - "loss": 0.1223, - "step": 1919 - }, - { - "epoch": 0.327357007752351, - "grad_norm": 1.0949711799621582, - "learning_rate": 6.7263427109974436e-06, - "loss": 0.2247, - "step": 1920 - }, - { - "epoch": 0.3275275061938887, - "grad_norm": 1.326486587524414, - "learning_rate": 6.724637681159421e-06, - "loss": 0.2258, - "step": 1921 - }, - { - "epoch": 0.32769800463542637, - "grad_norm": 1.2110795974731445, - "learning_rate": 6.722932651321399e-06, - "loss": 0.1842, - "step": 1922 - }, - { - "epoch": 0.3278685030769641, - "grad_norm": 1.0380573272705078, - "learning_rate": 6.721227621483376e-06, - "loss": 0.1794, - "step": 1923 - }, - { - "epoch": 0.32803900151850174, - "grad_norm": 1.1367063522338867, - "learning_rate": 6.7195225916453545e-06, - "loss": 0.2063, - "step": 1924 - }, - { - "epoch": 0.32820949996003945, - "grad_norm": 0.9798280596733093, - "learning_rate": 6.7178175618073325e-06, - "loss": 0.1611, - "step": 1925 - }, - { - "epoch": 0.3283799984015771, - "grad_norm": 0.8064085841178894, - "learning_rate": 6.7161125319693096e-06, - "loss": 0.1539, - "step": 1926 - }, - { - "epoch": 0.32855049684311477, - "grad_norm": 0.9155250191688538, - "learning_rate": 6.714407502131288e-06, - "loss": 0.1546, - "step": 1927 - }, - { - "epoch": 0.3287209952846525, - "grad_norm": 0.9837044477462769, - "learning_rate": 6.7127024722932654e-06, - "loss": 0.1067, - "step": 1928 - }, - { - "epoch": 0.32889149372619014, - "grad_norm": 1.281535029411316, - "learning_rate": 6.710997442455243e-06, - "loss": 0.09, - "step": 1929 - }, - { - "epoch": 0.32906199216772786, - "grad_norm": 0.655259370803833, - "learning_rate": 6.709292412617221e-06, - "loss": 0.0973, - "step": 1930 - }, - { - "epoch": 0.3292324906092655, - "grad_norm": 1.0607177019119263, - "learning_rate": 6.707587382779199e-06, - "loss": 0.1247, - "step": 1931 - }, - { - "epoch": 0.32940298905080323, - "grad_norm": 0.7018961310386658, - "learning_rate": 6.705882352941176e-06, - "loss": 0.109, - "step": 1932 - }, - { - "epoch": 0.3295734874923409, - "grad_norm": 0.699153482913971, - "learning_rate": 6.704177323103155e-06, - "loss": 0.1227, - "step": 1933 - }, - { - "epoch": 0.32974398593387855, - "grad_norm": 1.889542818069458, - "learning_rate": 6.702472293265133e-06, - "loss": 0.2145, - "step": 1934 - }, - { - "epoch": 0.32991448437541626, - "grad_norm": 0.8169059753417969, - "learning_rate": 6.70076726342711e-06, - "loss": 0.1256, - "step": 1935 - }, - { - "epoch": 0.3300849828169539, - "grad_norm": 0.8507729172706604, - "learning_rate": 6.699062233589089e-06, - "loss": 0.1536, - "step": 1936 - }, - { - "epoch": 0.33025548125849163, - "grad_norm": 0.7230017781257629, - "learning_rate": 6.697357203751066e-06, - "loss": 0.0726, - "step": 1937 - }, - { - "epoch": 0.3304259797000293, - "grad_norm": 1.5054045915603638, - "learning_rate": 6.695652173913044e-06, - "loss": 0.179, - "step": 1938 - }, - { - "epoch": 0.330596478141567, - "grad_norm": 0.7017834186553955, - "learning_rate": 6.693947144075021e-06, - "loss": 0.0953, - "step": 1939 - }, - { - "epoch": 0.33076697658310467, - "grad_norm": 0.7926969528198242, - "learning_rate": 6.692242114237e-06, - "loss": 0.1255, - "step": 1940 - }, - { - "epoch": 0.3309374750246424, - "grad_norm": 1.4403599500656128, - "learning_rate": 6.690537084398977e-06, - "loss": 0.1628, - "step": 1941 - }, - { - "epoch": 0.33110797346618004, - "grad_norm": 0.8787623047828674, - "learning_rate": 6.688832054560955e-06, - "loss": 0.1328, - "step": 1942 - }, - { - "epoch": 0.3312784719077177, - "grad_norm": 1.183626651763916, - "learning_rate": 6.687127024722934e-06, - "loss": 0.1683, - "step": 1943 - }, - { - "epoch": 0.3314489703492554, - "grad_norm": 0.9939226508140564, - "learning_rate": 6.685421994884911e-06, - "loss": 0.1818, - "step": 1944 - }, - { - "epoch": 0.33161946879079307, - "grad_norm": 1.0192537307739258, - "learning_rate": 6.683716965046889e-06, - "loss": 0.1743, - "step": 1945 - }, - { - "epoch": 0.3317899672323308, - "grad_norm": 1.2089581489562988, - "learning_rate": 6.682011935208866e-06, - "loss": 0.237, - "step": 1946 - }, - { - "epoch": 0.33196046567386844, - "grad_norm": 0.7481584548950195, - "learning_rate": 6.680306905370845e-06, - "loss": 0.1147, - "step": 1947 - }, - { - "epoch": 0.33213096411540616, - "grad_norm": 1.2319127321243286, - "learning_rate": 6.678601875532822e-06, - "loss": 0.207, - "step": 1948 - }, - { - "epoch": 0.3323014625569438, - "grad_norm": 1.2917490005493164, - "learning_rate": 6.676896845694801e-06, - "loss": 0.1417, - "step": 1949 - }, - { - "epoch": 0.3324719609984815, - "grad_norm": 0.9638323783874512, - "learning_rate": 6.675191815856779e-06, - "loss": 0.1012, - "step": 1950 - }, - { - "epoch": 0.3326424594400192, - "grad_norm": 0.9457072615623474, - "learning_rate": 6.673486786018756e-06, - "loss": 0.1296, - "step": 1951 - }, - { - "epoch": 0.33281295788155685, - "grad_norm": 0.8784216046333313, - "learning_rate": 6.6717817561807345e-06, - "loss": 0.1163, - "step": 1952 - }, - { - "epoch": 0.33298345632309456, - "grad_norm": 0.9807690978050232, - "learning_rate": 6.670076726342712e-06, - "loss": 0.1292, - "step": 1953 - }, - { - "epoch": 0.3331539547646322, - "grad_norm": 0.8995668888092041, - "learning_rate": 6.6683716965046895e-06, - "loss": 0.0743, - "step": 1954 - }, - { - "epoch": 0.33332445320616994, - "grad_norm": 0.9565035104751587, - "learning_rate": 6.666666666666667e-06, - "loss": 0.163, - "step": 1955 - }, - { - "epoch": 0.3334949516477076, - "grad_norm": 0.5363723039627075, - "learning_rate": 6.6649616368286454e-06, - "loss": 0.0634, - "step": 1956 - }, - { - "epoch": 0.33366545008924525, - "grad_norm": 0.5674836039543152, - "learning_rate": 6.6632566069906225e-06, - "loss": 0.0746, - "step": 1957 - }, - { - "epoch": 0.33383594853078297, - "grad_norm": 0.8529496192932129, - "learning_rate": 6.6615515771526005e-06, - "loss": 0.148, - "step": 1958 - }, - { - "epoch": 0.3340064469723206, - "grad_norm": 0.6780075430870056, - "learning_rate": 6.659846547314579e-06, - "loss": 0.083, - "step": 1959 - }, - { - "epoch": 0.33417694541385834, - "grad_norm": 1.7016448974609375, - "learning_rate": 6.658141517476556e-06, - "loss": 0.2304, - "step": 1960 - }, - { - "epoch": 0.334347443855396, - "grad_norm": 0.8263558745384216, - "learning_rate": 6.656436487638534e-06, - "loss": 0.0988, - "step": 1961 - }, - { - "epoch": 0.3345179422969337, - "grad_norm": 0.9661087989807129, - "learning_rate": 6.6547314578005114e-06, - "loss": 0.1631, - "step": 1962 - }, - { - "epoch": 0.33468844073847137, - "grad_norm": 0.7975350022315979, - "learning_rate": 6.65302642796249e-06, - "loss": 0.1455, - "step": 1963 - }, - { - "epoch": 0.3348589391800091, - "grad_norm": 1.0657655000686646, - "learning_rate": 6.651321398124467e-06, - "loss": 0.1644, - "step": 1964 - }, - { - "epoch": 0.33502943762154674, - "grad_norm": 0.6555298566818237, - "learning_rate": 6.649616368286445e-06, - "loss": 0.0623, - "step": 1965 - }, - { - "epoch": 0.3351999360630844, - "grad_norm": 0.9956116080284119, - "learning_rate": 6.647911338448423e-06, - "loss": 0.1462, - "step": 1966 - }, - { - "epoch": 0.3353704345046221, - "grad_norm": 1.17995023727417, - "learning_rate": 6.646206308610401e-06, - "loss": 0.1679, - "step": 1967 - }, - { - "epoch": 0.3355409329461598, - "grad_norm": 1.823115348815918, - "learning_rate": 6.644501278772379e-06, - "loss": 0.1644, - "step": 1968 - }, - { - "epoch": 0.3357114313876975, - "grad_norm": 0.6293953657150269, - "learning_rate": 6.642796248934357e-06, - "loss": 0.1359, - "step": 1969 - }, - { - "epoch": 0.33588192982923515, - "grad_norm": 1.1457743644714355, - "learning_rate": 6.641091219096335e-06, - "loss": 0.1746, - "step": 1970 - }, - { - "epoch": 0.33605242827077286, - "grad_norm": 0.7488301396369934, - "learning_rate": 6.639386189258312e-06, - "loss": 0.0864, - "step": 1971 - }, - { - "epoch": 0.3362229267123105, - "grad_norm": 0.9744401574134827, - "learning_rate": 6.637681159420291e-06, - "loss": 0.1332, - "step": 1972 - }, - { - "epoch": 0.3363934251538482, - "grad_norm": 0.7821024060249329, - "learning_rate": 6.635976129582268e-06, - "loss": 0.1356, - "step": 1973 - }, - { - "epoch": 0.3365639235953859, - "grad_norm": 1.1750373840332031, - "learning_rate": 6.634271099744246e-06, - "loss": 0.1853, - "step": 1974 - }, - { - "epoch": 0.33673442203692355, - "grad_norm": 1.186582326889038, - "learning_rate": 6.632566069906225e-06, - "loss": 0.1315, - "step": 1975 - }, - { - "epoch": 0.33690492047846127, - "grad_norm": 1.009081482887268, - "learning_rate": 6.630861040068202e-06, - "loss": 0.1265, - "step": 1976 - }, - { - "epoch": 0.3370754189199989, - "grad_norm": 1.5751765966415405, - "learning_rate": 6.62915601023018e-06, - "loss": 0.1152, - "step": 1977 - }, - { - "epoch": 0.33724591736153664, - "grad_norm": 0.9581623673439026, - "learning_rate": 6.627450980392157e-06, - "loss": 0.1351, - "step": 1978 - }, - { - "epoch": 0.3374164158030743, - "grad_norm": 0.7846093773841858, - "learning_rate": 6.625745950554136e-06, - "loss": 0.142, - "step": 1979 - }, - { - "epoch": 0.33758691424461196, - "grad_norm": 1.0404683351516724, - "learning_rate": 6.624040920716113e-06, - "loss": 0.176, - "step": 1980 - }, - { - "epoch": 0.33775741268614967, - "grad_norm": 0.6047057509422302, - "learning_rate": 6.622335890878091e-06, - "loss": 0.1395, - "step": 1981 - }, - { - "epoch": 0.33792791112768733, - "grad_norm": 0.870232343673706, - "learning_rate": 6.620630861040068e-06, - "loss": 0.1167, - "step": 1982 - }, - { - "epoch": 0.33809840956922504, - "grad_norm": 0.7972772121429443, - "learning_rate": 6.618925831202047e-06, - "loss": 0.1582, - "step": 1983 - }, - { - "epoch": 0.3382689080107627, - "grad_norm": 0.8232150673866272, - "learning_rate": 6.617220801364025e-06, - "loss": 0.1336, - "step": 1984 - }, - { - "epoch": 0.3384394064523004, - "grad_norm": 0.8988503813743591, - "learning_rate": 6.615515771526002e-06, - "loss": 0.0448, - "step": 1985 - }, - { - "epoch": 0.3386099048938381, - "grad_norm": 0.8263369798660278, - "learning_rate": 6.6138107416879805e-06, - "loss": 0.0886, - "step": 1986 - }, - { - "epoch": 0.33878040333537573, - "grad_norm": 0.8381589651107788, - "learning_rate": 6.612105711849958e-06, - "loss": 0.1685, - "step": 1987 - }, - { - "epoch": 0.33895090177691345, - "grad_norm": 1.221974492073059, - "learning_rate": 6.610400682011936e-06, - "loss": 0.1797, - "step": 1988 - }, - { - "epoch": 0.3391214002184511, - "grad_norm": 1.0716266632080078, - "learning_rate": 6.6086956521739135e-06, - "loss": 0.1757, - "step": 1989 - }, - { - "epoch": 0.3392918986599888, - "grad_norm": 0.771088182926178, - "learning_rate": 6.6069906223358914e-06, - "loss": 0.136, - "step": 1990 - }, - { - "epoch": 0.3394623971015265, - "grad_norm": 0.6183826923370361, - "learning_rate": 6.6052855924978685e-06, - "loss": 0.1254, - "step": 1991 - }, - { - "epoch": 0.3396328955430642, - "grad_norm": 1.1764880418777466, - "learning_rate": 6.603580562659847e-06, - "loss": 0.1501, - "step": 1992 - }, - { - "epoch": 0.33980339398460185, - "grad_norm": 1.0482877492904663, - "learning_rate": 6.601875532821825e-06, - "loss": 0.2018, - "step": 1993 - }, - { - "epoch": 0.33997389242613957, - "grad_norm": 0.6771042943000793, - "learning_rate": 6.600170502983802e-06, - "loss": 0.0886, - "step": 1994 - }, - { - "epoch": 0.3401443908676772, - "grad_norm": 1.0513336658477783, - "learning_rate": 6.598465473145781e-06, - "loss": 0.158, - "step": 1995 - }, - { - "epoch": 0.3403148893092149, - "grad_norm": 0.6636187434196472, - "learning_rate": 6.596760443307758e-06, - "loss": 0.1076, - "step": 1996 - }, - { - "epoch": 0.3404853877507526, - "grad_norm": 0.9513442516326904, - "learning_rate": 6.595055413469736e-06, - "loss": 0.1058, - "step": 1997 - }, - { - "epoch": 0.34065588619229026, - "grad_norm": 0.9921936392784119, - "learning_rate": 6.593350383631713e-06, - "loss": 0.1267, - "step": 1998 - }, - { - "epoch": 0.340826384633828, - "grad_norm": 0.9132459759712219, - "learning_rate": 6.591645353793692e-06, - "loss": 0.1163, - "step": 1999 - }, - { - "epoch": 0.34099688307536563, - "grad_norm": 0.9271344542503357, - "learning_rate": 6.58994032395567e-06, - "loss": 0.1207, - "step": 2000 - }, - { - "epoch": 0.34099688307536563, - "eval_f1_score": 0.297029702970297, - "eval_loss": 0.15308597683906555, - "eval_runtime": 182.6807, - "eval_samples_per_second": 54.74, - "eval_steps_per_second": 3.421, - "step": 2000 - }, - { - "epoch": 0.34116738151690335, - "grad_norm": 1.0865306854248047, - "learning_rate": 6.588235294117647e-06, - "loss": 0.1873, - "step": 2001 - }, - { - "epoch": 0.341337879958441, - "grad_norm": 0.7954295873641968, - "learning_rate": 6.586530264279626e-06, - "loss": 0.1125, - "step": 2002 - }, - { - "epoch": 0.34150837839997866, - "grad_norm": 0.9557849764823914, - "learning_rate": 6.584825234441603e-06, - "loss": 0.1104, - "step": 2003 - }, - { - "epoch": 0.3416788768415164, - "grad_norm": 0.7364252805709839, - "learning_rate": 6.583120204603581e-06, - "loss": 0.1215, - "step": 2004 - }, - { - "epoch": 0.34184937528305404, - "grad_norm": 1.0843329429626465, - "learning_rate": 6.581415174765559e-06, - "loss": 0.2042, - "step": 2005 - }, - { - "epoch": 0.34201987372459175, - "grad_norm": 1.011513113975525, - "learning_rate": 6.579710144927537e-06, - "loss": 0.1623, - "step": 2006 - }, - { - "epoch": 0.3421903721661294, - "grad_norm": 0.7970641851425171, - "learning_rate": 6.578005115089514e-06, - "loss": 0.1766, - "step": 2007 - }, - { - "epoch": 0.3423608706076671, - "grad_norm": 1.012313961982727, - "learning_rate": 6.576300085251493e-06, - "loss": 0.1507, - "step": 2008 - }, - { - "epoch": 0.3425313690492048, - "grad_norm": 0.9937087893486023, - "learning_rate": 6.574595055413471e-06, - "loss": 0.1433, - "step": 2009 - }, - { - "epoch": 0.34270186749074244, - "grad_norm": 1.0500171184539795, - "learning_rate": 6.572890025575448e-06, - "loss": 0.1619, - "step": 2010 - }, - { - "epoch": 0.34287236593228015, - "grad_norm": 0.9035862684249878, - "learning_rate": 6.571184995737427e-06, - "loss": 0.1447, - "step": 2011 - }, - { - "epoch": 0.3430428643738178, - "grad_norm": 0.9435558319091797, - "learning_rate": 6.569479965899404e-06, - "loss": 0.1989, - "step": 2012 - }, - { - "epoch": 0.3432133628153555, - "grad_norm": 0.7556560039520264, - "learning_rate": 6.567774936061382e-06, - "loss": 0.1044, - "step": 2013 - }, - { - "epoch": 0.3433838612568932, - "grad_norm": 1.0829527378082275, - "learning_rate": 6.566069906223359e-06, - "loss": 0.1815, - "step": 2014 - }, - { - "epoch": 0.3435543596984309, - "grad_norm": 0.760651707649231, - "learning_rate": 6.564364876385338e-06, - "loss": 0.1291, - "step": 2015 - }, - { - "epoch": 0.34372485813996856, - "grad_norm": 0.8749564290046692, - "learning_rate": 6.562659846547315e-06, - "loss": 0.1251, - "step": 2016 - }, - { - "epoch": 0.3438953565815063, - "grad_norm": 0.8915442228317261, - "learning_rate": 6.560954816709293e-06, - "loss": 0.2182, - "step": 2017 - }, - { - "epoch": 0.34406585502304393, - "grad_norm": 1.0313383340835571, - "learning_rate": 6.559249786871271e-06, - "loss": 0.0874, - "step": 2018 - }, - { - "epoch": 0.3442363534645816, - "grad_norm": 1.4438647031784058, - "learning_rate": 6.5575447570332485e-06, - "loss": 0.2466, - "step": 2019 - }, - { - "epoch": 0.3444068519061193, - "grad_norm": 0.759958028793335, - "learning_rate": 6.5558397271952265e-06, - "loss": 0.1124, - "step": 2020 - }, - { - "epoch": 0.34457735034765696, - "grad_norm": 0.8678929805755615, - "learning_rate": 6.5541346973572036e-06, - "loss": 0.098, - "step": 2021 - }, - { - "epoch": 0.3447478487891947, - "grad_norm": 0.7600451111793518, - "learning_rate": 6.552429667519182e-06, - "loss": 0.0868, - "step": 2022 - }, - { - "epoch": 0.34491834723073234, - "grad_norm": 0.9153589606285095, - "learning_rate": 6.5507246376811595e-06, - "loss": 0.1659, - "step": 2023 - }, - { - "epoch": 0.34508884567227005, - "grad_norm": 0.9439913034439087, - "learning_rate": 6.549019607843137e-06, - "loss": 0.1752, - "step": 2024 - }, - { - "epoch": 0.3452593441138077, - "grad_norm": 0.7293012142181396, - "learning_rate": 6.547314578005116e-06, - "loss": 0.1106, - "step": 2025 - }, - { - "epoch": 0.34542984255534537, - "grad_norm": 1.9949567317962646, - "learning_rate": 6.545609548167093e-06, - "loss": 0.132, - "step": 2026 - }, - { - "epoch": 0.3456003409968831, - "grad_norm": 0.8462551832199097, - "learning_rate": 6.543904518329072e-06, - "loss": 0.1452, - "step": 2027 - }, - { - "epoch": 0.34577083943842074, - "grad_norm": 0.6383141279220581, - "learning_rate": 6.542199488491049e-06, - "loss": 0.1229, - "step": 2028 - }, - { - "epoch": 0.34594133787995845, - "grad_norm": 0.6529576778411865, - "learning_rate": 6.540494458653027e-06, - "loss": 0.0587, - "step": 2029 - }, - { - "epoch": 0.3461118363214961, - "grad_norm": 1.0192657709121704, - "learning_rate": 6.538789428815004e-06, - "loss": 0.1613, - "step": 2030 - }, - { - "epoch": 0.34628233476303383, - "grad_norm": 0.8856669068336487, - "learning_rate": 6.537084398976983e-06, - "loss": 0.1531, - "step": 2031 - }, - { - "epoch": 0.3464528332045715, - "grad_norm": 1.6244677305221558, - "learning_rate": 6.53537936913896e-06, - "loss": 0.1339, - "step": 2032 - }, - { - "epoch": 0.34662333164610915, - "grad_norm": 0.8295205235481262, - "learning_rate": 6.533674339300938e-06, - "loss": 0.0454, - "step": 2033 - }, - { - "epoch": 0.34679383008764686, - "grad_norm": 1.043818712234497, - "learning_rate": 6.531969309462917e-06, - "loss": 0.143, - "step": 2034 - }, - { - "epoch": 0.3469643285291845, - "grad_norm": 0.7247282266616821, - "learning_rate": 6.530264279624894e-06, - "loss": 0.1549, - "step": 2035 - }, - { - "epoch": 0.34713482697072223, - "grad_norm": 1.0297534465789795, - "learning_rate": 6.528559249786872e-06, - "loss": 0.1885, - "step": 2036 - }, - { - "epoch": 0.3473053254122599, - "grad_norm": 0.8988025784492493, - "learning_rate": 6.526854219948849e-06, - "loss": 0.1602, - "step": 2037 - }, - { - "epoch": 0.3474758238537976, - "grad_norm": 0.9362964630126953, - "learning_rate": 6.525149190110828e-06, - "loss": 0.1479, - "step": 2038 - }, - { - "epoch": 0.34764632229533526, - "grad_norm": 0.8799692392349243, - "learning_rate": 6.523444160272805e-06, - "loss": 0.0913, - "step": 2039 - }, - { - "epoch": 0.347816820736873, - "grad_norm": 0.5634396076202393, - "learning_rate": 6.521739130434783e-06, - "loss": 0.0844, - "step": 2040 - }, - { - "epoch": 0.34798731917841064, - "grad_norm": 1.2817353010177612, - "learning_rate": 6.520034100596761e-06, - "loss": 0.1898, - "step": 2041 - }, - { - "epoch": 0.3481578176199483, - "grad_norm": 0.9581865072250366, - "learning_rate": 6.518329070758739e-06, - "loss": 0.1107, - "step": 2042 - }, - { - "epoch": 0.348328316061486, - "grad_norm": 0.8063405156135559, - "learning_rate": 6.516624040920717e-06, - "loss": 0.1494, - "step": 2043 - }, - { - "epoch": 0.34849881450302367, - "grad_norm": 0.8312709927558899, - "learning_rate": 6.514919011082695e-06, - "loss": 0.1413, - "step": 2044 - }, - { - "epoch": 0.3486693129445614, - "grad_norm": 0.8263598680496216, - "learning_rate": 6.513213981244673e-06, - "loss": 0.1417, - "step": 2045 - }, - { - "epoch": 0.34883981138609904, - "grad_norm": 0.9109250903129578, - "learning_rate": 6.51150895140665e-06, - "loss": 0.1575, - "step": 2046 - }, - { - "epoch": 0.34901030982763676, - "grad_norm": 0.9140591025352478, - "learning_rate": 6.5098039215686285e-06, - "loss": 0.1029, - "step": 2047 - }, - { - "epoch": 0.3491808082691744, - "grad_norm": 0.8870648145675659, - "learning_rate": 6.508098891730606e-06, - "loss": 0.0993, - "step": 2048 - }, - { - "epoch": 0.3493513067107121, - "grad_norm": 0.7272403240203857, - "learning_rate": 6.5063938618925836e-06, - "loss": 0.1002, - "step": 2049 - }, - { - "epoch": 0.3495218051522498, - "grad_norm": 1.034851312637329, - "learning_rate": 6.504688832054562e-06, - "loss": 0.1463, - "step": 2050 - }, - { - "epoch": 0.34969230359378745, - "grad_norm": 0.9596707224845886, - "learning_rate": 6.5029838022165395e-06, - "loss": 0.0776, - "step": 2051 - }, - { - "epoch": 0.34986280203532516, - "grad_norm": 0.6040682792663574, - "learning_rate": 6.501278772378517e-06, - "loss": 0.0987, - "step": 2052 - }, - { - "epoch": 0.3500333004768628, - "grad_norm": 1.2424696683883667, - "learning_rate": 6.4995737425404945e-06, - "loss": 0.1698, - "step": 2053 - }, - { - "epoch": 0.35020379891840053, - "grad_norm": 0.6647294163703918, - "learning_rate": 6.497868712702473e-06, - "loss": 0.0825, - "step": 2054 - }, - { - "epoch": 0.3503742973599382, - "grad_norm": 1.229026436805725, - "learning_rate": 6.49616368286445e-06, - "loss": 0.1803, - "step": 2055 - }, - { - "epoch": 0.35054479580147585, - "grad_norm": 1.356615662574768, - "learning_rate": 6.494458653026428e-06, - "loss": 0.2154, - "step": 2056 - }, - { - "epoch": 0.35071529424301356, - "grad_norm": 0.7908090353012085, - "learning_rate": 6.4927536231884055e-06, - "loss": 0.1286, - "step": 2057 - }, - { - "epoch": 0.3508857926845512, - "grad_norm": 1.1387441158294678, - "learning_rate": 6.491048593350384e-06, - "loss": 0.1859, - "step": 2058 - }, - { - "epoch": 0.35105629112608894, - "grad_norm": 0.7091562151908875, - "learning_rate": 6.489343563512362e-06, - "loss": 0.1258, - "step": 2059 - }, - { - "epoch": 0.3512267895676266, - "grad_norm": 0.978642463684082, - "learning_rate": 6.487638533674339e-06, - "loss": 0.1429, - "step": 2060 - }, - { - "epoch": 0.3513972880091643, - "grad_norm": 0.9794069528579712, - "learning_rate": 6.485933503836318e-06, - "loss": 0.1821, - "step": 2061 - }, - { - "epoch": 0.35156778645070197, - "grad_norm": 1.1713330745697021, - "learning_rate": 6.484228473998295e-06, - "loss": 0.1636, - "step": 2062 - }, - { - "epoch": 0.3517382848922397, - "grad_norm": 0.9776598811149597, - "learning_rate": 6.482523444160274e-06, - "loss": 0.1428, - "step": 2063 - }, - { - "epoch": 0.35190878333377734, - "grad_norm": 0.7179242372512817, - "learning_rate": 6.480818414322251e-06, - "loss": 0.0813, - "step": 2064 - }, - { - "epoch": 0.352079281775315, - "grad_norm": 0.9282951354980469, - "learning_rate": 6.479113384484229e-06, - "loss": 0.1871, - "step": 2065 - }, - { - "epoch": 0.3522497802168527, - "grad_norm": 0.9170055389404297, - "learning_rate": 6.477408354646206e-06, - "loss": 0.1127, - "step": 2066 - }, - { - "epoch": 0.3524202786583904, - "grad_norm": 1.0189080238342285, - "learning_rate": 6.475703324808185e-06, - "loss": 0.1624, - "step": 2067 - }, - { - "epoch": 0.3525907770999281, - "grad_norm": 0.6744734644889832, - "learning_rate": 6.473998294970163e-06, - "loss": 0.0822, - "step": 2068 - }, - { - "epoch": 0.35276127554146575, - "grad_norm": 1.0721924304962158, - "learning_rate": 6.47229326513214e-06, - "loss": 0.1305, - "step": 2069 - }, - { - "epoch": 0.35293177398300346, - "grad_norm": 0.8503794074058533, - "learning_rate": 6.470588235294119e-06, - "loss": 0.1042, - "step": 2070 - }, - { - "epoch": 0.3531022724245411, - "grad_norm": 0.9037670493125916, - "learning_rate": 6.468883205456096e-06, - "loss": 0.0831, - "step": 2071 - }, - { - "epoch": 0.3532727708660788, - "grad_norm": 0.6255829334259033, - "learning_rate": 6.467178175618074e-06, - "loss": 0.0766, - "step": 2072 - }, - { - "epoch": 0.3534432693076165, - "grad_norm": 0.6307828426361084, - "learning_rate": 6.465473145780051e-06, - "loss": 0.0733, - "step": 2073 - }, - { - "epoch": 0.35361376774915415, - "grad_norm": 0.819678008556366, - "learning_rate": 6.46376811594203e-06, - "loss": 0.1662, - "step": 2074 - }, - { - "epoch": 0.35378426619069187, - "grad_norm": 0.6275694966316223, - "learning_rate": 6.462063086104008e-06, - "loss": 0.0859, - "step": 2075 - }, - { - "epoch": 0.3539547646322295, - "grad_norm": 0.8501904010772705, - "learning_rate": 6.460358056265985e-06, - "loss": 0.1721, - "step": 2076 - }, - { - "epoch": 0.35412526307376724, - "grad_norm": 0.9360889792442322, - "learning_rate": 6.4586530264279636e-06, - "loss": 0.1261, - "step": 2077 - }, - { - "epoch": 0.3542957615153049, - "grad_norm": 1.2045263051986694, - "learning_rate": 6.456947996589941e-06, - "loss": 0.2182, - "step": 2078 - }, - { - "epoch": 0.35446625995684256, - "grad_norm": 0.9660459160804749, - "learning_rate": 6.455242966751919e-06, - "loss": 0.1774, - "step": 2079 - }, - { - "epoch": 0.35463675839838027, - "grad_norm": 1.315540075302124, - "learning_rate": 6.4535379369138966e-06, - "loss": 0.2046, - "step": 2080 - }, - { - "epoch": 0.35480725683991793, - "grad_norm": 0.9870008826255798, - "learning_rate": 6.4518329070758745e-06, - "loss": 0.1305, - "step": 2081 - }, - { - "epoch": 0.35497775528145564, - "grad_norm": 1.0989959239959717, - "learning_rate": 6.450127877237852e-06, - "loss": 0.172, - "step": 2082 - }, - { - "epoch": 0.3551482537229933, - "grad_norm": 0.7137494087219238, - "learning_rate": 6.44842284739983e-06, - "loss": 0.1374, - "step": 2083 - }, - { - "epoch": 0.355318752164531, - "grad_norm": 0.9373939633369446, - "learning_rate": 6.446717817561808e-06, - "loss": 0.1736, - "step": 2084 - }, - { - "epoch": 0.3554892506060687, - "grad_norm": 0.7139600515365601, - "learning_rate": 6.4450127877237854e-06, - "loss": 0.0822, - "step": 2085 - }, - { - "epoch": 0.3556597490476064, - "grad_norm": 1.1834203004837036, - "learning_rate": 6.443307757885764e-06, - "loss": 0.1904, - "step": 2086 - }, - { - "epoch": 0.35583024748914405, - "grad_norm": 1.1191166639328003, - "learning_rate": 6.441602728047741e-06, - "loss": 0.1758, - "step": 2087 - }, - { - "epoch": 0.3560007459306817, - "grad_norm": 0.8284562826156616, - "learning_rate": 6.439897698209719e-06, - "loss": 0.1564, - "step": 2088 - }, - { - "epoch": 0.3561712443722194, - "grad_norm": 1.2975763082504272, - "learning_rate": 6.438192668371696e-06, - "loss": 0.1769, - "step": 2089 - }, - { - "epoch": 0.3563417428137571, - "grad_norm": 0.823640763759613, - "learning_rate": 6.436487638533675e-06, - "loss": 0.1212, - "step": 2090 - }, - { - "epoch": 0.3565122412552948, - "grad_norm": 0.904367208480835, - "learning_rate": 6.434782608695652e-06, - "loss": 0.1113, - "step": 2091 - }, - { - "epoch": 0.35668273969683245, - "grad_norm": 0.9860475659370422, - "learning_rate": 6.43307757885763e-06, - "loss": 0.1047, - "step": 2092 - }, - { - "epoch": 0.35685323813837017, - "grad_norm": 1.125466227531433, - "learning_rate": 6.431372549019609e-06, - "loss": 0.1366, - "step": 2093 - }, - { - "epoch": 0.3570237365799078, - "grad_norm": 0.9330949783325195, - "learning_rate": 6.429667519181586e-06, - "loss": 0.1179, - "step": 2094 - }, - { - "epoch": 0.3571942350214455, - "grad_norm": 0.9403000473976135, - "learning_rate": 6.427962489343564e-06, - "loss": 0.1651, - "step": 2095 - }, - { - "epoch": 0.3573647334629832, - "grad_norm": 0.839396059513092, - "learning_rate": 6.426257459505541e-06, - "loss": 0.132, - "step": 2096 - }, - { - "epoch": 0.35753523190452086, - "grad_norm": 0.8294098973274231, - "learning_rate": 6.42455242966752e-06, - "loss": 0.1451, - "step": 2097 - }, - { - "epoch": 0.35770573034605857, - "grad_norm": 0.7742642164230347, - "learning_rate": 6.422847399829497e-06, - "loss": 0.1309, - "step": 2098 - }, - { - "epoch": 0.35787622878759623, - "grad_norm": 0.7605495452880859, - "learning_rate": 6.421142369991475e-06, - "loss": 0.109, - "step": 2099 - }, - { - "epoch": 0.35804672722913394, - "grad_norm": 1.56952965259552, - "learning_rate": 6.419437340153453e-06, - "loss": 0.1774, - "step": 2100 - }, - { - "epoch": 0.35804672722913394, - "eval_f1_score": 0.27472527472527475, - "eval_loss": 0.1539647877216339, - "eval_runtime": 182.5857, - "eval_samples_per_second": 54.769, - "eval_steps_per_second": 3.423, - "step": 2100 - }, - { - "epoch": 0.3582172256706716, - "grad_norm": 0.6405848264694214, - "learning_rate": 6.417732310315431e-06, - "loss": 0.1012, - "step": 2101 - }, - { - "epoch": 0.35838772411220926, - "grad_norm": 1.0083062648773193, - "learning_rate": 6.41602728047741e-06, - "loss": 0.1576, - "step": 2102 - }, - { - "epoch": 0.358558222553747, - "grad_norm": 1.0331164598464966, - "learning_rate": 6.414322250639387e-06, - "loss": 0.0868, - "step": 2103 - }, - { - "epoch": 0.35872872099528463, - "grad_norm": 1.3799405097961426, - "learning_rate": 6.412617220801365e-06, - "loss": 0.167, - "step": 2104 - }, - { - "epoch": 0.35889921943682235, - "grad_norm": 1.3380049467086792, - "learning_rate": 6.410912190963342e-06, - "loss": 0.1638, - "step": 2105 - }, - { - "epoch": 0.35906971787836, - "grad_norm": 1.0186902284622192, - "learning_rate": 6.409207161125321e-06, - "loss": 0.1676, - "step": 2106 - }, - { - "epoch": 0.3592402163198977, - "grad_norm": 0.5658654570579529, - "learning_rate": 6.407502131287298e-06, - "loss": 0.0894, - "step": 2107 - }, - { - "epoch": 0.3594107147614354, - "grad_norm": 1.1313732862472534, - "learning_rate": 6.405797101449276e-06, - "loss": 0.1236, - "step": 2108 - }, - { - "epoch": 0.3595812132029731, - "grad_norm": 1.4047788381576538, - "learning_rate": 6.4040920716112545e-06, - "loss": 0.1463, - "step": 2109 - }, - { - "epoch": 0.35975171164451075, - "grad_norm": 1.1757776737213135, - "learning_rate": 6.402387041773232e-06, - "loss": 0.2258, - "step": 2110 - }, - { - "epoch": 0.3599222100860484, - "grad_norm": 1.0296345949172974, - "learning_rate": 6.4006820119352095e-06, - "loss": 0.1408, - "step": 2111 - }, - { - "epoch": 0.3600927085275861, - "grad_norm": 0.879396378993988, - "learning_rate": 6.398976982097187e-06, - "loss": 0.1294, - "step": 2112 - }, - { - "epoch": 0.3602632069691238, - "grad_norm": 1.3096729516983032, - "learning_rate": 6.3972719522591654e-06, - "loss": 0.2237, - "step": 2113 - }, - { - "epoch": 0.3604337054106615, - "grad_norm": 0.7490139007568359, - "learning_rate": 6.3955669224211425e-06, - "loss": 0.0969, - "step": 2114 - }, - { - "epoch": 0.36060420385219916, - "grad_norm": 0.7394745349884033, - "learning_rate": 6.3938618925831205e-06, - "loss": 0.1222, - "step": 2115 - }, - { - "epoch": 0.36077470229373687, - "grad_norm": 0.8975431323051453, - "learning_rate": 6.3921568627450984e-06, - "loss": 0.1205, - "step": 2116 - }, - { - "epoch": 0.36094520073527453, - "grad_norm": 1.3461922407150269, - "learning_rate": 6.390451832907076e-06, - "loss": 0.163, - "step": 2117 - }, - { - "epoch": 0.3611156991768122, - "grad_norm": 0.9350199103355408, - "learning_rate": 6.388746803069054e-06, - "loss": 0.177, - "step": 2118 - }, - { - "epoch": 0.3612861976183499, - "grad_norm": 1.1496632099151611, - "learning_rate": 6.387041773231032e-06, - "loss": 0.1248, - "step": 2119 - }, - { - "epoch": 0.36145669605988756, - "grad_norm": 1.1352415084838867, - "learning_rate": 6.38533674339301e-06, - "loss": 0.1746, - "step": 2120 - }, - { - "epoch": 0.3616271945014253, - "grad_norm": 0.9327718019485474, - "learning_rate": 6.383631713554987e-06, - "loss": 0.1904, - "step": 2121 - }, - { - "epoch": 0.36179769294296293, - "grad_norm": 0.7479307055473328, - "learning_rate": 6.381926683716966e-06, - "loss": 0.0699, - "step": 2122 - }, - { - "epoch": 0.36196819138450065, - "grad_norm": 0.8722934126853943, - "learning_rate": 6.380221653878943e-06, - "loss": 0.0925, - "step": 2123 - }, - { - "epoch": 0.3621386898260383, - "grad_norm": 1.530072569847107, - "learning_rate": 6.378516624040921e-06, - "loss": 0.1793, - "step": 2124 - }, - { - "epoch": 0.36230918826757597, - "grad_norm": 0.6114324927330017, - "learning_rate": 6.376811594202898e-06, - "loss": 0.0849, - "step": 2125 - }, - { - "epoch": 0.3624796867091137, - "grad_norm": 0.9590505957603455, - "learning_rate": 6.375106564364877e-06, - "loss": 0.121, - "step": 2126 - }, - { - "epoch": 0.36265018515065134, - "grad_norm": 0.6645707488059998, - "learning_rate": 6.373401534526855e-06, - "loss": 0.0806, - "step": 2127 - }, - { - "epoch": 0.36282068359218905, - "grad_norm": 0.6528565287590027, - "learning_rate": 6.371696504688832e-06, - "loss": 0.0899, - "step": 2128 - }, - { - "epoch": 0.3629911820337267, - "grad_norm": 1.1450191736221313, - "learning_rate": 6.369991474850811e-06, - "loss": 0.1427, - "step": 2129 - }, - { - "epoch": 0.3631616804752644, - "grad_norm": 1.0308457612991333, - "learning_rate": 6.368286445012788e-06, - "loss": 0.1379, - "step": 2130 - }, - { - "epoch": 0.3633321789168021, - "grad_norm": 1.4925493001937866, - "learning_rate": 6.366581415174766e-06, - "loss": 0.1987, - "step": 2131 - }, - { - "epoch": 0.3635026773583398, - "grad_norm": 0.7048131227493286, - "learning_rate": 6.364876385336743e-06, - "loss": 0.1062, - "step": 2132 - }, - { - "epoch": 0.36367317579987746, - "grad_norm": 0.8379212617874146, - "learning_rate": 6.363171355498722e-06, - "loss": 0.1552, - "step": 2133 - }, - { - "epoch": 0.3638436742414151, - "grad_norm": 1.1558237075805664, - "learning_rate": 6.3614663256607e-06, - "loss": 0.1038, - "step": 2134 - }, - { - "epoch": 0.36401417268295283, - "grad_norm": 0.670208752155304, - "learning_rate": 6.359761295822677e-06, - "loss": 0.0811, - "step": 2135 - }, - { - "epoch": 0.3641846711244905, - "grad_norm": 1.4409935474395752, - "learning_rate": 6.358056265984656e-06, - "loss": 0.1946, - "step": 2136 - }, - { - "epoch": 0.3643551695660282, - "grad_norm": 0.7791793346405029, - "learning_rate": 6.356351236146633e-06, - "loss": 0.1485, - "step": 2137 - }, - { - "epoch": 0.36452566800756586, - "grad_norm": 1.1148107051849365, - "learning_rate": 6.354646206308612e-06, - "loss": 0.2146, - "step": 2138 - }, - { - "epoch": 0.3646961664491036, - "grad_norm": 0.9492986798286438, - "learning_rate": 6.352941176470589e-06, - "loss": 0.1227, - "step": 2139 - }, - { - "epoch": 0.36486666489064123, - "grad_norm": 0.7307665348052979, - "learning_rate": 6.351236146632567e-06, - "loss": 0.0764, - "step": 2140 - }, - { - "epoch": 0.3650371633321789, - "grad_norm": 1.2648804187774658, - "learning_rate": 6.349531116794544e-06, - "loss": 0.1314, - "step": 2141 - }, - { - "epoch": 0.3652076617737166, - "grad_norm": 0.7799243927001953, - "learning_rate": 6.3478260869565225e-06, - "loss": 0.0747, - "step": 2142 - }, - { - "epoch": 0.36537816021525427, - "grad_norm": 0.7744673490524292, - "learning_rate": 6.3461210571185005e-06, - "loss": 0.1224, - "step": 2143 - }, - { - "epoch": 0.365548658656792, - "grad_norm": 0.9403333067893982, - "learning_rate": 6.344416027280478e-06, - "loss": 0.0992, - "step": 2144 - }, - { - "epoch": 0.36571915709832964, - "grad_norm": 0.9702940583229065, - "learning_rate": 6.342710997442456e-06, - "loss": 0.1358, - "step": 2145 - }, - { - "epoch": 0.36588965553986735, - "grad_norm": 1.0209053754806519, - "learning_rate": 6.3410059676044335e-06, - "loss": 0.116, - "step": 2146 - }, - { - "epoch": 0.366060153981405, - "grad_norm": 0.7160688042640686, - "learning_rate": 6.3393009377664114e-06, - "loss": 0.1244, - "step": 2147 - }, - { - "epoch": 0.36623065242294267, - "grad_norm": 1.0120749473571777, - "learning_rate": 6.3375959079283885e-06, - "loss": 0.1397, - "step": 2148 - }, - { - "epoch": 0.3664011508644804, - "grad_norm": 0.6767970323562622, - "learning_rate": 6.335890878090367e-06, - "loss": 0.1211, - "step": 2149 - }, - { - "epoch": 0.36657164930601804, - "grad_norm": 0.5627476572990417, - "learning_rate": 6.3341858482523444e-06, - "loss": 0.0939, - "step": 2150 - }, - { - "epoch": 0.36674214774755576, - "grad_norm": 0.6991590261459351, - "learning_rate": 6.332480818414322e-06, - "loss": 0.1212, - "step": 2151 - }, - { - "epoch": 0.3669126461890934, - "grad_norm": 1.034399151802063, - "learning_rate": 6.330775788576301e-06, - "loss": 0.1489, - "step": 2152 - }, - { - "epoch": 0.36708314463063113, - "grad_norm": 0.8625597953796387, - "learning_rate": 6.329070758738278e-06, - "loss": 0.0497, - "step": 2153 - }, - { - "epoch": 0.3672536430721688, - "grad_norm": 1.123618721961975, - "learning_rate": 6.327365728900256e-06, - "loss": 0.207, - "step": 2154 - }, - { - "epoch": 0.3674241415137065, - "grad_norm": 2.084270715713501, - "learning_rate": 6.325660699062234e-06, - "loss": 0.1573, - "step": 2155 - }, - { - "epoch": 0.36759463995524416, - "grad_norm": 0.6721711754798889, - "learning_rate": 6.323955669224212e-06, - "loss": 0.1054, - "step": 2156 - }, - { - "epoch": 0.3677651383967818, - "grad_norm": 1.4094241857528687, - "learning_rate": 6.322250639386189e-06, - "loss": 0.1926, - "step": 2157 - }, - { - "epoch": 0.36793563683831954, - "grad_norm": 1.0103540420532227, - "learning_rate": 6.320545609548168e-06, - "loss": 0.1321, - "step": 2158 - }, - { - "epoch": 0.3681061352798572, - "grad_norm": 0.9904351234436035, - "learning_rate": 6.318840579710146e-06, - "loss": 0.096, - "step": 2159 - }, - { - "epoch": 0.3682766337213949, - "grad_norm": 1.041150689125061, - "learning_rate": 6.317135549872123e-06, - "loss": 0.126, - "step": 2160 - }, - { - "epoch": 0.36844713216293257, - "grad_norm": 1.380527138710022, - "learning_rate": 6.315430520034102e-06, - "loss": 0.2002, - "step": 2161 - }, - { - "epoch": 0.3686176306044703, - "grad_norm": 0.9068813323974609, - "learning_rate": 6.313725490196079e-06, - "loss": 0.1357, - "step": 2162 - }, - { - "epoch": 0.36878812904600794, - "grad_norm": 0.8607088923454285, - "learning_rate": 6.312020460358057e-06, - "loss": 0.0703, - "step": 2163 - }, - { - "epoch": 0.3689586274875456, - "grad_norm": 0.7945460677146912, - "learning_rate": 6.310315430520034e-06, - "loss": 0.1227, - "step": 2164 - }, - { - "epoch": 0.3691291259290833, - "grad_norm": 1.271232008934021, - "learning_rate": 6.308610400682013e-06, - "loss": 0.1381, - "step": 2165 - }, - { - "epoch": 0.36929962437062097, - "grad_norm": 0.909999668598175, - "learning_rate": 6.30690537084399e-06, - "loss": 0.1043, - "step": 2166 - }, - { - "epoch": 0.3694701228121587, - "grad_norm": 1.6058598756790161, - "learning_rate": 6.305200341005968e-06, - "loss": 0.2397, - "step": 2167 - }, - { - "epoch": 0.36964062125369634, - "grad_norm": 0.7543137669563293, - "learning_rate": 6.303495311167947e-06, - "loss": 0.13, - "step": 2168 - }, - { - "epoch": 0.36981111969523406, - "grad_norm": 1.472127914428711, - "learning_rate": 6.301790281329924e-06, - "loss": 0.1905, - "step": 2169 - }, - { - "epoch": 0.3699816181367717, - "grad_norm": 1.0304391384124756, - "learning_rate": 6.300085251491902e-06, - "loss": 0.1102, - "step": 2170 - }, - { - "epoch": 0.3701521165783094, - "grad_norm": 0.9876975417137146, - "learning_rate": 6.298380221653879e-06, - "loss": 0.2271, - "step": 2171 - }, - { - "epoch": 0.3703226150198471, - "grad_norm": 1.176422357559204, - "learning_rate": 6.296675191815858e-06, - "loss": 0.1687, - "step": 2172 - }, - { - "epoch": 0.37049311346138475, - "grad_norm": 1.720004916191101, - "learning_rate": 6.294970161977835e-06, - "loss": 0.2063, - "step": 2173 - }, - { - "epoch": 0.37066361190292246, - "grad_norm": 0.9195306897163391, - "learning_rate": 6.293265132139813e-06, - "loss": 0.1601, - "step": 2174 - }, - { - "epoch": 0.3708341103444601, - "grad_norm": 1.4446338415145874, - "learning_rate": 6.291560102301791e-06, - "loss": 0.1781, - "step": 2175 - }, - { - "epoch": 0.37100460878599784, - "grad_norm": 1.0467724800109863, - "learning_rate": 6.2898550724637685e-06, - "loss": 0.1786, - "step": 2176 - }, - { - "epoch": 0.3711751072275355, - "grad_norm": 0.8518681526184082, - "learning_rate": 6.288150042625747e-06, - "loss": 0.137, - "step": 2177 - }, - { - "epoch": 0.3713456056690732, - "grad_norm": 1.0049577951431274, - "learning_rate": 6.286445012787724e-06, - "loss": 0.1792, - "step": 2178 - }, - { - "epoch": 0.37151610411061087, - "grad_norm": 1.1468156576156616, - "learning_rate": 6.284739982949702e-06, - "loss": 0.0627, - "step": 2179 - }, - { - "epoch": 0.3716866025521485, - "grad_norm": 1.0543559789657593, - "learning_rate": 6.2830349531116795e-06, - "loss": 0.1402, - "step": 2180 - }, - { - "epoch": 0.37185710099368624, - "grad_norm": 1.046242594718933, - "learning_rate": 6.281329923273658e-06, - "loss": 0.1341, - "step": 2181 - }, - { - "epoch": 0.3720275994352239, - "grad_norm": 0.9223925471305847, - "learning_rate": 6.279624893435635e-06, - "loss": 0.0667, - "step": 2182 - }, - { - "epoch": 0.3721980978767616, - "grad_norm": 0.9626695513725281, - "learning_rate": 6.277919863597613e-06, - "loss": 0.1337, - "step": 2183 - }, - { - "epoch": 0.37236859631829927, - "grad_norm": 0.7089354991912842, - "learning_rate": 6.276214833759592e-06, - "loss": 0.1158, - "step": 2184 - }, - { - "epoch": 0.372539094759837, - "grad_norm": 0.5326353311538696, - "learning_rate": 6.274509803921569e-06, - "loss": 0.0786, - "step": 2185 - }, - { - "epoch": 0.37270959320137464, - "grad_norm": 0.8547235727310181, - "learning_rate": 6.272804774083547e-06, - "loss": 0.1664, - "step": 2186 - }, - { - "epoch": 0.3728800916429123, - "grad_norm": 0.9501067996025085, - "learning_rate": 6.271099744245524e-06, - "loss": 0.1644, - "step": 2187 - }, - { - "epoch": 0.37305059008445, - "grad_norm": 0.8592075109481812, - "learning_rate": 6.269394714407503e-06, - "loss": 0.1002, - "step": 2188 - }, - { - "epoch": 0.3732210885259877, - "grad_norm": 1.842674732208252, - "learning_rate": 6.26768968456948e-06, - "loss": 0.2311, - "step": 2189 - }, - { - "epoch": 0.3733915869675254, - "grad_norm": 1.5288238525390625, - "learning_rate": 6.265984654731458e-06, - "loss": 0.1733, - "step": 2190 - }, - { - "epoch": 0.37356208540906305, - "grad_norm": 0.6214331388473511, - "learning_rate": 6.264279624893436e-06, - "loss": 0.1119, - "step": 2191 - }, - { - "epoch": 0.37373258385060076, - "grad_norm": 0.7459906935691833, - "learning_rate": 6.262574595055414e-06, - "loss": 0.0975, - "step": 2192 - }, - { - "epoch": 0.3739030822921384, - "grad_norm": 1.2965167760849, - "learning_rate": 6.260869565217392e-06, - "loss": 0.1533, - "step": 2193 - }, - { - "epoch": 0.3740735807336761, - "grad_norm": 0.9013891220092773, - "learning_rate": 6.25916453537937e-06, - "loss": 0.1074, - "step": 2194 - }, - { - "epoch": 0.3742440791752138, - "grad_norm": 1.2202616930007935, - "learning_rate": 6.257459505541348e-06, - "loss": 0.1525, - "step": 2195 - }, - { - "epoch": 0.37441457761675145, - "grad_norm": 1.3237905502319336, - "learning_rate": 6.255754475703325e-06, - "loss": 0.1626, - "step": 2196 - }, - { - "epoch": 0.37458507605828917, - "grad_norm": 0.794424831867218, - "learning_rate": 6.254049445865304e-06, - "loss": 0.1528, - "step": 2197 - }, - { - "epoch": 0.3747555744998268, - "grad_norm": 0.7876736521720886, - "learning_rate": 6.252344416027281e-06, - "loss": 0.0942, - "step": 2198 - }, - { - "epoch": 0.37492607294136454, - "grad_norm": 1.0070785284042358, - "learning_rate": 6.250639386189259e-06, - "loss": 0.1803, - "step": 2199 - }, - { - "epoch": 0.3750965713829022, - "grad_norm": 1.2898237705230713, - "learning_rate": 6.248934356351236e-06, - "loss": 0.1796, - "step": 2200 - }, - { - "epoch": 0.3750965713829022, - "eval_f1_score": 0.2619047619047619, - "eval_loss": 0.1529710590839386, - "eval_runtime": 182.6841, - "eval_samples_per_second": 54.739, - "eval_steps_per_second": 3.421, - "step": 2200 - }, - { - "epoch": 0.37526706982443986, - "grad_norm": 1.1882941722869873, - "learning_rate": 6.247229326513215e-06, - "loss": 0.1849, - "step": 2201 - }, - { - "epoch": 0.3754375682659776, - "grad_norm": 0.7369084358215332, - "learning_rate": 6.245524296675193e-06, - "loss": 0.1282, - "step": 2202 - }, - { - "epoch": 0.37560806670751523, - "grad_norm": 0.88145911693573, - "learning_rate": 6.24381926683717e-06, - "loss": 0.0625, - "step": 2203 - }, - { - "epoch": 0.37577856514905295, - "grad_norm": 0.8131815195083618, - "learning_rate": 6.2421142369991485e-06, - "loss": 0.1424, - "step": 2204 - }, - { - "epoch": 0.3759490635905906, - "grad_norm": 0.9672574996948242, - "learning_rate": 6.240409207161126e-06, - "loss": 0.1381, - "step": 2205 - }, - { - "epoch": 0.3761195620321283, - "grad_norm": 0.8028636574745178, - "learning_rate": 6.2387041773231036e-06, - "loss": 0.1439, - "step": 2206 - }, - { - "epoch": 0.376290060473666, - "grad_norm": 1.0327125787734985, - "learning_rate": 6.236999147485081e-06, - "loss": 0.154, - "step": 2207 - }, - { - "epoch": 0.3764605589152037, - "grad_norm": 0.8577430248260498, - "learning_rate": 6.2352941176470595e-06, - "loss": 0.1516, - "step": 2208 - }, - { - "epoch": 0.37663105735674135, - "grad_norm": 0.625697135925293, - "learning_rate": 6.233589087809037e-06, - "loss": 0.1135, - "step": 2209 - }, - { - "epoch": 0.376801555798279, - "grad_norm": 0.8129452466964722, - "learning_rate": 6.2318840579710145e-06, - "loss": 0.1557, - "step": 2210 - }, - { - "epoch": 0.3769720542398167, - "grad_norm": 0.8950178623199463, - "learning_rate": 6.230179028132993e-06, - "loss": 0.1792, - "step": 2211 - }, - { - "epoch": 0.3771425526813544, - "grad_norm": 0.8850390911102295, - "learning_rate": 6.22847399829497e-06, - "loss": 0.161, - "step": 2212 - }, - { - "epoch": 0.3773130511228921, - "grad_norm": 0.9739559888839722, - "learning_rate": 6.226768968456949e-06, - "loss": 0.1295, - "step": 2213 - }, - { - "epoch": 0.37748354956442975, - "grad_norm": 1.1448336839675903, - "learning_rate": 6.225063938618926e-06, - "loss": 0.1796, - "step": 2214 - }, - { - "epoch": 0.37765404800596747, - "grad_norm": 0.9003750085830688, - "learning_rate": 6.223358908780904e-06, - "loss": 0.1529, - "step": 2215 - }, - { - "epoch": 0.3778245464475051, - "grad_norm": 0.7714499235153198, - "learning_rate": 6.221653878942881e-06, - "loss": 0.1255, - "step": 2216 - }, - { - "epoch": 0.3779950448890428, - "grad_norm": 0.6482207179069519, - "learning_rate": 6.21994884910486e-06, - "loss": 0.1168, - "step": 2217 - }, - { - "epoch": 0.3781655433305805, - "grad_norm": 0.67027747631073, - "learning_rate": 6.218243819266838e-06, - "loss": 0.096, - "step": 2218 - }, - { - "epoch": 0.37833604177211816, - "grad_norm": 0.8746976256370544, - "learning_rate": 6.216538789428815e-06, - "loss": 0.1316, - "step": 2219 - }, - { - "epoch": 0.3785065402136559, - "grad_norm": 0.8811997175216675, - "learning_rate": 6.214833759590794e-06, - "loss": 0.1429, - "step": 2220 - }, - { - "epoch": 0.37867703865519353, - "grad_norm": 0.872652530670166, - "learning_rate": 6.213128729752771e-06, - "loss": 0.0681, - "step": 2221 - }, - { - "epoch": 0.37884753709673125, - "grad_norm": 0.7932902574539185, - "learning_rate": 6.211423699914749e-06, - "loss": 0.1206, - "step": 2222 - }, - { - "epoch": 0.3790180355382689, - "grad_norm": 1.5943965911865234, - "learning_rate": 6.209718670076726e-06, - "loss": 0.1709, - "step": 2223 - }, - { - "epoch": 0.37918853397980656, - "grad_norm": 0.9252127408981323, - "learning_rate": 6.208013640238705e-06, - "loss": 0.1187, - "step": 2224 - }, - { - "epoch": 0.3793590324213443, - "grad_norm": 0.6615201830863953, - "learning_rate": 6.206308610400682e-06, - "loss": 0.1144, - "step": 2225 - }, - { - "epoch": 0.37952953086288194, - "grad_norm": 0.9420225620269775, - "learning_rate": 6.20460358056266e-06, - "loss": 0.1841, - "step": 2226 - }, - { - "epoch": 0.37970002930441965, - "grad_norm": 0.7444522380828857, - "learning_rate": 6.202898550724639e-06, - "loss": 0.1269, - "step": 2227 - }, - { - "epoch": 0.3798705277459573, - "grad_norm": 1.0659302473068237, - "learning_rate": 6.201193520886616e-06, - "loss": 0.1517, - "step": 2228 - }, - { - "epoch": 0.380041026187495, - "grad_norm": 0.8699449300765991, - "learning_rate": 6.199488491048594e-06, - "loss": 0.1347, - "step": 2229 - }, - { - "epoch": 0.3802115246290327, - "grad_norm": 0.9697796106338501, - "learning_rate": 6.197783461210572e-06, - "loss": 0.1445, - "step": 2230 - }, - { - "epoch": 0.3803820230705704, - "grad_norm": 1.4614863395690918, - "learning_rate": 6.19607843137255e-06, - "loss": 0.2126, - "step": 2231 - }, - { - "epoch": 0.38055252151210806, - "grad_norm": 0.6998443007469177, - "learning_rate": 6.194373401534527e-06, - "loss": 0.106, - "step": 2232 - }, - { - "epoch": 0.3807230199536457, - "grad_norm": 1.6563403606414795, - "learning_rate": 6.192668371696506e-06, - "loss": 0.2081, - "step": 2233 - }, - { - "epoch": 0.38089351839518343, - "grad_norm": 0.9660869240760803, - "learning_rate": 6.1909633418584836e-06, - "loss": 0.2294, - "step": 2234 - }, - { - "epoch": 0.3810640168367211, - "grad_norm": 1.1604372262954712, - "learning_rate": 6.189258312020461e-06, - "loss": 0.093, - "step": 2235 - }, - { - "epoch": 0.3812345152782588, - "grad_norm": 1.12831449508667, - "learning_rate": 6.1875532821824395e-06, - "loss": 0.1999, - "step": 2236 - }, - { - "epoch": 0.38140501371979646, - "grad_norm": 0.8325589299201965, - "learning_rate": 6.1858482523444166e-06, - "loss": 0.1504, - "step": 2237 - }, - { - "epoch": 0.3815755121613342, - "grad_norm": 1.3243125677108765, - "learning_rate": 6.1841432225063945e-06, - "loss": 0.1809, - "step": 2238 - }, - { - "epoch": 0.38174601060287183, - "grad_norm": 0.9601839184761047, - "learning_rate": 6.182438192668372e-06, - "loss": 0.125, - "step": 2239 - }, - { - "epoch": 0.3819165090444095, - "grad_norm": 1.0139927864074707, - "learning_rate": 6.18073316283035e-06, - "loss": 0.128, - "step": 2240 - }, - { - "epoch": 0.3820870074859472, - "grad_norm": 0.9789314866065979, - "learning_rate": 6.1790281329923275e-06, - "loss": 0.1124, - "step": 2241 - }, - { - "epoch": 0.38225750592748486, - "grad_norm": 0.8316740989685059, - "learning_rate": 6.1773231031543054e-06, - "loss": 0.1605, - "step": 2242 - }, - { - "epoch": 0.3824280043690226, - "grad_norm": 0.8386850357055664, - "learning_rate": 6.175618073316284e-06, - "loss": 0.1018, - "step": 2243 - }, - { - "epoch": 0.38259850281056024, - "grad_norm": 0.849038302898407, - "learning_rate": 6.173913043478261e-06, - "loss": 0.1214, - "step": 2244 - }, - { - "epoch": 0.38276900125209795, - "grad_norm": 0.8374232053756714, - "learning_rate": 6.172208013640239e-06, - "loss": 0.1207, - "step": 2245 - }, - { - "epoch": 0.3829394996936356, - "grad_norm": 0.8584263324737549, - "learning_rate": 6.170502983802216e-06, - "loss": 0.101, - "step": 2246 - }, - { - "epoch": 0.38310999813517327, - "grad_norm": 0.8575809597969055, - "learning_rate": 6.168797953964195e-06, - "loss": 0.1521, - "step": 2247 - }, - { - "epoch": 0.383280496576711, - "grad_norm": 1.1430909633636475, - "learning_rate": 6.167092924126172e-06, - "loss": 0.1273, - "step": 2248 - }, - { - "epoch": 0.38345099501824864, - "grad_norm": 1.7613104581832886, - "learning_rate": 6.16538789428815e-06, - "loss": 0.093, - "step": 2249 - }, - { - "epoch": 0.38362149345978636, - "grad_norm": 1.461144208908081, - "learning_rate": 6.163682864450128e-06, - "loss": 0.1602, - "step": 2250 - }, - { - "epoch": 0.383791991901324, - "grad_norm": 0.9410567283630371, - "learning_rate": 6.161977834612106e-06, - "loss": 0.1331, - "step": 2251 - }, - { - "epoch": 0.38396249034286173, - "grad_norm": 0.6462012529373169, - "learning_rate": 6.160272804774085e-06, - "loss": 0.0823, - "step": 2252 - }, - { - "epoch": 0.3841329887843994, - "grad_norm": 1.0113216638565063, - "learning_rate": 6.158567774936062e-06, - "loss": 0.1388, - "step": 2253 - }, - { - "epoch": 0.3843034872259371, - "grad_norm": 0.8928611278533936, - "learning_rate": 6.15686274509804e-06, - "loss": 0.17, - "step": 2254 - }, - { - "epoch": 0.38447398566747476, - "grad_norm": 1.2479135990142822, - "learning_rate": 6.155157715260017e-06, - "loss": 0.1164, - "step": 2255 - }, - { - "epoch": 0.3846444841090124, - "grad_norm": 1.9075334072113037, - "learning_rate": 6.153452685421996e-06, - "loss": 0.1827, - "step": 2256 - }, - { - "epoch": 0.38481498255055013, - "grad_norm": 0.836900532245636, - "learning_rate": 6.151747655583973e-06, - "loss": 0.0707, - "step": 2257 - }, - { - "epoch": 0.3849854809920878, - "grad_norm": 0.961868941783905, - "learning_rate": 6.150042625745951e-06, - "loss": 0.0876, - "step": 2258 - }, - { - "epoch": 0.3851559794336255, - "grad_norm": 1.439834713935852, - "learning_rate": 6.14833759590793e-06, - "loss": 0.177, - "step": 2259 - }, - { - "epoch": 0.38532647787516316, - "grad_norm": 0.7403258681297302, - "learning_rate": 6.146632566069907e-06, - "loss": 0.0915, - "step": 2260 - }, - { - "epoch": 0.3854969763167009, - "grad_norm": 0.9234430193901062, - "learning_rate": 6.144927536231885e-06, - "loss": 0.129, - "step": 2261 - }, - { - "epoch": 0.38566747475823854, - "grad_norm": 1.1485732793807983, - "learning_rate": 6.143222506393862e-06, - "loss": 0.1597, - "step": 2262 - }, - { - "epoch": 0.3858379731997762, - "grad_norm": 1.0878947973251343, - "learning_rate": 6.141517476555841e-06, - "loss": 0.1158, - "step": 2263 - }, - { - "epoch": 0.3860084716413139, - "grad_norm": 1.095202088356018, - "learning_rate": 6.139812446717818e-06, - "loss": 0.1728, - "step": 2264 - }, - { - "epoch": 0.38617897008285157, - "grad_norm": 0.9918746948242188, - "learning_rate": 6.138107416879796e-06, - "loss": 0.0813, - "step": 2265 - }, - { - "epoch": 0.3863494685243893, - "grad_norm": 1.457942247390747, - "learning_rate": 6.136402387041774e-06, - "loss": 0.1729, - "step": 2266 - }, - { - "epoch": 0.38651996696592694, - "grad_norm": 0.9414834976196289, - "learning_rate": 6.134697357203752e-06, - "loss": 0.0493, - "step": 2267 - }, - { - "epoch": 0.38669046540746466, - "grad_norm": 0.8592334389686584, - "learning_rate": 6.1329923273657295e-06, - "loss": 0.115, - "step": 2268 - }, - { - "epoch": 0.3868609638490023, - "grad_norm": 0.8972296118736267, - "learning_rate": 6.1312872975277075e-06, - "loss": 0.1338, - "step": 2269 - }, - { - "epoch": 0.38703146229054, - "grad_norm": 0.7704724073410034, - "learning_rate": 6.1295822676896854e-06, - "loss": 0.0741, - "step": 2270 - }, - { - "epoch": 0.3872019607320777, - "grad_norm": 0.8177960515022278, - "learning_rate": 6.1278772378516625e-06, - "loss": 0.0844, - "step": 2271 - }, - { - "epoch": 0.38737245917361535, - "grad_norm": 0.719103217124939, - "learning_rate": 6.126172208013641e-06, - "loss": 0.089, - "step": 2272 - }, - { - "epoch": 0.38754295761515306, - "grad_norm": 1.591321349143982, - "learning_rate": 6.1244671781756184e-06, - "loss": 0.2422, - "step": 2273 - }, - { - "epoch": 0.3877134560566907, - "grad_norm": 0.5507732629776001, - "learning_rate": 6.122762148337596e-06, - "loss": 0.0382, - "step": 2274 - }, - { - "epoch": 0.38788395449822843, - "grad_norm": 0.6792088747024536, - "learning_rate": 6.1210571184995735e-06, - "loss": 0.1013, - "step": 2275 - }, - { - "epoch": 0.3880544529397661, - "grad_norm": 0.6091058850288391, - "learning_rate": 6.119352088661552e-06, - "loss": 0.0477, - "step": 2276 - }, - { - "epoch": 0.3882249513813038, - "grad_norm": 1.6229771375656128, - "learning_rate": 6.11764705882353e-06, - "loss": 0.1975, - "step": 2277 - }, - { - "epoch": 0.38839544982284147, - "grad_norm": 1.1221989393234253, - "learning_rate": 6.115942028985507e-06, - "loss": 0.1165, - "step": 2278 - }, - { - "epoch": 0.3885659482643791, - "grad_norm": 1.2790297269821167, - "learning_rate": 6.114236999147486e-06, - "loss": 0.1424, - "step": 2279 - }, - { - "epoch": 0.38873644670591684, - "grad_norm": 0.8660223484039307, - "learning_rate": 6.112531969309463e-06, - "loss": 0.1013, - "step": 2280 - }, - { - "epoch": 0.3889069451474545, - "grad_norm": 1.3250168561935425, - "learning_rate": 6.110826939471441e-06, - "loss": 0.1911, - "step": 2281 - }, - { - "epoch": 0.3890774435889922, - "grad_norm": 0.8523133397102356, - "learning_rate": 6.109121909633418e-06, - "loss": 0.1004, - "step": 2282 - }, - { - "epoch": 0.38924794203052987, - "grad_norm": 1.2505394220352173, - "learning_rate": 6.107416879795397e-06, - "loss": 0.1516, - "step": 2283 - }, - { - "epoch": 0.3894184404720676, - "grad_norm": 1.5190775394439697, - "learning_rate": 6.105711849957374e-06, - "loss": 0.1719, - "step": 2284 - }, - { - "epoch": 0.38958893891360524, - "grad_norm": 0.8516865968704224, - "learning_rate": 6.104006820119352e-06, - "loss": 0.0865, - "step": 2285 - }, - { - "epoch": 0.3897594373551429, - "grad_norm": 0.7336435317993164, - "learning_rate": 6.102301790281331e-06, - "loss": 0.0977, - "step": 2286 - }, - { - "epoch": 0.3899299357966806, - "grad_norm": 1.225153923034668, - "learning_rate": 6.100596760443308e-06, - "loss": 0.1039, - "step": 2287 - }, - { - "epoch": 0.3901004342382183, - "grad_norm": 0.9977211952209473, - "learning_rate": 6.098891730605287e-06, - "loss": 0.1931, - "step": 2288 - }, - { - "epoch": 0.390270932679756, - "grad_norm": 0.9641508460044861, - "learning_rate": 6.097186700767264e-06, - "loss": 0.1688, - "step": 2289 - }, - { - "epoch": 0.39044143112129365, - "grad_norm": 1.0043286085128784, - "learning_rate": 6.095481670929242e-06, - "loss": 0.1296, - "step": 2290 - }, - { - "epoch": 0.39061192956283136, - "grad_norm": 0.9000137448310852, - "learning_rate": 6.093776641091219e-06, - "loss": 0.1551, - "step": 2291 - }, - { - "epoch": 0.390782428004369, - "grad_norm": 1.0267075300216675, - "learning_rate": 6.092071611253198e-06, - "loss": 0.1337, - "step": 2292 - }, - { - "epoch": 0.3909529264459067, - "grad_norm": 1.451011061668396, - "learning_rate": 6.090366581415176e-06, - "loss": 0.218, - "step": 2293 - }, - { - "epoch": 0.3911234248874444, - "grad_norm": 1.2185776233673096, - "learning_rate": 6.088661551577153e-06, - "loss": 0.2079, - "step": 2294 - }, - { - "epoch": 0.39129392332898205, - "grad_norm": 1.1939146518707275, - "learning_rate": 6.086956521739132e-06, - "loss": 0.1223, - "step": 2295 - }, - { - "epoch": 0.39146442177051977, - "grad_norm": 0.7564530372619629, - "learning_rate": 6.085251491901109e-06, - "loss": 0.1188, - "step": 2296 - }, - { - "epoch": 0.3916349202120574, - "grad_norm": 1.1471365690231323, - "learning_rate": 6.083546462063087e-06, - "loss": 0.1619, - "step": 2297 - }, - { - "epoch": 0.39180541865359514, - "grad_norm": 1.486033320426941, - "learning_rate": 6.081841432225064e-06, - "loss": 0.1774, - "step": 2298 - }, - { - "epoch": 0.3919759170951328, - "grad_norm": 0.9266932010650635, - "learning_rate": 6.0801364023870425e-06, - "loss": 0.1548, - "step": 2299 - }, - { - "epoch": 0.3921464155366705, - "grad_norm": 0.9137939214706421, - "learning_rate": 6.07843137254902e-06, - "loss": 0.1048, - "step": 2300 - }, - { - "epoch": 0.3921464155366705, - "eval_f1_score": 0.29896907216494845, - "eval_loss": 0.14963874220848083, - "eval_runtime": 182.6197, - "eval_samples_per_second": 54.759, - "eval_steps_per_second": 3.422, - "step": 2300 - }, - { - "epoch": 0.39231691397820817, - "grad_norm": 0.8086863160133362, - "learning_rate": 6.076726342710998e-06, - "loss": 0.117, - "step": 2301 - }, - { - "epoch": 0.39248741241974583, - "grad_norm": 0.8732712268829346, - "learning_rate": 6.075021312872976e-06, - "loss": 0.0763, - "step": 2302 - }, - { - "epoch": 0.39265791086128354, - "grad_norm": 0.8038264513015747, - "learning_rate": 6.0733162830349535e-06, - "loss": 0.1339, - "step": 2303 - }, - { - "epoch": 0.3928284093028212, - "grad_norm": 0.7566570043563843, - "learning_rate": 6.0716112531969314e-06, - "loss": 0.0899, - "step": 2304 - }, - { - "epoch": 0.3929989077443589, - "grad_norm": 0.9701877236366272, - "learning_rate": 6.069906223358909e-06, - "loss": 0.114, - "step": 2305 - }, - { - "epoch": 0.3931694061858966, - "grad_norm": 1.1687184572219849, - "learning_rate": 6.068201193520887e-06, - "loss": 0.1838, - "step": 2306 - }, - { - "epoch": 0.3933399046274343, - "grad_norm": 1.289287805557251, - "learning_rate": 6.0664961636828644e-06, - "loss": 0.1721, - "step": 2307 - }, - { - "epoch": 0.39351040306897195, - "grad_norm": 0.985457718372345, - "learning_rate": 6.064791133844843e-06, - "loss": 0.1179, - "step": 2308 - }, - { - "epoch": 0.3936809015105096, - "grad_norm": 1.1207270622253418, - "learning_rate": 6.06308610400682e-06, - "loss": 0.1381, - "step": 2309 - }, - { - "epoch": 0.3938513999520473, - "grad_norm": 1.2061148881912231, - "learning_rate": 6.061381074168798e-06, - "loss": 0.1639, - "step": 2310 - }, - { - "epoch": 0.394021898393585, - "grad_norm": 1.4372804164886475, - "learning_rate": 6.059676044330777e-06, - "loss": 0.2017, - "step": 2311 - }, - { - "epoch": 0.3941923968351227, - "grad_norm": 1.2852275371551514, - "learning_rate": 6.057971014492754e-06, - "loss": 0.1528, - "step": 2312 - }, - { - "epoch": 0.39436289527666035, - "grad_norm": 1.009509801864624, - "learning_rate": 6.056265984654732e-06, - "loss": 0.1401, - "step": 2313 - }, - { - "epoch": 0.39453339371819807, - "grad_norm": 0.8609738349914551, - "learning_rate": 6.054560954816709e-06, - "loss": 0.1215, - "step": 2314 - }, - { - "epoch": 0.3947038921597357, - "grad_norm": 0.8159873485565186, - "learning_rate": 6.052855924978688e-06, - "loss": 0.117, - "step": 2315 - }, - { - "epoch": 0.3948743906012734, - "grad_norm": 0.9817201495170593, - "learning_rate": 6.051150895140665e-06, - "loss": 0.1891, - "step": 2316 - }, - { - "epoch": 0.3950448890428111, - "grad_norm": 0.7218739986419678, - "learning_rate": 6.049445865302643e-06, - "loss": 0.0861, - "step": 2317 - }, - { - "epoch": 0.39521538748434876, - "grad_norm": 0.8500337600708008, - "learning_rate": 6.047740835464622e-06, - "loss": 0.1335, - "step": 2318 - }, - { - "epoch": 0.39538588592588647, - "grad_norm": 0.9201745986938477, - "learning_rate": 6.046035805626599e-06, - "loss": 0.1352, - "step": 2319 - }, - { - "epoch": 0.39555638436742413, - "grad_norm": 1.1116868257522583, - "learning_rate": 6.044330775788577e-06, - "loss": 0.1093, - "step": 2320 - }, - { - "epoch": 0.39572688280896184, - "grad_norm": 1.2167072296142578, - "learning_rate": 6.042625745950554e-06, - "loss": 0.072, - "step": 2321 - }, - { - "epoch": 0.3958973812504995, - "grad_norm": 0.785332202911377, - "learning_rate": 6.040920716112533e-06, - "loss": 0.1055, - "step": 2322 - }, - { - "epoch": 0.3960678796920372, - "grad_norm": 0.8508793115615845, - "learning_rate": 6.03921568627451e-06, - "loss": 0.139, - "step": 2323 - }, - { - "epoch": 0.3962383781335749, - "grad_norm": 1.0231958627700806, - "learning_rate": 6.037510656436488e-06, - "loss": 0.1833, - "step": 2324 - }, - { - "epoch": 0.39640887657511253, - "grad_norm": 0.5762442350387573, - "learning_rate": 6.035805626598466e-06, - "loss": 0.0719, - "step": 2325 - }, - { - "epoch": 0.39657937501665025, - "grad_norm": 1.129563808441162, - "learning_rate": 6.034100596760444e-06, - "loss": 0.1799, - "step": 2326 - }, - { - "epoch": 0.3967498734581879, - "grad_norm": 1.4465104341506958, - "learning_rate": 6.0323955669224225e-06, - "loss": 0.1404, - "step": 2327 - }, - { - "epoch": 0.3969203718997256, - "grad_norm": 1.0937806367874146, - "learning_rate": 6.0306905370844e-06, - "loss": 0.1169, - "step": 2328 - }, - { - "epoch": 0.3970908703412633, - "grad_norm": 1.3986912965774536, - "learning_rate": 6.028985507246378e-06, - "loss": 0.1871, - "step": 2329 - }, - { - "epoch": 0.397261368782801, - "grad_norm": 1.3107599020004272, - "learning_rate": 6.027280477408355e-06, - "loss": 0.224, - "step": 2330 - }, - { - "epoch": 0.39743186722433865, - "grad_norm": 1.5524184703826904, - "learning_rate": 6.0255754475703335e-06, - "loss": 0.242, - "step": 2331 - }, - { - "epoch": 0.3976023656658763, - "grad_norm": 0.8739752173423767, - "learning_rate": 6.023870417732311e-06, - "loss": 0.1174, - "step": 2332 - }, - { - "epoch": 0.397772864107414, - "grad_norm": 0.711715042591095, - "learning_rate": 6.0221653878942885e-06, - "loss": 0.0916, - "step": 2333 - }, - { - "epoch": 0.3979433625489517, - "grad_norm": 0.7139453291893005, - "learning_rate": 6.020460358056266e-06, - "loss": 0.0759, - "step": 2334 - }, - { - "epoch": 0.3981138609904894, - "grad_norm": 0.8483171463012695, - "learning_rate": 6.018755328218244e-06, - "loss": 0.136, - "step": 2335 - }, - { - "epoch": 0.39828435943202706, - "grad_norm": 0.8804246187210083, - "learning_rate": 6.017050298380222e-06, - "loss": 0.1069, - "step": 2336 - }, - { - "epoch": 0.39845485787356477, - "grad_norm": 1.0845495462417603, - "learning_rate": 6.0153452685421995e-06, - "loss": 0.148, - "step": 2337 - }, - { - "epoch": 0.39862535631510243, - "grad_norm": 0.8838271498680115, - "learning_rate": 6.013640238704178e-06, - "loss": 0.0673, - "step": 2338 - }, - { - "epoch": 0.3987958547566401, - "grad_norm": 0.9725067019462585, - "learning_rate": 6.011935208866155e-06, - "loss": 0.177, - "step": 2339 - }, - { - "epoch": 0.3989663531981778, - "grad_norm": 1.3371416330337524, - "learning_rate": 6.010230179028133e-06, - "loss": 0.2014, - "step": 2340 - }, - { - "epoch": 0.39913685163971546, - "grad_norm": 0.7962145209312439, - "learning_rate": 6.008525149190111e-06, - "loss": 0.1034, - "step": 2341 - }, - { - "epoch": 0.3993073500812532, - "grad_norm": 0.7338995933532715, - "learning_rate": 6.006820119352089e-06, - "loss": 0.1091, - "step": 2342 - }, - { - "epoch": 0.39947784852279083, - "grad_norm": 0.82045578956604, - "learning_rate": 6.005115089514067e-06, - "loss": 0.1205, - "step": 2343 - }, - { - "epoch": 0.39964834696432855, - "grad_norm": 0.7830996513366699, - "learning_rate": 6.003410059676045e-06, - "loss": 0.1117, - "step": 2344 - }, - { - "epoch": 0.3998188454058662, - "grad_norm": 1.263693928718567, - "learning_rate": 6.001705029838023e-06, - "loss": 0.243, - "step": 2345 - }, - { - "epoch": 0.3999893438474039, - "grad_norm": 0.866906464099884, - "learning_rate": 6e-06, - "loss": 0.1502, - "step": 2346 - }, - { - "epoch": 0.4001598422889416, - "grad_norm": 0.856949508190155, - "learning_rate": 5.998294970161979e-06, - "loss": 0.149, - "step": 2347 - }, - { - "epoch": 0.40033034073047924, - "grad_norm": 0.8175544738769531, - "learning_rate": 5.996589940323956e-06, - "loss": 0.1468, - "step": 2348 - }, - { - "epoch": 0.40050083917201695, - "grad_norm": 0.986709713935852, - "learning_rate": 5.994884910485934e-06, - "loss": 0.144, - "step": 2349 - }, - { - "epoch": 0.4006713376135546, - "grad_norm": 1.01996910572052, - "learning_rate": 5.993179880647911e-06, - "loss": 0.1834, - "step": 2350 - }, - { - "epoch": 0.4008418360550923, - "grad_norm": 1.0860642194747925, - "learning_rate": 5.99147485080989e-06, - "loss": 0.1928, - "step": 2351 - }, - { - "epoch": 0.40101233449663, - "grad_norm": 0.6442664861679077, - "learning_rate": 5.989769820971868e-06, - "loss": 0.0916, - "step": 2352 - }, - { - "epoch": 0.4011828329381677, - "grad_norm": 1.1461782455444336, - "learning_rate": 5.988064791133845e-06, - "loss": 0.1605, - "step": 2353 - }, - { - "epoch": 0.40135333137970536, - "grad_norm": 0.7978892922401428, - "learning_rate": 5.986359761295824e-06, - "loss": 0.129, - "step": 2354 - }, - { - "epoch": 0.401523829821243, - "grad_norm": 0.7703652381896973, - "learning_rate": 5.984654731457801e-06, - "loss": 0.1015, - "step": 2355 - }, - { - "epoch": 0.40169432826278073, - "grad_norm": 0.939119815826416, - "learning_rate": 5.982949701619779e-06, - "loss": 0.1502, - "step": 2356 - }, - { - "epoch": 0.4018648267043184, - "grad_norm": 0.6256553530693054, - "learning_rate": 5.981244671781756e-06, - "loss": 0.1149, - "step": 2357 - }, - { - "epoch": 0.4020353251458561, - "grad_norm": 0.7845792174339294, - "learning_rate": 5.979539641943735e-06, - "loss": 0.1002, - "step": 2358 - }, - { - "epoch": 0.40220582358739376, - "grad_norm": 1.0356690883636475, - "learning_rate": 5.977834612105712e-06, - "loss": 0.1763, - "step": 2359 - }, - { - "epoch": 0.4023763220289315, - "grad_norm": 0.631354808807373, - "learning_rate": 5.97612958226769e-06, - "loss": 0.1229, - "step": 2360 - }, - { - "epoch": 0.40254682047046914, - "grad_norm": 0.8355875015258789, - "learning_rate": 5.9744245524296685e-06, - "loss": 0.1467, - "step": 2361 - }, - { - "epoch": 0.4027173189120068, - "grad_norm": 0.8429080843925476, - "learning_rate": 5.972719522591646e-06, - "loss": 0.1641, - "step": 2362 - }, - { - "epoch": 0.4028878173535445, - "grad_norm": 1.1086691617965698, - "learning_rate": 5.9710144927536236e-06, - "loss": 0.2006, - "step": 2363 - }, - { - "epoch": 0.40305831579508217, - "grad_norm": 1.2838102579116821, - "learning_rate": 5.9693094629156015e-06, - "loss": 0.1822, - "step": 2364 - }, - { - "epoch": 0.4032288142366199, - "grad_norm": 1.2585407495498657, - "learning_rate": 5.9676044330775795e-06, - "loss": 0.2134, - "step": 2365 - }, - { - "epoch": 0.40339931267815754, - "grad_norm": 1.0826866626739502, - "learning_rate": 5.9658994032395566e-06, - "loss": 0.1734, - "step": 2366 - }, - { - "epoch": 0.40356981111969525, - "grad_norm": 0.88109290599823, - "learning_rate": 5.964194373401535e-06, - "loss": 0.1703, - "step": 2367 - }, - { - "epoch": 0.4037403095612329, - "grad_norm": 0.7502427697181702, - "learning_rate": 5.962489343563513e-06, - "loss": 0.1001, - "step": 2368 - }, - { - "epoch": 0.4039108080027706, - "grad_norm": 1.4130350351333618, - "learning_rate": 5.96078431372549e-06, - "loss": 0.2545, - "step": 2369 - }, - { - "epoch": 0.4040813064443083, - "grad_norm": 0.9190012216567993, - "learning_rate": 5.959079283887469e-06, - "loss": 0.1548, - "step": 2370 - }, - { - "epoch": 0.40425180488584594, - "grad_norm": 0.8156841397285461, - "learning_rate": 5.957374254049446e-06, - "loss": 0.0857, - "step": 2371 - }, - { - "epoch": 0.40442230332738366, - "grad_norm": 1.1620196104049683, - "learning_rate": 5.955669224211424e-06, - "loss": 0.1501, - "step": 2372 - }, - { - "epoch": 0.4045928017689213, - "grad_norm": 0.8737968802452087, - "learning_rate": 5.953964194373401e-06, - "loss": 0.1385, - "step": 2373 - }, - { - "epoch": 0.40476330021045903, - "grad_norm": 0.7628181576728821, - "learning_rate": 5.95225916453538e-06, - "loss": 0.1445, - "step": 2374 - }, - { - "epoch": 0.4049337986519967, - "grad_norm": 0.750722348690033, - "learning_rate": 5.950554134697357e-06, - "loss": 0.1131, - "step": 2375 - }, - { - "epoch": 0.4051042970935344, - "grad_norm": 1.0749493837356567, - "learning_rate": 5.948849104859335e-06, - "loss": 0.1112, - "step": 2376 - }, - { - "epoch": 0.40527479553507206, - "grad_norm": 0.653072714805603, - "learning_rate": 5.947144075021314e-06, - "loss": 0.094, - "step": 2377 - }, - { - "epoch": 0.4054452939766097, - "grad_norm": 0.8208218812942505, - "learning_rate": 5.945439045183291e-06, - "loss": 0.1553, - "step": 2378 - }, - { - "epoch": 0.40561579241814744, - "grad_norm": 0.857717752456665, - "learning_rate": 5.943734015345269e-06, - "loss": 0.1312, - "step": 2379 - }, - { - "epoch": 0.4057862908596851, - "grad_norm": 0.6189414858818054, - "learning_rate": 5.942028985507247e-06, - "loss": 0.1063, - "step": 2380 - }, - { - "epoch": 0.4059567893012228, - "grad_norm": 1.1543437242507935, - "learning_rate": 5.940323955669225e-06, - "loss": 0.194, - "step": 2381 - }, - { - "epoch": 0.40612728774276047, - "grad_norm": 1.019155502319336, - "learning_rate": 5.938618925831202e-06, - "loss": 0.1542, - "step": 2382 - }, - { - "epoch": 0.4062977861842982, - "grad_norm": 0.5917924642562866, - "learning_rate": 5.936913895993181e-06, - "loss": 0.0949, - "step": 2383 - }, - { - "epoch": 0.40646828462583584, - "grad_norm": 1.1552553176879883, - "learning_rate": 5.935208866155158e-06, - "loss": 0.132, - "step": 2384 - }, - { - "epoch": 0.4066387830673735, - "grad_norm": 1.129095196723938, - "learning_rate": 5.933503836317136e-06, - "loss": 0.1757, - "step": 2385 - }, - { - "epoch": 0.4068092815089112, - "grad_norm": 0.8844802975654602, - "learning_rate": 5.931798806479115e-06, - "loss": 0.1515, - "step": 2386 - }, - { - "epoch": 0.40697977995044887, - "grad_norm": 0.7525590062141418, - "learning_rate": 5.930093776641092e-06, - "loss": 0.121, - "step": 2387 - }, - { - "epoch": 0.4071502783919866, - "grad_norm": 0.6554398536682129, - "learning_rate": 5.92838874680307e-06, - "loss": 0.0379, - "step": 2388 - }, - { - "epoch": 0.40732077683352425, - "grad_norm": 0.8680796027183533, - "learning_rate": 5.926683716965047e-06, - "loss": 0.1016, - "step": 2389 - }, - { - "epoch": 0.40749127527506196, - "grad_norm": 0.7904180288314819, - "learning_rate": 5.924978687127026e-06, - "loss": 0.0739, - "step": 2390 - }, - { - "epoch": 0.4076617737165996, - "grad_norm": 0.5275554060935974, - "learning_rate": 5.923273657289003e-06, - "loss": 0.0848, - "step": 2391 - }, - { - "epoch": 0.40783227215813733, - "grad_norm": 0.6864534020423889, - "learning_rate": 5.921568627450981e-06, - "loss": 0.0928, - "step": 2392 - }, - { - "epoch": 0.408002770599675, - "grad_norm": 0.8688926696777344, - "learning_rate": 5.9198635976129595e-06, - "loss": 0.1337, - "step": 2393 - }, - { - "epoch": 0.40817326904121265, - "grad_norm": 0.8549246191978455, - "learning_rate": 5.9181585677749366e-06, - "loss": 0.1339, - "step": 2394 - }, - { - "epoch": 0.40834376748275036, - "grad_norm": 1.4684349298477173, - "learning_rate": 5.9164535379369145e-06, - "loss": 0.2219, - "step": 2395 - }, - { - "epoch": 0.408514265924288, - "grad_norm": 1.7323837280273438, - "learning_rate": 5.914748508098892e-06, - "loss": 0.324, - "step": 2396 - }, - { - "epoch": 0.40868476436582574, - "grad_norm": 0.6567860245704651, - "learning_rate": 5.91304347826087e-06, - "loss": 0.0704, - "step": 2397 - }, - { - "epoch": 0.4088552628073634, - "grad_norm": 0.7284254431724548, - "learning_rate": 5.9113384484228475e-06, - "loss": 0.1179, - "step": 2398 - }, - { - "epoch": 0.4090257612489011, - "grad_norm": 1.1074248552322388, - "learning_rate": 5.9096334185848255e-06, - "loss": 0.1273, - "step": 2399 - }, - { - "epoch": 0.40919625969043877, - "grad_norm": 0.9865325093269348, - "learning_rate": 5.907928388746803e-06, - "loss": 0.1429, - "step": 2400 - }, - { - "epoch": 0.40919625969043877, - "eval_f1_score": 0.31876606683804626, - "eval_loss": 0.14625637233257294, - "eval_runtime": 182.619, - "eval_samples_per_second": 54.759, - "eval_steps_per_second": 3.422, - "step": 2400 - }, - { - "epoch": 0.4093667581319764, - "grad_norm": 0.9457055926322937, - "learning_rate": 5.906223358908781e-06, - "loss": 0.1687, - "step": 2401 - }, - { - "epoch": 0.40953725657351414, - "grad_norm": 1.100854754447937, - "learning_rate": 5.90451832907076e-06, - "loss": 0.1754, - "step": 2402 - }, - { - "epoch": 0.4097077550150518, - "grad_norm": 0.8571153879165649, - "learning_rate": 5.902813299232737e-06, - "loss": 0.1214, - "step": 2403 - }, - { - "epoch": 0.4098782534565895, - "grad_norm": 0.955055832862854, - "learning_rate": 5.901108269394715e-06, - "loss": 0.1678, - "step": 2404 - }, - { - "epoch": 0.4100487518981272, - "grad_norm": 0.8582931160926819, - "learning_rate": 5.899403239556692e-06, - "loss": 0.1298, - "step": 2405 - }, - { - "epoch": 0.4102192503396649, - "grad_norm": 1.1340924501419067, - "learning_rate": 5.897698209718671e-06, - "loss": 0.1669, - "step": 2406 - }, - { - "epoch": 0.41038974878120255, - "grad_norm": 1.0416170358657837, - "learning_rate": 5.895993179880648e-06, - "loss": 0.1432, - "step": 2407 - }, - { - "epoch": 0.4105602472227402, - "grad_norm": 1.0653588771820068, - "learning_rate": 5.894288150042626e-06, - "loss": 0.1826, - "step": 2408 - }, - { - "epoch": 0.4107307456642779, - "grad_norm": 1.1291371583938599, - "learning_rate": 5.892583120204603e-06, - "loss": 0.2454, - "step": 2409 - }, - { - "epoch": 0.4109012441058156, - "grad_norm": 1.0526398420333862, - "learning_rate": 5.890878090366582e-06, - "loss": 0.1268, - "step": 2410 - }, - { - "epoch": 0.4110717425473533, - "grad_norm": 1.0905719995498657, - "learning_rate": 5.88917306052856e-06, - "loss": 0.174, - "step": 2411 - }, - { - "epoch": 0.41124224098889095, - "grad_norm": 0.8474220037460327, - "learning_rate": 5.887468030690537e-06, - "loss": 0.1421, - "step": 2412 - }, - { - "epoch": 0.41141273943042866, - "grad_norm": 1.1635980606079102, - "learning_rate": 5.885763000852516e-06, - "loss": 0.1261, - "step": 2413 - }, - { - "epoch": 0.4115832378719663, - "grad_norm": 0.8044309616088867, - "learning_rate": 5.884057971014493e-06, - "loss": 0.1569, - "step": 2414 - }, - { - "epoch": 0.411753736313504, - "grad_norm": 0.7889132499694824, - "learning_rate": 5.882352941176471e-06, - "loss": 0.1622, - "step": 2415 - }, - { - "epoch": 0.4119242347550417, - "grad_norm": 0.9566287398338318, - "learning_rate": 5.880647911338449e-06, - "loss": 0.0778, - "step": 2416 - }, - { - "epoch": 0.41209473319657935, - "grad_norm": 0.743043839931488, - "learning_rate": 5.878942881500427e-06, - "loss": 0.0944, - "step": 2417 - }, - { - "epoch": 0.41226523163811707, - "grad_norm": 1.1795198917388916, - "learning_rate": 5.877237851662405e-06, - "loss": 0.213, - "step": 2418 - }, - { - "epoch": 0.4124357300796547, - "grad_norm": 0.9205105304718018, - "learning_rate": 5.875532821824383e-06, - "loss": 0.0509, - "step": 2419 - }, - { - "epoch": 0.41260622852119244, - "grad_norm": 0.9393637180328369, - "learning_rate": 5.873827791986361e-06, - "loss": 0.1202, - "step": 2420 - }, - { - "epoch": 0.4127767269627301, - "grad_norm": 0.8949941396713257, - "learning_rate": 5.872122762148338e-06, - "loss": 0.1036, - "step": 2421 - }, - { - "epoch": 0.4129472254042678, - "grad_norm": 0.9795252680778503, - "learning_rate": 5.8704177323103166e-06, - "loss": 0.1586, - "step": 2422 - }, - { - "epoch": 0.4131177238458055, - "grad_norm": 0.8107895255088806, - "learning_rate": 5.868712702472294e-06, - "loss": 0.1404, - "step": 2423 - }, - { - "epoch": 0.41328822228734313, - "grad_norm": 0.9952834248542786, - "learning_rate": 5.867007672634272e-06, - "loss": 0.1253, - "step": 2424 - }, - { - "epoch": 0.41345872072888085, - "grad_norm": 1.0126919746398926, - "learning_rate": 5.865302642796249e-06, - "loss": 0.1819, - "step": 2425 - }, - { - "epoch": 0.4136292191704185, - "grad_norm": 1.6776386499404907, - "learning_rate": 5.8635976129582275e-06, - "loss": 0.1804, - "step": 2426 - }, - { - "epoch": 0.4137997176119562, - "grad_norm": 0.8046997785568237, - "learning_rate": 5.8618925831202054e-06, - "loss": 0.0819, - "step": 2427 - }, - { - "epoch": 0.4139702160534939, - "grad_norm": 1.4850029945373535, - "learning_rate": 5.8601875532821825e-06, - "loss": 0.1769, - "step": 2428 - }, - { - "epoch": 0.4141407144950316, - "grad_norm": 1.0972051620483398, - "learning_rate": 5.858482523444161e-06, - "loss": 0.1405, - "step": 2429 - }, - { - "epoch": 0.41431121293656925, - "grad_norm": 1.0775092840194702, - "learning_rate": 5.8567774936061384e-06, - "loss": 0.1195, - "step": 2430 - }, - { - "epoch": 0.4144817113781069, - "grad_norm": 1.1166963577270508, - "learning_rate": 5.855072463768116e-06, - "loss": 0.2072, - "step": 2431 - }, - { - "epoch": 0.4146522098196446, - "grad_norm": 0.7372758388519287, - "learning_rate": 5.8533674339300935e-06, - "loss": 0.0925, - "step": 2432 - }, - { - "epoch": 0.4148227082611823, - "grad_norm": 1.672086477279663, - "learning_rate": 5.851662404092072e-06, - "loss": 0.1642, - "step": 2433 - }, - { - "epoch": 0.41499320670272, - "grad_norm": 1.4401135444641113, - "learning_rate": 5.849957374254049e-06, - "loss": 0.1533, - "step": 2434 - }, - { - "epoch": 0.41516370514425766, - "grad_norm": 1.2260637283325195, - "learning_rate": 5.848252344416027e-06, - "loss": 0.2409, - "step": 2435 - }, - { - "epoch": 0.41533420358579537, - "grad_norm": 0.8733996748924255, - "learning_rate": 5.846547314578006e-06, - "loss": 0.1268, - "step": 2436 - }, - { - "epoch": 0.41550470202733303, - "grad_norm": 1.0725549459457397, - "learning_rate": 5.844842284739983e-06, - "loss": 0.1409, - "step": 2437 - }, - { - "epoch": 0.4156752004688707, - "grad_norm": 0.8036240339279175, - "learning_rate": 5.843137254901961e-06, - "loss": 0.1157, - "step": 2438 - }, - { - "epoch": 0.4158456989104084, - "grad_norm": 0.9177947044372559, - "learning_rate": 5.841432225063939e-06, - "loss": 0.1678, - "step": 2439 - }, - { - "epoch": 0.41601619735194606, - "grad_norm": 1.0422391891479492, - "learning_rate": 5.839727195225917e-06, - "loss": 0.1663, - "step": 2440 - }, - { - "epoch": 0.4161866957934838, - "grad_norm": 0.8394445776939392, - "learning_rate": 5.838022165387894e-06, - "loss": 0.1335, - "step": 2441 - }, - { - "epoch": 0.41635719423502143, - "grad_norm": 0.8703233599662781, - "learning_rate": 5.836317135549873e-06, - "loss": 0.1439, - "step": 2442 - }, - { - "epoch": 0.41652769267655915, - "grad_norm": 0.8049616813659668, - "learning_rate": 5.83461210571185e-06, - "loss": 0.1275, - "step": 2443 - }, - { - "epoch": 0.4166981911180968, - "grad_norm": 1.0591437816619873, - "learning_rate": 5.832907075873828e-06, - "loss": 0.1545, - "step": 2444 - }, - { - "epoch": 0.4168686895596345, - "grad_norm": 0.7541891932487488, - "learning_rate": 5.831202046035807e-06, - "loss": 0.0799, - "step": 2445 - }, - { - "epoch": 0.4170391880011722, - "grad_norm": 1.1001821756362915, - "learning_rate": 5.829497016197784e-06, - "loss": 0.0832, - "step": 2446 - }, - { - "epoch": 0.41720968644270984, - "grad_norm": 0.8933374881744385, - "learning_rate": 5.827791986359762e-06, - "loss": 0.1245, - "step": 2447 - }, - { - "epoch": 0.41738018488424755, - "grad_norm": 0.9707329273223877, - "learning_rate": 5.826086956521739e-06, - "loss": 0.1819, - "step": 2448 - }, - { - "epoch": 0.4175506833257852, - "grad_norm": 0.82521653175354, - "learning_rate": 5.824381926683718e-06, - "loss": 0.1236, - "step": 2449 - }, - { - "epoch": 0.4177211817673229, - "grad_norm": 1.5881226062774658, - "learning_rate": 5.822676896845695e-06, - "loss": 0.2405, - "step": 2450 - }, - { - "epoch": 0.4178916802088606, - "grad_norm": 0.9384194016456604, - "learning_rate": 5.820971867007673e-06, - "loss": 0.1518, - "step": 2451 - }, - { - "epoch": 0.4180621786503983, - "grad_norm": 1.1350349187850952, - "learning_rate": 5.819266837169652e-06, - "loss": 0.1233, - "step": 2452 - }, - { - "epoch": 0.41823267709193596, - "grad_norm": 0.788532555103302, - "learning_rate": 5.817561807331629e-06, - "loss": 0.0644, - "step": 2453 - }, - { - "epoch": 0.4184031755334736, - "grad_norm": 1.639487862586975, - "learning_rate": 5.815856777493607e-06, - "loss": 0.2061, - "step": 2454 - }, - { - "epoch": 0.41857367397501133, - "grad_norm": 0.8484280705451965, - "learning_rate": 5.814151747655585e-06, - "loss": 0.1293, - "step": 2455 - }, - { - "epoch": 0.418744172416549, - "grad_norm": 0.7462180852890015, - "learning_rate": 5.8124467178175625e-06, - "loss": 0.0625, - "step": 2456 - }, - { - "epoch": 0.4189146708580867, - "grad_norm": 0.6771153211593628, - "learning_rate": 5.81074168797954e-06, - "loss": 0.0534, - "step": 2457 - }, - { - "epoch": 0.41908516929962436, - "grad_norm": 1.4879751205444336, - "learning_rate": 5.8090366581415184e-06, - "loss": 0.2232, - "step": 2458 - }, - { - "epoch": 0.4192556677411621, - "grad_norm": 1.8108857870101929, - "learning_rate": 5.8073316283034955e-06, - "loss": 0.2234, - "step": 2459 - }, - { - "epoch": 0.41942616618269973, - "grad_norm": 0.8870851993560791, - "learning_rate": 5.8056265984654735e-06, - "loss": 0.1446, - "step": 2460 - }, - { - "epoch": 0.4195966646242374, - "grad_norm": 1.2096353769302368, - "learning_rate": 5.803921568627452e-06, - "loss": 0.1226, - "step": 2461 - }, - { - "epoch": 0.4197671630657751, - "grad_norm": 0.8411592245101929, - "learning_rate": 5.802216538789429e-06, - "loss": 0.0914, - "step": 2462 - }, - { - "epoch": 0.41993766150731276, - "grad_norm": 0.7253265380859375, - "learning_rate": 5.800511508951407e-06, - "loss": 0.1, - "step": 2463 - }, - { - "epoch": 0.4201081599488505, - "grad_norm": 1.141318440437317, - "learning_rate": 5.7988064791133844e-06, - "loss": 0.1903, - "step": 2464 - }, - { - "epoch": 0.42027865839038814, - "grad_norm": 1.2091392278671265, - "learning_rate": 5.797101449275363e-06, - "loss": 0.1197, - "step": 2465 - }, - { - "epoch": 0.42044915683192585, - "grad_norm": 1.1575111150741577, - "learning_rate": 5.79539641943734e-06, - "loss": 0.1327, - "step": 2466 - }, - { - "epoch": 0.4206196552734635, - "grad_norm": 0.7505564093589783, - "learning_rate": 5.793691389599318e-06, - "loss": 0.0827, - "step": 2467 - }, - { - "epoch": 0.4207901537150012, - "grad_norm": 0.7998400330543518, - "learning_rate": 5.791986359761295e-06, - "loss": 0.0942, - "step": 2468 - }, - { - "epoch": 0.4209606521565389, - "grad_norm": 1.3579339981079102, - "learning_rate": 5.790281329923274e-06, - "loss": 0.2063, - "step": 2469 - }, - { - "epoch": 0.42113115059807654, - "grad_norm": 0.9726285934448242, - "learning_rate": 5.788576300085252e-06, - "loss": 0.1824, - "step": 2470 - }, - { - "epoch": 0.42130164903961426, - "grad_norm": 0.8264228105545044, - "learning_rate": 5.786871270247229e-06, - "loss": 0.1467, - "step": 2471 - }, - { - "epoch": 0.4214721474811519, - "grad_norm": 0.8461482524871826, - "learning_rate": 5.785166240409208e-06, - "loss": 0.1412, - "step": 2472 - }, - { - "epoch": 0.42164264592268963, - "grad_norm": 0.8347538709640503, - "learning_rate": 5.783461210571185e-06, - "loss": 0.0636, - "step": 2473 - }, - { - "epoch": 0.4218131443642273, - "grad_norm": 0.5894155502319336, - "learning_rate": 5.781756180733163e-06, - "loss": 0.0559, - "step": 2474 - }, - { - "epoch": 0.421983642805765, - "grad_norm": 1.2116296291351318, - "learning_rate": 5.780051150895141e-06, - "loss": 0.1289, - "step": 2475 - }, - { - "epoch": 0.42215414124730266, - "grad_norm": 0.7940623164176941, - "learning_rate": 5.778346121057119e-06, - "loss": 0.0536, - "step": 2476 - }, - { - "epoch": 0.4223246396888403, - "grad_norm": 0.9565810561180115, - "learning_rate": 5.776641091219098e-06, - "loss": 0.1271, - "step": 2477 - }, - { - "epoch": 0.42249513813037803, - "grad_norm": 0.9501217007637024, - "learning_rate": 5.774936061381075e-06, - "loss": 0.1019, - "step": 2478 - }, - { - "epoch": 0.4226656365719157, - "grad_norm": 1.0174156427383423, - "learning_rate": 5.773231031543053e-06, - "loss": 0.1307, - "step": 2479 - }, - { - "epoch": 0.4228361350134534, - "grad_norm": 1.1585232019424438, - "learning_rate": 5.77152600170503e-06, - "loss": 0.1416, - "step": 2480 - }, - { - "epoch": 0.42300663345499107, - "grad_norm": 1.277368426322937, - "learning_rate": 5.769820971867009e-06, - "loss": 0.1621, - "step": 2481 - }, - { - "epoch": 0.4231771318965288, - "grad_norm": 0.6454185843467712, - "learning_rate": 5.768115942028986e-06, - "loss": 0.0797, - "step": 2482 - }, - { - "epoch": 0.42334763033806644, - "grad_norm": 0.8542653918266296, - "learning_rate": 5.766410912190964e-06, - "loss": 0.0422, - "step": 2483 - }, - { - "epoch": 0.4235181287796041, - "grad_norm": 1.4021434783935547, - "learning_rate": 5.764705882352941e-06, - "loss": 0.1871, - "step": 2484 - }, - { - "epoch": 0.4236886272211418, - "grad_norm": 1.434823751449585, - "learning_rate": 5.76300085251492e-06, - "loss": 0.1693, - "step": 2485 - }, - { - "epoch": 0.42385912566267947, - "grad_norm": 1.4753504991531372, - "learning_rate": 5.761295822676898e-06, - "loss": 0.1383, - "step": 2486 - }, - { - "epoch": 0.4240296241042172, - "grad_norm": 1.8268640041351318, - "learning_rate": 5.759590792838875e-06, - "loss": 0.213, - "step": 2487 - }, - { - "epoch": 0.42420012254575484, - "grad_norm": 1.1246039867401123, - "learning_rate": 5.7578857630008535e-06, - "loss": 0.1891, - "step": 2488 - }, - { - "epoch": 0.42437062098729256, - "grad_norm": 1.0577718019485474, - "learning_rate": 5.756180733162831e-06, - "loss": 0.1782, - "step": 2489 - }, - { - "epoch": 0.4245411194288302, - "grad_norm": 1.2953256368637085, - "learning_rate": 5.7544757033248085e-06, - "loss": 0.1958, - "step": 2490 - }, - { - "epoch": 0.42471161787036793, - "grad_norm": 1.3340262174606323, - "learning_rate": 5.7527706734867865e-06, - "loss": 0.2098, - "step": 2491 - }, - { - "epoch": 0.4248821163119056, - "grad_norm": 1.4178156852722168, - "learning_rate": 5.751065643648764e-06, - "loss": 0.1211, - "step": 2492 - }, - { - "epoch": 0.42505261475344325, - "grad_norm": 1.2180986404418945, - "learning_rate": 5.7493606138107415e-06, - "loss": 0.17, - "step": 2493 - }, - { - "epoch": 0.42522311319498096, - "grad_norm": 1.0153529644012451, - "learning_rate": 5.74765558397272e-06, - "loss": 0.1066, - "step": 2494 - }, - { - "epoch": 0.4253936116365186, - "grad_norm": 1.0207457542419434, - "learning_rate": 5.745950554134698e-06, - "loss": 0.1531, - "step": 2495 - }, - { - "epoch": 0.42556411007805633, - "grad_norm": 1.096200942993164, - "learning_rate": 5.744245524296675e-06, - "loss": 0.0736, - "step": 2496 - }, - { - "epoch": 0.425734608519594, - "grad_norm": 1.0620962381362915, - "learning_rate": 5.742540494458654e-06, - "loss": 0.1664, - "step": 2497 - }, - { - "epoch": 0.4259051069611317, - "grad_norm": 1.4350156784057617, - "learning_rate": 5.740835464620631e-06, - "loss": 0.1166, - "step": 2498 - }, - { - "epoch": 0.42607560540266937, - "grad_norm": 1.700987458229065, - "learning_rate": 5.739130434782609e-06, - "loss": 0.2208, - "step": 2499 - }, - { - "epoch": 0.426246103844207, - "grad_norm": 0.8968698978424072, - "learning_rate": 5.737425404944586e-06, - "loss": 0.1277, - "step": 2500 - }, - { - "epoch": 0.426246103844207, - "eval_f1_score": 0.3444730077120823, - "eval_loss": 0.14721998572349548, - "eval_runtime": 182.6201, - "eval_samples_per_second": 54.758, - "eval_steps_per_second": 3.422, - "step": 2500 - }, - { - "epoch": 0.42641660228574474, - "grad_norm": 1.3592954874038696, - "learning_rate": 5.735720375106565e-06, - "loss": 0.1583, - "step": 2501 - }, - { - "epoch": 0.4265871007272824, - "grad_norm": 0.8200425505638123, - "learning_rate": 5.734015345268543e-06, - "loss": 0.1133, - "step": 2502 - }, - { - "epoch": 0.4267575991688201, - "grad_norm": 0.929311990737915, - "learning_rate": 5.73231031543052e-06, - "loss": 0.1461, - "step": 2503 - }, - { - "epoch": 0.42692809761035777, - "grad_norm": 0.7825623154640198, - "learning_rate": 5.730605285592499e-06, - "loss": 0.1384, - "step": 2504 - }, - { - "epoch": 0.4270985960518955, - "grad_norm": 0.8742008805274963, - "learning_rate": 5.728900255754476e-06, - "loss": 0.1035, - "step": 2505 - }, - { - "epoch": 0.42726909449343314, - "grad_norm": 0.7557575106620789, - "learning_rate": 5.727195225916454e-06, - "loss": 0.1381, - "step": 2506 - }, - { - "epoch": 0.4274395929349708, - "grad_norm": 1.0146187543869019, - "learning_rate": 5.725490196078431e-06, - "loss": 0.1468, - "step": 2507 - }, - { - "epoch": 0.4276100913765085, - "grad_norm": 1.1496630907058716, - "learning_rate": 5.72378516624041e-06, - "loss": 0.1925, - "step": 2508 - }, - { - "epoch": 0.4277805898180462, - "grad_norm": 1.1652343273162842, - "learning_rate": 5.722080136402387e-06, - "loss": 0.1873, - "step": 2509 - }, - { - "epoch": 0.4279510882595839, - "grad_norm": 1.3467806577682495, - "learning_rate": 5.720375106564365e-06, - "loss": 0.1659, - "step": 2510 - }, - { - "epoch": 0.42812158670112155, - "grad_norm": 1.2672559022903442, - "learning_rate": 5.718670076726344e-06, - "loss": 0.18, - "step": 2511 - }, - { - "epoch": 0.42829208514265926, - "grad_norm": 0.8156704306602478, - "learning_rate": 5.716965046888321e-06, - "loss": 0.1384, - "step": 2512 - }, - { - "epoch": 0.4284625835841969, - "grad_norm": 0.9403845071792603, - "learning_rate": 5.715260017050299e-06, - "loss": 0.1376, - "step": 2513 - }, - { - "epoch": 0.42863308202573464, - "grad_norm": 0.8247608542442322, - "learning_rate": 5.713554987212277e-06, - "loss": 0.1188, - "step": 2514 - }, - { - "epoch": 0.4288035804672723, - "grad_norm": 1.153664231300354, - "learning_rate": 5.711849957374255e-06, - "loss": 0.0713, - "step": 2515 - }, - { - "epoch": 0.42897407890880995, - "grad_norm": 1.1019221544265747, - "learning_rate": 5.710144927536232e-06, - "loss": 0.1233, - "step": 2516 - }, - { - "epoch": 0.42914457735034767, - "grad_norm": 1.1238685846328735, - "learning_rate": 5.7084398976982106e-06, - "loss": 0.1802, - "step": 2517 - }, - { - "epoch": 0.4293150757918853, - "grad_norm": 0.8956630229949951, - "learning_rate": 5.706734867860188e-06, - "loss": 0.111, - "step": 2518 - }, - { - "epoch": 0.42948557423342304, - "grad_norm": 0.7892765402793884, - "learning_rate": 5.705029838022166e-06, - "loss": 0.0833, - "step": 2519 - }, - { - "epoch": 0.4296560726749607, - "grad_norm": 0.8521721363067627, - "learning_rate": 5.703324808184144e-06, - "loss": 0.0968, - "step": 2520 - }, - { - "epoch": 0.4298265711164984, - "grad_norm": 0.9164546132087708, - "learning_rate": 5.7016197783461215e-06, - "loss": 0.1135, - "step": 2521 - }, - { - "epoch": 0.42999706955803607, - "grad_norm": 0.948046088218689, - "learning_rate": 5.6999147485080995e-06, - "loss": 0.0859, - "step": 2522 - }, - { - "epoch": 0.43016756799957373, - "grad_norm": 0.5832633376121521, - "learning_rate": 5.6982097186700766e-06, - "loss": 0.0928, - "step": 2523 - }, - { - "epoch": 0.43033806644111144, - "grad_norm": 0.8962041735649109, - "learning_rate": 5.696504688832055e-06, - "loss": 0.1255, - "step": 2524 - }, - { - "epoch": 0.4305085648826491, - "grad_norm": 0.7559568881988525, - "learning_rate": 5.6947996589940325e-06, - "loss": 0.0877, - "step": 2525 - }, - { - "epoch": 0.4306790633241868, - "grad_norm": 1.1643157005310059, - "learning_rate": 5.69309462915601e-06, - "loss": 0.1177, - "step": 2526 - }, - { - "epoch": 0.4308495617657245, - "grad_norm": 1.4461086988449097, - "learning_rate": 5.691389599317989e-06, - "loss": 0.1757, - "step": 2527 - }, - { - "epoch": 0.4310200602072622, - "grad_norm": 1.2662277221679688, - "learning_rate": 5.689684569479966e-06, - "loss": 0.1868, - "step": 2528 - }, - { - "epoch": 0.43119055864879985, - "grad_norm": 0.7486140727996826, - "learning_rate": 5.687979539641944e-06, - "loss": 0.0875, - "step": 2529 - }, - { - "epoch": 0.4313610570903375, - "grad_norm": 0.9595978260040283, - "learning_rate": 5.686274509803922e-06, - "loss": 0.1163, - "step": 2530 - }, - { - "epoch": 0.4315315555318752, - "grad_norm": 0.8530360460281372, - "learning_rate": 5.6845694799659e-06, - "loss": 0.1208, - "step": 2531 - }, - { - "epoch": 0.4317020539734129, - "grad_norm": 1.6544944047927856, - "learning_rate": 5.682864450127877e-06, - "loss": 0.1726, - "step": 2532 - }, - { - "epoch": 0.4318725524149506, - "grad_norm": 1.4921165704727173, - "learning_rate": 5.681159420289856e-06, - "loss": 0.1588, - "step": 2533 - }, - { - "epoch": 0.43204305085648825, - "grad_norm": 0.7876043915748596, - "learning_rate": 5.679454390451833e-06, - "loss": 0.1184, - "step": 2534 - }, - { - "epoch": 0.43221354929802597, - "grad_norm": 1.3764668703079224, - "learning_rate": 5.677749360613811e-06, - "loss": 0.1859, - "step": 2535 - }, - { - "epoch": 0.4323840477395636, - "grad_norm": 0.6521998047828674, - "learning_rate": 5.67604433077579e-06, - "loss": 0.0985, - "step": 2536 - }, - { - "epoch": 0.43255454618110134, - "grad_norm": 0.9827353358268738, - "learning_rate": 5.674339300937767e-06, - "loss": 0.0902, - "step": 2537 - }, - { - "epoch": 0.432725044622639, - "grad_norm": 0.9378973245620728, - "learning_rate": 5.672634271099745e-06, - "loss": 0.1793, - "step": 2538 - }, - { - "epoch": 0.43289554306417666, - "grad_norm": 0.8462679982185364, - "learning_rate": 5.670929241261722e-06, - "loss": 0.1012, - "step": 2539 - }, - { - "epoch": 0.43306604150571437, - "grad_norm": 0.865755021572113, - "learning_rate": 5.669224211423701e-06, - "loss": 0.1187, - "step": 2540 - }, - { - "epoch": 0.43323653994725203, - "grad_norm": 0.9696370363235474, - "learning_rate": 5.667519181585678e-06, - "loss": 0.1371, - "step": 2541 - }, - { - "epoch": 0.43340703838878974, - "grad_norm": 0.8784907460212708, - "learning_rate": 5.665814151747656e-06, - "loss": 0.0928, - "step": 2542 - }, - { - "epoch": 0.4335775368303274, - "grad_norm": 0.9317367076873779, - "learning_rate": 5.664109121909633e-06, - "loss": 0.1148, - "step": 2543 - }, - { - "epoch": 0.4337480352718651, - "grad_norm": 0.8701735734939575, - "learning_rate": 5.662404092071612e-06, - "loss": 0.1485, - "step": 2544 - }, - { - "epoch": 0.4339185337134028, - "grad_norm": 0.946918249130249, - "learning_rate": 5.66069906223359e-06, - "loss": 0.1782, - "step": 2545 - }, - { - "epoch": 0.43408903215494044, - "grad_norm": 1.190557599067688, - "learning_rate": 5.658994032395567e-06, - "loss": 0.1412, - "step": 2546 - }, - { - "epoch": 0.43425953059647815, - "grad_norm": 0.9963315725326538, - "learning_rate": 5.657289002557546e-06, - "loss": 0.1271, - "step": 2547 - }, - { - "epoch": 0.4344300290380158, - "grad_norm": 0.9686456918716431, - "learning_rate": 5.655583972719523e-06, - "loss": 0.1744, - "step": 2548 - }, - { - "epoch": 0.4346005274795535, - "grad_norm": 1.0159608125686646, - "learning_rate": 5.653878942881501e-06, - "loss": 0.1458, - "step": 2549 - }, - { - "epoch": 0.4347710259210912, - "grad_norm": 0.9071111679077148, - "learning_rate": 5.652173913043479e-06, - "loss": 0.1453, - "step": 2550 - }, - { - "epoch": 0.4349415243626289, - "grad_norm": 1.0276676416397095, - "learning_rate": 5.6504688832054566e-06, - "loss": 0.2083, - "step": 2551 - }, - { - "epoch": 0.43511202280416655, - "grad_norm": 0.961588978767395, - "learning_rate": 5.648763853367435e-06, - "loss": 0.113, - "step": 2552 - }, - { - "epoch": 0.4352825212457042, - "grad_norm": 1.0574619770050049, - "learning_rate": 5.6470588235294125e-06, - "loss": 0.07, - "step": 2553 - }, - { - "epoch": 0.4354530196872419, - "grad_norm": 1.0037537813186646, - "learning_rate": 5.64535379369139e-06, - "loss": 0.098, - "step": 2554 - }, - { - "epoch": 0.4356235181287796, - "grad_norm": 1.0496973991394043, - "learning_rate": 5.6436487638533675e-06, - "loss": 0.0868, - "step": 2555 - }, - { - "epoch": 0.4357940165703173, - "grad_norm": 0.7770395874977112, - "learning_rate": 5.641943734015346e-06, - "loss": 0.1122, - "step": 2556 - }, - { - "epoch": 0.43596451501185496, - "grad_norm": 1.2403090000152588, - "learning_rate": 5.640238704177323e-06, - "loss": 0.151, - "step": 2557 - }, - { - "epoch": 0.4361350134533927, - "grad_norm": 0.9569463133811951, - "learning_rate": 5.638533674339301e-06, - "loss": 0.1395, - "step": 2558 - }, - { - "epoch": 0.43630551189493033, - "grad_norm": 1.369671106338501, - "learning_rate": 5.6368286445012784e-06, - "loss": 0.1845, - "step": 2559 - }, - { - "epoch": 0.43647601033646805, - "grad_norm": 1.5948243141174316, - "learning_rate": 5.635123614663257e-06, - "loss": 0.1768, - "step": 2560 - }, - { - "epoch": 0.4366465087780057, - "grad_norm": 0.9856839179992676, - "learning_rate": 5.633418584825235e-06, - "loss": 0.1632, - "step": 2561 - }, - { - "epoch": 0.43681700721954336, - "grad_norm": 1.1320617198944092, - "learning_rate": 5.631713554987212e-06, - "loss": 0.1526, - "step": 2562 - }, - { - "epoch": 0.4369875056610811, - "grad_norm": 0.9488789439201355, - "learning_rate": 5.630008525149191e-06, - "loss": 0.1419, - "step": 2563 - }, - { - "epoch": 0.43715800410261874, - "grad_norm": 1.1834876537322998, - "learning_rate": 5.628303495311168e-06, - "loss": 0.1438, - "step": 2564 - }, - { - "epoch": 0.43732850254415645, - "grad_norm": 0.8948205709457397, - "learning_rate": 5.626598465473146e-06, - "loss": 0.1362, - "step": 2565 - }, - { - "epoch": 0.4374990009856941, - "grad_norm": 0.7960487008094788, - "learning_rate": 5.624893435635124e-06, - "loss": 0.0527, - "step": 2566 - }, - { - "epoch": 0.4376694994272318, - "grad_norm": 1.006632685661316, - "learning_rate": 5.623188405797102e-06, - "loss": 0.0975, - "step": 2567 - }, - { - "epoch": 0.4378399978687695, - "grad_norm": 0.7732202410697937, - "learning_rate": 5.621483375959079e-06, - "loss": 0.0871, - "step": 2568 - }, - { - "epoch": 0.43801049631030714, - "grad_norm": 1.3993794918060303, - "learning_rate": 5.619778346121058e-06, - "loss": 0.12, - "step": 2569 - }, - { - "epoch": 0.43818099475184485, - "grad_norm": 0.6594243049621582, - "learning_rate": 5.618073316283036e-06, - "loss": 0.0496, - "step": 2570 - }, - { - "epoch": 0.4383514931933825, - "grad_norm": 1.2651886940002441, - "learning_rate": 5.616368286445013e-06, - "loss": 0.207, - "step": 2571 - }, - { - "epoch": 0.4385219916349202, - "grad_norm": 1.066919207572937, - "learning_rate": 5.614663256606992e-06, - "loss": 0.1872, - "step": 2572 - }, - { - "epoch": 0.4386924900764579, - "grad_norm": 0.9206414222717285, - "learning_rate": 5.612958226768969e-06, - "loss": 0.1029, - "step": 2573 - }, - { - "epoch": 0.4388629885179956, - "grad_norm": 0.7342972159385681, - "learning_rate": 5.611253196930947e-06, - "loss": 0.0985, - "step": 2574 - }, - { - "epoch": 0.43903348695953326, - "grad_norm": 1.378420352935791, - "learning_rate": 5.609548167092924e-06, - "loss": 0.1871, - "step": 2575 - }, - { - "epoch": 0.4392039854010709, - "grad_norm": 0.7292861938476562, - "learning_rate": 5.607843137254903e-06, - "loss": 0.0928, - "step": 2576 - }, - { - "epoch": 0.43937448384260863, - "grad_norm": 0.9209851026535034, - "learning_rate": 5.606138107416881e-06, - "loss": 0.1098, - "step": 2577 - }, - { - "epoch": 0.4395449822841463, - "grad_norm": 1.275430679321289, - "learning_rate": 5.604433077578858e-06, - "loss": 0.1728, - "step": 2578 - }, - { - "epoch": 0.439715480725684, - "grad_norm": 1.1448216438293457, - "learning_rate": 5.6027280477408366e-06, - "loss": 0.155, - "step": 2579 - }, - { - "epoch": 0.43988597916722166, - "grad_norm": 1.0969963073730469, - "learning_rate": 5.601023017902814e-06, - "loss": 0.1484, - "step": 2580 - }, - { - "epoch": 0.4400564776087594, - "grad_norm": 1.186651349067688, - "learning_rate": 5.599317988064792e-06, - "loss": 0.1065, - "step": 2581 - }, - { - "epoch": 0.44022697605029704, - "grad_norm": 1.65304696559906, - "learning_rate": 5.597612958226769e-06, - "loss": 0.2714, - "step": 2582 - }, - { - "epoch": 0.44039747449183475, - "grad_norm": 0.9879024624824524, - "learning_rate": 5.5959079283887475e-06, - "loss": 0.1222, - "step": 2583 - }, - { - "epoch": 0.4405679729333724, - "grad_norm": 1.8686827421188354, - "learning_rate": 5.594202898550725e-06, - "loss": 0.2354, - "step": 2584 - }, - { - "epoch": 0.44073847137491007, - "grad_norm": 0.6579173803329468, - "learning_rate": 5.5924978687127025e-06, - "loss": 0.1011, - "step": 2585 - }, - { - "epoch": 0.4409089698164478, - "grad_norm": 0.9362009167671204, - "learning_rate": 5.590792838874681e-06, - "loss": 0.0956, - "step": 2586 - }, - { - "epoch": 0.44107946825798544, - "grad_norm": 0.7479416728019714, - "learning_rate": 5.5890878090366584e-06, - "loss": 0.1188, - "step": 2587 - }, - { - "epoch": 0.44124996669952316, - "grad_norm": 1.0462018251419067, - "learning_rate": 5.587382779198636e-06, - "loss": 0.104, - "step": 2588 - }, - { - "epoch": 0.4414204651410608, - "grad_norm": 1.1154561042785645, - "learning_rate": 5.585677749360614e-06, - "loss": 0.1217, - "step": 2589 - }, - { - "epoch": 0.44159096358259853, - "grad_norm": 0.7707915306091309, - "learning_rate": 5.583972719522592e-06, - "loss": 0.1014, - "step": 2590 - }, - { - "epoch": 0.4417614620241362, - "grad_norm": 0.7629173994064331, - "learning_rate": 5.582267689684569e-06, - "loss": 0.11, - "step": 2591 - }, - { - "epoch": 0.44193196046567385, - "grad_norm": 1.3871240615844727, - "learning_rate": 5.580562659846548e-06, - "loss": 0.1768, - "step": 2592 - }, - { - "epoch": 0.44210245890721156, - "grad_norm": 0.8774785399436951, - "learning_rate": 5.578857630008525e-06, - "loss": 0.0447, - "step": 2593 - }, - { - "epoch": 0.4422729573487492, - "grad_norm": 0.794389545917511, - "learning_rate": 5.577152600170503e-06, - "loss": 0.1039, - "step": 2594 - }, - { - "epoch": 0.44244345579028693, - "grad_norm": 0.7697991728782654, - "learning_rate": 5.575447570332482e-06, - "loss": 0.1103, - "step": 2595 - }, - { - "epoch": 0.4426139542318246, - "grad_norm": 0.800696849822998, - "learning_rate": 5.573742540494459e-06, - "loss": 0.0875, - "step": 2596 - }, - { - "epoch": 0.4427844526733623, - "grad_norm": 2.0255141258239746, - "learning_rate": 5.572037510656437e-06, - "loss": 0.2019, - "step": 2597 - }, - { - "epoch": 0.44295495111489996, - "grad_norm": 1.5932087898254395, - "learning_rate": 5.570332480818414e-06, - "loss": 0.1325, - "step": 2598 - }, - { - "epoch": 0.4431254495564376, - "grad_norm": 1.2170913219451904, - "learning_rate": 5.568627450980393e-06, - "loss": 0.1787, - "step": 2599 - }, - { - "epoch": 0.44329594799797534, - "grad_norm": 1.0791867971420288, - "learning_rate": 5.56692242114237e-06, - "loss": 0.1481, - "step": 2600 - }, - { - "epoch": 0.44329594799797534, - "eval_f1_score": 0.30978260869565216, - "eval_loss": 0.14772719144821167, - "eval_runtime": 182.7464, - "eval_samples_per_second": 54.721, - "eval_steps_per_second": 3.42, - "step": 2600 - }, - { - "epoch": 0.443466446439513, - "grad_norm": 1.393898367881775, - "learning_rate": 5.565217391304348e-06, - "loss": 0.1351, - "step": 2601 - }, - { - "epoch": 0.4436369448810507, - "grad_norm": 1.1062179803848267, - "learning_rate": 5.563512361466326e-06, - "loss": 0.098, - "step": 2602 - }, - { - "epoch": 0.44380744332258837, - "grad_norm": 0.7602462768554688, - "learning_rate": 5.561807331628304e-06, - "loss": 0.0767, - "step": 2603 - }, - { - "epoch": 0.4439779417641261, - "grad_norm": 0.8465493321418762, - "learning_rate": 5.560102301790282e-06, - "loss": 0.0802, - "step": 2604 - }, - { - "epoch": 0.44414844020566374, - "grad_norm": 1.0767474174499512, - "learning_rate": 5.55839727195226e-06, - "loss": 0.0754, - "step": 2605 - }, - { - "epoch": 0.4443189386472014, - "grad_norm": 1.0401229858398438, - "learning_rate": 5.556692242114238e-06, - "loss": 0.1615, - "step": 2606 - }, - { - "epoch": 0.4444894370887391, - "grad_norm": 1.0522851943969727, - "learning_rate": 5.554987212276215e-06, - "loss": 0.1591, - "step": 2607 - }, - { - "epoch": 0.4446599355302768, - "grad_norm": 1.0689300298690796, - "learning_rate": 5.553282182438194e-06, - "loss": 0.1307, - "step": 2608 - }, - { - "epoch": 0.4448304339718145, - "grad_norm": 0.6239145398139954, - "learning_rate": 5.551577152600171e-06, - "loss": 0.0851, - "step": 2609 - }, - { - "epoch": 0.44500093241335215, - "grad_norm": 1.2811641693115234, - "learning_rate": 5.549872122762149e-06, - "loss": 0.2046, - "step": 2610 - }, - { - "epoch": 0.44517143085488986, - "grad_norm": 0.8296309113502502, - "learning_rate": 5.5481670929241275e-06, - "loss": 0.0499, - "step": 2611 - }, - { - "epoch": 0.4453419292964275, - "grad_norm": 1.7726424932479858, - "learning_rate": 5.546462063086105e-06, - "loss": 0.069, - "step": 2612 - }, - { - "epoch": 0.44551242773796523, - "grad_norm": 0.927315890789032, - "learning_rate": 5.5447570332480825e-06, - "loss": 0.0817, - "step": 2613 - }, - { - "epoch": 0.4456829261795029, - "grad_norm": 0.8551962375640869, - "learning_rate": 5.54305200341006e-06, - "loss": 0.0736, - "step": 2614 - }, - { - "epoch": 0.44585342462104055, - "grad_norm": 0.9609452486038208, - "learning_rate": 5.5413469735720384e-06, - "loss": 0.1334, - "step": 2615 - }, - { - "epoch": 0.44602392306257826, - "grad_norm": 0.8067399263381958, - "learning_rate": 5.5396419437340155e-06, - "loss": 0.1182, - "step": 2616 - }, - { - "epoch": 0.4461944215041159, - "grad_norm": 1.0610885620117188, - "learning_rate": 5.5379369138959935e-06, - "loss": 0.1351, - "step": 2617 - }, - { - "epoch": 0.44636491994565364, - "grad_norm": 1.2180925607681274, - "learning_rate": 5.536231884057971e-06, - "loss": 0.1059, - "step": 2618 - }, - { - "epoch": 0.4465354183871913, - "grad_norm": 0.5902736186981201, - "learning_rate": 5.534526854219949e-06, - "loss": 0.0571, - "step": 2619 - }, - { - "epoch": 0.446705916828729, - "grad_norm": 1.0947083234786987, - "learning_rate": 5.532821824381927e-06, - "loss": 0.1059, - "step": 2620 - }, - { - "epoch": 0.44687641527026667, - "grad_norm": 0.7008499503135681, - "learning_rate": 5.5311167945439044e-06, - "loss": 0.0926, - "step": 2621 - }, - { - "epoch": 0.44704691371180433, - "grad_norm": 1.785274863243103, - "learning_rate": 5.529411764705883e-06, - "loss": 0.2751, - "step": 2622 - }, - { - "epoch": 0.44721741215334204, - "grad_norm": 0.6739360690116882, - "learning_rate": 5.52770673486786e-06, - "loss": 0.0731, - "step": 2623 - }, - { - "epoch": 0.4473879105948797, - "grad_norm": 0.648942768573761, - "learning_rate": 5.526001705029838e-06, - "loss": 0.0356, - "step": 2624 - }, - { - "epoch": 0.4475584090364174, - "grad_norm": 0.6886118054389954, - "learning_rate": 5.524296675191816e-06, - "loss": 0.0757, - "step": 2625 - }, - { - "epoch": 0.4477289074779551, - "grad_norm": 0.6833609938621521, - "learning_rate": 5.522591645353794e-06, - "loss": 0.0329, - "step": 2626 - }, - { - "epoch": 0.4478994059194928, - "grad_norm": 1.5278652906417847, - "learning_rate": 5.520886615515771e-06, - "loss": 0.2367, - "step": 2627 - }, - { - "epoch": 0.44806990436103045, - "grad_norm": 0.5609342455863953, - "learning_rate": 5.51918158567775e-06, - "loss": 0.0851, - "step": 2628 - }, - { - "epoch": 0.4482404028025681, - "grad_norm": 1.545210361480713, - "learning_rate": 5.517476555839728e-06, - "loss": 0.1495, - "step": 2629 - }, - { - "epoch": 0.4484109012441058, - "grad_norm": 0.9157249927520752, - "learning_rate": 5.515771526001705e-06, - "loss": 0.1217, - "step": 2630 - }, - { - "epoch": 0.4485813996856435, - "grad_norm": 0.5781745910644531, - "learning_rate": 5.514066496163684e-06, - "loss": 0.0635, - "step": 2631 - }, - { - "epoch": 0.4487518981271812, - "grad_norm": 1.195381760597229, - "learning_rate": 5.512361466325661e-06, - "loss": 0.1764, - "step": 2632 - }, - { - "epoch": 0.44892239656871885, - "grad_norm": 1.0727818012237549, - "learning_rate": 5.510656436487639e-06, - "loss": 0.1179, - "step": 2633 - }, - { - "epoch": 0.44909289501025657, - "grad_norm": 1.5280828475952148, - "learning_rate": 5.508951406649616e-06, - "loss": 0.1587, - "step": 2634 - }, - { - "epoch": 0.4492633934517942, - "grad_norm": 1.2746130228042603, - "learning_rate": 5.507246376811595e-06, - "loss": 0.1229, - "step": 2635 - }, - { - "epoch": 0.44943389189333194, - "grad_norm": 0.7133687734603882, - "learning_rate": 5.505541346973573e-06, - "loss": 0.0498, - "step": 2636 - }, - { - "epoch": 0.4496043903348696, - "grad_norm": 0.8298734426498413, - "learning_rate": 5.50383631713555e-06, - "loss": 0.0974, - "step": 2637 - }, - { - "epoch": 0.44977488877640726, - "grad_norm": 0.7758700847625732, - "learning_rate": 5.502131287297529e-06, - "loss": 0.111, - "step": 2638 - }, - { - "epoch": 0.44994538721794497, - "grad_norm": 1.034207820892334, - "learning_rate": 5.500426257459506e-06, - "loss": 0.0883, - "step": 2639 - }, - { - "epoch": 0.45011588565948263, - "grad_norm": 0.9524661898612976, - "learning_rate": 5.498721227621484e-06, - "loss": 0.1076, - "step": 2640 - }, - { - "epoch": 0.45028638410102034, - "grad_norm": 1.1032789945602417, - "learning_rate": 5.497016197783462e-06, - "loss": 0.1365, - "step": 2641 - }, - { - "epoch": 0.450456882542558, - "grad_norm": 1.4389866590499878, - "learning_rate": 5.49531116794544e-06, - "loss": 0.0961, - "step": 2642 - }, - { - "epoch": 0.4506273809840957, - "grad_norm": 1.1861354112625122, - "learning_rate": 5.493606138107417e-06, - "loss": 0.1775, - "step": 2643 - }, - { - "epoch": 0.4507978794256334, - "grad_norm": 2.20349383354187, - "learning_rate": 5.4919011082693955e-06, - "loss": 0.245, - "step": 2644 - }, - { - "epoch": 0.45096837786717103, - "grad_norm": 0.9795979857444763, - "learning_rate": 5.4901960784313735e-06, - "loss": 0.1968, - "step": 2645 - }, - { - "epoch": 0.45113887630870875, - "grad_norm": 1.4784023761749268, - "learning_rate": 5.488491048593351e-06, - "loss": 0.2209, - "step": 2646 - }, - { - "epoch": 0.4513093747502464, - "grad_norm": 1.928300380706787, - "learning_rate": 5.486786018755329e-06, - "loss": 0.1449, - "step": 2647 - }, - { - "epoch": 0.4514798731917841, - "grad_norm": 1.053138017654419, - "learning_rate": 5.4850809889173065e-06, - "loss": 0.1044, - "step": 2648 - }, - { - "epoch": 0.4516503716333218, - "grad_norm": 1.2316356897354126, - "learning_rate": 5.4833759590792844e-06, - "loss": 0.149, - "step": 2649 - }, - { - "epoch": 0.4518208700748595, - "grad_norm": 0.7047519087791443, - "learning_rate": 5.4816709292412615e-06, - "loss": 0.0785, - "step": 2650 - }, - { - "epoch": 0.45199136851639715, - "grad_norm": 1.0934659242630005, - "learning_rate": 5.47996589940324e-06, - "loss": 0.1481, - "step": 2651 - }, - { - "epoch": 0.4521618669579348, - "grad_norm": 1.102778673171997, - "learning_rate": 5.478260869565217e-06, - "loss": 0.2052, - "step": 2652 - }, - { - "epoch": 0.4523323653994725, - "grad_norm": 1.1186070442199707, - "learning_rate": 5.476555839727195e-06, - "loss": 0.1752, - "step": 2653 - }, - { - "epoch": 0.4525028638410102, - "grad_norm": 1.0531350374221802, - "learning_rate": 5.474850809889174e-06, - "loss": 0.0994, - "step": 2654 - }, - { - "epoch": 0.4526733622825479, - "grad_norm": 1.003851294517517, - "learning_rate": 5.473145780051151e-06, - "loss": 0.1072, - "step": 2655 - }, - { - "epoch": 0.45284386072408556, - "grad_norm": 1.0673835277557373, - "learning_rate": 5.471440750213129e-06, - "loss": 0.1134, - "step": 2656 - }, - { - "epoch": 0.45301435916562327, - "grad_norm": 1.0477086305618286, - "learning_rate": 5.469735720375106e-06, - "loss": 0.0982, - "step": 2657 - }, - { - "epoch": 0.45318485760716093, - "grad_norm": 1.4844733476638794, - "learning_rate": 5.468030690537085e-06, - "loss": 0.1586, - "step": 2658 - }, - { - "epoch": 0.45335535604869864, - "grad_norm": 0.7507916688919067, - "learning_rate": 5.466325660699062e-06, - "loss": 0.1154, - "step": 2659 - }, - { - "epoch": 0.4535258544902363, - "grad_norm": 0.8093088269233704, - "learning_rate": 5.46462063086104e-06, - "loss": 0.088, - "step": 2660 - }, - { - "epoch": 0.45369635293177396, - "grad_norm": 0.9286861419677734, - "learning_rate": 5.462915601023019e-06, - "loss": 0.121, - "step": 2661 - }, - { - "epoch": 0.4538668513733117, - "grad_norm": 1.1589971780776978, - "learning_rate": 5.461210571184996e-06, - "loss": 0.1477, - "step": 2662 - }, - { - "epoch": 0.45403734981484933, - "grad_norm": 1.4251331090927124, - "learning_rate": 5.459505541346974e-06, - "loss": 0.1629, - "step": 2663 - }, - { - "epoch": 0.45420784825638705, - "grad_norm": 0.899058997631073, - "learning_rate": 5.457800511508952e-06, - "loss": 0.1363, - "step": 2664 - }, - { - "epoch": 0.4543783466979247, - "grad_norm": 0.684359073638916, - "learning_rate": 5.45609548167093e-06, - "loss": 0.0957, - "step": 2665 - }, - { - "epoch": 0.4545488451394624, - "grad_norm": 1.1812783479690552, - "learning_rate": 5.454390451832907e-06, - "loss": 0.1239, - "step": 2666 - }, - { - "epoch": 0.4547193435810001, - "grad_norm": 0.7902812361717224, - "learning_rate": 5.452685421994886e-06, - "loss": 0.1446, - "step": 2667 - }, - { - "epoch": 0.45488984202253774, - "grad_norm": 0.7050306797027588, - "learning_rate": 5.450980392156863e-06, - "loss": 0.065, - "step": 2668 - }, - { - "epoch": 0.45506034046407545, - "grad_norm": 0.608092725276947, - "learning_rate": 5.449275362318841e-06, - "loss": 0.0506, - "step": 2669 - }, - { - "epoch": 0.4552308389056131, - "grad_norm": 0.7197535037994385, - "learning_rate": 5.44757033248082e-06, - "loss": 0.0762, - "step": 2670 - }, - { - "epoch": 0.4554013373471508, - "grad_norm": 1.2062654495239258, - "learning_rate": 5.445865302642797e-06, - "loss": 0.1647, - "step": 2671 - }, - { - "epoch": 0.4555718357886885, - "grad_norm": 1.2899487018585205, - "learning_rate": 5.444160272804775e-06, - "loss": 0.1509, - "step": 2672 - }, - { - "epoch": 0.4557423342302262, - "grad_norm": 0.7482165098190308, - "learning_rate": 5.442455242966752e-06, - "loss": 0.1098, - "step": 2673 - }, - { - "epoch": 0.45591283267176386, - "grad_norm": 0.8238338232040405, - "learning_rate": 5.4407502131287306e-06, - "loss": 0.071, - "step": 2674 - }, - { - "epoch": 0.4560833311133015, - "grad_norm": 0.6520843505859375, - "learning_rate": 5.439045183290708e-06, - "loss": 0.062, - "step": 2675 - }, - { - "epoch": 0.45625382955483923, - "grad_norm": 1.111775279045105, - "learning_rate": 5.437340153452686e-06, - "loss": 0.1101, - "step": 2676 - }, - { - "epoch": 0.4564243279963769, - "grad_norm": 0.7632780075073242, - "learning_rate": 5.4356351236146636e-06, - "loss": 0.0655, - "step": 2677 - }, - { - "epoch": 0.4565948264379146, - "grad_norm": 1.4325367212295532, - "learning_rate": 5.4339300937766415e-06, - "loss": 0.1902, - "step": 2678 - }, - { - "epoch": 0.45676532487945226, - "grad_norm": 2.3953022956848145, - "learning_rate": 5.4322250639386195e-06, - "loss": 0.2011, - "step": 2679 - }, - { - "epoch": 0.45693582332099, - "grad_norm": 0.8128249049186707, - "learning_rate": 5.430520034100597e-06, - "loss": 0.1284, - "step": 2680 - }, - { - "epoch": 0.45710632176252763, - "grad_norm": 1.0645301342010498, - "learning_rate": 5.428815004262575e-06, - "loss": 0.1573, - "step": 2681 - }, - { - "epoch": 0.45727682020406535, - "grad_norm": 0.9294531345367432, - "learning_rate": 5.4271099744245525e-06, - "loss": 0.1008, - "step": 2682 - }, - { - "epoch": 0.457447318645603, - "grad_norm": 0.8018505573272705, - "learning_rate": 5.425404944586531e-06, - "loss": 0.0975, - "step": 2683 - }, - { - "epoch": 0.45761781708714067, - "grad_norm": 1.1697618961334229, - "learning_rate": 5.423699914748508e-06, - "loss": 0.1187, - "step": 2684 - }, - { - "epoch": 0.4577883155286784, - "grad_norm": 0.8472014665603638, - "learning_rate": 5.421994884910486e-06, - "loss": 0.1486, - "step": 2685 - }, - { - "epoch": 0.45795881397021604, - "grad_norm": 0.9629762768745422, - "learning_rate": 5.420289855072465e-06, - "loss": 0.1444, - "step": 2686 - }, - { - "epoch": 0.45812931241175375, - "grad_norm": 0.8507773876190186, - "learning_rate": 5.418584825234442e-06, - "loss": 0.1284, - "step": 2687 - }, - { - "epoch": 0.4582998108532914, - "grad_norm": 0.8529912829399109, - "learning_rate": 5.41687979539642e-06, - "loss": 0.115, - "step": 2688 - }, - { - "epoch": 0.4584703092948291, - "grad_norm": 0.6989152431488037, - "learning_rate": 5.415174765558397e-06, - "loss": 0.0956, - "step": 2689 - }, - { - "epoch": 0.4586408077363668, - "grad_norm": 1.049714207649231, - "learning_rate": 5.413469735720376e-06, - "loss": 0.157, - "step": 2690 - }, - { - "epoch": 0.45881130617790444, - "grad_norm": 1.126670002937317, - "learning_rate": 5.411764705882353e-06, - "loss": 0.1814, - "step": 2691 - }, - { - "epoch": 0.45898180461944216, - "grad_norm": 0.8889474868774414, - "learning_rate": 5.410059676044331e-06, - "loss": 0.0571, - "step": 2692 - }, - { - "epoch": 0.4591523030609798, - "grad_norm": 1.0496242046356201, - "learning_rate": 5.408354646206308e-06, - "loss": 0.1473, - "step": 2693 - }, - { - "epoch": 0.45932280150251753, - "grad_norm": 1.1733475923538208, - "learning_rate": 5.406649616368287e-06, - "loss": 0.1563, - "step": 2694 - }, - { - "epoch": 0.4594932999440552, - "grad_norm": 1.4026211500167847, - "learning_rate": 5.404944586530265e-06, - "loss": 0.2016, - "step": 2695 - }, - { - "epoch": 0.4596637983855929, - "grad_norm": 1.75924813747406, - "learning_rate": 5.403239556692242e-06, - "loss": 0.28, - "step": 2696 - }, - { - "epoch": 0.45983429682713056, - "grad_norm": 0.8990041613578796, - "learning_rate": 5.401534526854221e-06, - "loss": 0.1298, - "step": 2697 - }, - { - "epoch": 0.4600047952686682, - "grad_norm": 1.098027229309082, - "learning_rate": 5.399829497016198e-06, - "loss": 0.1223, - "step": 2698 - }, - { - "epoch": 0.46017529371020593, - "grad_norm": 1.0936179161071777, - "learning_rate": 5.398124467178176e-06, - "loss": 0.1047, - "step": 2699 - }, - { - "epoch": 0.4603457921517436, - "grad_norm": 0.7299230098724365, - "learning_rate": 5.396419437340154e-06, - "loss": 0.117, - "step": 2700 - }, - { - "epoch": 0.4603457921517436, - "eval_f1_score": 0.2802197802197802, - "eval_loss": 0.1476231813430786, - "eval_runtime": 182.6762, - "eval_samples_per_second": 54.742, - "eval_steps_per_second": 3.421, - "step": 2700 - }, - { - "epoch": 0.4605162905932813, - "grad_norm": 0.8915308713912964, - "learning_rate": 5.394714407502132e-06, - "loss": 0.1093, - "step": 2701 - }, - { - "epoch": 0.46068678903481897, - "grad_norm": 1.1240203380584717, - "learning_rate": 5.393009377664109e-06, - "loss": 0.1667, - "step": 2702 - }, - { - "epoch": 0.4608572874763567, - "grad_norm": 0.9414085149765015, - "learning_rate": 5.391304347826088e-06, - "loss": 0.1606, - "step": 2703 - }, - { - "epoch": 0.46102778591789434, - "grad_norm": 0.9260140657424927, - "learning_rate": 5.389599317988066e-06, - "loss": 0.0523, - "step": 2704 - }, - { - "epoch": 0.46119828435943205, - "grad_norm": 0.8883020877838135, - "learning_rate": 5.387894288150043e-06, - "loss": 0.0896, - "step": 2705 - }, - { - "epoch": 0.4613687828009697, - "grad_norm": 0.8151699304580688, - "learning_rate": 5.3861892583120215e-06, - "loss": 0.1237, - "step": 2706 - }, - { - "epoch": 0.46153928124250737, - "grad_norm": 0.612257182598114, - "learning_rate": 5.384484228473999e-06, - "loss": 0.052, - "step": 2707 - }, - { - "epoch": 0.4617097796840451, - "grad_norm": 0.8854296207427979, - "learning_rate": 5.3827791986359766e-06, - "loss": 0.1263, - "step": 2708 - }, - { - "epoch": 0.46188027812558274, - "grad_norm": 0.6729491949081421, - "learning_rate": 5.381074168797954e-06, - "loss": 0.1134, - "step": 2709 - }, - { - "epoch": 0.46205077656712046, - "grad_norm": 0.995721697807312, - "learning_rate": 5.3793691389599325e-06, - "loss": 0.1188, - "step": 2710 - }, - { - "epoch": 0.4622212750086581, - "grad_norm": 2.032381772994995, - "learning_rate": 5.37766410912191e-06, - "loss": 0.2624, - "step": 2711 - }, - { - "epoch": 0.46239177345019583, - "grad_norm": 0.7103998064994812, - "learning_rate": 5.3759590792838875e-06, - "loss": 0.0889, - "step": 2712 - }, - { - "epoch": 0.4625622718917335, - "grad_norm": 1.155537724494934, - "learning_rate": 5.374254049445866e-06, - "loss": 0.1714, - "step": 2713 - }, - { - "epoch": 0.46273277033327115, - "grad_norm": 1.4818527698516846, - "learning_rate": 5.372549019607843e-06, - "loss": 0.1816, - "step": 2714 - }, - { - "epoch": 0.46290326877480886, - "grad_norm": 0.5952757000923157, - "learning_rate": 5.370843989769821e-06, - "loss": 0.0669, - "step": 2715 - }, - { - "epoch": 0.4630737672163465, - "grad_norm": 0.9609639644622803, - "learning_rate": 5.369138959931799e-06, - "loss": 0.1793, - "step": 2716 - }, - { - "epoch": 0.46324426565788424, - "grad_norm": 1.3182636499404907, - "learning_rate": 5.367433930093777e-06, - "loss": 0.1719, - "step": 2717 - }, - { - "epoch": 0.4634147640994219, - "grad_norm": 0.7964705228805542, - "learning_rate": 5.365728900255754e-06, - "loss": 0.1311, - "step": 2718 - }, - { - "epoch": 0.4635852625409596, - "grad_norm": 0.6149096488952637, - "learning_rate": 5.364023870417733e-06, - "loss": 0.0855, - "step": 2719 - }, - { - "epoch": 0.46375576098249727, - "grad_norm": 1.1285103559494019, - "learning_rate": 5.362318840579711e-06, - "loss": 0.1798, - "step": 2720 - }, - { - "epoch": 0.4639262594240349, - "grad_norm": 0.6177775263786316, - "learning_rate": 5.360613810741688e-06, - "loss": 0.1305, - "step": 2721 - }, - { - "epoch": 0.46409675786557264, - "grad_norm": 1.480620265007019, - "learning_rate": 5.358908780903667e-06, - "loss": 0.1964, - "step": 2722 - }, - { - "epoch": 0.4642672563071103, - "grad_norm": 0.7413175702095032, - "learning_rate": 5.357203751065644e-06, - "loss": 0.1041, - "step": 2723 - }, - { - "epoch": 0.464437754748648, - "grad_norm": 0.8490052223205566, - "learning_rate": 5.355498721227622e-06, - "loss": 0.1172, - "step": 2724 - }, - { - "epoch": 0.46460825319018567, - "grad_norm": 0.7542960047721863, - "learning_rate": 5.353793691389599e-06, - "loss": 0.103, - "step": 2725 - }, - { - "epoch": 0.4647787516317234, - "grad_norm": 1.2431647777557373, - "learning_rate": 5.352088661551578e-06, - "loss": 0.2465, - "step": 2726 - }, - { - "epoch": 0.46494925007326104, - "grad_norm": 1.1575838327407837, - "learning_rate": 5.350383631713555e-06, - "loss": 0.1723, - "step": 2727 - }, - { - "epoch": 0.46511974851479876, - "grad_norm": 0.9394764304161072, - "learning_rate": 5.348678601875533e-06, - "loss": 0.0999, - "step": 2728 - }, - { - "epoch": 0.4652902469563364, - "grad_norm": 1.261762261390686, - "learning_rate": 5.346973572037512e-06, - "loss": 0.1649, - "step": 2729 - }, - { - "epoch": 0.4654607453978741, - "grad_norm": 0.990092933177948, - "learning_rate": 5.345268542199489e-06, - "loss": 0.1377, - "step": 2730 - }, - { - "epoch": 0.4656312438394118, - "grad_norm": 0.90677410364151, - "learning_rate": 5.343563512361467e-06, - "loss": 0.1167, - "step": 2731 - }, - { - "epoch": 0.46580174228094945, - "grad_norm": 0.9878215789794922, - "learning_rate": 5.341858482523444e-06, - "loss": 0.1888, - "step": 2732 - }, - { - "epoch": 0.46597224072248716, - "grad_norm": 0.8524580001831055, - "learning_rate": 5.340153452685423e-06, - "loss": 0.0938, - "step": 2733 - }, - { - "epoch": 0.4661427391640248, - "grad_norm": 1.0039318799972534, - "learning_rate": 5.3384484228474e-06, - "loss": 0.061, - "step": 2734 - }, - { - "epoch": 0.46631323760556254, - "grad_norm": 0.7102029323577881, - "learning_rate": 5.336743393009378e-06, - "loss": 0.0841, - "step": 2735 - }, - { - "epoch": 0.4664837360471002, - "grad_norm": 0.9649211168289185, - "learning_rate": 5.3350383631713566e-06, - "loss": 0.1681, - "step": 2736 - }, - { - "epoch": 0.46665423448863785, - "grad_norm": 1.3707305192947388, - "learning_rate": 5.333333333333334e-06, - "loss": 0.1815, - "step": 2737 - }, - { - "epoch": 0.46682473293017557, - "grad_norm": 1.0654339790344238, - "learning_rate": 5.331628303495312e-06, - "loss": 0.1352, - "step": 2738 - }, - { - "epoch": 0.4669952313717132, - "grad_norm": 0.7589823603630066, - "learning_rate": 5.3299232736572896e-06, - "loss": 0.0867, - "step": 2739 - }, - { - "epoch": 0.46716572981325094, - "grad_norm": 1.3560172319412231, - "learning_rate": 5.3282182438192675e-06, - "loss": 0.1688, - "step": 2740 - }, - { - "epoch": 0.4673362282547886, - "grad_norm": 1.1395232677459717, - "learning_rate": 5.326513213981245e-06, - "loss": 0.1397, - "step": 2741 - }, - { - "epoch": 0.4675067266963263, - "grad_norm": 1.3840816020965576, - "learning_rate": 5.324808184143223e-06, - "loss": 0.1409, - "step": 2742 - }, - { - "epoch": 0.46767722513786397, - "grad_norm": 0.9683536887168884, - "learning_rate": 5.3231031543052005e-06, - "loss": 0.0896, - "step": 2743 - }, - { - "epoch": 0.46784772357940163, - "grad_norm": 1.3210676908493042, - "learning_rate": 5.3213981244671784e-06, - "loss": 0.1244, - "step": 2744 - }, - { - "epoch": 0.46801822202093935, - "grad_norm": 0.9296742677688599, - "learning_rate": 5.319693094629157e-06, - "loss": 0.1292, - "step": 2745 - }, - { - "epoch": 0.468188720462477, - "grad_norm": 0.8764903545379639, - "learning_rate": 5.317988064791134e-06, - "loss": 0.1354, - "step": 2746 - }, - { - "epoch": 0.4683592189040147, - "grad_norm": 1.4558213949203491, - "learning_rate": 5.316283034953112e-06, - "loss": 0.2131, - "step": 2747 - }, - { - "epoch": 0.4685297173455524, - "grad_norm": 0.6826790571212769, - "learning_rate": 5.314578005115089e-06, - "loss": 0.0777, - "step": 2748 - }, - { - "epoch": 0.4687002157870901, - "grad_norm": 1.1762099266052246, - "learning_rate": 5.312872975277068e-06, - "loss": 0.0992, - "step": 2749 - }, - { - "epoch": 0.46887071422862775, - "grad_norm": 0.9933898448944092, - "learning_rate": 5.311167945439045e-06, - "loss": 0.0883, - "step": 2750 - }, - { - "epoch": 0.46904121267016546, - "grad_norm": 0.7758609056472778, - "learning_rate": 5.309462915601023e-06, - "loss": 0.0931, - "step": 2751 - }, - { - "epoch": 0.4692117111117031, - "grad_norm": 1.1159803867340088, - "learning_rate": 5.307757885763001e-06, - "loss": 0.1881, - "step": 2752 - }, - { - "epoch": 0.4693822095532408, - "grad_norm": 1.1558820009231567, - "learning_rate": 5.306052855924979e-06, - "loss": 0.1709, - "step": 2753 - }, - { - "epoch": 0.4695527079947785, - "grad_norm": 1.1873338222503662, - "learning_rate": 5.304347826086957e-06, - "loss": 0.1503, - "step": 2754 - }, - { - "epoch": 0.46972320643631615, - "grad_norm": 1.2646960020065308, - "learning_rate": 5.302642796248935e-06, - "loss": 0.0855, - "step": 2755 - }, - { - "epoch": 0.46989370487785387, - "grad_norm": 1.0196788311004639, - "learning_rate": 5.300937766410913e-06, - "loss": 0.1259, - "step": 2756 - }, - { - "epoch": 0.4700642033193915, - "grad_norm": 0.7964122891426086, - "learning_rate": 5.29923273657289e-06, - "loss": 0.0569, - "step": 2757 - }, - { - "epoch": 0.47023470176092924, - "grad_norm": 0.9101222157478333, - "learning_rate": 5.297527706734869e-06, - "loss": 0.1175, - "step": 2758 - }, - { - "epoch": 0.4704052002024669, - "grad_norm": 1.1495468616485596, - "learning_rate": 5.295822676896846e-06, - "loss": 0.1371, - "step": 2759 - }, - { - "epoch": 0.47057569864400456, - "grad_norm": 1.1147866249084473, - "learning_rate": 5.294117647058824e-06, - "loss": 0.1404, - "step": 2760 - }, - { - "epoch": 0.4707461970855423, - "grad_norm": 0.897382378578186, - "learning_rate": 5.292412617220803e-06, - "loss": 0.0796, - "step": 2761 - }, - { - "epoch": 0.47091669552707993, - "grad_norm": 1.366015911102295, - "learning_rate": 5.29070758738278e-06, - "loss": 0.1658, - "step": 2762 - }, - { - "epoch": 0.47108719396861765, - "grad_norm": 1.143560528755188, - "learning_rate": 5.289002557544758e-06, - "loss": 0.1538, - "step": 2763 - }, - { - "epoch": 0.4712576924101553, - "grad_norm": 0.8982717394828796, - "learning_rate": 5.287297527706735e-06, - "loss": 0.1504, - "step": 2764 - }, - { - "epoch": 0.471428190851693, - "grad_norm": 0.8663648366928101, - "learning_rate": 5.285592497868714e-06, - "loss": 0.0669, - "step": 2765 - }, - { - "epoch": 0.4715986892932307, - "grad_norm": 1.088739037513733, - "learning_rate": 5.283887468030691e-06, - "loss": 0.1432, - "step": 2766 - }, - { - "epoch": 0.47176918773476834, - "grad_norm": 1.2358800172805786, - "learning_rate": 5.282182438192669e-06, - "loss": 0.0928, - "step": 2767 - }, - { - "epoch": 0.47193968617630605, - "grad_norm": 1.063958764076233, - "learning_rate": 5.280477408354646e-06, - "loss": 0.1624, - "step": 2768 - }, - { - "epoch": 0.4721101846178437, - "grad_norm": 1.3645049333572388, - "learning_rate": 5.278772378516625e-06, - "loss": 0.1668, - "step": 2769 - }, - { - "epoch": 0.4722806830593814, - "grad_norm": 1.0899858474731445, - "learning_rate": 5.2770673486786025e-06, - "loss": 0.1343, - "step": 2770 - }, - { - "epoch": 0.4724511815009191, - "grad_norm": 1.2116706371307373, - "learning_rate": 5.27536231884058e-06, - "loss": 0.1919, - "step": 2771 - }, - { - "epoch": 0.4726216799424568, - "grad_norm": 1.0772473812103271, - "learning_rate": 5.2736572890025584e-06, - "loss": 0.1613, - "step": 2772 - }, - { - "epoch": 0.47279217838399445, - "grad_norm": 0.9527348279953003, - "learning_rate": 5.2719522591645355e-06, - "loss": 0.1292, - "step": 2773 - }, - { - "epoch": 0.47296267682553217, - "grad_norm": 1.2668654918670654, - "learning_rate": 5.2702472293265135e-06, - "loss": 0.2069, - "step": 2774 - }, - { - "epoch": 0.4731331752670698, - "grad_norm": 1.488873839378357, - "learning_rate": 5.2685421994884914e-06, - "loss": 0.1628, - "step": 2775 - }, - { - "epoch": 0.4733036737086075, - "grad_norm": 0.842333972454071, - "learning_rate": 5.266837169650469e-06, - "loss": 0.1079, - "step": 2776 - }, - { - "epoch": 0.4734741721501452, - "grad_norm": 0.8590933680534363, - "learning_rate": 5.2651321398124465e-06, - "loss": 0.1045, - "step": 2777 - }, - { - "epoch": 0.47364467059168286, - "grad_norm": 1.0166113376617432, - "learning_rate": 5.263427109974425e-06, - "loss": 0.1297, - "step": 2778 - }, - { - "epoch": 0.4738151690332206, - "grad_norm": 1.1054891347885132, - "learning_rate": 5.261722080136403e-06, - "loss": 0.0694, - "step": 2779 - }, - { - "epoch": 0.47398566747475823, - "grad_norm": 1.1980565786361694, - "learning_rate": 5.26001705029838e-06, - "loss": 0.1124, - "step": 2780 - }, - { - "epoch": 0.47415616591629595, - "grad_norm": 0.9477231502532959, - "learning_rate": 5.258312020460359e-06, - "loss": 0.1674, - "step": 2781 - }, - { - "epoch": 0.4743266643578336, - "grad_norm": 1.079034686088562, - "learning_rate": 5.256606990622336e-06, - "loss": 0.1674, - "step": 2782 - }, - { - "epoch": 0.47449716279937126, - "grad_norm": 1.0305124521255493, - "learning_rate": 5.254901960784314e-06, - "loss": 0.1413, - "step": 2783 - }, - { - "epoch": 0.474667661240909, - "grad_norm": 0.8747472763061523, - "learning_rate": 5.253196930946291e-06, - "loss": 0.1355, - "step": 2784 - }, - { - "epoch": 0.47483815968244664, - "grad_norm": 0.9253730177879333, - "learning_rate": 5.25149190110827e-06, - "loss": 0.1316, - "step": 2785 - }, - { - "epoch": 0.47500865812398435, - "grad_norm": 0.8443253636360168, - "learning_rate": 5.249786871270247e-06, - "loss": 0.1079, - "step": 2786 - }, - { - "epoch": 0.475179156565522, - "grad_norm": 1.0115752220153809, - "learning_rate": 5.248081841432225e-06, - "loss": 0.1634, - "step": 2787 - }, - { - "epoch": 0.4753496550070597, - "grad_norm": 0.876071572303772, - "learning_rate": 5.246376811594204e-06, - "loss": 0.09, - "step": 2788 - }, - { - "epoch": 0.4755201534485974, - "grad_norm": 0.644968569278717, - "learning_rate": 5.244671781756181e-06, - "loss": 0.0792, - "step": 2789 - }, - { - "epoch": 0.47569065189013504, - "grad_norm": 0.8539202213287354, - "learning_rate": 5.242966751918159e-06, - "loss": 0.11, - "step": 2790 - }, - { - "epoch": 0.47586115033167276, - "grad_norm": 1.7425650358200073, - "learning_rate": 5.241261722080137e-06, - "loss": 0.2118, - "step": 2791 - }, - { - "epoch": 0.4760316487732104, - "grad_norm": 1.3849416971206665, - "learning_rate": 5.239556692242115e-06, - "loss": 0.1975, - "step": 2792 - }, - { - "epoch": 0.47620214721474813, - "grad_norm": 0.8895875215530396, - "learning_rate": 5.237851662404092e-06, - "loss": 0.1633, - "step": 2793 - }, - { - "epoch": 0.4763726456562858, - "grad_norm": 0.7971814870834351, - "learning_rate": 5.236146632566071e-06, - "loss": 0.0991, - "step": 2794 - }, - { - "epoch": 0.4765431440978235, - "grad_norm": 1.0921452045440674, - "learning_rate": 5.234441602728049e-06, - "loss": 0.1087, - "step": 2795 - }, - { - "epoch": 0.47671364253936116, - "grad_norm": 1.0986230373382568, - "learning_rate": 5.232736572890026e-06, - "loss": 0.1827, - "step": 2796 - }, - { - "epoch": 0.4768841409808989, - "grad_norm": 1.2531445026397705, - "learning_rate": 5.231031543052005e-06, - "loss": 0.2311, - "step": 2797 - }, - { - "epoch": 0.47705463942243653, - "grad_norm": 0.633826732635498, - "learning_rate": 5.229326513213982e-06, - "loss": 0.0877, - "step": 2798 - }, - { - "epoch": 0.4772251378639742, - "grad_norm": 1.069523811340332, - "learning_rate": 5.22762148337596e-06, - "loss": 0.1876, - "step": 2799 - }, - { - "epoch": 0.4773956363055119, - "grad_norm": 0.5807836055755615, - "learning_rate": 5.225916453537937e-06, - "loss": 0.0568, - "step": 2800 - }, - { - "epoch": 0.4773956363055119, - "eval_f1_score": 0.2947976878612717, - "eval_loss": 0.14735326170921326, - "eval_runtime": 182.6683, - "eval_samples_per_second": 54.744, - "eval_steps_per_second": 3.422, - "step": 2800 - }, - { - "epoch": 0.47756613474704956, - "grad_norm": 1.1166603565216064, - "learning_rate": 5.2242114236999155e-06, - "loss": 0.2046, - "step": 2801 - }, - { - "epoch": 0.4777366331885873, - "grad_norm": 0.9348694086074829, - "learning_rate": 5.222506393861893e-06, - "loss": 0.0616, - "step": 2802 - }, - { - "epoch": 0.47790713163012494, - "grad_norm": 0.8302772045135498, - "learning_rate": 5.220801364023871e-06, - "loss": 0.1194, - "step": 2803 - }, - { - "epoch": 0.47807763007166265, - "grad_norm": 1.2290252447128296, - "learning_rate": 5.219096334185849e-06, - "loss": 0.1349, - "step": 2804 - }, - { - "epoch": 0.4782481285132003, - "grad_norm": 0.9503166079521179, - "learning_rate": 5.2173913043478265e-06, - "loss": 0.138, - "step": 2805 - }, - { - "epoch": 0.47841862695473797, - "grad_norm": 0.8801444172859192, - "learning_rate": 5.2156862745098044e-06, - "loss": 0.1008, - "step": 2806 - }, - { - "epoch": 0.4785891253962757, - "grad_norm": 1.240086555480957, - "learning_rate": 5.2139812446717815e-06, - "loss": 0.1937, - "step": 2807 - }, - { - "epoch": 0.47875962383781334, - "grad_norm": 0.7774300575256348, - "learning_rate": 5.21227621483376e-06, - "loss": 0.061, - "step": 2808 - }, - { - "epoch": 0.47893012227935106, - "grad_norm": 0.9553461670875549, - "learning_rate": 5.210571184995737e-06, - "loss": 0.0696, - "step": 2809 - }, - { - "epoch": 0.4791006207208887, - "grad_norm": 0.8926774263381958, - "learning_rate": 5.208866155157715e-06, - "loss": 0.1474, - "step": 2810 - }, - { - "epoch": 0.47927111916242643, - "grad_norm": 0.8952902555465698, - "learning_rate": 5.207161125319693e-06, - "loss": 0.1221, - "step": 2811 - }, - { - "epoch": 0.4794416176039641, - "grad_norm": 0.8415282964706421, - "learning_rate": 5.205456095481671e-06, - "loss": 0.0966, - "step": 2812 - }, - { - "epoch": 0.47961211604550175, - "grad_norm": 0.746725857257843, - "learning_rate": 5.203751065643649e-06, - "loss": 0.067, - "step": 2813 - }, - { - "epoch": 0.47978261448703946, - "grad_norm": 0.9865466952323914, - "learning_rate": 5.202046035805627e-06, - "loss": 0.1327, - "step": 2814 - }, - { - "epoch": 0.4799531129285771, - "grad_norm": 0.7144932150840759, - "learning_rate": 5.200341005967605e-06, - "loss": 0.1007, - "step": 2815 - }, - { - "epoch": 0.48012361137011483, - "grad_norm": 0.7237259745597839, - "learning_rate": 5.198635976129582e-06, - "loss": 0.1027, - "step": 2816 - }, - { - "epoch": 0.4802941098116525, - "grad_norm": 1.5335054397583008, - "learning_rate": 5.196930946291561e-06, - "loss": 0.1574, - "step": 2817 - }, - { - "epoch": 0.4804646082531902, - "grad_norm": 1.2245336771011353, - "learning_rate": 5.195225916453538e-06, - "loss": 0.136, - "step": 2818 - }, - { - "epoch": 0.48063510669472786, - "grad_norm": 1.8195768594741821, - "learning_rate": 5.193520886615516e-06, - "loss": 0.2907, - "step": 2819 - }, - { - "epoch": 0.4808056051362655, - "grad_norm": 1.5869253873825073, - "learning_rate": 5.191815856777495e-06, - "loss": 0.1904, - "step": 2820 - }, - { - "epoch": 0.48097610357780324, - "grad_norm": 1.2377763986587524, - "learning_rate": 5.190110826939472e-06, - "loss": 0.1379, - "step": 2821 - }, - { - "epoch": 0.4811466020193409, - "grad_norm": 1.3938157558441162, - "learning_rate": 5.18840579710145e-06, - "loss": 0.1687, - "step": 2822 - }, - { - "epoch": 0.4813171004608786, - "grad_norm": 0.8821825981140137, - "learning_rate": 5.186700767263427e-06, - "loss": 0.1219, - "step": 2823 - }, - { - "epoch": 0.48148759890241627, - "grad_norm": 1.9296631813049316, - "learning_rate": 5.184995737425406e-06, - "loss": 0.1452, - "step": 2824 - }, - { - "epoch": 0.481658097343954, - "grad_norm": 1.841987133026123, - "learning_rate": 5.183290707587383e-06, - "loss": 0.2546, - "step": 2825 - }, - { - "epoch": 0.48182859578549164, - "grad_norm": 0.8879536390304565, - "learning_rate": 5.181585677749361e-06, - "loss": 0.0895, - "step": 2826 - }, - { - "epoch": 0.48199909422702936, - "grad_norm": 1.2907721996307373, - "learning_rate": 5.179880647911339e-06, - "loss": 0.1423, - "step": 2827 - }, - { - "epoch": 0.482169592668567, - "grad_norm": 1.2071154117584229, - "learning_rate": 5.178175618073317e-06, - "loss": 0.1553, - "step": 2828 - }, - { - "epoch": 0.4823400911101047, - "grad_norm": 0.9301887154579163, - "learning_rate": 5.176470588235295e-06, - "loss": 0.1438, - "step": 2829 - }, - { - "epoch": 0.4825105895516424, - "grad_norm": 1.1748899221420288, - "learning_rate": 5.174765558397273e-06, - "loss": 0.1349, - "step": 2830 - }, - { - "epoch": 0.48268108799318005, - "grad_norm": 1.4293502569198608, - "learning_rate": 5.1730605285592506e-06, - "loss": 0.2146, - "step": 2831 - }, - { - "epoch": 0.48285158643471776, - "grad_norm": 1.4519188404083252, - "learning_rate": 5.171355498721228e-06, - "loss": 0.2329, - "step": 2832 - }, - { - "epoch": 0.4830220848762554, - "grad_norm": 1.1554458141326904, - "learning_rate": 5.1696504688832065e-06, - "loss": 0.186, - "step": 2833 - }, - { - "epoch": 0.48319258331779313, - "grad_norm": 0.9985054731369019, - "learning_rate": 5.1679454390451836e-06, - "loss": 0.1415, - "step": 2834 - }, - { - "epoch": 0.4833630817593308, - "grad_norm": 1.1093112230300903, - "learning_rate": 5.1662404092071615e-06, - "loss": 0.1449, - "step": 2835 - }, - { - "epoch": 0.48353358020086845, - "grad_norm": 1.0962409973144531, - "learning_rate": 5.164535379369139e-06, - "loss": 0.1308, - "step": 2836 - }, - { - "epoch": 0.48370407864240617, - "grad_norm": 0.9815444946289062, - "learning_rate": 5.162830349531117e-06, - "loss": 0.0833, - "step": 2837 - }, - { - "epoch": 0.4838745770839438, - "grad_norm": 0.8832153081893921, - "learning_rate": 5.161125319693095e-06, - "loss": 0.135, - "step": 2838 - }, - { - "epoch": 0.48404507552548154, - "grad_norm": 1.1795408725738525, - "learning_rate": 5.1594202898550725e-06, - "loss": 0.1517, - "step": 2839 - }, - { - "epoch": 0.4842155739670192, - "grad_norm": 0.7381297945976257, - "learning_rate": 5.157715260017051e-06, - "loss": 0.1156, - "step": 2840 - }, - { - "epoch": 0.4843860724085569, - "grad_norm": 0.8825168609619141, - "learning_rate": 5.156010230179028e-06, - "loss": 0.1181, - "step": 2841 - }, - { - "epoch": 0.48455657085009457, - "grad_norm": 0.8557326197624207, - "learning_rate": 5.154305200341006e-06, - "loss": 0.1357, - "step": 2842 - }, - { - "epoch": 0.48472706929163223, - "grad_norm": 1.8939331769943237, - "learning_rate": 5.152600170502983e-06, - "loss": 0.1252, - "step": 2843 - }, - { - "epoch": 0.48489756773316994, - "grad_norm": 0.8608604669570923, - "learning_rate": 5.150895140664962e-06, - "loss": 0.0843, - "step": 2844 - }, - { - "epoch": 0.4850680661747076, - "grad_norm": 0.7309491038322449, - "learning_rate": 5.14919011082694e-06, - "loss": 0.1124, - "step": 2845 - }, - { - "epoch": 0.4852385646162453, - "grad_norm": 1.0825344324111938, - "learning_rate": 5.147485080988917e-06, - "loss": 0.171, - "step": 2846 - }, - { - "epoch": 0.485409063057783, - "grad_norm": 0.9518781900405884, - "learning_rate": 5.145780051150896e-06, - "loss": 0.1327, - "step": 2847 - }, - { - "epoch": 0.4855795614993207, - "grad_norm": 0.7426133155822754, - "learning_rate": 5.144075021312873e-06, - "loss": 0.1078, - "step": 2848 - }, - { - "epoch": 0.48575005994085835, - "grad_norm": 1.0284055471420288, - "learning_rate": 5.142369991474851e-06, - "loss": 0.103, - "step": 2849 - }, - { - "epoch": 0.48592055838239606, - "grad_norm": 1.573033094406128, - "learning_rate": 5.140664961636829e-06, - "loss": 0.1213, - "step": 2850 - }, - { - "epoch": 0.4860910568239337, - "grad_norm": 1.4963144063949585, - "learning_rate": 5.138959931798807e-06, - "loss": 0.1755, - "step": 2851 - }, - { - "epoch": 0.4862615552654714, - "grad_norm": 1.6056444644927979, - "learning_rate": 5.137254901960784e-06, - "loss": 0.2098, - "step": 2852 - }, - { - "epoch": 0.4864320537070091, - "grad_norm": 1.5847902297973633, - "learning_rate": 5.135549872122763e-06, - "loss": 0.2086, - "step": 2853 - }, - { - "epoch": 0.48660255214854675, - "grad_norm": 1.0895462036132812, - "learning_rate": 5.133844842284741e-06, - "loss": 0.1435, - "step": 2854 - }, - { - "epoch": 0.48677305059008447, - "grad_norm": 0.9047597646713257, - "learning_rate": 5.132139812446718e-06, - "loss": 0.106, - "step": 2855 - }, - { - "epoch": 0.4869435490316221, - "grad_norm": 0.8072808384895325, - "learning_rate": 5.130434782608697e-06, - "loss": 0.103, - "step": 2856 - }, - { - "epoch": 0.48711404747315984, - "grad_norm": 1.1774007081985474, - "learning_rate": 5.128729752770674e-06, - "loss": 0.1697, - "step": 2857 - }, - { - "epoch": 0.4872845459146975, - "grad_norm": 1.1065585613250732, - "learning_rate": 5.127024722932652e-06, - "loss": 0.1599, - "step": 2858 - }, - { - "epoch": 0.48745504435623516, - "grad_norm": 1.0990142822265625, - "learning_rate": 5.125319693094629e-06, - "loss": 0.0708, - "step": 2859 - }, - { - "epoch": 0.48762554279777287, - "grad_norm": 2.216906785964966, - "learning_rate": 5.123614663256608e-06, - "loss": 0.1162, - "step": 2860 - }, - { - "epoch": 0.48779604123931053, - "grad_norm": 0.9633966684341431, - "learning_rate": 5.121909633418585e-06, - "loss": 0.0864, - "step": 2861 - }, - { - "epoch": 0.48796653968084824, - "grad_norm": 0.914915919303894, - "learning_rate": 5.120204603580563e-06, - "loss": 0.1159, - "step": 2862 - }, - { - "epoch": 0.4881370381223859, - "grad_norm": 0.8440526723861694, - "learning_rate": 5.1184995737425415e-06, - "loss": 0.1087, - "step": 2863 - }, - { - "epoch": 0.4883075365639236, - "grad_norm": 1.1910771131515503, - "learning_rate": 5.116794543904519e-06, - "loss": 0.1818, - "step": 2864 - }, - { - "epoch": 0.4884780350054613, - "grad_norm": 0.9938591122627258, - "learning_rate": 5.1150895140664966e-06, - "loss": 0.1431, - "step": 2865 - }, - { - "epoch": 0.48864853344699893, - "grad_norm": 1.0255413055419922, - "learning_rate": 5.1133844842284745e-06, - "loss": 0.1353, - "step": 2866 - }, - { - "epoch": 0.48881903188853665, - "grad_norm": 1.0184707641601562, - "learning_rate": 5.1116794543904525e-06, - "loss": 0.0948, - "step": 2867 - }, - { - "epoch": 0.4889895303300743, - "grad_norm": 0.8346421718597412, - "learning_rate": 5.1099744245524296e-06, - "loss": 0.1123, - "step": 2868 - }, - { - "epoch": 0.489160028771612, - "grad_norm": 1.4496159553527832, - "learning_rate": 5.108269394714408e-06, - "loss": 0.1994, - "step": 2869 - }, - { - "epoch": 0.4893305272131497, - "grad_norm": 1.0109056234359741, - "learning_rate": 5.106564364876386e-06, - "loss": 0.1317, - "step": 2870 - }, - { - "epoch": 0.4895010256546874, - "grad_norm": 1.2369807958602905, - "learning_rate": 5.104859335038363e-06, - "loss": 0.165, - "step": 2871 - }, - { - "epoch": 0.48967152409622505, - "grad_norm": 0.7144286036491394, - "learning_rate": 5.103154305200342e-06, - "loss": 0.0609, - "step": 2872 - }, - { - "epoch": 0.48984202253776277, - "grad_norm": 0.9330503940582275, - "learning_rate": 5.101449275362319e-06, - "loss": 0.1263, - "step": 2873 - }, - { - "epoch": 0.4900125209793004, - "grad_norm": 1.3567122220993042, - "learning_rate": 5.099744245524297e-06, - "loss": 0.2209, - "step": 2874 - }, - { - "epoch": 0.4901830194208381, - "grad_norm": 0.6284449100494385, - "learning_rate": 5.098039215686274e-06, - "loss": 0.0759, - "step": 2875 - }, - { - "epoch": 0.4903535178623758, - "grad_norm": 0.9885140061378479, - "learning_rate": 5.096334185848253e-06, - "loss": 0.1227, - "step": 2876 - }, - { - "epoch": 0.49052401630391346, - "grad_norm": 1.0801233053207397, - "learning_rate": 5.09462915601023e-06, - "loss": 0.1315, - "step": 2877 - }, - { - "epoch": 0.49069451474545117, - "grad_norm": 0.9521180391311646, - "learning_rate": 5.092924126172208e-06, - "loss": 0.0846, - "step": 2878 - }, - { - "epoch": 0.49086501318698883, - "grad_norm": 1.128046989440918, - "learning_rate": 5.091219096334187e-06, - "loss": 0.0865, - "step": 2879 - }, - { - "epoch": 0.49103551162852654, - "grad_norm": 0.8198946714401245, - "learning_rate": 5.089514066496164e-06, - "loss": 0.1146, - "step": 2880 - }, - { - "epoch": 0.4912060100700642, - "grad_norm": 1.3413338661193848, - "learning_rate": 5.087809036658142e-06, - "loss": 0.1639, - "step": 2881 - }, - { - "epoch": 0.49137650851160186, - "grad_norm": 1.301024317741394, - "learning_rate": 5.086104006820119e-06, - "loss": 0.1802, - "step": 2882 - }, - { - "epoch": 0.4915470069531396, - "grad_norm": 1.308898687362671, - "learning_rate": 5.084398976982098e-06, - "loss": 0.225, - "step": 2883 - }, - { - "epoch": 0.49171750539467723, - "grad_norm": 1.2367159128189087, - "learning_rate": 5.082693947144075e-06, - "loss": 0.1484, - "step": 2884 - }, - { - "epoch": 0.49188800383621495, - "grad_norm": 0.9292266964912415, - "learning_rate": 5.080988917306053e-06, - "loss": 0.1084, - "step": 2885 - }, - { - "epoch": 0.4920585022777526, - "grad_norm": 1.3165147304534912, - "learning_rate": 5.079283887468031e-06, - "loss": 0.2169, - "step": 2886 - }, - { - "epoch": 0.4922290007192903, - "grad_norm": 1.9130513668060303, - "learning_rate": 5.077578857630009e-06, - "loss": 0.2492, - "step": 2887 - }, - { - "epoch": 0.492399499160828, - "grad_norm": 0.8905692100524902, - "learning_rate": 5.075873827791987e-06, - "loss": 0.114, - "step": 2888 - }, - { - "epoch": 0.49256999760236564, - "grad_norm": 1.8215012550354004, - "learning_rate": 5.074168797953965e-06, - "loss": 0.1037, - "step": 2889 - }, - { - "epoch": 0.49274049604390335, - "grad_norm": 1.8712091445922852, - "learning_rate": 5.072463768115943e-06, - "loss": 0.1958, - "step": 2890 - }, - { - "epoch": 0.492910994485441, - "grad_norm": 0.8169884085655212, - "learning_rate": 5.07075873827792e-06, - "loss": 0.098, - "step": 2891 - }, - { - "epoch": 0.4930814929269787, - "grad_norm": 1.1399061679840088, - "learning_rate": 5.069053708439899e-06, - "loss": 0.1082, - "step": 2892 - }, - { - "epoch": 0.4932519913685164, - "grad_norm": 1.0081355571746826, - "learning_rate": 5.067348678601876e-06, - "loss": 0.0983, - "step": 2893 - }, - { - "epoch": 0.4934224898100541, - "grad_norm": 1.1947318315505981, - "learning_rate": 5.065643648763854e-06, - "loss": 0.1476, - "step": 2894 - }, - { - "epoch": 0.49359298825159176, - "grad_norm": 1.250249981880188, - "learning_rate": 5.0639386189258325e-06, - "loss": 0.1711, - "step": 2895 - }, - { - "epoch": 0.49376348669312947, - "grad_norm": 1.09302818775177, - "learning_rate": 5.0622335890878096e-06, - "loss": 0.1568, - "step": 2896 - }, - { - "epoch": 0.49393398513466713, - "grad_norm": 0.880730390548706, - "learning_rate": 5.0605285592497875e-06, - "loss": 0.1393, - "step": 2897 - }, - { - "epoch": 0.4941044835762048, - "grad_norm": 1.1225374937057495, - "learning_rate": 5.058823529411765e-06, - "loss": 0.1444, - "step": 2898 - }, - { - "epoch": 0.4942749820177425, - "grad_norm": 1.02659273147583, - "learning_rate": 5.057118499573743e-06, - "loss": 0.1071, - "step": 2899 - }, - { - "epoch": 0.49444548045928016, - "grad_norm": 0.6902765035629272, - "learning_rate": 5.0554134697357205e-06, - "loss": 0.0797, - "step": 2900 - }, - { - "epoch": 0.49444548045928016, - "eval_f1_score": 0.32275132275132273, - "eval_loss": 0.14594615995883942, - "eval_runtime": 182.6881, - "eval_samples_per_second": 54.738, - "eval_steps_per_second": 3.421, - "step": 2900 - }, - { - "epoch": 0.4946159789008179, - "grad_norm": 1.128650188446045, - "learning_rate": 5.0537084398976984e-06, - "loss": 0.1338, - "step": 2901 - }, - { - "epoch": 0.49478647734235554, - "grad_norm": 0.9671938419342041, - "learning_rate": 5.052003410059676e-06, - "loss": 0.1105, - "step": 2902 - }, - { - "epoch": 0.49495697578389325, - "grad_norm": 0.7639597058296204, - "learning_rate": 5.050298380221654e-06, - "loss": 0.0725, - "step": 2903 - }, - { - "epoch": 0.4951274742254309, - "grad_norm": 0.9767041206359863, - "learning_rate": 5.048593350383632e-06, - "loss": 0.0928, - "step": 2904 - }, - { - "epoch": 0.49529797266696857, - "grad_norm": 0.9453780651092529, - "learning_rate": 5.04688832054561e-06, - "loss": 0.1052, - "step": 2905 - }, - { - "epoch": 0.4954684711085063, - "grad_norm": 1.1028945446014404, - "learning_rate": 5.045183290707588e-06, - "loss": 0.0835, - "step": 2906 - }, - { - "epoch": 0.49563896955004394, - "grad_norm": 0.7256104350090027, - "learning_rate": 5.043478260869565e-06, - "loss": 0.0648, - "step": 2907 - }, - { - "epoch": 0.49580946799158165, - "grad_norm": 1.5962961912155151, - "learning_rate": 5.041773231031544e-06, - "loss": 0.1047, - "step": 2908 - }, - { - "epoch": 0.4959799664331193, - "grad_norm": 0.8366924524307251, - "learning_rate": 5.040068201193521e-06, - "loss": 0.1633, - "step": 2909 - }, - { - "epoch": 0.496150464874657, - "grad_norm": 0.9724977016448975, - "learning_rate": 5.038363171355499e-06, - "loss": 0.1732, - "step": 2910 - }, - { - "epoch": 0.4963209633161947, - "grad_norm": 1.251202940940857, - "learning_rate": 5.036658141517476e-06, - "loss": 0.1237, - "step": 2911 - }, - { - "epoch": 0.49649146175773234, - "grad_norm": 1.2317320108413696, - "learning_rate": 5.034953111679455e-06, - "loss": 0.1581, - "step": 2912 - }, - { - "epoch": 0.49666196019927006, - "grad_norm": 1.3477345705032349, - "learning_rate": 5.033248081841433e-06, - "loss": 0.1353, - "step": 2913 - }, - { - "epoch": 0.4968324586408077, - "grad_norm": 0.7249893546104431, - "learning_rate": 5.03154305200341e-06, - "loss": 0.0885, - "step": 2914 - }, - { - "epoch": 0.49700295708234543, - "grad_norm": 1.0673930644989014, - "learning_rate": 5.029838022165389e-06, - "loss": 0.1094, - "step": 2915 - }, - { - "epoch": 0.4971734555238831, - "grad_norm": 0.9129320383071899, - "learning_rate": 5.028132992327366e-06, - "loss": 0.0932, - "step": 2916 - }, - { - "epoch": 0.4973439539654208, - "grad_norm": 1.1062793731689453, - "learning_rate": 5.026427962489344e-06, - "loss": 0.2301, - "step": 2917 - }, - { - "epoch": 0.49751445240695846, - "grad_norm": 1.1716173887252808, - "learning_rate": 5.024722932651321e-06, - "loss": 0.1204, - "step": 2918 - }, - { - "epoch": 0.4976849508484962, - "grad_norm": 0.6853263974189758, - "learning_rate": 5.0230179028133e-06, - "loss": 0.0942, - "step": 2919 - }, - { - "epoch": 0.49785544929003384, - "grad_norm": 0.8545072078704834, - "learning_rate": 5.021312872975278e-06, - "loss": 0.1283, - "step": 2920 - }, - { - "epoch": 0.4980259477315715, - "grad_norm": 1.072073221206665, - "learning_rate": 5.019607843137255e-06, - "loss": 0.1356, - "step": 2921 - }, - { - "epoch": 0.4981964461731092, - "grad_norm": 1.1019538640975952, - "learning_rate": 5.017902813299234e-06, - "loss": 0.0915, - "step": 2922 - }, - { - "epoch": 0.49836694461464687, - "grad_norm": 1.5704419612884521, - "learning_rate": 5.016197783461211e-06, - "loss": 0.2405, - "step": 2923 - }, - { - "epoch": 0.4985374430561846, - "grad_norm": 1.09921395778656, - "learning_rate": 5.014492753623189e-06, - "loss": 0.1231, - "step": 2924 - }, - { - "epoch": 0.49870794149772224, - "grad_norm": 0.9660294055938721, - "learning_rate": 5.012787723785167e-06, - "loss": 0.0873, - "step": 2925 - }, - { - "epoch": 0.49887843993925995, - "grad_norm": 0.7957599759101868, - "learning_rate": 5.011082693947145e-06, - "loss": 0.097, - "step": 2926 - }, - { - "epoch": 0.4990489383807976, - "grad_norm": 1.182488203048706, - "learning_rate": 5.009377664109122e-06, - "loss": 0.1552, - "step": 2927 - }, - { - "epoch": 0.49921943682233527, - "grad_norm": 1.1175565719604492, - "learning_rate": 5.0076726342711005e-06, - "loss": 0.1212, - "step": 2928 - }, - { - "epoch": 0.499389935263873, - "grad_norm": 1.1008256673812866, - "learning_rate": 5.0059676044330784e-06, - "loss": 0.106, - "step": 2929 - }, - { - "epoch": 0.49956043370541064, - "grad_norm": 1.1122069358825684, - "learning_rate": 5.0042625745950555e-06, - "loss": 0.1277, - "step": 2930 - }, - { - "epoch": 0.49973093214694836, - "grad_norm": 0.8675044178962708, - "learning_rate": 5.002557544757034e-06, - "loss": 0.0812, - "step": 2931 - }, - { - "epoch": 0.499901430588486, - "grad_norm": 1.1391392946243286, - "learning_rate": 5.0008525149190114e-06, - "loss": 0.1313, - "step": 2932 - }, - { - "epoch": 0.5000719290300237, - "grad_norm": 1.2596222162246704, - "learning_rate": 4.999147485080989e-06, - "loss": 0.1338, - "step": 2933 - }, - { - "epoch": 0.5002424274715614, - "grad_norm": 1.128591775894165, - "learning_rate": 4.997442455242967e-06, - "loss": 0.1136, - "step": 2934 - }, - { - "epoch": 0.500412925913099, - "grad_norm": 1.137433648109436, - "learning_rate": 4.995737425404945e-06, - "loss": 0.1382, - "step": 2935 - }, - { - "epoch": 0.5005834243546368, - "grad_norm": 1.3177850246429443, - "learning_rate": 4.994032395566923e-06, - "loss": 0.1373, - "step": 2936 - }, - { - "epoch": 0.5007539227961745, - "grad_norm": 1.0512694120407104, - "learning_rate": 4.9923273657289e-06, - "loss": 0.1067, - "step": 2937 - }, - { - "epoch": 0.5009244212377121, - "grad_norm": 1.1488574743270874, - "learning_rate": 4.990622335890878e-06, - "loss": 0.1672, - "step": 2938 - }, - { - "epoch": 0.5010949196792498, - "grad_norm": 0.9468741416931152, - "learning_rate": 4.988917306052856e-06, - "loss": 0.096, - "step": 2939 - }, - { - "epoch": 0.5012654181207875, - "grad_norm": 1.8275294303894043, - "learning_rate": 4.987212276214834e-06, - "loss": 0.1998, - "step": 2940 - }, - { - "epoch": 0.5014359165623252, - "grad_norm": 0.7763018608093262, - "learning_rate": 4.985507246376812e-06, - "loss": 0.0775, - "step": 2941 - }, - { - "epoch": 0.5016064150038628, - "grad_norm": 0.9659168720245361, - "learning_rate": 4.98380221653879e-06, - "loss": 0.1099, - "step": 2942 - }, - { - "epoch": 0.5017769134454005, - "grad_norm": 1.334991455078125, - "learning_rate": 4.982097186700768e-06, - "loss": 0.15, - "step": 2943 - }, - { - "epoch": 0.5019474118869383, - "grad_norm": 1.7190552949905396, - "learning_rate": 4.980392156862746e-06, - "loss": 0.202, - "step": 2944 - }, - { - "epoch": 0.5021179103284759, - "grad_norm": 0.994452714920044, - "learning_rate": 4.978687127024723e-06, - "loss": 0.086, - "step": 2945 - }, - { - "epoch": 0.5022884087700136, - "grad_norm": 1.280116081237793, - "learning_rate": 4.976982097186701e-06, - "loss": 0.2435, - "step": 2946 - }, - { - "epoch": 0.5024589072115513, - "grad_norm": 1.1104083061218262, - "learning_rate": 4.975277067348679e-06, - "loss": 0.1192, - "step": 2947 - }, - { - "epoch": 0.502629405653089, - "grad_norm": 1.1849713325500488, - "learning_rate": 4.973572037510657e-06, - "loss": 0.1277, - "step": 2948 - }, - { - "epoch": 0.5027999040946266, - "grad_norm": 1.0629472732543945, - "learning_rate": 4.971867007672634e-06, - "loss": 0.0563, - "step": 2949 - }, - { - "epoch": 0.5029704025361643, - "grad_norm": 1.0110509395599365, - "learning_rate": 4.970161977834613e-06, - "loss": 0.1684, - "step": 2950 - }, - { - "epoch": 0.503140900977702, - "grad_norm": 1.2462400197982788, - "learning_rate": 4.968456947996591e-06, - "loss": 0.1531, - "step": 2951 - }, - { - "epoch": 0.5033113994192396, - "grad_norm": 1.3758798837661743, - "learning_rate": 4.966751918158569e-06, - "loss": 0.107, - "step": 2952 - }, - { - "epoch": 0.5034818978607773, - "grad_norm": 0.9632350206375122, - "learning_rate": 4.965046888320546e-06, - "loss": 0.1426, - "step": 2953 - }, - { - "epoch": 0.5036523963023151, - "grad_norm": 0.9977644681930542, - "learning_rate": 4.963341858482524e-06, - "loss": 0.1266, - "step": 2954 - }, - { - "epoch": 0.5038228947438528, - "grad_norm": 1.0415880680084229, - "learning_rate": 4.961636828644502e-06, - "loss": 0.1329, - "step": 2955 - }, - { - "epoch": 0.5039933931853904, - "grad_norm": 1.1172159910202026, - "learning_rate": 4.95993179880648e-06, - "loss": 0.1161, - "step": 2956 - }, - { - "epoch": 0.5041638916269281, - "grad_norm": 1.2988855838775635, - "learning_rate": 4.958226768968457e-06, - "loss": 0.1676, - "step": 2957 - }, - { - "epoch": 0.5043343900684658, - "grad_norm": 1.8825470209121704, - "learning_rate": 4.9565217391304355e-06, - "loss": 0.216, - "step": 2958 - }, - { - "epoch": 0.5045048885100034, - "grad_norm": 1.2938297986984253, - "learning_rate": 4.9548167092924135e-06, - "loss": 0.1605, - "step": 2959 - }, - { - "epoch": 0.5046753869515411, - "grad_norm": 1.0553568601608276, - "learning_rate": 4.953111679454391e-06, - "loss": 0.1528, - "step": 2960 - }, - { - "epoch": 0.5048458853930788, - "grad_norm": 0.8495917916297913, - "learning_rate": 4.9514066496163685e-06, - "loss": 0.1034, - "step": 2961 - }, - { - "epoch": 0.5050163838346166, - "grad_norm": 1.1607139110565186, - "learning_rate": 4.9497016197783465e-06, - "loss": 0.1775, - "step": 2962 - }, - { - "epoch": 0.5051868822761542, - "grad_norm": 0.98664391040802, - "learning_rate": 4.9479965899403244e-06, - "loss": 0.1173, - "step": 2963 - }, - { - "epoch": 0.5053573807176919, - "grad_norm": 1.0900707244873047, - "learning_rate": 4.946291560102302e-06, - "loss": 0.1118, - "step": 2964 - }, - { - "epoch": 0.5055278791592296, - "grad_norm": 1.483394980430603, - "learning_rate": 4.9445865302642795e-06, - "loss": 0.2319, - "step": 2965 - }, - { - "epoch": 0.5056983776007672, - "grad_norm": 1.5776678323745728, - "learning_rate": 4.942881500426258e-06, - "loss": 0.1674, - "step": 2966 - }, - { - "epoch": 0.5058688760423049, - "grad_norm": 0.9520050883293152, - "learning_rate": 4.941176470588236e-06, - "loss": 0.1343, - "step": 2967 - }, - { - "epoch": 0.5060393744838426, - "grad_norm": 0.9713250398635864, - "learning_rate": 4.939471440750213e-06, - "loss": 0.163, - "step": 2968 - }, - { - "epoch": 0.5062098729253803, - "grad_norm": 1.0562556982040405, - "learning_rate": 4.937766410912191e-06, - "loss": 0.1187, - "step": 2969 - }, - { - "epoch": 0.5063803713669179, - "grad_norm": 0.8522936105728149, - "learning_rate": 4.936061381074169e-06, - "loss": 0.1625, - "step": 2970 - }, - { - "epoch": 0.5065508698084557, - "grad_norm": 0.9506287574768066, - "learning_rate": 4.934356351236147e-06, - "loss": 0.0804, - "step": 2971 - }, - { - "epoch": 0.5067213682499934, - "grad_norm": 0.8114155530929565, - "learning_rate": 4.932651321398125e-06, - "loss": 0.0813, - "step": 2972 - }, - { - "epoch": 0.506891866691531, - "grad_norm": 0.8814626336097717, - "learning_rate": 4.930946291560102e-06, - "loss": 0.1039, - "step": 2973 - }, - { - "epoch": 0.5070623651330687, - "grad_norm": 0.7418640851974487, - "learning_rate": 4.92924126172208e-06, - "loss": 0.0652, - "step": 2974 - }, - { - "epoch": 0.5072328635746064, - "grad_norm": 0.748171865940094, - "learning_rate": 4.927536231884059e-06, - "loss": 0.0882, - "step": 2975 - }, - { - "epoch": 0.5074033620161441, - "grad_norm": 1.8255096673965454, - "learning_rate": 4.925831202046036e-06, - "loss": 0.237, - "step": 2976 - }, - { - "epoch": 0.5075738604576817, - "grad_norm": 1.0054211616516113, - "learning_rate": 4.924126172208014e-06, - "loss": 0.0576, - "step": 2977 - }, - { - "epoch": 0.5077443588992194, - "grad_norm": 1.8089407682418823, - "learning_rate": 4.922421142369992e-06, - "loss": 0.291, - "step": 2978 - }, - { - "epoch": 0.5079148573407571, - "grad_norm": 1.2715364694595337, - "learning_rate": 4.92071611253197e-06, - "loss": 0.1506, - "step": 2979 - }, - { - "epoch": 0.5080853557822949, - "grad_norm": 1.0056416988372803, - "learning_rate": 4.919011082693948e-06, - "loss": 0.1127, - "step": 2980 - }, - { - "epoch": 0.5082558542238325, - "grad_norm": 2.1583173274993896, - "learning_rate": 4.917306052855925e-06, - "loss": 0.2778, - "step": 2981 - }, - { - "epoch": 0.5084263526653702, - "grad_norm": 1.2089869976043701, - "learning_rate": 4.915601023017903e-06, - "loss": 0.1245, - "step": 2982 - }, - { - "epoch": 0.5085968511069079, - "grad_norm": 1.3445782661437988, - "learning_rate": 4.913895993179882e-06, - "loss": 0.1726, - "step": 2983 - }, - { - "epoch": 0.5087673495484455, - "grad_norm": 0.9058725237846375, - "learning_rate": 4.912190963341859e-06, - "loss": 0.1508, - "step": 2984 - }, - { - "epoch": 0.5089378479899832, - "grad_norm": 0.7840000987052917, - "learning_rate": 4.910485933503837e-06, - "loss": 0.0673, - "step": 2985 - }, - { - "epoch": 0.5091083464315209, - "grad_norm": 0.9684233069419861, - "learning_rate": 4.908780903665815e-06, - "loss": 0.1054, - "step": 2986 - }, - { - "epoch": 0.5092788448730586, - "grad_norm": 1.2829673290252686, - "learning_rate": 4.907075873827793e-06, - "loss": 0.1898, - "step": 2987 - }, - { - "epoch": 0.5094493433145962, - "grad_norm": 1.809422492980957, - "learning_rate": 4.90537084398977e-06, - "loss": 0.1678, - "step": 2988 - }, - { - "epoch": 0.509619841756134, - "grad_norm": 0.8061268925666809, - "learning_rate": 4.903665814151748e-06, - "loss": 0.1195, - "step": 2989 - }, - { - "epoch": 0.5097903401976717, - "grad_norm": 0.7315105199813843, - "learning_rate": 4.901960784313726e-06, - "loss": 0.1081, - "step": 2990 - }, - { - "epoch": 0.5099608386392093, - "grad_norm": 0.9622489213943481, - "learning_rate": 4.9002557544757036e-06, - "loss": 0.1252, - "step": 2991 - }, - { - "epoch": 0.510131337080747, - "grad_norm": 1.0779286623001099, - "learning_rate": 4.8985507246376815e-06, - "loss": 0.2076, - "step": 2992 - }, - { - "epoch": 0.5103018355222847, - "grad_norm": 0.7318807244300842, - "learning_rate": 4.8968456947996595e-06, - "loss": 0.0755, - "step": 2993 - }, - { - "epoch": 0.5104723339638224, - "grad_norm": 0.7845926880836487, - "learning_rate": 4.895140664961637e-06, - "loss": 0.1215, - "step": 2994 - }, - { - "epoch": 0.51064283240536, - "grad_norm": 0.8664889335632324, - "learning_rate": 4.893435635123615e-06, - "loss": 0.0785, - "step": 2995 - }, - { - "epoch": 0.5108133308468977, - "grad_norm": 1.0492292642593384, - "learning_rate": 4.8917306052855925e-06, - "loss": 0.0691, - "step": 2996 - }, - { - "epoch": 0.5109838292884354, - "grad_norm": 1.4236756563186646, - "learning_rate": 4.89002557544757e-06, - "loss": 0.1649, - "step": 2997 - }, - { - "epoch": 0.511154327729973, - "grad_norm": 0.8544982671737671, - "learning_rate": 4.888320545609548e-06, - "loss": 0.1691, - "step": 2998 - }, - { - "epoch": 0.5113248261715108, - "grad_norm": 1.7541369199752808, - "learning_rate": 4.886615515771526e-06, - "loss": 0.234, - "step": 2999 - }, - { - "epoch": 0.5114953246130485, - "grad_norm": 1.8814383745193481, - "learning_rate": 4.884910485933504e-06, - "loss": 0.2522, - "step": 3000 - }, - { - "epoch": 0.5114953246130485, - "eval_f1_score": 0.36180904522613067, - "eval_loss": 0.14497753977775574, - "eval_runtime": 182.6894, - "eval_samples_per_second": 54.738, - "eval_steps_per_second": 3.421, - "step": 3000 - }, - { - "epoch": 0.5116658230545862, - "grad_norm": 1.3755370378494263, - "learning_rate": 4.883205456095482e-06, - "loss": 0.1503, - "step": 3001 - }, - { - "epoch": 0.5118363214961238, - "grad_norm": 1.4907002449035645, - "learning_rate": 4.88150042625746e-06, - "loss": 0.1312, - "step": 3002 - }, - { - "epoch": 0.5120068199376615, - "grad_norm": 0.9052005410194397, - "learning_rate": 4.879795396419438e-06, - "loss": 0.1519, - "step": 3003 - }, - { - "epoch": 0.5121773183791992, - "grad_norm": 0.86555415391922, - "learning_rate": 4.878090366581415e-06, - "loss": 0.1146, - "step": 3004 - }, - { - "epoch": 0.5123478168207368, - "grad_norm": 1.230872631072998, - "learning_rate": 4.876385336743393e-06, - "loss": 0.1394, - "step": 3005 - }, - { - "epoch": 0.5125183152622745, - "grad_norm": 1.2168004512786865, - "learning_rate": 4.874680306905371e-06, - "loss": 0.2157, - "step": 3006 - }, - { - "epoch": 0.5126888137038123, - "grad_norm": 1.3998253345489502, - "learning_rate": 4.872975277067349e-06, - "loss": 0.1221, - "step": 3007 - }, - { - "epoch": 0.51285931214535, - "grad_norm": 1.2359734773635864, - "learning_rate": 4.871270247229327e-06, - "loss": 0.1726, - "step": 3008 - }, - { - "epoch": 0.5130298105868876, - "grad_norm": 1.387755274772644, - "learning_rate": 4.869565217391305e-06, - "loss": 0.2025, - "step": 3009 - }, - { - "epoch": 0.5132003090284253, - "grad_norm": 1.1594918966293335, - "learning_rate": 4.867860187553283e-06, - "loss": 0.1617, - "step": 3010 - }, - { - "epoch": 0.513370807469963, - "grad_norm": 0.8933932185173035, - "learning_rate": 4.866155157715261e-06, - "loss": 0.1102, - "step": 3011 - }, - { - "epoch": 0.5135413059115006, - "grad_norm": 1.0764583349227905, - "learning_rate": 4.864450127877238e-06, - "loss": 0.1151, - "step": 3012 - }, - { - "epoch": 0.5137118043530383, - "grad_norm": 0.9294535517692566, - "learning_rate": 4.862745098039216e-06, - "loss": 0.1442, - "step": 3013 - }, - { - "epoch": 0.513882302794576, - "grad_norm": 0.8940418362617493, - "learning_rate": 4.861040068201194e-06, - "loss": 0.1079, - "step": 3014 - }, - { - "epoch": 0.5140528012361137, - "grad_norm": 0.8228653073310852, - "learning_rate": 4.859335038363172e-06, - "loss": 0.0792, - "step": 3015 - }, - { - "epoch": 0.5142232996776513, - "grad_norm": 0.860935389995575, - "learning_rate": 4.85763000852515e-06, - "loss": 0.0919, - "step": 3016 - }, - { - "epoch": 0.5143937981191891, - "grad_norm": 0.9354767203330994, - "learning_rate": 4.855924978687128e-06, - "loss": 0.145, - "step": 3017 - }, - { - "epoch": 0.5145642965607268, - "grad_norm": 0.8232112526893616, - "learning_rate": 4.854219948849106e-06, - "loss": 0.1096, - "step": 3018 - }, - { - "epoch": 0.5147347950022644, - "grad_norm": 1.0975641012191772, - "learning_rate": 4.8525149190110836e-06, - "loss": 0.0981, - "step": 3019 - }, - { - "epoch": 0.5149052934438021, - "grad_norm": 1.0185004472732544, - "learning_rate": 4.850809889173061e-06, - "loss": 0.1236, - "step": 3020 - }, - { - "epoch": 0.5150757918853398, - "grad_norm": 0.8207215666770935, - "learning_rate": 4.849104859335039e-06, - "loss": 0.0702, - "step": 3021 - }, - { - "epoch": 0.5152462903268775, - "grad_norm": 1.1175296306610107, - "learning_rate": 4.8473998294970166e-06, - "loss": 0.1325, - "step": 3022 - }, - { - "epoch": 0.5154167887684151, - "grad_norm": 0.7806240916252136, - "learning_rate": 4.8456947996589945e-06, - "loss": 0.0861, - "step": 3023 - }, - { - "epoch": 0.5155872872099528, - "grad_norm": 1.094958782196045, - "learning_rate": 4.843989769820972e-06, - "loss": 0.0783, - "step": 3024 - }, - { - "epoch": 0.5157577856514906, - "grad_norm": 1.0804235935211182, - "learning_rate": 4.84228473998295e-06, - "loss": 0.1074, - "step": 3025 - }, - { - "epoch": 0.5159282840930283, - "grad_norm": 0.8760350346565247, - "learning_rate": 4.840579710144928e-06, - "loss": 0.0962, - "step": 3026 - }, - { - "epoch": 0.5160987825345659, - "grad_norm": 1.3372377157211304, - "learning_rate": 4.838874680306906e-06, - "loss": 0.1071, - "step": 3027 - }, - { - "epoch": 0.5162692809761036, - "grad_norm": 1.0021148920059204, - "learning_rate": 4.837169650468883e-06, - "loss": 0.119, - "step": 3028 - }, - { - "epoch": 0.5164397794176413, - "grad_norm": 1.2906092405319214, - "learning_rate": 4.835464620630861e-06, - "loss": 0.1074, - "step": 3029 - }, - { - "epoch": 0.5166102778591789, - "grad_norm": 1.09266197681427, - "learning_rate": 4.833759590792839e-06, - "loss": 0.1086, - "step": 3030 - }, - { - "epoch": 0.5167807763007166, - "grad_norm": 1.7703126668930054, - "learning_rate": 4.832054560954817e-06, - "loss": 0.2657, - "step": 3031 - }, - { - "epoch": 0.5169512747422543, - "grad_norm": 0.8526506423950195, - "learning_rate": 4.830349531116794e-06, - "loss": 0.0627, - "step": 3032 - }, - { - "epoch": 0.517121773183792, - "grad_norm": 1.1537318229675293, - "learning_rate": 4.828644501278773e-06, - "loss": 0.118, - "step": 3033 - }, - { - "epoch": 0.5172922716253296, - "grad_norm": 1.1283197402954102, - "learning_rate": 4.826939471440751e-06, - "loss": 0.0813, - "step": 3034 - }, - { - "epoch": 0.5174627700668674, - "grad_norm": 0.9512043595314026, - "learning_rate": 4.825234441602728e-06, - "loss": 0.0994, - "step": 3035 - }, - { - "epoch": 0.5176332685084051, - "grad_norm": 1.416027545928955, - "learning_rate": 4.823529411764706e-06, - "loss": 0.1292, - "step": 3036 - }, - { - "epoch": 0.5178037669499427, - "grad_norm": 1.2295200824737549, - "learning_rate": 4.821824381926684e-06, - "loss": 0.1406, - "step": 3037 - }, - { - "epoch": 0.5179742653914804, - "grad_norm": 1.3271145820617676, - "learning_rate": 4.820119352088662e-06, - "loss": 0.1723, - "step": 3038 - }, - { - "epoch": 0.5181447638330181, - "grad_norm": 0.9563559293746948, - "learning_rate": 4.81841432225064e-06, - "loss": 0.0892, - "step": 3039 - }, - { - "epoch": 0.5183152622745558, - "grad_norm": 1.0865267515182495, - "learning_rate": 4.816709292412617e-06, - "loss": 0.1565, - "step": 3040 - }, - { - "epoch": 0.5184857607160934, - "grad_norm": 0.8583990931510925, - "learning_rate": 4.815004262574595e-06, - "loss": 0.0959, - "step": 3041 - }, - { - "epoch": 0.5186562591576311, - "grad_norm": 1.1742157936096191, - "learning_rate": 4.813299232736574e-06, - "loss": 0.1373, - "step": 3042 - }, - { - "epoch": 0.5188267575991689, - "grad_norm": 0.895906925201416, - "learning_rate": 4.811594202898551e-06, - "loss": 0.0875, - "step": 3043 - }, - { - "epoch": 0.5189972560407065, - "grad_norm": 0.9671127200126648, - "learning_rate": 4.809889173060529e-06, - "loss": 0.1015, - "step": 3044 - }, - { - "epoch": 0.5191677544822442, - "grad_norm": 1.157288670539856, - "learning_rate": 4.808184143222507e-06, - "loss": 0.1169, - "step": 3045 - }, - { - "epoch": 0.5193382529237819, - "grad_norm": 1.1225751638412476, - "learning_rate": 4.806479113384485e-06, - "loss": 0.1367, - "step": 3046 - }, - { - "epoch": 0.5195087513653196, - "grad_norm": 0.8157687187194824, - "learning_rate": 4.804774083546463e-06, - "loss": 0.0866, - "step": 3047 - }, - { - "epoch": 0.5196792498068572, - "grad_norm": 0.908694863319397, - "learning_rate": 4.80306905370844e-06, - "loss": 0.0787, - "step": 3048 - }, - { - "epoch": 0.5198497482483949, - "grad_norm": 1.340047836303711, - "learning_rate": 4.801364023870418e-06, - "loss": 0.1833, - "step": 3049 - }, - { - "epoch": 0.5200202466899326, - "grad_norm": 0.9516338109970093, - "learning_rate": 4.7996589940323966e-06, - "loss": 0.159, - "step": 3050 - }, - { - "epoch": 0.5201907451314702, - "grad_norm": 0.9492541551589966, - "learning_rate": 4.797953964194374e-06, - "loss": 0.1062, - "step": 3051 - }, - { - "epoch": 0.520361243573008, - "grad_norm": 0.9833723306655884, - "learning_rate": 4.796248934356352e-06, - "loss": 0.137, - "step": 3052 - }, - { - "epoch": 0.5205317420145457, - "grad_norm": 0.9533904790878296, - "learning_rate": 4.7945439045183296e-06, - "loss": 0.1134, - "step": 3053 - }, - { - "epoch": 0.5207022404560834, - "grad_norm": 1.4486286640167236, - "learning_rate": 4.7928388746803075e-06, - "loss": 0.193, - "step": 3054 - }, - { - "epoch": 0.520872738897621, - "grad_norm": 1.293747901916504, - "learning_rate": 4.7911338448422854e-06, - "loss": 0.0931, - "step": 3055 - }, - { - "epoch": 0.5210432373391587, - "grad_norm": 0.9425202012062073, - "learning_rate": 4.7894288150042626e-06, - "loss": 0.0833, - "step": 3056 - }, - { - "epoch": 0.5212137357806964, - "grad_norm": 1.0497663021087646, - "learning_rate": 4.7877237851662405e-06, - "loss": 0.1519, - "step": 3057 - }, - { - "epoch": 0.521384234222234, - "grad_norm": 1.0354875326156616, - "learning_rate": 4.786018755328219e-06, - "loss": 0.0838, - "step": 3058 - }, - { - "epoch": 0.5215547326637717, - "grad_norm": 1.1730378866195679, - "learning_rate": 4.784313725490196e-06, - "loss": 0.151, - "step": 3059 - }, - { - "epoch": 0.5217252311053094, - "grad_norm": 2.1272621154785156, - "learning_rate": 4.782608695652174e-06, - "loss": 0.2293, - "step": 3060 - }, - { - "epoch": 0.5218957295468472, - "grad_norm": 1.4439617395401, - "learning_rate": 4.780903665814152e-06, - "loss": 0.1704, - "step": 3061 - }, - { - "epoch": 0.5220662279883848, - "grad_norm": 1.2619868516921997, - "learning_rate": 4.77919863597613e-06, - "loss": 0.1558, - "step": 3062 - }, - { - "epoch": 0.5222367264299225, - "grad_norm": 2.5501723289489746, - "learning_rate": 4.777493606138107e-06, - "loss": 0.2912, - "step": 3063 - }, - { - "epoch": 0.5224072248714602, - "grad_norm": 1.7127344608306885, - "learning_rate": 4.775788576300085e-06, - "loss": 0.2276, - "step": 3064 - }, - { - "epoch": 0.5225777233129978, - "grad_norm": 0.9259654879570007, - "learning_rate": 4.774083546462063e-06, - "loss": 0.1232, - "step": 3065 - }, - { - "epoch": 0.5227482217545355, - "grad_norm": 1.1065436601638794, - "learning_rate": 4.772378516624041e-06, - "loss": 0.1829, - "step": 3066 - }, - { - "epoch": 0.5229187201960732, - "grad_norm": 1.0913331508636475, - "learning_rate": 4.770673486786019e-06, - "loss": 0.1666, - "step": 3067 - }, - { - "epoch": 0.5230892186376109, - "grad_norm": 1.3781788349151611, - "learning_rate": 4.768968456947997e-06, - "loss": 0.1954, - "step": 3068 - }, - { - "epoch": 0.5232597170791485, - "grad_norm": 1.0182466506958008, - "learning_rate": 4.767263427109975e-06, - "loss": 0.1347, - "step": 3069 - }, - { - "epoch": 0.5234302155206862, - "grad_norm": 1.5784564018249512, - "learning_rate": 4.765558397271953e-06, - "loss": 0.1382, - "step": 3070 - }, - { - "epoch": 0.523600713962224, - "grad_norm": 1.2984943389892578, - "learning_rate": 4.76385336743393e-06, - "loss": 0.0943, - "step": 3071 - }, - { - "epoch": 0.5237712124037617, - "grad_norm": 1.1550954580307007, - "learning_rate": 4.762148337595908e-06, - "loss": 0.1466, - "step": 3072 - }, - { - "epoch": 0.5239417108452993, - "grad_norm": 1.4043045043945312, - "learning_rate": 4.760443307757886e-06, - "loss": 0.1147, - "step": 3073 - }, - { - "epoch": 0.524112209286837, - "grad_norm": 1.1770408153533936, - "learning_rate": 4.758738277919864e-06, - "loss": 0.168, - "step": 3074 - }, - { - "epoch": 0.5242827077283747, - "grad_norm": 0.9252126812934875, - "learning_rate": 4.757033248081842e-06, - "loss": 0.153, - "step": 3075 - }, - { - "epoch": 0.5244532061699123, - "grad_norm": 1.0992802381515503, - "learning_rate": 4.75532821824382e-06, - "loss": 0.1047, - "step": 3076 - }, - { - "epoch": 0.52462370461145, - "grad_norm": 1.1755380630493164, - "learning_rate": 4.753623188405798e-06, - "loss": 0.0625, - "step": 3077 - }, - { - "epoch": 0.5247942030529877, - "grad_norm": 1.2509602308273315, - "learning_rate": 4.751918158567776e-06, - "loss": 0.179, - "step": 3078 - }, - { - "epoch": 0.5249647014945255, - "grad_norm": 1.23047935962677, - "learning_rate": 4.750213128729753e-06, - "loss": 0.1415, - "step": 3079 - }, - { - "epoch": 0.5251351999360631, - "grad_norm": 1.2384694814682007, - "learning_rate": 4.748508098891731e-06, - "loss": 0.166, - "step": 3080 - }, - { - "epoch": 0.5253056983776008, - "grad_norm": 0.8851529359817505, - "learning_rate": 4.746803069053709e-06, - "loss": 0.105, - "step": 3081 - }, - { - "epoch": 0.5254761968191385, - "grad_norm": 0.9816237688064575, - "learning_rate": 4.745098039215687e-06, - "loss": 0.1696, - "step": 3082 - }, - { - "epoch": 0.5256466952606761, - "grad_norm": 1.4306472539901733, - "learning_rate": 4.743393009377665e-06, - "loss": 0.1873, - "step": 3083 - }, - { - "epoch": 0.5258171937022138, - "grad_norm": 0.8195698857307434, - "learning_rate": 4.7416879795396425e-06, - "loss": 0.1025, - "step": 3084 - }, - { - "epoch": 0.5259876921437515, - "grad_norm": 0.8986068367958069, - "learning_rate": 4.7399829497016205e-06, - "loss": 0.1152, - "step": 3085 - }, - { - "epoch": 0.5261581905852892, - "grad_norm": 1.3748445510864258, - "learning_rate": 4.7382779198635984e-06, - "loss": 0.1902, - "step": 3086 - }, - { - "epoch": 0.5263286890268268, - "grad_norm": 0.8912947773933411, - "learning_rate": 4.7365728900255755e-06, - "loss": 0.0736, - "step": 3087 - }, - { - "epoch": 0.5264991874683645, - "grad_norm": 1.600932002067566, - "learning_rate": 4.7348678601875535e-06, - "loss": 0.2012, - "step": 3088 - }, - { - "epoch": 0.5266696859099023, - "grad_norm": 1.0017476081848145, - "learning_rate": 4.7331628303495314e-06, - "loss": 0.1378, - "step": 3089 - }, - { - "epoch": 0.5268401843514399, - "grad_norm": 0.7620334029197693, - "learning_rate": 4.731457800511509e-06, - "loss": 0.0362, - "step": 3090 - }, - { - "epoch": 0.5270106827929776, - "grad_norm": 1.2506636381149292, - "learning_rate": 4.729752770673487e-06, - "loss": 0.1245, - "step": 3091 - }, - { - "epoch": 0.5271811812345153, - "grad_norm": 1.176638126373291, - "learning_rate": 4.728047740835465e-06, - "loss": 0.1591, - "step": 3092 - }, - { - "epoch": 0.527351679676053, - "grad_norm": 0.878821611404419, - "learning_rate": 4.726342710997443e-06, - "loss": 0.0863, - "step": 3093 - }, - { - "epoch": 0.5275221781175906, - "grad_norm": 1.617058515548706, - "learning_rate": 4.724637681159421e-06, - "loss": 0.1962, - "step": 3094 - }, - { - "epoch": 0.5276926765591283, - "grad_norm": 0.9386082887649536, - "learning_rate": 4.722932651321398e-06, - "loss": 0.0925, - "step": 3095 - }, - { - "epoch": 0.527863175000666, - "grad_norm": 1.0812885761260986, - "learning_rate": 4.721227621483376e-06, - "loss": 0.0651, - "step": 3096 - }, - { - "epoch": 0.5280336734422036, - "grad_norm": 0.925205409526825, - "learning_rate": 4.719522591645354e-06, - "loss": 0.0984, - "step": 3097 - }, - { - "epoch": 0.5282041718837414, - "grad_norm": 1.1007323265075684, - "learning_rate": 4.717817561807332e-06, - "loss": 0.1695, - "step": 3098 - }, - { - "epoch": 0.5283746703252791, - "grad_norm": 0.8048510551452637, - "learning_rate": 4.716112531969309e-06, - "loss": 0.0666, - "step": 3099 - }, - { - "epoch": 0.5285451687668168, - "grad_norm": 0.813273549079895, - "learning_rate": 4.714407502131288e-06, - "loss": 0.0776, - "step": 3100 - }, - { - "epoch": 0.5285451687668168, - "eval_f1_score": 0.3657957244655582, - "eval_loss": 0.1428575962781906, - "eval_runtime": 182.6607, - "eval_samples_per_second": 54.746, - "eval_steps_per_second": 3.422, - "step": 3100 - }, - { - "epoch": 0.5287156672083544, - "grad_norm": 1.064518928527832, - "learning_rate": 4.712702472293266e-06, - "loss": 0.1235, - "step": 3101 - }, - { - "epoch": 0.5288861656498921, - "grad_norm": 1.4153354167938232, - "learning_rate": 4.710997442455244e-06, - "loss": 0.2323, - "step": 3102 - }, - { - "epoch": 0.5290566640914298, - "grad_norm": 1.1358619928359985, - "learning_rate": 4.709292412617221e-06, - "loss": 0.1239, - "step": 3103 - }, - { - "epoch": 0.5292271625329674, - "grad_norm": 1.07460618019104, - "learning_rate": 4.707587382779199e-06, - "loss": 0.1017, - "step": 3104 - }, - { - "epoch": 0.5293976609745051, - "grad_norm": 1.2761073112487793, - "learning_rate": 4.705882352941177e-06, - "loss": 0.1273, - "step": 3105 - }, - { - "epoch": 0.5295681594160428, - "grad_norm": 1.2396087646484375, - "learning_rate": 4.704177323103155e-06, - "loss": 0.1489, - "step": 3106 - }, - { - "epoch": 0.5297386578575806, - "grad_norm": 1.3469845056533813, - "learning_rate": 4.702472293265132e-06, - "loss": 0.1379, - "step": 3107 - }, - { - "epoch": 0.5299091562991182, - "grad_norm": 0.8891554474830627, - "learning_rate": 4.70076726342711e-06, - "loss": 0.0653, - "step": 3108 - }, - { - "epoch": 0.5300796547406559, - "grad_norm": 1.4598875045776367, - "learning_rate": 4.699062233589089e-06, - "loss": 0.2002, - "step": 3109 - }, - { - "epoch": 0.5302501531821936, - "grad_norm": 0.964966893196106, - "learning_rate": 4.697357203751066e-06, - "loss": 0.1214, - "step": 3110 - }, - { - "epoch": 0.5304206516237312, - "grad_norm": 0.827178418636322, - "learning_rate": 4.695652173913044e-06, - "loss": 0.0768, - "step": 3111 - }, - { - "epoch": 0.5305911500652689, - "grad_norm": 1.1814799308776855, - "learning_rate": 4.693947144075022e-06, - "loss": 0.1719, - "step": 3112 - }, - { - "epoch": 0.5307616485068066, - "grad_norm": 1.0025556087493896, - "learning_rate": 4.692242114237e-06, - "loss": 0.1409, - "step": 3113 - }, - { - "epoch": 0.5309321469483443, - "grad_norm": 0.6644530892372131, - "learning_rate": 4.690537084398978e-06, - "loss": 0.063, - "step": 3114 - }, - { - "epoch": 0.5311026453898819, - "grad_norm": 1.0593664646148682, - "learning_rate": 4.688832054560955e-06, - "loss": 0.1329, - "step": 3115 - }, - { - "epoch": 0.5312731438314197, - "grad_norm": 1.1395896673202515, - "learning_rate": 4.687127024722933e-06, - "loss": 0.114, - "step": 3116 - }, - { - "epoch": 0.5314436422729574, - "grad_norm": 1.1389448642730713, - "learning_rate": 4.6854219948849114e-06, - "loss": 0.0903, - "step": 3117 - }, - { - "epoch": 0.5316141407144951, - "grad_norm": 0.9166980981826782, - "learning_rate": 4.6837169650468885e-06, - "loss": 0.1301, - "step": 3118 - }, - { - "epoch": 0.5317846391560327, - "grad_norm": 1.4888975620269775, - "learning_rate": 4.6820119352088665e-06, - "loss": 0.1246, - "step": 3119 - }, - { - "epoch": 0.5319551375975704, - "grad_norm": 0.9784233570098877, - "learning_rate": 4.6803069053708444e-06, - "loss": 0.074, - "step": 3120 - }, - { - "epoch": 0.5321256360391081, - "grad_norm": 1.2400338649749756, - "learning_rate": 4.678601875532822e-06, - "loss": 0.1186, - "step": 3121 - }, - { - "epoch": 0.5322961344806457, - "grad_norm": 1.0570576190948486, - "learning_rate": 4.6768968456948e-06, - "loss": 0.1189, - "step": 3122 - }, - { - "epoch": 0.5324666329221834, - "grad_norm": 0.9685819745063782, - "learning_rate": 4.675191815856777e-06, - "loss": 0.0836, - "step": 3123 - }, - { - "epoch": 0.5326371313637212, - "grad_norm": 0.8460286259651184, - "learning_rate": 4.673486786018755e-06, - "loss": 0.0361, - "step": 3124 - }, - { - "epoch": 0.5328076298052589, - "grad_norm": 1.683355689048767, - "learning_rate": 4.671781756180734e-06, - "loss": 0.1801, - "step": 3125 - }, - { - "epoch": 0.5329781282467965, - "grad_norm": 0.8989179134368896, - "learning_rate": 4.670076726342711e-06, - "loss": 0.0931, - "step": 3126 - }, - { - "epoch": 0.5331486266883342, - "grad_norm": 1.4232580661773682, - "learning_rate": 4.668371696504689e-06, - "loss": 0.1033, - "step": 3127 - }, - { - "epoch": 0.5333191251298719, - "grad_norm": 1.0907427072525024, - "learning_rate": 4.666666666666667e-06, - "loss": 0.0894, - "step": 3128 - }, - { - "epoch": 0.5334896235714095, - "grad_norm": 0.8971290588378906, - "learning_rate": 4.664961636828645e-06, - "loss": 0.1184, - "step": 3129 - }, - { - "epoch": 0.5336601220129472, - "grad_norm": 1.0233826637268066, - "learning_rate": 4.663256606990623e-06, - "loss": 0.0814, - "step": 3130 - }, - { - "epoch": 0.5338306204544849, - "grad_norm": 2.420344591140747, - "learning_rate": 4.6615515771526e-06, - "loss": 0.2647, - "step": 3131 - }, - { - "epoch": 0.5340011188960226, - "grad_norm": 1.550612211227417, - "learning_rate": 4.659846547314578e-06, - "loss": 0.1122, - "step": 3132 - }, - { - "epoch": 0.5341716173375602, - "grad_norm": 1.053017258644104, - "learning_rate": 4.658141517476556e-06, - "loss": 0.0879, - "step": 3133 - }, - { - "epoch": 0.534342115779098, - "grad_norm": 0.7617818117141724, - "learning_rate": 4.656436487638534e-06, - "loss": 0.0668, - "step": 3134 - }, - { - "epoch": 0.5345126142206357, - "grad_norm": 2.0507922172546387, - "learning_rate": 4.654731457800512e-06, - "loss": 0.2682, - "step": 3135 - }, - { - "epoch": 0.5346831126621733, - "grad_norm": 1.3223862648010254, - "learning_rate": 4.65302642796249e-06, - "loss": 0.1969, - "step": 3136 - }, - { - "epoch": 0.534853611103711, - "grad_norm": 0.9410365223884583, - "learning_rate": 4.651321398124468e-06, - "loss": 0.0857, - "step": 3137 - }, - { - "epoch": 0.5350241095452487, - "grad_norm": 1.1620447635650635, - "learning_rate": 4.649616368286445e-06, - "loss": 0.1836, - "step": 3138 - }, - { - "epoch": 0.5351946079867864, - "grad_norm": 1.2213563919067383, - "learning_rate": 4.647911338448423e-06, - "loss": 0.1202, - "step": 3139 - }, - { - "epoch": 0.535365106428324, - "grad_norm": 1.7266759872436523, - "learning_rate": 4.646206308610401e-06, - "loss": 0.2072, - "step": 3140 - }, - { - "epoch": 0.5355356048698617, - "grad_norm": 1.4056097269058228, - "learning_rate": 4.644501278772379e-06, - "loss": 0.2147, - "step": 3141 - }, - { - "epoch": 0.5357061033113995, - "grad_norm": 1.0043752193450928, - "learning_rate": 4.642796248934357e-06, - "loss": 0.1131, - "step": 3142 - }, - { - "epoch": 0.535876601752937, - "grad_norm": 0.7852779030799866, - "learning_rate": 4.641091219096335e-06, - "loss": 0.0757, - "step": 3143 - }, - { - "epoch": 0.5360471001944748, - "grad_norm": 1.6596177816390991, - "learning_rate": 4.639386189258313e-06, - "loss": 0.2113, - "step": 3144 - }, - { - "epoch": 0.5362175986360125, - "grad_norm": 1.0587300062179565, - "learning_rate": 4.637681159420291e-06, - "loss": 0.1213, - "step": 3145 - }, - { - "epoch": 0.5363880970775502, - "grad_norm": 1.4061554670333862, - "learning_rate": 4.635976129582268e-06, - "loss": 0.0821, - "step": 3146 - }, - { - "epoch": 0.5365585955190878, - "grad_norm": 1.5772520303726196, - "learning_rate": 4.634271099744246e-06, - "loss": 0.2397, - "step": 3147 - }, - { - "epoch": 0.5367290939606255, - "grad_norm": 1.2124773263931274, - "learning_rate": 4.6325660699062236e-06, - "loss": 0.208, - "step": 3148 - }, - { - "epoch": 0.5368995924021632, - "grad_norm": 1.1358237266540527, - "learning_rate": 4.6308610400682015e-06, - "loss": 0.1777, - "step": 3149 - }, - { - "epoch": 0.5370700908437008, - "grad_norm": 0.6872425675392151, - "learning_rate": 4.6291560102301795e-06, - "loss": 0.0927, - "step": 3150 - }, - { - "epoch": 0.5372405892852385, - "grad_norm": 1.0333954095840454, - "learning_rate": 4.627450980392157e-06, - "loss": 0.046, - "step": 3151 - }, - { - "epoch": 0.5374110877267763, - "grad_norm": 1.1037591695785522, - "learning_rate": 4.625745950554135e-06, - "loss": 0.1501, - "step": 3152 - }, - { - "epoch": 0.537581586168314, - "grad_norm": 0.85722416639328, - "learning_rate": 4.624040920716113e-06, - "loss": 0.1153, - "step": 3153 - }, - { - "epoch": 0.5377520846098516, - "grad_norm": 1.087350606918335, - "learning_rate": 4.62233589087809e-06, - "loss": 0.1529, - "step": 3154 - }, - { - "epoch": 0.5379225830513893, - "grad_norm": 0.8361851572990417, - "learning_rate": 4.620630861040068e-06, - "loss": 0.0732, - "step": 3155 - }, - { - "epoch": 0.538093081492927, - "grad_norm": 1.204031229019165, - "learning_rate": 4.618925831202046e-06, - "loss": 0.1681, - "step": 3156 - }, - { - "epoch": 0.5382635799344646, - "grad_norm": 0.6141670346260071, - "learning_rate": 4.617220801364024e-06, - "loss": 0.0487, - "step": 3157 - }, - { - "epoch": 0.5384340783760023, - "grad_norm": 0.8045641779899597, - "learning_rate": 4.615515771526002e-06, - "loss": 0.0821, - "step": 3158 - }, - { - "epoch": 0.53860457681754, - "grad_norm": 1.0021306276321411, - "learning_rate": 4.61381074168798e-06, - "loss": 0.1144, - "step": 3159 - }, - { - "epoch": 0.5387750752590778, - "grad_norm": 1.6075087785720825, - "learning_rate": 4.612105711849958e-06, - "loss": 0.1496, - "step": 3160 - }, - { - "epoch": 0.5389455737006154, - "grad_norm": 0.6130344271659851, - "learning_rate": 4.610400682011936e-06, - "loss": 0.0743, - "step": 3161 - }, - { - "epoch": 0.5391160721421531, - "grad_norm": 0.8822925090789795, - "learning_rate": 4.608695652173913e-06, - "loss": 0.1014, - "step": 3162 - }, - { - "epoch": 0.5392865705836908, - "grad_norm": 1.3176257610321045, - "learning_rate": 4.606990622335891e-06, - "loss": 0.1362, - "step": 3163 - }, - { - "epoch": 0.5394570690252284, - "grad_norm": 1.2960896492004395, - "learning_rate": 4.605285592497869e-06, - "loss": 0.1509, - "step": 3164 - }, - { - "epoch": 0.5396275674667661, - "grad_norm": 1.0878572463989258, - "learning_rate": 4.603580562659847e-06, - "loss": 0.1354, - "step": 3165 - }, - { - "epoch": 0.5397980659083038, - "grad_norm": 1.2113642692565918, - "learning_rate": 4.601875532821825e-06, - "loss": 0.1624, - "step": 3166 - }, - { - "epoch": 0.5399685643498415, - "grad_norm": 0.9236718416213989, - "learning_rate": 4.600170502983803e-06, - "loss": 0.108, - "step": 3167 - }, - { - "epoch": 0.5401390627913791, - "grad_norm": 1.6693158149719238, - "learning_rate": 4.598465473145781e-06, - "loss": 0.1436, - "step": 3168 - }, - { - "epoch": 0.5403095612329168, - "grad_norm": 1.2242648601531982, - "learning_rate": 4.596760443307759e-06, - "loss": 0.1203, - "step": 3169 - }, - { - "epoch": 0.5404800596744546, - "grad_norm": 1.18451988697052, - "learning_rate": 4.595055413469736e-06, - "loss": 0.1556, - "step": 3170 - }, - { - "epoch": 0.5406505581159923, - "grad_norm": 1.312850832939148, - "learning_rate": 4.593350383631714e-06, - "loss": 0.1569, - "step": 3171 - }, - { - "epoch": 0.5408210565575299, - "grad_norm": 0.8044204115867615, - "learning_rate": 4.591645353793692e-06, - "loss": 0.1119, - "step": 3172 - }, - { - "epoch": 0.5409915549990676, - "grad_norm": 1.1342873573303223, - "learning_rate": 4.58994032395567e-06, - "loss": 0.0958, - "step": 3173 - }, - { - "epoch": 0.5411620534406053, - "grad_norm": 0.992530107498169, - "learning_rate": 4.588235294117647e-06, - "loss": 0.0664, - "step": 3174 - }, - { - "epoch": 0.5413325518821429, - "grad_norm": 1.0137033462524414, - "learning_rate": 4.586530264279625e-06, - "loss": 0.0971, - "step": 3175 - }, - { - "epoch": 0.5415030503236806, - "grad_norm": 1.353960633277893, - "learning_rate": 4.5848252344416036e-06, - "loss": 0.1803, - "step": 3176 - }, - { - "epoch": 0.5416735487652183, - "grad_norm": 0.8238753080368042, - "learning_rate": 4.583120204603581e-06, - "loss": 0.0998, - "step": 3177 - }, - { - "epoch": 0.541844047206756, - "grad_norm": 1.0195826292037964, - "learning_rate": 4.581415174765559e-06, - "loss": 0.0941, - "step": 3178 - }, - { - "epoch": 0.5420145456482937, - "grad_norm": 1.0271581411361694, - "learning_rate": 4.5797101449275366e-06, - "loss": 0.1519, - "step": 3179 - }, - { - "epoch": 0.5421850440898314, - "grad_norm": 1.2750300168991089, - "learning_rate": 4.5780051150895145e-06, - "loss": 0.1654, - "step": 3180 - }, - { - "epoch": 0.5423555425313691, - "grad_norm": 1.4126577377319336, - "learning_rate": 4.5763000852514925e-06, - "loss": 0.244, - "step": 3181 - }, - { - "epoch": 0.5425260409729067, - "grad_norm": 1.528464913368225, - "learning_rate": 4.5745950554134696e-06, - "loss": 0.1097, - "step": 3182 - }, - { - "epoch": 0.5426965394144444, - "grad_norm": 1.406211018562317, - "learning_rate": 4.5728900255754475e-06, - "loss": 0.1076, - "step": 3183 - }, - { - "epoch": 0.5428670378559821, - "grad_norm": 1.0801411867141724, - "learning_rate": 4.571184995737426e-06, - "loss": 0.1189, - "step": 3184 - }, - { - "epoch": 0.5430375362975198, - "grad_norm": 0.8011865019798279, - "learning_rate": 4.569479965899403e-06, - "loss": 0.1224, - "step": 3185 - }, - { - "epoch": 0.5432080347390574, - "grad_norm": 1.7127665281295776, - "learning_rate": 4.567774936061381e-06, - "loss": 0.2387, - "step": 3186 - }, - { - "epoch": 0.5433785331805951, - "grad_norm": 1.7253592014312744, - "learning_rate": 4.566069906223359e-06, - "loss": 0.2265, - "step": 3187 - }, - { - "epoch": 0.5435490316221329, - "grad_norm": 1.0014303922653198, - "learning_rate": 4.564364876385337e-06, - "loss": 0.1081, - "step": 3188 - }, - { - "epoch": 0.5437195300636705, - "grad_norm": 0.881578803062439, - "learning_rate": 4.562659846547315e-06, - "loss": 0.079, - "step": 3189 - }, - { - "epoch": 0.5438900285052082, - "grad_norm": 1.217948079109192, - "learning_rate": 4.560954816709292e-06, - "loss": 0.1245, - "step": 3190 - }, - { - "epoch": 0.5440605269467459, - "grad_norm": 1.711934208869934, - "learning_rate": 4.55924978687127e-06, - "loss": 0.2053, - "step": 3191 - }, - { - "epoch": 0.5442310253882836, - "grad_norm": 1.4666311740875244, - "learning_rate": 4.557544757033249e-06, - "loss": 0.1972, - "step": 3192 - }, - { - "epoch": 0.5444015238298212, - "grad_norm": 0.7553595900535583, - "learning_rate": 4.555839727195226e-06, - "loss": 0.0775, - "step": 3193 - }, - { - "epoch": 0.5445720222713589, - "grad_norm": 1.167011022567749, - "learning_rate": 4.554134697357204e-06, - "loss": 0.1076, - "step": 3194 - }, - { - "epoch": 0.5447425207128966, - "grad_norm": 1.2989052534103394, - "learning_rate": 4.552429667519182e-06, - "loss": 0.2399, - "step": 3195 - }, - { - "epoch": 0.5449130191544342, - "grad_norm": 1.0434139966964722, - "learning_rate": 4.55072463768116e-06, - "loss": 0.0606, - "step": 3196 - }, - { - "epoch": 0.545083517595972, - "grad_norm": 0.8419189453125, - "learning_rate": 4.549019607843138e-06, - "loss": 0.0666, - "step": 3197 - }, - { - "epoch": 0.5452540160375097, - "grad_norm": 1.121074914932251, - "learning_rate": 4.547314578005115e-06, - "loss": 0.1093, - "step": 3198 - }, - { - "epoch": 0.5454245144790474, - "grad_norm": 0.7801600098609924, - "learning_rate": 4.545609548167093e-06, - "loss": 0.1091, - "step": 3199 - }, - { - "epoch": 0.545595012920585, - "grad_norm": 1.791938066482544, - "learning_rate": 4.543904518329071e-06, - "loss": 0.118, - "step": 3200 - }, - { - "epoch": 0.545595012920585, - "eval_f1_score": 0.35294117647058826, - "eval_loss": 0.13997682929039001, - "eval_runtime": 182.7002, - "eval_samples_per_second": 54.734, - "eval_steps_per_second": 3.421, - "step": 3200 - }, - { - "epoch": 0.5457655113621227, - "grad_norm": 0.9591838121414185, - "learning_rate": 4.542199488491049e-06, - "loss": 0.1401, - "step": 3201 - }, - { - "epoch": 0.5459360098036604, - "grad_norm": 0.9607254266738892, - "learning_rate": 4.540494458653027e-06, - "loss": 0.1178, - "step": 3202 - }, - { - "epoch": 0.546106508245198, - "grad_norm": 1.1044963598251343, - "learning_rate": 4.538789428815005e-06, - "loss": 0.1426, - "step": 3203 - }, - { - "epoch": 0.5462770066867357, - "grad_norm": 0.8686749935150146, - "learning_rate": 4.537084398976983e-06, - "loss": 0.098, - "step": 3204 - }, - { - "epoch": 0.5464475051282734, - "grad_norm": 1.3227715492248535, - "learning_rate": 4.535379369138961e-06, - "loss": 0.1531, - "step": 3205 - }, - { - "epoch": 0.5466180035698112, - "grad_norm": 1.3910294771194458, - "learning_rate": 4.533674339300938e-06, - "loss": 0.1657, - "step": 3206 - }, - { - "epoch": 0.5467885020113488, - "grad_norm": 0.758912205696106, - "learning_rate": 4.531969309462916e-06, - "loss": 0.0972, - "step": 3207 - }, - { - "epoch": 0.5469590004528865, - "grad_norm": 1.038732647895813, - "learning_rate": 4.530264279624894e-06, - "loss": 0.1369, - "step": 3208 - }, - { - "epoch": 0.5471294988944242, - "grad_norm": 1.2257394790649414, - "learning_rate": 4.528559249786872e-06, - "loss": 0.1925, - "step": 3209 - }, - { - "epoch": 0.5472999973359618, - "grad_norm": 0.6908015012741089, - "learning_rate": 4.5268542199488496e-06, - "loss": 0.1089, - "step": 3210 - }, - { - "epoch": 0.5474704957774995, - "grad_norm": 2.2734169960021973, - "learning_rate": 4.5251491901108275e-06, - "loss": 0.24, - "step": 3211 - }, - { - "epoch": 0.5476409942190372, - "grad_norm": 0.8731111884117126, - "learning_rate": 4.5234441602728055e-06, - "loss": 0.0902, - "step": 3212 - }, - { - "epoch": 0.5478114926605749, - "grad_norm": 1.2739516496658325, - "learning_rate": 4.5217391304347826e-06, - "loss": 0.1523, - "step": 3213 - }, - { - "epoch": 0.5479819911021125, - "grad_norm": 1.1364442110061646, - "learning_rate": 4.5200341005967605e-06, - "loss": 0.112, - "step": 3214 - }, - { - "epoch": 0.5481524895436503, - "grad_norm": 1.0354875326156616, - "learning_rate": 4.5183290707587384e-06, - "loss": 0.1174, - "step": 3215 - }, - { - "epoch": 0.548322987985188, - "grad_norm": 0.9430590271949768, - "learning_rate": 4.516624040920716e-06, - "loss": 0.1376, - "step": 3216 - }, - { - "epoch": 0.5484934864267257, - "grad_norm": 1.5261237621307373, - "learning_rate": 4.514919011082694e-06, - "loss": 0.1735, - "step": 3217 - }, - { - "epoch": 0.5486639848682633, - "grad_norm": 1.2810159921646118, - "learning_rate": 4.513213981244672e-06, - "loss": 0.1195, - "step": 3218 - }, - { - "epoch": 0.548834483309801, - "grad_norm": 1.7963886260986328, - "learning_rate": 4.51150895140665e-06, - "loss": 0.1406, - "step": 3219 - }, - { - "epoch": 0.5490049817513387, - "grad_norm": 1.2858192920684814, - "learning_rate": 4.509803921568628e-06, - "loss": 0.1395, - "step": 3220 - }, - { - "epoch": 0.5491754801928763, - "grad_norm": 0.7192848324775696, - "learning_rate": 4.508098891730605e-06, - "loss": 0.0846, - "step": 3221 - }, - { - "epoch": 0.549345978634414, - "grad_norm": 1.022417426109314, - "learning_rate": 4.506393861892583e-06, - "loss": 0.0808, - "step": 3222 - }, - { - "epoch": 0.5495164770759517, - "grad_norm": 1.14131760597229, - "learning_rate": 4.504688832054561e-06, - "loss": 0.142, - "step": 3223 - }, - { - "epoch": 0.5496869755174895, - "grad_norm": 0.8375114798545837, - "learning_rate": 4.502983802216539e-06, - "loss": 0.0939, - "step": 3224 - }, - { - "epoch": 0.5498574739590271, - "grad_norm": 1.2288278341293335, - "learning_rate": 4.501278772378517e-06, - "loss": 0.1465, - "step": 3225 - }, - { - "epoch": 0.5500279724005648, - "grad_norm": 1.3353421688079834, - "learning_rate": 4.499573742540495e-06, - "loss": 0.1193, - "step": 3226 - }, - { - "epoch": 0.5501984708421025, - "grad_norm": 1.305950403213501, - "learning_rate": 4.497868712702473e-06, - "loss": 0.1002, - "step": 3227 - }, - { - "epoch": 0.5503689692836401, - "grad_norm": 1.2217447757720947, - "learning_rate": 4.496163682864451e-06, - "loss": 0.1215, - "step": 3228 - }, - { - "epoch": 0.5505394677251778, - "grad_norm": 1.4431780576705933, - "learning_rate": 4.494458653026428e-06, - "loss": 0.1579, - "step": 3229 - }, - { - "epoch": 0.5507099661667155, - "grad_norm": 0.9493196606636047, - "learning_rate": 4.492753623188406e-06, - "loss": 0.0546, - "step": 3230 - }, - { - "epoch": 0.5508804646082532, - "grad_norm": 1.1164307594299316, - "learning_rate": 4.491048593350384e-06, - "loss": 0.1236, - "step": 3231 - }, - { - "epoch": 0.5510509630497908, - "grad_norm": 1.8813072443008423, - "learning_rate": 4.489343563512362e-06, - "loss": 0.1998, - "step": 3232 - }, - { - "epoch": 0.5512214614913286, - "grad_norm": 0.7001495957374573, - "learning_rate": 4.48763853367434e-06, - "loss": 0.1295, - "step": 3233 - }, - { - "epoch": 0.5513919599328663, - "grad_norm": 0.6423521041870117, - "learning_rate": 4.485933503836318e-06, - "loss": 0.0689, - "step": 3234 - }, - { - "epoch": 0.5515624583744039, - "grad_norm": 0.7978529930114746, - "learning_rate": 4.484228473998296e-06, - "loss": 0.074, - "step": 3235 - }, - { - "epoch": 0.5517329568159416, - "grad_norm": 0.6903978586196899, - "learning_rate": 4.482523444160274e-06, - "loss": 0.0555, - "step": 3236 - }, - { - "epoch": 0.5519034552574793, - "grad_norm": 1.7434931993484497, - "learning_rate": 4.480818414322251e-06, - "loss": 0.1858, - "step": 3237 - }, - { - "epoch": 0.552073953699017, - "grad_norm": 0.8836773037910461, - "learning_rate": 4.479113384484229e-06, - "loss": 0.1221, - "step": 3238 - }, - { - "epoch": 0.5522444521405546, - "grad_norm": 1.0610016584396362, - "learning_rate": 4.477408354646207e-06, - "loss": 0.1436, - "step": 3239 - }, - { - "epoch": 0.5524149505820923, - "grad_norm": 1.3645976781845093, - "learning_rate": 4.475703324808185e-06, - "loss": 0.1772, - "step": 3240 - }, - { - "epoch": 0.55258544902363, - "grad_norm": 2.251723527908325, - "learning_rate": 4.4739982949701625e-06, - "loss": 0.2335, - "step": 3241 - }, - { - "epoch": 0.5527559474651677, - "grad_norm": 1.4421985149383545, - "learning_rate": 4.47229326513214e-06, - "loss": 0.1223, - "step": 3242 - }, - { - "epoch": 0.5529264459067054, - "grad_norm": 1.719007968902588, - "learning_rate": 4.4705882352941184e-06, - "loss": 0.1606, - "step": 3243 - }, - { - "epoch": 0.5530969443482431, - "grad_norm": 0.9367814064025879, - "learning_rate": 4.468883205456096e-06, - "loss": 0.0743, - "step": 3244 - }, - { - "epoch": 0.5532674427897808, - "grad_norm": 1.8511252403259277, - "learning_rate": 4.4671781756180735e-06, - "loss": 0.2017, - "step": 3245 - }, - { - "epoch": 0.5534379412313184, - "grad_norm": 0.7525725364685059, - "learning_rate": 4.4654731457800514e-06, - "loss": 0.0691, - "step": 3246 - }, - { - "epoch": 0.5536084396728561, - "grad_norm": 0.9518800377845764, - "learning_rate": 4.463768115942029e-06, - "loss": 0.1091, - "step": 3247 - }, - { - "epoch": 0.5537789381143938, - "grad_norm": 1.3286054134368896, - "learning_rate": 4.462063086104007e-06, - "loss": 0.1454, - "step": 3248 - }, - { - "epoch": 0.5539494365559314, - "grad_norm": 1.1900709867477417, - "learning_rate": 4.4603580562659844e-06, - "loss": 0.0533, - "step": 3249 - }, - { - "epoch": 0.5541199349974691, - "grad_norm": 1.1784849166870117, - "learning_rate": 4.458653026427962e-06, - "loss": 0.1506, - "step": 3250 - }, - { - "epoch": 0.5542904334390069, - "grad_norm": 1.5197845697402954, - "learning_rate": 4.456947996589941e-06, - "loss": 0.1212, - "step": 3251 - }, - { - "epoch": 0.5544609318805446, - "grad_norm": 1.4505349397659302, - "learning_rate": 4.455242966751918e-06, - "loss": 0.163, - "step": 3252 - }, - { - "epoch": 0.5546314303220822, - "grad_norm": 1.64879310131073, - "learning_rate": 4.453537936913896e-06, - "loss": 0.1799, - "step": 3253 - }, - { - "epoch": 0.5548019287636199, - "grad_norm": 1.1190886497497559, - "learning_rate": 4.451832907075874e-06, - "loss": 0.1426, - "step": 3254 - }, - { - "epoch": 0.5549724272051576, - "grad_norm": 0.9179021120071411, - "learning_rate": 4.450127877237852e-06, - "loss": 0.1091, - "step": 3255 - }, - { - "epoch": 0.5551429256466952, - "grad_norm": 1.9500393867492676, - "learning_rate": 4.44842284739983e-06, - "loss": 0.1813, - "step": 3256 - }, - { - "epoch": 0.5553134240882329, - "grad_norm": 1.007033109664917, - "learning_rate": 4.446717817561807e-06, - "loss": 0.078, - "step": 3257 - }, - { - "epoch": 0.5554839225297706, - "grad_norm": 1.0274510383605957, - "learning_rate": 4.445012787723785e-06, - "loss": 0.0701, - "step": 3258 - }, - { - "epoch": 0.5556544209713083, - "grad_norm": 1.0544816255569458, - "learning_rate": 4.443307757885764e-06, - "loss": 0.1107, - "step": 3259 - }, - { - "epoch": 0.555824919412846, - "grad_norm": 1.1101003885269165, - "learning_rate": 4.441602728047741e-06, - "loss": 0.1783, - "step": 3260 - }, - { - "epoch": 0.5559954178543837, - "grad_norm": 1.0594007968902588, - "learning_rate": 4.439897698209719e-06, - "loss": 0.1132, - "step": 3261 - }, - { - "epoch": 0.5561659162959214, - "grad_norm": 1.0110628604888916, - "learning_rate": 4.438192668371697e-06, - "loss": 0.1181, - "step": 3262 - }, - { - "epoch": 0.5563364147374591, - "grad_norm": 1.122523546218872, - "learning_rate": 4.436487638533675e-06, - "loss": 0.1384, - "step": 3263 - }, - { - "epoch": 0.5565069131789967, - "grad_norm": 1.0796430110931396, - "learning_rate": 4.434782608695653e-06, - "loss": 0.118, - "step": 3264 - }, - { - "epoch": 0.5566774116205344, - "grad_norm": 1.0949740409851074, - "learning_rate": 4.43307757885763e-06, - "loss": 0.0754, - "step": 3265 - }, - { - "epoch": 0.5568479100620721, - "grad_norm": 1.1344374418258667, - "learning_rate": 4.431372549019608e-06, - "loss": 0.1355, - "step": 3266 - }, - { - "epoch": 0.5570184085036097, - "grad_norm": 0.9445719718933105, - "learning_rate": 4.429667519181586e-06, - "loss": 0.0758, - "step": 3267 - }, - { - "epoch": 0.5571889069451474, - "grad_norm": 1.1814863681793213, - "learning_rate": 4.427962489343564e-06, - "loss": 0.157, - "step": 3268 - }, - { - "epoch": 0.5573594053866852, - "grad_norm": 1.7171623706817627, - "learning_rate": 4.426257459505542e-06, - "loss": 0.2329, - "step": 3269 - }, - { - "epoch": 0.5575299038282229, - "grad_norm": 2.123250722885132, - "learning_rate": 4.42455242966752e-06, - "loss": 0.2147, - "step": 3270 - }, - { - "epoch": 0.5577004022697605, - "grad_norm": 0.832039475440979, - "learning_rate": 4.422847399829498e-06, - "loss": 0.1344, - "step": 3271 - }, - { - "epoch": 0.5578709007112982, - "grad_norm": 1.1542187929153442, - "learning_rate": 4.4211423699914755e-06, - "loss": 0.1422, - "step": 3272 - }, - { - "epoch": 0.5580413991528359, - "grad_norm": 1.8834720849990845, - "learning_rate": 4.419437340153453e-06, - "loss": 0.1717, - "step": 3273 - }, - { - "epoch": 0.5582118975943735, - "grad_norm": 0.9512043595314026, - "learning_rate": 4.417732310315431e-06, - "loss": 0.1079, - "step": 3274 - }, - { - "epoch": 0.5583823960359112, - "grad_norm": 1.4551664590835571, - "learning_rate": 4.4160272804774085e-06, - "loss": 0.1559, - "step": 3275 - }, - { - "epoch": 0.5585528944774489, - "grad_norm": 1.529512882232666, - "learning_rate": 4.4143222506393865e-06, - "loss": 0.1045, - "step": 3276 - }, - { - "epoch": 0.5587233929189866, - "grad_norm": 1.2470623254776, - "learning_rate": 4.4126172208013644e-06, - "loss": 0.1676, - "step": 3277 - }, - { - "epoch": 0.5588938913605243, - "grad_norm": 1.3405524492263794, - "learning_rate": 4.410912190963342e-06, - "loss": 0.1573, - "step": 3278 - }, - { - "epoch": 0.559064389802062, - "grad_norm": 0.9211791157722473, - "learning_rate": 4.40920716112532e-06, - "loss": 0.1034, - "step": 3279 - }, - { - "epoch": 0.5592348882435997, - "grad_norm": 0.8946516513824463, - "learning_rate": 4.407502131287298e-06, - "loss": 0.0788, - "step": 3280 - }, - { - "epoch": 0.5594053866851373, - "grad_norm": 1.298832893371582, - "learning_rate": 4.405797101449275e-06, - "loss": 0.1464, - "step": 3281 - }, - { - "epoch": 0.559575885126675, - "grad_norm": 0.7750890254974365, - "learning_rate": 4.404092071611253e-06, - "loss": 0.0968, - "step": 3282 - }, - { - "epoch": 0.5597463835682127, - "grad_norm": 1.0168732404708862, - "learning_rate": 4.402387041773231e-06, - "loss": 0.1289, - "step": 3283 - }, - { - "epoch": 0.5599168820097504, - "grad_norm": 0.8322630524635315, - "learning_rate": 4.400682011935209e-06, - "loss": 0.0385, - "step": 3284 - }, - { - "epoch": 0.560087380451288, - "grad_norm": 0.9391579627990723, - "learning_rate": 4.398976982097187e-06, - "loss": 0.0804, - "step": 3285 - }, - { - "epoch": 0.5602578788928257, - "grad_norm": 1.2026317119598389, - "learning_rate": 4.397271952259165e-06, - "loss": 0.1053, - "step": 3286 - }, - { - "epoch": 0.5604283773343635, - "grad_norm": 0.9412278532981873, - "learning_rate": 4.395566922421143e-06, - "loss": 0.0869, - "step": 3287 - }, - { - "epoch": 0.5605988757759011, - "grad_norm": 0.8000788688659668, - "learning_rate": 4.39386189258312e-06, - "loss": 0.0791, - "step": 3288 - }, - { - "epoch": 0.5607693742174388, - "grad_norm": 1.013322353363037, - "learning_rate": 4.392156862745098e-06, - "loss": 0.1263, - "step": 3289 - }, - { - "epoch": 0.5609398726589765, - "grad_norm": 1.9154322147369385, - "learning_rate": 4.390451832907076e-06, - "loss": 0.1904, - "step": 3290 - }, - { - "epoch": 0.5611103711005142, - "grad_norm": 0.8865220546722412, - "learning_rate": 4.388746803069054e-06, - "loss": 0.0427, - "step": 3291 - }, - { - "epoch": 0.5612808695420518, - "grad_norm": 1.1697574853897095, - "learning_rate": 4.387041773231032e-06, - "loss": 0.0839, - "step": 3292 - }, - { - "epoch": 0.5614513679835895, - "grad_norm": 0.6758675575256348, - "learning_rate": 4.38533674339301e-06, - "loss": 0.0544, - "step": 3293 - }, - { - "epoch": 0.5616218664251272, - "grad_norm": 1.0683929920196533, - "learning_rate": 4.383631713554988e-06, - "loss": 0.1041, - "step": 3294 - }, - { - "epoch": 0.5617923648666648, - "grad_norm": 0.6282282471656799, - "learning_rate": 4.381926683716966e-06, - "loss": 0.06, - "step": 3295 - }, - { - "epoch": 0.5619628633082026, - "grad_norm": 1.2010581493377686, - "learning_rate": 4.380221653878943e-06, - "loss": 0.101, - "step": 3296 - }, - { - "epoch": 0.5621333617497403, - "grad_norm": 0.8598968386650085, - "learning_rate": 4.378516624040921e-06, - "loss": 0.0899, - "step": 3297 - }, - { - "epoch": 0.562303860191278, - "grad_norm": 1.1526156663894653, - "learning_rate": 4.376811594202899e-06, - "loss": 0.1155, - "step": 3298 - }, - { - "epoch": 0.5624743586328156, - "grad_norm": 1.0849175453186035, - "learning_rate": 4.375106564364877e-06, - "loss": 0.1122, - "step": 3299 - }, - { - "epoch": 0.5626448570743533, - "grad_norm": 1.1330934762954712, - "learning_rate": 4.373401534526855e-06, - "loss": 0.1131, - "step": 3300 - }, - { - "epoch": 0.5626448570743533, - "eval_f1_score": 0.3415977961432507, - "eval_loss": 0.14648857712745667, - "eval_runtime": 182.7055, - "eval_samples_per_second": 54.733, - "eval_steps_per_second": 3.421, - "step": 3300 - }, - { - "epoch": 0.562815355515891, - "grad_norm": 1.0012074708938599, - "learning_rate": 4.371696504688833e-06, - "loss": 0.1121, - "step": 3301 - }, - { - "epoch": 0.5629858539574286, - "grad_norm": 1.2949241399765015, - "learning_rate": 4.369991474850811e-06, - "loss": 0.0782, - "step": 3302 - }, - { - "epoch": 0.5631563523989663, - "grad_norm": 1.4209520816802979, - "learning_rate": 4.3682864450127885e-06, - "loss": 0.1423, - "step": 3303 - }, - { - "epoch": 0.563326850840504, - "grad_norm": 1.8024206161499023, - "learning_rate": 4.366581415174766e-06, - "loss": 0.1967, - "step": 3304 - }, - { - "epoch": 0.5634973492820418, - "grad_norm": 1.5164883136749268, - "learning_rate": 4.3648763853367436e-06, - "loss": 0.1384, - "step": 3305 - }, - { - "epoch": 0.5636678477235794, - "grad_norm": 0.7025049924850464, - "learning_rate": 4.3631713554987215e-06, - "loss": 0.0665, - "step": 3306 - }, - { - "epoch": 0.5638383461651171, - "grad_norm": 0.8478127717971802, - "learning_rate": 4.3614663256606995e-06, - "loss": 0.0797, - "step": 3307 - }, - { - "epoch": 0.5640088446066548, - "grad_norm": 1.3572134971618652, - "learning_rate": 4.359761295822677e-06, - "loss": 0.1691, - "step": 3308 - }, - { - "epoch": 0.5641793430481925, - "grad_norm": 1.7278790473937988, - "learning_rate": 4.358056265984655e-06, - "loss": 0.0971, - "step": 3309 - }, - { - "epoch": 0.5643498414897301, - "grad_norm": 1.7452386617660522, - "learning_rate": 4.356351236146633e-06, - "loss": 0.0885, - "step": 3310 - }, - { - "epoch": 0.5645203399312678, - "grad_norm": 1.2867670059204102, - "learning_rate": 4.354646206308611e-06, - "loss": 0.138, - "step": 3311 - }, - { - "epoch": 0.5646908383728055, - "grad_norm": 1.348374843597412, - "learning_rate": 4.352941176470588e-06, - "loss": 0.204, - "step": 3312 - }, - { - "epoch": 0.5648613368143431, - "grad_norm": 1.2316983938217163, - "learning_rate": 4.351236146632566e-06, - "loss": 0.1296, - "step": 3313 - }, - { - "epoch": 0.5650318352558809, - "grad_norm": 0.9647215008735657, - "learning_rate": 4.349531116794544e-06, - "loss": 0.077, - "step": 3314 - }, - { - "epoch": 0.5652023336974186, - "grad_norm": 1.275435209274292, - "learning_rate": 4.347826086956522e-06, - "loss": 0.1548, - "step": 3315 - }, - { - "epoch": 0.5653728321389563, - "grad_norm": 0.8637387752532959, - "learning_rate": 4.3461210571185e-06, - "loss": 0.082, - "step": 3316 - }, - { - "epoch": 0.5655433305804939, - "grad_norm": 0.9807612299919128, - "learning_rate": 4.344416027280477e-06, - "loss": 0.1143, - "step": 3317 - }, - { - "epoch": 0.5657138290220316, - "grad_norm": 1.227411150932312, - "learning_rate": 4.342710997442456e-06, - "loss": 0.0981, - "step": 3318 - }, - { - "epoch": 0.5658843274635693, - "grad_norm": 1.0344433784484863, - "learning_rate": 4.341005967604434e-06, - "loss": 0.0503, - "step": 3319 - }, - { - "epoch": 0.5660548259051069, - "grad_norm": 1.2785519361495972, - "learning_rate": 4.339300937766411e-06, - "loss": 0.1693, - "step": 3320 - }, - { - "epoch": 0.5662253243466446, - "grad_norm": 1.272032618522644, - "learning_rate": 4.337595907928389e-06, - "loss": 0.1297, - "step": 3321 - }, - { - "epoch": 0.5663958227881823, - "grad_norm": 0.9052433967590332, - "learning_rate": 4.335890878090367e-06, - "loss": 0.0809, - "step": 3322 - }, - { - "epoch": 0.5665663212297201, - "grad_norm": 1.420202612876892, - "learning_rate": 4.334185848252345e-06, - "loss": 0.1847, - "step": 3323 - }, - { - "epoch": 0.5667368196712577, - "grad_norm": 1.4241760969161987, - "learning_rate": 4.332480818414322e-06, - "loss": 0.1444, - "step": 3324 - }, - { - "epoch": 0.5669073181127954, - "grad_norm": 1.4881341457366943, - "learning_rate": 4.3307757885763e-06, - "loss": 0.134, - "step": 3325 - }, - { - "epoch": 0.5670778165543331, - "grad_norm": 0.7717912197113037, - "learning_rate": 4.329070758738279e-06, - "loss": 0.0363, - "step": 3326 - }, - { - "epoch": 0.5672483149958707, - "grad_norm": 0.9192156195640564, - "learning_rate": 4.327365728900256e-06, - "loss": 0.1181, - "step": 3327 - }, - { - "epoch": 0.5674188134374084, - "grad_norm": 1.1816763877868652, - "learning_rate": 4.325660699062234e-06, - "loss": 0.1295, - "step": 3328 - }, - { - "epoch": 0.5675893118789461, - "grad_norm": 2.1257288455963135, - "learning_rate": 4.323955669224212e-06, - "loss": 0.1418, - "step": 3329 - }, - { - "epoch": 0.5677598103204838, - "grad_norm": 1.3970263004302979, - "learning_rate": 4.32225063938619e-06, - "loss": 0.1446, - "step": 3330 - }, - { - "epoch": 0.5679303087620214, - "grad_norm": 1.0706558227539062, - "learning_rate": 4.320545609548168e-06, - "loss": 0.1379, - "step": 3331 - }, - { - "epoch": 0.5681008072035592, - "grad_norm": 1.8220258951187134, - "learning_rate": 4.318840579710145e-06, - "loss": 0.0634, - "step": 3332 - }, - { - "epoch": 0.5682713056450969, - "grad_norm": 0.7084428071975708, - "learning_rate": 4.317135549872123e-06, - "loss": 0.0661, - "step": 3333 - }, - { - "epoch": 0.5684418040866345, - "grad_norm": 1.0817866325378418, - "learning_rate": 4.315430520034101e-06, - "loss": 0.1151, - "step": 3334 - }, - { - "epoch": 0.5686123025281722, - "grad_norm": 0.9813879728317261, - "learning_rate": 4.313725490196079e-06, - "loss": 0.1207, - "step": 3335 - }, - { - "epoch": 0.5687828009697099, - "grad_norm": 1.0461468696594238, - "learning_rate": 4.3120204603580566e-06, - "loss": 0.0888, - "step": 3336 - }, - { - "epoch": 0.5689532994112476, - "grad_norm": 1.514338731765747, - "learning_rate": 4.3103154305200345e-06, - "loss": 0.1898, - "step": 3337 - }, - { - "epoch": 0.5691237978527852, - "grad_norm": 1.2786973714828491, - "learning_rate": 4.3086104006820125e-06, - "loss": 0.1808, - "step": 3338 - }, - { - "epoch": 0.5692942962943229, - "grad_norm": 1.331338882446289, - "learning_rate": 4.30690537084399e-06, - "loss": 0.1076, - "step": 3339 - }, - { - "epoch": 0.5694647947358606, - "grad_norm": 1.214442491531372, - "learning_rate": 4.3052003410059675e-06, - "loss": 0.1187, - "step": 3340 - }, - { - "epoch": 0.5696352931773982, - "grad_norm": 1.40016770362854, - "learning_rate": 4.3034953111679455e-06, - "loss": 0.1684, - "step": 3341 - }, - { - "epoch": 0.569805791618936, - "grad_norm": 1.2763607501983643, - "learning_rate": 4.301790281329923e-06, - "loss": 0.1473, - "step": 3342 - }, - { - "epoch": 0.5699762900604737, - "grad_norm": 1.1418249607086182, - "learning_rate": 4.300085251491901e-06, - "loss": 0.1407, - "step": 3343 - }, - { - "epoch": 0.5701467885020114, - "grad_norm": 0.975607693195343, - "learning_rate": 4.298380221653879e-06, - "loss": 0.1556, - "step": 3344 - }, - { - "epoch": 0.570317286943549, - "grad_norm": 0.8215646743774414, - "learning_rate": 4.296675191815857e-06, - "loss": 0.0585, - "step": 3345 - }, - { - "epoch": 0.5704877853850867, - "grad_norm": 1.139883279800415, - "learning_rate": 4.294970161977835e-06, - "loss": 0.1284, - "step": 3346 - }, - { - "epoch": 0.5706582838266244, - "grad_norm": 1.4383643865585327, - "learning_rate": 4.293265132139813e-06, - "loss": 0.1621, - "step": 3347 - }, - { - "epoch": 0.570828782268162, - "grad_norm": 0.9598284959793091, - "learning_rate": 4.29156010230179e-06, - "loss": 0.0739, - "step": 3348 - }, - { - "epoch": 0.5709992807096997, - "grad_norm": 1.20563805103302, - "learning_rate": 4.289855072463768e-06, - "loss": 0.0754, - "step": 3349 - }, - { - "epoch": 0.5711697791512375, - "grad_norm": 1.4110171794891357, - "learning_rate": 4.288150042625746e-06, - "loss": 0.112, - "step": 3350 - }, - { - "epoch": 0.5713402775927752, - "grad_norm": 0.9314337968826294, - "learning_rate": 4.286445012787724e-06, - "loss": 0.1166, - "step": 3351 - }, - { - "epoch": 0.5715107760343128, - "grad_norm": 1.6237143278121948, - "learning_rate": 4.284739982949702e-06, - "loss": 0.178, - "step": 3352 - }, - { - "epoch": 0.5716812744758505, - "grad_norm": 1.2615511417388916, - "learning_rate": 4.28303495311168e-06, - "loss": 0.1454, - "step": 3353 - }, - { - "epoch": 0.5718517729173882, - "grad_norm": 0.9481834173202515, - "learning_rate": 4.281329923273658e-06, - "loss": 0.0704, - "step": 3354 - }, - { - "epoch": 0.5720222713589258, - "grad_norm": 0.8312090039253235, - "learning_rate": 4.279624893435636e-06, - "loss": 0.0806, - "step": 3355 - }, - { - "epoch": 0.5721927698004635, - "grad_norm": 1.0366708040237427, - "learning_rate": 4.277919863597613e-06, - "loss": 0.1035, - "step": 3356 - }, - { - "epoch": 0.5723632682420012, - "grad_norm": 1.049806833267212, - "learning_rate": 4.276214833759591e-06, - "loss": 0.1438, - "step": 3357 - }, - { - "epoch": 0.572533766683539, - "grad_norm": 1.5999555587768555, - "learning_rate": 4.274509803921569e-06, - "loss": 0.1176, - "step": 3358 - }, - { - "epoch": 0.5727042651250766, - "grad_norm": 1.3076897859573364, - "learning_rate": 4.272804774083547e-06, - "loss": 0.1431, - "step": 3359 - }, - { - "epoch": 0.5728747635666143, - "grad_norm": 1.069909930229187, - "learning_rate": 4.271099744245525e-06, - "loss": 0.0521, - "step": 3360 - }, - { - "epoch": 0.573045262008152, - "grad_norm": 1.748558759689331, - "learning_rate": 4.269394714407503e-06, - "loss": 0.186, - "step": 3361 - }, - { - "epoch": 0.5732157604496897, - "grad_norm": 2.2109155654907227, - "learning_rate": 4.267689684569481e-06, - "loss": 0.1384, - "step": 3362 - }, - { - "epoch": 0.5733862588912273, - "grad_norm": 2.6417386531829834, - "learning_rate": 4.265984654731458e-06, - "loss": 0.2327, - "step": 3363 - }, - { - "epoch": 0.573556757332765, - "grad_norm": 1.4780867099761963, - "learning_rate": 4.264279624893436e-06, - "loss": 0.168, - "step": 3364 - }, - { - "epoch": 0.5737272557743027, - "grad_norm": 1.3667103052139282, - "learning_rate": 4.262574595055414e-06, - "loss": 0.1058, - "step": 3365 - }, - { - "epoch": 0.5738977542158403, - "grad_norm": 1.0596626996994019, - "learning_rate": 4.260869565217392e-06, - "loss": 0.086, - "step": 3366 - }, - { - "epoch": 0.574068252657378, - "grad_norm": 1.2773600816726685, - "learning_rate": 4.2591645353793696e-06, - "loss": 0.2099, - "step": 3367 - }, - { - "epoch": 0.5742387510989158, - "grad_norm": 1.5703792572021484, - "learning_rate": 4.2574595055413475e-06, - "loss": 0.1702, - "step": 3368 - }, - { - "epoch": 0.5744092495404535, - "grad_norm": 1.1637566089630127, - "learning_rate": 4.2557544757033255e-06, - "loss": 0.1827, - "step": 3369 - }, - { - "epoch": 0.5745797479819911, - "grad_norm": 0.9364039897918701, - "learning_rate": 4.254049445865303e-06, - "loss": 0.0797, - "step": 3370 - }, - { - "epoch": 0.5747502464235288, - "grad_norm": 1.0836312770843506, - "learning_rate": 4.2523444160272805e-06, - "loss": 0.0998, - "step": 3371 - }, - { - "epoch": 0.5749207448650665, - "grad_norm": 1.0185374021530151, - "learning_rate": 4.2506393861892584e-06, - "loss": 0.1401, - "step": 3372 - }, - { - "epoch": 0.5750912433066041, - "grad_norm": 1.0646388530731201, - "learning_rate": 4.248934356351236e-06, - "loss": 0.1173, - "step": 3373 - }, - { - "epoch": 0.5752617417481418, - "grad_norm": 1.3602221012115479, - "learning_rate": 4.247229326513214e-06, - "loss": 0.154, - "step": 3374 - }, - { - "epoch": 0.5754322401896795, - "grad_norm": 0.8349620699882507, - "learning_rate": 4.245524296675192e-06, - "loss": 0.0912, - "step": 3375 - }, - { - "epoch": 0.5756027386312172, - "grad_norm": 1.0016189813613892, - "learning_rate": 4.24381926683717e-06, - "loss": 0.1697, - "step": 3376 - }, - { - "epoch": 0.5757732370727549, - "grad_norm": 1.3852578401565552, - "learning_rate": 4.242114236999148e-06, - "loss": 0.171, - "step": 3377 - }, - { - "epoch": 0.5759437355142926, - "grad_norm": 0.9213678240776062, - "learning_rate": 4.240409207161126e-06, - "loss": 0.069, - "step": 3378 - }, - { - "epoch": 0.5761142339558303, - "grad_norm": 2.2820992469787598, - "learning_rate": 4.238704177323103e-06, - "loss": 0.1907, - "step": 3379 - }, - { - "epoch": 0.5762847323973679, - "grad_norm": 0.8198938965797424, - "learning_rate": 4.236999147485081e-06, - "loss": 0.0571, - "step": 3380 - }, - { - "epoch": 0.5764552308389056, - "grad_norm": 0.998900830745697, - "learning_rate": 4.235294117647059e-06, - "loss": 0.1048, - "step": 3381 - }, - { - "epoch": 0.5766257292804433, - "grad_norm": 1.1199216842651367, - "learning_rate": 4.233589087809037e-06, - "loss": 0.13, - "step": 3382 - }, - { - "epoch": 0.576796227721981, - "grad_norm": 0.9903812408447266, - "learning_rate": 4.231884057971015e-06, - "loss": 0.0964, - "step": 3383 - }, - { - "epoch": 0.5769667261635186, - "grad_norm": 1.2220855951309204, - "learning_rate": 4.230179028132992e-06, - "loss": 0.1361, - "step": 3384 - }, - { - "epoch": 0.5771372246050563, - "grad_norm": 1.296735405921936, - "learning_rate": 4.228473998294971e-06, - "loss": 0.1406, - "step": 3385 - }, - { - "epoch": 0.5773077230465941, - "grad_norm": 1.0935611724853516, - "learning_rate": 4.226768968456949e-06, - "loss": 0.1243, - "step": 3386 - }, - { - "epoch": 0.5774782214881317, - "grad_norm": 1.115261197090149, - "learning_rate": 4.225063938618926e-06, - "loss": 0.1335, - "step": 3387 - }, - { - "epoch": 0.5776487199296694, - "grad_norm": 1.1657440662384033, - "learning_rate": 4.223358908780904e-06, - "loss": 0.1457, - "step": 3388 - }, - { - "epoch": 0.5778192183712071, - "grad_norm": 0.8687394857406616, - "learning_rate": 4.221653878942882e-06, - "loss": 0.0648, - "step": 3389 - }, - { - "epoch": 0.5779897168127448, - "grad_norm": 1.134837031364441, - "learning_rate": 4.21994884910486e-06, - "loss": 0.1265, - "step": 3390 - }, - { - "epoch": 0.5781602152542824, - "grad_norm": 1.6776621341705322, - "learning_rate": 4.218243819266838e-06, - "loss": 0.2094, - "step": 3391 - }, - { - "epoch": 0.5783307136958201, - "grad_norm": 1.5459142923355103, - "learning_rate": 4.216538789428815e-06, - "loss": 0.1759, - "step": 3392 - }, - { - "epoch": 0.5785012121373578, - "grad_norm": 1.0331432819366455, - "learning_rate": 4.214833759590794e-06, - "loss": 0.1374, - "step": 3393 - }, - { - "epoch": 0.5786717105788954, - "grad_norm": 0.9535524845123291, - "learning_rate": 4.213128729752772e-06, - "loss": 0.0827, - "step": 3394 - }, - { - "epoch": 0.5788422090204332, - "grad_norm": 1.3507156372070312, - "learning_rate": 4.211423699914749e-06, - "loss": 0.1657, - "step": 3395 - }, - { - "epoch": 0.5790127074619709, - "grad_norm": 0.9514930248260498, - "learning_rate": 4.209718670076727e-06, - "loss": 0.1019, - "step": 3396 - }, - { - "epoch": 0.5791832059035086, - "grad_norm": 1.091905951499939, - "learning_rate": 4.208013640238705e-06, - "loss": 0.1319, - "step": 3397 - }, - { - "epoch": 0.5793537043450462, - "grad_norm": 0.9134871363639832, - "learning_rate": 4.2063086104006825e-06, - "loss": 0.0862, - "step": 3398 - }, - { - "epoch": 0.5795242027865839, - "grad_norm": 1.7938615083694458, - "learning_rate": 4.20460358056266e-06, - "loss": 0.1689, - "step": 3399 - }, - { - "epoch": 0.5796947012281216, - "grad_norm": 2.3345704078674316, - "learning_rate": 4.202898550724638e-06, - "loss": 0.2431, - "step": 3400 - }, - { - "epoch": 0.5796947012281216, - "eval_f1_score": 0.36548223350253806, - "eval_loss": 0.1384306102991104, - "eval_runtime": 182.6782, - "eval_samples_per_second": 54.741, - "eval_steps_per_second": 3.421, - "step": 3400 - }, - { - "epoch": 0.5798651996696592, - "grad_norm": 1.92391037940979, - "learning_rate": 4.2011935208866155e-06, - "loss": 0.1983, - "step": 3401 - }, - { - "epoch": 0.5800356981111969, - "grad_norm": 2.0211410522460938, - "learning_rate": 4.1994884910485935e-06, - "loss": 0.2481, - "step": 3402 - }, - { - "epoch": 0.5802061965527346, - "grad_norm": 0.8273227214813232, - "learning_rate": 4.1977834612105714e-06, - "loss": 0.0832, - "step": 3403 - }, - { - "epoch": 0.5803766949942724, - "grad_norm": 1.275863766670227, - "learning_rate": 4.196078431372549e-06, - "loss": 0.1543, - "step": 3404 - }, - { - "epoch": 0.58054719343581, - "grad_norm": 0.9006935358047485, - "learning_rate": 4.194373401534527e-06, - "loss": 0.1069, - "step": 3405 - }, - { - "epoch": 0.5807176918773477, - "grad_norm": 1.3350001573562622, - "learning_rate": 4.192668371696505e-06, - "loss": 0.1434, - "step": 3406 - }, - { - "epoch": 0.5808881903188854, - "grad_norm": 0.8021498322486877, - "learning_rate": 4.190963341858482e-06, - "loss": 0.1094, - "step": 3407 - }, - { - "epoch": 0.5810586887604231, - "grad_norm": 0.9336419105529785, - "learning_rate": 4.18925831202046e-06, - "loss": 0.1128, - "step": 3408 - }, - { - "epoch": 0.5812291872019607, - "grad_norm": 0.8901069760322571, - "learning_rate": 4.187553282182438e-06, - "loss": 0.1042, - "step": 3409 - }, - { - "epoch": 0.5813996856434984, - "grad_norm": 1.0303782224655151, - "learning_rate": 4.185848252344416e-06, - "loss": 0.0964, - "step": 3410 - }, - { - "epoch": 0.5815701840850361, - "grad_norm": 1.1685336828231812, - "learning_rate": 4.184143222506394e-06, - "loss": 0.1435, - "step": 3411 - }, - { - "epoch": 0.5817406825265737, - "grad_norm": 1.0988837480545044, - "learning_rate": 4.182438192668372e-06, - "loss": 0.1413, - "step": 3412 - }, - { - "epoch": 0.5819111809681115, - "grad_norm": 1.4995113611221313, - "learning_rate": 4.18073316283035e-06, - "loss": 0.1951, - "step": 3413 - }, - { - "epoch": 0.5820816794096492, - "grad_norm": 1.328747272491455, - "learning_rate": 4.179028132992328e-06, - "loss": 0.1393, - "step": 3414 - }, - { - "epoch": 0.5822521778511869, - "grad_norm": 1.1531280279159546, - "learning_rate": 4.177323103154305e-06, - "loss": 0.068, - "step": 3415 - }, - { - "epoch": 0.5824226762927245, - "grad_norm": 0.9878232479095459, - "learning_rate": 4.175618073316283e-06, - "loss": 0.0741, - "step": 3416 - }, - { - "epoch": 0.5825931747342622, - "grad_norm": 1.3367799520492554, - "learning_rate": 4.173913043478261e-06, - "loss": 0.1205, - "step": 3417 - }, - { - "epoch": 0.5827636731757999, - "grad_norm": 1.0414282083511353, - "learning_rate": 4.172208013640239e-06, - "loss": 0.1037, - "step": 3418 - }, - { - "epoch": 0.5829341716173375, - "grad_norm": 0.865533173084259, - "learning_rate": 4.170502983802217e-06, - "loss": 0.1143, - "step": 3419 - }, - { - "epoch": 0.5831046700588752, - "grad_norm": 1.3106995820999146, - "learning_rate": 4.168797953964195e-06, - "loss": 0.1218, - "step": 3420 - }, - { - "epoch": 0.5832751685004129, - "grad_norm": 0.8746435046195984, - "learning_rate": 4.167092924126173e-06, - "loss": 0.1248, - "step": 3421 - }, - { - "epoch": 0.5834456669419507, - "grad_norm": 0.6834039092063904, - "learning_rate": 4.165387894288151e-06, - "loss": 0.0722, - "step": 3422 - }, - { - "epoch": 0.5836161653834883, - "grad_norm": 1.1980594396591187, - "learning_rate": 4.163682864450128e-06, - "loss": 0.1023, - "step": 3423 - }, - { - "epoch": 0.583786663825026, - "grad_norm": 0.9332726001739502, - "learning_rate": 4.161977834612106e-06, - "loss": 0.0713, - "step": 3424 - }, - { - "epoch": 0.5839571622665637, - "grad_norm": 1.0442097187042236, - "learning_rate": 4.160272804774084e-06, - "loss": 0.0868, - "step": 3425 - }, - { - "epoch": 0.5841276607081013, - "grad_norm": 1.9792109727859497, - "learning_rate": 4.158567774936062e-06, - "loss": 0.1933, - "step": 3426 - }, - { - "epoch": 0.584298159149639, - "grad_norm": 0.6008835434913635, - "learning_rate": 4.15686274509804e-06, - "loss": 0.0551, - "step": 3427 - }, - { - "epoch": 0.5844686575911767, - "grad_norm": 1.496996283531189, - "learning_rate": 4.155157715260018e-06, - "loss": 0.1422, - "step": 3428 - }, - { - "epoch": 0.5846391560327144, - "grad_norm": 0.8596488833427429, - "learning_rate": 4.1534526854219955e-06, - "loss": 0.0683, - "step": 3429 - }, - { - "epoch": 0.584809654474252, - "grad_norm": 0.5586788654327393, - "learning_rate": 4.1517476555839735e-06, - "loss": 0.1202, - "step": 3430 - }, - { - "epoch": 0.5849801529157898, - "grad_norm": 1.0073561668395996, - "learning_rate": 4.150042625745951e-06, - "loss": 0.0933, - "step": 3431 - }, - { - "epoch": 0.5851506513573275, - "grad_norm": 1.4064346551895142, - "learning_rate": 4.1483375959079285e-06, - "loss": 0.1591, - "step": 3432 - }, - { - "epoch": 0.5853211497988651, - "grad_norm": 1.2082147598266602, - "learning_rate": 4.1466325660699065e-06, - "loss": 0.1669, - "step": 3433 - }, - { - "epoch": 0.5854916482404028, - "grad_norm": 1.391356348991394, - "learning_rate": 4.1449275362318844e-06, - "loss": 0.1946, - "step": 3434 - }, - { - "epoch": 0.5856621466819405, - "grad_norm": 0.9328740835189819, - "learning_rate": 4.143222506393862e-06, - "loss": 0.0658, - "step": 3435 - }, - { - "epoch": 0.5858326451234782, - "grad_norm": 1.4556068181991577, - "learning_rate": 4.14151747655584e-06, - "loss": 0.1474, - "step": 3436 - }, - { - "epoch": 0.5860031435650158, - "grad_norm": 1.2817412614822388, - "learning_rate": 4.139812446717818e-06, - "loss": 0.1583, - "step": 3437 - }, - { - "epoch": 0.5861736420065535, - "grad_norm": 1.0311654806137085, - "learning_rate": 4.138107416879795e-06, - "loss": 0.1175, - "step": 3438 - }, - { - "epoch": 0.5863441404480912, - "grad_norm": 1.2356747388839722, - "learning_rate": 4.136402387041773e-06, - "loss": 0.1375, - "step": 3439 - }, - { - "epoch": 0.5865146388896288, - "grad_norm": 1.3934600353240967, - "learning_rate": 4.134697357203751e-06, - "loss": 0.1493, - "step": 3440 - }, - { - "epoch": 0.5866851373311666, - "grad_norm": 0.7090312838554382, - "learning_rate": 4.132992327365729e-06, - "loss": 0.082, - "step": 3441 - }, - { - "epoch": 0.5868556357727043, - "grad_norm": 1.185271978378296, - "learning_rate": 4.131287297527707e-06, - "loss": 0.1339, - "step": 3442 - }, - { - "epoch": 0.587026134214242, - "grad_norm": 1.2944258451461792, - "learning_rate": 4.129582267689685e-06, - "loss": 0.1124, - "step": 3443 - }, - { - "epoch": 0.5871966326557796, - "grad_norm": 1.0491424798965454, - "learning_rate": 4.127877237851663e-06, - "loss": 0.1042, - "step": 3444 - }, - { - "epoch": 0.5873671310973173, - "grad_norm": 1.1843968629837036, - "learning_rate": 4.126172208013641e-06, - "loss": 0.1105, - "step": 3445 - }, - { - "epoch": 0.587537629538855, - "grad_norm": 1.6263676881790161, - "learning_rate": 4.124467178175618e-06, - "loss": 0.1593, - "step": 3446 - }, - { - "epoch": 0.5877081279803926, - "grad_norm": 0.8808202743530273, - "learning_rate": 4.122762148337596e-06, - "loss": 0.1022, - "step": 3447 - }, - { - "epoch": 0.5878786264219303, - "grad_norm": 0.7366436123847961, - "learning_rate": 4.121057118499574e-06, - "loss": 0.0866, - "step": 3448 - }, - { - "epoch": 0.588049124863468, - "grad_norm": 1.401879072189331, - "learning_rate": 4.119352088661552e-06, - "loss": 0.2235, - "step": 3449 - }, - { - "epoch": 0.5882196233050058, - "grad_norm": 0.8640059232711792, - "learning_rate": 4.11764705882353e-06, - "loss": 0.0559, - "step": 3450 - }, - { - "epoch": 0.5883901217465434, - "grad_norm": 1.500951886177063, - "learning_rate": 4.115942028985507e-06, - "loss": 0.2105, - "step": 3451 - }, - { - "epoch": 0.5885606201880811, - "grad_norm": 0.9160787463188171, - "learning_rate": 4.114236999147486e-06, - "loss": 0.1177, - "step": 3452 - }, - { - "epoch": 0.5887311186296188, - "grad_norm": 1.2144598960876465, - "learning_rate": 4.112531969309464e-06, - "loss": 0.1491, - "step": 3453 - }, - { - "epoch": 0.5889016170711565, - "grad_norm": 1.3946925401687622, - "learning_rate": 4.110826939471441e-06, - "loss": 0.1667, - "step": 3454 - }, - { - "epoch": 0.5890721155126941, - "grad_norm": 1.3554019927978516, - "learning_rate": 4.109121909633419e-06, - "loss": 0.1413, - "step": 3455 - }, - { - "epoch": 0.5892426139542318, - "grad_norm": 1.6783015727996826, - "learning_rate": 4.107416879795397e-06, - "loss": 0.1542, - "step": 3456 - }, - { - "epoch": 0.5894131123957695, - "grad_norm": 1.0624831914901733, - "learning_rate": 4.105711849957375e-06, - "loss": 0.1066, - "step": 3457 - }, - { - "epoch": 0.5895836108373071, - "grad_norm": 1.2082974910736084, - "learning_rate": 4.104006820119353e-06, - "loss": 0.1176, - "step": 3458 - }, - { - "epoch": 0.5897541092788449, - "grad_norm": 0.7654959559440613, - "learning_rate": 4.10230179028133e-06, - "loss": 0.0742, - "step": 3459 - }, - { - "epoch": 0.5899246077203826, - "grad_norm": 1.610687255859375, - "learning_rate": 4.1005967604433085e-06, - "loss": 0.1725, - "step": 3460 - }, - { - "epoch": 0.5900951061619203, - "grad_norm": 0.9428104162216187, - "learning_rate": 4.0988917306052865e-06, - "loss": 0.1361, - "step": 3461 - }, - { - "epoch": 0.5902656046034579, - "grad_norm": 1.117551565170288, - "learning_rate": 4.0971867007672636e-06, - "loss": 0.0913, - "step": 3462 - }, - { - "epoch": 0.5904361030449956, - "grad_norm": 1.391464114189148, - "learning_rate": 4.0954816709292415e-06, - "loss": 0.1721, - "step": 3463 - }, - { - "epoch": 0.5906066014865333, - "grad_norm": 0.8190073370933533, - "learning_rate": 4.0937766410912195e-06, - "loss": 0.0675, - "step": 3464 - }, - { - "epoch": 0.5907770999280709, - "grad_norm": 1.8234949111938477, - "learning_rate": 4.092071611253197e-06, - "loss": 0.1997, - "step": 3465 - }, - { - "epoch": 0.5909475983696086, - "grad_norm": 1.2955584526062012, - "learning_rate": 4.090366581415175e-06, - "loss": 0.1551, - "step": 3466 - }, - { - "epoch": 0.5911180968111464, - "grad_norm": 1.6789677143096924, - "learning_rate": 4.0886615515771525e-06, - "loss": 0.1606, - "step": 3467 - }, - { - "epoch": 0.5912885952526841, - "grad_norm": 0.9646121859550476, - "learning_rate": 4.086956521739131e-06, - "loss": 0.0902, - "step": 3468 - }, - { - "epoch": 0.5914590936942217, - "grad_norm": 0.8161157369613647, - "learning_rate": 4.085251491901109e-06, - "loss": 0.0834, - "step": 3469 - }, - { - "epoch": 0.5916295921357594, - "grad_norm": 1.4160490036010742, - "learning_rate": 4.083546462063086e-06, - "loss": 0.1711, - "step": 3470 - }, - { - "epoch": 0.5918000905772971, - "grad_norm": 1.1606277227401733, - "learning_rate": 4.081841432225064e-06, - "loss": 0.1086, - "step": 3471 - }, - { - "epoch": 0.5919705890188347, - "grad_norm": 1.933803677558899, - "learning_rate": 4.080136402387042e-06, - "loss": 0.1797, - "step": 3472 - }, - { - "epoch": 0.5921410874603724, - "grad_norm": 0.8342519998550415, - "learning_rate": 4.07843137254902e-06, - "loss": 0.0939, - "step": 3473 - }, - { - "epoch": 0.5923115859019101, - "grad_norm": 1.1106159687042236, - "learning_rate": 4.076726342710997e-06, - "loss": 0.1578, - "step": 3474 - }, - { - "epoch": 0.5924820843434478, - "grad_norm": 1.416045904159546, - "learning_rate": 4.075021312872975e-06, - "loss": 0.1282, - "step": 3475 - }, - { - "epoch": 0.5926525827849854, - "grad_norm": 0.9018409252166748, - "learning_rate": 4.073316283034953e-06, - "loss": 0.0761, - "step": 3476 - }, - { - "epoch": 0.5928230812265232, - "grad_norm": 0.9520737528800964, - "learning_rate": 4.071611253196931e-06, - "loss": 0.1169, - "step": 3477 - }, - { - "epoch": 0.5929935796680609, - "grad_norm": 1.7352948188781738, - "learning_rate": 4.069906223358909e-06, - "loss": 0.1989, - "step": 3478 - }, - { - "epoch": 0.5931640781095985, - "grad_norm": 1.0018260478973389, - "learning_rate": 4.068201193520887e-06, - "loss": 0.0931, - "step": 3479 - }, - { - "epoch": 0.5933345765511362, - "grad_norm": 1.7456133365631104, - "learning_rate": 4.066496163682865e-06, - "loss": 0.1758, - "step": 3480 - }, - { - "epoch": 0.5935050749926739, - "grad_norm": 1.323087215423584, - "learning_rate": 4.064791133844843e-06, - "loss": 0.1263, - "step": 3481 - }, - { - "epoch": 0.5936755734342116, - "grad_norm": 1.2114142179489136, - "learning_rate": 4.06308610400682e-06, - "loss": 0.1273, - "step": 3482 - }, - { - "epoch": 0.5938460718757492, - "grad_norm": 0.7798213958740234, - "learning_rate": 4.061381074168798e-06, - "loss": 0.0675, - "step": 3483 - }, - { - "epoch": 0.5940165703172869, - "grad_norm": 1.5562611818313599, - "learning_rate": 4.059676044330776e-06, - "loss": 0.181, - "step": 3484 - }, - { - "epoch": 0.5941870687588247, - "grad_norm": 0.8951976299285889, - "learning_rate": 4.057971014492754e-06, - "loss": 0.0977, - "step": 3485 - }, - { - "epoch": 0.5943575672003623, - "grad_norm": 1.1890310049057007, - "learning_rate": 4.056265984654732e-06, - "loss": 0.1344, - "step": 3486 - }, - { - "epoch": 0.5945280656419, - "grad_norm": 1.9643477201461792, - "learning_rate": 4.05456095481671e-06, - "loss": 0.2614, - "step": 3487 - }, - { - "epoch": 0.5946985640834377, - "grad_norm": 1.0449378490447998, - "learning_rate": 4.052855924978688e-06, - "loss": 0.0596, - "step": 3488 - }, - { - "epoch": 0.5948690625249754, - "grad_norm": 1.2042781114578247, - "learning_rate": 4.051150895140666e-06, - "loss": 0.1624, - "step": 3489 - }, - { - "epoch": 0.595039560966513, - "grad_norm": 0.9169061779975891, - "learning_rate": 4.049445865302643e-06, - "loss": 0.1545, - "step": 3490 - }, - { - "epoch": 0.5952100594080507, - "grad_norm": 1.092411756515503, - "learning_rate": 4.047740835464621e-06, - "loss": 0.123, - "step": 3491 - }, - { - "epoch": 0.5953805578495884, - "grad_norm": 2.1680240631103516, - "learning_rate": 4.046035805626599e-06, - "loss": 0.2052, - "step": 3492 - }, - { - "epoch": 0.595551056291126, - "grad_norm": 1.5322246551513672, - "learning_rate": 4.0443307757885766e-06, - "loss": 0.1582, - "step": 3493 - }, - { - "epoch": 0.5957215547326637, - "grad_norm": 1.3674644231796265, - "learning_rate": 4.0426257459505545e-06, - "loss": 0.12, - "step": 3494 - }, - { - "epoch": 0.5958920531742015, - "grad_norm": 1.8770805597305298, - "learning_rate": 4.0409207161125325e-06, - "loss": 0.1854, - "step": 3495 - }, - { - "epoch": 0.5960625516157392, - "grad_norm": 1.1395719051361084, - "learning_rate": 4.03921568627451e-06, - "loss": 0.1042, - "step": 3496 - }, - { - "epoch": 0.5962330500572768, - "grad_norm": 1.364762544631958, - "learning_rate": 4.037510656436488e-06, - "loss": 0.1567, - "step": 3497 - }, - { - "epoch": 0.5964035484988145, - "grad_norm": 0.9239014387130737, - "learning_rate": 4.0358056265984655e-06, - "loss": 0.127, - "step": 3498 - }, - { - "epoch": 0.5965740469403522, - "grad_norm": 1.2227081060409546, - "learning_rate": 4.034100596760443e-06, - "loss": 0.123, - "step": 3499 - }, - { - "epoch": 0.5967445453818899, - "grad_norm": 1.3842462301254272, - "learning_rate": 4.032395566922421e-06, - "loss": 0.1772, - "step": 3500 - }, - { - "epoch": 0.5967445453818899, - "eval_f1_score": 0.3755656108597285, - "eval_loss": 0.13983075320720673, - "eval_runtime": 182.7252, - "eval_samples_per_second": 54.727, - "eval_steps_per_second": 3.42, - "step": 3500 - }, - { - "epoch": 0.5969150438234275, - "grad_norm": 1.9180755615234375, - "learning_rate": 4.030690537084399e-06, - "loss": 0.1808, - "step": 3501 - }, - { - "epoch": 0.5970855422649652, - "grad_norm": 1.2909221649169922, - "learning_rate": 4.028985507246377e-06, - "loss": 0.0999, - "step": 3502 - }, - { - "epoch": 0.597256040706503, - "grad_norm": 1.1232291460037231, - "learning_rate": 4.027280477408355e-06, - "loss": 0.1751, - "step": 3503 - }, - { - "epoch": 0.5974265391480406, - "grad_norm": 1.0960628986358643, - "learning_rate": 4.025575447570333e-06, - "loss": 0.0742, - "step": 3504 - }, - { - "epoch": 0.5975970375895783, - "grad_norm": 1.700650691986084, - "learning_rate": 4.023870417732311e-06, - "loss": 0.1958, - "step": 3505 - }, - { - "epoch": 0.597767536031116, - "grad_norm": 1.1192090511322021, - "learning_rate": 4.022165387894288e-06, - "loss": 0.084, - "step": 3506 - }, - { - "epoch": 0.5979380344726537, - "grad_norm": 0.9199531078338623, - "learning_rate": 4.020460358056266e-06, - "loss": 0.0806, - "step": 3507 - }, - { - "epoch": 0.5981085329141913, - "grad_norm": 0.8182160258293152, - "learning_rate": 4.018755328218244e-06, - "loss": 0.0819, - "step": 3508 - }, - { - "epoch": 0.598279031355729, - "grad_norm": 1.237420916557312, - "learning_rate": 4.017050298380222e-06, - "loss": 0.1106, - "step": 3509 - }, - { - "epoch": 0.5984495297972667, - "grad_norm": 0.9624441266059875, - "learning_rate": 4.0153452685422e-06, - "loss": 0.0933, - "step": 3510 - }, - { - "epoch": 0.5986200282388043, - "grad_norm": 0.7896148562431335, - "learning_rate": 4.013640238704178e-06, - "loss": 0.0938, - "step": 3511 - }, - { - "epoch": 0.598790526680342, - "grad_norm": 1.687626838684082, - "learning_rate": 4.011935208866156e-06, - "loss": 0.2776, - "step": 3512 - }, - { - "epoch": 0.5989610251218798, - "grad_norm": 0.9339796304702759, - "learning_rate": 4.010230179028133e-06, - "loss": 0.1304, - "step": 3513 - }, - { - "epoch": 0.5991315235634175, - "grad_norm": 0.9085601568222046, - "learning_rate": 4.008525149190111e-06, - "loss": 0.0723, - "step": 3514 - }, - { - "epoch": 0.5993020220049551, - "grad_norm": 1.3926743268966675, - "learning_rate": 4.006820119352089e-06, - "loss": 0.1761, - "step": 3515 - }, - { - "epoch": 0.5994725204464928, - "grad_norm": 0.9031898379325867, - "learning_rate": 4.005115089514067e-06, - "loss": 0.0809, - "step": 3516 - }, - { - "epoch": 0.5996430188880305, - "grad_norm": 0.839905321598053, - "learning_rate": 4.003410059676045e-06, - "loss": 0.0704, - "step": 3517 - }, - { - "epoch": 0.5998135173295681, - "grad_norm": 1.688146710395813, - "learning_rate": 4.001705029838022e-06, - "loss": 0.1111, - "step": 3518 - }, - { - "epoch": 0.5999840157711058, - "grad_norm": 1.9072600603103638, - "learning_rate": 4.000000000000001e-06, - "loss": 0.1669, - "step": 3519 - }, - { - "epoch": 0.6001545142126435, - "grad_norm": 1.4360688924789429, - "learning_rate": 3.998294970161979e-06, - "loss": 0.1837, - "step": 3520 - }, - { - "epoch": 0.6003250126541813, - "grad_norm": 1.0013833045959473, - "learning_rate": 3.996589940323956e-06, - "loss": 0.0866, - "step": 3521 - }, - { - "epoch": 0.6004955110957189, - "grad_norm": 2.101608991622925, - "learning_rate": 3.994884910485934e-06, - "loss": 0.1842, - "step": 3522 - }, - { - "epoch": 0.6006660095372566, - "grad_norm": 1.478049874305725, - "learning_rate": 3.993179880647912e-06, - "loss": 0.1477, - "step": 3523 - }, - { - "epoch": 0.6008365079787943, - "grad_norm": 1.2308162450790405, - "learning_rate": 3.9914748508098896e-06, - "loss": 0.1252, - "step": 3524 - }, - { - "epoch": 0.6010070064203319, - "grad_norm": 1.1021760702133179, - "learning_rate": 3.9897698209718675e-06, - "loss": 0.0901, - "step": 3525 - }, - { - "epoch": 0.6011775048618696, - "grad_norm": 1.6949081420898438, - "learning_rate": 3.988064791133845e-06, - "loss": 0.2353, - "step": 3526 - }, - { - "epoch": 0.6013480033034073, - "grad_norm": 1.4775804281234741, - "learning_rate": 3.986359761295823e-06, - "loss": 0.2189, - "step": 3527 - }, - { - "epoch": 0.601518501744945, - "grad_norm": 1.1562957763671875, - "learning_rate": 3.984654731457801e-06, - "loss": 0.1427, - "step": 3528 - }, - { - "epoch": 0.6016890001864826, - "grad_norm": 1.0964555740356445, - "learning_rate": 3.9829497016197784e-06, - "loss": 0.1065, - "step": 3529 - }, - { - "epoch": 0.6018594986280204, - "grad_norm": 0.8958936929702759, - "learning_rate": 3.981244671781756e-06, - "loss": 0.0687, - "step": 3530 - }, - { - "epoch": 0.6020299970695581, - "grad_norm": 1.092345118522644, - "learning_rate": 3.979539641943734e-06, - "loss": 0.105, - "step": 3531 - }, - { - "epoch": 0.6022004955110957, - "grad_norm": 0.8545036911964417, - "learning_rate": 3.977834612105712e-06, - "loss": 0.0719, - "step": 3532 - }, - { - "epoch": 0.6023709939526334, - "grad_norm": 1.1633192300796509, - "learning_rate": 3.97612958226769e-06, - "loss": 0.084, - "step": 3533 - }, - { - "epoch": 0.6025414923941711, - "grad_norm": 1.2050695419311523, - "learning_rate": 3.974424552429667e-06, - "loss": 0.1153, - "step": 3534 - }, - { - "epoch": 0.6027119908357088, - "grad_norm": 1.1858649253845215, - "learning_rate": 3.972719522591646e-06, - "loss": 0.1457, - "step": 3535 - }, - { - "epoch": 0.6028824892772464, - "grad_norm": 1.2307145595550537, - "learning_rate": 3.971014492753624e-06, - "loss": 0.1005, - "step": 3536 - }, - { - "epoch": 0.6030529877187841, - "grad_norm": 1.0503499507904053, - "learning_rate": 3.969309462915601e-06, - "loss": 0.1343, - "step": 3537 - }, - { - "epoch": 0.6032234861603218, - "grad_norm": 1.2630836963653564, - "learning_rate": 3.967604433077579e-06, - "loss": 0.0734, - "step": 3538 - }, - { - "epoch": 0.6033939846018594, - "grad_norm": 1.0602471828460693, - "learning_rate": 3.965899403239557e-06, - "loss": 0.1298, - "step": 3539 - }, - { - "epoch": 0.6035644830433972, - "grad_norm": 1.1123082637786865, - "learning_rate": 3.964194373401535e-06, - "loss": 0.1645, - "step": 3540 - }, - { - "epoch": 0.6037349814849349, - "grad_norm": 0.9318695068359375, - "learning_rate": 3.962489343563513e-06, - "loss": 0.1145, - "step": 3541 - }, - { - "epoch": 0.6039054799264726, - "grad_norm": 1.139296054840088, - "learning_rate": 3.96078431372549e-06, - "loss": 0.1015, - "step": 3542 - }, - { - "epoch": 0.6040759783680102, - "grad_norm": 1.1052963733673096, - "learning_rate": 3.959079283887468e-06, - "loss": 0.0879, - "step": 3543 - }, - { - "epoch": 0.6042464768095479, - "grad_norm": 1.128821611404419, - "learning_rate": 3.957374254049447e-06, - "loss": 0.1126, - "step": 3544 - }, - { - "epoch": 0.6044169752510856, - "grad_norm": 0.9472864866256714, - "learning_rate": 3.955669224211424e-06, - "loss": 0.072, - "step": 3545 - }, - { - "epoch": 0.6045874736926232, - "grad_norm": 0.9522503018379211, - "learning_rate": 3.953964194373402e-06, - "loss": 0.0858, - "step": 3546 - }, - { - "epoch": 0.6047579721341609, - "grad_norm": 1.2889169454574585, - "learning_rate": 3.95225916453538e-06, - "loss": 0.163, - "step": 3547 - }, - { - "epoch": 0.6049284705756987, - "grad_norm": 1.1692979335784912, - "learning_rate": 3.950554134697358e-06, - "loss": 0.1005, - "step": 3548 - }, - { - "epoch": 0.6050989690172364, - "grad_norm": 0.6599539518356323, - "learning_rate": 3.948849104859335e-06, - "loss": 0.0532, - "step": 3549 - }, - { - "epoch": 0.605269467458774, - "grad_norm": 1.4034006595611572, - "learning_rate": 3.947144075021313e-06, - "loss": 0.0865, - "step": 3550 - }, - { - "epoch": 0.6054399659003117, - "grad_norm": 0.9632552862167358, - "learning_rate": 3.945439045183291e-06, - "loss": 0.0768, - "step": 3551 - }, - { - "epoch": 0.6056104643418494, - "grad_norm": 1.3031929731369019, - "learning_rate": 3.943734015345269e-06, - "loss": 0.1316, - "step": 3552 - }, - { - "epoch": 0.6057809627833871, - "grad_norm": 0.8928091526031494, - "learning_rate": 3.942028985507247e-06, - "loss": 0.0637, - "step": 3553 - }, - { - "epoch": 0.6059514612249247, - "grad_norm": 1.0940797328948975, - "learning_rate": 3.940323955669225e-06, - "loss": 0.1166, - "step": 3554 - }, - { - "epoch": 0.6061219596664624, - "grad_norm": 0.6712972521781921, - "learning_rate": 3.9386189258312025e-06, - "loss": 0.0324, - "step": 3555 - }, - { - "epoch": 0.6062924581080001, - "grad_norm": 1.1135120391845703, - "learning_rate": 3.9369138959931805e-06, - "loss": 0.1124, - "step": 3556 - }, - { - "epoch": 0.6064629565495377, - "grad_norm": 1.8652747869491577, - "learning_rate": 3.935208866155158e-06, - "loss": 0.1985, - "step": 3557 - }, - { - "epoch": 0.6066334549910755, - "grad_norm": 1.6062350273132324, - "learning_rate": 3.9335038363171355e-06, - "loss": 0.1151, - "step": 3558 - }, - { - "epoch": 0.6068039534326132, - "grad_norm": 0.5442900657653809, - "learning_rate": 3.9317988064791135e-06, - "loss": 0.0433, - "step": 3559 - }, - { - "epoch": 0.6069744518741509, - "grad_norm": 1.7723592519760132, - "learning_rate": 3.9300937766410914e-06, - "loss": 0.2031, - "step": 3560 - }, - { - "epoch": 0.6071449503156885, - "grad_norm": 1.5068206787109375, - "learning_rate": 3.928388746803069e-06, - "loss": 0.1334, - "step": 3561 - }, - { - "epoch": 0.6073154487572262, - "grad_norm": 1.8842564821243286, - "learning_rate": 3.926683716965047e-06, - "loss": 0.2207, - "step": 3562 - }, - { - "epoch": 0.6074859471987639, - "grad_norm": 1.6659555435180664, - "learning_rate": 3.924978687127025e-06, - "loss": 0.1166, - "step": 3563 - }, - { - "epoch": 0.6076564456403015, - "grad_norm": 1.5390290021896362, - "learning_rate": 3.923273657289003e-06, - "loss": 0.2027, - "step": 3564 - }, - { - "epoch": 0.6078269440818392, - "grad_norm": 1.0417344570159912, - "learning_rate": 3.92156862745098e-06, - "loss": 0.0851, - "step": 3565 - }, - { - "epoch": 0.607997442523377, - "grad_norm": 1.357947587966919, - "learning_rate": 3.919863597612958e-06, - "loss": 0.1115, - "step": 3566 - }, - { - "epoch": 0.6081679409649147, - "grad_norm": 1.5044528245925903, - "learning_rate": 3.918158567774936e-06, - "loss": 0.1992, - "step": 3567 - }, - { - "epoch": 0.6083384394064523, - "grad_norm": 0.89848792552948, - "learning_rate": 3.916453537936914e-06, - "loss": 0.1002, - "step": 3568 - }, - { - "epoch": 0.60850893784799, - "grad_norm": 0.9809942245483398, - "learning_rate": 3.914748508098892e-06, - "loss": 0.1006, - "step": 3569 - }, - { - "epoch": 0.6086794362895277, - "grad_norm": 1.763635516166687, - "learning_rate": 3.91304347826087e-06, - "loss": 0.1184, - "step": 3570 - }, - { - "epoch": 0.6088499347310653, - "grad_norm": 1.5588911771774292, - "learning_rate": 3.911338448422848e-06, - "loss": 0.2385, - "step": 3571 - }, - { - "epoch": 0.609020433172603, - "grad_norm": 0.902176022529602, - "learning_rate": 3.909633418584826e-06, - "loss": 0.0772, - "step": 3572 - }, - { - "epoch": 0.6091909316141407, - "grad_norm": 1.4685441255569458, - "learning_rate": 3.907928388746803e-06, - "loss": 0.2082, - "step": 3573 - }, - { - "epoch": 0.6093614300556784, - "grad_norm": 1.2932947874069214, - "learning_rate": 3.906223358908781e-06, - "loss": 0.1485, - "step": 3574 - }, - { - "epoch": 0.609531928497216, - "grad_norm": 1.1713253259658813, - "learning_rate": 3.904518329070759e-06, - "loss": 0.101, - "step": 3575 - }, - { - "epoch": 0.6097024269387538, - "grad_norm": 1.101948618888855, - "learning_rate": 3.902813299232737e-06, - "loss": 0.0719, - "step": 3576 - }, - { - "epoch": 0.6098729253802915, - "grad_norm": 1.0822521448135376, - "learning_rate": 3.901108269394715e-06, - "loss": 0.1476, - "step": 3577 - }, - { - "epoch": 0.6100434238218291, - "grad_norm": 1.064866304397583, - "learning_rate": 3.899403239556693e-06, - "loss": 0.1732, - "step": 3578 - }, - { - "epoch": 0.6102139222633668, - "grad_norm": 1.401872992515564, - "learning_rate": 3.897698209718671e-06, - "loss": 0.1713, - "step": 3579 - }, - { - "epoch": 0.6103844207049045, - "grad_norm": 0.7649058103561401, - "learning_rate": 3.895993179880649e-06, - "loss": 0.079, - "step": 3580 - }, - { - "epoch": 0.6105549191464422, - "grad_norm": 0.8753597736358643, - "learning_rate": 3.894288150042626e-06, - "loss": 0.1105, - "step": 3581 - }, - { - "epoch": 0.6107254175879798, - "grad_norm": 1.8067176342010498, - "learning_rate": 3.892583120204604e-06, - "loss": 0.1334, - "step": 3582 - }, - { - "epoch": 0.6108959160295175, - "grad_norm": 1.6385232210159302, - "learning_rate": 3.890878090366582e-06, - "loss": 0.2134, - "step": 3583 - }, - { - "epoch": 0.6110664144710553, - "grad_norm": 1.6331698894500732, - "learning_rate": 3.88917306052856e-06, - "loss": 0.1875, - "step": 3584 - }, - { - "epoch": 0.6112369129125929, - "grad_norm": 1.0041797161102295, - "learning_rate": 3.887468030690537e-06, - "loss": 0.1336, - "step": 3585 - }, - { - "epoch": 0.6114074113541306, - "grad_norm": 1.4989748001098633, - "learning_rate": 3.8857630008525155e-06, - "loss": 0.1608, - "step": 3586 - }, - { - "epoch": 0.6115779097956683, - "grad_norm": 0.7531823515892029, - "learning_rate": 3.8840579710144935e-06, - "loss": 0.0711, - "step": 3587 - }, - { - "epoch": 0.611748408237206, - "grad_norm": 1.2867484092712402, - "learning_rate": 3.882352941176471e-06, - "loss": 0.1445, - "step": 3588 - }, - { - "epoch": 0.6119189066787436, - "grad_norm": 1.0655556917190552, - "learning_rate": 3.8806479113384485e-06, - "loss": 0.1091, - "step": 3589 - }, - { - "epoch": 0.6120894051202813, - "grad_norm": 1.1923772096633911, - "learning_rate": 3.8789428815004265e-06, - "loss": 0.1657, - "step": 3590 - }, - { - "epoch": 0.612259903561819, - "grad_norm": 0.8422601222991943, - "learning_rate": 3.8772378516624044e-06, - "loss": 0.1081, - "step": 3591 - }, - { - "epoch": 0.6124304020033566, - "grad_norm": 0.8882838487625122, - "learning_rate": 3.875532821824382e-06, - "loss": 0.1266, - "step": 3592 - }, - { - "epoch": 0.6126009004448943, - "grad_norm": 0.9292901158332825, - "learning_rate": 3.8738277919863595e-06, - "loss": 0.097, - "step": 3593 - }, - { - "epoch": 0.6127713988864321, - "grad_norm": 1.0757999420166016, - "learning_rate": 3.872122762148338e-06, - "loss": 0.1103, - "step": 3594 - }, - { - "epoch": 0.6129418973279698, - "grad_norm": 0.8698549866676331, - "learning_rate": 3.870417732310316e-06, - "loss": 0.0912, - "step": 3595 - }, - { - "epoch": 0.6131123957695074, - "grad_norm": 1.0098843574523926, - "learning_rate": 3.868712702472293e-06, - "loss": 0.1067, - "step": 3596 - }, - { - "epoch": 0.6132828942110451, - "grad_norm": 0.9158385992050171, - "learning_rate": 3.867007672634271e-06, - "loss": 0.1195, - "step": 3597 - }, - { - "epoch": 0.6134533926525828, - "grad_norm": 1.382043480873108, - "learning_rate": 3.865302642796249e-06, - "loss": 0.1285, - "step": 3598 - }, - { - "epoch": 0.6136238910941205, - "grad_norm": 1.214117407798767, - "learning_rate": 3.863597612958227e-06, - "loss": 0.1821, - "step": 3599 - }, - { - "epoch": 0.6137943895356581, - "grad_norm": 1.2158795595169067, - "learning_rate": 3.861892583120205e-06, - "loss": 0.0981, - "step": 3600 - }, - { - "epoch": 0.6137943895356581, - "eval_f1_score": 0.37468354430379747, - "eval_loss": 0.13799934089183807, - "eval_runtime": 182.6656, - "eval_samples_per_second": 54.745, - "eval_steps_per_second": 3.422, - "step": 3600 - }, - { - "epoch": 0.6139648879771958, - "grad_norm": 1.1329641342163086, - "learning_rate": 3.860187553282182e-06, - "loss": 0.1213, - "step": 3601 - }, - { - "epoch": 0.6141353864187336, - "grad_norm": 1.107250452041626, - "learning_rate": 3.858482523444161e-06, - "loss": 0.1135, - "step": 3602 - }, - { - "epoch": 0.6143058848602712, - "grad_norm": 1.2173339128494263, - "learning_rate": 3.856777493606139e-06, - "loss": 0.1582, - "step": 3603 - }, - { - "epoch": 0.6144763833018089, - "grad_norm": 0.8356215953826904, - "learning_rate": 3.855072463768116e-06, - "loss": 0.0714, - "step": 3604 - }, - { - "epoch": 0.6146468817433466, - "grad_norm": 1.1072112321853638, - "learning_rate": 3.853367433930094e-06, - "loss": 0.1626, - "step": 3605 - }, - { - "epoch": 0.6148173801848843, - "grad_norm": 1.1094462871551514, - "learning_rate": 3.851662404092072e-06, - "loss": 0.1109, - "step": 3606 - }, - { - "epoch": 0.6149878786264219, - "grad_norm": 0.8917166590690613, - "learning_rate": 3.84995737425405e-06, - "loss": 0.0976, - "step": 3607 - }, - { - "epoch": 0.6151583770679596, - "grad_norm": 0.9127071499824524, - "learning_rate": 3.848252344416028e-06, - "loss": 0.0789, - "step": 3608 - }, - { - "epoch": 0.6153288755094973, - "grad_norm": 0.988960325717926, - "learning_rate": 3.846547314578005e-06, - "loss": 0.1172, - "step": 3609 - }, - { - "epoch": 0.6154993739510349, - "grad_norm": 1.2566884756088257, - "learning_rate": 3.844842284739983e-06, - "loss": 0.1606, - "step": 3610 - }, - { - "epoch": 0.6156698723925726, - "grad_norm": 0.613823413848877, - "learning_rate": 3.843137254901962e-06, - "loss": 0.0685, - "step": 3611 - }, - { - "epoch": 0.6158403708341104, - "grad_norm": 1.1060439348220825, - "learning_rate": 3.841432225063939e-06, - "loss": 0.0973, - "step": 3612 - }, - { - "epoch": 0.6160108692756481, - "grad_norm": 1.327811360359192, - "learning_rate": 3.839727195225917e-06, - "loss": 0.1621, - "step": 3613 - }, - { - "epoch": 0.6161813677171857, - "grad_norm": 0.819416344165802, - "learning_rate": 3.838022165387895e-06, - "loss": 0.0598, - "step": 3614 - }, - { - "epoch": 0.6163518661587234, - "grad_norm": 0.9425433278083801, - "learning_rate": 3.836317135549873e-06, - "loss": 0.0993, - "step": 3615 - }, - { - "epoch": 0.6165223646002611, - "grad_norm": 0.7881563305854797, - "learning_rate": 3.834612105711851e-06, - "loss": 0.0835, - "step": 3616 - }, - { - "epoch": 0.6166928630417987, - "grad_norm": 1.2177718877792358, - "learning_rate": 3.832907075873828e-06, - "loss": 0.1378, - "step": 3617 - }, - { - "epoch": 0.6168633614833364, - "grad_norm": 1.150881290435791, - "learning_rate": 3.831202046035806e-06, - "loss": 0.1218, - "step": 3618 - }, - { - "epoch": 0.6170338599248741, - "grad_norm": 0.942000150680542, - "learning_rate": 3.8294970161977844e-06, - "loss": 0.1079, - "step": 3619 - }, - { - "epoch": 0.6172043583664119, - "grad_norm": 1.0197066068649292, - "learning_rate": 3.8277919863597615e-06, - "loss": 0.1312, - "step": 3620 - }, - { - "epoch": 0.6173748568079495, - "grad_norm": 1.200119137763977, - "learning_rate": 3.8260869565217395e-06, - "loss": 0.1345, - "step": 3621 - }, - { - "epoch": 0.6175453552494872, - "grad_norm": 0.7664061188697815, - "learning_rate": 3.824381926683717e-06, - "loss": 0.0989, - "step": 3622 - }, - { - "epoch": 0.6177158536910249, - "grad_norm": 1.594568133354187, - "learning_rate": 3.822676896845695e-06, - "loss": 0.1371, - "step": 3623 - }, - { - "epoch": 0.6178863521325625, - "grad_norm": 0.9898309111595154, - "learning_rate": 3.8209718670076725e-06, - "loss": 0.1059, - "step": 3624 - }, - { - "epoch": 0.6180568505741002, - "grad_norm": 1.4782969951629639, - "learning_rate": 3.81926683716965e-06, - "loss": 0.117, - "step": 3625 - }, - { - "epoch": 0.6182273490156379, - "grad_norm": 1.10325288772583, - "learning_rate": 3.817561807331628e-06, - "loss": 0.1597, - "step": 3626 - }, - { - "epoch": 0.6183978474571756, - "grad_norm": 0.9282763600349426, - "learning_rate": 3.815856777493606e-06, - "loss": 0.0651, - "step": 3627 - }, - { - "epoch": 0.6185683458987132, - "grad_norm": 1.0336819887161255, - "learning_rate": 3.8141517476555847e-06, - "loss": 0.063, - "step": 3628 - }, - { - "epoch": 0.618738844340251, - "grad_norm": 0.8412141799926758, - "learning_rate": 3.812446717817562e-06, - "loss": 0.0629, - "step": 3629 - }, - { - "epoch": 0.6189093427817887, - "grad_norm": 0.7322096228599548, - "learning_rate": 3.81074168797954e-06, - "loss": 0.0489, - "step": 3630 - }, - { - "epoch": 0.6190798412233263, - "grad_norm": 1.0511646270751953, - "learning_rate": 3.8090366581415177e-06, - "loss": 0.0878, - "step": 3631 - }, - { - "epoch": 0.619250339664864, - "grad_norm": 1.8431638479232788, - "learning_rate": 3.8073316283034956e-06, - "loss": 0.1911, - "step": 3632 - }, - { - "epoch": 0.6194208381064017, - "grad_norm": 1.5251615047454834, - "learning_rate": 3.805626598465473e-06, - "loss": 0.0968, - "step": 3633 - }, - { - "epoch": 0.6195913365479394, - "grad_norm": 0.9593001008033752, - "learning_rate": 3.803921568627451e-06, - "loss": 0.1134, - "step": 3634 - }, - { - "epoch": 0.619761834989477, - "grad_norm": 1.0317368507385254, - "learning_rate": 3.802216538789429e-06, - "loss": 0.0664, - "step": 3635 - }, - { - "epoch": 0.6199323334310147, - "grad_norm": 1.1496648788452148, - "learning_rate": 3.800511508951407e-06, - "loss": 0.1018, - "step": 3636 - }, - { - "epoch": 0.6201028318725524, - "grad_norm": 1.7094920873641968, - "learning_rate": 3.798806479113385e-06, - "loss": 0.1388, - "step": 3637 - }, - { - "epoch": 0.62027333031409, - "grad_norm": 1.065308928489685, - "learning_rate": 3.797101449275363e-06, - "loss": 0.1489, - "step": 3638 - }, - { - "epoch": 0.6204438287556278, - "grad_norm": 0.7095522880554199, - "learning_rate": 3.7953964194373404e-06, - "loss": 0.0547, - "step": 3639 - }, - { - "epoch": 0.6206143271971655, - "grad_norm": 1.4230934381484985, - "learning_rate": 3.7936913895993184e-06, - "loss": 0.1406, - "step": 3640 - }, - { - "epoch": 0.6207848256387032, - "grad_norm": 0.6358377933502197, - "learning_rate": 3.791986359761296e-06, - "loss": 0.0872, - "step": 3641 - }, - { - "epoch": 0.6209553240802408, - "grad_norm": 1.4998244047164917, - "learning_rate": 3.790281329923274e-06, - "loss": 0.1274, - "step": 3642 - }, - { - "epoch": 0.6211258225217785, - "grad_norm": 1.0013731718063354, - "learning_rate": 3.7885763000852514e-06, - "loss": 0.0714, - "step": 3643 - }, - { - "epoch": 0.6212963209633162, - "grad_norm": 1.1653109788894653, - "learning_rate": 3.7868712702472297e-06, - "loss": 0.0909, - "step": 3644 - }, - { - "epoch": 0.6214668194048539, - "grad_norm": 0.8749520182609558, - "learning_rate": 3.7851662404092077e-06, - "loss": 0.0743, - "step": 3645 - }, - { - "epoch": 0.6216373178463915, - "grad_norm": 1.5417793989181519, - "learning_rate": 3.7834612105711856e-06, - "loss": 0.1736, - "step": 3646 - }, - { - "epoch": 0.6218078162879292, - "grad_norm": 1.2029529809951782, - "learning_rate": 3.781756180733163e-06, - "loss": 0.1049, - "step": 3647 - }, - { - "epoch": 0.621978314729467, - "grad_norm": 1.119659423828125, - "learning_rate": 3.780051150895141e-06, - "loss": 0.0916, - "step": 3648 - }, - { - "epoch": 0.6221488131710046, - "grad_norm": 1.0468127727508545, - "learning_rate": 3.7783461210571186e-06, - "loss": 0.111, - "step": 3649 - }, - { - "epoch": 0.6223193116125423, - "grad_norm": 1.7078468799591064, - "learning_rate": 3.7766410912190966e-06, - "loss": 0.1596, - "step": 3650 - }, - { - "epoch": 0.62248981005408, - "grad_norm": 1.146774172782898, - "learning_rate": 3.774936061381074e-06, - "loss": 0.1152, - "step": 3651 - }, - { - "epoch": 0.6226603084956177, - "grad_norm": 0.7872032523155212, - "learning_rate": 3.773231031543052e-06, - "loss": 0.0634, - "step": 3652 - }, - { - "epoch": 0.6228308069371553, - "grad_norm": 1.1197142601013184, - "learning_rate": 3.7715260017050304e-06, - "loss": 0.1147, - "step": 3653 - }, - { - "epoch": 0.623001305378693, - "grad_norm": 0.7918756604194641, - "learning_rate": 3.769820971867008e-06, - "loss": 0.078, - "step": 3654 - }, - { - "epoch": 0.6231718038202307, - "grad_norm": 1.0666242837905884, - "learning_rate": 3.768115942028986e-06, - "loss": 0.152, - "step": 3655 - }, - { - "epoch": 0.6233423022617683, - "grad_norm": 2.1137020587921143, - "learning_rate": 3.766410912190964e-06, - "loss": 0.2122, - "step": 3656 - }, - { - "epoch": 0.6235128007033061, - "grad_norm": 0.9335868954658508, - "learning_rate": 3.7647058823529414e-06, - "loss": 0.1008, - "step": 3657 - }, - { - "epoch": 0.6236832991448438, - "grad_norm": 1.6401317119598389, - "learning_rate": 3.7630008525149193e-06, - "loss": 0.1642, - "step": 3658 - }, - { - "epoch": 0.6238537975863815, - "grad_norm": 1.146842122077942, - "learning_rate": 3.761295822676897e-06, - "loss": 0.1184, - "step": 3659 - }, - { - "epoch": 0.6240242960279191, - "grad_norm": 0.8928496241569519, - "learning_rate": 3.7595907928388748e-06, - "loss": 0.0952, - "step": 3660 - }, - { - "epoch": 0.6241947944694568, - "grad_norm": 1.258764386177063, - "learning_rate": 3.757885763000853e-06, - "loss": 0.156, - "step": 3661 - }, - { - "epoch": 0.6243652929109945, - "grad_norm": 1.6915022134780884, - "learning_rate": 3.7561807331628307e-06, - "loss": 0.1303, - "step": 3662 - }, - { - "epoch": 0.6245357913525321, - "grad_norm": 0.8792423605918884, - "learning_rate": 3.7544757033248086e-06, - "loss": 0.0861, - "step": 3663 - }, - { - "epoch": 0.6247062897940698, - "grad_norm": 1.505020022392273, - "learning_rate": 3.7527706734867866e-06, - "loss": 0.1456, - "step": 3664 - }, - { - "epoch": 0.6248767882356075, - "grad_norm": 0.7560902833938599, - "learning_rate": 3.751065643648764e-06, - "loss": 0.1045, - "step": 3665 - }, - { - "epoch": 0.6250472866771453, - "grad_norm": 1.0680619478225708, - "learning_rate": 3.749360613810742e-06, - "loss": 0.1004, - "step": 3666 - }, - { - "epoch": 0.6252177851186829, - "grad_norm": 1.1420097351074219, - "learning_rate": 3.7476555839727196e-06, - "loss": 0.1339, - "step": 3667 - }, - { - "epoch": 0.6253882835602206, - "grad_norm": 1.1699634790420532, - "learning_rate": 3.7459505541346975e-06, - "loss": 0.1435, - "step": 3668 - }, - { - "epoch": 0.6255587820017583, - "grad_norm": 1.75743567943573, - "learning_rate": 3.744245524296676e-06, - "loss": 0.1742, - "step": 3669 - }, - { - "epoch": 0.6257292804432959, - "grad_norm": 0.8941011428833008, - "learning_rate": 3.7425404944586534e-06, - "loss": 0.0935, - "step": 3670 - }, - { - "epoch": 0.6258997788848336, - "grad_norm": 1.1393169164657593, - "learning_rate": 3.7408354646206314e-06, - "loss": 0.1109, - "step": 3671 - }, - { - "epoch": 0.6260702773263713, - "grad_norm": 0.9778454303741455, - "learning_rate": 3.739130434782609e-06, - "loss": 0.0932, - "step": 3672 - }, - { - "epoch": 0.626240775767909, - "grad_norm": 0.9243488311767578, - "learning_rate": 3.737425404944587e-06, - "loss": 0.1189, - "step": 3673 - }, - { - "epoch": 0.6264112742094466, - "grad_norm": 1.424014687538147, - "learning_rate": 3.7357203751065648e-06, - "loss": 0.1866, - "step": 3674 - }, - { - "epoch": 0.6265817726509844, - "grad_norm": 1.0741225481033325, - "learning_rate": 3.7340153452685423e-06, - "loss": 0.1011, - "step": 3675 - }, - { - "epoch": 0.6267522710925221, - "grad_norm": 0.9581297636032104, - "learning_rate": 3.7323103154305202e-06, - "loss": 0.0813, - "step": 3676 - }, - { - "epoch": 0.6269227695340597, - "grad_norm": 1.1146941184997559, - "learning_rate": 3.7306052855924978e-06, - "loss": 0.0959, - "step": 3677 - }, - { - "epoch": 0.6270932679755974, - "grad_norm": 1.449524998664856, - "learning_rate": 3.728900255754476e-06, - "loss": 0.1614, - "step": 3678 - }, - { - "epoch": 0.6272637664171351, - "grad_norm": 1.197462558746338, - "learning_rate": 3.727195225916454e-06, - "loss": 0.1207, - "step": 3679 - }, - { - "epoch": 0.6274342648586728, - "grad_norm": 1.4450664520263672, - "learning_rate": 3.7254901960784316e-06, - "loss": 0.1771, - "step": 3680 - }, - { - "epoch": 0.6276047633002104, - "grad_norm": 0.942268967628479, - "learning_rate": 3.7237851662404096e-06, - "loss": 0.1028, - "step": 3681 - }, - { - "epoch": 0.6277752617417481, - "grad_norm": 0.8110191226005554, - "learning_rate": 3.7220801364023875e-06, - "loss": 0.0894, - "step": 3682 - }, - { - "epoch": 0.6279457601832859, - "grad_norm": 0.8219690322875977, - "learning_rate": 3.720375106564365e-06, - "loss": 0.0416, - "step": 3683 - }, - { - "epoch": 0.6281162586248235, - "grad_norm": 1.3544223308563232, - "learning_rate": 3.718670076726343e-06, - "loss": 0.1467, - "step": 3684 - }, - { - "epoch": 0.6282867570663612, - "grad_norm": 0.9889111518859863, - "learning_rate": 3.7169650468883205e-06, - "loss": 0.0893, - "step": 3685 - }, - { - "epoch": 0.6284572555078989, - "grad_norm": 1.080906629562378, - "learning_rate": 3.715260017050299e-06, - "loss": 0.1105, - "step": 3686 - }, - { - "epoch": 0.6286277539494366, - "grad_norm": 1.4223904609680176, - "learning_rate": 3.713554987212277e-06, - "loss": 0.1193, - "step": 3687 - }, - { - "epoch": 0.6287982523909742, - "grad_norm": 1.2540326118469238, - "learning_rate": 3.7118499573742543e-06, - "loss": 0.1006, - "step": 3688 - }, - { - "epoch": 0.6289687508325119, - "grad_norm": 1.8358228206634521, - "learning_rate": 3.7101449275362323e-06, - "loss": 0.2074, - "step": 3689 - }, - { - "epoch": 0.6291392492740496, - "grad_norm": 0.9577386975288391, - "learning_rate": 3.70843989769821e-06, - "loss": 0.1069, - "step": 3690 - }, - { - "epoch": 0.6293097477155873, - "grad_norm": 1.1612111330032349, - "learning_rate": 3.7067348678601878e-06, - "loss": 0.1156, - "step": 3691 - }, - { - "epoch": 0.629480246157125, - "grad_norm": 0.8670212626457214, - "learning_rate": 3.7050298380221657e-06, - "loss": 0.0882, - "step": 3692 - }, - { - "epoch": 0.6296507445986627, - "grad_norm": 1.4416753053665161, - "learning_rate": 3.7033248081841432e-06, - "loss": 0.2159, - "step": 3693 - }, - { - "epoch": 0.6298212430402004, - "grad_norm": 1.1963956356048584, - "learning_rate": 3.7016197783461216e-06, - "loss": 0.1619, - "step": 3694 - }, - { - "epoch": 0.629991741481738, - "grad_norm": 1.2075142860412598, - "learning_rate": 3.6999147485080996e-06, - "loss": 0.144, - "step": 3695 - }, - { - "epoch": 0.6301622399232757, - "grad_norm": 1.5625414848327637, - "learning_rate": 3.698209718670077e-06, - "loss": 0.1359, - "step": 3696 - }, - { - "epoch": 0.6303327383648134, - "grad_norm": 0.7365015149116516, - "learning_rate": 3.696504688832055e-06, - "loss": 0.0789, - "step": 3697 - }, - { - "epoch": 0.6305032368063511, - "grad_norm": 1.8156697750091553, - "learning_rate": 3.6947996589940326e-06, - "loss": 0.1729, - "step": 3698 - }, - { - "epoch": 0.6306737352478887, - "grad_norm": 1.6419410705566406, - "learning_rate": 3.6930946291560105e-06, - "loss": 0.1807, - "step": 3699 - }, - { - "epoch": 0.6308442336894264, - "grad_norm": 1.471413016319275, - "learning_rate": 3.6913895993179884e-06, - "loss": 0.1174, - "step": 3700 - }, - { - "epoch": 0.6308442336894264, - "eval_f1_score": 0.39243498817966904, - "eval_loss": 0.13921166956424713, - "eval_runtime": 182.6385, - "eval_samples_per_second": 54.753, - "eval_steps_per_second": 3.422, - "step": 3700 - }, - { - "epoch": 0.6310147321309642, - "grad_norm": 1.068755030632019, - "learning_rate": 3.689684569479966e-06, - "loss": 0.1144, - "step": 3701 - }, - { - "epoch": 0.6311852305725018, - "grad_norm": 1.3145835399627686, - "learning_rate": 3.687979539641944e-06, - "loss": 0.1531, - "step": 3702 - }, - { - "epoch": 0.6313557290140395, - "grad_norm": 0.9875397086143494, - "learning_rate": 3.6862745098039223e-06, - "loss": 0.0833, - "step": 3703 - }, - { - "epoch": 0.6315262274555772, - "grad_norm": 1.2603150606155396, - "learning_rate": 3.6845694799659e-06, - "loss": 0.1177, - "step": 3704 - }, - { - "epoch": 0.6316967258971149, - "grad_norm": 1.634809970855713, - "learning_rate": 3.6828644501278778e-06, - "loss": 0.1531, - "step": 3705 - }, - { - "epoch": 0.6318672243386525, - "grad_norm": 1.2602183818817139, - "learning_rate": 3.6811594202898553e-06, - "loss": 0.087, - "step": 3706 - }, - { - "epoch": 0.6320377227801902, - "grad_norm": 1.5184547901153564, - "learning_rate": 3.6794543904518332e-06, - "loss": 0.1241, - "step": 3707 - }, - { - "epoch": 0.6322082212217279, - "grad_norm": 1.2422740459442139, - "learning_rate": 3.6777493606138108e-06, - "loss": 0.1015, - "step": 3708 - }, - { - "epoch": 0.6323787196632655, - "grad_norm": 1.2954447269439697, - "learning_rate": 3.6760443307757887e-06, - "loss": 0.1166, - "step": 3709 - }, - { - "epoch": 0.6325492181048032, - "grad_norm": 1.390432596206665, - "learning_rate": 3.6743393009377667e-06, - "loss": 0.0809, - "step": 3710 - }, - { - "epoch": 0.632719716546341, - "grad_norm": 1.4333117008209229, - "learning_rate": 3.6726342710997446e-06, - "loss": 0.1123, - "step": 3711 - }, - { - "epoch": 0.6328902149878787, - "grad_norm": 1.1851670742034912, - "learning_rate": 3.6709292412617225e-06, - "loss": 0.1495, - "step": 3712 - }, - { - "epoch": 0.6330607134294163, - "grad_norm": 1.3659263849258423, - "learning_rate": 3.6692242114237005e-06, - "loss": 0.1568, - "step": 3713 - }, - { - "epoch": 0.633231211870954, - "grad_norm": 0.888971745967865, - "learning_rate": 3.667519181585678e-06, - "loss": 0.1037, - "step": 3714 - }, - { - "epoch": 0.6334017103124917, - "grad_norm": 0.7728138566017151, - "learning_rate": 3.665814151747656e-06, - "loss": 0.0762, - "step": 3715 - }, - { - "epoch": 0.6335722087540293, - "grad_norm": 0.8941513895988464, - "learning_rate": 3.6641091219096335e-06, - "loss": 0.0907, - "step": 3716 - }, - { - "epoch": 0.633742707195567, - "grad_norm": 1.4324272871017456, - "learning_rate": 3.6624040920716114e-06, - "loss": 0.1573, - "step": 3717 - }, - { - "epoch": 0.6339132056371047, - "grad_norm": 1.6339035034179688, - "learning_rate": 3.660699062233589e-06, - "loss": 0.1796, - "step": 3718 - }, - { - "epoch": 0.6340837040786425, - "grad_norm": 1.3584506511688232, - "learning_rate": 3.6589940323955673e-06, - "loss": 0.0951, - "step": 3719 - }, - { - "epoch": 0.63425420252018, - "grad_norm": 1.2877280712127686, - "learning_rate": 3.6572890025575453e-06, - "loss": 0.1783, - "step": 3720 - }, - { - "epoch": 0.6344247009617178, - "grad_norm": 1.242985725402832, - "learning_rate": 3.6555839727195232e-06, - "loss": 0.1605, - "step": 3721 - }, - { - "epoch": 0.6345951994032555, - "grad_norm": 0.9977898597717285, - "learning_rate": 3.6538789428815008e-06, - "loss": 0.0877, - "step": 3722 - }, - { - "epoch": 0.6347656978447931, - "grad_norm": 1.6359201669692993, - "learning_rate": 3.6521739130434787e-06, - "loss": 0.1524, - "step": 3723 - }, - { - "epoch": 0.6349361962863308, - "grad_norm": 1.876238465309143, - "learning_rate": 3.6504688832054562e-06, - "loss": 0.23, - "step": 3724 - }, - { - "epoch": 0.6351066947278685, - "grad_norm": 0.8664019107818604, - "learning_rate": 3.648763853367434e-06, - "loss": 0.0898, - "step": 3725 - }, - { - "epoch": 0.6352771931694062, - "grad_norm": 1.3366501331329346, - "learning_rate": 3.6470588235294117e-06, - "loss": 0.1431, - "step": 3726 - }, - { - "epoch": 0.6354476916109438, - "grad_norm": 1.0310951471328735, - "learning_rate": 3.6453537936913896e-06, - "loss": 0.1006, - "step": 3727 - }, - { - "epoch": 0.6356181900524815, - "grad_norm": 1.419967770576477, - "learning_rate": 3.643648763853368e-06, - "loss": 0.1024, - "step": 3728 - }, - { - "epoch": 0.6357886884940193, - "grad_norm": 1.6248990297317505, - "learning_rate": 3.6419437340153455e-06, - "loss": 0.1434, - "step": 3729 - }, - { - "epoch": 0.6359591869355569, - "grad_norm": 0.9425191879272461, - "learning_rate": 3.6402387041773235e-06, - "loss": 0.1084, - "step": 3730 - }, - { - "epoch": 0.6361296853770946, - "grad_norm": 0.997732400894165, - "learning_rate": 3.6385336743393014e-06, - "loss": 0.1066, - "step": 3731 - }, - { - "epoch": 0.6363001838186323, - "grad_norm": 1.0537697076797485, - "learning_rate": 3.636828644501279e-06, - "loss": 0.095, - "step": 3732 - }, - { - "epoch": 0.63647068226017, - "grad_norm": 1.1263371706008911, - "learning_rate": 3.635123614663257e-06, - "loss": 0.1279, - "step": 3733 - }, - { - "epoch": 0.6366411807017076, - "grad_norm": 1.06753408908844, - "learning_rate": 3.6334185848252344e-06, - "loss": 0.1181, - "step": 3734 - }, - { - "epoch": 0.6368116791432453, - "grad_norm": 1.4207340478897095, - "learning_rate": 3.6317135549872124e-06, - "loss": 0.1478, - "step": 3735 - }, - { - "epoch": 0.636982177584783, - "grad_norm": 0.9397812485694885, - "learning_rate": 3.6300085251491908e-06, - "loss": 0.1224, - "step": 3736 - }, - { - "epoch": 0.6371526760263208, - "grad_norm": 1.3355766534805298, - "learning_rate": 3.6283034953111683e-06, - "loss": 0.1126, - "step": 3737 - }, - { - "epoch": 0.6373231744678584, - "grad_norm": 1.1690781116485596, - "learning_rate": 3.6265984654731462e-06, - "loss": 0.083, - "step": 3738 - }, - { - "epoch": 0.6374936729093961, - "grad_norm": 1.7492491006851196, - "learning_rate": 3.624893435635124e-06, - "loss": 0.0986, - "step": 3739 - }, - { - "epoch": 0.6376641713509338, - "grad_norm": 1.1186761856079102, - "learning_rate": 3.6231884057971017e-06, - "loss": 0.0829, - "step": 3740 - }, - { - "epoch": 0.6378346697924714, - "grad_norm": 1.0030421018600464, - "learning_rate": 3.6214833759590796e-06, - "loss": 0.1317, - "step": 3741 - }, - { - "epoch": 0.6380051682340091, - "grad_norm": 1.014840006828308, - "learning_rate": 3.619778346121057e-06, - "loss": 0.0665, - "step": 3742 - }, - { - "epoch": 0.6381756666755468, - "grad_norm": 0.9337121844291687, - "learning_rate": 3.618073316283035e-06, - "loss": 0.0969, - "step": 3743 - }, - { - "epoch": 0.6383461651170845, - "grad_norm": 1.4615178108215332, - "learning_rate": 3.6163682864450126e-06, - "loss": 0.1539, - "step": 3744 - }, - { - "epoch": 0.6385166635586221, - "grad_norm": 1.2855770587921143, - "learning_rate": 3.614663256606991e-06, - "loss": 0.0709, - "step": 3745 - }, - { - "epoch": 0.6386871620001598, - "grad_norm": 0.9363089203834534, - "learning_rate": 3.612958226768969e-06, - "loss": 0.0952, - "step": 3746 - }, - { - "epoch": 0.6388576604416976, - "grad_norm": 1.3684135675430298, - "learning_rate": 3.6112531969309465e-06, - "loss": 0.1522, - "step": 3747 - }, - { - "epoch": 0.6390281588832352, - "grad_norm": 1.185192346572876, - "learning_rate": 3.6095481670929244e-06, - "loss": 0.1127, - "step": 3748 - }, - { - "epoch": 0.6391986573247729, - "grad_norm": 0.8936169147491455, - "learning_rate": 3.6078431372549024e-06, - "loss": 0.0814, - "step": 3749 - }, - { - "epoch": 0.6393691557663106, - "grad_norm": 1.8236827850341797, - "learning_rate": 3.60613810741688e-06, - "loss": 0.216, - "step": 3750 - }, - { - "epoch": 0.6395396542078483, - "grad_norm": 0.8242852687835693, - "learning_rate": 3.604433077578858e-06, - "loss": 0.0427, - "step": 3751 - }, - { - "epoch": 0.6397101526493859, - "grad_norm": 0.960161566734314, - "learning_rate": 3.6027280477408354e-06, - "loss": 0.1096, - "step": 3752 - }, - { - "epoch": 0.6398806510909236, - "grad_norm": 1.8381279706954956, - "learning_rate": 3.6010230179028137e-06, - "loss": 0.1023, - "step": 3753 - }, - { - "epoch": 0.6400511495324613, - "grad_norm": 1.9192173480987549, - "learning_rate": 3.5993179880647917e-06, - "loss": 0.0642, - "step": 3754 - }, - { - "epoch": 0.6402216479739989, - "grad_norm": 1.2048285007476807, - "learning_rate": 3.5976129582267692e-06, - "loss": 0.1275, - "step": 3755 - }, - { - "epoch": 0.6403921464155367, - "grad_norm": 0.9745681285858154, - "learning_rate": 3.595907928388747e-06, - "loss": 0.0782, - "step": 3756 - }, - { - "epoch": 0.6405626448570744, - "grad_norm": 1.9655649662017822, - "learning_rate": 3.594202898550725e-06, - "loss": 0.1831, - "step": 3757 - }, - { - "epoch": 0.6407331432986121, - "grad_norm": 2.731715440750122, - "learning_rate": 3.5924978687127026e-06, - "loss": 0.2583, - "step": 3758 - }, - { - "epoch": 0.6409036417401497, - "grad_norm": 1.749211072921753, - "learning_rate": 3.5907928388746806e-06, - "loss": 0.1881, - "step": 3759 - }, - { - "epoch": 0.6410741401816874, - "grad_norm": 2.5945215225219727, - "learning_rate": 3.589087809036658e-06, - "loss": 0.2383, - "step": 3760 - }, - { - "epoch": 0.6412446386232251, - "grad_norm": 1.4661320447921753, - "learning_rate": 3.5873827791986365e-06, - "loss": 0.1612, - "step": 3761 - }, - { - "epoch": 0.6414151370647627, - "grad_norm": 1.5145949125289917, - "learning_rate": 3.5856777493606144e-06, - "loss": 0.1546, - "step": 3762 - }, - { - "epoch": 0.6415856355063004, - "grad_norm": 1.1292080879211426, - "learning_rate": 3.583972719522592e-06, - "loss": 0.1296, - "step": 3763 - }, - { - "epoch": 0.6417561339478381, - "grad_norm": 1.090998888015747, - "learning_rate": 3.58226768968457e-06, - "loss": 0.1035, - "step": 3764 - }, - { - "epoch": 0.6419266323893759, - "grad_norm": 2.3366146087646484, - "learning_rate": 3.5805626598465474e-06, - "loss": 0.2259, - "step": 3765 - }, - { - "epoch": 0.6420971308309135, - "grad_norm": 1.2206898927688599, - "learning_rate": 3.5788576300085254e-06, - "loss": 0.1429, - "step": 3766 - }, - { - "epoch": 0.6422676292724512, - "grad_norm": 1.031091332435608, - "learning_rate": 3.5771526001705033e-06, - "loss": 0.1154, - "step": 3767 - }, - { - "epoch": 0.6424381277139889, - "grad_norm": 1.4148261547088623, - "learning_rate": 3.575447570332481e-06, - "loss": 0.214, - "step": 3768 - }, - { - "epoch": 0.6426086261555265, - "grad_norm": 1.4270838499069214, - "learning_rate": 3.573742540494459e-06, - "loss": 0.113, - "step": 3769 - }, - { - "epoch": 0.6427791245970642, - "grad_norm": 1.338118314743042, - "learning_rate": 3.572037510656437e-06, - "loss": 0.0562, - "step": 3770 - }, - { - "epoch": 0.6429496230386019, - "grad_norm": 1.1331686973571777, - "learning_rate": 3.5703324808184147e-06, - "loss": 0.1756, - "step": 3771 - }, - { - "epoch": 0.6431201214801396, - "grad_norm": 1.1654812097549438, - "learning_rate": 3.5686274509803926e-06, - "loss": 0.147, - "step": 3772 - }, - { - "epoch": 0.6432906199216772, - "grad_norm": 1.7656182050704956, - "learning_rate": 3.56692242114237e-06, - "loss": 0.2249, - "step": 3773 - }, - { - "epoch": 0.643461118363215, - "grad_norm": 1.0476933717727661, - "learning_rate": 3.565217391304348e-06, - "loss": 0.1162, - "step": 3774 - }, - { - "epoch": 0.6436316168047527, - "grad_norm": 1.039013385772705, - "learning_rate": 3.5635123614663256e-06, - "loss": 0.1064, - "step": 3775 - }, - { - "epoch": 0.6438021152462903, - "grad_norm": 0.9786112904548645, - "learning_rate": 3.5618073316283036e-06, - "loss": 0.0754, - "step": 3776 - }, - { - "epoch": 0.643972613687828, - "grad_norm": 1.4536173343658447, - "learning_rate": 3.5601023017902815e-06, - "loss": 0.1466, - "step": 3777 - }, - { - "epoch": 0.6441431121293657, - "grad_norm": 0.7579110860824585, - "learning_rate": 3.55839727195226e-06, - "loss": 0.0687, - "step": 3778 - }, - { - "epoch": 0.6443136105709034, - "grad_norm": 0.8089442849159241, - "learning_rate": 3.5566922421142374e-06, - "loss": 0.106, - "step": 3779 - }, - { - "epoch": 0.644484109012441, - "grad_norm": 1.1764006614685059, - "learning_rate": 3.5549872122762154e-06, - "loss": 0.1383, - "step": 3780 - }, - { - "epoch": 0.6446546074539787, - "grad_norm": 1.211492657661438, - "learning_rate": 3.553282182438193e-06, - "loss": 0.145, - "step": 3781 - }, - { - "epoch": 0.6448251058955164, - "grad_norm": 1.0914140939712524, - "learning_rate": 3.551577152600171e-06, - "loss": 0.1325, - "step": 3782 - }, - { - "epoch": 0.644995604337054, - "grad_norm": 1.4939686059951782, - "learning_rate": 3.5498721227621484e-06, - "loss": 0.1502, - "step": 3783 - }, - { - "epoch": 0.6451661027785918, - "grad_norm": 0.7499918341636658, - "learning_rate": 3.5481670929241263e-06, - "loss": 0.0849, - "step": 3784 - }, - { - "epoch": 0.6453366012201295, - "grad_norm": 0.8260846734046936, - "learning_rate": 3.5464620630861043e-06, - "loss": 0.0753, - "step": 3785 - }, - { - "epoch": 0.6455070996616672, - "grad_norm": 1.132487177848816, - "learning_rate": 3.544757033248082e-06, - "loss": 0.141, - "step": 3786 - }, - { - "epoch": 0.6456775981032048, - "grad_norm": 0.7627885937690735, - "learning_rate": 3.54305200341006e-06, - "loss": 0.0792, - "step": 3787 - }, - { - "epoch": 0.6458480965447425, - "grad_norm": 1.0610328912734985, - "learning_rate": 3.541346973572038e-06, - "loss": 0.0832, - "step": 3788 - }, - { - "epoch": 0.6460185949862802, - "grad_norm": 1.3593807220458984, - "learning_rate": 3.5396419437340156e-06, - "loss": 0.1432, - "step": 3789 - }, - { - "epoch": 0.6461890934278179, - "grad_norm": 1.1016868352890015, - "learning_rate": 3.5379369138959936e-06, - "loss": 0.1177, - "step": 3790 - }, - { - "epoch": 0.6463595918693555, - "grad_norm": 0.8754037022590637, - "learning_rate": 3.536231884057971e-06, - "loss": 0.0605, - "step": 3791 - }, - { - "epoch": 0.6465300903108933, - "grad_norm": 1.6358007192611694, - "learning_rate": 3.534526854219949e-06, - "loss": 0.1972, - "step": 3792 - }, - { - "epoch": 0.646700588752431, - "grad_norm": 1.2707304954528809, - "learning_rate": 3.5328218243819266e-06, - "loss": 0.1678, - "step": 3793 - }, - { - "epoch": 0.6468710871939686, - "grad_norm": 1.263748288154602, - "learning_rate": 3.5311167945439045e-06, - "loss": 0.1438, - "step": 3794 - }, - { - "epoch": 0.6470415856355063, - "grad_norm": 1.232202172279358, - "learning_rate": 3.529411764705883e-06, - "loss": 0.1368, - "step": 3795 - }, - { - "epoch": 0.647212084077044, - "grad_norm": 2.188258171081543, - "learning_rate": 3.527706734867861e-06, - "loss": 0.1507, - "step": 3796 - }, - { - "epoch": 0.6473825825185817, - "grad_norm": 1.0352145433425903, - "learning_rate": 3.5260017050298384e-06, - "loss": 0.0969, - "step": 3797 - }, - { - "epoch": 0.6475530809601193, - "grad_norm": 1.2449350357055664, - "learning_rate": 3.5242966751918163e-06, - "loss": 0.1264, - "step": 3798 - }, - { - "epoch": 0.647723579401657, - "grad_norm": 0.8735178709030151, - "learning_rate": 3.522591645353794e-06, - "loss": 0.0992, - "step": 3799 - }, - { - "epoch": 0.6478940778431947, - "grad_norm": 1.3349305391311646, - "learning_rate": 3.5208866155157718e-06, - "loss": 0.0783, - "step": 3800 - }, - { - "epoch": 0.6478940778431947, - "eval_f1_score": 0.36507936507936506, - "eval_loss": 0.1384362429380417, - "eval_runtime": 182.6236, - "eval_samples_per_second": 54.757, - "eval_steps_per_second": 3.422, - "step": 3800 - }, - { - "epoch": 0.6480645762847324, - "grad_norm": 1.3321579694747925, - "learning_rate": 3.5191815856777493e-06, - "loss": 0.1426, - "step": 3801 - }, - { - "epoch": 0.6482350747262701, - "grad_norm": 2.1399898529052734, - "learning_rate": 3.5174765558397273e-06, - "loss": 0.1892, - "step": 3802 - }, - { - "epoch": 0.6484055731678078, - "grad_norm": 1.6246662139892578, - "learning_rate": 3.5157715260017056e-06, - "loss": 0.1607, - "step": 3803 - }, - { - "epoch": 0.6485760716093455, - "grad_norm": 1.098414421081543, - "learning_rate": 3.514066496163683e-06, - "loss": 0.1288, - "step": 3804 - }, - { - "epoch": 0.6487465700508831, - "grad_norm": 1.7618242502212524, - "learning_rate": 3.512361466325661e-06, - "loss": 0.1852, - "step": 3805 - }, - { - "epoch": 0.6489170684924208, - "grad_norm": 2.6852781772613525, - "learning_rate": 3.510656436487639e-06, - "loss": 0.2276, - "step": 3806 - }, - { - "epoch": 0.6490875669339585, - "grad_norm": 0.8381413221359253, - "learning_rate": 3.5089514066496166e-06, - "loss": 0.0753, - "step": 3807 - }, - { - "epoch": 0.6492580653754961, - "grad_norm": 1.3367550373077393, - "learning_rate": 3.5072463768115945e-06, - "loss": 0.1042, - "step": 3808 - }, - { - "epoch": 0.6494285638170338, - "grad_norm": 1.3574849367141724, - "learning_rate": 3.505541346973572e-06, - "loss": 0.1217, - "step": 3809 - }, - { - "epoch": 0.6495990622585716, - "grad_norm": 1.8206461668014526, - "learning_rate": 3.50383631713555e-06, - "loss": 0.2149, - "step": 3810 - }, - { - "epoch": 0.6497695607001093, - "grad_norm": 1.9767436981201172, - "learning_rate": 3.5021312872975284e-06, - "loss": 0.1617, - "step": 3811 - }, - { - "epoch": 0.6499400591416469, - "grad_norm": 1.1481764316558838, - "learning_rate": 3.500426257459506e-06, - "loss": 0.126, - "step": 3812 - }, - { - "epoch": 0.6501105575831846, - "grad_norm": 1.4578038454055786, - "learning_rate": 3.498721227621484e-06, - "loss": 0.17, - "step": 3813 - }, - { - "epoch": 0.6502810560247223, - "grad_norm": 1.791718602180481, - "learning_rate": 3.4970161977834618e-06, - "loss": 0.1549, - "step": 3814 - }, - { - "epoch": 0.6504515544662599, - "grad_norm": 1.4585214853286743, - "learning_rate": 3.4953111679454393e-06, - "loss": 0.1895, - "step": 3815 - }, - { - "epoch": 0.6506220529077976, - "grad_norm": 1.3441143035888672, - "learning_rate": 3.4936061381074173e-06, - "loss": 0.1039, - "step": 3816 - }, - { - "epoch": 0.6507925513493353, - "grad_norm": 1.3681156635284424, - "learning_rate": 3.4919011082693948e-06, - "loss": 0.1486, - "step": 3817 - }, - { - "epoch": 0.650963049790873, - "grad_norm": 1.5697520971298218, - "learning_rate": 3.4901960784313727e-06, - "loss": 0.1344, - "step": 3818 - }, - { - "epoch": 0.6511335482324107, - "grad_norm": 1.6502066850662231, - "learning_rate": 3.4884910485933502e-06, - "loss": 0.2005, - "step": 3819 - }, - { - "epoch": 0.6513040466739484, - "grad_norm": 1.2139993906021118, - "learning_rate": 3.4867860187553286e-06, - "loss": 0.1447, - "step": 3820 - }, - { - "epoch": 0.6514745451154861, - "grad_norm": 1.3323631286621094, - "learning_rate": 3.4850809889173066e-06, - "loss": 0.099, - "step": 3821 - }, - { - "epoch": 0.6516450435570237, - "grad_norm": 2.062469720840454, - "learning_rate": 3.483375959079284e-06, - "loss": 0.2513, - "step": 3822 - }, - { - "epoch": 0.6518155419985614, - "grad_norm": 1.5062077045440674, - "learning_rate": 3.481670929241262e-06, - "loss": 0.1807, - "step": 3823 - }, - { - "epoch": 0.6519860404400991, - "grad_norm": 1.6660444736480713, - "learning_rate": 3.47996589940324e-06, - "loss": 0.1442, - "step": 3824 - }, - { - "epoch": 0.6521565388816368, - "grad_norm": 1.4998832941055298, - "learning_rate": 3.4782608695652175e-06, - "loss": 0.1212, - "step": 3825 - }, - { - "epoch": 0.6523270373231744, - "grad_norm": 0.9821255207061768, - "learning_rate": 3.4765558397271955e-06, - "loss": 0.1042, - "step": 3826 - }, - { - "epoch": 0.6524975357647121, - "grad_norm": 1.3991197347640991, - "learning_rate": 3.474850809889173e-06, - "loss": 0.1926, - "step": 3827 - }, - { - "epoch": 0.6526680342062499, - "grad_norm": 1.1085835695266724, - "learning_rate": 3.4731457800511514e-06, - "loss": 0.1106, - "step": 3828 - }, - { - "epoch": 0.6528385326477875, - "grad_norm": 0.8781197667121887, - "learning_rate": 3.4714407502131293e-06, - "loss": 0.0754, - "step": 3829 - }, - { - "epoch": 0.6530090310893252, - "grad_norm": 1.2629003524780273, - "learning_rate": 3.469735720375107e-06, - "loss": 0.0979, - "step": 3830 - }, - { - "epoch": 0.6531795295308629, - "grad_norm": 1.2773722410202026, - "learning_rate": 3.4680306905370848e-06, - "loss": 0.16, - "step": 3831 - }, - { - "epoch": 0.6533500279724006, - "grad_norm": 1.459194540977478, - "learning_rate": 3.4663256606990627e-06, - "loss": 0.1644, - "step": 3832 - }, - { - "epoch": 0.6535205264139382, - "grad_norm": 1.4942381381988525, - "learning_rate": 3.4646206308610402e-06, - "loss": 0.1566, - "step": 3833 - }, - { - "epoch": 0.6536910248554759, - "grad_norm": 1.6898901462554932, - "learning_rate": 3.462915601023018e-06, - "loss": 0.1428, - "step": 3834 - }, - { - "epoch": 0.6538615232970136, - "grad_norm": 1.0706831216812134, - "learning_rate": 3.4612105711849957e-06, - "loss": 0.0861, - "step": 3835 - }, - { - "epoch": 0.6540320217385514, - "grad_norm": 1.789047360420227, - "learning_rate": 3.4595055413469737e-06, - "loss": 0.138, - "step": 3836 - }, - { - "epoch": 0.654202520180089, - "grad_norm": 1.168027400970459, - "learning_rate": 3.457800511508952e-06, - "loss": 0.1506, - "step": 3837 - }, - { - "epoch": 0.6543730186216267, - "grad_norm": 1.7767421007156372, - "learning_rate": 3.4560954816709296e-06, - "loss": 0.2074, - "step": 3838 - }, - { - "epoch": 0.6545435170631644, - "grad_norm": 0.7228270173072815, - "learning_rate": 3.4543904518329075e-06, - "loss": 0.086, - "step": 3839 - }, - { - "epoch": 0.654714015504702, - "grad_norm": 1.0253210067749023, - "learning_rate": 3.452685421994885e-06, - "loss": 0.0986, - "step": 3840 - }, - { - "epoch": 0.6548845139462397, - "grad_norm": 0.9064176082611084, - "learning_rate": 3.450980392156863e-06, - "loss": 0.063, - "step": 3841 - }, - { - "epoch": 0.6550550123877774, - "grad_norm": 0.9495208859443665, - "learning_rate": 3.449275362318841e-06, - "loss": 0.0893, - "step": 3842 - }, - { - "epoch": 0.6552255108293151, - "grad_norm": 1.4983458518981934, - "learning_rate": 3.4475703324808185e-06, - "loss": 0.1644, - "step": 3843 - }, - { - "epoch": 0.6553960092708527, - "grad_norm": 0.6246259808540344, - "learning_rate": 3.4458653026427964e-06, - "loss": 0.0532, - "step": 3844 - }, - { - "epoch": 0.6555665077123904, - "grad_norm": 1.510249376296997, - "learning_rate": 3.4441602728047748e-06, - "loss": 0.1764, - "step": 3845 - }, - { - "epoch": 0.6557370061539282, - "grad_norm": 0.9134432673454285, - "learning_rate": 3.4424552429667523e-06, - "loss": 0.0811, - "step": 3846 - }, - { - "epoch": 0.6559075045954658, - "grad_norm": 1.1383296251296997, - "learning_rate": 3.4407502131287302e-06, - "loss": 0.0533, - "step": 3847 - }, - { - "epoch": 0.6560780030370035, - "grad_norm": 1.3036679029464722, - "learning_rate": 3.4390451832907078e-06, - "loss": 0.1011, - "step": 3848 - }, - { - "epoch": 0.6562485014785412, - "grad_norm": 1.6370482444763184, - "learning_rate": 3.4373401534526857e-06, - "loss": 0.1688, - "step": 3849 - }, - { - "epoch": 0.6564189999200789, - "grad_norm": 1.7106876373291016, - "learning_rate": 3.4356351236146632e-06, - "loss": 0.1582, - "step": 3850 - }, - { - "epoch": 0.6565894983616165, - "grad_norm": 2.144707441329956, - "learning_rate": 3.433930093776641e-06, - "loss": 0.1703, - "step": 3851 - }, - { - "epoch": 0.6567599968031542, - "grad_norm": 1.745017647743225, - "learning_rate": 3.432225063938619e-06, - "loss": 0.1742, - "step": 3852 - }, - { - "epoch": 0.6569304952446919, - "grad_norm": 1.7782965898513794, - "learning_rate": 3.4305200341005975e-06, - "loss": 0.1941, - "step": 3853 - }, - { - "epoch": 0.6571009936862295, - "grad_norm": 1.200868844985962, - "learning_rate": 3.428815004262575e-06, - "loss": 0.0851, - "step": 3854 - }, - { - "epoch": 0.6572714921277673, - "grad_norm": 1.544946551322937, - "learning_rate": 3.427109974424553e-06, - "loss": 0.1409, - "step": 3855 - }, - { - "epoch": 0.657441990569305, - "grad_norm": 1.847928762435913, - "learning_rate": 3.4254049445865305e-06, - "loss": 0.2057, - "step": 3856 - }, - { - "epoch": 0.6576124890108427, - "grad_norm": 1.2779704332351685, - "learning_rate": 3.4236999147485084e-06, - "loss": 0.1512, - "step": 3857 - }, - { - "epoch": 0.6577829874523803, - "grad_norm": 1.6082793474197388, - "learning_rate": 3.421994884910486e-06, - "loss": 0.1457, - "step": 3858 - }, - { - "epoch": 0.657953485893918, - "grad_norm": 1.6311509609222412, - "learning_rate": 3.420289855072464e-06, - "loss": 0.1427, - "step": 3859 - }, - { - "epoch": 0.6581239843354557, - "grad_norm": 1.1573950052261353, - "learning_rate": 3.418584825234442e-06, - "loss": 0.0945, - "step": 3860 - }, - { - "epoch": 0.6582944827769933, - "grad_norm": 0.9491661190986633, - "learning_rate": 3.4168797953964194e-06, - "loss": 0.0815, - "step": 3861 - }, - { - "epoch": 0.658464981218531, - "grad_norm": 1.9215099811553955, - "learning_rate": 3.4151747655583978e-06, - "loss": 0.2201, - "step": 3862 - }, - { - "epoch": 0.6586354796600687, - "grad_norm": 1.824528455734253, - "learning_rate": 3.4134697357203757e-06, - "loss": 0.1709, - "step": 3863 - }, - { - "epoch": 0.6588059781016065, - "grad_norm": 1.0630437135696411, - "learning_rate": 3.4117647058823532e-06, - "loss": 0.131, - "step": 3864 - }, - { - "epoch": 0.6589764765431441, - "grad_norm": 1.065984845161438, - "learning_rate": 3.410059676044331e-06, - "loss": 0.1075, - "step": 3865 - }, - { - "epoch": 0.6591469749846818, - "grad_norm": 1.3609118461608887, - "learning_rate": 3.4083546462063087e-06, - "loss": 0.0909, - "step": 3866 - }, - { - "epoch": 0.6593174734262195, - "grad_norm": 1.439451813697815, - "learning_rate": 3.4066496163682867e-06, - "loss": 0.1493, - "step": 3867 - }, - { - "epoch": 0.6594879718677571, - "grad_norm": 1.1943893432617188, - "learning_rate": 3.404944586530264e-06, - "loss": 0.0954, - "step": 3868 - }, - { - "epoch": 0.6596584703092948, - "grad_norm": 1.0462403297424316, - "learning_rate": 3.403239556692242e-06, - "loss": 0.1184, - "step": 3869 - }, - { - "epoch": 0.6598289687508325, - "grad_norm": 1.055091142654419, - "learning_rate": 3.4015345268542205e-06, - "loss": 0.1287, - "step": 3870 - }, - { - "epoch": 0.6599994671923702, - "grad_norm": 1.0880324840545654, - "learning_rate": 3.3998294970161984e-06, - "loss": 0.0777, - "step": 3871 - }, - { - "epoch": 0.6601699656339078, - "grad_norm": 2.1348769664764404, - "learning_rate": 3.398124467178176e-06, - "loss": 0.2215, - "step": 3872 - }, - { - "epoch": 0.6603404640754456, - "grad_norm": 0.8190430402755737, - "learning_rate": 3.396419437340154e-06, - "loss": 0.1036, - "step": 3873 - }, - { - "epoch": 0.6605109625169833, - "grad_norm": 1.4419548511505127, - "learning_rate": 3.3947144075021314e-06, - "loss": 0.108, - "step": 3874 - }, - { - "epoch": 0.6606814609585209, - "grad_norm": 1.437876582145691, - "learning_rate": 3.3930093776641094e-06, - "loss": 0.1422, - "step": 3875 - }, - { - "epoch": 0.6608519594000586, - "grad_norm": 1.0468285083770752, - "learning_rate": 3.391304347826087e-06, - "loss": 0.0848, - "step": 3876 - }, - { - "epoch": 0.6610224578415963, - "grad_norm": 0.9778571724891663, - "learning_rate": 3.389599317988065e-06, - "loss": 0.1, - "step": 3877 - }, - { - "epoch": 0.661192956283134, - "grad_norm": 1.0217633247375488, - "learning_rate": 3.3878942881500432e-06, - "loss": 0.1275, - "step": 3878 - }, - { - "epoch": 0.6613634547246716, - "grad_norm": 1.4020358324050903, - "learning_rate": 3.3861892583120208e-06, - "loss": 0.1543, - "step": 3879 - }, - { - "epoch": 0.6615339531662093, - "grad_norm": 1.001997709274292, - "learning_rate": 3.3844842284739987e-06, - "loss": 0.0907, - "step": 3880 - }, - { - "epoch": 0.661704451607747, - "grad_norm": 0.9944140315055847, - "learning_rate": 3.3827791986359767e-06, - "loss": 0.0432, - "step": 3881 - }, - { - "epoch": 0.6618749500492848, - "grad_norm": 0.9731889963150024, - "learning_rate": 3.381074168797954e-06, - "loss": 0.103, - "step": 3882 - }, - { - "epoch": 0.6620454484908224, - "grad_norm": 0.8612279891967773, - "learning_rate": 3.379369138959932e-06, - "loss": 0.0693, - "step": 3883 - }, - { - "epoch": 0.6622159469323601, - "grad_norm": 1.4289515018463135, - "learning_rate": 3.3776641091219096e-06, - "loss": 0.1183, - "step": 3884 - }, - { - "epoch": 0.6623864453738978, - "grad_norm": 1.729993224143982, - "learning_rate": 3.3759590792838876e-06, - "loss": 0.1182, - "step": 3885 - }, - { - "epoch": 0.6625569438154354, - "grad_norm": 0.764060378074646, - "learning_rate": 3.374254049445865e-06, - "loss": 0.0518, - "step": 3886 - }, - { - "epoch": 0.6627274422569731, - "grad_norm": 1.8344427347183228, - "learning_rate": 3.3725490196078435e-06, - "loss": 0.1916, - "step": 3887 - }, - { - "epoch": 0.6628979406985108, - "grad_norm": 1.5775954723358154, - "learning_rate": 3.3708439897698214e-06, - "loss": 0.1746, - "step": 3888 - }, - { - "epoch": 0.6630684391400485, - "grad_norm": 1.4308315515518188, - "learning_rate": 3.3691389599317994e-06, - "loss": 0.1282, - "step": 3889 - }, - { - "epoch": 0.6632389375815861, - "grad_norm": 0.7640116810798645, - "learning_rate": 3.367433930093777e-06, - "loss": 0.0536, - "step": 3890 - }, - { - "epoch": 0.6634094360231239, - "grad_norm": 0.9877718687057495, - "learning_rate": 3.365728900255755e-06, - "loss": 0.0749, - "step": 3891 - }, - { - "epoch": 0.6635799344646616, - "grad_norm": 0.7810732126235962, - "learning_rate": 3.3640238704177324e-06, - "loss": 0.0778, - "step": 3892 - }, - { - "epoch": 0.6637504329061992, - "grad_norm": 1.4315524101257324, - "learning_rate": 3.3623188405797103e-06, - "loss": 0.1389, - "step": 3893 - }, - { - "epoch": 0.6639209313477369, - "grad_norm": 0.85233074426651, - "learning_rate": 3.360613810741688e-06, - "loss": 0.0658, - "step": 3894 - }, - { - "epoch": 0.6640914297892746, - "grad_norm": 1.3665393590927124, - "learning_rate": 3.3589087809036662e-06, - "loss": 0.1203, - "step": 3895 - }, - { - "epoch": 0.6642619282308123, - "grad_norm": 1.551367998123169, - "learning_rate": 3.357203751065644e-06, - "loss": 0.1798, - "step": 3896 - }, - { - "epoch": 0.6644324266723499, - "grad_norm": 1.6179540157318115, - "learning_rate": 3.3554987212276217e-06, - "loss": 0.1041, - "step": 3897 - }, - { - "epoch": 0.6646029251138876, - "grad_norm": 1.2029205560684204, - "learning_rate": 3.3537936913895996e-06, - "loss": 0.1263, - "step": 3898 - }, - { - "epoch": 0.6647734235554253, - "grad_norm": 1.257601022720337, - "learning_rate": 3.3520886615515776e-06, - "loss": 0.146, - "step": 3899 - }, - { - "epoch": 0.664943921996963, - "grad_norm": 1.4511607885360718, - "learning_rate": 3.350383631713555e-06, - "loss": 0.244, - "step": 3900 - }, - { - "epoch": 0.664943921996963, - "eval_f1_score": 0.40294840294840295, - "eval_loss": 0.13890038430690765, - "eval_runtime": 182.6853, - "eval_samples_per_second": 54.739, - "eval_steps_per_second": 3.421, - "step": 3900 - }, - { - "epoch": 0.6651144204385007, - "grad_norm": 1.7217893600463867, - "learning_rate": 3.348678601875533e-06, - "loss": 0.2274, - "step": 3901 - }, - { - "epoch": 0.6652849188800384, - "grad_norm": 1.937384843826294, - "learning_rate": 3.3469735720375106e-06, - "loss": 0.2111, - "step": 3902 - }, - { - "epoch": 0.6654554173215761, - "grad_norm": 1.0646719932556152, - "learning_rate": 3.3452685421994885e-06, - "loss": 0.1148, - "step": 3903 - }, - { - "epoch": 0.6656259157631137, - "grad_norm": 1.8812726736068726, - "learning_rate": 3.343563512361467e-06, - "loss": 0.1844, - "step": 3904 - }, - { - "epoch": 0.6657964142046514, - "grad_norm": 1.9182530641555786, - "learning_rate": 3.3418584825234444e-06, - "loss": 0.1732, - "step": 3905 - }, - { - "epoch": 0.6659669126461891, - "grad_norm": 1.1144908666610718, - "learning_rate": 3.3401534526854224e-06, - "loss": 0.106, - "step": 3906 - }, - { - "epoch": 0.6661374110877267, - "grad_norm": 1.099799633026123, - "learning_rate": 3.3384484228474003e-06, - "loss": 0.1331, - "step": 3907 - }, - { - "epoch": 0.6663079095292644, - "grad_norm": 1.1822335720062256, - "learning_rate": 3.336743393009378e-06, - "loss": 0.1259, - "step": 3908 - }, - { - "epoch": 0.6664784079708022, - "grad_norm": 1.166798710823059, - "learning_rate": 3.335038363171356e-06, - "loss": 0.101, - "step": 3909 - }, - { - "epoch": 0.6666489064123399, - "grad_norm": 1.1110976934432983, - "learning_rate": 3.3333333333333333e-06, - "loss": 0.0847, - "step": 3910 - }, - { - "epoch": 0.6668194048538775, - "grad_norm": 1.2571207284927368, - "learning_rate": 3.3316283034953113e-06, - "loss": 0.1258, - "step": 3911 - }, - { - "epoch": 0.6669899032954152, - "grad_norm": 0.9218897223472595, - "learning_rate": 3.3299232736572896e-06, - "loss": 0.1004, - "step": 3912 - }, - { - "epoch": 0.6671604017369529, - "grad_norm": 3.1962733268737793, - "learning_rate": 3.328218243819267e-06, - "loss": 0.1432, - "step": 3913 - }, - { - "epoch": 0.6673309001784905, - "grad_norm": 0.97951740026474, - "learning_rate": 3.326513213981245e-06, - "loss": 0.0763, - "step": 3914 - }, - { - "epoch": 0.6675013986200282, - "grad_norm": 1.1191253662109375, - "learning_rate": 3.3248081841432226e-06, - "loss": 0.1483, - "step": 3915 - }, - { - "epoch": 0.6676718970615659, - "grad_norm": 1.0806386470794678, - "learning_rate": 3.3231031543052006e-06, - "loss": 0.0958, - "step": 3916 - }, - { - "epoch": 0.6678423955031036, - "grad_norm": 2.092670440673828, - "learning_rate": 3.3213981244671785e-06, - "loss": 0.1947, - "step": 3917 - }, - { - "epoch": 0.6680128939446413, - "grad_norm": 1.6471540927886963, - "learning_rate": 3.319693094629156e-06, - "loss": 0.1681, - "step": 3918 - }, - { - "epoch": 0.668183392386179, - "grad_norm": 1.5735267400741577, - "learning_rate": 3.317988064791134e-06, - "loss": 0.1907, - "step": 3919 - }, - { - "epoch": 0.6683538908277167, - "grad_norm": 1.0324760675430298, - "learning_rate": 3.3162830349531124e-06, - "loss": 0.1219, - "step": 3920 - }, - { - "epoch": 0.6685243892692543, - "grad_norm": 1.28639817237854, - "learning_rate": 3.31457800511509e-06, - "loss": 0.1547, - "step": 3921 - }, - { - "epoch": 0.668694887710792, - "grad_norm": 0.7513296604156494, - "learning_rate": 3.312872975277068e-06, - "loss": 0.0756, - "step": 3922 - }, - { - "epoch": 0.6688653861523297, - "grad_norm": 0.8335238695144653, - "learning_rate": 3.3111679454390454e-06, - "loss": 0.1004, - "step": 3923 - }, - { - "epoch": 0.6690358845938674, - "grad_norm": 1.2294464111328125, - "learning_rate": 3.3094629156010233e-06, - "loss": 0.1355, - "step": 3924 - }, - { - "epoch": 0.669206383035405, - "grad_norm": 1.066673755645752, - "learning_rate": 3.307757885763001e-06, - "loss": 0.0938, - "step": 3925 - }, - { - "epoch": 0.6693768814769427, - "grad_norm": 1.3101329803466797, - "learning_rate": 3.306052855924979e-06, - "loss": 0.1462, - "step": 3926 - }, - { - "epoch": 0.6695473799184805, - "grad_norm": 1.177607536315918, - "learning_rate": 3.3043478260869567e-06, - "loss": 0.1007, - "step": 3927 - }, - { - "epoch": 0.6697178783600182, - "grad_norm": 1.1281089782714844, - "learning_rate": 3.3026427962489343e-06, - "loss": 0.1019, - "step": 3928 - }, - { - "epoch": 0.6698883768015558, - "grad_norm": 1.3807610273361206, - "learning_rate": 3.3009377664109126e-06, - "loss": 0.1984, - "step": 3929 - }, - { - "epoch": 0.6700588752430935, - "grad_norm": 2.5573902130126953, - "learning_rate": 3.2992327365728906e-06, - "loss": 0.2041, - "step": 3930 - }, - { - "epoch": 0.6702293736846312, - "grad_norm": 1.2900224924087524, - "learning_rate": 3.297527706734868e-06, - "loss": 0.1484, - "step": 3931 - }, - { - "epoch": 0.6703998721261688, - "grad_norm": 1.0510343313217163, - "learning_rate": 3.295822676896846e-06, - "loss": 0.1248, - "step": 3932 - }, - { - "epoch": 0.6705703705677065, - "grad_norm": 1.1407992839813232, - "learning_rate": 3.2941176470588236e-06, - "loss": 0.1056, - "step": 3933 - }, - { - "epoch": 0.6707408690092442, - "grad_norm": 1.8074103593826294, - "learning_rate": 3.2924126172208015e-06, - "loss": 0.1311, - "step": 3934 - }, - { - "epoch": 0.670911367450782, - "grad_norm": 1.2502901554107666, - "learning_rate": 3.2907075873827795e-06, - "loss": 0.1774, - "step": 3935 - }, - { - "epoch": 0.6710818658923196, - "grad_norm": 1.6198917627334595, - "learning_rate": 3.289002557544757e-06, - "loss": 0.1086, - "step": 3936 - }, - { - "epoch": 0.6712523643338573, - "grad_norm": 1.1426345109939575, - "learning_rate": 3.2872975277067354e-06, - "loss": 0.1018, - "step": 3937 - }, - { - "epoch": 0.671422862775395, - "grad_norm": 1.3190062046051025, - "learning_rate": 3.2855924978687133e-06, - "loss": 0.1381, - "step": 3938 - }, - { - "epoch": 0.6715933612169326, - "grad_norm": 1.0100187063217163, - "learning_rate": 3.283887468030691e-06, - "loss": 0.0888, - "step": 3939 - }, - { - "epoch": 0.6717638596584703, - "grad_norm": 1.5038955211639404, - "learning_rate": 3.282182438192669e-06, - "loss": 0.1789, - "step": 3940 - }, - { - "epoch": 0.671934358100008, - "grad_norm": 1.029555320739746, - "learning_rate": 3.2804774083546463e-06, - "loss": 0.0934, - "step": 3941 - }, - { - "epoch": 0.6721048565415457, - "grad_norm": 2.1041479110717773, - "learning_rate": 3.2787723785166243e-06, - "loss": 0.1591, - "step": 3942 - }, - { - "epoch": 0.6722753549830833, - "grad_norm": 0.9763666987419128, - "learning_rate": 3.2770673486786018e-06, - "loss": 0.059, - "step": 3943 - }, - { - "epoch": 0.672445853424621, - "grad_norm": 1.5250053405761719, - "learning_rate": 3.2753623188405797e-06, - "loss": 0.1455, - "step": 3944 - }, - { - "epoch": 0.6726163518661588, - "grad_norm": 1.7633816003799438, - "learning_rate": 3.273657289002558e-06, - "loss": 0.2283, - "step": 3945 - }, - { - "epoch": 0.6727868503076964, - "grad_norm": 1.6598435640335083, - "learning_rate": 3.271952259164536e-06, - "loss": 0.1581, - "step": 3946 - }, - { - "epoch": 0.6729573487492341, - "grad_norm": 1.2925745248794556, - "learning_rate": 3.2702472293265136e-06, - "loss": 0.1303, - "step": 3947 - }, - { - "epoch": 0.6731278471907718, - "grad_norm": 1.2952626943588257, - "learning_rate": 3.2685421994884915e-06, - "loss": 0.1938, - "step": 3948 - }, - { - "epoch": 0.6732983456323095, - "grad_norm": 1.3310290575027466, - "learning_rate": 3.266837169650469e-06, - "loss": 0.0832, - "step": 3949 - }, - { - "epoch": 0.6734688440738471, - "grad_norm": 1.3135647773742676, - "learning_rate": 3.265132139812447e-06, - "loss": 0.1581, - "step": 3950 - }, - { - "epoch": 0.6736393425153848, - "grad_norm": 1.2514424324035645, - "learning_rate": 3.2634271099744245e-06, - "loss": 0.1717, - "step": 3951 - }, - { - "epoch": 0.6738098409569225, - "grad_norm": 2.424001693725586, - "learning_rate": 3.2617220801364025e-06, - "loss": 0.2024, - "step": 3952 - }, - { - "epoch": 0.6739803393984601, - "grad_norm": 1.5078508853912354, - "learning_rate": 3.2600170502983804e-06, - "loss": 0.2054, - "step": 3953 - }, - { - "epoch": 0.6741508378399979, - "grad_norm": 1.4088560342788696, - "learning_rate": 3.2583120204603584e-06, - "loss": 0.1318, - "step": 3954 - }, - { - "epoch": 0.6743213362815356, - "grad_norm": 1.03024423122406, - "learning_rate": 3.2566069906223363e-06, - "loss": 0.123, - "step": 3955 - }, - { - "epoch": 0.6744918347230733, - "grad_norm": 1.146548867225647, - "learning_rate": 3.2549019607843143e-06, - "loss": 0.1411, - "step": 3956 - }, - { - "epoch": 0.6746623331646109, - "grad_norm": 1.3883435726165771, - "learning_rate": 3.2531969309462918e-06, - "loss": 0.1419, - "step": 3957 - }, - { - "epoch": 0.6748328316061486, - "grad_norm": 1.400600552558899, - "learning_rate": 3.2514919011082697e-06, - "loss": 0.1479, - "step": 3958 - }, - { - "epoch": 0.6750033300476863, - "grad_norm": 0.9958585500717163, - "learning_rate": 3.2497868712702473e-06, - "loss": 0.0901, - "step": 3959 - }, - { - "epoch": 0.6751738284892239, - "grad_norm": 1.0494383573532104, - "learning_rate": 3.248081841432225e-06, - "loss": 0.0813, - "step": 3960 - }, - { - "epoch": 0.6753443269307616, - "grad_norm": 2.5229740142822266, - "learning_rate": 3.2463768115942027e-06, - "loss": 0.1091, - "step": 3961 - }, - { - "epoch": 0.6755148253722993, - "grad_norm": 1.1590737104415894, - "learning_rate": 3.244671781756181e-06, - "loss": 0.074, - "step": 3962 - }, - { - "epoch": 0.6756853238138371, - "grad_norm": 1.0404776334762573, - "learning_rate": 3.242966751918159e-06, - "loss": 0.0466, - "step": 3963 - }, - { - "epoch": 0.6758558222553747, - "grad_norm": 0.9535297751426697, - "learning_rate": 3.241261722080137e-06, - "loss": 0.1187, - "step": 3964 - }, - { - "epoch": 0.6760263206969124, - "grad_norm": 1.555312991142273, - "learning_rate": 3.2395566922421145e-06, - "loss": 0.0863, - "step": 3965 - }, - { - "epoch": 0.6761968191384501, - "grad_norm": 1.0932700634002686, - "learning_rate": 3.2378516624040925e-06, - "loss": 0.1172, - "step": 3966 - }, - { - "epoch": 0.6763673175799877, - "grad_norm": 1.41453218460083, - "learning_rate": 3.23614663256607e-06, - "loss": 0.1164, - "step": 3967 - }, - { - "epoch": 0.6765378160215254, - "grad_norm": 1.6799843311309814, - "learning_rate": 3.234441602728048e-06, - "loss": 0.1375, - "step": 3968 - }, - { - "epoch": 0.6767083144630631, - "grad_norm": 1.497155785560608, - "learning_rate": 3.2327365728900255e-06, - "loss": 0.1625, - "step": 3969 - }, - { - "epoch": 0.6768788129046008, - "grad_norm": 1.5166429281234741, - "learning_rate": 3.231031543052004e-06, - "loss": 0.1459, - "step": 3970 - }, - { - "epoch": 0.6770493113461384, - "grad_norm": 1.0572724342346191, - "learning_rate": 3.2293265132139818e-06, - "loss": 0.0913, - "step": 3971 - }, - { - "epoch": 0.6772198097876762, - "grad_norm": 1.0608195066452026, - "learning_rate": 3.2276214833759593e-06, - "loss": 0.102, - "step": 3972 - }, - { - "epoch": 0.6773903082292139, - "grad_norm": 1.6367521286010742, - "learning_rate": 3.2259164535379373e-06, - "loss": 0.151, - "step": 3973 - }, - { - "epoch": 0.6775608066707515, - "grad_norm": 1.0436595678329468, - "learning_rate": 3.224211423699915e-06, - "loss": 0.0479, - "step": 3974 - }, - { - "epoch": 0.6777313051122892, - "grad_norm": 0.6527305245399475, - "learning_rate": 3.2225063938618927e-06, - "loss": 0.0419, - "step": 3975 - }, - { - "epoch": 0.6779018035538269, - "grad_norm": 1.0341905355453491, - "learning_rate": 3.2208013640238707e-06, - "loss": 0.1052, - "step": 3976 - }, - { - "epoch": 0.6780723019953646, - "grad_norm": 1.846094012260437, - "learning_rate": 3.219096334185848e-06, - "loss": 0.1429, - "step": 3977 - }, - { - "epoch": 0.6782428004369022, - "grad_norm": 1.3365362882614136, - "learning_rate": 3.217391304347826e-06, - "loss": 0.176, - "step": 3978 - }, - { - "epoch": 0.6784132988784399, - "grad_norm": 1.9188679456710815, - "learning_rate": 3.2156862745098045e-06, - "loss": 0.1989, - "step": 3979 - }, - { - "epoch": 0.6785837973199776, - "grad_norm": 0.7098460793495178, - "learning_rate": 3.213981244671782e-06, - "loss": 0.0572, - "step": 3980 - }, - { - "epoch": 0.6787542957615154, - "grad_norm": 1.2970669269561768, - "learning_rate": 3.21227621483376e-06, - "loss": 0.0805, - "step": 3981 - }, - { - "epoch": 0.678924794203053, - "grad_norm": 0.8300220966339111, - "learning_rate": 3.2105711849957375e-06, - "loss": 0.0349, - "step": 3982 - }, - { - "epoch": 0.6790952926445907, - "grad_norm": 1.6387343406677246, - "learning_rate": 3.2088661551577155e-06, - "loss": 0.1912, - "step": 3983 - }, - { - "epoch": 0.6792657910861284, - "grad_norm": 1.5236952304840088, - "learning_rate": 3.2071611253196934e-06, - "loss": 0.1046, - "step": 3984 - }, - { - "epoch": 0.679436289527666, - "grad_norm": 1.3057942390441895, - "learning_rate": 3.205456095481671e-06, - "loss": 0.1108, - "step": 3985 - }, - { - "epoch": 0.6796067879692037, - "grad_norm": 1.1730042695999146, - "learning_rate": 3.203751065643649e-06, - "loss": 0.0957, - "step": 3986 - }, - { - "epoch": 0.6797772864107414, - "grad_norm": 1.2910196781158447, - "learning_rate": 3.2020460358056272e-06, - "loss": 0.0905, - "step": 3987 - }, - { - "epoch": 0.6799477848522791, - "grad_norm": 1.2115230560302734, - "learning_rate": 3.2003410059676048e-06, - "loss": 0.1406, - "step": 3988 - }, - { - "epoch": 0.6801182832938167, - "grad_norm": 1.7413958311080933, - "learning_rate": 3.1986359761295827e-06, - "loss": 0.1538, - "step": 3989 - }, - { - "epoch": 0.6802887817353545, - "grad_norm": 1.3703621625900269, - "learning_rate": 3.1969309462915602e-06, - "loss": 0.1362, - "step": 3990 - }, - { - "epoch": 0.6804592801768922, - "grad_norm": 1.4095655679702759, - "learning_rate": 3.195225916453538e-06, - "loss": 0.1855, - "step": 3991 - }, - { - "epoch": 0.6806297786184298, - "grad_norm": 1.5323487520217896, - "learning_rate": 3.193520886615516e-06, - "loss": 0.1647, - "step": 3992 - }, - { - "epoch": 0.6808002770599675, - "grad_norm": 1.5383172035217285, - "learning_rate": 3.1918158567774937e-06, - "loss": 0.1779, - "step": 3993 - }, - { - "epoch": 0.6809707755015052, - "grad_norm": 0.8922589421272278, - "learning_rate": 3.1901108269394716e-06, - "loss": 0.0839, - "step": 3994 - }, - { - "epoch": 0.6811412739430429, - "grad_norm": 1.0920547246932983, - "learning_rate": 3.188405797101449e-06, - "loss": 0.0963, - "step": 3995 - }, - { - "epoch": 0.6813117723845805, - "grad_norm": 1.4073317050933838, - "learning_rate": 3.1867007672634275e-06, - "loss": 0.1311, - "step": 3996 - }, - { - "epoch": 0.6814822708261182, - "grad_norm": 1.1026973724365234, - "learning_rate": 3.1849957374254055e-06, - "loss": 0.0933, - "step": 3997 - }, - { - "epoch": 0.681652769267656, - "grad_norm": 0.6690061688423157, - "learning_rate": 3.183290707587383e-06, - "loss": 0.0364, - "step": 3998 - }, - { - "epoch": 0.6818232677091935, - "grad_norm": 0.898353099822998, - "learning_rate": 3.181585677749361e-06, - "loss": 0.0633, - "step": 3999 - }, - { - "epoch": 0.6819937661507313, - "grad_norm": 1.5099992752075195, - "learning_rate": 3.1798806479113385e-06, - "loss": 0.1217, - "step": 4000 - }, - { - "epoch": 0.6819937661507313, - "eval_f1_score": 0.40487804878048783, - "eval_loss": 0.13649117946624756, - "eval_runtime": 182.8572, - "eval_samples_per_second": 54.687, - "eval_steps_per_second": 3.418, - "step": 4000 - }, - { - "epoch": 0.682164264592269, - "grad_norm": 1.6470274925231934, - "learning_rate": 3.1781756180733164e-06, - "loss": 0.1265, - "step": 4001 - }, - { - "epoch": 0.6823347630338067, - "grad_norm": 1.2008510828018188, - "learning_rate": 3.1764705882352943e-06, - "loss": 0.1013, - "step": 4002 - }, - { - "epoch": 0.6825052614753443, - "grad_norm": 1.8212544918060303, - "learning_rate": 3.174765558397272e-06, - "loss": 0.0905, - "step": 4003 - }, - { - "epoch": 0.682675759916882, - "grad_norm": 1.2132388353347778, - "learning_rate": 3.1730605285592502e-06, - "loss": 0.1148, - "step": 4004 - }, - { - "epoch": 0.6828462583584197, - "grad_norm": 1.000791311264038, - "learning_rate": 3.171355498721228e-06, - "loss": 0.0653, - "step": 4005 - }, - { - "epoch": 0.6830167567999573, - "grad_norm": 0.9185832738876343, - "learning_rate": 3.1696504688832057e-06, - "loss": 0.0586, - "step": 4006 - }, - { - "epoch": 0.683187255241495, - "grad_norm": 0.9736605882644653, - "learning_rate": 3.1679454390451837e-06, - "loss": 0.0481, - "step": 4007 - }, - { - "epoch": 0.6833577536830328, - "grad_norm": 1.5643205642700195, - "learning_rate": 3.166240409207161e-06, - "loss": 0.1509, - "step": 4008 - }, - { - "epoch": 0.6835282521245705, - "grad_norm": 1.8469020128250122, - "learning_rate": 3.164535379369139e-06, - "loss": 0.1601, - "step": 4009 - }, - { - "epoch": 0.6836987505661081, - "grad_norm": 2.011707067489624, - "learning_rate": 3.162830349531117e-06, - "loss": 0.1713, - "step": 4010 - }, - { - "epoch": 0.6838692490076458, - "grad_norm": 1.1956074237823486, - "learning_rate": 3.1611253196930946e-06, - "loss": 0.118, - "step": 4011 - }, - { - "epoch": 0.6840397474491835, - "grad_norm": 0.9073688387870789, - "learning_rate": 3.159420289855073e-06, - "loss": 0.0697, - "step": 4012 - }, - { - "epoch": 0.6842102458907211, - "grad_norm": 1.6583490371704102, - "learning_rate": 3.157715260017051e-06, - "loss": 0.1523, - "step": 4013 - }, - { - "epoch": 0.6843807443322588, - "grad_norm": 0.9067796468734741, - "learning_rate": 3.1560102301790284e-06, - "loss": 0.0845, - "step": 4014 - }, - { - "epoch": 0.6845512427737965, - "grad_norm": 1.350319743156433, - "learning_rate": 3.1543052003410064e-06, - "loss": 0.1404, - "step": 4015 - }, - { - "epoch": 0.6847217412153342, - "grad_norm": 0.9945334792137146, - "learning_rate": 3.152600170502984e-06, - "loss": 0.0774, - "step": 4016 - }, - { - "epoch": 0.6848922396568718, - "grad_norm": 1.5443482398986816, - "learning_rate": 3.150895140664962e-06, - "loss": 0.1621, - "step": 4017 - }, - { - "epoch": 0.6850627380984096, - "grad_norm": 1.2873508930206299, - "learning_rate": 3.1491901108269394e-06, - "loss": 0.093, - "step": 4018 - }, - { - "epoch": 0.6852332365399473, - "grad_norm": 1.3037817478179932, - "learning_rate": 3.1474850809889173e-06, - "loss": 0.1174, - "step": 4019 - }, - { - "epoch": 0.6854037349814849, - "grad_norm": 1.2276246547698975, - "learning_rate": 3.1457800511508953e-06, - "loss": 0.0997, - "step": 4020 - }, - { - "epoch": 0.6855742334230226, - "grad_norm": 0.9074063897132874, - "learning_rate": 3.1440750213128737e-06, - "loss": 0.0857, - "step": 4021 - }, - { - "epoch": 0.6857447318645603, - "grad_norm": 2.060265302658081, - "learning_rate": 3.142369991474851e-06, - "loss": 0.1786, - "step": 4022 - }, - { - "epoch": 0.685915230306098, - "grad_norm": 1.4568548202514648, - "learning_rate": 3.140664961636829e-06, - "loss": 0.1515, - "step": 4023 - }, - { - "epoch": 0.6860857287476356, - "grad_norm": 0.9228938817977905, - "learning_rate": 3.1389599317988067e-06, - "loss": 0.0982, - "step": 4024 - }, - { - "epoch": 0.6862562271891733, - "grad_norm": 1.6610695123672485, - "learning_rate": 3.1372549019607846e-06, - "loss": 0.1442, - "step": 4025 - }, - { - "epoch": 0.686426725630711, - "grad_norm": 2.3436145782470703, - "learning_rate": 3.135549872122762e-06, - "loss": 0.1543, - "step": 4026 - }, - { - "epoch": 0.6865972240722488, - "grad_norm": 1.4873566627502441, - "learning_rate": 3.13384484228474e-06, - "loss": 0.1753, - "step": 4027 - }, - { - "epoch": 0.6867677225137864, - "grad_norm": 1.3169639110565186, - "learning_rate": 3.132139812446718e-06, - "loss": 0.0885, - "step": 4028 - }, - { - "epoch": 0.6869382209553241, - "grad_norm": 1.3342516422271729, - "learning_rate": 3.130434782608696e-06, - "loss": 0.1357, - "step": 4029 - }, - { - "epoch": 0.6871087193968618, - "grad_norm": 1.2794122695922852, - "learning_rate": 3.128729752770674e-06, - "loss": 0.1444, - "step": 4030 - }, - { - "epoch": 0.6872792178383994, - "grad_norm": 1.5904749631881714, - "learning_rate": 3.127024722932652e-06, - "loss": 0.133, - "step": 4031 - }, - { - "epoch": 0.6874497162799371, - "grad_norm": 1.5989582538604736, - "learning_rate": 3.1253196930946294e-06, - "loss": 0.1855, - "step": 4032 - }, - { - "epoch": 0.6876202147214748, - "grad_norm": 1.0583912134170532, - "learning_rate": 3.1236146632566073e-06, - "loss": 0.093, - "step": 4033 - }, - { - "epoch": 0.6877907131630125, - "grad_norm": 0.9540891647338867, - "learning_rate": 3.121909633418585e-06, - "loss": 0.0612, - "step": 4034 - }, - { - "epoch": 0.6879612116045501, - "grad_norm": 1.2029814720153809, - "learning_rate": 3.120204603580563e-06, - "loss": 0.1068, - "step": 4035 - }, - { - "epoch": 0.6881317100460879, - "grad_norm": 1.2139424085617065, - "learning_rate": 3.1184995737425403e-06, - "loss": 0.1181, - "step": 4036 - }, - { - "epoch": 0.6883022084876256, - "grad_norm": 1.311047077178955, - "learning_rate": 3.1167945439045187e-06, - "loss": 0.1427, - "step": 4037 - }, - { - "epoch": 0.6884727069291632, - "grad_norm": 1.864897608757019, - "learning_rate": 3.1150895140664967e-06, - "loss": 0.1878, - "step": 4038 - }, - { - "epoch": 0.6886432053707009, - "grad_norm": 1.1222472190856934, - "learning_rate": 3.1133844842284746e-06, - "loss": 0.0889, - "step": 4039 - }, - { - "epoch": 0.6888137038122386, - "grad_norm": 1.16517174243927, - "learning_rate": 3.111679454390452e-06, - "loss": 0.0943, - "step": 4040 - }, - { - "epoch": 0.6889842022537763, - "grad_norm": 0.8637740015983582, - "learning_rate": 3.10997442455243e-06, - "loss": 0.083, - "step": 4041 - }, - { - "epoch": 0.6891547006953139, - "grad_norm": 1.0583702325820923, - "learning_rate": 3.1082693947144076e-06, - "loss": 0.0842, - "step": 4042 - }, - { - "epoch": 0.6893251991368516, - "grad_norm": 2.0392816066741943, - "learning_rate": 3.1065643648763855e-06, - "loss": 0.1701, - "step": 4043 - }, - { - "epoch": 0.6894956975783894, - "grad_norm": 0.7414406538009644, - "learning_rate": 3.104859335038363e-06, - "loss": 0.0429, - "step": 4044 - }, - { - "epoch": 0.689666196019927, - "grad_norm": 1.78953218460083, - "learning_rate": 3.103154305200341e-06, - "loss": 0.1752, - "step": 4045 - }, - { - "epoch": 0.6898366944614647, - "grad_norm": 1.2555387020111084, - "learning_rate": 3.1014492753623194e-06, - "loss": 0.1371, - "step": 4046 - }, - { - "epoch": 0.6900071929030024, - "grad_norm": 1.6776463985443115, - "learning_rate": 3.099744245524297e-06, - "loss": 0.1209, - "step": 4047 - }, - { - "epoch": 0.6901776913445401, - "grad_norm": 1.0086376667022705, - "learning_rate": 3.098039215686275e-06, - "loss": 0.0673, - "step": 4048 - }, - { - "epoch": 0.6903481897860777, - "grad_norm": 1.0416656732559204, - "learning_rate": 3.096334185848253e-06, - "loss": 0.0607, - "step": 4049 - }, - { - "epoch": 0.6905186882276154, - "grad_norm": 2.047964572906494, - "learning_rate": 3.0946291560102303e-06, - "loss": 0.2094, - "step": 4050 - }, - { - "epoch": 0.6906891866691531, - "grad_norm": 0.8563204407691956, - "learning_rate": 3.0929241261722083e-06, - "loss": 0.0883, - "step": 4051 - }, - { - "epoch": 0.6908596851106907, - "grad_norm": 2.1121885776519775, - "learning_rate": 3.091219096334186e-06, - "loss": 0.1619, - "step": 4052 - }, - { - "epoch": 0.6910301835522284, - "grad_norm": 1.5385748147964478, - "learning_rate": 3.0895140664961638e-06, - "loss": 0.1547, - "step": 4053 - }, - { - "epoch": 0.6912006819937662, - "grad_norm": 1.6678493022918701, - "learning_rate": 3.087809036658142e-06, - "loss": 0.1065, - "step": 4054 - }, - { - "epoch": 0.6913711804353039, - "grad_norm": 1.3417023420333862, - "learning_rate": 3.0861040068201196e-06, - "loss": 0.0756, - "step": 4055 - }, - { - "epoch": 0.6915416788768415, - "grad_norm": 0.9588251709938049, - "learning_rate": 3.0843989769820976e-06, - "loss": 0.0774, - "step": 4056 - }, - { - "epoch": 0.6917121773183792, - "grad_norm": 1.2807377576828003, - "learning_rate": 3.082693947144075e-06, - "loss": 0.1068, - "step": 4057 - }, - { - "epoch": 0.6918826757599169, - "grad_norm": 1.0533798933029175, - "learning_rate": 3.080988917306053e-06, - "loss": 0.1061, - "step": 4058 - }, - { - "epoch": 0.6920531742014545, - "grad_norm": 2.105905532836914, - "learning_rate": 3.079283887468031e-06, - "loss": 0.1713, - "step": 4059 - }, - { - "epoch": 0.6922236726429922, - "grad_norm": 1.4359697103500366, - "learning_rate": 3.0775788576300085e-06, - "loss": 0.1761, - "step": 4060 - }, - { - "epoch": 0.6923941710845299, - "grad_norm": 1.4418187141418457, - "learning_rate": 3.0758738277919865e-06, - "loss": 0.1127, - "step": 4061 - }, - { - "epoch": 0.6925646695260677, - "grad_norm": 1.7965011596679688, - "learning_rate": 3.074168797953965e-06, - "loss": 0.1559, - "step": 4062 - }, - { - "epoch": 0.6927351679676053, - "grad_norm": 0.8797532916069031, - "learning_rate": 3.0724637681159424e-06, - "loss": 0.0962, - "step": 4063 - }, - { - "epoch": 0.692905666409143, - "grad_norm": 1.4908859729766846, - "learning_rate": 3.0707587382779203e-06, - "loss": 0.1333, - "step": 4064 - }, - { - "epoch": 0.6930761648506807, - "grad_norm": 1.1060150861740112, - "learning_rate": 3.069053708439898e-06, - "loss": 0.1229, - "step": 4065 - }, - { - "epoch": 0.6932466632922183, - "grad_norm": 0.9352675080299377, - "learning_rate": 3.067348678601876e-06, - "loss": 0.0877, - "step": 4066 - }, - { - "epoch": 0.693417161733756, - "grad_norm": 1.0809539556503296, - "learning_rate": 3.0656436487638537e-06, - "loss": 0.1242, - "step": 4067 - }, - { - "epoch": 0.6935876601752937, - "grad_norm": 1.624000072479248, - "learning_rate": 3.0639386189258313e-06, - "loss": 0.1616, - "step": 4068 - }, - { - "epoch": 0.6937581586168314, - "grad_norm": 1.7362465858459473, - "learning_rate": 3.0622335890878092e-06, - "loss": 0.1814, - "step": 4069 - }, - { - "epoch": 0.693928657058369, - "grad_norm": 1.0399138927459717, - "learning_rate": 3.0605285592497867e-06, - "loss": 0.1063, - "step": 4070 - }, - { - "epoch": 0.6940991554999068, - "grad_norm": 1.5530822277069092, - "learning_rate": 3.058823529411765e-06, - "loss": 0.0814, - "step": 4071 - }, - { - "epoch": 0.6942696539414445, - "grad_norm": 1.186566710472107, - "learning_rate": 3.057118499573743e-06, - "loss": 0.098, - "step": 4072 - }, - { - "epoch": 0.6944401523829822, - "grad_norm": 1.128265142440796, - "learning_rate": 3.0554134697357206e-06, - "loss": 0.0888, - "step": 4073 - }, - { - "epoch": 0.6946106508245198, - "grad_norm": 1.3690623044967651, - "learning_rate": 3.0537084398976985e-06, - "loss": 0.1201, - "step": 4074 - }, - { - "epoch": 0.6947811492660575, - "grad_norm": 1.3120359182357788, - "learning_rate": 3.052003410059676e-06, - "loss": 0.0738, - "step": 4075 - }, - { - "epoch": 0.6949516477075952, - "grad_norm": 1.1368895769119263, - "learning_rate": 3.050298380221654e-06, - "loss": 0.1423, - "step": 4076 - }, - { - "epoch": 0.6951221461491328, - "grad_norm": 1.0708452463150024, - "learning_rate": 3.048593350383632e-06, - "loss": 0.0803, - "step": 4077 - }, - { - "epoch": 0.6952926445906705, - "grad_norm": 1.069138526916504, - "learning_rate": 3.0468883205456095e-06, - "loss": 0.1072, - "step": 4078 - }, - { - "epoch": 0.6954631430322082, - "grad_norm": 1.5960021018981934, - "learning_rate": 3.045183290707588e-06, - "loss": 0.2298, - "step": 4079 - }, - { - "epoch": 0.695633641473746, - "grad_norm": 1.1349047422409058, - "learning_rate": 3.043478260869566e-06, - "loss": 0.0945, - "step": 4080 - }, - { - "epoch": 0.6958041399152836, - "grad_norm": 0.9507064819335938, - "learning_rate": 3.0417732310315433e-06, - "loss": 0.0711, - "step": 4081 - }, - { - "epoch": 0.6959746383568213, - "grad_norm": 2.050335168838501, - "learning_rate": 3.0400682011935213e-06, - "loss": 0.183, - "step": 4082 - }, - { - "epoch": 0.696145136798359, - "grad_norm": 1.2211310863494873, - "learning_rate": 3.038363171355499e-06, - "loss": 0.1362, - "step": 4083 - }, - { - "epoch": 0.6963156352398966, - "grad_norm": 1.4060834646224976, - "learning_rate": 3.0366581415174767e-06, - "loss": 0.1536, - "step": 4084 - }, - { - "epoch": 0.6964861336814343, - "grad_norm": 0.946904182434082, - "learning_rate": 3.0349531116794547e-06, - "loss": 0.084, - "step": 4085 - }, - { - "epoch": 0.696656632122972, - "grad_norm": 1.6174473762512207, - "learning_rate": 3.0332480818414322e-06, - "loss": 0.1601, - "step": 4086 - }, - { - "epoch": 0.6968271305645097, - "grad_norm": 0.7835435271263123, - "learning_rate": 3.03154305200341e-06, - "loss": 0.0676, - "step": 4087 - }, - { - "epoch": 0.6969976290060473, - "grad_norm": 0.8758642673492432, - "learning_rate": 3.0298380221653885e-06, - "loss": 0.0672, - "step": 4088 - }, - { - "epoch": 0.697168127447585, - "grad_norm": 1.4428333044052124, - "learning_rate": 3.028132992327366e-06, - "loss": 0.1454, - "step": 4089 - }, - { - "epoch": 0.6973386258891228, - "grad_norm": 1.5521656274795532, - "learning_rate": 3.026427962489344e-06, - "loss": 0.1404, - "step": 4090 - }, - { - "epoch": 0.6975091243306604, - "grad_norm": 1.1999118328094482, - "learning_rate": 3.0247229326513215e-06, - "loss": 0.0745, - "step": 4091 - }, - { - "epoch": 0.6976796227721981, - "grad_norm": 1.1342631578445435, - "learning_rate": 3.0230179028132995e-06, - "loss": 0.1336, - "step": 4092 - }, - { - "epoch": 0.6978501212137358, - "grad_norm": 2.592848300933838, - "learning_rate": 3.021312872975277e-06, - "loss": 0.217, - "step": 4093 - }, - { - "epoch": 0.6980206196552735, - "grad_norm": 1.6239129304885864, - "learning_rate": 3.019607843137255e-06, - "loss": 0.1574, - "step": 4094 - }, - { - "epoch": 0.6981911180968111, - "grad_norm": 0.7634371519088745, - "learning_rate": 3.017902813299233e-06, - "loss": 0.0563, - "step": 4095 - }, - { - "epoch": 0.6983616165383488, - "grad_norm": 1.2029179334640503, - "learning_rate": 3.0161977834612113e-06, - "loss": 0.0893, - "step": 4096 - }, - { - "epoch": 0.6985321149798865, - "grad_norm": 1.4013758897781372, - "learning_rate": 3.014492753623189e-06, - "loss": 0.1706, - "step": 4097 - }, - { - "epoch": 0.6987026134214241, - "grad_norm": 1.3356106281280518, - "learning_rate": 3.0127877237851667e-06, - "loss": 0.1122, - "step": 4098 - }, - { - "epoch": 0.6988731118629619, - "grad_norm": 0.9753081202507019, - "learning_rate": 3.0110826939471443e-06, - "loss": 0.0938, - "step": 4099 - }, - { - "epoch": 0.6990436103044996, - "grad_norm": 1.8733267784118652, - "learning_rate": 3.009377664109122e-06, - "loss": 0.1907, - "step": 4100 - }, - { - "epoch": 0.6990436103044996, - "eval_f1_score": 0.40389294403892945, - "eval_loss": 0.13582302629947662, - "eval_runtime": 183.3886, - "eval_samples_per_second": 54.529, - "eval_steps_per_second": 3.408, - "step": 4100 - }, - { - "epoch": 0.6992141087460373, - "grad_norm": 1.568135142326355, - "learning_rate": 3.0076726342710997e-06, - "loss": 0.164, - "step": 4101 - }, - { - "epoch": 0.6993846071875749, - "grad_norm": 1.4794212579727173, - "learning_rate": 3.0059676044330777e-06, - "loss": 0.1211, - "step": 4102 - }, - { - "epoch": 0.6995551056291126, - "grad_norm": 1.19968843460083, - "learning_rate": 3.0042625745950556e-06, - "loss": 0.0848, - "step": 4103 - }, - { - "epoch": 0.6997256040706503, - "grad_norm": 0.8885430693626404, - "learning_rate": 3.0025575447570336e-06, - "loss": 0.0861, - "step": 4104 - }, - { - "epoch": 0.6998961025121879, - "grad_norm": 1.1285784244537354, - "learning_rate": 3.0008525149190115e-06, - "loss": 0.0899, - "step": 4105 - }, - { - "epoch": 0.7000666009537256, - "grad_norm": 1.2111326456069946, - "learning_rate": 2.9991474850809895e-06, - "loss": 0.1186, - "step": 4106 - }, - { - "epoch": 0.7002370993952634, - "grad_norm": 1.571638584136963, - "learning_rate": 2.997442455242967e-06, - "loss": 0.119, - "step": 4107 - }, - { - "epoch": 0.7004075978368011, - "grad_norm": 1.3643468618392944, - "learning_rate": 2.995737425404945e-06, - "loss": 0.1753, - "step": 4108 - }, - { - "epoch": 0.7005780962783387, - "grad_norm": 1.8419893980026245, - "learning_rate": 2.9940323955669225e-06, - "loss": 0.1758, - "step": 4109 - }, - { - "epoch": 0.7007485947198764, - "grad_norm": 1.5465821027755737, - "learning_rate": 2.9923273657289004e-06, - "loss": 0.1774, - "step": 4110 - }, - { - "epoch": 0.7009190931614141, - "grad_norm": 0.8390033841133118, - "learning_rate": 2.990622335890878e-06, - "loss": 0.0841, - "step": 4111 - }, - { - "epoch": 0.7010895916029517, - "grad_norm": 1.2229130268096924, - "learning_rate": 2.988917306052856e-06, - "loss": 0.1386, - "step": 4112 - }, - { - "epoch": 0.7012600900444894, - "grad_norm": 1.1811176538467407, - "learning_rate": 2.9872122762148343e-06, - "loss": 0.1048, - "step": 4113 - }, - { - "epoch": 0.7014305884860271, - "grad_norm": 1.3642230033874512, - "learning_rate": 2.9855072463768118e-06, - "loss": 0.0605, - "step": 4114 - }, - { - "epoch": 0.7016010869275648, - "grad_norm": 2.046272039413452, - "learning_rate": 2.9838022165387897e-06, - "loss": 0.1666, - "step": 4115 - }, - { - "epoch": 0.7017715853691024, - "grad_norm": 0.9750205874443054, - "learning_rate": 2.9820971867007677e-06, - "loss": 0.1009, - "step": 4116 - }, - { - "epoch": 0.7019420838106402, - "grad_norm": 1.9477434158325195, - "learning_rate": 2.980392156862745e-06, - "loss": 0.1598, - "step": 4117 - }, - { - "epoch": 0.7021125822521779, - "grad_norm": 1.599894642829895, - "learning_rate": 2.978687127024723e-06, - "loss": 0.1349, - "step": 4118 - }, - { - "epoch": 0.7022830806937156, - "grad_norm": 0.8582209944725037, - "learning_rate": 2.9769820971867007e-06, - "loss": 0.0778, - "step": 4119 - }, - { - "epoch": 0.7024535791352532, - "grad_norm": 1.3194434642791748, - "learning_rate": 2.9752770673486786e-06, - "loss": 0.1299, - "step": 4120 - }, - { - "epoch": 0.7026240775767909, - "grad_norm": 1.538150668144226, - "learning_rate": 2.973572037510657e-06, - "loss": 0.1519, - "step": 4121 - }, - { - "epoch": 0.7027945760183286, - "grad_norm": 1.2453093528747559, - "learning_rate": 2.9718670076726345e-06, - "loss": 0.1228, - "step": 4122 - }, - { - "epoch": 0.7029650744598662, - "grad_norm": 0.9578561186790466, - "learning_rate": 2.9701619778346125e-06, - "loss": 0.0931, - "step": 4123 - }, - { - "epoch": 0.7031355729014039, - "grad_norm": 2.0141830444335938, - "learning_rate": 2.9684569479965904e-06, - "loss": 0.096, - "step": 4124 - }, - { - "epoch": 0.7033060713429417, - "grad_norm": 1.8215370178222656, - "learning_rate": 2.966751918158568e-06, - "loss": 0.1514, - "step": 4125 - }, - { - "epoch": 0.7034765697844794, - "grad_norm": 1.2377793788909912, - "learning_rate": 2.965046888320546e-06, - "loss": 0.1562, - "step": 4126 - }, - { - "epoch": 0.703647068226017, - "grad_norm": 1.0801706314086914, - "learning_rate": 2.9633418584825234e-06, - "loss": 0.1053, - "step": 4127 - }, - { - "epoch": 0.7038175666675547, - "grad_norm": 1.0231587886810303, - "learning_rate": 2.9616368286445014e-06, - "loss": 0.0982, - "step": 4128 - }, - { - "epoch": 0.7039880651090924, - "grad_norm": 1.714358925819397, - "learning_rate": 2.9599317988064797e-06, - "loss": 0.1542, - "step": 4129 - }, - { - "epoch": 0.70415856355063, - "grad_norm": 1.3221189975738525, - "learning_rate": 2.9582267689684573e-06, - "loss": 0.1671, - "step": 4130 - }, - { - "epoch": 0.7043290619921677, - "grad_norm": 1.0923004150390625, - "learning_rate": 2.956521739130435e-06, - "loss": 0.0877, - "step": 4131 - }, - { - "epoch": 0.7044995604337054, - "grad_norm": 1.122705340385437, - "learning_rate": 2.9548167092924127e-06, - "loss": 0.0929, - "step": 4132 - }, - { - "epoch": 0.7046700588752431, - "grad_norm": 1.1311506032943726, - "learning_rate": 2.9531116794543907e-06, - "loss": 0.0871, - "step": 4133 - }, - { - "epoch": 0.7048405573167807, - "grad_norm": 1.2216159105300903, - "learning_rate": 2.9514066496163686e-06, - "loss": 0.1291, - "step": 4134 - }, - { - "epoch": 0.7050110557583185, - "grad_norm": 1.0954818725585938, - "learning_rate": 2.949701619778346e-06, - "loss": 0.0986, - "step": 4135 - }, - { - "epoch": 0.7051815541998562, - "grad_norm": 1.4039642810821533, - "learning_rate": 2.947996589940324e-06, - "loss": 0.142, - "step": 4136 - }, - { - "epoch": 0.7053520526413938, - "grad_norm": 1.124967098236084, - "learning_rate": 2.9462915601023016e-06, - "loss": 0.1346, - "step": 4137 - }, - { - "epoch": 0.7055225510829315, - "grad_norm": 1.3469061851501465, - "learning_rate": 2.94458653026428e-06, - "loss": 0.0981, - "step": 4138 - }, - { - "epoch": 0.7056930495244692, - "grad_norm": 1.1395944356918335, - "learning_rate": 2.942881500426258e-06, - "loss": 0.07, - "step": 4139 - }, - { - "epoch": 0.7058635479660069, - "grad_norm": 2.9476287364959717, - "learning_rate": 2.9411764705882355e-06, - "loss": 0.1005, - "step": 4140 - }, - { - "epoch": 0.7060340464075445, - "grad_norm": 0.6996238231658936, - "learning_rate": 2.9394714407502134e-06, - "loss": 0.0563, - "step": 4141 - }, - { - "epoch": 0.7062045448490822, - "grad_norm": 1.2846816778182983, - "learning_rate": 2.9377664109121914e-06, - "loss": 0.0969, - "step": 4142 - }, - { - "epoch": 0.70637504329062, - "grad_norm": 1.0030068159103394, - "learning_rate": 2.936061381074169e-06, - "loss": 0.0635, - "step": 4143 - }, - { - "epoch": 0.7065455417321576, - "grad_norm": 1.61794114112854, - "learning_rate": 2.934356351236147e-06, - "loss": 0.1276, - "step": 4144 - }, - { - "epoch": 0.7067160401736953, - "grad_norm": 1.2051515579223633, - "learning_rate": 2.9326513213981244e-06, - "loss": 0.1229, - "step": 4145 - }, - { - "epoch": 0.706886538615233, - "grad_norm": 1.1893885135650635, - "learning_rate": 2.9309462915601027e-06, - "loss": 0.1083, - "step": 4146 - }, - { - "epoch": 0.7070570370567707, - "grad_norm": 1.0019936561584473, - "learning_rate": 2.9292412617220807e-06, - "loss": 0.0553, - "step": 4147 - }, - { - "epoch": 0.7072275354983083, - "grad_norm": 1.0891516208648682, - "learning_rate": 2.927536231884058e-06, - "loss": 0.1181, - "step": 4148 - }, - { - "epoch": 0.707398033939846, - "grad_norm": 1.939622163772583, - "learning_rate": 2.925831202046036e-06, - "loss": 0.1261, - "step": 4149 - }, - { - "epoch": 0.7075685323813837, - "grad_norm": 1.939447283744812, - "learning_rate": 2.9241261722080137e-06, - "loss": 0.2224, - "step": 4150 - }, - { - "epoch": 0.7077390308229213, - "grad_norm": 1.927830457687378, - "learning_rate": 2.9224211423699916e-06, - "loss": 0.1948, - "step": 4151 - }, - { - "epoch": 0.707909529264459, - "grad_norm": 1.7494922876358032, - "learning_rate": 2.9207161125319696e-06, - "loss": 0.1486, - "step": 4152 - }, - { - "epoch": 0.7080800277059968, - "grad_norm": 1.2902439832687378, - "learning_rate": 2.919011082693947e-06, - "loss": 0.0676, - "step": 4153 - }, - { - "epoch": 0.7082505261475345, - "grad_norm": 1.1832592487335205, - "learning_rate": 2.917306052855925e-06, - "loss": 0.1257, - "step": 4154 - }, - { - "epoch": 0.7084210245890721, - "grad_norm": 1.3747416734695435, - "learning_rate": 2.9156010230179034e-06, - "loss": 0.0945, - "step": 4155 - }, - { - "epoch": 0.7085915230306098, - "grad_norm": 1.8421868085861206, - "learning_rate": 2.913895993179881e-06, - "loss": 0.1391, - "step": 4156 - }, - { - "epoch": 0.7087620214721475, - "grad_norm": 1.0732749700546265, - "learning_rate": 2.912190963341859e-06, - "loss": 0.0927, - "step": 4157 - }, - { - "epoch": 0.7089325199136851, - "grad_norm": 0.9617205858230591, - "learning_rate": 2.9104859335038364e-06, - "loss": 0.0774, - "step": 4158 - }, - { - "epoch": 0.7091030183552228, - "grad_norm": 1.061026692390442, - "learning_rate": 2.9087809036658143e-06, - "loss": 0.0845, - "step": 4159 - }, - { - "epoch": 0.7092735167967605, - "grad_norm": 1.1157773733139038, - "learning_rate": 2.9070758738277923e-06, - "loss": 0.1153, - "step": 4160 - }, - { - "epoch": 0.7094440152382983, - "grad_norm": 1.2340047359466553, - "learning_rate": 2.90537084398977e-06, - "loss": 0.0949, - "step": 4161 - }, - { - "epoch": 0.7096145136798359, - "grad_norm": 0.9534417986869812, - "learning_rate": 2.9036658141517478e-06, - "loss": 0.1063, - "step": 4162 - }, - { - "epoch": 0.7097850121213736, - "grad_norm": 1.6519148349761963, - "learning_rate": 2.901960784313726e-06, - "loss": 0.1103, - "step": 4163 - }, - { - "epoch": 0.7099555105629113, - "grad_norm": 1.2995315790176392, - "learning_rate": 2.9002557544757037e-06, - "loss": 0.1218, - "step": 4164 - }, - { - "epoch": 0.710126009004449, - "grad_norm": 1.3705964088439941, - "learning_rate": 2.8985507246376816e-06, - "loss": 0.1569, - "step": 4165 - }, - { - "epoch": 0.7102965074459866, - "grad_norm": 1.0150760412216187, - "learning_rate": 2.896845694799659e-06, - "loss": 0.0828, - "step": 4166 - }, - { - "epoch": 0.7104670058875243, - "grad_norm": 1.2685070037841797, - "learning_rate": 2.895140664961637e-06, - "loss": 0.1415, - "step": 4167 - }, - { - "epoch": 0.710637504329062, - "grad_norm": 2.023293972015381, - "learning_rate": 2.8934356351236146e-06, - "loss": 0.1005, - "step": 4168 - }, - { - "epoch": 0.7108080027705996, - "grad_norm": 1.7603518962860107, - "learning_rate": 2.8917306052855926e-06, - "loss": 0.1835, - "step": 4169 - }, - { - "epoch": 0.7109785012121373, - "grad_norm": 1.636339545249939, - "learning_rate": 2.8900255754475705e-06, - "loss": 0.1074, - "step": 4170 - }, - { - "epoch": 0.7111489996536751, - "grad_norm": 0.8647012114524841, - "learning_rate": 2.888320545609549e-06, - "loss": 0.0743, - "step": 4171 - }, - { - "epoch": 0.7113194980952128, - "grad_norm": 1.7029750347137451, - "learning_rate": 2.8866155157715264e-06, - "loss": 0.1231, - "step": 4172 - }, - { - "epoch": 0.7114899965367504, - "grad_norm": 0.8566877245903015, - "learning_rate": 2.8849104859335043e-06, - "loss": 0.0483, - "step": 4173 - }, - { - "epoch": 0.7116604949782881, - "grad_norm": 1.5091886520385742, - "learning_rate": 2.883205456095482e-06, - "loss": 0.1455, - "step": 4174 - }, - { - "epoch": 0.7118309934198258, - "grad_norm": 1.0542126893997192, - "learning_rate": 2.88150042625746e-06, - "loss": 0.0929, - "step": 4175 - }, - { - "epoch": 0.7120014918613634, - "grad_norm": 1.0845410823822021, - "learning_rate": 2.8797953964194373e-06, - "loss": 0.0825, - "step": 4176 - }, - { - "epoch": 0.7121719903029011, - "grad_norm": 1.2993086576461792, - "learning_rate": 2.8780903665814153e-06, - "loss": 0.1161, - "step": 4177 - }, - { - "epoch": 0.7123424887444388, - "grad_norm": 1.0071661472320557, - "learning_rate": 2.8763853367433932e-06, - "loss": 0.1235, - "step": 4178 - }, - { - "epoch": 0.7125129871859766, - "grad_norm": 1.3799209594726562, - "learning_rate": 2.8746803069053708e-06, - "loss": 0.1558, - "step": 4179 - }, - { - "epoch": 0.7126834856275142, - "grad_norm": 1.4128234386444092, - "learning_rate": 2.872975277067349e-06, - "loss": 0.0733, - "step": 4180 - }, - { - "epoch": 0.7128539840690519, - "grad_norm": 2.611358880996704, - "learning_rate": 2.871270247229327e-06, - "loss": 0.162, - "step": 4181 - }, - { - "epoch": 0.7130244825105896, - "grad_norm": 1.768095850944519, - "learning_rate": 2.8695652173913046e-06, - "loss": 0.1795, - "step": 4182 - }, - { - "epoch": 0.7131949809521272, - "grad_norm": 1.983628511428833, - "learning_rate": 2.8678601875532826e-06, - "loss": 0.2206, - "step": 4183 - }, - { - "epoch": 0.7133654793936649, - "grad_norm": 0.8268606066703796, - "learning_rate": 2.86615515771526e-06, - "loss": 0.0654, - "step": 4184 - }, - { - "epoch": 0.7135359778352026, - "grad_norm": 2.2783286571502686, - "learning_rate": 2.864450127877238e-06, - "loss": 0.1699, - "step": 4185 - }, - { - "epoch": 0.7137064762767403, - "grad_norm": 1.525234341621399, - "learning_rate": 2.8627450980392155e-06, - "loss": 0.115, - "step": 4186 - }, - { - "epoch": 0.7138769747182779, - "grad_norm": 1.6166064739227295, - "learning_rate": 2.8610400682011935e-06, - "loss": 0.1621, - "step": 4187 - }, - { - "epoch": 0.7140474731598156, - "grad_norm": 1.0671355724334717, - "learning_rate": 2.859335038363172e-06, - "loss": 0.0987, - "step": 4188 - }, - { - "epoch": 0.7142179716013534, - "grad_norm": 1.1636643409729004, - "learning_rate": 2.8576300085251494e-06, - "loss": 0.0428, - "step": 4189 - }, - { - "epoch": 0.714388470042891, - "grad_norm": 1.1787340641021729, - "learning_rate": 2.8559249786871273e-06, - "loss": 0.1225, - "step": 4190 - }, - { - "epoch": 0.7145589684844287, - "grad_norm": 1.5593682527542114, - "learning_rate": 2.8542199488491053e-06, - "loss": 0.0722, - "step": 4191 - }, - { - "epoch": 0.7147294669259664, - "grad_norm": 1.4937877655029297, - "learning_rate": 2.852514919011083e-06, - "loss": 0.1429, - "step": 4192 - }, - { - "epoch": 0.7148999653675041, - "grad_norm": 1.341630220413208, - "learning_rate": 2.8508098891730608e-06, - "loss": 0.1719, - "step": 4193 - }, - { - "epoch": 0.7150704638090417, - "grad_norm": 1.9753972291946411, - "learning_rate": 2.8491048593350383e-06, - "loss": 0.1641, - "step": 4194 - }, - { - "epoch": 0.7152409622505794, - "grad_norm": 1.4761886596679688, - "learning_rate": 2.8473998294970162e-06, - "loss": 0.1814, - "step": 4195 - }, - { - "epoch": 0.7154114606921171, - "grad_norm": 1.4910000562667847, - "learning_rate": 2.8456947996589946e-06, - "loss": 0.1179, - "step": 4196 - }, - { - "epoch": 0.7155819591336547, - "grad_norm": 1.1857808828353882, - "learning_rate": 2.843989769820972e-06, - "loss": 0.1288, - "step": 4197 - }, - { - "epoch": 0.7157524575751925, - "grad_norm": 1.2303504943847656, - "learning_rate": 2.84228473998295e-06, - "loss": 0.0762, - "step": 4198 - }, - { - "epoch": 0.7159229560167302, - "grad_norm": 1.384795904159546, - "learning_rate": 2.840579710144928e-06, - "loss": 0.1227, - "step": 4199 - }, - { - "epoch": 0.7160934544582679, - "grad_norm": 1.126552939414978, - "learning_rate": 2.8388746803069055e-06, - "loss": 0.106, - "step": 4200 - }, - { - "epoch": 0.7160934544582679, - "eval_f1_score": 0.41904761904761906, - "eval_loss": 0.1373097002506256, - "eval_runtime": 183.3693, - "eval_samples_per_second": 54.535, - "eval_steps_per_second": 3.408, - "step": 4200 - }, - { - "epoch": 0.7162639528998055, - "grad_norm": 1.542959213256836, - "learning_rate": 2.8371696504688835e-06, - "loss": 0.1462, - "step": 4201 - }, - { - "epoch": 0.7164344513413432, - "grad_norm": 1.6853175163269043, - "learning_rate": 2.835464620630861e-06, - "loss": 0.155, - "step": 4202 - }, - { - "epoch": 0.7166049497828809, - "grad_norm": 1.2853667736053467, - "learning_rate": 2.833759590792839e-06, - "loss": 0.1062, - "step": 4203 - }, - { - "epoch": 0.7167754482244185, - "grad_norm": 1.1857186555862427, - "learning_rate": 2.8320545609548165e-06, - "loss": 0.1316, - "step": 4204 - }, - { - "epoch": 0.7169459466659562, - "grad_norm": 1.0130743980407715, - "learning_rate": 2.830349531116795e-06, - "loss": 0.0867, - "step": 4205 - }, - { - "epoch": 0.717116445107494, - "grad_norm": 2.006258249282837, - "learning_rate": 2.828644501278773e-06, - "loss": 0.103, - "step": 4206 - }, - { - "epoch": 0.7172869435490317, - "grad_norm": 0.9961538910865784, - "learning_rate": 2.8269394714407503e-06, - "loss": 0.1068, - "step": 4207 - }, - { - "epoch": 0.7174574419905693, - "grad_norm": 1.3043984174728394, - "learning_rate": 2.8252344416027283e-06, - "loss": 0.1233, - "step": 4208 - }, - { - "epoch": 0.717627940432107, - "grad_norm": 1.4612125158309937, - "learning_rate": 2.8235294117647062e-06, - "loss": 0.1093, - "step": 4209 - }, - { - "epoch": 0.7177984388736447, - "grad_norm": 1.5382938385009766, - "learning_rate": 2.8218243819266838e-06, - "loss": 0.1405, - "step": 4210 - }, - { - "epoch": 0.7179689373151823, - "grad_norm": 0.8454248309135437, - "learning_rate": 2.8201193520886617e-06, - "loss": 0.0629, - "step": 4211 - }, - { - "epoch": 0.71813943575672, - "grad_norm": 1.388529658317566, - "learning_rate": 2.8184143222506392e-06, - "loss": 0.1287, - "step": 4212 - }, - { - "epoch": 0.7183099341982577, - "grad_norm": 1.1707777976989746, - "learning_rate": 2.8167092924126176e-06, - "loss": 0.0902, - "step": 4213 - }, - { - "epoch": 0.7184804326397954, - "grad_norm": 1.8198975324630737, - "learning_rate": 2.8150042625745955e-06, - "loss": 0.1535, - "step": 4214 - }, - { - "epoch": 0.718650931081333, - "grad_norm": 0.971316397190094, - "learning_rate": 2.813299232736573e-06, - "loss": 0.0817, - "step": 4215 - }, - { - "epoch": 0.7188214295228708, - "grad_norm": 2.003599166870117, - "learning_rate": 2.811594202898551e-06, - "loss": 0.1548, - "step": 4216 - }, - { - "epoch": 0.7189919279644085, - "grad_norm": 1.0757346153259277, - "learning_rate": 2.809889173060529e-06, - "loss": 0.0863, - "step": 4217 - }, - { - "epoch": 0.7191624264059462, - "grad_norm": 1.4649049043655396, - "learning_rate": 2.8081841432225065e-06, - "loss": 0.0921, - "step": 4218 - }, - { - "epoch": 0.7193329248474838, - "grad_norm": 1.4503200054168701, - "learning_rate": 2.8064791133844844e-06, - "loss": 0.1385, - "step": 4219 - }, - { - "epoch": 0.7195034232890215, - "grad_norm": 1.5700362920761108, - "learning_rate": 2.804774083546462e-06, - "loss": 0.1012, - "step": 4220 - }, - { - "epoch": 0.7196739217305592, - "grad_norm": 1.4330483675003052, - "learning_rate": 2.8030690537084403e-06, - "loss": 0.1198, - "step": 4221 - }, - { - "epoch": 0.7198444201720968, - "grad_norm": 1.444413661956787, - "learning_rate": 2.8013640238704183e-06, - "loss": 0.1491, - "step": 4222 - }, - { - "epoch": 0.7200149186136345, - "grad_norm": 1.5214699506759644, - "learning_rate": 2.799658994032396e-06, - "loss": 0.1354, - "step": 4223 - }, - { - "epoch": 0.7201854170551723, - "grad_norm": 1.3857860565185547, - "learning_rate": 2.7979539641943737e-06, - "loss": 0.1061, - "step": 4224 - }, - { - "epoch": 0.72035591549671, - "grad_norm": 1.6461029052734375, - "learning_rate": 2.7962489343563513e-06, - "loss": 0.1131, - "step": 4225 - }, - { - "epoch": 0.7205264139382476, - "grad_norm": 1.16617751121521, - "learning_rate": 2.7945439045183292e-06, - "loss": 0.104, - "step": 4226 - }, - { - "epoch": 0.7206969123797853, - "grad_norm": 1.5141948461532593, - "learning_rate": 2.792838874680307e-06, - "loss": 0.1551, - "step": 4227 - }, - { - "epoch": 0.720867410821323, - "grad_norm": 1.6117165088653564, - "learning_rate": 2.7911338448422847e-06, - "loss": 0.1252, - "step": 4228 - }, - { - "epoch": 0.7210379092628606, - "grad_norm": 1.0355428457260132, - "learning_rate": 2.7894288150042626e-06, - "loss": 0.113, - "step": 4229 - }, - { - "epoch": 0.7212084077043983, - "grad_norm": 1.0461231470108032, - "learning_rate": 2.787723785166241e-06, - "loss": 0.076, - "step": 4230 - }, - { - "epoch": 0.721378906145936, - "grad_norm": 0.8947053551673889, - "learning_rate": 2.7860187553282185e-06, - "loss": 0.0648, - "step": 4231 - }, - { - "epoch": 0.7215494045874737, - "grad_norm": 0.8495338559150696, - "learning_rate": 2.7843137254901965e-06, - "loss": 0.0694, - "step": 4232 - }, - { - "epoch": 0.7217199030290113, - "grad_norm": 1.1855164766311646, - "learning_rate": 2.782608695652174e-06, - "loss": 0.119, - "step": 4233 - }, - { - "epoch": 0.7218904014705491, - "grad_norm": 1.35049569606781, - "learning_rate": 2.780903665814152e-06, - "loss": 0.0734, - "step": 4234 - }, - { - "epoch": 0.7220608999120868, - "grad_norm": 0.6907497048377991, - "learning_rate": 2.77919863597613e-06, - "loss": 0.0636, - "step": 4235 - }, - { - "epoch": 0.7222313983536244, - "grad_norm": 1.5610991716384888, - "learning_rate": 2.7774936061381074e-06, - "loss": 0.128, - "step": 4236 - }, - { - "epoch": 0.7224018967951621, - "grad_norm": 1.1115251779556274, - "learning_rate": 2.7757885763000854e-06, - "loss": 0.1234, - "step": 4237 - }, - { - "epoch": 0.7225723952366998, - "grad_norm": 1.9755184650421143, - "learning_rate": 2.7740835464620637e-06, - "loss": 0.1831, - "step": 4238 - }, - { - "epoch": 0.7227428936782375, - "grad_norm": 1.4163349866867065, - "learning_rate": 2.7723785166240413e-06, - "loss": 0.1199, - "step": 4239 - }, - { - "epoch": 0.7229133921197751, - "grad_norm": 1.1437273025512695, - "learning_rate": 2.7706734867860192e-06, - "loss": 0.1224, - "step": 4240 - }, - { - "epoch": 0.7230838905613128, - "grad_norm": 0.7897486090660095, - "learning_rate": 2.7689684569479967e-06, - "loss": 0.0404, - "step": 4241 - }, - { - "epoch": 0.7232543890028506, - "grad_norm": 0.8485286235809326, - "learning_rate": 2.7672634271099747e-06, - "loss": 0.106, - "step": 4242 - }, - { - "epoch": 0.7234248874443882, - "grad_norm": 1.5928601026535034, - "learning_rate": 2.7655583972719522e-06, - "loss": 0.1414, - "step": 4243 - }, - { - "epoch": 0.7235953858859259, - "grad_norm": 1.260733962059021, - "learning_rate": 2.76385336743393e-06, - "loss": 0.127, - "step": 4244 - }, - { - "epoch": 0.7237658843274636, - "grad_norm": 1.6917012929916382, - "learning_rate": 2.762148337595908e-06, - "loss": 0.1737, - "step": 4245 - }, - { - "epoch": 0.7239363827690013, - "grad_norm": 1.3605605363845825, - "learning_rate": 2.7604433077578856e-06, - "loss": 0.1303, - "step": 4246 - }, - { - "epoch": 0.7241068812105389, - "grad_norm": 0.9476409554481506, - "learning_rate": 2.758738277919864e-06, - "loss": 0.0679, - "step": 4247 - }, - { - "epoch": 0.7242773796520766, - "grad_norm": 1.033415675163269, - "learning_rate": 2.757033248081842e-06, - "loss": 0.083, - "step": 4248 - }, - { - "epoch": 0.7244478780936143, - "grad_norm": 1.710024118423462, - "learning_rate": 2.7553282182438195e-06, - "loss": 0.1698, - "step": 4249 - }, - { - "epoch": 0.7246183765351519, - "grad_norm": 1.2438137531280518, - "learning_rate": 2.7536231884057974e-06, - "loss": 0.0689, - "step": 4250 - }, - { - "epoch": 0.7247888749766896, - "grad_norm": 0.8587600588798523, - "learning_rate": 2.751918158567775e-06, - "loss": 0.0427, - "step": 4251 - }, - { - "epoch": 0.7249593734182274, - "grad_norm": 0.9046088457107544, - "learning_rate": 2.750213128729753e-06, - "loss": 0.0771, - "step": 4252 - }, - { - "epoch": 0.7251298718597651, - "grad_norm": 1.3793909549713135, - "learning_rate": 2.748508098891731e-06, - "loss": 0.1231, - "step": 4253 - }, - { - "epoch": 0.7253003703013027, - "grad_norm": 0.9312161207199097, - "learning_rate": 2.7468030690537084e-06, - "loss": 0.07, - "step": 4254 - }, - { - "epoch": 0.7254708687428404, - "grad_norm": 0.8483788967132568, - "learning_rate": 2.7450980392156867e-06, - "loss": 0.0578, - "step": 4255 - }, - { - "epoch": 0.7256413671843781, - "grad_norm": 0.8836938142776489, - "learning_rate": 2.7433930093776647e-06, - "loss": 0.0601, - "step": 4256 - }, - { - "epoch": 0.7258118656259157, - "grad_norm": 2.299175977706909, - "learning_rate": 2.7416879795396422e-06, - "loss": 0.263, - "step": 4257 - }, - { - "epoch": 0.7259823640674534, - "grad_norm": 1.600693941116333, - "learning_rate": 2.73998294970162e-06, - "loss": 0.1501, - "step": 4258 - }, - { - "epoch": 0.7261528625089911, - "grad_norm": 1.5393942594528198, - "learning_rate": 2.7382779198635977e-06, - "loss": 0.106, - "step": 4259 - }, - { - "epoch": 0.7263233609505289, - "grad_norm": 1.9113017320632935, - "learning_rate": 2.7365728900255756e-06, - "loss": 0.1802, - "step": 4260 - }, - { - "epoch": 0.7264938593920665, - "grad_norm": 1.0482639074325562, - "learning_rate": 2.734867860187553e-06, - "loss": 0.1159, - "step": 4261 - }, - { - "epoch": 0.7266643578336042, - "grad_norm": 1.4692020416259766, - "learning_rate": 2.733162830349531e-06, - "loss": 0.1104, - "step": 4262 - }, - { - "epoch": 0.7268348562751419, - "grad_norm": 1.8054784536361694, - "learning_rate": 2.7314578005115095e-06, - "loss": 0.1175, - "step": 4263 - }, - { - "epoch": 0.7270053547166796, - "grad_norm": 1.0958493947982788, - "learning_rate": 2.729752770673487e-06, - "loss": 0.1065, - "step": 4264 - }, - { - "epoch": 0.7271758531582172, - "grad_norm": 1.799086570739746, - "learning_rate": 2.728047740835465e-06, - "loss": 0.1377, - "step": 4265 - }, - { - "epoch": 0.7273463515997549, - "grad_norm": 1.0919666290283203, - "learning_rate": 2.726342710997443e-06, - "loss": 0.0761, - "step": 4266 - }, - { - "epoch": 0.7275168500412926, - "grad_norm": 0.9615311622619629, - "learning_rate": 2.7246376811594204e-06, - "loss": 0.1008, - "step": 4267 - }, - { - "epoch": 0.7276873484828302, - "grad_norm": 1.4227288961410522, - "learning_rate": 2.7229326513213984e-06, - "loss": 0.1297, - "step": 4268 - }, - { - "epoch": 0.727857846924368, - "grad_norm": 2.2517104148864746, - "learning_rate": 2.721227621483376e-06, - "loss": 0.1958, - "step": 4269 - }, - { - "epoch": 0.7280283453659057, - "grad_norm": 0.826910674571991, - "learning_rate": 2.719522591645354e-06, - "loss": 0.0842, - "step": 4270 - }, - { - "epoch": 0.7281988438074434, - "grad_norm": 1.608984112739563, - "learning_rate": 2.7178175618073318e-06, - "loss": 0.1673, - "step": 4271 - }, - { - "epoch": 0.728369342248981, - "grad_norm": 2.3391051292419434, - "learning_rate": 2.7161125319693097e-06, - "loss": 0.2653, - "step": 4272 - }, - { - "epoch": 0.7285398406905187, - "grad_norm": 1.6860833168029785, - "learning_rate": 2.7144075021312877e-06, - "loss": 0.142, - "step": 4273 - }, - { - "epoch": 0.7287103391320564, - "grad_norm": 1.008806586265564, - "learning_rate": 2.7127024722932656e-06, - "loss": 0.0992, - "step": 4274 - }, - { - "epoch": 0.728880837573594, - "grad_norm": 0.8580942153930664, - "learning_rate": 2.710997442455243e-06, - "loss": 0.0887, - "step": 4275 - }, - { - "epoch": 0.7290513360151317, - "grad_norm": 1.5033763647079468, - "learning_rate": 2.709292412617221e-06, - "loss": 0.1098, - "step": 4276 - }, - { - "epoch": 0.7292218344566694, - "grad_norm": 1.1777950525283813, - "learning_rate": 2.7075873827791986e-06, - "loss": 0.1614, - "step": 4277 - }, - { - "epoch": 0.7293923328982072, - "grad_norm": 1.6841565370559692, - "learning_rate": 2.7058823529411766e-06, - "loss": 0.1036, - "step": 4278 - }, - { - "epoch": 0.7295628313397448, - "grad_norm": 1.382503628730774, - "learning_rate": 2.704177323103154e-06, - "loss": 0.1626, - "step": 4279 - }, - { - "epoch": 0.7297333297812825, - "grad_norm": 0.8704547882080078, - "learning_rate": 2.7024722932651325e-06, - "loss": 0.0475, - "step": 4280 - }, - { - "epoch": 0.7299038282228202, - "grad_norm": 0.9458300471305847, - "learning_rate": 2.7007672634271104e-06, - "loss": 0.0854, - "step": 4281 - }, - { - "epoch": 0.7300743266643578, - "grad_norm": 1.3750553131103516, - "learning_rate": 2.699062233589088e-06, - "loss": 0.1525, - "step": 4282 - }, - { - "epoch": 0.7302448251058955, - "grad_norm": 1.59765625, - "learning_rate": 2.697357203751066e-06, - "loss": 0.1628, - "step": 4283 - }, - { - "epoch": 0.7304153235474332, - "grad_norm": 0.8783493638038635, - "learning_rate": 2.695652173913044e-06, - "loss": 0.0422, - "step": 4284 - }, - { - "epoch": 0.7305858219889709, - "grad_norm": 1.324453592300415, - "learning_rate": 2.6939471440750214e-06, - "loss": 0.1313, - "step": 4285 - }, - { - "epoch": 0.7307563204305085, - "grad_norm": 1.4083690643310547, - "learning_rate": 2.6922421142369993e-06, - "loss": 0.1731, - "step": 4286 - }, - { - "epoch": 0.7309268188720462, - "grad_norm": 1.37704336643219, - "learning_rate": 2.690537084398977e-06, - "loss": 0.1982, - "step": 4287 - }, - { - "epoch": 0.731097317313584, - "grad_norm": 1.0839030742645264, - "learning_rate": 2.688832054560955e-06, - "loss": 0.1559, - "step": 4288 - }, - { - "epoch": 0.7312678157551216, - "grad_norm": 0.9779359698295593, - "learning_rate": 2.687127024722933e-06, - "loss": 0.1027, - "step": 4289 - }, - { - "epoch": 0.7314383141966593, - "grad_norm": 0.768750011920929, - "learning_rate": 2.6854219948849107e-06, - "loss": 0.0665, - "step": 4290 - }, - { - "epoch": 0.731608812638197, - "grad_norm": 1.0980817079544067, - "learning_rate": 2.6837169650468886e-06, - "loss": 0.1393, - "step": 4291 - }, - { - "epoch": 0.7317793110797347, - "grad_norm": 1.476439356803894, - "learning_rate": 2.6820119352088666e-06, - "loss": 0.1317, - "step": 4292 - }, - { - "epoch": 0.7319498095212723, - "grad_norm": 2.1569950580596924, - "learning_rate": 2.680306905370844e-06, - "loss": 0.2718, - "step": 4293 - }, - { - "epoch": 0.73212030796281, - "grad_norm": 1.522475242614746, - "learning_rate": 2.678601875532822e-06, - "loss": 0.1145, - "step": 4294 - }, - { - "epoch": 0.7322908064043477, - "grad_norm": 0.9734368324279785, - "learning_rate": 2.6768968456947996e-06, - "loss": 0.0975, - "step": 4295 - }, - { - "epoch": 0.7324613048458853, - "grad_norm": 1.308054804801941, - "learning_rate": 2.6751918158567775e-06, - "loss": 0.1428, - "step": 4296 - }, - { - "epoch": 0.7326318032874231, - "grad_norm": 1.8643138408660889, - "learning_rate": 2.673486786018756e-06, - "loss": 0.1347, - "step": 4297 - }, - { - "epoch": 0.7328023017289608, - "grad_norm": 1.876632809638977, - "learning_rate": 2.6717817561807334e-06, - "loss": 0.1321, - "step": 4298 - }, - { - "epoch": 0.7329728001704985, - "grad_norm": 1.1990265846252441, - "learning_rate": 2.6700767263427114e-06, - "loss": 0.1154, - "step": 4299 - }, - { - "epoch": 0.7331432986120361, - "grad_norm": 1.6998820304870605, - "learning_rate": 2.668371696504689e-06, - "loss": 0.2055, - "step": 4300 - }, - { - "epoch": 0.7331432986120361, - "eval_f1_score": 0.38242894056847543, - "eval_loss": 0.13687299191951752, - "eval_runtime": 183.3795, - "eval_samples_per_second": 54.532, - "eval_steps_per_second": 3.408, - "step": 4300 - }, - { - "epoch": 0.7333137970535738, - "grad_norm": 1.0364716053009033, - "learning_rate": 2.666666666666667e-06, - "loss": 0.1074, - "step": 4301 - }, - { - "epoch": 0.7334842954951115, - "grad_norm": 1.6394861936569214, - "learning_rate": 2.6649616368286448e-06, - "loss": 0.14, - "step": 4302 - }, - { - "epoch": 0.7336547939366491, - "grad_norm": 1.0912730693817139, - "learning_rate": 2.6632566069906223e-06, - "loss": 0.1327, - "step": 4303 - }, - { - "epoch": 0.7338252923781868, - "grad_norm": 1.2479007244110107, - "learning_rate": 2.6615515771526002e-06, - "loss": 0.1071, - "step": 4304 - }, - { - "epoch": 0.7339957908197245, - "grad_norm": 1.0514239072799683, - "learning_rate": 2.6598465473145786e-06, - "loss": 0.0963, - "step": 4305 - }, - { - "epoch": 0.7341662892612623, - "grad_norm": 1.7217882871627808, - "learning_rate": 2.658141517476556e-06, - "loss": 0.1427, - "step": 4306 - }, - { - "epoch": 0.7343367877027999, - "grad_norm": 1.6627919673919678, - "learning_rate": 2.656436487638534e-06, - "loss": 0.1591, - "step": 4307 - }, - { - "epoch": 0.7345072861443376, - "grad_norm": 1.7781330347061157, - "learning_rate": 2.6547314578005116e-06, - "loss": 0.2108, - "step": 4308 - }, - { - "epoch": 0.7346777845858753, - "grad_norm": 0.9791902303695679, - "learning_rate": 2.6530264279624896e-06, - "loss": 0.0956, - "step": 4309 - }, - { - "epoch": 0.734848283027413, - "grad_norm": 1.3857060670852661, - "learning_rate": 2.6513213981244675e-06, - "loss": 0.1057, - "step": 4310 - }, - { - "epoch": 0.7350187814689506, - "grad_norm": 1.7408908605575562, - "learning_rate": 2.649616368286445e-06, - "loss": 0.1027, - "step": 4311 - }, - { - "epoch": 0.7351892799104883, - "grad_norm": 0.9111679792404175, - "learning_rate": 2.647911338448423e-06, - "loss": 0.0736, - "step": 4312 - }, - { - "epoch": 0.735359778352026, - "grad_norm": 1.2416996955871582, - "learning_rate": 2.6462063086104014e-06, - "loss": 0.1333, - "step": 4313 - }, - { - "epoch": 0.7355302767935636, - "grad_norm": 1.0298309326171875, - "learning_rate": 2.644501278772379e-06, - "loss": 0.11, - "step": 4314 - }, - { - "epoch": 0.7357007752351014, - "grad_norm": 0.9649758338928223, - "learning_rate": 2.642796248934357e-06, - "loss": 0.0822, - "step": 4315 - }, - { - "epoch": 0.7358712736766391, - "grad_norm": 2.0734007358551025, - "learning_rate": 2.6410912190963343e-06, - "loss": 0.2293, - "step": 4316 - }, - { - "epoch": 0.7360417721181768, - "grad_norm": 1.662545919418335, - "learning_rate": 2.6393861892583123e-06, - "loss": 0.1653, - "step": 4317 - }, - { - "epoch": 0.7362122705597144, - "grad_norm": 0.939878523349762, - "learning_rate": 2.63768115942029e-06, - "loss": 0.1111, - "step": 4318 - }, - { - "epoch": 0.7363827690012521, - "grad_norm": 1.9839119911193848, - "learning_rate": 2.6359761295822678e-06, - "loss": 0.2171, - "step": 4319 - }, - { - "epoch": 0.7365532674427898, - "grad_norm": 1.956137776374817, - "learning_rate": 2.6342710997442457e-06, - "loss": 0.1606, - "step": 4320 - }, - { - "epoch": 0.7367237658843274, - "grad_norm": 1.4634205102920532, - "learning_rate": 2.6325660699062232e-06, - "loss": 0.1347, - "step": 4321 - }, - { - "epoch": 0.7368942643258651, - "grad_norm": 1.2985095977783203, - "learning_rate": 2.6308610400682016e-06, - "loss": 0.1213, - "step": 4322 - }, - { - "epoch": 0.7370647627674028, - "grad_norm": 1.6353211402893066, - "learning_rate": 2.6291560102301796e-06, - "loss": 0.1546, - "step": 4323 - }, - { - "epoch": 0.7372352612089406, - "grad_norm": 1.1860822439193726, - "learning_rate": 2.627450980392157e-06, - "loss": 0.1052, - "step": 4324 - }, - { - "epoch": 0.7374057596504782, - "grad_norm": 0.985072910785675, - "learning_rate": 2.625745950554135e-06, - "loss": 0.0964, - "step": 4325 - }, - { - "epoch": 0.7375762580920159, - "grad_norm": 2.0165584087371826, - "learning_rate": 2.6240409207161126e-06, - "loss": 0.1631, - "step": 4326 - }, - { - "epoch": 0.7377467565335536, - "grad_norm": 1.6807523965835571, - "learning_rate": 2.6223358908780905e-06, - "loss": 0.1516, - "step": 4327 - }, - { - "epoch": 0.7379172549750912, - "grad_norm": 1.2592802047729492, - "learning_rate": 2.6206308610400685e-06, - "loss": 0.0987, - "step": 4328 - }, - { - "epoch": 0.7380877534166289, - "grad_norm": 1.1863470077514648, - "learning_rate": 2.618925831202046e-06, - "loss": 0.1345, - "step": 4329 - }, - { - "epoch": 0.7382582518581666, - "grad_norm": 2.7960240840911865, - "learning_rate": 2.6172208013640243e-06, - "loss": 0.1873, - "step": 4330 - }, - { - "epoch": 0.7384287502997043, - "grad_norm": 1.290603756904602, - "learning_rate": 2.6155157715260023e-06, - "loss": 0.1136, - "step": 4331 - }, - { - "epoch": 0.7385992487412419, - "grad_norm": 0.8807956576347351, - "learning_rate": 2.61381074168798e-06, - "loss": 0.0936, - "step": 4332 - }, - { - "epoch": 0.7387697471827797, - "grad_norm": 1.3434580564498901, - "learning_rate": 2.6121057118499578e-06, - "loss": 0.1259, - "step": 4333 - }, - { - "epoch": 0.7389402456243174, - "grad_norm": 1.2540873289108276, - "learning_rate": 2.6104006820119353e-06, - "loss": 0.0962, - "step": 4334 - }, - { - "epoch": 0.739110744065855, - "grad_norm": 2.339768648147583, - "learning_rate": 2.6086956521739132e-06, - "loss": 0.1698, - "step": 4335 - }, - { - "epoch": 0.7392812425073927, - "grad_norm": 1.055652379989624, - "learning_rate": 2.6069906223358908e-06, - "loss": 0.1213, - "step": 4336 - }, - { - "epoch": 0.7394517409489304, - "grad_norm": 1.548759937286377, - "learning_rate": 2.6052855924978687e-06, - "loss": 0.182, - "step": 4337 - }, - { - "epoch": 0.7396222393904681, - "grad_norm": 2.2357752323150635, - "learning_rate": 2.6035805626598467e-06, - "loss": 0.0535, - "step": 4338 - }, - { - "epoch": 0.7397927378320057, - "grad_norm": 1.687495470046997, - "learning_rate": 2.6018755328218246e-06, - "loss": 0.1639, - "step": 4339 - }, - { - "epoch": 0.7399632362735434, - "grad_norm": 1.113901972770691, - "learning_rate": 2.6001705029838026e-06, - "loss": 0.115, - "step": 4340 - }, - { - "epoch": 0.7401337347150811, - "grad_norm": 1.1302788257598877, - "learning_rate": 2.5984654731457805e-06, - "loss": 0.0996, - "step": 4341 - }, - { - "epoch": 0.7403042331566188, - "grad_norm": 1.3049522638320923, - "learning_rate": 2.596760443307758e-06, - "loss": 0.1667, - "step": 4342 - }, - { - "epoch": 0.7404747315981565, - "grad_norm": 1.607433557510376, - "learning_rate": 2.595055413469736e-06, - "loss": 0.1755, - "step": 4343 - }, - { - "epoch": 0.7406452300396942, - "grad_norm": 0.6860228776931763, - "learning_rate": 2.5933503836317135e-06, - "loss": 0.0924, - "step": 4344 - }, - { - "epoch": 0.7408157284812319, - "grad_norm": 1.0273911952972412, - "learning_rate": 2.5916453537936914e-06, - "loss": 0.1122, - "step": 4345 - }, - { - "epoch": 0.7409862269227695, - "grad_norm": 1.5581039190292358, - "learning_rate": 2.5899403239556694e-06, - "loss": 0.1814, - "step": 4346 - }, - { - "epoch": 0.7411567253643072, - "grad_norm": 1.4215240478515625, - "learning_rate": 2.5882352941176473e-06, - "loss": 0.089, - "step": 4347 - }, - { - "epoch": 0.7413272238058449, - "grad_norm": 1.4152482748031616, - "learning_rate": 2.5865302642796253e-06, - "loss": 0.1896, - "step": 4348 - }, - { - "epoch": 0.7414977222473825, - "grad_norm": 1.5308514833450317, - "learning_rate": 2.5848252344416032e-06, - "loss": 0.1202, - "step": 4349 - }, - { - "epoch": 0.7416682206889202, - "grad_norm": 0.9181138277053833, - "learning_rate": 2.5831202046035808e-06, - "loss": 0.1222, - "step": 4350 - }, - { - "epoch": 0.741838719130458, - "grad_norm": 1.5006439685821533, - "learning_rate": 2.5814151747655587e-06, - "loss": 0.1436, - "step": 4351 - }, - { - "epoch": 0.7420092175719957, - "grad_norm": 1.5951142311096191, - "learning_rate": 2.5797101449275362e-06, - "loss": 0.1597, - "step": 4352 - }, - { - "epoch": 0.7421797160135333, - "grad_norm": 1.2084048986434937, - "learning_rate": 2.578005115089514e-06, - "loss": 0.1351, - "step": 4353 - }, - { - "epoch": 0.742350214455071, - "grad_norm": 1.3123643398284912, - "learning_rate": 2.5763000852514917e-06, - "loss": 0.1916, - "step": 4354 - }, - { - "epoch": 0.7425207128966087, - "grad_norm": 1.2805266380310059, - "learning_rate": 2.57459505541347e-06, - "loss": 0.1025, - "step": 4355 - }, - { - "epoch": 0.7426912113381464, - "grad_norm": 0.7374964356422424, - "learning_rate": 2.572890025575448e-06, - "loss": 0.0495, - "step": 4356 - }, - { - "epoch": 0.742861709779684, - "grad_norm": 1.0054583549499512, - "learning_rate": 2.5711849957374255e-06, - "loss": 0.0919, - "step": 4357 - }, - { - "epoch": 0.7430322082212217, - "grad_norm": 1.0947014093399048, - "learning_rate": 2.5694799658994035e-06, - "loss": 0.1137, - "step": 4358 - }, - { - "epoch": 0.7432027066627594, - "grad_norm": 1.2278332710266113, - "learning_rate": 2.5677749360613814e-06, - "loss": 0.1139, - "step": 4359 - }, - { - "epoch": 0.743373205104297, - "grad_norm": 1.3202364444732666, - "learning_rate": 2.566069906223359e-06, - "loss": 0.1668, - "step": 4360 - }, - { - "epoch": 0.7435437035458348, - "grad_norm": 1.5769107341766357, - "learning_rate": 2.564364876385337e-06, - "loss": 0.1187, - "step": 4361 - }, - { - "epoch": 0.7437142019873725, - "grad_norm": 1.1885755062103271, - "learning_rate": 2.5626598465473144e-06, - "loss": 0.129, - "step": 4362 - }, - { - "epoch": 0.7438847004289102, - "grad_norm": 1.2387917041778564, - "learning_rate": 2.5609548167092924e-06, - "loss": 0.1576, - "step": 4363 - }, - { - "epoch": 0.7440551988704478, - "grad_norm": 1.4065356254577637, - "learning_rate": 2.5592497868712708e-06, - "loss": 0.1399, - "step": 4364 - }, - { - "epoch": 0.7442256973119855, - "grad_norm": 1.1759954690933228, - "learning_rate": 2.5575447570332483e-06, - "loss": 0.1346, - "step": 4365 - }, - { - "epoch": 0.7443961957535232, - "grad_norm": 1.0152462720870972, - "learning_rate": 2.5558397271952262e-06, - "loss": 0.1211, - "step": 4366 - }, - { - "epoch": 0.7445666941950608, - "grad_norm": 1.4754053354263306, - "learning_rate": 2.554134697357204e-06, - "loss": 0.0932, - "step": 4367 - }, - { - "epoch": 0.7447371926365985, - "grad_norm": 1.8604658842086792, - "learning_rate": 2.5524296675191817e-06, - "loss": 0.1788, - "step": 4368 - }, - { - "epoch": 0.7449076910781363, - "grad_norm": 1.0769208669662476, - "learning_rate": 2.5507246376811596e-06, - "loss": 0.1241, - "step": 4369 - }, - { - "epoch": 0.745078189519674, - "grad_norm": 2.2280867099761963, - "learning_rate": 2.549019607843137e-06, - "loss": 0.2057, - "step": 4370 - }, - { - "epoch": 0.7452486879612116, - "grad_norm": 1.528975248336792, - "learning_rate": 2.547314578005115e-06, - "loss": 0.1394, - "step": 4371 - }, - { - "epoch": 0.7454191864027493, - "grad_norm": 1.2800006866455078, - "learning_rate": 2.5456095481670935e-06, - "loss": 0.1064, - "step": 4372 - }, - { - "epoch": 0.745589684844287, - "grad_norm": 0.9087845683097839, - "learning_rate": 2.543904518329071e-06, - "loss": 0.0957, - "step": 4373 - }, - { - "epoch": 0.7457601832858246, - "grad_norm": 1.1006022691726685, - "learning_rate": 2.542199488491049e-06, - "loss": 0.1061, - "step": 4374 - }, - { - "epoch": 0.7459306817273623, - "grad_norm": 1.150396466255188, - "learning_rate": 2.5404944586530265e-06, - "loss": 0.1074, - "step": 4375 - }, - { - "epoch": 0.7461011801689, - "grad_norm": 0.9069077968597412, - "learning_rate": 2.5387894288150044e-06, - "loss": 0.0905, - "step": 4376 - }, - { - "epoch": 0.7462716786104377, - "grad_norm": 1.106367588043213, - "learning_rate": 2.5370843989769824e-06, - "loss": 0.1537, - "step": 4377 - }, - { - "epoch": 0.7464421770519754, - "grad_norm": 1.9207699298858643, - "learning_rate": 2.53537936913896e-06, - "loss": 0.2223, - "step": 4378 - }, - { - "epoch": 0.7466126754935131, - "grad_norm": 0.9847105145454407, - "learning_rate": 2.533674339300938e-06, - "loss": 0.0824, - "step": 4379 - }, - { - "epoch": 0.7467831739350508, - "grad_norm": 1.7170462608337402, - "learning_rate": 2.5319693094629162e-06, - "loss": 0.1208, - "step": 4380 - }, - { - "epoch": 0.7469536723765884, - "grad_norm": 1.0074905157089233, - "learning_rate": 2.5302642796248938e-06, - "loss": 0.1053, - "step": 4381 - }, - { - "epoch": 0.7471241708181261, - "grad_norm": 2.5140938758850098, - "learning_rate": 2.5285592497868717e-06, - "loss": 0.2067, - "step": 4382 - }, - { - "epoch": 0.7472946692596638, - "grad_norm": 1.0962971448898315, - "learning_rate": 2.5268542199488492e-06, - "loss": 0.1203, - "step": 4383 - }, - { - "epoch": 0.7474651677012015, - "grad_norm": 1.105176568031311, - "learning_rate": 2.525149190110827e-06, - "loss": 0.0925, - "step": 4384 - }, - { - "epoch": 0.7476356661427391, - "grad_norm": 1.3398061990737915, - "learning_rate": 2.523444160272805e-06, - "loss": 0.1793, - "step": 4385 - }, - { - "epoch": 0.7478061645842768, - "grad_norm": 1.1867016553878784, - "learning_rate": 2.5217391304347826e-06, - "loss": 0.1557, - "step": 4386 - }, - { - "epoch": 0.7479766630258146, - "grad_norm": 1.3503540754318237, - "learning_rate": 2.5200341005967606e-06, - "loss": 0.1372, - "step": 4387 - }, - { - "epoch": 0.7481471614673522, - "grad_norm": 1.147642970085144, - "learning_rate": 2.518329070758738e-06, - "loss": 0.1113, - "step": 4388 - }, - { - "epoch": 0.7483176599088899, - "grad_norm": 1.246585488319397, - "learning_rate": 2.5166240409207165e-06, - "loss": 0.1425, - "step": 4389 - }, - { - "epoch": 0.7484881583504276, - "grad_norm": 1.5160149335861206, - "learning_rate": 2.5149190110826944e-06, - "loss": 0.07, - "step": 4390 - }, - { - "epoch": 0.7486586567919653, - "grad_norm": 1.313860535621643, - "learning_rate": 2.513213981244672e-06, - "loss": 0.1085, - "step": 4391 - }, - { - "epoch": 0.7488291552335029, - "grad_norm": 0.953596830368042, - "learning_rate": 2.51150895140665e-06, - "loss": 0.0741, - "step": 4392 - }, - { - "epoch": 0.7489996536750406, - "grad_norm": 0.7907807230949402, - "learning_rate": 2.5098039215686274e-06, - "loss": 0.0392, - "step": 4393 - }, - { - "epoch": 0.7491701521165783, - "grad_norm": 1.9617118835449219, - "learning_rate": 2.5080988917306054e-06, - "loss": 0.2056, - "step": 4394 - }, - { - "epoch": 0.7493406505581159, - "grad_norm": 0.8299599885940552, - "learning_rate": 2.5063938618925833e-06, - "loss": 0.0701, - "step": 4395 - }, - { - "epoch": 0.7495111489996537, - "grad_norm": 1.3734976053237915, - "learning_rate": 2.504688832054561e-06, - "loss": 0.146, - "step": 4396 - }, - { - "epoch": 0.7496816474411914, - "grad_norm": 1.1142525672912598, - "learning_rate": 2.5029838022165392e-06, - "loss": 0.1469, - "step": 4397 - }, - { - "epoch": 0.7498521458827291, - "grad_norm": 1.0340555906295776, - "learning_rate": 2.501278772378517e-06, - "loss": 0.109, - "step": 4398 - }, - { - "epoch": 0.7500226443242667, - "grad_norm": 1.8724783658981323, - "learning_rate": 2.4995737425404947e-06, - "loss": 0.2038, - "step": 4399 - }, - { - "epoch": 0.7501931427658044, - "grad_norm": 1.4347814321517944, - "learning_rate": 2.4978687127024726e-06, - "loss": 0.1388, - "step": 4400 - }, - { - "epoch": 0.7501931427658044, - "eval_f1_score": 0.4258373205741627, - "eval_loss": 0.13514938950538635, - "eval_runtime": 183.3576, - "eval_samples_per_second": 54.538, - "eval_steps_per_second": 3.409, - "step": 4400 - }, - { - "epoch": 0.7503636412073421, - "grad_norm": 2.3867857456207275, - "learning_rate": 2.49616368286445e-06, - "loss": 0.1136, - "step": 4401 - }, - { - "epoch": 0.7505341396488797, - "grad_norm": 1.4532997608184814, - "learning_rate": 2.494458653026428e-06, - "loss": 0.132, - "step": 4402 - }, - { - "epoch": 0.7507046380904174, - "grad_norm": 1.3240541219711304, - "learning_rate": 2.492753623188406e-06, - "loss": 0.1223, - "step": 4403 - }, - { - "epoch": 0.7508751365319551, - "grad_norm": 1.0867290496826172, - "learning_rate": 2.491048593350384e-06, - "loss": 0.1179, - "step": 4404 - }, - { - "epoch": 0.7510456349734929, - "grad_norm": 1.9877395629882812, - "learning_rate": 2.4893435635123615e-06, - "loss": 0.1358, - "step": 4405 - }, - { - "epoch": 0.7512161334150305, - "grad_norm": 0.991084098815918, - "learning_rate": 2.4876385336743395e-06, - "loss": 0.0833, - "step": 4406 - }, - { - "epoch": 0.7513866318565682, - "grad_norm": 1.5853980779647827, - "learning_rate": 2.485933503836317e-06, - "loss": 0.1493, - "step": 4407 - }, - { - "epoch": 0.7515571302981059, - "grad_norm": 1.4868155717849731, - "learning_rate": 2.4842284739982954e-06, - "loss": 0.167, - "step": 4408 - }, - { - "epoch": 0.7517276287396436, - "grad_norm": 1.1008561849594116, - "learning_rate": 2.482523444160273e-06, - "loss": 0.1315, - "step": 4409 - }, - { - "epoch": 0.7518981271811812, - "grad_norm": 1.977602243423462, - "learning_rate": 2.480818414322251e-06, - "loss": 0.2289, - "step": 4410 - }, - { - "epoch": 0.7520686256227189, - "grad_norm": 1.166752576828003, - "learning_rate": 2.4791133844842284e-06, - "loss": 0.0632, - "step": 4411 - }, - { - "epoch": 0.7522391240642566, - "grad_norm": 0.9453310370445251, - "learning_rate": 2.4774083546462067e-06, - "loss": 0.0958, - "step": 4412 - }, - { - "epoch": 0.7524096225057942, - "grad_norm": 1.2444056272506714, - "learning_rate": 2.4757033248081843e-06, - "loss": 0.1525, - "step": 4413 - }, - { - "epoch": 0.752580120947332, - "grad_norm": 1.092913031578064, - "learning_rate": 2.4739982949701622e-06, - "loss": 0.0853, - "step": 4414 - }, - { - "epoch": 0.7527506193888697, - "grad_norm": 0.9639697074890137, - "learning_rate": 2.4722932651321397e-06, - "loss": 0.1036, - "step": 4415 - }, - { - "epoch": 0.7529211178304074, - "grad_norm": 1.2381495237350464, - "learning_rate": 2.470588235294118e-06, - "loss": 0.0988, - "step": 4416 - }, - { - "epoch": 0.753091616271945, - "grad_norm": 1.0427268743515015, - "learning_rate": 2.4688832054560956e-06, - "loss": 0.0812, - "step": 4417 - }, - { - "epoch": 0.7532621147134827, - "grad_norm": 0.9707403182983398, - "learning_rate": 2.4671781756180736e-06, - "loss": 0.0893, - "step": 4418 - }, - { - "epoch": 0.7534326131550204, - "grad_norm": 0.9385908842086792, - "learning_rate": 2.465473145780051e-06, - "loss": 0.0769, - "step": 4419 - }, - { - "epoch": 0.753603111596558, - "grad_norm": 1.4962774515151978, - "learning_rate": 2.4637681159420295e-06, - "loss": 0.1397, - "step": 4420 - }, - { - "epoch": 0.7537736100380957, - "grad_norm": 0.847218930721283, - "learning_rate": 2.462063086104007e-06, - "loss": 0.0686, - "step": 4421 - }, - { - "epoch": 0.7539441084796334, - "grad_norm": 0.836495041847229, - "learning_rate": 2.460358056265985e-06, - "loss": 0.0678, - "step": 4422 - }, - { - "epoch": 0.7541146069211712, - "grad_norm": 0.7686975002288818, - "learning_rate": 2.4586530264279625e-06, - "loss": 0.0581, - "step": 4423 - }, - { - "epoch": 0.7542851053627088, - "grad_norm": 1.2687360048294067, - "learning_rate": 2.456947996589941e-06, - "loss": 0.1058, - "step": 4424 - }, - { - "epoch": 0.7544556038042465, - "grad_norm": 0.7973870635032654, - "learning_rate": 2.4552429667519184e-06, - "loss": 0.058, - "step": 4425 - }, - { - "epoch": 0.7546261022457842, - "grad_norm": 1.092147707939148, - "learning_rate": 2.4535379369138963e-06, - "loss": 0.0922, - "step": 4426 - }, - { - "epoch": 0.7547966006873218, - "grad_norm": 1.2011126279830933, - "learning_rate": 2.451832907075874e-06, - "loss": 0.0785, - "step": 4427 - }, - { - "epoch": 0.7549670991288595, - "grad_norm": 1.0751177072525024, - "learning_rate": 2.4501278772378518e-06, - "loss": 0.1342, - "step": 4428 - }, - { - "epoch": 0.7551375975703972, - "grad_norm": 1.8159641027450562, - "learning_rate": 2.4484228473998297e-06, - "loss": 0.129, - "step": 4429 - }, - { - "epoch": 0.7553080960119349, - "grad_norm": 0.9125354290008545, - "learning_rate": 2.4467178175618077e-06, - "loss": 0.1658, - "step": 4430 - }, - { - "epoch": 0.7554785944534725, - "grad_norm": 1.7383756637573242, - "learning_rate": 2.445012787723785e-06, - "loss": 0.1719, - "step": 4431 - }, - { - "epoch": 0.7556490928950103, - "grad_norm": 1.2764480113983154, - "learning_rate": 2.443307757885763e-06, - "loss": 0.0987, - "step": 4432 - }, - { - "epoch": 0.755819591336548, - "grad_norm": 1.415343165397644, - "learning_rate": 2.441602728047741e-06, - "loss": 0.0729, - "step": 4433 - }, - { - "epoch": 0.7559900897780856, - "grad_norm": 1.5825785398483276, - "learning_rate": 2.439897698209719e-06, - "loss": 0.1534, - "step": 4434 - }, - { - "epoch": 0.7561605882196233, - "grad_norm": 1.9668054580688477, - "learning_rate": 2.4381926683716966e-06, - "loss": 0.2567, - "step": 4435 - }, - { - "epoch": 0.756331086661161, - "grad_norm": 1.0087342262268066, - "learning_rate": 2.4364876385336745e-06, - "loss": 0.0754, - "step": 4436 - }, - { - "epoch": 0.7565015851026987, - "grad_norm": 1.6823344230651855, - "learning_rate": 2.4347826086956525e-06, - "loss": 0.0898, - "step": 4437 - }, - { - "epoch": 0.7566720835442363, - "grad_norm": 1.1069707870483398, - "learning_rate": 2.4330775788576304e-06, - "loss": 0.1075, - "step": 4438 - }, - { - "epoch": 0.756842581985774, - "grad_norm": 1.4310376644134521, - "learning_rate": 2.431372549019608e-06, - "loss": 0.1312, - "step": 4439 - }, - { - "epoch": 0.7570130804273117, - "grad_norm": 1.1864593029022217, - "learning_rate": 2.429667519181586e-06, - "loss": 0.1176, - "step": 4440 - }, - { - "epoch": 0.7571835788688493, - "grad_norm": 0.9127157330513, - "learning_rate": 2.427962489343564e-06, - "loss": 0.049, - "step": 4441 - }, - { - "epoch": 0.7573540773103871, - "grad_norm": 1.7356191873550415, - "learning_rate": 2.4262574595055418e-06, - "loss": 0.1689, - "step": 4442 - }, - { - "epoch": 0.7575245757519248, - "grad_norm": 1.5915813446044922, - "learning_rate": 2.4245524296675193e-06, - "loss": 0.1468, - "step": 4443 - }, - { - "epoch": 0.7576950741934625, - "grad_norm": 1.4367433786392212, - "learning_rate": 2.4228473998294973e-06, - "loss": 0.1447, - "step": 4444 - }, - { - "epoch": 0.7578655726350001, - "grad_norm": 1.4117807149887085, - "learning_rate": 2.421142369991475e-06, - "loss": 0.1131, - "step": 4445 - }, - { - "epoch": 0.7580360710765378, - "grad_norm": 1.4179257154464722, - "learning_rate": 2.419437340153453e-06, - "loss": 0.1428, - "step": 4446 - }, - { - "epoch": 0.7582065695180755, - "grad_norm": 1.7990797758102417, - "learning_rate": 2.4177323103154307e-06, - "loss": 0.1489, - "step": 4447 - }, - { - "epoch": 0.7583770679596131, - "grad_norm": 0.9080357551574707, - "learning_rate": 2.4160272804774086e-06, - "loss": 0.0824, - "step": 4448 - }, - { - "epoch": 0.7585475664011508, - "grad_norm": 1.3778406381607056, - "learning_rate": 2.4143222506393866e-06, - "loss": 0.1252, - "step": 4449 - }, - { - "epoch": 0.7587180648426886, - "grad_norm": 1.2085192203521729, - "learning_rate": 2.412617220801364e-06, - "loss": 0.1522, - "step": 4450 - }, - { - "epoch": 0.7588885632842263, - "grad_norm": 0.9220867156982422, - "learning_rate": 2.410912190963342e-06, - "loss": 0.0691, - "step": 4451 - }, - { - "epoch": 0.7590590617257639, - "grad_norm": 1.1078466176986694, - "learning_rate": 2.40920716112532e-06, - "loss": 0.0868, - "step": 4452 - }, - { - "epoch": 0.7592295601673016, - "grad_norm": 1.5541517734527588, - "learning_rate": 2.4075021312872975e-06, - "loss": 0.153, - "step": 4453 - }, - { - "epoch": 0.7594000586088393, - "grad_norm": 1.4000375270843506, - "learning_rate": 2.4057971014492755e-06, - "loss": 0.1326, - "step": 4454 - }, - { - "epoch": 0.759570557050377, - "grad_norm": 1.2640565633773804, - "learning_rate": 2.4040920716112534e-06, - "loss": 0.0853, - "step": 4455 - }, - { - "epoch": 0.7597410554919146, - "grad_norm": 0.8130654692649841, - "learning_rate": 2.4023870417732314e-06, - "loss": 0.0956, - "step": 4456 - }, - { - "epoch": 0.7599115539334523, - "grad_norm": 1.007473111152649, - "learning_rate": 2.400682011935209e-06, - "loss": 0.0567, - "step": 4457 - }, - { - "epoch": 0.76008205237499, - "grad_norm": 0.8968228697776794, - "learning_rate": 2.398976982097187e-06, - "loss": 0.0818, - "step": 4458 - }, - { - "epoch": 0.7602525508165277, - "grad_norm": 1.0275651216506958, - "learning_rate": 2.3972719522591648e-06, - "loss": 0.1403, - "step": 4459 - }, - { - "epoch": 0.7604230492580654, - "grad_norm": 1.5832469463348389, - "learning_rate": 2.3955669224211427e-06, - "loss": 0.1343, - "step": 4460 - }, - { - "epoch": 0.7605935476996031, - "grad_norm": 0.951404869556427, - "learning_rate": 2.3938618925831202e-06, - "loss": 0.0802, - "step": 4461 - }, - { - "epoch": 0.7607640461411408, - "grad_norm": 0.8085765242576599, - "learning_rate": 2.392156862745098e-06, - "loss": 0.0801, - "step": 4462 - }, - { - "epoch": 0.7609345445826784, - "grad_norm": 1.6486432552337646, - "learning_rate": 2.390451832907076e-06, - "loss": 0.1259, - "step": 4463 - }, - { - "epoch": 0.7611050430242161, - "grad_norm": 1.229943871498108, - "learning_rate": 2.3887468030690537e-06, - "loss": 0.0928, - "step": 4464 - }, - { - "epoch": 0.7612755414657538, - "grad_norm": 1.0928643941879272, - "learning_rate": 2.3870417732310316e-06, - "loss": 0.1039, - "step": 4465 - }, - { - "epoch": 0.7614460399072914, - "grad_norm": 1.9494421482086182, - "learning_rate": 2.3853367433930096e-06, - "loss": 0.1601, - "step": 4466 - }, - { - "epoch": 0.7616165383488291, - "grad_norm": 1.933903694152832, - "learning_rate": 2.3836317135549875e-06, - "loss": 0.1642, - "step": 4467 - }, - { - "epoch": 0.7617870367903669, - "grad_norm": 1.6810734272003174, - "learning_rate": 2.381926683716965e-06, - "loss": 0.186, - "step": 4468 - }, - { - "epoch": 0.7619575352319046, - "grad_norm": 0.9030094742774963, - "learning_rate": 2.380221653878943e-06, - "loss": 0.0896, - "step": 4469 - }, - { - "epoch": 0.7621280336734422, - "grad_norm": 1.5973889827728271, - "learning_rate": 2.378516624040921e-06, - "loss": 0.1563, - "step": 4470 - }, - { - "epoch": 0.7622985321149799, - "grad_norm": 1.6514700651168823, - "learning_rate": 2.376811594202899e-06, - "loss": 0.1492, - "step": 4471 - }, - { - "epoch": 0.7624690305565176, - "grad_norm": 1.5564332008361816, - "learning_rate": 2.3751065643648764e-06, - "loss": 0.1419, - "step": 4472 - }, - { - "epoch": 0.7626395289980552, - "grad_norm": 1.4853767156600952, - "learning_rate": 2.3734015345268544e-06, - "loss": 0.1656, - "step": 4473 - }, - { - "epoch": 0.7628100274395929, - "grad_norm": 1.4599570035934448, - "learning_rate": 2.3716965046888323e-06, - "loss": 0.1308, - "step": 4474 - }, - { - "epoch": 0.7629805258811306, - "grad_norm": 1.1921581029891968, - "learning_rate": 2.3699914748508102e-06, - "loss": 0.1128, - "step": 4475 - }, - { - "epoch": 0.7631510243226683, - "grad_norm": 1.045434832572937, - "learning_rate": 2.3682864450127878e-06, - "loss": 0.0534, - "step": 4476 - }, - { - "epoch": 0.763321522764206, - "grad_norm": 1.5900884866714478, - "learning_rate": 2.3665814151747657e-06, - "loss": 0.1463, - "step": 4477 - }, - { - "epoch": 0.7634920212057437, - "grad_norm": 1.286139965057373, - "learning_rate": 2.3648763853367437e-06, - "loss": 0.1475, - "step": 4478 - }, - { - "epoch": 0.7636625196472814, - "grad_norm": 1.091168761253357, - "learning_rate": 2.3631713554987216e-06, - "loss": 0.1028, - "step": 4479 - }, - { - "epoch": 0.763833018088819, - "grad_norm": 1.0828571319580078, - "learning_rate": 2.361466325660699e-06, - "loss": 0.1278, - "step": 4480 - }, - { - "epoch": 0.7640035165303567, - "grad_norm": 2.2095322608947754, - "learning_rate": 2.359761295822677e-06, - "loss": 0.2602, - "step": 4481 - }, - { - "epoch": 0.7641740149718944, - "grad_norm": 1.4665558338165283, - "learning_rate": 2.3580562659846546e-06, - "loss": 0.1733, - "step": 4482 - }, - { - "epoch": 0.7643445134134321, - "grad_norm": 1.0846915245056152, - "learning_rate": 2.356351236146633e-06, - "loss": 0.1111, - "step": 4483 - }, - { - "epoch": 0.7645150118549697, - "grad_norm": 1.8451837301254272, - "learning_rate": 2.3546462063086105e-06, - "loss": 0.1506, - "step": 4484 - }, - { - "epoch": 0.7646855102965074, - "grad_norm": 1.1041662693023682, - "learning_rate": 2.3529411764705885e-06, - "loss": 0.1269, - "step": 4485 - }, - { - "epoch": 0.7648560087380452, - "grad_norm": 1.6395684480667114, - "learning_rate": 2.351236146632566e-06, - "loss": 0.1488, - "step": 4486 - }, - { - "epoch": 0.7650265071795828, - "grad_norm": 1.3365883827209473, - "learning_rate": 2.3495311167945443e-06, - "loss": 0.1094, - "step": 4487 - }, - { - "epoch": 0.7651970056211205, - "grad_norm": 1.0546587705612183, - "learning_rate": 2.347826086956522e-06, - "loss": 0.1041, - "step": 4488 - }, - { - "epoch": 0.7653675040626582, - "grad_norm": 2.1003003120422363, - "learning_rate": 2.3461210571185e-06, - "loss": 0.2355, - "step": 4489 - }, - { - "epoch": 0.7655380025041959, - "grad_norm": 1.71974778175354, - "learning_rate": 2.3444160272804773e-06, - "loss": 0.1549, - "step": 4490 - }, - { - "epoch": 0.7657085009457335, - "grad_norm": 1.3704986572265625, - "learning_rate": 2.3427109974424557e-06, - "loss": 0.0997, - "step": 4491 - }, - { - "epoch": 0.7658789993872712, - "grad_norm": 0.7516347765922546, - "learning_rate": 2.3410059676044332e-06, - "loss": 0.0518, - "step": 4492 - }, - { - "epoch": 0.7660494978288089, - "grad_norm": 1.6383329629898071, - "learning_rate": 2.339300937766411e-06, - "loss": 0.096, - "step": 4493 - }, - { - "epoch": 0.7662199962703465, - "grad_norm": 1.824157953262329, - "learning_rate": 2.3375959079283887e-06, - "loss": 0.1887, - "step": 4494 - }, - { - "epoch": 0.7663904947118843, - "grad_norm": 1.9850565195083618, - "learning_rate": 2.335890878090367e-06, - "loss": 0.1113, - "step": 4495 - }, - { - "epoch": 0.766560993153422, - "grad_norm": 1.7400462627410889, - "learning_rate": 2.3341858482523446e-06, - "loss": 0.1139, - "step": 4496 - }, - { - "epoch": 0.7667314915949597, - "grad_norm": 0.9568600654602051, - "learning_rate": 2.3324808184143226e-06, - "loss": 0.0496, - "step": 4497 - }, - { - "epoch": 0.7669019900364973, - "grad_norm": 1.088996171951294, - "learning_rate": 2.3307757885763e-06, - "loss": 0.0943, - "step": 4498 - }, - { - "epoch": 0.767072488478035, - "grad_norm": 1.688697338104248, - "learning_rate": 2.329070758738278e-06, - "loss": 0.1739, - "step": 4499 - }, - { - "epoch": 0.7672429869195727, - "grad_norm": 0.9527409076690674, - "learning_rate": 2.327365728900256e-06, - "loss": 0.0959, - "step": 4500 - }, - { - "epoch": 0.7672429869195727, - "eval_f1_score": 0.4222737819025522, - "eval_loss": 0.1333077847957611, - "eval_runtime": 183.4295, - "eval_samples_per_second": 54.517, - "eval_steps_per_second": 3.407, - "step": 4500 - }, - { - "epoch": 0.7674134853611104, - "grad_norm": 2.166240692138672, - "learning_rate": 2.325660699062234e-06, - "loss": 0.1514, - "step": 4501 - }, - { - "epoch": 0.767583983802648, - "grad_norm": 2.316793441772461, - "learning_rate": 2.3239556692242114e-06, - "loss": 0.1376, - "step": 4502 - }, - { - "epoch": 0.7677544822441857, - "grad_norm": 1.5623490810394287, - "learning_rate": 2.3222506393861894e-06, - "loss": 0.1548, - "step": 4503 - }, - { - "epoch": 0.7679249806857235, - "grad_norm": 1.0329327583312988, - "learning_rate": 2.3205456095481673e-06, - "loss": 0.1004, - "step": 4504 - }, - { - "epoch": 0.7680954791272611, - "grad_norm": 1.3712180852890015, - "learning_rate": 2.3188405797101453e-06, - "loss": 0.1254, - "step": 4505 - }, - { - "epoch": 0.7682659775687988, - "grad_norm": 1.363422155380249, - "learning_rate": 2.317135549872123e-06, - "loss": 0.1472, - "step": 4506 - }, - { - "epoch": 0.7684364760103365, - "grad_norm": 1.4349713325500488, - "learning_rate": 2.3154305200341008e-06, - "loss": 0.1377, - "step": 4507 - }, - { - "epoch": 0.7686069744518742, - "grad_norm": 1.409959316253662, - "learning_rate": 2.3137254901960787e-06, - "loss": 0.096, - "step": 4508 - }, - { - "epoch": 0.7687774728934118, - "grad_norm": 1.6371657848358154, - "learning_rate": 2.3120204603580567e-06, - "loss": 0.1444, - "step": 4509 - }, - { - "epoch": 0.7689479713349495, - "grad_norm": 1.1964426040649414, - "learning_rate": 2.310315430520034e-06, - "loss": 0.0879, - "step": 4510 - }, - { - "epoch": 0.7691184697764872, - "grad_norm": 2.026524782180786, - "learning_rate": 2.308610400682012e-06, - "loss": 0.1026, - "step": 4511 - }, - { - "epoch": 0.7692889682180248, - "grad_norm": 1.329511284828186, - "learning_rate": 2.30690537084399e-06, - "loss": 0.1055, - "step": 4512 - }, - { - "epoch": 0.7694594666595626, - "grad_norm": 1.415218710899353, - "learning_rate": 2.305200341005968e-06, - "loss": 0.14, - "step": 4513 - }, - { - "epoch": 0.7696299651011003, - "grad_norm": 1.076926827430725, - "learning_rate": 2.3034953111679455e-06, - "loss": 0.1197, - "step": 4514 - }, - { - "epoch": 0.769800463542638, - "grad_norm": 0.8736515045166016, - "learning_rate": 2.3017902813299235e-06, - "loss": 0.0643, - "step": 4515 - }, - { - "epoch": 0.7699709619841756, - "grad_norm": 0.9680917859077454, - "learning_rate": 2.3000852514919014e-06, - "loss": 0.0981, - "step": 4516 - }, - { - "epoch": 0.7701414604257133, - "grad_norm": 1.3549295663833618, - "learning_rate": 2.2983802216538794e-06, - "loss": 0.1523, - "step": 4517 - }, - { - "epoch": 0.770311958867251, - "grad_norm": 1.550164818763733, - "learning_rate": 2.296675191815857e-06, - "loss": 0.1469, - "step": 4518 - }, - { - "epoch": 0.7704824573087886, - "grad_norm": 0.9949040412902832, - "learning_rate": 2.294970161977835e-06, - "loss": 0.0529, - "step": 4519 - }, - { - "epoch": 0.7706529557503263, - "grad_norm": 1.1915141344070435, - "learning_rate": 2.2932651321398124e-06, - "loss": 0.1141, - "step": 4520 - }, - { - "epoch": 0.770823454191864, - "grad_norm": 1.5403510332107544, - "learning_rate": 2.2915601023017903e-06, - "loss": 0.1201, - "step": 4521 - }, - { - "epoch": 0.7709939526334018, - "grad_norm": 0.8535776138305664, - "learning_rate": 2.2898550724637683e-06, - "loss": 0.0777, - "step": 4522 - }, - { - "epoch": 0.7711644510749394, - "grad_norm": 1.0403300523757935, - "learning_rate": 2.2881500426257462e-06, - "loss": 0.0568, - "step": 4523 - }, - { - "epoch": 0.7713349495164771, - "grad_norm": 1.436591625213623, - "learning_rate": 2.2864450127877238e-06, - "loss": 0.1344, - "step": 4524 - }, - { - "epoch": 0.7715054479580148, - "grad_norm": 1.678331732749939, - "learning_rate": 2.2847399829497017e-06, - "loss": 0.1449, - "step": 4525 - }, - { - "epoch": 0.7716759463995524, - "grad_norm": 1.3845634460449219, - "learning_rate": 2.2830349531116796e-06, - "loss": 0.1028, - "step": 4526 - }, - { - "epoch": 0.7718464448410901, - "grad_norm": 1.875254511833191, - "learning_rate": 2.2813299232736576e-06, - "loss": 0.2095, - "step": 4527 - }, - { - "epoch": 0.7720169432826278, - "grad_norm": 1.1919282674789429, - "learning_rate": 2.279624893435635e-06, - "loss": 0.12, - "step": 4528 - }, - { - "epoch": 0.7721874417241655, - "grad_norm": 1.207365870475769, - "learning_rate": 2.277919863597613e-06, - "loss": 0.0699, - "step": 4529 - }, - { - "epoch": 0.7723579401657031, - "grad_norm": 1.3329031467437744, - "learning_rate": 2.276214833759591e-06, - "loss": 0.1017, - "step": 4530 - }, - { - "epoch": 0.7725284386072409, - "grad_norm": 1.2029889822006226, - "learning_rate": 2.274509803921569e-06, - "loss": 0.0918, - "step": 4531 - }, - { - "epoch": 0.7726989370487786, - "grad_norm": 1.4061421155929565, - "learning_rate": 2.2728047740835465e-06, - "loss": 0.1064, - "step": 4532 - }, - { - "epoch": 0.7728694354903162, - "grad_norm": 1.9878129959106445, - "learning_rate": 2.2710997442455244e-06, - "loss": 0.1742, - "step": 4533 - }, - { - "epoch": 0.7730399339318539, - "grad_norm": 1.2761764526367188, - "learning_rate": 2.2693947144075024e-06, - "loss": 0.0986, - "step": 4534 - }, - { - "epoch": 0.7732104323733916, - "grad_norm": 0.9076769948005676, - "learning_rate": 2.2676896845694803e-06, - "loss": 0.0792, - "step": 4535 - }, - { - "epoch": 0.7733809308149293, - "grad_norm": 0.8845469355583191, - "learning_rate": 2.265984654731458e-06, - "loss": 0.0581, - "step": 4536 - }, - { - "epoch": 0.7735514292564669, - "grad_norm": 1.3369200229644775, - "learning_rate": 2.264279624893436e-06, - "loss": 0.082, - "step": 4537 - }, - { - "epoch": 0.7737219276980046, - "grad_norm": 1.3708652257919312, - "learning_rate": 2.2625745950554138e-06, - "loss": 0.1127, - "step": 4538 - }, - { - "epoch": 0.7738924261395423, - "grad_norm": 1.0232014656066895, - "learning_rate": 2.2608695652173913e-06, - "loss": 0.0548, - "step": 4539 - }, - { - "epoch": 0.77406292458108, - "grad_norm": 1.4051992893218994, - "learning_rate": 2.2591645353793692e-06, - "loss": 0.157, - "step": 4540 - }, - { - "epoch": 0.7742334230226177, - "grad_norm": 0.9153316617012024, - "learning_rate": 2.257459505541347e-06, - "loss": 0.0903, - "step": 4541 - }, - { - "epoch": 0.7744039214641554, - "grad_norm": 0.7737805843353271, - "learning_rate": 2.255754475703325e-06, - "loss": 0.0629, - "step": 4542 - }, - { - "epoch": 0.7745744199056931, - "grad_norm": 1.317366123199463, - "learning_rate": 2.2540494458653026e-06, - "loss": 0.1337, - "step": 4543 - }, - { - "epoch": 0.7747449183472307, - "grad_norm": 1.833511471748352, - "learning_rate": 2.2523444160272806e-06, - "loss": 0.2257, - "step": 4544 - }, - { - "epoch": 0.7749154167887684, - "grad_norm": 1.0858491659164429, - "learning_rate": 2.2506393861892585e-06, - "loss": 0.0734, - "step": 4545 - }, - { - "epoch": 0.7750859152303061, - "grad_norm": 1.2199957370758057, - "learning_rate": 2.2489343563512365e-06, - "loss": 0.1057, - "step": 4546 - }, - { - "epoch": 0.7752564136718438, - "grad_norm": 1.3122528791427612, - "learning_rate": 2.247229326513214e-06, - "loss": 0.0903, - "step": 4547 - }, - { - "epoch": 0.7754269121133814, - "grad_norm": 1.3773401975631714, - "learning_rate": 2.245524296675192e-06, - "loss": 0.1184, - "step": 4548 - }, - { - "epoch": 0.7755974105549192, - "grad_norm": 0.9603626132011414, - "learning_rate": 2.24381926683717e-06, - "loss": 0.1162, - "step": 4549 - }, - { - "epoch": 0.7757679089964569, - "grad_norm": 1.2490376234054565, - "learning_rate": 2.242114236999148e-06, - "loss": 0.1059, - "step": 4550 - }, - { - "epoch": 0.7759384074379945, - "grad_norm": 1.633994221687317, - "learning_rate": 2.2404092071611254e-06, - "loss": 0.0966, - "step": 4551 - }, - { - "epoch": 0.7761089058795322, - "grad_norm": 1.8625892400741577, - "learning_rate": 2.2387041773231033e-06, - "loss": 0.1668, - "step": 4552 - }, - { - "epoch": 0.7762794043210699, - "grad_norm": 1.9468538761138916, - "learning_rate": 2.2369991474850813e-06, - "loss": 0.1483, - "step": 4553 - }, - { - "epoch": 0.7764499027626076, - "grad_norm": 1.5350470542907715, - "learning_rate": 2.2352941176470592e-06, - "loss": 0.1652, - "step": 4554 - }, - { - "epoch": 0.7766204012041452, - "grad_norm": 1.5547540187835693, - "learning_rate": 2.2335890878090367e-06, - "loss": 0.1503, - "step": 4555 - }, - { - "epoch": 0.7767908996456829, - "grad_norm": 1.884204387664795, - "learning_rate": 2.2318840579710147e-06, - "loss": 0.1608, - "step": 4556 - }, - { - "epoch": 0.7769613980872206, - "grad_norm": 1.4525254964828491, - "learning_rate": 2.2301790281329922e-06, - "loss": 0.1111, - "step": 4557 - }, - { - "epoch": 0.7771318965287582, - "grad_norm": 1.7327322959899902, - "learning_rate": 2.2284739982949706e-06, - "loss": 0.178, - "step": 4558 - }, - { - "epoch": 0.777302394970296, - "grad_norm": 1.8476815223693848, - "learning_rate": 2.226768968456948e-06, - "loss": 0.1236, - "step": 4559 - }, - { - "epoch": 0.7774728934118337, - "grad_norm": 1.6292099952697754, - "learning_rate": 2.225063938618926e-06, - "loss": 0.1675, - "step": 4560 - }, - { - "epoch": 0.7776433918533714, - "grad_norm": 1.247784972190857, - "learning_rate": 2.2233589087809036e-06, - "loss": 0.1312, - "step": 4561 - }, - { - "epoch": 0.777813890294909, - "grad_norm": 1.5058189630508423, - "learning_rate": 2.221653878942882e-06, - "loss": 0.1229, - "step": 4562 - }, - { - "epoch": 0.7779843887364467, - "grad_norm": 1.342382788658142, - "learning_rate": 2.2199488491048595e-06, - "loss": 0.0972, - "step": 4563 - }, - { - "epoch": 0.7781548871779844, - "grad_norm": 1.530906319618225, - "learning_rate": 2.2182438192668374e-06, - "loss": 0.095, - "step": 4564 - }, - { - "epoch": 0.778325385619522, - "grad_norm": 1.5444718599319458, - "learning_rate": 2.216538789428815e-06, - "loss": 0.1519, - "step": 4565 - }, - { - "epoch": 0.7784958840610597, - "grad_norm": 0.7900330424308777, - "learning_rate": 2.214833759590793e-06, - "loss": 0.0426, - "step": 4566 - }, - { - "epoch": 0.7786663825025975, - "grad_norm": 1.4255989789962769, - "learning_rate": 2.213128729752771e-06, - "loss": 0.1045, - "step": 4567 - }, - { - "epoch": 0.7788368809441352, - "grad_norm": 1.0543164014816284, - "learning_rate": 2.211423699914749e-06, - "loss": 0.11, - "step": 4568 - }, - { - "epoch": 0.7790073793856728, - "grad_norm": 1.1432523727416992, - "learning_rate": 2.2097186700767263e-06, - "loss": 0.1237, - "step": 4569 - }, - { - "epoch": 0.7791778778272105, - "grad_norm": 1.9459341764450073, - "learning_rate": 2.2080136402387043e-06, - "loss": 0.1522, - "step": 4570 - }, - { - "epoch": 0.7793483762687482, - "grad_norm": 2.064140558242798, - "learning_rate": 2.2063086104006822e-06, - "loss": 0.1779, - "step": 4571 - }, - { - "epoch": 0.7795188747102858, - "grad_norm": 1.0942349433898926, - "learning_rate": 2.20460358056266e-06, - "loss": 0.112, - "step": 4572 - }, - { - "epoch": 0.7796893731518235, - "grad_norm": 1.4263490438461304, - "learning_rate": 2.2028985507246377e-06, - "loss": 0.1094, - "step": 4573 - }, - { - "epoch": 0.7798598715933612, - "grad_norm": 1.002995252609253, - "learning_rate": 2.2011935208866156e-06, - "loss": 0.1146, - "step": 4574 - }, - { - "epoch": 0.780030370034899, - "grad_norm": 1.8714383840560913, - "learning_rate": 2.1994884910485936e-06, - "loss": 0.1763, - "step": 4575 - }, - { - "epoch": 0.7802008684764365, - "grad_norm": 1.303443193435669, - "learning_rate": 2.1977834612105715e-06, - "loss": 0.1416, - "step": 4576 - }, - { - "epoch": 0.7803713669179743, - "grad_norm": 1.5888121128082275, - "learning_rate": 2.196078431372549e-06, - "loss": 0.2209, - "step": 4577 - }, - { - "epoch": 0.780541865359512, - "grad_norm": 1.908546805381775, - "learning_rate": 2.194373401534527e-06, - "loss": 0.1524, - "step": 4578 - }, - { - "epoch": 0.7807123638010496, - "grad_norm": 2.010317087173462, - "learning_rate": 2.192668371696505e-06, - "loss": 0.1973, - "step": 4579 - }, - { - "epoch": 0.7808828622425873, - "grad_norm": 1.123651385307312, - "learning_rate": 2.190963341858483e-06, - "loss": 0.1256, - "step": 4580 - }, - { - "epoch": 0.781053360684125, - "grad_norm": 1.4087166786193848, - "learning_rate": 2.1892583120204604e-06, - "loss": 0.1015, - "step": 4581 - }, - { - "epoch": 0.7812238591256627, - "grad_norm": 1.4993146657943726, - "learning_rate": 2.1875532821824384e-06, - "loss": 0.1835, - "step": 4582 - }, - { - "epoch": 0.7813943575672003, - "grad_norm": 1.4932217597961426, - "learning_rate": 2.1858482523444163e-06, - "loss": 0.1404, - "step": 4583 - }, - { - "epoch": 0.781564856008738, - "grad_norm": 2.5937252044677734, - "learning_rate": 2.1841432225063943e-06, - "loss": 0.1626, - "step": 4584 - }, - { - "epoch": 0.7817353544502758, - "grad_norm": 1.0918924808502197, - "learning_rate": 2.1824381926683718e-06, - "loss": 0.0876, - "step": 4585 - }, - { - "epoch": 0.7819058528918134, - "grad_norm": 0.8180877566337585, - "learning_rate": 2.1807331628303497e-06, - "loss": 0.0312, - "step": 4586 - }, - { - "epoch": 0.7820763513333511, - "grad_norm": 3.8481969833374023, - "learning_rate": 2.1790281329923277e-06, - "loss": 0.2022, - "step": 4587 - }, - { - "epoch": 0.7822468497748888, - "grad_norm": 0.9197229146957397, - "learning_rate": 2.1773231031543056e-06, - "loss": 0.0463, - "step": 4588 - }, - { - "epoch": 0.7824173482164265, - "grad_norm": 0.9426055550575256, - "learning_rate": 2.175618073316283e-06, - "loss": 0.066, - "step": 4589 - }, - { - "epoch": 0.7825878466579641, - "grad_norm": 1.4380828142166138, - "learning_rate": 2.173913043478261e-06, - "loss": 0.1376, - "step": 4590 - }, - { - "epoch": 0.7827583450995018, - "grad_norm": 1.3979727029800415, - "learning_rate": 2.1722080136402386e-06, - "loss": 0.1191, - "step": 4591 - }, - { - "epoch": 0.7829288435410395, - "grad_norm": 1.105899691581726, - "learning_rate": 2.170502983802217e-06, - "loss": 0.0955, - "step": 4592 - }, - { - "epoch": 0.7830993419825771, - "grad_norm": 1.049070954322815, - "learning_rate": 2.1687979539641945e-06, - "loss": 0.1086, - "step": 4593 - }, - { - "epoch": 0.7832698404241148, - "grad_norm": 1.0583131313323975, - "learning_rate": 2.1670929241261725e-06, - "loss": 0.1121, - "step": 4594 - }, - { - "epoch": 0.7834403388656526, - "grad_norm": 2.0508458614349365, - "learning_rate": 2.16538789428815e-06, - "loss": 0.2152, - "step": 4595 - }, - { - "epoch": 0.7836108373071903, - "grad_norm": 1.0575108528137207, - "learning_rate": 2.163682864450128e-06, - "loss": 0.0627, - "step": 4596 - }, - { - "epoch": 0.7837813357487279, - "grad_norm": 1.286759614944458, - "learning_rate": 2.161977834612106e-06, - "loss": 0.1289, - "step": 4597 - }, - { - "epoch": 0.7839518341902656, - "grad_norm": 2.3939502239227295, - "learning_rate": 2.160272804774084e-06, - "loss": 0.1199, - "step": 4598 - }, - { - "epoch": 0.7841223326318033, - "grad_norm": 1.2347538471221924, - "learning_rate": 2.1585677749360614e-06, - "loss": 0.145, - "step": 4599 - }, - { - "epoch": 0.784292831073341, - "grad_norm": 0.7571415901184082, - "learning_rate": 2.1568627450980393e-06, - "loss": 0.0456, - "step": 4600 - }, - { - "epoch": 0.784292831073341, - "eval_f1_score": 0.3776595744680851, - "eval_loss": 0.13533847033977509, - "eval_runtime": 183.3624, - "eval_samples_per_second": 54.537, - "eval_steps_per_second": 3.409, - "step": 4600 - }, - { - "epoch": 0.7844633295148786, - "grad_norm": 0.9207397103309631, - "learning_rate": 2.1551577152600173e-06, - "loss": 0.0878, - "step": 4601 - }, - { - "epoch": 0.7846338279564163, - "grad_norm": 1.8424160480499268, - "learning_rate": 2.153452685421995e-06, - "loss": 0.106, - "step": 4602 - }, - { - "epoch": 0.784804326397954, - "grad_norm": 1.3503949642181396, - "learning_rate": 2.1517476555839727e-06, - "loss": 0.1342, - "step": 4603 - }, - { - "epoch": 0.7849748248394917, - "grad_norm": 1.642559289932251, - "learning_rate": 2.1500426257459507e-06, - "loss": 0.1322, - "step": 4604 - }, - { - "epoch": 0.7851453232810294, - "grad_norm": 1.4871376752853394, - "learning_rate": 2.1483375959079286e-06, - "loss": 0.14, - "step": 4605 - }, - { - "epoch": 0.7853158217225671, - "grad_norm": 1.3946406841278076, - "learning_rate": 2.1466325660699066e-06, - "loss": 0.1535, - "step": 4606 - }, - { - "epoch": 0.7854863201641048, - "grad_norm": 1.222882866859436, - "learning_rate": 2.144927536231884e-06, - "loss": 0.0676, - "step": 4607 - }, - { - "epoch": 0.7856568186056424, - "grad_norm": 1.5095902681350708, - "learning_rate": 2.143222506393862e-06, - "loss": 0.1177, - "step": 4608 - }, - { - "epoch": 0.7858273170471801, - "grad_norm": 1.2253878116607666, - "learning_rate": 2.14151747655584e-06, - "loss": 0.1358, - "step": 4609 - }, - { - "epoch": 0.7859978154887178, - "grad_norm": 1.1032277345657349, - "learning_rate": 2.139812446717818e-06, - "loss": 0.0618, - "step": 4610 - }, - { - "epoch": 0.7861683139302554, - "grad_norm": 1.2632951736450195, - "learning_rate": 2.1381074168797955e-06, - "loss": 0.101, - "step": 4611 - }, - { - "epoch": 0.7863388123717931, - "grad_norm": 0.7547646164894104, - "learning_rate": 2.1364023870417734e-06, - "loss": 0.0467, - "step": 4612 - }, - { - "epoch": 0.7865093108133309, - "grad_norm": 1.7445420026779175, - "learning_rate": 2.1346973572037514e-06, - "loss": 0.1337, - "step": 4613 - }, - { - "epoch": 0.7866798092548686, - "grad_norm": 3.457361936569214, - "learning_rate": 2.132992327365729e-06, - "loss": 0.224, - "step": 4614 - }, - { - "epoch": 0.7868503076964062, - "grad_norm": 1.6196421384811401, - "learning_rate": 2.131287297527707e-06, - "loss": 0.0935, - "step": 4615 - }, - { - "epoch": 0.7870208061379439, - "grad_norm": 1.171519160270691, - "learning_rate": 2.1295822676896848e-06, - "loss": 0.0683, - "step": 4616 - }, - { - "epoch": 0.7871913045794816, - "grad_norm": 1.2372194528579712, - "learning_rate": 2.1278772378516627e-06, - "loss": 0.1254, - "step": 4617 - }, - { - "epoch": 0.7873618030210192, - "grad_norm": 1.5402429103851318, - "learning_rate": 2.1261722080136403e-06, - "loss": 0.1376, - "step": 4618 - }, - { - "epoch": 0.7875323014625569, - "grad_norm": 1.385901689529419, - "learning_rate": 2.124467178175618e-06, - "loss": 0.1525, - "step": 4619 - }, - { - "epoch": 0.7877027999040946, - "grad_norm": 1.2449439764022827, - "learning_rate": 2.122762148337596e-06, - "loss": 0.1271, - "step": 4620 - }, - { - "epoch": 0.7878732983456324, - "grad_norm": 1.6943930387496948, - "learning_rate": 2.121057118499574e-06, - "loss": 0.1269, - "step": 4621 - }, - { - "epoch": 0.78804379678717, - "grad_norm": 1.9401750564575195, - "learning_rate": 2.1193520886615516e-06, - "loss": 0.2205, - "step": 4622 - }, - { - "epoch": 0.7882142952287077, - "grad_norm": 1.010846734046936, - "learning_rate": 2.1176470588235296e-06, - "loss": 0.0951, - "step": 4623 - }, - { - "epoch": 0.7883847936702454, - "grad_norm": 0.9300770163536072, - "learning_rate": 2.1159420289855075e-06, - "loss": 0.0448, - "step": 4624 - }, - { - "epoch": 0.788555292111783, - "grad_norm": 1.5240856409072876, - "learning_rate": 2.1142369991474855e-06, - "loss": 0.1576, - "step": 4625 - }, - { - "epoch": 0.7887257905533207, - "grad_norm": 1.2999317646026611, - "learning_rate": 2.112531969309463e-06, - "loss": 0.0857, - "step": 4626 - }, - { - "epoch": 0.7888962889948584, - "grad_norm": 1.4437685012817383, - "learning_rate": 2.110826939471441e-06, - "loss": 0.134, - "step": 4627 - }, - { - "epoch": 0.7890667874363961, - "grad_norm": 1.392773151397705, - "learning_rate": 2.109121909633419e-06, - "loss": 0.135, - "step": 4628 - }, - { - "epoch": 0.7892372858779337, - "grad_norm": 1.3370131254196167, - "learning_rate": 2.107416879795397e-06, - "loss": 0.1207, - "step": 4629 - }, - { - "epoch": 0.7894077843194715, - "grad_norm": 1.3597493171691895, - "learning_rate": 2.1057118499573744e-06, - "loss": 0.1099, - "step": 4630 - }, - { - "epoch": 0.7895782827610092, - "grad_norm": 2.7039904594421387, - "learning_rate": 2.1040068201193523e-06, - "loss": 0.2182, - "step": 4631 - }, - { - "epoch": 0.7897487812025468, - "grad_norm": 1.292164921760559, - "learning_rate": 2.10230179028133e-06, - "loss": 0.1013, - "step": 4632 - }, - { - "epoch": 0.7899192796440845, - "grad_norm": 1.3703480958938599, - "learning_rate": 2.1005967604433078e-06, - "loss": 0.0705, - "step": 4633 - }, - { - "epoch": 0.7900897780856222, - "grad_norm": 1.4119629859924316, - "learning_rate": 2.0988917306052857e-06, - "loss": 0.1079, - "step": 4634 - }, - { - "epoch": 0.7902602765271599, - "grad_norm": 2.1801319122314453, - "learning_rate": 2.0971867007672637e-06, - "loss": 0.1929, - "step": 4635 - }, - { - "epoch": 0.7904307749686975, - "grad_norm": 1.5141770839691162, - "learning_rate": 2.095481670929241e-06, - "loss": 0.1497, - "step": 4636 - }, - { - "epoch": 0.7906012734102352, - "grad_norm": 1.2041051387786865, - "learning_rate": 2.093776641091219e-06, - "loss": 0.0976, - "step": 4637 - }, - { - "epoch": 0.7907717718517729, - "grad_norm": 2.3297648429870605, - "learning_rate": 2.092071611253197e-06, - "loss": 0.2369, - "step": 4638 - }, - { - "epoch": 0.7909422702933105, - "grad_norm": 0.6728577613830566, - "learning_rate": 2.090366581415175e-06, - "loss": 0.0651, - "step": 4639 - }, - { - "epoch": 0.7911127687348483, - "grad_norm": 1.3690725564956665, - "learning_rate": 2.0886615515771526e-06, - "loss": 0.0873, - "step": 4640 - }, - { - "epoch": 0.791283267176386, - "grad_norm": 1.7001179456710815, - "learning_rate": 2.0869565217391305e-06, - "loss": 0.1586, - "step": 4641 - }, - { - "epoch": 0.7914537656179237, - "grad_norm": 1.7392905950546265, - "learning_rate": 2.0852514919011085e-06, - "loss": 0.1737, - "step": 4642 - }, - { - "epoch": 0.7916242640594613, - "grad_norm": 2.2579967975616455, - "learning_rate": 2.0835464620630864e-06, - "loss": 0.125, - "step": 4643 - }, - { - "epoch": 0.791794762500999, - "grad_norm": 1.8998128175735474, - "learning_rate": 2.081841432225064e-06, - "loss": 0.1891, - "step": 4644 - }, - { - "epoch": 0.7919652609425367, - "grad_norm": 1.4053045511245728, - "learning_rate": 2.080136402387042e-06, - "loss": 0.0873, - "step": 4645 - }, - { - "epoch": 0.7921357593840744, - "grad_norm": 2.0661635398864746, - "learning_rate": 2.07843137254902e-06, - "loss": 0.169, - "step": 4646 - }, - { - "epoch": 0.792306257825612, - "grad_norm": 1.3633352518081665, - "learning_rate": 2.0767263427109978e-06, - "loss": 0.0958, - "step": 4647 - }, - { - "epoch": 0.7924767562671498, - "grad_norm": 0.9656442999839783, - "learning_rate": 2.0750213128729753e-06, - "loss": 0.0431, - "step": 4648 - }, - { - "epoch": 0.7926472547086875, - "grad_norm": 1.8652012348175049, - "learning_rate": 2.0733162830349532e-06, - "loss": 0.2104, - "step": 4649 - }, - { - "epoch": 0.7928177531502251, - "grad_norm": 1.1847926378250122, - "learning_rate": 2.071611253196931e-06, - "loss": 0.103, - "step": 4650 - }, - { - "epoch": 0.7929882515917628, - "grad_norm": 1.349178671836853, - "learning_rate": 2.069906223358909e-06, - "loss": 0.1452, - "step": 4651 - }, - { - "epoch": 0.7931587500333005, - "grad_norm": 2.4015870094299316, - "learning_rate": 2.0682011935208867e-06, - "loss": 0.1767, - "step": 4652 - }, - { - "epoch": 0.7933292484748382, - "grad_norm": 1.461780309677124, - "learning_rate": 2.0664961636828646e-06, - "loss": 0.0683, - "step": 4653 - }, - { - "epoch": 0.7934997469163758, - "grad_norm": 1.072913646697998, - "learning_rate": 2.0647911338448426e-06, - "loss": 0.1258, - "step": 4654 - }, - { - "epoch": 0.7936702453579135, - "grad_norm": 1.6158761978149414, - "learning_rate": 2.0630861040068205e-06, - "loss": 0.1743, - "step": 4655 - }, - { - "epoch": 0.7938407437994512, - "grad_norm": 1.0536906719207764, - "learning_rate": 2.061381074168798e-06, - "loss": 0.1707, - "step": 4656 - }, - { - "epoch": 0.7940112422409888, - "grad_norm": 0.9950072169303894, - "learning_rate": 2.059676044330776e-06, - "loss": 0.0788, - "step": 4657 - }, - { - "epoch": 0.7941817406825266, - "grad_norm": 1.13616943359375, - "learning_rate": 2.0579710144927535e-06, - "loss": 0.1079, - "step": 4658 - }, - { - "epoch": 0.7943522391240643, - "grad_norm": 1.375840663909912, - "learning_rate": 2.056265984654732e-06, - "loss": 0.123, - "step": 4659 - }, - { - "epoch": 0.794522737565602, - "grad_norm": 1.3938781023025513, - "learning_rate": 2.0545609548167094e-06, - "loss": 0.1818, - "step": 4660 - }, - { - "epoch": 0.7946932360071396, - "grad_norm": 1.6234029531478882, - "learning_rate": 2.0528559249786873e-06, - "loss": 0.1373, - "step": 4661 - }, - { - "epoch": 0.7948637344486773, - "grad_norm": 1.4031246900558472, - "learning_rate": 2.051150895140665e-06, - "loss": 0.1111, - "step": 4662 - }, - { - "epoch": 0.795034232890215, - "grad_norm": 0.9812381267547607, - "learning_rate": 2.0494458653026432e-06, - "loss": 0.1086, - "step": 4663 - }, - { - "epoch": 0.7952047313317526, - "grad_norm": 1.050758957862854, - "learning_rate": 2.0477408354646208e-06, - "loss": 0.1264, - "step": 4664 - }, - { - "epoch": 0.7953752297732903, - "grad_norm": 1.0812690258026123, - "learning_rate": 2.0460358056265987e-06, - "loss": 0.1103, - "step": 4665 - }, - { - "epoch": 0.795545728214828, - "grad_norm": 1.3163347244262695, - "learning_rate": 2.0443307757885762e-06, - "loss": 0.1515, - "step": 4666 - }, - { - "epoch": 0.7957162266563658, - "grad_norm": 1.0045158863067627, - "learning_rate": 2.0426257459505546e-06, - "loss": 0.0468, - "step": 4667 - }, - { - "epoch": 0.7958867250979034, - "grad_norm": 0.9080355763435364, - "learning_rate": 2.040920716112532e-06, - "loss": 0.0406, - "step": 4668 - }, - { - "epoch": 0.7960572235394411, - "grad_norm": 1.759256362915039, - "learning_rate": 2.03921568627451e-06, - "loss": 0.1086, - "step": 4669 - }, - { - "epoch": 0.7962277219809788, - "grad_norm": 1.5688270330429077, - "learning_rate": 2.0375106564364876e-06, - "loss": 0.1196, - "step": 4670 - }, - { - "epoch": 0.7963982204225164, - "grad_norm": 1.7334173917770386, - "learning_rate": 2.0358056265984655e-06, - "loss": 0.1583, - "step": 4671 - }, - { - "epoch": 0.7965687188640541, - "grad_norm": 1.627323865890503, - "learning_rate": 2.0341005967604435e-06, - "loss": 0.0697, - "step": 4672 - }, - { - "epoch": 0.7967392173055918, - "grad_norm": 1.538482666015625, - "learning_rate": 2.0323955669224214e-06, - "loss": 0.1348, - "step": 4673 - }, - { - "epoch": 0.7969097157471295, - "grad_norm": 1.303102731704712, - "learning_rate": 2.030690537084399e-06, - "loss": 0.1172, - "step": 4674 - }, - { - "epoch": 0.7970802141886671, - "grad_norm": 1.290687918663025, - "learning_rate": 2.028985507246377e-06, - "loss": 0.1082, - "step": 4675 - }, - { - "epoch": 0.7972507126302049, - "grad_norm": 2.2509918212890625, - "learning_rate": 2.027280477408355e-06, - "loss": 0.1668, - "step": 4676 - }, - { - "epoch": 0.7974212110717426, - "grad_norm": 1.1365306377410889, - "learning_rate": 2.025575447570333e-06, - "loss": 0.1073, - "step": 4677 - }, - { - "epoch": 0.7975917095132802, - "grad_norm": 1.3142699003219604, - "learning_rate": 2.0238704177323103e-06, - "loss": 0.1258, - "step": 4678 - }, - { - "epoch": 0.7977622079548179, - "grad_norm": 1.2691103219985962, - "learning_rate": 2.0221653878942883e-06, - "loss": 0.0808, - "step": 4679 - }, - { - "epoch": 0.7979327063963556, - "grad_norm": 1.9458601474761963, - "learning_rate": 2.0204603580562662e-06, - "loss": 0.1571, - "step": 4680 - }, - { - "epoch": 0.7981032048378933, - "grad_norm": 0.9146658182144165, - "learning_rate": 2.018755328218244e-06, - "loss": 0.0478, - "step": 4681 - }, - { - "epoch": 0.7982737032794309, - "grad_norm": 1.0852900743484497, - "learning_rate": 2.0170502983802217e-06, - "loss": 0.1092, - "step": 4682 - }, - { - "epoch": 0.7984442017209686, - "grad_norm": 1.535971999168396, - "learning_rate": 2.0153452685421997e-06, - "loss": 0.103, - "step": 4683 - }, - { - "epoch": 0.7986147001625064, - "grad_norm": 1.4463566541671753, - "learning_rate": 2.0136402387041776e-06, - "loss": 0.1268, - "step": 4684 - }, - { - "epoch": 0.798785198604044, - "grad_norm": 1.2712289094924927, - "learning_rate": 2.0119352088661555e-06, - "loss": 0.0915, - "step": 4685 - }, - { - "epoch": 0.7989556970455817, - "grad_norm": 0.8879340887069702, - "learning_rate": 2.010230179028133e-06, - "loss": 0.0396, - "step": 4686 - }, - { - "epoch": 0.7991261954871194, - "grad_norm": 1.5370845794677734, - "learning_rate": 2.008525149190111e-06, - "loss": 0.1355, - "step": 4687 - }, - { - "epoch": 0.7992966939286571, - "grad_norm": 1.276149868965149, - "learning_rate": 2.006820119352089e-06, - "loss": 0.0813, - "step": 4688 - }, - { - "epoch": 0.7994671923701947, - "grad_norm": 1.219150185585022, - "learning_rate": 2.0051150895140665e-06, - "loss": 0.1205, - "step": 4689 - }, - { - "epoch": 0.7996376908117324, - "grad_norm": 1.353977918624878, - "learning_rate": 2.0034100596760444e-06, - "loss": 0.0943, - "step": 4690 - }, - { - "epoch": 0.7998081892532701, - "grad_norm": 1.6376045942306519, - "learning_rate": 2.0017050298380224e-06, - "loss": 0.0888, - "step": 4691 - }, - { - "epoch": 0.7999786876948078, - "grad_norm": 0.8724763989448547, - "learning_rate": 2.0000000000000003e-06, - "loss": 0.0747, - "step": 4692 - }, - { - "epoch": 0.8001491861363454, - "grad_norm": 1.2550649642944336, - "learning_rate": 1.998294970161978e-06, - "loss": 0.1348, - "step": 4693 - }, - { - "epoch": 0.8003196845778832, - "grad_norm": 1.7393126487731934, - "learning_rate": 1.996589940323956e-06, - "loss": 0.1726, - "step": 4694 - }, - { - "epoch": 0.8004901830194209, - "grad_norm": 0.9823253154754639, - "learning_rate": 1.9948849104859338e-06, - "loss": 0.1048, - "step": 4695 - }, - { - "epoch": 0.8006606814609585, - "grad_norm": 2.0837788581848145, - "learning_rate": 1.9931798806479117e-06, - "loss": 0.1357, - "step": 4696 - }, - { - "epoch": 0.8008311799024962, - "grad_norm": 1.061660647392273, - "learning_rate": 1.9914748508098892e-06, - "loss": 0.0809, - "step": 4697 - }, - { - "epoch": 0.8010016783440339, - "grad_norm": 1.0696051120758057, - "learning_rate": 1.989769820971867e-06, - "loss": 0.0773, - "step": 4698 - }, - { - "epoch": 0.8011721767855716, - "grad_norm": 0.8411040306091309, - "learning_rate": 1.988064791133845e-06, - "loss": 0.0861, - "step": 4699 - }, - { - "epoch": 0.8013426752271092, - "grad_norm": 2.4212546348571777, - "learning_rate": 1.986359761295823e-06, - "loss": 0.1266, - "step": 4700 - }, - { - "epoch": 0.8013426752271092, - "eval_f1_score": 0.4120603015075377, - "eval_loss": 0.13393385708332062, - "eval_runtime": 183.4415, - "eval_samples_per_second": 54.513, - "eval_steps_per_second": 3.407, - "step": 4700 - }, - { - "epoch": 0.8015131736686469, - "grad_norm": 1.3715274333953857, - "learning_rate": 1.9846547314578006e-06, - "loss": 0.1156, - "step": 4701 - }, - { - "epoch": 0.8016836721101847, - "grad_norm": 2.326615571975708, - "learning_rate": 1.9829497016197785e-06, - "loss": 0.1544, - "step": 4702 - }, - { - "epoch": 0.8018541705517223, - "grad_norm": 1.6274570226669312, - "learning_rate": 1.9812446717817565e-06, - "loss": 0.1187, - "step": 4703 - }, - { - "epoch": 0.80202466899326, - "grad_norm": 1.5323635339736938, - "learning_rate": 1.979539641943734e-06, - "loss": 0.1284, - "step": 4704 - }, - { - "epoch": 0.8021951674347977, - "grad_norm": 1.5731792449951172, - "learning_rate": 1.977834612105712e-06, - "loss": 0.1295, - "step": 4705 - }, - { - "epoch": 0.8023656658763354, - "grad_norm": 1.1835929155349731, - "learning_rate": 1.97612958226769e-06, - "loss": 0.0636, - "step": 4706 - }, - { - "epoch": 0.802536164317873, - "grad_norm": 1.0732511281967163, - "learning_rate": 1.9744245524296674e-06, - "loss": 0.0785, - "step": 4707 - }, - { - "epoch": 0.8027066627594107, - "grad_norm": 1.892913818359375, - "learning_rate": 1.9727195225916454e-06, - "loss": 0.1303, - "step": 4708 - }, - { - "epoch": 0.8028771612009484, - "grad_norm": 1.480954885482788, - "learning_rate": 1.9710144927536233e-06, - "loss": 0.1414, - "step": 4709 - }, - { - "epoch": 0.803047659642486, - "grad_norm": 1.3224931955337524, - "learning_rate": 1.9693094629156013e-06, - "loss": 0.112, - "step": 4710 - }, - { - "epoch": 0.8032181580840237, - "grad_norm": 1.0266205072402954, - "learning_rate": 1.967604433077579e-06, - "loss": 0.0677, - "step": 4711 - }, - { - "epoch": 0.8033886565255615, - "grad_norm": 1.4693719148635864, - "learning_rate": 1.9658994032395567e-06, - "loss": 0.0957, - "step": 4712 - }, - { - "epoch": 0.8035591549670992, - "grad_norm": 1.6250672340393066, - "learning_rate": 1.9641943734015347e-06, - "loss": 0.159, - "step": 4713 - }, - { - "epoch": 0.8037296534086368, - "grad_norm": 2.1153995990753174, - "learning_rate": 1.9624893435635126e-06, - "loss": 0.1968, - "step": 4714 - }, - { - "epoch": 0.8039001518501745, - "grad_norm": 2.047391891479492, - "learning_rate": 1.96078431372549e-06, - "loss": 0.1825, - "step": 4715 - }, - { - "epoch": 0.8040706502917122, - "grad_norm": 1.5375449657440186, - "learning_rate": 1.959079283887468e-06, - "loss": 0.124, - "step": 4716 - }, - { - "epoch": 0.8042411487332498, - "grad_norm": 1.9991374015808105, - "learning_rate": 1.957374254049446e-06, - "loss": 0.1525, - "step": 4717 - }, - { - "epoch": 0.8044116471747875, - "grad_norm": 1.095762848854065, - "learning_rate": 1.955669224211424e-06, - "loss": 0.1023, - "step": 4718 - }, - { - "epoch": 0.8045821456163252, - "grad_norm": 1.0201056003570557, - "learning_rate": 1.9539641943734015e-06, - "loss": 0.103, - "step": 4719 - }, - { - "epoch": 0.804752644057863, - "grad_norm": 1.3266394138336182, - "learning_rate": 1.9522591645353795e-06, - "loss": 0.1133, - "step": 4720 - }, - { - "epoch": 0.8049231424994006, - "grad_norm": 1.4412189722061157, - "learning_rate": 1.9505541346973574e-06, - "loss": 0.0988, - "step": 4721 - }, - { - "epoch": 0.8050936409409383, - "grad_norm": 1.7581360340118408, - "learning_rate": 1.9488491048593354e-06, - "loss": 0.2479, - "step": 4722 - }, - { - "epoch": 0.805264139382476, - "grad_norm": 1.7024379968643188, - "learning_rate": 1.947144075021313e-06, - "loss": 0.1456, - "step": 4723 - }, - { - "epoch": 0.8054346378240136, - "grad_norm": 1.8094717264175415, - "learning_rate": 1.945439045183291e-06, - "loss": 0.1349, - "step": 4724 - }, - { - "epoch": 0.8056051362655513, - "grad_norm": 2.129314422607422, - "learning_rate": 1.9437340153452684e-06, - "loss": 0.218, - "step": 4725 - }, - { - "epoch": 0.805775634707089, - "grad_norm": 1.45778226852417, - "learning_rate": 1.9420289855072467e-06, - "loss": 0.1524, - "step": 4726 - }, - { - "epoch": 0.8059461331486267, - "grad_norm": 1.0143263339996338, - "learning_rate": 1.9403239556692243e-06, - "loss": 0.0989, - "step": 4727 - }, - { - "epoch": 0.8061166315901643, - "grad_norm": 1.1257439851760864, - "learning_rate": 1.9386189258312022e-06, - "loss": 0.1012, - "step": 4728 - }, - { - "epoch": 0.806287130031702, - "grad_norm": 1.2927134037017822, - "learning_rate": 1.9369138959931797e-06, - "loss": 0.0731, - "step": 4729 - }, - { - "epoch": 0.8064576284732398, - "grad_norm": 1.4720287322998047, - "learning_rate": 1.935208866155158e-06, - "loss": 0.1371, - "step": 4730 - }, - { - "epoch": 0.8066281269147774, - "grad_norm": 1.858988642692566, - "learning_rate": 1.9335038363171356e-06, - "loss": 0.2001, - "step": 4731 - }, - { - "epoch": 0.8067986253563151, - "grad_norm": 1.4337401390075684, - "learning_rate": 1.9317988064791136e-06, - "loss": 0.1453, - "step": 4732 - }, - { - "epoch": 0.8069691237978528, - "grad_norm": 1.4073408842086792, - "learning_rate": 1.930093776641091e-06, - "loss": 0.0994, - "step": 4733 - }, - { - "epoch": 0.8071396222393905, - "grad_norm": 0.98497474193573, - "learning_rate": 1.9283887468030695e-06, - "loss": 0.0706, - "step": 4734 - }, - { - "epoch": 0.8073101206809281, - "grad_norm": 1.5933598279953003, - "learning_rate": 1.926683716965047e-06, - "loss": 0.1477, - "step": 4735 - }, - { - "epoch": 0.8074806191224658, - "grad_norm": 1.8039004802703857, - "learning_rate": 1.924978687127025e-06, - "loss": 0.1542, - "step": 4736 - }, - { - "epoch": 0.8076511175640035, - "grad_norm": 0.9617605805397034, - "learning_rate": 1.9232736572890025e-06, - "loss": 0.0702, - "step": 4737 - }, - { - "epoch": 0.8078216160055413, - "grad_norm": 1.2599680423736572, - "learning_rate": 1.921568627450981e-06, - "loss": 0.0808, - "step": 4738 - }, - { - "epoch": 0.8079921144470789, - "grad_norm": 1.4257581233978271, - "learning_rate": 1.9198635976129584e-06, - "loss": 0.1185, - "step": 4739 - }, - { - "epoch": 0.8081626128886166, - "grad_norm": 1.2117432355880737, - "learning_rate": 1.9181585677749363e-06, - "loss": 0.1182, - "step": 4740 - }, - { - "epoch": 0.8083331113301543, - "grad_norm": 2.570119619369507, - "learning_rate": 1.916453537936914e-06, - "loss": 0.2357, - "step": 4741 - }, - { - "epoch": 0.8085036097716919, - "grad_norm": 1.4911688566207886, - "learning_rate": 1.9147485080988922e-06, - "loss": 0.1041, - "step": 4742 - }, - { - "epoch": 0.8086741082132296, - "grad_norm": 1.1588139533996582, - "learning_rate": 1.9130434782608697e-06, - "loss": 0.1045, - "step": 4743 - }, - { - "epoch": 0.8088446066547673, - "grad_norm": 0.8025126457214355, - "learning_rate": 1.9113384484228477e-06, - "loss": 0.0936, - "step": 4744 - }, - { - "epoch": 0.809015105096305, - "grad_norm": 1.339248538017273, - "learning_rate": 1.909633418584825e-06, - "loss": 0.1248, - "step": 4745 - }, - { - "epoch": 0.8091856035378426, - "grad_norm": 0.8242576122283936, - "learning_rate": 1.907928388746803e-06, - "loss": 0.0414, - "step": 4746 - }, - { - "epoch": 0.8093561019793803, - "grad_norm": 1.1313693523406982, - "learning_rate": 1.906223358908781e-06, - "loss": 0.1032, - "step": 4747 - }, - { - "epoch": 0.8095266004209181, - "grad_norm": 1.785373568534851, - "learning_rate": 1.9045183290707588e-06, - "loss": 0.1188, - "step": 4748 - }, - { - "epoch": 0.8096970988624557, - "grad_norm": 1.2123826742172241, - "learning_rate": 1.9028132992327366e-06, - "loss": 0.1482, - "step": 4749 - }, - { - "epoch": 0.8098675973039934, - "grad_norm": 2.0810234546661377, - "learning_rate": 1.9011082693947145e-06, - "loss": 0.1969, - "step": 4750 - }, - { - "epoch": 0.8100380957455311, - "grad_norm": 1.8342708349227905, - "learning_rate": 1.8994032395566925e-06, - "loss": 0.1687, - "step": 4751 - }, - { - "epoch": 0.8102085941870688, - "grad_norm": 1.2007415294647217, - "learning_rate": 1.8976982097186702e-06, - "loss": 0.1193, - "step": 4752 - }, - { - "epoch": 0.8103790926286064, - "grad_norm": 1.5728555917739868, - "learning_rate": 1.895993179880648e-06, - "loss": 0.1214, - "step": 4753 - }, - { - "epoch": 0.8105495910701441, - "grad_norm": 2.4741835594177246, - "learning_rate": 1.8942881500426257e-06, - "loss": 0.1793, - "step": 4754 - }, - { - "epoch": 0.8107200895116818, - "grad_norm": 0.9598491191864014, - "learning_rate": 1.8925831202046038e-06, - "loss": 0.0617, - "step": 4755 - }, - { - "epoch": 0.8108905879532194, - "grad_norm": 1.4126801490783691, - "learning_rate": 1.8908780903665816e-06, - "loss": 0.1332, - "step": 4756 - }, - { - "epoch": 0.8110610863947572, - "grad_norm": 1.359977126121521, - "learning_rate": 1.8891730605285593e-06, - "loss": 0.1188, - "step": 4757 - }, - { - "epoch": 0.8112315848362949, - "grad_norm": 0.970696747303009, - "learning_rate": 1.887468030690537e-06, - "loss": 0.0669, - "step": 4758 - }, - { - "epoch": 0.8114020832778326, - "grad_norm": 1.1032432317733765, - "learning_rate": 1.8857630008525152e-06, - "loss": 0.0869, - "step": 4759 - }, - { - "epoch": 0.8115725817193702, - "grad_norm": 1.0205268859863281, - "learning_rate": 1.884057971014493e-06, - "loss": 0.0861, - "step": 4760 - }, - { - "epoch": 0.8117430801609079, - "grad_norm": 1.1021018028259277, - "learning_rate": 1.8823529411764707e-06, - "loss": 0.1098, - "step": 4761 - }, - { - "epoch": 0.8119135786024456, - "grad_norm": 1.8316097259521484, - "learning_rate": 1.8806479113384484e-06, - "loss": 0.1278, - "step": 4762 - }, - { - "epoch": 0.8120840770439832, - "grad_norm": 1.722632884979248, - "learning_rate": 1.8789428815004266e-06, - "loss": 0.1295, - "step": 4763 - }, - { - "epoch": 0.8122545754855209, - "grad_norm": 1.2179450988769531, - "learning_rate": 1.8772378516624043e-06, - "loss": 0.0912, - "step": 4764 - }, - { - "epoch": 0.8124250739270586, - "grad_norm": 1.0734045505523682, - "learning_rate": 1.875532821824382e-06, - "loss": 0.1192, - "step": 4765 - }, - { - "epoch": 0.8125955723685964, - "grad_norm": 1.4942569732666016, - "learning_rate": 1.8738277919863598e-06, - "loss": 0.1581, - "step": 4766 - }, - { - "epoch": 0.812766070810134, - "grad_norm": 1.9975972175598145, - "learning_rate": 1.872122762148338e-06, - "loss": 0.1512, - "step": 4767 - }, - { - "epoch": 0.8129365692516717, - "grad_norm": 0.8487547039985657, - "learning_rate": 1.8704177323103157e-06, - "loss": 0.0683, - "step": 4768 - }, - { - "epoch": 0.8131070676932094, - "grad_norm": 1.2102348804473877, - "learning_rate": 1.8687127024722934e-06, - "loss": 0.0873, - "step": 4769 - }, - { - "epoch": 0.813277566134747, - "grad_norm": 1.8456752300262451, - "learning_rate": 1.8670076726342711e-06, - "loss": 0.1804, - "step": 4770 - }, - { - "epoch": 0.8134480645762847, - "grad_norm": 1.389062523841858, - "learning_rate": 1.8653026427962489e-06, - "loss": 0.1356, - "step": 4771 - }, - { - "epoch": 0.8136185630178224, - "grad_norm": 0.8592960834503174, - "learning_rate": 1.863597612958227e-06, - "loss": 0.0853, - "step": 4772 - }, - { - "epoch": 0.8137890614593601, - "grad_norm": 1.0502662658691406, - "learning_rate": 1.8618925831202048e-06, - "loss": 0.0735, - "step": 4773 - }, - { - "epoch": 0.8139595599008977, - "grad_norm": 1.4128313064575195, - "learning_rate": 1.8601875532821825e-06, - "loss": 0.0853, - "step": 4774 - }, - { - "epoch": 0.8141300583424355, - "grad_norm": 1.5370444059371948, - "learning_rate": 1.8584825234441603e-06, - "loss": 0.1043, - "step": 4775 - }, - { - "epoch": 0.8143005567839732, - "grad_norm": 1.4921493530273438, - "learning_rate": 1.8567774936061384e-06, - "loss": 0.1193, - "step": 4776 - }, - { - "epoch": 0.8144710552255108, - "grad_norm": 1.5475974082946777, - "learning_rate": 1.8550724637681161e-06, - "loss": 0.1669, - "step": 4777 - }, - { - "epoch": 0.8146415536670485, - "grad_norm": 2.721313953399658, - "learning_rate": 1.8533674339300939e-06, - "loss": 0.2292, - "step": 4778 - }, - { - "epoch": 0.8148120521085862, - "grad_norm": 2.104518175125122, - "learning_rate": 1.8516624040920716e-06, - "loss": 0.1861, - "step": 4779 - }, - { - "epoch": 0.8149825505501239, - "grad_norm": 1.5531257390975952, - "learning_rate": 1.8499573742540498e-06, - "loss": 0.1452, - "step": 4780 - }, - { - "epoch": 0.8151530489916615, - "grad_norm": 0.8269780874252319, - "learning_rate": 1.8482523444160275e-06, - "loss": 0.0698, - "step": 4781 - }, - { - "epoch": 0.8153235474331992, - "grad_norm": 1.4180121421813965, - "learning_rate": 1.8465473145780052e-06, - "loss": 0.1433, - "step": 4782 - }, - { - "epoch": 0.815494045874737, - "grad_norm": 1.6181259155273438, - "learning_rate": 1.844842284739983e-06, - "loss": 0.1311, - "step": 4783 - }, - { - "epoch": 0.8156645443162747, - "grad_norm": 1.1741197109222412, - "learning_rate": 1.8431372549019611e-06, - "loss": 0.0883, - "step": 4784 - }, - { - "epoch": 0.8158350427578123, - "grad_norm": 2.6572444438934326, - "learning_rate": 1.8414322250639389e-06, - "loss": 0.1646, - "step": 4785 - }, - { - "epoch": 0.81600554119935, - "grad_norm": 1.264453649520874, - "learning_rate": 1.8397271952259166e-06, - "loss": 0.0843, - "step": 4786 - }, - { - "epoch": 0.8161760396408877, - "grad_norm": 1.3252006769180298, - "learning_rate": 1.8380221653878944e-06, - "loss": 0.1508, - "step": 4787 - }, - { - "epoch": 0.8163465380824253, - "grad_norm": 1.1810276508331299, - "learning_rate": 1.8363171355498723e-06, - "loss": 0.0991, - "step": 4788 - }, - { - "epoch": 0.816517036523963, - "grad_norm": 1.8916993141174316, - "learning_rate": 1.8346121057118502e-06, - "loss": 0.1708, - "step": 4789 - }, - { - "epoch": 0.8166875349655007, - "grad_norm": 2.088521957397461, - "learning_rate": 1.832907075873828e-06, - "loss": 0.2239, - "step": 4790 - }, - { - "epoch": 0.8168580334070384, - "grad_norm": 1.321691870689392, - "learning_rate": 1.8312020460358057e-06, - "loss": 0.1131, - "step": 4791 - }, - { - "epoch": 0.817028531848576, - "grad_norm": 1.0528753995895386, - "learning_rate": 1.8294970161977837e-06, - "loss": 0.1082, - "step": 4792 - }, - { - "epoch": 0.8171990302901138, - "grad_norm": 1.9226561784744263, - "learning_rate": 1.8277919863597616e-06, - "loss": 0.1226, - "step": 4793 - }, - { - "epoch": 0.8173695287316515, - "grad_norm": 1.830688238143921, - "learning_rate": 1.8260869565217394e-06, - "loss": 0.1374, - "step": 4794 - }, - { - "epoch": 0.8175400271731891, - "grad_norm": 1.4284980297088623, - "learning_rate": 1.824381926683717e-06, - "loss": 0.1419, - "step": 4795 - }, - { - "epoch": 0.8177105256147268, - "grad_norm": 0.8486391305923462, - "learning_rate": 1.8226768968456948e-06, - "loss": 0.0416, - "step": 4796 - }, - { - "epoch": 0.8178810240562645, - "grad_norm": 1.925445795059204, - "learning_rate": 1.8209718670076728e-06, - "loss": 0.184, - "step": 4797 - }, - { - "epoch": 0.8180515224978022, - "grad_norm": 1.1646342277526855, - "learning_rate": 1.8192668371696507e-06, - "loss": 0.1031, - "step": 4798 - }, - { - "epoch": 0.8182220209393398, - "grad_norm": 1.6515798568725586, - "learning_rate": 1.8175618073316285e-06, - "loss": 0.1654, - "step": 4799 - }, - { - "epoch": 0.8183925193808775, - "grad_norm": 1.2121893167495728, - "learning_rate": 1.8158567774936062e-06, - "loss": 0.0971, - "step": 4800 - }, - { - "epoch": 0.8183925193808775, - "eval_f1_score": 0.42, - "eval_loss": 0.13059745728969574, - "eval_runtime": 183.3571, - "eval_samples_per_second": 54.538, - "eval_steps_per_second": 3.409, - "step": 4800 - }, - { - "epoch": 0.8185630178224153, - "grad_norm": 1.1018650531768799, - "learning_rate": 1.8141517476555841e-06, - "loss": 0.0962, - "step": 4801 - }, - { - "epoch": 0.8187335162639529, - "grad_norm": 1.2527377605438232, - "learning_rate": 1.812446717817562e-06, - "loss": 0.1066, - "step": 4802 - }, - { - "epoch": 0.8189040147054906, - "grad_norm": 1.6081165075302124, - "learning_rate": 1.8107416879795398e-06, - "loss": 0.1714, - "step": 4803 - }, - { - "epoch": 0.8190745131470283, - "grad_norm": 1.5928921699523926, - "learning_rate": 1.8090366581415176e-06, - "loss": 0.0999, - "step": 4804 - }, - { - "epoch": 0.819245011588566, - "grad_norm": 1.7792342901229858, - "learning_rate": 1.8073316283034955e-06, - "loss": 0.0932, - "step": 4805 - }, - { - "epoch": 0.8194155100301036, - "grad_norm": 1.6293425559997559, - "learning_rate": 1.8056265984654732e-06, - "loss": 0.1317, - "step": 4806 - }, - { - "epoch": 0.8195860084716413, - "grad_norm": 1.1104508638381958, - "learning_rate": 1.8039215686274512e-06, - "loss": 0.1182, - "step": 4807 - }, - { - "epoch": 0.819756506913179, - "grad_norm": 1.0992122888565063, - "learning_rate": 1.802216538789429e-06, - "loss": 0.1093, - "step": 4808 - }, - { - "epoch": 0.8199270053547166, - "grad_norm": 1.6125050783157349, - "learning_rate": 1.8005115089514069e-06, - "loss": 0.1031, - "step": 4809 - }, - { - "epoch": 0.8200975037962543, - "grad_norm": 0.8295161724090576, - "learning_rate": 1.7988064791133846e-06, - "loss": 0.0621, - "step": 4810 - }, - { - "epoch": 0.8202680022377921, - "grad_norm": 1.5850706100463867, - "learning_rate": 1.7971014492753626e-06, - "loss": 0.1237, - "step": 4811 - }, - { - "epoch": 0.8204385006793298, - "grad_norm": 1.3965903520584106, - "learning_rate": 1.7953964194373403e-06, - "loss": 0.1801, - "step": 4812 - }, - { - "epoch": 0.8206089991208674, - "grad_norm": 1.6549614667892456, - "learning_rate": 1.7936913895993182e-06, - "loss": 0.1196, - "step": 4813 - }, - { - "epoch": 0.8207794975624051, - "grad_norm": 0.9478707909584045, - "learning_rate": 1.791986359761296e-06, - "loss": 0.0902, - "step": 4814 - }, - { - "epoch": 0.8209499960039428, - "grad_norm": 1.6688967943191528, - "learning_rate": 1.7902813299232737e-06, - "loss": 0.1263, - "step": 4815 - }, - { - "epoch": 0.8211204944454804, - "grad_norm": 1.9953707456588745, - "learning_rate": 1.7885763000852517e-06, - "loss": 0.1458, - "step": 4816 - }, - { - "epoch": 0.8212909928870181, - "grad_norm": 1.5312587022781372, - "learning_rate": 1.7868712702472294e-06, - "loss": 0.1163, - "step": 4817 - }, - { - "epoch": 0.8214614913285558, - "grad_norm": 1.563798189163208, - "learning_rate": 1.7851662404092073e-06, - "loss": 0.1001, - "step": 4818 - }, - { - "epoch": 0.8216319897700936, - "grad_norm": 1.5185989141464233, - "learning_rate": 1.783461210571185e-06, - "loss": 0.141, - "step": 4819 - }, - { - "epoch": 0.8218024882116312, - "grad_norm": 1.8926496505737305, - "learning_rate": 1.7817561807331628e-06, - "loss": 0.2122, - "step": 4820 - }, - { - "epoch": 0.8219729866531689, - "grad_norm": 1.165590763092041, - "learning_rate": 1.7800511508951408e-06, - "loss": 0.1083, - "step": 4821 - }, - { - "epoch": 0.8221434850947066, - "grad_norm": 1.5912805795669556, - "learning_rate": 1.7783461210571187e-06, - "loss": 0.1377, - "step": 4822 - }, - { - "epoch": 0.8223139835362442, - "grad_norm": 2.399815082550049, - "learning_rate": 1.7766410912190964e-06, - "loss": 0.2404, - "step": 4823 - }, - { - "epoch": 0.8224844819777819, - "grad_norm": 1.1800510883331299, - "learning_rate": 1.7749360613810742e-06, - "loss": 0.0652, - "step": 4824 - }, - { - "epoch": 0.8226549804193196, - "grad_norm": 1.0360442399978638, - "learning_rate": 1.7732310315430521e-06, - "loss": 0.0917, - "step": 4825 - }, - { - "epoch": 0.8228254788608573, - "grad_norm": 2.043694257736206, - "learning_rate": 1.77152600170503e-06, - "loss": 0.1947, - "step": 4826 - }, - { - "epoch": 0.8229959773023949, - "grad_norm": 1.7305378913879395, - "learning_rate": 1.7698209718670078e-06, - "loss": 0.1791, - "step": 4827 - }, - { - "epoch": 0.8231664757439326, - "grad_norm": 1.4891148805618286, - "learning_rate": 1.7681159420289855e-06, - "loss": 0.111, - "step": 4828 - }, - { - "epoch": 0.8233369741854704, - "grad_norm": 1.1548901796340942, - "learning_rate": 1.7664109121909633e-06, - "loss": 0.115, - "step": 4829 - }, - { - "epoch": 0.823507472627008, - "grad_norm": 1.0407037734985352, - "learning_rate": 1.7647058823529414e-06, - "loss": 0.0966, - "step": 4830 - }, - { - "epoch": 0.8236779710685457, - "grad_norm": 1.8325929641723633, - "learning_rate": 1.7630008525149192e-06, - "loss": 0.1428, - "step": 4831 - }, - { - "epoch": 0.8238484695100834, - "grad_norm": 1.2216639518737793, - "learning_rate": 1.761295822676897e-06, - "loss": 0.1084, - "step": 4832 - }, - { - "epoch": 0.8240189679516211, - "grad_norm": 1.1613625288009644, - "learning_rate": 1.7595907928388747e-06, - "loss": 0.1026, - "step": 4833 - }, - { - "epoch": 0.8241894663931587, - "grad_norm": 1.3345240354537964, - "learning_rate": 1.7578857630008528e-06, - "loss": 0.0683, - "step": 4834 - }, - { - "epoch": 0.8243599648346964, - "grad_norm": 1.459480881690979, - "learning_rate": 1.7561807331628305e-06, - "loss": 0.1128, - "step": 4835 - }, - { - "epoch": 0.8245304632762341, - "grad_norm": 1.513303518295288, - "learning_rate": 1.7544757033248083e-06, - "loss": 0.1847, - "step": 4836 - }, - { - "epoch": 0.8247009617177719, - "grad_norm": 1.2072933912277222, - "learning_rate": 1.752770673486786e-06, - "loss": 0.0592, - "step": 4837 - }, - { - "epoch": 0.8248714601593095, - "grad_norm": 1.6384389400482178, - "learning_rate": 1.7510656436487642e-06, - "loss": 0.1571, - "step": 4838 - }, - { - "epoch": 0.8250419586008472, - "grad_norm": 0.8648350834846497, - "learning_rate": 1.749360613810742e-06, - "loss": 0.0887, - "step": 4839 - }, - { - "epoch": 0.8252124570423849, - "grad_norm": 2.6184184551239014, - "learning_rate": 1.7476555839727197e-06, - "loss": 0.2689, - "step": 4840 - }, - { - "epoch": 0.8253829554839225, - "grad_norm": 1.637653112411499, - "learning_rate": 1.7459505541346974e-06, - "loss": 0.0831, - "step": 4841 - }, - { - "epoch": 0.8255534539254602, - "grad_norm": 0.9104223847389221, - "learning_rate": 1.7442455242966751e-06, - "loss": 0.0679, - "step": 4842 - }, - { - "epoch": 0.8257239523669979, - "grad_norm": 1.3634427785873413, - "learning_rate": 1.7425404944586533e-06, - "loss": 0.1786, - "step": 4843 - }, - { - "epoch": 0.8258944508085356, - "grad_norm": 2.4873688220977783, - "learning_rate": 1.740835464620631e-06, - "loss": 0.1614, - "step": 4844 - }, - { - "epoch": 0.8260649492500732, - "grad_norm": 1.1272398233413696, - "learning_rate": 1.7391304347826088e-06, - "loss": 0.0912, - "step": 4845 - }, - { - "epoch": 0.826235447691611, - "grad_norm": 1.254799246788025, - "learning_rate": 1.7374254049445865e-06, - "loss": 0.1297, - "step": 4846 - }, - { - "epoch": 0.8264059461331487, - "grad_norm": 1.8196457624435425, - "learning_rate": 1.7357203751065647e-06, - "loss": 0.1737, - "step": 4847 - }, - { - "epoch": 0.8265764445746863, - "grad_norm": 0.8753282427787781, - "learning_rate": 1.7340153452685424e-06, - "loss": 0.0688, - "step": 4848 - }, - { - "epoch": 0.826746943016224, - "grad_norm": 1.0449436902999878, - "learning_rate": 1.7323103154305201e-06, - "loss": 0.0753, - "step": 4849 - }, - { - "epoch": 0.8269174414577617, - "grad_norm": 1.1351038217544556, - "learning_rate": 1.7306052855924979e-06, - "loss": 0.101, - "step": 4850 - }, - { - "epoch": 0.8270879398992994, - "grad_norm": 1.4630578756332397, - "learning_rate": 1.728900255754476e-06, - "loss": 0.1175, - "step": 4851 - }, - { - "epoch": 0.827258438340837, - "grad_norm": 1.8749983310699463, - "learning_rate": 1.7271952259164538e-06, - "loss": 0.148, - "step": 4852 - }, - { - "epoch": 0.8274289367823747, - "grad_norm": 1.2372411489486694, - "learning_rate": 1.7254901960784315e-06, - "loss": 0.0844, - "step": 4853 - }, - { - "epoch": 0.8275994352239124, - "grad_norm": 1.1746691465377808, - "learning_rate": 1.7237851662404092e-06, - "loss": 0.0951, - "step": 4854 - }, - { - "epoch": 0.82776993366545, - "grad_norm": 0.9853083491325378, - "learning_rate": 1.7220801364023874e-06, - "loss": 0.0903, - "step": 4855 - }, - { - "epoch": 0.8279404321069878, - "grad_norm": 2.1901590824127197, - "learning_rate": 1.7203751065643651e-06, - "loss": 0.1728, - "step": 4856 - }, - { - "epoch": 0.8281109305485255, - "grad_norm": 1.0862927436828613, - "learning_rate": 1.7186700767263429e-06, - "loss": 0.1114, - "step": 4857 - }, - { - "epoch": 0.8282814289900632, - "grad_norm": 1.617577075958252, - "learning_rate": 1.7169650468883206e-06, - "loss": 0.1074, - "step": 4858 - }, - { - "epoch": 0.8284519274316008, - "grad_norm": 1.824747085571289, - "learning_rate": 1.7152600170502988e-06, - "loss": 0.081, - "step": 4859 - }, - { - "epoch": 0.8286224258731385, - "grad_norm": 1.7742489576339722, - "learning_rate": 1.7135549872122765e-06, - "loss": 0.1525, - "step": 4860 - }, - { - "epoch": 0.8287929243146762, - "grad_norm": 2.076169967651367, - "learning_rate": 1.7118499573742542e-06, - "loss": 0.118, - "step": 4861 - }, - { - "epoch": 0.8289634227562138, - "grad_norm": 1.0002825260162354, - "learning_rate": 1.710144927536232e-06, - "loss": 0.0538, - "step": 4862 - }, - { - "epoch": 0.8291339211977515, - "grad_norm": 1.6570539474487305, - "learning_rate": 1.7084398976982097e-06, - "loss": 0.1468, - "step": 4863 - }, - { - "epoch": 0.8293044196392892, - "grad_norm": 1.037410020828247, - "learning_rate": 1.7067348678601879e-06, - "loss": 0.1069, - "step": 4864 - }, - { - "epoch": 0.829474918080827, - "grad_norm": 1.7056080102920532, - "learning_rate": 1.7050298380221656e-06, - "loss": 0.1525, - "step": 4865 - }, - { - "epoch": 0.8296454165223646, - "grad_norm": 1.6007308959960938, - "learning_rate": 1.7033248081841433e-06, - "loss": 0.132, - "step": 4866 - }, - { - "epoch": 0.8298159149639023, - "grad_norm": 1.514782190322876, - "learning_rate": 1.701619778346121e-06, - "loss": 0.1442, - "step": 4867 - }, - { - "epoch": 0.82998641340544, - "grad_norm": 0.7461420297622681, - "learning_rate": 1.6999147485080992e-06, - "loss": 0.0602, - "step": 4868 - }, - { - "epoch": 0.8301569118469776, - "grad_norm": 1.7397735118865967, - "learning_rate": 1.698209718670077e-06, - "loss": 0.1756, - "step": 4869 - }, - { - "epoch": 0.8303274102885153, - "grad_norm": 1.6185688972473145, - "learning_rate": 1.6965046888320547e-06, - "loss": 0.1193, - "step": 4870 - }, - { - "epoch": 0.830497908730053, - "grad_norm": 1.5054775476455688, - "learning_rate": 1.6947996589940324e-06, - "loss": 0.1949, - "step": 4871 - }, - { - "epoch": 0.8306684071715907, - "grad_norm": 1.8738244771957397, - "learning_rate": 1.6930946291560104e-06, - "loss": 0.0839, - "step": 4872 - }, - { - "epoch": 0.8308389056131283, - "grad_norm": 1.9042476415634155, - "learning_rate": 1.6913895993179883e-06, - "loss": 0.1257, - "step": 4873 - }, - { - "epoch": 0.8310094040546661, - "grad_norm": 1.4473137855529785, - "learning_rate": 1.689684569479966e-06, - "loss": 0.1606, - "step": 4874 - }, - { - "epoch": 0.8311799024962038, - "grad_norm": 1.8061612844467163, - "learning_rate": 1.6879795396419438e-06, - "loss": 0.1655, - "step": 4875 - }, - { - "epoch": 0.8313504009377414, - "grad_norm": 1.2056777477264404, - "learning_rate": 1.6862745098039217e-06, - "loss": 0.095, - "step": 4876 - }, - { - "epoch": 0.8315208993792791, - "grad_norm": 2.325047254562378, - "learning_rate": 1.6845694799658997e-06, - "loss": 0.1422, - "step": 4877 - }, - { - "epoch": 0.8316913978208168, - "grad_norm": 1.2801249027252197, - "learning_rate": 1.6828644501278774e-06, - "loss": 0.1503, - "step": 4878 - }, - { - "epoch": 0.8318618962623545, - "grad_norm": 2.1016643047332764, - "learning_rate": 1.6811594202898552e-06, - "loss": 0.1834, - "step": 4879 - }, - { - "epoch": 0.8320323947038921, - "grad_norm": 1.1316357851028442, - "learning_rate": 1.6794543904518331e-06, - "loss": 0.1201, - "step": 4880 - }, - { - "epoch": 0.8322028931454298, - "grad_norm": 1.2594853639602661, - "learning_rate": 1.6777493606138108e-06, - "loss": 0.1242, - "step": 4881 - }, - { - "epoch": 0.8323733915869675, - "grad_norm": 1.3186732530593872, - "learning_rate": 1.6760443307757888e-06, - "loss": 0.1331, - "step": 4882 - }, - { - "epoch": 0.8325438900285053, - "grad_norm": 1.2607057094573975, - "learning_rate": 1.6743393009377665e-06, - "loss": 0.1387, - "step": 4883 - }, - { - "epoch": 0.8327143884700429, - "grad_norm": 1.6016281843185425, - "learning_rate": 1.6726342710997443e-06, - "loss": 0.1603, - "step": 4884 - }, - { - "epoch": 0.8328848869115806, - "grad_norm": 1.093407154083252, - "learning_rate": 1.6709292412617222e-06, - "loss": 0.0765, - "step": 4885 - }, - { - "epoch": 0.8330553853531183, - "grad_norm": 1.4803746938705444, - "learning_rate": 1.6692242114237002e-06, - "loss": 0.1681, - "step": 4886 - }, - { - "epoch": 0.8332258837946559, - "grad_norm": 1.254752516746521, - "learning_rate": 1.667519181585678e-06, - "loss": 0.1092, - "step": 4887 - }, - { - "epoch": 0.8333963822361936, - "grad_norm": 1.5853029489517212, - "learning_rate": 1.6658141517476556e-06, - "loss": 0.1387, - "step": 4888 - }, - { - "epoch": 0.8335668806777313, - "grad_norm": 1.0304608345031738, - "learning_rate": 1.6641091219096336e-06, - "loss": 0.0431, - "step": 4889 - }, - { - "epoch": 0.833737379119269, - "grad_norm": 1.44893479347229, - "learning_rate": 1.6624040920716113e-06, - "loss": 0.115, - "step": 4890 - }, - { - "epoch": 0.8339078775608066, - "grad_norm": 0.9381269216537476, - "learning_rate": 1.6606990622335893e-06, - "loss": 0.0736, - "step": 4891 - }, - { - "epoch": 0.8340783760023444, - "grad_norm": 1.6925040483474731, - "learning_rate": 1.658994032395567e-06, - "loss": 0.1446, - "step": 4892 - }, - { - "epoch": 0.8342488744438821, - "grad_norm": 1.8539586067199707, - "learning_rate": 1.657289002557545e-06, - "loss": 0.1497, - "step": 4893 - }, - { - "epoch": 0.8344193728854197, - "grad_norm": 1.285607099533081, - "learning_rate": 1.6555839727195227e-06, - "loss": 0.1212, - "step": 4894 - }, - { - "epoch": 0.8345898713269574, - "grad_norm": 1.8159408569335938, - "learning_rate": 1.6538789428815004e-06, - "loss": 0.1543, - "step": 4895 - }, - { - "epoch": 0.8347603697684951, - "grad_norm": 1.9982542991638184, - "learning_rate": 1.6521739130434784e-06, - "loss": 0.1245, - "step": 4896 - }, - { - "epoch": 0.8349308682100328, - "grad_norm": 1.0288341045379639, - "learning_rate": 1.6504688832054563e-06, - "loss": 0.1175, - "step": 4897 - }, - { - "epoch": 0.8351013666515704, - "grad_norm": 1.4282264709472656, - "learning_rate": 1.648763853367434e-06, - "loss": 0.0739, - "step": 4898 - }, - { - "epoch": 0.8352718650931081, - "grad_norm": 1.336698293685913, - "learning_rate": 1.6470588235294118e-06, - "loss": 0.1349, - "step": 4899 - }, - { - "epoch": 0.8354423635346458, - "grad_norm": 1.2105053663253784, - "learning_rate": 1.6453537936913897e-06, - "loss": 0.1025, - "step": 4900 - }, - { - "epoch": 0.8354423635346458, - "eval_f1_score": 0.4364508393285372, - "eval_loss": 0.1303476244211197, - "eval_runtime": 183.3875, - "eval_samples_per_second": 54.529, - "eval_steps_per_second": 3.408, - "step": 4900 - }, - { - "epoch": 0.8356128619761835, - "grad_norm": 0.8056730031967163, - "learning_rate": 1.6436487638533677e-06, - "loss": 0.0576, - "step": 4901 - }, - { - "epoch": 0.8357833604177212, - "grad_norm": 1.9519644975662231, - "learning_rate": 1.6419437340153454e-06, - "loss": 0.2069, - "step": 4902 - }, - { - "epoch": 0.8359538588592589, - "grad_norm": 1.4039374589920044, - "learning_rate": 1.6402387041773232e-06, - "loss": 0.1316, - "step": 4903 - }, - { - "epoch": 0.8361243573007966, - "grad_norm": 1.4302421808242798, - "learning_rate": 1.6385336743393009e-06, - "loss": 0.1208, - "step": 4904 - }, - { - "epoch": 0.8362948557423342, - "grad_norm": 3.204106569290161, - "learning_rate": 1.636828644501279e-06, - "loss": 0.0977, - "step": 4905 - }, - { - "epoch": 0.8364653541838719, - "grad_norm": 0.9173206686973572, - "learning_rate": 1.6351236146632568e-06, - "loss": 0.0584, - "step": 4906 - }, - { - "epoch": 0.8366358526254096, - "grad_norm": 0.9852831363677979, - "learning_rate": 1.6334185848252345e-06, - "loss": 0.1155, - "step": 4907 - }, - { - "epoch": 0.8368063510669472, - "grad_norm": 1.2431775331497192, - "learning_rate": 1.6317135549872123e-06, - "loss": 0.1246, - "step": 4908 - }, - { - "epoch": 0.8369768495084849, - "grad_norm": 1.100155234336853, - "learning_rate": 1.6300085251491902e-06, - "loss": 0.0895, - "step": 4909 - }, - { - "epoch": 0.8371473479500227, - "grad_norm": 1.8383277654647827, - "learning_rate": 1.6283034953111682e-06, - "loss": 0.1726, - "step": 4910 - }, - { - "epoch": 0.8373178463915604, - "grad_norm": 1.2940951585769653, - "learning_rate": 1.6265984654731459e-06, - "loss": 0.1096, - "step": 4911 - }, - { - "epoch": 0.837488344833098, - "grad_norm": 1.2588635683059692, - "learning_rate": 1.6248934356351236e-06, - "loss": 0.0808, - "step": 4912 - }, - { - "epoch": 0.8376588432746357, - "grad_norm": 2.5419719219207764, - "learning_rate": 1.6231884057971014e-06, - "loss": 0.1538, - "step": 4913 - }, - { - "epoch": 0.8378293417161734, - "grad_norm": 1.7912168502807617, - "learning_rate": 1.6214833759590795e-06, - "loss": 0.1429, - "step": 4914 - }, - { - "epoch": 0.837999840157711, - "grad_norm": 1.7084680795669556, - "learning_rate": 1.6197783461210573e-06, - "loss": 0.1547, - "step": 4915 - }, - { - "epoch": 0.8381703385992487, - "grad_norm": 1.299729824066162, - "learning_rate": 1.618073316283035e-06, - "loss": 0.0982, - "step": 4916 - }, - { - "epoch": 0.8383408370407864, - "grad_norm": 1.2041939496994019, - "learning_rate": 1.6163682864450127e-06, - "loss": 0.1329, - "step": 4917 - }, - { - "epoch": 0.8385113354823241, - "grad_norm": 1.9802684783935547, - "learning_rate": 1.6146632566069909e-06, - "loss": 0.1781, - "step": 4918 - }, - { - "epoch": 0.8386818339238618, - "grad_norm": 1.0352423191070557, - "learning_rate": 1.6129582267689686e-06, - "loss": 0.0929, - "step": 4919 - }, - { - "epoch": 0.8388523323653995, - "grad_norm": 1.1618430614471436, - "learning_rate": 1.6112531969309464e-06, - "loss": 0.078, - "step": 4920 - }, - { - "epoch": 0.8390228308069372, - "grad_norm": 1.4600962400436401, - "learning_rate": 1.609548167092924e-06, - "loss": 0.1336, - "step": 4921 - }, - { - "epoch": 0.8391933292484748, - "grad_norm": 0.9181307554244995, - "learning_rate": 1.6078431372549023e-06, - "loss": 0.0863, - "step": 4922 - }, - { - "epoch": 0.8393638276900125, - "grad_norm": 1.2522655725479126, - "learning_rate": 1.60613810741688e-06, - "loss": 0.1183, - "step": 4923 - }, - { - "epoch": 0.8395343261315502, - "grad_norm": 1.0354984998703003, - "learning_rate": 1.6044330775788577e-06, - "loss": 0.0542, - "step": 4924 - }, - { - "epoch": 0.8397048245730879, - "grad_norm": 1.0398370027542114, - "learning_rate": 1.6027280477408355e-06, - "loss": 0.1141, - "step": 4925 - }, - { - "epoch": 0.8398753230146255, - "grad_norm": 0.7802371382713318, - "learning_rate": 1.6010230179028136e-06, - "loss": 0.0647, - "step": 4926 - }, - { - "epoch": 0.8400458214561632, - "grad_norm": 0.9803279042243958, - "learning_rate": 1.5993179880647914e-06, - "loss": 0.0742, - "step": 4927 - }, - { - "epoch": 0.840216319897701, - "grad_norm": 0.9054097533226013, - "learning_rate": 1.597612958226769e-06, - "loss": 0.0417, - "step": 4928 - }, - { - "epoch": 0.8403868183392387, - "grad_norm": 1.2059639692306519, - "learning_rate": 1.5959079283887468e-06, - "loss": 0.1149, - "step": 4929 - }, - { - "epoch": 0.8405573167807763, - "grad_norm": 1.4713133573532104, - "learning_rate": 1.5942028985507246e-06, - "loss": 0.1514, - "step": 4930 - }, - { - "epoch": 0.840727815222314, - "grad_norm": 1.3940950632095337, - "learning_rate": 1.5924978687127027e-06, - "loss": 0.1448, - "step": 4931 - }, - { - "epoch": 0.8408983136638517, - "grad_norm": 1.4991226196289062, - "learning_rate": 1.5907928388746805e-06, - "loss": 0.1628, - "step": 4932 - }, - { - "epoch": 0.8410688121053893, - "grad_norm": 0.8531244993209839, - "learning_rate": 1.5890878090366582e-06, - "loss": 0.089, - "step": 4933 - }, - { - "epoch": 0.841239310546927, - "grad_norm": 1.2540137767791748, - "learning_rate": 1.587382779198636e-06, - "loss": 0.0978, - "step": 4934 - }, - { - "epoch": 0.8414098089884647, - "grad_norm": 1.934168815612793, - "learning_rate": 1.585677749360614e-06, - "loss": 0.1578, - "step": 4935 - }, - { - "epoch": 0.8415803074300024, - "grad_norm": 1.7683275938034058, - "learning_rate": 1.5839727195225918e-06, - "loss": 0.1453, - "step": 4936 - }, - { - "epoch": 0.84175080587154, - "grad_norm": 1.2101832628250122, - "learning_rate": 1.5822676896845696e-06, - "loss": 0.1185, - "step": 4937 - }, - { - "epoch": 0.8419213043130778, - "grad_norm": 1.5000008344650269, - "learning_rate": 1.5805626598465473e-06, - "loss": 0.1189, - "step": 4938 - }, - { - "epoch": 0.8420918027546155, - "grad_norm": 1.8698530197143555, - "learning_rate": 1.5788576300085255e-06, - "loss": 0.1658, - "step": 4939 - }, - { - "epoch": 0.8422623011961531, - "grad_norm": 1.5301642417907715, - "learning_rate": 1.5771526001705032e-06, - "loss": 0.1542, - "step": 4940 - }, - { - "epoch": 0.8424327996376908, - "grad_norm": 1.2595562934875488, - "learning_rate": 1.575447570332481e-06, - "loss": 0.1349, - "step": 4941 - }, - { - "epoch": 0.8426032980792285, - "grad_norm": 1.5561221837997437, - "learning_rate": 1.5737425404944587e-06, - "loss": 0.1042, - "step": 4942 - }, - { - "epoch": 0.8427737965207662, - "grad_norm": 1.0200626850128174, - "learning_rate": 1.5720375106564368e-06, - "loss": 0.0825, - "step": 4943 - }, - { - "epoch": 0.8429442949623038, - "grad_norm": 1.5128827095031738, - "learning_rate": 1.5703324808184146e-06, - "loss": 0.1609, - "step": 4944 - }, - { - "epoch": 0.8431147934038415, - "grad_norm": 1.8137482404708862, - "learning_rate": 1.5686274509803923e-06, - "loss": 0.2222, - "step": 4945 - }, - { - "epoch": 0.8432852918453793, - "grad_norm": 1.5162135362625122, - "learning_rate": 1.56692242114237e-06, - "loss": 0.128, - "step": 4946 - }, - { - "epoch": 0.8434557902869169, - "grad_norm": 1.5846889019012451, - "learning_rate": 1.565217391304348e-06, - "loss": 0.1441, - "step": 4947 - }, - { - "epoch": 0.8436262887284546, - "grad_norm": 1.188979983329773, - "learning_rate": 1.563512361466326e-06, - "loss": 0.0862, - "step": 4948 - }, - { - "epoch": 0.8437967871699923, - "grad_norm": 1.2255123853683472, - "learning_rate": 1.5618073316283037e-06, - "loss": 0.1131, - "step": 4949 - }, - { - "epoch": 0.84396728561153, - "grad_norm": 0.8165611028671265, - "learning_rate": 1.5601023017902814e-06, - "loss": 0.069, - "step": 4950 - }, - { - "epoch": 0.8441377840530676, - "grad_norm": 1.3417201042175293, - "learning_rate": 1.5583972719522594e-06, - "loss": 0.139, - "step": 4951 - }, - { - "epoch": 0.8443082824946053, - "grad_norm": 0.9354560375213623, - "learning_rate": 1.5566922421142373e-06, - "loss": 0.0778, - "step": 4952 - }, - { - "epoch": 0.844478780936143, - "grad_norm": 1.1510093212127686, - "learning_rate": 1.554987212276215e-06, - "loss": 0.1106, - "step": 4953 - }, - { - "epoch": 0.8446492793776806, - "grad_norm": 1.5288021564483643, - "learning_rate": 1.5532821824381928e-06, - "loss": 0.1585, - "step": 4954 - }, - { - "epoch": 0.8448197778192184, - "grad_norm": 0.8060934543609619, - "learning_rate": 1.5515771526001705e-06, - "loss": 0.0384, - "step": 4955 - }, - { - "epoch": 0.8449902762607561, - "grad_norm": 1.1227192878723145, - "learning_rate": 1.5498721227621485e-06, - "loss": 0.0937, - "step": 4956 - }, - { - "epoch": 0.8451607747022938, - "grad_norm": 1.2658599615097046, - "learning_rate": 1.5481670929241264e-06, - "loss": 0.1081, - "step": 4957 - }, - { - "epoch": 0.8453312731438314, - "grad_norm": 1.8658208847045898, - "learning_rate": 1.5464620630861041e-06, - "loss": 0.1758, - "step": 4958 - }, - { - "epoch": 0.8455017715853691, - "grad_norm": 1.0130605697631836, - "learning_rate": 1.5447570332480819e-06, - "loss": 0.0907, - "step": 4959 - }, - { - "epoch": 0.8456722700269068, - "grad_norm": 1.6483023166656494, - "learning_rate": 1.5430520034100598e-06, - "loss": 0.0851, - "step": 4960 - }, - { - "epoch": 0.8458427684684444, - "grad_norm": 1.4492061138153076, - "learning_rate": 1.5413469735720376e-06, - "loss": 0.1319, - "step": 4961 - }, - { - "epoch": 0.8460132669099821, - "grad_norm": 1.1907610893249512, - "learning_rate": 1.5396419437340155e-06, - "loss": 0.115, - "step": 4962 - }, - { - "epoch": 0.8461837653515198, - "grad_norm": 1.2362143993377686, - "learning_rate": 1.5379369138959932e-06, - "loss": 0.1142, - "step": 4963 - }, - { - "epoch": 0.8463542637930576, - "grad_norm": 1.0403637886047363, - "learning_rate": 1.5362318840579712e-06, - "loss": 0.0966, - "step": 4964 - }, - { - "epoch": 0.8465247622345952, - "grad_norm": 1.4426369667053223, - "learning_rate": 1.534526854219949e-06, - "loss": 0.1402, - "step": 4965 - }, - { - "epoch": 0.8466952606761329, - "grad_norm": 1.1102651357650757, - "learning_rate": 1.5328218243819269e-06, - "loss": 0.1067, - "step": 4966 - }, - { - "epoch": 0.8468657591176706, - "grad_norm": 1.952067255973816, - "learning_rate": 1.5311167945439046e-06, - "loss": 0.1652, - "step": 4967 - }, - { - "epoch": 0.8470362575592082, - "grad_norm": 0.5843350291252136, - "learning_rate": 1.5294117647058826e-06, - "loss": 0.0278, - "step": 4968 - }, - { - "epoch": 0.8472067560007459, - "grad_norm": 1.8096089363098145, - "learning_rate": 1.5277067348678603e-06, - "loss": 0.2164, - "step": 4969 - }, - { - "epoch": 0.8473772544422836, - "grad_norm": 1.3740298748016357, - "learning_rate": 1.526001705029838e-06, - "loss": 0.1416, - "step": 4970 - }, - { - "epoch": 0.8475477528838213, - "grad_norm": 1.931396245956421, - "learning_rate": 1.524296675191816e-06, - "loss": 0.1531, - "step": 4971 - }, - { - "epoch": 0.8477182513253589, - "grad_norm": 1.6420286893844604, - "learning_rate": 1.522591645353794e-06, - "loss": 0.2084, - "step": 4972 - }, - { - "epoch": 0.8478887497668967, - "grad_norm": 0.8120304942131042, - "learning_rate": 1.5208866155157717e-06, - "loss": 0.0732, - "step": 4973 - }, - { - "epoch": 0.8480592482084344, - "grad_norm": 1.8183575868606567, - "learning_rate": 1.5191815856777494e-06, - "loss": 0.1322, - "step": 4974 - }, - { - "epoch": 0.8482297466499721, - "grad_norm": 1.8287473917007446, - "learning_rate": 1.5174765558397273e-06, - "loss": 0.1104, - "step": 4975 - }, - { - "epoch": 0.8484002450915097, - "grad_norm": 1.7065812349319458, - "learning_rate": 1.515771526001705e-06, - "loss": 0.0899, - "step": 4976 - }, - { - "epoch": 0.8485707435330474, - "grad_norm": 2.2731716632843018, - "learning_rate": 1.514066496163683e-06, - "loss": 0.1503, - "step": 4977 - }, - { - "epoch": 0.8487412419745851, - "grad_norm": 1.0893604755401611, - "learning_rate": 1.5123614663256608e-06, - "loss": 0.0547, - "step": 4978 - }, - { - "epoch": 0.8489117404161227, - "grad_norm": 1.4843307733535767, - "learning_rate": 1.5106564364876385e-06, - "loss": 0.0802, - "step": 4979 - }, - { - "epoch": 0.8490822388576604, - "grad_norm": 1.09200119972229, - "learning_rate": 1.5089514066496164e-06, - "loss": 0.0775, - "step": 4980 - }, - { - "epoch": 0.8492527372991981, - "grad_norm": 1.0041292905807495, - "learning_rate": 1.5072463768115944e-06, - "loss": 0.0594, - "step": 4981 - }, - { - "epoch": 0.8494232357407359, - "grad_norm": 1.7999296188354492, - "learning_rate": 1.5055413469735721e-06, - "loss": 0.1413, - "step": 4982 - }, - { - "epoch": 0.8495937341822735, - "grad_norm": 2.3291573524475098, - "learning_rate": 1.5038363171355499e-06, - "loss": 0.1984, - "step": 4983 - }, - { - "epoch": 0.8497642326238112, - "grad_norm": 0.9111186265945435, - "learning_rate": 1.5021312872975278e-06, - "loss": 0.0399, - "step": 4984 - }, - { - "epoch": 0.8499347310653489, - "grad_norm": 1.2384296655654907, - "learning_rate": 1.5004262574595058e-06, - "loss": 0.085, - "step": 4985 - }, - { - "epoch": 0.8501052295068865, - "grad_norm": 1.2116646766662598, - "learning_rate": 1.4987212276214835e-06, - "loss": 0.0481, - "step": 4986 - }, - { - "epoch": 0.8502757279484242, - "grad_norm": 1.6571450233459473, - "learning_rate": 1.4970161977834612e-06, - "loss": 0.2027, - "step": 4987 - }, - { - "epoch": 0.8504462263899619, - "grad_norm": 3.2682974338531494, - "learning_rate": 1.495311167945439e-06, - "loss": 0.339, - "step": 4988 - }, - { - "epoch": 0.8506167248314996, - "grad_norm": 1.6870797872543335, - "learning_rate": 1.4936061381074171e-06, - "loss": 0.1268, - "step": 4989 - }, - { - "epoch": 0.8507872232730372, - "grad_norm": 1.3081997632980347, - "learning_rate": 1.4919011082693949e-06, - "loss": 0.0856, - "step": 4990 - }, - { - "epoch": 0.850957721714575, - "grad_norm": 1.368513584136963, - "learning_rate": 1.4901960784313726e-06, - "loss": 0.1201, - "step": 4991 - }, - { - "epoch": 0.8511282201561127, - "grad_norm": 1.4982609748840332, - "learning_rate": 1.4884910485933503e-06, - "loss": 0.1889, - "step": 4992 - }, - { - "epoch": 0.8512987185976503, - "grad_norm": 1.5080363750457764, - "learning_rate": 1.4867860187553285e-06, - "loss": 0.1494, - "step": 4993 - }, - { - "epoch": 0.851469217039188, - "grad_norm": 1.3966141939163208, - "learning_rate": 1.4850809889173062e-06, - "loss": 0.1247, - "step": 4994 - }, - { - "epoch": 0.8516397154807257, - "grad_norm": 1.3656100034713745, - "learning_rate": 1.483375959079284e-06, - "loss": 0.1148, - "step": 4995 - }, - { - "epoch": 0.8518102139222634, - "grad_norm": 1.749886393547058, - "learning_rate": 1.4816709292412617e-06, - "loss": 0.158, - "step": 4996 - }, - { - "epoch": 0.851980712363801, - "grad_norm": 0.9646751284599304, - "learning_rate": 1.4799658994032399e-06, - "loss": 0.0809, - "step": 4997 - }, - { - "epoch": 0.8521512108053387, - "grad_norm": 1.7525877952575684, - "learning_rate": 1.4782608695652176e-06, - "loss": 0.0756, - "step": 4998 - }, - { - "epoch": 0.8523217092468764, - "grad_norm": 1.801682472229004, - "learning_rate": 1.4765558397271953e-06, - "loss": 0.1975, - "step": 4999 - }, - { - "epoch": 0.852492207688414, - "grad_norm": 1.4232743978500366, - "learning_rate": 1.474850809889173e-06, - "loss": 0.0964, - "step": 5000 - }, - { - "epoch": 0.852492207688414, - "eval_f1_score": 0.40703517587939697, - "eval_loss": 0.1314370483160019, - "eval_runtime": 183.3181, - "eval_samples_per_second": 54.55, - "eval_steps_per_second": 3.409, - "step": 5000 - } - ], - "logging_steps": 1, - "max_steps": 5865, - "num_input_tokens_seen": 0, - "num_train_epochs": 1, - "save_steps": 500, - "stateful_callbacks": { - "TrainerControl": { - "args": { - "should_epoch_stop": false, - "should_evaluate": false, - "should_log": false, - "should_save": true, - "should_training_stop": false - }, - "attributes": {} - } - }, - "total_flos": 8.9352014266368e+17, - "train_batch_size": 8, - "trial_name": null, - "trial_params": null -}