{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.2711810543175663, "eval_steps": 500, "global_step": 200000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0006355905271587832, "grad_norm": 3.609375, "learning_rate": 2.0000000000000002e-07, "loss": 2.3296, "step": 100 }, { "epoch": 0.0012711810543175664, "grad_norm": 3.484375, "learning_rate": 4.0000000000000003e-07, "loss": 2.3266, "step": 200 }, { "epoch": 0.0019067715814763497, "grad_norm": 3.625, "learning_rate": 6.000000000000001e-07, "loss": 2.3437, "step": 300 }, { "epoch": 0.002542362108635133, "grad_norm": 3.640625, "learning_rate": 8.000000000000001e-07, "loss": 2.3306, "step": 400 }, { "epoch": 0.003177952635793916, "grad_norm": 3.25, "learning_rate": 1.0000000000000002e-06, "loss": 2.3232, "step": 500 }, { "epoch": 0.0038135431629526995, "grad_norm": 4.15625, "learning_rate": 1.2000000000000002e-06, "loss": 2.3369, "step": 600 }, { "epoch": 0.004449133690111483, "grad_norm": 4.125, "learning_rate": 1.4000000000000001e-06, "loss": 2.3436, "step": 700 }, { "epoch": 0.005084724217270266, "grad_norm": 2.6875, "learning_rate": 1.6000000000000001e-06, "loss": 2.3334, "step": 800 }, { "epoch": 0.0057203147444290494, "grad_norm": 3.484375, "learning_rate": 1.8000000000000001e-06, "loss": 2.3337, "step": 900 }, { "epoch": 0.006355905271587832, "grad_norm": 4.65625, "learning_rate": 2.0000000000000003e-06, "loss": 2.3052, "step": 1000 }, { "epoch": 0.006991495798746615, "grad_norm": 3.453125, "learning_rate": 2.2e-06, "loss": 2.3237, "step": 1100 }, { "epoch": 0.007627086325905399, "grad_norm": 3.390625, "learning_rate": 2.4000000000000003e-06, "loss": 2.3015, "step": 1200 }, { "epoch": 0.008262676853064182, "grad_norm": 2.8125, "learning_rate": 2.6e-06, "loss": 2.3166, "step": 1300 }, { "epoch": 0.008898267380222966, "grad_norm": 3.484375, "learning_rate": 2.8000000000000003e-06, "loss": 2.2848, "step": 1400 }, { "epoch": 0.009533857907381748, "grad_norm": 2.453125, "learning_rate": 3e-06, "loss": 2.2821, "step": 1500 }, { "epoch": 0.010169448434540531, "grad_norm": 2.96875, "learning_rate": 3.2000000000000003e-06, "loss": 2.2781, "step": 1600 }, { "epoch": 0.010805038961699315, "grad_norm": 3.0, "learning_rate": 3.4000000000000005e-06, "loss": 2.2595, "step": 1700 }, { "epoch": 0.011440629488858099, "grad_norm": 2.28125, "learning_rate": 3.6000000000000003e-06, "loss": 2.2409, "step": 1800 }, { "epoch": 0.012076220016016881, "grad_norm": 2.9375, "learning_rate": 3.8000000000000005e-06, "loss": 2.2393, "step": 1900 }, { "epoch": 0.012711810543175665, "grad_norm": 2.34375, "learning_rate": 4.000000000000001e-06, "loss": 2.2361, "step": 2000 }, { "epoch": 0.013347401070334448, "grad_norm": 2.234375, "learning_rate": 4.2000000000000004e-06, "loss": 2.2238, "step": 2100 }, { "epoch": 0.01398299159749323, "grad_norm": 2.5625, "learning_rate": 4.4e-06, "loss": 2.2031, "step": 2200 }, { "epoch": 0.014618582124652014, "grad_norm": 2.65625, "learning_rate": 4.600000000000001e-06, "loss": 2.1762, "step": 2300 }, { "epoch": 0.015254172651810798, "grad_norm": 2.484375, "learning_rate": 4.800000000000001e-06, "loss": 2.1772, "step": 2400 }, { "epoch": 0.01588976317896958, "grad_norm": 2.265625, "learning_rate": 5e-06, "loss": 2.147, "step": 2500 }, { "epoch": 0.016525353706128364, "grad_norm": 2.390625, "learning_rate": 5.2e-06, "loss": 2.1295, "step": 2600 }, { "epoch": 0.017160944233287147, "grad_norm": 2.28125, "learning_rate": 5.400000000000001e-06, "loss": 2.0829, "step": 2700 }, { "epoch": 0.01779653476044593, "grad_norm": 2.734375, "learning_rate": 5.600000000000001e-06, "loss": 2.082, "step": 2800 }, { "epoch": 0.018432125287604715, "grad_norm": 2.78125, "learning_rate": 5.8e-06, "loss": 2.02, "step": 2900 }, { "epoch": 0.019067715814763495, "grad_norm": 3.53125, "learning_rate": 6e-06, "loss": 2.003, "step": 3000 }, { "epoch": 0.01970330634192228, "grad_norm": 2.203125, "learning_rate": 6.200000000000001e-06, "loss": 1.9711, "step": 3100 }, { "epoch": 0.020338896869081063, "grad_norm": 2.328125, "learning_rate": 6.4000000000000006e-06, "loss": 1.9499, "step": 3200 }, { "epoch": 0.020974487396239846, "grad_norm": 1.703125, "learning_rate": 6.600000000000001e-06, "loss": 1.9166, "step": 3300 }, { "epoch": 0.02161007792339863, "grad_norm": 1.5703125, "learning_rate": 6.800000000000001e-06, "loss": 1.9366, "step": 3400 }, { "epoch": 0.022245668450557414, "grad_norm": 1.015625, "learning_rate": 7e-06, "loss": 1.9282, "step": 3500 }, { "epoch": 0.022881258977716198, "grad_norm": 1.265625, "learning_rate": 7.2000000000000005e-06, "loss": 1.9043, "step": 3600 }, { "epoch": 0.023516849504874978, "grad_norm": 1.421875, "learning_rate": 7.4e-06, "loss": 1.8715, "step": 3700 }, { "epoch": 0.024152440032033762, "grad_norm": 1.0234375, "learning_rate": 7.600000000000001e-06, "loss": 1.8862, "step": 3800 }, { "epoch": 0.024788030559192546, "grad_norm": 1.078125, "learning_rate": 7.800000000000002e-06, "loss": 1.8584, "step": 3900 }, { "epoch": 0.02542362108635133, "grad_norm": 0.83203125, "learning_rate": 8.000000000000001e-06, "loss": 1.8515, "step": 4000 }, { "epoch": 0.026059211613510113, "grad_norm": 0.8671875, "learning_rate": 8.2e-06, "loss": 1.8317, "step": 4100 }, { "epoch": 0.026694802140668897, "grad_norm": 1.0390625, "learning_rate": 8.400000000000001e-06, "loss": 1.8488, "step": 4200 }, { "epoch": 0.027330392667827677, "grad_norm": 0.84765625, "learning_rate": 8.6e-06, "loss": 1.8256, "step": 4300 }, { "epoch": 0.02796598319498646, "grad_norm": 0.81640625, "learning_rate": 8.8e-06, "loss": 1.8079, "step": 4400 }, { "epoch": 0.028601573722145245, "grad_norm": 0.7109375, "learning_rate": 9e-06, "loss": 1.8301, "step": 4500 }, { "epoch": 0.02923716424930403, "grad_norm": 0.80078125, "learning_rate": 9.200000000000002e-06, "loss": 1.8207, "step": 4600 }, { "epoch": 0.029872754776462812, "grad_norm": 0.73046875, "learning_rate": 9.4e-06, "loss": 1.8153, "step": 4700 }, { "epoch": 0.030508345303621596, "grad_norm": 1.234375, "learning_rate": 9.600000000000001e-06, "loss": 1.7974, "step": 4800 }, { "epoch": 0.03114393583078038, "grad_norm": 1.03125, "learning_rate": 9.800000000000001e-06, "loss": 1.7669, "step": 4900 }, { "epoch": 0.03177952635793916, "grad_norm": 0.94921875, "learning_rate": 1e-05, "loss": 1.7712, "step": 5000 }, { "epoch": 0.03241511688509795, "grad_norm": 0.5546875, "learning_rate": 9.994871794871795e-06, "loss": 1.7679, "step": 5100 }, { "epoch": 0.03305070741225673, "grad_norm": 0.68359375, "learning_rate": 9.98974358974359e-06, "loss": 1.7683, "step": 5200 }, { "epoch": 0.03368629793941551, "grad_norm": 1.734375, "learning_rate": 9.984615384615386e-06, "loss": 1.777, "step": 5300 }, { "epoch": 0.034321888466574295, "grad_norm": 0.89453125, "learning_rate": 9.97948717948718e-06, "loss": 1.7545, "step": 5400 }, { "epoch": 0.034957478993733075, "grad_norm": 0.4765625, "learning_rate": 9.974358974358974e-06, "loss": 1.7718, "step": 5500 }, { "epoch": 0.03559306952089186, "grad_norm": 0.76953125, "learning_rate": 9.96923076923077e-06, "loss": 1.7687, "step": 5600 }, { "epoch": 0.03622866004805064, "grad_norm": 0.77734375, "learning_rate": 9.964102564102564e-06, "loss": 1.7664, "step": 5700 }, { "epoch": 0.03686425057520943, "grad_norm": 0.7265625, "learning_rate": 9.95897435897436e-06, "loss": 1.768, "step": 5800 }, { "epoch": 0.03749984110236821, "grad_norm": 1.359375, "learning_rate": 9.953846153846156e-06, "loss": 1.7759, "step": 5900 }, { "epoch": 0.03813543162952699, "grad_norm": 0.83984375, "learning_rate": 9.94871794871795e-06, "loss": 1.7379, "step": 6000 }, { "epoch": 0.03877102215668578, "grad_norm": 0.62109375, "learning_rate": 9.943589743589744e-06, "loss": 1.7611, "step": 6100 }, { "epoch": 0.03940661268384456, "grad_norm": 0.53515625, "learning_rate": 9.93846153846154e-06, "loss": 1.7453, "step": 6200 }, { "epoch": 0.040042203211003345, "grad_norm": 0.51953125, "learning_rate": 9.933333333333334e-06, "loss": 1.7493, "step": 6300 }, { "epoch": 0.040677793738162125, "grad_norm": 0.7265625, "learning_rate": 9.92820512820513e-06, "loss": 1.7308, "step": 6400 }, { "epoch": 0.04131338426532091, "grad_norm": 0.5078125, "learning_rate": 9.923076923076923e-06, "loss": 1.7557, "step": 6500 }, { "epoch": 0.04194897479247969, "grad_norm": 0.76171875, "learning_rate": 9.91794871794872e-06, "loss": 1.7244, "step": 6600 }, { "epoch": 0.04258456531963847, "grad_norm": 0.93359375, "learning_rate": 9.912820512820513e-06, "loss": 1.7365, "step": 6700 }, { "epoch": 0.04322015584679726, "grad_norm": 0.6796875, "learning_rate": 9.907692307692309e-06, "loss": 1.7416, "step": 6800 }, { "epoch": 0.04385574637395604, "grad_norm": 0.8671875, "learning_rate": 9.902564102564103e-06, "loss": 1.744, "step": 6900 }, { "epoch": 0.04449133690111483, "grad_norm": 0.40234375, "learning_rate": 9.897435897435899e-06, "loss": 1.7343, "step": 7000 }, { "epoch": 0.04512692742827361, "grad_norm": 0.640625, "learning_rate": 9.892307692307693e-06, "loss": 1.7476, "step": 7100 }, { "epoch": 0.045762517955432395, "grad_norm": 0.78515625, "learning_rate": 9.887179487179489e-06, "loss": 1.7382, "step": 7200 }, { "epoch": 0.046398108482591176, "grad_norm": 0.64453125, "learning_rate": 9.882051282051283e-06, "loss": 1.7376, "step": 7300 }, { "epoch": 0.047033699009749956, "grad_norm": 0.546875, "learning_rate": 9.876923076923077e-06, "loss": 1.73, "step": 7400 }, { "epoch": 0.04766928953690874, "grad_norm": 0.875, "learning_rate": 9.871794871794872e-06, "loss": 1.7139, "step": 7500 }, { "epoch": 0.048304880064067524, "grad_norm": 0.8125, "learning_rate": 9.866666666666668e-06, "loss": 1.7271, "step": 7600 }, { "epoch": 0.04894047059122631, "grad_norm": 0.8125, "learning_rate": 9.861538461538462e-06, "loss": 1.728, "step": 7700 }, { "epoch": 0.04957606111838509, "grad_norm": 1.078125, "learning_rate": 9.856410256410256e-06, "loss": 1.7238, "step": 7800 }, { "epoch": 0.05021165164554388, "grad_norm": 0.703125, "learning_rate": 9.851282051282052e-06, "loss": 1.7245, "step": 7900 }, { "epoch": 0.05084724217270266, "grad_norm": 0.6484375, "learning_rate": 9.846153846153848e-06, "loss": 1.7213, "step": 8000 }, { "epoch": 0.05148283269986144, "grad_norm": 1.046875, "learning_rate": 9.841025641025642e-06, "loss": 1.7318, "step": 8100 }, { "epoch": 0.052118423227020226, "grad_norm": 0.4375, "learning_rate": 9.835897435897438e-06, "loss": 1.7245, "step": 8200 }, { "epoch": 0.052754013754179006, "grad_norm": 0.6015625, "learning_rate": 9.830769230769232e-06, "loss": 1.7085, "step": 8300 }, { "epoch": 0.053389604281337794, "grad_norm": 0.5625, "learning_rate": 9.825641025641026e-06, "loss": 1.714, "step": 8400 }, { "epoch": 0.054025194808496574, "grad_norm": 0.56640625, "learning_rate": 9.820512820512821e-06, "loss": 1.7158, "step": 8500 }, { "epoch": 0.054660785335655354, "grad_norm": 0.6796875, "learning_rate": 9.815384615384617e-06, "loss": 1.7205, "step": 8600 }, { "epoch": 0.05529637586281414, "grad_norm": 0.85546875, "learning_rate": 9.810256410256411e-06, "loss": 1.72, "step": 8700 }, { "epoch": 0.05593196638997292, "grad_norm": 1.0546875, "learning_rate": 9.805128205128205e-06, "loss": 1.7039, "step": 8800 }, { "epoch": 0.05656755691713171, "grad_norm": 0.8671875, "learning_rate": 9.800000000000001e-06, "loss": 1.7238, "step": 8900 }, { "epoch": 0.05720314744429049, "grad_norm": 0.78125, "learning_rate": 9.794871794871795e-06, "loss": 1.7165, "step": 9000 }, { "epoch": 0.057838737971449276, "grad_norm": 0.65625, "learning_rate": 9.78974358974359e-06, "loss": 1.7248, "step": 9100 }, { "epoch": 0.05847432849860806, "grad_norm": 0.6328125, "learning_rate": 9.784615384615387e-06, "loss": 1.7327, "step": 9200 }, { "epoch": 0.05910991902576684, "grad_norm": 0.5546875, "learning_rate": 9.77948717948718e-06, "loss": 1.726, "step": 9300 }, { "epoch": 0.059745509552925624, "grad_norm": 0.78125, "learning_rate": 9.774358974358975e-06, "loss": 1.7145, "step": 9400 }, { "epoch": 0.060381100080084404, "grad_norm": 0.65234375, "learning_rate": 9.76923076923077e-06, "loss": 1.7061, "step": 9500 }, { "epoch": 0.06101669060724319, "grad_norm": 0.703125, "learning_rate": 9.764102564102564e-06, "loss": 1.7186, "step": 9600 }, { "epoch": 0.06165228113440197, "grad_norm": 0.8203125, "learning_rate": 9.75897435897436e-06, "loss": 1.7153, "step": 9700 }, { "epoch": 0.06228787166156076, "grad_norm": 1.0859375, "learning_rate": 9.753846153846154e-06, "loss": 1.7038, "step": 9800 }, { "epoch": 0.06292346218871954, "grad_norm": 0.8984375, "learning_rate": 9.74871794871795e-06, "loss": 1.7135, "step": 9900 }, { "epoch": 0.06355905271587832, "grad_norm": 0.6328125, "learning_rate": 9.743589743589744e-06, "loss": 1.6987, "step": 10000 }, { "epoch": 0.0641946432430371, "grad_norm": 0.6875, "learning_rate": 9.738461538461538e-06, "loss": 1.6867, "step": 10100 }, { "epoch": 0.0648302337701959, "grad_norm": 0.75390625, "learning_rate": 9.733333333333334e-06, "loss": 1.7, "step": 10200 }, { "epoch": 0.06546582429735467, "grad_norm": 0.74609375, "learning_rate": 9.72820512820513e-06, "loss": 1.7057, "step": 10300 }, { "epoch": 0.06610141482451345, "grad_norm": 0.65234375, "learning_rate": 9.723076923076924e-06, "loss": 1.7206, "step": 10400 }, { "epoch": 0.06673700535167224, "grad_norm": 0.7734375, "learning_rate": 9.71794871794872e-06, "loss": 1.694, "step": 10500 }, { "epoch": 0.06737259587883102, "grad_norm": 0.49609375, "learning_rate": 9.712820512820513e-06, "loss": 1.7058, "step": 10600 }, { "epoch": 0.06800818640598981, "grad_norm": 0.66015625, "learning_rate": 9.707692307692308e-06, "loss": 1.7093, "step": 10700 }, { "epoch": 0.06864377693314859, "grad_norm": 1.046875, "learning_rate": 9.702564102564103e-06, "loss": 1.6968, "step": 10800 }, { "epoch": 0.06927936746030737, "grad_norm": 0.5390625, "learning_rate": 9.697435897435899e-06, "loss": 1.6991, "step": 10900 }, { "epoch": 0.06991495798746615, "grad_norm": 0.92578125, "learning_rate": 9.692307692307693e-06, "loss": 1.7002, "step": 11000 }, { "epoch": 0.07055054851462494, "grad_norm": 0.7421875, "learning_rate": 9.687179487179487e-06, "loss": 1.7096, "step": 11100 }, { "epoch": 0.07118613904178372, "grad_norm": 0.703125, "learning_rate": 9.682051282051283e-06, "loss": 1.6926, "step": 11200 }, { "epoch": 0.0718217295689425, "grad_norm": 0.59765625, "learning_rate": 9.676923076923079e-06, "loss": 1.6882, "step": 11300 }, { "epoch": 0.07245732009610129, "grad_norm": 0.62109375, "learning_rate": 9.671794871794873e-06, "loss": 1.6937, "step": 11400 }, { "epoch": 0.07309291062326007, "grad_norm": 1.2578125, "learning_rate": 9.666666666666667e-06, "loss": 1.7164, "step": 11500 }, { "epoch": 0.07372850115041886, "grad_norm": 0.70703125, "learning_rate": 9.661538461538462e-06, "loss": 1.6886, "step": 11600 }, { "epoch": 0.07436409167757764, "grad_norm": 0.5546875, "learning_rate": 9.656410256410257e-06, "loss": 1.7014, "step": 11700 }, { "epoch": 0.07499968220473642, "grad_norm": 0.66015625, "learning_rate": 9.651282051282052e-06, "loss": 1.7044, "step": 11800 }, { "epoch": 0.0756352727318952, "grad_norm": 0.8359375, "learning_rate": 9.646153846153848e-06, "loss": 1.7096, "step": 11900 }, { "epoch": 0.07627086325905398, "grad_norm": 0.72265625, "learning_rate": 9.641025641025642e-06, "loss": 1.7007, "step": 12000 }, { "epoch": 0.07690645378621278, "grad_norm": 0.93359375, "learning_rate": 9.635897435897436e-06, "loss": 1.6985, "step": 12100 }, { "epoch": 0.07754204431337156, "grad_norm": 1.0703125, "learning_rate": 9.630769230769232e-06, "loss": 1.6963, "step": 12200 }, { "epoch": 0.07817763484053034, "grad_norm": 0.6015625, "learning_rate": 9.625641025641026e-06, "loss": 1.6804, "step": 12300 }, { "epoch": 0.07881322536768912, "grad_norm": 0.9609375, "learning_rate": 9.620512820512822e-06, "loss": 1.6936, "step": 12400 }, { "epoch": 0.07944881589484791, "grad_norm": 0.90625, "learning_rate": 9.615384615384616e-06, "loss": 1.697, "step": 12500 }, { "epoch": 0.08008440642200669, "grad_norm": 0.6796875, "learning_rate": 9.610256410256411e-06, "loss": 1.6945, "step": 12600 }, { "epoch": 0.08071999694916547, "grad_norm": 0.6015625, "learning_rate": 9.605128205128206e-06, "loss": 1.6923, "step": 12700 }, { "epoch": 0.08135558747632425, "grad_norm": 0.5546875, "learning_rate": 9.600000000000001e-06, "loss": 1.6909, "step": 12800 }, { "epoch": 0.08199117800348303, "grad_norm": 0.64453125, "learning_rate": 9.594871794871797e-06, "loss": 1.688, "step": 12900 }, { "epoch": 0.08262676853064183, "grad_norm": 0.66796875, "learning_rate": 9.589743589743591e-06, "loss": 1.6987, "step": 13000 }, { "epoch": 0.0832623590578006, "grad_norm": 0.6015625, "learning_rate": 9.584615384615385e-06, "loss": 1.6974, "step": 13100 }, { "epoch": 0.08389794958495939, "grad_norm": 0.6640625, "learning_rate": 9.579487179487181e-06, "loss": 1.6831, "step": 13200 }, { "epoch": 0.08453354011211817, "grad_norm": 0.40625, "learning_rate": 9.574358974358975e-06, "loss": 1.6862, "step": 13300 }, { "epoch": 0.08516913063927695, "grad_norm": 0.53515625, "learning_rate": 9.569230769230769e-06, "loss": 1.6805, "step": 13400 }, { "epoch": 0.08580472116643574, "grad_norm": 0.73046875, "learning_rate": 9.564102564102565e-06, "loss": 1.6963, "step": 13500 }, { "epoch": 0.08644031169359452, "grad_norm": 0.78515625, "learning_rate": 9.55897435897436e-06, "loss": 1.6835, "step": 13600 }, { "epoch": 0.0870759022207533, "grad_norm": 0.55078125, "learning_rate": 9.553846153846155e-06, "loss": 1.6947, "step": 13700 }, { "epoch": 0.08771149274791208, "grad_norm": 0.890625, "learning_rate": 9.548717948717949e-06, "loss": 1.6866, "step": 13800 }, { "epoch": 0.08834708327507086, "grad_norm": 0.625, "learning_rate": 9.543589743589744e-06, "loss": 1.6842, "step": 13900 }, { "epoch": 0.08898267380222966, "grad_norm": 0.5078125, "learning_rate": 9.53846153846154e-06, "loss": 1.6714, "step": 14000 }, { "epoch": 0.08961826432938844, "grad_norm": 0.5859375, "learning_rate": 9.533333333333334e-06, "loss": 1.679, "step": 14100 }, { "epoch": 0.09025385485654722, "grad_norm": 0.78125, "learning_rate": 9.52820512820513e-06, "loss": 1.6923, "step": 14200 }, { "epoch": 0.090889445383706, "grad_norm": 0.51953125, "learning_rate": 9.523076923076924e-06, "loss": 1.6745, "step": 14300 }, { "epoch": 0.09152503591086479, "grad_norm": 0.76953125, "learning_rate": 9.517948717948718e-06, "loss": 1.6871, "step": 14400 }, { "epoch": 0.09216062643802357, "grad_norm": 0.71875, "learning_rate": 9.512820512820514e-06, "loss": 1.6795, "step": 14500 }, { "epoch": 0.09279621696518235, "grad_norm": 1.140625, "learning_rate": 9.50769230769231e-06, "loss": 1.6779, "step": 14600 }, { "epoch": 0.09343180749234113, "grad_norm": 1.0546875, "learning_rate": 9.502564102564103e-06, "loss": 1.6846, "step": 14700 }, { "epoch": 0.09406739801949991, "grad_norm": 1.046875, "learning_rate": 9.497435897435898e-06, "loss": 1.6762, "step": 14800 }, { "epoch": 0.0947029885466587, "grad_norm": 0.7421875, "learning_rate": 9.492307692307693e-06, "loss": 1.6929, "step": 14900 }, { "epoch": 0.09533857907381749, "grad_norm": 0.734375, "learning_rate": 9.487179487179487e-06, "loss": 1.6791, "step": 15000 }, { "epoch": 0.09597416960097627, "grad_norm": 0.78515625, "learning_rate": 9.482051282051283e-06, "loss": 1.677, "step": 15100 }, { "epoch": 0.09660976012813505, "grad_norm": 0.83984375, "learning_rate": 9.476923076923079e-06, "loss": 1.6817, "step": 15200 }, { "epoch": 0.09724535065529383, "grad_norm": 0.7265625, "learning_rate": 9.471794871794873e-06, "loss": 1.6632, "step": 15300 }, { "epoch": 0.09788094118245262, "grad_norm": 0.55859375, "learning_rate": 9.466666666666667e-06, "loss": 1.681, "step": 15400 }, { "epoch": 0.0985165317096114, "grad_norm": 0.6484375, "learning_rate": 9.461538461538463e-06, "loss": 1.681, "step": 15500 }, { "epoch": 0.09915212223677018, "grad_norm": 0.60546875, "learning_rate": 9.456410256410257e-06, "loss": 1.6915, "step": 15600 }, { "epoch": 0.09978771276392896, "grad_norm": 0.76953125, "learning_rate": 9.451282051282052e-06, "loss": 1.6797, "step": 15700 }, { "epoch": 0.10042330329108776, "grad_norm": 0.7578125, "learning_rate": 9.446153846153847e-06, "loss": 1.664, "step": 15800 }, { "epoch": 0.10105889381824654, "grad_norm": 0.77734375, "learning_rate": 9.441025641025642e-06, "loss": 1.6738, "step": 15900 }, { "epoch": 0.10169448434540532, "grad_norm": 0.671875, "learning_rate": 9.435897435897436e-06, "loss": 1.6822, "step": 16000 }, { "epoch": 0.1023300748725641, "grad_norm": 0.73828125, "learning_rate": 9.43076923076923e-06, "loss": 1.6828, "step": 16100 }, { "epoch": 0.10296566539972288, "grad_norm": 0.498046875, "learning_rate": 9.425641025641026e-06, "loss": 1.6714, "step": 16200 }, { "epoch": 0.10360125592688167, "grad_norm": 0.46484375, "learning_rate": 9.420512820512822e-06, "loss": 1.692, "step": 16300 }, { "epoch": 0.10423684645404045, "grad_norm": 0.921875, "learning_rate": 9.415384615384616e-06, "loss": 1.6833, "step": 16400 }, { "epoch": 0.10487243698119923, "grad_norm": 0.7578125, "learning_rate": 9.410256410256412e-06, "loss": 1.6835, "step": 16500 }, { "epoch": 0.10550802750835801, "grad_norm": 0.8359375, "learning_rate": 9.405128205128206e-06, "loss": 1.6738, "step": 16600 }, { "epoch": 0.10614361803551679, "grad_norm": 0.6796875, "learning_rate": 9.4e-06, "loss": 1.6769, "step": 16700 }, { "epoch": 0.10677920856267559, "grad_norm": 1.078125, "learning_rate": 9.394871794871796e-06, "loss": 1.677, "step": 16800 }, { "epoch": 0.10741479908983437, "grad_norm": 0.578125, "learning_rate": 9.389743589743591e-06, "loss": 1.6887, "step": 16900 }, { "epoch": 0.10805038961699315, "grad_norm": 0.62109375, "learning_rate": 9.384615384615385e-06, "loss": 1.6889, "step": 17000 }, { "epoch": 0.10868598014415193, "grad_norm": 0.828125, "learning_rate": 9.37948717948718e-06, "loss": 1.6689, "step": 17100 }, { "epoch": 0.10932157067131071, "grad_norm": 0.84375, "learning_rate": 9.374358974358975e-06, "loss": 1.6851, "step": 17200 }, { "epoch": 0.1099571611984695, "grad_norm": 0.71484375, "learning_rate": 9.369230769230771e-06, "loss": 1.6878, "step": 17300 }, { "epoch": 0.11059275172562828, "grad_norm": 0.6640625, "learning_rate": 9.364102564102565e-06, "loss": 1.6609, "step": 17400 }, { "epoch": 0.11122834225278706, "grad_norm": 0.7890625, "learning_rate": 9.358974358974359e-06, "loss": 1.6778, "step": 17500 }, { "epoch": 0.11186393277994584, "grad_norm": 0.7265625, "learning_rate": 9.353846153846155e-06, "loss": 1.674, "step": 17600 }, { "epoch": 0.11249952330710464, "grad_norm": 0.65625, "learning_rate": 9.348717948717949e-06, "loss": 1.6686, "step": 17700 }, { "epoch": 0.11313511383426342, "grad_norm": 0.474609375, "learning_rate": 9.343589743589745e-06, "loss": 1.6728, "step": 17800 }, { "epoch": 0.1137707043614222, "grad_norm": 0.482421875, "learning_rate": 9.33846153846154e-06, "loss": 1.6664, "step": 17900 }, { "epoch": 0.11440629488858098, "grad_norm": 0.7890625, "learning_rate": 9.333333333333334e-06, "loss": 1.6746, "step": 18000 }, { "epoch": 0.11504188541573976, "grad_norm": 1.1015625, "learning_rate": 9.328205128205128e-06, "loss": 1.6738, "step": 18100 }, { "epoch": 0.11567747594289855, "grad_norm": 0.76171875, "learning_rate": 9.323076923076924e-06, "loss": 1.6628, "step": 18200 }, { "epoch": 0.11631306647005733, "grad_norm": 0.48828125, "learning_rate": 9.317948717948718e-06, "loss": 1.6718, "step": 18300 }, { "epoch": 0.11694865699721611, "grad_norm": 0.7890625, "learning_rate": 9.312820512820514e-06, "loss": 1.6789, "step": 18400 }, { "epoch": 0.1175842475243749, "grad_norm": 0.94140625, "learning_rate": 9.307692307692308e-06, "loss": 1.6618, "step": 18500 }, { "epoch": 0.11821983805153367, "grad_norm": 1.1640625, "learning_rate": 9.302564102564104e-06, "loss": 1.6648, "step": 18600 }, { "epoch": 0.11885542857869247, "grad_norm": 0.69140625, "learning_rate": 9.297435897435898e-06, "loss": 1.683, "step": 18700 }, { "epoch": 0.11949101910585125, "grad_norm": 0.6640625, "learning_rate": 9.292307692307694e-06, "loss": 1.6457, "step": 18800 }, { "epoch": 0.12012660963301003, "grad_norm": 0.84375, "learning_rate": 9.28717948717949e-06, "loss": 1.6728, "step": 18900 }, { "epoch": 0.12076220016016881, "grad_norm": 0.65234375, "learning_rate": 9.282051282051283e-06, "loss": 1.683, "step": 19000 }, { "epoch": 0.12139779068732759, "grad_norm": 0.90625, "learning_rate": 9.276923076923077e-06, "loss": 1.6626, "step": 19100 }, { "epoch": 0.12203338121448638, "grad_norm": 0.765625, "learning_rate": 9.271794871794873e-06, "loss": 1.6704, "step": 19200 }, { "epoch": 0.12266897174164516, "grad_norm": 0.79296875, "learning_rate": 9.266666666666667e-06, "loss": 1.6608, "step": 19300 }, { "epoch": 0.12330456226880394, "grad_norm": 0.62890625, "learning_rate": 9.261538461538461e-06, "loss": 1.6761, "step": 19400 }, { "epoch": 0.12394015279596272, "grad_norm": 1.015625, "learning_rate": 9.256410256410257e-06, "loss": 1.6537, "step": 19500 }, { "epoch": 0.12457574332312152, "grad_norm": 0.478515625, "learning_rate": 9.251282051282053e-06, "loss": 1.6732, "step": 19600 }, { "epoch": 0.1252113338502803, "grad_norm": 0.90625, "learning_rate": 9.246153846153847e-06, "loss": 1.6708, "step": 19700 }, { "epoch": 0.12584692437743908, "grad_norm": 0.5859375, "learning_rate": 9.24102564102564e-06, "loss": 1.6619, "step": 19800 }, { "epoch": 0.12648251490459786, "grad_norm": 0.68359375, "learning_rate": 9.235897435897437e-06, "loss": 1.6666, "step": 19900 }, { "epoch": 0.12711810543175664, "grad_norm": 0.58984375, "learning_rate": 9.230769230769232e-06, "loss": 1.6725, "step": 20000 }, { "epoch": 0.12775369595891542, "grad_norm": 0.65234375, "learning_rate": 9.225641025641026e-06, "loss": 1.6689, "step": 20100 }, { "epoch": 0.1283892864860742, "grad_norm": 0.83984375, "learning_rate": 9.220512820512822e-06, "loss": 1.6735, "step": 20200 }, { "epoch": 0.129024877013233, "grad_norm": 0.69921875, "learning_rate": 9.215384615384616e-06, "loss": 1.6652, "step": 20300 }, { "epoch": 0.1296604675403918, "grad_norm": 0.7578125, "learning_rate": 9.21025641025641e-06, "loss": 1.6662, "step": 20400 }, { "epoch": 0.13029605806755057, "grad_norm": 0.6328125, "learning_rate": 9.205128205128206e-06, "loss": 1.6553, "step": 20500 }, { "epoch": 0.13093164859470935, "grad_norm": 0.51171875, "learning_rate": 9.200000000000002e-06, "loss": 1.6571, "step": 20600 }, { "epoch": 0.13156723912186813, "grad_norm": 0.478515625, "learning_rate": 9.194871794871796e-06, "loss": 1.6626, "step": 20700 }, { "epoch": 0.1322028296490269, "grad_norm": 0.94921875, "learning_rate": 9.18974358974359e-06, "loss": 1.6703, "step": 20800 }, { "epoch": 0.1328384201761857, "grad_norm": 0.6015625, "learning_rate": 9.184615384615386e-06, "loss": 1.6563, "step": 20900 }, { "epoch": 0.13347401070334447, "grad_norm": 0.9609375, "learning_rate": 9.17948717948718e-06, "loss": 1.6549, "step": 21000 }, { "epoch": 0.13410960123050325, "grad_norm": 0.62890625, "learning_rate": 9.174358974358975e-06, "loss": 1.6777, "step": 21100 }, { "epoch": 0.13474519175766203, "grad_norm": 0.765625, "learning_rate": 9.169230769230771e-06, "loss": 1.6478, "step": 21200 }, { "epoch": 0.13538078228482084, "grad_norm": 0.98828125, "learning_rate": 9.164102564102565e-06, "loss": 1.6671, "step": 21300 }, { "epoch": 0.13601637281197962, "grad_norm": 0.78515625, "learning_rate": 9.15897435897436e-06, "loss": 1.6846, "step": 21400 }, { "epoch": 0.1366519633391384, "grad_norm": 0.97265625, "learning_rate": 9.153846153846155e-06, "loss": 1.6869, "step": 21500 }, { "epoch": 0.13728755386629718, "grad_norm": 0.69921875, "learning_rate": 9.148717948717949e-06, "loss": 1.6531, "step": 21600 }, { "epoch": 0.13792314439345596, "grad_norm": 0.984375, "learning_rate": 9.143589743589745e-06, "loss": 1.669, "step": 21700 }, { "epoch": 0.13855873492061474, "grad_norm": 0.65234375, "learning_rate": 9.138461538461539e-06, "loss": 1.6553, "step": 21800 }, { "epoch": 0.13919432544777352, "grad_norm": 0.61328125, "learning_rate": 9.133333333333335e-06, "loss": 1.6653, "step": 21900 }, { "epoch": 0.1398299159749323, "grad_norm": 0.671875, "learning_rate": 9.128205128205129e-06, "loss": 1.6617, "step": 22000 }, { "epoch": 0.14046550650209108, "grad_norm": 0.69921875, "learning_rate": 9.123076923076923e-06, "loss": 1.6819, "step": 22100 }, { "epoch": 0.1411010970292499, "grad_norm": 0.796875, "learning_rate": 9.117948717948718e-06, "loss": 1.6546, "step": 22200 }, { "epoch": 0.14173668755640867, "grad_norm": 0.83203125, "learning_rate": 9.112820512820514e-06, "loss": 1.6635, "step": 22300 }, { "epoch": 0.14237227808356745, "grad_norm": 0.5859375, "learning_rate": 9.107692307692308e-06, "loss": 1.6673, "step": 22400 }, { "epoch": 0.14300786861072623, "grad_norm": 0.77734375, "learning_rate": 9.102564102564104e-06, "loss": 1.6683, "step": 22500 }, { "epoch": 0.143643459137885, "grad_norm": 0.796875, "learning_rate": 9.097435897435898e-06, "loss": 1.666, "step": 22600 }, { "epoch": 0.1442790496650438, "grad_norm": 0.6484375, "learning_rate": 9.092307692307692e-06, "loss": 1.6557, "step": 22700 }, { "epoch": 0.14491464019220257, "grad_norm": 0.9453125, "learning_rate": 9.087179487179488e-06, "loss": 1.6566, "step": 22800 }, { "epoch": 0.14555023071936135, "grad_norm": 0.65234375, "learning_rate": 9.082051282051284e-06, "loss": 1.6515, "step": 22900 }, { "epoch": 0.14618582124652013, "grad_norm": 0.80859375, "learning_rate": 9.076923076923078e-06, "loss": 1.6468, "step": 23000 }, { "epoch": 0.1468214117736789, "grad_norm": 0.66796875, "learning_rate": 9.071794871794872e-06, "loss": 1.6619, "step": 23100 }, { "epoch": 0.14745700230083772, "grad_norm": 0.85546875, "learning_rate": 9.066666666666667e-06, "loss": 1.6479, "step": 23200 }, { "epoch": 0.1480925928279965, "grad_norm": 0.625, "learning_rate": 9.061538461538463e-06, "loss": 1.6581, "step": 23300 }, { "epoch": 0.14872818335515528, "grad_norm": 0.59765625, "learning_rate": 9.056410256410257e-06, "loss": 1.6668, "step": 23400 }, { "epoch": 0.14936377388231406, "grad_norm": 0.6953125, "learning_rate": 9.051282051282051e-06, "loss": 1.664, "step": 23500 }, { "epoch": 0.14999936440947284, "grad_norm": 0.87890625, "learning_rate": 9.046153846153847e-06, "loss": 1.6472, "step": 23600 }, { "epoch": 0.15063495493663162, "grad_norm": 1.0078125, "learning_rate": 9.041025641025641e-06, "loss": 1.6529, "step": 23700 }, { "epoch": 0.1512705454637904, "grad_norm": 0.5703125, "learning_rate": 9.035897435897437e-06, "loss": 1.6544, "step": 23800 }, { "epoch": 0.15190613599094918, "grad_norm": 0.72265625, "learning_rate": 9.030769230769233e-06, "loss": 1.6549, "step": 23900 }, { "epoch": 0.15254172651810796, "grad_norm": 0.65625, "learning_rate": 9.025641025641027e-06, "loss": 1.6627, "step": 24000 }, { "epoch": 0.15317731704526677, "grad_norm": 0.7421875, "learning_rate": 9.02051282051282e-06, "loss": 1.646, "step": 24100 }, { "epoch": 0.15381290757242555, "grad_norm": 0.7421875, "learning_rate": 9.015384615384616e-06, "loss": 1.6602, "step": 24200 }, { "epoch": 0.15444849809958433, "grad_norm": 0.515625, "learning_rate": 9.01025641025641e-06, "loss": 1.6532, "step": 24300 }, { "epoch": 0.1550840886267431, "grad_norm": 1.0625, "learning_rate": 9.005128205128206e-06, "loss": 1.6411, "step": 24400 }, { "epoch": 0.1557196791539019, "grad_norm": 0.9765625, "learning_rate": 9e-06, "loss": 1.6684, "step": 24500 }, { "epoch": 0.15635526968106067, "grad_norm": 0.8828125, "learning_rate": 8.994871794871796e-06, "loss": 1.6517, "step": 24600 }, { "epoch": 0.15699086020821945, "grad_norm": 0.79296875, "learning_rate": 8.98974358974359e-06, "loss": 1.6509, "step": 24700 }, { "epoch": 0.15762645073537823, "grad_norm": 0.458984375, "learning_rate": 8.984615384615386e-06, "loss": 1.6603, "step": 24800 }, { "epoch": 0.158262041262537, "grad_norm": 0.7421875, "learning_rate": 8.979487179487182e-06, "loss": 1.661, "step": 24900 }, { "epoch": 0.15889763178969582, "grad_norm": 0.84375, "learning_rate": 8.974358974358976e-06, "loss": 1.6679, "step": 25000 }, { "epoch": 0.1595332223168546, "grad_norm": 0.609375, "learning_rate": 8.96923076923077e-06, "loss": 1.6555, "step": 25100 }, { "epoch": 0.16016881284401338, "grad_norm": 0.515625, "learning_rate": 8.964102564102565e-06, "loss": 1.6652, "step": 25200 }, { "epoch": 0.16080440337117216, "grad_norm": 0.7578125, "learning_rate": 8.95897435897436e-06, "loss": 1.6393, "step": 25300 }, { "epoch": 0.16143999389833094, "grad_norm": 0.86328125, "learning_rate": 8.953846153846153e-06, "loss": 1.6479, "step": 25400 }, { "epoch": 0.16207558442548972, "grad_norm": 0.9296875, "learning_rate": 8.94871794871795e-06, "loss": 1.6509, "step": 25500 }, { "epoch": 0.1627111749526485, "grad_norm": 0.5390625, "learning_rate": 8.943589743589745e-06, "loss": 1.66, "step": 25600 }, { "epoch": 0.16334676547980728, "grad_norm": 0.83203125, "learning_rate": 8.938461538461539e-06, "loss": 1.6697, "step": 25700 }, { "epoch": 0.16398235600696606, "grad_norm": 0.84765625, "learning_rate": 8.933333333333333e-06, "loss": 1.6493, "step": 25800 }, { "epoch": 0.16461794653412484, "grad_norm": 0.5, "learning_rate": 8.928205128205129e-06, "loss": 1.6503, "step": 25900 }, { "epoch": 0.16525353706128365, "grad_norm": 0.609375, "learning_rate": 8.923076923076925e-06, "loss": 1.65, "step": 26000 }, { "epoch": 0.16588912758844243, "grad_norm": 0.91015625, "learning_rate": 8.917948717948719e-06, "loss": 1.6532, "step": 26100 }, { "epoch": 0.1665247181156012, "grad_norm": 0.76953125, "learning_rate": 8.912820512820514e-06, "loss": 1.6583, "step": 26200 }, { "epoch": 0.16716030864276, "grad_norm": 0.70703125, "learning_rate": 8.907692307692308e-06, "loss": 1.6423, "step": 26300 }, { "epoch": 0.16779589916991877, "grad_norm": 0.85546875, "learning_rate": 8.902564102564102e-06, "loss": 1.6699, "step": 26400 }, { "epoch": 0.16843148969707755, "grad_norm": 0.66015625, "learning_rate": 8.897435897435898e-06, "loss": 1.6522, "step": 26500 }, { "epoch": 0.16906708022423633, "grad_norm": 0.609375, "learning_rate": 8.892307692307694e-06, "loss": 1.6531, "step": 26600 }, { "epoch": 0.1697026707513951, "grad_norm": 0.71875, "learning_rate": 8.887179487179488e-06, "loss": 1.6512, "step": 26700 }, { "epoch": 0.1703382612785539, "grad_norm": 0.4765625, "learning_rate": 8.882051282051282e-06, "loss": 1.6158, "step": 26800 }, { "epoch": 0.1709738518057127, "grad_norm": 1.015625, "learning_rate": 8.876923076923078e-06, "loss": 1.6516, "step": 26900 }, { "epoch": 0.17160944233287148, "grad_norm": 0.85546875, "learning_rate": 8.871794871794872e-06, "loss": 1.6409, "step": 27000 }, { "epoch": 0.17224503286003026, "grad_norm": 0.40234375, "learning_rate": 8.866666666666668e-06, "loss": 1.6526, "step": 27100 }, { "epoch": 0.17288062338718904, "grad_norm": 0.6015625, "learning_rate": 8.861538461538463e-06, "loss": 1.6387, "step": 27200 }, { "epoch": 0.17351621391434782, "grad_norm": 0.625, "learning_rate": 8.856410256410257e-06, "loss": 1.6583, "step": 27300 }, { "epoch": 0.1741518044415066, "grad_norm": 0.79296875, "learning_rate": 8.851282051282051e-06, "loss": 1.64, "step": 27400 }, { "epoch": 0.17478739496866538, "grad_norm": 1.2578125, "learning_rate": 8.846153846153847e-06, "loss": 1.6365, "step": 27500 }, { "epoch": 0.17542298549582416, "grad_norm": 0.5, "learning_rate": 8.841025641025641e-06, "loss": 1.6424, "step": 27600 }, { "epoch": 0.17605857602298294, "grad_norm": 0.58203125, "learning_rate": 8.835897435897437e-06, "loss": 1.646, "step": 27700 }, { "epoch": 0.17669416655014172, "grad_norm": 0.9375, "learning_rate": 8.830769230769231e-06, "loss": 1.6514, "step": 27800 }, { "epoch": 0.17732975707730053, "grad_norm": 0.470703125, "learning_rate": 8.825641025641027e-06, "loss": 1.6687, "step": 27900 }, { "epoch": 0.1779653476044593, "grad_norm": 1.0625, "learning_rate": 8.820512820512821e-06, "loss": 1.6496, "step": 28000 }, { "epoch": 0.1786009381316181, "grad_norm": 0.87109375, "learning_rate": 8.815384615384615e-06, "loss": 1.6357, "step": 28100 }, { "epoch": 0.17923652865877687, "grad_norm": 0.68359375, "learning_rate": 8.81025641025641e-06, "loss": 1.6453, "step": 28200 }, { "epoch": 0.17987211918593565, "grad_norm": 0.75390625, "learning_rate": 8.805128205128206e-06, "loss": 1.659, "step": 28300 }, { "epoch": 0.18050770971309443, "grad_norm": 0.76171875, "learning_rate": 8.8e-06, "loss": 1.6527, "step": 28400 }, { "epoch": 0.1811433002402532, "grad_norm": 0.63671875, "learning_rate": 8.794871794871796e-06, "loss": 1.6669, "step": 28500 }, { "epoch": 0.181778890767412, "grad_norm": 0.76953125, "learning_rate": 8.78974358974359e-06, "loss": 1.6514, "step": 28600 }, { "epoch": 0.18241448129457077, "grad_norm": 0.6171875, "learning_rate": 8.784615384615386e-06, "loss": 1.6655, "step": 28700 }, { "epoch": 0.18305007182172958, "grad_norm": 0.71484375, "learning_rate": 8.77948717948718e-06, "loss": 1.6524, "step": 28800 }, { "epoch": 0.18368566234888836, "grad_norm": 0.8828125, "learning_rate": 8.774358974358976e-06, "loss": 1.6286, "step": 28900 }, { "epoch": 0.18432125287604714, "grad_norm": 0.73828125, "learning_rate": 8.76923076923077e-06, "loss": 1.639, "step": 29000 }, { "epoch": 0.18495684340320592, "grad_norm": 0.7265625, "learning_rate": 8.764102564102564e-06, "loss": 1.6487, "step": 29100 }, { "epoch": 0.1855924339303647, "grad_norm": 0.91796875, "learning_rate": 8.75897435897436e-06, "loss": 1.6576, "step": 29200 }, { "epoch": 0.18622802445752348, "grad_norm": 0.66796875, "learning_rate": 8.753846153846155e-06, "loss": 1.6483, "step": 29300 }, { "epoch": 0.18686361498468226, "grad_norm": 0.62109375, "learning_rate": 8.74871794871795e-06, "loss": 1.6399, "step": 29400 }, { "epoch": 0.18749920551184104, "grad_norm": 0.7890625, "learning_rate": 8.743589743589743e-06, "loss": 1.6383, "step": 29500 }, { "epoch": 0.18813479603899982, "grad_norm": 0.5546875, "learning_rate": 8.73846153846154e-06, "loss": 1.6486, "step": 29600 }, { "epoch": 0.1887703865661586, "grad_norm": 0.67578125, "learning_rate": 8.733333333333333e-06, "loss": 1.6503, "step": 29700 }, { "epoch": 0.1894059770933174, "grad_norm": 0.9609375, "learning_rate": 8.728205128205129e-06, "loss": 1.6361, "step": 29800 }, { "epoch": 0.1900415676204762, "grad_norm": 0.9609375, "learning_rate": 8.723076923076925e-06, "loss": 1.6402, "step": 29900 }, { "epoch": 0.19067715814763497, "grad_norm": 0.443359375, "learning_rate": 8.717948717948719e-06, "loss": 1.6462, "step": 30000 }, { "epoch": 0.19131274867479375, "grad_norm": 0.482421875, "learning_rate": 8.712820512820513e-06, "loss": 1.6481, "step": 30100 }, { "epoch": 0.19194833920195253, "grad_norm": 0.85546875, "learning_rate": 8.707692307692309e-06, "loss": 1.6497, "step": 30200 }, { "epoch": 0.19258392972911131, "grad_norm": 0.73046875, "learning_rate": 8.702564102564103e-06, "loss": 1.6486, "step": 30300 }, { "epoch": 0.1932195202562701, "grad_norm": 0.71484375, "learning_rate": 8.697435897435898e-06, "loss": 1.6494, "step": 30400 }, { "epoch": 0.19385511078342887, "grad_norm": 0.57421875, "learning_rate": 8.692307692307692e-06, "loss": 1.6529, "step": 30500 }, { "epoch": 0.19449070131058765, "grad_norm": 0.66796875, "learning_rate": 8.687179487179488e-06, "loss": 1.642, "step": 30600 }, { "epoch": 0.19512629183774646, "grad_norm": 0.82421875, "learning_rate": 8.682051282051282e-06, "loss": 1.6206, "step": 30700 }, { "epoch": 0.19576188236490524, "grad_norm": 0.82421875, "learning_rate": 8.676923076923078e-06, "loss": 1.6438, "step": 30800 }, { "epoch": 0.19639747289206402, "grad_norm": 0.734375, "learning_rate": 8.671794871794874e-06, "loss": 1.6431, "step": 30900 }, { "epoch": 0.1970330634192228, "grad_norm": 0.77734375, "learning_rate": 8.666666666666668e-06, "loss": 1.6529, "step": 31000 }, { "epoch": 0.19766865394638158, "grad_norm": 0.671875, "learning_rate": 8.661538461538462e-06, "loss": 1.6476, "step": 31100 }, { "epoch": 0.19830424447354036, "grad_norm": 0.640625, "learning_rate": 8.656410256410258e-06, "loss": 1.6645, "step": 31200 }, { "epoch": 0.19893983500069914, "grad_norm": 0.8203125, "learning_rate": 8.651282051282052e-06, "loss": 1.6336, "step": 31300 }, { "epoch": 0.19957542552785792, "grad_norm": 0.5390625, "learning_rate": 8.646153846153846e-06, "loss": 1.6391, "step": 31400 }, { "epoch": 0.2002110160550167, "grad_norm": 0.6796875, "learning_rate": 8.641025641025641e-06, "loss": 1.6344, "step": 31500 }, { "epoch": 0.2008466065821755, "grad_norm": 0.82421875, "learning_rate": 8.635897435897437e-06, "loss": 1.6469, "step": 31600 }, { "epoch": 0.2014821971093343, "grad_norm": 0.8359375, "learning_rate": 8.630769230769231e-06, "loss": 1.6422, "step": 31700 }, { "epoch": 0.20211778763649307, "grad_norm": 0.9140625, "learning_rate": 8.625641025641025e-06, "loss": 1.6404, "step": 31800 }, { "epoch": 0.20275337816365185, "grad_norm": 0.75, "learning_rate": 8.620512820512821e-06, "loss": 1.6453, "step": 31900 }, { "epoch": 0.20338896869081063, "grad_norm": 0.6484375, "learning_rate": 8.615384615384617e-06, "loss": 1.6445, "step": 32000 }, { "epoch": 0.20402455921796941, "grad_norm": 0.44140625, "learning_rate": 8.610256410256411e-06, "loss": 1.6565, "step": 32100 }, { "epoch": 0.2046601497451282, "grad_norm": 0.57421875, "learning_rate": 8.605128205128207e-06, "loss": 1.6426, "step": 32200 }, { "epoch": 0.20529574027228698, "grad_norm": 0.59375, "learning_rate": 8.6e-06, "loss": 1.6288, "step": 32300 }, { "epoch": 0.20593133079944576, "grad_norm": 0.54296875, "learning_rate": 8.594871794871795e-06, "loss": 1.6402, "step": 32400 }, { "epoch": 0.20656692132660454, "grad_norm": 0.482421875, "learning_rate": 8.58974358974359e-06, "loss": 1.6308, "step": 32500 }, { "epoch": 0.20720251185376334, "grad_norm": 0.671875, "learning_rate": 8.584615384615386e-06, "loss": 1.6371, "step": 32600 }, { "epoch": 0.20783810238092212, "grad_norm": 0.8125, "learning_rate": 8.57948717948718e-06, "loss": 1.6449, "step": 32700 }, { "epoch": 0.2084736929080809, "grad_norm": 0.71875, "learning_rate": 8.574358974358974e-06, "loss": 1.6521, "step": 32800 }, { "epoch": 0.20910928343523968, "grad_norm": 1.09375, "learning_rate": 8.56923076923077e-06, "loss": 1.6418, "step": 32900 }, { "epoch": 0.20974487396239846, "grad_norm": 0.66015625, "learning_rate": 8.564102564102564e-06, "loss": 1.6342, "step": 33000 }, { "epoch": 0.21038046448955725, "grad_norm": 0.80078125, "learning_rate": 8.55897435897436e-06, "loss": 1.6413, "step": 33100 }, { "epoch": 0.21101605501671603, "grad_norm": 0.7578125, "learning_rate": 8.553846153846156e-06, "loss": 1.6432, "step": 33200 }, { "epoch": 0.2116516455438748, "grad_norm": 0.75390625, "learning_rate": 8.54871794871795e-06, "loss": 1.6419, "step": 33300 }, { "epoch": 0.21228723607103359, "grad_norm": 0.7578125, "learning_rate": 8.543589743589744e-06, "loss": 1.6508, "step": 33400 }, { "epoch": 0.2129228265981924, "grad_norm": 0.9921875, "learning_rate": 8.53846153846154e-06, "loss": 1.6431, "step": 33500 }, { "epoch": 0.21355841712535117, "grad_norm": 0.58984375, "learning_rate": 8.533333333333335e-06, "loss": 1.6333, "step": 33600 }, { "epoch": 0.21419400765250995, "grad_norm": 1.578125, "learning_rate": 8.52820512820513e-06, "loss": 1.659, "step": 33700 }, { "epoch": 0.21482959817966873, "grad_norm": 0.73828125, "learning_rate": 8.523076923076923e-06, "loss": 1.6344, "step": 33800 }, { "epoch": 0.21546518870682752, "grad_norm": 0.73046875, "learning_rate": 8.517948717948719e-06, "loss": 1.6409, "step": 33900 }, { "epoch": 0.2161007792339863, "grad_norm": 0.9453125, "learning_rate": 8.512820512820513e-06, "loss": 1.6304, "step": 34000 }, { "epoch": 0.21673636976114508, "grad_norm": 0.9140625, "learning_rate": 8.507692307692307e-06, "loss": 1.6352, "step": 34100 }, { "epoch": 0.21737196028830386, "grad_norm": 0.6953125, "learning_rate": 8.502564102564103e-06, "loss": 1.6341, "step": 34200 }, { "epoch": 0.21800755081546264, "grad_norm": 0.93359375, "learning_rate": 8.497435897435899e-06, "loss": 1.6448, "step": 34300 }, { "epoch": 0.21864314134262142, "grad_norm": 0.53125, "learning_rate": 8.492307692307693e-06, "loss": 1.6371, "step": 34400 }, { "epoch": 0.21927873186978022, "grad_norm": 0.78515625, "learning_rate": 8.487179487179488e-06, "loss": 1.6548, "step": 34500 }, { "epoch": 0.219914322396939, "grad_norm": 0.765625, "learning_rate": 8.482051282051283e-06, "loss": 1.6575, "step": 34600 }, { "epoch": 0.22054991292409779, "grad_norm": 1.09375, "learning_rate": 8.476923076923078e-06, "loss": 1.652, "step": 34700 }, { "epoch": 0.22118550345125657, "grad_norm": 0.470703125, "learning_rate": 8.471794871794872e-06, "loss": 1.6636, "step": 34800 }, { "epoch": 0.22182109397841535, "grad_norm": 0.7265625, "learning_rate": 8.466666666666668e-06, "loss": 1.6483, "step": 34900 }, { "epoch": 0.22245668450557413, "grad_norm": 0.69140625, "learning_rate": 8.461538461538462e-06, "loss": 1.6532, "step": 35000 }, { "epoch": 0.2230922750327329, "grad_norm": 0.90234375, "learning_rate": 8.456410256410256e-06, "loss": 1.6306, "step": 35100 }, { "epoch": 0.2237278655598917, "grad_norm": 0.51953125, "learning_rate": 8.451282051282052e-06, "loss": 1.6137, "step": 35200 }, { "epoch": 0.22436345608705047, "grad_norm": 0.78125, "learning_rate": 8.446153846153848e-06, "loss": 1.6424, "step": 35300 }, { "epoch": 0.22499904661420927, "grad_norm": 0.65625, "learning_rate": 8.441025641025642e-06, "loss": 1.6641, "step": 35400 }, { "epoch": 0.22563463714136806, "grad_norm": 0.6796875, "learning_rate": 8.435897435897436e-06, "loss": 1.6388, "step": 35500 }, { "epoch": 0.22627022766852684, "grad_norm": 0.85546875, "learning_rate": 8.430769230769231e-06, "loss": 1.6259, "step": 35600 }, { "epoch": 0.22690581819568562, "grad_norm": 0.703125, "learning_rate": 8.425641025641026e-06, "loss": 1.639, "step": 35700 }, { "epoch": 0.2275414087228444, "grad_norm": 1.0078125, "learning_rate": 8.420512820512821e-06, "loss": 1.6384, "step": 35800 }, { "epoch": 0.22817699925000318, "grad_norm": 0.5, "learning_rate": 8.415384615384617e-06, "loss": 1.6446, "step": 35900 }, { "epoch": 0.22881258977716196, "grad_norm": 0.72265625, "learning_rate": 8.410256410256411e-06, "loss": 1.6237, "step": 36000 }, { "epoch": 0.22944818030432074, "grad_norm": 0.76171875, "learning_rate": 8.405128205128205e-06, "loss": 1.6413, "step": 36100 }, { "epoch": 0.23008377083147952, "grad_norm": 0.76171875, "learning_rate": 8.400000000000001e-06, "loss": 1.6305, "step": 36200 }, { "epoch": 0.2307193613586383, "grad_norm": 0.69140625, "learning_rate": 8.394871794871795e-06, "loss": 1.6586, "step": 36300 }, { "epoch": 0.2313549518857971, "grad_norm": 0.62109375, "learning_rate": 8.38974358974359e-06, "loss": 1.6388, "step": 36400 }, { "epoch": 0.23199054241295589, "grad_norm": 0.83203125, "learning_rate": 8.384615384615385e-06, "loss": 1.6322, "step": 36500 }, { "epoch": 0.23262613294011467, "grad_norm": 0.66015625, "learning_rate": 8.37948717948718e-06, "loss": 1.6377, "step": 36600 }, { "epoch": 0.23326172346727345, "grad_norm": 0.78515625, "learning_rate": 8.374358974358975e-06, "loss": 1.6323, "step": 36700 }, { "epoch": 0.23389731399443223, "grad_norm": 0.96875, "learning_rate": 8.36923076923077e-06, "loss": 1.6332, "step": 36800 }, { "epoch": 0.234532904521591, "grad_norm": 0.466796875, "learning_rate": 8.364102564102566e-06, "loss": 1.6308, "step": 36900 }, { "epoch": 0.2351684950487498, "grad_norm": 1.1640625, "learning_rate": 8.35897435897436e-06, "loss": 1.6235, "step": 37000 }, { "epoch": 0.23580408557590857, "grad_norm": 0.69140625, "learning_rate": 8.353846153846154e-06, "loss": 1.6393, "step": 37100 }, { "epoch": 0.23643967610306735, "grad_norm": 0.8359375, "learning_rate": 8.34871794871795e-06, "loss": 1.6371, "step": 37200 }, { "epoch": 0.23707526663022616, "grad_norm": 0.984375, "learning_rate": 8.343589743589744e-06, "loss": 1.635, "step": 37300 }, { "epoch": 0.23771085715738494, "grad_norm": 0.8359375, "learning_rate": 8.338461538461538e-06, "loss": 1.6295, "step": 37400 }, { "epoch": 0.23834644768454372, "grad_norm": 0.74609375, "learning_rate": 8.333333333333334e-06, "loss": 1.6413, "step": 37500 }, { "epoch": 0.2389820382117025, "grad_norm": 0.515625, "learning_rate": 8.32820512820513e-06, "loss": 1.6512, "step": 37600 }, { "epoch": 0.23961762873886128, "grad_norm": 0.76953125, "learning_rate": 8.323076923076924e-06, "loss": 1.6558, "step": 37700 }, { "epoch": 0.24025321926602006, "grad_norm": 0.62109375, "learning_rate": 8.317948717948718e-06, "loss": 1.6632, "step": 37800 }, { "epoch": 0.24088880979317884, "grad_norm": 0.5390625, "learning_rate": 8.312820512820513e-06, "loss": 1.6344, "step": 37900 }, { "epoch": 0.24152440032033762, "grad_norm": 0.9375, "learning_rate": 8.307692307692309e-06, "loss": 1.6351, "step": 38000 }, { "epoch": 0.2421599908474964, "grad_norm": 0.89453125, "learning_rate": 8.302564102564103e-06, "loss": 1.6342, "step": 38100 }, { "epoch": 0.24279558137465518, "grad_norm": 0.65625, "learning_rate": 8.297435897435899e-06, "loss": 1.632, "step": 38200 }, { "epoch": 0.243431171901814, "grad_norm": 0.70703125, "learning_rate": 8.292307692307693e-06, "loss": 1.6476, "step": 38300 }, { "epoch": 0.24406676242897277, "grad_norm": 0.890625, "learning_rate": 8.287179487179487e-06, "loss": 1.6275, "step": 38400 }, { "epoch": 0.24470235295613155, "grad_norm": 0.921875, "learning_rate": 8.282051282051283e-06, "loss": 1.6432, "step": 38500 }, { "epoch": 0.24533794348329033, "grad_norm": 0.7265625, "learning_rate": 8.276923076923078e-06, "loss": 1.6506, "step": 38600 }, { "epoch": 0.2459735340104491, "grad_norm": 0.81640625, "learning_rate": 8.271794871794873e-06, "loss": 1.6399, "step": 38700 }, { "epoch": 0.2466091245376079, "grad_norm": 0.67578125, "learning_rate": 8.266666666666667e-06, "loss": 1.6384, "step": 38800 }, { "epoch": 0.24724471506476667, "grad_norm": 0.6875, "learning_rate": 8.261538461538462e-06, "loss": 1.6286, "step": 38900 }, { "epoch": 0.24788030559192545, "grad_norm": 1.09375, "learning_rate": 8.256410256410256e-06, "loss": 1.6442, "step": 39000 }, { "epoch": 0.24851589611908423, "grad_norm": 0.76953125, "learning_rate": 8.251282051282052e-06, "loss": 1.6467, "step": 39100 }, { "epoch": 0.24915148664624304, "grad_norm": 0.7734375, "learning_rate": 8.246153846153848e-06, "loss": 1.6507, "step": 39200 }, { "epoch": 0.24978707717340182, "grad_norm": 0.7578125, "learning_rate": 8.241025641025642e-06, "loss": 1.63, "step": 39300 }, { "epoch": 0.2504226677005606, "grad_norm": 0.66015625, "learning_rate": 8.235897435897436e-06, "loss": 1.6294, "step": 39400 }, { "epoch": 0.2510582582277194, "grad_norm": 0.78515625, "learning_rate": 8.230769230769232e-06, "loss": 1.6233, "step": 39500 }, { "epoch": 0.25169384875487816, "grad_norm": 0.6484375, "learning_rate": 8.225641025641027e-06, "loss": 1.6528, "step": 39600 }, { "epoch": 0.25232943928203694, "grad_norm": 0.8125, "learning_rate": 8.220512820512822e-06, "loss": 1.6369, "step": 39700 }, { "epoch": 0.2529650298091957, "grad_norm": 0.51953125, "learning_rate": 8.215384615384616e-06, "loss": 1.6193, "step": 39800 }, { "epoch": 0.2536006203363545, "grad_norm": 1.0078125, "learning_rate": 8.210256410256411e-06, "loss": 1.646, "step": 39900 }, { "epoch": 0.2542362108635133, "grad_norm": 0.88671875, "learning_rate": 8.205128205128205e-06, "loss": 1.6195, "step": 40000 }, { "epoch": 0.25487180139067206, "grad_norm": 0.56640625, "learning_rate": 8.2e-06, "loss": 1.6344, "step": 40100 }, { "epoch": 0.25550739191783084, "grad_norm": 0.9609375, "learning_rate": 8.194871794871795e-06, "loss": 1.647, "step": 40200 }, { "epoch": 0.2561429824449896, "grad_norm": 0.8984375, "learning_rate": 8.189743589743591e-06, "loss": 1.6453, "step": 40300 }, { "epoch": 0.2567785729721484, "grad_norm": 0.6484375, "learning_rate": 8.184615384615385e-06, "loss": 1.6263, "step": 40400 }, { "epoch": 0.2574141634993072, "grad_norm": 0.79296875, "learning_rate": 8.17948717948718e-06, "loss": 1.6456, "step": 40500 }, { "epoch": 0.258049754026466, "grad_norm": 0.625, "learning_rate": 8.174358974358975e-06, "loss": 1.6372, "step": 40600 }, { "epoch": 0.2586853445536248, "grad_norm": 0.83984375, "learning_rate": 8.16923076923077e-06, "loss": 1.6321, "step": 40700 }, { "epoch": 0.2593209350807836, "grad_norm": 0.98046875, "learning_rate": 8.164102564102565e-06, "loss": 1.63, "step": 40800 }, { "epoch": 0.25995652560794236, "grad_norm": 0.75390625, "learning_rate": 8.15897435897436e-06, "loss": 1.6338, "step": 40900 }, { "epoch": 0.26059211613510114, "grad_norm": 0.69140625, "learning_rate": 8.153846153846154e-06, "loss": 1.6366, "step": 41000 }, { "epoch": 0.2612277066622599, "grad_norm": 0.7109375, "learning_rate": 8.148717948717948e-06, "loss": 1.639, "step": 41100 }, { "epoch": 0.2618632971894187, "grad_norm": 0.59375, "learning_rate": 8.143589743589744e-06, "loss": 1.6464, "step": 41200 }, { "epoch": 0.2624988877165775, "grad_norm": 0.56640625, "learning_rate": 8.13846153846154e-06, "loss": 1.6243, "step": 41300 }, { "epoch": 0.26313447824373626, "grad_norm": 0.91796875, "learning_rate": 8.133333333333334e-06, "loss": 1.6245, "step": 41400 }, { "epoch": 0.26377006877089504, "grad_norm": 0.5546875, "learning_rate": 8.12820512820513e-06, "loss": 1.6267, "step": 41500 }, { "epoch": 0.2644056592980538, "grad_norm": 0.82421875, "learning_rate": 8.123076923076924e-06, "loss": 1.6351, "step": 41600 }, { "epoch": 0.2650412498252126, "grad_norm": 0.70703125, "learning_rate": 8.117948717948718e-06, "loss": 1.6419, "step": 41700 }, { "epoch": 0.2656768403523714, "grad_norm": 0.66796875, "learning_rate": 8.112820512820514e-06, "loss": 1.6504, "step": 41800 }, { "epoch": 0.26631243087953016, "grad_norm": 1.0859375, "learning_rate": 8.10769230769231e-06, "loss": 1.6221, "step": 41900 }, { "epoch": 0.26694802140668894, "grad_norm": 0.8671875, "learning_rate": 8.102564102564103e-06, "loss": 1.6303, "step": 42000 }, { "epoch": 0.2675836119338477, "grad_norm": 0.486328125, "learning_rate": 8.097435897435897e-06, "loss": 1.6425, "step": 42100 }, { "epoch": 0.2682192024610065, "grad_norm": 1.015625, "learning_rate": 8.092307692307693e-06, "loss": 1.6439, "step": 42200 }, { "epoch": 0.2688547929881653, "grad_norm": 0.76171875, "learning_rate": 8.087179487179487e-06, "loss": 1.6236, "step": 42300 }, { "epoch": 0.26949038351532406, "grad_norm": 0.83984375, "learning_rate": 8.082051282051283e-06, "loss": 1.6187, "step": 42400 }, { "epoch": 0.2701259740424829, "grad_norm": 0.86328125, "learning_rate": 8.076923076923077e-06, "loss": 1.6479, "step": 42500 }, { "epoch": 0.2707615645696417, "grad_norm": 0.953125, "learning_rate": 8.071794871794873e-06, "loss": 1.6291, "step": 42600 }, { "epoch": 0.27139715509680046, "grad_norm": 1.0234375, "learning_rate": 8.066666666666667e-06, "loss": 1.6303, "step": 42700 }, { "epoch": 0.27203274562395924, "grad_norm": 0.83984375, "learning_rate": 8.061538461538463e-06, "loss": 1.644, "step": 42800 }, { "epoch": 0.272668336151118, "grad_norm": 0.80859375, "learning_rate": 8.056410256410258e-06, "loss": 1.618, "step": 42900 }, { "epoch": 0.2733039266782768, "grad_norm": 0.74609375, "learning_rate": 8.051282051282052e-06, "loss": 1.6366, "step": 43000 }, { "epoch": 0.2739395172054356, "grad_norm": 0.80078125, "learning_rate": 8.046153846153846e-06, "loss": 1.639, "step": 43100 }, { "epoch": 0.27457510773259436, "grad_norm": 0.80078125, "learning_rate": 8.041025641025642e-06, "loss": 1.6251, "step": 43200 }, { "epoch": 0.27521069825975314, "grad_norm": 0.64453125, "learning_rate": 8.035897435897436e-06, "loss": 1.636, "step": 43300 }, { "epoch": 0.2758462887869119, "grad_norm": 0.7734375, "learning_rate": 8.03076923076923e-06, "loss": 1.6392, "step": 43400 }, { "epoch": 0.2764818793140707, "grad_norm": 0.62890625, "learning_rate": 8.025641025641026e-06, "loss": 1.6438, "step": 43500 }, { "epoch": 0.2771174698412295, "grad_norm": 0.69921875, "learning_rate": 8.020512820512822e-06, "loss": 1.6318, "step": 43600 }, { "epoch": 0.27775306036838826, "grad_norm": 0.96484375, "learning_rate": 8.015384615384616e-06, "loss": 1.6582, "step": 43700 }, { "epoch": 0.27838865089554704, "grad_norm": 0.765625, "learning_rate": 8.01025641025641e-06, "loss": 1.6218, "step": 43800 }, { "epoch": 0.2790242414227058, "grad_norm": 1.421875, "learning_rate": 8.005128205128206e-06, "loss": 1.6256, "step": 43900 }, { "epoch": 0.2796598319498646, "grad_norm": 1.625, "learning_rate": 8.000000000000001e-06, "loss": 1.6213, "step": 44000 }, { "epoch": 0.2802954224770234, "grad_norm": 0.859375, "learning_rate": 7.994871794871795e-06, "loss": 1.6373, "step": 44100 }, { "epoch": 0.28093101300418216, "grad_norm": 0.7734375, "learning_rate": 7.989743589743591e-06, "loss": 1.6333, "step": 44200 }, { "epoch": 0.28156660353134094, "grad_norm": 0.546875, "learning_rate": 7.984615384615385e-06, "loss": 1.6335, "step": 44300 }, { "epoch": 0.2822021940584998, "grad_norm": 0.84375, "learning_rate": 7.97948717948718e-06, "loss": 1.6411, "step": 44400 }, { "epoch": 0.28283778458565856, "grad_norm": 0.6875, "learning_rate": 7.974358974358975e-06, "loss": 1.635, "step": 44500 }, { "epoch": 0.28347337511281734, "grad_norm": 0.63671875, "learning_rate": 7.96923076923077e-06, "loss": 1.6425, "step": 44600 }, { "epoch": 0.2841089656399761, "grad_norm": 0.796875, "learning_rate": 7.964102564102565e-06, "loss": 1.623, "step": 44700 }, { "epoch": 0.2847445561671349, "grad_norm": 0.73828125, "learning_rate": 7.958974358974359e-06, "loss": 1.6395, "step": 44800 }, { "epoch": 0.2853801466942937, "grad_norm": 0.76953125, "learning_rate": 7.953846153846155e-06, "loss": 1.6331, "step": 44900 }, { "epoch": 0.28601573722145246, "grad_norm": 0.87890625, "learning_rate": 7.948717948717949e-06, "loss": 1.6358, "step": 45000 }, { "epoch": 0.28665132774861124, "grad_norm": 0.8359375, "learning_rate": 7.943589743589744e-06, "loss": 1.6369, "step": 45100 }, { "epoch": 0.28728691827577, "grad_norm": 0.81640625, "learning_rate": 7.93846153846154e-06, "loss": 1.6279, "step": 45200 }, { "epoch": 0.2879225088029288, "grad_norm": 0.8046875, "learning_rate": 7.933333333333334e-06, "loss": 1.6299, "step": 45300 }, { "epoch": 0.2885580993300876, "grad_norm": 0.82421875, "learning_rate": 7.928205128205128e-06, "loss": 1.6322, "step": 45400 }, { "epoch": 0.28919368985724636, "grad_norm": 0.875, "learning_rate": 7.923076923076924e-06, "loss": 1.6241, "step": 45500 }, { "epoch": 0.28982928038440514, "grad_norm": 0.7734375, "learning_rate": 7.91794871794872e-06, "loss": 1.6197, "step": 45600 }, { "epoch": 0.2904648709115639, "grad_norm": 0.9140625, "learning_rate": 7.912820512820514e-06, "loss": 1.6213, "step": 45700 }, { "epoch": 0.2911004614387227, "grad_norm": 0.6875, "learning_rate": 7.907692307692308e-06, "loss": 1.6283, "step": 45800 }, { "epoch": 0.2917360519658815, "grad_norm": 0.75390625, "learning_rate": 7.902564102564104e-06, "loss": 1.6331, "step": 45900 }, { "epoch": 0.29237164249304026, "grad_norm": 1.0234375, "learning_rate": 7.897435897435898e-06, "loss": 1.6354, "step": 46000 }, { "epoch": 0.29300723302019904, "grad_norm": 0.6015625, "learning_rate": 7.892307692307692e-06, "loss": 1.6243, "step": 46100 }, { "epoch": 0.2936428235473578, "grad_norm": 0.486328125, "learning_rate": 7.887179487179487e-06, "loss": 1.6225, "step": 46200 }, { "epoch": 0.29427841407451666, "grad_norm": 1.0078125, "learning_rate": 7.882051282051283e-06, "loss": 1.626, "step": 46300 }, { "epoch": 0.29491400460167544, "grad_norm": 0.6015625, "learning_rate": 7.876923076923077e-06, "loss": 1.6093, "step": 46400 }, { "epoch": 0.2955495951288342, "grad_norm": 0.84375, "learning_rate": 7.871794871794873e-06, "loss": 1.632, "step": 46500 }, { "epoch": 0.296185185655993, "grad_norm": 0.7734375, "learning_rate": 7.866666666666667e-06, "loss": 1.6322, "step": 46600 }, { "epoch": 0.2968207761831518, "grad_norm": 0.7578125, "learning_rate": 7.861538461538463e-06, "loss": 1.6305, "step": 46700 }, { "epoch": 0.29745636671031056, "grad_norm": 0.79296875, "learning_rate": 7.856410256410257e-06, "loss": 1.6245, "step": 46800 }, { "epoch": 0.29809195723746934, "grad_norm": 0.98828125, "learning_rate": 7.851282051282053e-06, "loss": 1.6194, "step": 46900 }, { "epoch": 0.2987275477646281, "grad_norm": 0.6015625, "learning_rate": 7.846153846153847e-06, "loss": 1.6272, "step": 47000 }, { "epoch": 0.2993631382917869, "grad_norm": 0.82421875, "learning_rate": 7.84102564102564e-06, "loss": 1.6377, "step": 47100 }, { "epoch": 0.2999987288189457, "grad_norm": 0.91015625, "learning_rate": 7.835897435897436e-06, "loss": 1.6276, "step": 47200 }, { "epoch": 0.30063431934610446, "grad_norm": 0.796875, "learning_rate": 7.830769230769232e-06, "loss": 1.6217, "step": 47300 }, { "epoch": 0.30126990987326324, "grad_norm": 0.796875, "learning_rate": 7.825641025641026e-06, "loss": 1.6226, "step": 47400 }, { "epoch": 0.301905500400422, "grad_norm": 0.78515625, "learning_rate": 7.820512820512822e-06, "loss": 1.6194, "step": 47500 }, { "epoch": 0.3025410909275808, "grad_norm": 0.83203125, "learning_rate": 7.815384615384616e-06, "loss": 1.6229, "step": 47600 }, { "epoch": 0.3031766814547396, "grad_norm": 0.63671875, "learning_rate": 7.81025641025641e-06, "loss": 1.6346, "step": 47700 }, { "epoch": 0.30381227198189836, "grad_norm": 0.61328125, "learning_rate": 7.805128205128206e-06, "loss": 1.6313, "step": 47800 }, { "epoch": 0.30444786250905714, "grad_norm": 0.65625, "learning_rate": 7.800000000000002e-06, "loss": 1.6418, "step": 47900 }, { "epoch": 0.3050834530362159, "grad_norm": 0.83984375, "learning_rate": 7.794871794871796e-06, "loss": 1.616, "step": 48000 }, { "epoch": 0.3057190435633747, "grad_norm": 0.72265625, "learning_rate": 7.78974358974359e-06, "loss": 1.6425, "step": 48100 }, { "epoch": 0.30635463409053354, "grad_norm": 0.765625, "learning_rate": 7.784615384615385e-06, "loss": 1.6228, "step": 48200 }, { "epoch": 0.3069902246176923, "grad_norm": 0.66015625, "learning_rate": 7.77948717948718e-06, "loss": 1.6098, "step": 48300 }, { "epoch": 0.3076258151448511, "grad_norm": 0.7421875, "learning_rate": 7.774358974358975e-06, "loss": 1.6483, "step": 48400 }, { "epoch": 0.3082614056720099, "grad_norm": 0.859375, "learning_rate": 7.76923076923077e-06, "loss": 1.6251, "step": 48500 }, { "epoch": 0.30889699619916866, "grad_norm": 0.70703125, "learning_rate": 7.764102564102565e-06, "loss": 1.6371, "step": 48600 }, { "epoch": 0.30953258672632744, "grad_norm": 0.72265625, "learning_rate": 7.758974358974359e-06, "loss": 1.6277, "step": 48700 }, { "epoch": 0.3101681772534862, "grad_norm": 0.76171875, "learning_rate": 7.753846153846155e-06, "loss": 1.6302, "step": 48800 }, { "epoch": 0.310803767780645, "grad_norm": 0.46484375, "learning_rate": 7.74871794871795e-06, "loss": 1.6416, "step": 48900 }, { "epoch": 0.3114393583078038, "grad_norm": 0.73046875, "learning_rate": 7.743589743589745e-06, "loss": 1.6144, "step": 49000 }, { "epoch": 0.31207494883496256, "grad_norm": 0.79296875, "learning_rate": 7.738461538461539e-06, "loss": 1.6316, "step": 49100 }, { "epoch": 0.31271053936212134, "grad_norm": 0.8359375, "learning_rate": 7.733333333333334e-06, "loss": 1.6118, "step": 49200 }, { "epoch": 0.3133461298892801, "grad_norm": 0.90234375, "learning_rate": 7.728205128205128e-06, "loss": 1.6303, "step": 49300 }, { "epoch": 0.3139817204164389, "grad_norm": 0.7421875, "learning_rate": 7.723076923076924e-06, "loss": 1.6304, "step": 49400 }, { "epoch": 0.3146173109435977, "grad_norm": 0.546875, "learning_rate": 7.717948717948718e-06, "loss": 1.6298, "step": 49500 }, { "epoch": 0.31525290147075646, "grad_norm": 0.474609375, "learning_rate": 7.712820512820514e-06, "loss": 1.6223, "step": 49600 }, { "epoch": 0.31588849199791524, "grad_norm": 0.76953125, "learning_rate": 7.707692307692308e-06, "loss": 1.6282, "step": 49700 }, { "epoch": 0.316524082525074, "grad_norm": 0.76171875, "learning_rate": 7.702564102564102e-06, "loss": 1.6254, "step": 49800 }, { "epoch": 0.3171596730522328, "grad_norm": 0.91796875, "learning_rate": 7.697435897435898e-06, "loss": 1.6377, "step": 49900 }, { "epoch": 0.31779526357939164, "grad_norm": 0.87109375, "learning_rate": 7.692307692307694e-06, "loss": 1.6068, "step": 50000 }, { "epoch": 0.3184308541065504, "grad_norm": 0.83203125, "learning_rate": 7.687179487179488e-06, "loss": 1.6144, "step": 50100 }, { "epoch": 0.3190664446337092, "grad_norm": 0.98828125, "learning_rate": 7.682051282051283e-06, "loss": 1.6314, "step": 50200 }, { "epoch": 0.319702035160868, "grad_norm": 1.125, "learning_rate": 7.676923076923077e-06, "loss": 1.6279, "step": 50300 }, { "epoch": 0.32033762568802676, "grad_norm": 0.7421875, "learning_rate": 7.671794871794871e-06, "loss": 1.649, "step": 50400 }, { "epoch": 0.32097321621518554, "grad_norm": 0.89453125, "learning_rate": 7.666666666666667e-06, "loss": 1.6387, "step": 50500 }, { "epoch": 0.3216088067423443, "grad_norm": 0.72265625, "learning_rate": 7.661538461538463e-06, "loss": 1.627, "step": 50600 }, { "epoch": 0.3222443972695031, "grad_norm": 0.6484375, "learning_rate": 7.656410256410257e-06, "loss": 1.6261, "step": 50700 }, { "epoch": 0.3228799877966619, "grad_norm": 0.60546875, "learning_rate": 7.651282051282051e-06, "loss": 1.627, "step": 50800 }, { "epoch": 0.32351557832382066, "grad_norm": 0.84765625, "learning_rate": 7.646153846153847e-06, "loss": 1.6219, "step": 50900 }, { "epoch": 0.32415116885097944, "grad_norm": 0.81640625, "learning_rate": 7.641025641025641e-06, "loss": 1.6318, "step": 51000 }, { "epoch": 0.3247867593781382, "grad_norm": 0.83203125, "learning_rate": 7.635897435897437e-06, "loss": 1.6109, "step": 51100 }, { "epoch": 0.325422349905297, "grad_norm": 0.71484375, "learning_rate": 7.630769230769232e-06, "loss": 1.6273, "step": 51200 }, { "epoch": 0.3260579404324558, "grad_norm": 0.703125, "learning_rate": 7.6256410256410264e-06, "loss": 1.6264, "step": 51300 }, { "epoch": 0.32669353095961456, "grad_norm": 1.015625, "learning_rate": 7.620512820512821e-06, "loss": 1.6306, "step": 51400 }, { "epoch": 0.32732912148677334, "grad_norm": 1.015625, "learning_rate": 7.615384615384615e-06, "loss": 1.6276, "step": 51500 }, { "epoch": 0.3279647120139321, "grad_norm": 1.1640625, "learning_rate": 7.610256410256411e-06, "loss": 1.623, "step": 51600 }, { "epoch": 0.3286003025410909, "grad_norm": 0.64453125, "learning_rate": 7.605128205128206e-06, "loss": 1.641, "step": 51700 }, { "epoch": 0.3292358930682497, "grad_norm": 0.98046875, "learning_rate": 7.600000000000001e-06, "loss": 1.6255, "step": 51800 }, { "epoch": 0.3298714835954085, "grad_norm": 0.9765625, "learning_rate": 7.594871794871795e-06, "loss": 1.6192, "step": 51900 }, { "epoch": 0.3305070741225673, "grad_norm": 0.75390625, "learning_rate": 7.58974358974359e-06, "loss": 1.6209, "step": 52000 }, { "epoch": 0.3311426646497261, "grad_norm": 0.95703125, "learning_rate": 7.584615384615385e-06, "loss": 1.6305, "step": 52100 }, { "epoch": 0.33177825517688486, "grad_norm": 0.91796875, "learning_rate": 7.5794871794871805e-06, "loss": 1.6112, "step": 52200 }, { "epoch": 0.33241384570404364, "grad_norm": 0.72265625, "learning_rate": 7.574358974358975e-06, "loss": 1.6185, "step": 52300 }, { "epoch": 0.3330494362312024, "grad_norm": 1.046875, "learning_rate": 7.5692307692307695e-06, "loss": 1.6255, "step": 52400 }, { "epoch": 0.3336850267583612, "grad_norm": 0.80859375, "learning_rate": 7.564102564102564e-06, "loss": 1.6124, "step": 52500 }, { "epoch": 0.33432061728552, "grad_norm": 0.56640625, "learning_rate": 7.558974358974359e-06, "loss": 1.648, "step": 52600 }, { "epoch": 0.33495620781267876, "grad_norm": 1.046875, "learning_rate": 7.553846153846155e-06, "loss": 1.6192, "step": 52700 }, { "epoch": 0.33559179833983754, "grad_norm": 0.8359375, "learning_rate": 7.54871794871795e-06, "loss": 1.6265, "step": 52800 }, { "epoch": 0.3362273888669963, "grad_norm": 0.8515625, "learning_rate": 7.543589743589744e-06, "loss": 1.6227, "step": 52900 }, { "epoch": 0.3368629793941551, "grad_norm": 0.609375, "learning_rate": 7.538461538461539e-06, "loss": 1.6165, "step": 53000 }, { "epoch": 0.3374985699213139, "grad_norm": 0.7421875, "learning_rate": 7.533333333333334e-06, "loss": 1.6203, "step": 53100 }, { "epoch": 0.33813416044847266, "grad_norm": 0.72265625, "learning_rate": 7.528205128205129e-06, "loss": 1.6253, "step": 53200 }, { "epoch": 0.33876975097563145, "grad_norm": 0.53125, "learning_rate": 7.523076923076924e-06, "loss": 1.6359, "step": 53300 }, { "epoch": 0.3394053415027902, "grad_norm": 0.7265625, "learning_rate": 7.5179487179487185e-06, "loss": 1.6145, "step": 53400 }, { "epoch": 0.340040932029949, "grad_norm": 0.9296875, "learning_rate": 7.512820512820513e-06, "loss": 1.6269, "step": 53500 }, { "epoch": 0.3406765225571078, "grad_norm": 0.66015625, "learning_rate": 7.507692307692308e-06, "loss": 1.6113, "step": 53600 }, { "epoch": 0.34131211308426657, "grad_norm": 0.7578125, "learning_rate": 7.502564102564102e-06, "loss": 1.6376, "step": 53700 }, { "epoch": 0.3419477036114254, "grad_norm": 0.81640625, "learning_rate": 7.497435897435899e-06, "loss": 1.634, "step": 53800 }, { "epoch": 0.3425832941385842, "grad_norm": 0.6796875, "learning_rate": 7.492307692307693e-06, "loss": 1.6335, "step": 53900 }, { "epoch": 0.34321888466574296, "grad_norm": 0.62109375, "learning_rate": 7.487179487179488e-06, "loss": 1.6334, "step": 54000 }, { "epoch": 0.34385447519290174, "grad_norm": 0.734375, "learning_rate": 7.482051282051283e-06, "loss": 1.6347, "step": 54100 }, { "epoch": 0.3444900657200605, "grad_norm": 0.58984375, "learning_rate": 7.476923076923077e-06, "loss": 1.6233, "step": 54200 }, { "epoch": 0.3451256562472193, "grad_norm": 1.0703125, "learning_rate": 7.4717948717948726e-06, "loss": 1.6291, "step": 54300 }, { "epoch": 0.3457612467743781, "grad_norm": 0.796875, "learning_rate": 7.4666666666666675e-06, "loss": 1.6182, "step": 54400 }, { "epoch": 0.34639683730153686, "grad_norm": 1.4609375, "learning_rate": 7.461538461538462e-06, "loss": 1.6365, "step": 54500 }, { "epoch": 0.34703242782869564, "grad_norm": 1.0078125, "learning_rate": 7.456410256410257e-06, "loss": 1.623, "step": 54600 }, { "epoch": 0.3476680183558544, "grad_norm": 0.89453125, "learning_rate": 7.451282051282051e-06, "loss": 1.6104, "step": 54700 }, { "epoch": 0.3483036088830132, "grad_norm": 1.2890625, "learning_rate": 7.446153846153846e-06, "loss": 1.6203, "step": 54800 }, { "epoch": 0.348939199410172, "grad_norm": 0.63671875, "learning_rate": 7.441025641025642e-06, "loss": 1.6089, "step": 54900 }, { "epoch": 0.34957478993733077, "grad_norm": 0.78515625, "learning_rate": 7.435897435897437e-06, "loss": 1.6191, "step": 55000 }, { "epoch": 0.35021038046448955, "grad_norm": 0.65234375, "learning_rate": 7.430769230769232e-06, "loss": 1.626, "step": 55100 }, { "epoch": 0.3508459709916483, "grad_norm": 0.796875, "learning_rate": 7.425641025641026e-06, "loss": 1.6189, "step": 55200 }, { "epoch": 0.3514815615188071, "grad_norm": 0.5546875, "learning_rate": 7.420512820512821e-06, "loss": 1.6273, "step": 55300 }, { "epoch": 0.3521171520459659, "grad_norm": 0.94140625, "learning_rate": 7.4153846153846164e-06, "loss": 1.6278, "step": 55400 }, { "epoch": 0.35275274257312467, "grad_norm": 0.75390625, "learning_rate": 7.410256410256411e-06, "loss": 1.6254, "step": 55500 }, { "epoch": 0.35338833310028345, "grad_norm": 0.5078125, "learning_rate": 7.405128205128206e-06, "loss": 1.6146, "step": 55600 }, { "epoch": 0.3540239236274423, "grad_norm": 0.58984375, "learning_rate": 7.4e-06, "loss": 1.6295, "step": 55700 }, { "epoch": 0.35465951415460106, "grad_norm": 0.94921875, "learning_rate": 7.394871794871795e-06, "loss": 1.6237, "step": 55800 }, { "epoch": 0.35529510468175984, "grad_norm": 0.953125, "learning_rate": 7.38974358974359e-06, "loss": 1.619, "step": 55900 }, { "epoch": 0.3559306952089186, "grad_norm": 0.62109375, "learning_rate": 7.384615384615386e-06, "loss": 1.631, "step": 56000 }, { "epoch": 0.3565662857360774, "grad_norm": 0.75390625, "learning_rate": 7.37948717948718e-06, "loss": 1.6142, "step": 56100 }, { "epoch": 0.3572018762632362, "grad_norm": 0.7421875, "learning_rate": 7.374358974358975e-06, "loss": 1.619, "step": 56200 }, { "epoch": 0.35783746679039496, "grad_norm": 1.140625, "learning_rate": 7.36923076923077e-06, "loss": 1.6294, "step": 56300 }, { "epoch": 0.35847305731755374, "grad_norm": 0.93359375, "learning_rate": 7.364102564102565e-06, "loss": 1.6206, "step": 56400 }, { "epoch": 0.3591086478447125, "grad_norm": 0.59765625, "learning_rate": 7.35897435897436e-06, "loss": 1.646, "step": 56500 }, { "epoch": 0.3597442383718713, "grad_norm": 1.09375, "learning_rate": 7.353846153846154e-06, "loss": 1.6298, "step": 56600 }, { "epoch": 0.3603798288990301, "grad_norm": 0.9765625, "learning_rate": 7.348717948717949e-06, "loss": 1.6214, "step": 56700 }, { "epoch": 0.36101541942618887, "grad_norm": 0.50390625, "learning_rate": 7.343589743589744e-06, "loss": 1.6305, "step": 56800 }, { "epoch": 0.36165100995334765, "grad_norm": 0.59765625, "learning_rate": 7.338461538461539e-06, "loss": 1.6268, "step": 56900 }, { "epoch": 0.3622866004805064, "grad_norm": 0.6484375, "learning_rate": 7.333333333333333e-06, "loss": 1.6267, "step": 57000 }, { "epoch": 0.3629221910076652, "grad_norm": 0.953125, "learning_rate": 7.328205128205129e-06, "loss": 1.6278, "step": 57100 }, { "epoch": 0.363557781534824, "grad_norm": 0.76171875, "learning_rate": 7.323076923076924e-06, "loss": 1.612, "step": 57200 }, { "epoch": 0.36419337206198277, "grad_norm": 0.8984375, "learning_rate": 7.317948717948719e-06, "loss": 1.6283, "step": 57300 }, { "epoch": 0.36482896258914155, "grad_norm": 0.9765625, "learning_rate": 7.312820512820514e-06, "loss": 1.6327, "step": 57400 }, { "epoch": 0.36546455311630033, "grad_norm": 1.046875, "learning_rate": 7.307692307692308e-06, "loss": 1.6393, "step": 57500 }, { "epoch": 0.36610014364345916, "grad_norm": 0.98828125, "learning_rate": 7.302564102564103e-06, "loss": 1.6296, "step": 57600 }, { "epoch": 0.36673573417061794, "grad_norm": 0.98828125, "learning_rate": 7.297435897435898e-06, "loss": 1.6276, "step": 57700 }, { "epoch": 0.3673713246977767, "grad_norm": 0.6796875, "learning_rate": 7.292307692307693e-06, "loss": 1.6174, "step": 57800 }, { "epoch": 0.3680069152249355, "grad_norm": 1.1171875, "learning_rate": 7.287179487179487e-06, "loss": 1.6387, "step": 57900 }, { "epoch": 0.3686425057520943, "grad_norm": 0.69140625, "learning_rate": 7.282051282051282e-06, "loss": 1.6251, "step": 58000 }, { "epoch": 0.36927809627925307, "grad_norm": 0.458984375, "learning_rate": 7.276923076923077e-06, "loss": 1.6321, "step": 58100 }, { "epoch": 0.36991368680641185, "grad_norm": 0.68359375, "learning_rate": 7.271794871794873e-06, "loss": 1.6283, "step": 58200 }, { "epoch": 0.3705492773335706, "grad_norm": 0.62890625, "learning_rate": 7.266666666666668e-06, "loss": 1.6319, "step": 58300 }, { "epoch": 0.3711848678607294, "grad_norm": 0.79296875, "learning_rate": 7.261538461538462e-06, "loss": 1.6195, "step": 58400 }, { "epoch": 0.3718204583878882, "grad_norm": 0.62109375, "learning_rate": 7.256410256410257e-06, "loss": 1.6246, "step": 58500 }, { "epoch": 0.37245604891504697, "grad_norm": 0.80078125, "learning_rate": 7.2512820512820515e-06, "loss": 1.61, "step": 58600 }, { "epoch": 0.37309163944220575, "grad_norm": 0.76953125, "learning_rate": 7.246153846153847e-06, "loss": 1.6302, "step": 58700 }, { "epoch": 0.3737272299693645, "grad_norm": 0.6953125, "learning_rate": 7.241025641025642e-06, "loss": 1.6367, "step": 58800 }, { "epoch": 0.3743628204965233, "grad_norm": 0.78125, "learning_rate": 7.235897435897436e-06, "loss": 1.6333, "step": 58900 }, { "epoch": 0.3749984110236821, "grad_norm": 0.90234375, "learning_rate": 7.230769230769231e-06, "loss": 1.6266, "step": 59000 }, { "epoch": 0.37563400155084087, "grad_norm": 0.62890625, "learning_rate": 7.225641025641026e-06, "loss": 1.6174, "step": 59100 }, { "epoch": 0.37626959207799965, "grad_norm": 0.87890625, "learning_rate": 7.220512820512822e-06, "loss": 1.6243, "step": 59200 }, { "epoch": 0.37690518260515843, "grad_norm": 0.97265625, "learning_rate": 7.215384615384617e-06, "loss": 1.6376, "step": 59300 }, { "epoch": 0.3775407731323172, "grad_norm": 1.03125, "learning_rate": 7.210256410256411e-06, "loss": 1.6324, "step": 59400 }, { "epoch": 0.37817636365947604, "grad_norm": 0.80078125, "learning_rate": 7.205128205128206e-06, "loss": 1.6226, "step": 59500 }, { "epoch": 0.3788119541866348, "grad_norm": 0.87890625, "learning_rate": 7.2000000000000005e-06, "loss": 1.6299, "step": 59600 }, { "epoch": 0.3794475447137936, "grad_norm": 0.734375, "learning_rate": 7.1948717948717946e-06, "loss": 1.6221, "step": 59700 }, { "epoch": 0.3800831352409524, "grad_norm": 0.74609375, "learning_rate": 7.189743589743591e-06, "loss": 1.6129, "step": 59800 }, { "epoch": 0.38071872576811117, "grad_norm": 0.455078125, "learning_rate": 7.184615384615385e-06, "loss": 1.626, "step": 59900 }, { "epoch": 0.38135431629526995, "grad_norm": 0.52734375, "learning_rate": 7.17948717948718e-06, "loss": 1.6091, "step": 60000 }, { "epoch": 0.3819899068224287, "grad_norm": 0.466796875, "learning_rate": 7.174358974358975e-06, "loss": 1.619, "step": 60100 }, { "epoch": 0.3826254973495875, "grad_norm": 0.79296875, "learning_rate": 7.169230769230769e-06, "loss": 1.624, "step": 60200 }, { "epoch": 0.3832610878767463, "grad_norm": 0.4921875, "learning_rate": 7.164102564102565e-06, "loss": 1.6376, "step": 60300 }, { "epoch": 0.38389667840390507, "grad_norm": 0.6171875, "learning_rate": 7.15897435897436e-06, "loss": 1.6373, "step": 60400 }, { "epoch": 0.38453226893106385, "grad_norm": 0.58203125, "learning_rate": 7.153846153846155e-06, "loss": 1.6022, "step": 60500 }, { "epoch": 0.38516785945822263, "grad_norm": 0.64453125, "learning_rate": 7.1487179487179495e-06, "loss": 1.626, "step": 60600 }, { "epoch": 0.3858034499853814, "grad_norm": 0.68359375, "learning_rate": 7.1435897435897436e-06, "loss": 1.6359, "step": 60700 }, { "epoch": 0.3864390405125402, "grad_norm": 0.75390625, "learning_rate": 7.1384615384615385e-06, "loss": 1.6121, "step": 60800 }, { "epoch": 0.38707463103969897, "grad_norm": 0.55859375, "learning_rate": 7.133333333333334e-06, "loss": 1.6308, "step": 60900 }, { "epoch": 0.38771022156685775, "grad_norm": 0.7578125, "learning_rate": 7.128205128205129e-06, "loss": 1.6218, "step": 61000 }, { "epoch": 0.38834581209401653, "grad_norm": 0.921875, "learning_rate": 7.123076923076924e-06, "loss": 1.6201, "step": 61100 }, { "epoch": 0.3889814026211753, "grad_norm": 0.77734375, "learning_rate": 7.117948717948718e-06, "loss": 1.6238, "step": 61200 }, { "epoch": 0.3896169931483341, "grad_norm": 0.72265625, "learning_rate": 7.112820512820513e-06, "loss": 1.6237, "step": 61300 }, { "epoch": 0.3902525836754929, "grad_norm": 0.4609375, "learning_rate": 7.107692307692309e-06, "loss": 1.6169, "step": 61400 }, { "epoch": 0.3908881742026517, "grad_norm": 0.7265625, "learning_rate": 7.102564102564104e-06, "loss": 1.6405, "step": 61500 }, { "epoch": 0.3915237647298105, "grad_norm": 1.1953125, "learning_rate": 7.0974358974358985e-06, "loss": 1.6304, "step": 61600 }, { "epoch": 0.39215935525696927, "grad_norm": 1.0625, "learning_rate": 7.0923076923076926e-06, "loss": 1.6279, "step": 61700 }, { "epoch": 0.39279494578412805, "grad_norm": 0.84765625, "learning_rate": 7.0871794871794875e-06, "loss": 1.6094, "step": 61800 }, { "epoch": 0.3934305363112868, "grad_norm": 0.65234375, "learning_rate": 7.082051282051282e-06, "loss": 1.6176, "step": 61900 }, { "epoch": 0.3940661268384456, "grad_norm": 0.98046875, "learning_rate": 7.076923076923078e-06, "loss": 1.6207, "step": 62000 }, { "epoch": 0.3947017173656044, "grad_norm": 1.0546875, "learning_rate": 7.071794871794872e-06, "loss": 1.6209, "step": 62100 }, { "epoch": 0.39533730789276317, "grad_norm": 0.734375, "learning_rate": 7.066666666666667e-06, "loss": 1.6333, "step": 62200 }, { "epoch": 0.39597289841992195, "grad_norm": 1.2265625, "learning_rate": 7.061538461538462e-06, "loss": 1.6369, "step": 62300 }, { "epoch": 0.39660848894708073, "grad_norm": 0.75390625, "learning_rate": 7.056410256410257e-06, "loss": 1.6412, "step": 62400 }, { "epoch": 0.3972440794742395, "grad_norm": 0.77734375, "learning_rate": 7.051282051282053e-06, "loss": 1.6256, "step": 62500 }, { "epoch": 0.3978796700013983, "grad_norm": 0.66796875, "learning_rate": 7.046153846153847e-06, "loss": 1.6118, "step": 62600 }, { "epoch": 0.39851526052855707, "grad_norm": 0.84375, "learning_rate": 7.0410256410256415e-06, "loss": 1.6338, "step": 62700 }, { "epoch": 0.39915085105571585, "grad_norm": 1.5078125, "learning_rate": 7.0358974358974364e-06, "loss": 1.6091, "step": 62800 }, { "epoch": 0.39978644158287463, "grad_norm": 0.59375, "learning_rate": 7.030769230769231e-06, "loss": 1.6314, "step": 62900 }, { "epoch": 0.4004220321100334, "grad_norm": 0.82421875, "learning_rate": 7.025641025641025e-06, "loss": 1.6059, "step": 63000 }, { "epoch": 0.4010576226371922, "grad_norm": 0.859375, "learning_rate": 7.020512820512821e-06, "loss": 1.6246, "step": 63100 }, { "epoch": 0.401693213164351, "grad_norm": 1.1953125, "learning_rate": 7.015384615384616e-06, "loss": 1.6295, "step": 63200 }, { "epoch": 0.4023288036915098, "grad_norm": 0.70703125, "learning_rate": 7.010256410256411e-06, "loss": 1.6275, "step": 63300 }, { "epoch": 0.4029643942186686, "grad_norm": 0.98046875, "learning_rate": 7.005128205128206e-06, "loss": 1.6077, "step": 63400 }, { "epoch": 0.40359998474582737, "grad_norm": 0.64453125, "learning_rate": 7e-06, "loss": 1.6033, "step": 63500 }, { "epoch": 0.40423557527298615, "grad_norm": 1.0859375, "learning_rate": 6.994871794871796e-06, "loss": 1.6069, "step": 63600 }, { "epoch": 0.4048711658001449, "grad_norm": 0.73828125, "learning_rate": 6.9897435897435905e-06, "loss": 1.6142, "step": 63700 }, { "epoch": 0.4055067563273037, "grad_norm": 0.5546875, "learning_rate": 6.9846153846153854e-06, "loss": 1.6277, "step": 63800 }, { "epoch": 0.4061423468544625, "grad_norm": 0.6953125, "learning_rate": 6.9794871794871795e-06, "loss": 1.6277, "step": 63900 }, { "epoch": 0.40677793738162127, "grad_norm": 0.734375, "learning_rate": 6.974358974358974e-06, "loss": 1.6231, "step": 64000 }, { "epoch": 0.40741352790878005, "grad_norm": 0.7734375, "learning_rate": 6.96923076923077e-06, "loss": 1.6271, "step": 64100 }, { "epoch": 0.40804911843593883, "grad_norm": 0.69140625, "learning_rate": 6.964102564102565e-06, "loss": 1.619, "step": 64200 }, { "epoch": 0.4086847089630976, "grad_norm": 1.1015625, "learning_rate": 6.95897435897436e-06, "loss": 1.6364, "step": 64300 }, { "epoch": 0.4093202994902564, "grad_norm": 1.0078125, "learning_rate": 6.953846153846154e-06, "loss": 1.6403, "step": 64400 }, { "epoch": 0.40995589001741517, "grad_norm": 0.734375, "learning_rate": 6.948717948717949e-06, "loss": 1.6099, "step": 64500 }, { "epoch": 0.41059148054457395, "grad_norm": 0.91796875, "learning_rate": 6.943589743589744e-06, "loss": 1.6325, "step": 64600 }, { "epoch": 0.41122707107173273, "grad_norm": 0.96484375, "learning_rate": 6.9384615384615395e-06, "loss": 1.6174, "step": 64700 }, { "epoch": 0.4118626615988915, "grad_norm": 0.8828125, "learning_rate": 6.9333333333333344e-06, "loss": 1.6233, "step": 64800 }, { "epoch": 0.4124982521260503, "grad_norm": 0.94921875, "learning_rate": 6.9282051282051285e-06, "loss": 1.6225, "step": 64900 }, { "epoch": 0.41313384265320907, "grad_norm": 1.0234375, "learning_rate": 6.923076923076923e-06, "loss": 1.6309, "step": 65000 }, { "epoch": 0.4137694331803679, "grad_norm": 0.59375, "learning_rate": 6.917948717948718e-06, "loss": 1.6209, "step": 65100 }, { "epoch": 0.4144050237075267, "grad_norm": 0.70703125, "learning_rate": 6.912820512820514e-06, "loss": 1.6248, "step": 65200 }, { "epoch": 0.41504061423468547, "grad_norm": 0.76953125, "learning_rate": 6.907692307692309e-06, "loss": 1.624, "step": 65300 }, { "epoch": 0.41567620476184425, "grad_norm": 1.125, "learning_rate": 6.902564102564103e-06, "loss": 1.6319, "step": 65400 }, { "epoch": 0.41631179528900303, "grad_norm": 0.8046875, "learning_rate": 6.897435897435898e-06, "loss": 1.6336, "step": 65500 }, { "epoch": 0.4169473858161618, "grad_norm": 0.67578125, "learning_rate": 6.892307692307693e-06, "loss": 1.5998, "step": 65600 }, { "epoch": 0.4175829763433206, "grad_norm": 0.5703125, "learning_rate": 6.887179487179488e-06, "loss": 1.6258, "step": 65700 }, { "epoch": 0.41821856687047937, "grad_norm": 0.8515625, "learning_rate": 6.882051282051283e-06, "loss": 1.62, "step": 65800 }, { "epoch": 0.41885415739763815, "grad_norm": 1.1484375, "learning_rate": 6.8769230769230775e-06, "loss": 1.6249, "step": 65900 }, { "epoch": 0.41948974792479693, "grad_norm": 0.83203125, "learning_rate": 6.871794871794872e-06, "loss": 1.6178, "step": 66000 }, { "epoch": 0.4201253384519557, "grad_norm": 0.8828125, "learning_rate": 6.866666666666667e-06, "loss": 1.623, "step": 66100 }, { "epoch": 0.4207609289791145, "grad_norm": 0.7421875, "learning_rate": 6.861538461538461e-06, "loss": 1.6172, "step": 66200 }, { "epoch": 0.42139651950627327, "grad_norm": 0.8671875, "learning_rate": 6.856410256410257e-06, "loss": 1.6335, "step": 66300 }, { "epoch": 0.42203211003343205, "grad_norm": 1.03125, "learning_rate": 6.851282051282052e-06, "loss": 1.6238, "step": 66400 }, { "epoch": 0.42266770056059083, "grad_norm": 0.7890625, "learning_rate": 6.846153846153847e-06, "loss": 1.6213, "step": 66500 }, { "epoch": 0.4233032910877496, "grad_norm": 0.9453125, "learning_rate": 6.841025641025642e-06, "loss": 1.608, "step": 66600 }, { "epoch": 0.4239388816149084, "grad_norm": 0.7109375, "learning_rate": 6.835897435897436e-06, "loss": 1.6241, "step": 66700 }, { "epoch": 0.42457447214206717, "grad_norm": 0.7265625, "learning_rate": 6.830769230769231e-06, "loss": 1.6229, "step": 66800 }, { "epoch": 0.42521006266922595, "grad_norm": 0.7890625, "learning_rate": 6.8256410256410265e-06, "loss": 1.5989, "step": 66900 }, { "epoch": 0.4258456531963848, "grad_norm": 0.82421875, "learning_rate": 6.820512820512821e-06, "loss": 1.616, "step": 67000 }, { "epoch": 0.42648124372354357, "grad_norm": 2.59375, "learning_rate": 6.815384615384616e-06, "loss": 1.6209, "step": 67100 }, { "epoch": 0.42711683425070235, "grad_norm": 0.84375, "learning_rate": 6.81025641025641e-06, "loss": 1.6152, "step": 67200 }, { "epoch": 0.42775242477786113, "grad_norm": 0.89453125, "learning_rate": 6.805128205128205e-06, "loss": 1.6277, "step": 67300 }, { "epoch": 0.4283880153050199, "grad_norm": 0.8828125, "learning_rate": 6.800000000000001e-06, "loss": 1.6131, "step": 67400 }, { "epoch": 0.4290236058321787, "grad_norm": 0.625, "learning_rate": 6.794871794871796e-06, "loss": 1.6216, "step": 67500 }, { "epoch": 0.42965919635933747, "grad_norm": 0.76171875, "learning_rate": 6.789743589743591e-06, "loss": 1.641, "step": 67600 }, { "epoch": 0.43029478688649625, "grad_norm": 0.89453125, "learning_rate": 6.784615384615385e-06, "loss": 1.6123, "step": 67700 }, { "epoch": 0.43093037741365503, "grad_norm": 0.97265625, "learning_rate": 6.77948717948718e-06, "loss": 1.6135, "step": 67800 }, { "epoch": 0.4315659679408138, "grad_norm": 0.6953125, "learning_rate": 6.774358974358975e-06, "loss": 1.6264, "step": 67900 }, { "epoch": 0.4322015584679726, "grad_norm": 0.83203125, "learning_rate": 6.76923076923077e-06, "loss": 1.625, "step": 68000 }, { "epoch": 0.43283714899513137, "grad_norm": 0.7734375, "learning_rate": 6.764102564102564e-06, "loss": 1.6164, "step": 68100 }, { "epoch": 0.43347273952229015, "grad_norm": 0.890625, "learning_rate": 6.758974358974359e-06, "loss": 1.6189, "step": 68200 }, { "epoch": 0.43410833004944893, "grad_norm": 0.84765625, "learning_rate": 6.753846153846154e-06, "loss": 1.6089, "step": 68300 }, { "epoch": 0.4347439205766077, "grad_norm": 1.0546875, "learning_rate": 6.748717948717949e-06, "loss": 1.624, "step": 68400 }, { "epoch": 0.4353795111037665, "grad_norm": 0.57421875, "learning_rate": 6.743589743589745e-06, "loss": 1.6255, "step": 68500 }, { "epoch": 0.4360151016309253, "grad_norm": 0.55859375, "learning_rate": 6.738461538461539e-06, "loss": 1.6231, "step": 68600 }, { "epoch": 0.43665069215808405, "grad_norm": 0.80859375, "learning_rate": 6.733333333333334e-06, "loss": 1.6272, "step": 68700 }, { "epoch": 0.43728628268524283, "grad_norm": 0.67578125, "learning_rate": 6.728205128205129e-06, "loss": 1.615, "step": 68800 }, { "epoch": 0.43792187321240167, "grad_norm": 1.171875, "learning_rate": 6.723076923076924e-06, "loss": 1.602, "step": 68900 }, { "epoch": 0.43855746373956045, "grad_norm": 0.87109375, "learning_rate": 6.717948717948718e-06, "loss": 1.6188, "step": 69000 }, { "epoch": 0.43919305426671923, "grad_norm": 0.54296875, "learning_rate": 6.712820512820513e-06, "loss": 1.6165, "step": 69100 }, { "epoch": 0.439828644793878, "grad_norm": 0.62890625, "learning_rate": 6.707692307692308e-06, "loss": 1.6149, "step": 69200 }, { "epoch": 0.4404642353210368, "grad_norm": 0.7109375, "learning_rate": 6.702564102564103e-06, "loss": 1.6197, "step": 69300 }, { "epoch": 0.44109982584819557, "grad_norm": 0.73046875, "learning_rate": 6.697435897435898e-06, "loss": 1.6023, "step": 69400 }, { "epoch": 0.44173541637535435, "grad_norm": 0.92578125, "learning_rate": 6.692307692307692e-06, "loss": 1.6237, "step": 69500 }, { "epoch": 0.44237100690251313, "grad_norm": 0.96875, "learning_rate": 6.687179487179488e-06, "loss": 1.6323, "step": 69600 }, { "epoch": 0.4430065974296719, "grad_norm": 1.03125, "learning_rate": 6.682051282051283e-06, "loss": 1.6327, "step": 69700 }, { "epoch": 0.4436421879568307, "grad_norm": 0.78515625, "learning_rate": 6.676923076923078e-06, "loss": 1.6022, "step": 69800 }, { "epoch": 0.44427777848398947, "grad_norm": 1.0078125, "learning_rate": 6.671794871794873e-06, "loss": 1.612, "step": 69900 }, { "epoch": 0.44491336901114825, "grad_norm": 0.5859375, "learning_rate": 6.666666666666667e-06, "loss": 1.6142, "step": 70000 }, { "epoch": 0.44554895953830703, "grad_norm": 0.671875, "learning_rate": 6.661538461538462e-06, "loss": 1.6161, "step": 70100 }, { "epoch": 0.4461845500654658, "grad_norm": 1.1875, "learning_rate": 6.656410256410257e-06, "loss": 1.6224, "step": 70200 }, { "epoch": 0.4468201405926246, "grad_norm": 0.83203125, "learning_rate": 6.651282051282052e-06, "loss": 1.6251, "step": 70300 }, { "epoch": 0.4474557311197834, "grad_norm": 0.85546875, "learning_rate": 6.646153846153846e-06, "loss": 1.6293, "step": 70400 }, { "epoch": 0.44809132164694215, "grad_norm": 0.671875, "learning_rate": 6.641025641025641e-06, "loss": 1.6059, "step": 70500 }, { "epoch": 0.44872691217410093, "grad_norm": 0.671875, "learning_rate": 6.635897435897436e-06, "loss": 1.6126, "step": 70600 }, { "epoch": 0.4493625027012597, "grad_norm": 0.953125, "learning_rate": 6.630769230769232e-06, "loss": 1.6285, "step": 70700 }, { "epoch": 0.44999809322841855, "grad_norm": 0.8515625, "learning_rate": 6.625641025641027e-06, "loss": 1.6192, "step": 70800 }, { "epoch": 0.45063368375557733, "grad_norm": 0.98046875, "learning_rate": 6.620512820512821e-06, "loss": 1.6384, "step": 70900 }, { "epoch": 0.4512692742827361, "grad_norm": 0.91015625, "learning_rate": 6.615384615384616e-06, "loss": 1.6262, "step": 71000 }, { "epoch": 0.4519048648098949, "grad_norm": 0.79296875, "learning_rate": 6.6102564102564105e-06, "loss": 1.6247, "step": 71100 }, { "epoch": 0.45254045533705367, "grad_norm": 0.6484375, "learning_rate": 6.605128205128206e-06, "loss": 1.6194, "step": 71200 }, { "epoch": 0.45317604586421245, "grad_norm": 0.79296875, "learning_rate": 6.600000000000001e-06, "loss": 1.6058, "step": 71300 }, { "epoch": 0.45381163639137123, "grad_norm": 0.78515625, "learning_rate": 6.594871794871795e-06, "loss": 1.6114, "step": 71400 }, { "epoch": 0.45444722691853, "grad_norm": 0.984375, "learning_rate": 6.58974358974359e-06, "loss": 1.6256, "step": 71500 }, { "epoch": 0.4550828174456888, "grad_norm": 0.78125, "learning_rate": 6.584615384615385e-06, "loss": 1.6191, "step": 71600 }, { "epoch": 0.4557184079728476, "grad_norm": 0.7109375, "learning_rate": 6.57948717948718e-06, "loss": 1.6058, "step": 71700 }, { "epoch": 0.45635399850000635, "grad_norm": 0.89453125, "learning_rate": 6.574358974358976e-06, "loss": 1.6137, "step": 71800 }, { "epoch": 0.45698958902716513, "grad_norm": 1.0078125, "learning_rate": 6.56923076923077e-06, "loss": 1.6105, "step": 71900 }, { "epoch": 0.4576251795543239, "grad_norm": 1.09375, "learning_rate": 6.564102564102565e-06, "loss": 1.6335, "step": 72000 }, { "epoch": 0.4582607700814827, "grad_norm": 0.8203125, "learning_rate": 6.5589743589743595e-06, "loss": 1.6285, "step": 72100 }, { "epoch": 0.4588963606086415, "grad_norm": 0.671875, "learning_rate": 6.553846153846154e-06, "loss": 1.6156, "step": 72200 }, { "epoch": 0.45953195113580025, "grad_norm": 0.87109375, "learning_rate": 6.548717948717949e-06, "loss": 1.605, "step": 72300 }, { "epoch": 0.46016754166295903, "grad_norm": 1.0546875, "learning_rate": 6.543589743589744e-06, "loss": 1.6195, "step": 72400 }, { "epoch": 0.4608031321901178, "grad_norm": 0.61328125, "learning_rate": 6.538461538461539e-06, "loss": 1.6225, "step": 72500 }, { "epoch": 0.4614387227172766, "grad_norm": 1.1640625, "learning_rate": 6.533333333333334e-06, "loss": 1.6157, "step": 72600 }, { "epoch": 0.46207431324443543, "grad_norm": 0.90625, "learning_rate": 6.528205128205128e-06, "loss": 1.6234, "step": 72700 }, { "epoch": 0.4627099037715942, "grad_norm": 0.91796875, "learning_rate": 6.523076923076923e-06, "loss": 1.6382, "step": 72800 }, { "epoch": 0.463345494298753, "grad_norm": 0.78515625, "learning_rate": 6.517948717948719e-06, "loss": 1.6067, "step": 72900 }, { "epoch": 0.46398108482591177, "grad_norm": 0.84765625, "learning_rate": 6.512820512820514e-06, "loss": 1.6205, "step": 73000 }, { "epoch": 0.46461667535307055, "grad_norm": 0.96875, "learning_rate": 6.5076923076923085e-06, "loss": 1.6172, "step": 73100 }, { "epoch": 0.46525226588022933, "grad_norm": 0.71484375, "learning_rate": 6.5025641025641026e-06, "loss": 1.6187, "step": 73200 }, { "epoch": 0.4658878564073881, "grad_norm": 0.859375, "learning_rate": 6.4974358974358975e-06, "loss": 1.6047, "step": 73300 }, { "epoch": 0.4665234469345469, "grad_norm": 0.80078125, "learning_rate": 6.492307692307693e-06, "loss": 1.6084, "step": 73400 }, { "epoch": 0.4671590374617057, "grad_norm": 0.92578125, "learning_rate": 6.487179487179488e-06, "loss": 1.6262, "step": 73500 }, { "epoch": 0.46779462798886445, "grad_norm": 0.9453125, "learning_rate": 6.482051282051283e-06, "loss": 1.6148, "step": 73600 }, { "epoch": 0.46843021851602323, "grad_norm": 0.9609375, "learning_rate": 6.476923076923077e-06, "loss": 1.6225, "step": 73700 }, { "epoch": 0.469065809043182, "grad_norm": 1.0625, "learning_rate": 6.471794871794872e-06, "loss": 1.6238, "step": 73800 }, { "epoch": 0.4697013995703408, "grad_norm": 0.921875, "learning_rate": 6.466666666666667e-06, "loss": 1.6082, "step": 73900 }, { "epoch": 0.4703369900974996, "grad_norm": 0.6484375, "learning_rate": 6.461538461538463e-06, "loss": 1.6232, "step": 74000 }, { "epoch": 0.47097258062465835, "grad_norm": 0.59765625, "learning_rate": 6.4564102564102575e-06, "loss": 1.6253, "step": 74100 }, { "epoch": 0.47160817115181713, "grad_norm": 0.86328125, "learning_rate": 6.4512820512820516e-06, "loss": 1.618, "step": 74200 }, { "epoch": 0.4722437616789759, "grad_norm": 0.58984375, "learning_rate": 6.4461538461538465e-06, "loss": 1.6153, "step": 74300 }, { "epoch": 0.4728793522061347, "grad_norm": 0.74609375, "learning_rate": 6.441025641025641e-06, "loss": 1.6182, "step": 74400 }, { "epoch": 0.4735149427332935, "grad_norm": 0.80078125, "learning_rate": 6.435897435897437e-06, "loss": 1.6308, "step": 74500 }, { "epoch": 0.4741505332604523, "grad_norm": 0.859375, "learning_rate": 6.430769230769231e-06, "loss": 1.6259, "step": 74600 }, { "epoch": 0.4747861237876111, "grad_norm": 1.1171875, "learning_rate": 6.425641025641026e-06, "loss": 1.6183, "step": 74700 }, { "epoch": 0.47542171431476987, "grad_norm": 0.890625, "learning_rate": 6.420512820512821e-06, "loss": 1.6124, "step": 74800 }, { "epoch": 0.47605730484192865, "grad_norm": 0.62109375, "learning_rate": 6.415384615384616e-06, "loss": 1.6082, "step": 74900 }, { "epoch": 0.47669289536908743, "grad_norm": 0.95703125, "learning_rate": 6.410256410256412e-06, "loss": 1.6288, "step": 75000 }, { "epoch": 0.4773284858962462, "grad_norm": 0.68359375, "learning_rate": 6.405128205128206e-06, "loss": 1.6089, "step": 75100 }, { "epoch": 0.477964076423405, "grad_norm": 0.67578125, "learning_rate": 6.4000000000000006e-06, "loss": 1.6039, "step": 75200 }, { "epoch": 0.4785996669505638, "grad_norm": 0.81640625, "learning_rate": 6.3948717948717955e-06, "loss": 1.6228, "step": 75300 }, { "epoch": 0.47923525747772255, "grad_norm": 0.62890625, "learning_rate": 6.38974358974359e-06, "loss": 1.6295, "step": 75400 }, { "epoch": 0.47987084800488133, "grad_norm": 0.7734375, "learning_rate": 6.384615384615384e-06, "loss": 1.6375, "step": 75500 }, { "epoch": 0.4805064385320401, "grad_norm": 0.8515625, "learning_rate": 6.37948717948718e-06, "loss": 1.621, "step": 75600 }, { "epoch": 0.4811420290591989, "grad_norm": 0.76171875, "learning_rate": 6.374358974358975e-06, "loss": 1.6176, "step": 75700 }, { "epoch": 0.4817776195863577, "grad_norm": 0.55078125, "learning_rate": 6.36923076923077e-06, "loss": 1.6037, "step": 75800 }, { "epoch": 0.48241321011351646, "grad_norm": 0.66796875, "learning_rate": 6.364102564102565e-06, "loss": 1.6202, "step": 75900 }, { "epoch": 0.48304880064067524, "grad_norm": 0.84375, "learning_rate": 6.358974358974359e-06, "loss": 1.6072, "step": 76000 }, { "epoch": 0.483684391167834, "grad_norm": 0.89453125, "learning_rate": 6.353846153846155e-06, "loss": 1.6253, "step": 76100 }, { "epoch": 0.4843199816949928, "grad_norm": 0.70703125, "learning_rate": 6.3487179487179495e-06, "loss": 1.6154, "step": 76200 }, { "epoch": 0.4849555722221516, "grad_norm": 0.79296875, "learning_rate": 6.3435897435897444e-06, "loss": 1.6045, "step": 76300 }, { "epoch": 0.48559116274931036, "grad_norm": 0.86328125, "learning_rate": 6.3384615384615385e-06, "loss": 1.63, "step": 76400 }, { "epoch": 0.4862267532764692, "grad_norm": 1.140625, "learning_rate": 6.333333333333333e-06, "loss": 1.615, "step": 76500 }, { "epoch": 0.486862343803628, "grad_norm": 1.03125, "learning_rate": 6.328205128205128e-06, "loss": 1.6177, "step": 76600 }, { "epoch": 0.48749793433078675, "grad_norm": 0.9140625, "learning_rate": 6.323076923076924e-06, "loss": 1.6147, "step": 76700 }, { "epoch": 0.48813352485794553, "grad_norm": 0.5390625, "learning_rate": 6.317948717948719e-06, "loss": 1.6187, "step": 76800 }, { "epoch": 0.4887691153851043, "grad_norm": 0.71484375, "learning_rate": 6.312820512820513e-06, "loss": 1.6278, "step": 76900 }, { "epoch": 0.4894047059122631, "grad_norm": 0.92578125, "learning_rate": 6.307692307692308e-06, "loss": 1.613, "step": 77000 }, { "epoch": 0.4900402964394219, "grad_norm": 0.76171875, "learning_rate": 6.302564102564103e-06, "loss": 1.6121, "step": 77100 }, { "epoch": 0.49067588696658065, "grad_norm": 0.97265625, "learning_rate": 6.2974358974358985e-06, "loss": 1.6129, "step": 77200 }, { "epoch": 0.49131147749373943, "grad_norm": 0.70703125, "learning_rate": 6.2923076923076934e-06, "loss": 1.6027, "step": 77300 }, { "epoch": 0.4919470680208982, "grad_norm": 0.734375, "learning_rate": 6.2871794871794875e-06, "loss": 1.6215, "step": 77400 }, { "epoch": 0.492582658548057, "grad_norm": 1.09375, "learning_rate": 6.282051282051282e-06, "loss": 1.6139, "step": 77500 }, { "epoch": 0.4932182490752158, "grad_norm": 0.71875, "learning_rate": 6.276923076923077e-06, "loss": 1.6079, "step": 77600 }, { "epoch": 0.49385383960237456, "grad_norm": 0.73828125, "learning_rate": 6.271794871794872e-06, "loss": 1.6265, "step": 77700 }, { "epoch": 0.49448943012953334, "grad_norm": 0.8046875, "learning_rate": 6.266666666666668e-06, "loss": 1.6138, "step": 77800 }, { "epoch": 0.4951250206566921, "grad_norm": 0.69140625, "learning_rate": 6.261538461538462e-06, "loss": 1.6229, "step": 77900 }, { "epoch": 0.4957606111838509, "grad_norm": 0.96875, "learning_rate": 6.256410256410257e-06, "loss": 1.6074, "step": 78000 }, { "epoch": 0.4963962017110097, "grad_norm": 0.8359375, "learning_rate": 6.251282051282052e-06, "loss": 1.61, "step": 78100 }, { "epoch": 0.49703179223816846, "grad_norm": 0.9375, "learning_rate": 6.246153846153846e-06, "loss": 1.6369, "step": 78200 }, { "epoch": 0.4976673827653273, "grad_norm": 1.0390625, "learning_rate": 6.2410256410256424e-06, "loss": 1.6319, "step": 78300 }, { "epoch": 0.4983029732924861, "grad_norm": 0.72265625, "learning_rate": 6.2358974358974365e-06, "loss": 1.6078, "step": 78400 }, { "epoch": 0.49893856381964485, "grad_norm": 1.1171875, "learning_rate": 6.230769230769231e-06, "loss": 1.6118, "step": 78500 }, { "epoch": 0.49957415434680363, "grad_norm": 0.80859375, "learning_rate": 6.225641025641026e-06, "loss": 1.6093, "step": 78600 }, { "epoch": 0.5002097448739624, "grad_norm": 0.71875, "learning_rate": 6.22051282051282e-06, "loss": 1.6282, "step": 78700 }, { "epoch": 0.5008453354011212, "grad_norm": 0.921875, "learning_rate": 6.215384615384615e-06, "loss": 1.6049, "step": 78800 }, { "epoch": 0.50148092592828, "grad_norm": 0.80078125, "learning_rate": 6.210256410256411e-06, "loss": 1.6071, "step": 78900 }, { "epoch": 0.5021165164554388, "grad_norm": 0.98046875, "learning_rate": 6.205128205128206e-06, "loss": 1.6211, "step": 79000 }, { "epoch": 0.5027521069825975, "grad_norm": 0.84375, "learning_rate": 6.200000000000001e-06, "loss": 1.6098, "step": 79100 }, { "epoch": 0.5033876975097563, "grad_norm": 0.765625, "learning_rate": 6.194871794871795e-06, "loss": 1.6244, "step": 79200 }, { "epoch": 0.5040232880369151, "grad_norm": 0.5390625, "learning_rate": 6.18974358974359e-06, "loss": 1.6257, "step": 79300 }, { "epoch": 0.5046588785640739, "grad_norm": 0.8828125, "learning_rate": 6.1846153846153855e-06, "loss": 1.6124, "step": 79400 }, { "epoch": 0.5052944690912327, "grad_norm": 0.83203125, "learning_rate": 6.17948717948718e-06, "loss": 1.6267, "step": 79500 }, { "epoch": 0.5059300596183914, "grad_norm": 1.2734375, "learning_rate": 6.174358974358975e-06, "loss": 1.6178, "step": 79600 }, { "epoch": 0.5065656501455502, "grad_norm": 0.8125, "learning_rate": 6.169230769230769e-06, "loss": 1.6265, "step": 79700 }, { "epoch": 0.507201240672709, "grad_norm": 1.15625, "learning_rate": 6.164102564102564e-06, "loss": 1.6248, "step": 79800 }, { "epoch": 0.5078368311998678, "grad_norm": 0.8046875, "learning_rate": 6.15897435897436e-06, "loss": 1.6117, "step": 79900 }, { "epoch": 0.5084724217270266, "grad_norm": 0.7265625, "learning_rate": 6.153846153846155e-06, "loss": 1.6164, "step": 80000 }, { "epoch": 0.5091080122541853, "grad_norm": 0.7734375, "learning_rate": 6.14871794871795e-06, "loss": 1.6257, "step": 80100 }, { "epoch": 0.5097436027813441, "grad_norm": 0.57421875, "learning_rate": 6.143589743589744e-06, "loss": 1.6203, "step": 80200 }, { "epoch": 0.5103791933085029, "grad_norm": 1.15625, "learning_rate": 6.138461538461539e-06, "loss": 1.6308, "step": 80300 }, { "epoch": 0.5110147838356617, "grad_norm": 1.0234375, "learning_rate": 6.133333333333334e-06, "loss": 1.6082, "step": 80400 }, { "epoch": 0.5116503743628205, "grad_norm": 1.078125, "learning_rate": 6.128205128205129e-06, "loss": 1.6221, "step": 80500 }, { "epoch": 0.5122859648899792, "grad_norm": 0.7421875, "learning_rate": 6.123076923076923e-06, "loss": 1.6169, "step": 80600 }, { "epoch": 0.512921555417138, "grad_norm": 0.81640625, "learning_rate": 6.117948717948718e-06, "loss": 1.5958, "step": 80700 }, { "epoch": 0.5135571459442968, "grad_norm": 0.8828125, "learning_rate": 6.112820512820513e-06, "loss": 1.6164, "step": 80800 }, { "epoch": 0.5141927364714556, "grad_norm": 0.77734375, "learning_rate": 6.107692307692308e-06, "loss": 1.6204, "step": 80900 }, { "epoch": 0.5148283269986144, "grad_norm": 0.72265625, "learning_rate": 6.102564102564104e-06, "loss": 1.6124, "step": 81000 }, { "epoch": 0.5154639175257731, "grad_norm": 0.828125, "learning_rate": 6.097435897435898e-06, "loss": 1.6359, "step": 81100 }, { "epoch": 0.516099508052932, "grad_norm": 1.0703125, "learning_rate": 6.092307692307693e-06, "loss": 1.625, "step": 81200 }, { "epoch": 0.5167350985800908, "grad_norm": 0.8046875, "learning_rate": 6.087179487179488e-06, "loss": 1.6147, "step": 81300 }, { "epoch": 0.5173706891072496, "grad_norm": 0.66015625, "learning_rate": 6.082051282051283e-06, "loss": 1.6221, "step": 81400 }, { "epoch": 0.5180062796344084, "grad_norm": 1.0234375, "learning_rate": 6.076923076923077e-06, "loss": 1.6185, "step": 81500 }, { "epoch": 0.5186418701615672, "grad_norm": 1.296875, "learning_rate": 6.071794871794872e-06, "loss": 1.6126, "step": 81600 }, { "epoch": 0.5192774606887259, "grad_norm": 0.56640625, "learning_rate": 6.066666666666667e-06, "loss": 1.6242, "step": 81700 }, { "epoch": 0.5199130512158847, "grad_norm": 0.609375, "learning_rate": 6.061538461538462e-06, "loss": 1.6178, "step": 81800 }, { "epoch": 0.5205486417430435, "grad_norm": 0.875, "learning_rate": 6.056410256410257e-06, "loss": 1.6235, "step": 81900 }, { "epoch": 0.5211842322702023, "grad_norm": 0.71484375, "learning_rate": 6.051282051282051e-06, "loss": 1.6243, "step": 82000 }, { "epoch": 0.521819822797361, "grad_norm": 0.89453125, "learning_rate": 6.046153846153847e-06, "loss": 1.637, "step": 82100 }, { "epoch": 0.5224554133245198, "grad_norm": 0.83203125, "learning_rate": 6.041025641025642e-06, "loss": 1.5933, "step": 82200 }, { "epoch": 0.5230910038516786, "grad_norm": 0.95703125, "learning_rate": 6.035897435897437e-06, "loss": 1.6261, "step": 82300 }, { "epoch": 0.5237265943788374, "grad_norm": 0.79296875, "learning_rate": 6.030769230769231e-06, "loss": 1.629, "step": 82400 }, { "epoch": 0.5243621849059962, "grad_norm": 0.64453125, "learning_rate": 6.025641025641026e-06, "loss": 1.616, "step": 82500 }, { "epoch": 0.524997775433155, "grad_norm": 0.66015625, "learning_rate": 6.0205128205128206e-06, "loss": 1.6094, "step": 82600 }, { "epoch": 0.5256333659603137, "grad_norm": 0.609375, "learning_rate": 6.015384615384616e-06, "loss": 1.6166, "step": 82700 }, { "epoch": 0.5262689564874725, "grad_norm": 0.8828125, "learning_rate": 6.010256410256411e-06, "loss": 1.6264, "step": 82800 }, { "epoch": 0.5269045470146313, "grad_norm": 0.921875, "learning_rate": 6.005128205128205e-06, "loss": 1.6136, "step": 82900 }, { "epoch": 0.5275401375417901, "grad_norm": 0.8125, "learning_rate": 6e-06, "loss": 1.6208, "step": 83000 }, { "epoch": 0.5281757280689489, "grad_norm": 0.6953125, "learning_rate": 5.994871794871795e-06, "loss": 1.6156, "step": 83100 }, { "epoch": 0.5288113185961076, "grad_norm": 1.0, "learning_rate": 5.989743589743591e-06, "loss": 1.6168, "step": 83200 }, { "epoch": 0.5294469091232664, "grad_norm": 0.60546875, "learning_rate": 5.984615384615386e-06, "loss": 1.6293, "step": 83300 }, { "epoch": 0.5300824996504252, "grad_norm": 0.890625, "learning_rate": 5.97948717948718e-06, "loss": 1.6071, "step": 83400 }, { "epoch": 0.530718090177584, "grad_norm": 0.6875, "learning_rate": 5.974358974358975e-06, "loss": 1.6244, "step": 83500 }, { "epoch": 0.5313536807047428, "grad_norm": 0.58984375, "learning_rate": 5.9692307692307695e-06, "loss": 1.6089, "step": 83600 }, { "epoch": 0.5319892712319015, "grad_norm": 1.0, "learning_rate": 5.9641025641025644e-06, "loss": 1.6285, "step": 83700 }, { "epoch": 0.5326248617590603, "grad_norm": 0.84375, "learning_rate": 5.95897435897436e-06, "loss": 1.6131, "step": 83800 }, { "epoch": 0.5332604522862191, "grad_norm": 0.91015625, "learning_rate": 5.953846153846154e-06, "loss": 1.6274, "step": 83900 }, { "epoch": 0.5338960428133779, "grad_norm": 0.75, "learning_rate": 5.948717948717949e-06, "loss": 1.6138, "step": 84000 }, { "epoch": 0.5345316333405367, "grad_norm": 0.85546875, "learning_rate": 5.943589743589744e-06, "loss": 1.6348, "step": 84100 }, { "epoch": 0.5351672238676954, "grad_norm": 0.94140625, "learning_rate": 5.938461538461538e-06, "loss": 1.6115, "step": 84200 }, { "epoch": 0.5358028143948542, "grad_norm": 0.84765625, "learning_rate": 5.933333333333335e-06, "loss": 1.6196, "step": 84300 }, { "epoch": 0.536438404922013, "grad_norm": 0.54296875, "learning_rate": 5.928205128205129e-06, "loss": 1.6257, "step": 84400 }, { "epoch": 0.5370739954491718, "grad_norm": 1.046875, "learning_rate": 5.923076923076924e-06, "loss": 1.6163, "step": 84500 }, { "epoch": 0.5377095859763306, "grad_norm": 0.82421875, "learning_rate": 5.9179487179487185e-06, "loss": 1.6273, "step": 84600 }, { "epoch": 0.5383451765034893, "grad_norm": 0.578125, "learning_rate": 5.912820512820513e-06, "loss": 1.634, "step": 84700 }, { "epoch": 0.5389807670306481, "grad_norm": 0.5234375, "learning_rate": 5.907692307692308e-06, "loss": 1.6107, "step": 84800 }, { "epoch": 0.539616357557807, "grad_norm": 0.828125, "learning_rate": 5.902564102564103e-06, "loss": 1.6259, "step": 84900 }, { "epoch": 0.5402519480849658, "grad_norm": 0.9296875, "learning_rate": 5.897435897435898e-06, "loss": 1.6044, "step": 85000 }, { "epoch": 0.5408875386121246, "grad_norm": 0.83203125, "learning_rate": 5.892307692307693e-06, "loss": 1.6064, "step": 85100 }, { "epoch": 0.5415231291392834, "grad_norm": 0.79296875, "learning_rate": 5.887179487179487e-06, "loss": 1.6197, "step": 85200 }, { "epoch": 0.5421587196664421, "grad_norm": 0.84375, "learning_rate": 5.882051282051282e-06, "loss": 1.619, "step": 85300 }, { "epoch": 0.5427943101936009, "grad_norm": 0.98046875, "learning_rate": 5.876923076923078e-06, "loss": 1.5951, "step": 85400 }, { "epoch": 0.5434299007207597, "grad_norm": 0.8828125, "learning_rate": 5.871794871794873e-06, "loss": 1.6088, "step": 85500 }, { "epoch": 0.5440654912479185, "grad_norm": 1.0078125, "learning_rate": 5.8666666666666675e-06, "loss": 1.6291, "step": 85600 }, { "epoch": 0.5447010817750773, "grad_norm": 1.203125, "learning_rate": 5.861538461538462e-06, "loss": 1.6296, "step": 85700 }, { "epoch": 0.545336672302236, "grad_norm": 0.455078125, "learning_rate": 5.8564102564102565e-06, "loss": 1.6253, "step": 85800 }, { "epoch": 0.5459722628293948, "grad_norm": 0.68359375, "learning_rate": 5.851282051282052e-06, "loss": 1.6294, "step": 85900 }, { "epoch": 0.5466078533565536, "grad_norm": 0.66015625, "learning_rate": 5.846153846153847e-06, "loss": 1.6221, "step": 86000 }, { "epoch": 0.5472434438837124, "grad_norm": 0.79296875, "learning_rate": 5.841025641025642e-06, "loss": 1.6114, "step": 86100 }, { "epoch": 0.5478790344108712, "grad_norm": 0.92578125, "learning_rate": 5.835897435897436e-06, "loss": 1.6158, "step": 86200 }, { "epoch": 0.5485146249380299, "grad_norm": 0.671875, "learning_rate": 5.830769230769231e-06, "loss": 1.6054, "step": 86300 }, { "epoch": 0.5491502154651887, "grad_norm": 0.828125, "learning_rate": 5.825641025641026e-06, "loss": 1.623, "step": 86400 }, { "epoch": 0.5497858059923475, "grad_norm": 0.85546875, "learning_rate": 5.820512820512822e-06, "loss": 1.6108, "step": 86500 }, { "epoch": 0.5504213965195063, "grad_norm": 0.6875, "learning_rate": 5.815384615384616e-06, "loss": 1.6228, "step": 86600 }, { "epoch": 0.5510569870466651, "grad_norm": 0.7109375, "learning_rate": 5.8102564102564106e-06, "loss": 1.633, "step": 86700 }, { "epoch": 0.5516925775738238, "grad_norm": 0.78515625, "learning_rate": 5.8051282051282055e-06, "loss": 1.6187, "step": 86800 }, { "epoch": 0.5523281681009826, "grad_norm": 1.0859375, "learning_rate": 5.8e-06, "loss": 1.6125, "step": 86900 }, { "epoch": 0.5529637586281414, "grad_norm": 0.8203125, "learning_rate": 5.794871794871796e-06, "loss": 1.6183, "step": 87000 }, { "epoch": 0.5535993491553002, "grad_norm": 0.66796875, "learning_rate": 5.78974358974359e-06, "loss": 1.6264, "step": 87100 }, { "epoch": 0.554234939682459, "grad_norm": 0.80078125, "learning_rate": 5.784615384615385e-06, "loss": 1.6289, "step": 87200 }, { "epoch": 0.5548705302096177, "grad_norm": 0.65625, "learning_rate": 5.77948717948718e-06, "loss": 1.6042, "step": 87300 }, { "epoch": 0.5555061207367765, "grad_norm": 0.8671875, "learning_rate": 5.774358974358975e-06, "loss": 1.6234, "step": 87400 }, { "epoch": 0.5561417112639353, "grad_norm": 0.9921875, "learning_rate": 5.769230769230769e-06, "loss": 1.6233, "step": 87500 }, { "epoch": 0.5567773017910941, "grad_norm": 0.6171875, "learning_rate": 5.764102564102565e-06, "loss": 1.6176, "step": 87600 }, { "epoch": 0.5574128923182529, "grad_norm": 0.890625, "learning_rate": 5.7589743589743596e-06, "loss": 1.6216, "step": 87700 }, { "epoch": 0.5580484828454116, "grad_norm": 0.921875, "learning_rate": 5.7538461538461545e-06, "loss": 1.6321, "step": 87800 }, { "epoch": 0.5586840733725704, "grad_norm": 1.0078125, "learning_rate": 5.748717948717949e-06, "loss": 1.6245, "step": 87900 }, { "epoch": 0.5593196638997292, "grad_norm": 0.9921875, "learning_rate": 5.743589743589743e-06, "loss": 1.6279, "step": 88000 }, { "epoch": 0.559955254426888, "grad_norm": 0.85546875, "learning_rate": 5.738461538461539e-06, "loss": 1.6187, "step": 88100 }, { "epoch": 0.5605908449540468, "grad_norm": 0.625, "learning_rate": 5.733333333333334e-06, "loss": 1.6266, "step": 88200 }, { "epoch": 0.5612264354812055, "grad_norm": 0.625, "learning_rate": 5.728205128205129e-06, "loss": 1.6191, "step": 88300 }, { "epoch": 0.5618620260083643, "grad_norm": 0.73828125, "learning_rate": 5.723076923076923e-06, "loss": 1.609, "step": 88400 }, { "epoch": 0.5624976165355231, "grad_norm": 0.9296875, "learning_rate": 5.717948717948718e-06, "loss": 1.6182, "step": 88500 }, { "epoch": 0.5631332070626819, "grad_norm": 0.8359375, "learning_rate": 5.712820512820513e-06, "loss": 1.6317, "step": 88600 }, { "epoch": 0.5637687975898408, "grad_norm": 0.91796875, "learning_rate": 5.7076923076923086e-06, "loss": 1.6025, "step": 88700 }, { "epoch": 0.5644043881169996, "grad_norm": 0.70703125, "learning_rate": 5.7025641025641035e-06, "loss": 1.6301, "step": 88800 }, { "epoch": 0.5650399786441583, "grad_norm": 0.71875, "learning_rate": 5.6974358974358975e-06, "loss": 1.6327, "step": 88900 }, { "epoch": 0.5656755691713171, "grad_norm": 0.6640625, "learning_rate": 5.692307692307692e-06, "loss": 1.6271, "step": 89000 }, { "epoch": 0.5663111596984759, "grad_norm": 0.60546875, "learning_rate": 5.687179487179487e-06, "loss": 1.6137, "step": 89100 }, { "epoch": 0.5669467502256347, "grad_norm": 1.2109375, "learning_rate": 5.682051282051283e-06, "loss": 1.6217, "step": 89200 }, { "epoch": 0.5675823407527935, "grad_norm": 1.0859375, "learning_rate": 5.676923076923078e-06, "loss": 1.6228, "step": 89300 }, { "epoch": 0.5682179312799522, "grad_norm": 1.203125, "learning_rate": 5.671794871794872e-06, "loss": 1.6288, "step": 89400 }, { "epoch": 0.568853521807111, "grad_norm": 0.99609375, "learning_rate": 5.666666666666667e-06, "loss": 1.6124, "step": 89500 }, { "epoch": 0.5694891123342698, "grad_norm": 1.0390625, "learning_rate": 5.661538461538462e-06, "loss": 1.6081, "step": 89600 }, { "epoch": 0.5701247028614286, "grad_norm": 0.77734375, "learning_rate": 5.6564102564102575e-06, "loss": 1.625, "step": 89700 }, { "epoch": 0.5707602933885874, "grad_norm": 0.66015625, "learning_rate": 5.6512820512820524e-06, "loss": 1.6106, "step": 89800 }, { "epoch": 0.5713958839157461, "grad_norm": 1.03125, "learning_rate": 5.6461538461538465e-06, "loss": 1.6322, "step": 89900 }, { "epoch": 0.5720314744429049, "grad_norm": 0.8984375, "learning_rate": 5.641025641025641e-06, "loss": 1.633, "step": 90000 }, { "epoch": 0.5726670649700637, "grad_norm": 0.625, "learning_rate": 5.635897435897436e-06, "loss": 1.6196, "step": 90100 }, { "epoch": 0.5733026554972225, "grad_norm": 0.62109375, "learning_rate": 5.63076923076923e-06, "loss": 1.645, "step": 90200 }, { "epoch": 0.5739382460243813, "grad_norm": 0.89453125, "learning_rate": 5.625641025641027e-06, "loss": 1.6166, "step": 90300 }, { "epoch": 0.57457383655154, "grad_norm": 0.890625, "learning_rate": 5.620512820512821e-06, "loss": 1.6207, "step": 90400 }, { "epoch": 0.5752094270786988, "grad_norm": 1.0234375, "learning_rate": 5.615384615384616e-06, "loss": 1.6138, "step": 90500 }, { "epoch": 0.5758450176058576, "grad_norm": 1.0, "learning_rate": 5.610256410256411e-06, "loss": 1.6108, "step": 90600 }, { "epoch": 0.5764806081330164, "grad_norm": 0.703125, "learning_rate": 5.605128205128205e-06, "loss": 1.6291, "step": 90700 }, { "epoch": 0.5771161986601752, "grad_norm": 0.71875, "learning_rate": 5.600000000000001e-06, "loss": 1.6296, "step": 90800 }, { "epoch": 0.5777517891873339, "grad_norm": 0.75, "learning_rate": 5.5948717948717955e-06, "loss": 1.6183, "step": 90900 }, { "epoch": 0.5783873797144927, "grad_norm": 1.046875, "learning_rate": 5.58974358974359e-06, "loss": 1.6069, "step": 91000 }, { "epoch": 0.5790229702416515, "grad_norm": 0.87109375, "learning_rate": 5.584615384615385e-06, "loss": 1.608, "step": 91100 }, { "epoch": 0.5796585607688103, "grad_norm": 0.7109375, "learning_rate": 5.579487179487179e-06, "loss": 1.6259, "step": 91200 }, { "epoch": 0.5802941512959691, "grad_norm": 1.0390625, "learning_rate": 5.574358974358974e-06, "loss": 1.6293, "step": 91300 }, { "epoch": 0.5809297418231278, "grad_norm": 0.65625, "learning_rate": 5.56923076923077e-06, "loss": 1.6288, "step": 91400 }, { "epoch": 0.5815653323502866, "grad_norm": 0.6953125, "learning_rate": 5.564102564102565e-06, "loss": 1.619, "step": 91500 }, { "epoch": 0.5822009228774454, "grad_norm": 0.96875, "learning_rate": 5.55897435897436e-06, "loss": 1.6338, "step": 91600 }, { "epoch": 0.5828365134046042, "grad_norm": 0.6328125, "learning_rate": 5.553846153846154e-06, "loss": 1.6173, "step": 91700 }, { "epoch": 0.583472103931763, "grad_norm": 1.0390625, "learning_rate": 5.548717948717949e-06, "loss": 1.6255, "step": 91800 }, { "epoch": 0.5841076944589217, "grad_norm": 0.490234375, "learning_rate": 5.5435897435897445e-06, "loss": 1.5943, "step": 91900 }, { "epoch": 0.5847432849860805, "grad_norm": 1.1484375, "learning_rate": 5.538461538461539e-06, "loss": 1.6178, "step": 92000 }, { "epoch": 0.5853788755132393, "grad_norm": 1.1640625, "learning_rate": 5.533333333333334e-06, "loss": 1.6184, "step": 92100 }, { "epoch": 0.5860144660403981, "grad_norm": 0.765625, "learning_rate": 5.528205128205128e-06, "loss": 1.6053, "step": 92200 }, { "epoch": 0.5866500565675569, "grad_norm": 1.0, "learning_rate": 5.523076923076923e-06, "loss": 1.6308, "step": 92300 }, { "epoch": 0.5872856470947156, "grad_norm": 1.0625, "learning_rate": 5.517948717948718e-06, "loss": 1.626, "step": 92400 }, { "epoch": 0.5879212376218745, "grad_norm": 1.25, "learning_rate": 5.512820512820514e-06, "loss": 1.623, "step": 92500 }, { "epoch": 0.5885568281490333, "grad_norm": 1.140625, "learning_rate": 5.507692307692308e-06, "loss": 1.631, "step": 92600 }, { "epoch": 0.5891924186761921, "grad_norm": 0.921875, "learning_rate": 5.502564102564103e-06, "loss": 1.6301, "step": 92700 }, { "epoch": 0.5898280092033509, "grad_norm": 0.76171875, "learning_rate": 5.497435897435898e-06, "loss": 1.6266, "step": 92800 }, { "epoch": 0.5904635997305097, "grad_norm": 0.78515625, "learning_rate": 5.492307692307693e-06, "loss": 1.6179, "step": 92900 }, { "epoch": 0.5910991902576684, "grad_norm": 0.8125, "learning_rate": 5.487179487179488e-06, "loss": 1.625, "step": 93000 }, { "epoch": 0.5917347807848272, "grad_norm": 0.69921875, "learning_rate": 5.4820512820512824e-06, "loss": 1.6181, "step": 93100 }, { "epoch": 0.592370371311986, "grad_norm": 0.75, "learning_rate": 5.476923076923077e-06, "loss": 1.6304, "step": 93200 }, { "epoch": 0.5930059618391448, "grad_norm": 0.79296875, "learning_rate": 5.471794871794872e-06, "loss": 1.6004, "step": 93300 }, { "epoch": 0.5936415523663036, "grad_norm": 0.70703125, "learning_rate": 5.466666666666667e-06, "loss": 1.6335, "step": 93400 }, { "epoch": 0.5942771428934623, "grad_norm": 1.0859375, "learning_rate": 5.461538461538461e-06, "loss": 1.616, "step": 93500 }, { "epoch": 0.5949127334206211, "grad_norm": 0.7109375, "learning_rate": 5.456410256410257e-06, "loss": 1.6155, "step": 93600 }, { "epoch": 0.5955483239477799, "grad_norm": 0.7265625, "learning_rate": 5.451282051282052e-06, "loss": 1.6241, "step": 93700 }, { "epoch": 0.5961839144749387, "grad_norm": 0.51171875, "learning_rate": 5.446153846153847e-06, "loss": 1.6173, "step": 93800 }, { "epoch": 0.5968195050020975, "grad_norm": 0.7578125, "learning_rate": 5.441025641025642e-06, "loss": 1.6389, "step": 93900 }, { "epoch": 0.5974550955292562, "grad_norm": 0.73828125, "learning_rate": 5.435897435897436e-06, "loss": 1.6153, "step": 94000 }, { "epoch": 0.598090686056415, "grad_norm": 0.83984375, "learning_rate": 5.430769230769231e-06, "loss": 1.6275, "step": 94100 }, { "epoch": 0.5987262765835738, "grad_norm": 1.0859375, "learning_rate": 5.425641025641026e-06, "loss": 1.6286, "step": 94200 }, { "epoch": 0.5993618671107326, "grad_norm": 0.84765625, "learning_rate": 5.420512820512821e-06, "loss": 1.617, "step": 94300 }, { "epoch": 0.5999974576378914, "grad_norm": 0.8359375, "learning_rate": 5.415384615384615e-06, "loss": 1.6307, "step": 94400 }, { "epoch": 0.6006330481650501, "grad_norm": 0.7734375, "learning_rate": 5.41025641025641e-06, "loss": 1.6397, "step": 94500 }, { "epoch": 0.6012686386922089, "grad_norm": 1.2421875, "learning_rate": 5.405128205128205e-06, "loss": 1.6146, "step": 94600 }, { "epoch": 0.6019042292193677, "grad_norm": 0.9609375, "learning_rate": 5.400000000000001e-06, "loss": 1.628, "step": 94700 }, { "epoch": 0.6025398197465265, "grad_norm": 0.859375, "learning_rate": 5.394871794871796e-06, "loss": 1.6315, "step": 94800 }, { "epoch": 0.6031754102736853, "grad_norm": 0.640625, "learning_rate": 5.38974358974359e-06, "loss": 1.6189, "step": 94900 }, { "epoch": 0.603811000800844, "grad_norm": 0.91015625, "learning_rate": 5.384615384615385e-06, "loss": 1.6219, "step": 95000 }, { "epoch": 0.6044465913280028, "grad_norm": 0.92578125, "learning_rate": 5.3794871794871796e-06, "loss": 1.613, "step": 95100 }, { "epoch": 0.6050821818551616, "grad_norm": 1.328125, "learning_rate": 5.374358974358975e-06, "loss": 1.6054, "step": 95200 }, { "epoch": 0.6057177723823204, "grad_norm": 1.203125, "learning_rate": 5.36923076923077e-06, "loss": 1.6211, "step": 95300 }, { "epoch": 0.6063533629094792, "grad_norm": 1.0078125, "learning_rate": 5.364102564102564e-06, "loss": 1.6168, "step": 95400 }, { "epoch": 0.606988953436638, "grad_norm": 0.85546875, "learning_rate": 5.358974358974359e-06, "loss": 1.6289, "step": 95500 }, { "epoch": 0.6076245439637967, "grad_norm": 0.83984375, "learning_rate": 5.353846153846154e-06, "loss": 1.612, "step": 95600 }, { "epoch": 0.6082601344909555, "grad_norm": 0.8046875, "learning_rate": 5.34871794871795e-06, "loss": 1.6154, "step": 95700 }, { "epoch": 0.6088957250181143, "grad_norm": 1.0234375, "learning_rate": 5.343589743589745e-06, "loss": 1.6178, "step": 95800 }, { "epoch": 0.6095313155452731, "grad_norm": 0.58984375, "learning_rate": 5.338461538461539e-06, "loss": 1.6332, "step": 95900 }, { "epoch": 0.6101669060724318, "grad_norm": 0.67578125, "learning_rate": 5.333333333333334e-06, "loss": 1.6216, "step": 96000 }, { "epoch": 0.6108024965995906, "grad_norm": 0.890625, "learning_rate": 5.3282051282051286e-06, "loss": 1.6099, "step": 96100 }, { "epoch": 0.6114380871267494, "grad_norm": 0.8984375, "learning_rate": 5.323076923076923e-06, "loss": 1.636, "step": 96200 }, { "epoch": 0.6120736776539083, "grad_norm": 0.94921875, "learning_rate": 5.317948717948719e-06, "loss": 1.6261, "step": 96300 }, { "epoch": 0.6127092681810671, "grad_norm": 0.71484375, "learning_rate": 5.312820512820513e-06, "loss": 1.6299, "step": 96400 }, { "epoch": 0.6133448587082259, "grad_norm": 0.5859375, "learning_rate": 5.307692307692308e-06, "loss": 1.6277, "step": 96500 }, { "epoch": 0.6139804492353846, "grad_norm": 0.73046875, "learning_rate": 5.302564102564103e-06, "loss": 1.6248, "step": 96600 }, { "epoch": 0.6146160397625434, "grad_norm": 0.9453125, "learning_rate": 5.297435897435897e-06, "loss": 1.6107, "step": 96700 }, { "epoch": 0.6152516302897022, "grad_norm": 0.875, "learning_rate": 5.292307692307693e-06, "loss": 1.6303, "step": 96800 }, { "epoch": 0.615887220816861, "grad_norm": 1.015625, "learning_rate": 5.287179487179488e-06, "loss": 1.6134, "step": 96900 }, { "epoch": 0.6165228113440198, "grad_norm": 0.73046875, "learning_rate": 5.282051282051283e-06, "loss": 1.6326, "step": 97000 }, { "epoch": 0.6171584018711785, "grad_norm": 0.63671875, "learning_rate": 5.2769230769230775e-06, "loss": 1.6081, "step": 97100 }, { "epoch": 0.6177939923983373, "grad_norm": 1.265625, "learning_rate": 5.271794871794872e-06, "loss": 1.6208, "step": 97200 }, { "epoch": 0.6184295829254961, "grad_norm": 1.0859375, "learning_rate": 5.2666666666666665e-06, "loss": 1.6238, "step": 97300 }, { "epoch": 0.6190651734526549, "grad_norm": 1.0390625, "learning_rate": 5.261538461538462e-06, "loss": 1.6219, "step": 97400 }, { "epoch": 0.6197007639798137, "grad_norm": 0.875, "learning_rate": 5.256410256410257e-06, "loss": 1.6219, "step": 97500 }, { "epoch": 0.6203363545069724, "grad_norm": 0.64453125, "learning_rate": 5.251282051282052e-06, "loss": 1.6101, "step": 97600 }, { "epoch": 0.6209719450341312, "grad_norm": 0.97265625, "learning_rate": 5.246153846153846e-06, "loss": 1.6172, "step": 97700 }, { "epoch": 0.62160753556129, "grad_norm": 0.96484375, "learning_rate": 5.241025641025641e-06, "loss": 1.6167, "step": 97800 }, { "epoch": 0.6222431260884488, "grad_norm": 0.59375, "learning_rate": 5.235897435897437e-06, "loss": 1.616, "step": 97900 }, { "epoch": 0.6228787166156076, "grad_norm": 0.68359375, "learning_rate": 5.230769230769232e-06, "loss": 1.6145, "step": 98000 }, { "epoch": 0.6235143071427663, "grad_norm": 0.69140625, "learning_rate": 5.2256410256410265e-06, "loss": 1.6299, "step": 98100 }, { "epoch": 0.6241498976699251, "grad_norm": 0.80078125, "learning_rate": 5.220512820512821e-06, "loss": 1.6195, "step": 98200 }, { "epoch": 0.6247854881970839, "grad_norm": 0.859375, "learning_rate": 5.2153846153846155e-06, "loss": 1.6244, "step": 98300 }, { "epoch": 0.6254210787242427, "grad_norm": 1.1875, "learning_rate": 5.21025641025641e-06, "loss": 1.6223, "step": 98400 }, { "epoch": 0.6260566692514015, "grad_norm": 0.6796875, "learning_rate": 5.205128205128206e-06, "loss": 1.6134, "step": 98500 }, { "epoch": 0.6266922597785602, "grad_norm": 0.87109375, "learning_rate": 5.2e-06, "loss": 1.6261, "step": 98600 }, { "epoch": 0.627327850305719, "grad_norm": 0.75, "learning_rate": 5.194871794871795e-06, "loss": 1.6227, "step": 98700 }, { "epoch": 0.6279634408328778, "grad_norm": 0.68359375, "learning_rate": 5.18974358974359e-06, "loss": 1.6215, "step": 98800 }, { "epoch": 0.6285990313600366, "grad_norm": 0.703125, "learning_rate": 5.184615384615385e-06, "loss": 1.6231, "step": 98900 }, { "epoch": 0.6292346218871954, "grad_norm": 0.82421875, "learning_rate": 5.179487179487181e-06, "loss": 1.6289, "step": 99000 }, { "epoch": 0.6298702124143541, "grad_norm": 0.85546875, "learning_rate": 5.174358974358975e-06, "loss": 1.6127, "step": 99100 }, { "epoch": 0.6305058029415129, "grad_norm": 1.28125, "learning_rate": 5.16923076923077e-06, "loss": 1.6208, "step": 99200 }, { "epoch": 0.6311413934686717, "grad_norm": 0.78515625, "learning_rate": 5.1641025641025645e-06, "loss": 1.6161, "step": 99300 }, { "epoch": 0.6317769839958305, "grad_norm": 1.1328125, "learning_rate": 5.158974358974359e-06, "loss": 1.6175, "step": 99400 }, { "epoch": 0.6324125745229893, "grad_norm": 0.65625, "learning_rate": 5.1538461538461534e-06, "loss": 1.6174, "step": 99500 }, { "epoch": 0.633048165050148, "grad_norm": 0.9375, "learning_rate": 5.148717948717949e-06, "loss": 1.6236, "step": 99600 }, { "epoch": 0.6336837555773068, "grad_norm": 0.8515625, "learning_rate": 5.143589743589744e-06, "loss": 1.6216, "step": 99700 }, { "epoch": 0.6343193461044656, "grad_norm": 0.75390625, "learning_rate": 5.138461538461539e-06, "loss": 1.6239, "step": 99800 }, { "epoch": 0.6349549366316244, "grad_norm": 0.70703125, "learning_rate": 5.133333333333334e-06, "loss": 1.6215, "step": 99900 }, { "epoch": 0.6355905271587833, "grad_norm": 0.7421875, "learning_rate": 5.128205128205128e-06, "loss": 1.5993, "step": 100000 }, { "epoch": 0.6362261176859421, "grad_norm": 0.6328125, "learning_rate": 5.123076923076924e-06, "loss": 1.608, "step": 100100 }, { "epoch": 0.6368617082131008, "grad_norm": 0.71484375, "learning_rate": 5.1179487179487186e-06, "loss": 1.6131, "step": 100200 }, { "epoch": 0.6374972987402596, "grad_norm": 0.99609375, "learning_rate": 5.1128205128205135e-06, "loss": 1.626, "step": 100300 }, { "epoch": 0.6381328892674184, "grad_norm": 1.03125, "learning_rate": 5.1076923076923075e-06, "loss": 1.6142, "step": 100400 }, { "epoch": 0.6387684797945772, "grad_norm": 0.69921875, "learning_rate": 5.1025641025641024e-06, "loss": 1.6346, "step": 100500 }, { "epoch": 0.639404070321736, "grad_norm": 0.828125, "learning_rate": 5.097435897435898e-06, "loss": 1.604, "step": 100600 }, { "epoch": 0.6400396608488947, "grad_norm": 0.7265625, "learning_rate": 5.092307692307693e-06, "loss": 1.629, "step": 100700 }, { "epoch": 0.6406752513760535, "grad_norm": 0.66796875, "learning_rate": 5.087179487179488e-06, "loss": 1.6069, "step": 100800 }, { "epoch": 0.6413108419032123, "grad_norm": 0.9296875, "learning_rate": 5.082051282051282e-06, "loss": 1.6207, "step": 100900 }, { "epoch": 0.6419464324303711, "grad_norm": 0.6640625, "learning_rate": 5.076923076923077e-06, "loss": 1.6052, "step": 101000 }, { "epoch": 0.6425820229575299, "grad_norm": 0.95703125, "learning_rate": 5.071794871794872e-06, "loss": 1.6145, "step": 101100 }, { "epoch": 0.6432176134846886, "grad_norm": 0.7265625, "learning_rate": 5.0666666666666676e-06, "loss": 1.6133, "step": 101200 }, { "epoch": 0.6438532040118474, "grad_norm": 0.6875, "learning_rate": 5.0615384615384625e-06, "loss": 1.6339, "step": 101300 }, { "epoch": 0.6444887945390062, "grad_norm": 0.8046875, "learning_rate": 5.0564102564102565e-06, "loss": 1.5978, "step": 101400 }, { "epoch": 0.645124385066165, "grad_norm": 0.93359375, "learning_rate": 5.051282051282051e-06, "loss": 1.6232, "step": 101500 }, { "epoch": 0.6457599755933238, "grad_norm": 0.7890625, "learning_rate": 5.046153846153846e-06, "loss": 1.6328, "step": 101600 }, { "epoch": 0.6463955661204825, "grad_norm": 1.109375, "learning_rate": 5.041025641025642e-06, "loss": 1.6116, "step": 101700 }, { "epoch": 0.6470311566476413, "grad_norm": 0.84765625, "learning_rate": 5.035897435897437e-06, "loss": 1.6376, "step": 101800 }, { "epoch": 0.6476667471748001, "grad_norm": 0.85546875, "learning_rate": 5.030769230769231e-06, "loss": 1.614, "step": 101900 }, { "epoch": 0.6483023377019589, "grad_norm": 0.7890625, "learning_rate": 5.025641025641026e-06, "loss": 1.616, "step": 102000 }, { "epoch": 0.6489379282291177, "grad_norm": 1.0859375, "learning_rate": 5.020512820512821e-06, "loss": 1.6242, "step": 102100 }, { "epoch": 0.6495735187562764, "grad_norm": 0.890625, "learning_rate": 5.015384615384616e-06, "loss": 1.6139, "step": 102200 }, { "epoch": 0.6502091092834352, "grad_norm": 0.5859375, "learning_rate": 5.0102564102564115e-06, "loss": 1.6071, "step": 102300 }, { "epoch": 0.650844699810594, "grad_norm": 0.8671875, "learning_rate": 5.0051282051282055e-06, "loss": 1.6095, "step": 102400 }, { "epoch": 0.6514802903377528, "grad_norm": 0.76953125, "learning_rate": 5e-06, "loss": 1.6153, "step": 102500 }, { "epoch": 0.6521158808649116, "grad_norm": 0.8671875, "learning_rate": 4.994871794871795e-06, "loss": 1.6096, "step": 102600 }, { "epoch": 0.6527514713920703, "grad_norm": 0.6171875, "learning_rate": 4.98974358974359e-06, "loss": 1.6182, "step": 102700 }, { "epoch": 0.6533870619192291, "grad_norm": 1.109375, "learning_rate": 4.984615384615385e-06, "loss": 1.6078, "step": 102800 }, { "epoch": 0.6540226524463879, "grad_norm": 0.58984375, "learning_rate": 4.97948717948718e-06, "loss": 1.6203, "step": 102900 }, { "epoch": 0.6546582429735467, "grad_norm": 1.203125, "learning_rate": 4.974358974358975e-06, "loss": 1.625, "step": 103000 }, { "epoch": 0.6552938335007055, "grad_norm": 0.76953125, "learning_rate": 4.96923076923077e-06, "loss": 1.6202, "step": 103100 }, { "epoch": 0.6559294240278642, "grad_norm": 0.78515625, "learning_rate": 4.964102564102565e-06, "loss": 1.6192, "step": 103200 }, { "epoch": 0.656565014555023, "grad_norm": 1.03125, "learning_rate": 4.95897435897436e-06, "loss": 1.6077, "step": 103300 }, { "epoch": 0.6572006050821818, "grad_norm": 0.71875, "learning_rate": 4.9538461538461545e-06, "loss": 1.6113, "step": 103400 }, { "epoch": 0.6578361956093406, "grad_norm": 0.68359375, "learning_rate": 4.948717948717949e-06, "loss": 1.6292, "step": 103500 }, { "epoch": 0.6584717861364994, "grad_norm": 1.0546875, "learning_rate": 4.943589743589744e-06, "loss": 1.616, "step": 103600 }, { "epoch": 0.6591073766636582, "grad_norm": 0.70703125, "learning_rate": 4.938461538461538e-06, "loss": 1.6265, "step": 103700 }, { "epoch": 0.659742967190817, "grad_norm": 0.5859375, "learning_rate": 4.933333333333334e-06, "loss": 1.6158, "step": 103800 }, { "epoch": 0.6603785577179758, "grad_norm": 0.9609375, "learning_rate": 4.928205128205128e-06, "loss": 1.6274, "step": 103900 }, { "epoch": 0.6610141482451346, "grad_norm": 0.640625, "learning_rate": 4.923076923076924e-06, "loss": 1.6122, "step": 104000 }, { "epoch": 0.6616497387722934, "grad_norm": 0.90234375, "learning_rate": 4.917948717948719e-06, "loss": 1.6128, "step": 104100 }, { "epoch": 0.6622853292994522, "grad_norm": 0.6015625, "learning_rate": 4.912820512820513e-06, "loss": 1.6175, "step": 104200 }, { "epoch": 0.6629209198266109, "grad_norm": 1.390625, "learning_rate": 4.907692307692309e-06, "loss": 1.6262, "step": 104300 }, { "epoch": 0.6635565103537697, "grad_norm": 0.953125, "learning_rate": 4.902564102564103e-06, "loss": 1.6097, "step": 104400 }, { "epoch": 0.6641921008809285, "grad_norm": 1.015625, "learning_rate": 4.8974358974358975e-06, "loss": 1.6119, "step": 104500 }, { "epoch": 0.6648276914080873, "grad_norm": 0.84765625, "learning_rate": 4.892307692307693e-06, "loss": 1.6309, "step": 104600 }, { "epoch": 0.6654632819352461, "grad_norm": 0.77734375, "learning_rate": 4.887179487179487e-06, "loss": 1.6189, "step": 104700 }, { "epoch": 0.6660988724624048, "grad_norm": 0.7578125, "learning_rate": 4.882051282051282e-06, "loss": 1.6162, "step": 104800 }, { "epoch": 0.6667344629895636, "grad_norm": 0.98828125, "learning_rate": 4.876923076923077e-06, "loss": 1.634, "step": 104900 }, { "epoch": 0.6673700535167224, "grad_norm": 0.8046875, "learning_rate": 4.871794871794872e-06, "loss": 1.6159, "step": 105000 }, { "epoch": 0.6680056440438812, "grad_norm": 0.88671875, "learning_rate": 4.866666666666667e-06, "loss": 1.6145, "step": 105100 }, { "epoch": 0.66864123457104, "grad_norm": 0.85546875, "learning_rate": 4.861538461538462e-06, "loss": 1.6403, "step": 105200 }, { "epoch": 0.6692768250981987, "grad_norm": 0.76953125, "learning_rate": 4.856410256410257e-06, "loss": 1.6177, "step": 105300 }, { "epoch": 0.6699124156253575, "grad_norm": 0.87109375, "learning_rate": 4.851282051282052e-06, "loss": 1.6122, "step": 105400 }, { "epoch": 0.6705480061525163, "grad_norm": 1.03125, "learning_rate": 4.8461538461538465e-06, "loss": 1.6122, "step": 105500 }, { "epoch": 0.6711835966796751, "grad_norm": 0.8359375, "learning_rate": 4.8410256410256414e-06, "loss": 1.6299, "step": 105600 }, { "epoch": 0.6718191872068339, "grad_norm": 0.984375, "learning_rate": 4.835897435897436e-06, "loss": 1.6227, "step": 105700 }, { "epoch": 0.6724547777339926, "grad_norm": 0.70703125, "learning_rate": 4.830769230769231e-06, "loss": 1.6101, "step": 105800 }, { "epoch": 0.6730903682611514, "grad_norm": 1.0390625, "learning_rate": 4.825641025641026e-06, "loss": 1.628, "step": 105900 }, { "epoch": 0.6737259587883102, "grad_norm": 1.0546875, "learning_rate": 4.820512820512821e-06, "loss": 1.6062, "step": 106000 }, { "epoch": 0.674361549315469, "grad_norm": 0.6640625, "learning_rate": 4.815384615384616e-06, "loss": 1.6278, "step": 106100 }, { "epoch": 0.6749971398426278, "grad_norm": 1.09375, "learning_rate": 4.810256410256411e-06, "loss": 1.6188, "step": 106200 }, { "epoch": 0.6756327303697865, "grad_norm": 0.84375, "learning_rate": 4.805128205128206e-06, "loss": 1.6195, "step": 106300 }, { "epoch": 0.6762683208969453, "grad_norm": 0.94140625, "learning_rate": 4.800000000000001e-06, "loss": 1.6191, "step": 106400 }, { "epoch": 0.6769039114241041, "grad_norm": 0.77734375, "learning_rate": 4.7948717948717955e-06, "loss": 1.6235, "step": 106500 }, { "epoch": 0.6775395019512629, "grad_norm": 1.15625, "learning_rate": 4.7897435897435904e-06, "loss": 1.6166, "step": 106600 }, { "epoch": 0.6781750924784217, "grad_norm": 0.734375, "learning_rate": 4.7846153846153845e-06, "loss": 1.5958, "step": 106700 }, { "epoch": 0.6788106830055805, "grad_norm": 0.72265625, "learning_rate": 4.77948717948718e-06, "loss": 1.622, "step": 106800 }, { "epoch": 0.6794462735327392, "grad_norm": 0.94921875, "learning_rate": 4.774358974358974e-06, "loss": 1.6315, "step": 106900 }, { "epoch": 0.680081864059898, "grad_norm": 0.83203125, "learning_rate": 4.76923076923077e-06, "loss": 1.6155, "step": 107000 }, { "epoch": 0.6807174545870568, "grad_norm": 1.171875, "learning_rate": 4.764102564102565e-06, "loss": 1.6098, "step": 107100 }, { "epoch": 0.6813530451142156, "grad_norm": 0.98828125, "learning_rate": 4.758974358974359e-06, "loss": 1.6073, "step": 107200 }, { "epoch": 0.6819886356413744, "grad_norm": 0.75390625, "learning_rate": 4.753846153846155e-06, "loss": 1.6177, "step": 107300 }, { "epoch": 0.6826242261685331, "grad_norm": 1.0625, "learning_rate": 4.748717948717949e-06, "loss": 1.6186, "step": 107400 }, { "epoch": 0.6832598166956919, "grad_norm": 0.97265625, "learning_rate": 4.743589743589744e-06, "loss": 1.6383, "step": 107500 }, { "epoch": 0.6838954072228508, "grad_norm": 0.5546875, "learning_rate": 4.738461538461539e-06, "loss": 1.6377, "step": 107600 }, { "epoch": 0.6845309977500096, "grad_norm": 0.63671875, "learning_rate": 4.7333333333333335e-06, "loss": 1.616, "step": 107700 }, { "epoch": 0.6851665882771684, "grad_norm": 1.0390625, "learning_rate": 4.728205128205128e-06, "loss": 1.6348, "step": 107800 }, { "epoch": 0.6858021788043271, "grad_norm": 1.015625, "learning_rate": 4.723076923076923e-06, "loss": 1.6176, "step": 107900 }, { "epoch": 0.6864377693314859, "grad_norm": 0.95703125, "learning_rate": 4.717948717948718e-06, "loss": 1.6137, "step": 108000 }, { "epoch": 0.6870733598586447, "grad_norm": 0.828125, "learning_rate": 4.712820512820513e-06, "loss": 1.6239, "step": 108100 }, { "epoch": 0.6877089503858035, "grad_norm": 0.71484375, "learning_rate": 4.707692307692308e-06, "loss": 1.6267, "step": 108200 }, { "epoch": 0.6883445409129623, "grad_norm": 0.80859375, "learning_rate": 4.702564102564103e-06, "loss": 1.6296, "step": 108300 }, { "epoch": 0.688980131440121, "grad_norm": 1.0390625, "learning_rate": 4.697435897435898e-06, "loss": 1.6124, "step": 108400 }, { "epoch": 0.6896157219672798, "grad_norm": 0.9296875, "learning_rate": 4.692307692307693e-06, "loss": 1.6137, "step": 108500 }, { "epoch": 0.6902513124944386, "grad_norm": 0.7421875, "learning_rate": 4.6871794871794876e-06, "loss": 1.6152, "step": 108600 }, { "epoch": 0.6908869030215974, "grad_norm": 0.7578125, "learning_rate": 4.6820512820512825e-06, "loss": 1.6073, "step": 108700 }, { "epoch": 0.6915224935487562, "grad_norm": 0.78515625, "learning_rate": 4.676923076923077e-06, "loss": 1.6117, "step": 108800 }, { "epoch": 0.692158084075915, "grad_norm": 1.0625, "learning_rate": 4.671794871794872e-06, "loss": 1.6196, "step": 108900 }, { "epoch": 0.6927936746030737, "grad_norm": 0.953125, "learning_rate": 4.666666666666667e-06, "loss": 1.6081, "step": 109000 }, { "epoch": 0.6934292651302325, "grad_norm": 1.921875, "learning_rate": 4.661538461538462e-06, "loss": 1.6223, "step": 109100 }, { "epoch": 0.6940648556573913, "grad_norm": 0.765625, "learning_rate": 4.656410256410257e-06, "loss": 1.6228, "step": 109200 }, { "epoch": 0.6947004461845501, "grad_norm": 0.75, "learning_rate": 4.651282051282052e-06, "loss": 1.6186, "step": 109300 }, { "epoch": 0.6953360367117088, "grad_norm": 0.90234375, "learning_rate": 4.646153846153847e-06, "loss": 1.6278, "step": 109400 }, { "epoch": 0.6959716272388676, "grad_norm": 0.953125, "learning_rate": 4.641025641025642e-06, "loss": 1.6158, "step": 109500 }, { "epoch": 0.6966072177660264, "grad_norm": 0.94921875, "learning_rate": 4.6358974358974366e-06, "loss": 1.6168, "step": 109600 }, { "epoch": 0.6972428082931852, "grad_norm": 0.84765625, "learning_rate": 4.630769230769231e-06, "loss": 1.629, "step": 109700 }, { "epoch": 0.697878398820344, "grad_norm": 0.76953125, "learning_rate": 4.625641025641026e-06, "loss": 1.6176, "step": 109800 }, { "epoch": 0.6985139893475028, "grad_norm": 0.625, "learning_rate": 4.62051282051282e-06, "loss": 1.6137, "step": 109900 }, { "epoch": 0.6991495798746615, "grad_norm": 0.9765625, "learning_rate": 4.615384615384616e-06, "loss": 1.609, "step": 110000 }, { "epoch": 0.6997851704018203, "grad_norm": 0.8828125, "learning_rate": 4.610256410256411e-06, "loss": 1.6135, "step": 110100 }, { "epoch": 0.7004207609289791, "grad_norm": 0.984375, "learning_rate": 4.605128205128205e-06, "loss": 1.6305, "step": 110200 }, { "epoch": 0.7010563514561379, "grad_norm": 0.90234375, "learning_rate": 4.600000000000001e-06, "loss": 1.6075, "step": 110300 }, { "epoch": 0.7016919419832967, "grad_norm": 0.94140625, "learning_rate": 4.594871794871795e-06, "loss": 1.6183, "step": 110400 }, { "epoch": 0.7023275325104554, "grad_norm": 0.80078125, "learning_rate": 4.58974358974359e-06, "loss": 1.6249, "step": 110500 }, { "epoch": 0.7029631230376142, "grad_norm": 1.03125, "learning_rate": 4.5846153846153855e-06, "loss": 1.6048, "step": 110600 }, { "epoch": 0.703598713564773, "grad_norm": 0.921875, "learning_rate": 4.57948717948718e-06, "loss": 1.641, "step": 110700 }, { "epoch": 0.7042343040919318, "grad_norm": 0.76953125, "learning_rate": 4.5743589743589745e-06, "loss": 1.6341, "step": 110800 }, { "epoch": 0.7048698946190906, "grad_norm": 0.71875, "learning_rate": 4.569230769230769e-06, "loss": 1.6187, "step": 110900 }, { "epoch": 0.7055054851462493, "grad_norm": 1.109375, "learning_rate": 4.564102564102564e-06, "loss": 1.6084, "step": 111000 }, { "epoch": 0.7061410756734081, "grad_norm": 0.8515625, "learning_rate": 4.558974358974359e-06, "loss": 1.6216, "step": 111100 }, { "epoch": 0.7067766662005669, "grad_norm": 0.90625, "learning_rate": 4.553846153846154e-06, "loss": 1.6216, "step": 111200 }, { "epoch": 0.7074122567277257, "grad_norm": 0.6484375, "learning_rate": 4.548717948717949e-06, "loss": 1.6371, "step": 111300 }, { "epoch": 0.7080478472548846, "grad_norm": 0.859375, "learning_rate": 4.543589743589744e-06, "loss": 1.6226, "step": 111400 }, { "epoch": 0.7086834377820433, "grad_norm": 0.7265625, "learning_rate": 4.538461538461539e-06, "loss": 1.6266, "step": 111500 }, { "epoch": 0.7093190283092021, "grad_norm": 0.921875, "learning_rate": 4.533333333333334e-06, "loss": 1.6185, "step": 111600 }, { "epoch": 0.7099546188363609, "grad_norm": 0.89453125, "learning_rate": 4.528205128205129e-06, "loss": 1.6345, "step": 111700 }, { "epoch": 0.7105902093635197, "grad_norm": 0.765625, "learning_rate": 4.5230769230769235e-06, "loss": 1.6351, "step": 111800 }, { "epoch": 0.7112257998906785, "grad_norm": 1.203125, "learning_rate": 4.517948717948718e-06, "loss": 1.6055, "step": 111900 }, { "epoch": 0.7118613904178372, "grad_norm": 0.87109375, "learning_rate": 4.512820512820513e-06, "loss": 1.6265, "step": 112000 }, { "epoch": 0.712496980944996, "grad_norm": 0.671875, "learning_rate": 4.507692307692308e-06, "loss": 1.6411, "step": 112100 }, { "epoch": 0.7131325714721548, "grad_norm": 0.69140625, "learning_rate": 4.502564102564103e-06, "loss": 1.627, "step": 112200 }, { "epoch": 0.7137681619993136, "grad_norm": 1.125, "learning_rate": 4.497435897435898e-06, "loss": 1.618, "step": 112300 }, { "epoch": 0.7144037525264724, "grad_norm": 0.9921875, "learning_rate": 4.492307692307693e-06, "loss": 1.6237, "step": 112400 }, { "epoch": 0.7150393430536311, "grad_norm": 1.015625, "learning_rate": 4.487179487179488e-06, "loss": 1.6341, "step": 112500 }, { "epoch": 0.7156749335807899, "grad_norm": 1.0859375, "learning_rate": 4.482051282051283e-06, "loss": 1.6358, "step": 112600 }, { "epoch": 0.7163105241079487, "grad_norm": 0.93359375, "learning_rate": 4.476923076923077e-06, "loss": 1.6074, "step": 112700 }, { "epoch": 0.7169461146351075, "grad_norm": 1.0390625, "learning_rate": 4.4717948717948725e-06, "loss": 1.624, "step": 112800 }, { "epoch": 0.7175817051622663, "grad_norm": 0.73828125, "learning_rate": 4.4666666666666665e-06, "loss": 1.6122, "step": 112900 }, { "epoch": 0.718217295689425, "grad_norm": 0.73046875, "learning_rate": 4.461538461538462e-06, "loss": 1.6184, "step": 113000 }, { "epoch": 0.7188528862165838, "grad_norm": 1.1875, "learning_rate": 4.456410256410257e-06, "loss": 1.6202, "step": 113100 }, { "epoch": 0.7194884767437426, "grad_norm": 0.6640625, "learning_rate": 4.451282051282051e-06, "loss": 1.6271, "step": 113200 }, { "epoch": 0.7201240672709014, "grad_norm": 0.91796875, "learning_rate": 4.446153846153847e-06, "loss": 1.6147, "step": 113300 }, { "epoch": 0.7207596577980602, "grad_norm": 0.65625, "learning_rate": 4.441025641025641e-06, "loss": 1.6253, "step": 113400 }, { "epoch": 0.721395248325219, "grad_norm": 0.97265625, "learning_rate": 4.435897435897436e-06, "loss": 1.6078, "step": 113500 }, { "epoch": 0.7220308388523777, "grad_norm": 0.7109375, "learning_rate": 4.430769230769232e-06, "loss": 1.633, "step": 113600 }, { "epoch": 0.7226664293795365, "grad_norm": 0.80078125, "learning_rate": 4.425641025641026e-06, "loss": 1.6203, "step": 113700 }, { "epoch": 0.7233020199066953, "grad_norm": 0.95703125, "learning_rate": 4.420512820512821e-06, "loss": 1.6073, "step": 113800 }, { "epoch": 0.7239376104338541, "grad_norm": 0.76171875, "learning_rate": 4.4153846153846155e-06, "loss": 1.62, "step": 113900 }, { "epoch": 0.7245732009610129, "grad_norm": 0.5859375, "learning_rate": 4.4102564102564104e-06, "loss": 1.6269, "step": 114000 }, { "epoch": 0.7252087914881716, "grad_norm": 0.890625, "learning_rate": 4.405128205128205e-06, "loss": 1.636, "step": 114100 }, { "epoch": 0.7258443820153304, "grad_norm": 1.2265625, "learning_rate": 4.4e-06, "loss": 1.6242, "step": 114200 }, { "epoch": 0.7264799725424892, "grad_norm": 0.7421875, "learning_rate": 4.394871794871795e-06, "loss": 1.6222, "step": 114300 }, { "epoch": 0.727115563069648, "grad_norm": 0.75390625, "learning_rate": 4.38974358974359e-06, "loss": 1.6353, "step": 114400 }, { "epoch": 0.7277511535968068, "grad_norm": 0.89453125, "learning_rate": 4.384615384615385e-06, "loss": 1.6359, "step": 114500 }, { "epoch": 0.7283867441239655, "grad_norm": 0.99609375, "learning_rate": 4.37948717948718e-06, "loss": 1.62, "step": 114600 }, { "epoch": 0.7290223346511243, "grad_norm": 0.796875, "learning_rate": 4.374358974358975e-06, "loss": 1.6287, "step": 114700 }, { "epoch": 0.7296579251782831, "grad_norm": 0.84765625, "learning_rate": 4.36923076923077e-06, "loss": 1.6362, "step": 114800 }, { "epoch": 0.7302935157054419, "grad_norm": 0.6796875, "learning_rate": 4.3641025641025645e-06, "loss": 1.6261, "step": 114900 }, { "epoch": 0.7309291062326007, "grad_norm": 0.7265625, "learning_rate": 4.358974358974359e-06, "loss": 1.6185, "step": 115000 }, { "epoch": 0.7315646967597595, "grad_norm": 0.578125, "learning_rate": 4.353846153846154e-06, "loss": 1.6173, "step": 115100 }, { "epoch": 0.7322002872869183, "grad_norm": 0.8515625, "learning_rate": 4.348717948717949e-06, "loss": 1.627, "step": 115200 }, { "epoch": 0.7328358778140771, "grad_norm": 1.234375, "learning_rate": 4.343589743589744e-06, "loss": 1.643, "step": 115300 }, { "epoch": 0.7334714683412359, "grad_norm": 0.96875, "learning_rate": 4.338461538461539e-06, "loss": 1.6276, "step": 115400 }, { "epoch": 0.7341070588683947, "grad_norm": 0.75, "learning_rate": 4.333333333333334e-06, "loss": 1.6176, "step": 115500 }, { "epoch": 0.7347426493955534, "grad_norm": 1.2734375, "learning_rate": 4.328205128205129e-06, "loss": 1.6187, "step": 115600 }, { "epoch": 0.7353782399227122, "grad_norm": 0.92578125, "learning_rate": 4.323076923076923e-06, "loss": 1.6196, "step": 115700 }, { "epoch": 0.736013830449871, "grad_norm": 1.2421875, "learning_rate": 4.317948717948719e-06, "loss": 1.6119, "step": 115800 }, { "epoch": 0.7366494209770298, "grad_norm": 0.875, "learning_rate": 4.312820512820513e-06, "loss": 1.6179, "step": 115900 }, { "epoch": 0.7372850115041886, "grad_norm": 0.9375, "learning_rate": 4.307692307692308e-06, "loss": 1.6227, "step": 116000 }, { "epoch": 0.7379206020313474, "grad_norm": 0.83984375, "learning_rate": 4.302564102564103e-06, "loss": 1.6225, "step": 116100 }, { "epoch": 0.7385561925585061, "grad_norm": 0.55078125, "learning_rate": 4.297435897435897e-06, "loss": 1.603, "step": 116200 }, { "epoch": 0.7391917830856649, "grad_norm": 0.66796875, "learning_rate": 4.292307692307693e-06, "loss": 1.62, "step": 116300 }, { "epoch": 0.7398273736128237, "grad_norm": 1.1640625, "learning_rate": 4.287179487179487e-06, "loss": 1.6205, "step": 116400 }, { "epoch": 0.7404629641399825, "grad_norm": 1.1015625, "learning_rate": 4.282051282051282e-06, "loss": 1.6217, "step": 116500 }, { "epoch": 0.7410985546671413, "grad_norm": 0.859375, "learning_rate": 4.276923076923078e-06, "loss": 1.6287, "step": 116600 }, { "epoch": 0.7417341451943, "grad_norm": 0.82421875, "learning_rate": 4.271794871794872e-06, "loss": 1.6273, "step": 116700 }, { "epoch": 0.7423697357214588, "grad_norm": 0.859375, "learning_rate": 4.266666666666668e-06, "loss": 1.644, "step": 116800 }, { "epoch": 0.7430053262486176, "grad_norm": 0.9765625, "learning_rate": 4.261538461538462e-06, "loss": 1.6179, "step": 116900 }, { "epoch": 0.7436409167757764, "grad_norm": 0.8671875, "learning_rate": 4.2564102564102566e-06, "loss": 1.625, "step": 117000 }, { "epoch": 0.7442765073029352, "grad_norm": 0.6875, "learning_rate": 4.2512820512820515e-06, "loss": 1.6082, "step": 117100 }, { "epoch": 0.7449120978300939, "grad_norm": 0.63671875, "learning_rate": 4.246153846153846e-06, "loss": 1.6159, "step": 117200 }, { "epoch": 0.7455476883572527, "grad_norm": 0.61328125, "learning_rate": 4.241025641025641e-06, "loss": 1.6112, "step": 117300 }, { "epoch": 0.7461832788844115, "grad_norm": 0.8984375, "learning_rate": 4.235897435897436e-06, "loss": 1.6248, "step": 117400 }, { "epoch": 0.7468188694115703, "grad_norm": 0.83203125, "learning_rate": 4.230769230769231e-06, "loss": 1.6073, "step": 117500 }, { "epoch": 0.747454459938729, "grad_norm": 0.78125, "learning_rate": 4.225641025641026e-06, "loss": 1.6288, "step": 117600 }, { "epoch": 0.7480900504658878, "grad_norm": 1.078125, "learning_rate": 4.220512820512821e-06, "loss": 1.6191, "step": 117700 }, { "epoch": 0.7487256409930466, "grad_norm": 0.78125, "learning_rate": 4.215384615384616e-06, "loss": 1.622, "step": 117800 }, { "epoch": 0.7493612315202054, "grad_norm": 1.0546875, "learning_rate": 4.210256410256411e-06, "loss": 1.6162, "step": 117900 }, { "epoch": 0.7499968220473642, "grad_norm": 1.1640625, "learning_rate": 4.2051282051282055e-06, "loss": 1.6305, "step": 118000 }, { "epoch": 0.750632412574523, "grad_norm": 0.97265625, "learning_rate": 4.2000000000000004e-06, "loss": 1.6306, "step": 118100 }, { "epoch": 0.7512680031016817, "grad_norm": 0.8359375, "learning_rate": 4.194871794871795e-06, "loss": 1.627, "step": 118200 }, { "epoch": 0.7519035936288405, "grad_norm": 0.81640625, "learning_rate": 4.18974358974359e-06, "loss": 1.6304, "step": 118300 }, { "epoch": 0.7525391841559993, "grad_norm": 1.15625, "learning_rate": 4.184615384615385e-06, "loss": 1.6091, "step": 118400 }, { "epoch": 0.7531747746831581, "grad_norm": 0.87109375, "learning_rate": 4.17948717948718e-06, "loss": 1.6114, "step": 118500 }, { "epoch": 0.7538103652103169, "grad_norm": 0.8515625, "learning_rate": 4.174358974358975e-06, "loss": 1.6268, "step": 118600 }, { "epoch": 0.7544459557374756, "grad_norm": 0.69921875, "learning_rate": 4.169230769230769e-06, "loss": 1.6143, "step": 118700 }, { "epoch": 0.7550815462646344, "grad_norm": 0.69140625, "learning_rate": 4.164102564102565e-06, "loss": 1.6197, "step": 118800 }, { "epoch": 0.7557171367917933, "grad_norm": 1.1484375, "learning_rate": 4.158974358974359e-06, "loss": 1.6161, "step": 118900 }, { "epoch": 0.7563527273189521, "grad_norm": 0.8984375, "learning_rate": 4.1538461538461545e-06, "loss": 1.6146, "step": 119000 }, { "epoch": 0.7569883178461109, "grad_norm": 0.83203125, "learning_rate": 4.1487179487179494e-06, "loss": 1.6216, "step": 119100 }, { "epoch": 0.7576239083732696, "grad_norm": 1.0234375, "learning_rate": 4.1435897435897435e-06, "loss": 1.6253, "step": 119200 }, { "epoch": 0.7582594989004284, "grad_norm": 0.71484375, "learning_rate": 4.138461538461539e-06, "loss": 1.6314, "step": 119300 }, { "epoch": 0.7588950894275872, "grad_norm": 0.79296875, "learning_rate": 4.133333333333333e-06, "loss": 1.6238, "step": 119400 }, { "epoch": 0.759530679954746, "grad_norm": 0.6953125, "learning_rate": 4.128205128205128e-06, "loss": 1.6102, "step": 119500 }, { "epoch": 0.7601662704819048, "grad_norm": 0.96484375, "learning_rate": 4.123076923076924e-06, "loss": 1.6004, "step": 119600 }, { "epoch": 0.7608018610090636, "grad_norm": 1.15625, "learning_rate": 4.117948717948718e-06, "loss": 1.618, "step": 119700 }, { "epoch": 0.7614374515362223, "grad_norm": 0.77734375, "learning_rate": 4.112820512820514e-06, "loss": 1.6198, "step": 119800 }, { "epoch": 0.7620730420633811, "grad_norm": 0.85546875, "learning_rate": 4.107692307692308e-06, "loss": 1.6105, "step": 119900 }, { "epoch": 0.7627086325905399, "grad_norm": 0.57421875, "learning_rate": 4.102564102564103e-06, "loss": 1.61, "step": 120000 }, { "epoch": 0.7633442231176987, "grad_norm": 1.09375, "learning_rate": 4.097435897435898e-06, "loss": 1.6288, "step": 120100 }, { "epoch": 0.7639798136448575, "grad_norm": 0.609375, "learning_rate": 4.0923076923076925e-06, "loss": 1.6215, "step": 120200 }, { "epoch": 0.7646154041720162, "grad_norm": 1.09375, "learning_rate": 4.087179487179487e-06, "loss": 1.6368, "step": 120300 }, { "epoch": 0.765250994699175, "grad_norm": 0.76171875, "learning_rate": 4.082051282051282e-06, "loss": 1.6292, "step": 120400 }, { "epoch": 0.7658865852263338, "grad_norm": 0.71875, "learning_rate": 4.076923076923077e-06, "loss": 1.6237, "step": 120500 }, { "epoch": 0.7665221757534926, "grad_norm": 0.98046875, "learning_rate": 4.071794871794872e-06, "loss": 1.6285, "step": 120600 }, { "epoch": 0.7671577662806514, "grad_norm": 0.83203125, "learning_rate": 4.066666666666667e-06, "loss": 1.6288, "step": 120700 }, { "epoch": 0.7677933568078101, "grad_norm": 0.87890625, "learning_rate": 4.061538461538462e-06, "loss": 1.634, "step": 120800 }, { "epoch": 0.7684289473349689, "grad_norm": 1.0078125, "learning_rate": 4.056410256410257e-06, "loss": 1.6295, "step": 120900 }, { "epoch": 0.7690645378621277, "grad_norm": 0.87890625, "learning_rate": 4.051282051282052e-06, "loss": 1.6324, "step": 121000 }, { "epoch": 0.7697001283892865, "grad_norm": 0.984375, "learning_rate": 4.0461538461538466e-06, "loss": 1.6293, "step": 121100 }, { "epoch": 0.7703357189164453, "grad_norm": 0.86328125, "learning_rate": 4.0410256410256415e-06, "loss": 1.6165, "step": 121200 }, { "epoch": 0.770971309443604, "grad_norm": 0.71875, "learning_rate": 4.035897435897436e-06, "loss": 1.6188, "step": 121300 }, { "epoch": 0.7716068999707628, "grad_norm": 0.95703125, "learning_rate": 4.030769230769231e-06, "loss": 1.6288, "step": 121400 }, { "epoch": 0.7722424904979216, "grad_norm": 0.7265625, "learning_rate": 4.025641025641026e-06, "loss": 1.6196, "step": 121500 }, { "epoch": 0.7728780810250804, "grad_norm": 1.1171875, "learning_rate": 4.020512820512821e-06, "loss": 1.6457, "step": 121600 }, { "epoch": 0.7735136715522392, "grad_norm": 0.86328125, "learning_rate": 4.015384615384615e-06, "loss": 1.6246, "step": 121700 }, { "epoch": 0.7741492620793979, "grad_norm": 0.8359375, "learning_rate": 4.010256410256411e-06, "loss": 1.6151, "step": 121800 }, { "epoch": 0.7747848526065567, "grad_norm": 1.0078125, "learning_rate": 4.005128205128205e-06, "loss": 1.6346, "step": 121900 }, { "epoch": 0.7754204431337155, "grad_norm": 0.86328125, "learning_rate": 4.000000000000001e-06, "loss": 1.6198, "step": 122000 }, { "epoch": 0.7760560336608743, "grad_norm": 1.171875, "learning_rate": 3.9948717948717956e-06, "loss": 1.6082, "step": 122100 }, { "epoch": 0.7766916241880331, "grad_norm": 0.84765625, "learning_rate": 3.98974358974359e-06, "loss": 1.5981, "step": 122200 }, { "epoch": 0.7773272147151918, "grad_norm": 0.66796875, "learning_rate": 3.984615384615385e-06, "loss": 1.6236, "step": 122300 }, { "epoch": 0.7779628052423506, "grad_norm": 0.90625, "learning_rate": 3.979487179487179e-06, "loss": 1.6074, "step": 122400 }, { "epoch": 0.7785983957695094, "grad_norm": 0.90625, "learning_rate": 3.974358974358974e-06, "loss": 1.6149, "step": 122500 }, { "epoch": 0.7792339862966682, "grad_norm": 0.96875, "learning_rate": 3.96923076923077e-06, "loss": 1.6157, "step": 122600 }, { "epoch": 0.7798695768238271, "grad_norm": 0.95703125, "learning_rate": 3.964102564102564e-06, "loss": 1.6356, "step": 122700 }, { "epoch": 0.7805051673509859, "grad_norm": 0.87890625, "learning_rate": 3.95897435897436e-06, "loss": 1.6314, "step": 122800 }, { "epoch": 0.7811407578781446, "grad_norm": 0.9609375, "learning_rate": 3.953846153846154e-06, "loss": 1.6252, "step": 122900 }, { "epoch": 0.7817763484053034, "grad_norm": 1.1015625, "learning_rate": 3.948717948717949e-06, "loss": 1.6209, "step": 123000 }, { "epoch": 0.7824119389324622, "grad_norm": 0.8671875, "learning_rate": 3.943589743589744e-06, "loss": 1.6145, "step": 123100 }, { "epoch": 0.783047529459621, "grad_norm": 1.1015625, "learning_rate": 3.938461538461539e-06, "loss": 1.6273, "step": 123200 }, { "epoch": 0.7836831199867798, "grad_norm": 0.90625, "learning_rate": 3.9333333333333335e-06, "loss": 1.6175, "step": 123300 }, { "epoch": 0.7843187105139385, "grad_norm": 0.9765625, "learning_rate": 3.928205128205128e-06, "loss": 1.6289, "step": 123400 }, { "epoch": 0.7849543010410973, "grad_norm": 0.828125, "learning_rate": 3.923076923076923e-06, "loss": 1.6126, "step": 123500 }, { "epoch": 0.7855898915682561, "grad_norm": 0.703125, "learning_rate": 3.917948717948718e-06, "loss": 1.6222, "step": 123600 }, { "epoch": 0.7862254820954149, "grad_norm": 0.87890625, "learning_rate": 3.912820512820513e-06, "loss": 1.6176, "step": 123700 }, { "epoch": 0.7868610726225737, "grad_norm": 0.9921875, "learning_rate": 3.907692307692308e-06, "loss": 1.626, "step": 123800 }, { "epoch": 0.7874966631497324, "grad_norm": 0.8046875, "learning_rate": 3.902564102564103e-06, "loss": 1.6348, "step": 123900 }, { "epoch": 0.7881322536768912, "grad_norm": 0.77734375, "learning_rate": 3.897435897435898e-06, "loss": 1.6156, "step": 124000 }, { "epoch": 0.78876784420405, "grad_norm": 0.71875, "learning_rate": 3.892307692307693e-06, "loss": 1.6132, "step": 124100 }, { "epoch": 0.7894034347312088, "grad_norm": 0.99609375, "learning_rate": 3.887179487179488e-06, "loss": 1.6178, "step": 124200 }, { "epoch": 0.7900390252583676, "grad_norm": 0.87890625, "learning_rate": 3.8820512820512825e-06, "loss": 1.6131, "step": 124300 }, { "epoch": 0.7906746157855263, "grad_norm": 0.81640625, "learning_rate": 3.876923076923077e-06, "loss": 1.6157, "step": 124400 }, { "epoch": 0.7913102063126851, "grad_norm": 1.0859375, "learning_rate": 3.871794871794872e-06, "loss": 1.6223, "step": 124500 }, { "epoch": 0.7919457968398439, "grad_norm": 1.4296875, "learning_rate": 3.866666666666667e-06, "loss": 1.6285, "step": 124600 }, { "epoch": 0.7925813873670027, "grad_norm": 0.97265625, "learning_rate": 3.861538461538462e-06, "loss": 1.6237, "step": 124700 }, { "epoch": 0.7932169778941615, "grad_norm": 0.9140625, "learning_rate": 3.856410256410257e-06, "loss": 1.6423, "step": 124800 }, { "epoch": 0.7938525684213202, "grad_norm": 0.6953125, "learning_rate": 3.851282051282051e-06, "loss": 1.6336, "step": 124900 }, { "epoch": 0.794488158948479, "grad_norm": 0.96484375, "learning_rate": 3.846153846153847e-06, "loss": 1.6335, "step": 125000 }, { "epoch": 0.7951237494756378, "grad_norm": 0.77734375, "learning_rate": 3.841025641025642e-06, "loss": 1.614, "step": 125100 }, { "epoch": 0.7957593400027966, "grad_norm": 0.95703125, "learning_rate": 3.835897435897436e-06, "loss": 1.6191, "step": 125200 }, { "epoch": 0.7963949305299554, "grad_norm": 0.9453125, "learning_rate": 3.8307692307692315e-06, "loss": 1.6222, "step": 125300 }, { "epoch": 0.7970305210571141, "grad_norm": 0.61328125, "learning_rate": 3.8256410256410255e-06, "loss": 1.6125, "step": 125400 }, { "epoch": 0.7976661115842729, "grad_norm": 1.1171875, "learning_rate": 3.8205128205128204e-06, "loss": 1.6322, "step": 125500 }, { "epoch": 0.7983017021114317, "grad_norm": 0.78125, "learning_rate": 3.815384615384616e-06, "loss": 1.6159, "step": 125600 }, { "epoch": 0.7989372926385905, "grad_norm": 0.83203125, "learning_rate": 3.8102564102564107e-06, "loss": 1.6374, "step": 125700 }, { "epoch": 0.7995728831657493, "grad_norm": 0.8203125, "learning_rate": 3.8051282051282056e-06, "loss": 1.618, "step": 125800 }, { "epoch": 0.800208473692908, "grad_norm": 0.69140625, "learning_rate": 3.8000000000000005e-06, "loss": 1.6325, "step": 125900 }, { "epoch": 0.8008440642200668, "grad_norm": 0.91796875, "learning_rate": 3.794871794871795e-06, "loss": 1.6121, "step": 126000 }, { "epoch": 0.8014796547472256, "grad_norm": 1.4609375, "learning_rate": 3.7897435897435903e-06, "loss": 1.6173, "step": 126100 }, { "epoch": 0.8021152452743844, "grad_norm": 1.1640625, "learning_rate": 3.7846153846153847e-06, "loss": 1.6166, "step": 126200 }, { "epoch": 0.8027508358015432, "grad_norm": 1.0078125, "learning_rate": 3.7794871794871796e-06, "loss": 1.6234, "step": 126300 }, { "epoch": 0.803386426328702, "grad_norm": 1.015625, "learning_rate": 3.774358974358975e-06, "loss": 1.6333, "step": 126400 }, { "epoch": 0.8040220168558608, "grad_norm": 1.078125, "learning_rate": 3.7692307692307694e-06, "loss": 1.6345, "step": 126500 }, { "epoch": 0.8046576073830196, "grad_norm": 0.80078125, "learning_rate": 3.7641025641025643e-06, "loss": 1.6255, "step": 126600 }, { "epoch": 0.8052931979101784, "grad_norm": 0.7421875, "learning_rate": 3.7589743589743592e-06, "loss": 1.6247, "step": 126700 }, { "epoch": 0.8059287884373372, "grad_norm": 1.3515625, "learning_rate": 3.753846153846154e-06, "loss": 1.6234, "step": 126800 }, { "epoch": 0.806564378964496, "grad_norm": 1.203125, "learning_rate": 3.7487179487179495e-06, "loss": 1.6475, "step": 126900 }, { "epoch": 0.8071999694916547, "grad_norm": 0.7109375, "learning_rate": 3.743589743589744e-06, "loss": 1.6282, "step": 127000 }, { "epoch": 0.8078355600188135, "grad_norm": 1.03125, "learning_rate": 3.7384615384615384e-06, "loss": 1.6238, "step": 127100 }, { "epoch": 0.8084711505459723, "grad_norm": 1.046875, "learning_rate": 3.7333333333333337e-06, "loss": 1.6218, "step": 127200 }, { "epoch": 0.8091067410731311, "grad_norm": 0.91015625, "learning_rate": 3.7282051282051286e-06, "loss": 1.6254, "step": 127300 }, { "epoch": 0.8097423316002899, "grad_norm": 0.94921875, "learning_rate": 3.723076923076923e-06, "loss": 1.6161, "step": 127400 }, { "epoch": 0.8103779221274486, "grad_norm": 0.61328125, "learning_rate": 3.7179487179487184e-06, "loss": 1.6167, "step": 127500 }, { "epoch": 0.8110135126546074, "grad_norm": 1.2578125, "learning_rate": 3.712820512820513e-06, "loss": 1.6153, "step": 127600 }, { "epoch": 0.8116491031817662, "grad_norm": 0.84765625, "learning_rate": 3.7076923076923082e-06, "loss": 1.6216, "step": 127700 }, { "epoch": 0.812284693708925, "grad_norm": 0.99609375, "learning_rate": 3.702564102564103e-06, "loss": 1.6247, "step": 127800 }, { "epoch": 0.8129202842360838, "grad_norm": 0.93359375, "learning_rate": 3.6974358974358976e-06, "loss": 1.6345, "step": 127900 }, { "epoch": 0.8135558747632425, "grad_norm": 0.94140625, "learning_rate": 3.692307692307693e-06, "loss": 1.6206, "step": 128000 }, { "epoch": 0.8141914652904013, "grad_norm": 1.0078125, "learning_rate": 3.6871794871794874e-06, "loss": 1.626, "step": 128100 }, { "epoch": 0.8148270558175601, "grad_norm": 1.484375, "learning_rate": 3.6820512820512823e-06, "loss": 1.6217, "step": 128200 }, { "epoch": 0.8154626463447189, "grad_norm": 0.8515625, "learning_rate": 3.676923076923077e-06, "loss": 1.6246, "step": 128300 }, { "epoch": 0.8160982368718777, "grad_norm": 0.93359375, "learning_rate": 3.671794871794872e-06, "loss": 1.6156, "step": 128400 }, { "epoch": 0.8167338273990364, "grad_norm": 0.9921875, "learning_rate": 3.6666666666666666e-06, "loss": 1.6211, "step": 128500 }, { "epoch": 0.8173694179261952, "grad_norm": 1.15625, "learning_rate": 3.661538461538462e-06, "loss": 1.6198, "step": 128600 }, { "epoch": 0.818005008453354, "grad_norm": 0.90234375, "learning_rate": 3.656410256410257e-06, "loss": 1.6179, "step": 128700 }, { "epoch": 0.8186405989805128, "grad_norm": 0.890625, "learning_rate": 3.6512820512820517e-06, "loss": 1.6355, "step": 128800 }, { "epoch": 0.8192761895076716, "grad_norm": 0.83984375, "learning_rate": 3.6461538461538466e-06, "loss": 1.6277, "step": 128900 }, { "epoch": 0.8199117800348303, "grad_norm": 0.8046875, "learning_rate": 3.641025641025641e-06, "loss": 1.6202, "step": 129000 }, { "epoch": 0.8205473705619891, "grad_norm": 0.87890625, "learning_rate": 3.6358974358974364e-06, "loss": 1.6254, "step": 129100 }, { "epoch": 0.8211829610891479, "grad_norm": 0.75390625, "learning_rate": 3.630769230769231e-06, "loss": 1.6199, "step": 129200 }, { "epoch": 0.8218185516163067, "grad_norm": 0.9375, "learning_rate": 3.6256410256410258e-06, "loss": 1.615, "step": 129300 }, { "epoch": 0.8224541421434655, "grad_norm": 0.85546875, "learning_rate": 3.620512820512821e-06, "loss": 1.6212, "step": 129400 }, { "epoch": 0.8230897326706242, "grad_norm": 0.98046875, "learning_rate": 3.6153846153846156e-06, "loss": 1.6211, "step": 129500 }, { "epoch": 0.823725323197783, "grad_norm": 1.0625, "learning_rate": 3.610256410256411e-06, "loss": 1.6148, "step": 129600 }, { "epoch": 0.8243609137249418, "grad_norm": 0.8828125, "learning_rate": 3.6051282051282054e-06, "loss": 1.6238, "step": 129700 }, { "epoch": 0.8249965042521006, "grad_norm": 0.66796875, "learning_rate": 3.6000000000000003e-06, "loss": 1.6315, "step": 129800 }, { "epoch": 0.8256320947792594, "grad_norm": 0.8515625, "learning_rate": 3.5948717948717956e-06, "loss": 1.6074, "step": 129900 }, { "epoch": 0.8262676853064181, "grad_norm": 0.94921875, "learning_rate": 3.58974358974359e-06, "loss": 1.6063, "step": 130000 }, { "epoch": 0.8269032758335769, "grad_norm": 1.1015625, "learning_rate": 3.5846153846153845e-06, "loss": 1.6152, "step": 130100 }, { "epoch": 0.8275388663607358, "grad_norm": 0.765625, "learning_rate": 3.57948717948718e-06, "loss": 1.5949, "step": 130200 }, { "epoch": 0.8281744568878946, "grad_norm": 0.77734375, "learning_rate": 3.5743589743589748e-06, "loss": 1.6273, "step": 130300 }, { "epoch": 0.8288100474150534, "grad_norm": 0.671875, "learning_rate": 3.5692307692307692e-06, "loss": 1.6031, "step": 130400 }, { "epoch": 0.8294456379422122, "grad_norm": 0.8828125, "learning_rate": 3.5641025641025646e-06, "loss": 1.625, "step": 130500 }, { "epoch": 0.8300812284693709, "grad_norm": 1.0390625, "learning_rate": 3.558974358974359e-06, "loss": 1.6359, "step": 130600 }, { "epoch": 0.8307168189965297, "grad_norm": 0.85546875, "learning_rate": 3.5538461538461544e-06, "loss": 1.626, "step": 130700 }, { "epoch": 0.8313524095236885, "grad_norm": 0.94921875, "learning_rate": 3.5487179487179493e-06, "loss": 1.6101, "step": 130800 }, { "epoch": 0.8319880000508473, "grad_norm": 0.95703125, "learning_rate": 3.5435897435897437e-06, "loss": 1.6109, "step": 130900 }, { "epoch": 0.8326235905780061, "grad_norm": 0.7421875, "learning_rate": 3.538461538461539e-06, "loss": 1.6194, "step": 131000 }, { "epoch": 0.8332591811051648, "grad_norm": 0.5, "learning_rate": 3.5333333333333335e-06, "loss": 1.6146, "step": 131100 }, { "epoch": 0.8338947716323236, "grad_norm": 0.93359375, "learning_rate": 3.5282051282051284e-06, "loss": 1.6276, "step": 131200 }, { "epoch": 0.8345303621594824, "grad_norm": 1.265625, "learning_rate": 3.5230769230769233e-06, "loss": 1.6218, "step": 131300 }, { "epoch": 0.8351659526866412, "grad_norm": 0.9921875, "learning_rate": 3.5179487179487182e-06, "loss": 1.6442, "step": 131400 }, { "epoch": 0.8358015432138, "grad_norm": 0.77734375, "learning_rate": 3.5128205128205127e-06, "loss": 1.6282, "step": 131500 }, { "epoch": 0.8364371337409587, "grad_norm": 1.0625, "learning_rate": 3.507692307692308e-06, "loss": 1.6294, "step": 131600 }, { "epoch": 0.8370727242681175, "grad_norm": 0.9609375, "learning_rate": 3.502564102564103e-06, "loss": 1.6143, "step": 131700 }, { "epoch": 0.8377083147952763, "grad_norm": 1.0234375, "learning_rate": 3.497435897435898e-06, "loss": 1.6447, "step": 131800 }, { "epoch": 0.8383439053224351, "grad_norm": 1.015625, "learning_rate": 3.4923076923076927e-06, "loss": 1.6045, "step": 131900 }, { "epoch": 0.8389794958495939, "grad_norm": 0.6640625, "learning_rate": 3.487179487179487e-06, "loss": 1.6307, "step": 132000 }, { "epoch": 0.8396150863767526, "grad_norm": 0.859375, "learning_rate": 3.4820512820512825e-06, "loss": 1.6028, "step": 132100 }, { "epoch": 0.8402506769039114, "grad_norm": 0.8125, "learning_rate": 3.476923076923077e-06, "loss": 1.6198, "step": 132200 }, { "epoch": 0.8408862674310702, "grad_norm": 0.71875, "learning_rate": 3.471794871794872e-06, "loss": 1.634, "step": 132300 }, { "epoch": 0.841521857958229, "grad_norm": 0.84765625, "learning_rate": 3.4666666666666672e-06, "loss": 1.6291, "step": 132400 }, { "epoch": 0.8421574484853878, "grad_norm": 0.65234375, "learning_rate": 3.4615384615384617e-06, "loss": 1.631, "step": 132500 }, { "epoch": 0.8427930390125465, "grad_norm": 0.94921875, "learning_rate": 3.456410256410257e-06, "loss": 1.6228, "step": 132600 }, { "epoch": 0.8434286295397053, "grad_norm": 0.7265625, "learning_rate": 3.4512820512820515e-06, "loss": 1.6166, "step": 132700 }, { "epoch": 0.8440642200668641, "grad_norm": 0.9921875, "learning_rate": 3.4461538461538464e-06, "loss": 1.6259, "step": 132800 }, { "epoch": 0.8446998105940229, "grad_norm": 0.78515625, "learning_rate": 3.4410256410256417e-06, "loss": 1.6147, "step": 132900 }, { "epoch": 0.8453354011211817, "grad_norm": 0.8046875, "learning_rate": 3.435897435897436e-06, "loss": 1.6124, "step": 133000 }, { "epoch": 0.8459709916483404, "grad_norm": 0.79296875, "learning_rate": 3.4307692307692307e-06, "loss": 1.6137, "step": 133100 }, { "epoch": 0.8466065821754992, "grad_norm": 0.9765625, "learning_rate": 3.425641025641026e-06, "loss": 1.622, "step": 133200 }, { "epoch": 0.847242172702658, "grad_norm": 0.68359375, "learning_rate": 3.420512820512821e-06, "loss": 1.6324, "step": 133300 }, { "epoch": 0.8478777632298168, "grad_norm": 0.84765625, "learning_rate": 3.4153846153846154e-06, "loss": 1.6176, "step": 133400 }, { "epoch": 0.8485133537569756, "grad_norm": 0.7734375, "learning_rate": 3.4102564102564107e-06, "loss": 1.6249, "step": 133500 }, { "epoch": 0.8491489442841343, "grad_norm": 0.78515625, "learning_rate": 3.405128205128205e-06, "loss": 1.635, "step": 133600 }, { "epoch": 0.8497845348112931, "grad_norm": 0.90234375, "learning_rate": 3.4000000000000005e-06, "loss": 1.6369, "step": 133700 }, { "epoch": 0.8504201253384519, "grad_norm": 0.953125, "learning_rate": 3.3948717948717954e-06, "loss": 1.6082, "step": 133800 }, { "epoch": 0.8510557158656107, "grad_norm": 0.80859375, "learning_rate": 3.38974358974359e-06, "loss": 1.6106, "step": 133900 }, { "epoch": 0.8516913063927696, "grad_norm": 1.3203125, "learning_rate": 3.384615384615385e-06, "loss": 1.6174, "step": 134000 }, { "epoch": 0.8523268969199284, "grad_norm": 0.83984375, "learning_rate": 3.3794871794871797e-06, "loss": 1.6293, "step": 134100 }, { "epoch": 0.8529624874470871, "grad_norm": 0.83984375, "learning_rate": 3.3743589743589746e-06, "loss": 1.6205, "step": 134200 }, { "epoch": 0.8535980779742459, "grad_norm": 1.03125, "learning_rate": 3.3692307692307695e-06, "loss": 1.611, "step": 134300 }, { "epoch": 0.8542336685014047, "grad_norm": 0.640625, "learning_rate": 3.3641025641025644e-06, "loss": 1.648, "step": 134400 }, { "epoch": 0.8548692590285635, "grad_norm": 1.203125, "learning_rate": 3.358974358974359e-06, "loss": 1.621, "step": 134500 }, { "epoch": 0.8555048495557223, "grad_norm": 1.296875, "learning_rate": 3.353846153846154e-06, "loss": 1.6387, "step": 134600 }, { "epoch": 0.856140440082881, "grad_norm": 1.1171875, "learning_rate": 3.348717948717949e-06, "loss": 1.6229, "step": 134700 }, { "epoch": 0.8567760306100398, "grad_norm": 1.0078125, "learning_rate": 3.343589743589744e-06, "loss": 1.616, "step": 134800 }, { "epoch": 0.8574116211371986, "grad_norm": 1.25, "learning_rate": 3.338461538461539e-06, "loss": 1.6196, "step": 134900 }, { "epoch": 0.8580472116643574, "grad_norm": 1.0234375, "learning_rate": 3.3333333333333333e-06, "loss": 1.6187, "step": 135000 }, { "epoch": 0.8586828021915162, "grad_norm": 0.9140625, "learning_rate": 3.3282051282051286e-06, "loss": 1.6255, "step": 135100 }, { "epoch": 0.8593183927186749, "grad_norm": 0.765625, "learning_rate": 3.323076923076923e-06, "loss": 1.6298, "step": 135200 }, { "epoch": 0.8599539832458337, "grad_norm": 1.0625, "learning_rate": 3.317948717948718e-06, "loss": 1.6271, "step": 135300 }, { "epoch": 0.8605895737729925, "grad_norm": 0.66796875, "learning_rate": 3.3128205128205133e-06, "loss": 1.6132, "step": 135400 }, { "epoch": 0.8612251643001513, "grad_norm": 0.83203125, "learning_rate": 3.307692307692308e-06, "loss": 1.6222, "step": 135500 }, { "epoch": 0.8618607548273101, "grad_norm": 1.1953125, "learning_rate": 3.302564102564103e-06, "loss": 1.6203, "step": 135600 }, { "epoch": 0.8624963453544688, "grad_norm": 1.0625, "learning_rate": 3.2974358974358976e-06, "loss": 1.6274, "step": 135700 }, { "epoch": 0.8631319358816276, "grad_norm": 1.03125, "learning_rate": 3.2923076923076925e-06, "loss": 1.6089, "step": 135800 }, { "epoch": 0.8637675264087864, "grad_norm": 0.8515625, "learning_rate": 3.287179487179488e-06, "loss": 1.621, "step": 135900 }, { "epoch": 0.8644031169359452, "grad_norm": 1.09375, "learning_rate": 3.2820512820512823e-06, "loss": 1.604, "step": 136000 }, { "epoch": 0.865038707463104, "grad_norm": 0.828125, "learning_rate": 3.276923076923077e-06, "loss": 1.6205, "step": 136100 }, { "epoch": 0.8656742979902627, "grad_norm": 0.9375, "learning_rate": 3.271794871794872e-06, "loss": 1.6287, "step": 136200 }, { "epoch": 0.8663098885174215, "grad_norm": 1.2890625, "learning_rate": 3.266666666666667e-06, "loss": 1.6314, "step": 136300 }, { "epoch": 0.8669454790445803, "grad_norm": 0.90625, "learning_rate": 3.2615384615384615e-06, "loss": 1.6229, "step": 136400 }, { "epoch": 0.8675810695717391, "grad_norm": 0.9375, "learning_rate": 3.256410256410257e-06, "loss": 1.6234, "step": 136500 }, { "epoch": 0.8682166600988979, "grad_norm": 0.7734375, "learning_rate": 3.2512820512820513e-06, "loss": 1.6114, "step": 136600 }, { "epoch": 0.8688522506260566, "grad_norm": 0.60546875, "learning_rate": 3.2461538461538466e-06, "loss": 1.6299, "step": 136700 }, { "epoch": 0.8694878411532154, "grad_norm": 0.859375, "learning_rate": 3.2410256410256415e-06, "loss": 1.5946, "step": 136800 }, { "epoch": 0.8701234316803742, "grad_norm": 0.921875, "learning_rate": 3.235897435897436e-06, "loss": 1.6175, "step": 136900 }, { "epoch": 0.870759022207533, "grad_norm": 0.95703125, "learning_rate": 3.2307692307692313e-06, "loss": 1.616, "step": 137000 }, { "epoch": 0.8713946127346918, "grad_norm": 1.328125, "learning_rate": 3.2256410256410258e-06, "loss": 1.6083, "step": 137100 }, { "epoch": 0.8720302032618505, "grad_norm": 1.0625, "learning_rate": 3.2205128205128207e-06, "loss": 1.6048, "step": 137200 }, { "epoch": 0.8726657937890093, "grad_norm": 1.015625, "learning_rate": 3.2153846153846156e-06, "loss": 1.6351, "step": 137300 }, { "epoch": 0.8733013843161681, "grad_norm": 1.0703125, "learning_rate": 3.2102564102564105e-06, "loss": 1.6187, "step": 137400 }, { "epoch": 0.8739369748433269, "grad_norm": 0.7421875, "learning_rate": 3.205128205128206e-06, "loss": 1.616, "step": 137500 }, { "epoch": 0.8745725653704857, "grad_norm": 0.88671875, "learning_rate": 3.2000000000000003e-06, "loss": 1.6311, "step": 137600 }, { "epoch": 0.8752081558976444, "grad_norm": 0.76171875, "learning_rate": 3.194871794871795e-06, "loss": 1.6204, "step": 137700 }, { "epoch": 0.8758437464248033, "grad_norm": 1.1953125, "learning_rate": 3.18974358974359e-06, "loss": 1.6455, "step": 137800 }, { "epoch": 0.8764793369519621, "grad_norm": 0.73046875, "learning_rate": 3.184615384615385e-06, "loss": 1.6207, "step": 137900 }, { "epoch": 0.8771149274791209, "grad_norm": 0.9375, "learning_rate": 3.1794871794871795e-06, "loss": 1.6087, "step": 138000 }, { "epoch": 0.8777505180062797, "grad_norm": 1.1328125, "learning_rate": 3.1743589743589748e-06, "loss": 1.6088, "step": 138100 }, { "epoch": 0.8783861085334385, "grad_norm": 0.87890625, "learning_rate": 3.1692307692307693e-06, "loss": 1.6146, "step": 138200 }, { "epoch": 0.8790216990605972, "grad_norm": 0.87890625, "learning_rate": 3.164102564102564e-06, "loss": 1.609, "step": 138300 }, { "epoch": 0.879657289587756, "grad_norm": 0.84375, "learning_rate": 3.1589743589743595e-06, "loss": 1.6187, "step": 138400 }, { "epoch": 0.8802928801149148, "grad_norm": 1.0078125, "learning_rate": 3.153846153846154e-06, "loss": 1.6228, "step": 138500 }, { "epoch": 0.8809284706420736, "grad_norm": 0.87890625, "learning_rate": 3.1487179487179493e-06, "loss": 1.6435, "step": 138600 }, { "epoch": 0.8815640611692324, "grad_norm": 1.3203125, "learning_rate": 3.1435897435897437e-06, "loss": 1.6138, "step": 138700 }, { "epoch": 0.8821996516963911, "grad_norm": 0.8359375, "learning_rate": 3.1384615384615386e-06, "loss": 1.619, "step": 138800 }, { "epoch": 0.8828352422235499, "grad_norm": 0.8203125, "learning_rate": 3.133333333333334e-06, "loss": 1.629, "step": 138900 }, { "epoch": 0.8834708327507087, "grad_norm": 0.9296875, "learning_rate": 3.1282051282051284e-06, "loss": 1.6192, "step": 139000 }, { "epoch": 0.8841064232778675, "grad_norm": 0.7421875, "learning_rate": 3.123076923076923e-06, "loss": 1.6226, "step": 139100 }, { "epoch": 0.8847420138050263, "grad_norm": 0.92578125, "learning_rate": 3.1179487179487182e-06, "loss": 1.6317, "step": 139200 }, { "epoch": 0.885377604332185, "grad_norm": 0.80078125, "learning_rate": 3.112820512820513e-06, "loss": 1.6209, "step": 139300 }, { "epoch": 0.8860131948593438, "grad_norm": 0.98828125, "learning_rate": 3.1076923076923076e-06, "loss": 1.6208, "step": 139400 }, { "epoch": 0.8866487853865026, "grad_norm": 0.88671875, "learning_rate": 3.102564102564103e-06, "loss": 1.626, "step": 139500 }, { "epoch": 0.8872843759136614, "grad_norm": 0.76953125, "learning_rate": 3.0974358974358974e-06, "loss": 1.6158, "step": 139600 }, { "epoch": 0.8879199664408202, "grad_norm": 0.8046875, "learning_rate": 3.0923076923076927e-06, "loss": 1.6279, "step": 139700 }, { "epoch": 0.8885555569679789, "grad_norm": 1.4140625, "learning_rate": 3.0871794871794876e-06, "loss": 1.628, "step": 139800 }, { "epoch": 0.8891911474951377, "grad_norm": 0.9375, "learning_rate": 3.082051282051282e-06, "loss": 1.6184, "step": 139900 }, { "epoch": 0.8898267380222965, "grad_norm": 1.1796875, "learning_rate": 3.0769230769230774e-06, "loss": 1.6352, "step": 140000 }, { "epoch": 0.8904623285494553, "grad_norm": 1.203125, "learning_rate": 3.071794871794872e-06, "loss": 1.6027, "step": 140100 }, { "epoch": 0.8910979190766141, "grad_norm": 1.7421875, "learning_rate": 3.066666666666667e-06, "loss": 1.6102, "step": 140200 }, { "epoch": 0.8917335096037728, "grad_norm": 0.83984375, "learning_rate": 3.0615384615384617e-06, "loss": 1.6218, "step": 140300 }, { "epoch": 0.8923691001309316, "grad_norm": 0.95703125, "learning_rate": 3.0564102564102566e-06, "loss": 1.6068, "step": 140400 }, { "epoch": 0.8930046906580904, "grad_norm": 0.75, "learning_rate": 3.051282051282052e-06, "loss": 1.6191, "step": 140500 }, { "epoch": 0.8936402811852492, "grad_norm": 0.87109375, "learning_rate": 3.0461538461538464e-06, "loss": 1.6149, "step": 140600 }, { "epoch": 0.894275871712408, "grad_norm": 0.73828125, "learning_rate": 3.0410256410256413e-06, "loss": 1.6343, "step": 140700 }, { "epoch": 0.8949114622395667, "grad_norm": 1.1640625, "learning_rate": 3.035897435897436e-06, "loss": 1.6081, "step": 140800 }, { "epoch": 0.8955470527667255, "grad_norm": 0.6484375, "learning_rate": 3.030769230769231e-06, "loss": 1.6139, "step": 140900 }, { "epoch": 0.8961826432938843, "grad_norm": 0.9375, "learning_rate": 3.0256410256410256e-06, "loss": 1.6054, "step": 141000 }, { "epoch": 0.8968182338210431, "grad_norm": 0.9453125, "learning_rate": 3.020512820512821e-06, "loss": 1.6071, "step": 141100 }, { "epoch": 0.8974538243482019, "grad_norm": 0.83203125, "learning_rate": 3.0153846153846154e-06, "loss": 1.6072, "step": 141200 }, { "epoch": 0.8980894148753606, "grad_norm": 0.6796875, "learning_rate": 3.0102564102564103e-06, "loss": 1.6221, "step": 141300 }, { "epoch": 0.8987250054025194, "grad_norm": 0.875, "learning_rate": 3.0051282051282056e-06, "loss": 1.6284, "step": 141400 }, { "epoch": 0.8993605959296783, "grad_norm": 0.6953125, "learning_rate": 3e-06, "loss": 1.6133, "step": 141500 }, { "epoch": 0.8999961864568371, "grad_norm": 0.75, "learning_rate": 2.9948717948717954e-06, "loss": 1.6361, "step": 141600 }, { "epoch": 0.9006317769839959, "grad_norm": 0.9375, "learning_rate": 2.98974358974359e-06, "loss": 1.613, "step": 141700 }, { "epoch": 0.9012673675111547, "grad_norm": 1.0859375, "learning_rate": 2.9846153846153848e-06, "loss": 1.6331, "step": 141800 }, { "epoch": 0.9019029580383134, "grad_norm": 0.83984375, "learning_rate": 2.97948717948718e-06, "loss": 1.6272, "step": 141900 }, { "epoch": 0.9025385485654722, "grad_norm": 0.70703125, "learning_rate": 2.9743589743589746e-06, "loss": 1.6234, "step": 142000 }, { "epoch": 0.903174139092631, "grad_norm": 0.83984375, "learning_rate": 2.969230769230769e-06, "loss": 1.6222, "step": 142100 }, { "epoch": 0.9038097296197898, "grad_norm": 0.79296875, "learning_rate": 2.9641025641025644e-06, "loss": 1.6143, "step": 142200 }, { "epoch": 0.9044453201469486, "grad_norm": 0.79296875, "learning_rate": 2.9589743589743593e-06, "loss": 1.6141, "step": 142300 }, { "epoch": 0.9050809106741073, "grad_norm": 0.89453125, "learning_rate": 2.953846153846154e-06, "loss": 1.6266, "step": 142400 }, { "epoch": 0.9057165012012661, "grad_norm": 0.79296875, "learning_rate": 2.948717948717949e-06, "loss": 1.6443, "step": 142500 }, { "epoch": 0.9063520917284249, "grad_norm": 0.859375, "learning_rate": 2.9435897435897435e-06, "loss": 1.6212, "step": 142600 }, { "epoch": 0.9069876822555837, "grad_norm": 1.1796875, "learning_rate": 2.938461538461539e-06, "loss": 1.6373, "step": 142700 }, { "epoch": 0.9076232727827425, "grad_norm": 0.703125, "learning_rate": 2.9333333333333338e-06, "loss": 1.6184, "step": 142800 }, { "epoch": 0.9082588633099012, "grad_norm": 0.9140625, "learning_rate": 2.9282051282051282e-06, "loss": 1.6146, "step": 142900 }, { "epoch": 0.90889445383706, "grad_norm": 0.9375, "learning_rate": 2.9230769230769236e-06, "loss": 1.6159, "step": 143000 }, { "epoch": 0.9095300443642188, "grad_norm": 1.0859375, "learning_rate": 2.917948717948718e-06, "loss": 1.6432, "step": 143100 }, { "epoch": 0.9101656348913776, "grad_norm": 0.86328125, "learning_rate": 2.912820512820513e-06, "loss": 1.6271, "step": 143200 }, { "epoch": 0.9108012254185364, "grad_norm": 1.0703125, "learning_rate": 2.907692307692308e-06, "loss": 1.6198, "step": 143300 }, { "epoch": 0.9114368159456951, "grad_norm": 0.84375, "learning_rate": 2.9025641025641027e-06, "loss": 1.638, "step": 143400 }, { "epoch": 0.9120724064728539, "grad_norm": 1.234375, "learning_rate": 2.897435897435898e-06, "loss": 1.6263, "step": 143500 }, { "epoch": 0.9127079970000127, "grad_norm": 1.0078125, "learning_rate": 2.8923076923076925e-06, "loss": 1.6231, "step": 143600 }, { "epoch": 0.9133435875271715, "grad_norm": 0.64453125, "learning_rate": 2.8871794871794874e-06, "loss": 1.6218, "step": 143700 }, { "epoch": 0.9139791780543303, "grad_norm": 0.85546875, "learning_rate": 2.8820512820512823e-06, "loss": 1.6251, "step": 143800 }, { "epoch": 0.914614768581489, "grad_norm": 0.6640625, "learning_rate": 2.8769230769230772e-06, "loss": 1.6289, "step": 143900 }, { "epoch": 0.9152503591086478, "grad_norm": 1.109375, "learning_rate": 2.8717948717948717e-06, "loss": 1.6282, "step": 144000 }, { "epoch": 0.9158859496358066, "grad_norm": 0.890625, "learning_rate": 2.866666666666667e-06, "loss": 1.6281, "step": 144100 }, { "epoch": 0.9165215401629654, "grad_norm": 1.046875, "learning_rate": 2.8615384615384615e-06, "loss": 1.6153, "step": 144200 }, { "epoch": 0.9171571306901242, "grad_norm": 1.0390625, "learning_rate": 2.8564102564102564e-06, "loss": 1.6339, "step": 144300 }, { "epoch": 0.917792721217283, "grad_norm": 0.78515625, "learning_rate": 2.8512820512820517e-06, "loss": 1.6205, "step": 144400 }, { "epoch": 0.9184283117444417, "grad_norm": 1.1875, "learning_rate": 2.846153846153846e-06, "loss": 1.6251, "step": 144500 }, { "epoch": 0.9190639022716005, "grad_norm": 1.015625, "learning_rate": 2.8410256410256415e-06, "loss": 1.6175, "step": 144600 }, { "epoch": 0.9196994927987593, "grad_norm": 1.65625, "learning_rate": 2.835897435897436e-06, "loss": 1.6274, "step": 144700 }, { "epoch": 0.9203350833259181, "grad_norm": 0.85546875, "learning_rate": 2.830769230769231e-06, "loss": 1.6275, "step": 144800 }, { "epoch": 0.9209706738530768, "grad_norm": 0.8359375, "learning_rate": 2.8256410256410262e-06, "loss": 1.6317, "step": 144900 }, { "epoch": 0.9216062643802356, "grad_norm": 1.171875, "learning_rate": 2.8205128205128207e-06, "loss": 1.6217, "step": 145000 }, { "epoch": 0.9222418549073944, "grad_norm": 0.953125, "learning_rate": 2.815384615384615e-06, "loss": 1.6317, "step": 145100 }, { "epoch": 0.9228774454345532, "grad_norm": 0.7421875, "learning_rate": 2.8102564102564105e-06, "loss": 1.6154, "step": 145200 }, { "epoch": 0.9235130359617121, "grad_norm": 0.91015625, "learning_rate": 2.8051282051282054e-06, "loss": 1.6229, "step": 145300 }, { "epoch": 0.9241486264888709, "grad_norm": 0.99609375, "learning_rate": 2.8000000000000003e-06, "loss": 1.6387, "step": 145400 }, { "epoch": 0.9247842170160296, "grad_norm": 1.015625, "learning_rate": 2.794871794871795e-06, "loss": 1.6295, "step": 145500 }, { "epoch": 0.9254198075431884, "grad_norm": 1.140625, "learning_rate": 2.7897435897435897e-06, "loss": 1.5936, "step": 145600 }, { "epoch": 0.9260553980703472, "grad_norm": 1.046875, "learning_rate": 2.784615384615385e-06, "loss": 1.6272, "step": 145700 }, { "epoch": 0.926690988597506, "grad_norm": 1.03125, "learning_rate": 2.77948717948718e-06, "loss": 1.6146, "step": 145800 }, { "epoch": 0.9273265791246648, "grad_norm": 1.015625, "learning_rate": 2.7743589743589744e-06, "loss": 1.6311, "step": 145900 }, { "epoch": 0.9279621696518235, "grad_norm": 0.90234375, "learning_rate": 2.7692307692307697e-06, "loss": 1.6133, "step": 146000 }, { "epoch": 0.9285977601789823, "grad_norm": 0.80078125, "learning_rate": 2.764102564102564e-06, "loss": 1.6253, "step": 146100 }, { "epoch": 0.9292333507061411, "grad_norm": 0.9140625, "learning_rate": 2.758974358974359e-06, "loss": 1.6415, "step": 146200 }, { "epoch": 0.9298689412332999, "grad_norm": 0.859375, "learning_rate": 2.753846153846154e-06, "loss": 1.6187, "step": 146300 }, { "epoch": 0.9305045317604587, "grad_norm": 1.1484375, "learning_rate": 2.748717948717949e-06, "loss": 1.6169, "step": 146400 }, { "epoch": 0.9311401222876174, "grad_norm": 0.640625, "learning_rate": 2.743589743589744e-06, "loss": 1.6248, "step": 146500 }, { "epoch": 0.9317757128147762, "grad_norm": 0.97265625, "learning_rate": 2.7384615384615387e-06, "loss": 1.6086, "step": 146600 }, { "epoch": 0.932411303341935, "grad_norm": 1.046875, "learning_rate": 2.7333333333333336e-06, "loss": 1.6358, "step": 146700 }, { "epoch": 0.9330468938690938, "grad_norm": 0.875, "learning_rate": 2.7282051282051285e-06, "loss": 1.6129, "step": 146800 }, { "epoch": 0.9336824843962526, "grad_norm": 0.87109375, "learning_rate": 2.7230769230769234e-06, "loss": 1.6228, "step": 146900 }, { "epoch": 0.9343180749234113, "grad_norm": 0.90625, "learning_rate": 2.717948717948718e-06, "loss": 1.623, "step": 147000 }, { "epoch": 0.9349536654505701, "grad_norm": 0.6484375, "learning_rate": 2.712820512820513e-06, "loss": 1.633, "step": 147100 }, { "epoch": 0.9355892559777289, "grad_norm": 0.703125, "learning_rate": 2.7076923076923076e-06, "loss": 1.6282, "step": 147200 }, { "epoch": 0.9362248465048877, "grad_norm": 0.88671875, "learning_rate": 2.7025641025641025e-06, "loss": 1.6231, "step": 147300 }, { "epoch": 0.9368604370320465, "grad_norm": 1.21875, "learning_rate": 2.697435897435898e-06, "loss": 1.6084, "step": 147400 }, { "epoch": 0.9374960275592052, "grad_norm": 0.95703125, "learning_rate": 2.6923076923076923e-06, "loss": 1.6344, "step": 147500 }, { "epoch": 0.938131618086364, "grad_norm": 0.734375, "learning_rate": 2.6871794871794877e-06, "loss": 1.6374, "step": 147600 }, { "epoch": 0.9387672086135228, "grad_norm": 0.8203125, "learning_rate": 2.682051282051282e-06, "loss": 1.6311, "step": 147700 }, { "epoch": 0.9394027991406816, "grad_norm": 1.0625, "learning_rate": 2.676923076923077e-06, "loss": 1.6174, "step": 147800 }, { "epoch": 0.9400383896678404, "grad_norm": 0.703125, "learning_rate": 2.6717948717948724e-06, "loss": 1.6077, "step": 147900 }, { "epoch": 0.9406739801949991, "grad_norm": 0.88671875, "learning_rate": 2.666666666666667e-06, "loss": 1.6211, "step": 148000 }, { "epoch": 0.9413095707221579, "grad_norm": 0.80078125, "learning_rate": 2.6615384615384613e-06, "loss": 1.6337, "step": 148100 }, { "epoch": 0.9419451612493167, "grad_norm": 1.0859375, "learning_rate": 2.6564102564102566e-06, "loss": 1.6298, "step": 148200 }, { "epoch": 0.9425807517764755, "grad_norm": 1.0703125, "learning_rate": 2.6512820512820515e-06, "loss": 1.6167, "step": 148300 }, { "epoch": 0.9432163423036343, "grad_norm": 1.5625, "learning_rate": 2.6461538461538464e-06, "loss": 1.6251, "step": 148400 }, { "epoch": 0.943851932830793, "grad_norm": 1.28125, "learning_rate": 2.6410256410256413e-06, "loss": 1.631, "step": 148500 }, { "epoch": 0.9444875233579518, "grad_norm": 1.0546875, "learning_rate": 2.635897435897436e-06, "loss": 1.6133, "step": 148600 }, { "epoch": 0.9451231138851106, "grad_norm": 0.921875, "learning_rate": 2.630769230769231e-06, "loss": 1.6298, "step": 148700 }, { "epoch": 0.9457587044122694, "grad_norm": 0.91015625, "learning_rate": 2.625641025641026e-06, "loss": 1.6176, "step": 148800 }, { "epoch": 0.9463942949394282, "grad_norm": 0.78515625, "learning_rate": 2.6205128205128205e-06, "loss": 1.6192, "step": 148900 }, { "epoch": 0.947029885466587, "grad_norm": 0.96484375, "learning_rate": 2.615384615384616e-06, "loss": 1.6072, "step": 149000 }, { "epoch": 0.9476654759937458, "grad_norm": 1.140625, "learning_rate": 2.6102564102564103e-06, "loss": 1.6265, "step": 149100 }, { "epoch": 0.9483010665209046, "grad_norm": 0.9375, "learning_rate": 2.605128205128205e-06, "loss": 1.6223, "step": 149200 }, { "epoch": 0.9489366570480634, "grad_norm": 0.921875, "learning_rate": 2.6e-06, "loss": 1.6233, "step": 149300 }, { "epoch": 0.9495722475752222, "grad_norm": 1.1015625, "learning_rate": 2.594871794871795e-06, "loss": 1.6216, "step": 149400 }, { "epoch": 0.950207838102381, "grad_norm": 0.9765625, "learning_rate": 2.5897435897435903e-06, "loss": 1.6383, "step": 149500 }, { "epoch": 0.9508434286295397, "grad_norm": 0.78125, "learning_rate": 2.584615384615385e-06, "loss": 1.627, "step": 149600 }, { "epoch": 0.9514790191566985, "grad_norm": 1.0390625, "learning_rate": 2.5794871794871797e-06, "loss": 1.6252, "step": 149700 }, { "epoch": 0.9521146096838573, "grad_norm": 0.89453125, "learning_rate": 2.5743589743589746e-06, "loss": 1.6155, "step": 149800 }, { "epoch": 0.9527502002110161, "grad_norm": 1.0859375, "learning_rate": 2.5692307692307695e-06, "loss": 1.6213, "step": 149900 }, { "epoch": 0.9533857907381749, "grad_norm": 1.0859375, "learning_rate": 2.564102564102564e-06, "loss": 1.6184, "step": 150000 }, { "epoch": 0.9540213812653336, "grad_norm": 1.0703125, "learning_rate": 2.5589743589743593e-06, "loss": 1.6158, "step": 150100 }, { "epoch": 0.9546569717924924, "grad_norm": 0.875, "learning_rate": 2.5538461538461538e-06, "loss": 1.6308, "step": 150200 }, { "epoch": 0.9552925623196512, "grad_norm": 0.87109375, "learning_rate": 2.548717948717949e-06, "loss": 1.6245, "step": 150300 }, { "epoch": 0.95592815284681, "grad_norm": 0.890625, "learning_rate": 2.543589743589744e-06, "loss": 1.6166, "step": 150400 }, { "epoch": 0.9565637433739688, "grad_norm": 0.953125, "learning_rate": 2.5384615384615385e-06, "loss": 1.6234, "step": 150500 }, { "epoch": 0.9571993339011275, "grad_norm": 0.8359375, "learning_rate": 2.5333333333333338e-06, "loss": 1.6191, "step": 150600 }, { "epoch": 0.9578349244282863, "grad_norm": 0.96875, "learning_rate": 2.5282051282051283e-06, "loss": 1.6152, "step": 150700 }, { "epoch": 0.9584705149554451, "grad_norm": 0.76953125, "learning_rate": 2.523076923076923e-06, "loss": 1.6097, "step": 150800 }, { "epoch": 0.9591061054826039, "grad_norm": 0.88671875, "learning_rate": 2.5179487179487185e-06, "loss": 1.6461, "step": 150900 }, { "epoch": 0.9597416960097627, "grad_norm": 1.3046875, "learning_rate": 2.512820512820513e-06, "loss": 1.6146, "step": 151000 }, { "epoch": 0.9603772865369214, "grad_norm": 0.9375, "learning_rate": 2.507692307692308e-06, "loss": 1.6435, "step": 151100 }, { "epoch": 0.9610128770640802, "grad_norm": 0.859375, "learning_rate": 2.5025641025641028e-06, "loss": 1.6163, "step": 151200 }, { "epoch": 0.961648467591239, "grad_norm": 0.69921875, "learning_rate": 2.4974358974358977e-06, "loss": 1.6316, "step": 151300 }, { "epoch": 0.9622840581183978, "grad_norm": 1.109375, "learning_rate": 2.4923076923076926e-06, "loss": 1.6194, "step": 151400 }, { "epoch": 0.9629196486455566, "grad_norm": 1.0703125, "learning_rate": 2.4871794871794875e-06, "loss": 1.6299, "step": 151500 }, { "epoch": 0.9635552391727153, "grad_norm": 1.046875, "learning_rate": 2.4820512820512824e-06, "loss": 1.6017, "step": 151600 }, { "epoch": 0.9641908296998741, "grad_norm": 1.1796875, "learning_rate": 2.4769230769230773e-06, "loss": 1.6184, "step": 151700 }, { "epoch": 0.9648264202270329, "grad_norm": 0.921875, "learning_rate": 2.471794871794872e-06, "loss": 1.6238, "step": 151800 }, { "epoch": 0.9654620107541917, "grad_norm": 0.8125, "learning_rate": 2.466666666666667e-06, "loss": 1.6227, "step": 151900 }, { "epoch": 0.9660976012813505, "grad_norm": 1.0078125, "learning_rate": 2.461538461538462e-06, "loss": 1.6209, "step": 152000 }, { "epoch": 0.9667331918085093, "grad_norm": 1.0, "learning_rate": 2.4564102564102564e-06, "loss": 1.6293, "step": 152100 }, { "epoch": 0.967368782335668, "grad_norm": 0.81640625, "learning_rate": 2.4512820512820513e-06, "loss": 1.6269, "step": 152200 }, { "epoch": 0.9680043728628268, "grad_norm": 1.0859375, "learning_rate": 2.4461538461538466e-06, "loss": 1.6321, "step": 152300 }, { "epoch": 0.9686399633899856, "grad_norm": 0.828125, "learning_rate": 2.441025641025641e-06, "loss": 1.621, "step": 152400 }, { "epoch": 0.9692755539171444, "grad_norm": 0.828125, "learning_rate": 2.435897435897436e-06, "loss": 1.6389, "step": 152500 }, { "epoch": 0.9699111444443032, "grad_norm": 0.94921875, "learning_rate": 2.430769230769231e-06, "loss": 1.6327, "step": 152600 }, { "epoch": 0.9705467349714619, "grad_norm": 0.6640625, "learning_rate": 2.425641025641026e-06, "loss": 1.6313, "step": 152700 }, { "epoch": 0.9711823254986207, "grad_norm": 1.1640625, "learning_rate": 2.4205128205128207e-06, "loss": 1.6189, "step": 152800 }, { "epoch": 0.9718179160257796, "grad_norm": 0.765625, "learning_rate": 2.4153846153846156e-06, "loss": 1.6292, "step": 152900 }, { "epoch": 0.9724535065529384, "grad_norm": 1.2578125, "learning_rate": 2.4102564102564105e-06, "loss": 1.6358, "step": 153000 }, { "epoch": 0.9730890970800972, "grad_norm": 1.0703125, "learning_rate": 2.4051282051282054e-06, "loss": 1.6183, "step": 153100 }, { "epoch": 0.973724687607256, "grad_norm": 1.3125, "learning_rate": 2.4000000000000003e-06, "loss": 1.6149, "step": 153200 }, { "epoch": 0.9743602781344147, "grad_norm": 1.0390625, "learning_rate": 2.3948717948717952e-06, "loss": 1.612, "step": 153300 }, { "epoch": 0.9749958686615735, "grad_norm": 1.015625, "learning_rate": 2.38974358974359e-06, "loss": 1.6341, "step": 153400 }, { "epoch": 0.9756314591887323, "grad_norm": 0.9375, "learning_rate": 2.384615384615385e-06, "loss": 1.621, "step": 153500 }, { "epoch": 0.9762670497158911, "grad_norm": 1.265625, "learning_rate": 2.3794871794871795e-06, "loss": 1.6103, "step": 153600 }, { "epoch": 0.9769026402430498, "grad_norm": 0.96484375, "learning_rate": 2.3743589743589744e-06, "loss": 1.6274, "step": 153700 }, { "epoch": 0.9775382307702086, "grad_norm": 1.078125, "learning_rate": 2.3692307692307697e-06, "loss": 1.6163, "step": 153800 }, { "epoch": 0.9781738212973674, "grad_norm": 1.140625, "learning_rate": 2.364102564102564e-06, "loss": 1.6313, "step": 153900 }, { "epoch": 0.9788094118245262, "grad_norm": 0.703125, "learning_rate": 2.358974358974359e-06, "loss": 1.613, "step": 154000 }, { "epoch": 0.979445002351685, "grad_norm": 0.74609375, "learning_rate": 2.353846153846154e-06, "loss": 1.6266, "step": 154100 }, { "epoch": 0.9800805928788437, "grad_norm": 0.9453125, "learning_rate": 2.348717948717949e-06, "loss": 1.6425, "step": 154200 }, { "epoch": 0.9807161834060025, "grad_norm": 1.09375, "learning_rate": 2.3435897435897438e-06, "loss": 1.6238, "step": 154300 }, { "epoch": 0.9813517739331613, "grad_norm": 0.86328125, "learning_rate": 2.3384615384615387e-06, "loss": 1.6389, "step": 154400 }, { "epoch": 0.9819873644603201, "grad_norm": 0.8046875, "learning_rate": 2.3333333333333336e-06, "loss": 1.6078, "step": 154500 }, { "epoch": 0.9826229549874789, "grad_norm": 1.3203125, "learning_rate": 2.3282051282051285e-06, "loss": 1.6403, "step": 154600 }, { "epoch": 0.9832585455146376, "grad_norm": 1.0546875, "learning_rate": 2.3230769230769234e-06, "loss": 1.6259, "step": 154700 }, { "epoch": 0.9838941360417964, "grad_norm": 1.0703125, "learning_rate": 2.3179487179487183e-06, "loss": 1.6166, "step": 154800 }, { "epoch": 0.9845297265689552, "grad_norm": 1.1015625, "learning_rate": 2.312820512820513e-06, "loss": 1.6286, "step": 154900 }, { "epoch": 0.985165317096114, "grad_norm": 1.125, "learning_rate": 2.307692307692308e-06, "loss": 1.6285, "step": 155000 }, { "epoch": 0.9858009076232728, "grad_norm": 0.71875, "learning_rate": 2.3025641025641026e-06, "loss": 1.6191, "step": 155100 }, { "epoch": 0.9864364981504316, "grad_norm": 1.125, "learning_rate": 2.2974358974358975e-06, "loss": 1.6229, "step": 155200 }, { "epoch": 0.9870720886775903, "grad_norm": 0.6953125, "learning_rate": 2.2923076923076928e-06, "loss": 1.6099, "step": 155300 }, { "epoch": 0.9877076792047491, "grad_norm": 0.90234375, "learning_rate": 2.2871794871794872e-06, "loss": 1.621, "step": 155400 }, { "epoch": 0.9883432697319079, "grad_norm": 1.0625, "learning_rate": 2.282051282051282e-06, "loss": 1.6241, "step": 155500 }, { "epoch": 0.9889788602590667, "grad_norm": 0.8828125, "learning_rate": 2.276923076923077e-06, "loss": 1.6201, "step": 155600 }, { "epoch": 0.9896144507862255, "grad_norm": 0.80859375, "learning_rate": 2.271794871794872e-06, "loss": 1.6286, "step": 155700 }, { "epoch": 0.9902500413133842, "grad_norm": 1.2734375, "learning_rate": 2.266666666666667e-06, "loss": 1.6277, "step": 155800 }, { "epoch": 0.990885631840543, "grad_norm": 0.99609375, "learning_rate": 2.2615384615384617e-06, "loss": 1.6249, "step": 155900 }, { "epoch": 0.9915212223677018, "grad_norm": 0.78125, "learning_rate": 2.2564102564102566e-06, "loss": 1.6259, "step": 156000 }, { "epoch": 0.9921568128948606, "grad_norm": 0.94921875, "learning_rate": 2.2512820512820515e-06, "loss": 1.6344, "step": 156100 }, { "epoch": 0.9927924034220194, "grad_norm": 1.2421875, "learning_rate": 2.2461538461538464e-06, "loss": 1.6339, "step": 156200 }, { "epoch": 0.9934279939491781, "grad_norm": 0.85546875, "learning_rate": 2.2410256410256413e-06, "loss": 1.6155, "step": 156300 }, { "epoch": 0.9940635844763369, "grad_norm": 1.1015625, "learning_rate": 2.2358974358974362e-06, "loss": 1.6393, "step": 156400 }, { "epoch": 0.9946991750034957, "grad_norm": 0.99609375, "learning_rate": 2.230769230769231e-06, "loss": 1.6203, "step": 156500 }, { "epoch": 0.9953347655306546, "grad_norm": 0.7421875, "learning_rate": 2.2256410256410256e-06, "loss": 1.6214, "step": 156600 }, { "epoch": 0.9959703560578134, "grad_norm": 1.0390625, "learning_rate": 2.2205128205128205e-06, "loss": 1.6092, "step": 156700 }, { "epoch": 0.9966059465849721, "grad_norm": 1.0390625, "learning_rate": 2.215384615384616e-06, "loss": 1.6123, "step": 156800 }, { "epoch": 0.9972415371121309, "grad_norm": 1.171875, "learning_rate": 2.2102564102564103e-06, "loss": 1.6175, "step": 156900 }, { "epoch": 0.9978771276392897, "grad_norm": 0.91796875, "learning_rate": 2.2051282051282052e-06, "loss": 1.5962, "step": 157000 }, { "epoch": 0.9985127181664485, "grad_norm": 0.58984375, "learning_rate": 2.2e-06, "loss": 1.6391, "step": 157100 }, { "epoch": 0.9991483086936073, "grad_norm": 1.0078125, "learning_rate": 2.194871794871795e-06, "loss": 1.6408, "step": 157200 }, { "epoch": 0.999783899220766, "grad_norm": 0.85546875, "learning_rate": 2.18974358974359e-06, "loss": 1.6303, "step": 157300 }, { "epoch": 1.0004194897479248, "grad_norm": 1.1640625, "learning_rate": 2.184615384615385e-06, "loss": 1.6148, "step": 157400 }, { "epoch": 1.0010550802750835, "grad_norm": 1.1015625, "learning_rate": 2.1794871794871797e-06, "loss": 1.6173, "step": 157500 }, { "epoch": 1.0016906708022424, "grad_norm": 0.953125, "learning_rate": 2.1743589743589746e-06, "loss": 1.6077, "step": 157600 }, { "epoch": 1.002326261329401, "grad_norm": 1.171875, "learning_rate": 2.1692307692307695e-06, "loss": 1.6165, "step": 157700 }, { "epoch": 1.00296185185656, "grad_norm": 0.98046875, "learning_rate": 2.1641025641025644e-06, "loss": 1.6246, "step": 157800 }, { "epoch": 1.0035974423837186, "grad_norm": 0.87109375, "learning_rate": 2.1589743589743593e-06, "loss": 1.6128, "step": 157900 }, { "epoch": 1.0042330329108775, "grad_norm": 0.82421875, "learning_rate": 2.153846153846154e-06, "loss": 1.6114, "step": 158000 }, { "epoch": 1.0048686234380362, "grad_norm": 0.80859375, "learning_rate": 2.1487179487179487e-06, "loss": 1.6056, "step": 158100 }, { "epoch": 1.005504213965195, "grad_norm": 0.92578125, "learning_rate": 2.1435897435897436e-06, "loss": 1.6178, "step": 158200 }, { "epoch": 1.0061398044923537, "grad_norm": 0.9609375, "learning_rate": 2.138461538461539e-06, "loss": 1.6287, "step": 158300 }, { "epoch": 1.0067753950195126, "grad_norm": 0.9140625, "learning_rate": 2.133333333333334e-06, "loss": 1.6419, "step": 158400 }, { "epoch": 1.0074109855466715, "grad_norm": 1.21875, "learning_rate": 2.1282051282051283e-06, "loss": 1.6179, "step": 158500 }, { "epoch": 1.0080465760738302, "grad_norm": 0.98828125, "learning_rate": 2.123076923076923e-06, "loss": 1.6242, "step": 158600 }, { "epoch": 1.008682166600989, "grad_norm": 0.859375, "learning_rate": 2.117948717948718e-06, "loss": 1.6114, "step": 158700 }, { "epoch": 1.0093177571281478, "grad_norm": 0.80859375, "learning_rate": 2.112820512820513e-06, "loss": 1.6039, "step": 158800 }, { "epoch": 1.0099533476553066, "grad_norm": 1.015625, "learning_rate": 2.107692307692308e-06, "loss": 1.6231, "step": 158900 }, { "epoch": 1.0105889381824653, "grad_norm": 1.1640625, "learning_rate": 2.1025641025641028e-06, "loss": 1.6244, "step": 159000 }, { "epoch": 1.0112245287096242, "grad_norm": 0.9375, "learning_rate": 2.0974358974358977e-06, "loss": 1.629, "step": 159100 }, { "epoch": 1.0118601192367829, "grad_norm": 0.8046875, "learning_rate": 2.0923076923076926e-06, "loss": 1.6334, "step": 159200 }, { "epoch": 1.0124957097639418, "grad_norm": 0.7578125, "learning_rate": 2.0871794871794875e-06, "loss": 1.6162, "step": 159300 }, { "epoch": 1.0131313002911004, "grad_norm": 0.84765625, "learning_rate": 2.0820512820512824e-06, "loss": 1.622, "step": 159400 }, { "epoch": 1.0137668908182593, "grad_norm": 0.94140625, "learning_rate": 2.0769230769230773e-06, "loss": 1.6202, "step": 159500 }, { "epoch": 1.014402481345418, "grad_norm": 0.953125, "learning_rate": 2.0717948717948717e-06, "loss": 1.6231, "step": 159600 }, { "epoch": 1.0150380718725769, "grad_norm": 0.73046875, "learning_rate": 2.0666666666666666e-06, "loss": 1.6075, "step": 159700 }, { "epoch": 1.0156736623997356, "grad_norm": 0.8125, "learning_rate": 2.061538461538462e-06, "loss": 1.6256, "step": 159800 }, { "epoch": 1.0163092529268944, "grad_norm": 0.73828125, "learning_rate": 2.056410256410257e-06, "loss": 1.6224, "step": 159900 }, { "epoch": 1.0169448434540531, "grad_norm": 0.90234375, "learning_rate": 2.0512820512820513e-06, "loss": 1.6256, "step": 160000 }, { "epoch": 1.017580433981212, "grad_norm": 0.78125, "learning_rate": 2.0461538461538462e-06, "loss": 1.6318, "step": 160100 }, { "epoch": 1.0182160245083707, "grad_norm": 0.73828125, "learning_rate": 2.041025641025641e-06, "loss": 1.621, "step": 160200 }, { "epoch": 1.0188516150355296, "grad_norm": 1.2734375, "learning_rate": 2.035897435897436e-06, "loss": 1.6265, "step": 160300 }, { "epoch": 1.0194872055626882, "grad_norm": 0.984375, "learning_rate": 2.030769230769231e-06, "loss": 1.6318, "step": 160400 }, { "epoch": 1.0201227960898471, "grad_norm": 1.234375, "learning_rate": 2.025641025641026e-06, "loss": 1.6279, "step": 160500 }, { "epoch": 1.0207583866170058, "grad_norm": 0.92578125, "learning_rate": 2.0205128205128207e-06, "loss": 1.6012, "step": 160600 }, { "epoch": 1.0213939771441647, "grad_norm": 1.0390625, "learning_rate": 2.0153846153846156e-06, "loss": 1.6238, "step": 160700 }, { "epoch": 1.0220295676713234, "grad_norm": 1.5234375, "learning_rate": 2.0102564102564105e-06, "loss": 1.6219, "step": 160800 }, { "epoch": 1.0226651581984822, "grad_norm": 0.76171875, "learning_rate": 2.0051282051282054e-06, "loss": 1.6225, "step": 160900 }, { "epoch": 1.023300748725641, "grad_norm": 0.921875, "learning_rate": 2.0000000000000003e-06, "loss": 1.6197, "step": 161000 }, { "epoch": 1.0239363392527998, "grad_norm": 0.7421875, "learning_rate": 1.994871794871795e-06, "loss": 1.6083, "step": 161100 }, { "epoch": 1.0245719297799585, "grad_norm": 0.9765625, "learning_rate": 1.9897435897435897e-06, "loss": 1.641, "step": 161200 }, { "epoch": 1.0252075203071174, "grad_norm": 0.6484375, "learning_rate": 1.984615384615385e-06, "loss": 1.6369, "step": 161300 }, { "epoch": 1.025843110834276, "grad_norm": 0.9765625, "learning_rate": 1.97948717948718e-06, "loss": 1.6109, "step": 161400 }, { "epoch": 1.026478701361435, "grad_norm": 1.0703125, "learning_rate": 1.9743589743589744e-06, "loss": 1.6108, "step": 161500 }, { "epoch": 1.0271142918885936, "grad_norm": 1.0703125, "learning_rate": 1.9692307692307693e-06, "loss": 1.6292, "step": 161600 }, { "epoch": 1.0277498824157525, "grad_norm": 1.3828125, "learning_rate": 1.964102564102564e-06, "loss": 1.6273, "step": 161700 }, { "epoch": 1.0283854729429112, "grad_norm": 0.77734375, "learning_rate": 1.958974358974359e-06, "loss": 1.6217, "step": 161800 }, { "epoch": 1.02902106347007, "grad_norm": 0.89453125, "learning_rate": 1.953846153846154e-06, "loss": 1.6362, "step": 161900 }, { "epoch": 1.0296566539972287, "grad_norm": 1.125, "learning_rate": 1.948717948717949e-06, "loss": 1.6055, "step": 162000 }, { "epoch": 1.0302922445243876, "grad_norm": 1.2109375, "learning_rate": 1.943589743589744e-06, "loss": 1.6192, "step": 162100 }, { "epoch": 1.0309278350515463, "grad_norm": 0.75390625, "learning_rate": 1.9384615384615387e-06, "loss": 1.6168, "step": 162200 }, { "epoch": 1.0315634255787052, "grad_norm": 1.0234375, "learning_rate": 1.9333333333333336e-06, "loss": 1.6188, "step": 162300 }, { "epoch": 1.032199016105864, "grad_norm": 0.8203125, "learning_rate": 1.9282051282051285e-06, "loss": 1.6285, "step": 162400 }, { "epoch": 1.0328346066330227, "grad_norm": 0.765625, "learning_rate": 1.9230769230769234e-06, "loss": 1.6258, "step": 162500 }, { "epoch": 1.0334701971601816, "grad_norm": 1.0859375, "learning_rate": 1.917948717948718e-06, "loss": 1.6148, "step": 162600 }, { "epoch": 1.0341057876873403, "grad_norm": 1.1328125, "learning_rate": 1.9128205128205128e-06, "loss": 1.6059, "step": 162700 }, { "epoch": 1.0347413782144992, "grad_norm": 0.87109375, "learning_rate": 1.907692307692308e-06, "loss": 1.635, "step": 162800 }, { "epoch": 1.0353769687416579, "grad_norm": 0.90234375, "learning_rate": 1.9025641025641028e-06, "loss": 1.6108, "step": 162900 }, { "epoch": 1.0360125592688167, "grad_norm": 0.93359375, "learning_rate": 1.8974358974358975e-06, "loss": 1.6288, "step": 163000 }, { "epoch": 1.0366481497959754, "grad_norm": 1.0859375, "learning_rate": 1.8923076923076924e-06, "loss": 1.6269, "step": 163100 }, { "epoch": 1.0372837403231343, "grad_norm": 0.78125, "learning_rate": 1.8871794871794875e-06, "loss": 1.6147, "step": 163200 }, { "epoch": 1.037919330850293, "grad_norm": 0.7890625, "learning_rate": 1.8820512820512822e-06, "loss": 1.6123, "step": 163300 }, { "epoch": 1.0385549213774519, "grad_norm": 1.0078125, "learning_rate": 1.876923076923077e-06, "loss": 1.6011, "step": 163400 }, { "epoch": 1.0391905119046105, "grad_norm": 1.0234375, "learning_rate": 1.871794871794872e-06, "loss": 1.6291, "step": 163500 }, { "epoch": 1.0398261024317694, "grad_norm": 1.1640625, "learning_rate": 1.8666666666666669e-06, "loss": 1.62, "step": 163600 }, { "epoch": 1.040461692958928, "grad_norm": 0.84375, "learning_rate": 1.8615384615384616e-06, "loss": 1.6134, "step": 163700 }, { "epoch": 1.041097283486087, "grad_norm": 0.640625, "learning_rate": 1.8564102564102565e-06, "loss": 1.6198, "step": 163800 }, { "epoch": 1.0417328740132457, "grad_norm": 1.3828125, "learning_rate": 1.8512820512820516e-06, "loss": 1.624, "step": 163900 }, { "epoch": 1.0423684645404045, "grad_norm": 0.66796875, "learning_rate": 1.8461538461538465e-06, "loss": 1.6289, "step": 164000 }, { "epoch": 1.0430040550675632, "grad_norm": 1.109375, "learning_rate": 1.8410256410256411e-06, "loss": 1.6283, "step": 164100 }, { "epoch": 1.043639645594722, "grad_norm": 0.75, "learning_rate": 1.835897435897436e-06, "loss": 1.63, "step": 164200 }, { "epoch": 1.0442752361218808, "grad_norm": 1.0390625, "learning_rate": 1.830769230769231e-06, "loss": 1.6104, "step": 164300 }, { "epoch": 1.0449108266490397, "grad_norm": 0.83203125, "learning_rate": 1.8256410256410258e-06, "loss": 1.6221, "step": 164400 }, { "epoch": 1.0455464171761983, "grad_norm": 1.0234375, "learning_rate": 1.8205128205128205e-06, "loss": 1.6318, "step": 164500 }, { "epoch": 1.0461820077033572, "grad_norm": 0.98046875, "learning_rate": 1.8153846153846154e-06, "loss": 1.6349, "step": 164600 }, { "epoch": 1.046817598230516, "grad_norm": 1.1015625, "learning_rate": 1.8102564102564105e-06, "loss": 1.6375, "step": 164700 }, { "epoch": 1.0474531887576748, "grad_norm": 1.0546875, "learning_rate": 1.8051282051282054e-06, "loss": 1.6247, "step": 164800 }, { "epoch": 1.0480887792848335, "grad_norm": 0.75, "learning_rate": 1.8000000000000001e-06, "loss": 1.6145, "step": 164900 }, { "epoch": 1.0487243698119924, "grad_norm": 0.60546875, "learning_rate": 1.794871794871795e-06, "loss": 1.6266, "step": 165000 }, { "epoch": 1.049359960339151, "grad_norm": 0.90234375, "learning_rate": 1.78974358974359e-06, "loss": 1.6338, "step": 165100 }, { "epoch": 1.04999555086631, "grad_norm": 1.203125, "learning_rate": 1.7846153846153846e-06, "loss": 1.6246, "step": 165200 }, { "epoch": 1.0506311413934686, "grad_norm": 1.0390625, "learning_rate": 1.7794871794871795e-06, "loss": 1.6204, "step": 165300 }, { "epoch": 1.0512667319206275, "grad_norm": 0.80859375, "learning_rate": 1.7743589743589746e-06, "loss": 1.6184, "step": 165400 }, { "epoch": 1.0519023224477861, "grad_norm": 0.67578125, "learning_rate": 1.7692307692307695e-06, "loss": 1.6243, "step": 165500 }, { "epoch": 1.052537912974945, "grad_norm": 1.015625, "learning_rate": 1.7641025641025642e-06, "loss": 1.623, "step": 165600 }, { "epoch": 1.0531735035021037, "grad_norm": 1.109375, "learning_rate": 1.7589743589743591e-06, "loss": 1.6189, "step": 165700 }, { "epoch": 1.0538090940292626, "grad_norm": 1.2578125, "learning_rate": 1.753846153846154e-06, "loss": 1.6277, "step": 165800 }, { "epoch": 1.0544446845564215, "grad_norm": 0.91015625, "learning_rate": 1.748717948717949e-06, "loss": 1.6033, "step": 165900 }, { "epoch": 1.0550802750835802, "grad_norm": 0.99609375, "learning_rate": 1.7435897435897436e-06, "loss": 1.6138, "step": 166000 }, { "epoch": 1.0557158656107388, "grad_norm": 0.80078125, "learning_rate": 1.7384615384615385e-06, "loss": 1.6237, "step": 166100 }, { "epoch": 1.0563514561378977, "grad_norm": 0.953125, "learning_rate": 1.7333333333333336e-06, "loss": 1.6208, "step": 166200 }, { "epoch": 1.0569870466650566, "grad_norm": 0.94921875, "learning_rate": 1.7282051282051285e-06, "loss": 1.6214, "step": 166300 }, { "epoch": 1.0576226371922153, "grad_norm": 0.7734375, "learning_rate": 1.7230769230769232e-06, "loss": 1.6138, "step": 166400 }, { "epoch": 1.0582582277193742, "grad_norm": 0.74609375, "learning_rate": 1.717948717948718e-06, "loss": 1.6093, "step": 166500 }, { "epoch": 1.0588938182465328, "grad_norm": 1.0, "learning_rate": 1.712820512820513e-06, "loss": 1.6212, "step": 166600 }, { "epoch": 1.0595294087736917, "grad_norm": 0.828125, "learning_rate": 1.7076923076923077e-06, "loss": 1.6342, "step": 166700 }, { "epoch": 1.0601649993008504, "grad_norm": 0.5859375, "learning_rate": 1.7025641025641026e-06, "loss": 1.6263, "step": 166800 }, { "epoch": 1.0608005898280093, "grad_norm": 1.125, "learning_rate": 1.6974358974358977e-06, "loss": 1.6238, "step": 166900 }, { "epoch": 1.061436180355168, "grad_norm": 0.91015625, "learning_rate": 1.6923076923076926e-06, "loss": 1.5956, "step": 167000 }, { "epoch": 1.0620717708823268, "grad_norm": 0.79296875, "learning_rate": 1.6871794871794873e-06, "loss": 1.6477, "step": 167100 }, { "epoch": 1.0627073614094855, "grad_norm": 1.234375, "learning_rate": 1.6820512820512822e-06, "loss": 1.6159, "step": 167200 }, { "epoch": 1.0633429519366444, "grad_norm": 0.87890625, "learning_rate": 1.676923076923077e-06, "loss": 1.6345, "step": 167300 }, { "epoch": 1.063978542463803, "grad_norm": 0.6953125, "learning_rate": 1.671794871794872e-06, "loss": 1.6316, "step": 167400 }, { "epoch": 1.064614132990962, "grad_norm": 1.03125, "learning_rate": 1.6666666666666667e-06, "loss": 1.6155, "step": 167500 }, { "epoch": 1.0652497235181206, "grad_norm": 1.3125, "learning_rate": 1.6615384615384616e-06, "loss": 1.6145, "step": 167600 }, { "epoch": 1.0658853140452795, "grad_norm": 0.7265625, "learning_rate": 1.6564102564102567e-06, "loss": 1.626, "step": 167700 }, { "epoch": 1.0665209045724382, "grad_norm": 1.0390625, "learning_rate": 1.6512820512820516e-06, "loss": 1.6213, "step": 167800 }, { "epoch": 1.067156495099597, "grad_norm": 0.875, "learning_rate": 1.6461538461538463e-06, "loss": 1.6201, "step": 167900 }, { "epoch": 1.0677920856267558, "grad_norm": 0.69140625, "learning_rate": 1.6410256410256412e-06, "loss": 1.6231, "step": 168000 }, { "epoch": 1.0684276761539147, "grad_norm": 0.82421875, "learning_rate": 1.635897435897436e-06, "loss": 1.6285, "step": 168100 }, { "epoch": 1.0690632666810733, "grad_norm": 1.046875, "learning_rate": 1.6307692307692307e-06, "loss": 1.628, "step": 168200 }, { "epoch": 1.0696988572082322, "grad_norm": 1.3125, "learning_rate": 1.6256410256410256e-06, "loss": 1.625, "step": 168300 }, { "epoch": 1.0703344477353909, "grad_norm": 0.92578125, "learning_rate": 1.6205128205128208e-06, "loss": 1.6231, "step": 168400 }, { "epoch": 1.0709700382625498, "grad_norm": 0.8125, "learning_rate": 1.6153846153846157e-06, "loss": 1.6204, "step": 168500 }, { "epoch": 1.0716056287897084, "grad_norm": 1.2734375, "learning_rate": 1.6102564102564103e-06, "loss": 1.6307, "step": 168600 }, { "epoch": 1.0722412193168673, "grad_norm": 0.97265625, "learning_rate": 1.6051282051282052e-06, "loss": 1.6235, "step": 168700 }, { "epoch": 1.072876809844026, "grad_norm": 1.046875, "learning_rate": 1.6000000000000001e-06, "loss": 1.6222, "step": 168800 }, { "epoch": 1.073512400371185, "grad_norm": 1.03125, "learning_rate": 1.594871794871795e-06, "loss": 1.6281, "step": 168900 }, { "epoch": 1.0741479908983436, "grad_norm": 1.1640625, "learning_rate": 1.5897435897435897e-06, "loss": 1.6216, "step": 169000 }, { "epoch": 1.0747835814255025, "grad_norm": 1.34375, "learning_rate": 1.5846153846153846e-06, "loss": 1.63, "step": 169100 }, { "epoch": 1.0754191719526611, "grad_norm": 0.9609375, "learning_rate": 1.5794871794871797e-06, "loss": 1.617, "step": 169200 }, { "epoch": 1.07605476247982, "grad_norm": 0.890625, "learning_rate": 1.5743589743589746e-06, "loss": 1.6236, "step": 169300 }, { "epoch": 1.0766903530069787, "grad_norm": 0.921875, "learning_rate": 1.5692307692307693e-06, "loss": 1.6239, "step": 169400 }, { "epoch": 1.0773259435341376, "grad_norm": 0.93359375, "learning_rate": 1.5641025641025642e-06, "loss": 1.6126, "step": 169500 }, { "epoch": 1.0779615340612962, "grad_norm": 0.796875, "learning_rate": 1.5589743589743591e-06, "loss": 1.6349, "step": 169600 }, { "epoch": 1.0785971245884551, "grad_norm": 0.8046875, "learning_rate": 1.5538461538461538e-06, "loss": 1.6318, "step": 169700 }, { "epoch": 1.079232715115614, "grad_norm": 1.0390625, "learning_rate": 1.5487179487179487e-06, "loss": 1.6238, "step": 169800 }, { "epoch": 1.0798683056427727, "grad_norm": 1.2734375, "learning_rate": 1.5435897435897438e-06, "loss": 1.6289, "step": 169900 }, { "epoch": 1.0805038961699314, "grad_norm": 1.0390625, "learning_rate": 1.5384615384615387e-06, "loss": 1.6279, "step": 170000 }, { "epoch": 1.0811394866970903, "grad_norm": 0.7578125, "learning_rate": 1.5333333333333334e-06, "loss": 1.6226, "step": 170100 }, { "epoch": 1.0817750772242491, "grad_norm": 0.85546875, "learning_rate": 1.5282051282051283e-06, "loss": 1.621, "step": 170200 }, { "epoch": 1.0824106677514078, "grad_norm": 0.7421875, "learning_rate": 1.5230769230769232e-06, "loss": 1.6347, "step": 170300 }, { "epoch": 1.0830462582785667, "grad_norm": 1.4296875, "learning_rate": 1.517948717948718e-06, "loss": 1.6362, "step": 170400 }, { "epoch": 1.0836818488057254, "grad_norm": 0.72265625, "learning_rate": 1.5128205128205128e-06, "loss": 1.6452, "step": 170500 }, { "epoch": 1.0843174393328843, "grad_norm": 1.1875, "learning_rate": 1.5076923076923077e-06, "loss": 1.6087, "step": 170600 }, { "epoch": 1.084953029860043, "grad_norm": 1.109375, "learning_rate": 1.5025641025641028e-06, "loss": 1.614, "step": 170700 }, { "epoch": 1.0855886203872018, "grad_norm": 1.4140625, "learning_rate": 1.4974358974358977e-06, "loss": 1.6244, "step": 170800 }, { "epoch": 1.0862242109143605, "grad_norm": 0.78125, "learning_rate": 1.4923076923076924e-06, "loss": 1.6118, "step": 170900 }, { "epoch": 1.0868598014415194, "grad_norm": 0.83203125, "learning_rate": 1.4871794871794873e-06, "loss": 1.62, "step": 171000 }, { "epoch": 1.087495391968678, "grad_norm": 0.60546875, "learning_rate": 1.4820512820512822e-06, "loss": 1.6146, "step": 171100 }, { "epoch": 1.088130982495837, "grad_norm": 0.59765625, "learning_rate": 1.476923076923077e-06, "loss": 1.6233, "step": 171200 }, { "epoch": 1.0887665730229956, "grad_norm": 0.69921875, "learning_rate": 1.4717948717948718e-06, "loss": 1.607, "step": 171300 }, { "epoch": 1.0894021635501545, "grad_norm": 0.8125, "learning_rate": 1.4666666666666669e-06, "loss": 1.6358, "step": 171400 }, { "epoch": 1.0900377540773132, "grad_norm": 0.921875, "learning_rate": 1.4615384615384618e-06, "loss": 1.6165, "step": 171500 }, { "epoch": 1.090673344604472, "grad_norm": 1.4375, "learning_rate": 1.4564102564102565e-06, "loss": 1.624, "step": 171600 }, { "epoch": 1.0913089351316307, "grad_norm": 0.6328125, "learning_rate": 1.4512820512820514e-06, "loss": 1.6289, "step": 171700 }, { "epoch": 1.0919445256587896, "grad_norm": 1.2578125, "learning_rate": 1.4461538461538463e-06, "loss": 1.6245, "step": 171800 }, { "epoch": 1.0925801161859483, "grad_norm": 1.3046875, "learning_rate": 1.4410256410256412e-06, "loss": 1.635, "step": 171900 }, { "epoch": 1.0932157067131072, "grad_norm": 0.69140625, "learning_rate": 1.4358974358974359e-06, "loss": 1.6379, "step": 172000 }, { "epoch": 1.0938512972402659, "grad_norm": 1.0234375, "learning_rate": 1.4307692307692308e-06, "loss": 1.6384, "step": 172100 }, { "epoch": 1.0944868877674248, "grad_norm": 0.78125, "learning_rate": 1.4256410256410259e-06, "loss": 1.6085, "step": 172200 }, { "epoch": 1.0951224782945834, "grad_norm": 0.984375, "learning_rate": 1.4205128205128208e-06, "loss": 1.6252, "step": 172300 }, { "epoch": 1.0957580688217423, "grad_norm": 0.78125, "learning_rate": 1.4153846153846155e-06, "loss": 1.6271, "step": 172400 }, { "epoch": 1.096393659348901, "grad_norm": 0.953125, "learning_rate": 1.4102564102564104e-06, "loss": 1.6031, "step": 172500 }, { "epoch": 1.0970292498760599, "grad_norm": 2.21875, "learning_rate": 1.4051282051282052e-06, "loss": 1.6219, "step": 172600 }, { "epoch": 1.0976648404032185, "grad_norm": 1.3203125, "learning_rate": 1.4000000000000001e-06, "loss": 1.629, "step": 172700 }, { "epoch": 1.0983004309303774, "grad_norm": 1.15625, "learning_rate": 1.3948717948717948e-06, "loss": 1.6137, "step": 172800 }, { "epoch": 1.098936021457536, "grad_norm": 1.40625, "learning_rate": 1.38974358974359e-06, "loss": 1.6134, "step": 172900 }, { "epoch": 1.099571611984695, "grad_norm": 0.80078125, "learning_rate": 1.3846153846153848e-06, "loss": 1.6247, "step": 173000 }, { "epoch": 1.1002072025118537, "grad_norm": 0.734375, "learning_rate": 1.3794871794871795e-06, "loss": 1.6231, "step": 173100 }, { "epoch": 1.1008427930390126, "grad_norm": 1.1171875, "learning_rate": 1.3743589743589744e-06, "loss": 1.6254, "step": 173200 }, { "epoch": 1.1014783835661712, "grad_norm": 1.0234375, "learning_rate": 1.3692307692307693e-06, "loss": 1.6358, "step": 173300 }, { "epoch": 1.1021139740933301, "grad_norm": 0.90234375, "learning_rate": 1.3641025641025642e-06, "loss": 1.6345, "step": 173400 }, { "epoch": 1.1027495646204888, "grad_norm": 1.3359375, "learning_rate": 1.358974358974359e-06, "loss": 1.6374, "step": 173500 }, { "epoch": 1.1033851551476477, "grad_norm": 1.0078125, "learning_rate": 1.3538461538461538e-06, "loss": 1.6277, "step": 173600 }, { "epoch": 1.1040207456748066, "grad_norm": 0.94140625, "learning_rate": 1.348717948717949e-06, "loss": 1.6246, "step": 173700 }, { "epoch": 1.1046563362019652, "grad_norm": 0.84375, "learning_rate": 1.3435897435897438e-06, "loss": 1.6221, "step": 173800 }, { "epoch": 1.1052919267291241, "grad_norm": 0.765625, "learning_rate": 1.3384615384615385e-06, "loss": 1.61, "step": 173900 }, { "epoch": 1.1059275172562828, "grad_norm": 0.9140625, "learning_rate": 1.3333333333333334e-06, "loss": 1.633, "step": 174000 }, { "epoch": 1.1065631077834417, "grad_norm": 0.953125, "learning_rate": 1.3282051282051283e-06, "loss": 1.6234, "step": 174100 }, { "epoch": 1.1071986983106004, "grad_norm": 1.0703125, "learning_rate": 1.3230769230769232e-06, "loss": 1.6222, "step": 174200 }, { "epoch": 1.1078342888377593, "grad_norm": 0.89453125, "learning_rate": 1.317948717948718e-06, "loss": 1.6104, "step": 174300 }, { "epoch": 1.108469879364918, "grad_norm": 1.3671875, "learning_rate": 1.312820512820513e-06, "loss": 1.6357, "step": 174400 }, { "epoch": 1.1091054698920768, "grad_norm": 0.8203125, "learning_rate": 1.307692307692308e-06, "loss": 1.6394, "step": 174500 }, { "epoch": 1.1097410604192355, "grad_norm": 0.875, "learning_rate": 1.3025641025641026e-06, "loss": 1.6243, "step": 174600 }, { "epoch": 1.1103766509463944, "grad_norm": 0.9765625, "learning_rate": 1.2974358974358975e-06, "loss": 1.6249, "step": 174700 }, { "epoch": 1.111012241473553, "grad_norm": 1.0234375, "learning_rate": 1.2923076923076924e-06, "loss": 1.6309, "step": 174800 }, { "epoch": 1.111647832000712, "grad_norm": 0.828125, "learning_rate": 1.2871794871794873e-06, "loss": 1.6047, "step": 174900 }, { "epoch": 1.1122834225278706, "grad_norm": 0.9453125, "learning_rate": 1.282051282051282e-06, "loss": 1.6151, "step": 175000 }, { "epoch": 1.1129190130550295, "grad_norm": 1.0390625, "learning_rate": 1.2769230769230769e-06, "loss": 1.6192, "step": 175100 }, { "epoch": 1.1135546035821882, "grad_norm": 0.90234375, "learning_rate": 1.271794871794872e-06, "loss": 1.6136, "step": 175200 }, { "epoch": 1.114190194109347, "grad_norm": 0.953125, "learning_rate": 1.2666666666666669e-06, "loss": 1.6255, "step": 175300 }, { "epoch": 1.1148257846365057, "grad_norm": 1.234375, "learning_rate": 1.2615384615384616e-06, "loss": 1.6317, "step": 175400 }, { "epoch": 1.1154613751636646, "grad_norm": 1.09375, "learning_rate": 1.2564102564102565e-06, "loss": 1.6191, "step": 175500 }, { "epoch": 1.1160969656908233, "grad_norm": 0.83984375, "learning_rate": 1.2512820512820514e-06, "loss": 1.6208, "step": 175600 }, { "epoch": 1.1167325562179822, "grad_norm": 0.8046875, "learning_rate": 1.2461538461538463e-06, "loss": 1.6289, "step": 175700 }, { "epoch": 1.1173681467451408, "grad_norm": 0.76953125, "learning_rate": 1.2410256410256412e-06, "loss": 1.632, "step": 175800 }, { "epoch": 1.1180037372722997, "grad_norm": 0.78515625, "learning_rate": 1.235897435897436e-06, "loss": 1.6146, "step": 175900 }, { "epoch": 1.1186393277994584, "grad_norm": 1.1796875, "learning_rate": 1.230769230769231e-06, "loss": 1.626, "step": 176000 }, { "epoch": 1.1192749183266173, "grad_norm": 0.79296875, "learning_rate": 1.2256410256410257e-06, "loss": 1.6242, "step": 176100 }, { "epoch": 1.119910508853776, "grad_norm": 1.25, "learning_rate": 1.2205128205128206e-06, "loss": 1.6396, "step": 176200 }, { "epoch": 1.1205460993809349, "grad_norm": 0.91796875, "learning_rate": 1.2153846153846155e-06, "loss": 1.6206, "step": 176300 }, { "epoch": 1.1211816899080935, "grad_norm": 0.83984375, "learning_rate": 1.2102564102564104e-06, "loss": 1.6212, "step": 176400 }, { "epoch": 1.1218172804352524, "grad_norm": 0.75390625, "learning_rate": 1.2051282051282053e-06, "loss": 1.6244, "step": 176500 }, { "epoch": 1.122452870962411, "grad_norm": 0.78125, "learning_rate": 1.2000000000000002e-06, "loss": 1.623, "step": 176600 }, { "epoch": 1.12308846148957, "grad_norm": 1.078125, "learning_rate": 1.194871794871795e-06, "loss": 1.6183, "step": 176700 }, { "epoch": 1.1237240520167286, "grad_norm": 1.1640625, "learning_rate": 1.1897435897435897e-06, "loss": 1.622, "step": 176800 }, { "epoch": 1.1243596425438875, "grad_norm": 1.5078125, "learning_rate": 1.1846153846153849e-06, "loss": 1.6249, "step": 176900 }, { "epoch": 1.1249952330710462, "grad_norm": 1.1484375, "learning_rate": 1.1794871794871795e-06, "loss": 1.627, "step": 177000 }, { "epoch": 1.125630823598205, "grad_norm": 0.7265625, "learning_rate": 1.1743589743589744e-06, "loss": 1.6355, "step": 177100 }, { "epoch": 1.126266414125364, "grad_norm": 0.85546875, "learning_rate": 1.1692307692307693e-06, "loss": 1.6318, "step": 177200 }, { "epoch": 1.1269020046525227, "grad_norm": 1.109375, "learning_rate": 1.1641025641025642e-06, "loss": 1.6169, "step": 177300 }, { "epoch": 1.1275375951796813, "grad_norm": 1.046875, "learning_rate": 1.1589743589743591e-06, "loss": 1.6195, "step": 177400 }, { "epoch": 1.1281731857068402, "grad_norm": 1.0703125, "learning_rate": 1.153846153846154e-06, "loss": 1.6207, "step": 177500 }, { "epoch": 1.1288087762339991, "grad_norm": 0.9609375, "learning_rate": 1.1487179487179487e-06, "loss": 1.6202, "step": 177600 }, { "epoch": 1.1294443667611578, "grad_norm": 0.828125, "learning_rate": 1.1435897435897436e-06, "loss": 1.6311, "step": 177700 }, { "epoch": 1.1300799572883165, "grad_norm": 1.0625, "learning_rate": 1.1384615384615385e-06, "loss": 1.6345, "step": 177800 }, { "epoch": 1.1307155478154753, "grad_norm": 0.66796875, "learning_rate": 1.1333333333333334e-06, "loss": 1.623, "step": 177900 }, { "epoch": 1.1313511383426342, "grad_norm": 0.9140625, "learning_rate": 1.1282051282051283e-06, "loss": 1.6246, "step": 178000 }, { "epoch": 1.131986728869793, "grad_norm": 1.1484375, "learning_rate": 1.1230769230769232e-06, "loss": 1.6302, "step": 178100 }, { "epoch": 1.1326223193969518, "grad_norm": 0.84375, "learning_rate": 1.1179487179487181e-06, "loss": 1.6337, "step": 178200 }, { "epoch": 1.1332579099241105, "grad_norm": 0.78515625, "learning_rate": 1.1128205128205128e-06, "loss": 1.6308, "step": 178300 }, { "epoch": 1.1338935004512694, "grad_norm": 0.734375, "learning_rate": 1.107692307692308e-06, "loss": 1.6263, "step": 178400 }, { "epoch": 1.134529090978428, "grad_norm": 0.8359375, "learning_rate": 1.1025641025641026e-06, "loss": 1.6238, "step": 178500 }, { "epoch": 1.135164681505587, "grad_norm": 1.0390625, "learning_rate": 1.0974358974358975e-06, "loss": 1.6269, "step": 178600 }, { "epoch": 1.1358002720327456, "grad_norm": 0.8125, "learning_rate": 1.0923076923076924e-06, "loss": 1.624, "step": 178700 }, { "epoch": 1.1364358625599045, "grad_norm": 0.6328125, "learning_rate": 1.0871794871794873e-06, "loss": 1.6185, "step": 178800 }, { "epoch": 1.1370714530870631, "grad_norm": 0.70703125, "learning_rate": 1.0820512820512822e-06, "loss": 1.612, "step": 178900 }, { "epoch": 1.137707043614222, "grad_norm": 0.83984375, "learning_rate": 1.076923076923077e-06, "loss": 1.6091, "step": 179000 }, { "epoch": 1.1383426341413807, "grad_norm": 1.1953125, "learning_rate": 1.0717948717948718e-06, "loss": 1.6232, "step": 179100 }, { "epoch": 1.1389782246685396, "grad_norm": 0.8203125, "learning_rate": 1.066666666666667e-06, "loss": 1.604, "step": 179200 }, { "epoch": 1.1396138151956983, "grad_norm": 0.98046875, "learning_rate": 1.0615384615384616e-06, "loss": 1.6217, "step": 179300 }, { "epoch": 1.1402494057228572, "grad_norm": 0.83984375, "learning_rate": 1.0564102564102565e-06, "loss": 1.6103, "step": 179400 }, { "epoch": 1.1408849962500158, "grad_norm": 1.0859375, "learning_rate": 1.0512820512820514e-06, "loss": 1.6224, "step": 179500 }, { "epoch": 1.1415205867771747, "grad_norm": 1.0234375, "learning_rate": 1.0461538461538463e-06, "loss": 1.6232, "step": 179600 }, { "epoch": 1.1421561773043334, "grad_norm": 1.0859375, "learning_rate": 1.0410256410256412e-06, "loss": 1.6241, "step": 179700 }, { "epoch": 1.1427917678314923, "grad_norm": 0.78125, "learning_rate": 1.0358974358974359e-06, "loss": 1.616, "step": 179800 }, { "epoch": 1.143427358358651, "grad_norm": 0.6953125, "learning_rate": 1.030769230769231e-06, "loss": 1.6291, "step": 179900 }, { "epoch": 1.1440629488858098, "grad_norm": 0.87109375, "learning_rate": 1.0256410256410257e-06, "loss": 1.6228, "step": 180000 }, { "epoch": 1.1446985394129685, "grad_norm": 1.03125, "learning_rate": 1.0205128205128206e-06, "loss": 1.6178, "step": 180100 }, { "epoch": 1.1453341299401274, "grad_norm": 1.1484375, "learning_rate": 1.0153846153846155e-06, "loss": 1.6336, "step": 180200 }, { "epoch": 1.145969720467286, "grad_norm": 1.0859375, "learning_rate": 1.0102564102564104e-06, "loss": 1.6261, "step": 180300 }, { "epoch": 1.146605310994445, "grad_norm": 1.1796875, "learning_rate": 1.0051282051282053e-06, "loss": 1.6065, "step": 180400 }, { "epoch": 1.1472409015216036, "grad_norm": 0.7421875, "learning_rate": 1.0000000000000002e-06, "loss": 1.605, "step": 180500 }, { "epoch": 1.1478764920487625, "grad_norm": 0.9609375, "learning_rate": 9.948717948717949e-07, "loss": 1.6426, "step": 180600 }, { "epoch": 1.1485120825759214, "grad_norm": 1.2734375, "learning_rate": 9.8974358974359e-07, "loss": 1.6313, "step": 180700 }, { "epoch": 1.14914767310308, "grad_norm": 1.109375, "learning_rate": 9.846153846153847e-07, "loss": 1.6194, "step": 180800 }, { "epoch": 1.1497832636302387, "grad_norm": 0.8125, "learning_rate": 9.794871794871796e-07, "loss": 1.6249, "step": 180900 }, { "epoch": 1.1504188541573976, "grad_norm": 1.2109375, "learning_rate": 9.743589743589745e-07, "loss": 1.6149, "step": 181000 }, { "epoch": 1.1510544446845565, "grad_norm": 0.96875, "learning_rate": 9.692307692307693e-07, "loss": 1.6307, "step": 181100 }, { "epoch": 1.1516900352117152, "grad_norm": 1.0234375, "learning_rate": 9.641025641025642e-07, "loss": 1.6369, "step": 181200 }, { "epoch": 1.1523256257388739, "grad_norm": 1.1875, "learning_rate": 9.58974358974359e-07, "loss": 1.627, "step": 181300 }, { "epoch": 1.1529612162660328, "grad_norm": 1.21875, "learning_rate": 9.53846153846154e-07, "loss": 1.6305, "step": 181400 }, { "epoch": 1.1535968067931917, "grad_norm": 0.7890625, "learning_rate": 9.487179487179487e-07, "loss": 1.6156, "step": 181500 }, { "epoch": 1.1542323973203503, "grad_norm": 0.98046875, "learning_rate": 9.435897435897437e-07, "loss": 1.6276, "step": 181600 }, { "epoch": 1.1548679878475092, "grad_norm": 1.0390625, "learning_rate": 9.384615384615385e-07, "loss": 1.6395, "step": 181700 }, { "epoch": 1.1555035783746679, "grad_norm": 0.81640625, "learning_rate": 9.333333333333334e-07, "loss": 1.6233, "step": 181800 }, { "epoch": 1.1561391689018268, "grad_norm": 0.765625, "learning_rate": 9.282051282051282e-07, "loss": 1.6164, "step": 181900 }, { "epoch": 1.1567747594289854, "grad_norm": 1.234375, "learning_rate": 9.230769230769232e-07, "loss": 1.6231, "step": 182000 }, { "epoch": 1.1574103499561443, "grad_norm": 1.1015625, "learning_rate": 9.17948717948718e-07, "loss": 1.6251, "step": 182100 }, { "epoch": 1.158045940483303, "grad_norm": 0.796875, "learning_rate": 9.128205128205129e-07, "loss": 1.6266, "step": 182200 }, { "epoch": 1.158681531010462, "grad_norm": 0.80078125, "learning_rate": 9.076923076923077e-07, "loss": 1.6057, "step": 182300 }, { "epoch": 1.1593171215376206, "grad_norm": 1.09375, "learning_rate": 9.025641025641027e-07, "loss": 1.6329, "step": 182400 }, { "epoch": 1.1599527120647795, "grad_norm": 0.734375, "learning_rate": 8.974358974358975e-07, "loss": 1.6308, "step": 182500 }, { "epoch": 1.1605883025919381, "grad_norm": 0.63671875, "learning_rate": 8.923076923076923e-07, "loss": 1.6234, "step": 182600 }, { "epoch": 1.161223893119097, "grad_norm": 0.8515625, "learning_rate": 8.871794871794873e-07, "loss": 1.6105, "step": 182700 }, { "epoch": 1.1618594836462557, "grad_norm": 0.9375, "learning_rate": 8.820512820512821e-07, "loss": 1.627, "step": 182800 }, { "epoch": 1.1624950741734146, "grad_norm": 0.828125, "learning_rate": 8.76923076923077e-07, "loss": 1.622, "step": 182900 }, { "epoch": 1.1631306647005732, "grad_norm": 0.76953125, "learning_rate": 8.717948717948718e-07, "loss": 1.6267, "step": 183000 }, { "epoch": 1.1637662552277321, "grad_norm": 0.984375, "learning_rate": 8.666666666666668e-07, "loss": 1.628, "step": 183100 }, { "epoch": 1.1644018457548908, "grad_norm": 0.82421875, "learning_rate": 8.615384615384616e-07, "loss": 1.6225, "step": 183200 }, { "epoch": 1.1650374362820497, "grad_norm": 1.0078125, "learning_rate": 8.564102564102565e-07, "loss": 1.6211, "step": 183300 }, { "epoch": 1.1656730268092084, "grad_norm": 0.9765625, "learning_rate": 8.512820512820513e-07, "loss": 1.6209, "step": 183400 }, { "epoch": 1.1663086173363673, "grad_norm": 1.3046875, "learning_rate": 8.461538461538463e-07, "loss": 1.6237, "step": 183500 }, { "epoch": 1.166944207863526, "grad_norm": 0.60546875, "learning_rate": 8.410256410256411e-07, "loss": 1.6333, "step": 183600 }, { "epoch": 1.1675797983906848, "grad_norm": 0.8203125, "learning_rate": 8.35897435897436e-07, "loss": 1.6243, "step": 183700 }, { "epoch": 1.1682153889178435, "grad_norm": 1.0703125, "learning_rate": 8.307692307692308e-07, "loss": 1.6183, "step": 183800 }, { "epoch": 1.1688509794450024, "grad_norm": 0.73828125, "learning_rate": 8.256410256410258e-07, "loss": 1.6256, "step": 183900 }, { "epoch": 1.169486569972161, "grad_norm": 0.99609375, "learning_rate": 8.205128205128206e-07, "loss": 1.6199, "step": 184000 }, { "epoch": 1.17012216049932, "grad_norm": 1.3359375, "learning_rate": 8.153846153846154e-07, "loss": 1.622, "step": 184100 }, { "epoch": 1.1707577510264786, "grad_norm": 1.234375, "learning_rate": 8.102564102564104e-07, "loss": 1.6335, "step": 184200 }, { "epoch": 1.1713933415536375, "grad_norm": 0.71875, "learning_rate": 8.051282051282052e-07, "loss": 1.6162, "step": 184300 }, { "epoch": 1.1720289320807962, "grad_norm": 1.203125, "learning_rate": 8.000000000000001e-07, "loss": 1.6326, "step": 184400 }, { "epoch": 1.172664522607955, "grad_norm": 1.28125, "learning_rate": 7.948717948717949e-07, "loss": 1.6317, "step": 184500 }, { "epoch": 1.173300113135114, "grad_norm": 1.234375, "learning_rate": 7.897435897435899e-07, "loss": 1.626, "step": 184600 }, { "epoch": 1.1739357036622726, "grad_norm": 1.1171875, "learning_rate": 7.846153846153847e-07, "loss": 1.6298, "step": 184700 }, { "epoch": 1.1745712941894313, "grad_norm": 1.0703125, "learning_rate": 7.794871794871796e-07, "loss": 1.6062, "step": 184800 }, { "epoch": 1.1752068847165902, "grad_norm": 1.0625, "learning_rate": 7.743589743589744e-07, "loss": 1.6121, "step": 184900 }, { "epoch": 1.175842475243749, "grad_norm": 0.6015625, "learning_rate": 7.692307692307694e-07, "loss": 1.6162, "step": 185000 }, { "epoch": 1.1764780657709077, "grad_norm": 1.234375, "learning_rate": 7.641025641025642e-07, "loss": 1.6225, "step": 185100 }, { "epoch": 1.1771136562980664, "grad_norm": 0.94140625, "learning_rate": 7.58974358974359e-07, "loss": 1.6196, "step": 185200 }, { "epoch": 1.1777492468252253, "grad_norm": 0.8125, "learning_rate": 7.538461538461538e-07, "loss": 1.6323, "step": 185300 }, { "epoch": 1.1783848373523842, "grad_norm": 1.03125, "learning_rate": 7.487179487179488e-07, "loss": 1.6396, "step": 185400 }, { "epoch": 1.1790204278795429, "grad_norm": 0.9375, "learning_rate": 7.435897435897436e-07, "loss": 1.6313, "step": 185500 }, { "epoch": 1.1796560184067018, "grad_norm": 0.75, "learning_rate": 7.384615384615385e-07, "loss": 1.6339, "step": 185600 }, { "epoch": 1.1802916089338604, "grad_norm": 0.76953125, "learning_rate": 7.333333333333334e-07, "loss": 1.6106, "step": 185700 }, { "epoch": 1.1809271994610193, "grad_norm": 0.66796875, "learning_rate": 7.282051282051282e-07, "loss": 1.6374, "step": 185800 }, { "epoch": 1.181562789988178, "grad_norm": 0.94921875, "learning_rate": 7.230769230769231e-07, "loss": 1.6183, "step": 185900 }, { "epoch": 1.1821983805153369, "grad_norm": 1.0703125, "learning_rate": 7.179487179487179e-07, "loss": 1.6314, "step": 186000 }, { "epoch": 1.1828339710424955, "grad_norm": 0.9140625, "learning_rate": 7.128205128205129e-07, "loss": 1.6269, "step": 186100 }, { "epoch": 1.1834695615696544, "grad_norm": 1.2890625, "learning_rate": 7.076923076923077e-07, "loss": 1.6146, "step": 186200 }, { "epoch": 1.184105152096813, "grad_norm": 1.09375, "learning_rate": 7.025641025641026e-07, "loss": 1.6253, "step": 186300 }, { "epoch": 1.184740742623972, "grad_norm": 0.65625, "learning_rate": 6.974358974358974e-07, "loss": 1.6206, "step": 186400 }, { "epoch": 1.1853763331511307, "grad_norm": 1.3125, "learning_rate": 6.923076923076924e-07, "loss": 1.6378, "step": 186500 }, { "epoch": 1.1860119236782896, "grad_norm": 0.89453125, "learning_rate": 6.871794871794872e-07, "loss": 1.6195, "step": 186600 }, { "epoch": 1.1866475142054482, "grad_norm": 1.140625, "learning_rate": 6.820512820512821e-07, "loss": 1.6105, "step": 186700 }, { "epoch": 1.1872831047326071, "grad_norm": 0.99609375, "learning_rate": 6.769230769230769e-07, "loss": 1.634, "step": 186800 }, { "epoch": 1.1879186952597658, "grad_norm": 0.86328125, "learning_rate": 6.717948717948719e-07, "loss": 1.6297, "step": 186900 }, { "epoch": 1.1885542857869247, "grad_norm": 0.765625, "learning_rate": 6.666666666666667e-07, "loss": 1.6124, "step": 187000 }, { "epoch": 1.1891898763140833, "grad_norm": 0.734375, "learning_rate": 6.615384615384616e-07, "loss": 1.6296, "step": 187100 }, { "epoch": 1.1898254668412422, "grad_norm": 1.0234375, "learning_rate": 6.564102564102565e-07, "loss": 1.6216, "step": 187200 }, { "epoch": 1.190461057368401, "grad_norm": 1.0234375, "learning_rate": 6.512820512820513e-07, "loss": 1.6228, "step": 187300 }, { "epoch": 1.1910966478955598, "grad_norm": 1.2890625, "learning_rate": 6.461538461538462e-07, "loss": 1.6217, "step": 187400 }, { "epoch": 1.1917322384227185, "grad_norm": 1.0859375, "learning_rate": 6.41025641025641e-07, "loss": 1.6235, "step": 187500 }, { "epoch": 1.1923678289498774, "grad_norm": 1.046875, "learning_rate": 6.35897435897436e-07, "loss": 1.6218, "step": 187600 }, { "epoch": 1.193003419477036, "grad_norm": 0.984375, "learning_rate": 6.307692307692308e-07, "loss": 1.6405, "step": 187700 }, { "epoch": 1.193639010004195, "grad_norm": 1.2890625, "learning_rate": 6.256410256410257e-07, "loss": 1.6174, "step": 187800 }, { "epoch": 1.1942746005313536, "grad_norm": 0.50390625, "learning_rate": 6.205128205128206e-07, "loss": 1.6174, "step": 187900 }, { "epoch": 1.1949101910585125, "grad_norm": 0.69921875, "learning_rate": 6.153846153846155e-07, "loss": 1.6336, "step": 188000 }, { "epoch": 1.1955457815856712, "grad_norm": 0.84765625, "learning_rate": 6.102564102564103e-07, "loss": 1.6303, "step": 188100 }, { "epoch": 1.19618137211283, "grad_norm": 1.09375, "learning_rate": 6.051282051282052e-07, "loss": 1.629, "step": 188200 }, { "epoch": 1.1968169626399887, "grad_norm": 0.5859375, "learning_rate": 6.000000000000001e-07, "loss": 1.6121, "step": 188300 }, { "epoch": 1.1974525531671476, "grad_norm": 1.0703125, "learning_rate": 5.948717948717949e-07, "loss": 1.6173, "step": 188400 }, { "epoch": 1.1980881436943065, "grad_norm": 0.69140625, "learning_rate": 5.897435897435898e-07, "loss": 1.6073, "step": 188500 }, { "epoch": 1.1987237342214652, "grad_norm": 0.85546875, "learning_rate": 5.846153846153847e-07, "loss": 1.6165, "step": 188600 }, { "epoch": 1.1993593247486238, "grad_norm": 1.1171875, "learning_rate": 5.794871794871796e-07, "loss": 1.6199, "step": 188700 }, { "epoch": 1.1999949152757827, "grad_norm": 0.8046875, "learning_rate": 5.743589743589744e-07, "loss": 1.6232, "step": 188800 }, { "epoch": 1.2006305058029416, "grad_norm": 0.86328125, "learning_rate": 5.692307692307693e-07, "loss": 1.6187, "step": 188900 }, { "epoch": 1.2012660963301003, "grad_norm": 0.99609375, "learning_rate": 5.641025641025642e-07, "loss": 1.6207, "step": 189000 }, { "epoch": 1.201901686857259, "grad_norm": 0.8125, "learning_rate": 5.589743589743591e-07, "loss": 1.613, "step": 189100 }, { "epoch": 1.2025372773844178, "grad_norm": 0.89453125, "learning_rate": 5.53846153846154e-07, "loss": 1.6118, "step": 189200 }, { "epoch": 1.2031728679115767, "grad_norm": 0.68359375, "learning_rate": 5.487179487179488e-07, "loss": 1.6395, "step": 189300 }, { "epoch": 1.2038084584387354, "grad_norm": 0.82421875, "learning_rate": 5.435897435897437e-07, "loss": 1.6283, "step": 189400 }, { "epoch": 1.2044440489658943, "grad_norm": 1.1953125, "learning_rate": 5.384615384615386e-07, "loss": 1.639, "step": 189500 }, { "epoch": 1.205079639493053, "grad_norm": 1.1328125, "learning_rate": 5.333333333333335e-07, "loss": 1.6224, "step": 189600 }, { "epoch": 1.2057152300202119, "grad_norm": 1.109375, "learning_rate": 5.282051282051282e-07, "loss": 1.6401, "step": 189700 }, { "epoch": 1.2063508205473705, "grad_norm": 0.7421875, "learning_rate": 5.230769230769231e-07, "loss": 1.6279, "step": 189800 }, { "epoch": 1.2069864110745294, "grad_norm": 1.125, "learning_rate": 5.179487179487179e-07, "loss": 1.6324, "step": 189900 }, { "epoch": 1.207622001601688, "grad_norm": 0.76171875, "learning_rate": 5.128205128205128e-07, "loss": 1.6146, "step": 190000 }, { "epoch": 1.208257592128847, "grad_norm": 1.0390625, "learning_rate": 5.076923076923077e-07, "loss": 1.6171, "step": 190100 }, { "epoch": 1.2088931826560056, "grad_norm": 1.0078125, "learning_rate": 5.025641025641026e-07, "loss": 1.6087, "step": 190200 }, { "epoch": 1.2095287731831645, "grad_norm": 0.8984375, "learning_rate": 4.974358974358974e-07, "loss": 1.6004, "step": 190300 }, { "epoch": 1.2101643637103232, "grad_norm": 1.2109375, "learning_rate": 4.923076923076923e-07, "loss": 1.6331, "step": 190400 }, { "epoch": 1.210799954237482, "grad_norm": 0.91015625, "learning_rate": 4.871794871794872e-07, "loss": 1.6412, "step": 190500 }, { "epoch": 1.2114355447646408, "grad_norm": 0.66015625, "learning_rate": 4.820512820512821e-07, "loss": 1.6085, "step": 190600 }, { "epoch": 1.2120711352917997, "grad_norm": 0.65234375, "learning_rate": 4.76923076923077e-07, "loss": 1.6188, "step": 190700 }, { "epoch": 1.2127067258189583, "grad_norm": 0.85546875, "learning_rate": 4.7179487179487187e-07, "loss": 1.6264, "step": 190800 }, { "epoch": 1.2133423163461172, "grad_norm": 0.7421875, "learning_rate": 4.666666666666667e-07, "loss": 1.6233, "step": 190900 }, { "epoch": 1.213977906873276, "grad_norm": 1.0390625, "learning_rate": 4.615384615384616e-07, "loss": 1.6342, "step": 191000 }, { "epoch": 1.2146134974004348, "grad_norm": 0.6484375, "learning_rate": 4.5641025641025646e-07, "loss": 1.6317, "step": 191100 }, { "epoch": 1.2152490879275935, "grad_norm": 0.8828125, "learning_rate": 4.5128205128205136e-07, "loss": 1.6204, "step": 191200 }, { "epoch": 1.2158846784547523, "grad_norm": 1.015625, "learning_rate": 4.4615384615384615e-07, "loss": 1.6211, "step": 191300 }, { "epoch": 1.216520268981911, "grad_norm": 1.15625, "learning_rate": 4.4102564102564105e-07, "loss": 1.6256, "step": 191400 }, { "epoch": 1.21715585950907, "grad_norm": 0.94921875, "learning_rate": 4.358974358974359e-07, "loss": 1.6148, "step": 191500 }, { "epoch": 1.2177914500362286, "grad_norm": 0.7265625, "learning_rate": 4.307692307692308e-07, "loss": 1.6202, "step": 191600 }, { "epoch": 1.2184270405633875, "grad_norm": 0.87890625, "learning_rate": 4.2564102564102564e-07, "loss": 1.627, "step": 191700 }, { "epoch": 1.2190626310905461, "grad_norm": 0.890625, "learning_rate": 4.2051282051282054e-07, "loss": 1.6217, "step": 191800 }, { "epoch": 1.219698221617705, "grad_norm": 1.0703125, "learning_rate": 4.153846153846154e-07, "loss": 1.6366, "step": 191900 }, { "epoch": 1.220333812144864, "grad_norm": 1.0078125, "learning_rate": 4.102564102564103e-07, "loss": 1.6388, "step": 192000 }, { "epoch": 1.2209694026720226, "grad_norm": 1.3984375, "learning_rate": 4.051282051282052e-07, "loss": 1.6081, "step": 192100 }, { "epoch": 1.2216049931991813, "grad_norm": 0.90625, "learning_rate": 4.0000000000000003e-07, "loss": 1.6327, "step": 192200 }, { "epoch": 1.2222405837263401, "grad_norm": 1.0390625, "learning_rate": 3.9487179487179493e-07, "loss": 1.6144, "step": 192300 }, { "epoch": 1.222876174253499, "grad_norm": 0.8984375, "learning_rate": 3.897435897435898e-07, "loss": 1.6078, "step": 192400 }, { "epoch": 1.2235117647806577, "grad_norm": 1.1171875, "learning_rate": 3.846153846153847e-07, "loss": 1.635, "step": 192500 }, { "epoch": 1.2241473553078164, "grad_norm": 0.79296875, "learning_rate": 3.794871794871795e-07, "loss": 1.6114, "step": 192600 }, { "epoch": 1.2247829458349753, "grad_norm": 0.9296875, "learning_rate": 3.743589743589744e-07, "loss": 1.629, "step": 192700 }, { "epoch": 1.2254185363621342, "grad_norm": 0.9921875, "learning_rate": 3.6923076923076927e-07, "loss": 1.6034, "step": 192800 }, { "epoch": 1.2260541268892928, "grad_norm": 1.296875, "learning_rate": 3.641025641025641e-07, "loss": 1.6224, "step": 192900 }, { "epoch": 1.2266897174164517, "grad_norm": 0.9765625, "learning_rate": 3.5897435897435896e-07, "loss": 1.6244, "step": 193000 }, { "epoch": 1.2273253079436104, "grad_norm": 1.078125, "learning_rate": 3.5384615384615386e-07, "loss": 1.6263, "step": 193100 }, { "epoch": 1.2279608984707693, "grad_norm": 1.09375, "learning_rate": 3.487179487179487e-07, "loss": 1.6212, "step": 193200 }, { "epoch": 1.228596488997928, "grad_norm": 1.1328125, "learning_rate": 3.435897435897436e-07, "loss": 1.6289, "step": 193300 }, { "epoch": 1.2292320795250868, "grad_norm": 0.671875, "learning_rate": 3.3846153846153845e-07, "loss": 1.6209, "step": 193400 }, { "epoch": 1.2298676700522455, "grad_norm": 0.6328125, "learning_rate": 3.3333333333333335e-07, "loss": 1.6283, "step": 193500 }, { "epoch": 1.2305032605794044, "grad_norm": 0.875, "learning_rate": 3.2820512820512825e-07, "loss": 1.6371, "step": 193600 }, { "epoch": 1.231138851106563, "grad_norm": 0.66015625, "learning_rate": 3.230769230769231e-07, "loss": 1.6415, "step": 193700 }, { "epoch": 1.231774441633722, "grad_norm": 0.8828125, "learning_rate": 3.17948717948718e-07, "loss": 1.6124, "step": 193800 }, { "epoch": 1.2324100321608806, "grad_norm": 1.046875, "learning_rate": 3.1282051282051284e-07, "loss": 1.6265, "step": 193900 }, { "epoch": 1.2330456226880395, "grad_norm": 0.95703125, "learning_rate": 3.0769230769230774e-07, "loss": 1.6312, "step": 194000 }, { "epoch": 1.2336812132151982, "grad_norm": 0.98828125, "learning_rate": 3.025641025641026e-07, "loss": 1.6242, "step": 194100 }, { "epoch": 1.234316803742357, "grad_norm": 0.84375, "learning_rate": 2.9743589743589744e-07, "loss": 1.6233, "step": 194200 }, { "epoch": 1.2349523942695158, "grad_norm": 0.61328125, "learning_rate": 2.9230769230769234e-07, "loss": 1.6164, "step": 194300 }, { "epoch": 1.2355879847966746, "grad_norm": 1.21875, "learning_rate": 2.871794871794872e-07, "loss": 1.6302, "step": 194400 }, { "epoch": 1.2362235753238333, "grad_norm": 0.6953125, "learning_rate": 2.820512820512821e-07, "loss": 1.6237, "step": 194500 }, { "epoch": 1.2368591658509922, "grad_norm": 1.1328125, "learning_rate": 2.76923076923077e-07, "loss": 1.6299, "step": 194600 }, { "epoch": 1.2374947563781509, "grad_norm": 1.1953125, "learning_rate": 2.717948717948718e-07, "loss": 1.6201, "step": 194700 }, { "epoch": 1.2381303469053098, "grad_norm": 0.94921875, "learning_rate": 2.666666666666667e-07, "loss": 1.6312, "step": 194800 }, { "epoch": 1.2387659374324684, "grad_norm": 0.76953125, "learning_rate": 2.6153846153846157e-07, "loss": 1.616, "step": 194900 }, { "epoch": 1.2394015279596273, "grad_norm": 0.82421875, "learning_rate": 2.564102564102564e-07, "loss": 1.6243, "step": 195000 }, { "epoch": 1.240037118486786, "grad_norm": 0.85546875, "learning_rate": 2.512820512820513e-07, "loss": 1.6187, "step": 195100 }, { "epoch": 1.2406727090139449, "grad_norm": 1.1953125, "learning_rate": 2.4615384615384616e-07, "loss": 1.6108, "step": 195200 }, { "epoch": 1.2413082995411036, "grad_norm": 0.88671875, "learning_rate": 2.4102564102564106e-07, "loss": 1.6293, "step": 195300 }, { "epoch": 1.2419438900682624, "grad_norm": 1.1640625, "learning_rate": 2.3589743589743593e-07, "loss": 1.624, "step": 195400 }, { "epoch": 1.2425794805954211, "grad_norm": 0.9140625, "learning_rate": 2.307692307692308e-07, "loss": 1.6333, "step": 195500 }, { "epoch": 1.24321507112258, "grad_norm": 0.875, "learning_rate": 2.2564102564102568e-07, "loss": 1.6186, "step": 195600 }, { "epoch": 1.2438506616497387, "grad_norm": 0.88671875, "learning_rate": 2.2051282051282053e-07, "loss": 1.6238, "step": 195700 }, { "epoch": 1.2444862521768976, "grad_norm": 0.9765625, "learning_rate": 2.153846153846154e-07, "loss": 1.6251, "step": 195800 }, { "epoch": 1.2451218427040565, "grad_norm": 0.9375, "learning_rate": 2.1025641025641027e-07, "loss": 1.6266, "step": 195900 }, { "epoch": 1.2457574332312151, "grad_norm": 1.265625, "learning_rate": 2.0512820512820514e-07, "loss": 1.629, "step": 196000 }, { "epoch": 1.2463930237583738, "grad_norm": 0.64453125, "learning_rate": 2.0000000000000002e-07, "loss": 1.6201, "step": 196100 }, { "epoch": 1.2470286142855327, "grad_norm": 1.2265625, "learning_rate": 1.948717948717949e-07, "loss": 1.6353, "step": 196200 }, { "epoch": 1.2476642048126916, "grad_norm": 0.84375, "learning_rate": 1.8974358974358976e-07, "loss": 1.6252, "step": 196300 }, { "epoch": 1.2482997953398502, "grad_norm": 0.8046875, "learning_rate": 1.8461538461538464e-07, "loss": 1.6206, "step": 196400 }, { "epoch": 1.248935385867009, "grad_norm": 0.91796875, "learning_rate": 1.7948717948717948e-07, "loss": 1.628, "step": 196500 }, { "epoch": 1.2495709763941678, "grad_norm": 0.9375, "learning_rate": 1.7435897435897435e-07, "loss": 1.6288, "step": 196600 }, { "epoch": 1.2502065669213267, "grad_norm": 0.9609375, "learning_rate": 1.6923076923076923e-07, "loss": 1.634, "step": 196700 }, { "epoch": 1.2508421574484854, "grad_norm": 0.74609375, "learning_rate": 1.6410256410256413e-07, "loss": 1.6227, "step": 196800 }, { "epoch": 1.251477747975644, "grad_norm": 0.9609375, "learning_rate": 1.58974358974359e-07, "loss": 1.6262, "step": 196900 }, { "epoch": 1.252113338502803, "grad_norm": 0.72265625, "learning_rate": 1.5384615384615387e-07, "loss": 1.6395, "step": 197000 }, { "epoch": 1.2527489290299618, "grad_norm": 0.8515625, "learning_rate": 1.4871794871794872e-07, "loss": 1.6084, "step": 197100 }, { "epoch": 1.2533845195571205, "grad_norm": 0.8671875, "learning_rate": 1.435897435897436e-07, "loss": 1.6321, "step": 197200 }, { "epoch": 1.2540201100842794, "grad_norm": 0.70703125, "learning_rate": 1.384615384615385e-07, "loss": 1.6181, "step": 197300 }, { "epoch": 1.254655700611438, "grad_norm": 0.78125, "learning_rate": 1.3333333333333336e-07, "loss": 1.627, "step": 197400 }, { "epoch": 1.255291291138597, "grad_norm": 0.6953125, "learning_rate": 1.282051282051282e-07, "loss": 1.6061, "step": 197500 }, { "epoch": 1.2559268816657556, "grad_norm": 0.9296875, "learning_rate": 1.2307692307692308e-07, "loss": 1.629, "step": 197600 }, { "epoch": 1.2565624721929145, "grad_norm": 0.98828125, "learning_rate": 1.1794871794871797e-07, "loss": 1.6315, "step": 197700 }, { "epoch": 1.2571980627200732, "grad_norm": 0.88671875, "learning_rate": 1.1282051282051284e-07, "loss": 1.6194, "step": 197800 }, { "epoch": 1.257833653247232, "grad_norm": 0.8515625, "learning_rate": 1.076923076923077e-07, "loss": 1.6216, "step": 197900 }, { "epoch": 1.2584692437743907, "grad_norm": 0.8125, "learning_rate": 1.0256410256410257e-07, "loss": 1.6253, "step": 198000 }, { "epoch": 1.2591048343015496, "grad_norm": 1.0234375, "learning_rate": 9.743589743589745e-08, "loss": 1.6114, "step": 198100 }, { "epoch": 1.2597404248287083, "grad_norm": 0.9765625, "learning_rate": 9.230769230769232e-08, "loss": 1.6078, "step": 198200 }, { "epoch": 1.2603760153558672, "grad_norm": 0.87109375, "learning_rate": 8.717948717948718e-08, "loss": 1.6376, "step": 198300 }, { "epoch": 1.2610116058830259, "grad_norm": 0.8828125, "learning_rate": 8.205128205128206e-08, "loss": 1.6233, "step": 198400 }, { "epoch": 1.2616471964101847, "grad_norm": 0.98828125, "learning_rate": 7.692307692307694e-08, "loss": 1.6106, "step": 198500 }, { "epoch": 1.2622827869373434, "grad_norm": 0.9375, "learning_rate": 7.17948717948718e-08, "loss": 1.6388, "step": 198600 }, { "epoch": 1.2629183774645023, "grad_norm": 2.09375, "learning_rate": 6.666666666666668e-08, "loss": 1.6184, "step": 198700 }, { "epoch": 1.263553967991661, "grad_norm": 1.2734375, "learning_rate": 6.153846153846154e-08, "loss": 1.616, "step": 198800 }, { "epoch": 1.2641895585188199, "grad_norm": 1.1171875, "learning_rate": 5.641025641025642e-08, "loss": 1.6208, "step": 198900 }, { "epoch": 1.2648251490459785, "grad_norm": 1.0390625, "learning_rate": 5.1282051282051286e-08, "loss": 1.606, "step": 199000 }, { "epoch": 1.2654607395731374, "grad_norm": 1.015625, "learning_rate": 4.615384615384616e-08, "loss": 1.6127, "step": 199100 }, { "epoch": 1.266096330100296, "grad_norm": 0.89453125, "learning_rate": 4.102564102564103e-08, "loss": 1.6287, "step": 199200 }, { "epoch": 1.266731920627455, "grad_norm": 1.2421875, "learning_rate": 3.58974358974359e-08, "loss": 1.6168, "step": 199300 }, { "epoch": 1.2673675111546139, "grad_norm": 0.93359375, "learning_rate": 3.076923076923077e-08, "loss": 1.6341, "step": 199400 }, { "epoch": 1.2680031016817725, "grad_norm": 0.8671875, "learning_rate": 2.5641025641025643e-08, "loss": 1.6117, "step": 199500 }, { "epoch": 1.2686386922089312, "grad_norm": 0.9609375, "learning_rate": 2.0512820512820516e-08, "loss": 1.6181, "step": 199600 }, { "epoch": 1.26927428273609, "grad_norm": 0.68359375, "learning_rate": 1.5384615384615385e-08, "loss": 1.624, "step": 199700 }, { "epoch": 1.269909873263249, "grad_norm": 0.9921875, "learning_rate": 1.0256410256410258e-08, "loss": 1.6318, "step": 199800 }, { "epoch": 1.2705454637904077, "grad_norm": 0.953125, "learning_rate": 5.128205128205129e-09, "loss": 1.6126, "step": 199900 }, { "epoch": 1.2711810543175663, "grad_norm": 0.9609375, "learning_rate": 0.0, "loss": 1.6342, "step": 200000 } ], "logging_steps": 100, "max_steps": 200000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.369174972818627e+19, "train_batch_size": 3, "trial_name": null, "trial_params": null }